optimisations
This commit is contained in:
parent
3af28f1666
commit
820c3e459b
|
@ -24,6 +24,7 @@ fn alloc_page(vm: &mut Vm, _mem_addr: u64, _length: usize) -> Result<(), MemoryS
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn memory_msg_handler(
|
||||
vm: &mut Vm,
|
||||
mem_addr: u64,
|
||||
|
@ -82,32 +83,124 @@ pub fn memory_msg_handler(
|
|||
let page_count = msg_vec[1];
|
||||
log::debug!(" {} pages", page_count);
|
||||
}
|
||||
// memcpy
|
||||
// trash but fast memcpy
|
||||
4 => unsafe {
|
||||
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
||||
let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8;
|
||||
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
||||
src.copy_to(dest, count);
|
||||
|
||||
let mut src_ptr = src;
|
||||
let mut dest_ptr = dest;
|
||||
let mut remaining = count;
|
||||
|
||||
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
|
||||
*dest_ptr = *src_ptr;
|
||||
src_ptr = src_ptr.add(1);
|
||||
dest_ptr = dest_ptr.add(1);
|
||||
remaining -= 1;
|
||||
}
|
||||
|
||||
let mut src_ptr_64 = src_ptr as *const u64;
|
||||
let mut dest_ptr_64 = dest_ptr as *mut u64;
|
||||
while remaining >= 64 {
|
||||
let (s1, s2, s3, s4, s5, s6, s7, s8) = (
|
||||
*src_ptr_64,
|
||||
*src_ptr_64.add(1),
|
||||
*src_ptr_64.add(2),
|
||||
*src_ptr_64.add(3),
|
||||
*src_ptr_64.add(4),
|
||||
*src_ptr_64.add(5),
|
||||
*src_ptr_64.add(6),
|
||||
*src_ptr_64.add(7),
|
||||
);
|
||||
*dest_ptr_64 = s1;
|
||||
*dest_ptr_64.add(1) = s2;
|
||||
*dest_ptr_64.add(2) = s3;
|
||||
*dest_ptr_64.add(3) = s4;
|
||||
*dest_ptr_64.add(4) = s5;
|
||||
*dest_ptr_64.add(5) = s6;
|
||||
*dest_ptr_64.add(6) = s7;
|
||||
*dest_ptr_64.add(7) = s8;
|
||||
src_ptr_64 = src_ptr_64.add(8);
|
||||
dest_ptr_64 = dest_ptr_64.add(8);
|
||||
remaining -= 64;
|
||||
}
|
||||
|
||||
while remaining >= 8 {
|
||||
*dest_ptr_64 = *src_ptr_64;
|
||||
src_ptr_64 = src_ptr_64.add(1);
|
||||
dest_ptr_64 = dest_ptr_64.add(1);
|
||||
remaining -= 8;
|
||||
}
|
||||
|
||||
src_ptr = src_ptr_64 as *const u8;
|
||||
dest_ptr = dest_ptr_64 as *mut u8;
|
||||
for _ in 0..remaining {
|
||||
*dest_ptr = *src_ptr;
|
||||
src_ptr = src_ptr.add(1);
|
||||
dest_ptr = dest_ptr.add(1);
|
||||
}
|
||||
},
|
||||
// memset
|
||||
|
||||
// trash but fast memset
|
||||
5 => unsafe {
|
||||
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
||||
let size = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as usize;
|
||||
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
||||
let src = u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *mut u8;
|
||||
let src =
|
||||
u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *const u8;
|
||||
|
||||
let total_size = count * size;
|
||||
|
||||
if total_size > 32 {
|
||||
core::ptr::copy(src, dest, size);
|
||||
let pattern = core::slice::from_raw_parts(dest, size);
|
||||
let mut offset = size;
|
||||
let mut pattern_512 = [0u8; 64];
|
||||
for i in 0..64 {
|
||||
pattern_512[i] = *src.add(i % size);
|
||||
}
|
||||
let pattern_512_ptr = pattern_512.as_ptr() as *const u64;
|
||||
|
||||
while offset < total_size {
|
||||
let remaining = total_size - offset;
|
||||
let copy_size = remaining.min(offset);
|
||||
core::ptr::copy_nonoverlapping(pattern.as_ptr(), dest.add(offset), copy_size);
|
||||
offset += copy_size;
|
||||
let mut dest_ptr = dest;
|
||||
let mut remaining = total_size;
|
||||
|
||||
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
|
||||
*dest_ptr = *src;
|
||||
dest_ptr = dest_ptr.add(1);
|
||||
remaining -= 1;
|
||||
}
|
||||
|
||||
let mut dest_ptr_64 = dest_ptr as *mut u64;
|
||||
while remaining >= 64 {
|
||||
let (p1, p2, p3, p4, p5, p6, p7, p8) = (
|
||||
*pattern_512_ptr,
|
||||
*pattern_512_ptr.add(1),
|
||||
*pattern_512_ptr.add(2),
|
||||
*pattern_512_ptr.add(3),
|
||||
*pattern_512_ptr.add(4),
|
||||
*pattern_512_ptr.add(5),
|
||||
*pattern_512_ptr.add(6),
|
||||
*pattern_512_ptr.add(7),
|
||||
);
|
||||
*dest_ptr_64 = p1;
|
||||
*dest_ptr_64.add(1) = p2;
|
||||
*dest_ptr_64.add(2) = p3;
|
||||
*dest_ptr_64.add(3) = p4;
|
||||
*dest_ptr_64.add(4) = p5;
|
||||
*dest_ptr_64.add(5) = p6;
|
||||
*dest_ptr_64.add(6) = p7;
|
||||
*dest_ptr_64.add(7) = p8;
|
||||
dest_ptr_64 = dest_ptr_64.add(8);
|
||||
remaining -= 64;
|
||||
}
|
||||
|
||||
while remaining >= 8 {
|
||||
*dest_ptr_64 = *pattern_512_ptr;
|
||||
dest_ptr_64 = dest_ptr_64.add(1);
|
||||
remaining -= 8;
|
||||
}
|
||||
|
||||
dest_ptr = dest_ptr_64 as *mut u8;
|
||||
for i in 0..remaining {
|
||||
*dest_ptr.add(i) = *src.add(i % size);
|
||||
}
|
||||
} else {
|
||||
for i in 0..total_size {
|
||||
|
|
2
known_bugs.md
Normal file
2
known_bugs.md
Normal file
|
@ -0,0 +1,2 @@
|
|||
# I did not know where to put this
|
||||
- memcpy / memset crash on debug builds because of pointer misalignment; the crash does not occur on release builds
|
|
@ -68,25 +68,32 @@ put_pixel := fn(pos: Vec2(int), color: Color): void {
|
|||
}
|
||||
|
||||
put_filled_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
||||
y := pos.y
|
||||
end_y := y + tr.y
|
||||
loop if y == end_y break else {
|
||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
|
||||
y += 1
|
||||
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
|
||||
|
||||
loop if start_idx >= end_idx break else {
|
||||
@inline(memory.set, Color, &color, ctx.buf + start_idx, @bitcast(tr.x))
|
||||
start_idx += ctx.width
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
put_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
||||
y := pos.y
|
||||
end_y := y + tr.y
|
||||
loop if y == end_y break else {
|
||||
*(ctx.buf + @inline(screenidx, pos.x, y)) = color;
|
||||
*(ctx.buf + @inline(screenidx, pos.x + tr.x, y)) = color
|
||||
y += 1
|
||||
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
|
||||
right_start_idx := @inline(screenidx, pos.x + tr.x, pos.y)
|
||||
|
||||
loop if start_idx > end_idx break else {
|
||||
*(ctx.buf + start_idx) = color;
|
||||
*(ctx.buf + right_start_idx) = color
|
||||
start_idx += ctx.width
|
||||
right_start_idx += ctx.width
|
||||
}
|
||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
|
||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y - tr.y), @bitcast(tr.x))
|
||||
|
||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y), @bitcast(tr.x + 1))
|
||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y + tr.y), @bitcast(tr.x + 1))
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -172,10 +179,21 @@ set_dimensions := fn(new: Vec2(int)): void {
|
|||
}
|
||||
|
||||
put_image := fn(image: Image, pos: Vec2(int)): void {
|
||||
y := 0
|
||||
loop if y == image.height break else {
|
||||
@inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
|
||||
y += 1
|
||||
// y := 0
|
||||
// loop if y == image.height break else {
|
||||
// @inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
|
||||
// y += 1
|
||||
// }
|
||||
// return
|
||||
|
||||
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||
end_idx := @inline(screenidx, pos.x, pos.y + image.height)
|
||||
cursor := image.width * image.height
|
||||
|
||||
loop if start_idx >= end_idx break else {
|
||||
@inline(memory.copy, Color, image.buf + cursor, ctx.buf + start_idx, @intcast(image.width))
|
||||
start_idx += ctx.width
|
||||
cursor -= image.width
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue