optimisations

This commit is contained in:
koniifer 2024-10-14 01:31:23 +01:00
parent 3af28f1666
commit 820c3e459b
3 changed files with 142 additions and 29 deletions

View file

@ -24,6 +24,7 @@ fn alloc_page(vm: &mut Vm, _mem_addr: u64, _length: usize) -> Result<(), MemoryS
Ok(())
}
#[inline(always)]
pub fn memory_msg_handler(
vm: &mut Vm,
mem_addr: u64,
@ -82,32 +83,124 @@ pub fn memory_msg_handler(
let page_count = msg_vec[1];
log::debug!(" {} pages", page_count);
}
// memcpy
// trash but fast memcpy
4 => unsafe {
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8;
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
src.copy_to(dest, count);
let mut src_ptr = src;
let mut dest_ptr = dest;
let mut remaining = count;
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
*dest_ptr = *src_ptr;
src_ptr = src_ptr.add(1);
dest_ptr = dest_ptr.add(1);
remaining -= 1;
}
let mut src_ptr_64 = src_ptr as *const u64;
let mut dest_ptr_64 = dest_ptr as *mut u64;
while remaining >= 64 {
let (s1, s2, s3, s4, s5, s6, s7, s8) = (
*src_ptr_64,
*src_ptr_64.add(1),
*src_ptr_64.add(2),
*src_ptr_64.add(3),
*src_ptr_64.add(4),
*src_ptr_64.add(5),
*src_ptr_64.add(6),
*src_ptr_64.add(7),
);
*dest_ptr_64 = s1;
*dest_ptr_64.add(1) = s2;
*dest_ptr_64.add(2) = s3;
*dest_ptr_64.add(3) = s4;
*dest_ptr_64.add(4) = s5;
*dest_ptr_64.add(5) = s6;
*dest_ptr_64.add(6) = s7;
*dest_ptr_64.add(7) = s8;
src_ptr_64 = src_ptr_64.add(8);
dest_ptr_64 = dest_ptr_64.add(8);
remaining -= 64;
}
while remaining >= 8 {
*dest_ptr_64 = *src_ptr_64;
src_ptr_64 = src_ptr_64.add(1);
dest_ptr_64 = dest_ptr_64.add(1);
remaining -= 8;
}
src_ptr = src_ptr_64 as *const u8;
dest_ptr = dest_ptr_64 as *mut u8;
for _ in 0..remaining {
*dest_ptr = *src_ptr;
src_ptr = src_ptr.add(1);
dest_ptr = dest_ptr.add(1);
}
},
// memset
// trash but fast memset
5 => unsafe {
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
let size = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as usize;
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
let src = u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *mut u8;
let src =
u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *const u8;
let total_size = count * size;
if total_size > 32 {
core::ptr::copy(src, dest, size);
let pattern = core::slice::from_raw_parts(dest, size);
let mut offset = size;
let mut pattern_512 = [0u8; 64];
for i in 0..64 {
pattern_512[i] = *src.add(i % size);
}
let pattern_512_ptr = pattern_512.as_ptr() as *const u64;
while offset < total_size {
let remaining = total_size - offset;
let copy_size = remaining.min(offset);
core::ptr::copy_nonoverlapping(pattern.as_ptr(), dest.add(offset), copy_size);
offset += copy_size;
let mut dest_ptr = dest;
let mut remaining = total_size;
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
*dest_ptr = *src;
dest_ptr = dest_ptr.add(1);
remaining -= 1;
}
let mut dest_ptr_64 = dest_ptr as *mut u64;
while remaining >= 64 {
let (p1, p2, p3, p4, p5, p6, p7, p8) = (
*pattern_512_ptr,
*pattern_512_ptr.add(1),
*pattern_512_ptr.add(2),
*pattern_512_ptr.add(3),
*pattern_512_ptr.add(4),
*pattern_512_ptr.add(5),
*pattern_512_ptr.add(6),
*pattern_512_ptr.add(7),
);
*dest_ptr_64 = p1;
*dest_ptr_64.add(1) = p2;
*dest_ptr_64.add(2) = p3;
*dest_ptr_64.add(3) = p4;
*dest_ptr_64.add(4) = p5;
*dest_ptr_64.add(5) = p6;
*dest_ptr_64.add(6) = p7;
*dest_ptr_64.add(7) = p8;
dest_ptr_64 = dest_ptr_64.add(8);
remaining -= 64;
}
while remaining >= 8 {
*dest_ptr_64 = *pattern_512_ptr;
dest_ptr_64 = dest_ptr_64.add(1);
remaining -= 8;
}
dest_ptr = dest_ptr_64 as *mut u8;
for i in 0..remaining {
*dest_ptr.add(i) = *src.add(i % size);
}
} else {
for i in 0..total_size {

2
known_bugs.md Normal file
View file

@ -0,0 +1,2 @@
# i did not know where to put this
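- memcpy / memset crash on debug builds: the fast paths read and write through `*const u64` / `*mut u64` pointers, but only `dest` is aligned to 8 bytes first, so the source pointer (and the `[u8; 64]` pattern buffer in memset) can be misaligned. Debug builds check pointer alignment on dereference and panic; release builds skip that check, so the crash does not show up there even though the misaligned access is still present.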

View file

@ -68,25 +68,32 @@ put_pixel := fn(pos: Vec2(int), color: Color): void {
}
put_filled_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
y := pos.y
end_y := y + tr.y
loop if y == end_y break else {
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
y += 1
start_idx := @inline(screenidx, pos.x, pos.y)
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
loop if start_idx >= end_idx break else {
@inline(memory.set, Color, &color, ctx.buf + start_idx, @bitcast(tr.x))
start_idx += ctx.width
}
return
}
put_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
y := pos.y
end_y := y + tr.y
loop if y == end_y break else {
*(ctx.buf + @inline(screenidx, pos.x, y)) = color;
*(ctx.buf + @inline(screenidx, pos.x + tr.x, y)) = color
y += 1
start_idx := @inline(screenidx, pos.x, pos.y)
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
right_start_idx := @inline(screenidx, pos.x + tr.x, pos.y)
loop if start_idx > end_idx break else {
*(ctx.buf + start_idx) = color;
*(ctx.buf + right_start_idx) = color
start_idx += ctx.width
right_start_idx += ctx.width
}
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y - tr.y), @bitcast(tr.x))
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y), @bitcast(tr.x + 1))
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y + tr.y), @bitcast(tr.x + 1))
return
}
@ -172,10 +179,21 @@ set_dimensions := fn(new: Vec2(int)): void {
}
put_image := fn(image: Image, pos: Vec2(int)): void {
y := 0
loop if y == image.height break else {
@inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
y += 1
// y := 0
// loop if y == image.height break else {
// @inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
// y += 1
// }
// return
start_idx := @inline(screenidx, pos.x, pos.y)
end_idx := @inline(screenidx, pos.x, pos.y + image.height)
cursor := image.width * image.height
loop if start_idx >= end_idx break else {
@inline(memory.copy, Color, image.buf + cursor, ctx.buf + start_idx, @intcast(image.width))
start_idx += ctx.width
cursor -= image.width
}
return
}