forked from AbleOS/ableos
optimisations
This commit is contained in:
parent
3af28f1666
commit
820c3e459b
|
@ -24,6 +24,7 @@ fn alloc_page(vm: &mut Vm, _mem_addr: u64, _length: usize) -> Result<(), MemoryS
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
pub fn memory_msg_handler(
|
pub fn memory_msg_handler(
|
||||||
vm: &mut Vm,
|
vm: &mut Vm,
|
||||||
mem_addr: u64,
|
mem_addr: u64,
|
||||||
|
@ -82,32 +83,124 @@ pub fn memory_msg_handler(
|
||||||
let page_count = msg_vec[1];
|
let page_count = msg_vec[1];
|
||||||
log::debug!(" {} pages", page_count);
|
log::debug!(" {} pages", page_count);
|
||||||
}
|
}
|
||||||
// memcpy
|
// trash but fast memcpy
|
||||||
4 => unsafe {
|
4 => unsafe {
|
||||||
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
||||||
let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8;
|
let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8;
|
||||||
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
||||||
src.copy_to(dest, count);
|
|
||||||
|
let mut src_ptr = src;
|
||||||
|
let mut dest_ptr = dest;
|
||||||
|
let mut remaining = count;
|
||||||
|
|
||||||
|
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
|
||||||
|
*dest_ptr = *src_ptr;
|
||||||
|
src_ptr = src_ptr.add(1);
|
||||||
|
dest_ptr = dest_ptr.add(1);
|
||||||
|
remaining -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut src_ptr_64 = src_ptr as *const u64;
|
||||||
|
let mut dest_ptr_64 = dest_ptr as *mut u64;
|
||||||
|
while remaining >= 64 {
|
||||||
|
let (s1, s2, s3, s4, s5, s6, s7, s8) = (
|
||||||
|
*src_ptr_64,
|
||||||
|
*src_ptr_64.add(1),
|
||||||
|
*src_ptr_64.add(2),
|
||||||
|
*src_ptr_64.add(3),
|
||||||
|
*src_ptr_64.add(4),
|
||||||
|
*src_ptr_64.add(5),
|
||||||
|
*src_ptr_64.add(6),
|
||||||
|
*src_ptr_64.add(7),
|
||||||
|
);
|
||||||
|
*dest_ptr_64 = s1;
|
||||||
|
*dest_ptr_64.add(1) = s2;
|
||||||
|
*dest_ptr_64.add(2) = s3;
|
||||||
|
*dest_ptr_64.add(3) = s4;
|
||||||
|
*dest_ptr_64.add(4) = s5;
|
||||||
|
*dest_ptr_64.add(5) = s6;
|
||||||
|
*dest_ptr_64.add(6) = s7;
|
||||||
|
*dest_ptr_64.add(7) = s8;
|
||||||
|
src_ptr_64 = src_ptr_64.add(8);
|
||||||
|
dest_ptr_64 = dest_ptr_64.add(8);
|
||||||
|
remaining -= 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
while remaining >= 8 {
|
||||||
|
*dest_ptr_64 = *src_ptr_64;
|
||||||
|
src_ptr_64 = src_ptr_64.add(1);
|
||||||
|
dest_ptr_64 = dest_ptr_64.add(1);
|
||||||
|
remaining -= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
src_ptr = src_ptr_64 as *const u8;
|
||||||
|
dest_ptr = dest_ptr_64 as *mut u8;
|
||||||
|
for _ in 0..remaining {
|
||||||
|
*dest_ptr = *src_ptr;
|
||||||
|
src_ptr = src_ptr.add(1);
|
||||||
|
dest_ptr = dest_ptr.add(1);
|
||||||
|
}
|
||||||
},
|
},
|
||||||
// memset
|
|
||||||
|
// trash but fast memset
|
||||||
5 => unsafe {
|
5 => unsafe {
|
||||||
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;
|
||||||
let size = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as usize;
|
let size = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as usize;
|
||||||
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
|
||||||
let src = u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *mut u8;
|
let src =
|
||||||
|
u64::from_le_bytes(msg_vec[25..33].try_into().unwrap_unchecked()) as *const u8;
|
||||||
|
|
||||||
let total_size = count * size;
|
let total_size = count * size;
|
||||||
|
|
||||||
if total_size > 32 {
|
if total_size > 32 {
|
||||||
core::ptr::copy(src, dest, size);
|
let mut pattern_512 = [0u8; 64];
|
||||||
let pattern = core::slice::from_raw_parts(dest, size);
|
for i in 0..64 {
|
||||||
let mut offset = size;
|
pattern_512[i] = *src.add(i % size);
|
||||||
|
}
|
||||||
|
let pattern_512_ptr = pattern_512.as_ptr() as *const u64;
|
||||||
|
|
||||||
while offset < total_size {
|
let mut dest_ptr = dest;
|
||||||
let remaining = total_size - offset;
|
let mut remaining = total_size;
|
||||||
let copy_size = remaining.min(offset);
|
|
||||||
core::ptr::copy_nonoverlapping(pattern.as_ptr(), dest.add(offset), copy_size);
|
while (dest_ptr as usize) & 7 != 0 && remaining > 0 {
|
||||||
offset += copy_size;
|
*dest_ptr = *src;
|
||||||
|
dest_ptr = dest_ptr.add(1);
|
||||||
|
remaining -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut dest_ptr_64 = dest_ptr as *mut u64;
|
||||||
|
while remaining >= 64 {
|
||||||
|
let (p1, p2, p3, p4, p5, p6, p7, p8) = (
|
||||||
|
*pattern_512_ptr,
|
||||||
|
*pattern_512_ptr.add(1),
|
||||||
|
*pattern_512_ptr.add(2),
|
||||||
|
*pattern_512_ptr.add(3),
|
||||||
|
*pattern_512_ptr.add(4),
|
||||||
|
*pattern_512_ptr.add(5),
|
||||||
|
*pattern_512_ptr.add(6),
|
||||||
|
*pattern_512_ptr.add(7),
|
||||||
|
);
|
||||||
|
*dest_ptr_64 = p1;
|
||||||
|
*dest_ptr_64.add(1) = p2;
|
||||||
|
*dest_ptr_64.add(2) = p3;
|
||||||
|
*dest_ptr_64.add(3) = p4;
|
||||||
|
*dest_ptr_64.add(4) = p5;
|
||||||
|
*dest_ptr_64.add(5) = p6;
|
||||||
|
*dest_ptr_64.add(6) = p7;
|
||||||
|
*dest_ptr_64.add(7) = p8;
|
||||||
|
dest_ptr_64 = dest_ptr_64.add(8);
|
||||||
|
remaining -= 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
while remaining >= 8 {
|
||||||
|
*dest_ptr_64 = *pattern_512_ptr;
|
||||||
|
dest_ptr_64 = dest_ptr_64.add(1);
|
||||||
|
remaining -= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
dest_ptr = dest_ptr_64 as *mut u8;
|
||||||
|
for i in 0..remaining {
|
||||||
|
*dest_ptr.add(i) = *src.add(i % size);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for i in 0..total_size {
|
for i in 0..total_size {
|
||||||
|
|
2
known_bugs.md
Normal file
2
known_bugs.md
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# I did not know where to put this
|
||||||
|
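- memcpy / memset crash on debug builds because of misaligned pointer accesses (the byte pointers are cast to `u64` pointers without guaranteeing 8-byte alignment); the crash does not occur on release builds, presumably because only debug builds check pointer alignment on dereference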
|
|
@ -68,25 +68,32 @@ put_pixel := fn(pos: Vec2(int), color: Color): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
put_filled_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
put_filled_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
||||||
y := pos.y
|
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||||
end_y := y + tr.y
|
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
|
||||||
loop if y == end_y break else {
|
|
||||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
|
loop if start_idx >= end_idx break else {
|
||||||
y += 1
|
@inline(memory.set, Color, &color, ctx.buf + start_idx, @bitcast(tr.x))
|
||||||
|
start_idx += ctx.width
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
put_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
put_rect := fn(pos: Vec2(int), tr: Vec2(int), color: Color): void {
|
||||||
y := pos.y
|
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||||
end_y := y + tr.y
|
end_idx := @inline(screenidx, pos.x, pos.y + tr.y)
|
||||||
loop if y == end_y break else {
|
right_start_idx := @inline(screenidx, pos.x + tr.x, pos.y)
|
||||||
*(ctx.buf + @inline(screenidx, pos.x, y)) = color;
|
|
||||||
*(ctx.buf + @inline(screenidx, pos.x + tr.x, y)) = color
|
loop if start_idx > end_idx break else {
|
||||||
y += 1
|
*(ctx.buf + start_idx) = color;
|
||||||
|
*(ctx.buf + right_start_idx) = color
|
||||||
|
start_idx += ctx.width
|
||||||
|
right_start_idx += ctx.width
|
||||||
}
|
}
|
||||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y), @bitcast(tr.x))
|
|
||||||
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, y - tr.y), @bitcast(tr.x))
|
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y), @bitcast(tr.x + 1))
|
||||||
|
@inline(memory.set, Color, &color, ctx.buf + @inline(screenidx, pos.x, pos.y + tr.y), @bitcast(tr.x + 1))
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,10 +179,21 @@ set_dimensions := fn(new: Vec2(int)): void {
|
||||||
}
|
}
|
||||||
|
|
||||||
put_image := fn(image: Image, pos: Vec2(int)): void {
|
put_image := fn(image: Image, pos: Vec2(int)): void {
|
||||||
y := 0
|
// y := 0
|
||||||
loop if y == image.height break else {
|
// loop if y == image.height break else {
|
||||||
@inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
|
// @inline(memory.copy, Color, image.buf + y * image.width, ctx.buf + @inline(screenidx, pos.x, pos.y + image.height - y), @intcast(image.width))
|
||||||
y += 1
|
// y += 1
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
|
||||||
|
start_idx := @inline(screenidx, pos.x, pos.y)
|
||||||
|
end_idx := @inline(screenidx, pos.x, pos.y + image.height)
|
||||||
|
cursor := image.width * image.height
|
||||||
|
|
||||||
|
loop if start_idx >= end_idx break else {
|
||||||
|
@inline(memory.copy, Color, image.buf + cursor, ctx.buf + start_idx, @intcast(image.width))
|
||||||
|
start_idx += ctx.width
|
||||||
|
cursor -= image.width
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue