From 41d9c0b82ad5202c480a95cc45f987e2fffa7936 Mon Sep 17 00:00:00 2001 From: koniifer Date: Sat, 19 Oct 2024 19:45:41 +0100 Subject: [PATCH] miscellaneous changes, free performance tweaks, and fiddle with enabling avx finally --- Cargo.lock | 72 ++++----------- dev/src/main.rs | 1 + kernel/.cargo/config.toml | 6 -- kernel/Cargo.toml | 2 +- kernel/src/arch/x86_64/mod.rs | 92 ++++++++++--------- kernel/src/holeybytes/ecah.rs | 15 --- .../holeybytes/kernel_services/mem_serve.rs | 49 +++------- kernel/src/holeybytes/mod.rs | 2 +- ...4-v3-ableos.json => x86_64_v3-ableos.json} | 0 repbuild/Cargo.toml | 5 +- repbuild/src/main.rs | 18 +++- .../render_example/src/examples/surface.hb | 3 +- 12 files changed, 103 insertions(+), 162 deletions(-) rename kernel/targets/{x86_64-v3-ableos.json => x86_64_v3-ableos.json} (100%) diff --git a/Cargo.lock b/Cargo.lock index 8ad2b1a..536d960 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,18 +26,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "allocator-api2" version = "0.2.18" @@ -127,9 +115,6 @@ name = "bumpalo" version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" -dependencies = [ - "allocator-api2", -] [[package]] name = "byteorder" @@ -255,6 +240,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -331,43 +322,37 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" -[[package]] -name = "hashbrown" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -dependencies = [ - "ahash", - "allocator-api2", -] - [[package]] name = "hashbrown" version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "hbbytecode" version = "0.1.0" -source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a" +source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b" [[package]] name = "hblang" version = "0.1.0" -source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a" +source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b" dependencies = [ - "hashbrown 0.15.0", + "hashbrown", "hbbytecode", "hbvm", "log", - "regalloc2", ] [[package]] name = "hbvm" version = "0.1.0" -source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a" +source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b" dependencies = [ "hbbytecode", ] @@ -514,7 +499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown", ] [[package]] @@ -545,7 +530,7 @@ dependencies = [ "aarch64-cpu", "crossbeam-queue", "derive_more", - "hashbrown 0.14.5", + "hashbrown", "hbvm", "limine", "log", @@ -830,19 +815,6 @@ dependencies = [ "bitflags 2.6.0", ] -[[package]] -name = "regalloc2" -version = "0.10.2" -source = "git+https://github.com/jakubDoka/regalloc2?branch=reuse-allocations#21c43e3ee182824e92e2b25f1d3c03ed47f9c02b" -dependencies = [ - "allocator-api2", - "bumpalo", - "hashbrown 0.14.5", - "log", - "rustc-hash", - "smallvec", -] - [[package]] name = "regex-syntax" version = "0.8.5" @@ -1035,9 +1007,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.131" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", @@ -1325,12 +1297,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "versioning" version = "0.1.3" diff --git a/dev/src/main.rs b/dev/src/main.rs index c712822..aaa017b 100644 --- a/dev/src/main.rs +++ b/dev/src/main.rs @@ -1,3 +1,4 @@ +#![allow(unused)] use std::io::Write; use idl::build_idl; diff --git a/kernel/.cargo/config.toml b/kernel/.cargo/config.toml index dc3c3df..e052590 100644 --- a/kernel/.cargo/config.toml +++ b/kernel/.cargo/config.toml @@ -4,9 +4,3 @@ build-std-features = ["compiler-builtins-mem"] [build] target = "./targets/x86_64-ableos.json" - -# [target.'cfg(target_arch = "x86_64")'] -# rustflags = [ -# "-C", -#"target-feature=+sse4.1,+avx,+aes,+fma,+popcnt,+bmi2,+avx2,+lzcnt,+xsave", -# ] diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml index ff6604b..3f7e4bc 100644 --- a/kernel/Cargo.toml +++ b/kernel/Cargo.toml @@ -16,7 +16,7 @@ uart_16550 = { version = "0.3", features = ["nightly"] } xml.git = "https://git.ablecorp.us/ableos/ableos_userland" versioning.git = "https://git.ablecorp.us/ableos/ableos_userland" # able_graphics_library.git = "https://git.ablecorp.us/ableos/ableos_userland" -hashbrown = { version = "0.14", features = ["nightly"] } +hashbrown = { version = "0.15", features = ["nightly"] } limine = "0.1" [dependencies.crossbeam-queue] diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs index 413bde6..c45a7e2 100644 --- a/kernel/src/arch/x86_64/mod.rs +++ b/kernel/src/arch/x86_64/mod.rs @@ -30,6 +30,7 @@ const INITIAL_KERNEL_HEAP_SIZE: *const () = _initial_kernel_heap_size as _; #[no_mangle] #[naked] +#[cfg(not(target_feature = "avx2"))] unsafe extern "C" fn _kernel_start() -> ! { // Initialise SSE, then jump to kernel entrypoint core::arch::asm!( @@ -49,58 +50,59 @@ unsafe extern "C" fn _kernel_start() -> ! { ) } -// #[no_mangle] -// #[naked] -// unsafe extern "C" fn _kernel_start() -> ! { -// core::arch::asm!( -// // Enable protected mode and configure control registers -// "mov rax, cr0", -// "and ax, 0xFFFB", // Clear CR0.EM (bit 2) for coprocessor emulation -// "or ax, 0x2", // Set CR0.MP (bit 1) for coprocessor monitoring -// "mov cr0, rax", +#[no_mangle] +#[naked] +#[cfg(target_feature = "avx2")] +unsafe extern "C" fn _kernel_start() -> ! { + core::arch::asm!( + // Enable protected mode and configure control registers + "mov rax, cr0", + "and ax, 0xFFFB", // Clear CR0.EM (bit 2) for coprocessor emulation + "or ax, 0x2", // Set CR0.MP (bit 1) for coprocessor monitoring + "mov cr0, rax", -// "mov rax, cr4", -// "or ax, (1 << 9) | (1 << 10)", // Set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10) -// "mov cr4, rax", + "mov rax, cr4", + "or ax, (1 << 9) | (1 << 10)", // Set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10) + "mov cr4, rax", -// // Enable OSXSAVE (required for AVX, AVX2, and XSAVE) -// "mov rax, cr4", -// "or eax, 1 << 18", // Set CR4.OSXSAVE (bit 18) -// "mov cr4, rax", + // Enable OSXSAVE (required for AVX, AVX2, and XSAVE) + "mov rax, cr4", + "or eax, 1 << 18", // Set CR4.OSXSAVE (bit 18) + "mov cr4, rax", -// // Enable AVX and AVX2 state saving -// "xor rcx, rcx", -// "xgetbv", -// "or eax, 7", // Enable SSE, AVX, and AVX2 state saving -// "xsetbv", + // Enable AVX and AVX2 state saving + "xor rcx, rcx", + "xgetbv", + "or eax, 7", // Enable SSE, AVX, and AVX2 state saving + "xsetbv", -// // Check for AVX and XSAVE support -// "mov eax, 1", -// "cpuid", -// "and ecx, 0x18000000", -// "cmp ecx, 0x18000000", -// "jne {1}", // Jump if AVX/OSXSAVE is not supported + // Check for AVX and XSAVE support + "mov eax, 1", + "cpuid", + "and ecx, 0x18000000", + "cmp ecx, 0x18000000", + "jne {1}", // Jump if AVX/OSXSAVE is not supported -// // Check for BMI2 and AVX2 support -// "mov eax, 7", -// "xor ecx, ecx", -// "cpuid", -// "and ebx, (1 << 8) | (1 << 5)", // Check BMI2 (bit 8) and AVX2 (bit 5) -// "cmp ebx, (1 << 8) | (1 << 5)", // Compare to ensure both are supported + // Check for BMI2 and AVX2 support + "mov eax, 7", + "xor ecx, ecx", + "cpuid", + "and ebx, (1 << 8) | (1 << 5)", // Check BMI2 (bit 8) and AVX2 (bit 5) + "cmp ebx, (1 << 8) | (1 << 5)", // Compare to ensure both are supported -// // Check for LZCNT and POPCNT support -// "mov eax, 1", -// "cpuid", -// "and ecx, (1 << 5) | (1 << 23)", // Check LZCNT (bit 5) and POPCNT (bit 23) -// "cmp ecx, (1 << 5) | (1 << 23)", // Compare to ensure both are supported + // Check for LZCNT and POPCNT support + "mov eax, 1", + "cpuid", + "and ecx, (1 << 5) | (1 << 23)", // Check LZCNT (bit 5) and POPCNT (bit 23) + "cmp ecx, (1 << 5) | (1 << 23)", // Compare to ensure both are supported -// // Jump to the kernel entry point -// "jmp {0}", -// sym start, -// sym oops, -// options(noreturn), -// ) -// } + // Jump to the kernel entry point + "jmp {0}", + sym start, + sym oops, + options(noreturn), + ) +} unsafe extern "C" fn oops() -> ! { panic!("your cpu is ancient >:(") diff --git a/kernel/src/holeybytes/ecah.rs b/kernel/src/holeybytes/ecah.rs index 0d077d3..5535113 100644 --- a/kernel/src/holeybytes/ecah.rs +++ b/kernel/src/holeybytes/ecah.rs @@ -26,9 +26,6 @@ unsafe fn x86_in(address: u16) -> T { #[inline(always)] pub fn handler(vm: &mut Vm) { let ecall_number = vm.registers[2].cast::(); - // log::info!("eca called :pensive:"); - // debug!("Ecall number {:?}", ecall_number); - //info!("Register dump: {:?}", vm.registers); match ecall_number { 0 => { @@ -207,15 +204,3 @@ pub enum LogError { NoMessages, InvalidLogFormat, } - -// use {alloc::vec, log::Record}; -// fn memory_msg_handler(vm: &mut Vm, mem_addr: u64, length: usize) -> Result<(), LogError> { -// let mut val = alloc::vec::Vec::new(); -// for _ in 0..4096 { -// val.push(0); -// } -// info!("Block address: {:?}", val.as_ptr()); -// vm.registers[1] = hbvm::value::Value(val.as_ptr() as u64); -// vm.registers[2] = hbvm::value::Value(4096); -// Ok(()) -// } diff --git a/kernel/src/holeybytes/kernel_services/mem_serve.rs b/kernel/src/holeybytes/kernel_services/mem_serve.rs index bacf2d5..29c2282 100644 --- a/kernel/src/holeybytes/kernel_services/mem_serve.rs +++ b/kernel/src/holeybytes/kernel_services/mem_serve.rs @@ -17,38 +17,16 @@ pub enum MemoryQuotaType { } fn alloc_page(vm: &mut Vm, _mem_addr: u64, _length: usize) -> Result<(), MemoryServiceError> { - let ptr = unsafe { alloc(Layout::from_size_align_unchecked(4096, 4096)) }; + let ptr = unsafe { alloc(Layout::from_size_align_unchecked(4096, 8)) }; info!("Block address: {:?}", ptr); vm.registers[1] = hbvm::value::Value(ptr as u64); vm.registers[2] = hbvm::value::Value(4096); Ok(()) } -#[inline(always)] -unsafe fn memcpy(mut dest: *mut u8, mut src: *const u8, mut count: usize) { - if count < 16 { - src.copy_to_nonoverlapping(dest, count); - return; - } - - while count >= 8 { - if (src as usize) & 7 == 0 && (dest as usize) & 7 == 0 { - *(dest as *mut u64) = *(src as *const u64); - } else { - src.copy_to_nonoverlapping(dest, 8); - } - dest = dest.add(8); - src = src.add(8); - count -= 8; - } - - if count > 0 { - src.copy_to_nonoverlapping(dest, count); - } -} - #[inline(always)] unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) { + const BLOCK_SIZE: usize = 64; let mut remaining = count * size; if remaining < 16 { @@ -56,11 +34,11 @@ unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) { return; } - let mut buffer = [0u8; 64]; + let mut buffer = [0u8; BLOCK_SIZE]; let mut buffer_size = size; src.copy_to_nonoverlapping(buffer.as_mut_ptr(), size); - while buffer_size * 2 <= 64 { + while buffer_size * 2 <= BLOCK_SIZE { buffer .as_mut_ptr() .copy_to_nonoverlapping(buffer.as_mut_ptr().add(buffer_size), buffer_size); @@ -68,12 +46,15 @@ unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) { } let buffer_ptr = buffer.as_ptr() as *const u64; + + while (dest as usize) & 7 != 0 && remaining >= 8 { + buffer.as_ptr().copy_to_nonoverlapping(dest, 1); + dest = dest.add(1); + remaining -= 1; + } + while remaining >= 8 { - if (dest as usize) & 7 == 0 { - *(dest as *mut u64) = *buffer_ptr; - } else { - buffer.as_ptr().copy_to_nonoverlapping(dest, 8); - } + *(dest as *mut u64) = *buffer_ptr; dest = dest.add(8); remaining -= 8; } @@ -101,7 +82,7 @@ pub fn memory_msg_handler( let ptr = alloc(Layout::from_size_align_unchecked( page_count as usize * 4096, - 4096, + 8, )); vm.registers[1] = hbvm::value::Value(ptr as u64); @@ -117,7 +98,7 @@ pub fn memory_msg_handler( dealloc( mptr as *mut u8, - Layout::from_size_align_unchecked(page_count as usize * 4096, 4096), + Layout::from_size_align_unchecked(page_count as usize * 4096, 8), ) }, 2 => { @@ -147,7 +128,7 @@ pub fn memory_msg_handler( let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8; let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8; - memcpy(dest, src, count); + src.copy_to_nonoverlapping(dest, count); }, 5 => unsafe { let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize; diff --git a/kernel/src/holeybytes/mod.rs b/kernel/src/holeybytes/mod.rs index 9b9a1d2..42e4812 100644 --- a/kernel/src/holeybytes/mod.rs +++ b/kernel/src/holeybytes/mod.rs @@ -97,7 +97,7 @@ impl HandlePageFault for PageFaultHandler { #[inline(always)] const fn stack_layout() -> Layout { - unsafe { Layout::from_size_align_unchecked(STACK_SIZE, 4096) } + unsafe { Layout::from_size_align_unchecked(STACK_SIZE, 8) } } #[inline(always)] diff --git a/kernel/targets/x86_64-v3-ableos.json b/kernel/targets/x86_64_v3-ableos.json similarity index 100% rename from kernel/targets/x86_64-v3-ableos.json rename to kernel/targets/x86_64_v3-ableos.json diff --git a/repbuild/Cargo.toml b/repbuild/Cargo.toml index 4d7abaa..01e7755 100644 --- a/repbuild/Cargo.toml +++ b/repbuild/Cargo.toml @@ -20,8 +20,9 @@ derive_more = { version = "1", default-features = false, features = [ error-stack = "0.5" fatfs = "0.3" toml = "0.8" -# hbasm.git = "https://git.ablecorp.us/AbleOS/holey-bytes.git" -hblang.git = "https://git.ablecorp.us/AbleOS/holey-bytes.git" +hblang = { git = "https://git.ablecorp.us/AbleOS/holey-bytes.git", features = [ + "std", +], default-features = false } log = "0.4" raw-cpuid = "11" diff --git a/repbuild/src/main.rs b/repbuild/src/main.rs index 9963774..f32f993 100644 --- a/repbuild/src/main.rs +++ b/repbuild/src/main.rs @@ -38,6 +38,8 @@ fn main() -> Result<(), Error> { target = Target::Riscv64Virt; } else if arg == "arm64" || arg == "aarch64" || arg == "aarch64-virt" { target = Target::Aarch64; + } else if arg == "avx2" { + target = Target::X86_64Avx2; } else { return Err(report!(Error::InvalidSubCom)); } @@ -61,6 +63,8 @@ fn main() -> Result<(), Error> { target = Target::Aarch64; } else if arg == "--noaccel" { do_accel = false; + } else if arg == "avx2" { + target = Target::X86_64Avx2; } else { return Err(report!(Error::InvalidSubCom)); } @@ -304,6 +308,9 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> { if target == Target::Aarch64 { com.args(["--target", "targets/aarch64-virt-ableos.json"]); } + if target == Target::X86_64Avx2 { + com.args(["--target", "targets/x86_64_v3-ableos.json"]); + } match com.status() { Ok(s) if s.code() != Some(0) => bail!(Error::Build), @@ -317,6 +324,10 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> { path.push_str("_x86-64"); "target/x86_64-ableos" } + Target::X86_64Avx2 => { + path.push_str("_x86-64"); + "target/x86_64_v3-ableos" + } Target::Riscv64Virt => "target/riscv64-virt-ableos", Target::Aarch64 => { path.push_str("_aarch64"); @@ -341,7 +352,7 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> { fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> { let target_str = match target { - Target::X86_64 => "qemu-system-x86_64", + Target::X86_64 | Target::X86_64Avx2 => "qemu-system-x86_64", Target::Riscv64Virt => "qemu-system-riscv64", Target::Aarch64 => "qemu-system-aarch64", }; @@ -384,7 +395,7 @@ fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> { }; match target { - Target::X86_64 => { + Target::X86_64 | Target::X86_64Avx2 => { #[rustfmt::skip] com.args([ "-bios", &ovmf_path.change_context(Error::OvmfFetch)?, @@ -440,7 +451,7 @@ fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> { fn fetch_ovmf(target: Target) -> Result { let (ovmf_url, ovmf_path) = match target { - Target::X86_64 => ( + Target::X86_64 | Target::X86_64Avx2 => ( "https://retrage.github.io/edk2-nightly/bin/RELEASEX64_OVMF.fd", "target/RELEASEX64_OVMF.fd", ), @@ -489,6 +500,7 @@ impl Context for OvmfFetchError {} #[derive(Clone, Copy, PartialEq, Eq)] enum Target { X86_64, + X86_64Avx2, Riscv64Virt, Aarch64, } diff --git a/sysdata/programs/render_example/src/examples/surface.hb b/sysdata/programs/render_example/src/examples/surface.hb index 50353eb..003baec 100644 --- a/sysdata/programs/render_example/src/examples/surface.hb +++ b/sysdata/programs/render_example/src/examples/surface.hb @@ -3,7 +3,7 @@ render := @use("../../../../libraries/render/src/lib.hb") /* expected result: - the square example bounces around the screen */ + bouncing gradient square inside coloured bouncing box inside black screen */ example := fn(): void { screen := render.init(true) @@ -20,7 +20,6 @@ example := fn(): void { target_color := color loop { render.clear(screen, render.black) - // color += .(1, 1, 1, 1) render.put_filled_rect(image, pos_inner, .(side, side), color) render.put_rect(image, pos_inner, .(side, side), render.black) render.put_rect(image, .(0, 0), .(image.width - 1, image.height - 1), color)