miscellaneous changes, free performance tweaks, and fiddle with enabling avx finally

This commit is contained in:
koniifer 2024-10-19 19:45:41 +01:00
parent a94332370a
commit 41d9c0b82a
12 changed files with 103 additions and 162 deletions

72
Cargo.lock generated
View file

@ -26,18 +26,6 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
[[package]]
name = "ahash"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "allocator-api2"
version = "0.2.18"
@ -127,9 +115,6 @@ name = "bumpalo"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
dependencies = [
"allocator-api2",
]
[[package]]
name = "byteorder"
@ -255,6 +240,12 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foldhash"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
@ -331,43 +322,37 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hashbrown"
version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "hbbytecode"
version = "0.1.0"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b"
[[package]]
name = "hblang"
version = "0.1.0"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b"
dependencies = [
"hashbrown 0.15.0",
"hashbrown",
"hbbytecode",
"hbvm",
"log",
"regalloc2",
]
[[package]]
name = "hbvm"
version = "0.1.0"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#6ad0b41759dacd5767b5c9cfbc1b3b11c025396a"
source = "git+https://git.ablecorp.us/AbleOS/holey-bytes.git#15e4762d4ac8993d12fe2dd54e2b2d842c8a034b"
dependencies = [
"hbbytecode",
]
@ -514,7 +499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da"
dependencies = [
"equivalent",
"hashbrown 0.15.0",
"hashbrown",
]
[[package]]
@ -545,7 +530,7 @@ dependencies = [
"aarch64-cpu",
"crossbeam-queue",
"derive_more",
"hashbrown 0.14.5",
"hashbrown",
"hbvm",
"limine",
"log",
@ -830,19 +815,6 @@ dependencies = [
"bitflags 2.6.0",
]
[[package]]
name = "regalloc2"
version = "0.10.2"
source = "git+https://github.com/jakubDoka/regalloc2?branch=reuse-allocations#21c43e3ee182824e92e2b25f1d3c03ed47f9c02b"
dependencies = [
"allocator-api2",
"bumpalo",
"hashbrown 0.14.5",
"log",
"rustc-hash",
"smallvec",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
@ -1035,9 +1007,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.131"
version = "1.0.132"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3"
checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
dependencies = [
"itoa",
"memchr",
@ -1325,12 +1297,6 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "versioning"
version = "0.1.3"

View file

@ -1,3 +1,4 @@
#![allow(unused)]
use std::io::Write;
use idl::build_idl;

View file

@ -4,9 +4,3 @@ build-std-features = ["compiler-builtins-mem"]
[build]
target = "./targets/x86_64-ableos.json"
# [target.'cfg(target_arch = "x86_64")']
# rustflags = [
# "-C",
#"target-feature=+sse4.1,+avx,+aes,+fma,+popcnt,+bmi2,+avx2,+lzcnt,+xsave",
# ]

View file

@ -16,7 +16,7 @@ uart_16550 = { version = "0.3", features = ["nightly"] }
xml.git = "https://git.ablecorp.us/ableos/ableos_userland"
versioning.git = "https://git.ablecorp.us/ableos/ableos_userland"
# able_graphics_library.git = "https://git.ablecorp.us/ableos/ableos_userland"
hashbrown = { version = "0.14", features = ["nightly"] }
hashbrown = { version = "0.15", features = ["nightly"] }
limine = "0.1"
[dependencies.crossbeam-queue]

View file

@ -30,6 +30,7 @@ const INITIAL_KERNEL_HEAP_SIZE: *const () = _initial_kernel_heap_size as _;
#[no_mangle]
#[naked]
#[cfg(not(target_feature = "avx2"))]
unsafe extern "C" fn _kernel_start() -> ! {
// Initialise SSE, then jump to kernel entrypoint
core::arch::asm!(
@ -49,58 +50,59 @@ unsafe extern "C" fn _kernel_start() -> ! {
)
}
// #[no_mangle]
// #[naked]
// unsafe extern "C" fn _kernel_start() -> ! {
// core::arch::asm!(
// // Enable protected mode and configure control registers
// "mov rax, cr0",
// "and ax, 0xFFFB", // Clear CR0.EM (bit 2) for coprocessor emulation
// "or ax, 0x2", // Set CR0.MP (bit 1) for coprocessor monitoring
// "mov cr0, rax",
/// Kernel entry point used when the crate is compiled with the `avx2`
/// target feature.
///
/// Sequence:
/// 1. enable x87/SSE (CR0.EM cleared, CR0.MP set, CR4.OSFXSR and
///    CR4.OSXMMEXCPT set) so that both `start` and `oops` may execute SSE code;
/// 2. verify through CPUID that XSAVE, AVX, POPCNT, BMI2, AVX2 and LZCNT are
///    implemented, branching to `oops` on the first missing feature;
/// 3. set CR4.OSXSAVE and enable the x87, SSE and AVX state components in XCR0;
/// 4. jump to `start`.
///
/// The feature checks run *before* CR4.OSXSAVE is written and before
/// `xgetbv`/`xsetbv` execute, because those operations fault on a CPU
/// without XSAVE and the check would otherwise never be reached.
///
/// # Safety
/// Naked function: it has no prologue and must only be entered as the boot
/// entry point. It clobbers RAX, RBX, RCX and RDX.
#[no_mangle]
#[naked]
#[cfg(target_feature = "avx2")]
unsafe extern "C" fn _kernel_start() -> ! {
    core::arch::asm!(
        // Enable the FPU and SSE.
        "mov rax, cr0",
        "and ax, 0xFFFB", // Clear CR0.EM (bit 2): no coprocessor emulation
        "or ax, 0x2",     // Set CR0.MP (bit 1): monitor coprocessor
        "mov cr0, rax",

        "mov rax, cr4",
        "or ax, (1 << 9) | (1 << 10)", // Set CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10)
        "mov cr4, rax",

        // CPUID.01H:ECX - POPCNT (bit 23), XSAVE (bit 26), AVX (bit 28).
        // OSXSAVE (bit 27) is deliberately not tested: it only mirrors
        // CR4.OSXSAVE, which has not been set yet.
        "mov eax, 1",
        "cpuid",
        "and ecx, (1 << 23) | (1 << 26) | (1 << 28)",
        "cmp ecx, (1 << 23) | (1 << 26) | (1 << 28)",
        "jne {1}",

        // CPUID.(EAX=07H, ECX=0):EBX - AVX2 (bit 5), BMI2 (bit 8).
        "mov eax, 7",
        "xor ecx, ecx",
        "cpuid",
        "and ebx, (1 << 8) | (1 << 5)",
        "cmp ebx, (1 << 8) | (1 << 5)",
        "jne {1}",

        // CPUID.80000001H:ECX - LZCNT (bit 5). Leaf 1 ECX bit 5 is VMX, so
        // LZCNT has to be read from the extended leaf.
        "mov eax, 0x80000001",
        "cpuid",
        "and ecx, 1 << 5",
        "cmp ecx, 1 << 5",
        "jne {1}",

        // All required features are present: enable XSAVE and XGETBV/XSETBV.
        "mov rax, cr4",
        "or eax, 1 << 18", // Set CR4.OSXSAVE (bit 18)
        "mov cr4, rax",

        // XCR0: enable x87 (bit 0), SSE (bit 1) and AVX (bit 2) state.
        // AVX2 has no state component of its own; it reuses the AVX state.
        "xor rcx, rcx",
        "xgetbv",
        "or eax, 7",
        "xsetbv",

        // Jump to the kernel entry point
        "jmp {0}",
        sym start,
        sym oops,
        options(noreturn),
    )
}
unsafe extern "C" fn oops() -> ! {
panic!("your cpu is ancient >:(")

View file

@ -26,9 +26,6 @@ unsafe fn x86_in<T: x86_64::instructions::port::PortRead>(address: u16) -> T {
#[inline(always)]
pub fn handler(vm: &mut Vm) {
let ecall_number = vm.registers[2].cast::<u64>();
// log::info!("eca called :pensive:");
// debug!("Ecall number {:?}", ecall_number);
//info!("Register dump: {:?}", vm.registers);
match ecall_number {
0 => {
@ -207,15 +204,3 @@ pub enum LogError {
NoMessages,
InvalidLogFormat,
}
// use {alloc::vec, log::Record};
// fn memory_msg_handler(vm: &mut Vm, mem_addr: u64, length: usize) -> Result<(), LogError> {
// let mut val = alloc::vec::Vec::new();
// for _ in 0..4096 {
// val.push(0);
// }
// info!("Block address: {:?}", val.as_ptr());
// vm.registers[1] = hbvm::value::Value(val.as_ptr() as u64);
// vm.registers[2] = hbvm::value::Value(4096);
// Ok(())
// }

View file

@ -17,38 +17,16 @@ pub enum MemoryQuotaType {
}
/// Allocates one 4096-byte block and reports it to the VM: register 1
/// receives the block address and register 2 the block size.
///
/// `_mem_addr` and `_length` are accepted for signature compatibility and are
/// not used. The function always returns `Ok(())`.
fn alloc_page(vm: &mut Vm, _mem_addr: u64, _length: usize) -> Result<(), MemoryServiceError> {
    // Allocate exactly once. A second `let ptr = alloc(..)` used to shadow
    // the first binding, which leaked the first 4 KiB block on every call.
    //
    // SAFETY: the size (4096) is non-zero and the alignment (8) is a non-zero
    // power of two, so the layout is valid for `alloc`.
    // NOTE(review): with an alignment of 8 the block is not guaranteed to be
    // page-aligned, and a null pointer from a failed allocation is forwarded
    // to the VM as address 0 - confirm both are intended.
    let ptr = unsafe { alloc(Layout::from_size_align_unchecked(4096, 8)) };
    info!("Block address: {:?}", ptr);
    vm.registers[1] = hbvm::value::Value(ptr as u64);
    vm.registers[2] = hbvm::value::Value(4096);
    Ok(())
}
/// Copies `count` bytes from `src` to `dest`.
///
/// This is a thin wrapper over the pointer method `copy_to_nonoverlapping`,
/// which already picks the widest safe copy strategy for the given
/// alignment; the former hand-written 8-byte loop duplicated that work.
///
/// # Safety
/// `src` must be valid for reads of `count` bytes, `dest` must be valid for
/// writes of `count` bytes, and the two regions must not overlap.
#[inline(always)]
unsafe fn memcpy(dest: *mut u8, src: *const u8, count: usize) {
    // SAFETY: the caller upholds the validity and non-overlap requirements
    // documented above; a `count` of zero is a no-op.
    unsafe { src.copy_to_nonoverlapping(dest, count) }
}
#[inline(always)]
unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) {
const BLOCK_SIZE: usize = 64;
let mut remaining = count * size;
if remaining < 16 {
@ -56,11 +34,11 @@ unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) {
return;
}
let mut buffer = [0u8; 64];
let mut buffer = [0u8; BLOCK_SIZE];
let mut buffer_size = size;
src.copy_to_nonoverlapping(buffer.as_mut_ptr(), size);
while buffer_size * 2 <= 64 {
while buffer_size * 2 <= BLOCK_SIZE {
buffer
.as_mut_ptr()
.copy_to_nonoverlapping(buffer.as_mut_ptr().add(buffer_size), buffer_size);
@ -68,12 +46,15 @@ unsafe fn memset(mut dest: *mut u8, src: *const u8, count: usize, size: usize) {
}
let buffer_ptr = buffer.as_ptr() as *const u64;
while (dest as usize) & 7 != 0 && remaining >= 8 {
buffer.as_ptr().copy_to_nonoverlapping(dest, 1);
dest = dest.add(1);
remaining -= 1;
}
while remaining >= 8 {
if (dest as usize) & 7 == 0 {
*(dest as *mut u64) = *buffer_ptr;
} else {
buffer.as_ptr().copy_to_nonoverlapping(dest, 8);
}
*(dest as *mut u64) = *buffer_ptr;
dest = dest.add(8);
remaining -= 8;
}
@ -101,7 +82,7 @@ pub fn memory_msg_handler(
let ptr = alloc(Layout::from_size_align_unchecked(
page_count as usize * 4096,
4096,
8,
));
vm.registers[1] = hbvm::value::Value(ptr as u64);
@ -117,7 +98,7 @@ pub fn memory_msg_handler(
dealloc(
mptr as *mut u8,
Layout::from_size_align_unchecked(page_count as usize * 4096, 4096),
Layout::from_size_align_unchecked(page_count as usize * 4096, 8),
)
},
2 => {
@ -147,7 +128,7 @@ pub fn memory_msg_handler(
let src = u64::from_le_bytes(msg_vec[9..17].try_into().unwrap_unchecked()) as *const u8;
let dest = u64::from_le_bytes(msg_vec[17..25].try_into().unwrap_unchecked()) as *mut u8;
memcpy(dest, src, count);
src.copy_to_nonoverlapping(dest, count);
},
5 => unsafe {
let count = u64::from_le_bytes(msg_vec[1..9].try_into().unwrap_unchecked()) as usize;

View file

@ -97,7 +97,7 @@ impl HandlePageFault for PageFaultHandler {
#[inline(always)]
const fn stack_layout() -> Layout {
unsafe { Layout::from_size_align_unchecked(STACK_SIZE, 4096) }
unsafe { Layout::from_size_align_unchecked(STACK_SIZE, 8) }
}
#[inline(always)]

View file

@ -20,8 +20,9 @@ derive_more = { version = "1", default-features = false, features = [
error-stack = "0.5"
fatfs = "0.3"
toml = "0.8"
# hbasm.git = "https://git.ablecorp.us/AbleOS/holey-bytes.git"
hblang.git = "https://git.ablecorp.us/AbleOS/holey-bytes.git"
hblang = { git = "https://git.ablecorp.us/AbleOS/holey-bytes.git", features = [
"std",
], default-features = false }
log = "0.4"
raw-cpuid = "11"

View file

@ -38,6 +38,8 @@ fn main() -> Result<(), Error> {
target = Target::Riscv64Virt;
} else if arg == "arm64" || arg == "aarch64" || arg == "aarch64-virt" {
target = Target::Aarch64;
} else if arg == "avx2" {
target = Target::X86_64Avx2;
} else {
return Err(report!(Error::InvalidSubCom));
}
@ -61,6 +63,8 @@ fn main() -> Result<(), Error> {
target = Target::Aarch64;
} else if arg == "--noaccel" {
do_accel = false;
} else if arg == "avx2" {
target = Target::X86_64Avx2;
} else {
return Err(report!(Error::InvalidSubCom));
}
@ -304,6 +308,9 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> {
if target == Target::Aarch64 {
com.args(["--target", "targets/aarch64-virt-ableos.json"]);
}
if target == Target::X86_64Avx2 {
com.args(["--target", "targets/x86_64_v3-ableos.json"]);
}
match com.status() {
Ok(s) if s.code() != Some(0) => bail!(Error::Build),
@ -317,6 +324,10 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> {
path.push_str("_x86-64");
"target/x86_64-ableos"
}
Target::X86_64Avx2 => {
path.push_str("_x86-64");
"target/x86_64_v3-ableos"
}
Target::Riscv64Virt => "target/riscv64-virt-ableos",
Target::Aarch64 => {
path.push_str("_aarch64");
@ -341,7 +352,7 @@ fn build(release: bool, target: Target, debuginfo: bool) -> Result<(), Error> {
fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> {
let target_str = match target {
Target::X86_64 => "qemu-system-x86_64",
Target::X86_64 | Target::X86_64Avx2 => "qemu-system-x86_64",
Target::Riscv64Virt => "qemu-system-riscv64",
Target::Aarch64 => "qemu-system-aarch64",
};
@ -384,7 +395,7 @@ fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> {
};
match target {
Target::X86_64 => {
Target::X86_64 | Target::X86_64Avx2 => {
#[rustfmt::skip]
com.args([
"-bios", &ovmf_path.change_context(Error::OvmfFetch)?,
@ -440,7 +451,7 @@ fn run(release: bool, target: Target, do_accel: bool) -> Result<(), Error> {
fn fetch_ovmf(target: Target) -> Result<String, OvmfFetchError> {
let (ovmf_url, ovmf_path) = match target {
Target::X86_64 => (
Target::X86_64 | Target::X86_64Avx2 => (
"https://retrage.github.io/edk2-nightly/bin/RELEASEX64_OVMF.fd",
"target/RELEASEX64_OVMF.fd",
),
@ -489,6 +500,7 @@ impl Context for OvmfFetchError {}
/// Platform that the `build`, `run` and `fetch_ovmf` helpers operate on.
#[derive(Clone, Copy, PartialEq, Eq)]
enum Target {
/// Baseline x86-64; run with `qemu-system-x86_64`.
X86_64,
/// x86-64 built with the `targets/x86_64_v3-ableos.json` target spec
/// (chosen by the `avx2` argument); it uses the same QEMU binary and OVMF
/// image as `X86_64`.
X86_64Avx2,
/// RISC-V 64 "virt"; run with `qemu-system-riscv64`.
Riscv64Virt,
/// AArch64, built with `targets/aarch64-virt-ableos.json` and run with
/// `qemu-system-aarch64`.
Aarch64,
}

View file

@ -3,7 +3,7 @@
render := @use("../../../../libraries/render/src/lib.hb")
/* expected result:
the square example bounces around the screen */
bouncing gradient square inside coloured bouncing box inside black screen */
example := fn(): void {
screen := render.init(true)
@ -20,7 +20,6 @@ example := fn(): void {
target_color := color
loop {
render.clear(screen, render.black)
// color += .(1, 1, 1, 1)
render.put_filled_rect(image, pos_inner, .(side, side), color)
render.put_rect(image, pos_inner, .(side, side), render.black)
render.put_rect(image, .(0, 0), .(image.width - 1, image.height - 1), color)