From 54a7f8597864d45ae2133e97c2bdf661c4f97202 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Thu, 10 Oct 2024 08:35:17 +0200 Subject: [PATCH] progress --- .gitignore | 1 + Cargo.toml | 11 +- depell/src/index.css | 14 ++- depell/src/index.js | 140 +++++++++++++++++---- depell/src/main.rs | 45 ++++--- depell/wasm-hbfmt/src/lib.rs | 108 ++++++++++------ hblang/Cargo.toml | 9 +- hblang/src/codegen.rs | 26 ++-- hblang/src/fmt.rs | 144 ++++++++++++--------- hblang/src/fs.rs | 10 +- hblang/src/lexer.rs | 45 ++++--- hblang/src/lib.rs | 40 ++++-- hblang/src/parser.rs | 236 ++++++++++++++++------------------- hblang/src/son.rs | 11 +- 14 files changed, 519 insertions(+), 321 deletions(-) diff --git a/.gitignore b/.gitignore index c73e0df..975a922 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ /hbbytecode/src/instrs.rs /.rgignore rustc-ice-* +db.sqlite diff --git a/Cargo.toml b/Cargo.toml index a76447b..f184c54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,15 @@ [workspace] resolver = "2" -members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "depell"] +members = [ + "hbbytecode", + "hbvm", + "hbxrt", + "xtask", + "hblang", + "hbjit", + "depell", + "depell/wasm-hbfmt" +] [profile.release] lto = true diff --git a/depell/src/index.css b/depell/src/index.css index 685023f..e787b96 100644 --- a/depell/src/index.css +++ b/depell/src/index.css @@ -1,16 +1,18 @@ * { - font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; + font-family: var(--font); } body { --primary: white; --secondary: #EFEFEF; + --timestamp: #777777; --error: #ff3333; --placeholder: #333333; } body { --small-gap: 5px; + --font: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; --monospace: 'Courier New', Courier, monospace; nav { @@ -30,6 +32,15 @@ body { } div.preview { + div.info { + display: flex; + gap: var(--small-gap); + + span[apply=timestamp] { + color: var(--timestamp); + } + } + div.stats { display: flex; gap: var(--small-gap); @@ -62,6 +73,7 @@ textarea { margin: var(--small-gap) 0px; font-family: var(--monospace); resize: none; + tab-size: 4; } input { diff --git a/depell/src/index.js b/depell/src/index.js index 81d380d..2ac665a 100644 --- a/depell/src/index.js +++ b/depell/src/index.js @@ -1,18 +1,72 @@ -//// @ts-check +/// @ts-check -if (window.location.hostname === 'localhost') { - let id; setInterval(async () => { - let new_id = await fetch('/hot-reload').then(reps => reps.text()); - id ??= new_id; - if (id !== new_id) window.location.reload(); - }, 300); +/** @return {never} */ +function never() { throw new Error() } + +/**@type{WebAssembly.Instance}*/ let instance; +/**@type{Promise}*/ let instaceFuture; +/** @param {string} code @param {"fmt" | "minify"} action + * @returns {Promise | string | undefined} */ +function modifyCode(code, action) { + if (!instance) { + instaceFuture ??= WebAssembly.instantiateStreaming(fetch("/hbfmt.wasm"), {}); + return (async () => { + instance = (await instaceFuture).instance; + return modifyCodeSync(instance, code, action); + })(); + } else { + return modifyCodeSync(instance, code, action); + } } -document.body.addEventListener('htmx:afterSwap', (ev) => { - wireUp(ev.target); -}); +/** @param {WebAssembly.Instance} instance @param {string} code @param {"fmt" | "minify"} action @returns {string | undefined} */ +function modifyCodeSync(instance, code, action) { + let { + INPUT, INPUT_LEN, + OUTPUT, OUTPUT_LEN, + PANIC_MESSAGE, PANIC_MESSAGE_LEN, + memory, fmt, minify + } = instance.exports; -wireUp(document.body); + if (!(true + && INPUT instanceof WebAssembly.Global + && INPUT_LEN instanceof WebAssembly.Global + && OUTPUT instanceof WebAssembly.Global + && OUTPUT_LEN instanceof WebAssembly.Global + && memory instanceof WebAssembly.Memory + && typeof fmt === "function" + && typeof minify === "function" + )) never(); + + if (action !== "fmt") { + INPUT = OUTPUT; + INPUT_LEN = OUTPUT_LEN; + } + + let dw = new DataView(memory.buffer); + dw.setUint32(INPUT_LEN.value, code.length, true); + new Uint8Array(memory.buffer, INPUT.value) + .set(new TextEncoder().encode(code)); + + try { + if (action === "fmt") fmt(); else minify(); + let result = new TextDecoder() + .decode(new Uint8Array(memory.buffer, OUTPUT.value, + dw.getUint32(OUTPUT_LEN.value, true))); + return result; + } catch (e) { + if (PANIC_MESSAGE instanceof WebAssembly.Global + && PANIC_MESSAGE_LEN instanceof WebAssembly.Global) { + let message = new TextDecoder() + .decode(new Uint8Array(memory.buffer, PANIC_MESSAGE.value, + dw.getUint32(PANIC_MESSAGE_LEN.value, true))); + console.error(message, e); + } else { + console.error(e); + } + return undefined; + } +} /** @param {HTMLElement} target */ function wireUp(target) { @@ -21,24 +75,31 @@ function wireUp(target) { bindTextareaAutoResize(target); } -/** @param {string} content @return {string} */ -function fmtTimestamp(content) { - new Date(parseInt(content) * 1000).toLocaleString() -} +/** @type {{ [key: string]: (content: string) => Promise | string }} */ +const applyFns = { + timestamp: (content) => new Date(parseInt(content) * 1000).toLocaleString(), + fmt: (content) => { + let res = modifyCode(content, "fmt"); + return res instanceof Promise ? res.then(c => c ?? content) : res ?? content; + }, +}; /** @param {HTMLElement} target */ function execApply(target) { - /**@type {HTMLElement}*/ let elem; - for (elem of target.querySelectorAll('[apply]')) { - const funcname = elem.getAttribute('apply'); - elem.textContent = window[funcname](elem.textContent); + for (const elem of target.querySelectorAll('[apply]')) { + if (!(elem instanceof HTMLElement)) continue; + const funcname = elem.getAttribute('apply') ?? never(); + let res = applyFns[funcname](elem.textContent ?? ""); + if (res instanceof Promise) res.then(c => elem.textContent = c); + else elem.textContent = res; } } /** @param {HTMLElement} target */ function bindTextareaAutoResize(target) { - /**@type {HTMLTextAreaElement}*/ let textarea; - for (textarea of target.querySelectorAll("textarea")) { + for (const textarea of target.querySelectorAll("textarea")) { + if (!(textarea instanceof HTMLTextAreaElement)) never(); + textarea.style.height = textarea.scrollHeight + "px"; textarea.style.overflowY = "hidden"; textarea.addEventListener("input", function() { @@ -84,12 +145,37 @@ function cacheInputs(target) { continue; } - /**@type {HTMLInputElement}*/ let input; - for (input of form.elements) { - if ('password submit button'.includes(input.type)) continue; - const key = path + input.name; - input.value = localStorage.getItem(key) ?? ''; - input.addEventListener("input", (ev) => localStorage.setItem(key, ev.target.value)); + for (const input of form.elements) { + if (input instanceof HTMLInputElement || input instanceof HTMLTextAreaElement) { + if ('password submit button'.includes(input.type)) continue; + const key = path + input.name; + input.value = localStorage.getItem(key) ?? ''; + input.addEventListener("input", () => localStorage.setItem(key, input.value)); + } else { + console.warn("unhandled form element: ", input); + } } } } + +if (window.location.hostname === 'localhost') { + let id; setInterval(async () => { + let new_id = await fetch('/hot-reload').then(reps => reps.text()); + id ??= new_id; + if (id !== new_id) window.location.reload(); + }, 300); + + (async function testCodeChange() { + const code = "main:=fn():void{return}"; + const fmtd = await modifyCode(code, "fmt") ?? never(); + const prev = await modifyCode(fmtd, "minify") ?? never(); + if (code != prev) console.error(code, prev); + })() +} + +document.body.addEventListener('htmx:afterSwap', (ev) => { + if (!(ev.target instanceof HTMLElement)) never(); + wireUp(ev.target); +}); + +wireUp(document.body); diff --git a/depell/src/main.rs b/depell/src/main.rs index 748e4e7..3bba97d 100644 --- a/depell/src/main.rs +++ b/depell/src/main.rs @@ -28,6 +28,18 @@ async fn amain() { let router = axum::Router::new() .route("/", get(Index::page)) + .route( + "/hbfmt.wasm", + get(|| async move { + axum::http::Response::builder() + .header("content-type", "application/wasm") + .body(axum::body::Body::from( + include_bytes!("../../target/wasm32-unknown-unknown/small/wasm_hbfmt.wasm") + .to_vec(), + )) + .unwrap() + }), + ) .route("/index-view", get(Index::get)) .route("/feed", get(Index::page)) .route("/profile", get(Profile::page)) @@ -117,7 +129,7 @@ impl Page for Post { - + @@ -158,18 +170,19 @@ impl fmt::Display for Post { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Self { author, name, timestamp, imports, runs, dependencies, code, .. } = self; write_html! { f
-
- author - "/" - name - timestamp +
+ author "/" name + timestamp
- "imps: "imports - "runs: "runs - "deps: "dependencies + for (name, count) in "inps runs deps".split(' ') + .zip([imports, runs, dependencies]) + .filter(|(_, &c)| c != 0) + { + name ": "count + }
-
code
+
code
if *timestamp == 0 { @@ -380,11 +393,11 @@ struct Session { impl axum::extract::FromRequestParts for Session { /// If the extractor fails it'll use this "rejection" type. A rejection is /// a kind of error that can be converted into a response. - type Rejection = axum::response::Redirect; + type Rejection = Redirect; /// Perform the extraction. async fn from_request_parts(parts: &mut Parts, _: &S) -> Result { - let err = || axum::response::Redirect::permanent("/login"); + let err = redirect("/login"); let value = parts .headers @@ -392,20 +405,20 @@ impl axum::extract::FromRequestParts for Session { .into_iter() .find_map(|c| c.to_str().ok()?.trim().strip_prefix("id=")) .map(|c| c.split_once(';').unwrap_or((c, "")).0) - .ok_or(err())?; + .ok_or(err)?; let mut id = [0u8; 32]; - parse_hex(value, &mut id).ok_or(err())?; + parse_hex(value, &mut id).ok_or(err)?; let (name, expiration) = db::with(|db| { db.get_session .query_row((dbg!(id),), |r| Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))) .inspect_err(|e| log::error!("{e}")) - .map_err(|_| err()) + .map_err(|_| err) })?; if expiration < now() { log::error!("expired"); - return Err(err()); + return Err(err); } Ok(Self { name, id }) diff --git a/depell/wasm-hbfmt/src/lib.rs b/depell/wasm-hbfmt/src/lib.rs index 2a283c7..0107696 100644 --- a/depell/wasm-hbfmt/src/lib.rs +++ b/depell/wasm-hbfmt/src/lib.rs @@ -1,31 +1,51 @@ #![no_std] #![feature(slice_take)] #![feature(str_from_raw_parts)] +#![feature(alloc_error_handler)] -use hblang::parser::ParserCtx; - -#[cfg(target_arch = "wasm32")] -#[panic_handler] -fn handle_panic(_: &core::panic::PanicInfo) -> ! { - core::arch::wasm32::unreachable() -} - -use core::{ - alloc::{GlobalAlloc, Layout}, - cell::UnsafeCell, +use { + core::{ + alloc::{GlobalAlloc, Layout}, + cell::UnsafeCell, + }, + hblang::parser::ParserCtx, }; const ARENA_SIZE: usize = 128 * 1024; +const MAX_OUTPUT_SIZE: usize = 1024 * 10; +const MAX_INPUT_SIZE: usize = 1024 * 4; + +#[cfg(target_arch = "wasm32")] +#[panic_handler] +pub fn handle_panic(_info: &core::panic::PanicInfo) -> ! { + //unsafe { + // use core::fmt::Write; + // let mut f = Write(&mut PANIC_MESSAGE[..]); + // _ = writeln!(f, "{}", info); + // PANIC_MESSAGE_LEN = 1024 - f.0.len(); + //} + + core::arch::wasm32::unreachable(); +} + +#[global_allocator] +static ALLOCATOR: ArenaAllocator = ArenaAllocator::new(); + +#[cfg(target_arch = "wasm32")] +#[alloc_error_handler] +fn alloc_error(_: core::alloc::Layout) -> ! { + core::arch::wasm32::unreachable() +} #[repr(C, align(32))] -struct SimpleAllocator { +struct ArenaAllocator { arena: UnsafeCell<[u8; ARENA_SIZE]>, head: UnsafeCell<*mut u8>, } -impl SimpleAllocator { +impl ArenaAllocator { const fn new() -> Self { - SimpleAllocator { + ArenaAllocator { arena: UnsafeCell::new([0; ARENA_SIZE]), head: UnsafeCell::new(core::ptr::null_mut()), } @@ -36,9 +56,9 @@ impl SimpleAllocator { } } -unsafe impl Sync for SimpleAllocator {} +unsafe impl Sync for ArenaAllocator {} -unsafe impl GlobalAlloc for SimpleAllocator { +unsafe impl GlobalAlloc for ArenaAllocator { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { let size = layout.size(); let align = layout.align(); @@ -46,7 +66,7 @@ unsafe impl GlobalAlloc for SimpleAllocator { let until = self.arena.get() as *mut u8; let new_head = (*self.head.get()).sub(size); - let aligned_head = (new_head as usize & (1 << (align - 1))) as *mut u8; + let aligned_head = (new_head as usize & !(1 << (align - 1))) as *mut u8; if until > aligned_head { return core::ptr::null_mut(); @@ -61,41 +81,53 @@ unsafe impl GlobalAlloc for SimpleAllocator { } } -#[cfg_attr(target_arch = "wasm32", global_allocator)] -static ALLOCATOR: SimpleAllocator = SimpleAllocator::new(); +struct Write<'a>(&'a mut [u8]); -const MAX_OUTPUT_SIZE: usize = 1024 * 10; +impl core::fmt::Write for Write<'_> { + fn write_str(&mut self, s: &str) -> core::fmt::Result { + if let Some(m) = self.0.take_mut(..s.len()) { + m.copy_from_slice(s.as_bytes()); + Ok(()) + } else { + Err(core::fmt::Error) + } + } +} + +//#[no_mangle] +//static mut PANIC_MESSAGE: [u8; 1024] = unsafe { core::mem::zeroed() }; +//#[no_mangle] +//static mut PANIC_MESSAGE_LEN: usize = 0; #[no_mangle] static mut OUTPUT: [u8; MAX_OUTPUT_SIZE] = unsafe { core::mem::zeroed() }; - #[no_mangle] static mut OUTPUT_LEN: usize = 0; #[no_mangle] -unsafe extern "C" fn fmt(code: *const u8, len: usize) { +static MAX_INPUT: usize = MAX_INPUT_SIZE; +#[no_mangle] +static mut INPUT: [u8; MAX_INPUT_SIZE] = unsafe { core::mem::zeroed() }; +#[no_mangle] +static mut INPUT_LEN: usize = 0; + +#[no_mangle] +unsafe extern "C" fn fmt() { ALLOCATOR.reset(); - let code = core::str::from_raw_parts(code, len); + let code = core::str::from_raw_parts(core::ptr::addr_of!(INPUT).cast(), INPUT_LEN); let arena = hblang::parser::Arena::default(); let mut ctx = ParserCtx::default(); let exprs = hblang::parser::Parser::parse(&mut ctx, code, "source.hb", &|_, _| Ok(0), &arena); - struct Write<'a>(&'a mut [u8]); - - impl core::fmt::Write for Write<'_> { - fn write_str(&mut self, s: &str) -> core::fmt::Result { - if let Some(m) = self.0.take_mut(..s.len()) { - m.copy_from_slice(s.as_bytes()); - Ok(()) - } else { - Err(core::fmt::Error) - } - } - } - - let mut f = Write(unsafe { &mut OUTPUT[..] }); + let mut f = Write(&mut OUTPUT[..]); hblang::fmt::fmt_file(exprs, code, &mut f).unwrap(); - unsafe { OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len() }; + OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len(); +} + +#[no_mangle] +unsafe extern "C" fn minify() { + let code = core::str::from_raw_parts_mut(core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN); + OUTPUT_LEN = hblang::fmt::minify(code); } diff --git a/hblang/Cargo.toml b/hblang/Cargo.toml index 9df928a..3b18338 100644 --- a/hblang/Cargo.toml +++ b/hblang/Cargo.toml @@ -12,8 +12,13 @@ hashbrown = { version = "0.15.0", default-features = false, features = ["raw-ent hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbvm = { path = "../hbvm", features = ["nightly"] } log = { version = "0.4.22", features = ["release_max_level_error"] } -regalloc2 = { git = "https://github.com/jakubDoka/regalloc2", branch = "reuse-allocations", features = [] } + +[dependencies.regalloc2] +git = "https://github.com/jakubDoka/regalloc2" +branch = "reuse-allocations" +optional = true [features] -default = ["std"] +default = ["std", "opts"] std = [] +opts = ["regalloc2"] diff --git a/hblang/src/codegen.rs b/hblang/src/codegen.rs index 13f4a6b..fa25085 100644 --- a/hblang/src/codegen.rs +++ b/hblang/src/codegen.rs @@ -839,8 +839,8 @@ impl Codegen { let index_val = self.expr(index)?; _ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt"); - if let ty::Kind::Ptr(ty) = base_val.ty.expand() { - base_val.ty = self.tys.ins.ptrs[ty as usize].base; + if let Some(ty) = self.tys.base_of(base_val.ty) { + base_val.ty = ty; base_val.loc = base_val.loc.into_derefed(); } @@ -1070,7 +1070,7 @@ impl Codegen { Some(Value { ty, loc }) } E::String { pos, mut literal } => { - literal = literal.trim_matches('"'); + literal = &literal[1..literal.len() - 1]; if !literal.ends_with("\\0") { self.report(pos, "string literal must end with null byte (for now)"); @@ -1206,8 +1206,8 @@ impl Codegen { let checkpoint = self.ci.snap(); let mut tal = self.expr(target)?; - if let ty::Kind::Ptr(ty) = tal.ty.expand() { - tal.ty = self.tys.ins.ptrs[ty as usize].base; + if let Some(ty) = self.tys.base_of(tal.ty) { + tal.ty = ty; tal.loc = tal.loc.into_derefed(); } @@ -1306,9 +1306,9 @@ impl Codegen { } E::UnOp { op: T::Mul, val, pos } => { let val = self.expr(val)?; - match val.ty.expand() { - ty::Kind::Ptr(ty) => Some(Value { - ty: self.tys.ins.ptrs[ty as usize].base, + match self.tys.base_of(val.ty) { + Some(ty) => Some(Value { + ty, loc: Loc::reg(self.loc_to_reg(val.loc, self.tys.size_of(val.ty))) .into_derefed(), }), @@ -1640,10 +1640,9 @@ impl Codegen { imm = u64::from_ne_bytes(dst); } if matches!(op, T::Add | T::Sub) - && let ty::Kind::Ptr(ty) = ty::Kind::from_ty(ty) + && let Some(ty) = self.tys.base_of(ty) { - let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base); - imm *= size as u64; + imm *= self.tys.size_of(ty) as u64; } self.ci.emit(oper(dst.get(), lhs.get(), imm)); @@ -1676,9 +1675,8 @@ impl Codegen { (lhs.get(), right.ty) }; - let ty::Kind::Ptr(ty) = ty.expand() else { unreachable!() }; - - let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base); + let ty = self.tys.base_of(ty).unwrap(); + let size = self.tys.size_of(ty); self.ci.emit(muli64(offset, offset, size as _)); } } diff --git a/hblang/src/fmt.rs b/hblang/src/fmt.rs index 5f85545..446de98 100644 --- a/hblang/src/fmt.rs +++ b/hblang/src/fmt.rs @@ -3,11 +3,10 @@ use { lexer::{self, TokenKind}, parser::{self, CommentOr, CtorField, Expr, Poser, Radix, StructField}, }, - alloc::string::String, core::fmt, }; -pub fn minify(source: &mut str) -> Option<&str> { +pub fn minify(source: &mut str) -> usize { fn needs_space(c: u8) -> bool { matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..) } @@ -15,6 +14,7 @@ pub fn minify(source: &mut str) -> Option<&str> { let mut writer = source.as_mut_ptr(); let mut reader = &source[..]; let mut prev_needs_whitecpace = false; + let mut prev_needs_newline = false; loop { let mut token = lexer::Lexer::new(reader).next(); match token.kind { @@ -23,46 +23,59 @@ pub fn minify(source: &mut str) -> Option<&str> { _ => {} } - let mut suffix = 0; - if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' { - token.end = token.start + reader[token.range()].trim_end().len() as u32; - suffix = b'\n'; - } + let cpy_len = token.range().len(); let mut prefix = 0; if prev_needs_whitecpace && needs_space(reader.as_bytes()[token.start as usize]) { prefix = b' '; + debug_assert!(token.start != 0, "{reader}"); + } + prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]); + + let inbetween_new_lines = + reader[..token.start as usize].bytes().filter(|&b| b == b'\n').count() + + token.kind.precedence().is_some() as usize; + let extra_prefix_new_lines = if inbetween_new_lines > 1 { + 1 + token.kind.precedence().is_none() as usize + } else { + prev_needs_newline as usize + }; + + if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' { + prev_needs_newline = true; + prev_needs_whitecpace = false; + } else { + prev_needs_newline = false; } - prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]); let sstr = reader[token.start as usize..].as_ptr(); reader = &reader[token.end as usize..]; unsafe { - if prefix != 0 { + if extra_prefix_new_lines != 0 { + for _ in 0..extra_prefix_new_lines { + writer.write(b'\n'); + writer = writer.add(1); + } + } else if prefix != 0 { writer.write(prefix); writer = writer.add(1); } - writer.copy_from(sstr, token.range().len()); - writer = writer.add(token.range().len()); - if suffix != 0 { - writer.write(suffix); - writer = writer.add(1); - } + writer.copy_from(sstr, cpy_len); + writer = writer.add(cpy_len); } } - None + unsafe { writer.sub_ptr(source.as_mut_ptr()) } } pub struct Formatter<'a> { source: &'a str, depth: usize, - disp_buff: String, } impl<'a> Formatter<'a> { pub fn new(source: &'a str) -> Self { - Self { source, depth: 0, disp_buff: Default::default() } + Self { source, depth: 0 } } fn fmt_list( @@ -172,7 +185,7 @@ impl<'a> Formatter<'a> { self.fmt(value, f) } Expr::String { literal, .. } => write!(f, "{literal}"), - Expr::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), + Expr::Comment { literal, .. } => write!(f, "{literal}"), Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"), Expr::Field { target, name: field, .. } => { self.fmt_paren(target, f, postfix)?; @@ -194,7 +207,7 @@ impl<'a> Formatter<'a> { write!(f, "{name}: ")?; s.fmt(ty, f)? } - CommentOr::Comment { literal, .. } => write!(f, "{literal}")?, + CommentOr::Comment { literal, .. } => writeln!(f, "{literal}")?, } Ok(field.or().is_some()) }) @@ -294,30 +307,42 @@ impl<'a> Formatter<'a> { write!(f, "{{")?; self.fmt_list(f, true, "}", "", stmts, Self::fmt) } - Expr::Number { value, radix, .. } => match radix { - Radix::Decimal => write!(f, "{value}"), - Radix::Hex => write!(f, "{value:#X}"), - Radix::Octal => write!(f, "{value:#o}"), - Radix::Binary => write!(f, "{value:#b}"), - }, + Expr::Number { value, radix, .. } => { + fn display_radix(radix: Radix, mut value: u64, buf: &mut [u8; 64]) -> &str { + fn conv_radix(d: u8) -> u8 { + match d { + 0..=9 => d + b'0', + _ => d - 10 + b'A', + } + } + + for (i, b) in buf.iter_mut().enumerate().rev() { + let d = (value % radix as u64) as u8; + value /= radix as u64; + *b = conv_radix(d); + if value == 0 { + return unsafe { core::str::from_utf8_unchecked(&buf[i..]) }; + } + } + + unreachable!() + } + let mut buf = [0u8; 64]; + let value = display_radix(radix, value as u64, &mut buf); + match radix { + Radix::Decimal => write!(f, "{value}"), + Radix::Hex => write!(f, "0x{value}"), + Radix::Octal => write!(f, "0o{value}"), + Radix::Binary => write!(f, "0b{value}"), + } + } Expr::Bool { value, .. } => write!(f, "{value}"), Expr::Idk { .. } => write!(f, "idk"), Expr::BinOp { left, op: TokenKind::Assign, - right: Expr::BinOp { left: lleft, op, right }, - } if { - let mut b = core::mem::take(&mut self.disp_buff); - self.fmt(lleft, &mut b)?; - let len = b.len(); - self.fmt(left, &mut b)?; - let (lleft, left) = b.split_at(len); - let res = lleft == left; - b.clear(); - self.disp_buff = b; - res - } => - { + right: &Expr::BinOp { left: lleft, op, right }, + } if left.pos() == lleft.pos() => { self.fmt(left, f)?; write!(f, " {op}= ")?; self.fmt(right, f) @@ -355,7 +380,7 @@ impl<'a> Formatter<'a> { } pub fn preserve_newlines(source: &str) -> usize { - source[source.trim_end().len()..].chars().filter(|&c| c == '\n').count() + source[source.trim_end().len()..].bytes().filter(|&c| c == b'\n').count() } pub fn insert_needed_semicolon(source: &str) -> bool { @@ -365,39 +390,46 @@ pub fn insert_needed_semicolon(source: &str) -> bool { impl core::fmt::Display for parser::Ast { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (i, expr) in self.exprs().iter().enumerate() { - Formatter::new(&self.file).fmt(expr, f)?; - if let Some(expr) = self.exprs().get(i + 1) - && let Some(rest) = self.file.get(expr.pos() as usize..) - { - if insert_needed_semicolon(rest) { - write!(f, ";")?; - } + fmt_file(self.exprs(), &self.file, f) + } +} - if preserve_newlines(&self.file[..expr.pos() as usize]) > 1 { - writeln!(f)?; - } +pub fn fmt_file(exprs: &[Expr], file: &str, f: &mut impl fmt::Write) -> fmt::Result { + for (i, expr) in exprs.iter().enumerate() { + Formatter::new(file).fmt(expr, f)?; + if let Some(expr) = exprs.get(i + 1) + && let Some(rest) = file.get(expr.pos() as usize..) + { + if insert_needed_semicolon(rest) { + write!(f, ";")?; } - if i + 1 != self.exprs().len() { + if preserve_newlines(&file[..expr.pos() as usize]) > 1 { writeln!(f)?; } } - Ok(()) + + if i + 1 != exprs.len() { + writeln!(f)?; + } } + Ok(()) } #[cfg(test)] pub mod test { use { - crate::parser::{self, StackAlloc}, + crate::parser::{self, ParserCtx}, alloc::borrow::ToOwned, std::{fmt::Write, string::String}, }; pub fn format(ident: &str, input: &str) { - let ast = - parser::Ast::new(ident, input.to_owned(), &mut StackAlloc::default(), &|_, _| Ok(0)); + let mut minned = input.to_owned(); + let len = crate::fmt::minify(&mut minned); + minned.truncate(len); + + let ast = parser::Ast::new(ident, minned, &mut ParserCtx::default(), &|_, _| Ok(0)); let mut output = String::new(); write!(output, "{ast}").unwrap(); diff --git a/hblang/src/fs.rs b/hblang/src/fs.rs index 49a5d93..b34185a 100644 --- a/hblang/src/fs.rs +++ b/hblang/src/fs.rs @@ -1,7 +1,7 @@ use { crate::{ codegen, - parser::{self, Ast, StackAlloc}, + parser::{self, Ast, ParserCtx}, }, alloc::{string::String, vec::Vec}, core::{fmt::Write, num::NonZeroUsize}, @@ -263,22 +263,22 @@ pub fn parse_from_fs(extra_threads: usize, root: &str) -> io::Result> { Ok(id) }; - let execute_task = |stack: &mut _, (_, path): Task| { + let execute_task = |ctx: &mut _, (_, path): Task| { let path = path.to_str().ok_or_else(|| { io::Error::new( io::ErrorKind::InvalidData, format!("path contains invalid characters: {}", display_rel_path(&path)), ) })?; - Ok(Ast::new(path, std::fs::read_to_string(path)?, stack, &|path, from| { + Ok(Ast::new(path, std::fs::read_to_string(path)?, ctx, &|path, from| { loader(path, from).map_err(|e| e.to_string()) })) }; let thread = || { - let mut stack = StackAlloc::default(); + let mut ctx = ParserCtx::default(); while let Some(task @ (indx, ..)) = tasks.pop() { - let res = execute_task(&mut stack, task); + let res = execute_task(&mut ctx, task); let mut ast = ast.lock().unwrap(); let len = ast.len().max(indx as usize + 1); ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); diff --git a/hblang/src/lexer.rs b/hblang/src/lexer.rs index 3b2ab8c..8f0a073 100644 --- a/hblang/src/lexer.rs +++ b/hblang/src/lexer.rs @@ -1,4 +1,4 @@ -use crate::{instrs, EncodedInstr}; +use crate::EncodedInstr; const fn ascii_mask(chars: &[u8]) -> u128 { let mut eq = 0; @@ -83,7 +83,7 @@ macro_rules! gen_token_kind { }; } -#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] +#[derive(PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] #[repr(u8)] pub enum TokenKind { Not = b'!', @@ -170,9 +170,16 @@ pub enum TokenKind { ShlAss = b'<' - 5 + 128, } +impl core::fmt::Debug for TokenKind { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + core::fmt::Display::fmt(self, f) + } +} + impl TokenKind { #[allow(clippy::type_complexity)] pub fn cond_op(self, signed: bool) -> Option<(fn(u8, u8, i16) -> EncodedInstr, bool)> { + use crate::instrs; Some(( match self { Self::Le if signed => instrs::jgts, @@ -192,7 +199,7 @@ impl TokenKind { } pub fn binop(self, signed: bool, size: u32) -> Option EncodedInstr> { - use instrs::*; + use crate::instrs::*; macro_rules! div { ($($op:ident),*) => {[$(|a, b, c| $op(a, 0, b, c)),*]}; } macro_rules! rem { ($($op:ident),*) => {[$(|a, b, c| $op(0, a, b, c)),*]}; } @@ -219,7 +226,7 @@ impl TokenKind { #[allow(clippy::type_complexity)] pub fn imm_binop(self, signed: bool, size: u32) -> Option EncodedInstr> { - use instrs::*; + use crate::instrs::*; macro_rules! def_op { ($name:ident |$a:ident, $b:ident, $c:ident| $($tt:tt)*) => { macro_rules! $name { @@ -286,7 +293,7 @@ impl TokenKind { pub fn unop(&self) -> Option EncodedInstr> { Some(match self { - Self::Sub => instrs::neg, + Self::Sub => crate::instrs::neg, _ => return None, }) } @@ -362,7 +369,7 @@ gen_token_kind! { pub struct Lexer<'a> { pos: u32, - bytes: &'a [u8], + source: &'a [u8], } impl<'a> Lexer<'a> { @@ -371,22 +378,22 @@ impl<'a> Lexer<'a> { } pub fn restore(input: &'a str, pos: u32) -> Self { - Self { pos, bytes: input.as_bytes() } + Self { pos, source: input.as_bytes() } } pub fn source(&self) -> &'a str { - unsafe { core::str::from_utf8_unchecked(self.bytes) } + unsafe { core::str::from_utf8_unchecked(self.source) } } pub fn slice(&self, tok: core::ops::Range) -> &'a str { - unsafe { core::str::from_utf8_unchecked(&self.bytes[tok]) } + unsafe { core::str::from_utf8_unchecked(&self.source[tok]) } } fn peek(&self) -> Option { - if core::intrinsics::unlikely(self.pos >= self.bytes.len() as u32) { + if core::intrinsics::unlikely(self.pos >= self.source.len() as u32) { None } else { - Some(unsafe { *self.bytes.get_unchecked(self.pos as usize) }) + Some(unsafe { *self.source.get_unchecked(self.pos as usize) }) } } @@ -453,7 +460,7 @@ impl<'a> Lexer<'a> { } b'a'..=b'z' | b'A'..=b'Z' | b'_' | 127.. => { advance_ident(self); - let ident = &self.bytes[start as usize..self.pos as usize]; + let ident = &self.source[start as usize..self.pos as usize]; T::from_ident(ident) } b'"' | b'\'' => loop { @@ -465,10 +472,18 @@ impl<'a> Lexer<'a> { } }, b'/' if self.advance_if(b'/') => { - while let Some(l) = self.advance() + while let Some(l) = self.peek() && l != b'\n' - {} - T::Comment + { + self.pos += 1; + } + + let end = self.source[..self.pos as usize] + .iter() + .rposition(|&b| !b.is_ascii_whitespace()) + .map_or(self.pos, |i| i as u32 + 1); + + return Token { kind: T::Comment, start, end }; } b'/' if self.advance_if(b'*') => { let mut depth = 1; diff --git a/hblang/src/lib.rs b/hblang/src/lib.rs index 403c8eb..0891abc 100644 --- a/hblang/src/lib.rs +++ b/hblang/src/lib.rs @@ -8,7 +8,6 @@ never_type, unwrap_infallible, slice_partition_dedup, - hash_raw_entry, portable_simd, iter_collect_into, new_uninit, @@ -19,6 +18,8 @@ extract_if, ptr_internals, iter_intersperse, + str_from_raw_parts, + ptr_sub_ptr, slice_from_ptr_range )] #![warn(clippy::dbg_macro)] @@ -32,7 +33,6 @@ use { ident::Ident, lexer::TokenKind, parser::{CommentOr, Expr, ExprRef, FileId, Pos}, - son::reg, ty::ArrayLen, }, alloc::{collections::BTreeMap, string::String, vec::Vec}, @@ -65,11 +65,22 @@ pub mod fmt; #[cfg(any(feature = "std", test))] pub mod fs; pub mod parser; +#[cfg(feature = "opts")] pub mod son; mod lexer; +#[cfg(feature = "opts")] mod vc; +pub mod reg { + pub const STACK_PTR: Reg = 254; + pub const ZERO: Reg = 0; + pub const RET: Reg = 1; + pub const RET_ADDR: Reg = 31; + + pub type Reg = u8; +} + mod ctx_map { use core::hash::BuildHasher; @@ -139,10 +150,12 @@ mod ctx_map { .map(|(k, _)| &k.value) } + #[cfg_attr(not(feature = "opts"), expect(dead_code))] pub fn clear(&mut self) { self.inner.clear(); } + #[cfg_attr(not(feature = "opts"), expect(dead_code))] pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option { let (entry, _) = self.entry(value.key(ctx), ctx); match entry { @@ -193,6 +206,7 @@ mod task { unpack(offset).is_ok() } + #[cfg_attr(not(feature = "opts"), expect(dead_code))] pub fn id(index: usize) -> Offset { 1 << 31 | index as u32 } @@ -397,8 +411,14 @@ mod ty { mod __lc_names { use super::*; - $(pub const $name: &[u8] = &array_to_lower_case(unsafe { - *(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) });)* + $(pub const $name: &str = unsafe { + const LCL: &[u8] = unsafe { + &array_to_lower_case( + *(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) + ) + }; + core::str::from_utf8_unchecked(LCL) + };)* } #[allow(dead_code)] @@ -407,7 +427,7 @@ mod ty { } pub fn from_str(name: &str) -> Option { - match name.as_bytes() { + match name { $(__lc_names::$name => Some($name),)* _ => None, } @@ -415,7 +435,7 @@ mod ty { pub fn to_str(ty: Builtin) -> &'static str { match ty { - $($name => unsafe { core::str::from_utf8_unchecked(__lc_names::$name) },)* + $($name => __lc_names::$name,)* v => unreachable!("invalid type: {}", v), } } @@ -551,6 +571,7 @@ mod ty { } } + #[cfg_attr(not(feature = "opts"), expect(dead_code))] pub fn bin_ret(ty: Id, op: TokenKind) -> Id { use TokenKind as T; match op { @@ -1141,6 +1162,7 @@ impl Types { } } + #[cfg_attr(not(feature = "opts"), expect(dead_code))] fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option { let name = self.names.project(name)?; self.struct_fields(s).iter().position(|f| f.name == name) @@ -1188,8 +1210,8 @@ impl OffsetIter { } } +#[cfg(any(feature = "opts", feature = "std"))] type HashMap = hashbrown::HashMap; -type _HashSet = hashbrown::HashSet; type FnvBuildHasher = core::hash::BuildHasherDefault; struct FnvHasher(u64); @@ -1334,10 +1356,10 @@ fn test_parse_files(ident: &'static str, input: &'static str) -> Vec { path: &'b str, loader: Loader<'b>, lexer: Lexer<'a>, - arena: &'b Arena<'a>, + arena: &'a Arena, + ctx: &'b mut ParserCtx, token: Token, - symbols: &'b mut Symbols, - stack: &'b mut StackAlloc, ns_bound: usize, trailing_sep: bool, packed: bool, - idents: Vec, - captured: Vec, } impl<'a, 'b> Parser<'a, 'b> { - pub fn new( - arena: &'b Arena<'a>, - symbols: &'b mut Symbols, - stack: &'b mut StackAlloc, + pub fn parse( + ctx: &'b mut ParserCtx, + input: &'a str, + path: &'b str, loader: Loader<'b>, - ) -> Self { - let mut lexer = Lexer::new(""); + arena: &'a Arena, + ) -> &'a [Expr<'a>] { + let mut lexer = Lexer::new(input); Self { loader, token: lexer.next(), lexer, - path: "", + path, + ctx, arena, - symbols, - stack, ns_bound: 0, trailing_sep: false, packed: false, - idents: Vec::new(), - captured: Vec::new(), } + .file() } - pub fn file(&mut self, input: &'a str, path: &'b str) -> &'a [Expr<'a>] { - self.path = path; - self.lexer = Lexer::new(input); - self.token = self.lexer.next(); - + fn file(&mut self) -> &'a [Expr<'a>] { let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true)); self.pop_scope(0); - let mut errors = String::new(); - for id in self.idents.drain(..) { - report_to( - self.lexer.source(), - self.path, - ident::pos(id.ident), - format_args!("undeclared identifier: {}", self.lexer.slice(ident::range(id.ident))), - &mut errors, - ); - } - if !errors.is_empty() { + if !self.ctx.idents.is_empty() { // TODO: we need error recovery - log::error!("{errors}"); + log::error!("{}", { + let mut errors = String::new(); + for id in self.ctx.idents.drain(..) { + report_to( + self.lexer.source(), + self.path, + ident::pos(id.ident), + format_args!( + "undeclared identifier: {}", + self.lexer.slice(ident::range(id.ident)) + ), + &mut errors, + ); + } + errors + }); unreachable!(); } @@ -153,36 +150,20 @@ impl<'a, 'b> Parser<'a, 'b> { break; } - let checkpoint = self.token.start; let op = self.next().kind; if op == TokenKind::Decl { self.declare_rec(&fold, top_level); } - let op_ass = op.ass_op().map(|op| { - // this abomination reparses the left side, so that the desubaring adheres to the - // parser invariants. - let source = self.lexer.slice(0..checkpoint as usize); - let prev_lexer = - core::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos())); - let prev_token = core::mem::replace(&mut self.token, self.lexer.next()); - let clone = self.expr(); - self.lexer = prev_lexer; - self.token = prev_token; - - (op, clone) - }); - let right = self.unit_expr(); let right = self.bin_expr(right, prec, false); let right = self.arena.alloc(right); let left = self.arena.alloc(fold); - if let Some((op, clone)) = op_ass { + if let Some(op) = op.ass_op() { self.flag_idents(*left, idfl::MUTABLE); - - let right = Expr::BinOp { left: self.arena.alloc(clone), op, right }; + let right = Expr::BinOp { left: self.arena.alloc(fold), op, right }; fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) }; } else { fold = Expr::BinOp { left, right, op }; @@ -220,15 +201,15 @@ impl<'a, 'b> Parser<'a, 'b> { ); } - let index = self.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up"); - if core::mem::replace(&mut self.idents[index].declared, true) { + let index = self.ctx.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up"); + if core::mem::replace(&mut self.ctx.idents[index].declared, true) { self.report( pos, format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))), ) } - self.idents[index].ordered = ordered; + self.ctx.idents[index].ordered = ordered; } fn resolve_ident(&mut self, token: Token) -> (Ident, bool) { @@ -240,6 +221,7 @@ impl<'a, 'b> Parser<'a, 'b> { } let (i, id, bl) = match self + .ctx .idents .iter_mut() .enumerate() @@ -248,20 +230,20 @@ impl<'a, 'b> Parser<'a, 'b> { Some((i, elem)) => (i, elem, false), None => { let id = ident::new(token.start, name.len() as _); - self.idents.push(ScopeIdent { + self.ctx.idents.push(ScopeIdent { ident: id, declared: false, ordered: false, flags: 0, }); - (self.idents.len() - 1, self.idents.last_mut().unwrap(), true) + (self.ctx.idents.len() - 1, self.ctx.idents.last_mut().unwrap(), true) } }; id.flags |= idfl::COMPTIME * is_ct as u32; if id.declared && id.ordered && self.ns_bound > i { id.flags |= idfl::COMPTIME; - self.captured.push(id.ident); + self.ctx.captured.push(id.ident); } (id.ident, bl) @@ -273,21 +255,22 @@ impl<'a, 'b> Parser<'a, 'b> { fn unit_expr(&mut self) -> Expr<'a> { use {Expr as E, TokenKind as T}; - let frame = self.idents.len(); + let frame = self.ctx.idents.len(); let token @ Token { start: pos, .. } = self.next(); let prev_boundary = self.ns_bound; - let prev_captured = self.captured.len(); + let prev_captured = self.ctx.captured.len(); let mut expr = match token.kind { T::Ct => E::Ct { pos, value: self.ptr_expr() }, T::Directive if self.lexer.slice(token.range()) == "use" => { self.expect_advance(TokenKind::LParen); let str = self.expect_advance(TokenKind::DQuote); self.expect_advance(TokenKind::RParen); - let path = self.lexer.slice(str.range()).trim_matches('"'); + let path = self.lexer.slice(str.range()); + let path = &path[1..path.len() - 1]; E::Mod { pos, - path: self.arena.alloc_str(path), + path, id: match (self.loader)(path, self.path) { Ok(id) => id, Err(e) => { @@ -323,7 +306,7 @@ impl<'a, 'b> Parser<'a, 'b> { T::Struct => E::Struct { packed: core::mem::take(&mut self.packed), fields: { - self.ns_bound = self.idents.len(); + self.ns_bound = self.ctx.idents.len(); self.expect_advance(T::LBrace); self.collect_list(T::Comma, T::RBrace, |s| { let tok = s.token; @@ -342,15 +325,23 @@ impl<'a, 'b> Parser<'a, 'b> { }, captured: { self.ns_bound = prev_boundary; - self.captured[prev_captured..].sort_unstable(); - let preserved = self.captured[prev_captured..].partition_dedup().0.len(); - self.captured.truncate(prev_captured + preserved); - self.arena.alloc_slice(&self.captured[prev_captured..]) + let mut captured = &mut self.ctx.captured[prev_captured..]; + while let Some(it) = captured.take_first_mut() { + for ot in &mut *captured { + if it > ot { + core::mem::swap(it, ot); + } + } + } + debug_assert!(captured.is_sorted()); + let preserved = self.ctx.captured[prev_captured..].partition_dedup().0.len(); + self.ctx.captured.truncate(prev_captured + preserved); + self.arena.alloc_slice(&self.ctx.captured[prev_captured..]) }, pos: { if self.ns_bound == 0 { // we might save some memory - self.captured.clear(); + self.ctx.captured.clear(); } pos }, @@ -427,9 +418,9 @@ impl<'a, 'b> Parser<'a, 'b> { T::Number => { let slice = self.lexer.slice(token.range()); let (slice, radix) = match &slice.get(0..2) { - Some("0x") => (slice.trim_start_matches("0x"), Radix::Hex), - Some("0b") => (slice.trim_start_matches("0b"), Radix::Binary), - Some("0o") => (slice.trim_start_matches("0o"), Radix::Octal), + Some("0x") => (&slice[2..], Radix::Hex), + Some("0b") => (&slice[2..], Radix::Binary), + Some("0o") => (&slice[2..], Radix::Octal), _ => (slice, Radix::Decimal), }; E::Number { @@ -447,7 +438,7 @@ impl<'a, 'b> Parser<'a, 'b> { expr } T::Comment => Expr::Comment { pos, literal: self.tok_str(token) }, - tok => self.report(token.start, format_args!("unexpected token: {tok:?}")), + tok => self.report(token.start, format_args!("unexpected token: {tok}")), }; loop { @@ -528,24 +519,25 @@ impl<'a, 'b> Parser<'a, 'b> { } else { self.report( self.token.start, - format_args!("expected identifier, found {:?}", self.token.kind), + format_args!("expected identifier, found {}", self.token.kind), ) } } fn pop_scope(&mut self, frame: usize) { let mut undeclared_count = frame; - for i in frame..self.idents.len() { - if !&self.idents[i].declared { - self.idents.swap(i, undeclared_count); + for i in frame..self.ctx.idents.len() { + if !&self.ctx.idents[i].declared { + self.ctx.idents.swap(i, undeclared_count); undeclared_count += 1; } } - self.idents + self.ctx + .idents .drain(undeclared_count..) .map(|ident| Symbol { name: ident.ident, flags: ident.flags }) - .collect_into(self.symbols); + .collect_into(&mut self.ctx.symbols); } fn ptr_unit_expr(&mut self) -> &'a Expr<'a> { @@ -558,13 +550,13 @@ impl<'a, 'b> Parser<'a, 'b> { end: TokenKind, mut f: impl FnMut(&mut Self) -> T, ) -> &'a [T] { - let mut view = self.stack.view(); + let mut view = self.ctx.stack.view(); while !self.advance_if(end) { let val = f(self); self.trailing_sep = self.advance_if(delim); - unsafe { self.stack.push(&mut view, val) }; + unsafe { self.ctx.stack.push(&mut view, val) }; } - self.arena.alloc_slice(unsafe { self.stack.finalize(view) }) + self.arena.alloc_slice(unsafe { self.ctx.stack.finalize(view) }) } fn advance_if(&mut self, kind: TokenKind) -> bool { @@ -580,7 +572,7 @@ impl<'a, 'b> Parser<'a, 'b> { if self.token.kind != kind { self.report( self.token.start, - format_args!("expected {:?}, found {:?}", kind, self.token.kind), + format_args!("expected {}, found {}", kind, self.token.kind), ); } self.next() @@ -588,15 +580,17 @@ impl<'a, 'b> Parser<'a, 'b> { #[track_caller] fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! { - let mut str = String::new(); - report_to(self.lexer.source(), self.path, pos, msg, &mut str); - log::error!("{str}"); + log::error!("{}", { + let mut str = String::new(); + report_to(self.lexer.source(), self.path, pos, msg, &mut str); + str + }); unreachable!(); } fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) { match e { - Expr::Ident { id, .. } => find_ident(&mut self.idents, id).flags |= flags, + Expr::Ident { id, .. } => find_ident(&mut self.ctx.idents, id).flags |= flags, Expr::Field { target, .. } => self.flag_idents(*target, flags), _ => {} } @@ -634,7 +628,7 @@ macro_rules! generate_expr { $($field:ident: $ty:ty,)* }, )*}) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] + $(#[$meta])* $vis enum $name<$lt> {$( $(#[$field_meta])* $variant { @@ -649,17 +643,6 @@ macro_rules! generate_expr { $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)* } } - - pub fn used_bytes(&self) -> usize { - match self {$( - Self::$variant { $($field,)* } => { - #[allow(clippy::size_of_ref)] - let fields = [$(($field as *const _ as usize - self as *const _ as usize, core::mem::size_of_val($field)),)*]; - let (last, size) = fields.iter().copied().max().unwrap(); - last + size - }, - )*} - } } }; @@ -806,6 +789,7 @@ generate_expr! { /// `Expr '.' Ident` Field { target: &'a Self, + // we put it second place because its the pos of '.' pos: Pos, name: &'a str, }, @@ -820,7 +804,7 @@ generate_expr! { }, /// `'@' Ident List('(', ',', ')', Expr)` Directive { - pos: u32, + pos: Pos, name: &'a str, args: &'a [Self], }, @@ -959,6 +943,14 @@ impl core::fmt::Display for Display<'_> { } } +#[derive(Default)] +pub struct ParserCtx { + symbols: Symbols, + stack: StackAlloc, + idents: Vec, + captured: Vec, +} + #[repr(C)] pub struct AstInner { ref_count: AtomicUsize, @@ -978,21 +970,18 @@ impl AstInner<[Symbol]> { .0 } - fn new(file: Box, path: &str, stack: &mut StackAlloc, loader: Loader) -> NonNull { + fn new(file: Box, path: &str, ctx: &mut ParserCtx, loader: Loader) -> NonNull { let arena = Arena::default(); - let mut syms = Vec::new(); - let mut parser = Parser::new(&arena, &mut syms, stack, loader); let exprs = - parser.file(unsafe { &*(&*file as *const _) }, path) as *const [Expr<'static>]; - drop(parser); + unsafe { core::mem::transmute(Parser::parse(ctx, &file, path, loader, &arena)) }; - syms.sort_unstable_by_key(|s| s.name); + ctx.symbols.sort_unstable_by_key(|s| s.name); - let layout = Self::layout(syms.len()); + let layout = Self::layout(ctx.symbols.len()); unsafe { let ptr = alloc::alloc::alloc(layout); - let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); + let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, ctx.symbols.len()); core::ptr::write(inner as *mut AstInner<()>, AstInner { ref_count: AtomicUsize::new(1), @@ -1004,7 +993,7 @@ impl AstInner<[Symbol]> { }); core::ptr::addr_of_mut!((*inner).symbols) .as_mut_ptr() - .copy_from_nonoverlapping(syms.as_ptr(), syms.len()); + .copy_from_nonoverlapping(ctx.symbols.as_ptr(), ctx.symbols.len()); NonNull::new_unchecked(inner) } @@ -1041,8 +1030,8 @@ pub fn report_to( pub struct Ast(NonNull>); impl Ast { - pub fn new(path: &str, content: String, stack: &mut StackAlloc, loader: Loader) -> Self { - Self(AstInner::new(content.into(), path, stack, loader)) + pub fn new(path: &str, content: String, ctx: &mut ParserCtx, loader: Loader) -> Self { + Self(AstInner::new(content.into(), path, ctx, loader)) } pub fn exprs(&self) -> &[Expr] { @@ -1067,7 +1056,7 @@ impl Ast { impl Default for Ast { fn default() -> Self { - Self(AstInner::new("".into(), "", &mut StackAlloc::default(), &no_loader)) + Self(AstInner::new("".into(), "", &mut ParserCtx::default(), &no_loader)) } } @@ -1132,13 +1121,13 @@ impl Deref for Ast { } } -pub struct StackAllocView { +struct StackAllocView { prev: usize, base: usize, _ph: PhantomData, } -pub struct StackAlloc { +struct StackAlloc { data: *mut u8, len: usize, cap: usize, @@ -1203,29 +1192,22 @@ impl Drop for StackAlloc { } #[derive(Default)] -pub struct Arena<'a> { +pub struct Arena { chunk: UnsafeCell, - ph: core::marker::PhantomData<&'a ()>, } -impl<'a> Arena<'a> { - pub fn alloc_str(&self, token: &str) -> &'a str { - let ptr = self.alloc_slice(token.as_bytes()); - unsafe { core::str::from_utf8_unchecked(ptr) } - } - - pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> { - let align = core::mem::align_of::>(); - let size = expr.used_bytes(); - let layout = unsafe { core::alloc::Layout::from_size_align_unchecked(size, align) }; +impl Arena { + pub fn alloc<'a>(&'a self, expr: Expr<'a>) -> &'a Expr<'a> { + let layout = core::alloc::Layout::new::>(); let ptr = self.alloc_low(layout); unsafe { - ptr.cast::().copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8) + ptr.cast::() + .copy_from_nonoverlapping(NonNull::from(&expr).cast(), layout.size() / 8) }; unsafe { ptr.cast::>().as_ref() } } - pub fn alloc_slice(&self, slice: &[T]) -> &'a [T] { + pub fn alloc_slice<'a, T: Copy>(&'a self, slice: &[T]) -> &'a [T] { if slice.is_empty() || core::mem::size_of::() == 0 { return &mut []; } @@ -1266,7 +1248,7 @@ impl Default for ArenaChunk { } impl ArenaChunk { - const ALIGN: usize = core::mem::align_of::(); + const ALIGN: usize = 16; const CHUNK_SIZE: usize = 1 << 16; const LAYOUT: core::alloc::Layout = unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; diff --git a/hblang/src/son.rs b/hblang/src/son.rs index 7d2ee50..96cab58 100644 --- a/hblang/src/son.rs +++ b/hblang/src/son.rs @@ -9,7 +9,7 @@ use { idfl::{self}, Expr, ExprRef, FileId, Pos, }, - task, + reg, task, ty::{self}, vc::{BitSet, Vc}, Func, HashMap, Offset, OffsetIter, Reloc, Sig, Size, SymKey, TypedReloc, Types, @@ -34,15 +34,6 @@ const MEM: Nid = 3; type Nid = u16; -pub mod reg { - pub const STACK_PTR: Reg = 254; - pub const ZERO: Reg = 0; - pub const RET: Reg = 1; - pub const RET_ADDR: Reg = 31; - - pub type Reg = u8; -} - type Lookup = crate::ctx_map::CtxMap; impl crate::ctx_map::CtxEntry for Nid {