This commit is contained in:
Jakub Doka 2024-10-10 08:35:17 +02:00
parent e200c2fc98
commit 54a7f85978
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
14 changed files with 519 additions and 321 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
/hbbytecode/src/instrs.rs /hbbytecode/src/instrs.rs
/.rgignore /.rgignore
rustc-ice-* rustc-ice-*
db.sqlite

View file

@ -1,6 +1,15 @@
[workspace] [workspace]
resolver = "2" resolver = "2"
members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "depell"] members = [
"hbbytecode",
"hbvm",
"hbxrt",
"xtask",
"hblang",
"hbjit",
"depell",
"depell/wasm-hbfmt"
]
[profile.release] [profile.release]
lto = true lto = true

View file

@ -1,16 +1,18 @@
* { * {
font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif; font-family: var(--font);
} }
body { body {
--primary: white; --primary: white;
--secondary: #EFEFEF; --secondary: #EFEFEF;
--timestamp: #777777;
--error: #ff3333; --error: #ff3333;
--placeholder: #333333; --placeholder: #333333;
} }
body { body {
--small-gap: 5px; --small-gap: 5px;
--font: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
--monospace: 'Courier New', Courier, monospace; --monospace: 'Courier New', Courier, monospace;
nav { nav {
@ -30,6 +32,15 @@ body {
} }
div.preview { div.preview {
div.info {
display: flex;
gap: var(--small-gap);
span[apply=timestamp] {
color: var(--timestamp);
}
}
div.stats { div.stats {
display: flex; display: flex;
gap: var(--small-gap); gap: var(--small-gap);
@ -62,6 +73,7 @@ textarea {
margin: var(--small-gap) 0px; margin: var(--small-gap) 0px;
font-family: var(--monospace); font-family: var(--monospace);
resize: none; resize: none;
tab-size: 4;
} }
input { input {

View file

@ -1,18 +1,72 @@
//// @ts-check /// @ts-check
if (window.location.hostname === 'localhost') { /** @return {never} */
let id; setInterval(async () => { function never() { throw new Error() }
let new_id = await fetch('/hot-reload').then(reps => reps.text());
id ??= new_id; /**@type{WebAssembly.Instance}*/ let instance;
if (id !== new_id) window.location.reload(); /**@type{Promise<WebAssembly.WebAssemblyInstantiatedSource>}*/ let instaceFuture;
}, 300); /** @param {string} code @param {"fmt" | "minify"} action
* @returns {Promise<string | undefined> | string | undefined} */
function modifyCode(code, action) {
if (!instance) {
instaceFuture ??= WebAssembly.instantiateStreaming(fetch("/hbfmt.wasm"), {});
return (async () => {
instance = (await instaceFuture).instance;
return modifyCodeSync(instance, code, action);
})();
} else {
return modifyCodeSync(instance, code, action);
}
} }
/**
 * Runs the `fmt` or `minify` export of an instantiated `hbfmt` WebAssembly
 * module on `code` and returns the produced text.
 *
 * The module communicates through exported globals that hold addresses into
 * its linear memory: the source is copied to `INPUT` (or to `OUTPUT` for
 * "minify", which rewrites that buffer in place), its byte length is stored at
 * the matching `*_LEN` address, and the result is read back from
 * `OUTPUT`/`OUTPUT_LEN`.
 *
 * @param {WebAssembly.Instance} instance instantiated formatter module
 * @param {string} code source text to transform
 * @param {"fmt" | "minify"} action which export to run
 * @returns {string | undefined} the transformed text, or `undefined` when the
 *   input does not fit or the module trapped
 */
function modifyCodeSync(instance, code, action) {
    let {
        INPUT, INPUT_LEN, MAX_INPUT,
        OUTPUT, OUTPUT_LEN,
        PANIC_MESSAGE, PANIC_MESSAGE_LEN,
        memory, fmt, minify
    } = instance.exports;

    if (!(true
        && INPUT instanceof WebAssembly.Global
        && INPUT_LEN instanceof WebAssembly.Global
        && OUTPUT instanceof WebAssembly.Global
        && OUTPUT_LEN instanceof WebAssembly.Global
        && memory instanceof WebAssembly.Memory
        && typeof fmt === "function"
        && typeof minify === "function"
    )) never();

    if (action !== "fmt") {
        INPUT = OUTPUT;
        INPUT_LEN = OUTPUT_LEN;
    }

    // The module expects a length in BYTES; `code.length` counts UTF-16 code
    // units and is too small as soon as the source contains non-ASCII text.
    const encoded = new TextEncoder().encode(code);

    // Always build views from the current `memory.buffer`: a buffer obtained
    // before a wasm call is detached if the module grows its memory.
    /** @param {WebAssembly.Global} ptr @param {WebAssembly.Global} lenPtr */
    const readString = (ptr, lenPtr) => new TextDecoder()
        .decode(new Uint8Array(memory.buffer, ptr.value,
            new DataView(memory.buffer).getUint32(lenPtr.value, true)));

    const dw = new DataView(memory.buffer);

    // Refuse input that would overflow the module's fixed input buffer when
    // the module tells us how large that buffer is.
    if (action === "fmt" && MAX_INPUT instanceof WebAssembly.Global
        && encoded.length > dw.getUint32(MAX_INPUT.value, true)) {
        console.error("code is too long to format", encoded.length);
        return undefined;
    }

    dw.setUint32(INPUT_LEN.value, encoded.length, true);
    new Uint8Array(memory.buffer, INPUT.value).set(encoded);

    try {
        if (action === "fmt") fmt(); else minify();
        return readString(OUTPUT, OUTPUT_LEN);
    } catch (e) {
        // A panic inside the module surfaces here as a trap; report the panic
        // message when the module exports one.
        if (PANIC_MESSAGE instanceof WebAssembly.Global
            && PANIC_MESSAGE_LEN instanceof WebAssembly.Global) {
            console.error(readString(PANIC_MESSAGE, PANIC_MESSAGE_LEN), e);
        } else {
            console.error(e);
        }
        return undefined;
    }
}
/** @param {HTMLElement} target */ /** @param {HTMLElement} target */
function wireUp(target) { function wireUp(target) {
@ -21,24 +75,31 @@ function wireUp(target) {
bindTextareaAutoResize(target); bindTextareaAutoResize(target);
} }
/** @param {string} content @return {string} */ /** @type {{ [key: string]: (content: string) => Promise<string> | string }} */
function fmtTimestamp(content) { const applyFns = {
new Date(parseInt(content) * 1000).toLocaleString() timestamp: (content) => new Date(parseInt(content) * 1000).toLocaleString(),
} fmt: (content) => {
let res = modifyCode(content, "fmt");
return res instanceof Promise ? res.then(c => c ?? content) : res ?? content;
},
};
/** @param {HTMLElement} target */ /** @param {HTMLElement} target */
function execApply(target) { function execApply(target) {
/**@type {HTMLElement}*/ let elem; for (const elem of target.querySelectorAll('[apply]')) {
for (elem of target.querySelectorAll('[apply]')) { if (!(elem instanceof HTMLElement)) continue;
const funcname = elem.getAttribute('apply'); const funcname = elem.getAttribute('apply') ?? never();
elem.textContent = window[funcname](elem.textContent); let res = applyFns[funcname](elem.textContent ?? "");
if (res instanceof Promise) res.then(c => elem.textContent = c);
else elem.textContent = res;
} }
} }
/** @param {HTMLElement} target */ /** @param {HTMLElement} target */
function bindTextareaAutoResize(target) { function bindTextareaAutoResize(target) {
/**@type {HTMLTextAreaElement}*/ let textarea; for (const textarea of target.querySelectorAll("textarea")) {
for (textarea of target.querySelectorAll("textarea")) { if (!(textarea instanceof HTMLTextAreaElement)) never();
textarea.style.height = textarea.scrollHeight + "px"; textarea.style.height = textarea.scrollHeight + "px";
textarea.style.overflowY = "hidden"; textarea.style.overflowY = "hidden";
textarea.addEventListener("input", function() { textarea.addEventListener("input", function() {
@ -84,12 +145,37 @@ function cacheInputs(target) {
continue; continue;
} }
/**@type {HTMLInputElement}*/ let input; for (const input of form.elements) {
for (input of form.elements) { if (input instanceof HTMLInputElement || input instanceof HTMLTextAreaElement) {
if ('password submit button'.includes(input.type)) continue; if ('password submit button'.includes(input.type)) continue;
const key = path + input.name; const key = path + input.name;
input.value = localStorage.getItem(key) ?? ''; input.value = localStorage.getItem(key) ?? '';
input.addEventListener("input", (ev) => localStorage.setItem(key, ev.target.value)); input.addEventListener("input", () => localStorage.setItem(key, input.value));
} else {
console.warn("unhandled form element: ", input);
} }
} }
} }
}
// Development helpers, active only when the page is served from localhost.
if (window.location.hostname === 'localhost') {
    // Poll the server's reload token and refresh the page once it differs
    // from the first token observed.
    let seenId;
    setInterval(async () => {
        const response = await fetch('/hot-reload');
        const currentId = await response.text();
        if (seenId == null) seenId = currentId;
        if (seenId !== currentId) window.location.reload();
    }, 300);

    // Smoke test: formatting and then minifying a minified snippet must give
    // the snippet back unchanged; a mismatch is logged to the console.
    const testCodeChange = async () => {
        const code = "main:=fn():void{return}";
        const formatted = await modifyCode(code, "fmt");
        const fmtd = formatted ?? never();
        const minified = await modifyCode(fmtd, "minify");
        const prev = minified ?? never();
        if (code != prev) console.error(code, prev);
    };
    testCodeChange();
}
// Re-run the wiring on every fragment htmx swaps into the page. The swap
// target is required to be an HTMLElement; anything else aborts via never().
document.body.addEventListener('htmx:afterSwap', (ev) => {
if (!(ev.target instanceof HTMLElement)) never();
wireUp(ev.target);
});
// Wire up the content that is present on initial page load.
wireUp(document.body);

View file

@ -28,6 +28,18 @@ async fn amain() {
let router = axum::Router::new() let router = axum::Router::new()
.route("/", get(Index::page)) .route("/", get(Index::page))
.route(
"/hbfmt.wasm",
get(|| async move {
axum::http::Response::builder()
.header("content-type", "application/wasm")
.body(axum::body::Body::from(
include_bytes!("../../target/wasm32-unknown-unknown/small/wasm_hbfmt.wasm")
.to_vec(),
))
.unwrap()
}),
)
.route("/index-view", get(Index::get)) .route("/index-view", get(Index::get))
.route("/feed", get(Index::page)) .route("/feed", get(Index::page))
.route("/profile", get(Profile::page)) .route("/profile", get(Profile::page))
@ -117,7 +129,7 @@ impl Page for Post {
<input name="author" type="text" value={session.name} hidden> <input name="author" type="text" value={session.name} hidden>
<input name="name" type="text" placeholder="name" value=name <input name="name" type="text" placeholder="name" value=name
required maxlength=MAX_POSTNAME_LENGTH> required maxlength=MAX_POSTNAME_LENGTH>
<textarea name="code" placeholder="code" required form="postForm">code</textarea> <textarea name="code" placeholder="code" rows=1 required>code</textarea>
<input type="submit" value="submit"> <input type="submit" value="submit">
<input type="button" "hx-get"="/post-preview" "hx-swap"="outherHTML" <input type="button" "hx-get"="/post-preview" "hx-swap"="outherHTML"
"hx-target"="postForm" value="preview"> "hx-target"="postForm" value="preview">
@ -158,18 +170,19 @@ impl fmt::Display for Post {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self { author, name, timestamp, imports, runs, dependencies, code, .. } = self; let Self { author, name, timestamp, imports, runs, dependencies, code, .. } = self;
write_html! { f <div class="preview"> write_html! { f <div class="preview">
<div> <div class="info">
<span>author</span> <span>author "/" name</span>
"/" <span apply="timestamp">timestamp</span>
<span>name</span>
<span apply="fmtTimestamp">timestamp</span>
</div> </div>
<div class="stats"> <div class="stats">
"imps: "<span>imports</span> for (name, count) in "inps runs deps".split(' ')
"runs: "<span>runs</span> .zip([imports, runs, dependencies])
"deps: "<span>dependencies</span> .filter(|(_, &c)| c != 0)
{
name ": "<span>count</span>
}
</div> </div>
<pre>code</pre> <pre apply="fmt">code</pre>
if *timestamp == 0 { if *timestamp == 0 {
<button "hx-get"="/post" "hx-swap"="outherHTML" <button "hx-get"="/post" "hx-swap"="outherHTML"
"hx-target"="[preview]">"edit"</button> "hx-target"="[preview]">"edit"</button>
@ -380,11 +393,11 @@ struct Session {
impl<S> axum::extract::FromRequestParts<S> for Session { impl<S> axum::extract::FromRequestParts<S> for Session {
/// If the extractor fails it'll use this "rejection" type. A rejection is /// If the extractor fails it'll use this "rejection" type. A rejection is
/// a kind of error that can be converted into a response. /// a kind of error that can be converted into a response.
type Rejection = axum::response::Redirect; type Rejection = Redirect;
/// Perform the extraction. /// Perform the extraction.
async fn from_request_parts(parts: &mut Parts, _: &S) -> Result<Self, Self::Rejection> { async fn from_request_parts(parts: &mut Parts, _: &S) -> Result<Self, Self::Rejection> {
let err = || axum::response::Redirect::permanent("/login"); let err = redirect("/login");
let value = parts let value = parts
.headers .headers
@ -392,20 +405,20 @@ impl<S> axum::extract::FromRequestParts<S> for Session {
.into_iter() .into_iter()
.find_map(|c| c.to_str().ok()?.trim().strip_prefix("id=")) .find_map(|c| c.to_str().ok()?.trim().strip_prefix("id="))
.map(|c| c.split_once(';').unwrap_or((c, "")).0) .map(|c| c.split_once(';').unwrap_or((c, "")).0)
.ok_or(err())?; .ok_or(err)?;
let mut id = [0u8; 32]; let mut id = [0u8; 32];
parse_hex(value, &mut id).ok_or(err())?; parse_hex(value, &mut id).ok_or(err)?;
let (name, expiration) = db::with(|db| { let (name, expiration) = db::with(|db| {
db.get_session db.get_session
.query_row((dbg!(id),), |r| Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))) .query_row((dbg!(id),), |r| Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)))
.inspect_err(|e| log::error!("{e}")) .inspect_err(|e| log::error!("{e}"))
.map_err(|_| err()) .map_err(|_| err)
})?; })?;
if expiration < now() { if expiration < now() {
log::error!("expired"); log::error!("expired");
return Err(err()); return Err(err);
} }
Ok(Self { name, id }) Ok(Self { name, id })

View file

@ -1,31 +1,51 @@
#![no_std] #![no_std]
#![feature(slice_take)] #![feature(slice_take)]
#![feature(str_from_raw_parts)] #![feature(str_from_raw_parts)]
#![feature(alloc_error_handler)]
use hblang::parser::ParserCtx; use {
core::{
#[cfg(target_arch = "wasm32")]
#[panic_handler]
fn handle_panic(_: &core::panic::PanicInfo) -> ! {
core::arch::wasm32::unreachable()
}
use core::{
alloc::{GlobalAlloc, Layout}, alloc::{GlobalAlloc, Layout},
cell::UnsafeCell, cell::UnsafeCell,
},
hblang::parser::ParserCtx,
}; };
const ARENA_SIZE: usize = 128 * 1024; const ARENA_SIZE: usize = 128 * 1024;
const MAX_OUTPUT_SIZE: usize = 1024 * 10;
const MAX_INPUT_SIZE: usize = 1024 * 4;
/// Panic handler for the wasm32 build: ignores the panic info and executes the
/// wasm `unreachable` instruction.
///
/// The commented-out body below sketches writing the panic text into
/// `PANIC_MESSAGE`/`PANIC_MESSAGE_LEN` buffers; those statics are themselves
/// commented out in this file, so no message is recorded at present.
#[cfg(target_arch = "wasm32")]
#[panic_handler]
pub fn handle_panic(_info: &core::panic::PanicInfo) -> ! {
//unsafe {
// use core::fmt::Write;
// let mut f = Write(&mut PANIC_MESSAGE[..]);
// _ = writeln!(f, "{}", info);
// PANIC_MESSAGE_LEN = 1024 - f.0.len();
//}
core::arch::wasm32::unreachable();
}
/// The crate-wide global allocator: a fixed-size arena (`ARENA_SIZE` bytes).
/// The exported `fmt` entry point calls `ALLOCATOR.reset()` before each run.
#[global_allocator]
static ALLOCATOR: ArenaAllocator = ArenaAllocator::new();
/// Allocation-failure handler for the wasm32 build: ignores the requested
/// layout and executes the wasm `unreachable` instruction, like the panic
/// handler does.
#[cfg(target_arch = "wasm32")]
#[alloc_error_handler]
fn alloc_error(_: core::alloc::Layout) -> ! {
core::arch::wasm32::unreachable()
}
#[repr(C, align(32))] #[repr(C, align(32))]
struct SimpleAllocator { struct ArenaAllocator {
arena: UnsafeCell<[u8; ARENA_SIZE]>, arena: UnsafeCell<[u8; ARENA_SIZE]>,
head: UnsafeCell<*mut u8>, head: UnsafeCell<*mut u8>,
} }
impl SimpleAllocator { impl ArenaAllocator {
const fn new() -> Self { const fn new() -> Self {
SimpleAllocator { ArenaAllocator {
arena: UnsafeCell::new([0; ARENA_SIZE]), arena: UnsafeCell::new([0; ARENA_SIZE]),
head: UnsafeCell::new(core::ptr::null_mut()), head: UnsafeCell::new(core::ptr::null_mut()),
} }
@ -36,9 +56,9 @@ impl SimpleAllocator {
} }
} }
unsafe impl Sync for SimpleAllocator {} unsafe impl Sync for ArenaAllocator {}
unsafe impl GlobalAlloc for SimpleAllocator { unsafe impl GlobalAlloc for ArenaAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 { unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
let size = layout.size(); let size = layout.size();
let align = layout.align(); let align = layout.align();
@ -46,7 +66,7 @@ unsafe impl GlobalAlloc for SimpleAllocator {
let until = self.arena.get() as *mut u8; let until = self.arena.get() as *mut u8;
let new_head = (*self.head.get()).sub(size); let new_head = (*self.head.get()).sub(size);
let aligned_head = (new_head as usize & (1 << (align - 1))) as *mut u8; let aligned_head = (new_head as usize & !(1 << (align - 1))) as *mut u8;
if until > aligned_head { if until > aligned_head {
return core::ptr::null_mut(); return core::ptr::null_mut();
@ -61,27 +81,6 @@ unsafe impl GlobalAlloc for SimpleAllocator {
} }
} }
#[cfg_attr(target_arch = "wasm32", global_allocator)]
static ALLOCATOR: SimpleAllocator = SimpleAllocator::new();
const MAX_OUTPUT_SIZE: usize = 1024 * 10;
#[no_mangle]
static mut OUTPUT: [u8; MAX_OUTPUT_SIZE] = unsafe { core::mem::zeroed() };
#[no_mangle]
static mut OUTPUT_LEN: usize = 0;
#[no_mangle]
unsafe extern "C" fn fmt(code: *const u8, len: usize) {
ALLOCATOR.reset();
let code = core::str::from_raw_parts(code, len);
let arena = hblang::parser::Arena::default();
let mut ctx = ParserCtx::default();
let exprs = hblang::parser::Parser::parse(&mut ctx, code, "source.hb", &|_, _| Ok(0), &arena);
struct Write<'a>(&'a mut [u8]); struct Write<'a>(&'a mut [u8]);
impl core::fmt::Write for Write<'_> { impl core::fmt::Write for Write<'_> {
@ -95,7 +94,40 @@ unsafe extern "C" fn fmt(code: *const u8, len: usize) {
} }
} }
let mut f = Write(unsafe { &mut OUTPUT[..] }); //#[no_mangle]
//static mut PANIC_MESSAGE: [u8; 1024] = unsafe { core::mem::zeroed() };
//#[no_mangle]
//static mut PANIC_MESSAGE_LEN: usize = 0;
/// Result buffer: `fmt` writes the formatted text here, and `minify` rewrites
/// the text already stored here in place.
#[no_mangle]
static mut OUTPUT: [u8; MAX_OUTPUT_SIZE] = unsafe { core::mem::zeroed() };
/// Number of meaningful bytes at the start of `OUTPUT`.
#[no_mangle]
static mut OUTPUT_LEN: usize = 0;
/// Exposes `MAX_INPUT_SIZE`, the capacity of `INPUT`, as a symbol.
#[no_mangle]
static MAX_INPUT: usize = MAX_INPUT_SIZE;
/// Source buffer read by `fmt`; the bytes are interpreted as a `str`.
#[no_mangle]
static mut INPUT: [u8; MAX_INPUT_SIZE] = unsafe { core::mem::zeroed() };
/// Number of meaningful bytes at the start of `INPUT`.
#[no_mangle]
static mut INPUT_LEN: usize = 0;
#[no_mangle]
unsafe extern "C" fn fmt() {
ALLOCATOR.reset();
let code = core::str::from_raw_parts(core::ptr::addr_of!(INPUT).cast(), INPUT_LEN);
let arena = hblang::parser::Arena::default();
let mut ctx = ParserCtx::default();
let exprs = hblang::parser::Parser::parse(&mut ctx, code, "source.hb", &|_, _| Ok(0), &arena);
let mut f = Write(&mut OUTPUT[..]);
hblang::fmt::fmt_file(exprs, code, &mut f).unwrap(); hblang::fmt::fmt_file(exprs, code, &mut f).unwrap();
unsafe { OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len() }; OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len();
}
#[no_mangle]
unsafe extern "C" fn minify() {
let code = core::str::from_raw_parts_mut(core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN);
OUTPUT_LEN = hblang::fmt::minify(code);
} }

View file

@ -12,8 +12,13 @@ hashbrown = { version = "0.15.0", default-features = false, features = ["raw-ent
hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
hbvm = { path = "../hbvm", features = ["nightly"] } hbvm = { path = "../hbvm", features = ["nightly"] }
log = { version = "0.4.22", features = ["release_max_level_error"] } log = { version = "0.4.22", features = ["release_max_level_error"] }
regalloc2 = { git = "https://github.com/jakubDoka/regalloc2", branch = "reuse-allocations", features = [] }
[dependencies.regalloc2]
git = "https://github.com/jakubDoka/regalloc2"
branch = "reuse-allocations"
optional = true
[features] [features]
default = ["std"] default = ["std", "opts"]
std = [] std = []
opts = ["regalloc2"]

View file

@ -839,8 +839,8 @@ impl Codegen {
let index_val = self.expr(index)?; let index_val = self.expr(index)?;
_ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt"); _ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt");
if let ty::Kind::Ptr(ty) = base_val.ty.expand() { if let Some(ty) = self.tys.base_of(base_val.ty) {
base_val.ty = self.tys.ins.ptrs[ty as usize].base; base_val.ty = ty;
base_val.loc = base_val.loc.into_derefed(); base_val.loc = base_val.loc.into_derefed();
} }
@ -1070,7 +1070,7 @@ impl Codegen {
Some(Value { ty, loc }) Some(Value { ty, loc })
} }
E::String { pos, mut literal } => { E::String { pos, mut literal } => {
literal = literal.trim_matches('"'); literal = &literal[1..literal.len() - 1];
if !literal.ends_with("\\0") { if !literal.ends_with("\\0") {
self.report(pos, "string literal must end with null byte (for now)"); self.report(pos, "string literal must end with null byte (for now)");
@ -1206,8 +1206,8 @@ impl Codegen {
let checkpoint = self.ci.snap(); let checkpoint = self.ci.snap();
let mut tal = self.expr(target)?; let mut tal = self.expr(target)?;
if let ty::Kind::Ptr(ty) = tal.ty.expand() { if let Some(ty) = self.tys.base_of(tal.ty) {
tal.ty = self.tys.ins.ptrs[ty as usize].base; tal.ty = ty;
tal.loc = tal.loc.into_derefed(); tal.loc = tal.loc.into_derefed();
} }
@ -1306,9 +1306,9 @@ impl Codegen {
} }
E::UnOp { op: T::Mul, val, pos } => { E::UnOp { op: T::Mul, val, pos } => {
let val = self.expr(val)?; let val = self.expr(val)?;
match val.ty.expand() { match self.tys.base_of(val.ty) {
ty::Kind::Ptr(ty) => Some(Value { Some(ty) => Some(Value {
ty: self.tys.ins.ptrs[ty as usize].base, ty,
loc: Loc::reg(self.loc_to_reg(val.loc, self.tys.size_of(val.ty))) loc: Loc::reg(self.loc_to_reg(val.loc, self.tys.size_of(val.ty)))
.into_derefed(), .into_derefed(),
}), }),
@ -1640,10 +1640,9 @@ impl Codegen {
imm = u64::from_ne_bytes(dst); imm = u64::from_ne_bytes(dst);
} }
if matches!(op, T::Add | T::Sub) if matches!(op, T::Add | T::Sub)
&& let ty::Kind::Ptr(ty) = ty::Kind::from_ty(ty) && let Some(ty) = self.tys.base_of(ty)
{ {
let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base); imm *= self.tys.size_of(ty) as u64;
imm *= size as u64;
} }
self.ci.emit(oper(dst.get(), lhs.get(), imm)); self.ci.emit(oper(dst.get(), lhs.get(), imm));
@ -1676,9 +1675,8 @@ impl Codegen {
(lhs.get(), right.ty) (lhs.get(), right.ty)
}; };
let ty::Kind::Ptr(ty) = ty.expand() else { unreachable!() }; let ty = self.tys.base_of(ty).unwrap();
let size = self.tys.size_of(ty);
let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base);
self.ci.emit(muli64(offset, offset, size as _)); self.ci.emit(muli64(offset, offset, size as _));
} }
} }

View file

@ -3,11 +3,10 @@ use {
lexer::{self, TokenKind}, lexer::{self, TokenKind},
parser::{self, CommentOr, CtorField, Expr, Poser, Radix, StructField}, parser::{self, CommentOr, CtorField, Expr, Poser, Radix, StructField},
}, },
alloc::string::String,
core::fmt, core::fmt,
}; };
pub fn minify(source: &mut str) -> Option<&str> { pub fn minify(source: &mut str) -> usize {
fn needs_space(c: u8) -> bool { fn needs_space(c: u8) -> bool {
matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..) matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..)
} }
@ -15,6 +14,7 @@ pub fn minify(source: &mut str) -> Option<&str> {
let mut writer = source.as_mut_ptr(); let mut writer = source.as_mut_ptr();
let mut reader = &source[..]; let mut reader = &source[..];
let mut prev_needs_whitecpace = false; let mut prev_needs_whitecpace = false;
let mut prev_needs_newline = false;
loop { loop {
let mut token = lexer::Lexer::new(reader).next(); let mut token = lexer::Lexer::new(reader).next();
match token.kind { match token.kind {
@ -23,46 +23,59 @@ pub fn minify(source: &mut str) -> Option<&str> {
_ => {} _ => {}
} }
let mut suffix = 0; let cpy_len = token.range().len();
if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' {
token.end = token.start + reader[token.range()].trim_end().len() as u32;
suffix = b'\n';
}
let mut prefix = 0; let mut prefix = 0;
if prev_needs_whitecpace && needs_space(reader.as_bytes()[token.start as usize]) { if prev_needs_whitecpace && needs_space(reader.as_bytes()[token.start as usize]) {
prefix = b' '; prefix = b' ';
debug_assert!(token.start != 0, "{reader}");
}
prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]);
let inbetween_new_lines =
reader[..token.start as usize].bytes().filter(|&b| b == b'\n').count()
+ token.kind.precedence().is_some() as usize;
let extra_prefix_new_lines = if inbetween_new_lines > 1 {
1 + token.kind.precedence().is_none() as usize
} else {
prev_needs_newline as usize
};
if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' {
prev_needs_newline = true;
prev_needs_whitecpace = false;
} else {
prev_needs_newline = false;
} }
prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]);
let sstr = reader[token.start as usize..].as_ptr(); let sstr = reader[token.start as usize..].as_ptr();
reader = &reader[token.end as usize..]; reader = &reader[token.end as usize..];
unsafe { unsafe {
if prefix != 0 { if extra_prefix_new_lines != 0 {
for _ in 0..extra_prefix_new_lines {
writer.write(b'\n');
writer = writer.add(1);
}
} else if prefix != 0 {
writer.write(prefix); writer.write(prefix);
writer = writer.add(1); writer = writer.add(1);
} }
writer.copy_from(sstr, token.range().len()); writer.copy_from(sstr, cpy_len);
writer = writer.add(token.range().len()); writer = writer.add(cpy_len);
if suffix != 0 {
writer.write(suffix);
writer = writer.add(1);
}
} }
} }
None unsafe { writer.sub_ptr(source.as_mut_ptr()) }
} }
pub struct Formatter<'a> { pub struct Formatter<'a> {
source: &'a str, source: &'a str,
depth: usize, depth: usize,
disp_buff: String,
} }
impl<'a> Formatter<'a> { impl<'a> Formatter<'a> {
pub fn new(source: &'a str) -> Self { pub fn new(source: &'a str) -> Self {
Self { source, depth: 0, disp_buff: Default::default() } Self { source, depth: 0 }
} }
fn fmt_list<T: Poser, F: core::fmt::Write>( fn fmt_list<T: Poser, F: core::fmt::Write>(
@ -172,7 +185,7 @@ impl<'a> Formatter<'a> {
self.fmt(value, f) self.fmt(value, f)
} }
Expr::String { literal, .. } => write!(f, "{literal}"), Expr::String { literal, .. } => write!(f, "{literal}"),
Expr::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), Expr::Comment { literal, .. } => write!(f, "{literal}"),
Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"), Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"),
Expr::Field { target, name: field, .. } => { Expr::Field { target, name: field, .. } => {
self.fmt_paren(target, f, postfix)?; self.fmt_paren(target, f, postfix)?;
@ -194,7 +207,7 @@ impl<'a> Formatter<'a> {
write!(f, "{name}: ")?; write!(f, "{name}: ")?;
s.fmt(ty, f)? s.fmt(ty, f)?
} }
CommentOr::Comment { literal, .. } => write!(f, "{literal}")?, CommentOr::Comment { literal, .. } => writeln!(f, "{literal}")?,
} }
Ok(field.or().is_some()) Ok(field.or().is_some())
}) })
@ -294,30 +307,42 @@ impl<'a> Formatter<'a> {
write!(f, "{{")?; write!(f, "{{")?;
self.fmt_list(f, true, "}", "", stmts, Self::fmt) self.fmt_list(f, true, "}", "", stmts, Self::fmt)
} }
Expr::Number { value, radix, .. } => match radix { Expr::Number { value, radix, .. } => {
/// Writes the digits of `value` in the base given by `radix` into the tail of
/// `buf` and returns them as a string slice.
///
/// Digits above 9 are rendered as uppercase letters starting at `A`; no radix
/// prefix is emitted. A `value` of zero yields `"0"`.
fn display_radix(radix: Radix, mut value: u64, buf: &mut [u8; 64]) -> &str {
    let base = radix as u64;

    // Fill the buffer from the back, least significant digit first, and
    // remember where the most significant digit ended up.
    let mut first_digit = None;
    for (idx, slot) in buf.iter_mut().enumerate().rev() {
        let digit = (value % base) as u8;
        value /= base;
        *slot = if digit < 10 { digit + b'0' } else { digit - 10 + b'A' };
        if value == 0 {
            first_digit = Some(idx);
            break;
        }
    }

    let Some(start) = first_digit else { unreachable!() };
    // SAFETY: every byte in `buf[start..]` was written by the loop above as an
    // ASCII digit or letter (assumes the radix is at most 36 — the arms that
    // call this use decimal, hex, octal and binary).
    unsafe { core::str::from_utf8_unchecked(&buf[start..]) }
}
let mut buf = [0u8; 64];
let value = display_radix(radix, value as u64, &mut buf);
match radix {
Radix::Decimal => write!(f, "{value}"), Radix::Decimal => write!(f, "{value}"),
Radix::Hex => write!(f, "{value:#X}"), Radix::Hex => write!(f, "0x{value}"),
Radix::Octal => write!(f, "{value:#o}"), Radix::Octal => write!(f, "0o{value}"),
Radix::Binary => write!(f, "{value:#b}"), Radix::Binary => write!(f, "0b{value}"),
}, }
}
Expr::Bool { value, .. } => write!(f, "{value}"), Expr::Bool { value, .. } => write!(f, "{value}"),
Expr::Idk { .. } => write!(f, "idk"), Expr::Idk { .. } => write!(f, "idk"),
Expr::BinOp { Expr::BinOp {
left, left,
op: TokenKind::Assign, op: TokenKind::Assign,
right: Expr::BinOp { left: lleft, op, right }, right: &Expr::BinOp { left: lleft, op, right },
} if { } if left.pos() == lleft.pos() => {
let mut b = core::mem::take(&mut self.disp_buff);
self.fmt(lleft, &mut b)?;
let len = b.len();
self.fmt(left, &mut b)?;
let (lleft, left) = b.split_at(len);
let res = lleft == left;
b.clear();
self.disp_buff = b;
res
} =>
{
self.fmt(left, f)?; self.fmt(left, f)?;
write!(f, " {op}= ")?; write!(f, " {op}= ")?;
self.fmt(right, f) self.fmt(right, f)
@ -355,7 +380,7 @@ impl<'a> Formatter<'a> {
} }
pub fn preserve_newlines(source: &str) -> usize { pub fn preserve_newlines(source: &str) -> usize {
source[source.trim_end().len()..].chars().filter(|&c| c == '\n').count() source[source.trim_end().len()..].bytes().filter(|&c| c == b'\n').count()
} }
pub fn insert_needed_semicolon(source: &str) -> bool { pub fn insert_needed_semicolon(source: &str) -> bool {
@ -365,39 +390,46 @@ pub fn insert_needed_semicolon(source: &str) -> bool {
impl core::fmt::Display for parser::Ast { impl core::fmt::Display for parser::Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, expr) in self.exprs().iter().enumerate() { fmt_file(self.exprs(), &self.file, f)
Formatter::new(&self.file).fmt(expr, f)?; }
if let Some(expr) = self.exprs().get(i + 1) }
&& let Some(rest) = self.file.get(expr.pos() as usize..)
pub fn fmt_file(exprs: &[Expr], file: &str, f: &mut impl fmt::Write) -> fmt::Result {
for (i, expr) in exprs.iter().enumerate() {
Formatter::new(file).fmt(expr, f)?;
if let Some(expr) = exprs.get(i + 1)
&& let Some(rest) = file.get(expr.pos() as usize..)
{ {
if insert_needed_semicolon(rest) { if insert_needed_semicolon(rest) {
write!(f, ";")?; write!(f, ";")?;
} }
if preserve_newlines(&self.file[..expr.pos() as usize]) > 1 { if preserve_newlines(&file[..expr.pos() as usize]) > 1 {
writeln!(f)?; writeln!(f)?;
} }
} }
if i + 1 != self.exprs().len() { if i + 1 != exprs.len() {
writeln!(f)?; writeln!(f)?;
} }
} }
Ok(()) Ok(())
} }
}
#[cfg(test)] #[cfg(test)]
pub mod test { pub mod test {
use { use {
crate::parser::{self, StackAlloc}, crate::parser::{self, ParserCtx},
alloc::borrow::ToOwned, alloc::borrow::ToOwned,
std::{fmt::Write, string::String}, std::{fmt::Write, string::String},
}; };
pub fn format(ident: &str, input: &str) { pub fn format(ident: &str, input: &str) {
let ast = let mut minned = input.to_owned();
parser::Ast::new(ident, input.to_owned(), &mut StackAlloc::default(), &|_, _| Ok(0)); let len = crate::fmt::minify(&mut minned);
minned.truncate(len);
let ast = parser::Ast::new(ident, minned, &mut ParserCtx::default(), &|_, _| Ok(0));
let mut output = String::new(); let mut output = String::new();
write!(output, "{ast}").unwrap(); write!(output, "{ast}").unwrap();

View file

@ -1,7 +1,7 @@
use { use {
crate::{ crate::{
codegen, codegen,
parser::{self, Ast, StackAlloc}, parser::{self, Ast, ParserCtx},
}, },
alloc::{string::String, vec::Vec}, alloc::{string::String, vec::Vec},
core::{fmt::Write, num::NonZeroUsize}, core::{fmt::Write, num::NonZeroUsize},
@ -263,22 +263,22 @@ pub fn parse_from_fs(extra_threads: usize, root: &str) -> io::Result<Vec<Ast>> {
Ok(id) Ok(id)
}; };
let execute_task = |stack: &mut _, (_, path): Task| { let execute_task = |ctx: &mut _, (_, path): Task| {
let path = path.to_str().ok_or_else(|| { let path = path.to_str().ok_or_else(|| {
io::Error::new( io::Error::new(
io::ErrorKind::InvalidData, io::ErrorKind::InvalidData,
format!("path contains invalid characters: {}", display_rel_path(&path)), format!("path contains invalid characters: {}", display_rel_path(&path)),
) )
})?; })?;
Ok(Ast::new(path, std::fs::read_to_string(path)?, stack, &|path, from| { Ok(Ast::new(path, std::fs::read_to_string(path)?, ctx, &|path, from| {
loader(path, from).map_err(|e| e.to_string()) loader(path, from).map_err(|e| e.to_string())
})) }))
}; };
let thread = || { let thread = || {
let mut stack = StackAlloc::default(); let mut ctx = ParserCtx::default();
while let Some(task @ (indx, ..)) = tasks.pop() { while let Some(task @ (indx, ..)) = tasks.pop() {
let res = execute_task(&mut stack, task); let res = execute_task(&mut ctx, task);
let mut ast = ast.lock().unwrap(); let mut ast = ast.lock().unwrap();
let len = ast.len().max(indx as usize + 1); let len = ast.len().max(indx as usize + 1);
ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into()));

View file

@ -1,4 +1,4 @@
use crate::{instrs, EncodedInstr}; use crate::EncodedInstr;
const fn ascii_mask(chars: &[u8]) -> u128 { const fn ascii_mask(chars: &[u8]) -> u128 {
let mut eq = 0; let mut eq = 0;
@ -83,7 +83,7 @@ macro_rules! gen_token_kind {
}; };
} }
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] #[derive(PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u8)] #[repr(u8)]
pub enum TokenKind { pub enum TokenKind {
Not = b'!', Not = b'!',
@ -170,9 +170,16 @@ pub enum TokenKind {
ShlAss = b'<' - 5 + 128, ShlAss = b'<' - 5 + 128,
} }
/// `Debug` for `TokenKind` forwards to its `Display` implementation (using the
/// same formatter), so both render a token kind identically.
impl core::fmt::Debug for TokenKind {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Display::fmt(self, f)
}
}
impl TokenKind { impl TokenKind {
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn cond_op(self, signed: bool) -> Option<(fn(u8, u8, i16) -> EncodedInstr, bool)> { pub fn cond_op(self, signed: bool) -> Option<(fn(u8, u8, i16) -> EncodedInstr, bool)> {
use crate::instrs;
Some(( Some((
match self { match self {
Self::Le if signed => instrs::jgts, Self::Le if signed => instrs::jgts,
@ -192,7 +199,7 @@ impl TokenKind {
} }
pub fn binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u8) -> EncodedInstr> { pub fn binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u8) -> EncodedInstr> {
use instrs::*; use crate::instrs::*;
macro_rules! div { ($($op:ident),*) => {[$(|a, b, c| $op(a, 0, b, c)),*]}; } macro_rules! div { ($($op:ident),*) => {[$(|a, b, c| $op(a, 0, b, c)),*]}; }
macro_rules! rem { ($($op:ident),*) => {[$(|a, b, c| $op(0, a, b, c)),*]}; } macro_rules! rem { ($($op:ident),*) => {[$(|a, b, c| $op(0, a, b, c)),*]}; }
@ -219,7 +226,7 @@ impl TokenKind {
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn imm_binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u64) -> EncodedInstr> { pub fn imm_binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u64) -> EncodedInstr> {
use instrs::*; use crate::instrs::*;
macro_rules! def_op { macro_rules! def_op {
($name:ident |$a:ident, $b:ident, $c:ident| $($tt:tt)*) => { ($name:ident |$a:ident, $b:ident, $c:ident| $($tt:tt)*) => {
macro_rules! $name { macro_rules! $name {
@ -286,7 +293,7 @@ impl TokenKind {
pub fn unop(&self) -> Option<fn(u8, u8) -> EncodedInstr> { pub fn unop(&self) -> Option<fn(u8, u8) -> EncodedInstr> {
Some(match self { Some(match self {
Self::Sub => instrs::neg, Self::Sub => crate::instrs::neg,
_ => return None, _ => return None,
}) })
} }
@ -362,7 +369,7 @@ gen_token_kind! {
pub struct Lexer<'a> { pub struct Lexer<'a> {
pos: u32, pos: u32,
bytes: &'a [u8], source: &'a [u8],
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
@ -371,22 +378,22 @@ impl<'a> Lexer<'a> {
} }
pub fn restore(input: &'a str, pos: u32) -> Self { pub fn restore(input: &'a str, pos: u32) -> Self {
Self { pos, bytes: input.as_bytes() } Self { pos, source: input.as_bytes() }
} }
pub fn source(&self) -> &'a str { pub fn source(&self) -> &'a str {
unsafe { core::str::from_utf8_unchecked(self.bytes) } unsafe { core::str::from_utf8_unchecked(self.source) }
} }
pub fn slice(&self, tok: core::ops::Range<usize>) -> &'a str { pub fn slice(&self, tok: core::ops::Range<usize>) -> &'a str {
unsafe { core::str::from_utf8_unchecked(&self.bytes[tok]) } unsafe { core::str::from_utf8_unchecked(&self.source[tok]) }
} }
fn peek(&self) -> Option<u8> { fn peek(&self) -> Option<u8> {
if core::intrinsics::unlikely(self.pos >= self.bytes.len() as u32) { if core::intrinsics::unlikely(self.pos >= self.source.len() as u32) {
None None
} else { } else {
Some(unsafe { *self.bytes.get_unchecked(self.pos as usize) }) Some(unsafe { *self.source.get_unchecked(self.pos as usize) })
} }
} }
@ -453,7 +460,7 @@ impl<'a> Lexer<'a> {
} }
b'a'..=b'z' | b'A'..=b'Z' | b'_' | 127.. => { b'a'..=b'z' | b'A'..=b'Z' | b'_' | 127.. => {
advance_ident(self); advance_ident(self);
let ident = &self.bytes[start as usize..self.pos as usize]; let ident = &self.source[start as usize..self.pos as usize];
T::from_ident(ident) T::from_ident(ident)
} }
b'"' | b'\'' => loop { b'"' | b'\'' => loop {
@ -465,10 +472,18 @@ impl<'a> Lexer<'a> {
} }
}, },
b'/' if self.advance_if(b'/') => { b'/' if self.advance_if(b'/') => {
while let Some(l) = self.advance() while let Some(l) = self.peek()
&& l != b'\n' && l != b'\n'
{} {
T::Comment self.pos += 1;
}
let end = self.source[..self.pos as usize]
.iter()
.rposition(|&b| !b.is_ascii_whitespace())
.map_or(self.pos, |i| i as u32 + 1);
return Token { kind: T::Comment, start, end };
} }
b'/' if self.advance_if(b'*') => { b'/' if self.advance_if(b'*') => {
let mut depth = 1; let mut depth = 1;

View file

@ -8,7 +8,6 @@
never_type, never_type,
unwrap_infallible, unwrap_infallible,
slice_partition_dedup, slice_partition_dedup,
hash_raw_entry,
portable_simd, portable_simd,
iter_collect_into, iter_collect_into,
new_uninit, new_uninit,
@ -19,6 +18,8 @@
extract_if, extract_if,
ptr_internals, ptr_internals,
iter_intersperse, iter_intersperse,
str_from_raw_parts,
ptr_sub_ptr,
slice_from_ptr_range slice_from_ptr_range
)] )]
#![warn(clippy::dbg_macro)] #![warn(clippy::dbg_macro)]
@ -32,7 +33,6 @@ use {
ident::Ident, ident::Ident,
lexer::TokenKind, lexer::TokenKind,
parser::{CommentOr, Expr, ExprRef, FileId, Pos}, parser::{CommentOr, Expr, ExprRef, FileId, Pos},
son::reg,
ty::ArrayLen, ty::ArrayLen,
}, },
alloc::{collections::BTreeMap, string::String, vec::Vec}, alloc::{collections::BTreeMap, string::String, vec::Vec},
@ -65,11 +65,22 @@ pub mod fmt;
#[cfg(any(feature = "std", test))] #[cfg(any(feature = "std", test))]
pub mod fs; pub mod fs;
pub mod parser; pub mod parser;
#[cfg(feature = "opts")]
pub mod son; pub mod son;
mod lexer; mod lexer;
#[cfg(feature = "opts")]
mod vc; mod vc;
pub mod reg {
pub const STACK_PTR: Reg = 254;
pub const ZERO: Reg = 0;
pub const RET: Reg = 1;
pub const RET_ADDR: Reg = 31;
pub type Reg = u8;
}
mod ctx_map { mod ctx_map {
use core::hash::BuildHasher; use core::hash::BuildHasher;
@ -139,10 +150,12 @@ mod ctx_map {
.map(|(k, _)| &k.value) .map(|(k, _)| &k.value)
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn clear(&mut self) { pub fn clear(&mut self) {
self.inner.clear(); self.inner.clear();
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> { pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> {
let (entry, _) = self.entry(value.key(ctx), ctx); let (entry, _) = self.entry(value.key(ctx), ctx);
match entry { match entry {
@ -193,6 +206,7 @@ mod task {
unpack(offset).is_ok() unpack(offset).is_ok()
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn id(index: usize) -> Offset { pub fn id(index: usize) -> Offset {
1 << 31 | index as u32 1 << 31 | index as u32
} }
@ -397,8 +411,14 @@ mod ty {
mod __lc_names { mod __lc_names {
use super::*; use super::*;
$(pub const $name: &[u8] = &array_to_lower_case(unsafe { $(pub const $name: &str = unsafe {
*(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) });)* const LCL: &[u8] = unsafe {
&array_to_lower_case(
*(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()])
)
};
core::str::from_utf8_unchecked(LCL)
};)*
} }
#[allow(dead_code)] #[allow(dead_code)]
@ -407,7 +427,7 @@ mod ty {
} }
pub fn from_str(name: &str) -> Option<Builtin> { pub fn from_str(name: &str) -> Option<Builtin> {
match name.as_bytes() { match name {
$(__lc_names::$name => Some($name),)* $(__lc_names::$name => Some($name),)*
_ => None, _ => None,
} }
@ -415,7 +435,7 @@ mod ty {
pub fn to_str(ty: Builtin) -> &'static str { pub fn to_str(ty: Builtin) -> &'static str {
match ty { match ty {
$($name => unsafe { core::str::from_utf8_unchecked(__lc_names::$name) },)* $($name => __lc_names::$name,)*
v => unreachable!("invalid type: {}", v), v => unreachable!("invalid type: {}", v),
} }
} }
@ -551,6 +571,7 @@ mod ty {
} }
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn bin_ret(ty: Id, op: TokenKind) -> Id { pub fn bin_ret(ty: Id, op: TokenKind) -> Id {
use TokenKind as T; use TokenKind as T;
match op { match op {
@ -1141,6 +1162,7 @@ impl Types {
} }
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option<usize> { fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option<usize> {
let name = self.names.project(name)?; let name = self.names.project(name)?;
self.struct_fields(s).iter().position(|f| f.name == name) self.struct_fields(s).iter().position(|f| f.name == name)
@ -1188,8 +1210,8 @@ impl OffsetIter {
} }
} }
#[cfg(any(feature = "opts", feature = "std"))]
type HashMap<K, V> = hashbrown::HashMap<K, V, FnvBuildHasher>; type HashMap<K, V> = hashbrown::HashMap<K, V, FnvBuildHasher>;
type _HashSet<K> = hashbrown::HashSet<K, FnvBuildHasher>;
type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>; type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>;
struct FnvHasher(u64); struct FnvHasher(u64);
@ -1334,10 +1356,10 @@ fn test_parse_files(ident: &'static str, input: &'static str) -> Vec<parser::Ast
.ok_or("Not Found".to_string()) .ok_or("Not Found".to_string())
}; };
let mut stack = parser::StackAlloc::default(); let mut ctx = parser::ParserCtx::default();
module_map module_map
.iter() .iter()
.map(|&(path, content)| parser::Ast::new(path, content.to_owned(), &mut stack, &loader)) .map(|&(path, content)| parser::Ast::new(path, content.to_owned(), &mut ctx, &loader))
.collect() .collect()
} }

View file

@ -63,63 +63,60 @@ pub struct Parser<'a, 'b> {
path: &'b str, path: &'b str,
loader: Loader<'b>, loader: Loader<'b>,
lexer: Lexer<'a>, lexer: Lexer<'a>,
arena: &'b Arena<'a>, arena: &'a Arena,
ctx: &'b mut ParserCtx,
token: Token, token: Token,
symbols: &'b mut Symbols,
stack: &'b mut StackAlloc,
ns_bound: usize, ns_bound: usize,
trailing_sep: bool, trailing_sep: bool,
packed: bool, packed: bool,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
} }
impl<'a, 'b> Parser<'a, 'b> { impl<'a, 'b> Parser<'a, 'b> {
pub fn new( pub fn parse(
arena: &'b Arena<'a>, ctx: &'b mut ParserCtx,
symbols: &'b mut Symbols, input: &'a str,
stack: &'b mut StackAlloc, path: &'b str,
loader: Loader<'b>, loader: Loader<'b>,
) -> Self { arena: &'a Arena,
let mut lexer = Lexer::new(""); ) -> &'a [Expr<'a>] {
let mut lexer = Lexer::new(input);
Self { Self {
loader, loader,
token: lexer.next(), token: lexer.next(),
lexer, lexer,
path: "", path,
ctx,
arena, arena,
symbols,
stack,
ns_bound: 0, ns_bound: 0,
trailing_sep: false, trailing_sep: false,
packed: false, packed: false,
idents: Vec::new(),
captured: Vec::new(),
} }
.file()
} }
pub fn file(&mut self, input: &'a str, path: &'b str) -> &'a [Expr<'a>] { fn file(&mut self) -> &'a [Expr<'a>] {
self.path = path;
self.lexer = Lexer::new(input);
self.token = self.lexer.next();
let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true)); let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true));
self.pop_scope(0); self.pop_scope(0);
if !self.ctx.idents.is_empty() {
// TODO: we need error recovery
log::error!("{}", {
let mut errors = String::new(); let mut errors = String::new();
for id in self.idents.drain(..) { for id in self.ctx.idents.drain(..) {
report_to( report_to(
self.lexer.source(), self.lexer.source(),
self.path, self.path,
ident::pos(id.ident), ident::pos(id.ident),
format_args!("undeclared identifier: {}", self.lexer.slice(ident::range(id.ident))), format_args!(
"undeclared identifier: {}",
self.lexer.slice(ident::range(id.ident))
),
&mut errors, &mut errors,
); );
} }
errors
if !errors.is_empty() { });
// TODO: we need error recovery
log::error!("{errors}");
unreachable!(); unreachable!();
} }
@ -153,36 +150,20 @@ impl<'a, 'b> Parser<'a, 'b> {
break; break;
} }
let checkpoint = self.token.start;
let op = self.next().kind; let op = self.next().kind;
if op == TokenKind::Decl { if op == TokenKind::Decl {
self.declare_rec(&fold, top_level); self.declare_rec(&fold, top_level);
} }
let op_ass = op.ass_op().map(|op| {
// this abomination reparses the left side, so that the desubaring adheres to the
// parser invariants.
let source = self.lexer.slice(0..checkpoint as usize);
let prev_lexer =
core::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos()));
let prev_token = core::mem::replace(&mut self.token, self.lexer.next());
let clone = self.expr();
self.lexer = prev_lexer;
self.token = prev_token;
(op, clone)
});
let right = self.unit_expr(); let right = self.unit_expr();
let right = self.bin_expr(right, prec, false); let right = self.bin_expr(right, prec, false);
let right = self.arena.alloc(right); let right = self.arena.alloc(right);
let left = self.arena.alloc(fold); let left = self.arena.alloc(fold);
if let Some((op, clone)) = op_ass { if let Some(op) = op.ass_op() {
self.flag_idents(*left, idfl::MUTABLE); self.flag_idents(*left, idfl::MUTABLE);
let right = Expr::BinOp { left: self.arena.alloc(fold), op, right };
let right = Expr::BinOp { left: self.arena.alloc(clone), op, right };
fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) }; fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) };
} else { } else {
fold = Expr::BinOp { left, right, op }; fold = Expr::BinOp { left, right, op };
@ -220,15 +201,15 @@ impl<'a, 'b> Parser<'a, 'b> {
); );
} }
let index = self.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up"); let index = self.ctx.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up");
if core::mem::replace(&mut self.idents[index].declared, true) { if core::mem::replace(&mut self.ctx.idents[index].declared, true) {
self.report( self.report(
pos, pos,
format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))), format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))),
) )
} }
self.idents[index].ordered = ordered; self.ctx.idents[index].ordered = ordered;
} }
fn resolve_ident(&mut self, token: Token) -> (Ident, bool) { fn resolve_ident(&mut self, token: Token) -> (Ident, bool) {
@ -240,6 +221,7 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
let (i, id, bl) = match self let (i, id, bl) = match self
.ctx
.idents .idents
.iter_mut() .iter_mut()
.enumerate() .enumerate()
@ -248,20 +230,20 @@ impl<'a, 'b> Parser<'a, 'b> {
Some((i, elem)) => (i, elem, false), Some((i, elem)) => (i, elem, false),
None => { None => {
let id = ident::new(token.start, name.len() as _); let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent { self.ctx.idents.push(ScopeIdent {
ident: id, ident: id,
declared: false, declared: false,
ordered: false, ordered: false,
flags: 0, flags: 0,
}); });
(self.idents.len() - 1, self.idents.last_mut().unwrap(), true) (self.ctx.idents.len() - 1, self.ctx.idents.last_mut().unwrap(), true)
} }
}; };
id.flags |= idfl::COMPTIME * is_ct as u32; id.flags |= idfl::COMPTIME * is_ct as u32;
if id.declared && id.ordered && self.ns_bound > i { if id.declared && id.ordered && self.ns_bound > i {
id.flags |= idfl::COMPTIME; id.flags |= idfl::COMPTIME;
self.captured.push(id.ident); self.ctx.captured.push(id.ident);
} }
(id.ident, bl) (id.ident, bl)
@ -273,21 +255,22 @@ impl<'a, 'b> Parser<'a, 'b> {
fn unit_expr(&mut self) -> Expr<'a> { fn unit_expr(&mut self) -> Expr<'a> {
use {Expr as E, TokenKind as T}; use {Expr as E, TokenKind as T};
let frame = self.idents.len(); let frame = self.ctx.idents.len();
let token @ Token { start: pos, .. } = self.next(); let token @ Token { start: pos, .. } = self.next();
let prev_boundary = self.ns_bound; let prev_boundary = self.ns_bound;
let prev_captured = self.captured.len(); let prev_captured = self.ctx.captured.len();
let mut expr = match token.kind { let mut expr = match token.kind {
T::Ct => E::Ct { pos, value: self.ptr_expr() }, T::Ct => E::Ct { pos, value: self.ptr_expr() },
T::Directive if self.lexer.slice(token.range()) == "use" => { T::Directive if self.lexer.slice(token.range()) == "use" => {
self.expect_advance(TokenKind::LParen); self.expect_advance(TokenKind::LParen);
let str = self.expect_advance(TokenKind::DQuote); let str = self.expect_advance(TokenKind::DQuote);
self.expect_advance(TokenKind::RParen); self.expect_advance(TokenKind::RParen);
let path = self.lexer.slice(str.range()).trim_matches('"'); let path = self.lexer.slice(str.range());
let path = &path[1..path.len() - 1];
E::Mod { E::Mod {
pos, pos,
path: self.arena.alloc_str(path), path,
id: match (self.loader)(path, self.path) { id: match (self.loader)(path, self.path) {
Ok(id) => id, Ok(id) => id,
Err(e) => { Err(e) => {
@ -323,7 +306,7 @@ impl<'a, 'b> Parser<'a, 'b> {
T::Struct => E::Struct { T::Struct => E::Struct {
packed: core::mem::take(&mut self.packed), packed: core::mem::take(&mut self.packed),
fields: { fields: {
self.ns_bound = self.idents.len(); self.ns_bound = self.ctx.idents.len();
self.expect_advance(T::LBrace); self.expect_advance(T::LBrace);
self.collect_list(T::Comma, T::RBrace, |s| { self.collect_list(T::Comma, T::RBrace, |s| {
let tok = s.token; let tok = s.token;
@ -342,15 +325,23 @@ impl<'a, 'b> Parser<'a, 'b> {
}, },
captured: { captured: {
self.ns_bound = prev_boundary; self.ns_bound = prev_boundary;
self.captured[prev_captured..].sort_unstable(); let mut captured = &mut self.ctx.captured[prev_captured..];
let preserved = self.captured[prev_captured..].partition_dedup().0.len(); while let Some(it) = captured.take_first_mut() {
self.captured.truncate(prev_captured + preserved); for ot in &mut *captured {
self.arena.alloc_slice(&self.captured[prev_captured..]) if it > ot {
core::mem::swap(it, ot);
}
}
}
debug_assert!(captured.is_sorted());
let preserved = self.ctx.captured[prev_captured..].partition_dedup().0.len();
self.ctx.captured.truncate(prev_captured + preserved);
self.arena.alloc_slice(&self.ctx.captured[prev_captured..])
}, },
pos: { pos: {
if self.ns_bound == 0 { if self.ns_bound == 0 {
// we might save some memory // we might save some memory
self.captured.clear(); self.ctx.captured.clear();
} }
pos pos
}, },
@ -427,9 +418,9 @@ impl<'a, 'b> Parser<'a, 'b> {
T::Number => { T::Number => {
let slice = self.lexer.slice(token.range()); let slice = self.lexer.slice(token.range());
let (slice, radix) = match &slice.get(0..2) { let (slice, radix) = match &slice.get(0..2) {
Some("0x") => (slice.trim_start_matches("0x"), Radix::Hex), Some("0x") => (&slice[2..], Radix::Hex),
Some("0b") => (slice.trim_start_matches("0b"), Radix::Binary), Some("0b") => (&slice[2..], Radix::Binary),
Some("0o") => (slice.trim_start_matches("0o"), Radix::Octal), Some("0o") => (&slice[2..], Radix::Octal),
_ => (slice, Radix::Decimal), _ => (slice, Radix::Decimal),
}; };
E::Number { E::Number {
@ -447,7 +438,7 @@ impl<'a, 'b> Parser<'a, 'b> {
expr expr
} }
T::Comment => Expr::Comment { pos, literal: self.tok_str(token) }, T::Comment => Expr::Comment { pos, literal: self.tok_str(token) },
tok => self.report(token.start, format_args!("unexpected token: {tok:?}")), tok => self.report(token.start, format_args!("unexpected token: {tok}")),
}; };
loop { loop {
@ -528,24 +519,25 @@ impl<'a, 'b> Parser<'a, 'b> {
} else { } else {
self.report( self.report(
self.token.start, self.token.start,
format_args!("expected identifier, found {:?}", self.token.kind), format_args!("expected identifier, found {}", self.token.kind),
) )
} }
} }
fn pop_scope(&mut self, frame: usize) { fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame; let mut undeclared_count = frame;
for i in frame..self.idents.len() { for i in frame..self.ctx.idents.len() {
if !&self.idents[i].declared { if !&self.ctx.idents[i].declared {
self.idents.swap(i, undeclared_count); self.ctx.idents.swap(i, undeclared_count);
undeclared_count += 1; undeclared_count += 1;
} }
} }
self.idents self.ctx
.idents
.drain(undeclared_count..) .drain(undeclared_count..)
.map(|ident| Symbol { name: ident.ident, flags: ident.flags }) .map(|ident| Symbol { name: ident.ident, flags: ident.flags })
.collect_into(self.symbols); .collect_into(&mut self.ctx.symbols);
} }
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> { fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
@ -558,13 +550,13 @@ impl<'a, 'b> Parser<'a, 'b> {
end: TokenKind, end: TokenKind,
mut f: impl FnMut(&mut Self) -> T, mut f: impl FnMut(&mut Self) -> T,
) -> &'a [T] { ) -> &'a [T] {
let mut view = self.stack.view(); let mut view = self.ctx.stack.view();
while !self.advance_if(end) { while !self.advance_if(end) {
let val = f(self); let val = f(self);
self.trailing_sep = self.advance_if(delim); self.trailing_sep = self.advance_if(delim);
unsafe { self.stack.push(&mut view, val) }; unsafe { self.ctx.stack.push(&mut view, val) };
} }
self.arena.alloc_slice(unsafe { self.stack.finalize(view) }) self.arena.alloc_slice(unsafe { self.ctx.stack.finalize(view) })
} }
fn advance_if(&mut self, kind: TokenKind) -> bool { fn advance_if(&mut self, kind: TokenKind) -> bool {
@ -580,7 +572,7 @@ impl<'a, 'b> Parser<'a, 'b> {
if self.token.kind != kind { if self.token.kind != kind {
self.report( self.report(
self.token.start, self.token.start,
format_args!("expected {:?}, found {:?}", kind, self.token.kind), format_args!("expected {}, found {}", kind, self.token.kind),
); );
} }
self.next() self.next()
@ -588,15 +580,17 @@ impl<'a, 'b> Parser<'a, 'b> {
#[track_caller] #[track_caller]
fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! { fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! {
log::error!("{}", {
let mut str = String::new(); let mut str = String::new();
report_to(self.lexer.source(), self.path, pos, msg, &mut str); report_to(self.lexer.source(), self.path, pos, msg, &mut str);
log::error!("{str}"); str
});
unreachable!(); unreachable!();
} }
fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) { fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
match e { match e {
Expr::Ident { id, .. } => find_ident(&mut self.idents, id).flags |= flags, Expr::Ident { id, .. } => find_ident(&mut self.ctx.idents, id).flags |= flags,
Expr::Field { target, .. } => self.flag_idents(*target, flags), Expr::Field { target, .. } => self.flag_idents(*target, flags),
_ => {} _ => {}
} }
@ -634,7 +628,7 @@ macro_rules! generate_expr {
$($field:ident: $ty:ty,)* $($field:ident: $ty:ty,)*
}, },
)*}) => { )*}) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] $(#[$meta])*
$vis enum $name<$lt> {$( $vis enum $name<$lt> {$(
$(#[$field_meta])* $(#[$field_meta])*
$variant { $variant {
@ -649,17 +643,6 @@ macro_rules! generate_expr {
$(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)* $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)*
} }
} }
pub fn used_bytes(&self) -> usize {
match self {$(
Self::$variant { $($field,)* } => {
#[allow(clippy::size_of_ref)]
let fields = [$(($field as *const _ as usize - self as *const _ as usize, core::mem::size_of_val($field)),)*];
let (last, size) = fields.iter().copied().max().unwrap();
last + size
},
)*}
}
} }
}; };
@ -806,6 +789,7 @@ generate_expr! {
/// `Expr '.' Ident` /// `Expr '.' Ident`
Field { Field {
target: &'a Self, target: &'a Self,
// we put it second place because its the pos of '.'
pos: Pos, pos: Pos,
name: &'a str, name: &'a str,
}, },
@ -820,7 +804,7 @@ generate_expr! {
}, },
/// `'@' Ident List('(', ',', ')', Expr)` /// `'@' Ident List('(', ',', ')', Expr)`
Directive { Directive {
pos: u32, pos: Pos,
name: &'a str, name: &'a str,
args: &'a [Self], args: &'a [Self],
}, },
@ -959,6 +943,14 @@ impl core::fmt::Display for Display<'_> {
} }
} }
#[derive(Default)]
pub struct ParserCtx {
symbols: Symbols,
stack: StackAlloc,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
}
#[repr(C)] #[repr(C)]
pub struct AstInner<T: ?Sized> { pub struct AstInner<T: ?Sized> {
ref_count: AtomicUsize, ref_count: AtomicUsize,
@ -978,21 +970,18 @@ impl AstInner<[Symbol]> {
.0 .0
} }
fn new(file: Box<str>, path: &str, stack: &mut StackAlloc, loader: Loader) -> NonNull<Self> { fn new(file: Box<str>, path: &str, ctx: &mut ParserCtx, loader: Loader) -> NonNull<Self> {
let arena = Arena::default(); let arena = Arena::default();
let mut syms = Vec::new();
let mut parser = Parser::new(&arena, &mut syms, stack, loader);
let exprs = let exprs =
parser.file(unsafe { &*(&*file as *const _) }, path) as *const [Expr<'static>]; unsafe { core::mem::transmute(Parser::parse(ctx, &file, path, loader, &arena)) };
drop(parser);
syms.sort_unstable_by_key(|s| s.name); ctx.symbols.sort_unstable_by_key(|s| s.name);
let layout = Self::layout(syms.len()); let layout = Self::layout(ctx.symbols.len());
unsafe { unsafe {
let ptr = alloc::alloc::alloc(layout); let ptr = alloc::alloc::alloc(layout);
let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, ctx.symbols.len());
core::ptr::write(inner as *mut AstInner<()>, AstInner { core::ptr::write(inner as *mut AstInner<()>, AstInner {
ref_count: AtomicUsize::new(1), ref_count: AtomicUsize::new(1),
@ -1004,7 +993,7 @@ impl AstInner<[Symbol]> {
}); });
core::ptr::addr_of_mut!((*inner).symbols) core::ptr::addr_of_mut!((*inner).symbols)
.as_mut_ptr() .as_mut_ptr()
.copy_from_nonoverlapping(syms.as_ptr(), syms.len()); .copy_from_nonoverlapping(ctx.symbols.as_ptr(), ctx.symbols.len());
NonNull::new_unchecked(inner) NonNull::new_unchecked(inner)
} }
@ -1041,8 +1030,8 @@ pub fn report_to(
pub struct Ast(NonNull<AstInner<[Symbol]>>); pub struct Ast(NonNull<AstInner<[Symbol]>>);
impl Ast { impl Ast {
pub fn new(path: &str, content: String, stack: &mut StackAlloc, loader: Loader) -> Self { pub fn new(path: &str, content: String, ctx: &mut ParserCtx, loader: Loader) -> Self {
Self(AstInner::new(content.into(), path, stack, loader)) Self(AstInner::new(content.into(), path, ctx, loader))
} }
pub fn exprs(&self) -> &[Expr] { pub fn exprs(&self) -> &[Expr] {
@ -1067,7 +1056,7 @@ impl Ast {
impl Default for Ast { impl Default for Ast {
fn default() -> Self { fn default() -> Self {
Self(AstInner::new("".into(), "", &mut StackAlloc::default(), &no_loader)) Self(AstInner::new("".into(), "", &mut ParserCtx::default(), &no_loader))
} }
} }
@ -1132,13 +1121,13 @@ impl Deref for Ast {
} }
} }
pub struct StackAllocView<T> { struct StackAllocView<T> {
prev: usize, prev: usize,
base: usize, base: usize,
_ph: PhantomData<T>, _ph: PhantomData<T>,
} }
pub struct StackAlloc { struct StackAlloc {
data: *mut u8, data: *mut u8,
len: usize, len: usize,
cap: usize, cap: usize,
@ -1203,29 +1192,22 @@ impl Drop for StackAlloc {
} }
#[derive(Default)] #[derive(Default)]
pub struct Arena<'a> { pub struct Arena {
chunk: UnsafeCell<ArenaChunk>, chunk: UnsafeCell<ArenaChunk>,
ph: core::marker::PhantomData<&'a ()>,
} }
impl<'a> Arena<'a> { impl Arena {
pub fn alloc_str(&self, token: &str) -> &'a str { pub fn alloc<'a>(&'a self, expr: Expr<'a>) -> &'a Expr<'a> {
let ptr = self.alloc_slice(token.as_bytes()); let layout = core::alloc::Layout::new::<Expr<'a>>();
unsafe { core::str::from_utf8_unchecked(ptr) }
}
pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> {
let align = core::mem::align_of::<Expr<'a>>();
let size = expr.used_bytes();
let layout = unsafe { core::alloc::Layout::from_size_align_unchecked(size, align) };
let ptr = self.alloc_low(layout); let ptr = self.alloc_low(layout);
unsafe { unsafe {
ptr.cast::<u64>().copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8) ptr.cast::<u64>()
.copy_from_nonoverlapping(NonNull::from(&expr).cast(), layout.size() / 8)
}; };
unsafe { ptr.cast::<Expr<'a>>().as_ref() } unsafe { ptr.cast::<Expr<'a>>().as_ref() }
} }
pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a [T] { pub fn alloc_slice<'a, T: Copy>(&'a self, slice: &[T]) -> &'a [T] {
if slice.is_empty() || core::mem::size_of::<T>() == 0 { if slice.is_empty() || core::mem::size_of::<T>() == 0 {
return &mut []; return &mut [];
} }
@ -1266,7 +1248,7 @@ impl Default for ArenaChunk {
} }
impl ArenaChunk { impl ArenaChunk {
const ALIGN: usize = core::mem::align_of::<Self>(); const ALIGN: usize = 16;
const CHUNK_SIZE: usize = 1 << 16; const CHUNK_SIZE: usize = 1 << 16;
const LAYOUT: core::alloc::Layout = const LAYOUT: core::alloc::Layout =
unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };

View file

@ -9,7 +9,7 @@ use {
idfl::{self}, idfl::{self},
Expr, ExprRef, FileId, Pos, Expr, ExprRef, FileId, Pos,
}, },
task, reg, task,
ty::{self}, ty::{self},
vc::{BitSet, Vc}, vc::{BitSet, Vc},
Func, HashMap, Offset, OffsetIter, Reloc, Sig, Size, SymKey, TypedReloc, Types, Func, HashMap, Offset, OffsetIter, Reloc, Sig, Size, SymKey, TypedReloc, Types,
@ -34,15 +34,6 @@ const MEM: Nid = 3;
type Nid = u16; type Nid = u16;
pub mod reg {
pub const STACK_PTR: Reg = 254;
pub const ZERO: Reg = 0;
pub const RET: Reg = 1;
pub const RET_ADDR: Reg = 31;
pub type Reg = u8;
}
type Lookup = crate::ctx_map::CtxMap<Nid>; type Lookup = crate::ctx_map::CtxMap<Nid>;
impl crate::ctx_map::CtxEntry for Nid { impl crate::ctx_map::CtxEntry for Nid {