Compare commits

...

3 commits

Author SHA1 Message Date
Jakub Doka 54a7f85978
progress 2024-10-10 08:35:17 +02:00
Jakub Doka e200c2fc98
lock 2024-10-09 00:17:48 +02:00
Jakub Doka 1626734c1a
some progress 2024-10-09 00:17:13 +02:00
21 changed files with 1972 additions and 1033 deletions

1
.gitignore vendored
View file

@ -2,3 +2,4 @@
/hbbytecode/src/instrs.rs /hbbytecode/src/instrs.rs
/.rgignore /.rgignore
rustc-ice-* rustc-ice-*
db.sqlite

846
Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,15 @@
[workspace] [workspace]
resolver = "2" resolver = "2"
members = ["hbbytecode", "hbvm", "hbxrt", "xtask", "hblang", "hbjit", "depell"] members = [
"hbbytecode",
"hbvm",
"hbxrt",
"xtask",
"hblang",
"hbjit",
"depell",
"depell/wasm-hbfmt"
]
[profile.release] [profile.release]
lto = true lto = true

View file

@ -4,8 +4,12 @@ version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
aes-gcm = { version = "0.10.3", default-features = false, features = ["aes", "rand_core"] } anyhow = "1.0.89"
ed25519-dalek = { version = "2.1.1", default-features = false, features = ["rand_core"] } axum = "0.7.7"
getrandom = "0.2.15" getrandom = "0.2.15"
rand_core = { version = "0.6.4", features = ["getrandom"] } htmlm = "0.3.0"
x25519-dalek = { version = "2.0.1", default-features = false } log = "0.4.22"
rusqlite = "0.32.1"
serde = { version = "1.0.210", features = ["derive"] }
time = "0.3.36"
tokio = { version = "1.40.0", features = ["rt"] }

100
depell/src/index.css Normal file
View file

@ -0,0 +1,100 @@
/* Use the configured font everywhere. */
* {
font-family: var(--font);
}
/* Color palette. */
body {
--primary: white;
--secondary: #EFEFEF;
--timestamp: #777777;
--error: #ff3333;
--placeholder: #333333;
}
/* Layout variables and top-level chrome (relies on native CSS nesting). */
body {
--small-gap: 5px;
--font: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
--monospace: 'Courier New', Courier, monospace;
nav {
display: flex;
justify-content: space-between;
align-items: center;
section:last-child {
display: flex;
gap: var(--small-gap);
}
}
main {
margin-top: var(--small-gap);
}
}
/* Post preview card: info line (author/timestamp) and stats row. */
div.preview {
div.info {
display: flex;
gap: var(--small-gap);
span[apply=timestamp] {
color: var(--timestamp);
}
}
div.stats {
display: flex;
gap: var(--small-gap);
}
}
form {
display: flex;
flex-direction: column;
gap: var(--small-gap);
::placeholder {
color: var(--placeholder);
}
.error {
color: var(--error);
text-align: center;
}
}
/* Code display/editing surfaces share the monospace look. */
pre,
textarea {
outline: none;
border: none;
background: var(--secondary);
padding: var(--small-gap);
padding-top: calc(var(--small-gap) * 1.5);
margin: var(--small-gap) 0px;
font-family: var(--monospace);
resize: none;
tab-size: 4;
}
input {
font-size: inherit;
outline: none;
border: none;
background: var(--secondary);
padding: var(--small-gap);
}
/* Highlight focused/hovered inputs against the grey default. */
input:is(:hover, :focus) {
background: white;
}
button {
border: none;
outline: none;
font-size: inherit;
background: var(--secondary);
}
button:hover:not(:active) {
background: white;
}

181
depell/src/index.js Normal file
View file

@ -0,0 +1,181 @@
/// @ts-check
/** Assert-unreachable helper: always throws. @return {never} */
function never() { throw new Error() }
/** Lazily-instantiated wasm formatter module (set by modifyCode). */
/**@type{WebAssembly.Instance}*/ let instance;
/** In-flight instantiation, shared by concurrent first callers. */
/**@type{Promise<WebAssembly.WebAssemblyInstantiatedSource>}*/ let instaceFuture;
/** Format or minify hblang code via the wasm formatter.
 * Returns synchronously once the module is instantiated; the first call(s)
 * return a Promise while /hbfmt.wasm is being fetched and compiled.
 * @param {string} code @param {"fmt" | "minify"} action
 * @returns {Promise<string | undefined> | string | undefined} */
function modifyCode(code, action) {
if (!instance) {
// Share one instantiation between concurrent first callers.
instaceFuture ??= WebAssembly.instantiateStreaming(fetch("/hbfmt.wasm"), {});
return (async () => {
instance = (await instaceFuture).instance;
return modifyCodeSync(instance, code, action);
})();
} else {
return modifyCodeSync(instance, code, action);
}
}
/** Run the wasm formatter synchronously against an already-instantiated module.
 * Copies `code` into the module's INPUT buffer, invokes the export, and decodes
 * the OUTPUT buffer; returns undefined if the wasm side panics.
 * @param {WebAssembly.Instance} instance @param {string} code @param {"fmt" | "minify"} action @returns {string | undefined} */
function modifyCodeSync(instance, code, action) {
    let {
        INPUT, INPUT_LEN,
        OUTPUT, OUTPUT_LEN,
        PANIC_MESSAGE, PANIC_MESSAGE_LEN,
        memory, fmt, minify
    } = instance.exports;
    // Validate the export surface up front; a mismatch is a build bug.
    if (!(true
        && INPUT instanceof WebAssembly.Global
        && INPUT_LEN instanceof WebAssembly.Global
        && OUTPUT instanceof WebAssembly.Global
        && OUTPUT_LEN instanceof WebAssembly.Global
        && memory instanceof WebAssembly.Memory
        && typeof fmt === "function"
        && typeof minify === "function"
    )) never();
    // minify reads its input from the OUTPUT buffer (it works in place), so
    // redirect the input globals for any non-"fmt" action.
    if (action !== "fmt") {
        INPUT = OUTPUT;
        INPUT_LEN = OUTPUT_LEN;
    }
    let dw = new DataView(memory.buffer);
    // FIX: encode before reporting the length. The previous code wrote
    // `code.length` (UTF-16 code units), but TextEncoder produces UTF-8 whose
    // byte count can differ for non-ASCII source; the wasm side expects bytes.
    const bytes = new TextEncoder().encode(code);
    dw.setUint32(INPUT_LEN.value, bytes.length, true);
    new Uint8Array(memory.buffer, INPUT.value).set(bytes);
    try {
        if (action === "fmt") fmt(); else minify();
        let result = new TextDecoder()
            .decode(new Uint8Array(memory.buffer, OUTPUT.value,
                dw.getUint32(OUTPUT_LEN.value, true)));
        return result;
    } catch (e) {
        // On a wasm trap, surface the panic message when the build exports it.
        if (PANIC_MESSAGE instanceof WebAssembly.Global
            && PANIC_MESSAGE_LEN instanceof WebAssembly.Global) {
            let message = new TextDecoder()
                .decode(new Uint8Array(memory.buffer, PANIC_MESSAGE.value,
                    dw.getUint32(PANIC_MESSAGE_LEN.value, true)));
            console.error(message, e);
        } else {
            console.error(e);
        }
        return undefined;
    }
}
/** Wire dynamic behaviors into a DOM subtree; called on initial load and
 * after every htmx content swap.
 * @param {HTMLElement} target */
function wireUp(target) {
execApply(target);
cacheInputs(target);
bindTextareaAutoResize(target);
}
/** Transformations runnable via the `apply` attribute (see execApply).
 * @type {{ [key: string]: (content: string) => Promise<string> | string }} */
const applyFns = {
// Render a unix timestamp (seconds) as a locale date string.
timestamp: (content) => new Date(parseInt(content) * 1000).toLocaleString(),
// Format hblang code; falls back to the original content on failure.
fmt: (content) => {
let res = modifyCode(content, "fmt");
return res instanceof Promise ? res.then(c => c ?? content) : res ?? content;
},
};
/** Apply the transformation named by each element's `apply` attribute to its
 * text content (see applyFns). Unknown names are logged and skipped.
 * @param {HTMLElement} target */
function execApply(target) {
    for (const elem of target.querySelectorAll('[apply]')) {
        if (!(elem instanceof HTMLElement)) continue;
        const funcname = elem.getAttribute('apply') ?? never();
        // FIX: guard unknown names — previously a typo'd attribute value made
        // `applyFns[funcname](...)` throw a TypeError and abort the loop.
        const fn = applyFns[funcname];
        if (!fn) { console.warn("unknown apply function: ", funcname); continue; }
        let res = fn(elem.textContent ?? "");
        if (res instanceof Promise) res.then(c => elem.textContent = c);
        else elem.textContent = res;
    }
}
/** Make every textarea under `target` grow with its content and give it
 * 4-space "soft tab" behavior (Tab inserts, Backspace snaps to the previous
 * tab stop on whitespace-only line prefixes).
 * @param {HTMLElement} target */
function bindTextareaAutoResize(target) {
for (const textarea of target.querySelectorAll("textarea")) {
if (!(textarea instanceof HTMLTextAreaElement)) never();
// Size to content now, then re-measure on every edit.
textarea.style.height = textarea.scrollHeight + "px";
textarea.style.overflowY = "hidden";
textarea.addEventListener("input", function() {
textarea.style.height = "auto";
textarea.style.height = textarea.scrollHeight + "px";
});
textarea.onkeydown = (ev) => {
const selecting = textarea.selectionStart !== textarea.selectionEnd;
if (ev.key === "Tab") {
ev.preventDefault();
const prevPos = textarea.selectionStart;
// NOTE(review): the inserted string reads as a single space here but the
// cursor math below advances by 4 — this looks like a four-space indent
// string collapsed by capture; confirm against the original file.
textarea.value = textarea.value.slice(0, textarea.selectionStart) +
' ' + textarea.value.slice(textarea.selectionEnd);
textarea.selectionStart = textarea.selectionEnd = prevPos + 4;
}
if (ev.key === "Backspace" && textarea.selectionStart != 0 && !selecting) {
// Walk left over spaces; `looped` records that at least one space precedes
// the cursor, and `i` ends on the first non-space character.
let i = textarea.selectionStart, looped = false;
while (textarea.value.charCodeAt(--i) === ' '.charCodeAt(0)) looped = true;
if (textarea.value.charCodeAt(i) === '\n'.charCodeAt(0) && looped) {
ev.preventDefault();
// Delete back to the previous 4-column tab stop (a full stop if already
// exactly on one).
let toDelete = (textarea.selectionStart - (i + 1)) % 4;
if (toDelete === 0) toDelete = 4;
const prevPos = textarea.selectionStart;
textarea.value = textarea.value.slice(0, textarea.selectionStart - toDelete) +
textarea.value.slice(textarea.selectionEnd);
textarea.selectionStart = textarea.selectionEnd = prevPos - toDelete;
}
}
}
}
}
/** Persist form inputs to localStorage, keyed by the form's htmx endpoint plus
 * the input name, so drafts survive reloads. Password/submit/button inputs are
 * never cached.
 * @param {HTMLElement} target */
function cacheInputs(target) {
    /**@type {HTMLFormElement}*/ let form;
    for (form of target.querySelectorAll('form')) {
        const path = form.getAttribute('hx-post') || form.getAttribute('hx-delete');
        if (!path) {
            console.warn('form does not have a hx-post or hx-delete attribute', form);
            continue;
        }
        for (const input of form.elements) {
            if (input instanceof HTMLInputElement || input instanceof HTMLTextAreaElement) {
                // FIX: exact match on the type. The previous substring test
                // ('password submit button'.includes(type)) also matched
                // accidental substrings and the empty string.
                if (['password', 'submit', 'button'].includes(input.type)) continue;
                const key = path + input.name;
                input.value = localStorage.getItem(key) ?? '';
                input.addEventListener("input", () => localStorage.setItem(key, input.value));
            } else {
                console.warn("unhandled form element: ", input);
            }
        }
    }
}
// Dev-only conveniences on localhost: poll the server's build id every 300ms
// and reload when it changes, plus a fmt -> minify round-trip sanity check on
// a tiny program (logs a mismatch instead of failing hard).
if (window.location.hostname === 'localhost') {
let id; setInterval(async () => {
let new_id = await fetch('/hot-reload').then(reps => reps.text());
id ??= new_id;
if (id !== new_id) window.location.reload();
}, 300);
(async function testCodeChange() {
const code = "main:=fn():void{return}";
const fmtd = await modifyCode(code, "fmt") ?? never();
const prev = await modifyCode(fmtd, "minify") ?? never();
if (code != prev) console.error(code, prev);
})()
}
// Re-wire behaviors for content htmx swaps in, then wire the initial page.
document.body.addEventListener('htmx:afterSwap', (ev) => {
if (!(ev.target instanceof HTMLElement)) never();
wireUp(ev.target);
});
wireUp(document.body);

File diff suppressed because it is too large Load diff

12
depell/src/post-page.html Normal file
View file

@ -0,0 +1,12 @@
<h3>About posting code</h3>
<p>
If you are unfamiliar with <a href="https://git.ablecorp.us/AbleOS/holey-bytes">hblang</a>, refer to the
<strong>hblang/README.md</strong> or
visit <a href="/profile/mlokis">mlokis' posts</a>. Preferably don't edit the code here.
</p>
<h3>Extra textarea features</h3>
<ul>
<li>proper tab behaviour</li>
<li>snap to previous tab boundary on "empty" lines</li>
</ul>

View file

51
depell/src/schema.sql Normal file
View file

@ -0,0 +1,51 @@
PRAGMA foreign_keys = ON;

-- Registered users; the name itself is the key.
CREATE TABLE IF NOT EXISTS user(
name TEXT NOT NULL,
password_hash TEXT NOT NULL,
PRIMARY KEY (name)
) WITHOUT ROWID;

-- At most one session per user; the opaque id is resolved through the
-- unique index below.
CREATE TABLE IF NOT EXISTS session(
id BLOB NOT NULL,
username TEXT NOT NULL,
expiration INTEGER NOT NULL,
-- FIX: table constraints must be comma-separated; the previous version
-- omitted the comma here, which SQLite rejects as a syntax error.
FOREIGN KEY (username) REFERENCES user (name),
PRIMARY KEY (username)
) WITHOUT ROWID;

CREATE UNIQUE INDEX IF NOT EXISTS
session_id ON session (id);

-- Immutable posts, keyed by (author, name); authorship is severed (not the
-- post deleted) when the user goes away.
CREATE TABLE IF NOT EXISTS post(
name TEXT NOT NULL,
author TEXT,
timestamp INTEGER,
code TEXT NOT NULL,
FOREIGN KEY (author) REFERENCES user (name) ON DELETE SET NULL,
PRIMARY KEY (author, name)
);

-- Dependency edges between posts.
-- NOTE(review): the parent columns (name, author) are a permutation of post's
-- PRIMARY KEY (author, name); confirm SQLite accepts this column ordering for
-- composite foreign keys.
CREATE TABLE IF NOT EXISTS import(
from_name TEXT NOT NULL,
from_author TEXT,
to_name TEXT NOT NULL,
to_author TEXT,
FOREIGN KEY (from_name, from_author) REFERENCES post (name, author),
FOREIGN KEY (to_name, to_author) REFERENCES post (name, author)
);
CREATE INDEX IF NOT EXISTS
dependencies ON import(from_name, from_author);
CREATE INDEX IF NOT EXISTS
dependants ON import(to_name, to_author);

-- Which user ran which post; one row per (post, runner).
CREATE TABLE IF NOT EXISTS run(
code_name TEXT NOT NULL,
code_author TEXT NOT NULL,
runner TEXT NOT NULL,
FOREIGN KEY (code_name, code_author) REFERENCES post (name, author),
FOREIGN KEY (runner) REFERENCES user(name),
PRIMARY KEY (code_name, code_author, runner)
);

View file

@ -0,0 +1,17 @@
<h1>Welcome to depell</h1>
<p>
Depell (dependency hell) is a simple "social" media site best compared to twitter, except that all you can post is
<a href="https://git.ablecorp.us/AbleOS/holey-bytes">hblang</a> code with no comments allowed. Instead of likes you
run the program, and instead of retweets you import the program as a dependency. Runs count even when the program is run indirectly.
</p>
<p>
The backend only serves the code and frontend compiles and runs it locally. All posts are immutable.
</p>
<h2>Security?</h2>
<p>
All code runs in WASM (inside a holey-bytes VM until hblang compiles to wasm) and is controlled by JavaScript. WASM
can't do any form of IO without going through JavaScript, so as long as the JS imports do not allow wasm to execute
arbitrary JS code, WASM can act as a container inside the JS.
</p>

View file

@ -0,0 +1,11 @@
[package]
name = "wasm-hbfmt"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
hblang = { version = "0.1.0", path = "../../hblang", default-features = false }
log = { version = "0.4.22", features = ["max_level_off"] }

View file

@ -0,0 +1,133 @@
#![no_std]
#![feature(slice_take)]
#![feature(str_from_raw_parts)]
#![feature(alloc_error_handler)]
use {
core::{
alloc::{GlobalAlloc, Layout},
cell::UnsafeCell,
},
hblang::parser::ParserCtx,
};
// Fixed budgets for the wasm module's buffers: parser arena, formatted
// output, and raw input.
const ARENA_SIZE: usize = 128 * 1024;
const MAX_OUTPUT_SIZE: usize = 1024 * 10;
const MAX_INPUT_SIZE: usize = 1024 * 4;
// wasm builds abort on panic; the code that would format the panic message
// into the PANIC_MESSAGE statics is currently disabled (see the commented
// statics further down) — NOTE(review): confirm before re-enabling.
#[cfg(target_arch = "wasm32")]
#[panic_handler]
pub fn handle_panic(_info: &core::panic::PanicInfo) -> ! {
//unsafe {
// use core::fmt::Write;
// let mut f = Write(&mut PANIC_MESSAGE[..]);
// _ = writeln!(f, "{}", info);
// PANIC_MESSAGE_LEN = 1024 - f.0.len();
//}
core::arch::wasm32::unreachable();
}
#[global_allocator]
static ALLOCATOR: ArenaAllocator = ArenaAllocator::new();
// Allocation failure also aborts: the arena is fixed-size, there is nothing
// to reclaim mid-request.
#[cfg(target_arch = "wasm32")]
#[alloc_error_handler]
fn alloc_error(_: core::alloc::Layout) -> ! {
core::arch::wasm32::unreachable()
}
/// Bump-down arena allocator over a fixed in-module buffer. `fmt` calls
/// `reset` before parsing, reclaiming the whole arena at once.
#[repr(C, align(32))]
struct ArenaAllocator {
arena: UnsafeCell<[u8; ARENA_SIZE]>,
head: UnsafeCell<*mut u8>,
}
impl ArenaAllocator {
const fn new() -> Self {
ArenaAllocator {
arena: UnsafeCell::new([0; ARENA_SIZE]),
// Head starts null; `reset` must run before the first allocation.
head: UnsafeCell::new(core::ptr::null_mut()),
}
}
// Point the head one past the end of the arena so allocations bump
// downward toward its start.
unsafe fn reset(&self) {
(*self.head.get()) = self.arena.get().cast::<u8>().add(ARENA_SIZE);
}
}
unsafe impl Sync for ArenaAllocator {}
unsafe impl GlobalAlloc for ArenaAllocator {
    /// Bump-down allocation: subtract the size from the head, align the
    /// result down, and fail with null once it would cross the arena base.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        let size = layout.size();
        let align = layout.align();
        let until = self.arena.get() as *mut u8;
        let new_head = (*self.head.get()).sub(size);
        // FIX: align down to `align` (guaranteed a power of two by `Layout`).
        // The previous mask `!(1 << (align - 1))` only cleared one bit and
        // returned misaligned pointers for any align > 2.
        let aligned_head = (new_head as usize & !(align - 1)) as *mut u8;
        if until > aligned_head {
            // Out of arena space.
            return core::ptr::null_mut();
        }
        *self.head.get() = aligned_head;
        aligned_head
    }

    unsafe fn dealloc(&self, _ptr: *mut u8, _layout: Layout) {
        // Arena allocator: individual frees are a no-op; `reset` reclaims all.
    }
}
/// Fixed-capacity `fmt::Write` sink over a byte slice; the remaining slice
/// shrinks as text is written, so `capacity - self.0.len()` is bytes written.
struct Write<'a>(&'a mut [u8]);
impl core::fmt::Write for Write<'_> {
fn write_str(&mut self, s: &str) -> core::fmt::Result {
// `take_mut` (unstable slice_take feature) splits off the front of the
// remaining buffer; None means the write would overflow, reported as a
// formatting error.
if let Some(m) = self.0.take_mut(..s.len()) {
m.copy_from_slice(s.as_bytes());
Ok(())
} else {
Err(core::fmt::Error)
}
}
}
//#[no_mangle]
//static mut PANIC_MESSAGE: [u8; 1024] = unsafe { core::mem::zeroed() };
//#[no_mangle]
//static mut PANIC_MESSAGE_LEN: usize = 0;
// I/O buffers shared with the JS host by exported symbol name: the host
// writes INPUT/INPUT_LEN before calling `fmt`, and reads OUTPUT/OUTPUT_LEN
// afterwards. MAX_INPUT advertises the input capacity.
#[no_mangle]
static mut OUTPUT: [u8; MAX_OUTPUT_SIZE] = unsafe { core::mem::zeroed() };
#[no_mangle]
static mut OUTPUT_LEN: usize = 0;
#[no_mangle]
static MAX_INPUT: usize = MAX_INPUT_SIZE;
#[no_mangle]
static mut INPUT: [u8; MAX_INPUT_SIZE] = unsafe { core::mem::zeroed() };
#[no_mangle]
static mut INPUT_LEN: usize = 0;
/// Entry point called by the JS host: parse the INPUT buffer as hblang and
/// write the formatted source into OUTPUT, setting OUTPUT_LEN.
#[no_mangle]
unsafe extern "C" fn fmt() {
// Each request starts from a fresh arena.
ALLOCATOR.reset();
// NOTE(review): assumes the host placed INPUT_LEN bytes of valid UTF-8 in
// INPUT — nothing here validates that; confirm the JS side guarantees it.
let code = core::str::from_raw_parts(core::ptr::addr_of!(INPUT).cast(), INPUT_LEN);
let arena = hblang::parser::Arena::default();
let mut ctx = ParserCtx::default();
let exprs = hblang::parser::Parser::parse(&mut ctx, code, "source.hb", &|_, _| Ok(0), &arena);
let mut f = Write(&mut OUTPUT[..]);
hblang::fmt::fmt_file(exprs, code, &mut f).unwrap();
// Bytes written = capacity minus what remains of the sink's buffer.
OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len();
}
/// Entry point called by the JS host: minify the contents of OUTPUT in place
/// (normally the result of a preceding `fmt` call) and update OUTPUT_LEN.
#[no_mangle]
unsafe extern "C" fn minify() {
let code = core::str::from_raw_parts_mut(core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN);
OUTPUT_LEN = hblang::fmt::minify(code);
}

View file

@ -12,8 +12,13 @@ hashbrown = { version = "0.15.0", default-features = false, features = ["raw-ent
hbbytecode = { version = "0.1.0", path = "../hbbytecode" } hbbytecode = { version = "0.1.0", path = "../hbbytecode" }
hbvm = { path = "../hbvm", features = ["nightly"] } hbvm = { path = "../hbvm", features = ["nightly"] }
log = { version = "0.4.22", features = ["release_max_level_error"] } log = { version = "0.4.22", features = ["release_max_level_error"] }
regalloc2 = { git = "https://github.com/jakubDoka/regalloc2", branch = "reuse-allocations", features = [] }
[dependencies.regalloc2]
git = "https://github.com/jakubDoka/regalloc2"
branch = "reuse-allocations"
optional = true
[features] [features]
default = ["std"] default = ["std", "opts"]
std = [] std = []
opts = ["regalloc2"]

View file

@ -839,8 +839,8 @@ impl Codegen {
let index_val = self.expr(index)?; let index_val = self.expr(index)?;
_ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt"); _ = self.assert_ty(index.pos(), index_val.ty, ty::Id::INT, "subsctipt");
if let ty::Kind::Ptr(ty) = base_val.ty.expand() { if let Some(ty) = self.tys.base_of(base_val.ty) {
base_val.ty = self.tys.ins.ptrs[ty as usize].base; base_val.ty = ty;
base_val.loc = base_val.loc.into_derefed(); base_val.loc = base_val.loc.into_derefed();
} }
@ -1070,7 +1070,7 @@ impl Codegen {
Some(Value { ty, loc }) Some(Value { ty, loc })
} }
E::String { pos, mut literal } => { E::String { pos, mut literal } => {
literal = literal.trim_matches('"'); literal = &literal[1..literal.len() - 1];
if !literal.ends_with("\\0") { if !literal.ends_with("\\0") {
self.report(pos, "string literal must end with null byte (for now)"); self.report(pos, "string literal must end with null byte (for now)");
@ -1206,8 +1206,8 @@ impl Codegen {
let checkpoint = self.ci.snap(); let checkpoint = self.ci.snap();
let mut tal = self.expr(target)?; let mut tal = self.expr(target)?;
if let ty::Kind::Ptr(ty) = tal.ty.expand() { if let Some(ty) = self.tys.base_of(tal.ty) {
tal.ty = self.tys.ins.ptrs[ty as usize].base; tal.ty = ty;
tal.loc = tal.loc.into_derefed(); tal.loc = tal.loc.into_derefed();
} }
@ -1306,9 +1306,9 @@ impl Codegen {
} }
E::UnOp { op: T::Mul, val, pos } => { E::UnOp { op: T::Mul, val, pos } => {
let val = self.expr(val)?; let val = self.expr(val)?;
match val.ty.expand() { match self.tys.base_of(val.ty) {
ty::Kind::Ptr(ty) => Some(Value { Some(ty) => Some(Value {
ty: self.tys.ins.ptrs[ty as usize].base, ty,
loc: Loc::reg(self.loc_to_reg(val.loc, self.tys.size_of(val.ty))) loc: Loc::reg(self.loc_to_reg(val.loc, self.tys.size_of(val.ty)))
.into_derefed(), .into_derefed(),
}), }),
@ -1640,10 +1640,9 @@ impl Codegen {
imm = u64::from_ne_bytes(dst); imm = u64::from_ne_bytes(dst);
} }
if matches!(op, T::Add | T::Sub) if matches!(op, T::Add | T::Sub)
&& let ty::Kind::Ptr(ty) = ty::Kind::from_ty(ty) && let Some(ty) = self.tys.base_of(ty)
{ {
let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base); imm *= self.tys.size_of(ty) as u64;
imm *= size as u64;
} }
self.ci.emit(oper(dst.get(), lhs.get(), imm)); self.ci.emit(oper(dst.get(), lhs.get(), imm));
@ -1676,9 +1675,8 @@ impl Codegen {
(lhs.get(), right.ty) (lhs.get(), right.ty)
}; };
let ty::Kind::Ptr(ty) = ty.expand() else { unreachable!() }; let ty = self.tys.base_of(ty).unwrap();
let size = self.tys.size_of(ty);
let size = self.tys.size_of(self.tys.ins.ptrs[ty as usize].base);
self.ci.emit(muli64(offset, offset, size as _)); self.ci.emit(muli64(offset, offset, size as _));
} }
} }

View file

@ -3,11 +3,10 @@ use {
lexer::{self, TokenKind}, lexer::{self, TokenKind},
parser::{self, CommentOr, CtorField, Expr, Poser, Radix, StructField}, parser::{self, CommentOr, CtorField, Expr, Poser, Radix, StructField},
}, },
alloc::string::String,
core::fmt, core::fmt,
}; };
pub fn minify(source: &mut str) -> Option<&str> { pub fn minify(source: &mut str) -> usize {
fn needs_space(c: u8) -> bool { fn needs_space(c: u8) -> bool {
matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..) matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..)
} }
@ -15,6 +14,7 @@ pub fn minify(source: &mut str) -> Option<&str> {
let mut writer = source.as_mut_ptr(); let mut writer = source.as_mut_ptr();
let mut reader = &source[..]; let mut reader = &source[..];
let mut prev_needs_whitecpace = false; let mut prev_needs_whitecpace = false;
let mut prev_needs_newline = false;
loop { loop {
let mut token = lexer::Lexer::new(reader).next(); let mut token = lexer::Lexer::new(reader).next();
match token.kind { match token.kind {
@ -23,46 +23,59 @@ pub fn minify(source: &mut str) -> Option<&str> {
_ => {} _ => {}
} }
let mut suffix = 0; let cpy_len = token.range().len();
if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' {
token.end = token.start + reader[token.range()].trim_end().len() as u32;
suffix = b'\n';
}
let mut prefix = 0; let mut prefix = 0;
if prev_needs_whitecpace && needs_space(reader.as_bytes()[token.start as usize]) { if prev_needs_whitecpace && needs_space(reader.as_bytes()[token.start as usize]) {
prefix = b' '; prefix = b' ';
debug_assert!(token.start != 0, "{reader}");
}
prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]);
let inbetween_new_lines =
reader[..token.start as usize].bytes().filter(|&b| b == b'\n').count()
+ token.kind.precedence().is_some() as usize;
let extra_prefix_new_lines = if inbetween_new_lines > 1 {
1 + token.kind.precedence().is_none() as usize
} else {
prev_needs_newline as usize
};
if token.kind == TokenKind::Comment && reader.as_bytes()[token.end as usize - 1] != b'/' {
prev_needs_newline = true;
prev_needs_whitecpace = false;
} else {
prev_needs_newline = false;
} }
prev_needs_whitecpace = needs_space(reader.as_bytes()[token.end as usize - 1]);
let sstr = reader[token.start as usize..].as_ptr(); let sstr = reader[token.start as usize..].as_ptr();
reader = &reader[token.end as usize..]; reader = &reader[token.end as usize..];
unsafe { unsafe {
if prefix != 0 { if extra_prefix_new_lines != 0 {
for _ in 0..extra_prefix_new_lines {
writer.write(b'\n');
writer = writer.add(1);
}
} else if prefix != 0 {
writer.write(prefix); writer.write(prefix);
writer = writer.add(1); writer = writer.add(1);
} }
writer.copy_from(sstr, token.range().len()); writer.copy_from(sstr, cpy_len);
writer = writer.add(token.range().len()); writer = writer.add(cpy_len);
if suffix != 0 {
writer.write(suffix);
writer = writer.add(1);
}
} }
} }
None unsafe { writer.sub_ptr(source.as_mut_ptr()) }
} }
pub struct Formatter<'a> { pub struct Formatter<'a> {
source: &'a str, source: &'a str,
depth: usize, depth: usize,
disp_buff: String,
} }
impl<'a> Formatter<'a> { impl<'a> Formatter<'a> {
pub fn new(source: &'a str) -> Self { pub fn new(source: &'a str) -> Self {
Self { source, depth: 0, disp_buff: Default::default() } Self { source, depth: 0 }
} }
fn fmt_list<T: Poser, F: core::fmt::Write>( fn fmt_list<T: Poser, F: core::fmt::Write>(
@ -172,7 +185,7 @@ impl<'a> Formatter<'a> {
self.fmt(value, f) self.fmt(value, f)
} }
Expr::String { literal, .. } => write!(f, "{literal}"), Expr::String { literal, .. } => write!(f, "{literal}"),
Expr::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), Expr::Comment { literal, .. } => write!(f, "{literal}"),
Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"), Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"),
Expr::Field { target, name: field, .. } => { Expr::Field { target, name: field, .. } => {
self.fmt_paren(target, f, postfix)?; self.fmt_paren(target, f, postfix)?;
@ -194,7 +207,7 @@ impl<'a> Formatter<'a> {
write!(f, "{name}: ")?; write!(f, "{name}: ")?;
s.fmt(ty, f)? s.fmt(ty, f)?
} }
CommentOr::Comment { literal, .. } => write!(f, "{literal}")?, CommentOr::Comment { literal, .. } => writeln!(f, "{literal}")?,
} }
Ok(field.or().is_some()) Ok(field.or().is_some())
}) })
@ -294,30 +307,42 @@ impl<'a> Formatter<'a> {
write!(f, "{{")?; write!(f, "{{")?;
self.fmt_list(f, true, "}", "", stmts, Self::fmt) self.fmt_list(f, true, "}", "", stmts, Self::fmt)
} }
Expr::Number { value, radix, .. } => match radix { Expr::Number { value, radix, .. } => {
fn display_radix(radix: Radix, mut value: u64, buf: &mut [u8; 64]) -> &str {
fn conv_radix(d: u8) -> u8 {
match d {
0..=9 => d + b'0',
_ => d - 10 + b'A',
}
}
for (i, b) in buf.iter_mut().enumerate().rev() {
let d = (value % radix as u64) as u8;
value /= radix as u64;
*b = conv_radix(d);
if value == 0 {
return unsafe { core::str::from_utf8_unchecked(&buf[i..]) };
}
}
unreachable!()
}
let mut buf = [0u8; 64];
let value = display_radix(radix, value as u64, &mut buf);
match radix {
Radix::Decimal => write!(f, "{value}"), Radix::Decimal => write!(f, "{value}"),
Radix::Hex => write!(f, "{value:#X}"), Radix::Hex => write!(f, "0x{value}"),
Radix::Octal => write!(f, "{value:#o}"), Radix::Octal => write!(f, "0o{value}"),
Radix::Binary => write!(f, "{value:#b}"), Radix::Binary => write!(f, "0b{value}"),
}, }
}
Expr::Bool { value, .. } => write!(f, "{value}"), Expr::Bool { value, .. } => write!(f, "{value}"),
Expr::Idk { .. } => write!(f, "idk"), Expr::Idk { .. } => write!(f, "idk"),
Expr::BinOp { Expr::BinOp {
left, left,
op: TokenKind::Assign, op: TokenKind::Assign,
right: Expr::BinOp { left: lleft, op, right }, right: &Expr::BinOp { left: lleft, op, right },
} if { } if left.pos() == lleft.pos() => {
let mut b = core::mem::take(&mut self.disp_buff);
self.fmt(lleft, &mut b)?;
let len = b.len();
self.fmt(left, &mut b)?;
let (lleft, left) = b.split_at(len);
let res = lleft == left;
b.clear();
self.disp_buff = b;
res
} =>
{
self.fmt(left, f)?; self.fmt(left, f)?;
write!(f, " {op}= ")?; write!(f, " {op}= ")?;
self.fmt(right, f) self.fmt(right, f)
@ -355,7 +380,7 @@ impl<'a> Formatter<'a> {
} }
pub fn preserve_newlines(source: &str) -> usize { pub fn preserve_newlines(source: &str) -> usize {
source[source.trim_end().len()..].chars().filter(|&c| c == '\n').count() source[source.trim_end().len()..].bytes().filter(|&c| c == b'\n').count()
} }
pub fn insert_needed_semicolon(source: &str) -> bool { pub fn insert_needed_semicolon(source: &str) -> bool {
@ -365,39 +390,46 @@ pub fn insert_needed_semicolon(source: &str) -> bool {
impl core::fmt::Display for parser::Ast { impl core::fmt::Display for parser::Ast {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, expr) in self.exprs().iter().enumerate() { fmt_file(self.exprs(), &self.file, f)
Formatter::new(&self.file).fmt(expr, f)?; }
if let Some(expr) = self.exprs().get(i + 1) }
&& let Some(rest) = self.file.get(expr.pos() as usize..)
pub fn fmt_file(exprs: &[Expr], file: &str, f: &mut impl fmt::Write) -> fmt::Result {
for (i, expr) in exprs.iter().enumerate() {
Formatter::new(file).fmt(expr, f)?;
if let Some(expr) = exprs.get(i + 1)
&& let Some(rest) = file.get(expr.pos() as usize..)
{ {
if insert_needed_semicolon(rest) { if insert_needed_semicolon(rest) {
write!(f, ";")?; write!(f, ";")?;
} }
if preserve_newlines(&self.file[..expr.pos() as usize]) > 1 { if preserve_newlines(&file[..expr.pos() as usize]) > 1 {
writeln!(f)?; writeln!(f)?;
} }
} }
if i + 1 != self.exprs().len() { if i + 1 != exprs.len() {
writeln!(f)?; writeln!(f)?;
} }
} }
Ok(()) Ok(())
} }
}
#[cfg(test)] #[cfg(test)]
pub mod test { pub mod test {
use { use {
crate::parser::{self, StackAlloc}, crate::parser::{self, ParserCtx},
alloc::borrow::ToOwned, alloc::borrow::ToOwned,
std::{fmt::Write, string::String}, std::{fmt::Write, string::String},
}; };
pub fn format(ident: &str, input: &str) { pub fn format(ident: &str, input: &str) {
let ast = let mut minned = input.to_owned();
parser::Ast::new(ident, input.to_owned(), &mut StackAlloc::default(), &|_, _| Ok(0)); let len = crate::fmt::minify(&mut minned);
minned.truncate(len);
let ast = parser::Ast::new(ident, minned, &mut ParserCtx::default(), &|_, _| Ok(0));
let mut output = String::new(); let mut output = String::new();
write!(output, "{ast}").unwrap(); write!(output, "{ast}").unwrap();

View file

@ -1,7 +1,7 @@
use { use {
crate::{ crate::{
codegen, codegen,
parser::{self, Ast, StackAlloc}, parser::{self, Ast, ParserCtx},
}, },
alloc::{string::String, vec::Vec}, alloc::{string::String, vec::Vec},
core::{fmt::Write, num::NonZeroUsize}, core::{fmt::Write, num::NonZeroUsize},
@ -263,22 +263,22 @@ pub fn parse_from_fs(extra_threads: usize, root: &str) -> io::Result<Vec<Ast>> {
Ok(id) Ok(id)
}; };
let execute_task = |stack: &mut _, (_, path): Task| { let execute_task = |ctx: &mut _, (_, path): Task| {
let path = path.to_str().ok_or_else(|| { let path = path.to_str().ok_or_else(|| {
io::Error::new( io::Error::new(
io::ErrorKind::InvalidData, io::ErrorKind::InvalidData,
format!("path contains invalid characters: {}", display_rel_path(&path)), format!("path contains invalid characters: {}", display_rel_path(&path)),
) )
})?; })?;
Ok(Ast::new(path, std::fs::read_to_string(path)?, stack, &|path, from| { Ok(Ast::new(path, std::fs::read_to_string(path)?, ctx, &|path, from| {
loader(path, from).map_err(|e| e.to_string()) loader(path, from).map_err(|e| e.to_string())
})) }))
}; };
let thread = || { let thread = || {
let mut stack = StackAlloc::default(); let mut ctx = ParserCtx::default();
while let Some(task @ (indx, ..)) = tasks.pop() { while let Some(task @ (indx, ..)) = tasks.pop() {
let res = execute_task(&mut stack, task); let res = execute_task(&mut ctx, task);
let mut ast = ast.lock().unwrap(); let mut ast = ast.lock().unwrap();
let len = ast.len().max(indx as usize + 1); let len = ast.len().max(indx as usize + 1);
ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into())); ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into()));

View file

@ -1,4 +1,4 @@
use crate::{instrs, EncodedInstr}; use crate::EncodedInstr;
const fn ascii_mask(chars: &[u8]) -> u128 { const fn ascii_mask(chars: &[u8]) -> u128 {
let mut eq = 0; let mut eq = 0;
@ -83,7 +83,7 @@ macro_rules! gen_token_kind {
}; };
} }
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)] #[derive(PartialEq, Eq, Clone, Copy, Hash, PartialOrd, Ord)]
#[repr(u8)] #[repr(u8)]
pub enum TokenKind { pub enum TokenKind {
Not = b'!', Not = b'!',
@ -170,9 +170,16 @@ pub enum TokenKind {
ShlAss = b'<' - 5 + 128, ShlAss = b'<' - 5 + 128,
} }
impl core::fmt::Debug for TokenKind {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
core::fmt::Display::fmt(self, f)
}
}
impl TokenKind { impl TokenKind {
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn cond_op(self, signed: bool) -> Option<(fn(u8, u8, i16) -> EncodedInstr, bool)> { pub fn cond_op(self, signed: bool) -> Option<(fn(u8, u8, i16) -> EncodedInstr, bool)> {
use crate::instrs;
Some(( Some((
match self { match self {
Self::Le if signed => instrs::jgts, Self::Le if signed => instrs::jgts,
@ -192,7 +199,7 @@ impl TokenKind {
} }
pub fn binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u8) -> EncodedInstr> { pub fn binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u8) -> EncodedInstr> {
use instrs::*; use crate::instrs::*;
macro_rules! div { ($($op:ident),*) => {[$(|a, b, c| $op(a, 0, b, c)),*]}; } macro_rules! div { ($($op:ident),*) => {[$(|a, b, c| $op(a, 0, b, c)),*]}; }
macro_rules! rem { ($($op:ident),*) => {[$(|a, b, c| $op(0, a, b, c)),*]}; } macro_rules! rem { ($($op:ident),*) => {[$(|a, b, c| $op(0, a, b, c)),*]}; }
@ -219,7 +226,7 @@ impl TokenKind {
#[allow(clippy::type_complexity)] #[allow(clippy::type_complexity)]
pub fn imm_binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u64) -> EncodedInstr> { pub fn imm_binop(self, signed: bool, size: u32) -> Option<fn(u8, u8, u64) -> EncodedInstr> {
use instrs::*; use crate::instrs::*;
macro_rules! def_op { macro_rules! def_op {
($name:ident |$a:ident, $b:ident, $c:ident| $($tt:tt)*) => { ($name:ident |$a:ident, $b:ident, $c:ident| $($tt:tt)*) => {
macro_rules! $name { macro_rules! $name {
@ -286,7 +293,7 @@ impl TokenKind {
pub fn unop(&self) -> Option<fn(u8, u8) -> EncodedInstr> { pub fn unop(&self) -> Option<fn(u8, u8) -> EncodedInstr> {
Some(match self { Some(match self {
Self::Sub => instrs::neg, Self::Sub => crate::instrs::neg,
_ => return None, _ => return None,
}) })
} }
@ -362,7 +369,7 @@ gen_token_kind! {
pub struct Lexer<'a> { pub struct Lexer<'a> {
pos: u32, pos: u32,
bytes: &'a [u8], source: &'a [u8],
} }
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
@ -371,22 +378,22 @@ impl<'a> Lexer<'a> {
} }
pub fn restore(input: &'a str, pos: u32) -> Self { pub fn restore(input: &'a str, pos: u32) -> Self {
Self { pos, bytes: input.as_bytes() } Self { pos, source: input.as_bytes() }
} }
pub fn source(&self) -> &'a str { pub fn source(&self) -> &'a str {
unsafe { core::str::from_utf8_unchecked(self.bytes) } unsafe { core::str::from_utf8_unchecked(self.source) }
} }
pub fn slice(&self, tok: core::ops::Range<usize>) -> &'a str { pub fn slice(&self, tok: core::ops::Range<usize>) -> &'a str {
unsafe { core::str::from_utf8_unchecked(&self.bytes[tok]) } unsafe { core::str::from_utf8_unchecked(&self.source[tok]) }
} }
fn peek(&self) -> Option<u8> { fn peek(&self) -> Option<u8> {
if core::intrinsics::unlikely(self.pos >= self.bytes.len() as u32) { if core::intrinsics::unlikely(self.pos >= self.source.len() as u32) {
None None
} else { } else {
Some(unsafe { *self.bytes.get_unchecked(self.pos as usize) }) Some(unsafe { *self.source.get_unchecked(self.pos as usize) })
} }
} }
@ -453,7 +460,7 @@ impl<'a> Lexer<'a> {
} }
b'a'..=b'z' | b'A'..=b'Z' | b'_' | 127.. => { b'a'..=b'z' | b'A'..=b'Z' | b'_' | 127.. => {
advance_ident(self); advance_ident(self);
let ident = &self.bytes[start as usize..self.pos as usize]; let ident = &self.source[start as usize..self.pos as usize];
T::from_ident(ident) T::from_ident(ident)
} }
b'"' | b'\'' => loop { b'"' | b'\'' => loop {
@ -465,10 +472,18 @@ impl<'a> Lexer<'a> {
} }
}, },
b'/' if self.advance_if(b'/') => { b'/' if self.advance_if(b'/') => {
while let Some(l) = self.advance() while let Some(l) = self.peek()
&& l != b'\n' && l != b'\n'
{} {
T::Comment self.pos += 1;
}
let end = self.source[..self.pos as usize]
.iter()
.rposition(|&b| !b.is_ascii_whitespace())
.map_or(self.pos, |i| i as u32 + 1);
return Token { kind: T::Comment, start, end };
} }
b'/' if self.advance_if(b'*') => { b'/' if self.advance_if(b'*') => {
let mut depth = 1; let mut depth = 1;

View file

@ -8,7 +8,6 @@
never_type, never_type,
unwrap_infallible, unwrap_infallible,
slice_partition_dedup, slice_partition_dedup,
hash_raw_entry,
portable_simd, portable_simd,
iter_collect_into, iter_collect_into,
new_uninit, new_uninit,
@ -19,6 +18,8 @@
extract_if, extract_if,
ptr_internals, ptr_internals,
iter_intersperse, iter_intersperse,
str_from_raw_parts,
ptr_sub_ptr,
slice_from_ptr_range slice_from_ptr_range
)] )]
#![warn(clippy::dbg_macro)] #![warn(clippy::dbg_macro)]
@ -32,7 +33,6 @@ use {
ident::Ident, ident::Ident,
lexer::TokenKind, lexer::TokenKind,
parser::{CommentOr, Expr, ExprRef, FileId, Pos}, parser::{CommentOr, Expr, ExprRef, FileId, Pos},
son::reg,
ty::ArrayLen, ty::ArrayLen,
}, },
alloc::{collections::BTreeMap, string::String, vec::Vec}, alloc::{collections::BTreeMap, string::String, vec::Vec},
@ -65,11 +65,22 @@ pub mod fmt;
#[cfg(any(feature = "std", test))] #[cfg(any(feature = "std", test))]
pub mod fs; pub mod fs;
pub mod parser; pub mod parser;
#[cfg(feature = "opts")]
pub mod son; pub mod son;
mod lexer; mod lexer;
#[cfg(feature = "opts")]
mod vc; mod vc;
pub mod reg {
pub const STACK_PTR: Reg = 254;
pub const ZERO: Reg = 0;
pub const RET: Reg = 1;
pub const RET_ADDR: Reg = 31;
pub type Reg = u8;
}
mod ctx_map { mod ctx_map {
use core::hash::BuildHasher; use core::hash::BuildHasher;
@ -139,10 +150,12 @@ mod ctx_map {
.map(|(k, _)| &k.value) .map(|(k, _)| &k.value)
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn clear(&mut self) { pub fn clear(&mut self) {
self.inner.clear(); self.inner.clear();
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> { pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> {
let (entry, _) = self.entry(value.key(ctx), ctx); let (entry, _) = self.entry(value.key(ctx), ctx);
match entry { match entry {
@ -193,6 +206,7 @@ mod task {
unpack(offset).is_ok() unpack(offset).is_ok()
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn id(index: usize) -> Offset { pub fn id(index: usize) -> Offset {
1 << 31 | index as u32 1 << 31 | index as u32
} }
@ -397,8 +411,14 @@ mod ty {
mod __lc_names { mod __lc_names {
use super::*; use super::*;
$(pub const $name: &[u8] = &array_to_lower_case(unsafe { $(pub const $name: &str = unsafe {
*(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) });)* const LCL: &[u8] = unsafe {
&array_to_lower_case(
*(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()])
)
};
core::str::from_utf8_unchecked(LCL)
};)*
} }
#[allow(dead_code)] #[allow(dead_code)]
@ -407,7 +427,7 @@ mod ty {
} }
pub fn from_str(name: &str) -> Option<Builtin> { pub fn from_str(name: &str) -> Option<Builtin> {
match name.as_bytes() { match name {
$(__lc_names::$name => Some($name),)* $(__lc_names::$name => Some($name),)*
_ => None, _ => None,
} }
@ -415,7 +435,7 @@ mod ty {
pub fn to_str(ty: Builtin) -> &'static str { pub fn to_str(ty: Builtin) -> &'static str {
match ty { match ty {
$($name => unsafe { core::str::from_utf8_unchecked(__lc_names::$name) },)* $($name => __lc_names::$name,)*
v => unreachable!("invalid type: {}", v), v => unreachable!("invalid type: {}", v),
} }
} }
@ -551,6 +571,7 @@ mod ty {
} }
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
pub fn bin_ret(ty: Id, op: TokenKind) -> Id { pub fn bin_ret(ty: Id, op: TokenKind) -> Id {
use TokenKind as T; use TokenKind as T;
match op { match op {
@ -1141,6 +1162,7 @@ impl Types {
} }
} }
#[cfg_attr(not(feature = "opts"), expect(dead_code))]
fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option<usize> { fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option<usize> {
let name = self.names.project(name)?; let name = self.names.project(name)?;
self.struct_fields(s).iter().position(|f| f.name == name) self.struct_fields(s).iter().position(|f| f.name == name)
@ -1188,8 +1210,8 @@ impl OffsetIter {
} }
} }
#[cfg(any(feature = "opts", feature = "std"))]
type HashMap<K, V> = hashbrown::HashMap<K, V, FnvBuildHasher>; type HashMap<K, V> = hashbrown::HashMap<K, V, FnvBuildHasher>;
type _HashSet<K> = hashbrown::HashSet<K, FnvBuildHasher>;
type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>; type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>;
struct FnvHasher(u64); struct FnvHasher(u64);
@ -1334,10 +1356,10 @@ fn test_parse_files(ident: &'static str, input: &'static str) -> Vec<parser::Ast
.ok_or("Not Found".to_string()) .ok_or("Not Found".to_string())
}; };
let mut stack = parser::StackAlloc::default(); let mut ctx = parser::ParserCtx::default();
module_map module_map
.iter() .iter()
.map(|&(path, content)| parser::Ast::new(path, content.to_owned(), &mut stack, &loader)) .map(|&(path, content)| parser::Ast::new(path, content.to_owned(), &mut ctx, &loader))
.collect() .collect()
} }

View file

@ -63,63 +63,60 @@ pub struct Parser<'a, 'b> {
path: &'b str, path: &'b str,
loader: Loader<'b>, loader: Loader<'b>,
lexer: Lexer<'a>, lexer: Lexer<'a>,
arena: &'b Arena<'a>, arena: &'a Arena,
ctx: &'b mut ParserCtx,
token: Token, token: Token,
symbols: &'b mut Symbols,
stack: &'b mut StackAlloc,
ns_bound: usize, ns_bound: usize,
trailing_sep: bool, trailing_sep: bool,
packed: bool, packed: bool,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
} }
impl<'a, 'b> Parser<'a, 'b> { impl<'a, 'b> Parser<'a, 'b> {
pub fn new( pub fn parse(
arena: &'b Arena<'a>, ctx: &'b mut ParserCtx,
symbols: &'b mut Symbols, input: &'a str,
stack: &'b mut StackAlloc, path: &'b str,
loader: Loader<'b>, loader: Loader<'b>,
) -> Self { arena: &'a Arena,
let mut lexer = Lexer::new(""); ) -> &'a [Expr<'a>] {
let mut lexer = Lexer::new(input);
Self { Self {
loader, loader,
token: lexer.next(), token: lexer.next(),
lexer, lexer,
path: "", path,
ctx,
arena, arena,
symbols,
stack,
ns_bound: 0, ns_bound: 0,
trailing_sep: false, trailing_sep: false,
packed: false, packed: false,
idents: Vec::new(),
captured: Vec::new(),
} }
.file()
} }
pub fn file(&mut self, input: &'a str, path: &'b str) -> &'a [Expr<'a>] { fn file(&mut self) -> &'a [Expr<'a>] {
self.path = path;
self.lexer = Lexer::new(input);
self.token = self.lexer.next();
let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true)); let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, |s| s.expr_low(true));
self.pop_scope(0); self.pop_scope(0);
if !self.ctx.idents.is_empty() {
// TODO: we need error recovery
log::error!("{}", {
let mut errors = String::new(); let mut errors = String::new();
for id in self.idents.drain(..) { for id in self.ctx.idents.drain(..) {
report_to( report_to(
self.lexer.source(), self.lexer.source(),
self.path, self.path,
ident::pos(id.ident), ident::pos(id.ident),
format_args!("undeclared identifier: {}", self.lexer.slice(ident::range(id.ident))), format_args!(
"undeclared identifier: {}",
self.lexer.slice(ident::range(id.ident))
),
&mut errors, &mut errors,
); );
} }
errors
if !errors.is_empty() { });
// TODO: we need error recovery
log::error!("{errors}");
unreachable!(); unreachable!();
} }
@ -153,36 +150,20 @@ impl<'a, 'b> Parser<'a, 'b> {
break; break;
} }
let checkpoint = self.token.start;
let op = self.next().kind; let op = self.next().kind;
if op == TokenKind::Decl { if op == TokenKind::Decl {
self.declare_rec(&fold, top_level); self.declare_rec(&fold, top_level);
} }
let op_ass = op.ass_op().map(|op| {
// this abomination reparses the left side, so that the desubaring adheres to the
// parser invariants.
let source = self.lexer.slice(0..checkpoint as usize);
let prev_lexer =
core::mem::replace(&mut self.lexer, Lexer::restore(source, fold.pos()));
let prev_token = core::mem::replace(&mut self.token, self.lexer.next());
let clone = self.expr();
self.lexer = prev_lexer;
self.token = prev_token;
(op, clone)
});
let right = self.unit_expr(); let right = self.unit_expr();
let right = self.bin_expr(right, prec, false); let right = self.bin_expr(right, prec, false);
let right = self.arena.alloc(right); let right = self.arena.alloc(right);
let left = self.arena.alloc(fold); let left = self.arena.alloc(fold);
if let Some((op, clone)) = op_ass { if let Some(op) = op.ass_op() {
self.flag_idents(*left, idfl::MUTABLE); self.flag_idents(*left, idfl::MUTABLE);
let right = Expr::BinOp { left: self.arena.alloc(fold), op, right };
let right = Expr::BinOp { left: self.arena.alloc(clone), op, right };
fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) }; fold = Expr::BinOp { left, op: TokenKind::Assign, right: self.arena.alloc(right) };
} else { } else {
fold = Expr::BinOp { left, right, op }; fold = Expr::BinOp { left, right, op };
@ -220,15 +201,15 @@ impl<'a, 'b> Parser<'a, 'b> {
); );
} }
let index = self.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up"); let index = self.ctx.idents.binary_search_by_key(&id, |s| s.ident).expect("fck up");
if core::mem::replace(&mut self.idents[index].declared, true) { if core::mem::replace(&mut self.ctx.idents[index].declared, true) {
self.report( self.report(
pos, pos,
format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))), format_args!("redeclaration of identifier: {}", self.lexer.slice(ident::range(id))),
) )
} }
self.idents[index].ordered = ordered; self.ctx.idents[index].ordered = ordered;
} }
fn resolve_ident(&mut self, token: Token) -> (Ident, bool) { fn resolve_ident(&mut self, token: Token) -> (Ident, bool) {
@ -240,6 +221,7 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
let (i, id, bl) = match self let (i, id, bl) = match self
.ctx
.idents .idents
.iter_mut() .iter_mut()
.enumerate() .enumerate()
@ -248,20 +230,20 @@ impl<'a, 'b> Parser<'a, 'b> {
Some((i, elem)) => (i, elem, false), Some((i, elem)) => (i, elem, false),
None => { None => {
let id = ident::new(token.start, name.len() as _); let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent { self.ctx.idents.push(ScopeIdent {
ident: id, ident: id,
declared: false, declared: false,
ordered: false, ordered: false,
flags: 0, flags: 0,
}); });
(self.idents.len() - 1, self.idents.last_mut().unwrap(), true) (self.ctx.idents.len() - 1, self.ctx.idents.last_mut().unwrap(), true)
} }
}; };
id.flags |= idfl::COMPTIME * is_ct as u32; id.flags |= idfl::COMPTIME * is_ct as u32;
if id.declared && id.ordered && self.ns_bound > i { if id.declared && id.ordered && self.ns_bound > i {
id.flags |= idfl::COMPTIME; id.flags |= idfl::COMPTIME;
self.captured.push(id.ident); self.ctx.captured.push(id.ident);
} }
(id.ident, bl) (id.ident, bl)
@ -273,21 +255,22 @@ impl<'a, 'b> Parser<'a, 'b> {
fn unit_expr(&mut self) -> Expr<'a> { fn unit_expr(&mut self) -> Expr<'a> {
use {Expr as E, TokenKind as T}; use {Expr as E, TokenKind as T};
let frame = self.idents.len(); let frame = self.ctx.idents.len();
let token @ Token { start: pos, .. } = self.next(); let token @ Token { start: pos, .. } = self.next();
let prev_boundary = self.ns_bound; let prev_boundary = self.ns_bound;
let prev_captured = self.captured.len(); let prev_captured = self.ctx.captured.len();
let mut expr = match token.kind { let mut expr = match token.kind {
T::Ct => E::Ct { pos, value: self.ptr_expr() }, T::Ct => E::Ct { pos, value: self.ptr_expr() },
T::Directive if self.lexer.slice(token.range()) == "use" => { T::Directive if self.lexer.slice(token.range()) == "use" => {
self.expect_advance(TokenKind::LParen); self.expect_advance(TokenKind::LParen);
let str = self.expect_advance(TokenKind::DQuote); let str = self.expect_advance(TokenKind::DQuote);
self.expect_advance(TokenKind::RParen); self.expect_advance(TokenKind::RParen);
let path = self.lexer.slice(str.range()).trim_matches('"'); let path = self.lexer.slice(str.range());
let path = &path[1..path.len() - 1];
E::Mod { E::Mod {
pos, pos,
path: self.arena.alloc_str(path), path,
id: match (self.loader)(path, self.path) { id: match (self.loader)(path, self.path) {
Ok(id) => id, Ok(id) => id,
Err(e) => { Err(e) => {
@ -323,7 +306,7 @@ impl<'a, 'b> Parser<'a, 'b> {
T::Struct => E::Struct { T::Struct => E::Struct {
packed: core::mem::take(&mut self.packed), packed: core::mem::take(&mut self.packed),
fields: { fields: {
self.ns_bound = self.idents.len(); self.ns_bound = self.ctx.idents.len();
self.expect_advance(T::LBrace); self.expect_advance(T::LBrace);
self.collect_list(T::Comma, T::RBrace, |s| { self.collect_list(T::Comma, T::RBrace, |s| {
let tok = s.token; let tok = s.token;
@ -342,15 +325,23 @@ impl<'a, 'b> Parser<'a, 'b> {
}, },
captured: { captured: {
self.ns_bound = prev_boundary; self.ns_bound = prev_boundary;
self.captured[prev_captured..].sort_unstable(); let mut captured = &mut self.ctx.captured[prev_captured..];
let preserved = self.captured[prev_captured..].partition_dedup().0.len(); while let Some(it) = captured.take_first_mut() {
self.captured.truncate(prev_captured + preserved); for ot in &mut *captured {
self.arena.alloc_slice(&self.captured[prev_captured..]) if it > ot {
core::mem::swap(it, ot);
}
}
}
debug_assert!(captured.is_sorted());
let preserved = self.ctx.captured[prev_captured..].partition_dedup().0.len();
self.ctx.captured.truncate(prev_captured + preserved);
self.arena.alloc_slice(&self.ctx.captured[prev_captured..])
}, },
pos: { pos: {
if self.ns_bound == 0 { if self.ns_bound == 0 {
// we might save some memory // we might save some memory
self.captured.clear(); self.ctx.captured.clear();
} }
pos pos
}, },
@ -427,9 +418,9 @@ impl<'a, 'b> Parser<'a, 'b> {
T::Number => { T::Number => {
let slice = self.lexer.slice(token.range()); let slice = self.lexer.slice(token.range());
let (slice, radix) = match &slice.get(0..2) { let (slice, radix) = match &slice.get(0..2) {
Some("0x") => (slice.trim_start_matches("0x"), Radix::Hex), Some("0x") => (&slice[2..], Radix::Hex),
Some("0b") => (slice.trim_start_matches("0b"), Radix::Binary), Some("0b") => (&slice[2..], Radix::Binary),
Some("0o") => (slice.trim_start_matches("0o"), Radix::Octal), Some("0o") => (&slice[2..], Radix::Octal),
_ => (slice, Radix::Decimal), _ => (slice, Radix::Decimal),
}; };
E::Number { E::Number {
@ -447,7 +438,7 @@ impl<'a, 'b> Parser<'a, 'b> {
expr expr
} }
T::Comment => Expr::Comment { pos, literal: self.tok_str(token) }, T::Comment => Expr::Comment { pos, literal: self.tok_str(token) },
tok => self.report(token.start, format_args!("unexpected token: {tok:?}")), tok => self.report(token.start, format_args!("unexpected token: {tok}")),
}; };
loop { loop {
@ -528,24 +519,25 @@ impl<'a, 'b> Parser<'a, 'b> {
} else { } else {
self.report( self.report(
self.token.start, self.token.start,
format_args!("expected identifier, found {:?}", self.token.kind), format_args!("expected identifier, found {}", self.token.kind),
) )
} }
} }
fn pop_scope(&mut self, frame: usize) { fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame; let mut undeclared_count = frame;
for i in frame..self.idents.len() { for i in frame..self.ctx.idents.len() {
if !&self.idents[i].declared { if !&self.ctx.idents[i].declared {
self.idents.swap(i, undeclared_count); self.ctx.idents.swap(i, undeclared_count);
undeclared_count += 1; undeclared_count += 1;
} }
} }
self.idents self.ctx
.idents
.drain(undeclared_count..) .drain(undeclared_count..)
.map(|ident| Symbol { name: ident.ident, flags: ident.flags }) .map(|ident| Symbol { name: ident.ident, flags: ident.flags })
.collect_into(self.symbols); .collect_into(&mut self.ctx.symbols);
} }
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> { fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
@ -558,13 +550,13 @@ impl<'a, 'b> Parser<'a, 'b> {
end: TokenKind, end: TokenKind,
mut f: impl FnMut(&mut Self) -> T, mut f: impl FnMut(&mut Self) -> T,
) -> &'a [T] { ) -> &'a [T] {
let mut view = self.stack.view(); let mut view = self.ctx.stack.view();
while !self.advance_if(end) { while !self.advance_if(end) {
let val = f(self); let val = f(self);
self.trailing_sep = self.advance_if(delim); self.trailing_sep = self.advance_if(delim);
unsafe { self.stack.push(&mut view, val) }; unsafe { self.ctx.stack.push(&mut view, val) };
} }
self.arena.alloc_slice(unsafe { self.stack.finalize(view) }) self.arena.alloc_slice(unsafe { self.ctx.stack.finalize(view) })
} }
fn advance_if(&mut self, kind: TokenKind) -> bool { fn advance_if(&mut self, kind: TokenKind) -> bool {
@ -580,7 +572,7 @@ impl<'a, 'b> Parser<'a, 'b> {
if self.token.kind != kind { if self.token.kind != kind {
self.report( self.report(
self.token.start, self.token.start,
format_args!("expected {:?}, found {:?}", kind, self.token.kind), format_args!("expected {}, found {}", kind, self.token.kind),
); );
} }
self.next() self.next()
@ -588,15 +580,17 @@ impl<'a, 'b> Parser<'a, 'b> {
#[track_caller] #[track_caller]
fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! { fn report(&self, pos: Pos, msg: impl fmt::Display) -> ! {
log::error!("{}", {
let mut str = String::new(); let mut str = String::new();
report_to(self.lexer.source(), self.path, pos, msg, &mut str); report_to(self.lexer.source(), self.path, pos, msg, &mut str);
log::error!("{str}"); str
});
unreachable!(); unreachable!();
} }
fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) { fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
match e { match e {
Expr::Ident { id, .. } => find_ident(&mut self.idents, id).flags |= flags, Expr::Ident { id, .. } => find_ident(&mut self.ctx.idents, id).flags |= flags,
Expr::Field { target, .. } => self.flag_idents(*target, flags), Expr::Field { target, .. } => self.flag_idents(*target, flags),
_ => {} _ => {}
} }
@ -634,7 +628,7 @@ macro_rules! generate_expr {
$($field:ident: $ty:ty,)* $($field:ident: $ty:ty,)*
}, },
)*}) => { )*}) => {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] $(#[$meta])*
$vis enum $name<$lt> {$( $vis enum $name<$lt> {$(
$(#[$field_meta])* $(#[$field_meta])*
$variant { $variant {
@ -649,17 +643,6 @@ macro_rules! generate_expr {
$(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)* $(Self::$variant { $($field),* } => generate_expr!(@first $(($field),)*).posi(),)*
} }
} }
pub fn used_bytes(&self) -> usize {
match self {$(
Self::$variant { $($field,)* } => {
#[allow(clippy::size_of_ref)]
let fields = [$(($field as *const _ as usize - self as *const _ as usize, core::mem::size_of_val($field)),)*];
let (last, size) = fields.iter().copied().max().unwrap();
last + size
},
)*}
}
} }
}; };
@ -806,6 +789,7 @@ generate_expr! {
/// `Expr '.' Ident` /// `Expr '.' Ident`
Field { Field {
target: &'a Self, target: &'a Self,
// we put it second place because its the pos of '.'
pos: Pos, pos: Pos,
name: &'a str, name: &'a str,
}, },
@ -820,7 +804,7 @@ generate_expr! {
}, },
/// `'@' Ident List('(', ',', ')', Expr)` /// `'@' Ident List('(', ',', ')', Expr)`
Directive { Directive {
pos: u32, pos: Pos,
name: &'a str, name: &'a str,
args: &'a [Self], args: &'a [Self],
}, },
@ -959,6 +943,14 @@ impl core::fmt::Display for Display<'_> {
} }
} }
#[derive(Default)]
pub struct ParserCtx {
symbols: Symbols,
stack: StackAlloc,
idents: Vec<ScopeIdent>,
captured: Vec<Ident>,
}
#[repr(C)] #[repr(C)]
pub struct AstInner<T: ?Sized> { pub struct AstInner<T: ?Sized> {
ref_count: AtomicUsize, ref_count: AtomicUsize,
@ -978,21 +970,18 @@ impl AstInner<[Symbol]> {
.0 .0
} }
fn new(file: Box<str>, path: &str, stack: &mut StackAlloc, loader: Loader) -> NonNull<Self> { fn new(file: Box<str>, path: &str, ctx: &mut ParserCtx, loader: Loader) -> NonNull<Self> {
let arena = Arena::default(); let arena = Arena::default();
let mut syms = Vec::new();
let mut parser = Parser::new(&arena, &mut syms, stack, loader);
let exprs = let exprs =
parser.file(unsafe { &*(&*file as *const _) }, path) as *const [Expr<'static>]; unsafe { core::mem::transmute(Parser::parse(ctx, &file, path, loader, &arena)) };
drop(parser);
syms.sort_unstable_by_key(|s| s.name); ctx.symbols.sort_unstable_by_key(|s| s.name);
let layout = Self::layout(syms.len()); let layout = Self::layout(ctx.symbols.len());
unsafe { unsafe {
let ptr = alloc::alloc::alloc(layout); let ptr = alloc::alloc::alloc(layout);
let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, syms.len()); let inner: *mut Self = core::ptr::from_raw_parts_mut(ptr as *mut _, ctx.symbols.len());
core::ptr::write(inner as *mut AstInner<()>, AstInner { core::ptr::write(inner as *mut AstInner<()>, AstInner {
ref_count: AtomicUsize::new(1), ref_count: AtomicUsize::new(1),
@ -1004,7 +993,7 @@ impl AstInner<[Symbol]> {
}); });
core::ptr::addr_of_mut!((*inner).symbols) core::ptr::addr_of_mut!((*inner).symbols)
.as_mut_ptr() .as_mut_ptr()
.copy_from_nonoverlapping(syms.as_ptr(), syms.len()); .copy_from_nonoverlapping(ctx.symbols.as_ptr(), ctx.symbols.len());
NonNull::new_unchecked(inner) NonNull::new_unchecked(inner)
} }
@ -1041,8 +1030,8 @@ pub fn report_to(
pub struct Ast(NonNull<AstInner<[Symbol]>>); pub struct Ast(NonNull<AstInner<[Symbol]>>);
impl Ast { impl Ast {
pub fn new(path: &str, content: String, stack: &mut StackAlloc, loader: Loader) -> Self { pub fn new(path: &str, content: String, ctx: &mut ParserCtx, loader: Loader) -> Self {
Self(AstInner::new(content.into(), path, stack, loader)) Self(AstInner::new(content.into(), path, ctx, loader))
} }
pub fn exprs(&self) -> &[Expr] { pub fn exprs(&self) -> &[Expr] {
@ -1067,7 +1056,7 @@ impl Ast {
impl Default for Ast { impl Default for Ast {
fn default() -> Self { fn default() -> Self {
Self(AstInner::new("".into(), "", &mut StackAlloc::default(), &no_loader)) Self(AstInner::new("".into(), "", &mut ParserCtx::default(), &no_loader))
} }
} }
@ -1132,13 +1121,13 @@ impl Deref for Ast {
} }
} }
pub struct StackAllocView<T> { struct StackAllocView<T> {
prev: usize, prev: usize,
base: usize, base: usize,
_ph: PhantomData<T>, _ph: PhantomData<T>,
} }
pub struct StackAlloc { struct StackAlloc {
data: *mut u8, data: *mut u8,
len: usize, len: usize,
cap: usize, cap: usize,
@ -1203,29 +1192,22 @@ impl Drop for StackAlloc {
} }
#[derive(Default)] #[derive(Default)]
pub struct Arena<'a> { pub struct Arena {
chunk: UnsafeCell<ArenaChunk>, chunk: UnsafeCell<ArenaChunk>,
ph: core::marker::PhantomData<&'a ()>,
} }
impl<'a> Arena<'a> { impl Arena {
pub fn alloc_str(&self, token: &str) -> &'a str { pub fn alloc<'a>(&'a self, expr: Expr<'a>) -> &'a Expr<'a> {
let ptr = self.alloc_slice(token.as_bytes()); let layout = core::alloc::Layout::new::<Expr<'a>>();
unsafe { core::str::from_utf8_unchecked(ptr) }
}
pub fn alloc(&self, expr: Expr<'a>) -> &'a Expr<'a> {
let align = core::mem::align_of::<Expr<'a>>();
let size = expr.used_bytes();
let layout = unsafe { core::alloc::Layout::from_size_align_unchecked(size, align) };
let ptr = self.alloc_low(layout); let ptr = self.alloc_low(layout);
unsafe { unsafe {
ptr.cast::<u64>().copy_from_nonoverlapping(NonNull::from(&expr).cast(), size / 8) ptr.cast::<u64>()
.copy_from_nonoverlapping(NonNull::from(&expr).cast(), layout.size() / 8)
}; };
unsafe { ptr.cast::<Expr<'a>>().as_ref() } unsafe { ptr.cast::<Expr<'a>>().as_ref() }
} }
pub fn alloc_slice<T: Copy>(&self, slice: &[T]) -> &'a [T] { pub fn alloc_slice<'a, T: Copy>(&'a self, slice: &[T]) -> &'a [T] {
if slice.is_empty() || core::mem::size_of::<T>() == 0 { if slice.is_empty() || core::mem::size_of::<T>() == 0 {
return &mut []; return &mut [];
} }
@ -1266,7 +1248,7 @@ impl Default for ArenaChunk {
} }
impl ArenaChunk { impl ArenaChunk {
const ALIGN: usize = core::mem::align_of::<Self>(); const ALIGN: usize = 16;
const CHUNK_SIZE: usize = 1 << 16; const CHUNK_SIZE: usize = 1 << 16;
const LAYOUT: core::alloc::Layout = const LAYOUT: core::alloc::Layout =
unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) }; unsafe { core::alloc::Layout::from_size_align_unchecked(Self::CHUNK_SIZE, Self::ALIGN) };

View file

@ -9,7 +9,7 @@ use {
idfl::{self}, idfl::{self},
Expr, ExprRef, FileId, Pos, Expr, ExprRef, FileId, Pos,
}, },
task, reg, task,
ty::{self}, ty::{self},
vc::{BitSet, Vc}, vc::{BitSet, Vc},
Func, HashMap, Offset, OffsetIter, Reloc, Sig, Size, SymKey, TypedReloc, Types, Func, HashMap, Offset, OffsetIter, Reloc, Sig, Size, SymKey, TypedReloc, Types,
@ -34,15 +34,6 @@ const MEM: Nid = 3;
type Nid = u16; type Nid = u16;
pub mod reg {
pub const STACK_PTR: Reg = 254;
pub const ZERO: Reg = 0;
pub const RET: Reg = 1;
pub const RET_ADDR: Reg = 31;
pub type Reg = u8;
}
type Lookup = crate::ctx_map::CtxMap<Nid>; type Lookup = crate::ctx_map::CtxMap<Nid>;
impl crate::ctx_map::CtxEntry for Nid { impl crate::ctx_map::CtxEntry for Nid {