From 3b4b30b2bd198151782fd2afca23b13e9e64841d Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 1 Dec 2024 14:01:44 +0100 Subject: [PATCH] Restructuring the compiler Signed-off-by: Jakub Doka --- depell/wasm-hbc/src/lib.rs | 3 +- lang/README.md | 10 +- lang/src/{son => backend}/hbvm.rs | 24 +- lang/src/{son => backend}/hbvm/regalloc.rs | 68 +- lang/src/fmt.rs | 82 +- lang/src/fs.rs | 3 +- lang/src/fuzz.rs | 3 +- lang/src/lib.rs | 1247 ++------------------ lang/src/parser.rs | 119 +- lang/src/son.rs | 446 ++++--- lang/src/ty.rs | 1077 +++++++++++++++++ lang/tests/son_tests_fb_driver.txt | 2 +- 12 files changed, 1600 insertions(+), 1484 deletions(-) rename lang/src/{son => backend}/hbvm.rs (98%) rename lang/src/{son => backend}/hbvm/regalloc.rs (94%) create mode 100644 lang/src/ty.rs diff --git a/depell/wasm-hbc/src/lib.rs b/depell/wasm-hbc/src/lib.rs index ba2edda0..c34f2adc 100644 --- a/depell/wasm-hbc/src/lib.rs +++ b/depell/wasm-hbc/src/lib.rs @@ -6,7 +6,8 @@ use { alloc::{string::String, vec::Vec}, core::ffi::CStr, hblang::{ - son::{hbvm::HbvmBackend, Codegen, CodegenCtx}, + backend::hbvm::HbvmBackend, + son::{Codegen, CodegenCtx}, ty::Module, Ent, }, diff --git a/lang/README.md b/lang/README.md index 06421dd6..81d228b4 100644 --- a/lang/README.md +++ b/lang/README.md @@ -244,7 +244,13 @@ main := fn(): uint { #### enums ```hb -Enum := enum {A, B, C} +Enum := enum { + A, + B, + C, + + $default := Self.A +} some_enum := fn(): Enum return .A @@ -252,7 +258,7 @@ main := fn(): uint { e := some_enum() match e { - .A => return 0, + Enum.default => return 0, _ => return 100, } } diff --git a/lang/src/son/hbvm.rs b/lang/src/backend/hbvm.rs similarity index 98% rename from lang/src/son/hbvm.rs rename to lang/src/backend/hbvm.rs index 8e7cfec3..513e945b 100644 --- a/lang/src/son/hbvm.rs +++ b/lang/src/backend/hbvm.rs @@ -1,15 +1,14 @@ use { - super::{AssemblySpec, Backend, Nid, Node, Nodes, VOID}, + super::{AssemblySpec, Backend}, crate::{ lexer::TokenKind, parser, - son::{debug_assert_matches, Kind, MEM}, - ty::{self, Arg, Loc, Module}, + son::{Kind, Nid, Node, Nodes, MEM, VOID}, + ty::{self, Arg, Loc, Module, Offset, Sig, Size, Types}, utils::{Ent, EntVec}, - Offset, Sig, Size, Types, }, alloc::{boxed::Box, collections::BTreeMap, string::String, vec::Vec}, - core::{mem, ops::Range}, + core::{assert_matches::debug_assert_matches, mem, ops::Range}, hbbytecode::{self as instrs, *}, reg::Reg, }; @@ -254,30 +253,24 @@ impl Backend for HbvmBackend { hbbytecode::disasm(&mut sluce, &functions, output, eca_handler) } - fn emit_ct_body( - &mut self, - id: ty::Func, - nodes: &mut Nodes, - tys: &Types, - files: &[parser::Ast], - ) { + fn emit_ct_body(&mut self, id: ty::Func, nodes: &Nodes, tys: &Types, files: &[parser::Ast]) { self.emit_body(id, nodes, tys, files); let fd = &mut self.funcs[id]; fd.code.truncate(fd.code.len() - instrs::jala(0, 0, 0).0); emit(&mut fd.code, instrs::tx()); } - fn emit_body(&mut self, id: ty::Func, nodes: &mut Nodes, tys: &Types, files: &[parser::Ast]) { + fn emit_body(&mut self, id: ty::Func, nodes: &Nodes, tys: &Types, files: &[parser::Ast]) { let sig = tys.ins.funcs[id].sig.unwrap(); debug_assert!(self.code.is_empty()); self.offsets.clear(); - self.offsets.resize(nodes.values.len(), Offset::MAX); + self.offsets.resize(nodes.len(), Offset::MAX); let mut stack_size = 0; '_compute_stack: { - let mems = mem::take(&mut nodes[MEM].outputs); + let mems = &nodes[MEM].outputs; for &stck in mems.iter() { if !matches!(nodes[stck].kind, Kind::Stck | Kind::Arg) { debug_assert_matches!( @@ -300,7 +293,6 @@ impl Backend for HbvmBackend { } self.offsets[stck as usize] = stack_size - self.offsets[stck as usize]; } - nodes[MEM].outputs = mems; } let (saved, tail) = self.emit_body_code(nodes, sig, tys, files); diff --git a/lang/src/son/hbvm/regalloc.rs b/lang/src/backend/hbvm/regalloc.rs similarity index 94% rename from lang/src/son/hbvm/regalloc.rs rename to lang/src/backend/hbvm/regalloc.rs index 7ac01e42..0b681bfa 100644 --- a/lang/src/son/hbvm/regalloc.rs +++ b/lang/src/backend/hbvm/regalloc.rs @@ -1,20 +1,16 @@ use { crate::{ - parser, quad_sort, - son::{ - debug_assert_matches, - hbvm::{ - reg::{self, Reg}, - HbvmBackend, Nid, Nodes, PLoc, - }, - Kind, ARG_START, MEM, VOID, + backend::hbvm::{ + reg::{self, Reg}, + HbvmBackend, Nid, Nodes, PLoc, }, - ty::{self, Arg, Loc}, + parser, quad_sort, + son::{Kind, ARG_START, MEM, VOID}, + ty::{self, Arg, Loc, Sig, Types}, utils::BitSet, - Sig, Types, }, alloc::{borrow::ToOwned, vec::Vec}, - core::{mem, ops::Range, u8, usize}, + core::{assert_matches::debug_assert_matches, mem, ops::Range}, hbbytecode::{self as instrs}, }; @@ -27,7 +23,6 @@ impl HbvmBackend { files: &[parser::Ast], ) -> (usize, bool) { let tail = Function::build(nodes, tys, &mut self.ralloc, sig); - nodes.basic_blocks(); let strip_load = |value| match nodes[value].kind { Kind::Load { .. } if nodes[value].ty.loc(tys) == Loc::Stack => nodes[value].inputs[1], @@ -69,9 +64,8 @@ impl HbvmBackend { let atr = |allc: Nid| { let allc = strip_load(allc); - debug_assert_eq!( - nodes[allc].lock_rc.get(), - 0, + debug_assert!( + nodes.is_unlocked(allc), "{:?} {}", nodes[allc], ty::Display::new(tys, files, nodes[allc].ty) @@ -114,9 +108,8 @@ impl HbvmBackend { let atr = |allc: Nid| { let allc = strip_load(allc); - debug_assert_eq!( - nodes[allc].lock_rc.get(), - 0, + debug_assert!( + nodes.is_unlocked(allc), "{:?} {}", nodes[allc], ty::Display::new(tys, files, nodes[allc].ty) @@ -164,12 +157,23 @@ impl HbvmBackend { } } - debug_assert_eq!(moves.len(), { - moves.sort_unstable(); - moves.dedup(); - moves.len() - }); + // code makes sure all moves are ordered so that register is only moved + // into after all its uses + // + // in case of cycles, swaps are used instead in which case the conflicting + // move is removed and remining moves are replaced with swaps + const CYCLE_SENTINEL: u8 = u8::MAX; + + debug_assert_eq!( + { + let mut dests = moves.iter().map(|&[d, ..]| d).collect::>(); + dests.sort_unstable(); + dests.dedup(); + dests.len() + }, + moves.len() + ); let mut graph = [u8::MAX; 256]; for &[d, s, _] in moves.iter() { graph[d as usize] = s; @@ -193,18 +197,20 @@ impl HbvmBackend { // cut the cycle graph[c as usize] = u8::MAX; // mark cycyle - *depth = u8::MAX; + *depth = CYCLE_SENTINEL; } quad_sort(&mut moves, |a, b| a[2].cmp(&b[2]).reverse()); for [mut d, mut s, depth] in moves { - if depth == u8::MAX { + if depth == CYCLE_SENTINEL { while graph[s as usize] != u8::MAX { self.emit(instrs::swa(d, s)); d = s; mem::swap(&mut graph[s as usize], &mut s); } + // trivial cycle denotes this move was already generated in a + // cycyle graph[s as usize] = s; } else if graph[s as usize] != s { self.emit(instrs::cp(d, s)); @@ -383,8 +389,8 @@ impl<'a> Function<'a> { fn build(nodes: &'a Nodes, tys: &'a Types, func: &'a mut Res, sig: Sig) -> bool { func.blocks.clear(); func.instrs.clear(); - func.backrefs.resize(nodes.values.len(), u16::MAX); - func.visited.clear(nodes.values.len()); + func.backrefs.resize(nodes.len(), u16::MAX); + func.visited.clear(nodes.len()); let mut s = Self { tail: true, nodes, tys, sig, func }; s.emit_node(VOID); debug_assert!(s.func.blocks.array_chunks().all(|[a, b]| a.end == b.start)); @@ -528,7 +534,7 @@ impl<'a> Function<'a> { impl Nodes { fn vreg_count(&self) -> usize { - self.values.len() + self.len() } fn use_block_of(&self, inst: Nid, uinst: Nid) -> Nid { @@ -576,7 +582,7 @@ impl Nodes { .flat_map(|(p, ls)| ls.iter().map(move |l| (p, l))) .filter(|&(o, &n)| self.is_data_dep(o, n)) .map(|(p, &n)| (self.use_block_of(p, n), n)) - .inspect(|&(_, n)| debug_assert_eq!(self[n].lock_rc.get(), 0)), + .inspect(|&(_, n)| debug_assert!(self.is_unlocked(n))), ) .into_iter() .flatten() @@ -616,7 +622,7 @@ impl<'a> Regalloc<'a> { debug_assert!(self.res.dfs_buf.is_empty()); let mut bundle = Bundle::new(self.res.instrs.len()); - self.res.visited.clear(self.nodes.values.len()); + self.res.visited.clear(self.nodes.len()); for i in (0..self.res.blocks.len()).rev() { for [a, rest @ ..] in self.nodes.phi_inputs_of(self.res.blocks[i].entry) { @@ -650,7 +656,7 @@ impl<'a> Regalloc<'a> { fn collect_bundle(&mut self, inst: Nid, into: &mut Bundle) { let dom = self.nodes.idom_of(inst); - self.res.dfs_seem.clear(self.nodes.values.len()); + self.res.dfs_seem.clear(self.nodes.len()); for (cursor, uinst) in self.nodes.uses_of(inst) { if !self.res.dfs_seem.set(uinst) { continue; diff --git a/lang/src/fmt.rs b/lang/src/fmt.rs index cc84c972..4ef1508f 100644 --- a/lang/src/fmt.rs +++ b/lang/src/fmt.rs @@ -1,7 +1,9 @@ use { crate::{ lexer::{self, Lexer, TokenKind}, - parser::{self, CommentOr, CtorField, EnumField, Expr, Poser, Radix, StructField}, + parser::{ + self, CommentOr, CtorField, EnumField, Expr, FieldList, Poser, Radix, StructField, + }, }, core::{ fmt::{self}, @@ -260,6 +262,32 @@ impl<'a> Formatter<'a> { } } + fn fmt_fields( + &mut self, + f: &mut F, + keyword: &str, + trailing_comma: bool, + fields: FieldList, + fmt: impl Fn(&mut Self, &T, &mut F) -> Result<(), fmt::Error>, + ) -> fmt::Result { + f.write_str(keyword)?; + f.write_str(" {")?; + self.fmt_list_low(f, trailing_comma, "}", ",", fields, |s, field, f| { + match field { + CommentOr::Or(Ok(field)) => fmt(s, field, f)?, + CommentOr::Or(Err(scope)) => { + s.fmt_list(f, true, "", "", scope, Self::fmt)?; + return Ok(false); + } + CommentOr::Comment { literal, .. } => { + f.write_str(literal)?; + f.write_str("\n")?; + } + } + Ok(field.or().is_some()) + }) + } + pub fn fmt(&mut self, expr: &Expr, f: &mut F) -> fmt::Result { macro_rules! impl_parenter { ($($name:ident => $pat:pat,)*) => { @@ -305,41 +333,25 @@ impl<'a> Formatter<'a> { f.write_str("packed ")?; } - f.write_str("struct {")?; - self.fmt_list_low(f, trailing_comma, "}", ",", fields, |s, field, f| { - match field { - CommentOr::Or(Ok(StructField { name, ty, .. })) => { - f.write_str(name)?; - f.write_str(": ")?; - s.fmt(ty, f)? - } - CommentOr::Or(Err(scope)) => { - s.fmt_list(f, true, "", "", scope, Self::fmt)?; - return Ok(false); - } - CommentOr::Comment { literal, .. } => { - f.write_str(literal)?; - f.write_str("\n")?; - } - } - Ok(field.or().is_some()) - }) - } - Expr::Enum { variants, trailing_comma, .. } => { - f.write_str("enum {")?; - self.fmt_list_low(f, trailing_comma, "}", ",", variants, |_, var, f| { - match var { - CommentOr::Or(EnumField { name, .. }) => { - f.write_str(name)?; - } - CommentOr::Comment { literal, .. } => { - f.write_str(literal)?; - f.write_str("\n")?; - } - } - Ok(var.or().is_some()) - }) + self.fmt_fields( + f, + "struct", + trailing_comma, + fields, + |s, StructField { name, ty, .. }, f| { + f.write_str(name)?; + f.write_str(": ")?; + s.fmt(ty, f) + }, + ) } + Expr::Enum { variants, trailing_comma, .. } => self.fmt_fields( + f, + "enum", + trailing_comma, + variants, + |_, EnumField { name, .. }, f| f.write_str(name), + ), Expr::Ctor { ty, fields, trailing_comma, .. } => { if let Some(ty) = ty { self.fmt_paren(ty, f, unary)?; diff --git a/lang/src/fs.rs b/lang/src/fs.rs index 5106330e..e638a28c 100644 --- a/lang/src/fs.rs +++ b/lang/src/fs.rs @@ -1,7 +1,8 @@ use { crate::{ + backend::hbvm::HbvmBackend, parser::{Ast, Ctx, FileKind}, - son::{self, hbvm::HbvmBackend}, + son::{self}, ty, FnvBuildHasher, }, alloc::{string::String, vec::Vec}, diff --git a/lang/src/fuzz.rs b/lang/src/fuzz.rs index f509e94a..0ce9ae44 100644 --- a/lang/src/fuzz.rs +++ b/lang/src/fuzz.rs @@ -1,8 +1,9 @@ use { crate::{ + backend::hbvm::HbvmBackend, lexer::TokenKind, parser, - son::{hbvm::HbvmBackend, Codegen, CodegenCtx}, + son::{Codegen, CodegenCtx}, ty::Module, }, alloc::string::String, diff --git a/lang/src/lib.rs b/lang/src/lib.rs index ef7fa73c..2bea54ba 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -23,9 +23,9 @@ slice_from_ptr_range, iter_next_chunk, pointer_is_aligned_to, - maybe_uninit_fill + maybe_uninit_fill, + array_chunks )] -#![feature(array_chunks)] #![warn(clippy::dbg_macro)] #![expect(internal_features)] #![no_std] @@ -33,16 +33,7 @@ #[cfg(feature = "std")] pub use fs::*; pub use utils::Ent; -use { - self::{ - parser::{CommentOr, Expr, ExprRef, Pos}, - ty::{ArrayLen, Builtin, Module}, - utils::EntVec, - }, - alloc::{string::String, vec::Vec}, - core::{cell::Cell, ops::Range}, - hashbrown::hash_map, -}; +use {self::ty::Builtin, alloc::vec::Vec}; #[macro_use] extern crate alloc; @@ -53,6 +44,7 @@ extern crate std; #[cfg(test)] const README: &str = include_str!("../README.md"); +#[cfg(test)] #[macro_export] macro_rules! run_tests { ($runner:path: $($name:ident;)*) => {$( @@ -70,6 +62,52 @@ pub mod fuzz; pub mod lexer; pub mod parser; pub mod son; +pub mod ty; + +pub mod backend { + use { + crate::{ + parser, + son::Nodes, + ty::{self, Types}, + }, + alloc::{string::String, vec::Vec}, + }; + + pub mod hbvm; + + pub struct AssemblySpec { + pub entry: u32, + pub code_length: u64, + pub data_length: u64, + } + + pub trait Backend { + fn assemble_reachable( + &mut self, + from: ty::Func, + types: &Types, + to: &mut Vec, + ) -> AssemblySpec; + fn disasm<'a>( + &'a self, + sluce: &[u8], + eca_handler: &mut dyn FnMut(&mut &[u8]), + types: &'a Types, + files: &'a [parser::Ast], + output: &mut String, + ) -> Result<(), hbbytecode::DisasmError<'a>>; + fn emit_body(&mut self, id: ty::Func, ci: &Nodes, tys: &Types, files: &[parser::Ast]); + + fn emit_ct_body(&mut self, id: ty::Func, ci: &Nodes, tys: &Types, files: &[parser::Ast]) { + self.emit_body(id, ci, tys, files); + } + + fn assemble_bin(&mut self, from: ty::Func, types: &Types, to: &mut Vec) { + self.assemble_reachable(from, types, to); + } + } +} mod utils; @@ -247,1036 +285,98 @@ impl Ident { } } -pub mod ty { - use { - crate::{ - lexer::TokenKind, - parser::{self, Pos}, - utils::Ent, - Ident, Size, Types, - }, - core::{num::NonZeroU32, ops::Range}, +fn endoce_string( + literal: &str, + str: &mut Vec, + report: impl Fn(&core::str::Bytes, &str), +) -> Option<()> { + let report = |bytes: &core::str::Bytes, msg: &_| { + report(bytes, msg); + None:: }; - pub type ArrayLen = u32; + let decode_braces = |str: &mut Vec, bytes: &mut core::str::Bytes| { + while let Some(b) = bytes.next() + && b != b'}' + { + let c = bytes.next().or_else(|| report(bytes, "incomplete escape sequence"))?; + let decode = |b: u8| { + Some(match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => b - b'a' + 10, + b'A'..=b'F' => b - b'A' + 10, + _ => report(bytes, "expected hex digit or '}'")?, + }) + }; + str.push(decode(b)? << 4 | decode(c)?); + } - impl Func { - pub const ECA: Func = Func(u32::MAX); - pub const MAIN: Func = Func(u32::MIN); - } + Some(()) + }; - #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default, PartialOrd, Ord)] - pub struct Tuple(pub u32); - - impl Tuple { - const LEN_BITS: u32 = 5; - const LEN_MASK: usize = Self::MAX_LEN - 1; - const MAX_LEN: usize = 1 << Self::LEN_BITS; - - pub fn new(pos: usize, len: usize) -> Option { - if len >= Self::MAX_LEN { - return None; - } - - Some(Self((pos << Self::LEN_BITS | len) as u32)) - } - - pub fn range(self) -> Range { - let start = self.0 as usize >> Self::LEN_BITS; - start..start + self.len() - } - - pub fn len(self) -> usize { - self.0 as usize & Self::LEN_MASK - } - - pub fn is_empty(self) -> bool { - self.len() == 0 - } - - pub fn empty() -> Self { - Self(0) - } - - pub fn args(self) -> ArgIter { - ArgIter(self.range()) - } - } - - pub struct ArgIter(Range); - - pub enum Arg { - Type(Id), - Value(Id), - } - - impl ArgIter { - pub(crate) fn next(&mut self, tys: &Types) -> Option { - let ty = tys.ins.args[self.0.next()?]; - if ty == Id::TYPE { - return Some(Arg::Type(tys.ins.args[self.0.next().unwrap()])); - } - Some(Arg::Value(ty)) - } - - pub(crate) fn next_value(&mut self, tys: &Types) -> Option { - loop { - match self.next(tys)? { - Arg::Type(_) => continue, - Arg::Value(id) => break Some(id), - } - } - } - } - - #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] - pub struct Id(NonZeroU32); - - impl From for i64 { - fn from(value: Id) -> Self { - value.0.get() as _ - } - } - - impl crate::ctx_map::CtxEntry for Id { - type Ctx = crate::TypeIns; - type Key<'a> = crate::SymKey<'a>; - - fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { - match self.expand() { - Kind::Struct(s) => { - let st = &ctx.structs[s]; - debug_assert_ne!(st.pos, Pos::MAX); - crate::SymKey::Struct(st.file, st.pos, st.captured) - } - Kind::Enum(e) => { - let en = &ctx.enums[e]; - debug_assert_ne!(en.pos, Pos::MAX); - crate::SymKey::Enum(en.file, en.pos) - } - Kind::Ptr(p) => crate::SymKey::Pointer(&ctx.ptrs[p]), - Kind::Opt(p) => crate::SymKey::Optional(&ctx.opts[p]), - Kind::Func(f) => { - let fc = &ctx.funcs[f]; - if let Some(base) = fc.base { - // TODO: merge base and sig - crate::SymKey::FuncInst(base, fc.sig.unwrap().args) - } else { - crate::SymKey::Decl(fc.parent, fc.name) - } - } - Kind::Global(g) => { - let gb = &ctx.globals[g]; - crate::SymKey::Decl(gb.file.into(), gb.name) - } - Kind::Slice(s) => crate::SymKey::Array(&ctx.slices[s]), - Kind::Module(_) | Kind::Builtin(_) => { - crate::SymKey::Decl(Module::default().into(), Ident::INVALID) - } - Kind::Const(c) => crate::SymKey::Constant(&ctx.consts[c]), - } - } - } - - impl Default for Id { - fn default() -> Self { - Self(unsafe { NonZeroU32::new_unchecked(UNDECLARED) }) - } - } - - impl Id { - pub const DINT: Self = Self::UINT; - - pub fn bin_ret(self, op: TokenKind) -> Id { - if op.is_compatison() { - Self::BOOL - } else { - self - } - } - - pub fn is_float(self) -> bool { - matches!(self.repr(), F32 | F64) || self.is_never() - } - - pub fn is_signed(self) -> bool { - matches!(self.repr(), I8..=INT) || self.is_never() - } - - pub fn is_unsigned(self) -> bool { - matches!(self.repr(), U8..=UINT) || self.is_never() - } - - pub fn is_integer(self) -> bool { - matches!(self.repr(), U8..=INT) || self.is_never() - } - - pub fn is_never(self) -> bool { - self == Self::NEVER - } - - pub fn strip_pointer(self) -> Self { - match self.expand() { - Kind::Ptr(_) => Id::UINT, - _ => self, - } - } - - pub fn is_pointer(self) -> bool { - matches!(self.expand(), Kind::Ptr(_)) || self.is_never() - } - - pub fn is_optional(self) -> bool { - matches!(self.expand(), Kind::Opt(_)) || self.is_never() - } - - pub fn try_upcast(self, ob: Self) -> Option { - self.try_upcast_low(ob, false) - } - - pub fn try_upcast_low(self, ob: Self, coerce_pointer: bool) -> Option { - let (oa, ob) = (Self(self.0.min(ob.0)), Self(self.0.max(ob.0))); - let (a, b) = (oa.strip_pointer(), ob.strip_pointer()); - Some(match () { - _ if oa == Id::NEVER => ob, - _ if ob == Id::NEVER => oa, - _ if oa == ob => oa, - _ if ob.is_optional() => ob, - _ if oa.is_pointer() && ob.is_pointer() => return None, - _ if a.is_signed() && b.is_signed() || a.is_unsigned() && b.is_unsigned() => ob, - _ if a.is_unsigned() && b.is_signed() && a.repr() - U8 < b.repr() - I8 => ob, - _ if a.is_unsigned() && b.is_signed() && a.repr() - U8 > b.repr() - I8 => oa, - _ if oa.is_integer() && ob.is_pointer() && coerce_pointer => ob, - _ => return None, - }) - } - - pub fn expand(self) -> Kind { - Kind::from_ty(self) - } - - pub const fn repr(self) -> u32 { - self.0.get() - } - - pub(crate) fn simple_size(&self) -> Option { - Some(match self.expand() { - Kind::Ptr(_) => 8, - Kind::Builtin(Builtin(VOID)) => 0, - Kind::Builtin(Builtin(NEVER)) => 0, - Kind::Builtin(Builtin(INT | UINT | F64)) => 8, - Kind::Builtin(Builtin(I32 | U32 | TYPE | F32)) => 4, - Kind::Builtin(Builtin(I16 | U16)) => 2, - Kind::Builtin(Builtin(I8 | U8 | BOOL)) => 1, - _ => return None, - }) - } - - pub(crate) fn extend(self) -> Self { - if self.is_signed() { - Self::INT - } else if self.is_pointer() { - self - } else { - Self::UINT - } - } - - pub(crate) fn loc(&self, tys: &Types) -> Loc { - match self.expand() { - Kind::Opt(o) - if let ty = tys.ins.opts[o].base - && ty.loc(tys) == Loc::Reg - && (ty.is_pointer() || tys.size_of(ty) < 8) => - { - Loc::Reg - } - Kind::Ptr(_) | Kind::Enum(_) | Kind::Builtin(_) => Loc::Reg, - Kind::Struct(_) if tys.size_of(*self) == 0 => Loc::Reg, - Kind::Struct(_) | Kind::Slice(_) | Kind::Opt(_) => Loc::Stack, - c @ (Kind::Func(_) | Kind::Global(_) | Kind::Module(_) | Kind::Const(_)) => { - unreachable!("{c:?}") - } - } - } - - pub(crate) fn has_pointers(&self, tys: &Types) -> bool { - match self.expand() { - Kind::Struct(s) => tys.struct_fields(s).iter().any(|f| f.ty.has_pointers(tys)), - Kind::Ptr(_) => true, - Kind::Slice(s) => tys.ins.slices[s].len == ArrayLen::MAX, - _ => false, - } - } - } - - #[derive(PartialEq, Eq, Clone, Copy)] - pub enum Loc { - Reg, - Stack, - } - - impl From for Id { - fn from(id: u64) -> Self { - Self(unsafe { NonZeroU32::new_unchecked(id as _) }) - } - } - - const fn array_to_lower_case(array: [u8; N]) -> [u8; N] { - let mut result = [0; N]; - let mut i = 0; - while i < N { - result[i] = array[i].to_ascii_lowercase(); - i += 1; - } - result - } - // const string to lower case - - macro_rules! builtin_type { - ($($name:ident;)*) => { - $(const $name: u32 = ${index(0)} + 1;)* - - mod __lc_names { - use super::*; - $(pub const $name: &str = unsafe { - const LCL: &[u8] = unsafe { - &array_to_lower_case( - *(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) - ) - }; - core::str::from_utf8_unchecked(LCL) - };)* - } - - impl Builtin { - $(pub const $name: Self = Builtin($name);)* - } - - impl Id { - $(pub const $name: Self = Kind::Builtin(Builtin($name)).compress();)* - } - - impl Kind { - $(pub const $name: Self = Kind::Builtin(Builtin($name));)* - } - - pub fn from_str(name: &str) -> Option { - match name { - $(__lc_names::$name => Some(Builtin($name)),)* - _ => None, - } - } - - pub fn to_str(ty: Builtin) -> &'static str { - match ty.0 { - $($name => __lc_names::$name,)* - v => unreachable!("invalid type: {}", v), - } + let mut bytes = literal.bytes(); + while let Some(b) = bytes.next() { + if b != b'\\' { + str.push(b); + continue; + } + let b = match bytes.next().or_else(|| report(&bytes, "incomplete escape sequence"))? { + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'\'' => b'\'', + b'"' => b'"', + b'0' => b'\0', + b'{' => { + decode_braces(str, &mut bytes); + continue; } + _ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?, }; + str.push(b); } - builtin_type! { - UNDECLARED; - LEFT_UNREACHABLE; - RIGHT_UNREACHABLE; - NEVER; - VOID; - TYPE; - BOOL; - U8; - U16; - U32; - UINT; - I8; - I16; - I32; - INT; - F32; - F64; + if str.last() != Some(&0) { + report(&bytes, "string literal must end with null byte (for now)"); } - macro_rules! type_kind { - ($(#[$meta:meta])* $vis:vis enum $name:ident {$( $variant:ident, )*}) => { - crate::utils::decl_ent! { - $(pub struct $variant(u32);)* - } + Some(()) +} - $(#[$meta])* - $vis enum $name { - $($variant($variant),)* - } - - impl $name { - const FLAG_BITS: u32 = (${count($variant)} as u32).next_power_of_two().ilog2(); - const FLAG_OFFSET: u32 = core::mem::size_of::() as u32 * 8 - Self::FLAG_BITS; - const INDEX_MASK: u32 = (1 << (32 - Self::FLAG_BITS)) - 1; - - $vis fn from_ty(ty: Id) -> Self { - let (flag, index) = (ty.repr() >> Self::FLAG_OFFSET, ty.repr() & Self::INDEX_MASK); - match flag { - $(${index(0)} => Self::$variant($variant(index)),)* - i => unreachable!("{i}"), - } - } - - $vis const fn compress(self) -> Id { - let (index, flag) = match self { - $(Self::$variant(index) => (index.0, ${index(0)}),)* - }; - Id(unsafe { NonZeroU32::new_unchecked((flag << Self::FLAG_OFFSET) | index) }) - } - } - - $( - impl From<$variant> for $name { - fn from(value: $variant) -> Self { - Self::$variant(value) - } - } - - impl From<$variant> for i64 { - fn from(value: $variant) -> Self { - Id::from(value).into() - } - } - - impl From<$variant> for Id { - fn from(value: $variant) -> Self { - $name::$variant(value).compress() - } - } - )* - }; - } - - type_kind! { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub enum Kind { - Builtin, - Struct, - Enum, - Ptr, - Slice, - Opt, - Func, - Global, - Module, - Const, - } - } - - impl Module { - pub const MAIN: Self = Self(0); - } - - impl Default for Module { - fn default() -> Self { - Self(u32::MAX) - } - } - - impl TryFrom for Builtin { - type Error = (); - - fn try_from(value: Ident) -> Result { - if value.is_null() { - Ok(Self(value.len())) - } else { - Err(()) +pub fn quad_sort(mut slice: &mut [T], mut cmp: impl FnMut(&T, &T) -> core::cmp::Ordering) { + while let Some(it) = slice.take_first_mut() { + for ot in &mut *slice { + if cmp(it, ot) == core::cmp::Ordering::Greater { + core::mem::swap(it, ot); } } } + debug_assert!(slice.is_sorted_by(|a, b| cmp(a, b) != core::cmp::Ordering::Greater)); +} - impl Default for Kind { - fn default() -> Self { - Id::UNDECLARED.expand() - } +type FnvBuildHasher = core::hash::BuildHasherDefault; + +struct FnvHasher(u64); + +impl core::hash::Hasher for FnvHasher { + fn finish(&self) -> u64 { + self.0 } - pub struct Display<'a> { - tys: &'a super::Types, - files: &'a [parser::Ast], - ty: Id, - } - - impl<'a> Display<'a> { - pub fn new(tys: &'a super::Types, files: &'a [parser::Ast], ty: Id) -> Self { - Self { tys, files, ty } - } - - pub fn rety(&self, ty: Id) -> Self { - Self::new(self.tys, self.files, ty) - } - } - - impl core::fmt::Display for Display<'_> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - use Kind as TK; - match TK::from_ty(self.ty) { - TK::Module(idx) => { - f.write_str("@use(\"")?; - self.files[idx.index()].path.fmt(f)?; - f.write_str(")[")?; - idx.fmt(f)?; - f.write_str("]") - } - TK::Builtin(ty) => f.write_str(to_str(ty)), - TK::Opt(ty) => { - f.write_str("?")?; - self.rety(self.tys.ins.opts[ty].base).fmt(f) - } - TK::Ptr(ty) => { - f.write_str("^")?; - self.rety(self.tys.ins.ptrs[ty].base).fmt(f) - } - TK::Struct(idx) => { - let record = &self.tys.ins.structs[idx]; - if record.name.is_null() { - f.write_str("[")?; - idx.fmt(f)?; - f.write_str("]{")?; - for (i, &super::StructField { name, ty }) in - self.tys.struct_fields(idx).iter().enumerate() - { - if i != 0 { - f.write_str(", ")?; - } - f.write_str(self.tys.names.ident_str(name))?; - f.write_str(": ")?; - self.rety(ty).fmt(f)?; - } - f.write_str("}") - } else { - let file = &self.files[record.file.index()]; - f.write_str(file.ident_str(record.name)) - } - } - TK::Enum(idx) => { - let enm = &self.tys.ins.enums[idx]; - debug_assert!(!enm.name.is_null()); - let file = &self.files[enm.file.index()]; - f.write_str(file.ident_str(enm.name)) - } - TK::Func(idx) => { - f.write_str("fn")?; - idx.fmt(f) - } - TK::Global(idx) => { - let global = &self.tys.ins.globals[idx]; - let file = &self.files[global.file.index()]; - f.write_str(file.ident_str(global.name))?; - f.write_str(" (global)") - } - TK::Slice(idx) => { - let array = self.tys.ins.slices[idx]; - f.write_str("[")?; - self.rety(array.elem).fmt(f)?; - if array.len != ArrayLen::MAX { - f.write_str("; ")?; - array.len.fmt(f)?; - } - f.write_str("]") - } - TK::Const(idx) => { - let cnst = &self.tys.ins.consts[idx]; - let file = &self.files[cnst.file.index()]; - f.write_str(file.ident_str(cnst.name))?; - f.write_str(" (const)") - } - } - } + fn write(&mut self, bytes: &[u8]) { + self.0 = bytes.iter().fold(self.0, |hash, &byte| { + let mut hash = hash; + hash ^= byte as u64; + hash = hash.wrapping_mul(0x100000001B3); + hash + }); } } -type Offset = u32; -type Size = u32; - -#[derive(PartialEq, Eq, Hash, Clone, Copy)] -pub enum SymKey<'a> { - Pointer(&'a Ptr), - Optional(&'a Opt), - Struct(Module, Pos, ty::Tuple), - Enum(Module, Pos), - FuncInst(ty::Func, ty::Tuple), - Decl(ty::Id, Ident), - Array(&'a Array), - Constant(&'a Const), -} - -#[derive(Clone, Copy)] -pub struct Sig { - args: ty::Tuple, - ret: ty::Id, -} - -#[derive(Default, Clone, Copy)] -struct Func { - file: Module, - parent: ty::Id, - name: Ident, - base: Option, - expr: ExprRef, - sig: Option, - is_inline: bool, - returns_type: bool, - comp_state: [CompState; 2], -} - -#[derive(Default, PartialEq, Eq, Clone, Copy)] -enum CompState { - #[default] - Dead, - Queued(usize), - Compiled, -} - -#[derive(Clone, Default)] -struct Global { - file: Module, - name: Ident, - ty: ty::Id, - data: Vec, -} - -#[derive(PartialEq, Eq, Hash)] -pub struct Const { - ast: ExprRef, - name: Ident, - file: Module, - parent: ty::Id, -} - -struct EnumField { - name: Ident, -} - -#[derive(Default)] -struct Enum { - name: Ident, - pos: Pos, - file: Module, - field_start: u32, -} - -struct StructField { - name: Ident, - ty: ty::Id, -} - -#[derive(Default)] -struct Struct { - name: Ident, - pos: Pos, - file: Module, - size: Cell, - align: Cell, - captured: ty::Tuple, - explicit_alignment: Option, - field_start: u32, - ast: ExprRef, -} - -#[derive(PartialEq, Eq, Hash, Clone, Copy)] -pub struct Opt { - base: ty::Id, -} - -#[derive(PartialEq, Eq, Hash, Clone, Copy)] -pub struct Ptr { - base: ty::Id, -} - -#[derive(Clone, Copy, PartialEq, Eq, Hash)] -pub struct Array { - elem: ty::Id, - len: ArrayLen, -} -impl Array { - fn len(&self) -> Option { - (self.len != ArrayLen::MAX).then_some(self.len as usize) - } -} - -impl ctx_map::CtxEntry for Ident { - type Ctx = str; - type Key<'a> = &'a str; - - fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { - unsafe { ctx.get_unchecked(self.range()) } - } -} - -#[derive(Default)] -struct IdentInterner { - lookup: ctx_map::CtxMap, - strings: String, -} - -impl IdentInterner { - fn intern(&mut self, ident: &str) -> Ident { - let (entry, hash) = self.lookup.entry(ident, &self.strings); - match entry { - hash_map::RawEntryMut::Occupied(o) => o.get_key_value().0.value, - hash_map::RawEntryMut::Vacant(v) => { - let id = Ident::new(self.strings.len() as _, ident.len() as _).unwrap(); - self.strings.push_str(ident); - v.insert(ctx_map::Key { hash, value: id }, ()); - id - } - } - } - - fn ident_str(&self, ident: Ident) -> &str { - &self.strings[ident.range()] - } - - fn project(&self, ident: &str) -> Option { - self.lookup.get(ident, &self.strings).copied() - } - - fn clear(&mut self) { - self.lookup.clear(); - self.strings.clear() - } -} - -#[derive(Default)] -struct TypesTmp { - struct_fields: Vec, - enum_fields: Vec, - args: Vec, -} - -#[derive(Default)] -pub struct TypeIns { - args: Vec, - struct_fields: Vec, - enum_fields: Vec, - funcs: EntVec, - globals: EntVec, - consts: EntVec, - structs: EntVec, - enums: EntVec, - ptrs: EntVec, - opts: EntVec, - slices: EntVec, -} - -struct FTask { - file: Module, - id: ty::Func, - ct: bool, -} - -struct StringRef(ty::Global); - -impl ctx_map::CtxEntry for StringRef { - type Ctx = EntVec; - type Key<'a> = &'a [u8]; - - fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { - &ctx[self.0].data - } -} - -#[derive(Default)] -pub struct Types { - syms: ctx_map::CtxMap, - names: IdentInterner, - strings: ctx_map::CtxMap, - ins: TypeIns, - tmp: TypesTmp, - tasks: Vec>, -} - -impl Types { - pub fn case(&self, ty: ty::Id) -> fn(&str) -> Result<(), &'static str> { - match ty.expand() { - ty::Kind::NEVER => |_| Ok(()), - ty::Kind::Enum(_) - | ty::Kind::Struct(_) - | ty::Kind::Builtin(_) - | ty::Kind::Ptr(_) - | ty::Kind::Slice(_) - | ty::Kind::Opt(_) => utils::is_pascal_case, - ty::Kind::Func(f) if self.ins.funcs[f].returns_type => utils::is_pascal_case, - ty::Kind::Func(_) | ty::Kind::Global(_) | ty::Kind::Module(_) => utils::is_snake_case, - ty::Kind::Const(_) => utils::is_screaming_case, - } - } - - fn pack_args(&mut self, arg_base: usize) -> Option { - let base = self.ins.args.len(); - self.ins.args.extend(self.tmp.args.drain(arg_base..)); - let needle = &self.ins.args[base..]; - if needle.is_empty() { - return Some(ty::Tuple::empty()); - } - let len = needle.len(); - // FIXME: maybe later when this becomes a bottleneck we use more - // efficient search (SIMD?, indexing?) - let sp = self.ins.args.windows(needle.len()).position(|val| val == needle).unwrap(); - self.ins.args.truncate((sp + needle.len()).max(base)); - ty::Tuple::new(sp, len) - } - - fn struct_fields(&self, strct: ty::Struct) -> &[StructField] { - &self.ins.struct_fields[self.struct_field_range(strct)] - } - - fn struct_field_range(&self, strct: ty::Struct) -> Range { - let start = self.ins.structs[strct].field_start as usize; - let end = self - .ins - .structs - .next(strct) - .map_or(self.ins.struct_fields.len(), |s| s.field_start as usize); - start..end - } - - fn enum_fields(&self, enm: ty::Enum) -> &[EnumField] { - &self.ins.enum_fields[self.enum_field_range(enm)] - } - - fn enum_field_range(&self, enm: ty::Enum) -> Range { - let start = self.ins.enums[enm].field_start as usize; - let end = - self.ins.enums.next(enm).map_or(self.ins.enum_fields.len(), |s| s.field_start as usize); - start..end - } - - fn make_opt(&mut self, base: ty::Id) -> ty::Id { - self.make_generic_ty(Opt { base }, |ins| &mut ins.opts, |e| SymKey::Optional(e)) - } - - fn make_ptr(&mut self, base: ty::Id) -> ty::Id { - self.make_generic_ty(Ptr { base }, |ins| &mut ins.ptrs, |e| SymKey::Pointer(e)) - } - - fn make_array(&mut self, elem: ty::Id, len: ArrayLen) -> ty::Id { - self.make_generic_ty(Array { elem, len }, |ins| &mut ins.slices, |e| SymKey::Array(e)) - } - - fn make_generic_ty, T: Copy>( - &mut self, - ty: T, - get_col: fn(&mut TypeIns) -> &mut EntVec, - key: fn(&T) -> SymKey, - ) -> ty::Id { - *self.syms.get_or_insert(key(&{ ty }), &mut self.ins, |ins| get_col(ins).push(ty).into()) - } - - fn size_of(&self, ty: ty::Id) -> Size { - match ty.expand() { - ty::Kind::Slice(arr) => { - let arr = &self.ins.slices[arr]; - match arr.len { - 0 => 0, - ArrayLen::MAX => 16, - len => self.size_of(arr.elem) * len, - } - } - ty::Kind::Struct(stru) => { - if self.ins.structs[stru].size.get() != 0 { - return self.ins.structs[stru].size.get(); - } - - let mut oiter = OffsetIter::new(stru, self); - while oiter.next(self).is_some() {} - self.ins.structs[stru].size.set(oiter.offset); - oiter.offset - } - ty::Kind::Enum(enm) => (self.enum_field_range(enm).len().ilog2() + 7) / 8, - ty::Kind::Opt(opt) => { - let base = self.ins.opts[opt].base; - if self.nieche_of(base).is_some() { - self.size_of(base) - } else { - self.size_of(base) + self.align_of(base) - } - } - _ if let Some(size) = ty.simple_size() => size, - ty => unimplemented!("size_of: {:?}", ty), - } - } - - fn align_of(&self, ty: ty::Id) -> Size { - match ty.expand() { - ty::Kind::Struct(stru) => { - if self.ins.structs[stru].align.get() != 0 { - return self.ins.structs[stru].align.get() as _; - } - let align = self.ins.structs[stru].explicit_alignment.map_or_else( - || { - self.struct_fields(stru) - .iter() - .map(|&StructField { ty, .. }| self.align_of(ty)) - .max() - .unwrap_or(1) - }, - |a| a as _, - ); - self.ins.structs[stru].align.set(align.try_into().unwrap()); - align - } - ty::Kind::Slice(arr) => { - let arr = &self.ins.slices[arr]; - match arr.len { - ArrayLen::MAX => 8, - _ => self.align_of(arr.elem), - } - } - _ => self.size_of(ty).max(1), - } - } - - fn base_of(&self, ty: ty::Id) -> Option { - match ty.expand() { - ty::Kind::Ptr(p) => Some(self.ins.ptrs[p].base), - _ => None, - } - } - - fn inner_of(&self, ty: ty::Id) -> Option { - match ty.expand() { - ty::Kind::Opt(o) => Some(self.ins.opts[o].base), - _ => None, - } - } - - fn opt_layout(&self, inner_ty: ty::Id) -> OptLayout { - match self.nieche_of(inner_ty) { - Some((_, flag_offset, flag_ty)) => { - OptLayout { flag_ty, flag_offset, payload_offset: 0 } - } - None => OptLayout { - flag_ty: ty::Id::BOOL, - flag_offset: 0, - payload_offset: self.align_of(inner_ty), - }, - } - } - - fn nieche_of(&self, ty: ty::Id) -> Option<(bool, Offset, ty::Id)> { - match ty.expand() { - ty::Kind::Ptr(_) => Some((false, 0, ty::Id::UINT)), - // TODO: cache this - ty::Kind::Struct(s) => OffsetIter::new(s, self).into_iter(self).find_map(|(f, off)| { - self.nieche_of(f.ty).map(|(uninit, o, ty)| (uninit, o + off, ty)) - }), - _ => None, - } - } - - fn find_struct_field(&self, s: ty::Struct, name: &str) -> Option { - let name = self.names.project(name)?; - self.struct_fields(s).iter().position(|f| f.name == name) - } - - fn clear(&mut self) { - self.syms.clear(); - self.names.clear(); - self.strings.clear(); - - self.ins.funcs.clear(); - self.ins.args.clear(); - self.ins.globals.clear(); - self.ins.structs.clear(); - self.ins.struct_fields.clear(); - self.ins.ptrs.clear(); - self.ins.slices.clear(); - - debug_assert_eq!(self.tmp.struct_fields.len(), 0); - debug_assert_eq!(self.tmp.args.len(), 0); - - debug_assert_eq!(self.tasks.len(), 0); - } - - fn scope_of<'a>(&self, parent: ty::Id, file: &'a parser::Ast) -> Option<&'a [Expr<'a>]> { - match parent.expand() { - ty::Kind::Struct(s) => { - if let Expr::Struct { fields: [.., CommentOr::Or(Err(scope))], .. } = - self.ins.structs[s].ast.get(file) - { - Some(scope) - } else { - Some(&[]) - } - } - _ => None, - } - } - - fn parent_of(&self, ty: ty::Id) -> Option { - match ty.expand() { - ty::Kind::Struct(s) => Some(self.ins.structs[s].file.into()), - _ => None, - } - } - - fn captures_of<'a>( - &self, - ty: ty::Id, - file: &'a parser::Ast, - ) -> Option<(&'a [Ident], ty::Tuple)> { - match ty.expand() { - ty::Kind::Struct(s) => { - let Expr::Struct { captured, .. } = self.ins.structs[s].ast.get(file) else { - unreachable!() - }; - Some((captured, self.ins.structs[s].captured)) - } - _ => None, - } - } -} - -struct OptLayout { - flag_ty: ty::Id, - flag_offset: Offset, - payload_offset: Offset, -} - -struct OffsetIter { - strct: ty::Struct, - offset: Offset, - fields: Range, -} - -impl OffsetIter { - fn new(strct: ty::Struct, tys: &Types) -> Self { - Self { strct, offset: 0, fields: tys.struct_field_range(strct) } - } - - fn offset_of(tys: &Types, idx: ty::Struct, field: &str) -> Option<(Offset, ty::Id)> { - let field_id = tys.names.project(field)?; - OffsetIter::new(idx, tys) - .into_iter(tys) - .find(|(f, _)| f.name == field_id) - .map(|(f, off)| (off, f.ty)) - } - - fn next<'a>(&mut self, tys: &'a Types) -> Option<(&'a StructField, Offset)> { - let stru = &tys.ins.structs[self.strct]; - let field = &tys.ins.struct_fields[self.fields.next()?]; - - let align = stru.explicit_alignment.map_or_else(|| tys.align_of(field.ty), |a| a as u32); - self.offset = (self.offset + align - 1) & !(align - 1); - - let off = self.offset; - self.offset += tys.size_of(field.ty); - Some((field, off)) - } - - fn next_ty(&mut self, tys: &Types) -> Option<(ty::Id, Offset)> { - let (field, off) = self.next(tys)?; - Some((field.ty, off)) - } - - fn into_iter(mut self, tys: &Types) -> impl Iterator { - core::iter::from_fn(move || self.next(tys)) +impl Default for FnvHasher { + fn default() -> Self { + Self(0xCBF29CE484222325) } } @@ -1285,9 +385,13 @@ pub fn run_test( name: &'static str, ident: &'static str, input: &'static str, - test: fn(&'static str, &'static str, &mut String), + test: fn(&'static str, &'static str, &mut alloc::string::String), ) { - use std::{io::Write, path::PathBuf, string::ToString}; + use std::{ + io::Write, + path::PathBuf, + string::{String, ToString}, + }; let filter = std::env::var("PT_FILTER").unwrap_or_default(); if !filter.is_empty() && !name.contains(&filter) { @@ -1426,98 +530,3 @@ fn test_parse_files( embed_map.iter().map(|&(_, content)| content.to_owned().into_bytes()).collect(), ) } - -fn endoce_string( - literal: &str, - str: &mut Vec, - report: impl Fn(&core::str::Bytes, &str), -) -> Option<()> { - let report = |bytes: &core::str::Bytes, msg: &_| { - report(bytes, msg); - None:: - }; - - let decode_braces = |str: &mut Vec, bytes: &mut core::str::Bytes| { - while let Some(b) = bytes.next() - && b != b'}' - { - let c = bytes.next().or_else(|| report(bytes, "incomplete escape sequence"))?; - let decode = |b: u8| { - Some(match b { - b'0'..=b'9' => b - b'0', - b'a'..=b'f' => b - b'a' + 10, - b'A'..=b'F' => b - b'A' + 10, - _ => report(bytes, "expected hex digit or '}'")?, - }) - }; - str.push(decode(b)? << 4 | decode(c)?); - } - - Some(()) - }; - - let mut bytes = literal.bytes(); - while let Some(b) = bytes.next() { - if b != b'\\' { - str.push(b); - continue; - } - let b = match bytes.next().or_else(|| report(&bytes, "incomplete escape sequence"))? { - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'\\' => b'\\', - b'\'' => b'\'', - b'"' => b'"', - b'0' => b'\0', - b'{' => { - decode_braces(str, &mut bytes); - continue; - } - _ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?, - }; - str.push(b); - } - - if str.last() != Some(&0) { - report(&bytes, "string literal must end with null byte (for now)"); - } - - Some(()) -} - -pub fn quad_sort(mut slice: &mut [T], mut cmp: impl FnMut(&T, &T) -> core::cmp::Ordering) { - while let Some(it) = slice.take_first_mut() { - for ot in &mut *slice { - if cmp(it, ot) == core::cmp::Ordering::Greater { - core::mem::swap(it, ot); - } - } - } - debug_assert!(slice.is_sorted_by(|a, b| cmp(a, b) != core::cmp::Ordering::Greater)); -} - -type FnvBuildHasher = core::hash::BuildHasherDefault; - -struct FnvHasher(u64); - -impl core::hash::Hasher for FnvHasher { - fn finish(&self) -> u64 { - self.0 - } - - fn write(&mut self, bytes: &[u8]) { - self.0 = bytes.iter().fold(self.0, |hash, &byte| { - let mut hash = hash; - hash ^= byte as u64; - hash = hash.wrapping_mul(0x100000001B3); - hash - }); - } -} - -impl Default for FnvHasher { - fn default() -> Self { - Self(0xCBF29CE484222325) - } -} diff --git a/lang/src/parser.rs b/lang/src/parser.rs index c4ca472c..40a79dbf 100644 --- a/lang/src/parser.rs +++ b/lang/src/parser.rs @@ -369,62 +369,35 @@ impl<'a, 'b> Parser<'a, 'b> { expr } T::Struct => E::Struct { + pos, packed: core::mem::take(&mut self.packed), - fields: { - self.ns_bound = self.ctx.idents.len(); - self.expect_advance(T::LBrace)?; - self.collect_list(T::Comma, T::RBrace, |s| { - let tok = s.token; - Some(if s.advance_if(T::Comment) { - CommentOr::Comment { literal: s.tok_str(tok), pos: tok.start } - } else if s.lexer.taste().kind == T::Colon { - let name = s.expect_advance(T::Ident)?; - s.expect_advance(T::Colon)?; - CommentOr::Or(Ok(StructField { - pos: name.start, - name: s.tok_str(name), - ty: s.expr()?, - })) - } else { - must_trail = true; - CommentOr::Or(Err( - s.collect_list_low(T::Semi, T::RBrace, true, |s| s.expr_low(true)) - )) - }) - }) - }, - captured: { - self.ns_bound = prev_boundary; - let captured = &mut self.ctx.captured[prev_captured..]; - crate::quad_sort(captured, core::cmp::Ord::cmp); - let preserved = captured.partition_dedup().0.len(); - self.ctx.captured.truncate(prev_captured + preserved); - self.arena.alloc_slice(&self.ctx.captured[prev_captured..]) - }, - pos: { - if self.ns_bound == 0 { - // we might save some memory - self.ctx.captured.clear(); + fields: self.collect_fields(&mut must_trail, |s| { + if s.lexer.taste().kind != T::Colon { + return Some(None); } - pos - }, + let name = s.expect_advance(T::Ident)?; + s.expect_advance(T::Colon)?; + Some(Some(StructField { + pos: name.start, + name: s.tok_str(name), + ty: s.expr()?, + })) + })?, + captured: self.collect_captures(prev_boundary, prev_captured), trailing_comma: core::mem::take(&mut self.trailing_sep) || must_trail, }, T::Enum => E::Enum { pos, - variants: { - self.expect_advance(T::LBrace)?; - self.collect_list(T::Comma, T::RBrace, |s| { - let tok = s.token; - Some(if s.advance_if(T::Comment) { - CommentOr::Comment { literal: s.tok_str(tok), pos: tok.start } - } else { - let name = s.expect_advance(T::Ident)?; - CommentOr::Or(EnumField { pos: name.start, name: s.tok_str(name) }) - }) - }) - }, - trailing_comma: core::mem::take(&mut self.trailing_sep), + variants: self.collect_fields(&mut must_trail, |s| { + if !matches!(s.lexer.taste().kind, T::Comma | T::RBrace) { + return Some(None); + } + + let name = s.expect_advance(T::Ident)?; + Some(Some(EnumField { pos: name.start, name: s.tok_str(name) })) + })?, + captured: self.collect_captures(prev_boundary, prev_captured), + trailing_comma: core::mem::take(&mut self.trailing_sep) || must_trail, }, T::Ident | T::CtIdent => { let (id, is_first) = self.resolve_ident(token); @@ -624,6 +597,45 @@ impl<'a, 'b> Parser<'a, 'b> { } } + fn collect_fields( + &mut self, + must_trail: &mut bool, + mut parse_field: impl FnMut(&mut Self) -> Option>, + ) -> Option> { + use TokenKind as T; + self.ns_bound = self.ctx.idents.len(); + self.expect_advance(T::LBrace)?; + Some(self.collect_list(T::Comma, T::RBrace, |s| { + let tok = s.token; + Some(if s.advance_if(T::Comment) { + CommentOr::Comment { literal: s.tok_str(tok), pos: tok.start } + } else if let Some(field) = parse_field(s)? { + CommentOr::Or(Ok(field)) + } else { + *must_trail = true; + CommentOr::Or(Err( + s.collect_list_low(T::Semi, T::RBrace, true, |s| s.expr_low(true)) + )) + }) + })) + } + + fn collect_captures(&mut self, prev_captured: usize, prev_boundary: usize) -> &'a [Ident] { + self.ns_bound = prev_boundary; + let captured = &mut self.ctx.captured[prev_captured..]; + crate::quad_sort(captured, core::cmp::Ord::cmp); + let preserved = captured.partition_dedup().0.len(); + self.ctx.captured.truncate(prev_captured + preserved); + let slc = self.arena.alloc_slice(&self.ctx.captured[prev_captured..]); + + if self.ns_bound == 0 { + // we might save some memory + self.ctx.captured.clear(); + } + + slc + } + fn advance_ident(&mut self) -> Option { let next = self.next(); if matches!(next.kind, TokenKind::Ident | TokenKind::CtIdent) { @@ -841,6 +853,8 @@ pub enum Radix { Decimal = 10, } +pub type FieldList<'a, T> = &'a [CommentOr<'a, Result]>>]; + generate_expr! { /// `LIST(start, sep, end, elem) => start { elem sep } [elem] end` /// `OP := grep for `#define OP:` @@ -958,7 +972,7 @@ generate_expr! { /// `'struct' LIST('{', ',', '}', Ident ':' Expr)` Struct { pos: Pos, - fields: &'a [CommentOr<'a, Result, &'a[Self]>>], + fields: FieldList<'a, StructField<'a>>, captured: &'a [Ident], trailing_comma: bool, packed: bool, @@ -966,7 +980,8 @@ generate_expr! { /// `'enum' LIST('{', ',', '}', Ident)` Enum { pos: Pos, - variants: &'a [CommentOr<'a, EnumField<'a>>], + variants: FieldList<'a, EnumField<'a>>, + captured: &'a [Ident], trailing_comma: bool, }, /// `[Expr] LIST('.{', ',', '}', Ident [':' Expr])` diff --git a/lang/src/son.rs b/lang/src/son.rs index 6697eb30..88b312c1 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -1,21 +1,25 @@ use { - self::{ - hbvm::{Comptime, HbvmBackend}, - strong_ref::StrongRef, - }, + self::strong_ref::StrongRef, crate::{ + backend::{ + hbvm::{Comptime, HbvmBackend}, + Backend, + }, ctx_map::CtxEntry, debug, lexer::{self, TokenKind}, parser::{ self, idfl::{self}, - CommentOr, CtorField, Expr, ExprRef, MatchBranch, Pos, + CommentOr, CtorField, Expr, ExprRef, FieldList, MatchBranch, Pos, + }, + ty::{ + self, Arg, ArrayLen, CompState, ConstData, EnumData, EnumField, FTask, FuncData, + GlobalData, Loc, Module, Offset, OffsetIter, OptLayout, Sig, StringRef, StructData, + StructField, SymKey, Tuple, TypeBase, TypeIns, Types, }, - ty::{self, Arg, ArrayLen, Loc, Module, Tuple}, utils::{BitSet, Ent, Vc}, - CompState, Const, Enum, EnumField, FTask, Func, Global, Ident, Offset, OffsetIter, - OptLayout, Sig, StringRef, Struct, StructField, SymKey, Types, + Ident, }, alloc::{string::String, vec::Vec}, core::{ @@ -29,52 +33,18 @@ use { hbbytecode::DisasmError, }; -const VOID: Nid = 0; -const NEVER: Nid = 1; -const ENTRY: Nid = 2; -const MEM: Nid = 3; -const LOOPS: Nid = 4; -const ARG_START: usize = 3; +pub const VOID: Nid = 0; +pub const NEVER: Nid = 1; +pub const ENTRY: Nid = 2; +pub const MEM: Nid = 3; +pub const LOOPS: Nid = 4; +pub const ARG_START: usize = 3; const DEFAULT_ACLASS: usize = 0; const GLOBAL_ACLASS: usize = 1; -pub mod hbvm; - -type Nid = u16; +pub type Nid = u16; type AClassId = i16; -pub struct AssemblySpec { - entry: u32, - code_length: u64, - data_length: u64, -} - -pub trait Backend { - fn assemble_reachable( - &mut self, - from: ty::Func, - types: &Types, - to: &mut Vec, - ) -> AssemblySpec; - fn disasm<'a>( - &'a self, - sluce: &[u8], - eca_handler: &mut dyn FnMut(&mut &[u8]), - types: &'a Types, - files: &'a [parser::Ast], - output: &mut String, - ) -> Result<(), hbbytecode::DisasmError<'a>>; - fn emit_body(&mut self, id: ty::Func, ci: &mut Nodes, tys: &Types, files: &[parser::Ast]); - - fn emit_ct_body(&mut self, id: ty::Func, ci: &mut Nodes, tys: &Types, files: &[parser::Ast]) { - self.emit_body(id, ci, tys, files); - } - - fn assemble_bin(&mut self, from: ty::Func, types: &Types, to: &mut Vec) { - self.assemble_reachable(from, types, to); - } -} - type Lookup = crate::ctx_map::CtxMap; impl crate::ctx_map::CtxEntry for Nid { @@ -124,7 +94,25 @@ impl Default for Nodes { } impl Nodes { - fn loop_depth(&self, target: Nid, scheds: Option<&[Nid]>) -> LoopDepth { + #[inline] + pub fn len(&self) -> usize { + self.values.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.values.is_empty() + } + + fn as_ty(&self, cint: Nid) -> ty::Id { + debug_assert_eq!(self[cint].ty, ty::Id::TYPE); + ty::Id::from(match self[cint].kind { + Kind::CInt { value } => value as u64, + _ => unreachable!("triing to cast non constant to a type: {:?}", self[cint]), + }) + } + + pub fn loop_depth(&self, target: Nid, scheds: Option<&[Nid]>) -> LoopDepth { self[target].loop_depth.set(match self[target].kind { Kind::Region | Kind::Entry | Kind::Then | Kind::Else | Kind::Call { .. } | Kind::If => { if self[target].loop_depth.get() != 0 { @@ -552,7 +540,7 @@ impl Nodes { self[to].inputs.push(from); } - fn use_block(&self, target: Nid, from: Nid, scheds: Option<&[Nid]>) -> Nid { + pub fn use_block(&self, target: Nid, from: Nid, scheds: Option<&[Nid]>) -> Nid { if self[from].kind != Kind::Phi { return self.idom(from, scheds); } @@ -563,7 +551,7 @@ impl Nodes { self[self[from].inputs[0]].inputs[index - 1] } - fn idom(&self, target: Nid, scheds: Option<&[Nid]>) -> Nid { + pub fn idom(&self, target: Nid, scheds: Option<&[Nid]>) -> Nid { match self[target].kind { Kind::Start => unreachable!(), Kind::End => unreachable!(), @@ -839,20 +827,22 @@ impl Nodes { Value::new(self.new_node(ty, kind, inps, tys)).ty(ty) } - fn is_locked(&self, target: Nid) -> bool { + // TODO: make this internal to son and force backends to track locks thelself + + pub fn is_locked(&self, target: Nid) -> bool { self[target].lock_rc.get() != 0 } - fn is_unlocked(&self, target: Nid) -> bool { + pub fn is_unlocked(&self, target: Nid) -> bool { self[target].lock_rc.get() == 0 } - fn lock(&self, target: Nid) { + pub fn lock(&self, target: Nid) { self[target].lock_rc.set(self[target].lock_rc.get() + 1); } #[track_caller] - fn unlock(&self, target: Nid) { + pub fn unlock(&self, target: Nid) { self[target].lock_rc.set(self[target].lock_rc.get() - 1); } @@ -1650,7 +1640,7 @@ impl Nodes { } } - fn is_const(&self, id: Nid) -> bool { + pub fn is_const(&self, id: Nid) -> bool { matches!(self[id].kind, Kind::CInt { .. }) } @@ -2007,7 +1997,7 @@ impl Nodes { } } - fn dominates(&self, dominator: Nid, mut dominated: Nid, scheds: Option<&[Nid]>) -> bool { + pub fn dominates(&self, dominator: Nid, mut dominated: Nid, scheds: Option<&[Nid]>) -> bool { loop { if dominator == dominated { break true; @@ -2023,7 +2013,7 @@ impl Nodes { } } - fn is_data_dep(&self, val: Nid, user: Nid) -> bool { + pub fn is_data_dep(&self, val: Nid, user: Nid) -> bool { match self[user].kind { Kind::Return { .. } => self[user].inputs[1] == val, _ if self.is_cfg(user) && !matches!(self[user].kind, Kind::Call { .. } | Kind::If) => { @@ -2050,7 +2040,7 @@ impl Nodes { } } - fn this_or_delegates<'a>(&'a self, source: Nid, target: &'a Nid) -> (Nid, &'a [Nid]) { + pub fn this_or_delegates<'a>(&'a self, source: Nid, target: &'a Nid) -> (Nid, &'a [Nid]) { if self.is_unlocked(*target) { (source, core::slice::from_ref(target)) } else { @@ -2058,7 +2048,7 @@ impl Nodes { } } - fn is_hard_zero(&self, nid: Nid) -> bool { + pub fn is_hard_zero(&self, nid: Nid) -> bool { self[nid].kind == Kind::CInt { value: 0 } && self[nid].outputs.iter().all(|&n| self[n].kind != Kind::Phi) } @@ -2178,7 +2168,7 @@ impl Kind { matches!(self, Self::Arg | Self::Mem | Self::Loops | Self::Entry) } - fn is_cfg(&self) -> bool { + pub fn is_cfg(&self) -> bool { matches!( self, Self::Start @@ -2199,7 +2189,7 @@ impl Kind { matches!(self, Self::Return { .. } | Self::If | Self::End | Self::Die) } - fn starts_basic_block(&self) -> bool { + pub fn starts_basic_block(&self) -> bool { matches!(self, Self::Region | Self::Loop | Self::Start | Kind::Then | Kind::Else) } @@ -2224,13 +2214,14 @@ impl fmt::Display for Kind { #[derive(Debug, Default, Clone)] pub struct Node { - kind: Kind, - inputs: Vc, - outputs: Vc, - peep_triggers: Vc, - clobbers: BitSet, - ty: ty::Id, - pos: Pos, + pub kind: Kind, + pub inputs: Vc, + pub outputs: Vc, + pub peep_triggers: Vc, + pub clobbers: BitSet, + pub ty: ty::Id, + pub pos: Pos, + depth: Cell, lock_rc: Cell, loop_depth: Cell, @@ -2260,11 +2251,11 @@ impl Node { matches!(self.kind, Kind::Stre | Kind::Load | Kind::Stck) } - fn is_data_phi(&self) -> bool { + pub fn is_data_phi(&self) -> bool { self.kind == Kind::Phi && self.ty != ty::Id::VOID } - fn has_no_value(&self) -> bool { + pub fn has_no_value(&self) -> bool { (self.kind.is_cfg() && (!self.kind.is_call() || self.ty == ty::Id::VOID)) || matches!(self.kind, Kind::Stre) } @@ -2712,7 +2703,7 @@ impl<'a> Codegen<'a> { pub fn push_embeds(&mut self, embeds: Vec>) { for data in embeds { - let g = Global { + let g = GlobalData { ty: self.tys.make_array(ty::Id::U8, data.len() as _), data, ..Default::default() @@ -2740,13 +2731,13 @@ impl<'a> Codegen<'a> { return 1; } - let fuc = self.tys.ins.funcs.push(Func { + let fuc = self.tys.ins.funcs.push(FuncData { file, sig: Some(Sig { args: Tuple::empty(), ret }), ..Default::default() }); - self.ct_backend.emit_ct_body(fuc, &mut self.ci.nodes, self.tys, self.files); + self.ct_backend.emit_ct_body(fuc, &self.ci.nodes, self.tys, self.files); // TODO: return them back @@ -2948,7 +2939,7 @@ impl<'a> Codegen<'a> { let literal = &literal[1..literal.len() - 1]; let report = |bytes: &core::str::Bytes, message: &str| { - self.error(pos + (literal.len() - bytes.len()) as u32 - 1, message) + self.error(pos + (literal.len() - bytes.len()) as u32 - 1, message); }; let mut data = Vec::::with_capacity(literal.len()); @@ -2960,8 +2951,11 @@ impl<'a> Codegen<'a> { occupied_entry.get_key_value().0.value.0 } (hash_map::RawEntryMut::Vacant(vacant_entry), hash) => { - let global = - self.tys.ins.globals.push(Global { data, ty, ..Default::default() }); + let global = self.tys.ins.globals.push(GlobalData { + data, + ty, + ..Default::default() + }); vacant_entry .insert(crate::ctx_map::Key { value: StringRef(global), hash }, ()) .0 @@ -4217,20 +4211,13 @@ impl<'a> Codegen<'a> { let tty = vtarget.ty; match self.tys.base_of(tty).unwrap_or(tty).expand() { - ty::Kind::Module(m) => { - match self.find_type(pos, self.ci.file, m, self.ci.parent, Err(name)).expand() { - ty::Kind::NEVER => Value::NEVER, - ty::Kind::Global(global) => self.gen_global(global), - ty::Kind::Const(cnst) => self.gen_const(cnst, ctx), - v => Some(self.ci.nodes.new_const_lit(ty::Id::TYPE, v.compress())), - } - } + ty::Kind::Module(m) => self.find_type_as_value(pos, m, m, Err(name), ctx), ty::Kind::Enum(e) => { let intrnd = self.tys.names.project(name); self.gen_enum_variant(pos, e, intrnd) } ty::Kind::Struct(s) => { - let Struct { ast, file, .. } = self.tys.ins.structs[s]; + let TypeBase { ast, file, .. } = *self.tys.ins.structs[s]; if let Some((offset, ty)) = OffsetIter::offset_of(self.tys, s, name) { Some(Value::ptr(self.offset(vtarget.id, offset)).ty(ty)) } else if let Expr::Struct { fields: [.., CommentOr::Or(Err(_))], .. } = @@ -4258,51 +4245,39 @@ impl<'a> Codegen<'a> { Value::NEVER } } - ty::Kind::TYPE => match ty::Id::from(match self.ci.nodes[vtarget.id].kind { - Kind::CInt { value } => value as u64, - _ => unreachable!(), - }) - .expand() - { + ty::Kind::TYPE => match self.ci.nodes.as_ty(vtarget.id).expand() { ty::Kind::Struct(s) => { - let Struct { file, .. } = self.tys.ins.structs[s]; - match self.find_type(pos, self.ci.file, file, s.into(), Err(name)).expand() { - ty::Kind::NEVER => Value::NEVER, - ty::Kind::Global(global) => self.gen_global(global), - ty::Kind::Const(cnst) => self.gen_const(cnst, ctx), - v => Some(self.ci.nodes.new_const_lit(ty::Id::TYPE, v.compress())), + let TypeBase { file, .. } = *self.tys.ins.structs[s]; + self.find_type_as_value(pos, file, s, Err(name), ctx) + } + ty::Kind::Module(m) => self.find_type_as_value(pos, m, m, Err(name), ctx), + ty::Kind::Enum(e) => { + let intrnd = self.tys.names.project(name); + if let Some(index) = + self.tys.enum_fields(e).iter().position(|f| Some(f.name) == intrnd) + { + Some(self.ci.nodes.new_const_lit(e.into(), index as i64)) + } else { + let TypeBase { file, .. } = *self.tys.ins.enums[e]; + self.find_type_as_value(pos, file, e, Err(name), ctx) } } - ty::Kind::Module(m) => { - match self.find_type(pos, self.ci.file, m, m.into(), Err(name)).expand() { - ty::Kind::NEVER => Value::NEVER, - ty::Kind::Global(global) => self.gen_global(global), - ty::Kind::Const(cnst) => self.gen_const(cnst, ctx), - v => Some(self.ci.nodes.new_const_lit(ty::Id::TYPE, v.compress())), - } - } - ty => { - self.error( - pos, - fa!( - "accesing scope on '{}' is not supported yet", - self.ty_display(ty.compress()) - ), - ); - Value::NEVER - } - }, - _ => { - self.error( + ty => self.error( pos, fa!( - "the '{}' is not a struct, or pointer to one, or enum, \ - fo field access does not make sense", - self.ty_display(tty) + "accesing scope on '{}' is not supported yet", + self.ty_display(ty.compress()) ), - ); - Value::NEVER - } + ), + }, + _ => self.error( + pos, + fa!( + "the '{}' is not a struct, or pointer to one, or enum, \ + fo field access does not make sense", + self.ty_display(tty) + ), + ), } .map(Ok) } @@ -4385,13 +4360,7 @@ impl<'a> Codegen<'a> { match self.gen_field(Ctx::default(), target, pos, name)? { Ok(mut fexpr) => { self.assert_ty(func.pos(), &mut fexpr, ty::Id::TYPE, "function"); - ( - ty::Id::from(match self.ci.nodes[fexpr.id].kind { - Kind::CInt { value } => value as u64, - _ => unreachable!(), - }), - None, - ) + (self.ci.nodes.as_ty(fexpr.id), None) } Err((ty, val)) => (ty, Some(val)), } @@ -4410,7 +4379,7 @@ impl<'a> Codegen<'a> { inline |= sig.ret == ty::Id::TYPE; - let Func { expr, file, is_inline, parent, .. } = self.tys.ins.funcs[fu]; + let FuncData { expr, file, is_inline, parent, .. } = self.tys.ins.funcs[fu]; let ast = &self.files[file.index()]; let &Expr::Closure { args: cargs, body, .. } = expr.get(ast) else { unreachable!() }; @@ -4700,7 +4669,9 @@ impl<'a> Codegen<'a> { ); } } - _ => self.error(pos, fa!("'{0} {op} {0}' is not supported", self.ty_display(ty))), + _ => { + _ = self.error(pos, fa!("'{0} {op} {0}' is not supported", self.ty_display(ty))) + } } } @@ -4769,7 +4740,7 @@ impl<'a> Codegen<'a> { } fn compute_signature(&mut self, func: &mut ty::Func, pos: Pos, args: &[Expr]) -> Option { - let Func { file, expr, sig, parent, .. } = self.tys.ins.funcs[*func]; + let FuncData { file, expr, sig, parent, .. } = self.tys.ins.funcs[*func]; let fast = &self.files[file.index()]; let &Expr::Closure { args: cargs, ret, .. } = expr.get(fast) else { unreachable!(); @@ -4825,11 +4796,11 @@ impl<'a> Codegen<'a> { self.ci.scope.vars.drain(base..).for_each(|v| v.remove(&mut self.ci.nodes)); let sym = SymKey::FuncInst(*func, args); - let ct = |ins: &mut crate::TypeIns| { + let ct = |ins: &mut TypeIns| { let fuc = ins.funcs[*func]; debug_assert!(fuc.comp_state.iter().all(|&s| s == CompState::default())); ins.funcs - .push(Func { base: Some(*func), sig: Some(Sig { args, ret }), ..fuc }) + .push(FuncData { base: Some(*func), sig: Some(Sig { args, ret }), ..fuc }) .into() }; let ty::Kind::Func(f) = @@ -5078,7 +5049,7 @@ impl<'a> Codegen<'a> { if self.finalize(prev_err_len) { let backend = if !cct { &mut *self.backend } else { &mut *self.ct_backend }; - backend.emit_body(id, &mut self.ci.nodes, self.tys, self.files); + backend.emit_body(id, &self.ci.nodes, self.tys, self.files); } self.ci.pos.pop(); @@ -5396,9 +5367,10 @@ impl<'a> Codegen<'a> { } #[track_caller] - fn error(&self, pos: Pos, msg: impl core::fmt::Display) { + fn error(&self, pos: Pos, msg: impl core::fmt::Display) -> Option { let mut buf = self.errors.borrow_mut(); write!(buf, "{}", self.file().report(pos, msg)).unwrap(); + Value::NEVER } #[track_caller] @@ -5419,10 +5391,7 @@ impl<'a> Codegen<'a> { .vars .iter() .filter(|v| v.ty == ty::Id::TYPE) - .map(|v| match self.ci.nodes[v.value.get()].kind { - Kind::CInt { value } => (value, v.id), - _ => unreachable!(), - }) + .map(|v| (self.ci.nodes.as_ty(v.value()), v.id)) .collect::>(); self.pool.push_ci(file, self.ci.parent, Some(ret), self.tys.tasks.len(), &mut self.ci); self.ci.scope.vars = scope @@ -5475,7 +5444,7 @@ impl<'a> Codegen<'a> { fn eval_global(&mut self, file: Module, name: Ident, expr: &Expr) -> ty::Id { self.ct.activate(); - let gid = self.tys.ins.globals.push(Global { file, name, ..Default::default() }); + let gid = self.tys.ins.globals.push(GlobalData { file, name, ..Default::default() }); self.pool.push_ci(file, self.ci.parent, None, self.tys.tasks.len(), &mut self.ci); let prev_err_len = self.errors.borrow().len(); @@ -5509,12 +5478,12 @@ impl<'a> Codegen<'a> { } fn find_local_ty(&mut self, ident: Ident) -> Option { - self.ci.scope.vars.iter().rfind(|v| (v.id == ident && v.ty == ty::Id::TYPE)).map(|v| { - match self.ci.nodes[v.value.get()].kind { - Kind::CInt { value } => ty::Id::from(value as u64), - k => unreachable!("{k:?}"), - } - }) + self.ci + .scope + .vars + .iter() + .rfind(|v| (v.id == ident && v.ty == ty::Id::TYPE)) + .map(|v| self.ci.nodes.as_ty(v.value())) } fn find_type_in_file(&mut self, pos: Pos, file: Module, id: Result) -> ty::Id { @@ -5525,6 +5494,22 @@ impl<'a> Codegen<'a> { self.find_type(pos, self.ci.file, self.ci.file, self.ci.parent, id) } + fn find_type_as_value( + &mut self, + pos: Pos, + file: Module, + parent: impl Into, + id: Result, + ctx: Ctx, + ) -> Option { + match self.find_type(pos, self.ci.file, file, parent.into(), id).expand() { + ty::Kind::NEVER => Value::NEVER, + ty::Kind::Global(global) => self.gen_global(global), + ty::Kind::Const(cnst) => self.gen_const(cnst, ctx), + v => Some(self.ci.nodes.new_const_lit(ty::Id::TYPE, v.compress())), + } + } + fn find_type( &mut self, pos: Pos, @@ -5548,7 +5533,7 @@ impl<'a> Codegen<'a> { let mut piter = parent; let Some((expr @ Expr::BinOp { left, right, .. }, name)) = (loop { if let Some(f) = - parser::find_decl(self.tys.scope_of(piter, f).unwrap_or(f.exprs()), &f.file, id) + parser::find_decl(self.tys.scope_of(piter, f).expect("TODO"), &f.file, id) { break Some(f); } @@ -5556,7 +5541,6 @@ impl<'a> Codegen<'a> { if let Some((captures, capture_tuple)) = self.tys.captures_of(piter, f) && let Some(idx) = captures.iter().position(|&cid| Ok(cid) == id) { - debug_assert_eq!(captures.len(), capture_tuple.len()); return self.tys.ins.args[capture_tuple.range().start + idx]; } @@ -5605,7 +5589,7 @@ impl<'a> Codegen<'a> { self.tys .ins .consts - .push(Const { ast: ExprRef::new(expr), name, file, parent }) + .push(ConstData { ast: ExprRef::new(expr), name, file, parent }) .into() } else { self.parse_ty( @@ -5687,78 +5671,37 @@ impl<'a> Codegen<'a> { .map_or(ArrayLen::MAX, |expr| self.eval_const(sc.file, expr, ty::Id::U32) as _); self.tys.make_array(ty, len) } - Expr::Struct { pos, fields, packed, captured, .. } => { - let captures_start = self.tys.tmp.args.len(); - for &cp in captured { - let ty = self.find_local_ty(cp).expect("TODO"); - self.tys.tmp.args.push(ty); - } - let captured = self.tys.pack_args(captures_start).expect("TODO"); - - let sym = SymKey::Struct(sc.file, pos, captured); - if let Some(&ty) = self.tys.syms.get(sym, &self.tys.ins) { - return ty; - } - - let prev_tmp = self.tys.tmp.struct_fields.len(); - for field in fields.iter().filter_map(CommentOr::or).filter_map(Result::ok) { - let ty = self.parse_ty(sc.anon(), &field.ty); - let field = StructField { name: self.tys.names.intern(field.name), ty }; - self.tys.tmp.struct_fields.push(field); - } - - let ty = self - .tys - .ins - .structs - .push(Struct { - file: sc.file, - pos, - captured, - name: sc.name.unwrap_or_default(), - field_start: self.tys.ins.struct_fields.len() as _, + Expr::Struct { pos, fields, packed, captured, .. } => self.parse_base_ty( + pos, + expr, + captured, + fields, + sc, + |s| [&mut s.ins.struct_fields, &mut s.tmp.struct_fields], + |s, field| { + let ty = s.parse_ty(sc.anon(), &field.ty); + StructField { name: s.tys.names.intern(field.name), ty } + }, + |s, base| { + s.ins.structs.push(StructData { + base, explicit_alignment: packed.then_some(1), - ast: ExprRef::new(expr), ..Default::default() }) - .into(); - - self.tys.ins.struct_fields.extend(self.tys.tmp.struct_fields.drain(prev_tmp..)); - - self.tys.syms.insert(sym, ty, &self.tys.ins); - ty - } - Expr::Enum { pos, variants, .. } => { - let sym = SymKey::Enum(sc.file, pos); - if let Some(&ty) = self.tys.syms.get(sym, &self.tys.ins) { - return ty; - } - - let prev_tmp = self.tys.tmp.enum_fields.len(); - for field in variants.iter().filter_map(CommentOr::or) { - let field = EnumField { name: self.tys.names.intern(field.name) }; - self.tys.tmp.enum_fields.push(field); - } - - let ty = self - .tys - .ins - .enums - .push(Enum { - file: sc.file, - pos, - name: sc.name.unwrap_or_default(), - field_start: self.tys.ins.enum_fields.len() as _, - }) - .into(); - - self.tys.ins.enum_fields.extend(self.tys.tmp.enum_fields.drain(prev_tmp..)); - - self.tys.syms.insert(sym, ty, &self.tys.ins); - ty - } + }, + ), + Expr::Enum { pos, variants, captured, .. } => self.parse_base_ty( + pos, + expr, + captured, + variants, + sc, + |s| [&mut s.ins.enum_fields, &mut s.tmp.enum_fields], + |s, field| EnumField { name: s.tys.names.intern(field.name) }, + |s, base| s.ins.enums.push(EnumData { base }), + ), Expr::Closure { pos, args, ret, .. } if let Some(name) = sc.name => { - let func = Func { + let func = FuncData { file: sc.file, parent: sc.parent, name, @@ -5801,6 +5744,56 @@ impl<'a> Codegen<'a> { } } } + + #[expect(clippy::too_many_arguments)] + fn parse_base_ty>( + &mut self, + pos: Pos, + expr: &Expr, + captured: &[Ident], + fields: FieldList, + sc: TyScope, + get_fields: impl Fn(&mut Types) -> [&mut Vec; 2], + check_field: impl Fn(&mut Self, A) -> F, + check: impl Fn(&mut Types, TypeBase) -> T, + ) -> ty::Id { + let captures_start = self.tys.tmp.args.len(); + for &cp in captured { + let ty = self.find_local_ty(cp).expect("TODO"); + self.tys.tmp.args.push(ty); + } + let captured = self.tys.pack_args(captures_start).expect("TODO"); + + let sym = SymKey::Type(sc.file, pos, captured); + if let Some(&ty) = self.tys.syms.get(sym, &self.tys.ins) { + return ty; + } + + let prev_tmp = get_fields(self.tys)[1].len(); + for field in fields.iter().filter_map(CommentOr::or).filter_map(Result::ok) { + let field = check_field(self, field); + get_fields(self.tys)[1].push(field); + } + + let base = TypeBase { + file: sc.file, + parent: sc.parent, + pos, + captured, + name: sc.name.unwrap_or_default(), + field_start: self.tys.ins.struct_fields.len() as _, + ast: ExprRef::new(expr), + }; + + let [ins, tmp] = get_fields(self.tys); + ins.extend(tmp.drain(prev_tmp..)); + + let ty = check(self.tys, base).into(); + + self.tys.syms.insert(sym, ty, &self.tys.ins); + + ty + } } #[derive(Clone, Copy)] @@ -5820,8 +5813,11 @@ impl TyScope { #[cfg(test)] mod tests { use { - super::{hbvm::HbvmBackend, CodegenCtx}, - crate::ty, + crate::{ + backend::hbvm::{self, HbvmBackend}, + son::CodegenCtx, + ty, + }, alloc::{string::String, vec::Vec}, core::fmt::Write, }; @@ -5856,7 +5852,7 @@ mod tests { } else { log::info!("================ running {ident} =============="); log::trace!("{output}"); - super::hbvm::test_run_vm(&out, output); + hbvm::test_run_vm(&out, output); } } diff --git a/lang/src/ty.rs b/lang/src/ty.rs new file mode 100644 index 00000000..31c5d427 --- /dev/null +++ b/lang/src/ty.rs @@ -0,0 +1,1077 @@ +use { + crate::{ + ctx_map, + lexer::TokenKind, + parser::{self, CommentOr, Expr, ExprRef, Pos}, + utils::{self, Ent, EntVec}, + Ident, + }, + alloc::{string::String, vec::Vec}, + core::{ + cell::Cell, + num::NonZeroU32, + ops::{Deref, DerefMut, Range}, + }, + hashbrown::hash_map, +}; +macro_rules! impl_deref { + ($for:ty { $name:ident: $base:ty }) => { + impl Deref for $for { + type Target = $base; + + fn deref(&self) -> &Self::Target { + &self.$name + } + } + + impl DerefMut for $for { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.$name + } + } + }; +} + +pub type ArrayLen = u32; + +impl Func { + pub const ECA: Func = Func(u32::MAX); + pub const MAIN: Func = Func(u32::MIN); +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Default, PartialOrd, Ord)] +pub struct Tuple(pub u32); + +impl Tuple { + const LEN_BITS: u32 = 5; + const LEN_MASK: usize = Self::MAX_LEN - 1; + const MAX_LEN: usize = 1 << Self::LEN_BITS; + + pub fn new(pos: usize, len: usize) -> Option { + if len >= Self::MAX_LEN { + return None; + } + + Some(Self((pos << Self::LEN_BITS | len) as u32)) + } + + pub fn range(self) -> Range { + let start = self.0 as usize >> Self::LEN_BITS; + start..start + self.len() + } + + pub fn len(self) -> usize { + self.0 as usize & Self::LEN_MASK + } + + pub fn is_empty(self) -> bool { + self.len() == 0 + } + + pub fn empty() -> Self { + Self(0) + } + + pub fn args(self) -> ArgIter { + ArgIter(self.range()) + } +} + +pub struct ArgIter(Range); + +pub enum Arg { + Type(Id), + Value(Id), +} + +impl ArgIter { + pub(crate) fn next(&mut self, tys: &Types) -> Option { + let ty = tys.ins.args[self.0.next()?]; + if ty == Id::TYPE { + return Some(Arg::Type(tys.ins.args[self.0.next().unwrap()])); + } + Some(Arg::Value(ty)) + } + + pub(crate) fn next_value(&mut self, tys: &Types) -> Option { + loop { + match self.next(tys)? { + Arg::Type(_) => continue, + Arg::Value(id) => break Some(id), + } + } + } +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +pub struct Id(NonZeroU32); + +impl From for i64 { + fn from(value: Id) -> Self { + value.0.get() as _ + } +} + +impl crate::ctx_map::CtxEntry for Id { + type Ctx = TypeIns; + type Key<'a> = SymKey<'a>; + + fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { + match self.expand() { + Kind::Struct(s) => { + let st = &ctx.structs[s]; + debug_assert_ne!(st.pos, Pos::MAX); + SymKey::Type(st.file, st.pos, st.captured) + } + Kind::Enum(e) => { + let en = &ctx.enums[e]; + debug_assert_ne!(en.pos, Pos::MAX); + SymKey::Type(en.file, en.pos, en.captured) + } + Kind::Ptr(p) => SymKey::Pointer(&ctx.ptrs[p]), + Kind::Opt(p) => SymKey::Optional(&ctx.opts[p]), + Kind::Func(f) => { + let fc = &ctx.funcs[f]; + if let Some(base) = fc.base { + // TODO: merge base and sig + SymKey::FuncInst(base, fc.sig.unwrap().args) + } else { + SymKey::Decl(fc.parent, fc.name) + } + } + Kind::Global(g) => { + let gb = &ctx.globals[g]; + SymKey::Decl(gb.file.into(), gb.name) + } + Kind::Slice(s) => SymKey::Array(&ctx.slices[s]), + Kind::Module(_) | Kind::Builtin(_) => { + SymKey::Decl(Module::default().into(), Ident::INVALID) + } + Kind::Const(c) => SymKey::Constant(&ctx.consts[c]), + } + } +} + +impl Default for Id { + fn default() -> Self { + Self(unsafe { NonZeroU32::new_unchecked(UNDECLARED) }) + } +} + +impl Id { + pub const DINT: Self = Self::UINT; + + pub fn bin_ret(self, op: TokenKind) -> Id { + if op.is_compatison() { + Self::BOOL + } else { + self + } + } + + pub fn is_float(self) -> bool { + matches!(self.repr(), F32 | F64) || self.is_never() + } + + pub fn is_signed(self) -> bool { + matches!(self.repr(), I8..=INT) || self.is_never() + } + + pub fn is_unsigned(self) -> bool { + matches!(self.repr(), U8..=UINT) || self.is_never() + } + + pub fn is_integer(self) -> bool { + matches!(self.repr(), U8..=INT) || self.is_never() + } + + pub fn is_never(self) -> bool { + self == Self::NEVER + } + + pub fn strip_pointer(self) -> Self { + match self.expand() { + Kind::Ptr(_) => Id::UINT, + _ => self, + } + } + + pub fn is_pointer(self) -> bool { + matches!(self.expand(), Kind::Ptr(_)) || self.is_never() + } + + pub fn is_optional(self) -> bool { + matches!(self.expand(), Kind::Opt(_)) || self.is_never() + } + + pub fn try_upcast(self, ob: Self) -> Option { + self.try_upcast_low(ob, false) + } + + pub fn try_upcast_low(self, ob: Self, coerce_pointer: bool) -> Option { + let (oa, ob) = (Self(self.0.min(ob.0)), Self(self.0.max(ob.0))); + let (a, b) = (oa.strip_pointer(), ob.strip_pointer()); + Some(match () { + _ if oa == Id::NEVER => ob, + _ if ob == Id::NEVER => oa, + _ if oa == ob => oa, + _ if ob.is_optional() => ob, + _ if oa.is_pointer() && ob.is_pointer() => return None, + _ if a.is_signed() && b.is_signed() || a.is_unsigned() && b.is_unsigned() => ob, + _ if a.is_unsigned() && b.is_signed() && a.repr() - U8 < b.repr() - I8 => ob, + _ if a.is_unsigned() && b.is_signed() && a.repr() - U8 > b.repr() - I8 => oa, + _ if oa.is_integer() && ob.is_pointer() && coerce_pointer => ob, + _ => return None, + }) + } + + pub fn expand(self) -> Kind { + Kind::from_ty(self) + } + + pub const fn repr(self) -> u32 { + self.0.get() + } + + pub(crate) fn simple_size(&self) -> Option { + Some(match self.expand() { + Kind::Ptr(_) => 8, + Kind::Builtin(Builtin(VOID)) => 0, + Kind::Builtin(Builtin(NEVER)) => 0, + Kind::Builtin(Builtin(INT | UINT | F64)) => 8, + Kind::Builtin(Builtin(I32 | U32 | TYPE | F32)) => 4, + Kind::Builtin(Builtin(I16 | U16)) => 2, + Kind::Builtin(Builtin(I8 | U8 | BOOL)) => 1, + _ => return None, + }) + } + + pub(crate) fn extend(self) -> Self { + if self.is_signed() { + Self::INT + } else if self.is_pointer() { + self + } else { + Self::UINT + } + } + + pub(crate) fn loc(&self, tys: &Types) -> Loc { + match self.expand() { + Kind::Opt(o) + if let ty = tys.ins.opts[o].base + && ty.loc(tys) == Loc::Reg + && (ty.is_pointer() || tys.size_of(ty) < 8) => + { + Loc::Reg + } + Kind::Ptr(_) | Kind::Enum(_) | Kind::Builtin(_) => Loc::Reg, + Kind::Struct(_) if tys.size_of(*self) == 0 => Loc::Reg, + Kind::Struct(_) | Kind::Slice(_) | Kind::Opt(_) => Loc::Stack, + c @ (Kind::Func(_) | Kind::Global(_) | Kind::Module(_) | Kind::Const(_)) => { + unreachable!("{c:?}") + } + } + } + + pub(crate) fn has_pointers(&self, tys: &Types) -> bool { + match self.expand() { + Kind::Struct(s) => tys.struct_fields(s).iter().any(|f| f.ty.has_pointers(tys)), + Kind::Ptr(_) => true, + Kind::Slice(s) => tys.ins.slices[s].len == ArrayLen::MAX, + _ => false, + } + } +} + +#[derive(PartialEq, Eq, Clone, Copy)] +pub enum Loc { + Reg, + Stack, +} + +impl From for Id { + fn from(id: u64) -> Self { + Self(unsafe { NonZeroU32::new_unchecked(id as _) }) + } +} + +const fn array_to_lower_case(array: [u8; N]) -> [u8; N] { + let mut result = [0; N]; + let mut i = 0; + while i < N { + result[i] = array[i].to_ascii_lowercase(); + i += 1; + } + result +} +// const string to lower case + +macro_rules! builtin_type { + ($($name:ident;)*) => { + $(const $name: u32 = ${index(0)} + 1;)* + + mod __lc_names { + use super::*; + $(pub const $name: &str = unsafe { + const LCL: &[u8] = unsafe { + &array_to_lower_case( + *(stringify!($name).as_ptr() as *const [u8; stringify!($name).len()]) + ) + }; + core::str::from_utf8_unchecked(LCL) + };)* + } + + impl Builtin { + $(pub const $name: Self = Builtin($name);)* + } + + impl Id { + $(pub const $name: Self = Kind::Builtin(Builtin($name)).compress();)* + } + + impl Kind { + $(pub const $name: Self = Kind::Builtin(Builtin($name));)* + } + + pub fn from_str(name: &str) -> Option { + match name { + $(__lc_names::$name => Some(Builtin($name)),)* + _ => None, + } + } + + pub fn to_str(ty: Builtin) -> &'static str { + match ty.0 { + $($name => __lc_names::$name,)* + v => unreachable!("invalid type: {}", v), + } + } + }; +} + +builtin_type! { + UNDECLARED; + LEFT_UNREACHABLE; + RIGHT_UNREACHABLE; + NEVER; + VOID; + TYPE; + BOOL; + U8; + U16; + U32; + UINT; + I8; + I16; + I32; + INT; + F32; + F64; +} + +macro_rules! type_kind { + ($(#[$meta:meta])* $vis:vis enum $name:ident {$( $variant:ident, )*}) => { + crate::utils::decl_ent! { + $(pub struct $variant(u32);)* + } + + $(#[$meta])* + $vis enum $name { + $($variant($variant),)* + } + + impl $name { + const FLAG_BITS: u32 = (${count($variant)} as u32).next_power_of_two().ilog2(); + const FLAG_OFFSET: u32 = core::mem::size_of::() as u32 * 8 - Self::FLAG_BITS; + const INDEX_MASK: u32 = (1 << (32 - Self::FLAG_BITS)) - 1; + + $vis fn from_ty(ty: Id) -> Self { + let (flag, index) = (ty.repr() >> Self::FLAG_OFFSET, ty.repr() & Self::INDEX_MASK); + match flag { + $(${index(0)} => Self::$variant($variant(index)),)* + i => unreachable!("{i}"), + } + } + + $vis const fn compress(self) -> Id { + let (index, flag) = match self { + $(Self::$variant(index) => (index.0, ${index(0)}),)* + }; + Id(unsafe { NonZeroU32::new_unchecked((flag << Self::FLAG_OFFSET) | index) }) + } + } + + $( + impl From<$variant> for $name { + fn from(value: $variant) -> Self { + Self::$variant(value) + } + } + + impl From<$variant> for i64 { + fn from(value: $variant) -> Self { + Id::from(value).into() + } + } + + impl From<$variant> for Id { + fn from(value: $variant) -> Self { + $name::$variant(value).compress() + } + } + )* + }; +} + +type_kind! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum Kind { + Builtin, + Struct, + Enum, + Ptr, + Slice, + Opt, + Func, + Global, + Module, + Const, + } +} + +impl Module { + pub const MAIN: Self = Self(0); +} + +impl Default for Module { + fn default() -> Self { + Self(u32::MAX) + } +} + +impl TryFrom for Builtin { + type Error = (); + + fn try_from(value: Ident) -> Result { + if value.is_null() { + Ok(Self(value.len())) + } else { + Err(()) + } + } +} + +impl Default for Kind { + fn default() -> Self { + Id::UNDECLARED.expand() + } +} + +pub struct Display<'a> { + tys: &'a Types, + files: &'a [parser::Ast], + ty: Id, +} + +impl<'a> Display<'a> { + pub fn new(tys: &'a Types, files: &'a [parser::Ast], ty: Id) -> Self { + Self { tys, files, ty } + } + + pub fn rety(&self, ty: Id) -> Self { + Self::new(self.tys, self.files, ty) + } +} + +impl core::fmt::Display for Display<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + use Kind as TK; + match TK::from_ty(self.ty) { + TK::Module(idx) => { + f.write_str("@use(\"")?; + self.files[idx.index()].path.fmt(f)?; + f.write_str(")[")?; + idx.fmt(f)?; + f.write_str("]") + } + TK::Builtin(ty) => f.write_str(to_str(ty)), + TK::Opt(ty) => { + f.write_str("?")?; + self.rety(self.tys.ins.opts[ty].base).fmt(f) + } + TK::Ptr(ty) => { + f.write_str("^")?; + self.rety(self.tys.ins.ptrs[ty].base).fmt(f) + } + TK::Struct(idx) => { + let record = &self.tys.ins.structs[idx]; + if record.name.is_null() { + f.write_str("[")?; + idx.fmt(f)?; + f.write_str("]{")?; + for (i, &StructField { name, ty }) in + self.tys.struct_fields(idx).iter().enumerate() + { + if i != 0 { + f.write_str(", ")?; + } + f.write_str(self.tys.names.ident_str(name))?; + f.write_str(": ")?; + self.rety(ty).fmt(f)?; + } + f.write_str("}") + } else { + let file = &self.files[record.file.index()]; + f.write_str(file.ident_str(record.name)) + } + } + TK::Enum(idx) => { + let enm = &self.tys.ins.enums[idx]; + debug_assert!(!enm.name.is_null()); + let file = &self.files[enm.file.index()]; + f.write_str(file.ident_str(enm.name)) + } + TK::Func(idx) => { + f.write_str("fn")?; + idx.fmt(f) + } + TK::Global(idx) => { + let global = &self.tys.ins.globals[idx]; + let file = &self.files[global.file.index()]; + f.write_str(file.ident_str(global.name))?; + f.write_str(" (global)") + } + TK::Slice(idx) => { + let array = self.tys.ins.slices[idx]; + f.write_str("[")?; + self.rety(array.elem).fmt(f)?; + if array.len != ArrayLen::MAX { + f.write_str("; ")?; + array.len.fmt(f)?; + } + f.write_str("]") + } + TK::Const(idx) => { + let cnst = &self.tys.ins.consts[idx]; + let file = &self.files[cnst.file.index()]; + f.write_str(file.ident_str(cnst.name))?; + f.write_str(" (const)") + } + } + } +} + +pub type Offset = u32; +pub type Size = u32; + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +pub enum SymKey<'a> { + Pointer(&'a PtrData), + Optional(&'a OptData), + Type(Module, Pos, Tuple), + FuncInst(Func, Tuple), + Decl(Id, Ident), + Array(&'a ArrayData), + Constant(&'a ConstData), +} + +#[derive(Clone, Copy)] +pub struct Sig { + pub args: Tuple, + pub ret: Id, +} + +#[derive(Default, Clone, Copy)] +pub struct FuncData { + pub file: Module, + pub parent: Id, + pub name: Ident, + pub base: Option, + pub expr: ExprRef, + pub sig: Option, + pub is_inline: bool, + pub returns_type: bool, + pub comp_state: [CompState; 2], +} + +#[derive(Default, PartialEq, Eq, Clone, Copy)] +pub enum CompState { + #[default] + Dead, + Queued(usize), + Compiled, +} + +#[derive(Clone, Default)] +pub struct GlobalData { + pub file: Module, + pub name: Ident, + pub ty: Id, + pub data: Vec, +} + +#[derive(PartialEq, Eq, Hash)] +pub struct ConstData { + pub ast: ExprRef, + pub name: Ident, + pub file: Module, + pub parent: Id, +} + +pub struct EnumField { + pub name: Ident, +} + +#[derive(Default)] +pub struct TypeBase { + pub file: Module, + pub parent: Id, + pub pos: Pos, + pub name: Ident, + pub field_start: u32, + pub captured: Tuple, + pub ast: ExprRef, +} + +#[derive(Default)] +pub struct EnumData { + pub base: TypeBase, +} + +impl_deref!(EnumData { base: TypeBase }); + +pub struct StructField { + pub name: Ident, + pub ty: Id, +} + +#[derive(Default)] +pub struct StructData { + pub base: TypeBase, + pub size: Cell, + pub align: Cell, + // TODO: make this compact + pub explicit_alignment: Option, +} + +impl_deref!(StructData { base: TypeBase }); + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +pub struct OptData { + pub base: Id, +} + +#[derive(PartialEq, Eq, Hash, Clone, Copy)] +pub struct PtrData { + pub base: Id, +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct ArrayData { + pub elem: Id, + pub len: ArrayLen, +} + +impl ArrayData { + #[expect(clippy::len_without_is_empty)] + pub fn len(&self) -> Option { + (self.len != ArrayLen::MAX).then_some(self.len as usize) + } +} + +impl ctx_map::CtxEntry for Ident { + type Ctx = str; + type Key<'a> = &'a str; + + fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { + unsafe { ctx.get_unchecked(self.range()) } + } +} + +#[derive(Default)] +pub struct IdentInterner { + lookup: ctx_map::CtxMap, + strings: String, +} + +impl IdentInterner { + pub fn intern(&mut self, ident: &str) -> Ident { + let (entry, hash) = self.lookup.entry(ident, &self.strings); + match entry { + hash_map::RawEntryMut::Occupied(o) => o.get_key_value().0.value, + hash_map::RawEntryMut::Vacant(v) => { + let id = Ident::new(self.strings.len() as _, ident.len() as _).unwrap(); + self.strings.push_str(ident); + v.insert(ctx_map::Key { hash, value: id }, ()); + id + } + } + } + + pub fn ident_str(&self, ident: Ident) -> &str { + &self.strings[ident.range()] + } + + pub fn project(&self, ident: &str) -> Option { + self.lookup.get(ident, &self.strings).copied() + } + + fn clear(&mut self) { + self.lookup.clear(); + self.strings.clear() + } +} + +#[derive(Default)] +pub struct TypesTmp { + pub struct_fields: Vec, + pub enum_fields: Vec, + pub args: Vec, +} + +#[derive(Default)] +pub struct TypeIns { + pub args: Vec, + pub struct_fields: Vec, + pub enum_fields: Vec, + pub funcs: EntVec, + pub globals: EntVec, + pub consts: EntVec, + pub structs: EntVec, + pub enums: EntVec, + pub ptrs: EntVec, + pub opts: EntVec, + pub slices: EntVec, +} + +pub struct FTask { + pub file: Module, + pub id: Func, + pub ct: bool, +} + +pub struct StringRef(pub Global); + +impl ctx_map::CtxEntry for StringRef { + type Ctx = EntVec; + type Key<'a> = &'a [u8]; + + fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a> { + &ctx[self.0].data + } +} + +#[derive(Default)] +pub struct Types { + pub syms: ctx_map::CtxMap, + pub names: IdentInterner, + pub strings: ctx_map::CtxMap, + pub ins: TypeIns, + pub tmp: TypesTmp, + pub tasks: Vec>, +} + +impl Types { + pub fn case(&self, ty: Id) -> fn(&str) -> Result<(), &'static str> { + match ty.expand() { + Kind::NEVER => |_| Ok(()), + Kind::Enum(_) + | Kind::Struct(_) + | Kind::Builtin(_) + | Kind::Ptr(_) + | Kind::Slice(_) + | Kind::Opt(_) => utils::is_pascal_case, + Kind::Func(f) if self.ins.funcs[f].returns_type => utils::is_pascal_case, + Kind::Func(_) | Kind::Global(_) | Kind::Module(_) => utils::is_snake_case, + Kind::Const(_) => utils::is_screaming_case, + } + } + + pub fn pack_args(&mut self, arg_base: usize) -> Option { + let base = self.ins.args.len(); + self.ins.args.extend(self.tmp.args.drain(arg_base..)); + let needle = &self.ins.args[base..]; + if needle.is_empty() { + return Some(Tuple::empty()); + } + let len = needle.len(); + // FIXME: maybe later when this becomes a bottleneck we use more + // efficient search (SIMD?, indexing?) + let sp = self.ins.args.windows(needle.len()).position(|val| val == needle).unwrap(); + self.ins.args.truncate((sp + needle.len()).max(base)); + Tuple::new(sp, len) + } + + pub fn struct_fields(&self, strct: Struct) -> &[StructField] { + &self.ins.struct_fields[self.struct_field_range(strct)] + } + + fn struct_field_range(&self, strct: Struct) -> Range { + let start = self.ins.structs[strct].field_start as usize; + let end = self + .ins + .structs + .next(strct) + .map_or(self.ins.struct_fields.len(), |s| s.field_start as usize); + start..end + } + + pub fn enum_fields(&self, enm: Enum) -> &[EnumField] { + &self.ins.enum_fields[self.enum_field_range(enm)] + } + + pub fn enum_field_range(&self, enm: Enum) -> Range { + let start = self.ins.enums[enm].field_start as usize; + let end = + self.ins.enums.next(enm).map_or(self.ins.enum_fields.len(), |s| s.field_start as usize); + start..end + } + + pub fn make_opt(&mut self, base: Id) -> Id { + self.make_generic_ty(OptData { base }, |ins| &mut ins.opts, |e| SymKey::Optional(e)) + } + + pub fn make_ptr(&mut self, base: Id) -> Id { + self.make_generic_ty(PtrData { base }, |ins| &mut ins.ptrs, |e| SymKey::Pointer(e)) + } + + pub fn make_array(&mut self, elem: Id, len: ArrayLen) -> Id { + self.make_generic_ty(ArrayData { elem, len }, |ins| &mut ins.slices, |e| SymKey::Array(e)) + } + + fn make_generic_ty, T: Copy>( + &mut self, + ty: T, + get_col: fn(&mut TypeIns) -> &mut EntVec, + key: fn(&T) -> SymKey, + ) -> Id { + *self.syms.get_or_insert(key(&{ ty }), &mut self.ins, |ins| get_col(ins).push(ty).into()) + } + + pub fn size_of(&self, ty: Id) -> Size { + match ty.expand() { + Kind::Slice(arr) => { + let arr = &self.ins.slices[arr]; + match arr.len { + 0 => 0, + ArrayLen::MAX => 16, + len => self.size_of(arr.elem) * len, + } + } + Kind::Struct(stru) => { + if self.ins.structs[stru].size.get() != 0 { + return self.ins.structs[stru].size.get(); + } + + let mut oiter = OffsetIter::new(stru, self); + while oiter.next(self).is_some() {} + self.ins.structs[stru].size.set(oiter.offset); + oiter.offset + } + Kind::Enum(enm) => (self.enum_field_range(enm).len().ilog2() + 7) / 8, + Kind::Opt(opt) => { + let base = self.ins.opts[opt].base; + if self.nieche_of(base).is_some() { + self.size_of(base) + } else { + self.size_of(base) + self.align_of(base) + } + } + _ if let Some(size) = ty.simple_size() => size, + ty => unimplemented!("size_of: {:?}", ty), + } + } + + pub fn align_of(&self, ty: Id) -> Size { + match ty.expand() { + Kind::Struct(stru) => { + if self.ins.structs[stru].align.get() != 0 { + return self.ins.structs[stru].align.get() as _; + } + let align = self.ins.structs[stru].explicit_alignment.map_or_else( + || { + self.struct_fields(stru) + .iter() + .map(|&StructField { ty, .. }| self.align_of(ty)) + .max() + .unwrap_or(1) + }, + |a| a as _, + ); + self.ins.structs[stru].align.set(align.try_into().unwrap()); + align + } + Kind::Slice(arr) => { + let arr = &self.ins.slices[arr]; + match arr.len { + ArrayLen::MAX => 8, + _ => self.align_of(arr.elem), + } + } + _ => self.size_of(ty).max(1), + } + } + + pub fn base_of(&self, ty: Id) -> Option { + match ty.expand() { + Kind::Ptr(p) => Some(self.ins.ptrs[p].base), + _ => None, + } + } + + pub fn inner_of(&self, ty: Id) -> Option { + match ty.expand() { + Kind::Opt(o) => Some(self.ins.opts[o].base), + _ => None, + } + } + + pub fn opt_layout(&self, inner_ty: Id) -> OptLayout { + match self.nieche_of(inner_ty) { + Some((_, flag_offset, flag_ty)) => { + OptLayout { flag_ty, flag_offset, payload_offset: 0 } + } + None => OptLayout { + flag_ty: Id::BOOL, + flag_offset: 0, + payload_offset: self.align_of(inner_ty), + }, + } + } + + pub fn nieche_of(&self, ty: Id) -> Option<(bool, Offset, Id)> { + match ty.expand() { + Kind::Ptr(_) => Some((false, 0, Id::UINT)), + // TODO: cache this + Kind::Struct(s) => OffsetIter::new(s, self).into_iter(self).find_map(|(f, off)| { + self.nieche_of(f.ty).map(|(uninit, o, ty)| (uninit, o + off, ty)) + }), + _ => None, + } + } + + pub fn find_struct_field(&self, s: Struct, name: &str) -> Option { + let name = self.names.project(name)?; + self.struct_fields(s).iter().position(|f| f.name == name) + } + + pub fn clear(&mut self) { + self.syms.clear(); + self.names.clear(); + self.strings.clear(); + + self.ins.funcs.clear(); + self.ins.args.clear(); + self.ins.globals.clear(); + self.ins.structs.clear(); + self.ins.struct_fields.clear(); + self.ins.ptrs.clear(); + self.ins.slices.clear(); + + debug_assert_eq!(self.tmp.struct_fields.len(), 0); + debug_assert_eq!(self.tmp.args.len(), 0); + + debug_assert_eq!(self.tasks.len(), 0); + } + + pub fn scope_of<'a>(&self, parent: Id, file: &'a parser::Ast) -> Option<&'a [Expr<'a>]> { + match parent.expand() { + Kind::Struct(s) => { + if let Expr::Struct { fields: [.., CommentOr::Or(Err(scope))], .. } = + self.ins.structs[s].ast.get(file) + { + Some(scope) + } else { + Some(&[]) + } + } + Kind::Enum(e) => { + if let Expr::Enum { variants: [.., CommentOr::Or(Err(scope))], .. } = + self.ins.enums[e].ast.get(file) + { + Some(scope) + } else { + Some(&[]) + } + } + Kind::Module(_) => Some(file.exprs()), + _ => None, + } + } + + pub fn parent_of(&self, ty: Id) -> Option { + match ty.expand() { + Kind::Struct(s) => Some(self.ins.structs[s].parent), + Kind::Enum(e) => Some(self.ins.enums[e].parent), + _ => None, + } + } + + pub fn captures_of<'a>(&self, ty: Id, file: &'a parser::Ast) -> Option<(&'a [Ident], Tuple)> { + match ty.expand() { + Kind::Struct(s) => { + let &Expr::Struct { captured, .. } = self.ins.structs[s].ast.get(file) else { + unreachable!() + }; + Some((captured, self.ins.structs[s].captured)) + } + Kind::Enum(e) => { + let &Expr::Enum { captured, .. } = self.ins.enums[e].ast.get(file) else { + unreachable!() + }; + Some((captured, self.ins.enums[e].captured)) + } + _ => None, + } + .inspect(|(a, b)| debug_assert_eq!(a.len(), b.len())) + } +} + +pub struct OptLayout { + pub flag_ty: Id, + pub flag_offset: Offset, + pub payload_offset: Offset, +} + +pub struct OffsetIter { + strct: Struct, + offset: Offset, + fields: Range, +} + +impl OffsetIter { + pub fn new(strct: Struct, tys: &Types) -> Self { + Self { strct, offset: 0, fields: tys.struct_field_range(strct) } + } + + pub fn offset_of(tys: &Types, idx: Struct, field: &str) -> Option<(Offset, Id)> { + let field_id = tys.names.project(field)?; + OffsetIter::new(idx, tys) + .into_iter(tys) + .find(|(f, _)| f.name == field_id) + .map(|(f, off)| (off, f.ty)) + } + + fn next<'a>(&mut self, tys: &'a Types) -> Option<(&'a StructField, Offset)> { + let stru = &tys.ins.structs[self.strct]; + let field = &tys.ins.struct_fields[self.fields.next()?]; + + let align = stru.explicit_alignment.map_or_else(|| tys.align_of(field.ty), |a| a as u32); + self.offset = (self.offset + align - 1) & !(align - 1); + + let off = self.offset; + self.offset += tys.size_of(field.ty); + Some((field, off)) + } + + pub fn next_ty(&mut self, tys: &Types) -> Option<(Id, Offset)> { + let (field, off) = self.next(tys)?; + Some((field.ty, off)) + } + + pub fn into_iter(mut self, tys: &Types) -> impl Iterator { + core::iter::from_fn(move || self.next(tys)) + } +} diff --git a/lang/tests/son_tests_fb_driver.txt b/lang/tests/son_tests_fb_driver.txt index c26958b8..0ae0a267 100644 --- a/lang/tests/son_tests_fb_driver.txt +++ b/lang/tests/son_tests_fb_driver.txt @@ -14,9 +14,9 @@ main: CP r35, r0 LI64 r36, 30d LI64 r37, 100d + CP r34, r35 CP r32, r35 CP r33, r35 - CP r34, r35 5: JLTU r34, r36, :0 ADDI64 r32, r32, 1d CP r2, r35