making progress on parallelization

This commit is contained in:
mlokr 2024-05-17 19:53:59 +02:00
parent 71c4d3632a
commit b922dbd232
5 changed files with 679 additions and 131 deletions

View file

@ -5,7 +5,10 @@ use std::{
use hbvm::Vm; use hbvm::Vm;
use crate::ident::{self, Ident}; use crate::{
ident::{self, Ident},
parser::Symbols,
};
use { use {
crate::{ crate::{
@ -556,6 +559,8 @@ pub struct Codegen<'a> {
globals: Vec<Global>, globals: Vec<Global>,
main: Option<LabelId>, main: Option<LabelId>,
pub symbols: Symbols,
vm: Vm<CompileMem, 0>, vm: Vm<CompileMem, 0>,
} }
@ -632,8 +637,8 @@ impl<'a> Codegen<'a> {
log::dbg!("fn-args"); log::dbg!("fn-args");
let mut parama = self.param_alloc(fn_label.ret); let mut parama = self.param_alloc(fn_label.ret);
for (arg, &ty) in args.iter().zip(fn_label.args.iter()) { for (arg, &ty) in args.iter().zip(fn_label.args.iter()) {
let refed = arg.last.map_or(0, Cell::get); let sym = parser::find_symbol(&self.symbols, arg.id);
let loc = self.load_arg(refed, ty, &mut parama); let loc = self.load_arg(sym.flags, ty, &mut parama);
self.vars.push(Variable { self.vars.push(Variable {
id: arg.id, id: arg.id,
value: Value { ty, loc }, value: Value { ty, loc },
@ -1049,14 +1054,15 @@ impl<'a> Codegen<'a> {
} }
} }
E::BinOp { E::BinOp {
left: E::Ident { id, last, .. }, left: E::Ident { id, .. },
op: T::Decl, op: T::Decl,
right, right,
} => { } => {
let val = self.expr(right)?; let val = self.expr(right)?;
let loc = self.make_loc_owned(val.loc, val.ty); let loc = self.make_loc_owned(val.loc, val.ty);
let sym = parser::find_symbol(&self.symbols, *id);
let loc = match loc { let loc = match loc {
Loc::Reg(r) if last.is_some_and(|l| l.get() & parser::REFERENCED != 0) => { Loc::Reg(r) if sym.flags & parser::REFERENCED == 0 => {
let size = self.size_of(val.ty); let size = self.size_of(val.ty);
let stack = self.alloc_stack(size); let stack = self.alloc_stack(size);
self.store_stack(r.0, stack.offset, size as _); self.store_stack(r.0, stack.offset, size as _);
@ -1098,19 +1104,16 @@ impl<'a> Codegen<'a> {
loc, loc,
}); });
} }
E::Ident { E::Ident { name, id, index } => {
name,
id,
last,
index,
} => {
let Some((var_index, var)) = let Some((var_index, var)) =
self.vars.iter_mut().enumerate().find(|(_, v)| v.id == id) self.vars.iter_mut().enumerate().find(|(_, v)| v.id == id)
else { else {
self.report(expr.pos(), format_args!("unknown variable: {}", name)) self.report(expr.pos(), format_args!("unknown variable: {}", name))
}; };
let loc = match last.is_some_and(|l| parser::ident_flag_index(l.get()) == index) let sym = parser::find_symbol(&self.symbols, id);
let loc = match parser::ident_flag_index(sym.flags) == index
&& !self.loops.last().is_some_and(|l| l.var_count > var_index) && !self.loops.last().is_some_and(|l| l.var_count > var_index)
{ {
true => std::mem::replace(&mut var.value.loc, Loc::Imm(0)), true => std::mem::replace(&mut var.value.loc, Loc::Imm(0)),
@ -1940,6 +1943,8 @@ impl Loc {
mod tests { mod tests {
use crate::{instrs, log}; use crate::{instrs, log};
use super::parser;
struct TestMem; struct TestMem;
impl hbvm::mem::Memory for TestMem { impl hbvm::mem::Memory for TestMem {
@ -2009,7 +2014,8 @@ mod tests {
fn generate(input: &'static str, output: &mut String) { fn generate(input: &'static str, output: &mut String) {
let path = "test"; let path = "test";
let arena = crate::parser::Arena::default(); let arena = crate::parser::Arena::default();
let mut parser = super::parser::Parser::new(&arena); let mut symbols = crate::parser::Symbols::new();
let mut parser = parser::Parser::new(&arena, &mut symbols, &parser::no_loader);
let exprs = parser.file(input, path); let exprs = parser.file(input, path);
let mut codegen = super::Codegen::default(); let mut codegen = super::Codegen::default();
codegen.file(path, input.as_bytes(), &exprs); codegen.file(path, input.as_bytes(), &exprs);

View file

@ -93,6 +93,7 @@ gen_token_kind! {
Eof, Eof,
Error, Error,
Driective, Driective,
String,
#[keywords] #[keywords]
Return = b"return", Return = b"return",
If = b"if", If = b"if",
@ -210,6 +211,16 @@ impl<'a> Lexer<'a> {
T::from_ident(ident) T::from_ident(ident)
} }
} }
b'"' => {
while let Some(c) = self.advance() {
match c {
b'"' => break,
b'\\' => _ = self.advance(),
_ => {}
}
}
T::String
}
b':' if self.advance_if(b'=') => T::Decl, b':' if self.advance_if(b'=') => T::Decl,
b':' => T::Colon, b':' => T::Colon,
b',' => T::Comma, b',' => T::Comma,

View file

@ -1,8 +1,17 @@
#![feature(noop_waker)] #![feature(noop_waker)]
#![feature(iter_collect_into)]
#![feature(macro_metavar_expr)] #![feature(macro_metavar_expr)]
#![feature(let_chains)] #![feature(let_chains)]
#![allow(dead_code)] #![feature(ptr_metadata)]
#![feature(const_mut_refs)] #![feature(const_mut_refs)]
#![feature(slice_ptr_get)]
#![allow(dead_code)]
use std::{
collections::{HashSet, VecDeque},
io,
sync::{mpsc, Arc, Mutex},
};
#[macro_export] #[macro_export]
macro_rules! run_tests { macro_rules! run_tests {
@ -29,3 +38,122 @@ unsafe fn encode<T>(instr: T) -> (usize, [u8; instrs::MAX_SIZE]) {
std::ptr::write(buf.as_mut_ptr() as *mut T, instr); std::ptr::write(buf.as_mut_ptr() as *mut T, instr);
(std::mem::size_of::<T>(), buf) (std::mem::size_of::<T>(), buf)
} }
/// Blocking FIFO work queue shared between worker threads.
///
/// Producers call [`TaskQueue::push`] / [`TaskQueue::extend`]; consumers call
/// [`TaskQueue::pop`], which parks the calling thread while the queue is empty.
/// Once `max_waiters` consumers would be waiting at the same time, every waiter
/// is released with `None`, which is how workers learn that no work is left.
struct TaskQueue<T> {
    inner: Mutex<TaskQueueInner<T>>,
}

impl<T> TaskQueue<T> {
    /// Creates an empty queue that closes when `max_waiters` consumers wait at once.
    fn new(max_waiters: usize) -> Self {
        Self {
            inner: Mutex::new(TaskQueueInner::new(max_waiters)),
        }
    }

    /// Enqueues a single message (or hands it directly to a parked consumer).
    pub fn push(&self, message: T) {
        self.extend([message]);
    }

    /// Enqueues every message of `messages` under a single lock acquisition.
    pub fn extend(&self, messages: impl IntoIterator<Item = T>) {
        self.inner.lock().unwrap().push(messages);
    }

    /// Takes the oldest message, blocking while the queue is empty.
    ///
    /// Returns `None` when the queue was closed because all `max_waiters`
    /// consumers ended up waiting.
    pub fn pop(&self) -> Option<T> {
        TaskQueueInner::pop(&self.inner)
    }
}

/// Mailbox living on the stack of a parked consumer.
enum TaskSlot<T> {
    /// Nothing has been delivered yet (also the state after a spurious wake-up).
    Waiting,
    /// A producer handed this message over.
    Delivered(T),
    /// The queue was closed; the consumer should return `None`.
    Closed,
}

struct TaskQueueInner<T> {
    /// Number of simultaneous waiters at which the queue closes.
    max_waiters: usize,
    /// Messages nobody was waiting for at push time.
    messages: VecDeque<T>,
    /// Mailboxes of parked consumers together with the thread to unpark.
    parked: VecDeque<(*mut TaskSlot<T>, std::thread::Thread)>,
}

// SAFETY: the raw pointers in `parked` point at `TaskSlot`s on the stacks of
// threads blocked inside `pop`. They are only dereferenced while the mutex that
// wraps this struct is held, and a slot is removed from `parked` before its
// owner is allowed to return.
unsafe impl<T: Send> Send for TaskQueueInner<T> {}
unsafe impl<T: Send + Sync> Sync for TaskQueueInner<T> {}

impl<T> TaskQueueInner<T> {
    fn new(max_waiters: usize) -> Self {
        Self {
            max_waiters,
            messages: Default::default(),
            parked: Default::default(),
        }
    }

    /// Delivers each message to the longest-parked consumer, or buffers it when
    /// nobody is waiting. Must be called with the queue mutex held (guaranteed by
    /// `&mut self` being reachable only through the guard).
    fn push(&mut self, messages: impl IntoIterator<Item = T>) {
        for msg in messages {
            if let Some((dest, thread)) = self.parked.pop_front() {
                // SAFETY: `dest` was registered by a thread that is still inside
                // `pop` (it cannot leave before its slot stops being `Waiting`),
                // and we hold the queue lock.
                unsafe { *dest = TaskSlot::Delivered(msg) };
                thread.unpark();
            } else {
                self.messages.push_back(msg);
            }
        }
    }

    /// Implementation of [`TaskQueue::pop`]; takes the mutex so it can release
    /// the lock while parked.
    fn pop(s: &Mutex<Self>) -> Option<T> {
        let mut res = TaskSlot::Waiting;
        // Every access to `res` after this point goes through `slot`. Re-borrowing
        // `res` directly would invalidate the pointer that is published in
        // `parked`, which matters whenever a wake-up is spurious and the pointer
        // is written through later.
        let slot: *mut TaskSlot<T> = std::ptr::addr_of_mut!(res);

        {
            let mut s = s.lock().unwrap();
            if let Some(msg) = s.messages.pop_front() {
                return Some(msg);
            }

            // This caller would be the last possible waiter: nobody is left to
            // produce work, so release everyone.
            if s.max_waiters == s.parked.len() + 1 {
                for (dest, thread) in s.parked.drain(..) {
                    // SAFETY: same argument as in `push`.
                    unsafe { *dest = TaskSlot::Closed };
                    thread.unpark();
                }
                return None;
            }

            s.parked.push_back((slot, std::thread::current()));
        }

        loop {
            std::thread::park();

            // Holding the lock orders this read after the producer's write.
            let _s = s.lock().unwrap();
            // SAFETY: `slot` points at `res`, which outlives this loop, and the
            // lock excludes concurrent writers.
            match unsafe { slot.replace(TaskSlot::Waiting) } {
                TaskSlot::Delivered(msg) => return Some(msg),
                TaskSlot::Closed => return None,
                // Spurious wake-up: the slot is still registered, park again.
                TaskSlot::Waiting => {}
            }
        }
    }
}
#[cfg(test)]
mod test {
    use std::sync::Arc;

    /// Ten workers each take an item and put it straight back a hundred times
    /// while only five items circulate; the test passes when every worker
    /// finishes without a panic.
    #[test]
    fn task_queue_sanity() {
        let queue = Arc::new(super::TaskQueue::new(1000));

        let mut workers = Vec::with_capacity(10);
        for _ in 0..10 {
            let shared = Arc::clone(&queue);
            workers.push(std::thread::spawn(move || {
                for _ in 0..100 {
                    let item = shared.pop().unwrap();
                    shared.extend([item]);
                }
            }));
        }

        queue.extend(0..5);

        for worker in workers {
            worker.join().unwrap();
        }
    }
}

View file

@ -14,12 +14,13 @@ fn main() -> io::Result<()> {
.skip(1) .skip(1)
.map(|path| std::fs::read_to_string(&path).map(|src| (path, src))) .map(|path| std::fs::read_to_string(&path).map(|src| (path, src)))
.collect::<io::Result<Vec<_>>>()?; .collect::<io::Result<Vec<_>>>()?;
let arena = parser::Arena::default(); let mut arena = parser::Arena::default();
let mut parser = parser::Parser::new(&arena);
let mut codegen = codegen::Codegen::default(); let mut codegen = codegen::Codegen::default();
for (path, content) in files.iter() { for (path, content) in files.iter() {
let mut parser = parser::Parser::new(&arena, &mut codegen.symbols, &parser::no_loader);
let file = parser.file(&path, content.as_str()); let file = parser.file(&path, content.as_str());
codegen.file(path, content.as_bytes(), file); codegen.file(path, content.as_bytes(), file);
arena.clear();
} }
codegen.dump(&mut std::io::stdout()) codegen.dump(&mut std::io::stdout())
} }

View file

@ -1,57 +1,365 @@
use std::{cell::Cell, ops::Not, ptr::NonNull}; use std::{
cell::{Cell, UnsafeCell},
collections::{HashMap, HashSet},
io::{self, Read},
ops::Not,
path::{Path, PathBuf},
ptr::NonNull,
sync::{
atomic::{AtomicU32, AtomicUsize},
Mutex,
},
};
use crate::{ use crate::{
codegen::bt, codegen::bt,
ident::{self, Ident}, ident::{self, Ident},
lexer::{Lexer, Token, TokenKind}, lexer::{Lexer, Token, TokenKind},
TaskQueue,
}; };
pub type Pos = u32; pub type Pos = u32;
pub type IdentFlags = u32; pub type IdentFlags = u32;
pub type Symbols = Vec<Symbol>;
pub type FileId = u32;
pub type Loader<'a> = &'a (dyn Fn(&str, &str) -> io::Result<Option<FileId>> + 'a);
pub const MUTABLE: IdentFlags = 1 << std::mem::size_of::<IdentFlags>() * 8 - 1; pub const MUTABLE: IdentFlags = 1 << std::mem::size_of::<IdentFlags>() * 8 - 1;
pub const REFERENCED: IdentFlags = 1 << std::mem::size_of::<IdentFlags>() * 8 - 2; pub const REFERENCED: IdentFlags = 1 << std::mem::size_of::<IdentFlags>() * 8 - 2;
const GIT_DEPS_DIR: &str = "git-deps";
/// Loads and parses source files on `threads` scoped worker threads.
///
/// Imports are handed to an internal loader as `[prefix:]path` strings:
/// - no prefix: `path` is joined onto `root`;
/// - `rel:`: `path` is joined onto the importing `from` path and canonicalized;
/// - `git:<link>[?branch=..&tag=..&rev=..]:<path>`: `path` inside a checkout
///   located at `<root>/git-deps/<link>` (with `https://` and `.git` stripped
///   from `<link>`); the repository is cloned first when the file is missing.
///
/// Every distinct resolved path receives a `FileId` equal to the number of paths
/// seen before it, and the parsed [`Ast`] is stored at that index of the result.
///
/// # Errors
/// Returns the first error in file-id order: an I/O failure, invalid UTF-8, a
/// failed `git` invocation, or a slot that was never filled.
///
/// NOTE(review): nothing in this function enqueues an initial task; work only
/// enters the queue through the loader. Confirm how the root file is meant to
/// be scheduled.
pub fn parse_all(root: &str, threads: usize) -> io::Result<Vec<Ast>> {
    /// Parsed form of an import string.
    enum ImportPath<'a> {
        Root {
            path: &'a str,
        },
        Rel {
            path: &'a str,
        },
        Git {
            link: &'a str,
            path: &'a str,
            branch: Option<&'a str>,
            tag: Option<&'a str>,
            rev: Option<&'a str>,
        },
    }

    impl<'a> TryFrom<&'a str> for ImportPath<'a> {
        type Error = ParseImportError;

        fn try_from(value: &'a str) -> Result<Self, Self::Error> {
            let (prefix, path) = value.split_once(':').unwrap_or(("", value));

            match prefix {
                "" => Ok(Self::Root { path }),
                "rel" => Ok(Self::Rel { path }),
                "git" => {
                    // Split at the LAST colon: the link itself may contain
                    // colons (`https://host/repo`, `user@host:repo`).
                    let (link, path) =
                        path.rsplit_once(':').ok_or(ParseImportError::ExpectedPath)?;
                    let (link, params) = link.split_once('?').unwrap_or((link, ""));
                    let [mut branch, mut tag, mut rev] = [None; 3];
                    for (key, value) in params.split('&').filter_map(|s| s.split_once('=')) {
                        match key {
                            "branch" => branch = Some(value),
                            "tag" => tag = Some(value),
                            "rev" => rev = Some(value),
                            _ => return Err(ParseImportError::UnexpectedParam),
                        }
                    }
                    Ok(Self::Git {
                        link,
                        path,
                        branch,
                        tag,
                        rev,
                    })
                }
                _ => Err(ParseImportError::InvalidPrefix),
            }
        }
    }

    /// Turns a repository link into the directory name used under `git-deps`.
    fn preprocess_git(link: &str) -> &str {
        let link = link.strip_prefix("https://").unwrap_or(link);
        link.strip_suffix(".git").unwrap_or(link)
    }

    impl<'a> ImportPath<'a> {
        /// Maps the import onto a path on disk.
        fn resolve(&self, from: &str, root: &str) -> Result<PathBuf, CantLoadFile> {
            match self {
                Self::Root { path } => Ok(PathBuf::from_iter([root, path])),
                Self::Rel { path } => {
                    // NOTE(review): `from` is joined as-is; if callers pass the
                    // importing FILE rather than its directory this yields
                    // `file/path` - confirm against the loader's call site.
                    let path = PathBuf::from_iter([from, path]);
                    match path.canonicalize() {
                        Ok(path) => Ok(path),
                        Err(e) => Err(CantLoadFile(path, e)),
                    }
                }
                Self::Git { path, link, .. } => {
                    let link = preprocess_git(link);
                    Ok(PathBuf::from_iter([root, GIT_DEPS_DIR, link, path]))
                }
            }
        }
    }

    #[derive(Debug)]
    enum ParseImportError {
        ExpectedPath,
        InvalidPrefix,
        ExpectedGitAlias,
        UnexpectedParam,
    }

    impl std::fmt::Display for ParseImportError {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            match self {
                Self::ExpectedPath => write!(f, "expected path"),
                Self::InvalidPrefix => write!(
                    f,
                    "invalid prefix, expected one of rel, \
                    git or none followed by colon"
                ),
                Self::ExpectedGitAlias => write!(f, "expected git alias as ':<alias>$'"),
                Self::UnexpectedParam => {
                    write!(f, "unexpected git param, expected branch, tag or rev")
                }
            }
        }
    }

    impl std::error::Error for ParseImportError {}

    impl From<ParseImportError> for io::Error {
        fn from(e: ParseImportError) -> Self {
            io::Error::new(io::ErrorKind::InvalidInput, e)
        }
    }

    /// A relative import whose target could not be canonicalized.
    #[derive(Debug)]
    struct CantLoadFile(PathBuf, io::Error);

    impl std::fmt::Display for CantLoadFile {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(f, "can't load file: {}", self.0.display())
        }
    }

    impl std::error::Error for CantLoadFile {
        fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
            Some(&self.1)
        }
    }

    impl From<CantLoadFile> for io::Error {
        fn from(e: CantLoadFile) -> Self {
            io::Error::new(io::ErrorKind::InvalidData, e)
        }
    }

    /// File contents that are not valid UTF-8.
    #[derive(Debug)]
    struct InvalidFileData(std::str::Utf8Error);

    impl std::fmt::Display for InvalidFileData {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            write!(f, "invalid file data")
        }
    }

    impl std::error::Error for InvalidFileData {
        fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
            Some(&self.0)
        }
    }

    impl From<InvalidFileData> for io::Error {
        fn from(e: InvalidFileData) -> Self {
            io::Error::new(io::ErrorKind::InvalidData, e)
        }
    }

    /// Unit of work executed by a worker thread.
    enum Task {
        /// Read and parse a file that already exists on disk.
        LoadFile {
            id: FileId,
            physiscal_path: PathBuf,
        },
        /// Run `command` (a `git clone`) and then load the file.
        FetchGit {
            id: FileId,
            physiscal_path: PathBuf,
            command: std::process::Command,
        },
    }

    // Resolved path -> file id; also the source of fresh ids.
    let seen = Mutex::new(HashMap::<PathBuf, FileId>::new());
    let tasks = TaskQueue::<Task>::new(threads);
    // Results indexed by file id; unfilled slots hold a placeholder error.
    let ast = Mutex::new(Vec::<io::Result<Ast>>::new());

    let loader = |path: &str, from: &str| {
        let path = ImportPath::try_from(path)?;
        let physiscal_path = path.resolve(from, root)?;

        let id = {
            let mut seen = seen.lock().unwrap();
            let len = seen.len();
            match seen.entry(physiscal_path.clone()) {
                // Already scheduled: hand back the existing id, queue nothing.
                std::collections::hash_map::Entry::Occupied(entry) => {
                    return Ok(Some(*entry.get()));
                }
                std::collections::hash_map::Entry::Vacant(entry) => {
                    entry.insert(len as _);
                    len as FileId
                }
            }
        };

        if physiscal_path.exists() {
            tasks.push(Task::LoadFile { id, physiscal_path });
            return Ok(Some(id));
        }

        // Only git imports can be materialized when the file is missing.
        let ImportPath::Git {
            link,
            branch,
            rev,
            tag,
            ..
        } = path
        else {
            return Err(io::Error::new(
                io::ErrorKind::NotFound,
                format!("can't find file: {}", physiscal_path.display()),
            ));
        };

        let root = PathBuf::from_iter([root, GIT_DEPS_DIR, preprocess_git(link)]);

        let mut command = std::process::Command::new("git");
        command
            .args(["clone", "--depth", "1"])
            .args(branch.map(|b| ["--branch", b]).into_iter().flatten())
            // `git clone` has no `--tag` option; `--branch` also accepts a tag
            // name and checks it out detached.
            .args(tag.map(|t| ["--branch", t]).into_iter().flatten())
            // NOTE(review): `--rev` is kept as written, but released `git clone`
            // versions contemporary with this code have no such option and a
            // depth-1 clone cannot reach an arbitrary revision - needs a design
            // decision (e.g. fetch + checkout).
            .args(rev.map(|r| ["--rev", r]).into_iter().flatten())
            .arg(link)
            .arg(root);

        tasks.push(Task::FetchGit {
            id,
            physiscal_path,
            command,
        });

        Ok(Some(id))
    };

    // Reads `path` into the reusable `buffer` and parses it.
    let load_from_path = |path: &Path, buffer: &mut Vec<u8>| -> io::Result<Ast> {
        let path = path.to_str().ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                "path contains invalid characters",
            )
        })?;
        let mut file = std::fs::File::open(&path)?;
        file.read_to_end(buffer)?;
        let src = std::str::from_utf8(buffer).map_err(InvalidFileData)?;
        Ok(Ast::new(&path, src, &loader))
    };

    let execute_task = |task: Task, buffer: &mut Vec<u8>| match task {
        Task::LoadFile { id, physiscal_path } => (id, load_from_path(&physiscal_path, buffer)),
        Task::FetchGit {
            id,
            physiscal_path,
            mut command,
        } => {
            let output = match command.output() {
                Ok(output) => output,
                Err(e) => return (id, Err(e)),
            };
            if !output.status.success() {
                let msg = format!(
                    "git command failed: {}",
                    String::from_utf8_lossy(&output.stderr)
                );
                return (id, Err(io::Error::new(io::ErrorKind::Other, msg)));
            }
            (id, load_from_path(&physiscal_path, buffer))
        }
    };

    // Worker body: drain tasks until the queue reports that it is closed.
    let thread = || {
        let mut buffer = Vec::new();
        while let Some(task) = tasks.pop() {
            let (indx, res) = execute_task(task, &mut buffer);

            let mut ast = ast.lock().unwrap();
            // Results can arrive out of order; grow with placeholder errors.
            let len = ast.len().max(indx as usize + 1);
            ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into()));
            ast[indx as usize] = res;

            buffer.clear();
        }
    };

    // The closure only captures shared references, so it can be spawned repeatedly.
    std::thread::scope(|s| (0..threads).for_each(|_| _ = s.spawn(thread)));

    ast.into_inner()
        .unwrap()
        .into_iter()
        .collect::<io::Result<Vec<_>>>()
}
pub fn ident_flag_index(flag: IdentFlags) -> u32 { pub fn ident_flag_index(flag: IdentFlags) -> u32 {
flag & !(MUTABLE | REFERENCED) flag & !(MUTABLE | REFERENCED)
} }
struct ScopeIdent<'a> { pub fn no_loader(_: &str, _: &str) -> io::Result<Option<FileId>> {
Ok(None)
}
/// Per-identifier record the parser emits into the symbol table when the scope
/// that holds the identifier is popped.
pub struct Symbol {
/// Identifier this record describes; `find_symbol` searches the table by this key.
pub name: Ident,
/// Flag word gathered while parsing: the `MUTABLE` / `REFERENCED` bits plus,
/// in the remaining bits, the counter that `ident_flag_index` extracts.
pub flags: IdentFlags,
}
struct ScopeIdent {
ident: Ident, ident: Ident,
declared: bool, declared: bool,
last: &'a Cell<IdentFlags>, flags: IdentFlags,
} }
pub struct Parser<'a, 'b> { pub struct Parser<'a, 'b> {
path: &'a str, path: &'b str,
lexer: Lexer<'a>, loader: Loader<'b>,
lexer: Lexer<'b>,
arena: &'b Arena<'a>, arena: &'b Arena<'a>,
token: Token, token: Token,
idents: Vec<ScopeIdent<'a>>, idents: Vec<ScopeIdent>,
referening: bool, symbols: &'b mut Symbols,
} }
impl<'a, 'b> Parser<'a, 'b> { impl<'a, 'b> Parser<'a, 'b> {
pub fn new(arena: &'b Arena<'a>) -> Self { pub fn new(arena: &'b Arena<'a>, symbols: &'b mut Symbols, loader: Loader<'b>) -> Self {
let mut lexer = Lexer::new(""); let mut lexer = Lexer::new("");
let token = lexer.next();
Self { Self {
loader,
token: lexer.next(),
lexer, lexer,
token,
path: "", path: "",
arena, arena,
idents: Vec::new(), idents: Vec::new(),
referening: false, symbols,
} }
} }
pub fn file(&mut self, input: &'a str, path: &'a str) -> &'a [Expr<'a>] { pub fn file(&mut self, input: &'b str, path: &'b str) -> &'a [Expr<'a>] {
self.path = path; self.path = path;
self.lexer = Lexer::new(input); self.lexer = Lexer::new(input);
self.token = self.lexer.next(); self.token = self.lexer.next();
let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, Self::expr); let f = self.collect_list(TokenKind::Semi, TokenKind::Eof, Self::expr);
self.pop_scope(0); self.pop_scope(0);
let has_undeclared = !self.idents.is_empty(); let has_undeclared = !self.idents.is_empty();
for id in self.idents.drain(..) { for id in self.idents.drain(..) {
@ -66,6 +374,7 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
if has_undeclared { if has_undeclared {
// TODO: we need error recovery
unreachable!(); unreachable!();
} }
@ -102,7 +411,7 @@ impl<'a, 'b> Parser<'a, 'b> {
let left = &*self.arena.alloc(fold); let left = &*self.arena.alloc(fold);
if let Some(op) = op.assign_op() { if let Some(op) = op.assign_op() {
fold.mark_mut(); self.flag_idents(*left, MUTABLE);
let right = Expr::BinOp { left, op, right }; let right = Expr::BinOp { left, op, right };
fold = Expr::BinOp { fold = Expr::BinOp {
left, left,
@ -112,7 +421,7 @@ impl<'a, 'b> Parser<'a, 'b> {
} else { } else {
fold = Expr::BinOp { left, right, op }; fold = Expr::BinOp { left, right, op };
if op == TokenKind::Assign { if op == TokenKind::Assign {
fold.mark_mut(); self.flag_idents(*left, MUTABLE);
} }
} }
} }
@ -137,11 +446,11 @@ impl<'a, 'b> Parser<'a, 'b> {
}) })
} }
fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, Option<&'a Cell<IdentFlags>>) { fn resolve_ident(&mut self, token: Token, decl: bool) -> (Ident, u32) {
let name = self.lexer.slice(token.range()); let name = self.lexer.slice(token.range());
if let Some(builtin) = Self::try_resolve_builtin(name) { if let Some(builtin) = Self::try_resolve_builtin(name) {
return (builtin, None); return (builtin, 0);
} }
let id = match self let id = match self
@ -153,26 +462,27 @@ impl<'a, 'b> Parser<'a, 'b> {
self.report(format_args!("redeclaration of identifier: {name}")) self.report(format_args!("redeclaration of identifier: {name}"))
} }
Some(elem) => { Some(elem) => {
elem.last.set(elem.last.get() + 1); elem.flags += 1;
elem elem
} }
None => { None => {
let last = self.arena.alloc(Cell::new(0));
let id = ident::new(token.start, name.len() as _); let id = ident::new(token.start, name.len() as _);
self.idents.push(ScopeIdent { self.idents.push(ScopeIdent {
ident: id, ident: id,
declared: false, declared: false,
last, flags: 0,
}); });
self.idents.last_mut().unwrap() self.idents.last_mut().unwrap()
} }
}; };
id.declared |= decl; id.declared |= decl;
id.last
.set(id.last.get() | (REFERENCED * self.referening as u32));
(id.ident, Some(id.last)) (id.ident, ident_flag_index(id.flags))
}
fn move_str(&mut self, range: Token) -> &'a str {
self.arena.alloc_str(self.lexer.slice(range.range()))
} }
fn unit_expr(&mut self) -> Expr<'a> { fn unit_expr(&mut self) -> Expr<'a> {
@ -182,7 +492,7 @@ impl<'a, 'b> Parser<'a, 'b> {
let mut expr = match token.kind { let mut expr = match token.kind {
T::Driective => E::Directive { T::Driective => E::Directive {
pos: token.start, pos: token.start,
name: self.lexer.slice(token.range()), name: self.move_str(token),
args: { args: {
self.expect_advance(T::LParen); self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, Self::expr) self.collect_list(T::Comma, T::RParen, Self::expr)
@ -200,18 +510,14 @@ impl<'a, 'b> Parser<'a, 'b> {
let name = s.expect_advance(T::Ident); let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon); s.expect_advance(T::Colon);
let ty = s.expr(); let ty = s.expr();
(s.lexer.slice(name.range()), ty) (s.move_str(name), ty)
}) })
}, },
}, },
T::Ident => { T::Ident => {
let (id, last) = self.resolve_ident(token, self.token.kind == T::Decl); let (id, index) = self.resolve_ident(token, self.token.kind == T::Decl);
E::Ident { let name = self.move_str(token);
name: self.lexer.slice(token.range()), E::Ident { name, id, index }
id,
last,
index: last.map_or(0, |l| ident_flag_index(l.get())),
}
} }
T::If => E::If { T::If => E::If {
pos: token.start, pos: token.start,
@ -235,12 +541,12 @@ impl<'a, 'b> Parser<'a, 'b> {
self.expect_advance(T::LParen); self.expect_advance(T::LParen);
self.collect_list(T::Comma, T::RParen, |s| { self.collect_list(T::Comma, T::RParen, |s| {
let name = s.expect_advance(T::Ident); let name = s.expect_advance(T::Ident);
let (id, last) = s.resolve_ident(name, true); let (id, index) = s.resolve_ident(name, true);
s.expect_advance(T::Colon); s.expect_advance(T::Colon);
Arg { Arg {
name: s.lexer.slice(name.range()), name: s.move_str(name),
id, id,
last, index,
ty: s.expr(), ty: s.expr(),
} }
}) })
@ -254,9 +560,10 @@ impl<'a, 'b> Parser<'a, 'b> {
T::Band | T::Mul => E::UnOp { T::Band | T::Mul => E::UnOp {
pos: token.start, pos: token.start,
op: token.kind, op: token.kind,
val: match token.kind { val: {
T::Band => self.referenced(Self::ptr_unit_expr), let expr = self.ptr_unit_expr();
_ => self.ptr_unit_expr(), self.flag_idents(*expr, REFERENCED);
expr
}, },
}, },
T::LBrace => E::Block { T::LBrace => E::Block {
@ -287,9 +594,7 @@ impl<'a, 'b> Parser<'a, 'b> {
expr = match token.kind { expr = match token.kind {
T::LParen => Expr::Call { T::LParen => Expr::Call {
func: self.arena.alloc(expr), func: self.arena.alloc(expr),
args: self args: self.collect_list(T::Comma, T::RParen, Self::expr),
.calcel_ref()
.collect_list(T::Comma, T::RParen, Self::expr),
}, },
T::Ctor => E::Ctor { T::Ctor => E::Ctor {
pos: token.start, pos: token.start,
@ -298,7 +603,7 @@ impl<'a, 'b> Parser<'a, 'b> {
let name = s.expect_advance(T::Ident); let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon); s.expect_advance(T::Colon);
let val = s.expr(); let val = s.expr();
(Some(s.lexer.slice(name.range())), val) (Some(s.move_str(name)), val)
}), }),
}, },
T::Tupl => E::Ctor { T::Tupl => E::Ctor {
@ -310,7 +615,7 @@ impl<'a, 'b> Parser<'a, 'b> {
target: self.arena.alloc(expr), target: self.arena.alloc(expr),
field: { field: {
let token = self.expect_advance(T::Ident); let token = self.expect_advance(T::Ident);
self.lexer.slice(token.range()) self.move_str(token)
}, },
}, },
_ => break, _ => break,
@ -328,16 +633,6 @@ impl<'a, 'b> Parser<'a, 'b> {
expr expr
} }
fn referenced<T>(&mut self, f: impl Fn(&mut Self) -> T) -> T {
if self.referening {
self.report("cannot take reference of reference, (souwy)");
}
self.referening = true;
let expr = f(self);
self.referening = false;
expr
}
fn pop_scope(&mut self, frame: usize) { fn pop_scope(&mut self, frame: usize) {
let mut undeclared_count = frame; let mut undeclared_count = frame;
for i in frame..self.idents.len() { for i in frame..self.idents.len() {
@ -347,7 +642,13 @@ impl<'a, 'b> Parser<'a, 'b> {
} }
} }
self.idents.drain(undeclared_count..); self.idents
.drain(undeclared_count..)
.map(|ident| Symbol {
name: ident.ident,
flags: ident.flags,
})
.collect_into(self.symbols);
} }
fn ptr_unit_expr(&mut self) -> &'a Expr<'a> { fn ptr_unit_expr(&mut self) -> &'a Expr<'a> {
@ -399,17 +700,34 @@ impl<'a, 'b> Parser<'a, 'b> {
unreachable!(); unreachable!();
} }
fn calcel_ref(&mut self) -> &mut Self { fn flag_idents(&mut self, e: Expr<'a>, flags: IdentFlags) {
self.referening = false; match e {
self Expr::Ident { id, .. } => find_ident(&mut self.idents, id).flags |= flags,
Expr::Field { target, .. } => self.flag_idents(*target, flags),
_ => {}
} }
} }
}
/// Looks up the scope entry for `id` by binary search over `idents`.
///
/// # Panics
/// Panics when no entry with that identifier is present. The search assumes
/// `idents` is ordered by `ident`.
fn find_ident(idents: &mut [ScopeIdent], id: Ident) -> &mut ScopeIdent {
    let position = idents
        .binary_search_by_key(&id, |entry| entry.ident)
        .unwrap();
    &mut idents[position]
}
/// Looks up the symbol named `id` by binary search over `symbols`.
///
/// # Panics
/// Panics when the table has no such symbol. The search assumes `symbols` is
/// ordered by `name`.
pub fn find_symbol(symbols: &[Symbol], id: Ident) -> &Symbol {
    let position = symbols
        .binary_search_by_key(&id, |symbol| symbol.name)
        .unwrap();
    &symbols[position]
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Arg<'a> { pub struct Arg<'a> {
pub name: &'a str, pub name: &'a str,
pub id: Ident, pub id: Ident,
pub last: Option<&'a Cell<IdentFlags>>, pub index: u32,
pub ty: Expr<'a>, pub ty: Expr<'a>,
} }
@ -439,7 +757,6 @@ pub enum Expr<'a> {
name: &'a str, name: &'a str,
id: Ident, id: Ident,
index: u32, index: u32,
last: Option<&'a Cell<IdentFlags>>,
}, },
Block { Block {
pos: Pos, pos: Pos,
@ -515,14 +832,6 @@ impl<'a> Expr<'a> {
Self::Field { target, .. } => target.pos(), Self::Field { target, .. } => target.pos(),
} }
} }
fn mark_mut(&self) {
match self {
Self::Ident { last, .. } => _ = last.map(|l| l.set(l.get() | MUTABLE)),
Self::Field { target, .. } => target.mark_mut(),
_ => {}
}
}
} }
impl<'a> std::fmt::Display for Expr<'a> { impl<'a> std::fmt::Display for Expr<'a> {
@ -670,9 +979,109 @@ impl<'a> std::fmt::Display for Expr<'a> {
} }
} }
/// Heap block behind an [`Ast`]: a fixed header followed by the symbol table.
///
/// `AstInner<()>` is the header alone; `AstInner<[Symbol]>` is the header with
/// the symbols stored inline after it. `#[repr(C)]` keeps the header fields at
/// the same offsets in both instantiations.
#[repr(C)]
struct AstInner<T: ?Sized> {
    ref_count: AtomicUsize,
    /// Arena memory that `exprs` points into; freed when this value is dropped.
    mem: ArenaChunk,
    exprs: *const [Expr<'static>],
    path: String,
    symbols: T,
}

impl AstInner<[Symbol]> {
    /// Allocation layout for a header followed by `syms` symbols.
    fn layout(syms: usize) -> std::alloc::Layout {
        std::alloc::Layout::new::<AstInner<()>>()
            .extend(std::alloc::Layout::array::<Symbol>(syms).unwrap())
            .unwrap()
            .0
            // `extend` leaves out trailing padding; a `repr(C)` struct needs it.
            .pad_to_align()
    }

    /// Parses `content` and packs the expressions, the sorted symbol table and
    /// the backing arena memory into one reference-counted allocation (initial
    /// count 1).
    fn new(content: &str, path: &str, loader: Loader) -> NonNull<Self> {
        let arena = Arena::default();
        let mut syms = Vec::new();
        let mut parser = Parser::new(&arena, &mut syms, loader);
        let exprs = parser.file(content, path) as *const [Expr<'static>];

        // `find_symbol` binary-searches by name.
        syms.sort_unstable_by_key(|s| s.name);

        let layout = Self::layout(syms.len());

        // SAFETY: the layout contains the header, so its size is non-zero.
        let ptr = unsafe { std::alloc::alloc(layout) };
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        let inner: *mut Self = std::ptr::from_raw_parts_mut(ptr as *mut _, syms.len());
        // SAFETY: `ptr` is a fresh allocation of `layout`, big enough for the
        // header plus `syms.len()` symbols, and nothing else aliases it.
        unsafe {
            // `write` instead of `*p = ..`: the destination is uninitialized and
            // must not be dropped as if it held a previous value.
            std::ptr::write(
                inner as *mut AstInner<()>,
                AstInner {
                    ref_count: AtomicUsize::new(1),
                    // Take ownership of the arena's memory. Storing a fresh
                    // chunk here would let `arena` free the expressions at the
                    // end of this function and leave `exprs` dangling.
                    mem: arena.chunk.into_inner(),
                    exprs,
                    path: path.to_owned(),
                    symbols: (),
                },
            );
            std::ptr::addr_of_mut!((*inner).symbols)
                .as_mut_ptr()
                .copy_from_nonoverlapping(syms.as_ptr(), syms.len());
            NonNull::new_unchecked(inner)
        }
    }
}
/// Shared, reference-counted handle to one parsed file.
///
/// Equality and hashing compare the underlying pointer, so two handles are
/// equal only when they refer to the same allocation.
#[derive(PartialEq, Eq, Hash)]
pub struct Ast(NonNull<AstInner<[Symbol]>>);

impl Ast {
    /// Parses `content` (reported as coming from `path`) into a new handle.
    pub fn new(path: &str, content: &str, loader: Loader) -> Self {
        Self(AstInner::new(content, path, loader))
    }

    /// Top-level expressions of the file.
    pub fn exprs(&self) -> &[Expr] {
        // SAFETY: `exprs` points into `mem`, which lives as long as the
        // allocation this handle keeps alive.
        unsafe { &*self.inner().exprs }
    }

    /// Symbol table of the file, sorted by name at construction.
    pub fn symbols(&self) -> &[Symbol] {
        &self.inner().symbols
    }

    /// Path the file was parsed under.
    pub fn path(&self) -> &str {
        &self.inner().path
    }

    fn inner(&self) -> &AstInner<[Symbol]> {
        // SAFETY: the allocation stays valid while any handle exists.
        unsafe { self.0.as_ref() }
    }
}

// SAFETY: the shared state is only mutated through the atomic reference count.
unsafe impl Send for Ast {}
unsafe impl Sync for Ast {}

impl Clone for Ast {
    fn clone(&self) -> Self {
        // Relaxed is enough for an increment: the caller already holds a
        // handle, so the count cannot reach zero concurrently.
        unsafe { self.0.as_ref() }
            .ref_count
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        Self(self.0)
    }
}

impl Drop for Ast {
    fn drop(&mut self) {
        use std::sync::atomic::{fence, Ordering};

        let inner = unsafe { self.0.as_ref() };
        // Release publishes this thread's last use of the data to whichever
        // thread ends up freeing it.
        if inner.ref_count.fetch_sub(1, Ordering::Release) != 1 {
            return;
        }
        // Acquire pairs with the Release decrements of all other handles, so
        // their uses happen-before the destruction below.
        fence(Ordering::Acquire);

        // Read the length before the contents are destroyed.
        let layout = AstInner::layout(inner.symbols.len());
        // SAFETY: this was the last handle; the block was allocated with the
        // same `layout` in `AstInner::new`.
        unsafe {
            std::ptr::drop_in_place(self.0.as_ptr());
            std::alloc::dealloc(self.0.as_ptr() as _, layout);
        }
    }
}
#[derive(Default)] #[derive(Default)]
pub struct Arena<'a> { pub struct Arena<'a> {
chunk: Cell<ArenaChunk>, chunk: UnsafeCell<ArenaChunk>,
ph: std::marker::PhantomData<&'a ()>, ph: std::marker::PhantomData<&'a ()>,
} }
@ -717,17 +1126,12 @@ impl<'a> Arena<'a> {
chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) }; chunk.end = unsafe { chunk.base.add(ArenaChunk::PREV_OFFSET) };
} }
fn with_chunk<R>(&self, f: impl FnOnce(&mut ArenaChunk) -> R) -> R {
let mut chunk = self.chunk.get();
let r = f(&mut chunk);
self.chunk.set(chunk);
r
}
fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull<u8> { fn alloc_low(&self, layout: std::alloc::Layout) -> NonNull<u8> {
assert!(layout.align() <= ArenaChunk::ALIGN); assert!(layout.align() <= ArenaChunk::ALIGN);
assert!(layout.size() <= ArenaChunk::CHUNK_SIZE); assert!(layout.size() <= ArenaChunk::CHUNK_SIZE);
self.with_chunk(|chunk| {
let chunk = unsafe { &mut *self.chunk.get() };
if let Some(ptr) = chunk.alloc(layout) { if let Some(ptr) = chunk.alloc(layout) {
return ptr; return ptr;
} }
@ -739,32 +1143,9 @@ impl<'a> Arena<'a> {
} }
chunk.alloc(layout).unwrap() chunk.alloc(layout).unwrap()
})
} }
} }
impl<'a> Drop for Arena<'a> {
fn drop(&mut self) {
use ArenaChunk as AC;
let mut current = self.chunk.get().base;
let mut prev = AC::prev(current);
while !prev.is_null() {
let next = AC::next(prev);
unsafe { std::alloc::dealloc(prev, AC::LAYOUT) };
prev = next;
}
while !current.is_null() {
let next = AC::next(current);
unsafe { std::alloc::dealloc(current, AC::LAYOUT) };
current = next;
}
}
}
#[derive(Clone, Copy)]
struct ArenaChunk { struct ArenaChunk {
base: *mut u8, base: *mut u8,
end: *mut u8, end: *mut u8,
@ -839,12 +1220,33 @@ impl ArenaChunk {
} }
} }
impl Drop for ArenaChunk {
    /// Frees every chunk reachable from `base`: first the chain obtained by
    /// repeatedly following `Self::prev`, then `base` itself together with the
    /// chain obtained by following `Self::next`. Each link is read before its
    /// chunk is released.
    // NOTE(review): relies on `Self::prev` / `Self::next` (defined elsewhere)
    // yielding null at the ends of the chain, including for a null `base`.
    fn drop(&mut self) {
        let mut older = Self::prev(self.base);
        while !older.is_null() {
            let following = Self::prev(older);
            unsafe { std::alloc::dealloc(older, Self::LAYOUT) };
            older = following;
        }

        let mut chunk = self.base;
        while !chunk.is_null() {
            let following = Self::next(chunk);
            unsafe { std::alloc::dealloc(chunk, Self::LAYOUT) };
            chunk = following;
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
fn parse(input: &'static str, output: &mut String) { fn parse(input: &'static str, output: &mut String) {
use std::fmt::Write; use std::fmt::Write;
let mut arena = super::Arena::default(); let mut arena = super::Arena::default();
let mut parser = super::Parser::new(&arena); let mut symbols = Vec::new();
let mut parser = super::Parser::new(&arena, &mut symbols, &super::no_loader);
for expr in parser.file(input, "test") { for expr in parser.file(input, "test") {
writeln!(output, "{}", expr).unwrap(); writeln!(output, "{}", expr).unwrap();
} }