Compare commits

..

No commits in common. "91907a90ff2312526278d11d4d76bc3afbf3ff8b" and "f9e46b4641d0505ace19fbc431d53ec358909f97" have entirely different histories.

5 changed files with 45 additions and 200 deletions

View file

@ -252,20 +252,6 @@ main := fn(): int {
- `@bitcast(<expr>)`: tell compiler to assume `@TypeOf(<expr>)` is whatever is inferred, so long as size and alignment did not change - `@bitcast(<expr>)`: tell compiler to assume `@TypeOf(<expr>)` is whatever is inferred, so long as size and alignment did not change
- `@eca(<ty>, <expr>...)`: invoke `eca` instruction, where `<ty>` is the type this will return and `<expr>...` are arguments passed to the call - `@eca(<ty>, <expr>...)`: invoke `eca` instruction, where `<ty>` is the type this will return and `<expr>...` are arguments passed to the call
#### c_strings
```hb
main := fn(): int {
// when string ends with '\0' its a C string and thus type is '^u8'
some_str := "abඞ\n\r\t\{ff}\{fff0f0ff}\0";
len := 0;
loop if *some_str == 0 break else {
len += 1;
some_str += 1;
}
return len;
}
```
### Incomplete Examples ### Incomplete Examples
#### generic_types #### generic_types

View file

@ -1,13 +1,12 @@
use { use std::{ops::Range, rc::Rc};
crate::{
ident::{self, Ident}, use crate::{
instrs::{self, *}, ident::{self, Ident},
lexer::TokenKind, instrs::{self, *},
log, lexer::TokenKind,
parser::{self, find_symbol, idfl, Expr, ExprRef, FileId, Pos}, log,
HashMap, parser::{self, find_symbol, idfl, Expr, ExprRef, FileId, Pos},
}, HashMap,
std::{ops::Range, rc::Rc},
}; };
use self::reg::{RET_ADDR, STACK_PTR, ZERO}; use self::reg::{RET_ADDR, STACK_PTR, ZERO};
@ -976,7 +975,6 @@ pub struct Snapshot {
code: usize, code: usize,
funcs: usize, funcs: usize,
globals: usize, globals: usize,
strings: usize,
} }
#[derive(Default)] #[derive(Default)]
@ -984,7 +982,6 @@ struct Output {
code: Vec<u8>, code: Vec<u8>,
funcs: Vec<(ty::Func, Reloc)>, funcs: Vec<(ty::Func, Reloc)>,
globals: Vec<(ty::Global, Reloc)>, globals: Vec<(ty::Global, Reloc)>,
strings: Vec<StringReloc>,
} }
impl Output { impl Output {
@ -1025,24 +1022,21 @@ impl Output {
self.emit(tx()); self.emit(tx());
} }
fn reloc_iter_mut(&mut self, snap: &Snapshot) -> impl Iterator<Item = &mut Reloc> {
self.globals[snap.globals..]
.iter_mut()
.chain(&mut self.funcs[snap.funcs..])
.map(|(_, rel)| rel)
.chain(
self.strings[snap.strings..]
.iter_mut()
.map(|rl| &mut rl.reloc),
)
}
fn append(&mut self, val: &mut Self) { fn append(&mut self, val: &mut Self) {
val.pop(self, &Snapshot::default()); for (_, rel) in val.globals.iter_mut().chain(&mut val.funcs) {
rel.offset += self.code.len() as Offset;
}
self.code.append(&mut val.code);
self.funcs.append(&mut val.funcs);
self.globals.append(&mut val.globals);
} }
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) { fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
for rel in self.reloc_iter_mut(snap) { for (_, rel) in self.globals[snap.globals..]
.iter_mut()
.chain(&mut self.funcs[snap.funcs..])
{
rel.offset -= snap.code as Offset; rel.offset -= snap.code as Offset;
rel.offset += stash.code.len() as Offset; rel.offset += stash.code.len() as Offset;
} }
@ -1050,14 +1044,12 @@ impl Output {
stash.code.extend(self.code.drain(snap.code..)); stash.code.extend(self.code.drain(snap.code..));
stash.funcs.extend(self.funcs.drain(snap.funcs..)); stash.funcs.extend(self.funcs.drain(snap.funcs..));
stash.globals.extend(self.globals.drain(snap.globals..)); stash.globals.extend(self.globals.drain(snap.globals..));
stash.strings.extend(self.strings.drain(snap.strings..));
} }
fn trunc(&mut self, snap: &Snapshot) { fn trunc(&mut self, snap: &Snapshot) {
self.code.truncate(snap.code); self.code.truncate(snap.code);
self.globals.truncate(snap.globals); self.globals.truncate(snap.globals);
self.funcs.truncate(snap.funcs); self.funcs.truncate(snap.funcs);
self.strings.truncate(snap.strings);
} }
fn write_trap(&mut self, trap: Trap) { fn write_trap(&mut self, trap: Trap) {
@ -1071,7 +1063,6 @@ impl Output {
code: self.code.len(), code: self.code.len(),
funcs: self.funcs.len(), funcs: self.funcs.len(),
globals: self.globals.len(), globals: self.globals.len(),
strings: self.strings.len(),
} }
} }
@ -1218,21 +1209,10 @@ enum Trap {
}, },
} }
struct StringReloc {
reloc: Reloc,
range: std::ops::Range<u32>,
}
impl StringReloc {
fn range(&self) -> std::ops::Range<usize> {
self.range.start as _..self.range.end as _
}
}
#[derive(Default)] #[derive(Default)]
pub struct Codegen { pub struct Codegen {
pub files: Vec<parser::Ast>, pub files: Vec<parser::Ast>,
tasks: Vec<Option<FTask>>, tasks: Vec<Option<FTask>>,
string_data: Vec<u8>,
tys: Types, tys: Types,
ci: ItemCtx, ci: ItemCtx,
@ -1456,73 +1436,6 @@ impl Codegen {
ty: ty::BOOL.into(), ty: ty::BOOL.into(),
loc: Loc::imm(value as u64), loc: Loc::imm(value as u64),
}), }),
E::String { pos, mut literal } => {
literal = literal.trim_matches('"');
if !literal.ends_with("\\0") {
self.report(pos, "string literal must end with null byte (for now)");
}
let reloc = Reloc::new(self.output.code.len() as _, 3, 4);
let start = self.string_data.len();
let report = |s: &Codegen, bytes: &std::str::Bytes, message| {
s.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
};
let decode_braces = |s: &mut Codegen, bytes: &mut std::str::Bytes| {
while let Some(b) = bytes.next()
&& b != b'}'
{
let c = bytes
.next()
.unwrap_or_else(|| report(s, bytes, "incomplete escape sequence"));
let decode = |s: &Codegen, b: u8| match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => b - b'a' + 10,
b'A'..=b'F' => b - b'A' + 10,
_ => report(s, bytes, "expected hex digit or '}'"),
};
s.string_data.push(decode(s, b) << 4 | decode(s, c));
}
};
let mut bytes = literal.bytes();
while let Some(b) = bytes.next() {
if b != b'\\' {
self.string_data.push(b);
continue;
}
let b = match bytes
.next()
.unwrap_or_else(|| report(self, &bytes, "incomplete escape sequence"))
{
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
b'\\' => b'\\',
b'\'' => b'\'',
b'"' => b'"',
b'0' => b'\0',
b'{' => {
decode_braces(self, &mut bytes);
continue;
}
_ => report(
self,
&bytes,
"unknown escape sequence, expected [nrt\\\"'{0]",
),
};
self.string_data.push(b);
}
let range = start as _..self.string_data.len() as _;
self.output.strings.push(StringReloc { reloc, range });
let reg = self.ci.regs.allocate();
self.output.emit(instrs::lra(reg.get(), 0, 0));
Some(Value::new(self.tys.make_ptr(ty::U8.into()), reg))
}
E::Ctor { E::Ctor {
pos, ty, fields, .. pos, ty, fields, ..
} => { } => {
@ -1786,14 +1699,20 @@ impl Codegen {
.find(|(_, v)| v.id == id) => .find(|(_, v)| v.id == id) =>
{ {
let sym = parser::find_symbol(&self.files[self.ci.file as usize].symbols, id); let sym = parser::find_symbol(&self.files[self.ci.file as usize].symbols, id);
let loc = match idfl::index(sym.flags) == index let loc = match idfl::index(sym.flags) == dbg!(index)
&& !self && !self
.ci .ci
.loops .loops
.last() .last()
.is_some_and(|l| l.var_count > var_index as u32) .is_some_and(|l| l.var_count > var_index as u32)
{ {
true => std::mem::take(&mut var.value.loc), true => {
dbg!(
log::dbg!("braj: {expr}"),
std::mem::take(&mut var.value.loc)
)
.1
}
false => var.value.loc.as_ref(), false => var.value.loc.as_ref(),
}; };
@ -2570,45 +2489,7 @@ impl Codegen {
_ = task::unpack(self.tys.globals[g as usize].offset) _ = task::unpack(self.tys.globals[g as usize].offset)
.map(|off| rel.apply_jump(&mut self.output.code, off)); .map(|off| rel.apply_jump(&mut self.output.code, off));
true true
}); })
self.compress_strings();
let base = self.output.code.len() as u32;
self.output.code.append(&mut self.string_data);
for srel in self.output.strings.drain(..) {
srel.reloc
.apply_jump(&mut self.output.code, srel.range.start + base);
}
}
fn compress_strings(&mut self) {
// FIXME: we can go faster
self.output
.strings
.sort_by(|a, b| self.string_data[b.range()].cmp(&self.string_data[a.range()]));
let mut cursor = 0;
let mut anchor = 0;
for i in 1..self.output.strings.len() {
let [a, b] = self.output.strings.get_many_mut([anchor, i]).unwrap();
if self.string_data[a.range()].ends_with(&self.string_data[b.range()]) {
b.range.end = a.range.end;
b.range.start = a.range.end - (b.range.end - b.range.start);
} else {
self.string_data.copy_within(a.range(), cursor);
cursor += a.range.len();
anchor = i;
}
}
if !self.output.strings.is_empty() {
let a = &self.output.strings[anchor];
self.string_data.copy_within(a.range(), cursor);
cursor += a.range.len();
}
self.string_data.truncate(cursor)
} }
// TODO: sometimes its better to do this in bulk // TODO: sometimes its better to do this in bulk
@ -3025,6 +2906,5 @@ mod tests {
global_variables => README; global_variables => README;
generic_types => README; generic_types => README;
generic_functions => README; generic_functions => README;
c_strings => README;
} }
} }

View file

@ -1,5 +1,4 @@
#![feature(vec_pop_if)] #![feature(vec_pop_if)]
#![feature(get_many_mut)]
#![feature(core_intrinsics)] #![feature(core_intrinsics)]
#![feature(new_uninit)] #![feature(new_uninit)]
#![feature(anonymous_lifetime_in_impl_trait)] #![feature(anonymous_lifetime_in_impl_trait)]

View file

@ -264,10 +264,6 @@ impl<'a, 'b> Parser<'a, 'b> {
pos: token.start, pos: token.start,
value: true, value: true,
}, },
T::DQuote => E::String {
pos: token.start,
literal: self.move_str(token),
},
T::Struct => E::Struct { T::Struct => E::Struct {
fields: { fields: {
self.ns_bound = self.idents.len(); self.ns_bound = self.idents.len();
@ -393,21 +389,19 @@ impl<'a, 'b> Parser<'a, 'b> {
trailing_comma: std::mem::take(&mut self.trailing_sep), trailing_comma: std::mem::take(&mut self.trailing_sep),
}, },
T::Ctor => E::Ctor { T::Ctor => E::Ctor {
pos: token.start, pos: token.start,
ty: Some(self.arena.alloc(expr)), ty: Some(self.arena.alloc(expr)),
fields: self.collect_list(T::Comma, T::RBrace, |s| { fields: self.collect_list(T::Comma, T::RBrace, |s| {
let name = s.expect_advance(T::Ident); let name = s.expect_advance(T::Ident);
s.expect_advance(T::Colon); s.expect_advance(T::Colon);
let val = s.expr(); let val = s.expr();
(Some(s.move_str(name)), val) (Some(s.move_str(name)), val)
}), }),
trailing_comma: std::mem::take(&mut self.trailing_sep),
}, },
T::Tupl => E::Ctor { T::Tupl => E::Ctor {
pos: token.start, pos: token.start,
ty: Some(self.arena.alloc(expr)), ty: Some(self.arena.alloc(expr)),
fields: self.collect_list(T::Comma, T::RParen, |s| (None, s.expr())), fields: self.collect_list(T::Comma, T::RParen, |s| (None, s.expr())),
trailing_comma: std::mem::take(&mut self.trailing_sep),
}, },
T::Dot => E::Field { T::Dot => E::Field {
target: self.arena.alloc(expr), target: self.arena.alloc(expr),
@ -590,10 +584,6 @@ macro_rules! generate_expr {
generate_expr! { generate_expr! {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Expr<'a> { pub enum Expr<'a> {
String {
pos: Pos,
literal: &'a str,
},
Comment { Comment {
pos: Pos, pos: Pos,
literal: &'a str, literal: &'a str,
@ -662,7 +652,6 @@ generate_expr! {
pos: Pos, pos: Pos,
ty: Option<&'a Self>, ty: Option<&'a Self>,
fields: &'a [(Option<&'a str>, Self)], fields: &'a [(Option<&'a str>, Self)],
trailing_comma: bool,
}, },
Field { Field {
target: &'a Self, target: &'a Self,
@ -799,7 +788,6 @@ impl<'a> std::fmt::Display for Expr<'a> {
} }
match *self { match *self {
Self::String { literal, .. } => write!(f, "{}", literal),
Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()), Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"), Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"),
Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)), Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)),
@ -811,33 +799,28 @@ impl<'a> std::fmt::Display for Expr<'a> {
write!(f, "struct {{")?; write!(f, "struct {{")?;
fmt_list(f, "}", fields, |(name, val), f| write!(f, "{name}: {val}",)) fmt_list(f, "}", fields, |(name, val), f| write!(f, "{name}: {val}",))
} }
Self::Ctor { Self::Ctor { ty, fields, .. } => {
ty,
fields,
trailing_comma,
..
} => {
let (left, rith) = if fields.iter().any(|(name, _)| name.is_some()) { let (left, rith) = if fields.iter().any(|(name, _)| name.is_some()) {
('{', "}") ('{', '}')
} else { } else {
('(', ")") ('(', ')')
}; };
if let Some(ty) = ty { if let Some(ty) = ty {
write!(f, "{}", Unary(ty))?; write!(f, "{}", Unary(ty))?;
} }
write!(f, ".{left}")?; write!(f, ".{left}")?;
let fmt_field = |(name, val): &_, f: &mut std::fmt::Formatter| { let first = &mut true;
for (name, val) in fields {
if !std::mem::take(first) {
write!(f, ", ")?;
}
if let Some(name) = name { if let Some(name) = name {
write!(f, "{name}: ")?; write!(f, "{name}: ")?;
} }
write!(f, "{val}") write!(f, "{val}")?;
};
if trailing_comma {
fmt_trailing_list(f, rith, fields, fmt_field)
} else {
fmt_list(f, rith, fields, fmt_field)
} }
write!(f, "{rith}")
} }
Self::UnOp { op, val, .. } => write!(f, "{op}{}", Unary(val)), Self::UnOp { op, val, .. } => write!(f, "{op}{}", Unary(val)),
Self::Break { .. } => write!(f, "break;"), Self::Break { .. } => write!(f, "break;"),
@ -1223,8 +1206,5 @@ mod test {
some_ordinary_code => "loft := fn(): int return loft(1, 2, 3);\n"; some_ordinary_code => "loft := fn(): int return loft(1, 2, 3);\n";
some_arg_per_line_code => "loft := fn(): int return loft(\ some_arg_per_line_code => "loft := fn(): int return loft(\
\n\t1,\n\t2,\n\t3,\n);\n"; \n\t1,\n\t2,\n\t3,\n);\n";
some_ordinary_struct => "loft := fn(): int return loft.{a: 1, b: 2};\n";
some_ordinary_fild_per_lin_struct => "loft := fn(): int return loft.{\
\n\ta: 1,\n\tb: 2,\n};\n";
} }
} }