forked from AbleOS/holey-bytes
-__-
This commit is contained in:
parent
e147358fce
commit
91907a90ff
|
@ -252,6 +252,20 @@ main := fn(): int {
|
||||||
- `@bitcast(<expr>)`: tell compiler to assume `@TypeOf(<expr>)` is whatever is inferred, so long as size and alignment did not change
|
- `@bitcast(<expr>)`: tell compiler to assume `@TypeOf(<expr>)` is whatever is inferred, so long as size and alignment did not change
|
||||||
- `@eca(<ty>, <expr>...)`: invoke `eca` instruction, where `<ty>` is the type this will return and `<expr>...` are arguments passed to the call
|
- `@eca(<ty>, <expr>...)`: invoke `eca` instruction, where `<ty>` is the type this will return and `<expr>...` are arguments passed to the call
|
||||||
|
|
||||||
|
#### c_strings
|
||||||
|
```hb
|
||||||
|
main := fn(): int {
|
||||||
|
// when a string ends with '\0' it's a C string and thus its type is '^u8'
|
||||||
|
some_str := "abඞ\n\r\t\{ff}\{fff0f0ff}\0";
|
||||||
|
len := 0;
|
||||||
|
loop if *some_str == 0 break else {
|
||||||
|
len += 1;
|
||||||
|
some_str += 1;
|
||||||
|
}
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
### Incomplete Examples
|
### Incomplete Examples
|
||||||
|
|
||||||
#### generic_types
|
#### generic_types
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
use std::{ops::Range, rc::Rc};
|
use {
|
||||||
|
crate::{
|
||||||
use crate::{
|
|
||||||
ident::{self, Ident},
|
ident::{self, Ident},
|
||||||
instrs::{self, *},
|
instrs::{self, *},
|
||||||
lexer::TokenKind,
|
lexer::TokenKind,
|
||||||
log,
|
log,
|
||||||
parser::{self, find_symbol, idfl, Expr, ExprRef, FileId, Pos},
|
parser::{self, find_symbol, idfl, Expr, ExprRef, FileId, Pos},
|
||||||
HashMap,
|
HashMap,
|
||||||
|
},
|
||||||
|
std::{ops::Range, rc::Rc},
|
||||||
};
|
};
|
||||||
|
|
||||||
use self::reg::{RET_ADDR, STACK_PTR, ZERO};
|
use self::reg::{RET_ADDR, STACK_PTR, ZERO};
|
||||||
|
@ -975,6 +976,7 @@ pub struct Snapshot {
|
||||||
code: usize,
|
code: usize,
|
||||||
funcs: usize,
|
funcs: usize,
|
||||||
globals: usize,
|
globals: usize,
|
||||||
|
strings: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -982,6 +984,7 @@ struct Output {
|
||||||
code: Vec<u8>,
|
code: Vec<u8>,
|
||||||
funcs: Vec<(ty::Func, Reloc)>,
|
funcs: Vec<(ty::Func, Reloc)>,
|
||||||
globals: Vec<(ty::Global, Reloc)>,
|
globals: Vec<(ty::Global, Reloc)>,
|
||||||
|
strings: Vec<StringReloc>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Output {
|
impl Output {
|
||||||
|
@ -1022,21 +1025,24 @@ impl Output {
|
||||||
self.emit(tx());
|
self.emit(tx());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn append(&mut self, val: &mut Self) {
|
fn reloc_iter_mut(&mut self, snap: &Snapshot) -> impl Iterator<Item = &mut Reloc> {
|
||||||
for (_, rel) in val.globals.iter_mut().chain(&mut val.funcs) {
|
self.globals[snap.globals..]
|
||||||
rel.offset += self.code.len() as Offset;
|
.iter_mut()
|
||||||
|
.chain(&mut self.funcs[snap.funcs..])
|
||||||
|
.map(|(_, rel)| rel)
|
||||||
|
.chain(
|
||||||
|
self.strings[snap.strings..]
|
||||||
|
.iter_mut()
|
||||||
|
.map(|rl| &mut rl.reloc),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
self.code.append(&mut val.code);
|
fn append(&mut self, val: &mut Self) {
|
||||||
self.funcs.append(&mut val.funcs);
|
val.pop(self, &Snapshot::default());
|
||||||
self.globals.append(&mut val.globals);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
|
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
|
||||||
for (_, rel) in self.globals[snap.globals..]
|
for rel in self.reloc_iter_mut(snap) {
|
||||||
.iter_mut()
|
|
||||||
.chain(&mut self.funcs[snap.funcs..])
|
|
||||||
{
|
|
||||||
rel.offset -= snap.code as Offset;
|
rel.offset -= snap.code as Offset;
|
||||||
rel.offset += stash.code.len() as Offset;
|
rel.offset += stash.code.len() as Offset;
|
||||||
}
|
}
|
||||||
|
@ -1044,12 +1050,14 @@ impl Output {
|
||||||
stash.code.extend(self.code.drain(snap.code..));
|
stash.code.extend(self.code.drain(snap.code..));
|
||||||
stash.funcs.extend(self.funcs.drain(snap.funcs..));
|
stash.funcs.extend(self.funcs.drain(snap.funcs..));
|
||||||
stash.globals.extend(self.globals.drain(snap.globals..));
|
stash.globals.extend(self.globals.drain(snap.globals..));
|
||||||
|
stash.strings.extend(self.strings.drain(snap.strings..));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn trunc(&mut self, snap: &Snapshot) {
|
fn trunc(&mut self, snap: &Snapshot) {
|
||||||
self.code.truncate(snap.code);
|
self.code.truncate(snap.code);
|
||||||
self.globals.truncate(snap.globals);
|
self.globals.truncate(snap.globals);
|
||||||
self.funcs.truncate(snap.funcs);
|
self.funcs.truncate(snap.funcs);
|
||||||
|
self.strings.truncate(snap.strings);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_trap(&mut self, trap: Trap) {
|
fn write_trap(&mut self, trap: Trap) {
|
||||||
|
@ -1063,6 +1071,7 @@ impl Output {
|
||||||
code: self.code.len(),
|
code: self.code.len(),
|
||||||
funcs: self.funcs.len(),
|
funcs: self.funcs.len(),
|
||||||
globals: self.globals.len(),
|
globals: self.globals.len(),
|
||||||
|
strings: self.strings.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1209,10 +1218,21 @@ enum Trap {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct StringReloc {
|
||||||
|
reloc: Reloc,
|
||||||
|
range: std::ops::Range<u32>,
|
||||||
|
}
|
||||||
|
impl StringReloc {
|
||||||
|
fn range(&self) -> std::ops::Range<usize> {
|
||||||
|
self.range.start as _..self.range.end as _
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct Codegen {
|
pub struct Codegen {
|
||||||
pub files: Vec<parser::Ast>,
|
pub files: Vec<parser::Ast>,
|
||||||
tasks: Vec<Option<FTask>>,
|
tasks: Vec<Option<FTask>>,
|
||||||
|
string_data: Vec<u8>,
|
||||||
|
|
||||||
tys: Types,
|
tys: Types,
|
||||||
ci: ItemCtx,
|
ci: ItemCtx,
|
||||||
|
@ -1436,6 +1456,73 @@ impl Codegen {
|
||||||
ty: ty::BOOL.into(),
|
ty: ty::BOOL.into(),
|
||||||
loc: Loc::imm(value as u64),
|
loc: Loc::imm(value as u64),
|
||||||
}),
|
}),
|
||||||
|
E::String { pos, mut literal } => {
|
||||||
|
literal = literal.trim_matches('"');
|
||||||
|
|
||||||
|
if !literal.ends_with("\\0") {
|
||||||
|
self.report(pos, "string literal must end with null byte (for now)");
|
||||||
|
}
|
||||||
|
|
||||||
|
let reloc = Reloc::new(self.output.code.len() as _, 3, 4);
|
||||||
|
let start = self.string_data.len();
|
||||||
|
|
||||||
|
let report = |s: &Codegen, bytes: &std::str::Bytes, message| {
|
||||||
|
s.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
||||||
|
};
|
||||||
|
|
||||||
|
let decode_braces = |s: &mut Codegen, bytes: &mut std::str::Bytes| {
|
||||||
|
while let Some(b) = bytes.next()
|
||||||
|
&& b != b'}'
|
||||||
|
{
|
||||||
|
let c = bytes
|
||||||
|
.next()
|
||||||
|
.unwrap_or_else(|| report(s, bytes, "incomplete escape sequence"));
|
||||||
|
let decode = |s: &Codegen, b: u8| match b {
|
||||||
|
b'0'..=b'9' => b - b'0',
|
||||||
|
b'a'..=b'f' => b - b'a' + 10,
|
||||||
|
b'A'..=b'F' => b - b'A' + 10,
|
||||||
|
_ => report(s, bytes, "expected hex digit or '}'"),
|
||||||
|
};
|
||||||
|
s.string_data.push(decode(s, b) << 4 | decode(s, c));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut bytes = literal.bytes();
|
||||||
|
while let Some(b) = bytes.next() {
|
||||||
|
if b != b'\\' {
|
||||||
|
self.string_data.push(b);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let b = match bytes
|
||||||
|
.next()
|
||||||
|
.unwrap_or_else(|| report(self, &bytes, "incomplete escape sequence"))
|
||||||
|
{
|
||||||
|
b'n' => b'\n',
|
||||||
|
b'r' => b'\r',
|
||||||
|
b't' => b'\t',
|
||||||
|
b'\\' => b'\\',
|
||||||
|
b'\'' => b'\'',
|
||||||
|
b'"' => b'"',
|
||||||
|
b'0' => b'\0',
|
||||||
|
b'{' => {
|
||||||
|
decode_braces(self, &mut bytes);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
_ => report(
|
||||||
|
self,
|
||||||
|
&bytes,
|
||||||
|
"unknown escape sequence, expected [nrt\\\"'{0]",
|
||||||
|
),
|
||||||
|
};
|
||||||
|
self.string_data.push(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
let range = start as _..self.string_data.len() as _;
|
||||||
|
self.output.strings.push(StringReloc { reloc, range });
|
||||||
|
let reg = self.ci.regs.allocate();
|
||||||
|
self.output.emit(instrs::lra(reg.get(), 0, 0));
|
||||||
|
Some(Value::new(self.tys.make_ptr(ty::U8.into()), reg))
|
||||||
|
}
|
||||||
E::Ctor {
|
E::Ctor {
|
||||||
pos, ty, fields, ..
|
pos, ty, fields, ..
|
||||||
} => {
|
} => {
|
||||||
|
@ -1699,20 +1786,14 @@ impl Codegen {
|
||||||
.find(|(_, v)| v.id == id) =>
|
.find(|(_, v)| v.id == id) =>
|
||||||
{
|
{
|
||||||
let sym = parser::find_symbol(&self.files[self.ci.file as usize].symbols, id);
|
let sym = parser::find_symbol(&self.files[self.ci.file as usize].symbols, id);
|
||||||
let loc = match idfl::index(sym.flags) == dbg!(index)
|
let loc = match idfl::index(sym.flags) == index
|
||||||
&& !self
|
&& !self
|
||||||
.ci
|
.ci
|
||||||
.loops
|
.loops
|
||||||
.last()
|
.last()
|
||||||
.is_some_and(|l| l.var_count > var_index as u32)
|
.is_some_and(|l| l.var_count > var_index as u32)
|
||||||
{
|
{
|
||||||
true => {
|
true => std::mem::take(&mut var.value.loc),
|
||||||
dbg!(
|
|
||||||
log::dbg!("braj: {expr}"),
|
|
||||||
std::mem::take(&mut var.value.loc)
|
|
||||||
)
|
|
||||||
.1
|
|
||||||
}
|
|
||||||
false => var.value.loc.as_ref(),
|
false => var.value.loc.as_ref(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2489,7 +2570,45 @@ impl Codegen {
|
||||||
_ = task::unpack(self.tys.globals[g as usize].offset)
|
_ = task::unpack(self.tys.globals[g as usize].offset)
|
||||||
.map(|off| rel.apply_jump(&mut self.output.code, off));
|
.map(|off| rel.apply_jump(&mut self.output.code, off));
|
||||||
true
|
true
|
||||||
})
|
});
|
||||||
|
|
||||||
|
self.compress_strings();
|
||||||
|
let base = self.output.code.len() as u32;
|
||||||
|
self.output.code.append(&mut self.string_data);
|
||||||
|
|
||||||
|
for srel in self.output.strings.drain(..) {
|
||||||
|
srel.reloc
|
||||||
|
.apply_jump(&mut self.output.code, srel.range.start + base);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compress_strings(&mut self) {
|
||||||
|
// FIXME: we can go faster
|
||||||
|
self.output
|
||||||
|
.strings
|
||||||
|
.sort_by(|a, b| self.string_data[b.range()].cmp(&self.string_data[a.range()]));
|
||||||
|
|
||||||
|
let mut cursor = 0;
|
||||||
|
let mut anchor = 0;
|
||||||
|
for i in 1..self.output.strings.len() {
|
||||||
|
let [a, b] = self.output.strings.get_many_mut([anchor, i]).unwrap();
|
||||||
|
if self.string_data[a.range()].ends_with(&self.string_data[b.range()]) {
|
||||||
|
b.range.end = a.range.end;
|
||||||
|
b.range.start = a.range.end - (b.range.end - b.range.start);
|
||||||
|
} else {
|
||||||
|
self.string_data.copy_within(a.range(), cursor);
|
||||||
|
cursor += a.range.len();
|
||||||
|
anchor = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.output.strings.is_empty() {
|
||||||
|
let a = &self.output.strings[anchor];
|
||||||
|
self.string_data.copy_within(a.range(), cursor);
|
||||||
|
cursor += a.range.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
self.string_data.truncate(cursor)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: sometimes it's better to do this in bulk
|
// TODO: sometimes it's better to do this in bulk
|
||||||
|
@ -2906,5 +3025,6 @@ mod tests {
|
||||||
global_variables => README;
|
global_variables => README;
|
||||||
generic_types => README;
|
generic_types => README;
|
||||||
generic_functions => README;
|
generic_functions => README;
|
||||||
|
c_strings => README;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
#![feature(vec_pop_if)]
|
#![feature(vec_pop_if)]
|
||||||
|
#![feature(get_many_mut)]
|
||||||
#![feature(core_intrinsics)]
|
#![feature(core_intrinsics)]
|
||||||
#![feature(new_uninit)]
|
#![feature(new_uninit)]
|
||||||
#![feature(anonymous_lifetime_in_impl_trait)]
|
#![feature(anonymous_lifetime_in_impl_trait)]
|
||||||
|
|
|
@ -264,6 +264,10 @@ impl<'a, 'b> Parser<'a, 'b> {
|
||||||
pos: token.start,
|
pos: token.start,
|
||||||
value: true,
|
value: true,
|
||||||
},
|
},
|
||||||
|
T::DQuote => E::String {
|
||||||
|
pos: token.start,
|
||||||
|
literal: self.move_str(token),
|
||||||
|
},
|
||||||
T::Struct => E::Struct {
|
T::Struct => E::Struct {
|
||||||
fields: {
|
fields: {
|
||||||
self.ns_bound = self.idents.len();
|
self.ns_bound = self.idents.len();
|
||||||
|
@ -586,6 +590,10 @@ macro_rules! generate_expr {
|
||||||
generate_expr! {
|
generate_expr! {
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum Expr<'a> {
|
pub enum Expr<'a> {
|
||||||
|
String {
|
||||||
|
pos: Pos,
|
||||||
|
literal: &'a str,
|
||||||
|
},
|
||||||
Comment {
|
Comment {
|
||||||
pos: Pos,
|
pos: Pos,
|
||||||
literal: &'a str,
|
literal: &'a str,
|
||||||
|
@ -791,6 +799,7 @@ impl<'a> std::fmt::Display for Expr<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
match *self {
|
match *self {
|
||||||
|
Self::String { literal, .. } => write!(f, "{}", literal),
|
||||||
Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
|
Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
|
||||||
Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"),
|
Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"),
|
||||||
Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)),
|
Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)),
|
||||||
|
|
0
hblang/tests/codegen_tests_c_strings.txt
Normal file
0
hblang/tests/codegen_tests_c_strings.txt
Normal file
Loading…
Reference in a new issue