forked from AbleOS/holey-bytes
-__-
This commit is contained in:
parent
e147358fce
commit
91907a90ff
|
@ -252,6 +252,20 @@ main := fn(): int {
|
|||
- `@bitcast(<expr>)`: tell compiler to assume `@TypeOf(<expr>)` is whatever is inferred, so long as size and alignment did not change
|
||||
- `@eca(<ty>, <expr>...)`: invoke `eca` instruction, where `<ty>` is the type this will return and `<expr>...` are arguments passed to the call
|
||||
|
||||
#### c_strings
|
||||
```hb
|
||||
main := fn(): int {
|
||||
// when a string ends with '\0' it's a C string and thus its type is '^u8'
|
||||
some_str := "abඞ\n\r\t\{ff}\{fff0f0ff}\0";
|
||||
len := 0;
|
||||
loop if *some_str == 0 break else {
|
||||
len += 1;
|
||||
some_str += 1;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
```
|
||||
|
||||
### Incomplete Examples
|
||||
|
||||
#### generic_types
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
use std::{ops::Range, rc::Rc};
|
||||
|
||||
use crate::{
|
||||
use {
|
||||
crate::{
|
||||
ident::{self, Ident},
|
||||
instrs::{self, *},
|
||||
lexer::TokenKind,
|
||||
log,
|
||||
parser::{self, find_symbol, idfl, Expr, ExprRef, FileId, Pos},
|
||||
HashMap,
|
||||
},
|
||||
std::{ops::Range, rc::Rc},
|
||||
};
|
||||
|
||||
use self::reg::{RET_ADDR, STACK_PTR, ZERO};
|
||||
|
@ -975,6 +976,7 @@ pub struct Snapshot {
|
|||
code: usize,
|
||||
funcs: usize,
|
||||
globals: usize,
|
||||
strings: usize,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
@ -982,6 +984,7 @@ struct Output {
|
|||
code: Vec<u8>,
|
||||
funcs: Vec<(ty::Func, Reloc)>,
|
||||
globals: Vec<(ty::Global, Reloc)>,
|
||||
strings: Vec<StringReloc>,
|
||||
}
|
||||
|
||||
impl Output {
|
||||
|
@ -1022,21 +1025,24 @@ impl Output {
|
|||
self.emit(tx());
|
||||
}
|
||||
|
||||
fn append(&mut self, val: &mut Self) {
|
||||
for (_, rel) in val.globals.iter_mut().chain(&mut val.funcs) {
|
||||
rel.offset += self.code.len() as Offset;
|
||||
fn reloc_iter_mut(&mut self, snap: &Snapshot) -> impl Iterator<Item = &mut Reloc> {
|
||||
self.globals[snap.globals..]
|
||||
.iter_mut()
|
||||
.chain(&mut self.funcs[snap.funcs..])
|
||||
.map(|(_, rel)| rel)
|
||||
.chain(
|
||||
self.strings[snap.strings..]
|
||||
.iter_mut()
|
||||
.map(|rl| &mut rl.reloc),
|
||||
)
|
||||
}
|
||||
|
||||
self.code.append(&mut val.code);
|
||||
self.funcs.append(&mut val.funcs);
|
||||
self.globals.append(&mut val.globals);
|
||||
fn append(&mut self, val: &mut Self) {
|
||||
val.pop(self, &Snapshot::default());
|
||||
}
|
||||
|
||||
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
|
||||
for (_, rel) in self.globals[snap.globals..]
|
||||
.iter_mut()
|
||||
.chain(&mut self.funcs[snap.funcs..])
|
||||
{
|
||||
for rel in self.reloc_iter_mut(snap) {
|
||||
rel.offset -= snap.code as Offset;
|
||||
rel.offset += stash.code.len() as Offset;
|
||||
}
|
||||
|
@ -1044,12 +1050,14 @@ impl Output {
|
|||
stash.code.extend(self.code.drain(snap.code..));
|
||||
stash.funcs.extend(self.funcs.drain(snap.funcs..));
|
||||
stash.globals.extend(self.globals.drain(snap.globals..));
|
||||
stash.strings.extend(self.strings.drain(snap.strings..));
|
||||
}
|
||||
|
||||
fn trunc(&mut self, snap: &Snapshot) {
|
||||
self.code.truncate(snap.code);
|
||||
self.globals.truncate(snap.globals);
|
||||
self.funcs.truncate(snap.funcs);
|
||||
self.strings.truncate(snap.strings);
|
||||
}
|
||||
|
||||
fn write_trap(&mut self, trap: Trap) {
|
||||
|
@ -1063,6 +1071,7 @@ impl Output {
|
|||
code: self.code.len(),
|
||||
funcs: self.funcs.len(),
|
||||
globals: self.globals.len(),
|
||||
strings: self.strings.len(),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1209,10 +1218,21 @@ enum Trap {
|
|||
},
|
||||
}
|
||||
|
||||
struct StringReloc {
|
||||
reloc: Reloc,
|
||||
range: std::ops::Range<u32>,
|
||||
}
|
||||
impl StringReloc {
|
||||
fn range(&self) -> std::ops::Range<usize> {
|
||||
self.range.start as _..self.range.end as _
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Codegen {
|
||||
pub files: Vec<parser::Ast>,
|
||||
tasks: Vec<Option<FTask>>,
|
||||
string_data: Vec<u8>,
|
||||
|
||||
tys: Types,
|
||||
ci: ItemCtx,
|
||||
|
@ -1436,6 +1456,73 @@ impl Codegen {
|
|||
ty: ty::BOOL.into(),
|
||||
loc: Loc::imm(value as u64),
|
||||
}),
|
||||
E::String { pos, mut literal } => {
|
||||
literal = literal.trim_matches('"');
|
||||
|
||||
if !literal.ends_with("\\0") {
|
||||
self.report(pos, "string literal must end with null byte (for now)");
|
||||
}
|
||||
|
||||
let reloc = Reloc::new(self.output.code.len() as _, 3, 4);
|
||||
let start = self.string_data.len();
|
||||
|
||||
let report = |s: &Codegen, bytes: &std::str::Bytes, message| {
|
||||
s.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
||||
};
|
||||
|
||||
let decode_braces = |s: &mut Codegen, bytes: &mut std::str::Bytes| {
|
||||
while let Some(b) = bytes.next()
|
||||
&& b != b'}'
|
||||
{
|
||||
let c = bytes
|
||||
.next()
|
||||
.unwrap_or_else(|| report(s, bytes, "incomplete escape sequence"));
|
||||
let decode = |s: &Codegen, b: u8| match b {
|
||||
b'0'..=b'9' => b - b'0',
|
||||
b'a'..=b'f' => b - b'a' + 10,
|
||||
b'A'..=b'F' => b - b'A' + 10,
|
||||
_ => report(s, bytes, "expected hex digit or '}'"),
|
||||
};
|
||||
s.string_data.push(decode(s, b) << 4 | decode(s, c));
|
||||
}
|
||||
};
|
||||
|
||||
let mut bytes = literal.bytes();
|
||||
while let Some(b) = bytes.next() {
|
||||
if b != b'\\' {
|
||||
self.string_data.push(b);
|
||||
continue;
|
||||
}
|
||||
let b = match bytes
|
||||
.next()
|
||||
.unwrap_or_else(|| report(self, &bytes, "incomplete escape sequence"))
|
||||
{
|
||||
b'n' => b'\n',
|
||||
b'r' => b'\r',
|
||||
b't' => b'\t',
|
||||
b'\\' => b'\\',
|
||||
b'\'' => b'\'',
|
||||
b'"' => b'"',
|
||||
b'0' => b'\0',
|
||||
b'{' => {
|
||||
decode_braces(self, &mut bytes);
|
||||
continue;
|
||||
}
|
||||
_ => report(
|
||||
self,
|
||||
&bytes,
|
||||
"unknown escape sequence, expected [nrt\\\"'{0]",
|
||||
),
|
||||
};
|
||||
self.string_data.push(b);
|
||||
}
|
||||
|
||||
let range = start as _..self.string_data.len() as _;
|
||||
self.output.strings.push(StringReloc { reloc, range });
|
||||
let reg = self.ci.regs.allocate();
|
||||
self.output.emit(instrs::lra(reg.get(), 0, 0));
|
||||
Some(Value::new(self.tys.make_ptr(ty::U8.into()), reg))
|
||||
}
|
||||
E::Ctor {
|
||||
pos, ty, fields, ..
|
||||
} => {
|
||||
|
@ -1699,20 +1786,14 @@ impl Codegen {
|
|||
.find(|(_, v)| v.id == id) =>
|
||||
{
|
||||
let sym = parser::find_symbol(&self.files[self.ci.file as usize].symbols, id);
|
||||
let loc = match idfl::index(sym.flags) == dbg!(index)
|
||||
let loc = match idfl::index(sym.flags) == index
|
||||
&& !self
|
||||
.ci
|
||||
.loops
|
||||
.last()
|
||||
.is_some_and(|l| l.var_count > var_index as u32)
|
||||
{
|
||||
true => {
|
||||
dbg!(
|
||||
log::dbg!("braj: {expr}"),
|
||||
std::mem::take(&mut var.value.loc)
|
||||
)
|
||||
.1
|
||||
}
|
||||
true => std::mem::take(&mut var.value.loc),
|
||||
false => var.value.loc.as_ref(),
|
||||
};
|
||||
|
||||
|
@ -2489,7 +2570,45 @@ impl Codegen {
|
|||
_ = task::unpack(self.tys.globals[g as usize].offset)
|
||||
.map(|off| rel.apply_jump(&mut self.output.code, off));
|
||||
true
|
||||
})
|
||||
});
|
||||
|
||||
self.compress_strings();
|
||||
let base = self.output.code.len() as u32;
|
||||
self.output.code.append(&mut self.string_data);
|
||||
|
||||
for srel in self.output.strings.drain(..) {
|
||||
srel.reloc
|
||||
.apply_jump(&mut self.output.code, srel.range.start + base);
|
||||
}
|
||||
}
|
||||
|
||||
fn compress_strings(&mut self) {
|
||||
// FIXME: we can go faster
|
||||
self.output
|
||||
.strings
|
||||
.sort_by(|a, b| self.string_data[b.range()].cmp(&self.string_data[a.range()]));
|
||||
|
||||
let mut cursor = 0;
|
||||
let mut anchor = 0;
|
||||
for i in 1..self.output.strings.len() {
|
||||
let [a, b] = self.output.strings.get_many_mut([anchor, i]).unwrap();
|
||||
if self.string_data[a.range()].ends_with(&self.string_data[b.range()]) {
|
||||
b.range.end = a.range.end;
|
||||
b.range.start = a.range.end - (b.range.end - b.range.start);
|
||||
} else {
|
||||
self.string_data.copy_within(a.range(), cursor);
|
||||
cursor += a.range.len();
|
||||
anchor = i;
|
||||
}
|
||||
}
|
||||
|
||||
if !self.output.strings.is_empty() {
|
||||
let a = &self.output.strings[anchor];
|
||||
self.string_data.copy_within(a.range(), cursor);
|
||||
cursor += a.range.len();
|
||||
}
|
||||
|
||||
self.string_data.truncate(cursor)
|
||||
}
|
||||
|
||||
// TODO: sometimes it's better to do this in bulk
|
||||
|
@ -2906,5 +3025,6 @@ mod tests {
|
|||
global_variables => README;
|
||||
generic_types => README;
|
||||
generic_functions => README;
|
||||
c_strings => README;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
#![feature(vec_pop_if)]
|
||||
#![feature(get_many_mut)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(new_uninit)]
|
||||
#![feature(anonymous_lifetime_in_impl_trait)]
|
||||
|
|
|
@ -264,6 +264,10 @@ impl<'a, 'b> Parser<'a, 'b> {
|
|||
pos: token.start,
|
||||
value: true,
|
||||
},
|
||||
T::DQuote => E::String {
|
||||
pos: token.start,
|
||||
literal: self.move_str(token),
|
||||
},
|
||||
T::Struct => E::Struct {
|
||||
fields: {
|
||||
self.ns_bound = self.idents.len();
|
||||
|
@ -586,6 +590,10 @@ macro_rules! generate_expr {
|
|||
generate_expr! {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum Expr<'a> {
|
||||
String {
|
||||
pos: Pos,
|
||||
literal: &'a str,
|
||||
},
|
||||
Comment {
|
||||
pos: Pos,
|
||||
literal: &'a str,
|
||||
|
@ -791,6 +799,7 @@ impl<'a> std::fmt::Display for Expr<'a> {
|
|||
}
|
||||
|
||||
match *self {
|
||||
Self::String { literal, .. } => write!(f, "{}", literal),
|
||||
Self::Comment { literal, .. } => write!(f, "{}", literal.trim_end()),
|
||||
Self::Mod { path, .. } => write!(f, "@mod(\"{path}\")"),
|
||||
Self::Field { target, field } => write!(f, "{}.{field}", Postfix(target)),
|
||||
|
|
0
hblang/tests/codegen_tests_c_strings.txt
Normal file
0
hblang/tests/codegen_tests_c_strings.txt
Normal file
Loading…
Reference in a new issue