forked from AbleOS/holey-bytes
YEEEEEEEEEEEEEEEEEEEEEEEEEEEES
This commit is contained in:
parent
12c7467be2
commit
3807fe22da
|
@ -254,15 +254,23 @@ main := fn(): int {
|
||||||
|
|
||||||
#### c_strings
|
#### c_strings
|
||||||
```hb
|
```hb
|
||||||
|
|
||||||
|
str_len := fn(str: ^u8): int {
|
||||||
|
len := 0;
|
||||||
|
loop if *str == 0 break else {
|
||||||
|
len += 1;
|
||||||
|
str += 1;
|
||||||
|
}
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
main := fn(): int {
|
main := fn(): int {
|
||||||
// when string ends with '\0' its a C string and thus type is '^u8'
|
// when string ends with '\0' its a C string and thus type is '^u8'
|
||||||
some_str := "abඞ\n\r\t\{ff}\{fff0f0ff}\0";
|
some_str := "abඞ\n\r\t\{ff}\{fff0f0ff}\0";
|
||||||
len := 0;
|
len := str_len(some_str);
|
||||||
loop if *some_str == 0 break else {
|
some_other_str := "fff\0";
|
||||||
len += 1;
|
lep := str_len(some_other_str);
|
||||||
some_str += 1;
|
return lep + len;
|
||||||
}
|
|
||||||
return len;
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -807,7 +807,10 @@ impl ItemCtx {
|
||||||
let mut exmpl = Output::default();
|
let mut exmpl = Output::default();
|
||||||
exmpl.emit_prelude();
|
exmpl.emit_prelude();
|
||||||
|
|
||||||
debug_assert!(output.code[self.snap.code..].starts_with(&exmpl.code));
|
debug_assert_eq!(
|
||||||
|
exmpl.code.as_slice(),
|
||||||
|
&output.code[self.snap.code..][..exmpl.code.len()],
|
||||||
|
);
|
||||||
|
|
||||||
write_reloc(&mut output.code, allocate(3), -(pushed + stack), 8);
|
write_reloc(&mut output.code, allocate(3), -(pushed + stack), 8);
|
||||||
write_reloc(&mut output.code, allocate(8 + 3), stack, 8);
|
write_reloc(&mut output.code, allocate(8 + 3), stack, 8);
|
||||||
|
@ -1003,18 +1006,20 @@ struct FTask {
|
||||||
|
|
||||||
#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
|
#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
|
||||||
pub struct Snapshot {
|
pub struct Snapshot {
|
||||||
code: usize,
|
code: usize,
|
||||||
funcs: usize,
|
string_data: usize,
|
||||||
globals: usize,
|
funcs: usize,
|
||||||
strings: usize,
|
globals: usize,
|
||||||
|
strings: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct Output {
|
struct Output {
|
||||||
code: Vec<u8>,
|
code: Vec<u8>,
|
||||||
funcs: Vec<(ty::Func, Reloc)>,
|
string_data: Vec<u8>,
|
||||||
globals: Vec<(ty::Global, Reloc)>,
|
funcs: Vec<(ty::Func, Reloc)>,
|
||||||
strings: Vec<StringReloc>,
|
globals: Vec<(ty::Global, Reloc)>,
|
||||||
|
strings: Vec<StringReloc>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Output {
|
impl Output {
|
||||||
|
@ -1072,15 +1077,12 @@ impl Output {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
|
fn pop(&mut self, stash: &mut Self, snap: &Snapshot) {
|
||||||
// for rel in self.reloc_iter_mut(snap) {
|
|
||||||
// debug_assert!(snap.code < rel.offset as usize);
|
|
||||||
// rel.offset -= snap.code as Offset;
|
|
||||||
// rel.offset += stash.code.len() as Offset;
|
|
||||||
// }
|
|
||||||
|
|
||||||
let init_code = stash.code.len();
|
let init_code = stash.code.len();
|
||||||
|
|
||||||
stash.code.extend(self.code.drain(snap.code..));
|
stash.code.extend(self.code.drain(snap.code..));
|
||||||
|
stash
|
||||||
|
.string_data
|
||||||
|
.extend(self.string_data.drain(snap.string_data..));
|
||||||
stash.funcs.extend(
|
stash.funcs.extend(
|
||||||
self.funcs.drain(snap.funcs..).inspect(|(_, rel)| {
|
self.funcs.drain(snap.funcs..).inspect(|(_, rel)| {
|
||||||
debug_assert!(rel.offset as usize + init_code < stash.code.len())
|
debug_assert!(rel.offset as usize + init_code < stash.code.len())
|
||||||
|
@ -1106,6 +1108,7 @@ impl Output {
|
||||||
|
|
||||||
fn trunc(&mut self, snap: &Snapshot) {
|
fn trunc(&mut self, snap: &Snapshot) {
|
||||||
self.code.truncate(snap.code);
|
self.code.truncate(snap.code);
|
||||||
|
self.string_data.truncate(snap.string_data);
|
||||||
self.globals.truncate(snap.globals);
|
self.globals.truncate(snap.globals);
|
||||||
self.funcs.truncate(snap.funcs);
|
self.funcs.truncate(snap.funcs);
|
||||||
self.strings.truncate(snap.strings);
|
self.strings.truncate(snap.strings);
|
||||||
|
@ -1119,10 +1122,11 @@ impl Output {
|
||||||
|
|
||||||
fn snap(&mut self) -> Snapshot {
|
fn snap(&mut self) -> Snapshot {
|
||||||
Snapshot {
|
Snapshot {
|
||||||
code: self.code.len(),
|
code: self.code.len(),
|
||||||
funcs: self.funcs.len(),
|
string_data: self.string_data.len(),
|
||||||
globals: self.globals.len(),
|
funcs: self.funcs.len(),
|
||||||
strings: self.strings.len(),
|
globals: self.globals.len(),
|
||||||
|
strings: self.strings.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1264,8 +1268,10 @@ enum Trap {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct StringReloc {
|
struct StringReloc {
|
||||||
reloc: Reloc,
|
reloc: Reloc,
|
||||||
range: std::ops::Range<u32>,
|
range: std::ops::Range<u32>,
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
shifted: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StringReloc {
|
impl StringReloc {
|
||||||
|
@ -1276,9 +1282,8 @@ impl StringReloc {
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct Codegen {
|
pub struct Codegen {
|
||||||
pub files: Vec<parser::Ast>,
|
pub files: Vec<parser::Ast>,
|
||||||
tasks: Vec<Option<FTask>>,
|
tasks: Vec<Option<FTask>>,
|
||||||
string_data: Vec<u8>,
|
|
||||||
|
|
||||||
tys: Types,
|
tys: Types,
|
||||||
ci: ItemCtx,
|
ci: ItemCtx,
|
||||||
|
@ -1509,13 +1514,12 @@ impl Codegen {
|
||||||
self.report(pos, "string literal must end with null byte (for now)");
|
self.report(pos, "string literal must end with null byte (for now)");
|
||||||
}
|
}
|
||||||
|
|
||||||
let reloc = Reloc::new(self.local_offset() as _, 3, 4);
|
|
||||||
let start = self.string_data.len();
|
|
||||||
|
|
||||||
let report = |s: &Codegen, bytes: &std::str::Bytes, message| {
|
let report = |s: &Codegen, bytes: &std::str::Bytes, message| {
|
||||||
s.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
s.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let start = self.output.string_data.len();
|
||||||
|
|
||||||
let decode_braces = |s: &mut Codegen, bytes: &mut std::str::Bytes| {
|
let decode_braces = |s: &mut Codegen, bytes: &mut std::str::Bytes| {
|
||||||
while let Some(b) = bytes.next()
|
while let Some(b) = bytes.next()
|
||||||
&& b != b'}'
|
&& b != b'}'
|
||||||
|
@ -1529,14 +1533,14 @@ impl Codegen {
|
||||||
b'A'..=b'F' => b - b'A' + 10,
|
b'A'..=b'F' => b - b'A' + 10,
|
||||||
_ => report(s, bytes, "expected hex digit or '}'"),
|
_ => report(s, bytes, "expected hex digit or '}'"),
|
||||||
};
|
};
|
||||||
s.string_data.push(decode(s, b) << 4 | decode(s, c));
|
s.output.string_data.push(decode(s, b) << 4 | decode(s, c));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut bytes = literal.bytes();
|
let mut bytes = literal.bytes();
|
||||||
while let Some(b) = bytes.next() {
|
while let Some(b) = bytes.next() {
|
||||||
if b != b'\\' {
|
if b != b'\\' {
|
||||||
self.string_data.push(b);
|
self.output.string_data.push(b);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let b = match bytes
|
let b = match bytes
|
||||||
|
@ -1560,11 +1564,17 @@ impl Codegen {
|
||||||
"unknown escape sequence, expected [nrt\\\"'{0]",
|
"unknown escape sequence, expected [nrt\\\"'{0]",
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
self.string_data.push(b);
|
self.output.string_data.push(b);
|
||||||
}
|
}
|
||||||
|
|
||||||
let range = start as _..self.string_data.len() as _;
|
let range = start as _..self.output.string_data.len() as _;
|
||||||
self.output.strings.push(StringReloc { reloc, range });
|
let reloc = Reloc::new(self.local_offset() as _, 3, 4);
|
||||||
|
self.output.strings.push(StringReloc {
|
||||||
|
reloc,
|
||||||
|
range,
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
shifted: false,
|
||||||
|
});
|
||||||
let reg = self.ci.regs.allocate();
|
let reg = self.ci.regs.allocate();
|
||||||
self.output.emit(instrs::lra(reg.get(), 0, 0));
|
self.output.emit(instrs::lra(reg.get(), 0, 0));
|
||||||
Some(Value::new(self.tys.make_ptr(ty::U8.into()), reg))
|
Some(Value::new(self.tys.make_ptr(ty::U8.into()), reg))
|
||||||
|
@ -2305,6 +2315,8 @@ impl Codegen {
|
||||||
fn complete_call_graph(&mut self) -> Output {
|
fn complete_call_graph(&mut self) -> Output {
|
||||||
let stash = self.pop_stash();
|
let stash = self.pop_stash();
|
||||||
self.complete_call_graph_low();
|
self.complete_call_graph_low();
|
||||||
|
|
||||||
|
self.ci.snap = self.output.snap();
|
||||||
stash
|
stash
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2315,7 +2327,22 @@ impl Codegen {
|
||||||
let Some(task) = task_slot else { continue };
|
let Some(task) = task_slot else { continue };
|
||||||
self.handle_task(task);
|
self.handle_task(task);
|
||||||
}
|
}
|
||||||
self.ci.snap = self.output.snap();
|
|
||||||
|
let base = self.output.code.len() as u32;
|
||||||
|
let prev_data_len = self.output.string_data.len();
|
||||||
|
self.output.code.append(&mut self.output.string_data);
|
||||||
|
for srel in self.output.strings.iter_mut() {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
{
|
||||||
|
if std::mem::replace(&mut srel.shifted, true) {
|
||||||
|
panic!("str reloc visited twice");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
debug_assert!(srel.range.end <= prev_data_len as u32);
|
||||||
|
debug_assert!(srel.range.start <= srel.range.end);
|
||||||
|
srel.range.start += base;
|
||||||
|
srel.range.end += base;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_task(&mut self, FTask { file, id }: FTask) {
|
fn handle_task(&mut self, FTask { file, id }: FTask) {
|
||||||
|
@ -2630,12 +2657,15 @@ impl Codegen {
|
||||||
});
|
});
|
||||||
|
|
||||||
//self.compress_strings();
|
//self.compress_strings();
|
||||||
let base = self.output.code.len() as u32;
|
|
||||||
self.output.code.append(&mut self.string_data);
|
|
||||||
|
|
||||||
for srel in self.output.strings.drain(..) {
|
for srel in self.output.strings.drain(..) {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
assert!(srel.shifted);
|
||||||
|
log::err!(
|
||||||
|
"{:?}",
|
||||||
|
&self.output.code[srel.range.start as usize..srel.range.end as usize]
|
||||||
|
);
|
||||||
srel.reloc
|
srel.reloc
|
||||||
.apply_jump(&mut self.output.code, srel.range.start + base);
|
.apply_jump(&mut self.output.code, srel.range.start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2964,15 +2994,19 @@ impl Codegen {
|
||||||
|
|
||||||
fn local_snap(&self) -> Snapshot {
|
fn local_snap(&self) -> Snapshot {
|
||||||
Snapshot {
|
Snapshot {
|
||||||
code: self.output.code.len() - self.ci.snap.code,
|
code: self.output.code.len() - self.ci.snap.code,
|
||||||
funcs: self.output.funcs.len() - self.ci.snap.funcs,
|
string_data: self.output.string_data.len() - self.ci.snap.string_data,
|
||||||
globals: self.output.globals.len() - self.ci.snap.globals,
|
funcs: self.output.funcs.len() - self.ci.snap.funcs,
|
||||||
strings: self.output.strings.len() - self.ci.snap.strings,
|
globals: self.output.globals.len() - self.ci.snap.globals,
|
||||||
|
strings: self.output.strings.len() - self.ci.snap.strings,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pop_local_snap(&mut self, snap: Snapshot) {
|
fn pop_local_snap(&mut self, snap: Snapshot) {
|
||||||
self.output.code.truncate(snap.code + self.ci.snap.code);
|
self.output.code.truncate(snap.code + self.ci.snap.code);
|
||||||
|
self.output
|
||||||
|
.string_data
|
||||||
|
.truncate(snap.string_data + self.ci.snap.string_data);
|
||||||
self.output.funcs.truncate(snap.funcs + self.ci.snap.funcs);
|
self.output.funcs.truncate(snap.funcs + self.ci.snap.funcs);
|
||||||
self.output
|
self.output
|
||||||
.globals
|
.globals
|
||||||
|
@ -3045,6 +3079,8 @@ mod tests {
|
||||||
let mut out = Vec::new();
|
let mut out = Vec::new();
|
||||||
codegen.dump(&mut out).unwrap();
|
codegen.dump(&mut out).unwrap();
|
||||||
|
|
||||||
|
log::dbg!("code: {}", String::from_utf8_lossy(&out));
|
||||||
|
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
|
|
||||||
let mut stack = [0_u64; 128];
|
let mut stack = [0_u64; 128];
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
code size: 202
|
code size: 313
|
||||||
ret: 13
|
ret: 16
|
||||||
status: Ok(())
|
status: Ok(())
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
code size: 522
|
code size: 536
|
||||||
ret: 512
|
ret: 512
|
||||||
status: Ok(())
|
status: Ok(())
|
||||||
|
|
Loading…
Reference in a new issue