implementing strings

This commit is contained in:
Jakub Doka 2024-10-20 12:22:28 +02:00
parent 00949c4ea8
commit c3a6e62bf2
No known key found for this signature in database
GPG key ID: C6E9A89936B8C143
5 changed files with 130 additions and 76 deletions

View file

@ -1069,61 +1069,19 @@ impl Codegen {
Some(Value { ty, loc }) Some(Value { ty, loc })
} }
E::String { pos, mut literal } => { E::String { pos, literal } => {
literal = &literal[1..literal.len() - 1]; let literal = &literal[1..literal.len() - 1];
if !literal.ends_with("\\0") { if !literal.ends_with("\\0") {
self.report(pos, "string literal must end with null byte (for now)"); self.report(pos, "string literal must end with null byte (for now)");
} }
let report = |bytes: &core::str::Bytes, message| { let report = |bytes: &core::str::Bytes, message: &str| {
self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message) self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
}; };
let mut str = Vec::<u8>::with_capacity(literal.len()); let mut str = Vec::<u8>::with_capacity(literal.len());
crate::endoce_string(literal, &mut str, report);
let decode_braces = |str: &mut Vec<u8>, bytes: &mut core::str::Bytes| {
while let Some(b) = bytes.next()
&& b != b'}'
{
let c = bytes
.next()
.unwrap_or_else(|| report(bytes, "incomplete escape sequence"));
let decode = |b: u8| match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => b - b'a' + 10,
b'A'..=b'F' => b - b'A' + 10,
_ => report(bytes, "expected hex digit or '}'"),
};
str.push(decode(b) << 4 | decode(c));
}
};
let mut bytes = literal.bytes();
while let Some(b) = bytes.next() {
if b != b'\\' {
str.push(b);
continue;
}
let b = match bytes
.next()
.unwrap_or_else(|| report(&bytes, "incomplete escape sequence"))
{
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
b'\\' => b'\\',
b'\'' => b'\'',
b'"' => b'"',
b'0' => b'\0',
b'{' => {
decode_braces(&mut str, &mut bytes);
continue;
}
_ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]"),
};
str.push(b);
}
let reloc = Reloc::new(self.ci.code.len() as _, 3, 4); let reloc = Reloc::new(self.ci.code.len() as _, 3, 4);
let glob = self.tys.ins.globals.len() as ty::Global; let glob = self.tys.ins.globals.len() as ty::Global;

View file

@ -1619,6 +1619,61 @@ impl hbvm::mem::Memory for LoggedMem {
} }
} }
/// Decodes the escape sequences of `literal` and appends the resulting bytes to `str`.
///
/// Bytes other than `\` are copied through unchanged. The recognised escapes are
/// `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\0` and `\{..}`, where the braces enclose
/// pairs of hex digits (either case) and every pair produces one byte.
///
/// `report` is invoked with the byte iterator (positioned just past the bytes
/// consumed so far) and a message when a malformed escape is met.
///
/// Returns `Some(())` when the whole literal was decoded and `None` after the
/// first malformed escape, including one inside `\{..}`. Bytes decoded before
/// the error stay in `str`.
///
/// Note: a `\{` sequence that reaches the end of the literal without a closing
/// `}` on a pair boundary is accepted without a diagnostic.
fn endoce_string(
    literal: &str,
    str: &mut Vec<u8>,
    report: impl Fn(&core::str::Bytes, &str),
) -> Option<()> {
    // Adapter around the caller's callback so it can be used inside `Option`
    // chains: forwards the diagnostic and always yields `None`.
    let report = |bytes: &core::str::Bytes, msg: &str| {
        report(bytes, msg);
        None::<u8>
    };

    // Numeric value of a single ASCII hex digit; anything else is reported.
    let hex_digit = |bytes: &core::str::Bytes, b: u8| -> Option<u8> {
        match char::from(b).to_digit(16) {
            // `to_digit(16)` is always below 16, so the cast is lossless.
            Some(digit) => Some(digit as u8),
            None => report(bytes, "expected hex digit or '}'"),
        }
    };

    // Decodes the body of a `\{..}` escape; the opening brace is already consumed.
    let decode_braces = |str: &mut Vec<u8>, bytes: &mut core::str::Bytes| -> Option<()> {
        loop {
            let hi = match bytes.next() {
                Some(b'}') | None => return Some(()),
                Some(b) => b,
            };
            // Both digits are consumed before either is validated, so the
            // iterator handed to `report` points past the offending pair.
            let lo = match bytes.next() {
                Some(b) => b,
                None => report(&*bytes, "incomplete escape sequence")?,
            };
            str.push(hex_digit(&*bytes, hi)? << 4 | hex_digit(&*bytes, lo)?);
        }
    };

    let mut bytes = literal.bytes();
    while let Some(b) = bytes.next() {
        if b != b'\\' {
            str.push(b);
            continue;
        }

        let escaped = match bytes.next() {
            Some(b'n') => b'\n',
            Some(b'r') => b'\r',
            Some(b't') => b'\t',
            Some(b'\\') => b'\\',
            Some(b'\'') => b'\'',
            Some(b'"') => b'"',
            Some(b'0') => b'\0',
            Some(b'{') => {
                // Propagate failures: a bad hex escape must abort decoding
                // just like every other malformed escape does.
                decode_braces(&mut *str, &mut bytes)?;
                continue;
            }
            Some(_) => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?,
            None => report(&bytes, "incomplete escape sequence")?,
        };
        str.push(escaped);
    }

    Some(())
}
struct AsHex<'a>(&'a [u8]); struct AsHex<'a>(&'a [u8]);
impl core::fmt::Display for AsHex<'_> { impl core::fmt::Display for AsHex<'_> {

View file

@ -41,7 +41,7 @@ trait StoreId: Sized {
impl StoreId for Nid { impl StoreId for Nid {
fn to_store(self) -> Option<Self> { fn to_store(self) -> Option<Self> {
(self != NEVER).then_some(self) (self != ENTRY).then_some(self)
} }
} }
@ -1013,12 +1013,12 @@ impl ItemCtx {
let mem = self.nodes.new_node(ty::Id::VOID, Kind::Mem, [VOID]); let mem = self.nodes.new_node(ty::Id::VOID, Kind::Mem, [VOID]);
debug_assert_eq!(mem, MEM); debug_assert_eq!(mem, MEM);
self.nodes.lock(mem); self.nodes.lock(mem);
self.nodes.lock(end); self.nodes.lock(self.ctrl);
self.scope.store = end; self.scope.store = self.ctrl;
} }
fn finalize(&mut self) { fn finalize(&mut self) {
self.nodes.unlock(NEVER); self.nodes.unlock(ENTRY);
self.nodes.unlock(NEVER); self.nodes.unlock(NEVER);
self.nodes.unlock_remove_scope(&core::mem::take(&mut self.scope)); self.nodes.unlock_remove_scope(&core::mem::take(&mut self.scope));
self.nodes.unlock(MEM); self.nodes.unlock(MEM);
@ -1477,7 +1477,11 @@ impl<'a> Codegen<'a> {
fn raw_expr_ctx(&mut self, expr: &Expr, ctx: Ctx) -> Option<Value> { fn raw_expr_ctx(&mut self, expr: &Expr, ctx: Ctx) -> Option<Value> {
// ordered by complexity of the expression // ordered by complexity of the expression
match *expr { match *expr {
Expr::Comment { .. } => Some(Value::VOID), Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit(
ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT),
Kind::CInt { value },
[VOID],
)),
Expr::Ident { id, .. } Expr::Ident { id, .. }
if let Some(index) = self.ci.scope.vars.iter().rposition(|v| v.id == id) => if let Some(index) = self.ci.scope.vars.iter().rposition(|v| v.id == id) =>
{ {
@ -1489,25 +1493,35 @@ impl<'a> Codegen<'a> {
Expr::Ident { id, pos, .. } => { Expr::Ident { id, pos, .. } => {
let decl = self.find_or_declare(pos, self.ci.file, Ok(id)); let decl = self.find_or_declare(pos, self.ci.file, Ok(id));
match decl { match decl {
ty::Kind::Global(g) => { ty::Kind::Global(global) => {
let gl = &self.tys.ins.globals[g as usize]; let gl = &self.tys.ins.globals[global as usize];
Some( let value = self.ci.nodes.new_node(gl.ty, Kind::Global { global }, [VOID]);
Value::ptr(self.ci.nodes.new_node( Some(Value::ptr(value).ty(gl.ty))
gl.ty,
Kind::Global { global: g },
[VOID],
))
.ty(gl.ty),
)
} }
_ => todo!("{decl:?}"), _ => todo!("{decl:?}"),
} }
} }
Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit( Expr::Comment { .. } => Some(Value::VOID),
ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT), Expr::String { pos, literal } => {
Kind::CInt { value }, let literal = &literal[1..literal.len() - 1];
[VOID],
)), if !literal.ends_with("\\0") {
self.report(pos, "string literal must end with null byte (for now)");
}
let report = |bytes: &core::str::Bytes, message: &str| {
self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
};
let mut data = Vec::<u8>::with_capacity(literal.len());
crate::endoce_string(literal, &mut data, report).unwrap();
let global = self.tys.ins.globals.len() as ty::Global;
let ty = self.tys.make_ptr(ty::Id::U8);
self.tys.ins.globals.push(Global { data, ty, ..Default::default() });
let global = self.ci.nodes.new_node(ty, Kind::Global { global }, [VOID]);
Some(Value::new(global).ty(ty))
}
Expr::Return { pos, val } => { Expr::Return { pos, val } => {
let value = if let Some(val) = val { let value = if let Some(val) = val {
self.expr_ctx(val, Ctx { ty: self.ci.ret })? self.expr_ctx(val, Ctx { ty: self.ci.ret })?
@ -2076,6 +2090,9 @@ impl<'a> Codegen<'a> {
self.ci.nodes.unlock_remove(scope_value); self.ci.nodes.unlock_remove(scope_value);
} }
scope.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n));
bres.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n));
self.ci.nodes.unlock(self.ci.ctrl); self.ci.nodes.unlock(self.ci.ctrl);
Some(Value::VOID) Some(Value::VOID)
@ -2155,10 +2172,7 @@ impl<'a> Codegen<'a> {
Some(Value::VOID) Some(Value::VOID)
} }
ref e => { ref e => self.report_unhandled_ast(e, "bruh"),
self.report_unhandled_ast(e, "bruh");
Value::NEVER
}
} }
} }
@ -2325,8 +2339,7 @@ impl<'a> Codegen<'a> {
return ty; return ty;
} }
self.report_unhandled_ast(expr, "type"); self.report_unhandled_ast(expr, "type")
ty::Id::NEVER
} }
fn find_or_declare(&mut self, pos: Pos, file: FileId, name: Result<Ident, &str>) -> ty::Kind { fn find_or_declare(&mut self, pos: Pos, file: FileId, name: Result<Ident, &str>) -> ty::Kind {
@ -2546,7 +2559,7 @@ impl<'a> Codegen<'a> {
} }
#[track_caller] #[track_caller]
fn report_unhandled_ast(&self, ast: &Expr, hint: &str) { fn report_unhandled_ast(&self, ast: &Expr, hint: &str) -> ! {
log::info!("{ast:#?}"); log::info!("{ast:#?}");
self.fatal_report(ast.pos(), fa!("compiler does not (yet) know how to handle ({hint})")); self.fatal_report(ast.pos(), fa!("compiler does not (yet) know how to handle ({hint})"));
} }
@ -3093,7 +3106,7 @@ fn idepth(nodes: &mut Nodes, target: Nid) -> IDomDepth {
} }
if nodes[target].depth == 0 { if nodes[target].depth == 0 {
nodes[target].depth = match nodes[target].kind { nodes[target].depth = match nodes[target].kind {
Kind::End | Kind::Start => unreachable!(), Kind::End | Kind::Start => unreachable!("{:?}", nodes[target].kind),
Kind::Region => { Kind::Region => {
idepth(nodes, nodes[target].inputs[0]).max(idepth(nodes, nodes[target].inputs[1])) idepth(nodes, nodes[target].inputs[0]).max(idepth(nodes, nodes[target].inputs[1]))
} }
@ -3329,7 +3342,7 @@ mod tests {
global_variables; global_variables;
//generic_types; //generic_types;
//generic_functions; //generic_functions;
//c_strings; c_strings;
//struct_patterns; //struct_patterns;
arrays; arrays;
//struct_return_from_module_function; //struct_return_from_module_function;
@ -3345,7 +3358,7 @@ mod tests {
branch_assignments; branch_assignments;
exhaustive_loop_testing; exhaustive_loop_testing;
//idk; //idk;
//comptime_min_reg_leak; comptime_min_reg_leak;
//some_generic_code; //some_generic_code;
//integer_inference_issues; //integer_inference_issues;
//writing_into_string; //writing_into_string;

View file

@ -238,6 +238,9 @@ impl Deref for Vc {
type Target = [Nid]; type Target = [Nid];
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
if self.as_slice().iter().position(|&i| i == 1) == Some(2) {
log::info!("foo {}", std::backtrace::Backtrace::capture());
}
self.as_slice() self.as_slice()
} }
} }

View file

@ -0,0 +1,25 @@
main:
ADDI64 r254, r254, -16d
ST r31, r254, 0a, 16h
LRA r2, r0, :"abඞ\n\r\t56789\0"
JAL r31, r0, :str_len
CP r32, r1
LRA r2, r0, :"fff\0"
JAL r31, r0, :str_len
ADD64 r1, r1, r32
LD r31, r254, 0a, 16h
ADDI64 r254, r254, 16d
JALA r0, r31, 0a
str_len:
LI64 r6, 0d
LI64 r1, 0d
2: LD r8, r2, 0a, 1h
JNE r8, r6, :0
JMP :1
0: ADDI64 r2, r2, 1d
ADDI64 r1, r1, 1d
JMP :2
1: JALA r0, r31, 0a
code size: 201
ret: 16
status: Ok(())