From c3a6e62bf2203028542da4278e10bdbbd9fe7b38 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sun, 20 Oct 2024 12:22:28 +0200 Subject: [PATCH] implementing strings --- lang/src/codegen.rs | 50 ++------------------ lang/src/lib.rs | 55 ++++++++++++++++++++++ lang/src/son.rs | 73 ++++++++++++++++++------------ lang/src/vc.rs | 3 ++ lang/tests/son_tests_c_strings.txt | 25 ++++++++++ 5 files changed, 130 insertions(+), 76 deletions(-) create mode 100644 lang/tests/son_tests_c_strings.txt diff --git a/lang/src/codegen.rs b/lang/src/codegen.rs index 1869abb..3abe42b 100644 --- a/lang/src/codegen.rs +++ b/lang/src/codegen.rs @@ -1069,61 +1069,19 @@ impl Codegen { Some(Value { ty, loc }) } - E::String { pos, mut literal } => { - literal = &literal[1..literal.len() - 1]; + E::String { pos, literal } => { + let literal = &literal[1..literal.len() - 1]; if !literal.ends_with("\\0") { self.report(pos, "string literal must end with null byte (for now)"); } - let report = |bytes: &core::str::Bytes, message| { + let report = |bytes: &core::str::Bytes, message: &str| { self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message) }; let mut str = Vec::::with_capacity(literal.len()); - - let decode_braces = |str: &mut Vec, bytes: &mut core::str::Bytes| { - while let Some(b) = bytes.next() - && b != b'}' - { - let c = bytes - .next() - .unwrap_or_else(|| report(bytes, "incomplete escape sequence")); - let decode = |b: u8| match b { - b'0'..=b'9' => b - b'0', - b'a'..=b'f' => b - b'a' + 10, - b'A'..=b'F' => b - b'A' + 10, - _ => report(bytes, "expected hex digit or '}'"), - }; - str.push(decode(b) << 4 | decode(c)); - } - }; - - let mut bytes = literal.bytes(); - while let Some(b) = bytes.next() { - if b != b'\\' { - str.push(b); - continue; - } - let b = match bytes - .next() - .unwrap_or_else(|| report(&bytes, "incomplete escape sequence")) - { - b'n' => b'\n', - b'r' => b'\r', - b't' => b'\t', - b'\\' => b'\\', - b'\'' => b'\'', - b'"' => b'"', - b'0' => b'\0', - b'{' => { - decode_braces(&mut str, &mut bytes); - continue; - } - _ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]"), - }; - str.push(b); - } + crate::endoce_string(literal, &mut str, report); let reloc = Reloc::new(self.ci.code.len() as _, 3, 4); let glob = self.tys.ins.globals.len() as ty::Global; diff --git a/lang/src/lib.rs b/lang/src/lib.rs index e2b0f26..bb98b8f 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -1619,6 +1619,61 @@ impl hbvm::mem::Memory for LoggedMem { } } +fn endoce_string( + literal: &str, + str: &mut Vec, + report: impl Fn(&core::str::Bytes, &str), +) -> Option<()> { + let report = |bytes: &core::str::Bytes, msg: &_| { + report(bytes, msg); + None:: + }; + + let decode_braces = |str: &mut Vec, bytes: &mut core::str::Bytes| { + while let Some(b) = bytes.next() + && b != b'}' + { + let c = bytes.next().or_else(|| report(bytes, "incomplete escape sequence"))?; + let decode = |b: u8| { + Some(match b { + b'0'..=b'9' => b - b'0', + b'a'..=b'f' => b - b'a' + 10, + b'A'..=b'F' => b - b'A' + 10, + _ => report(bytes, "expected hex digit or '}'")?, + }) + }; + str.push(decode(b)? << 4 | decode(c)?); + } + + Some(()) + }; + + let mut bytes = literal.bytes(); + while let Some(b) = bytes.next() { + if b != b'\\' { + str.push(b); + continue; + } + let b = match bytes.next().or_else(|| report(&bytes, "incomplete escape sequence"))? { + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'\'' => b'\'', + b'"' => b'"', + b'0' => b'\0', + b'{' => { + decode_braces(str, &mut bytes); + continue; + } + _ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?, + }; + str.push(b); + } + + Some(()) +} + struct AsHex<'a>(&'a [u8]); impl core::fmt::Display for AsHex<'_> { diff --git a/lang/src/son.rs b/lang/src/son.rs index d8f3c02..4a704dd 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -41,7 +41,7 @@ trait StoreId: Sized { impl StoreId for Nid { fn to_store(self) -> Option { - (self != NEVER).then_some(self) + (self != ENTRY).then_some(self) } } @@ -1013,12 +1013,12 @@ impl ItemCtx { let mem = self.nodes.new_node(ty::Id::VOID, Kind::Mem, [VOID]); debug_assert_eq!(mem, MEM); self.nodes.lock(mem); - self.nodes.lock(end); - self.scope.store = end; + self.nodes.lock(self.ctrl); + self.scope.store = self.ctrl; } fn finalize(&mut self) { - self.nodes.unlock(NEVER); + self.nodes.unlock(ENTRY); self.nodes.unlock(NEVER); self.nodes.unlock_remove_scope(&core::mem::take(&mut self.scope)); self.nodes.unlock(MEM); @@ -1477,7 +1477,11 @@ impl<'a> Codegen<'a> { fn raw_expr_ctx(&mut self, expr: &Expr, ctx: Ctx) -> Option { // ordered by complexity of the expression match *expr { - Expr::Comment { .. } => Some(Value::VOID), + Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit( + ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT), + Kind::CInt { value }, + [VOID], + )), Expr::Ident { id, .. } if let Some(index) = self.ci.scope.vars.iter().rposition(|v| v.id == id) => { @@ -1489,25 +1493,35 @@ impl<'a> Codegen<'a> { Expr::Ident { id, pos, .. } => { let decl = self.find_or_declare(pos, self.ci.file, Ok(id)); match decl { - ty::Kind::Global(g) => { - let gl = &self.tys.ins.globals[g as usize]; - Some( - Value::ptr(self.ci.nodes.new_node( - gl.ty, - Kind::Global { global: g }, - [VOID], - )) - .ty(gl.ty), - ) + ty::Kind::Global(global) => { + let gl = &self.tys.ins.globals[global as usize]; + let value = self.ci.nodes.new_node(gl.ty, Kind::Global { global }, [VOID]); + Some(Value::ptr(value).ty(gl.ty)) } _ => todo!("{decl:?}"), } } - Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit( - ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT), - Kind::CInt { value }, - [VOID], - )), + Expr::Comment { .. } => Some(Value::VOID), + Expr::String { pos, literal } => { + let literal = &literal[1..literal.len() - 1]; + + if !literal.ends_with("\\0") { + self.report(pos, "string literal must end with null byte (for now)"); + } + + let report = |bytes: &core::str::Bytes, message: &str| { + self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message) + }; + + let mut data = Vec::::with_capacity(literal.len()); + crate::endoce_string(literal, &mut data, report).unwrap(); + + let global = self.tys.ins.globals.len() as ty::Global; + let ty = self.tys.make_ptr(ty::Id::U8); + self.tys.ins.globals.push(Global { data, ty, ..Default::default() }); + let global = self.ci.nodes.new_node(ty, Kind::Global { global }, [VOID]); + Some(Value::new(global).ty(ty)) + } Expr::Return { pos, val } => { let value = if let Some(val) = val { self.expr_ctx(val, Ctx { ty: self.ci.ret })? @@ -2076,6 +2090,9 @@ impl<'a> Codegen<'a> { self.ci.nodes.unlock_remove(scope_value); } + scope.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n)); + bres.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n)); + self.ci.nodes.unlock(self.ci.ctrl); Some(Value::VOID) @@ -2155,10 +2172,7 @@ impl<'a> Codegen<'a> { Some(Value::VOID) } - ref e => { - self.report_unhandled_ast(e, "bruh"); - Value::NEVER - } + ref e => self.report_unhandled_ast(e, "bruh"), } } @@ -2325,8 +2339,7 @@ impl<'a> Codegen<'a> { return ty; } - self.report_unhandled_ast(expr, "type"); - ty::Id::NEVER + self.report_unhandled_ast(expr, "type") } fn find_or_declare(&mut self, pos: Pos, file: FileId, name: Result) -> ty::Kind { @@ -2546,7 +2559,7 @@ impl<'a> Codegen<'a> { } #[track_caller] - fn report_unhandled_ast(&self, ast: &Expr, hint: &str) { + fn report_unhandled_ast(&self, ast: &Expr, hint: &str) -> ! { log::info!("{ast:#?}"); self.fatal_report(ast.pos(), fa!("compiler does not (yet) know how to handle ({hint})")); } @@ -3093,7 +3106,7 @@ fn idepth(nodes: &mut Nodes, target: Nid) -> IDomDepth { } if nodes[target].depth == 0 { nodes[target].depth = match nodes[target].kind { - Kind::End | Kind::Start => unreachable!(), + Kind::End | Kind::Start => unreachable!("{:?}", nodes[target].kind), Kind::Region => { idepth(nodes, nodes[target].inputs[0]).max(idepth(nodes, nodes[target].inputs[1])) } @@ -3329,7 +3342,7 @@ mod tests { global_variables; //generic_types; //generic_functions; - //c_strings; + c_strings; //struct_patterns; arrays; //struct_return_from_module_function; @@ -3345,7 +3358,7 @@ mod tests { branch_assignments; exhaustive_loop_testing; //idk; - //comptime_min_reg_leak; + comptime_min_reg_leak; //some_generic_code; //integer_inference_issues; //writing_into_string; diff --git a/lang/src/vc.rs b/lang/src/vc.rs index 609c40c..dae96f7 100644 --- a/lang/src/vc.rs +++ b/lang/src/vc.rs @@ -238,6 +238,9 @@ impl Deref for Vc { type Target = [Nid]; fn deref(&self) -> &Self::Target { + if self.as_slice().iter().position(|&i| i == 1) == Some(2) { + log::info!("foo {}", std::backtrace::Backtrace::capture()); + } self.as_slice() } } diff --git a/lang/tests/son_tests_c_strings.txt b/lang/tests/son_tests_c_strings.txt new file mode 100644 index 0000000..cdfc100 --- /dev/null +++ b/lang/tests/son_tests_c_strings.txt @@ -0,0 +1,25 @@ +main: + ADDI64 r254, r254, -16d + ST r31, r254, 0a, 16h + LRA r2, r0, :"abāļž\n\r\t56789\0" + JAL r31, r0, :str_len + CP r32, r1 + LRA r2, r0, :"fff\0" + JAL r31, r0, :str_len + ADD64 r1, r1, r32 + LD r31, r254, 0a, 16h + ADDI64 r254, r254, 16d + JALA r0, r31, 0a +str_len: + LI64 r6, 0d + LI64 r1, 0d + 2: LD r8, r2, 0a, 1h + JNE r8, r6, :0 + JMP :1 + 0: ADDI64 r2, r2, 1d + ADDI64 r1, r1, 1d + JMP :2 + 1: JALA r0, r31, 0a +code size: 201 +ret: 16 +status: Ok(())