implementing strings
This commit is contained in:
parent
00949c4ea8
commit
c3a6e62bf2
|
@ -1069,61 +1069,19 @@ impl Codegen {
|
|||
|
||||
Some(Value { ty, loc })
|
||||
}
|
||||
E::String { pos, mut literal } => {
|
||||
literal = &literal[1..literal.len() - 1];
|
||||
E::String { pos, literal } => {
|
||||
let literal = &literal[1..literal.len() - 1];
|
||||
|
||||
if !literal.ends_with("\\0") {
|
||||
self.report(pos, "string literal must end with null byte (for now)");
|
||||
}
|
||||
|
||||
let report = |bytes: &core::str::Bytes, message| {
|
||||
let report = |bytes: &core::str::Bytes, message: &str| {
|
||||
self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
||||
};
|
||||
|
||||
let mut str = Vec::<u8>::with_capacity(literal.len());
|
||||
|
||||
let decode_braces = |str: &mut Vec<u8>, bytes: &mut core::str::Bytes| {
|
||||
while let Some(b) = bytes.next()
|
||||
&& b != b'}'
|
||||
{
|
||||
let c = bytes
|
||||
.next()
|
||||
.unwrap_or_else(|| report(bytes, "incomplete escape sequence"));
|
||||
let decode = |b: u8| match b {
|
||||
b'0'..=b'9' => b - b'0',
|
||||
b'a'..=b'f' => b - b'a' + 10,
|
||||
b'A'..=b'F' => b - b'A' + 10,
|
||||
_ => report(bytes, "expected hex digit or '}'"),
|
||||
};
|
||||
str.push(decode(b) << 4 | decode(c));
|
||||
}
|
||||
};
|
||||
|
||||
let mut bytes = literal.bytes();
|
||||
while let Some(b) = bytes.next() {
|
||||
if b != b'\\' {
|
||||
str.push(b);
|
||||
continue;
|
||||
}
|
||||
let b = match bytes
|
||||
.next()
|
||||
.unwrap_or_else(|| report(&bytes, "incomplete escape sequence"))
|
||||
{
|
||||
b'n' => b'\n',
|
||||
b'r' => b'\r',
|
||||
b't' => b'\t',
|
||||
b'\\' => b'\\',
|
||||
b'\'' => b'\'',
|
||||
b'"' => b'"',
|
||||
b'0' => b'\0',
|
||||
b'{' => {
|
||||
decode_braces(&mut str, &mut bytes);
|
||||
continue;
|
||||
}
|
||||
_ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]"),
|
||||
};
|
||||
str.push(b);
|
||||
}
|
||||
crate::endoce_string(literal, &mut str, report);
|
||||
|
||||
let reloc = Reloc::new(self.ci.code.len() as _, 3, 4);
|
||||
let glob = self.tys.ins.globals.len() as ty::Global;
|
||||
|
|
|
@ -1619,6 +1619,61 @@ impl hbvm::mem::Memory for LoggedMem {
|
|||
}
|
||||
}
|
||||
|
||||
/// Decodes the escape sequences in `literal` and appends the resulting bytes to `str`.
///
/// Bytes that are not part of an escape are copied through unchanged. The recognised
/// escapes are `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\0`, and `\{..}`, where the braces
/// enclose any number of two-digit hexadecimal byte values (either letter case).
///
/// # Errors
///
/// On a malformed escape, `report` is called with the byte iterator (already advanced
/// past the offending input) and a message, and `None` is returned. This includes
/// errors inside a `\{..}` sequence. Bytes decoded before the error stay in `str`.
///
/// NOTE: a `\{..` sequence that reaches the end of the literal without a closing `}`
/// is accepted without a report.
fn endoce_string(
    literal: &str,
    str: &mut Vec<u8>,
    report: impl Fn(&core::str::Bytes, &str),
) -> Option<()> {
    // Forward the diagnostic and yield `None`, so error sites can write `fail(..)?`.
    let fail = |bytes: &core::str::Bytes, msg: &str| {
        report(bytes, msg);
        None::<u8>
    };

    // Convert a single ASCII hex digit into its 4-bit value.
    let decode_nibble = |bytes: &core::str::Bytes, digit: u8| -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => fail(bytes, "expected hex digit or '}'"),
        }
    };

    // Consume hex digit pairs up to (and including) the closing `}`.
    let decode_braces = |out: &mut Vec<u8>, bytes: &mut core::str::Bytes| -> Option<()> {
        while let Some(hi) = bytes.next().filter(|&b| b != b'}') {
            let lo = bytes.next().or_else(|| fail(&*bytes, "incomplete escape sequence"))?;
            let hi = decode_nibble(&*bytes, hi)?;
            let lo = decode_nibble(&*bytes, lo)?;
            out.push((hi << 4) | lo);
        }

        Some(())
    };

    let mut bytes = literal.bytes();
    // Not a `for` loop: escape handling pulls extra bytes from the same iterator.
    while let Some(b) = bytes.next() {
        if b != b'\\' {
            str.push(b);
            continue;
        }

        let escape = bytes.next().or_else(|| fail(&bytes, "incomplete escape sequence"))?;
        let decoded = match escape {
            b'n' => b'\n',
            b'r' => b'\r',
            b't' => b'\t',
            b'\\' => b'\\',
            b'\'' => b'\'',
            b'"' => b'"',
            b'0' => b'\0',
            b'{' => {
                // Propagate failures; previously the result was discarded and a
                // malformed brace escape still produced `Some(())`.
                decode_braces(&mut *str, &mut bytes)?;
                continue;
            }
            _ => fail(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?,
        };
        str.push(decoded);
    }

    Some(())
}
|
||||
|
||||
struct AsHex<'a>(&'a [u8]);
|
||||
|
||||
impl core::fmt::Display for AsHex<'_> {
|
||||
|
|
|
@ -41,7 +41,7 @@ trait StoreId: Sized {
|
|||
|
||||
impl StoreId for Nid {
|
||||
fn to_store(self) -> Option<Self> {
|
||||
(self != NEVER).then_some(self)
|
||||
(self != ENTRY).then_some(self)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1013,12 +1013,12 @@ impl ItemCtx {
|
|||
let mem = self.nodes.new_node(ty::Id::VOID, Kind::Mem, [VOID]);
|
||||
debug_assert_eq!(mem, MEM);
|
||||
self.nodes.lock(mem);
|
||||
self.nodes.lock(end);
|
||||
self.scope.store = end;
|
||||
self.nodes.lock(self.ctrl);
|
||||
self.scope.store = self.ctrl;
|
||||
}
|
||||
|
||||
fn finalize(&mut self) {
|
||||
self.nodes.unlock(NEVER);
|
||||
self.nodes.unlock(ENTRY);
|
||||
self.nodes.unlock(NEVER);
|
||||
self.nodes.unlock_remove_scope(&core::mem::take(&mut self.scope));
|
||||
self.nodes.unlock(MEM);
|
||||
|
@ -1477,7 +1477,11 @@ impl<'a> Codegen<'a> {
|
|||
fn raw_expr_ctx(&mut self, expr: &Expr, ctx: Ctx) -> Option<Value> {
|
||||
// ordered by complexity of the expression
|
||||
match *expr {
|
||||
Expr::Comment { .. } => Some(Value::VOID),
|
||||
Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit(
|
||||
ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT),
|
||||
Kind::CInt { value },
|
||||
[VOID],
|
||||
)),
|
||||
Expr::Ident { id, .. }
|
||||
if let Some(index) = self.ci.scope.vars.iter().rposition(|v| v.id == id) =>
|
||||
{
|
||||
|
@ -1489,25 +1493,35 @@ impl<'a> Codegen<'a> {
|
|||
Expr::Ident { id, pos, .. } => {
|
||||
let decl = self.find_or_declare(pos, self.ci.file, Ok(id));
|
||||
match decl {
|
||||
ty::Kind::Global(g) => {
|
||||
let gl = &self.tys.ins.globals[g as usize];
|
||||
Some(
|
||||
Value::ptr(self.ci.nodes.new_node(
|
||||
gl.ty,
|
||||
Kind::Global { global: g },
|
||||
[VOID],
|
||||
))
|
||||
.ty(gl.ty),
|
||||
)
|
||||
ty::Kind::Global(global) => {
|
||||
let gl = &self.tys.ins.globals[global as usize];
|
||||
let value = self.ci.nodes.new_node(gl.ty, Kind::Global { global }, [VOID]);
|
||||
Some(Value::ptr(value).ty(gl.ty))
|
||||
}
|
||||
_ => todo!("{decl:?}"),
|
||||
}
|
||||
}
|
||||
Expr::Number { value, .. } => Some(self.ci.nodes.new_node_lit(
|
||||
ctx.ty.filter(|ty| ty.is_integer() || ty.is_pointer()).unwrap_or(ty::Id::INT),
|
||||
Kind::CInt { value },
|
||||
[VOID],
|
||||
)),
|
||||
Expr::Comment { .. } => Some(Value::VOID),
|
||||
Expr::String { pos, literal } => {
|
||||
let literal = &literal[1..literal.len() - 1];
|
||||
|
||||
if !literal.ends_with("\\0") {
|
||||
self.report(pos, "string literal must end with null byte (for now)");
|
||||
}
|
||||
|
||||
let report = |bytes: &core::str::Bytes, message: &str| {
|
||||
self.report(pos + (literal.len() - bytes.len()) as u32 - 1, message)
|
||||
};
|
||||
|
||||
let mut data = Vec::<u8>::with_capacity(literal.len());
|
||||
crate::endoce_string(literal, &mut data, report).unwrap();
|
||||
|
||||
let global = self.tys.ins.globals.len() as ty::Global;
|
||||
let ty = self.tys.make_ptr(ty::Id::U8);
|
||||
self.tys.ins.globals.push(Global { data, ty, ..Default::default() });
|
||||
let global = self.ci.nodes.new_node(ty, Kind::Global { global }, [VOID]);
|
||||
Some(Value::new(global).ty(ty))
|
||||
}
|
||||
Expr::Return { pos, val } => {
|
||||
let value = if let Some(val) = val {
|
||||
self.expr_ctx(val, Ctx { ty: self.ci.ret })?
|
||||
|
@ -2076,6 +2090,9 @@ impl<'a> Codegen<'a> {
|
|||
self.ci.nodes.unlock_remove(scope_value);
|
||||
}
|
||||
|
||||
scope.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n));
|
||||
bres.loads.iter().for_each(|&n| _ = self.ci.nodes.unlock_remove(n));
|
||||
|
||||
self.ci.nodes.unlock(self.ci.ctrl);
|
||||
|
||||
Some(Value::VOID)
|
||||
|
@ -2155,10 +2172,7 @@ impl<'a> Codegen<'a> {
|
|||
|
||||
Some(Value::VOID)
|
||||
}
|
||||
ref e => {
|
||||
self.report_unhandled_ast(e, "bruh");
|
||||
Value::NEVER
|
||||
}
|
||||
ref e => self.report_unhandled_ast(e, "bruh"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2325,8 +2339,7 @@ impl<'a> Codegen<'a> {
|
|||
return ty;
|
||||
}
|
||||
|
||||
self.report_unhandled_ast(expr, "type");
|
||||
ty::Id::NEVER
|
||||
self.report_unhandled_ast(expr, "type")
|
||||
}
|
||||
|
||||
fn find_or_declare(&mut self, pos: Pos, file: FileId, name: Result<Ident, &str>) -> ty::Kind {
|
||||
|
@ -2546,7 +2559,7 @@ impl<'a> Codegen<'a> {
|
|||
}
|
||||
|
||||
#[track_caller]
|
||||
fn report_unhandled_ast(&self, ast: &Expr, hint: &str) {
|
||||
fn report_unhandled_ast(&self, ast: &Expr, hint: &str) -> ! {
|
||||
log::info!("{ast:#?}");
|
||||
self.fatal_report(ast.pos(), fa!("compiler does not (yet) know how to handle ({hint})"));
|
||||
}
|
||||
|
@ -3093,7 +3106,7 @@ fn idepth(nodes: &mut Nodes, target: Nid) -> IDomDepth {
|
|||
}
|
||||
if nodes[target].depth == 0 {
|
||||
nodes[target].depth = match nodes[target].kind {
|
||||
Kind::End | Kind::Start => unreachable!(),
|
||||
Kind::End | Kind::Start => unreachable!("{:?}", nodes[target].kind),
|
||||
Kind::Region => {
|
||||
idepth(nodes, nodes[target].inputs[0]).max(idepth(nodes, nodes[target].inputs[1]))
|
||||
}
|
||||
|
@ -3329,7 +3342,7 @@ mod tests {
|
|||
global_variables;
|
||||
//generic_types;
|
||||
//generic_functions;
|
||||
//c_strings;
|
||||
c_strings;
|
||||
//struct_patterns;
|
||||
arrays;
|
||||
//struct_return_from_module_function;
|
||||
|
@ -3345,7 +3358,7 @@ mod tests {
|
|||
branch_assignments;
|
||||
exhaustive_loop_testing;
|
||||
//idk;
|
||||
//comptime_min_reg_leak;
|
||||
comptime_min_reg_leak;
|
||||
//some_generic_code;
|
||||
//integer_inference_issues;
|
||||
//writing_into_string;
|
||||
|
|
|
@ -238,6 +238,9 @@ impl Deref for Vc {
|
|||
type Target = [Nid];
|
||||
|
||||
/// Borrows the stored node ids as a slice.
///
/// The temporary debug instrumentation (a slice scan plus a logged backtrace on every
/// dereference) was removed; this method only forwards to `as_slice`.
fn deref(&self) -> &Self::Target {
    self.as_slice()
}
|
||||
}
|
||||
|
|
25
lang/tests/son_tests_c_strings.txt
Normal file
25
lang/tests/son_tests_c_strings.txt
Normal file
|
@ -0,0 +1,25 @@
|
|||
main:
|
||||
ADDI64 r254, r254, -16d
|
||||
ST r31, r254, 0a, 16h
|
||||
LRA r2, r0, :"abඞ\n\r\t56789\0"
|
||||
JAL r31, r0, :str_len
|
||||
CP r32, r1
|
||||
LRA r2, r0, :"fff\0"
|
||||
JAL r31, r0, :str_len
|
||||
ADD64 r1, r1, r32
|
||||
LD r31, r254, 0a, 16h
|
||||
ADDI64 r254, r254, 16d
|
||||
JALA r0, r31, 0a
|
||||
str_len:
|
||||
LI64 r6, 0d
|
||||
LI64 r1, 0d
|
||||
2: LD r8, r2, 0a, 1h
|
||||
JNE r8, r6, :0
|
||||
JMP :1
|
||||
0: ADDI64 r2, r2, 1d
|
||||
ADDI64 r1, r1, 1d
|
||||
JMP :2
|
||||
1: JALA r0, r31, 0a
|
||||
code size: 201
|
||||
ret: 16
|
||||
status: Ok(())
|
Loading…
Reference in a new issue