From 00f6729d31b7a8e473be4662dd504d29cc25a4a2 Mon Sep 17 00:00:00 2001 From: Jakub Doka Date: Sat, 14 Dec 2024 21:02:29 +0100 Subject: [PATCH] supporting ascii literals Signed-off-by: Jakub Doka --- lang/README.md | 2 ++ lang/src/fmt.rs | 1 + lang/src/lib.rs | 10 +++----- lang/src/parser.rs | 6 +++++ lang/src/son.rs | 62 ++++++++++++++++++++++++++++++++-------------- 5 files changed, 56 insertions(+), 25 deletions(-) diff --git a/lang/README.md b/lang/README.md index 8d1ced38d..3c3918ba0 100644 --- a/lang/README.md +++ b/lang/README.md @@ -121,10 +121,12 @@ main := fn(): uint { decimal := 255 octal := 0o377 binary := 0b11111111 + ascii := '\n' if hex == decimal & octal == decimal & binary == decimal { return 0 } + return 1 } ``` diff --git a/lang/src/fmt.rs b/lang/src/fmt.rs index b72d6f0de..210197b1b 100644 --- a/lang/src/fmt.rs +++ b/lang/src/fmt.rs @@ -314,6 +314,7 @@ impl<'a> Formatter<'a> { } Expr::Slf { .. } => f.write_str("Self"), Expr::String { literal, .. } => f.write_str(literal), + Expr::Char { literal, .. } => f.write_str(literal), Expr::Comment { literal, .. } => f.write_str(literal), Expr::Mod { path, .. } => write!(f, "@use(\"{path}\")"), Expr::Embed { path, .. } => write!(f, "@embed(\"{path}\")"), diff --git a/lang/src/lib.rs b/lang/src/lib.rs index 9eff8c2fc..21a9d8e6d 100644 --- a/lang/src/lib.rs +++ b/lang/src/lib.rs @@ -306,7 +306,7 @@ fn endoce_string( literal: &str, str: &mut Vec, report: impl Fn(&core::str::Bytes, &str), -) -> Option<()> { +) -> Option { let report = |bytes: &core::str::Bytes, msg: &_| { report(bytes, msg); None:: @@ -332,7 +332,9 @@ fn endoce_string( }; let mut bytes = literal.bytes(); + let mut char_len = 0; while let Some(b) = bytes.next() { + char_len += 1; if b != b'\\' { str.push(b); continue; @@ -354,11 +356,7 @@ fn endoce_string( str.push(b); } - if str.last() != Some(&0) { - report(&bytes, "string literal must end with null byte (for now)"); - } - - Some(()) + Some(char_len) } pub fn quad_sort(mut slice: &mut [T], mut cmp: impl FnMut(&T, &T) -> core::cmp::Ordering) { diff --git a/lang/src/parser.rs b/lang/src/parser.rs index b1bf9d616..8d64302a2 100644 --- a/lang/src/parser.rs +++ b/lang/src/parser.rs @@ -356,6 +356,7 @@ impl<'a, 'b> Parser<'a, 'b> { T::Idk => E::Idk { pos }, T::Die => E::Die { pos }, T::DQuote => E::String { pos, literal: self.tok_str(token) }, + T::Quote => E::Char { pos, literal: self.tok_str(token) }, T::Packed => { self.packed = true; let expr = self.unit_expr()?; @@ -896,6 +897,11 @@ generate_expr! { pos: Pos, literal: &'a str, }, + /// `'\'([^']|\\\')\''` + Char { + pos: Pos, + literal: &'a str, + }, /// `'//[^\n]' | '/*' { '([^/*]|*/)*' | Comment } '*/' Comment { pos: Pos, diff --git a/lang/src/son.rs b/lang/src/son.rs index 093de0516..1b1d3e832 100644 --- a/lang/src/son.rs +++ b/lang/src/son.rs @@ -2975,7 +2975,7 @@ impl<'a> Codegen<'a> { } Expr::Ident { id, pos, .. } => self.find_type_as_value(pos, self.ci.parent, id, ctx), Expr::Comment { .. } => Some(Value::VOID), - Expr::String { pos, literal } => { + Expr::Char { pos, literal } | Expr::String { pos, literal } => { let literal = &literal[1..literal.len() - 1]; let mut data = core::mem::take(&mut self.pool.lit_buf); debug_assert!(data.is_empty()); @@ -2984,27 +2984,51 @@ impl<'a> Codegen<'a> { self.error(pos + (literal.len() - bytes.len()) as u32 - 1, message); }; - crate::endoce_string(literal, &mut data, report).unwrap(); + let char_count = crate::endoce_string(literal, &mut data, report).unwrap(); - let ty = self.tys.make_ptr(ty::Id::U8); - let global = self - .tys - .strings - .get_or_insert(&data, &mut self.tys.ins.globals, |globals| { - StringRef(globals.push(GlobalData { - data: data.clone(), - ty, - ..Default::default() - })) - }) - .0; - let global = self.ci.nodes.new_node_nop(ty, Kind::Global { global }, [VOID]); - self.ci.nodes[global].aclass = GLOBAL_ACLASS as _; + if matches!(expr, Expr::Char { .. }) { + if char_count != 1 { + return self.error( + pos, + fa!("character literal can only contain one character, \ + but you supplied {char_count}"), + ); + } - data.clear(); - self.pool.lit_buf = data; + let value = match data.as_slice() { + &[v] => v as i64, + _ => return self.error(pos, "TODO: support utf-8 characters"), + }; - Some(Value::new(global).ty(ty)) + data.clear(); + self.pool.lit_buf = data; + + self.gen_inferred_const(ctx, ty::Id::U8, value, ty::Id::is_integer) + } else { + if data.last() != Some(&0) { + self.error(pos, "string literal must end with null byte (for now)"); + } + + let ty = self.tys.make_ptr(ty::Id::U8); + let global = self + .tys + .strings + .get_or_insert(&data, &mut self.tys.ins.globals, |globals| { + StringRef(globals.push(GlobalData { + data: data.clone(), + ty, + ..Default::default() + })) + }) + .0; + let global = self.ci.nodes.new_node_nop(ty, Kind::Global { global }, [VOID]); + self.ci.nodes[global].aclass = GLOBAL_ACLASS as _; + + data.clear(); + self.pool.lit_buf = data; + + Some(Value::new(global).ty(ty)) + } } Expr::Defer { pos, value } => { self.ci.defers.push((pos, ExprRef::new(value)));