holey-bytes/lang/src/lib.rs
Jakub Doka 00f6729d31
supporting ascii literals
Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
2024-12-14 21:02:29 +01:00

548 lines
14 KiB
Rust

#![feature(
iter_array_chunks,
assert_matches,
let_chains,
if_let_guard,
macro_metavar_expr,
anonymous_lifetime_in_impl_trait,
core_intrinsics,
never_type,
unwrap_infallible,
slice_partition_dedup,
portable_simd,
iter_collect_into,
ptr_metadata,
slice_ptr_get,
slice_take,
map_try_insert,
extract_if,
ptr_internals,
iter_intersperse,
str_from_raw_parts,
ptr_sub_ptr,
slice_from_ptr_range,
iter_next_chunk,
pointer_is_aligned_to,
maybe_uninit_fill,
array_chunks
)]
#![warn(clippy::dbg_macro)]
#![expect(internal_features)]
#![no_std]
#[cfg(feature = "std")]
pub use fs::*;
pub use utils::Ent;
use {self::ty::Builtin, alloc::vec::Vec};
#[macro_use]
extern crate alloc;
#[cfg(any(feature = "std", test))]
extern crate std;
#[cfg(test)]
const README: &str = include_str!("../README.md");
#[cfg(test)]
#[macro_export]
macro_rules! run_tests {
($runner:path: $($name:ident;)*) => {$(
#[test]
fn $name() {
$crate::run_test(core::any::type_name_of_val(&$name), stringify!($name), $crate::README, $runner);
}
)*};
}
pub mod fmt;
#[cfg(any(feature = "std", test))]
pub mod fs;
pub mod fuzz;
pub mod lexer;
pub mod parser;
pub mod son;
pub mod ty;
pub mod backend {
use {
crate::{
parser,
son::Nodes,
ty::{self, Module, Types},
utils::EntSlice,
},
alloc::{string::String, vec::Vec},
};
pub mod hbvm;
pub struct AssemblySpec {
pub entry: u32,
pub code_length: u64,
pub data_length: u64,
}
pub trait Backend {
fn assemble_reachable(
&mut self,
from: ty::Func,
types: &Types,
to: &mut Vec<u8>,
) -> AssemblySpec;
fn disasm<'a>(
&'a self,
sluce: &[u8],
eca_handler: &mut dyn FnMut(&mut &[u8]),
types: &'a Types,
files: &'a EntSlice<Module, parser::Ast>,
output: &mut String,
) -> Result<(), hbbytecode::DisasmError<'a>>;
fn emit_body(
&mut self,
id: ty::Func,
ci: &Nodes,
tys: &Types,
files: &EntSlice<Module, parser::Ast>,
);
fn emit_ct_body(
&mut self,
id: ty::Func,
ci: &Nodes,
tys: &Types,
files: &EntSlice<Module, parser::Ast>,
) {
self.emit_body(id, ci, tys, files);
}
fn assemble_bin(&mut self, from: ty::Func, types: &Types, to: &mut Vec<u8>) {
self.assemble_reachable(from, types, to);
}
}
}
mod utils;
mod debug {
pub fn panicking() -> bool {
#[cfg(feature = "std")]
{
std::thread::panicking()
}
#[cfg(not(feature = "std"))]
{
false
}
}
#[cfg(all(debug_assertions, feature = "std"))]
pub type Trace = std::rc::Rc<std::backtrace::Backtrace>;
#[cfg(not(all(debug_assertions, feature = "std")))]
pub type Trace = ();
pub fn trace() -> Trace {
#[cfg(all(debug_assertions, feature = "std"))]
{
std::rc::Rc::new(std::backtrace::Backtrace::capture())
}
#[cfg(not(all(debug_assertions, feature = "std")))]
{}
}
}
mod ctx_map {
use core::hash::BuildHasher;
pub type Hash = u64;
pub type HashBuilder = core::hash::BuildHasherDefault<IdentityHasher>;
#[derive(Default)]
pub struct IdentityHasher(u64);
impl core::hash::Hasher for IdentityHasher {
fn finish(&self) -> u64 {
self.0
}
fn write(&mut self, _: &[u8]) {
unimplemented!()
}
fn write_u64(&mut self, i: u64) {
self.0 = i;
}
}
#[derive(Clone)]
pub struct Key<T> {
pub value: T,
pub hash: Hash,
}
impl<T> core::hash::Hash for Key<T> {
fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
state.write_u64(self.hash);
}
}
pub trait CtxEntry {
type Ctx: ?Sized;
type Key<'a>: Eq + core::hash::Hash;
fn key<'a>(&self, ctx: &'a Self::Ctx) -> Self::Key<'a>;
}
#[derive(Clone)]
pub struct CtxMap<T> {
inner: hashbrown::HashMap<Key<T>, (), HashBuilder>,
}
impl<T> Default for CtxMap<T> {
fn default() -> Self {
Self { inner: Default::default() }
}
}
impl<T: CtxEntry> CtxMap<T> {
pub fn entry<'a, 'b>(
&'a mut self,
key: T::Key<'b>,
ctx: &'b T::Ctx,
) -> (hashbrown::hash_map::RawEntryMut<'a, Key<T>, (), HashBuilder>, Hash) {
let hash = crate::FnvBuildHasher::default().hash_one(&key);
(self.inner.raw_entry_mut().from_hash(hash, |k| k.value.key(ctx) == key), hash)
}
pub fn get<'a>(&self, key: T::Key<'a>, ctx: &'a T::Ctx) -> Option<&T> {
let hash = crate::FnvBuildHasher::default().hash_one(&key);
self.inner
.raw_entry()
.from_hash(hash, |k| k.value.key(ctx) == key)
.map(|(k, _)| &k.value)
}
pub fn clear(&mut self) {
self.inner.clear();
}
pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> {
let (entry, _) = self.entry(value.key(ctx), ctx);
match entry {
hashbrown::hash_map::RawEntryMut::Occupied(o) => Some(o.remove_entry().0.value),
hashbrown::hash_map::RawEntryMut::Vacant(_) => None,
}
}
pub fn insert<'a>(&mut self, key: T::Key<'a>, value: T, ctx: &'a T::Ctx) {
let (entry, hash) = self.entry(key, ctx);
match entry {
hashbrown::hash_map::RawEntryMut::Occupied(_) => unreachable!(),
hashbrown::hash_map::RawEntryMut::Vacant(v) => {
_ = v.insert(Key { hash, value }, ())
}
}
}
pub fn get_or_insert<'a>(
&mut self,
key: T::Key<'a>,
ctx: &'a mut T::Ctx,
with: impl FnOnce(&'a mut T::Ctx) -> T,
) -> &mut T {
let (entry, hash) = self.entry(key, unsafe { &mut *(&mut *ctx as *mut _) });
match entry {
hashbrown::hash_map::RawEntryMut::Occupied(o) => &mut o.into_key_value().0.value,
hashbrown::hash_map::RawEntryMut::Vacant(v) => {
&mut v.insert(Key { hash, value: with(ctx) }, ()).0.value
}
}
}
}
}
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Ident(u32);
impl Ident {
pub const INVALID: Self = Self(u32::MAX);
const LEN_BITS: u32 = 6;
pub fn len(self) -> u32 {
self.0 & ((1 << Self::LEN_BITS) - 1)
}
pub fn is_type(self) -> bool {
ty::Builtin::try_from(self) == Ok(ty::Builtin::TYPE)
}
pub fn is_empty(self) -> bool {
self.len() == 0
}
pub fn is_null(self) -> bool {
(self.0 >> Self::LEN_BITS) == 0
}
pub fn pos(self) -> u32 {
(self.0 >> Self::LEN_BITS).saturating_sub(1)
}
pub fn new(pos: u32, len: u32) -> Option<Self> {
(len < (1 << Self::LEN_BITS)).then_some(((pos + 1) << Self::LEN_BITS) | len).map(Self)
}
pub fn range(self) -> core::ops::Range<usize> {
let (len, pos) = (self.len() as usize, self.pos() as usize);
pos..pos + len
}
fn builtin(builtin: Builtin) -> Ident {
Self(builtin.index() as _)
}
}
fn endoce_string(
literal: &str,
str: &mut Vec<u8>,
report: impl Fn(&core::str::Bytes, &str),
) -> Option<usize> {
let report = |bytes: &core::str::Bytes, msg: &_| {
report(bytes, msg);
None::<u8>
};
let decode_braces = |str: &mut Vec<u8>, bytes: &mut core::str::Bytes| {
while let Some(b) = bytes.next()
&& b != b'}'
{
let c = bytes.next().or_else(|| report(bytes, "incomplete escape sequence"))?;
let decode = |b: u8| {
Some(match b {
b'0'..=b'9' => b - b'0',
b'a'..=b'f' => b - b'a' + 10,
b'A'..=b'F' => b - b'A' + 10,
_ => report(bytes, "expected hex digit or '}'")?,
})
};
str.push(decode(b)? << 4 | decode(c)?);
}
Some(())
};
let mut bytes = literal.bytes();
let mut char_len = 0;
while let Some(b) = bytes.next() {
char_len += 1;
if b != b'\\' {
str.push(b);
continue;
}
let b = match bytes.next().or_else(|| report(&bytes, "incomplete escape sequence"))? {
b'n' => b'\n',
b'r' => b'\r',
b't' => b'\t',
b'\\' => b'\\',
b'\'' => b'\'',
b'"' => b'"',
b'0' => b'\0',
b'{' => {
decode_braces(str, &mut bytes);
continue;
}
_ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?,
};
str.push(b);
}
Some(char_len)
}
pub fn quad_sort<T>(mut slice: &mut [T], mut cmp: impl FnMut(&T, &T) -> core::cmp::Ordering) {
while let Some(it) = slice.take_first_mut() {
for ot in &mut *slice {
if cmp(it, ot) == core::cmp::Ordering::Greater {
core::mem::swap(it, ot);
}
}
}
debug_assert!(slice.is_sorted_by(|a, b| cmp(a, b) != core::cmp::Ordering::Greater));
}
type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>;
struct FnvHasher(u64);
impl core::hash::Hasher for FnvHasher {
fn finish(&self) -> u64 {
self.0
}
fn write(&mut self, bytes: &[u8]) {
self.0 = bytes.iter().fold(self.0, |hash, &byte| {
let mut hash = hash;
hash ^= byte as u64;
hash = hash.wrapping_mul(0x100000001B3);
hash
});
}
}
impl Default for FnvHasher {
fn default() -> Self {
Self(0xCBF29CE484222325)
}
}
#[cfg(test)]
pub fn run_test(
name: &'static str,
ident: &'static str,
input: &'static str,
test: fn(&'static str, &'static str, &mut alloc::string::String),
) {
use std::{
io::Write,
path::PathBuf,
string::{String, ToString},
};
let filter = std::env::var("PT_FILTER").unwrap_or_default();
if !filter.is_empty() && !name.contains(&filter) {
return;
}
let mut output = String::new();
{
struct DumpOut<'a>(&'a mut String);
impl Drop for DumpOut<'_> {
fn drop(&mut self) {
if std::thread::panicking() {
std::println!("{}", self.0);
}
}
}
let dump = DumpOut(&mut output);
test(ident, input, dump.0);
}
let mut root = PathBuf::from(
std::env::var("PT_TEST_ROOT")
.unwrap_or(concat!(env!("CARGO_MANIFEST_DIR"), "/tests").to_string()),
);
root.push(name.replace("::", "_").replace(concat!(env!("CARGO_PKG_NAME"), "_"), ""));
root.set_extension("txt");
let expected = std::fs::read_to_string(&root).unwrap_or_default();
if output == expected {
return;
}
if std::env::var("PT_UPDATE").is_ok() {
std::fs::write(&root, output).unwrap();
return;
}
if !root.exists() {
std::fs::create_dir_all(root.parent().unwrap()).unwrap();
std::fs::write(&root, vec![]).unwrap();
}
let mut proc = std::process::Command::new("diff")
.arg("-u")
.arg("--color")
.arg(&root)
.arg("-")
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::inherit())
.spawn()
.unwrap();
proc.stdin.as_mut().unwrap().write_all(output.as_bytes()).unwrap();
proc.wait().unwrap();
panic!("test failed");
}
#[cfg(test)]
fn test_parse_files(
ident: &str,
input: &str,
ctx: &mut parser::Ctx,
) -> (Vec<parser::Ast>, Vec<Vec<u8>>) {
use {
self::parser::FileKind,
std::{borrow::ToOwned, string::ToString},
};
fn find_block<'a>(mut input: &'a str, test_name: &str) -> &'a str {
const CASE_PREFIX: &str = "#### ";
const CASE_SUFFIX: &str = "\n```hb";
loop {
let Some(pos) = input.find(CASE_PREFIX) else {
unreachable!("test {test_name} not found");
};
input = unsafe { input.get_unchecked(pos + CASE_PREFIX.len()..) };
if !input.starts_with(test_name) {
continue;
}
input = unsafe { input.get_unchecked(test_name.len()..) };
if !input.starts_with(CASE_SUFFIX) {
continue;
}
input = unsafe { input.get_unchecked(CASE_SUFFIX.len()..) };
let end = input.find("```").unwrap_or(input.len());
break unsafe { input.get_unchecked(..end) };
}
}
let input = find_block(input, ident);
let mut module_map = Vec::new();
let mut embed_map = Vec::new();
let mut last_start = 0;
let mut last_module_name = "test.hb";
for (i, m) in input.match_indices("// in module: ") {
if last_module_name.ends_with(".hb") {
fmt::test::format(ident, input[last_start..i].trim());
module_map.push((last_module_name, &input[last_start..i]));
} else {
embed_map.push((last_module_name, &input[last_start..i]));
}
let (module_name, _) = input[i + m.len()..].split_once('\n').unwrap();
last_module_name = module_name;
last_start = i + m.len() + module_name.len() + 1;
}
if last_module_name.ends_with(".hb") {
fmt::test::format(ident, input[last_start..].trim());
module_map.push((last_module_name, &input[last_start..]));
} else {
embed_map.push((last_module_name, &input[last_start..]));
}
let mut loader = |path: &str, _: &str, kind| match kind {
FileKind::Module => module_map
.iter()
.position(|&(name, _)| name == path)
.ok_or("Module Not Found".to_string()),
FileKind::Embed => embed_map
.iter()
.position(|&(name, _)| name == path)
.ok_or("Embed Not Found".to_string()),
};
(
module_map
.iter()
.map(|&(path, content)| parser::Ast::new(path, content.to_owned(), ctx, &mut loader))
.collect(),
embed_map.iter().map(|&(_, content)| content.to_owned().into_bytes()).collect(),
)
}