forked from AbleOS/holey-bytes
71ba2c2486
Signed-off-by: Jakub Doka <jakub.doka2@gmail.com>
550 lines
15 KiB
Rust
550 lines
15 KiB
Rust
#![feature(
|
|
iter_array_chunks,
|
|
assert_matches,
|
|
let_chains,
|
|
if_let_guard,
|
|
macro_metavar_expr,
|
|
anonymous_lifetime_in_impl_trait,
|
|
core_intrinsics,
|
|
never_type,
|
|
unwrap_infallible,
|
|
slice_partition_dedup,
|
|
portable_simd,
|
|
iter_collect_into,
|
|
ptr_metadata,
|
|
slice_ptr_get,
|
|
slice_take,
|
|
map_try_insert,
|
|
extract_if,
|
|
ptr_internals,
|
|
iter_intersperse,
|
|
str_from_raw_parts,
|
|
ptr_sub_ptr,
|
|
slice_from_ptr_range,
|
|
iter_next_chunk,
|
|
pointer_is_aligned_to,
|
|
maybe_uninit_fill,
|
|
array_chunks
|
|
)]
|
|
#![warn(clippy::dbg_macro)]
|
|
#![expect(internal_features)]
|
|
#![no_std]
|
|
|
|
#[cfg(feature = "std")]
|
|
pub use fs::*;
|
|
pub use utils::Ent;
|
|
use {self::ty::Builtin, alloc::vec::Vec};
|
|
|
|
#[macro_use]
|
|
extern crate alloc;
|
|
|
|
#[cfg(any(feature = "std", test))]
|
|
extern crate std;
|
|
|
|
/// Contents of the crate's `README.md`, embedded at compile time.
///
/// Every test generated by [`run_tests!`] passes this text to `run_test` as its input.
#[cfg(test)]
const README: &str = include_str!("../README.md");

/// Generates one `#[test]` function per listed identifier.
///
/// Syntax: `run_tests! { runner_path: first_case; second_case; }`.
///
/// Each generated function forwards to `$crate::run_test` with:
/// * the type name of the generated function itself (via `type_name_of_val`),
/// * the identifier as a string,
/// * the embedded [`README`] text,
/// * the `$runner` path given before the colon.
#[cfg(test)]
#[macro_export]
macro_rules! run_tests {
    ($runner:path: $($name:ident;)*) => {$(
        #[test]
        fn $name() {
            $crate::run_test(
                core::any::type_name_of_val(&$name),
                stringify!($name),
                $crate::README,
                $runner,
            );
        }
    )*};
}
|
|
|
|
pub mod fmt;
|
|
#[cfg(any(feature = "std", test))]
|
|
pub mod fs;
|
|
pub mod fuzz;
|
|
pub mod lexer;
|
|
pub mod parser;
|
|
pub mod son;
|
|
pub mod ty;
|
|
|
|
pub mod backend {
|
|
use {
|
|
crate::{
|
|
parser,
|
|
son::Nodes,
|
|
ty::{self, Module, Types},
|
|
utils::EntSlice,
|
|
},
|
|
alloc::{string::String, vec::Vec},
|
|
};
|
|
|
|
pub mod hbvm;
|
|
|
|
/// Summary value returned by [`Backend::assemble_reachable`].
///
/// NOTE(review): the field meanings below are inferred from their names only;
/// confirm against the backend implementation.
pub struct AssemblySpec {
    /// Presumably the location of the entry point in the assembled output.
    pub entry: u32,
    /// Presumably the number of bytes of code that were emitted.
    pub code_length: u64,
    /// Presumably the number of bytes of data that were emitted.
    pub data_length: u64,
}
|
|
|
|
pub trait Backend {
|
|
fn assemble_reachable(
|
|
&mut self,
|
|
from: ty::Func,
|
|
types: &Types,
|
|
to: &mut Vec<u8>,
|
|
) -> AssemblySpec;
|
|
fn disasm<'a>(
|
|
&'a self,
|
|
sluce: &[u8],
|
|
eca_handler: &mut dyn FnMut(&mut &[u8]),
|
|
types: &'a Types,
|
|
files: &'a EntSlice<Module, parser::Ast>,
|
|
output: &mut String,
|
|
) -> Result<(), hbbytecode::DisasmError<'a>>;
|
|
fn emit_body(
|
|
&mut self,
|
|
id: ty::Func,
|
|
ci: &Nodes,
|
|
tys: &Types,
|
|
files: &EntSlice<Module, parser::Ast>,
|
|
);
|
|
|
|
fn emit_ct_body(
|
|
&mut self,
|
|
id: ty::Func,
|
|
ci: &Nodes,
|
|
tys: &Types,
|
|
files: &EntSlice<Module, parser::Ast>,
|
|
) {
|
|
self.emit_body(id, ci, tys, files);
|
|
}
|
|
|
|
fn assemble_bin(&mut self, from: ty::Func, types: &Types, to: &mut Vec<u8>) {
|
|
self.assemble_reachable(from, types, to);
|
|
}
|
|
}
|
|
}
|
|
|
|
mod utils;
|
|
|
|
/// Debugging helpers that fall back to no-ops when `std` is unavailable.
mod debug {
    /// Reports whether the current thread is unwinding from a panic.
    #[cfg(feature = "std")]
    pub fn panicking() -> bool {
        std::thread::panicking()
    }

    /// Without the `std` feature there is no way to ask, so this always
    /// answers `false`.
    #[cfg(not(feature = "std"))]
    pub fn panicking() -> bool {
        false
    }

    /// A captured backtrace behind an `Rc` (debug builds with `std`).
    #[cfg(all(debug_assertions, feature = "std"))]
    pub type Trace = std::rc::Rc<std::backtrace::Backtrace>;

    /// Zero-sized placeholder used in every other configuration.
    #[cfg(not(all(debug_assertions, feature = "std")))]
    pub type Trace = ();

    /// Captures a backtrace at the call site.
    #[cfg(all(debug_assertions, feature = "std"))]
    pub fn trace() -> Trace {
        std::rc::Rc::new(std::backtrace::Backtrace::capture())
    }

    /// Produces the unit placeholder; nothing is captured.
    #[cfg(not(all(debug_assertions, feature = "std")))]
    pub fn trace() -> Trace {}
}
|
|
|
|
mod ctx_map {
|
|
use core::hash::BuildHasher;
|
|
|
|
/// Integer type used for precomputed hashes.
pub type Hash = u64;

/// `BuildHasher` that produces [`IdentityHasher`]s.
pub type HashBuilder = core::hash::BuildHasherDefault<IdentityHasher>;

/// Hasher that does no hashing: it returns the last `u64` written to it.
///
/// Only `write_u64` is supported; feeding raw bytes through `write` hits
/// `unimplemented!()`.
#[derive(Default)]
pub struct IdentityHasher(u64);

impl core::hash::Hasher for IdentityHasher {
    /// Stores `i`, replacing whatever was written before.
    fn write_u64(&mut self, i: u64) {
        *self = Self(i);
    }

    /// Not supported: this hasher only accepts an already computed `u64`.
    fn write(&mut self, _: &[u8]) {
        unimplemented!()
    }

    /// Returns the stored value unchanged (zero if nothing was written).
    fn finish(&self) -> u64 {
        let Self(stored) = *self;
        stored
    }
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct Key<T> {
|
|
pub value: T,
|
|
pub hash: Hash,
|
|
}
|
|
|
|
impl<T> core::hash::Hash for Key<T> {
|
|
fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
|
|
state.write_u64(self.hash);
|
|
}
|
|
}
|
|
|
|
/// An entry whose lookup key can only be obtained with the help of an
/// external context value.
pub trait CtxEntry {
    /// Context needed to compute the key; may be unsized.
    type Ctx: ?Sized;

    /// Key type, allowed to borrow from the context for `'k`.
    type Key<'k>: core::hash::Hash + Eq;

    /// Computes this entry's key using `ctx`.
    fn key<'c>(&self, ctx: &'c Self::Ctx) -> Self::Key<'c>;
}
|
|
|
|
#[derive(Clone)]
|
|
pub struct CtxMap<T> {
|
|
inner: hashbrown::HashMap<Key<T>, (), HashBuilder>,
|
|
}
|
|
|
|
impl<T> Default for CtxMap<T> {
|
|
fn default() -> Self {
|
|
Self { inner: Default::default() }
|
|
}
|
|
}
|
|
|
|
impl<T: CtxEntry> CtxMap<T> {
|
|
pub fn entry<'a, 'b>(
|
|
&'a mut self,
|
|
key: T::Key<'b>,
|
|
ctx: &'b T::Ctx,
|
|
) -> (hashbrown::hash_map::RawEntryMut<'a, Key<T>, (), HashBuilder>, Hash) {
|
|
let hash = crate::FnvBuildHasher::default().hash_one(&key);
|
|
(self.inner.raw_entry_mut().from_hash(hash, |k| k.value.key(ctx) == key), hash)
|
|
}
|
|
|
|
pub fn get<'a>(&self, key: T::Key<'a>, ctx: &'a T::Ctx) -> Option<&T> {
|
|
let hash = crate::FnvBuildHasher::default().hash_one(&key);
|
|
self.inner
|
|
.raw_entry()
|
|
.from_hash(hash, |k| k.value.key(ctx) == key)
|
|
.map(|(k, _)| &k.value)
|
|
}
|
|
|
|
pub fn clear(&mut self) {
|
|
self.inner.clear();
|
|
}
|
|
|
|
pub fn remove(&mut self, value: &T, ctx: &T::Ctx) -> Option<T> {
|
|
let (entry, _) = self.entry(value.key(ctx), ctx);
|
|
match entry {
|
|
hashbrown::hash_map::RawEntryMut::Occupied(o) => Some(o.remove_entry().0.value),
|
|
hashbrown::hash_map::RawEntryMut::Vacant(_) => None,
|
|
}
|
|
}
|
|
|
|
pub fn insert<'a>(&mut self, key: T::Key<'a>, value: T, ctx: &'a T::Ctx) {
|
|
let (entry, hash) = self.entry(key, ctx);
|
|
match entry {
|
|
hashbrown::hash_map::RawEntryMut::Occupied(_) => unreachable!(),
|
|
hashbrown::hash_map::RawEntryMut::Vacant(v) => {
|
|
_ = v.insert(Key { hash, value }, ())
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn get_or_insert<'a>(
|
|
&mut self,
|
|
key: T::Key<'a>,
|
|
ctx: &'a mut T::Ctx,
|
|
with: impl FnOnce(&'a mut T::Ctx) -> T,
|
|
) -> &mut T {
|
|
let (entry, hash) = self.entry(key, unsafe { &mut *(&mut *ctx as *mut _) });
|
|
match entry {
|
|
hashbrown::hash_map::RawEntryMut::Occupied(o) => &mut o.into_key_value().0.value,
|
|
hashbrown::hash_map::RawEntryMut::Vacant(v) => {
|
|
&mut v.insert(Key { hash, value: with(ctx) }, ()).0.value
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
|
|
pub struct Ident(u32);
|
|
|
|
impl Ident {
|
|
pub const INVALID: Self = Self(u32::MAX);
|
|
const LEN_BITS: u32 = 6;
|
|
|
|
pub fn len(self) -> u32 {
|
|
self.0 & ((1 << Self::LEN_BITS) - 1)
|
|
}
|
|
|
|
pub fn is_type(self) -> bool {
|
|
ty::Builtin::try_from(self) == Ok(ty::Builtin::TYPE)
|
|
}
|
|
|
|
pub fn is_empty(self) -> bool {
|
|
self.len() == 0
|
|
}
|
|
|
|
pub fn is_null(self) -> bool {
|
|
(self.0 >> Self::LEN_BITS) == 0
|
|
}
|
|
|
|
pub fn pos(self) -> u32 {
|
|
(self.0 >> Self::LEN_BITS).saturating_sub(1)
|
|
}
|
|
|
|
pub fn new(pos: u32, len: u32) -> Option<Self> {
|
|
(len < (1 << Self::LEN_BITS)).then_some(((pos + 1) << Self::LEN_BITS) | len).map(Self)
|
|
}
|
|
|
|
pub fn range(self) -> core::ops::Range<usize> {
|
|
let (len, pos) = (self.len() as usize, self.pos() as usize);
|
|
pos..pos + len
|
|
}
|
|
|
|
fn builtin(builtin: Builtin) -> Ident {
|
|
Self(builtin.index() as _)
|
|
}
|
|
}
|
|
|
|
/// Resolves the escape sequences of a string literal and appends the
/// resulting bytes to `str`.
///
/// (The name is a historical misspelling of "encode"; it is kept because
/// callers use it.)
///
/// Supported escapes: `\n`, `\r`, `\t`, `\\`, `\'`, `\"`, `\0`, and
/// `\{<hex pairs>}` which appends one byte per pair of hex digits.
///
/// * `literal` - literal contents to process.
/// * `str` - output buffer; bytes are appended, existing content is kept.
/// * `report` - called with the byte iterator at the point of failure and a
///   message whenever something is wrong.
///
/// Returns `None` as soon as a malformed escape has been reported, including
/// one inside `\{..}` (that failure used to be dropped, so the function
/// carried on and returned `Some(())`). A missing trailing NUL byte is
/// reported as well, but the function still returns `Some(())` in that case,
/// as before.
fn endoce_string(
    literal: &str,
    str: &mut Vec<u8>,
    report: impl Fn(&core::str::Bytes, &str),
) -> Option<()> {
    // Shadow the callback with a version that yields `None`, so call sites can
    // write `report(..)?` to report and bail out in one step.
    let report = |bytes: &core::str::Bytes, msg: &str| {
        report(bytes, msg);
        None::<u8>
    };

    // Consumes hex digit pairs up to the closing '}' and appends the bytes.
    let decode_braces = |str: &mut Vec<u8>, bytes: &mut core::str::Bytes| -> Option<()> {
        while let Some(b) = bytes.next() {
            if b == b'}' {
                break;
            }

            let c = bytes.next().or_else(|| report(bytes, "incomplete escape sequence"))?;
            let decode = |b: u8| {
                Some(match b {
                    b'0'..=b'9' => b - b'0',
                    b'a'..=b'f' => b - b'a' + 10,
                    b'A'..=b'F' => b - b'A' + 10,
                    _ => report(bytes, "expected hex digit or '}'")?,
                })
            };
            str.push(decode(b)? << 4 | decode(c)?);
        }

        Some(())
    };

    let mut bytes = literal.bytes();
    while let Some(b) = bytes.next() {
        if b != b'\\' {
            str.push(b);
            continue;
        }

        let b = match bytes.next().or_else(|| report(&bytes, "incomplete escape sequence"))? {
            b'n' => b'\n',
            b'r' => b'\r',
            b't' => b'\t',
            b'\\' => b'\\',
            b'\'' => b'\'',
            b'"' => b'"',
            b'0' => b'\0',
            b'{' => {
                // Propagate the failure: the error has already been reported.
                decode_braces(str, &mut bytes)?;
                continue;
            }
            _ => report(&bytes, "unknown escape sequence, expected [nrt\\\"'{0]")?,
        };
        str.push(b);
    }

    if str.last() != Some(&0) {
        // Reported only; the result of the function is not affected.
        report(&bytes, "string literal must end with null byte (for now)");
    }

    Some(())
}
|
|
|
|
/// Sorts `slice` in place with a simple quadratic exchange sort.
///
/// For each position in turn, the element there is compared with every later
/// element and swapped whenever `cmp` says it is `Greater`, so after a pass
/// the position holds the smallest remaining element.
///
/// * `slice` - elements to sort.
/// * `cmp` - ordering to sort by; expected to be a consistent ordering.
///
/// In debug builds the result is verified afterwards. The check used to run
/// on the already consumed (empty) remainder and was therefore vacuous; it
/// now covers the whole slice.
pub fn quad_sort<T>(slice: &mut [T], mut cmp: impl FnMut(&T, &T) -> core::cmp::Ordering) {
    // Iterate over a reborrow so that `slice` still names the full range once
    // the loop is done.
    let mut unsorted = &mut *slice;
    while let Some((it, rest)) = core::mem::take(&mut unsorted).split_first_mut() {
        for ot in rest.iter_mut() {
            if cmp(it, ot) == core::cmp::Ordering::Greater {
                core::mem::swap(it, ot);
            }
        }
        unsorted = rest;
    }

    debug_assert!(slice.is_sorted_by(|a, b| cmp(a, b) != core::cmp::Ordering::Greater));
}
|
|
|
|
/// `BuildHasher` that produces [`FnvHasher`]s.
type FnvBuildHasher = core::hash::BuildHasherDefault<FnvHasher>;

/// 64-bit FNV-1a hasher.
struct FnvHasher(u64);

impl FnvHasher {
    /// Initial state of the 64-bit FNV hash.
    const OFFSET_BASIS: u64 = 0xCBF29CE484222325;
    /// Multiplier applied after each byte is mixed in.
    const PRIME: u64 = 0x100000001B3;
}

impl Default for FnvHasher {
    /// Starts from the FNV offset basis.
    fn default() -> Self {
        Self(Self::OFFSET_BASIS)
    }
}

impl core::hash::Hasher for FnvHasher {
    /// Mixes `bytes` into the state one at a time: xor first, then a wrapping
    /// multiply by the prime.
    fn write(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            self.0 = (self.0 ^ u64::from(byte)).wrapping_mul(Self::PRIME);
        }
    }

    /// Returns the current state.
    fn finish(&self) -> u64 {
        self.0
    }
}
|
|
|
|
/// Runs one snapshot test and compares its output with the stored expectation.
///
/// * `name` - full type name of the test function; used for filtering and to
///   derive the snapshot file name (`::` becomes `_`, the `<package name>_`
///   prefix is dropped, the extension is `.txt`).
/// * `ident` - bare test identifier, forwarded to `test`.
/// * `input` - text forwarded to `test`.
/// * `test` - the routine under test; it writes its result into the string.
///
/// Environment variables:
/// * `PT_FILTER` - when non-empty, tests whose `name` does not contain it are
///   skipped.
/// * `PT_TEST_ROOT` - snapshot directory; defaults to `<manifest dir>/tests`.
/// * `PT_UPDATE` - when set, a mismatching snapshot is overwritten instead of
///   failing the test.
///
/// # Panics
///
/// Panics with "test failed" after printing a `diff` when the output differs
/// from the snapshot, and on any I/O failure. If `test` itself panics, the
/// output produced so far is printed.
#[cfg(test)]
pub fn run_test(
    name: &'static str,
    ident: &'static str,
    input: &'static str,
    test: fn(&'static str, &'static str, &mut alloc::string::String),
) {
    use std::{io::Write, path::PathBuf, string::String};

    let filter = std::env::var("PT_FILTER").unwrap_or_default();
    if !filter.is_empty() && !name.contains(filter.as_str()) {
        return;
    }

    let mut output = String::new();
    {
        // Prints whatever was produced so far if the test panics midway.
        struct DumpOut<'a>(&'a mut String);
        impl Drop for DumpOut<'_> {
            fn drop(&mut self) {
                if std::thread::panicking() {
                    std::println!("{}", self.0);
                }
            }
        }

        let dump = DumpOut(&mut output);
        test(ident, input, dump.0);
    }

    // Build the default lazily; it is not needed when PT_TEST_ROOT is set.
    let root_dir = std::env::var("PT_TEST_ROOT")
        .unwrap_or_else(|_| String::from(concat!(env!("CARGO_MANIFEST_DIR"), "/tests")));
    let mut root = PathBuf::from(root_dir);
    root.push(name.replace("::", "_").replace(concat!(env!("CARGO_PKG_NAME"), "_"), ""));
    root.set_extension("txt");

    let expected = std::fs::read_to_string(&root).unwrap_or_default();
    if output == expected {
        return;
    }

    // Both remaining paths touch the snapshot file, so make sure its directory
    // exists. Previously only the diff path did this, which made PT_UPDATE
    // panic for a fresh snapshot directory.
    if let Some(dir) = root.parent() {
        std::fs::create_dir_all(dir).unwrap();
    }

    if std::env::var("PT_UPDATE").is_ok() {
        std::fs::write(&root, output).unwrap();
        return;
    }

    if !root.exists() {
        // Give `diff` an empty file to compare against.
        std::fs::write(&root, "").unwrap();
    }

    let mut proc = std::process::Command::new("diff")
        .arg("-u")
        .arg("--color")
        .arg(&root)
        .arg("-")
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::inherit())
        .spawn()
        .unwrap();

    proc.stdin.as_mut().unwrap().write_all(output.as_bytes()).unwrap();

    proc.wait().unwrap();

    panic!("test failed");
}
|
|
|
|
/// Extracts the example for test `ident` from README-style `input`, splits it
/// into files and parses the modules.
///
/// The example is the ```` ```hb ```` block that directly follows the heading
/// `#### <ident>`. Inside it, a line `// in module: <name>` starts a new
/// file; text before the first such line belongs to `test.hb`. Files whose
/// name ends in `.hb` are modules (each is also run through
/// `fmt::test::format`), everything else is an embed.
///
/// Returns the parsed modules and the raw bytes of the embeds, both in order
/// of appearance.
///
/// # Panics
///
/// Panics when no matching example exists or when a `// in module:` line is
/// not terminated by a newline.
#[cfg(test)]
fn test_parse_files(
    ident: &str,
    input: &str,
    ctx: &mut parser::Ctx,
) -> (Vec<parser::Ast>, Vec<Vec<u8>>) {
    use {
        self::parser::FileKind,
        std::{borrow::ToOwned, string::ToString},
    };

    /// Returns the contents of the code block belonging to `test_name`.
    fn find_block<'a>(mut input: &'a str, test_name: &str) -> &'a str {
        const CASE_PREFIX: &str = "#### ";
        const CASE_SUFFIX: &str = "\n```hb";
        loop {
            let Some(pos) = input.find(CASE_PREFIX) else {
                panic!("test {test_name} not found");
            };
            input = &input[pos + CASE_PREFIX.len()..];

            // The heading has to be exactly the test name, directly followed
            // by the opening fence.
            let Some(body) =
                input.strip_prefix(test_name).and_then(|rest| rest.strip_prefix(CASE_SUFFIX))
            else {
                continue;
            };

            let end = body.find("```").unwrap_or(body.len());
            break &body[..end];
        }
    }

    /// Files `content` under `name` as a module or an embed; modules are also
    /// passed through the formatter test.
    fn push_section<'a>(
        ident: &str,
        name: &'a str,
        content: &'a str,
        modules: &mut Vec<(&'a str, &'a str)>,
        embeds: &mut Vec<(&'a str, &'a str)>,
    ) {
        if name.ends_with(".hb") {
            fmt::test::format(ident, content.trim());
            modules.push((name, content));
        } else {
            embeds.push((name, content));
        }
    }

    const MODULE_MARKER: &str = "// in module: ";

    let input = find_block(input, ident);

    let mut module_map = Vec::new();
    let mut embed_map = Vec::new();
    let mut last_start = 0;
    let mut last_module_name = "test.hb";
    for (i, m) in input.match_indices(MODULE_MARKER) {
        push_section(
            ident,
            last_module_name,
            &input[last_start..i],
            &mut module_map,
            &mut embed_map,
        );

        let name_start = i + m.len();
        let (module_name, _) = input[name_start..]
            .split_once('\n')
            .expect("module marker line must end with a newline");
        last_module_name = module_name;
        // Skip the name and the newline that ends the marker line.
        last_start = name_start + module_name.len() + 1;
    }
    push_section(ident, last_module_name, &input[last_start..], &mut module_map, &mut embed_map);

    // Resolves a path to the index of the file within its own list.
    let mut loader = |path: &str, _: &str, kind| match kind {
        FileKind::Module => module_map
            .iter()
            .position(|&(name, _)| name == path)
            .ok_or_else(|| "Module Not Found".to_string()),
        FileKind::Embed => embed_map
            .iter()
            .position(|&(name, _)| name == path)
            .ok_or_else(|| "Embed Not Found".to_string()),
    };

    (
        module_map
            .iter()
            .map(|&(path, content)| parser::Ast::new(path, content.to_owned(), ctx, &mut loader))
            .collect(),
        embed_map.iter().map(|&(_, content)| content.to_owned().into_bytes()).collect(),
    )
}
|