holey-bytes/lang/src/fs.rs

420 lines
13 KiB
Rust
Raw Normal View History

2024-09-30 12:09:17 -05:00
use {
crate::{
parser::{Ast, Ctx, FileKind},
son::{self, hbvm::HbvmBackend},
2024-11-16 07:22:34 -06:00
ty, FnvBuildHasher,
2024-09-30 12:09:17 -05:00
},
alloc::{string::String, vec::Vec},
2024-10-27 12:04:50 -05:00
core::{fmt::Write, num::NonZeroUsize, ops::Deref},
2024-09-30 12:09:17 -05:00
hashbrown::hash_map,
std::{
collections::VecDeque,
2024-09-30 12:27:00 -05:00
eprintln,
2024-09-30 12:09:17 -05:00
ffi::OsStr,
2024-10-13 08:33:57 -05:00
io::{self, Write as _},
2024-09-30 12:09:17 -05:00
path::{Path, PathBuf},
string::ToString,
sync::Mutex,
},
};
2024-11-16 07:22:34 -06:00
/// File-local hash map alias: `hashbrown::HashMap` using the crate's `FnvBuildHasher`
/// as its hasher instead of hashbrown's default one.
type HashMap<K, V> = hashbrown::HashMap<K, V, FnvBuildHasher>;
2024-09-30 12:27:00 -05:00
/// Minimal [`log::Log`] implementation that prints every record to standard error.
pub struct Logger;

impl log::Log for Logger {
    /// Accepts every record, regardless of its metadata.
    fn enabled(&self, _: &log::Metadata) -> bool {
        true
    }

    /// Prints the formatted message of `record` to stderr on its own line.
    fn log(&self, record: &log::Record) {
        if self.enabled(record.metadata()) {
            eprintln!("{}", record.args())
        }
    }

    /// No-op: this logger keeps no buffer of its own.
    fn flush(&self) {}
}
2024-09-30 12:09:17 -05:00
#[derive(Default)]
pub struct Options<'a> {
2024-09-30 12:09:17 -05:00
pub fmt: bool,
pub fmt_stdout: bool,
pub dump_asm: bool,
pub extra_threads: usize,
pub resolver: Option<PathResolver<'a>>,
2024-09-30 12:09:17 -05:00
}
impl<'a> Options<'a> {
pub fn from_args(
args: &[&str],
out: &mut Vec<u8>,
resolvers: &'a [(&str, PathResolver)],
) -> std::io::Result<Self> {
2024-09-30 12:35:25 -05:00
if args.contains(&"--help") || args.contains(&"-h") {
writeln!(out, "Usage: hbc [OPTIONS...] <FILE>")?;
writeln!(out, include_str!("../command-help.txt"))?;
2024-09-30 12:35:25 -05:00
return Err(std::io::ErrorKind::Other.into());
}
Ok(Options {
fmt: args.contains(&"--fmt"),
fmt_stdout: args.contains(&"--fmt-stdout"),
dump_asm: args.contains(&"--dump-asm"),
extra_threads: args
.iter()
.position(|&a| a == "--threads")
.map(|i| {
args[i + 1].parse::<NonZeroUsize>().map_err(|e| {
writeln!(out, "--threads expects non zero integer: {e}")
.err()
.unwrap_or(std::io::ErrorKind::Other.into())
2024-09-30 12:35:25 -05:00
})
})
.transpose()?
.map_or(1, NonZeroUsize::get)
- 1,
resolver: args
.iter()
.position(|&a| a == "--path-resolver")
.map(|i| {
resolvers.iter().find(|&&(n, _)| args[i + 1] == n).map(|&(_, r)| r).ok_or_else(
|| {
writeln!(
out,
"--path-resolver can only be one of: {}",
resolvers
.iter()
.map(|&(n, _)| n)
.intersperse(", ")
.collect::<String>()
)
.err()
.unwrap_or(std::io::ErrorKind::Other.into())
},
)
})
.transpose()?,
2024-09-30 12:35:25 -05:00
})
}
}
2024-11-17 11:15:58 -06:00
pub fn run_compiler(
root_file: &str,
options: Options,
out: &mut Vec<u8>,
warnings: &mut String,
) -> std::io::Result<()> {
let parsed = parse_from_fs(
options.extra_threads,
root_file,
options.resolver.unwrap_or(&default_resolve),
)?;
2024-09-30 12:09:17 -05:00
if (options.fmt || options.fmt_stdout) && !parsed.errors.is_empty() {
*out = parsed.errors.into_bytes();
return Err(std::io::Error::other("fmt fialed (errors are in out)"));
2024-09-30 12:09:17 -05:00
}
if options.fmt {
let mut output = String::new();
for ast in parsed.ast {
write!(output, "{ast}").unwrap();
if ast.file.deref().trim() != output.as_str().trim() {
std::fs::write(&*ast.path, &output)?;
}
output.clear();
2024-09-30 12:09:17 -05:00
}
} else if options.fmt_stdout {
write!(out, "{}", &parsed.ast[0])?;
2024-10-27 07:57:00 -05:00
} else {
let mut backend = HbvmBackend::default();
2024-11-14 14:34:31 -06:00
let mut ctx = crate::son::CodegenCtx::default();
*ctx.parser.errors.get_mut() = parsed.errors;
let mut codegen = son::Codegen::new(&mut backend, &parsed.ast, &mut ctx);
codegen.push_embeds(parsed.embeds);
2024-11-08 03:25:34 -06:00
codegen.generate(ty::Module::MAIN);
2024-10-23 05:26:07 -05:00
2024-11-17 11:15:58 -06:00
*warnings = core::mem::take(&mut *codegen.warnings.borrow_mut());
2024-10-23 05:26:07 -05:00
if !codegen.errors.borrow().is_empty() {
drop(codegen);
*out = ctx.parser.errors.into_inner().into_bytes();
return Err(std::io::Error::other("compilation faoled (errors are in out)"));
2024-10-23 05:26:07 -05:00
}
codegen.assemble(out);
2024-09-30 12:09:17 -05:00
if options.dump_asm {
let mut disasm = String::new();
codegen.disasm(&mut disasm, out).map_err(|e| io::Error::other(e.to_string()))?;
*out = disasm.into_bytes();
2024-09-30 12:09:17 -05:00
}
}
Ok(())
}
/// Blocking multi-consumer work queue that closes itself once every consumer is idle.
///
/// `max_waiters` is the number of threads that call [`TaskQueue::pop`]. When a consumer
/// finds the queue empty while all other consumers are already parked, no new work can
/// arrive, so every consumer gets `None`.
struct TaskQueue<T> {
    inner: Mutex<TaskQueueInner<T>>,
}

impl<T> TaskQueue<T> {
    /// Creates an empty queue shared by `max_waiters` consumers.
    fn new(max_waiters: usize) -> Self {
        Self { inner: Mutex::new(TaskQueueInner::new(max_waiters)) }
    }

    /// Enqueues one message, handing it directly to a parked consumer if there is one.
    pub fn push(&self, message: T) {
        self.extend([message]);
    }

    /// Enqueues all `messages` while holding the queue lock once.
    pub fn extend(&self, messages: impl IntoIterator<Item = T>) {
        self.inner.lock().unwrap().push(messages);
    }

    /// Takes the next message, blocking while the queue is empty.
    ///
    /// Returns `None` once the queue is empty and all consumers are idle.
    pub fn pop(&self) -> Option<T> {
        TaskQueueInner::pop(&self.inner)
    }
}

/// Mailbox of a parked consumer; lives on that consumer's stack inside `pop`.
enum TaskSlot<T> {
    /// Nothing has been delivered yet.
    Waiting,
    /// A producer handed over this message.
    Delivered(T),
    /// The queue is finished; the consumer should return `None`.
    Closed,
}

/// Lock-protected state of a [`TaskQueue`].
struct TaskQueueInner<T> {
    /// Number of consumers; used to detect that all of them are idle.
    max_waiters: usize,
    /// Messages not yet claimed by any consumer.
    messages: VecDeque<T>,
    /// Parked consumers: a pointer to each one's mailbox and its thread handle.
    parked: VecDeque<(*mut TaskSlot<T>, std::thread::Thread)>,
}

// SAFETY: the raw pointers in `parked` refer to mailboxes on the stacks of threads that are
// blocked in `pop`. An entry is removed before (or when) its owner is woken, and every
// access to a mailbox after its pointer was queued happens with the mutex held.
unsafe impl<T: Send> Send for TaskQueueInner<T> {}
unsafe impl<T: Send + Sync> Sync for TaskQueueInner<T> {}

impl<T> TaskQueueInner<T> {
    fn new(max_waiters: usize) -> Self {
        Self { max_waiters, messages: Default::default(), parked: Default::default() }
    }

    /// Delivers each message to a parked consumer if any, otherwise queues it.
    fn push(&mut self, messages: impl IntoIterator<Item = T>) {
        for msg in messages {
            if let Some((dest, thread)) = self.parked.pop_front() {
                // SAFETY: `dest` was queued by a consumer that does not leave `pop` before
                // its mailbox is filled, and `&mut self` is only reachable through the
                // mutex, so nobody else touches the mailbox right now.
                unsafe { *dest = TaskSlot::Delivered(msg) };
                thread.unpark();
            } else {
                self.messages.push_back(msg);
            }
        }
    }

    /// Consumer side, see [`TaskQueue::pop`].
    fn pop(s: &Mutex<Self>) -> Option<T> {
        let mut res = TaskSlot::Waiting;
        // Derive the mailbox pointer exactly once and use only this pointer from here on.
        // Re-borrowing `res` after the pointer was queued would invalidate the queued copy
        // under Rust's aliasing rules, making a later delivery through it undefined
        // behaviour (reachable after a spurious wake-up).
        let slot: *mut TaskSlot<T> = &mut res;
        {
            let mut inner = s.lock().unwrap();
            if let Some(msg) = inner.messages.pop_front() {
                return Some(msg);
            }
            // This consumer is idle too: if all others are already parked, nobody is left
            // to produce work, so close the queue for everyone.
            if inner.max_waiters == inner.parked.len() + 1 {
                for (dest, thread) in inner.parked.drain(..) {
                    // SAFETY: same argument as in `push`; the lock is held.
                    unsafe { *dest = TaskSlot::Closed };
                    thread.unpark();
                }
                return None;
            }
            inner.parked.push_back((slot, std::thread::current()));
        }

        loop {
            std::thread::park();
            // `park` may return spuriously, so inspect the mailbox under the lock.
            let _guard = s.lock().unwrap();
            // SAFETY: `slot` points to `res`, which is alive for the whole function, and
            // the lock excludes concurrent writers.
            match unsafe { core::ptr::replace(slot, TaskSlot::Waiting) } {
                TaskSlot::Delivered(msg) => return Some(msg),
                TaskSlot::Closed => return None,
                TaskSlot::Waiting => {}
            }
        }
    }
}
2024-10-13 08:22:16 -05:00
pub struct Loaded {
ast: Vec<Ast>,
embeds: Vec<Vec<u8>>,
errors: String,
2024-10-13 08:22:16 -05:00
}
fn default_resolve(path: &str, from: &str, tmp: &mut PathBuf) -> Result<PathBuf, CantLoadFile> {
tmp.clear();
match Path::new(from).parent() {
Some(parent) => tmp.extend([parent, Path::new(path)]),
None => tmp.push(path),
};
2024-09-30 12:09:17 -05:00
tmp.canonicalize().map_err(|source| CantLoadFile { path: std::mem::take(tmp), source })
}
2024-09-30 12:09:17 -05:00
/// Callback that turns a requested path into a file path, called as `fn(path, from, tmp)`.
///
/// As implemented by `default_resolve`: `path` is the requested path, `from` is the file
/// the request originates from (its parent directory is the base for `path`), and `tmp`
/// is a reusable scratch buffer. A failure is reported as [`CantLoadFile`].
pub type PathResolver<'a> =
&'a (dyn Fn(&str, &str, &mut PathBuf) -> Result<PathBuf, CantLoadFile> + Send + Sync);
#[derive(Debug)]
pub struct CantLoadFile {
pub path: PathBuf,
pub source: io::Error,
}
2024-09-30 12:09:17 -05:00
impl core::fmt::Display for CantLoadFile {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "can't load file: {}", display_rel_path(&self.path),)
2024-09-30 12:09:17 -05:00
}
}
2024-09-30 12:09:17 -05:00
impl core::error::Error for CantLoadFile {
fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
Some(&self.source)
2024-09-30 12:09:17 -05:00
}
}
2024-09-30 12:09:17 -05:00
impl From<CantLoadFile> for io::Error {
fn from(e: CantLoadFile) -> Self {
io::Error::new(io::ErrorKind::InvalidData, e)
2024-09-30 12:09:17 -05:00
}
}
2024-09-30 12:09:17 -05:00
pub fn parse_from_fs(
extra_threads: usize,
root: &str,
resolve: PathResolver,
) -> io::Result<Loaded> {
2024-11-08 03:25:34 -06:00
type Task = (usize, PathBuf);
2024-09-30 12:09:17 -05:00
2024-11-16 07:22:34 -06:00
let seen_modules = Mutex::new(HashMap::<PathBuf, usize>::default());
let seen_embeds = Mutex::new(HashMap::<PathBuf, usize>::default());
2024-09-30 12:09:17 -05:00
let tasks = TaskQueue::<Task>::new(extra_threads + 1);
let ast = Mutex::new(Vec::<io::Result<Ast>>::new());
2024-10-13 08:22:16 -05:00
let embeds = Mutex::new(Vec::<Vec<u8>>::new());
2024-09-30 12:09:17 -05:00
2024-10-13 08:22:16 -05:00
let loader = |path: &str, from: &str, kind: FileKind, tmp: &mut _| {
let mut physiscal_path = resolve(path, from, tmp)?;
2024-09-30 12:09:17 -05:00
2024-10-13 08:22:16 -05:00
match kind {
FileKind::Module => {
let id = {
let mut seen = seen_modules.lock().unwrap();
let len = seen.len();
match seen.entry(physiscal_path) {
hash_map::Entry::Occupied(entry) => {
return Ok(*entry.get());
}
hash_map::Entry::Vacant(entry) => {
physiscal_path = entry.insert_entry(len as _).key().clone();
2024-11-08 03:25:34 -06:00
len
2024-10-13 08:22:16 -05:00
}
}
};
if !physiscal_path.exists() {
return Err(io::Error::new(
io::ErrorKind::NotFound,
format!("can't find file: {}", display_rel_path(&physiscal_path)),
));
2024-09-30 12:09:17 -05:00
}
2024-10-13 08:22:16 -05:00
tasks.push((id, physiscal_path));
Ok(id)
}
FileKind::Embed => {
let id = {
let mut seen = seen_embeds.lock().unwrap();
let len = seen.len();
match seen.entry(physiscal_path) {
hash_map::Entry::Occupied(entry) => {
return Ok(*entry.get());
}
hash_map::Entry::Vacant(entry) => {
physiscal_path = entry.insert_entry(len as _).key().clone();
2024-11-08 03:25:34 -06:00
len
2024-10-13 08:22:16 -05:00
}
}
};
let content = std::fs::read(&physiscal_path).map_err(|e| {
io::Error::new(
e.kind(),
format!(
"can't load embed file: {}: {e}",
display_rel_path(&physiscal_path)
),
)
})?;
let mut embeds = embeds.lock().unwrap();
if id >= embeds.len() {
embeds.resize(id + 1, Default::default());
2024-09-30 12:09:17 -05:00
}
embeds[id] = content;
2024-10-13 08:22:16 -05:00
Ok(id)
2024-09-30 12:09:17 -05:00
}
}
};
let execute_task = |ctx: &mut _, (_, path): Task, tmp: &mut _| {
2024-09-30 12:09:17 -05:00
let path = path.to_str().ok_or_else(|| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("path contains invalid characters: {}", display_rel_path(&path)),
)
})?;
2024-10-13 08:22:16 -05:00
Ok(Ast::new(path, std::fs::read_to_string(path)?, ctx, &mut |path, from, kind| {
loader(path, from, kind, tmp).map_err(|e| e.to_string())
2024-09-30 12:09:17 -05:00
}))
};
let thread = || {
let mut ctx = Ctx::default();
let mut tmp = PathBuf::new();
2024-09-30 12:09:17 -05:00
while let Some(task @ (indx, ..)) = tasks.pop() {
let res = execute_task(&mut ctx, task, &mut tmp);
2024-09-30 12:09:17 -05:00
let mut ast = ast.lock().unwrap();
2024-11-08 03:25:34 -06:00
let len = ast.len().max(indx + 1);
2024-09-30 12:09:17 -05:00
ast.resize_with(len, || Err(io::ErrorKind::InvalidData.into()));
2024-11-08 03:25:34 -06:00
ast[indx] = res;
2024-09-30 12:09:17 -05:00
}
ctx.errors.into_inner()
2024-09-30 12:09:17 -05:00
};
let path = Path::new(root).canonicalize().map_err(|e| {
io::Error::new(e.kind(), format!("can't canonicalize root file path ({root})"))
})?;
2024-10-13 08:22:16 -05:00
seen_modules.lock().unwrap().insert(path.clone(), 0);
2024-09-30 12:09:17 -05:00
tasks.push((0, path));
let errors = if extra_threads == 0 {
thread()
2024-09-30 12:09:17 -05:00
} else {
std::thread::scope(|s| {
(0..extra_threads + 1)
.map(|_| s.spawn(thread))
.collect::<Vec<_>>()
.into_iter()
.map(|t| t.join().unwrap())
.collect::<String>()
})
};
2024-09-30 12:09:17 -05:00
2024-10-13 08:22:16 -05:00
Ok(Loaded {
ast: ast.into_inner().unwrap().into_iter().collect::<io::Result<Vec<_>>>()?,
embeds: embeds.into_inner().unwrap(),
errors,
2024-10-13 08:22:16 -05:00
})
2024-09-30 12:09:17 -05:00
}
/// Returns a displayable form of `path`, shown relative to the current working directory
/// when `path` lies inside it and unchanged otherwise.
///
/// The working directory is looked up once, on first use, and cached for later calls; if
/// it cannot be determined an empty path is used as the prefix.
pub fn display_rel_path(path: &(impl AsRef<OsStr> + ?Sized)) -> std::path::Display {
    static CWD: std::sync::LazyLock<PathBuf> =
        std::sync::LazyLock::new(|| std::env::current_dir().unwrap_or_default());

    let full = Path::new(path);
    match full.strip_prefix(CWD.as_path()) {
        Ok(relative) => relative.display(),
        Err(_) => full.display(),
    }
}