:trollface: 2

pull/4/head
azur 2022-12-13 23:13:54 +07:00
parent 5df43cf779
commit 348d96ec8e
12 changed files with 1498 additions and 0 deletions

8
compiler/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "compiler"
version = "0.1.0"
edition = "2021"
[dependencies]
parser = { path = "../parser" }
vm = { path = "../vm" }

83
compiler/src/lib.rs Normal file
View File

@ -0,0 +1,83 @@
#![allow(clippy::new_without_default)]
use parser::{Expr, Literal, Span, Stmt};
use vm::model::{Instr, Value};
/// Translator from the parser's AST (`Expr` / `Stmt`) to VM instructions (`Instr`).
///
/// The struct has no fields; all work happens in the `compile_*` methods.
pub struct Compiler {}
impl Compiler {
pub fn new() -> Self {
Self {}
}
pub fn compile_expr(&mut self, expr: Expr) -> Vec<Instr> {
match expr {
Expr::Error => unreachable!(),
Expr::Literal(x) => match x {
Literal::Num(x) => vec![Instr::NumPush(x)],
Literal::Bool(x) => vec![Instr::BoolPush(x)],
Literal::Str(x) => vec![Instr::StrPush(x)],
},
Expr::Sym(name) => vec![Instr::Get(name)],
Expr::Vec(xs) => {
let mut instrs = vec![];
let count = xs.len();
for x in xs {
instrs.extend(self.compile_expr(x.0));
}
instrs.push(Instr::ListMake(count));
instrs
}
Expr::Unary(_, _) => todo!(),
Expr::Binary(_, _, _) => todo!(),
Expr::Lambda(args, body) => {
vec![Instr::FuncMake(args, self.compile_expr(body.0))]
}
Expr::Call(f, xs) => {
let mut instrs = vec![];
for x in xs {
instrs.extend(self.compile_expr(x.0));
}
if f.0 == Expr::Sym("print".to_string()) {
instrs.push(Instr::Print);
} else {
instrs.extend(self.compile_expr(f.0));
instrs.push(Instr::FuncApply);
}
instrs
}
Expr::Let(_, _) => todo!(),
Expr::Do(es) => {
let mut instrs = vec![];
for e in es {
instrs.extend(self.compile_expr(e.0));
}
instrs
}
}
}
pub fn compile_stmt(&mut self, stmt: Stmt) -> Vec<Instr> {
match stmt {
Stmt::Fun(name, args, body) => {
let is_main = name == "main";
let mut instrs = vec![
Instr::FuncMake(args, self.compile_expr(body.0)),
Instr::Set(name),
];
if is_main {
instrs.pop();
instrs.push(Instr::FuncApply);
}
instrs
}
}
}
pub fn compile_program(&mut self, stmts: Vec<(Stmt, Span)>) -> Vec<Instr> {
let mut instrs = vec![];
for (stmt, _) in stmts {
instrs.extend(self.compile_stmt(stmt));
}
instrs
}
}

9
entry/Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "entry"
version = "0.1.0"
edition = "2021"
[dependencies]
parser = { path = "../parser" }
compiler = { path = "../compiler" }
vm = { path = "../vm" }

35
entry/src/main.rs Normal file
View File

@ -0,0 +1,35 @@
use compiler::Compiler;
use parser::{lex, parse, report};
use vm::exec::Executor;
/// Entry point: lexes, parses, compiles and runs the file given as the first
/// command-line argument.
///
/// The program is only compiled and executed when lexing and parsing produced
/// no errors. The parser recovers from errors by inserting `Expr::Error`
/// nodes, which the compiler treats as unreachable, so running a recovered
/// AST would panic instead of showing the diagnostics.
///
/// # Panics
///
/// Panics when no path is given or the file cannot be read.
fn main() {
    let path = std::env::args().nth(1).expect("No file path provided");
    let src = std::fs::read_to_string(path).expect("Failed to read file");

    let (tokens, lex_errors) = lex(src.clone());
    let (ast, parse_errors) = match tokens {
        // The lexer runs over `&str`; its spans are presumably character
        // indices (chumsky 0.8 behaviour), so the end-of-input position uses
        // the character count rather than the byte length.
        Some(tokens) => parse(tokens, src.chars().count()),
        None => (None, Vec::new()),
    };

    if lex_errors.is_empty() && parse_errors.is_empty() {
        if let Some(ast) = ast {
            let mut compiler = Compiler::new();
            let instrs = compiler.compile_program(ast);
            let mut executor = Executor::new(instrs);
            if let Err(e) = executor.run() {
                println!("Runtime error: {:?}", e);
            }
        }
        return;
    }

    // Report lexer errors first, then parser errors, all rendered as strings.
    lex_errors
        .into_iter()
        .map(|e| e.map(|c| c.to_string()))
        .chain(parse_errors.into_iter().map(|e| e.map(|t| t.to_string())))
        .for_each(|e| report(e, &src));
}

122
parser/Cargo.lock generated Normal file
View File

@ -0,0 +1,122 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "ahash"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217"
dependencies = [
"const-random",
]
[[package]]
name = "ariadne"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1cb2a2046bea8ce5e875551f5772024882de0b540c7f93dfc5d6cf1ca8b030c"
dependencies = [
"yansi",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4"
dependencies = [
"ahash",
]
[[package]]
name = "const-random"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368a7a772ead6ce7e1de82bfb04c485f3db8ec744f72925af5735e29a22cc18e"
dependencies = [
"const-random-macro",
"proc-macro-hack",
]
[[package]]
name = "const-random-macro"
version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d7d6ab3c3a2282db210df5f02c4dab6e0a7057af0fb7ebd4070f30fe05c0ddb"
dependencies = [
"getrandom",
"once_cell",
"proc-macro-hack",
"tiny-keccak",
]
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "getrandom"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.138"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db6d7e329c562c5dfab7a46a2afabc8b987ab9a4834c9d1ca04dc54c1546cef8"
[[package]]
name = "once_cell"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86f0b0d4bf799edbc74508c1e8bf170ff5f41238e5f8225603ca7caaae2b7860"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "yansi"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec"
[[package]]
name = "yuushi"
version = "0.1.0"
dependencies = [
"ariadne",
"chumsky",
]

8
parser/Cargo.toml Normal file
View File

@ -0,0 +1,8 @@
[package]
name = "parser"
version = "0.1.0"
edition = "2021"
[dependencies]
chumsky = "0.8.0"
ariadne = "0.1.5"

555
parser/src/lib.rs Normal file
View File

@ -0,0 +1,555 @@
#![feature(trait_alias)]
use ariadne::{Color, Fmt, Label, Report, ReportKind, Source};
use chumsky::{error, prelude::*, Stream};
pub type Span = std::ops::Range<usize>;
pub type Spanned<T> = (T, Span);
/// The three bracket families recognised by the lexer.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Delimiter {
// `(` and `)`
Paren,
// `[` and `]`
Brack,
// `{` and `}`
Brace,
}
/// A lexical token. The textual form of each variant is given by its
/// `Display` implementation.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Token {
// Literals and identifiers.
Num(i64),
Bool(bool),
Str(String),
Sym(String),
// Arithmetic operators: `+ - * /`.
Add,
Sub,
Mul,
Div,
// Comparison operators: `< <= > >= == !=`.
Lt,
Le,
Gt,
Ge,
Eq,
Ne,
// Logical operators: `&& || !`.
And,
Or,
Not,
// Other punctuation: `|> = -> \ , ;`.
Pipe,
Assign,
Arrow,
Backslash,
Comma,
Semi,
// Opening / closing bracket of the given family.
Open(Delimiter),
Close(Delimiter),
// Keywords: `fun let in do end`.
Fun,
Let,
In,
Do,
End,
}
impl std::fmt::Display for Token {
    /// Writes the token the way it appears in source text. String literals
    /// are written without their surrounding quotes.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Tokens carrying a payload are formatted directly; every other token
        // has a fixed spelling that is looked up and written in one place.
        let spelling = match self {
            Token::Num(n) => return write!(f, "{}", n),
            Token::Bool(b) => return write!(f, "{}", b),
            Token::Str(s) | Token::Sym(s) => return write!(f, "{}", s),
            Token::Add => "+",
            Token::Sub => "-",
            Token::Mul => "*",
            Token::Div => "/",
            Token::Lt => "<",
            Token::Le => "<=",
            Token::Gt => ">",
            Token::Ge => ">=",
            Token::Eq => "==",
            Token::Ne => "!=",
            Token::And => "&&",
            Token::Or => "||",
            Token::Not => "!",
            Token::Pipe => "|>",
            Token::Assign => "=",
            Token::Arrow => "->",
            Token::Backslash => "\\",
            Token::Comma => ",",
            Token::Semi => ";",
            Token::Open(Delimiter::Paren) => "(",
            Token::Open(Delimiter::Brack) => "[",
            Token::Open(Delimiter::Brace) => "{",
            Token::Close(Delimiter::Paren) => ")",
            Token::Close(Delimiter::Brack) => "]",
            Token::Close(Delimiter::Brace) => "}",
            Token::Fun => "fun",
            Token::Let => "let",
            Token::In => "in",
            Token::Do => "do",
            Token::End => "end",
        };
        f.write_str(spelling)
    }
}
pub fn lexer() -> impl Parser<char, Vec<(Token, Span)>, Error = Simple<char>> {
let int = text::int(10).map(|s: String| Token::Num(s.parse().unwrap()));
let string = just('"')
.ignore_then(filter(|c| *c != '"').repeated())
.then_ignore(just('"'))
.collect::<String>()
.map(Token::Str);
let symbol = choice((
just("->").to(Token::Arrow),
just('+').to(Token::Add),
just('-').to(Token::Sub),
just('*').to(Token::Mul),
just('/').to(Token::Div),
just("|>").to(Token::Pipe),
just("<=").to(Token::Le),
just('<').to(Token::Lt),
just(">=").to(Token::Ge),
just('>').to(Token::Gt),
just("!=").to(Token::Ne),
just("==").to(Token::Eq),
just("&&").to(Token::And),
just("||").to(Token::Or),
just('!').to(Token::Not),
just('=').to(Token::Assign),
just('\\').to(Token::Backslash),
just(',').to(Token::Comma),
just(';').to(Token::Semi),
));
let delim = choice((
just('(').to(Token::Open(Delimiter::Paren)),
just(')').to(Token::Close(Delimiter::Paren)),
just('[').to(Token::Open(Delimiter::Brack)),
just(']').to(Token::Close(Delimiter::Brack)),
just('{').to(Token::Open(Delimiter::Brace)),
just('}').to(Token::Close(Delimiter::Brace)),
));
let keyword = text::ident().map(|s: String| match s.as_str() {
"true" => Token::Bool(true),
"false" => Token::Bool(false),
"fun" => Token::Fun,
"let" => Token::Let,
"in" => Token::In,
"do" => Token::Do,
"end" => Token::End,
_ => Token::Sym(s),
});
let token = int
.or(string)
.or(symbol)
.or(delim)
.or(keyword)
.recover_with(skip_then_retry_until([]));
let comment = just("--").then(take_until(just('\n'))).padded();
token
.padded_by(comment.repeated())
.map_with_span(|token, span| (token, span))
.padded()
.repeated()
}
/// Lexes `src` with error recovery.
///
/// Returns the tokens (if any could be produced) together with every lexing
/// error encountered.
pub fn lex(src: String) -> (Option<Vec<(Token, Span)>>, Vec<Simple<char>>) {
    lexer().parse_recovery(src.as_str())
}
/// A literal value appearing in source code.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
Num(i64),
Bool(bool),
Str(String),
}
/// Prefix operators. The expression parser maps `-` to `Neg` and `!` to `Not`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum UnaryOp {
Neg,
Not,
}
/// Infix operators, one per operator token accepted by the expression parser.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BinaryOp {
// Arithmetic
Add,
Sub,
Mul,
Div,
// Comparison
Lt,
Le,
Gt,
Ge,
Eq,
Ne,
// Logical
And,
Or,
// `|>`
Pipe,
}
/// An expression node of the AST. Child expressions carry their source span.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Expr {
// Placeholder produced when the parser recovered from a syntax error.
Error,
Literal(Literal),
// A bare identifier.
Sym(String),
// `[a, b, c]`
Vec(Vec<Spanned<Self>>),
// Operator and operand.
Unary(Spanned<UnaryOp>, Box<Spanned<Self>>),
// Operator, left operand, right operand.
Binary(Spanned<BinaryOp>, Box<Spanned<Self>>, Box<Spanned<Self>>),
// Parameter names and body.
Lambda(Vec<String>, Box<Spanned<Self>>),
// Callee and arguments.
Call(Box<Spanned<Self>>, Vec<Spanned<Self>>),
// Bindings plus an optional body: `let .. in body` has `Some(body)`,
// a plain `let ..` has `None`.
Let(Vec<(String, Spanned<Self>)>, Option<Box<Spanned<Self>>>),
// `do e1 e2 ... end`
Do(Vec<Spanned<Expr>>),
}
/// A top-level statement.
#[derive(Clone, Debug)]
pub enum Stmt {
// Function definition: name, parameter names, body expression.
Fun(String, Vec<String>, Spanned<Expr>),
}
/// Shorthand for "a cloneable parser over `Token`s that yields `T` and reports
/// `Simple<Token>` errors". Uses the `trait_alias` feature enabled at the top
/// of this file.
pub trait P<T> = chumsky::Parser<Token, T, Error = Simple<Token>> + Clone;
/// Parses a single literal token (`Num`, `Bool` or `Str`) into a `Literal`.
/// Any other token is rejected with an "unexpected input" error.
pub fn literal_parser() -> impl P<Literal> {
    filter_map(|span, token| {
        let lit = match token {
            Token::Num(i) => Literal::Num(i),
            Token::Bool(b) => Literal::Bool(b),
            Token::Str(s) => Literal::Str(s),
            other => {
                return Err(Simple::expected_input_found(
                    span,
                    Vec::new(),
                    Some(other),
                ))
            }
        };
        Ok(lit)
    })
    .labelled("literal")
}
/// Parses a single `Sym` token and yields its name. Any other token is
/// rejected with an "unexpected input" error.
pub fn symbol_parser() -> impl P<String> {
    filter_map(|span, token| {
        if let Token::Sym(name) = token {
            Ok(name)
        } else {
            Err(Simple::expected_input_found(
                span,
                Vec::new(),
                Some(token),
            ))
        }
    })
    .labelled("symbol")
}
/// Wraps `parser` in the opening/closing tokens of `delim`.
///
/// If the delimited content fails to parse, recovery skips to the matching
/// closing delimiter (balancing parentheses, brackets and braces on the way)
/// and yields `f(span)` as a fallback value.
pub fn nested_parser<'a, T: 'a>(
    parser: impl P<T> + 'a,
    delim: Delimiter,
    f: impl Fn(Span) -> T + Clone + 'a,
) -> impl P<T> + 'a {
    // Open/close token pair for one bracket family.
    let pair = |d: Delimiter| (Token::Open(d), Token::Close(d));
    let (open, close) = pair(delim);

    let recovery = nested_delimiters(
        open.clone(),
        close.clone(),
        [
            pair(Delimiter::Paren),
            pair(Delimiter::Brack),
            pair(Delimiter::Brace),
        ],
        f,
    );

    parser
        .delimited_by(just(open), just(close))
        .recover_with(recovery)
        .boxed()
}
/// Builds the recursive expression parser.
///
/// Precedence, from tightest to loosest binding, follows the order in which
/// the layers are stacked below: atom, call, unary (`-`, `!`), product
/// (`*`, `/`), sum (`+`, `-`), comparison, logical (`&&`, `||`), pipe (`|>`).
/// All binary layers fold to the left. Every produced node carries the span
/// of the source text it covers.
pub fn expr_parser() -> impl P<Spanned<Expr>> {
recursive(|expr| {
// Literals and bare identifiers.
let lit = literal_parser().map(Expr::Literal);
let ident = symbol_parser().map(Expr::Sym);
// `[e, e, ...]` with optional trailing comma; a malformed bracket group
// recovers to `None`, which becomes an empty vector.
let vec = nested_parser(
expr.clone()
.separated_by(just(Token::Comma))
.allow_trailing()
.map(Some),
Delimiter::Brack,
|_| None,
)
.map(|elems| match elems {
Some(elems) => Expr::Vec(elems),
None => Expr::Vec(Vec::new()),
})
.labelled("vector");
// `\a b c -> body`
let lam = just(Token::Backslash)
.ignore_then(symbol_parser().repeated())
.then_ignore(just(Token::Arrow))
.then(expr.clone())
.map(|(args, body)| Expr::Lambda(args, Box::new(body)))
.labelled("lambda");
// `name = expr, name = expr, ...` with optional trailing comma.
let let_binds = symbol_parser()
.then_ignore(just(Token::Assign))
.then(expr.clone())
.map(|(sym, expr)| (sym, expr))
.separated_by(just(Token::Comma))
.allow_trailing()
.labelled("let bindings");
// `let binds in body`
let let_in = just(Token::Let)
.ignore_then(let_binds.clone())
.then_ignore(just(Token::In))
.then(expr.clone())
.map(|(binds, body)| Expr::Let(binds, Some(Box::new(body))))
.boxed()
.labelled("let..in");
// `let binds` without a body.
let let_def = just(Token::Let)
.ignore_then(let_binds)
.map(|binds| Expr::Let(binds, None))
.labelled("let");
// `do e1 e2 ... end`
let block = just(Token::Do)
.ignore_then(expr.clone().repeated())
.then_ignore(just(Token::End))
.map(Expr::Do)
.labelled("do block");
// `let_in` is tried before `let_def` so that the `in body` part is not
// left unparsed when it is present.
let atom = lit
.or(ident)
.or(vec)
.or(lam)
.or(let_in)
.or(let_def)
.or(block)
.map_with_span(|e, s| (e, s))
.boxed()
.labelled("atom");
// An atom optionally followed by one parenthesised argument list.
// A malformed argument list recovers to `Expr::Error`.
let call = atom
.then(
nested_parser(
expr.clone()
.separated_by(just(Token::Comma))
.allow_trailing()
.map(Some),
Delimiter::Paren,
|_| None,
)
.or_not(),
)
.map_with_span(|(f, args), s| match args {
Some(Some(args)) => (Expr::Call(Box::new(f), args), s),
Some(None) => (Expr::Error, s),
None => f,
});
// Any number of prefix operators, applied right to left.
let unary = choice((
just(Token::Sub).to(UnaryOp::Neg),
just(Token::Not).to(UnaryOp::Not),
))
.map_with_span(|op, s| (op, s))
.repeated()
.then(call)
.foldr(|op, expr| {
let s = op.1.start()..expr.1.end();
(Expr::Unary(op, Box::new(expr)), s)
})
.boxed();
// `*` and `/`
let product = unary
.clone()
.then(
choice((
just(Token::Mul).to(BinaryOp::Mul),
just(Token::Div).to(BinaryOp::Div),
))
.map_with_span(|op, s| (op, s))
.then(unary)
.repeated(),
)
.foldl(|lhs, (op, rhs)| {
let s = lhs.1.start()..rhs.1.end();
(Expr::Binary(op, Box::new(lhs), Box::new(rhs)), s)
})
.boxed();
// `+` and `-`
let sum = product
.clone()
.then(
choice((
just(Token::Add).to(BinaryOp::Add),
just(Token::Sub).to(BinaryOp::Sub),
))
.map_with_span(|op, s| (op, s))
.then(product)
.repeated(),
)
.foldl(|lhs, (op, rhs)| {
let s = lhs.1.start()..rhs.1.end();
(Expr::Binary(op, Box::new(lhs), Box::new(rhs)), s)
})
.boxed();
// `== != < <= > >=`
let comparison = sum
.clone()
.then(
choice((
just(Token::Eq).to(BinaryOp::Eq),
just(Token::Ne).to(BinaryOp::Ne),
just(Token::Lt).to(BinaryOp::Lt),
just(Token::Le).to(BinaryOp::Le),
just(Token::Gt).to(BinaryOp::Gt),
just(Token::Ge).to(BinaryOp::Ge),
))
.map_with_span(|op, s| (op, s))
.then(sum)
.repeated(),
)
.foldl(|lhs, (op, rhs)| {
let s = lhs.1.start()..rhs.1.end();
(Expr::Binary(op, Box::new(lhs), Box::new(rhs)), s)
})
.boxed();
// `&&` and `||` share one precedence level.
let logical = comparison
.clone()
.then(
choice((
just(Token::And).to(BinaryOp::And),
just(Token::Or).to(BinaryOp::Or),
))
.map_with_span(|op, s| (op, s))
.then(comparison)
.repeated(),
)
.foldl(|lhs, (op, rhs)| {
let s = lhs.1.start()..rhs.1.end();
(Expr::Binary(op, Box::new(lhs), Box::new(rhs)), s)
})
.boxed();
// `|>` binds loosest of all operators.
let pipe = logical
.clone()
.then(
just(Token::Pipe)
.to(BinaryOp::Pipe)
.map_with_span(|op, s| (op, s))
.then(logical)
.repeated(),
)
.foldl(|lhs, (op, rhs)| {
let s = lhs.1.start()..rhs.1.end();
(Expr::Binary(op, Box::new(lhs), Box::new(rhs)), s)
})
.boxed();
pipe
})
}
/// Parses one top-level statement: `fun name param* = expr`.
pub fn stmt_parser() -> impl P<Spanned<Stmt>> {
    let name = symbol_parser();
    let params = symbol_parser().repeated();
    let body = expr_parser();

    just(Token::Fun)
        .ignore_then(name)
        .then(params)
        .then_ignore(just(Token::Assign))
        .then(body)
        .map_with_span(|((name, args), body), span| (Stmt::Fun(name, args, body), span))
}
/// Parses zero or more top-level statements.
pub fn stmts_parser() -> impl P<Vec<Spanned<Stmt>>> {
    let stmt = stmt_parser();
    stmt.repeated()
}
/// Parses a whole token list into statements, with error recovery.
///
/// `len` is the position just past the end of the source; it is used as the
/// span of the end-of-input marker. Returns the statements (if any could be
/// produced) together with every parse error encountered.
pub fn parse(
    tokens: Vec<Spanned<Token>>,
    len: usize,
) -> (Option<Vec<Spanned<Stmt>>>, Vec<Simple<Token>>) {
    let end_of_input = len..len + 1;
    let stream = Stream::from_iter(end_of_input, tokens.into_iter());
    stmts_parser().then_ignore(end()).parse_recovery(stream)
}
pub fn report(e: Simple<String>, src: &str) {
let report = Report::build(ReportKind::Error, (), e.span().start());
let report = match e.reason() {
error::SimpleReason::Unclosed { span, delimiter } => report
.with_message("Unclosed delimiter")
.with_label(
Label::new(span.clone())
.with_message(format!("Unclosed {}", delimiter.fg(Color::Yellow)))
.with_color(Color::Yellow),
)
.with_label(
Label::new(e.span())
.with_message(format!(
"Delimiter must be closed before {}",
e.found()
.unwrap_or(&"end of file".to_string())
.fg(Color::Red)
))
.with_color(Color::Red),
),
error::SimpleReason::Unexpected => report
.with_message(format!(
"Unexpected {}, expected {}",
if e.found().is_some() {
"token in input"
} else {
"end of input"
},
if e.expected().len() == 0 {
"something else".to_string()
} else {
e.expected()
.map(|expected| match expected {
Some(expected) => expected.to_string(),
None => "end of input".to_string(),
})
.collect::<Vec<_>>()
.join(", ")
}
))
.with_label(
Label::new(e.span())
.with_message(format!(
"Unexpected token {}",
e.found()
.unwrap_or(&"end of file".to_string())
.fg(Color::Red)
))
.with_color(Color::Red),
),
chumsky::error::SimpleReason::Custom(msg) => report.with_message(msg).with_label(
Label::new(e.span())
.with_message(format!("{}", msg.fg(Color::Red)))
.with_color(Color::Red),
),
};
report.finish().eprint(Source::from(&src)).unwrap();
}

16
vm/Cargo.lock generated Normal file
View File

@ -0,0 +1,16 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "mecha"
version = "0.1.0"
dependencies = [
"fnv",
]

7
vm/Cargo.toml Normal file
View File

@ -0,0 +1,7 @@
[package]
name = "vm"
version = "0.1.0"
edition = "2021"
[dependencies]
fnv = "1.0.7"

324
vm/src/exec.rs Normal file
View File

@ -0,0 +1,324 @@
use crate::model::*;
use std::{cell::RefCell, rc::Rc};
/// A stack machine that executes a flat list of `Instr`s.
#[derive(Clone, Debug)]
pub struct Executor {
// Operand stack.
pub stack: Vec<Value>,
// Bindings local to this executor.
pub env: Rc<RefCell<Env>>,
// Environment consulted when a name is not bound in `env`; `Func::run`
// sets it to the environment captured by the function being run.
pub outer_env: Option<Rc<RefCell<Env>>>,
// The program being executed.
pub instrs: Vec<Instr>,
// Index into `instrs` of the instruction being executed.
pub ip: usize,
}
/// A runtime error: a message and the instruction pointer at which it occurred.
#[derive(Debug)]
pub struct Error(String, usize);
impl Error {
pub fn make<S: Into<String>>(msg: S, ip: usize) -> Self {
Self(msg.into(), ip)
}
}
impl Executor {
pub fn new(instrs: Vec<Instr>) -> Self {
Self {
stack: Vec::new(),
env: Rc::new(RefCell::new(Env::new())),
outer_env: None,
instrs,
ip: 0,
}
}
pub fn run(&mut self) -> Result<(), Error> {
for _ in 0..self.instrs.len() {
self.step()?;
self.ip += 1;
}
Ok(())
}
fn err(&self, msg: &str) -> Error {
Error::make(msg, self.ip)
}
fn push(&mut self, v: Value) -> Result<(), Error> {
self.stack.push(v);
Ok(())
}
fn pop(&mut self) -> Result<Value, Error> {
self.stack.pop().ok_or_else(|| self.err("stack underflow"))
}
fn get(&self, name: &str) -> Result<Value, Error> {
// Get from the current environment first
self.env
.borrow()
.binds
.get(name)
.cloned()
// If it doesn't exist then try the outer environment
.or_else(|| {
self.outer_env
.as_ref()
.and_then(|env| env.borrow().binds.get(name).cloned())
.or(None)
})
.ok_or_else(|| self.err(format!("undefined variable {}", name).as_str()))
}
fn set(&mut self, name: &str, v: Value) -> Result<(), Error> {
// Set the variable in the current environment if it is defined
if self.env.borrow().binds.contains_key(name) {
self.env.borrow_mut().binds.insert(name.to_string(), v);
// If it is not defined in the current environment then try the outer environment
} else if let Some(env) = &self.outer_env {
if env.borrow().binds.contains_key(name) {
env.borrow_mut().binds.insert(name.to_string(), v);
} else {
// If not then define it in the current environment
self.env.borrow_mut().binds.insert(name.to_string(), v);
}
} else {
self.env.borrow_mut().binds.insert(name.to_string(), v);
}
Ok(())
}
fn step(&mut self) -> Result<(), Error> {
let instr = self.instrs.clone(); // TODO: maybe don't clone here
let instr = instr
.get(self.ip)
.ok_or_else(|| self.err("invalid instruction pointer"))?;
macro_rules! impl_num_binop {
($op:tt) => {
match (self.pop()?, self.pop()?) {
(Value::Num(a), Value::Num(b)) => {
self.stack.push(Value::Num(a $op b));
}
_ => return Err(Error::make("can't apply operator to non-numbers", self.ip)),
}
};
}
macro_rules! impl_bool_binop {
($op:tt) => {
match (self.pop()?, self.pop()?) {
(Value::Bool(a), Value::Bool(b)) => {
self.stack.push(Value::Bool(a $op b));
}
_ => return Err(Error::make("can't apply operator to non-booleans", self.ip)),
}
};
}
match instr {
Instr::NumPush(x) => {
self.push(Value::Num(*x))?;
}
Instr::NumAdd => impl_num_binop!(+),
Instr::NumSub => impl_num_binop!(-),
Instr::NumMul => impl_num_binop!(*),
Instr::NumDiv => impl_num_binop!(/),
Instr::NumMod => impl_num_binop!(%),
Instr::BoolPush(x) => {
self.push(Value::Bool(*x))?;
}
Instr::BoolAnd => impl_bool_binop!(&&),
Instr::BoolOr => impl_bool_binop!(||),
Instr::BoolNot => {
if let Value::Bool(b) = self.pop()? {
self.push(Value::Bool(!b))?;
} else {
return Err(Error::make("can't apply `not` to non-boolean", self.ip));
}
}
Instr::StrPush(x) => {
self.push(Value::Str(x.clone()))?;
}
Instr::Pop => {
self.pop()?;
}
Instr::Dup => {
let v = self.pop()?;
self.push(v.clone())?;
self.push(v)?;
}
Instr::ListMake(len) => {
let mut list = Vec::new();
for _ in 0..*len {
list.push(
self.pop()
.map_err(|_| self.err("not enough arguments to make List"))?,
);
}
list.reverse();
self.push(Value::List(list))?;
}
Instr::ListGet(index) => {
if let Value::List(list) = self.pop()? {
let v = list
.get(*index)
.cloned()
.ok_or_else(|| self.err("index out of bounds"))?;
self.push(v)?;
} else {
return Err(Error::make("can't get from non-List", self.ip));
}
}
Instr::ListSet(index) => {
if let Value::List(mut list) = self.pop()? {
let v = self.pop()?;
list.get_mut(*index)
.ok_or_else(|| self.err("index out of bounds"))?
.clone_from(&v);
self.push(Value::List(list))?;
} else {
return Err(Error::make("can't set in non-List", self.ip));
}
}
Instr::ListLen => {
if let Value::List(list) = self.pop()? {
self.push(Value::Num(list.len() as i64))?;
} else {
return Err(Error::make("can't get length of non-List", self.ip));
}
}
Instr::ListJoin => {
if let (Value::List(mut list1), Value::List(list2)) = (self.pop()?, self.pop()?) {
list1.extend(list2);
self.push(Value::List(list1))?;
} else {
return Err(Error::make("can't join non-Lists", self.ip));
}
}
Instr::FuncMake(args, instrs) => {
let closure = Func::new(args.to_vec(), Rc::clone(&self.env), instrs.clone());
self.push(Value::Func(closure))?;
}
Instr::FuncApply => {
let v = self.pop()?;
if let Value::Func(closure) = v {
// Pop the arguments
let mut args = Vec::new();
for _ in 0..closure.args.len() {
args.push(
self.pop()
.map_err(|_| self.err("not enough arguments to apply Function"))?,
);
}
args.reverse();
self.stack.append(&mut closure.run(args)?);
} else {
return Err(Error::make(
format!("can't apply non-Function, got {:?}", v),
self.ip,
));
}
}
Instr::FuncCall(name) => {
if let Value::Func(closure) = self.get(name)? {
let mut args = Vec::new();
for _ in 0..closure.args.len() {
args.push(
self.pop()
.map_err(|_| self.err("not enough arguments to call Function"))?,
);
}
args.reverse();
self.stack.append(&mut closure.run(args)?);
} else {
return Err(Error::make("can't call non-Function", self.ip));
}
}
Instr::Get(name) => {
let v = self.get(name)?;
self.push(v)?;
}
Instr::Set(name) => {
let v = self.pop()?;
self.set(name, v)?;
}
Instr::Print => {
let v = self.pop()?;
println!("{}", v);
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `executor` to completion and checks the final operand stack.
    fn exec_expect(executor: &mut Executor, expected: Vec<Value>) {
        if let Err(e) = executor.run() {
            panic!("{:?}", e);
        }
        assert_eq!(executor.stack, expected);
    }

    /// Shorthand for an owned name.
    fn name(s: &str) -> String {
        s.to_string()
    }

    #[test]
    fn test_sanity() {
        let program = vec![Instr::NumPush(1), Instr::NumPush(2), Instr::NumAdd];
        let mut executor = Executor::new(program);
        exec_expect(&mut executor, vec![Value::Num(3)]);
    }

    #[test]
    #[should_panic]
    fn test_pop_underflow() {
        let mut executor = Executor::new(vec![Instr::NumAdd]);
        executor.run().unwrap();
    }

    #[test]
    fn test_closure() {
        // Inner function: increments `total` and returns the new value.
        let counter_body = vec![
            Instr::Get(name("total")),
            Instr::NumPush(1),
            Instr::NumAdd,
            Instr::Set(name("total")),
            Instr::Get(name("total")),
        ];
        // Outer function: initialises `total` and returns the inner function.
        let make_counter_body = vec![
            Instr::NumPush(0),
            Instr::Set(name("total")),
            Instr::FuncMake(vec![], counter_body),
            Instr::Set(name("counter")),
            Instr::Get(name("counter")),
        ];
        let program = vec![
            Instr::FuncMake(vec![], make_counter_body),
            Instr::FuncApply,
            Instr::Set(name("tally")),
            Instr::Get(name("tally")),
            Instr::FuncApply,
            Instr::Get(name("tally")),
            Instr::FuncApply,
            Instr::Get(name("tally")),
            Instr::FuncApply,
        ];
        let mut executor = Executor::new(program);
        exec_expect(
            &mut executor,
            vec![Value::Num(1), Value::Num(2), Value::Num(3)],
        );
    }
}

39
vm/src/lib.rs Normal file
View File

@ -0,0 +1,39 @@
#![allow(clippy::new_without_default)]
pub mod exec;
pub mod model;
// fn _main() {
// let instrs = vec![
// Instr::NumPush(34),
// Instr::NumPush(34),
// Instr::FuncMake(
// vec!["abc".to_string()],
// vec![
// Instr::Get("abc".to_string()),
// Instr::NumPush(1),
// Instr::NumAdd,
// ],
// ),
// Instr::FuncApply,
// Instr::NumAdd,
// Instr::Print,
// ];
// // instrs.iter().for_each(|instr| {
// // println!(
// // "{}",
// // instr
// // .to_bytes()
// // .iter()
// // .map(|b| format!("{:02x}", b))
// // .collect::<Vec<_>>()
// // .join(" ")
// // )
// // });
// let mut executor = Executor::new(instrs);
// match executor.run() {
// Ok(()) => (),
// Err(e) => println!("{:?}", e),
// }
// }

292
vm/src/model.rs Normal file
View File

@ -0,0 +1,292 @@
use crate::exec::{Error, Executor};
use fnv::FnvHashMap;
use std::{
cell::{Cell, RefCell},
fmt::{Debug, Display},
rc::Rc,
};
/// A runtime value held on the operand stack or bound in an environment.
#[derive(Clone, Eq, PartialEq)]
pub enum Value {
Num(i64),
Bool(bool),
Str(String),
List(Vec<Self>),
// A function together with the environment it captured.
Func(Func),
}
impl Debug for Value {
    /// Writes `Tag(payload)`. Functions show only their parameter count.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let (tag, payload): (&str, String) = match self {
            Value::Num(n) => ("Num", n.to_string()),
            Value::Bool(b) => ("Bool", b.to_string()),
            Value::Str(s) => ("Str", s.clone()),
            Value::List(xs) => ("List", format!("{:?}", xs)),
            Value::Func(c) => ("Func", c.args.len().to_string()),
        };
        write!(f, "{}({})", tag, payload)
    }
}
impl Display for Value {
    /// Writes the user-facing form of a value: numbers, booleans and strings
    /// as-is, lists as `[a, b, c]`, functions as `<Func>`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Value::Num(n) => write!(f, "{}", n),
            Value::Bool(b) => write!(f, "{}", b),
            Value::Str(s) => write!(f, "{}", s),
            Value::Func(_) => f.write_str("<Func>"),
            Value::List(xs) => {
                f.write_str("[")?;
                for (i, x) in xs.iter().enumerate() {
                    if i > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{}", x)?;
                }
                f.write_str("]")
            }
        }
    }
}
/// A flat set of name-to-value bindings.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Env {
pub binds: FnvHashMap<String, Value>,
}
impl Env {
pub fn new() -> Self {
Self {
binds: FnvHashMap::default(),
}
}
}
/// A function value: parameter names, the environment captured at creation
/// time, and the body as a list of instructions.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Func {
pub args: Vec<String>,
pub env: Rc<RefCell<Env>>,
pub instrs: Vec<Instr>,
}
impl Func {
pub fn new(args: Vec<String>, env: Rc<RefCell<Env>>, instrs: Vec<Instr>) -> Self {
Self { args, env, instrs }
}
pub fn run(self, args: Vec<Value>) -> Result<Vec<Value>, Error> {
// Create a new environment for the closure
let mut new_env = Env::new();
for (arg, val) in self.args.iter().zip(args) {
new_env.binds.insert(arg.clone(), val);
}
let new_env = Rc::new(RefCell::new(new_env));
// Execute the closure
let mut new_executor = Executor {
stack: Vec::new(),
env: new_env,
outer_env: Some(Rc::clone(&self.env)),
instrs: self.instrs,
ip: 0,
};
new_executor.run()?;
Ok(new_executor.stack)
}
}
/// One VM instruction.
///
/// # Byte encoding
///
/// Every instruction starts with a one-byte opcode equal to the variant's
/// position in this enum (`NumPush` = `0x00`, `NumAdd` = `0x01`, ...,
/// `Print` = `0x17`), followed by its payload:
///
/// * `i64` and `usize` payloads: 8 bytes, little endian.
/// * `bool` payloads: 1 byte.
/// * string payloads: the UTF-8 bytes followed by a `0x00` terminator.
///   (A string that itself contains a NUL byte is not representable
///   unambiguously.)
///
/// Examples:
///
/// ```text
/// NumPush -34, NumPush 103, NumAdd
///   00 de ff ff ff ff ff ff ff
///   00 67 00 00 00 00 00 00 00
///   01
///
/// StrPush "Hello, World!"                       (1 + 13 + 1 = 15 bytes)
///   [0a] [48 65 6c 6c 6f 2c 20 57 6f 72 6c 64 21] [00]
///    |    |                                        └ null terminator
///    |    └ string bytes
///    └ opcode
///
/// FuncMake ["x", "yz"] [Get "x", Get "yz", NumAdd]
///   [12] [02 ..] [03 ..] [78 00 79 7a 00] [15 78 00 15 79 7a 00 01]
///    |    |       |       |                └ encoded body instructions
///    |    |       |       └ parameter names, each null terminated
///    |    |       └ number of body instructions (8 bytes)
///    |    └ number of parameters (8 bytes)
///    └ opcode
/// ```
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Instr {
    NumPush(i64), // 9 bytes: opcode + i64
    NumAdd,       // 1 byte
    NumSub,       // 1 byte
    NumMul,       // 1 byte
    NumDiv,       // 1 byte
    NumMod,       // 1 byte
    BoolPush(bool), // 2 bytes: opcode + bool
    BoolAnd,        // 1 byte
    BoolOr,         // 1 byte
    BoolNot,        // 1 byte
    StrPush(String), // 1 + string.len() + 1 bytes
    Pop,             // 1 byte
    Dup,             // 1 byte
    ListMake(usize), // 9 bytes: opcode + 64-bit count
    ListGet(usize),  // 9 bytes: opcode + 64-bit index
    ListSet(usize),  // 9 bytes: opcode + 64-bit index
    ListLen,         // 1 byte
    ListJoin,        // 1 byte
    FuncMake(Vec<String>, Vec<Instr>), // 1 + 8 + 8 + names + body bytes
    FuncApply,        // 1 byte
    FuncCall(String), // 1 + string.len() + 1 bytes
    Get(String),      // 1 + string.len() + 1 bytes
    Set(String),      // 1 + string.len() + 1 bytes
    Print,            // 1 byte
}

impl Instr {
    /// Returns the opcode byte of this instruction: the variant's position
    /// in the enum declaration.
    ///
    /// The mapping is a pure function of the variant, so encoding the same
    /// instruction twice always yields the same bytes. When variants are
    /// added or reordered, update this table to match.
    fn opcode(&self) -> u8 {
        match self {
            Instr::NumPush(_) => 0x00,
            Instr::NumAdd => 0x01,
            Instr::NumSub => 0x02,
            Instr::NumMul => 0x03,
            Instr::NumDiv => 0x04,
            Instr::NumMod => 0x05,
            Instr::BoolPush(_) => 0x06,
            Instr::BoolAnd => 0x07,
            Instr::BoolOr => 0x08,
            Instr::BoolNot => 0x09,
            Instr::StrPush(_) => 0x0a,
            Instr::Pop => 0x0b,
            Instr::Dup => 0x0c,
            Instr::ListMake(_) => 0x0d,
            Instr::ListGet(_) => 0x0e,
            Instr::ListSet(_) => 0x0f,
            Instr::ListLen => 0x10,
            Instr::ListJoin => 0x11,
            Instr::FuncMake(_, _) => 0x12,
            Instr::FuncApply => 0x13,
            Instr::FuncCall(_) => 0x14,
            Instr::Get(_) => 0x15,
            Instr::Set(_) => 0x16,
            Instr::Print => 0x17,
        }
    }

    /// Returns the number of bytes `to_bytes` produces for this instruction.
    pub fn size(&self) -> usize {
        match self {
            Instr::NumPush(_) => 1 + std::mem::size_of::<i64>(),
            Instr::BoolPush(_) => 1 + std::mem::size_of::<bool>(),
            Instr::StrPush(s) | Instr::FuncCall(s) | Instr::Get(s) | Instr::Set(s) => {
                1 + s.len() + 1
            }
            // Counts and indices are always encoded as 64-bit values.
            Instr::ListMake(_) | Instr::ListGet(_) | Instr::ListSet(_) => {
                1 + std::mem::size_of::<u64>()
            }
            Instr::FuncMake(args, instrs) => {
                1 + 8
                    + 8
                    + args.iter().map(|s| s.len() + 1).sum::<usize>()
                    + instrs.iter().map(|i| i.size()).sum::<usize>()
            }
            Instr::NumAdd
            | Instr::NumSub
            | Instr::NumMul
            | Instr::NumDiv
            | Instr::NumMod
            | Instr::BoolAnd
            | Instr::BoolOr
            | Instr::BoolNot
            | Instr::Pop
            | Instr::Dup
            | Instr::ListLen
            | Instr::ListJoin
            | Instr::FuncApply
            | Instr::Print => 1,
        }
    }

    /// Encodes this instruction (and, for `FuncMake`, its whole body) into
    /// bytes using the layout described on [`Instr`].
    pub fn to_bytes(&self) -> Vec<u8> {
        let mut bytes = Vec::with_capacity(self.size());
        bytes.push(self.opcode());
        match self {
            Instr::NumPush(n) => bytes.extend_from_slice(&n.to_le_bytes()),
            Instr::BoolPush(b) => bytes.push(u8::from(*b)),
            Instr::StrPush(s) | Instr::FuncCall(s) | Instr::Get(s) | Instr::Set(s) => {
                bytes.extend_from_slice(s.as_bytes());
                bytes.push(0x00);
            }
            Instr::ListMake(n) | Instr::ListGet(n) | Instr::ListSet(n) => {
                // Widening to u64 keeps the encoding identical on 32- and
                // 64-bit targets.
                bytes.extend_from_slice(&(*n as u64).to_le_bytes());
            }
            Instr::FuncMake(args, instrs) => {
                bytes.extend_from_slice(&(args.len() as u64).to_le_bytes());
                bytes.extend_from_slice(&(instrs.len() as u64).to_le_bytes());
                for arg in args {
                    bytes.extend_from_slice(arg.as_bytes());
                    bytes.push(0x00);
                }
                for instr in instrs {
                    bytes.extend(instr.to_bytes());
                }
            }
            // Instructions without a payload consist of the opcode only.
            Instr::NumAdd
            | Instr::NumSub
            | Instr::NumMul
            | Instr::NumDiv
            | Instr::NumMod
            | Instr::BoolAnd
            | Instr::BoolOr
            | Instr::BoolNot
            | Instr::Pop
            | Instr::Dup
            | Instr::ListLen
            | Instr::ListJoin
            | Instr::FuncApply
            | Instr::Print => {}
        }
        bytes
    }
}