drim/drimc_rs/src/ir_untyped.rs

477 lines
16 KiB
Rust

//! Untyped intermediate representation.
//!
//! Untyped IR is immediately generated after the AST has been successfully parsed. It dramatically
//! simplifies the complexity of the syntax tree, and is used to perform type inference, at which
//! point it is translated into typed IR.
use std::fmt::Display;
use crate::{
syntax::{self, Identifier, Literal, SyntaxTree},
typeck::Type,
};
/// A program represented in untyped IR.
#[derive(Debug)]
pub struct Program {
/// The list of top-level declarations. Each declaration has a fully-namespaced name, and a list
/// of instructions that should return the name's value. E.g., for
/// ```
/// def x = 5;
/// ```
/// the name `x` becomes associated with a list of instructions that return the value 5. When
/// the program starts up, each one of these top-level functions is immediately called to
/// initialize global variables, and then `main` is invoked.
defs: Vec<(Identifier, Vec<Instruction>)>,
}
impl Display for Program {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for def in self.defs.iter() {
writeln!(f, "PR {}", def.0)?;
for inst in def.1.iter() {
writeln!(f, "{}", inst)?;
}
}
Ok(())
}
}
/// An instruction in untyped IR.
#[derive(Debug)]
enum Instruction {
/// Apply a single-argument function to a value, and store the result in another location.
Apply {
/// The target into which to store the result.
target: Location,
/// The function to call.
func: Location,
/// The argument to call the function with.
argument: Location,
},
/// Collect zero or more values into a storage location as a tuple.
Collect {
/// The target into which to store the result.
target: Location,
/// The source locations to pull from. If there is exactly one of these, then the variable
/// is simply moved (since we define that a tuple of one element is the same as just that
/// element); otherwise, the variables are collected into a tuple and moved into `target`.
/// Also this means that if `source` is an empty `Vec`, then `target` becomes the unit value
/// `()`.
source: Vec<Location>,
},
/// Branch depending on which variant of its type a particular storage location is.
Branch {
/// The storage location to inspect.
target: Location,
/// The variant to test `target` against. This should be the name of a constructor. It is
/// *undefined behavior* for this constructor to be for a type other than that of `target`;
/// the AST translator must never produce a `Branch` node without also emitting a `FixType`
/// node that specifies the type here.
constructor: Identifier,
/// Code to execute if the `target` is of the `variant` variant.
iftrue: Vec<Instruction>,
/// Code to execute otherwise.
iffalse: Vec<Instruction>,
},
/// Copy a value from one of a number of locations, whichever is initialized.
Phi {
/// The location to store the new value in.
target: Location,
/// The source locations; exactly one of these must have been initialized.
sources: Vec<Location>,
},
/// Destructure an algebraic object into its component pieces.
DestructureData {
/// The object we're destructuring.
source: Location,
/// The constructor that the `target` was constructed with. It is *undefined behavior* if
/// `target` is of a variant type and this is not the constructor `target` was constructed
/// with; the AST translator must never produce a `DestructureData` node without a guarding
/// `Branch` node if there exists more than one variant of the underlying type.
constructor: Identifier,
/// The list of locations to store the parameters to the constructor. It is a type error
/// (caught at type checking time) for the number of targets here to be different from the
/// number of parameters to the constructor.
targets: Vec<Location>,
},
/// Destructure a tuple into its component pieces.
DestructureTuple {
/// The tuple we're destructuring.
source: Location,
/// The list of locations to store the elements of the tuple. It is a type error (caught at
/// type checking time) for the number of targets here to be different from the number of
/// elements in the tuple.
targets: Vec<Location>,
},
/// Create a new lambda with one parameter, and store it in a location.
DefLambda {
/// The location to store the lambda in.
target: Location,
/// The name of the parameter to the lambda.
param: Location,
/// Code to execute when the lambda is called.
body: Vec<Instruction>,
},
/// Return a value from a function. It is *undefined behavior* for a function body to not
/// include a `Return` instruction, and no instructions should ever follow a `Return`
/// instruction.
Return {
/// The location to return.
target: Location,
},
/// Mark this location as unreachable. If this node is reachable at runtime, then the entire
/// program is undefined behavior.
Unreachable,
/// Assert that a storage location is of a particular type. While this node does nothing on its
/// own, `FixType` is used as the fundamental anchor for type inference: all types in the
/// program are deduced relative to this node.
FixType {
/// The storage location being annotated.
target: Location,
/// The type that the storage location should have.
typ: Type,
},
}
impl Display for Instruction {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Instruction::Apply {
target,
func,
argument,
} => write!(f, "AP {target} <- {func} {argument}"),
Instruction::Collect { target, source } => {
write!(f, "CL {target} <- ")?;
for s in source {
write!(f, "{s},")?;
}
Ok(())
}
Instruction::Branch {
target,
constructor,
iftrue,
iffalse,
} => {
writeln!(f, "IF {constructor} = {target} THEN [")?;
for inst in iftrue {
writeln!(f, "{inst}")?;
}
writeln!(f, "] ELSE [")?;
for inst in iffalse {
writeln!(f, "{inst}")?;
}
write!(f, "] ENDIF")?;
Ok(())
}
Instruction::Phi { target, sources } => {
write!(f, "PH {target} ")?;
for source in sources {
write!(f, "{source},")?;
}
Ok(())
}
Instruction::DestructureData {
source,
constructor,
targets,
} => {
write!(f, "DS {constructor} (")?;
for target in targets {
write!(f, "{target},")?;
}
write!(f, ") << {source}")?;
Ok(())
}
Instruction::DestructureTuple { source, targets } => {
write!(f, "DT (")?;
for target in targets {
write!(f, "{target},")?;
}
write!(f, ") <- {source}")?;
Ok(())
}
Instruction::DefLambda {
target,
param,
body,
} => {
writeln!(f, "DL {target} <- \\{param} [")?;
for inst in body {
writeln!(f, "{}", inst)?;
}
write!(f, "]")?;
Ok(())
}
Instruction::Return { target } => {
write!(f, "RT {target}")
}
Instruction::Unreachable => {
write!(f, "UN")
}
Instruction::FixType { target, typ } => todo!(),
}
}
}
/// A storage location in untyped IR.
#[derive(Debug, Clone)]
enum Location {
/// A named location. Identifiers can be read from to access globally-defined functions and
/// constants; but identifiers that are bound to (e.g., the `x` in `let x = 5`) must not be
/// namespaced.
Named(Identifier),
/// A compiler-generated temporary location.
Temporary(u64),
/// A constant value.
Literal(Literal),
}
impl Display for Location {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Location::Named(id) => write!(f, "n`{}`", id),
Location::Temporary(t) => write!(f, "t`{}`", t),
Location::Literal(l) => write!(f, "c`{}`", l),
}
}
}
/// Converts a program's abstract syntax tree into untyped IR code.
pub fn ast_to_untyped_ir(ast: SyntaxTree) -> Program {
let mut defs = vec![];
let mut counter = 0;
for stmt in ast.0.into_iter() {
match stmt {
syntax::Statement::TypeDefinition {
typ: _,
constructors: _,
} => todo!(),
syntax::Statement::InstanceDefinition {
class_name: _,
typ: _,
decls: _,
} => todo!(),
syntax::Statement::ClassDefinition {
name: _,
var: _,
decls: _,
} => todo!(),
syntax::Statement::ClassMember(syntax::ClassMember::Function {
name,
arguments,
definition,
}) => {
defs.push((
name,
convert_fn(
&mut counter,
arguments,
definition.expect("Empty functions unimplemented"),
),
));
}
syntax::Statement::ClassMember(syntax::ClassMember::TypeAlias {
left: _,
right: _,
}) => {
todo!()
}
}
}
Program { defs }
}
/// Generates a new temporary location name that's guaranteed to be unique.
fn temporary(counter: &mut u64) -> Location {
let n = *counter;
*counter += 1;
Location::Temporary(n)
}
/// Converts a function definition into a list of instructions.
fn convert_fn(
counter: &mut u64,
mut arguments: Vec<syntax::Pattern>,
definition: syntax::Expr,
) -> Vec<Instruction> {
if arguments.is_empty() {
let ret_loc = temporary(counter);
eval_expr(counter, definition, &ret_loc)
.into_iter()
.chain(std::iter::once(Instruction::Return { target: ret_loc }))
.collect()
} else {
let first = arguments.remove(0);
let lambda_loc = temporary(counter);
let arg_loc = temporary(counter);
vec![
Instruction::DefLambda {
target: lambda_loc.clone(),
param: arg_loc.clone(),
body: bind_pattern(counter, first, &arg_loc)
.into_iter()
.chain(convert_fn(counter, arguments, definition))
.collect(),
},
Instruction::Return { target: lambda_loc },
]
}
}
/// Emits instructions that bind the given pattern to the variable stored in location `l`.
fn bind_pattern(counter: &mut u64, p: syntax::Pattern, l: &Location) -> Vec<Instruction> {
match p {
syntax::Pattern::Capture(name) => {
vec![Instruction::Collect {
target: Location::Named(syntax::Identifier { elems: vec![name] }),
source: vec![l.to_owned()],
}]
}
syntax::Pattern::Tuple(pats) => {
let pat_locs: Vec<_> = pats.iter().map(|_| temporary(counter)).collect();
std::iter::once(Instruction::DestructureTuple {
source: l.to_owned(),
targets: pat_locs.clone(),
})
.chain(
Iterator::zip(pats.into_iter(), pat_locs)
.map(|(pat, pat_loc)| bind_pattern(counter, pat, &pat_loc))
.flatten(),
)
.collect()
}
syntax::Pattern::Record {
members,
inexhaustive,
} => todo!(),
syntax::Pattern::TypeAnnotated { pat, typ } => std::iter::once(Instruction::FixType {
target: l.to_owned(),
// typ: *typ,
typ: todo!(),
})
.chain(bind_pattern(counter, *pat, l))
.collect(),
syntax::Pattern::Destructure(_, _) => todo!(),
syntax::Pattern::Ignore => Vec::new(),
syntax::Pattern::Literal(_) => Vec::new(),
}
}
/// Emits instructions that check whether the expression `e` matches the pattern, and if so,
/// evaluates `e1` and places the result in `l`; otherwise, evaluates `e2` and places the result in
/// `l`.
fn conditional(
counter: &mut u64,
e: syntax::Expr,
p: syntax::Pattern,
e1: syntax::Expr,
e2: syntax::Expr,
l: &Location,
) -> Vec<Instruction> {
todo!()
}
/// Emits instructions that evaluate the expression `e`, then place the result in location `l`.
fn eval_expr(counter: &mut u64, e: syntax::Expr, l: &Location) -> Vec<Instruction> {
match e {
syntax::Expr::BinaryOp {
kind: _,
left,
right,
translation,
} => eval_expr(
counter,
syntax::Expr::Application {
func: Box::new(syntax::Expr::Application {
func: Box::new(syntax::Expr::VariableReference(Identifier {
elems: vec![translation],
})),
argument: left,
}),
argument: right,
},
l,
),
syntax::Expr::Application { func, argument } => {
let func_e = temporary(counter);
let arg_e = temporary(counter);
Iterator::chain(
eval_expr(counter, *func, &func_e).into_iter(),
eval_expr(counter, *argument, &arg_e).into_iter(),
)
.chain(vec![Instruction::Apply {
target: l.to_owned(),
func: func_e,
argument: arg_e,
}])
.collect()
}
syntax::Expr::Let { left, right, into } => {
let right_e = temporary(counter);
let eval_right = eval_expr(counter, *right, &right_e);
let bind_left = bind_pattern(counter, left, &right_e);
let eval_into = eval_expr(counter, *into, l);
eval_right
.into_iter()
.chain(bind_left)
.chain(eval_into)
.collect()
}
syntax::Expr::Match { matcher, cases } => todo!(),
syntax::Expr::Record(_) => todo!(),
syntax::Expr::Lambda { arguments, result } => todo!(),
syntax::Expr::DotSubscript { value, subscript } => todo!(),
syntax::Expr::BracketSubscript { value, subscript } => todo!(),
syntax::Expr::Tuple(elems) => {
let elem_locs: Vec<_> = elems.iter().map(|_| temporary(counter)).collect();
elems
.into_iter()
.zip(elem_locs.iter())
.map(|(elem, elem_loc)| eval_expr(counter, elem, elem_loc))
.flatten()
.chain(std::iter::once(Instruction::Collect {
target: l.to_owned(),
source: elem_locs.clone(),
}))
.collect()
}
syntax::Expr::VariableReference(name) => vec![Instruction::Collect {
target: l.clone(),
source: vec![Location::Named(name)],
}],
syntax::Expr::Literal(lit) => vec![Instruction::Collect {
target: l.clone(),
source: vec![Location::Literal(lit)],
}],
}
}