More AST work

This commit is contained in:
Alex Bethel 2022-08-02 21:16:16 -06:00
parent ff92f73211
commit b0df87bd6a
5 changed files with 278 additions and 60 deletions

View file

@ -17,25 +17,60 @@ deterministic memory management.
## Tools
This repository contains the following current and planned tools:
This repository contains the following tools:
- `axc`, the AlexScript compiler. This can be used as a binary with a
fairly standard compiler CLI, or as a library for use in other
programs.
The following tools do not exist yet, but are planned:
- `axci`, the interactive AlexScript interpreter.
- `axcd`, the Language Server Protocol (LSP) server for AlexScript
code support in editors, supporting definition peeking and lookup,
renaming variables and modules, etc. (Planned; does not exist yet.)
renaming variables and modules, etc.
- `axfmt`, the standard formatter for AlexScript code; all AlexScript
code used in this repository must be formatted with `axfmt`, and its
use is recommended for other projects. (Planned; does not exist
yet.)
use is recommended for other projects.
- `axdoc`, the documentation generator.
- `alexscript-mode`, an Emacs mode for editing AlexScript code,
supporting syntax highlighting, automatic indentation, some basic
keybindings for common tasks, Emacs-side LSP integration for
communicating with `axcd`, and a collection of `yasnippet` snippets
for inserting common AlexScript constructs. (Planned; does not exist
yet.)
for inserting common AlexScript constructs.
- `alexscript-vsc`, Visual Studio Code plugins and tools for editing
AlexScript code. (Planned; does not exist yet.)
AlexScript code.
- `alexscript-vim`, tools and configuration files for optimizing Vim
and Neovim for editing AlexScript code. (Planned; does not exist
yet.)
and Neovim for editing AlexScript code.
## Language features
The language is mostly influenced by Rust and Haskell: it has strict
safety requirements and borrow-checked memory management like that of
Rust, but its syntax and type system are similar to those of Haskell.
Some features the language will most likely have:
- All functions are pure by default; side effects are chained together
using an `IO` monad.
- Despite the language's purity, expressions will be strictly
evaluated to provide more programmer control.
- Different monads represent different levels of safety, and can be
converted using functions marked as `UNSAFE`. The intention is that
code can be audited by manually checking that all the `UNSAFE`
transformations are sound, and code that contains no `UNSAFE`
function calls is guaranteed to satisfy varying definitions of
soundness:
- The `IO` monad represents computations that might have side
effects on the real world. If a computation of type `IO` is known
by the programmer to not have side effects on the real world, then
it can be converted to a pure computation using the standard
library function `UNSAFE_assertPure : IO a -> a`.
- The `MemoryUnsafe` monad represents computations that might read
from or write to memory that is not allocated correctly: for
example, `readPtr`, which reads from a raw pointer, is of type
`MemoryUnsafe a` because the pointer is not known to be valid. If
a computation has been confirmed to be safe by the programmer, it
can be converted to an `IO` computation using
`UNSAFE_assertMemorySafe : MemoryUnsafe a -> IO a`.
- Further safety monads may be added in the future.
-
## Compilation

47
axc/Cargo.lock generated
View file

@ -11,14 +11,6 @@ dependencies = [
"const-random",
]
[[package]]
name = "alexc"
version = "0.1.0"
dependencies = [
"chumsky",
"clap",
]
[[package]]
name = "atty"
version = "0.2.14"
@ -36,6 +28,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "axc"
version = "0.1.0"
dependencies = [
"chumsky",
"clap",
"num-bigint",
]
[[package]]
name = "bitflags"
version = "1.3.2"
@ -178,6 +179,36 @@ version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "num-bigint"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
dependencies = [
"autocfg",
"num-integer",
"num-traits",
]
[[package]]
name = "num-integer"
version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.13.0"

View file

@ -1,5 +1,5 @@
[package]
name = "alexc"
name = "axc"
version = "0.1.0"
edition = "2021"
@ -8,3 +8,4 @@ edition = "2021"
[dependencies]
chumsky = "0.8.0"
clap = { version = "3.2.16", features = ["derive"] }
num-bigint = "0.4.3"

View file

@ -2,54 +2,142 @@
//!
//! AlexScript is a based programming language, for based people.
use num_bigint::BigUint;
/// A concrete syntax tree. This represents the full content of an AlexScript program, including all
/// whitespace, comments, and tokens: the source code of the original program can be recovered
/// completely using the syntax tree.
///
/// NOTE: this is currently an empty placeholder struct with no fields.
pub struct SyntaxTree {}
/// A top-level statement. A program is made up of a sequence of these.
pub enum Statement {
/// Declaration of an abstract data type, e.g., `data Option a = Some a | None;`.
TypeDefinition {
/// The type being defined. This is only allowed to be [`Type::Named`] or
/// [`Type::Application`]; the restriction is not encoded in the Rust type itself.
left: Type,
/// The possible constructors of the data type.
constructors: Vec<TypeConstructor>,
},
/// Declaration that a type implements a type class, e.g., `instance Unwrap (Option a) { ... }`.
InstanceDefinition {
/// The name of the type class being implemented.
class_name: String,
/// The type that conforms to the type class.
typ: Type,
/// The list of declarations that dictate the type's behavior when treated as an instance of
/// the type class.
decls: Vec<ClassDeclaration>,
},
/// Any other declaration: one of the kinds that may also occur within a type class definition.
/// See [`ClassDeclaration`] for the possible kinds.
ClassDeclaration(ClassDeclaration),
}
/// Top-level statements that are also allowed to occur within a type class definition, and which
/// therefore have an optional rather than strictly-required right-hand side, e.g., `type X;` rather
/// than `type X = Y;`.
pub enum ClassDeclaration {
/// Declaration of a function or constant.
Function {
/// Name of the function.
name: String,
/// The patterns binding the function's arguments.
arguments: Vec<Pattern>,
/// The definition (right-hand side) of the function, or `None` if it is declared without
/// one.
definition: Option<Expr>,
},
/// Declaration of a type that is a literal alias for another type.
TypeAlias {
/// The type being defined. This is only allowed to be `Named` or `Application`.
left: Type,
/// The target type, or `None` if the alias is declared without a right-hand side (as in
/// `type X;`).
right: Option<Type>,
},
/// Declaration of a type class.
ClassDefinition {
/// The name of the class.
name: String,
/// The type variable representing a type conforming to the class.
var: String,
/// The list of declarations (optionally filled-in) that are necessary for a type to conform
/// to the type class.
decls: Vec<ClassDeclaration>,
},
}
/// A possible constructor for an abstract data type, e.g., `Some a` in
/// `data Option a = Some a | None;`.
pub struct TypeConstructor {
/// The name of the constructor.
pub name: String,
/// The types of the arguments taken by this constructor (empty for a constructor with no
/// arguments).
pub args: Vec<Type>,
}
/// Expressions.
pub enum Expr {
/// Unary operators, e.g., "-5".
UnaryOp { kind: UnaryOpKind, val: Box<Expr> },
/// Unary operators, e.g., `-5`.
UnaryOp { kind: String, val: Box<Expr> },
/// Binary operators, e.g., "5 + 5".
/// Binary operators, e.g., `5 + 5`.
BinaryOp {
kind: BinaryOpKind,
kind: String,
left: Box<Expr>,
right: Box<Expr>,
},
/// Function application, e.g., "sin x".
/// Function application, e.g., `sin x`.
Application {
func: Box<Expr>,
argument: Box<Expr>,
},
/// Matching of multiple cases, e.g., "match x { 5 => 'a', 6 => 'b' }".
/// Defining of temporary variables, e.g., `let x = 5 in x + x`.
Let { left: Pattern, right: Box<Expr> },
/// Matching of multiple cases, e.g., `match x { 5 => 'a', 6 => 'b' }`.
Match {
matcher: Box<Expr>,
cases: Vec<(Pattern, Expr)>,
},
/// Struct initialization, e.g., "Vector { pointer: xyz, length: 12 }".
/// Syntax sugar for matching on booleans, e.g., `if foo then bar else baz`.
If {
subject: Box<Expr>,
iftrue: Box<Expr>,
iffalse: Box<Expr>,
},
/// Struct initialization, e.g., `Vector { pointer: xyz, length: 12 }`.
StructInit {
name: String,
elements: Vec<(String, Expr)>,
},
/// Anonymous functions.
/// Anonymous functions, e.g., `fn x -> x + 1`.
Lambda {
arguments: Vec<Pattern>,
result: Box<Expr>,
},
/// Variable references, possibly namespaced, e.g., "foo::bar::baz".
/// Variable references, possibly namespaced, e.g., `foo::bar::baz`.
VariableReference(Vec<String>),
/// Dot subscripts, e.g., "foo.bar".
/// Dot subscripts, e.g., `foo.bar`.
DotSubscript { value: Box<Expr>, subscript: String },
/// Bracket subscripts, e.g., "foo[bar]".
/// Bracket subscripts, e.g., `foo[bar]`.
BracketSubscript {
value: Box<Expr>,
subscript: Box<Expr>,
@ -59,47 +147,71 @@ pub enum Expr {
Literal(Literal),
}
/// Kinds of unary operators, that are placed before an expression.
pub enum UnaryOpKind {
/// +x, equivalent to absolute value.
Plus,
/// Type names.
pub enum Type {
/// `Foo`
Named(String),
/// -x, multiplication by -1.
Minus,
/// `List Int`
Application {
// TODO: is this right?
function: Box<Expr>,
expression: Box<Expr>,
},
/// `(a, b)`
Tuple(Vec<Type>),
/// `{ a: x, b: y }`
Record(Vec<(String, Type)>),
}
/// Kinds of binary operations, that are placed between two expressions.
///
/// As a convention, all binary operations should be one character.
pub enum BinaryOpKind {
/// a + b
Add,
/// Patterns for use in function arguments, lambda arguments, `let` statements, and `match`
/// statements.
pub enum Pattern {
/// `(a, b)`
Tuple(Vec<Pattern>),
/// a - b
Sub,
/// `a: String`
TypeAnnotated {
pat: Box<Pattern>,
// Note that types are expressions, to simplify parsing.
typ: Box<Expr>,
},
/// a * b
Mul,
/// `Foo`
Exact(String),
/// a / b
Div,
/// `Foo { a: x, b: y, ... }`
Destructure(String, Record),
/// a % b
Modulo,
/// `a`
Capture(String),
/// a ^ b
Exponent,
/// `_`
Ignore,
/// a & b
And,
/// a | b
Or,
/// `"hello"`
Literal(Literal),
}
///
pub enum Pattern {}
/// Record syntax blocks, e.g., "{a: b, c: d, ...}".
pub struct Record {
/// The named members of the record, in order of occurrence.
pub members: Vec<(String, Expr)>,
pub enum Literal {}
/// Whether the record ends with "..."; this allows ignoring blocks.
pub inexhaustive: bool,
}
pub enum Token {}
/// Literal values included in source code.
pub enum Literal {
/// A string literal, e.g., `"hello"`.
String(String),
/// An integer literal, e.g., `123`. Stored as an arbitrary-precision unsigned integer, so the
/// literal itself cannot carry a sign.
// NOTE(review): presumably negative numbers are represented as a unary `-` applied to a literal;
// confirm against the parser once it exists.
Integer(BigUint),
/// A floating-point literal, e.g., `123.456`. Stored as an `f64`.
Float(f64),
}

39
examples/unwrap.axs Normal file
View file

@ -0,0 +1,39 @@
// Example of using type classes to implement `unwrap`.
// Types that can be unwrapped.
class Unwrap a {
// The result that `unwrap`ping will produce when successfully applied.
type Result;
// Unwraps `self` into the result type, panicking on failure.
def unwrap (self: a) -> Result;
}
// Either an `a`, or no data.
data Option a
= Some a
| None;
// `Option a` can be unwrapped to produce `a`.
instance Unwrap (Option a) {
type Result = a;
def unwrap self = match self {
Some x -> x,
None -> panic "Attempt to unwrap None",
};
}
// Either an `a`, or an error of type `e` (which must be representable as a string, hence the `Show`
// bound).
data Result a (e: Show)
= Ok a
| Err e;
// `Result a e` can be unwrapped to produce `a`. Unwrapping an `Err` panics with a message that
// includes the error value rendered via `show`.
instance Unwrap (Result a e) {
type Result = a;
def unwrap self = match self {
Ok x -> x,
Err e -> panic ("Attempt to unwrap Err " ++ show e),
};
}