More AST work
This commit is contained in:
parent
ff92f73211
commit
b0df87bd6a
53
README.md
53
README.md
|
@ -17,25 +17,60 @@ deterministic memory management.
|
|||
|
||||
## Tools
|
||||
|
||||
This repository contains the following current and planned tools:
|
||||
This repository contains the following tools:
|
||||
- `axc`, the AlexScript compiler. This can be used as a binary with a
|
||||
fairly standard compiler CLI, or as a library for use in other
|
||||
programs.
|
||||
|
||||
The following tools do not exist yet, but are planned:
|
||||
- `axci`, the interactive AlexScript interpreter.
|
||||
- `axcd`, the Language Server Protocol (LSP) server for AlexScript
|
||||
code support in editors, supporting definition peeking and lookup,
|
||||
renaming variables and modules, etc. (Planned; does not exist yet.)
|
||||
renaming variables and modules, etc.
|
||||
- `axfmt`, the standard formatter for AlexScript code; all AlexScript
|
||||
code used in this repository must be formatted with `axfmt`, and its
|
||||
use is recommended for other projects. (Planned; does not exist
|
||||
yet.)
|
||||
use is recommended for other projects.
|
||||
- `axdoc`, the documentation generator.
|
||||
- `alexscript-mode`, an Emacs mode for editing AlexScript code,
|
||||
supporting syntax highlighting, automatic indentation, some basic
|
||||
keybindings for common tasks, Emacs-side LSP integration for
|
||||
communicating with `axcd`, and a collection of `yasnippet` snippets
|
||||
for inserting common AlexScript constructs. (Planned; does not exist
|
||||
yet.)
|
||||
for inserting common AlexScript constructs.
|
||||
- `alexscript-vsc`, Visual Studio Code plugins and tools for editing
|
||||
AlexScript code. (Planned; does not exist yet.)
|
||||
AlexScript code.
|
||||
- `alexscript-vim`, tools and configuration files for optimizing Vim
|
||||
and Neovim for editing AlexScript code. (Planned; does not exist
|
||||
yet.)
|
||||
and Neovim for editing AlexScript code.
|
||||
|
||||
## Language features
|
||||
|
||||
The language is mostly influenced by Rust and Haskell: it has strict
|
||||
safety requirements and borrow-checked memory management like that of
|
||||
Rust, but its syntax and type system are similar to those of Haskell.
|
||||
|
||||
Some features the language will most likely have:
|
||||
- All functions are pure by default; side effects are chained together
|
||||
using an `IO` monad.
|
||||
- Despite the language's purity, expressions will be strictly
|
||||
evaluated to provide more programmer control.
|
||||
- Different monads represent different levels of safety, and can be
|
||||
converted using functions marked as `UNSAFE`. The intention is that
|
||||
code can be audited by manually checking that all the `UNSAFE`
|
||||
transformations are sound, and code that contains no `UNSAFE`
|
||||
function calls is guaranteed to satisfy varying definitions of
|
||||
soundness:
|
||||
- The `IO` monad represents computations that might have side
|
||||
effects on the real world. If a computation of type `IO` is known
|
||||
by the programmer to not have side effects on the real world, then
|
||||
it can be converted to a pure computation using the standard
|
||||
library function `UNSAFE_assertPure : IO a -> a`.
|
||||
- The `MemoryUnsafe` monad represents computations that might read
|
||||
from or write to memory that is not allocated correctly: for
|
||||
example, `readPtr`, which reads from a raw pointer, is of type
|
||||
`MemoryUnsafe a` because the pointer is not known to be valid. If
|
||||
a computation has been confirmed to be safe by the programmer, it
|
||||
can be converted to an `IO` computation using
|
||||
`UNSAFE_assertMemorySafe : MemoryUnsafe a -> IO a`.
|
||||
- Further safety monads may be added in the future.
|
||||
-
|
||||
|
||||
## Compilation
|
||||
|
|
47
axc/Cargo.lock
generated
47
axc/Cargo.lock
generated
|
@ -11,14 +11,6 @@ dependencies = [
|
|||
"const-random",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "alexc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"clap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
|
@ -36,6 +28,15 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "axc"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"chumsky",
|
||||
"clap",
|
||||
"num-bigint",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
|
@ -178,6 +179,36 @@ version = "0.2.126"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-integer",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-integer"
|
||||
version = "0.1.45"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.13.0"
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
[package]
|
||||
name = "alexc"
|
||||
name = "axc"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
|
@ -8,3 +8,4 @@ edition = "2021"
|
|||
[dependencies]
|
||||
chumsky = "0.8.0"
|
||||
clap = { version = "3.2.16", features = ["derive"] }
|
||||
num-bigint = "0.4.3"
|
||||
|
|
196
axc/src/lib.rs
196
axc/src/lib.rs
|
@ -2,54 +2,142 @@
|
|||
//!
|
||||
//! AlexScript is a based programming language, for based people.
|
||||
|
||||
use num_bigint::BigUint;
|
||||
|
||||
/// A concrete syntax tree. This represents the full content of an AlexScript program, including all
|
||||
/// whitespace, comments, and tokens: the source code of the original program can be recovered
|
||||
/// completely using the syntax tree.
|
||||
pub struct SyntaxTree {}
|
||||
|
||||
/// Top-level statements, making up the overall program.
|
||||
pub enum Statement {
|
||||
/// Declaration of an abstract data type.
|
||||
TypeDefinition {
|
||||
/// The type being defined. This is only allowed to be `Named` or `Application`.
|
||||
left: Type,
|
||||
|
||||
/// The possible constructors of the data type.
|
||||
constructors: Vec<TypeConstructor>,
|
||||
},
|
||||
|
||||
/// Declaration that a type implements a type class.
|
||||
InstanceDefinition {
|
||||
/// The name of the type class.
|
||||
class_name: String,
|
||||
|
||||
/// The type that conforms to the type class.
|
||||
typ: Type,
|
||||
|
||||
/// The list of declarations that dictate the type's behavior when treated as an instance of
|
||||
/// the type class.
|
||||
decls: Vec<ClassDeclaration>,
|
||||
},
|
||||
|
||||
/// Other declarations.
|
||||
ClassDeclaration(ClassDeclaration),
|
||||
}
|
||||
|
||||
/// Top-level statements that are also allowed to occur within a type class definition, and which
|
||||
/// therefore have an optional rather than strictly-required right-hand side, e.g., `type X;` rather
|
||||
/// than `type X = Y;`.
|
||||
pub enum ClassDeclaration {
|
||||
/// Declaration of a function or constant.
|
||||
Function {
|
||||
/// Name of the function.
|
||||
name: String,
|
||||
|
||||
/// The function arguments.
|
||||
arguments: Vec<Pattern>,
|
||||
|
||||
/// The definition of the function.
|
||||
definition: Option<Expr>,
|
||||
},
|
||||
|
||||
/// Declaration of a type that is a literal alias for another type.
|
||||
TypeAlias {
|
||||
/// The type being defined. This is only allowed to be `Named` or `Application`.
|
||||
left: Type,
|
||||
|
||||
/// The target type.
|
||||
right: Option<Type>,
|
||||
},
|
||||
|
||||
/// Declaration of a type class.
|
||||
ClassDefinition {
|
||||
/// The name of the class.
|
||||
name: String,
|
||||
|
||||
/// The type variable representing a type conforming to the class.
|
||||
var: String,
|
||||
|
||||
/// The list of declarations (optionally filled-in) that are necessary for a type to conform
|
||||
/// to the type class.
|
||||
decls: Vec<ClassDeclaration>,
|
||||
},
|
||||
}
|
||||
|
||||
/// A possible constructor for an abstract data type.
|
||||
pub struct TypeConstructor {
|
||||
/// The name of the constructor.
|
||||
pub name: String,
|
||||
|
||||
/// The arguments to the constructor.
|
||||
pub args: Vec<Type>,
|
||||
}
|
||||
|
||||
/// Expressions.
|
||||
pub enum Expr {
|
||||
/// Unary operators, e.g., "-5".
|
||||
UnaryOp { kind: UnaryOpKind, val: Box<Expr> },
|
||||
/// Unary operators, e.g., `-5`.
|
||||
UnaryOp { kind: String, val: Box<Expr> },
|
||||
|
||||
/// Binary operators, e.g., "5 + 5".
|
||||
/// Binary operators, e.g., `5 + 5`.
|
||||
BinaryOp {
|
||||
kind: BinaryOpKind,
|
||||
kind: String,
|
||||
left: Box<Expr>,
|
||||
right: Box<Expr>,
|
||||
},
|
||||
|
||||
/// Function application, e.g., "sin x".
|
||||
/// Function application, e.g., `sin x`.
|
||||
Application {
|
||||
func: Box<Expr>,
|
||||
argument: Box<Expr>,
|
||||
},
|
||||
|
||||
/// Matching of multiple cases, e.g., "match x { 5 => 'a', 6 => 'b' }".
|
||||
/// Defining of temporary variables, e.g., `let x = 5 in x + x`.
|
||||
Let { left: Pattern, right: Box<Expr> },
|
||||
|
||||
/// Matching of multiple cases, e.g., `match x { 5 => 'a', 6 => 'b' }`.
|
||||
Match {
|
||||
matcher: Box<Expr>,
|
||||
cases: Vec<(Pattern, Expr)>,
|
||||
},
|
||||
|
||||
/// Struct initialization, e.g., "Vector { pointer: xyz, length: 12 }".
|
||||
/// Syntax sugar for matching on booleans, e.g., `if foo then bar else baz`.
|
||||
If {
|
||||
subject: Box<Expr>,
|
||||
iftrue: Box<Expr>,
|
||||
iffalse: Box<Expr>,
|
||||
},
|
||||
|
||||
/// Struct initialization, e.g., `Vector { pointer: xyz, length: 12 }`.
|
||||
StructInit {
|
||||
name: String,
|
||||
elements: Vec<(String, Expr)>,
|
||||
},
|
||||
|
||||
/// Anonymous functions.
|
||||
/// Anonymous functions, e.g., `fn x -> x + 1`.
|
||||
Lambda {
|
||||
arguments: Vec<Pattern>,
|
||||
result: Box<Expr>,
|
||||
},
|
||||
|
||||
/// Variable references, possibly namespaced, e.g., "foo::bar::baz".
|
||||
/// Variable references, possibly namespaced, e.g., `foo::bar::baz`.
|
||||
VariableReference(Vec<String>),
|
||||
|
||||
/// Dot subscripts, e.g., "foo.bar".
|
||||
/// Dot subscripts, e.g., `foo.bar`.
|
||||
DotSubscript { value: Box<Expr>, subscript: String },
|
||||
|
||||
/// Bracket subscripts, e.g., "foo[bar]".
|
||||
/// Bracket subscripts, e.g., `foo[bar]`.
|
||||
BracketSubscript {
|
||||
value: Box<Expr>,
|
||||
subscript: Box<Expr>,
|
||||
|
@ -59,47 +147,71 @@ pub enum Expr {
|
|||
Literal(Literal),
|
||||
}
|
||||
|
||||
/// Kinds of unary operators, that are placed before an expression.
|
||||
pub enum UnaryOpKind {
|
||||
/// +x, equivalent to absolute value.
|
||||
Plus,
|
||||
/// Type names.
|
||||
pub enum Type {
|
||||
/// `Foo`
|
||||
Named(String),
|
||||
|
||||
/// -x, multiplication by -1.
|
||||
Minus,
|
||||
/// `List Int`
|
||||
Application {
|
||||
// TODO: is this right?
|
||||
function: Box<Expr>,
|
||||
expression: Box<Expr>,
|
||||
},
|
||||
|
||||
/// `(a, b)`
|
||||
Tuple(Vec<Type>),
|
||||
|
||||
/// `{ a: x, b: y }`
|
||||
Record(Vec<(String, Type)>),
|
||||
}
|
||||
|
||||
/// Kinds of binary operations, that are placed between two expressions.
|
||||
///
|
||||
/// As a convention, all binary operations should be one character.
|
||||
pub enum BinaryOpKind {
|
||||
/// a + b
|
||||
Add,
|
||||
/// Patterns for use in function arguments, lambda arguments, `let` statements, and `match`
|
||||
/// statements.
|
||||
pub enum Pattern {
|
||||
/// `(a, b)`
|
||||
Tuple(Vec<Pattern>),
|
||||
|
||||
/// a - b
|
||||
Sub,
|
||||
/// `a: String`
|
||||
TypeAnnotated {
|
||||
pat: Box<Pattern>,
|
||||
// Note that types are expressions, to simplify parsing.
|
||||
typ: Box<Expr>,
|
||||
},
|
||||
|
||||
/// a * b
|
||||
Mul,
|
||||
/// `Foo`
|
||||
Exact(String),
|
||||
|
||||
/// a / b
|
||||
Div,
|
||||
/// `Foo { a: x, b: y, ... }`
|
||||
Destructure(String, Record),
|
||||
|
||||
/// a % b
|
||||
Modulo,
|
||||
/// `a`
|
||||
Capture(String),
|
||||
|
||||
/// a ^ b
|
||||
Exponent,
|
||||
/// `_`
|
||||
Ignore,
|
||||
|
||||
/// a & b
|
||||
And,
|
||||
|
||||
/// a | b
|
||||
Or,
|
||||
/// `"hello"`
|
||||
Literal(Literal),
|
||||
}
|
||||
|
||||
///
|
||||
pub enum Pattern {}
|
||||
/// Record syntax blocks, e.g., "{a: b, c: d, ...}".
|
||||
pub struct Record {
|
||||
/// The named members of the record, in order of occurrence.
|
||||
pub members: Vec<(String, Expr)>,
|
||||
|
||||
pub enum Literal {}
|
||||
/// Whether the record ends with "..."; this allows ignoring blocks.
|
||||
pub inexhaustive: bool,
|
||||
}
|
||||
|
||||
pub enum Token {}
|
||||
/// Literal values included in source code.
|
||||
pub enum Literal {
|
||||
/// `"hello"`
|
||||
String(String),
|
||||
|
||||
/// `123`
|
||||
Integer(BigUint),
|
||||
|
||||
/// `123.456`
|
||||
Float(f64),
|
||||
}
|
||||
|
|
39
examples/unwrap.axs
Normal file
39
examples/unwrap.axs
Normal file
|
@ -0,0 +1,39 @@
|
|||
// Example of using type classes to implement `unwrap`.
|
||||
|
||||
// Types that can be unwrapped.
|
||||
class Unwrap a {
|
||||
// The result that `unwrap`ping will produce when successfully applied.
|
||||
type Result;
|
||||
|
||||
// Unwraps `self` into the result type, panicking on failure.
|
||||
def unwrap (self: a) -> Result;
|
||||
}
|
||||
|
||||
// Either an `a`, or no data.
|
||||
data Option a
|
||||
= Some a
|
||||
| None;
|
||||
|
||||
// `Option a` can be unwrapped to produce `a`.
|
||||
instance Unwrap (Option a) {
|
||||
type Result = a;
|
||||
def unwrap self = match self {
|
||||
Some x -> x,
|
||||
None -> panic "Attempt to unwrap None",
|
||||
};
|
||||
}
|
||||
|
||||
// Either an `a`, or an error of type `e` (which must be representable as a string, hence the `Show`
|
||||
// bound).
|
||||
data Result a (e: Show)
|
||||
= Ok a
|
||||
| Err e;
|
||||
|
||||
// `Result a e` can be unwrapped to produce `a`.
|
||||
instance Unwrap (Result a e) {
|
||||
type Result = a;
|
||||
def unwrap self = match self {
|
||||
Ok x -> x,
|
||||
Err e -> panic ("Attempt to unwrap Err " ++ show e),
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue