From b0df87bd6ace92d20c5ed941cac673b6861b729c Mon Sep 17 00:00:00 2001 From: Alex Bethel Date: Tue, 2 Aug 2022 21:16:16 -0600 Subject: [PATCH] More AST work --- README.md | 53 ++++++++++-- axc/Cargo.lock | 47 +++++++++-- axc/Cargo.toml | 3 +- axc/src/lib.rs | 196 ++++++++++++++++++++++++++++++++++---------- examples/unwrap.axs | 39 +++++++++ 5 files changed, 278 insertions(+), 60 deletions(-) create mode 100644 examples/unwrap.axs diff --git a/README.md b/README.md index 2428aec..1dc4b04 100644 --- a/README.md +++ b/README.md @@ -17,25 +17,60 @@ deterministic memory management. ## Tools -This repository contains the following current and planned tools: +This repository contains the following tools: - `axc`, the AlexScript compiler. This can be used as a binary with a fairly standard compiler CLI, or as a library for use in other programs. + +The following tools do not exist yet, but are planned: +- `axci`, the interactive AlexScript interpreter. - `axcd`, the Language Server Protocol (LSP) server for AlexScript code support in editors, supporting definition peeking and lookup, - renaming variables and modules, etc. (Planned; does not exist yet.) + renaming variables and modules, etc. - `axfmt`, the standard formatter for AlexScript code; all AlexScript code used in this repository must be formatted with `axfmt`, and its - use is recommended for other projects. (Planned; does not exist - yet.) + use is recommended for other projects. +- `axdoc`, the documentation generator. - `alexscript-mode`, an Emacs mode for editing AlexScript code, supporting syntax highlighting, automatic indentation, some basic keybindings for common tasks, Emacs-side LSP integration for communicating with `acxd`, and a collection of `yasnippet` snippets - for inserting common AlexScript constructs. (Planned; does not exist - yet.) + for inserting common AlexScript constructs. - `alexscript-vsc`, Visual Studio Code plugins and tools for editing - AlexScript code. 
(Planned; does not exist yet.) + AlexScript code. - `alexscript-vim`, tools and configuration files for optimizing Vim - and Neovim for editing AlexScript code. (Planned; does not exist - yet.) + and Neovim for editing AlexScript code. + +## Language features + +The language is mostly influenced by Rust and Haskell: it has strict +safety requirements and borrow-checked memory management like that of +Rust, but its syntax and type system are similar to those of Haskell. + +Some features the language will most likely have: +- All functions are pure by default; side effects are chained together + using an `IO` monad. +- Despite the language's purity, expressions will be strictly + evaluated to provide more programmer control. +- Different monads represent different levels of safety, and can be + converted using functions marked as `UNSAFE`. The intention is that + code can be audited by manually checking that all the `UNSAFE` + transformations are sound, and code that contains no `UNSAFE` + function calls is guaranteed to satisfy varying definitions of + soundness: + - The `IO` monad represents computations that might have side + effects on the real world. If a computation of type `IO` is known + by the programmer to not have side effects on the real world, then + it can be converted to a pure computation using the standard + library function `UNSAFE_assertPure : IO a -> a`. + - The `MemoryUnsafe` monad represents computations that might read + from or write to memory that is not allocated correctly: for + example, `readPtr`, which reads from a raw pointer, is of type + `MemoryUnsafe a` because the pointer is not known to be valid. If + a computation has been confirmed to be safe by the programmer, it + can be converted to an `IO` computation using + `UNSAFE_assertMemorySafe : MemoryUnsafe a -> IO a`. + - Further safety monads may be added in the future. 
+- + +## Compilation diff --git a/axc/Cargo.lock b/axc/Cargo.lock index 4aad2e8..1f03237 100644 --- a/axc/Cargo.lock +++ b/axc/Cargo.lock @@ -11,14 +11,6 @@ dependencies = [ "const-random", ] -[[package]] -name = "alexc" -version = "0.1.0" -dependencies = [ - "chumsky", - "clap", -] - [[package]] name = "atty" version = "0.2.14" @@ -36,6 +28,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "axc" +version = "0.1.0" +dependencies = [ + "chumsky", + "clap", + "num-bigint", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -178,6 +179,36 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.13.0" diff --git a/axc/Cargo.toml b/axc/Cargo.toml index b74ad1e..024b574 100644 --- a/axc/Cargo.toml +++ b/axc/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "alexc" +name = "axc" version = "0.1.0" edition = "2021" @@ -8,3 +8,4 @@ edition = "2021" [dependencies] chumsky = "0.8.0" clap = { version = "3.2.16", features = ["derive"] } +num-bigint = 
"0.4.3" diff --git a/axc/src/lib.rs b/axc/src/lib.rs index 61a8c13..180b7c6 100644 --- a/axc/src/lib.rs +++ b/axc/src/lib.rs @@ -2,54 +2,142 @@ //! //! AlexScript is a based programming language, for based people. +use num_bigint::BigUint; + /// A concrete syntax tree. This represents the full content of an AlexScript program, including all /// whitespace, comments, and tokens: the source code of the original program can be recovered /// completely using the syntax tree. pub struct SyntaxTree {} +/// Top-level statements, making up the overall program. +pub enum Statement { + /// Declaration of an abstract data type. + TypeDefinition { + /// The type being defined. This is only allowed to be `Named` or `Application`. + left: Type, + + /// The possible constructors of the data type. + constructors: Vec, + }, + + /// Declaration that a type implements a type class. + InstanceDefinition { + /// The name of the type class. + class_name: String, + + /// The type that conforms to the type class. + typ: Type, + + /// The list of declarations that dictate the type's behavior when treated as an instance of + /// the type class. + decls: Vec, + }, + + /// Other declarations. + ClassDeclaration(ClassDeclaration), +} + +/// Top-level statements that are also allowed to occur within a type class definition, and which +/// therefore have an optional rather than strictly-required right-hand side, e.g., `type X;` rather +/// than `type X = Y;`. +pub enum ClassDeclaration { + /// Declaration of a function or constant. + Function { + /// Name of the function and its arguments. + name: String, + + /// The function arguments. + arguments: Vec, + + /// The definition of the function. + definition: Option, + }, + + /// Declaration of a type that is a literal alias for another type. + TypeAlias { + /// The type being defined. This is only allowed to be `Named` or `Application`. + left: Type, + + /// The target type. + right: Option, + }, + + /// Declaration of a type class. 
+ ClassDefinition { + /// The name of the class. + name: String, + + /// The type variable representing a type conforming to the class. + var: String, + + /// The list of declarations (optionally filled-in) that are necessary for a type to conform + /// to the type class. + decls: Vec, + }, +} + +/// A possible constructor for an abstract data type. +pub struct TypeConstructor { + /// The name of the constructor. + pub name: String, + + /// The arguments to the abstract data type. + pub args: Vec, +} + /// Expressions. pub enum Expr { - /// Unary operators, e.g., "-5". - UnaryOp { kind: UnaryOpKind, val: Box }, + /// Unary operators, e.g., `-5`. + UnaryOp { kind: String, val: Box }, - /// Binary operators, e.g., "5 + 5". + /// Binary operators, e.g., `5 + 5`. BinaryOp { - kind: BinaryOpKind, + kind: String, left: Box, right: Box, }, - /// Function application, e.g., "sin x". + /// Function application, e.g., `sin x`. Application { func: Box, argument: Box, }, - /// Matching of multiple cases, e.g., "match x { 5 => 'a', 6 => 'b' }". + /// Defining of temporary variables, e.g., `let x = 5 in x + x`. + Let { left: Pattern, right: Box }, + + /// Matching of multiple cases, e.g., `match x { 5 => 'a', 6 => 'b' }`. Match { matcher: Box, cases: Vec<(Pattern, Expr)>, }, - /// Struct initialization, e.g., "Vector { pointer: xyz, length: 12 }". + /// Syntax sugar for matching on booleans, e.g., `if foo then bar else baz`. + If { + subject: Box, + iftrue: Box, + iffalse: Box, + }, + + /// Struct initialization, e.g., `Vector { pointer: xyz, length: 12 }`. StructInit { name: String, elements: Vec<(String, Expr)>, }, - /// Anonymous functions. + /// Anonymous functions, e.g., `fn x -> x + 1`. Lambda { arguments: Vec, result: Box, }, - /// Variable references, possibly namespaced, e.g., "foo::bar::baz". + /// Variable references, possibly namespaced, e.g., `foo::bar::baz`. VariableReference(Vec), - /// Dot subscripts, e.g., "foo.bar". + /// Dot subscripts, e.g., `foo.bar`. 
DotSubscript { value: Box, subscript: String }, - /// Bracket subscripts, e.g., "foo[bar]". + /// Bracket subscripts, e.g., `foo[bar]`. BracketSubscript { value: Box, subscript: Box, @@ -59,47 +147,71 @@ pub enum Expr { Literal(Literal), } -/// Kinds of unary operators, that are placed before an expression. -pub enum UnaryOpKind { - /// +x, equivalent to absolute value. - Plus, +/// Type names. +pub enum Type { + /// `Foo` + Named(String), - /// -x, multiplication by -1. - Minus, + /// `List Int` + Application { + // TODO: is this right? + function: Box, + expression: Box, + }, + + /// `(a, b)` + Tuple(Vec), + + /// `{ a: x, b: y }` + Record(Vec<(String, Type)>), } -/// Kinds of binary operations, that are placed between two expressions. -/// -/// As a convention, all binary operations should be one character. -pub enum BinaryOpKind { - /// a + b - Add, +/// Patterns for use in function arguments, lambda arguments, `let` statements, and `match` +/// statements. +pub enum Pattern { + /// `(a, b)` + Tuple(Vec), - /// a - b - Sub, + /// `a: String` + TypeAnnotated { + pat: Box, + // Note that types are expressions, to simplify parsing. + typ: Box, + }, - /// a * b - Mul, + /// `Foo` + Exact(String), - /// a / b - Div, + /// `Foo { a: x, b: y, ... }` + Destructure(String, Record), - /// a % b - Modulo, + /// `a` + Capture(String), - /// a ^ b - Exponent, + /// `_` + Ignore, - /// a & b - And, - - /// a | b - Or, + /// `"hello"` + Literal(Literal), } -/// -pub enum Pattern {} +/// Record syntax blocks, e.g., "{a: b, c: d, ...}". +pub struct Record { + /// The named members of the record, in order of occurrence. + pub members: Vec<(String, Expr)>, -pub enum Literal {} + /// Whether the record ends with "..."; this allows ignoring blocks. + pub inexhaustive: bool, +} -pub enum Token {} +/// Literal values included in source code. 
+pub enum Literal { + /// `"hello"` + String(String), + + /// `123` + Integer(BigUint), + + /// `123.456` + Float(f64), +} diff --git a/examples/unwrap.axs b/examples/unwrap.axs new file mode 100644 index 0000000..17c0890 --- /dev/null +++ b/examples/unwrap.axs @@ -0,0 +1,39 @@ +// Example of using type classes to implement `unwrap`. + +// Types that can be unwrapped. +class Unwrap a { + // The result that `unwrap`ping will produce when successfully applied. + type Result; + + // Unwraps `self` into the result type, panicking on failure. + def unwrap (self: a) -> Result; +} + +// Either an `a`, or no data. +data Option a + = Some a + | None; + +// `Option a` can be unwrapped to produce `a`. +instance Unwrap (Option a) { + type Result = a; + def unwrap self = match self { + Some x -> x, + None -> panic "Attempt to unwrap None", + }; +} + +// Either an `a`, or an error of type `e` (which must be representable as a string, hence the `Show` +// bound). +data Result a (e: Show) + = Ok a + | Err e; + +// `Result a e` can be unwrapped to produce `a`. +instance Unwrap (Result a e) { + type Result = a; + def unwrap self = match self { + Ok x -> x, + Err e -> panic ("Attempt to unwrap Err " ++ show e), + } +}