From 1044939b32cea0dfc339c1ae268c802cd8a715fe Mon Sep 17 00:00:00 2001 From: Alex Bethel Date: Fri, 5 Aug 2022 11:44:31 -0500 Subject: [PATCH] Move syntax tree to its own file --- axc/src/ast2ir.rs | 2 +- axc/src/lib.rs | 255 +--------------------------------------------- axc/src/parser.rs | 10 +- axc/src/syntax.rs | 253 +++++++++++++++++++++++++++++++++++++++++++++ axc/src/typeck.rs | 2 +- 5 files changed, 266 insertions(+), 256 deletions(-) create mode 100644 axc/src/syntax.rs diff --git a/axc/src/ast2ir.rs b/axc/src/ast2ir.rs index 183e8eb..6e7f89b 100644 --- a/axc/src/ast2ir.rs +++ b/axc/src/ast2ir.rs @@ -1,7 +1,7 @@ //! Conversion of AST to intermediate representation. use crate::ir::IR; -use crate::SyntaxTree; +use crate::syntax::SyntaxTree; /// Compiles an abstract syntax tree into intermediate representation; this assumes the code already /// type-checks, and emits unoptimized IR. diff --git a/axc/src/lib.rs b/axc/src/lib.rs index c11f5a3..9cc8b66 100644 --- a/axc/src/lib.rs +++ b/axc/src/lib.rs @@ -5,259 +5,8 @@ #![deny(missing_docs)] pub mod ast2ir; +pub mod backends; pub mod ir; pub mod parser; +pub mod syntax; pub mod typeck; -pub mod backends; - -use num_bigint::BigUint; - -/// A concrete syntax tree. This represents the full content of an AlexScript program, including all -/// whitespace, comments, and tokens: the source code of the original program can be recovered -/// completely using the syntax tree. -pub struct SyntaxTree {} - -/// Top-level statements, making up the overall program. -pub enum Statement { - /// Declaration of an abstract data type. - TypeDefinition { - /// The type being defined. This is only allowed to be `Named` or `Application`. - left: Type, - - /// The possible constructors of the data type. - constructors: Vec, - }, - - /// Declaration that a type implements a type class. - InstanceDefinition { - /// The name of the type class. - class_name: String, - - /// The type that conforms to the type class. - typ: Type, - - /// The list of declarations that dictate the type's behavior when treated as an instance of - /// the type class. - decls: Vec, - }, - - /// Other declarations. - ClassDeclaration(ClassDeclaration), -} - -/// Top-level statements that are also allowed to occur within a type class definition, and which -/// therefore have an optional rather than strictly-required right-hand side, e.g., `type X;` rather -/// than `type X = Y;`. -pub enum ClassDeclaration { - /// Declaration of a function or constant. - Function { - /// Name of the function and its arguments. - name: String, - - /// The function arguments. - arguments: Vec, - - /// The definition of the function. - definition: Option, - }, - - /// Declaration of a type that is a literal alias for another type. - TypeAlias { - /// The type being defined. This is only allowed to be `Named` or `Application`. - left: Type, - - /// The target type. - right: Option, - }, - - /// Declaration of a type class. - ClassDefinition { - /// The name of the class. - name: String, - - /// The type variable representing a type conforming to the class. - var: String, - - /// The list of declarations (optionally filled-in) that are necessary for a type to conform - /// to the type class. - decls: Vec, - }, -} - -/// A possible constructor for an abstract data type. -pub struct TypeConstructor { - /// The name of the constructor. - pub name: String, - - /// The arguments to the abstract data type. - pub args: Vec, -} - -/// Expressions. -pub enum Expr { - /// Unary operators, e.g., `-5`. - UnaryOp { - /// The text of the operator. - kind: String, - - /// The value being operated upon. - val: Box, - }, - - /// Binary operators, e.g., `5 + 5`. - BinaryOp { - /// The text of the operator. - kind: String, - - /// The left side of the operator. - left: Box, - - /// The right side of the operator. - right: Box, - }, - - /// Function application, e.g., `sin x`. - Application { - /// The function being applied. For curried functions with multiple arguments (e.g., `atan2 - /// y x`), this is another expression of type `Application`. - func: Box, - - /// The argument to which the function is being applied. - argument: Box, - }, - - /// Defining of temporary variables, e.g., `let x = 5 in x + x`. - Let { - /// The pattern being bound. - left: Pattern, - - /// The variable the pattern is matching. - right: Box, - - /// The expression the pattern is being substituted into. - into: Box, - }, - - /// Matching of multiple cases, e.g., `match x { 5 => 'a', 6 => 'b' }`. - Match { - /// The expression being matched upon. - matcher: Box, - - /// The possible cases of the `match` expression. - cases: Vec<(Pattern, Expr)>, - }, - - /// Record initialization, e.g., `{ pointer: xyz, length: 12 }`. - Record { - /// The elements of the record. - elements: Vec<(String, Expr)>, - }, - - /// Anonymous functions, e.g., `fn x -> x + 1`. - Lambda { - /// Arguments to the lambda; multiple of these are equivalent to stacking lambdas by - /// currying. - arguments: Vec, - - /// The result of the lambda. - result: Box, - }, - - /// Variable references, possibly namespaced, e.g., `foo::bar::baz`. - VariableReference(Vec), - - /// Dot subscripts, e.g., `foo.bar`. - DotSubscript { - /// The left side of the subscript. - value: Box, - - /// The right side of the subscript; this is only allowed to be a single word. - subscript: String, - }, - - /// Bracket subscripts, e.g., `foo[bar]`. - BracketSubscript { - /// The left side of the subscript. - value: Box, - - /// The right side of the subscript. - subscript: Box, - }, - - /// Literal tokens, e.g., strings and numbers. - Literal(Literal), -} - -/// Type names. -pub enum Type { - /// `Foo` - Named(String), - - /// `List Int` - Application { - /// The function being applied. This must be a generic type. - function: Box, - - /// The expression given as an argument to the type. This can be any expression, to allow - /// const generics; in most cases, though, it should be just a normal type. - expression: Box, - }, - - /// `(a, b)` - Tuple(Vec), - - /// `{ a: x, b: y }` - Record(Vec<(String, Type)>), -} - -/// Patterns for use in function arguments, lambda arguments, `let` statements, and `match` -/// statements. -pub enum Pattern { - /// `(a, b)` - Tuple(Vec), - - /// `a: String` - TypeAnnotated { - /// The pattern being annotated. - pat: Box, - - /// The type that `pat` is being asserted to have. - typ: Box, - }, - - /// `Foo` - Exact(String), - - /// `Foo { a: x, b: y, ... }` - Destructure(String, Record), - - /// `a` - Capture(String), - - /// `_` - Ignore, - - /// `"hello"` - Literal(Literal), -} - -/// Record syntax blocks, e.g., "{a: b, c: d, ...}". -pub struct Record { - /// The named members of the record, in order of occurrence. - pub members: Vec<(String, Expr)>, - - /// Whether the record ends with "..."; this allows ignoring blocks. - pub inexhaustive: bool, -} - -/// Literal values included in source code. -pub enum Literal { - /// `"hello"` - String(String), - - /// `123` - Integer(BigUint), - - /// `123.456` - Float(f64), -} diff --git a/axc/src/parser.rs b/axc/src/parser.rs index 6424e3a..276dada 100644 --- a/axc/src/parser.rs +++ b/axc/src/parser.rs @@ -23,6 +23,14 @@ impl Display for ParserError { impl Error for ParserError {} /// Parser for AlexScript code. -pub fn parser() -> impl Parser> { +pub fn parser() -> impl Parser> { filter(|c: &char| c.is_numeric()).map(|_| todo!()) } + +fn parse_expression() -> impl Parser> { + parser().map(|_| todo!()) +} + +fn parse_type() -> impl Parser> { + parser().map(|_| todo!()) +} diff --git a/axc/src/syntax.rs b/axc/src/syntax.rs new file mode 100644 index 0000000..22c87b1 --- /dev/null +++ b/axc/src/syntax.rs @@ -0,0 +1,253 @@ +//! Syntax tree for AlexScript code. + +use num_bigint::BigUint; + +/// A concrete syntax tree. This represents the full content of an AlexScript program, including all +/// whitespace, comments, and tokens: the source code of the original program can be recovered +/// completely using the syntax tree. +pub struct SyntaxTree {} + +/// Top-level statements, making up the overall program. +pub enum Statement { + /// Declaration of an abstract data type. + TypeDefinition { + /// The type being defined. This is only allowed to be `Named` or `Application`. + left: Type, + + /// The possible constructors of the data type. + constructors: Vec, + }, + + /// Declaration that a type implements a type class. + InstanceDefinition { + /// The name of the type class. + class_name: String, + + /// The type that conforms to the type class. + typ: Type, + + /// The list of declarations that dictate the type's behavior when treated as an instance of + /// the type class. + decls: Vec, + }, + + /// Other declarations. + ClassDeclaration(ClassDeclaration), +} + +/// Top-level statements that are also allowed to occur within a type class definition, and which +/// therefore have an optional rather than strictly-required right-hand side, e.g., `type X;` rather +/// than `type X = Y;`. +pub enum ClassDeclaration { + /// Declaration of a function or constant. + Function { + /// Name of the function and its arguments. + name: String, + + /// The function arguments. + arguments: Vec, + + /// The definition of the function. + definition: Option, + }, + + /// Declaration of a type that is a literal alias for another type. + TypeAlias { + /// The type being defined. This is only allowed to be `Named` or `Application`. + left: Type, + + /// The target type. + right: Option, + }, + + /// Declaration of a type class. + ClassDefinition { + /// The name of the class. + name: String, + + /// The type variable representing a type conforming to the class. + var: String, + + /// The list of declarations (optionally filled-in) that are necessary for a type to conform + /// to the type class. + decls: Vec, + }, +} + +/// A possible constructor for an abstract data type. +pub struct TypeConstructor { + /// The name of the constructor. + pub name: String, + + /// The arguments to the abstract data type. + pub args: Vec, +} + +/// Expressions. +pub enum Expr { + /// Unary operators, e.g., `-5`. + UnaryOp { + /// The text of the operator. + kind: String, + + /// The value being operated upon. + val: Box, + }, + + /// Binary operators, e.g., `5 + 5`. + BinaryOp { + /// The text of the operator. + kind: String, + + /// The left side of the operator. + left: Box, + + /// The right side of the operator. + right: Box, + }, + + /// Function application, e.g., `sin x`. + Application { + /// The function being applied. For curried functions with multiple arguments (e.g., `atan2 + /// y x`), this is another expression of type `Application`. + func: Box, + + /// The argument to which the function is being applied. + argument: Box, + }, + + /// Defining of temporary variables, e.g., `let x = 5 in x + x`. + Let { + /// The pattern being bound. + left: Pattern, + + /// The variable the pattern is matching. + right: Box, + + /// The expression the pattern is being substituted into. + into: Box, + }, + + /// Matching of multiple cases, e.g., `match x { 5 => 'a', 6 => 'b' }`. + Match { + /// The expression being matched upon. + matcher: Box, + + /// The possible cases of the `match` expression. + cases: Vec<(Pattern, Expr)>, + }, + + /// Record initialization, e.g., `{ pointer: xyz, length: 12 }`. + Record { + /// The elements of the record. + elements: Vec<(String, Expr)>, + }, + + /// Anonymous functions, e.g., `fn x -> x + 1`. + Lambda { + /// Arguments to the lambda; multiple of these are equivalent to stacking lambdas by + /// currying. + arguments: Vec, + + /// The result of the lambda. + result: Box, + }, + + /// Variable references, possibly namespaced, e.g., `foo::bar::baz`. + VariableReference(Vec), + + /// Dot subscripts, e.g., `foo.bar`. + DotSubscript { + /// The left side of the subscript. + value: Box, + + /// The right side of the subscript; this is only allowed to be a single word. + subscript: String, + }, + + /// Bracket subscripts, e.g., `foo[bar]`. + BracketSubscript { + /// The left side of the subscript. + value: Box, + + /// The right side of the subscript. + subscript: Box, + }, + + /// Literal tokens, e.g., strings and numbers. + Literal(Literal), +} + +/// Type names. +pub enum Type { + /// `Foo` + Named(String), + + /// `List Int` + Application { + /// The function being applied. This must be a generic type. + function: Box, + + /// The expression given as an argument to the type. This can be any expression, to allow + /// const generics; in most cases, though, it should be just a normal type. + expression: Box, + }, + + /// `(a, b)` + Tuple(Vec), + + /// `{ a: x, b: y }` + Record(Vec<(String, Type)>), +} + +/// Patterns for use in function arguments, lambda arguments, `let` statements, and `match` +/// statements. +pub enum Pattern { + /// `(a, b)` + Tuple(Vec), + + /// `a: String` + TypeAnnotated { + /// The pattern being annotated. + pat: Box, + + /// The type that `pat` is being asserted to have. + typ: Box, + }, + + /// `Foo` + Exact(String), + + /// `Foo { a: x, b: y, ... }` + Destructure(String, Record), + + /// `a` + Capture(String), + + /// `_` + Ignore, + + /// `"hello"` + Literal(Literal), +} + +/// Record syntax blocks, e.g., "{a: b, c: d, ...}". +pub struct Record { + /// The named members of the record, in order of occurrence. + pub members: Vec<(String, Expr)>, + + /// Whether the record ends with "..."; this allows ignoring blocks. + pub inexhaustive: bool, +} + +/// Literal values included in source code. +pub enum Literal { + /// `"hello"` + String(String), + + /// `123` + Integer(BigUint), + + /// `123.456` + Float(f64), +} diff --git a/axc/src/typeck.rs b/axc/src/typeck.rs index d622d6c..56de735 100644 --- a/axc/src/typeck.rs +++ b/axc/src/typeck.rs @@ -2,7 +2,7 @@ use std::{error::Error, fmt::Display}; -use crate::SyntaxTree; +use crate::syntax::SyntaxTree; /// A compile-time type error from the user's source code. #[derive(Debug)]