From e50e93e13eadbe525e8e4df0b9868f4d80d30ef6 Mon Sep 17 00:00:00 2001
From: Gers2017
Date: Sat, 22 Apr 2023 18:57:55 -0600
Subject: [PATCH] Cpp.js initial commit | cpp club

---
 .gitignore   |   6 +
 README.md    |  43 ++++++
 main.cpp     |  11 ++
 main.js      | 363 +++++++++++++++++++++++++++++++++++++++++++++++++++
 package.json |  18 +++
 parser.js    | 173 ++++++++++++++++++++++++
 run.sh       |   5 +
 stuff.js     |  60 +++++++++
 utils.js     |  68 ++++++++++
 9 files changed, 747 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 main.cpp
 create mode 100644 main.js
 create mode 100644 package.json
 create mode 100644 parser.js
 create mode 100755 run.sh
 create mode 100644 stuff.js
 create mode 100644 utils.js

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c6fb05b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+/bin
+/**/*.out
+/ignore
+main.rs
+test.cpp
+test.rs
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..33411f5
--- /dev/null
+++ b/README.md
@@ -0,0 +1,43 @@
+# Cpp.js
+
+> ## C++ compiler written in Vanilla JavaScript
+
+## Features
+
+- [x] Compiles C++ to Rust
+- [ ] Compiles C++ to PHP (not yet)
+
+## Motivation
+
+Because I had no other choice.
+
+## Example
+
+```sh
+# node main.js <file>.cpp
+node main.js main.cpp # compiles main.cpp to Rust (writes main.rs)
+```
+
+## Test the Rust code
+
+```sh
+# rustc <file>.rs -o <file>.out
+rustc main.rs -o main.out
+```
+
+## Support
+
+- [ ] `//` comments
+- [ ] `printf("...");`
+- [ ] `return`
+
+```cpp
+#include <stdio.h>
+
+int main()
+{
+    // This is a comment
+    printf("Hello from c++\n");
+    return 0;
+}
+```
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..d902d0a
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,11 @@
+#include <stdio.h>
+
+int main()
+{
+    // This is comment
+    // return 1
+    printf("Hello from c++\n\n");
+    printf("Hi rust!!!!\n");
+    printf("123123\n");
+    return 0;
+}
diff --git a/main.js b/main.js
new file mode 100644
index 0000000..6949e4a
--- /dev/null
+++ b/main.js
@@ -0,0 +1,363 @@
+import { readFileSync, writeFileSync } from "fs";
+import { Parser } from "./parser.js";
+import { PrintStmt, ReturnStmt } from "./stuff.js";
+import {
+    is_alpha,
+    is_alphanum,
+    is_digit,
+    is_whitespace,
+    pretty_error,
+    panic,
+} from "./utils.js";
+
+// Javascript enums at home
+export const TokenType = Object.freeze({
+    LEFT_PAREN: 1,
+    RIGHT_PAREN: 2,
+    LEFT_BRACE: 3,
+    RIGHT_BRACE: 4,
+    SEMICOLON: 5,
+    COMMA: 6,
+    IDENTIFIER: 7,
+    STRING: 8,
+    NUMBER: 9,
+    PRINTF: 10,
+    RETURN: 11,
+    IF: 12,
+    ELSE: 13,
+    INT_TYPE: 14,
+    EOF: 15,
+});
+
+// Single-character tokens, keyed by the character itself.
+const LITERAL_TOKENS = new Map([
+    ["(", TokenType.LEFT_PAREN],
+    [")", TokenType.RIGHT_PAREN],
+    ["{", TokenType.LEFT_BRACE],
+    ["}", TokenType.RIGHT_BRACE],
+    [",", TokenType.COMMA],
+    [";", TokenType.SEMICOLON],
+]);
+
+// Reserved words. A Map instead of a plain object so identifiers such as
+// `constructor` or `toString` are not matched through Object.prototype.
+const KEYWORDS = new Map([
+    ["printf", TokenType.PRINTF],
+    ["int", TokenType.INT_TYPE],
+    ["return", TokenType.RETURN],
+    ["if", TokenType.IF],
+    ["else", TokenType.ELSE],
+]);
+
+export class Token {
+    /**
+     * @param {number} token_type one of the TokenType values
+     * @param {string} value source text of the token
+     * @param {Location} loc position of the token's first character
+     */
+    constructor(token_type, value, loc) {
+        this.token_type = token_type;
+        this.value = value;
+        this.loc = loc;
+    }
+
+    /** @returns {string} debug representation used in error messages */
+    display() {
+        return `Token { type: ${this.token_type}, value: '${
+            this.value
+        }', location: ${this.loc.display()} }`;
+    }
+}
+
+export class Location {
+    /**
+     * @param { number } row zero-based line index
+     * @param { number } column zero-based column index
+     */
+    constructor(row, column) {
+        this.row = row;
+        this.column = column;
+    }
+
+    display() {
+        // human readable, one-based
+        return `(row: ${this.row + 1}, col: ${this.column + 1})`;
+    }
+}
+
+// ------- Lexer --------
+
+class Lexer {
+    /**
+     * @param {string} source full text of the C++ file
+     */
+    constructor(source) {
+        this.source = source;
+        this.index = 0;
+        this.cursor = 0; // index at the start of new scan
+        this.row = 0;
+        this.line_start = 0; // index of the first character of the current row
+        /** @type {Token[]} */
+        this.tokens = [];
+    }
+
+    length() {
+        return this.source.length;
+    }
+
+    is_end() {
+        return this.index >= this.length();
+    }
+
+    is_not_end() {
+        return this.index < this.length();
+    }
+
+    /** Consumes one character, keeping `row` and `line_start` in sync. */
+    advance() {
+        if (this.is_not_end()) {
+            const ch = this.peek();
+            this.index++;
+
+            if (ch === "\n") {
+                this.line_start = this.index;
+                this.row++;
+            }
+        }
+    }
+
+    get_column() {
+        return this.index - this.line_start;
+    }
+
+    get_location() {
+        return new Location(this.row, this.get_column());
+    }
+
+    peek() {
+        return this.source[this.index];
+    }
+
+    /**
+     * @returns { string | null }
+     */
+    peek_next() {
+        if (this.index + 1 >= this.length()) {
+            return null;
+        }
+
+        return this.source[this.index + 1];
+    }
+
+    matches(ch) {
+        return this.peek() === ch;
+    }
+
+    next_matches(ch) {
+        const next = this.peek_next();
+        if (next === null) return false;
+        return next === ch;
+    }
+
+    /**
+     * @param { (ch: string) => boolean } predicate
+     */
+    drop_while(predicate) {
+        while (this.is_not_end() && predicate(this.peek())) {
+            this.advance(); // skip ch
+        }
+    }
+
+    trim_left() {
+        this.drop_while((ch) => is_whitespace(ch));
+    }
+
+    /** Skips the rest of the current line, including its newline. */
+    drop_line() {
+        this.drop_while((ch) => ch !== "\n");
+        if (this.is_not_end()) {
+            this.advance();
+        }
+    }
+
+    /**
+     * Tokenizes the whole source and appends a trailing EOF token.
+     * @returns {Token[]}
+     */
+    scan_tokens() {
+        while (this.is_not_end()) {
+            this.cursor = this.index;
+            this.scan_token();
+        }
+
+        this.push_token(new Token(TokenType.EOF, "", this.get_location()));
+        return this.tokens;
+    }
+
+    scan_token() {
+        const ch = this.peek();
+        const location = this.get_location();
+
+        if (is_whitespace(ch)) {
+            this.advance();
+            return;
+        }
+
+        // Preprocessor directives and `//` comments are dropped entirely.
+        if (
+            this.matches("#") ||
+            (this.matches("/") && this.next_matches("/"))
+        ) {
+            this.drop_line();
+            return;
+        }
+
+        if (ch === '"') {
+            this.string(location);
+        } else if (LITERAL_TOKENS.has(ch)) {
+            this.push_token(new Token(LITERAL_TOKENS.get(ch), ch, location));
+            this.advance();
+        } else if (is_alpha(ch)) {
+            this.identifier(location);
+        } else if (is_digit(ch)) {
+            this.digit(location);
+        } else {
+            panic(`Lexer error: Unknown token '${ch}'`);
+        }
+    }
+
+    push_token(token) {
+        this.tokens.push(token);
+    }
+
+    /**
+     * @param {number} [start] defaults to the start of the current scan
+     * @returns {string} source text from `start` up to the current index
+     */
+    slice(start = this.cursor) {
+        return this.source.slice(start, this.index);
+    }
+
+    identifier(location) {
+        while (this.is_not_end() && is_alphanum(this.peek())) {
+            this.advance();
+        }
+
+        const value = this.slice();
+        const type = KEYWORDS.get(value) ?? TokenType.IDENTIFIER;
+        this.push_token(new Token(type, value, location));
+    }
+
+    digit(location) {
+        while (this.is_not_end() && is_digit(this.peek())) {
+            this.advance();
+        }
+
+        const value = this.slice();
+        this.push_token(new Token(TokenType.NUMBER, value, location));
+    }
+
+    /**
+     * Scans a double-quoted string literal. The token value is the raw text
+     * between the quotes; escape sequences are kept as written.
+     */
+    string(location) {
+        const line_start = this.line_start; // line holding the opening quote
+        this.advance(); // skip '"'
+
+        const start_col = this.get_column();
+        const start_row = this.row;
+        const start = this.index;
+
+        while (this.is_not_end() && this.peek() !== '"') {
+            if (this.peek() === "\\") {
+                this.advance(); // keep the escaped character, e.g. \" or \\
+            }
+            this.advance();
+        }
+
+        if (this.is_end()) {
+            // Show the line the string started on, not a slice that starts
+            // one character before the quote.
+            const newline_idx = this.source.indexOf("\n", line_start);
+            const err_line = this.source.slice(
+                line_start,
+                newline_idx === -1 ? this.length() : newline_idx
+            );
+
+            pretty_error(
+                start_row,
+                start_col,
+                err_line,
+                "You forgot to close the string!"
+            );
+        }
+
+        const value = this.slice(start);
+
+        this.advance(); // skip '"'
+
+        this.push_token(new Token(TokenType.STRING, value, location));
+    }
+}
+
+/**
+ * Escapes a C string literal body for use inside a Rust `print!` format
+ * string, where `{` and `}` introduce placeholders.
+ * @param {string} text
+ * @returns {string}
+ */
+function escape_rust_format(text) {
+    return text.replace(/[{}]/g, (brace) => brace + brace);
+}
+
+/**
+ * Derives the output path by swapping a trailing `.cpp` for `.rs`.
+ * @param {string} filename
+ * @returns {string}
+ */
+function to_rust_filename(filename) {
+    return filename.replace(/\.cpp$/i, ".rs");
+}
+
+try {
+    _main();
+} catch (e) {
+    console.error(e);
+    process.exitCode = 1; // make run.sh stop instead of compiling old output
+}
+
+function _main() {
+    const CPP_REGEX = /.\.cpp$/i;
+    let filename = "main.cpp";
+
+    // Skip the node binary and the script path; the last *.cpp argument wins.
+    for (const arg of process.argv.slice(2)) {
+        if (CPP_REGEX.test(arg)) {
+            filename = arg;
+        }
+    }
+
+    const source = readFileSync(filename, "utf8");
+    const lexer = new Lexer(source);
+    const tokens = lexer.scan_tokens();
+
+    const parser = new Parser(tokens);
+    const statements = parser.parse();
+
+    const output = [];
+    output.push("fn main() {");
+
+    for (const stmt of statements) {
+        if (stmt instanceof PrintStmt) {
+            output.push(`\tprint!("${escape_rust_format(stmt.string)}");`);
+        } else if (stmt instanceof ReturnStmt) {
+            output.push(`\tstd::process::exit(${stmt.value});`);
+        }
+    }
+
+    output.push("}\n");
+    const text = output.join("\n");
+
+    writeFileSync(to_rust_filename(filename), text);
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..9ddabc9
--- /dev/null
+++ b/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "cpp-js",
+  "version": "1.0.0",
+  "description": "C (or maybe not C++) compiler written in Javascript for fun and profit.",
+  "main": "main.js",
+  "type": "module",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [
+    "cpp",
+    "blazingly-fast",
+    "unsafe-rust",
+    "lightspeed"
+  ],
+  "author": "The ultimate life form | kirby",
+  "license": "GPL-3.0-or-later"
+}
\ No newline at end of file
diff --git a/parser.js b/parser.js
new file mode 100644
index 0000000..3378465
--- /dev/null
+++ b/parser.js
@@ -0,0 +1,173 @@
+// ------- Parser --------
+
+import { Token, TokenType } from "./main.js";
+import { PrintStmt, ReturnStmt } from "./stuff.js";
+import { panic } from "./utils.js";
+
+const Errors = {
+    EXPECT_INT_ERR: "Expected type annotation for main function",
+    FN_NAME_ERR: "Expected function name",
+    LEFT_PAREN_ERR: "Expected '('",
+    RIGHT_PAREN_ERR: "Expected ')'",
+    LEFT_BRACE_ERR: "Expected '{'",
+    RIGHT_BRACE_ERR: "Expected '}'",
+    PRINTF_ERR: 'Expected "printf" statement',
+    SEMI_COLON_ERR: "Expected ';'",
+    STRING_ERR: "Expected string literal",
+    RETURN_ERR: "Expected return statement",
+    RETURN_CODE_ERR: "Expected return code",
+    NO_MORE_TOKENS_ERR: "No more tokens to parse!",
+    EOF_ERR: "Expected EOF token at the end",
+};
+
+/**
+ * @typedef { { token_type: number, error_message: string } } Pattern
+ */
+
+export class Parser {
+    /** @param { Token[] } tokens */
+    constructor(tokens) {
+        this.tokens = tokens;
+        this.index = 0;
+        this.back_index = this.tokens.length - 1;
+        /**
+         * @type { Stmt[] }
+         */
+        this.statements = [];
+    }
+
+    is_empty() {
+        return this.tokens.length === 0;
+    }
+
+    is_not_empty() {
+        return this.tokens.length > 0;
+    }
+
+    peek() {
+        if (this.is_empty()) {
+            panic(Errors.NO_MORE_TOKENS_ERR);
+        }
+        return this.tokens[0];
+    }
+
+    peek_back() {
+        return this.tokens[this.tokens.length - 1];
+    }
+
+    pop_front() {
+        if (this.is_empty()) {
+            return null;
+        }
+
+        return this.tokens.shift();
+    }
+
+    pop_back() {
+        if (this.is_empty()) {
+            return null;
+        }
+
+        return this.tokens.pop();
+    }
+
+    /**
+     * Parses `int main() { ... }` and returns the statements of its body.
+     * The closing `}` and the EOF token are consumed from the back first, so
+     * the statement loop simply runs until no tokens are left.
+     * @returns { Stmt[] }
+     */
+    parse() {
+        // start main
+        this.expect(TokenType.INT_TYPE, Errors.EXPECT_INT_ERR);
+
+        const fn_name = this.expect(
+            TokenType.IDENTIFIER,
+            Errors.FN_NAME_ERR
+        ).value;
+        if (fn_name !== "main") {
+            panic(`Invalid function name: "${fn_name}". Expected "main"`);
+        }
+
+        this.expect(TokenType.LEFT_PAREN, Errors.LEFT_PAREN_ERR);
+        this.expect(TokenType.RIGHT_PAREN, Errors.RIGHT_PAREN_ERR);
+
+        this.expect(TokenType.LEFT_BRACE, Errors.LEFT_BRACE_ERR);
+
+        this.expect_back(TokenType.EOF, Errors.EOF_ERR);
+        this.expect_back(TokenType.RIGHT_BRACE, Errors.RIGHT_BRACE_ERR);
+
+        // end main
+
+        while (this.is_not_empty()) {
+            this.statements.push(this.get_next_stmt());
+        }
+
+        return this.statements;
+    }
+
+    /**
+     * Consumes the front token, which must have the expected type.
+     * @param {number} expected_type
+     * @param {string} error_message
+     * @returns { Token }
+     */
+    expect(expected_type, error_message) {
+        if (this.is_empty()) {
+            panic(error_message);
+        } else if (this.peek().token_type !== expected_type) {
+            panic(`${error_message} at ${this.peek().loc.display()}`);
+        }
+
+        return this.pop_front();
+    }
+
+    /**
+     * Consumes the last token, which must have the expected type.
+     * @param { number } expected_type
+     * @param { string } error_message
+     * @returns { Token }
+     */
+    expect_back(expected_type, error_message) {
+        if (this.is_empty()) {
+            panic(error_message);
+        } else if (this.peek_back().token_type !== expected_type) {
+            // Report where the offending *last* token is, not the front one.
+            panic(`${error_message} at ${this.peek_back().loc.display()}`);
+        }
+
+        return this.pop_back();
+    }
+
+    get_next_stmt() {
+        /**
+         * @type { Token }
+         */
+        const token = this.peek();
+
+        if (token.token_type === TokenType.PRINTF) {
+            this.expect(TokenType.PRINTF, Errors.PRINTF_ERR); // skip print
+            this.expect(TokenType.LEFT_PAREN, Errors.LEFT_PAREN_ERR); // skip '('
+            const value = this.expect(
+                TokenType.STRING,
+                Errors.STRING_ERR
+            ).value; // get the string!
+            this.expect(TokenType.RIGHT_PAREN, Errors.RIGHT_PAREN_ERR); // skip ')'
+            this.expect(TokenType.SEMICOLON, Errors.SEMI_COLON_ERR);
+
+            return new PrintStmt(value);
+        } else if (token.token_type === TokenType.RETURN) {
+            this.expect(TokenType.RETURN, Errors.RETURN_ERR); // skip return
+
+            const value = this.expect(
+                TokenType.NUMBER,
+                Errors.RETURN_CODE_ERR
+            ).value; // the exit code, kept as source text
+
+            this.expect(TokenType.SEMICOLON, Errors.SEMI_COLON_ERR);
+            return new ReturnStmt(value);
+        } else {
+            panic(`Unexpected '${token.value}' ${token.display()}`);
+        }
+    }
+}
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..dff40da
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+node main.js main.cpp &&
+rustc main.rs -o main.out &&
+./main.out
diff --git a/stuff.js b/stuff.js
new file mode 100644
index 0000000..1c55053
--- /dev/null
+++ b/stuff.js
@@ -0,0 +1,60 @@
+export class AST {}
+export class Stmt {}
+
+export class Block extends AST {
+    constructor(stmts) {
+        super();
+        this.stmts = stmts;
+    }
+}
+
+export class PrintStmt extends Stmt {
+    /**
+     * @param {string} string raw text between the quotes of the C literal
+     * @param {string[] | undefined} args
+     */
+    constructor(string, args) {
+        super();
+        this.string = string;
+        this.args = args ?? [];
+    }
+}
+
+export class ReturnStmt extends Stmt {
+    /**
+     * @param {string} value exit code as written in the source
+     */
+    constructor(value) {
+        super();
+        this.value = value;
+    }
+}
+
+// TODO
+export class IfStmt extends Stmt {}
+
+export class ElseStmt extends Stmt {}
+
+export class Expr {}
+
+export class EqualsExpr {
+    constructor(a, b) {
+        this.a = a;
+        this.b = b;
+    }
+}
+
+export class Num {
+    constructor(value) {
+        this.value = value;
+    }
+}
+
+export class FunctionDeclaration extends Stmt {
+    constructor(name, params, body) {
+        super();
+        this.name = name;
+        this.params = params;
+        this.body = body;
+    }
+}
diff --git a/utils.js b/utils.js
new file mode 100644
index 0000000..f7cc6f6
--- /dev/null
+++ b/utils.js
@@ -0,0 +1,68 @@
+// `_` counts as a letter so identifiers like `my_var` lex as one token.
+const ALPHA_REGEX = /^[a-z_]$/i;
+const ALPHA_NUM_REGEX = /^[a-z0-9_]$/i;
+const WHITESPACE_REGEX = /^\s$/;
+
+/**
+ * @param {string} ch a single character
+ * @returns {boolean} true for ASCII digits 0-9
+ */
+export function is_digit(ch) {
+    const code = ch.charCodeAt(0);
+    return code > 47 && code < 58;
+}
+
+/**
+ * @param {string} ch a single character
+ * @returns {boolean} true for ASCII letters and `_`
+ */
+export function is_alpha(ch) {
+    // `test` yields a real boolean; `match` returned an array or null.
+    return ALPHA_REGEX.test(ch);
+}
+
+/**
+ * @param {string} ch a single character
+ * @returns {boolean} true for ASCII letters, digits and `_`
+ */
+export function is_alphanum(ch) {
+    return ALPHA_NUM_REGEX.test(ch);
+}
+
+/**
+ * @param {string} ch a single character
+ * @returns {boolean}
+ */
+export function is_whitespace(ch) {
+    return WHITESPACE_REGEX.test(ch);
+}
+
+/**
+ * Prints the message to stderr and terminates the process.
+ * @param {string} message
+ */
+export function panic(message) {
+    console.error(message);
+    exit_rand();
+}
+
+/**
+ * Prints a colored error with the offending source line, then terminates.
+ * @param {number} row zero-based row
+ * @param {number} column zero-based column within `error_line`
+ * @param {string} error_line
+ * @param {string} error_message
+ */
+export function pretty_error(row, column, error_line, error_message) {
+    const pad = " ".repeat(Math.max(0, column));
+    console.error(`\x1b[31mError at line: ${row + 1} column: ${column + 1}`);
+    console.error(`\n\x1b[33m ${error_line}`);
+    console.error(` ${pad}\x1b[0m^^^ ${error_message}`);
+    exit_rand();
+}
+
+/** Exits with a non-zero status picked at random from 42..70. */
+export function exit_rand() {
+    const rand_code = Math.round(42 + Math.random() * 28);
+    process.exit(rand_code);
+}