cpp.js/main.js

386 lines
8.7 KiB
JavaScript

import { readFileSync } from "fs";
import { Parser } from "./parser.js";
import { generate_fasm_linux, generate_rust_code } from "./interpreter.js";
import {
is_alpha,
is_alphanum,
is_digit,
is_whitespace,
pretty_error,
panic,
exit_rand,
} from "./utils.js";
// Javascript enums at home
export const TokenType = {
LEFT_PAREN: 1,
RIGHT_PAREN: 2,
LEFT_BRACE: 3,
RIGHT_BRACE: 4,
SEMICOLON: 5,
COMMA: 6,
IDENTIFIER: 7,
STRING: 8,
NUMBER: 9,
PRINTF: 10,
RETURN: 11,
IF: 12,
ELSE: 13,
INT_TYPE: 14,
EOF: 15,
};
const LITERAL_TOKENS = {
"(": TokenType.LEFT_PAREN,
")": TokenType.RIGHT_PAREN,
"{": TokenType.LEFT_BRACE,
"}": TokenType.RIGHT_BRACE,
",": TokenType.COMMA,
";": TokenType.SEMICOLON,
};
const KEYWORDS = {
printf: TokenType.PRINTF,
int: TokenType.INT_TYPE,
return: TokenType.RETURN,
if: TokenType.IF,
else: TokenType.ELSE,
};
export class Token {
/**
* @param {number} token_type
* @param {string} value
* @param {Location} loc
*/
constructor(token_type, value, loc) {
this.token_type = token_type;
this.value = value;
this.loc = loc;
}
display() {
return `Token { type: ${this.token_type}, value: '${
this.value
}', location: ${this.loc.display()} }`;
}
}
export class Location {
/**
* @param { number } row
* @param { number } column
*/
constructor(row, column) {
this.row = row;
this.column = column;
}
display() {
// human readable
return `(row: ${this.row + 1}, col: ${this.column + 1})`;
}
}
// ------- Lexer --------
class Lexer {
/**
* @param {string} source
*/
constructor(source) {
this.source = source;
this.index = 0;
this.cursor = 0; // index at the start of new scan
this.row = 0;
this.line_start = 0;
/** @type {Token[]} */
this.tokens = [];
}
length() {
return this.source.length;
}
is_end() {
return this.index >= this.length();
}
is_not_end() {
return this.index < this.length();
}
advance() {
if (this.is_not_end()) {
let ch = this.peek();
this.index++;
if (ch === "\n") {
this.line_start = this.index;
this.row++;
}
}
}
get_column() {
return this.index - this.line_start;
}
get_location() {
return new Location(this.row, this.get_column());
}
peek() {
return this.source[this.index];
}
/**
* @returns { string | null }
*/
peek_next() {
if (this.index + 1 >= this.length()) {
return null;
}
return this.source[this.index + 1];
}
matches(ch) {
return this.peek() === ch;
}
next_matches(ch) {
let next = this.peek_next();
if (next === null) return false;
return next === ch;
}
/**
* @param { (ch: string) => boolean } predicate
*/
drop_while(predicate) {
while (this.is_not_end() && predicate(this.peek())) {
this.advance(); // skip ch
}
}
trim_left() {
this.drop_while((ch) => is_whitespace(ch));
}
drop_line() {
this.drop_while((ch) => ch !== "\n");
if (this.is_not_end()) {
this.advance();
}
}
scan_tokens() {
while (this.is_not_end()) {
this.cursor = this.index;
this.scan_token();
}
this.push_token(new Token(TokenType.EOF, "", this.get_location()));
return this.tokens;
}
scan_token() {
const ch = this.peek();
const location = this.get_location();
if (is_whitespace(ch)) {
this.advance();
return;
}
if (
this.matches("#") ||
(this.matches("/") && this.next_matches("/"))
) {
this.drop_line();
return;
}
switch (ch) {
case '"':
this.string(location);
break;
default:
if (ch in LITERAL_TOKENS) {
this.push_token(
new Token(LITERAL_TOKENS[ch], ch, location)
);
this.advance();
} else if (is_alpha(ch)) {
this.identifier(location);
} else if (is_digit(ch)) {
this.digit(location);
} else {
panic(`Lexer error: Unknown token '${ch}'`);
}
break;
}
}
push_token(token) {
this.tokens.push(token);
}
slice(start) {
if (!start) {
start = this.cursor;
}
return this.source.slice(start, this.index);
}
identifier(location) {
while (this.is_not_end() && is_alphanum(this.peek())) {
this.advance();
}
const value = this.slice();
const type = value in KEYWORDS ? KEYWORDS[value] : TokenType.IDENTIFIER;
this.push_token(new Token(type, value, location));
}
digit(location) {
while (this.is_not_end() && is_digit(this.peek())) {
this.advance();
}
const value = this.slice();
this.push_token(new Token(TokenType.NUMBER, value, location));
}
string(location) {
this.advance(); // skip '"'
const start_col = this.get_column();
const start_row = this.row;
const start = this.index;
while (this.is_not_end() && this.peek() !== '"') {
this.advance();
}
if (this.is_end()) {
let err_line = this.source.slice(this.cursor - 1, this.index);
let newline_idx = err_line.indexOf("\n");
if (newline_idx !== -1) {
err_line = err_line.substring(0, newline_idx);
}
pretty_error(
start_row,
start_col,
err_line,
"You forgot to close the string!"
);
}
let value = this.slice(start);
this.advance(); // skip '"'
this.push_token(new Token(TokenType.STRING, value, location));
}
}
const TARGETS = [
"rust",
"x86_64-fasm-linux-gnu",
// "x86_64-fasm-windows"
];
try {
_main();
} catch (e) {
console.error(e);
}
function check_next_arg(arg, next_arg) {
if (next_arg === null && !next_arg.startsWith("-")) {
console.error(`Missing or invalid value for flag "${arg}"`);
exit_rand();
}
return true;
}
function print_usage() {
console.log("Cpp.js");
console.log("USAGE:");
console.log(" node cpp.js <filename>");
console.log(" node cpp.js <filename> --target <target>");
console.log(" node cpp.js --target list");
console.log(" node cpp.js --help");
console.log();
console.log(" <filename>: The cpp file to compile");
console.log(" <target>: The target platform");
}
function _main() {
const CPP_REGEX = /.\.cpp/i;
const args = process.argv;
let filename = "main.cpp";
let target = TARGETS[0]; // default target is rust
for (let i = 0; i < args.length; i++) {
const arg = args[i];
const next_arg = i + 1 < args.length ? args[i + 1] : null;
if (arg === "--help") {
print_usage();
process.exit(0);
}
if (CPP_REGEX.test(arg)) {
filename = arg;
}
if (arg === "--target" && check_next_arg(arg, next_arg)) {
if (next_arg === "list") {
console.log("Valid targets:");
for (const target of TARGETS) {
console.log(target);
}
process.exit(0);
}
if (!TARGETS.includes(next_arg)) {
console.error(
`Unknown target "${next_arg}\nMaybe try: --target list`
);
exit_rand();
}
target = next_arg;
}
}
const source = readFileSync(filename, "utf8");
const lexer = new Lexer(source);
const tokens = lexer.scan_tokens();
const is_asm = target == TARGETS[1];
const parser = new Parser(tokens, { true_newline: is_asm });
const statements = parser.parse();
switch (target) {
case TARGETS[0]: // rust
filename = filename.replace(".cpp", ".rs");
generate_rust_code(filename, statements);
break;
case TARGETS[1]: // x86_64-fasm-linux-gnu
filename = filename.replace(".cpp", ".asm");
generate_fasm_linux(filename, statements);
default:
break;
}
}