From 2034152c8353d1ffdeb785bce8d76218df0a6408 Mon Sep 17 00:00:00 2001 From: Igor null Date: Sat, 16 Nov 2024 13:48:31 -0600 Subject: [PATCH] added syntax highlighting to depell --- depell/src/index.css | 21 ++++++++ depell/src/index.js | 102 ++++++++++++++++++++++++++++++++----- depell/wasm-fmt/src/lib.rs | 10 +++- lang/src/fmt.rs | 69 +++++++++++++++++++++++++ 4 files changed, 189 insertions(+), 13 deletions(-) diff --git a/depell/src/index.css b/depell/src/index.css index ae7f55ca..8e219c9f 100644 --- a/depell/src/index.css +++ b/depell/src/index.css @@ -141,3 +141,24 @@ div#dep-list { } } } + +.fmt { + font-family: monospace; +} +.syn { + font-family: monospace; + &.Comment { color: #939f91; } + &.Keyword { color: #f85552; } + &.Identifier { color: #3a94c5; } + &.Directive { color: #3a94c5; } + &.Number {} + &.String { color: #8da101; } + &.Op { color: #f57d26; } + &.Assign { color: #f57d26; } + &.Paren { color: #5c6a72; } + &.Bracket { color: #5c6a72; } + &.Colon { color: #5c6a72; } + &.Comma { color: #5c6a72; } + &.Dot { color: #5c6a72; } + &.Ctor { color: #3a94c5; } +} diff --git a/depell/src/index.js b/depell/src/index.js index 56548924..040c30d9 100644 --- a/depell/src/index.js +++ b/depell/src/index.js @@ -50,18 +50,20 @@ function modifyCode(instance, code, action) { let { INPUT, INPUT_LEN, OUTPUT, OUTPUT_LEN, - memory, fmt, minify + memory, fmt, tok, minify } = instance.exports; + let funs = { fmt, tok, minify }; if (!(true && memory instanceof WebAssembly.Memory && INPUT instanceof WebAssembly.Global && INPUT_LEN instanceof WebAssembly.Global && OUTPUT instanceof WebAssembly.Global && OUTPUT_LEN instanceof WebAssembly.Global - && typeof fmt === "function" - && typeof minify === "function" + && funs.hasOwnProperty(action) + && typeof funs[action] === "function" )) never(); + let fun = funs[action]; if (action !== "fmt") { INPUT = OUTPUT; @@ -72,8 +74,14 @@ function modifyCode(instance, code, action) { dw.setUint32(INPUT_LEN.value, code.length, true); new 
Uint8Array(memory.buffer, INPUT.value).set(new TextEncoder().encode(code)); - return runWasmFunction(instance, action === "fmt" ? fmt : minify) ? - bufToString(memory, OUTPUT, OUTPUT_LEN) : undefined; + if (!runWasmFunction(instance, fun)) { + return undefined; + } + if (action === "tok") { + return bufSlice(memory, OUTPUT, OUTPUT_LEN); + } else { + return bufToString(memory, OUTPUT, OUTPUT_LEN); + } } @@ -119,6 +127,15 @@ function packPosts(posts, view) { return len; } +/** @param {WebAssembly.Memory} mem + * @param {WebAssembly.Global} ptr + * @param {WebAssembly.Global} len + * @return {Uint8Array} */ +function bufSlice(mem, ptr, len) { + return new Uint8Array(mem.buffer, ptr.value, + new DataView(mem.buffer).getUint32(len.value, true)); +} + /** @param {WebAssembly.Memory} mem * @param {WebAssembly.Global} ptr * @param {WebAssembly.Global} len @@ -265,19 +282,80 @@ async function bindCodeEdit(target) { edit.dispatchEvent(new InputEvent("input")); } -/** @type {{ [key: string]: (content: string) => Promise | string }} */ +/** + * @type {{ Array }} + * must be kept in sync with `enum TokenGroup` in lang/src/fmt.rs */ +const TOK_CLASSES = [ + 'Blank', + 'Comment', + 'Keyword', + 'Identifier', + 'Directive', + 'Number', + 'String', + 'Op', + 'Assign', + 'Paren', + 'Bracket', + 'Colon', + 'Comma', + 'Dot', + 'Ctor', +]; + +/** @type {{ [key: string]: (el: HTMLElement) => undefined | Promise }} */ const applyFns = { - timestamp: (content) => new Date(parseInt(content) * 1000).toLocaleString(), - fmt: (content) => getFmtInstance().then(i => modifyCode(i, content, "fmt") ?? 
"invalid code"), + timestamp: (el) => { + const timestamp = el.innerText; + const date = new Date(parseInt(timestamp) * 1000); + el.innerText = date.toLocaleString(); + }, + fmt, }; + +/** + * @param {HTMLElement} target + * @param {string} code */ +async function fmt(target) { + const code = target.innerText; + const instance = await getFmtInstance(); + const decoder = new TextDecoder('utf-8'); + const fmt = modifyCode(instance, code, 'fmt'); + const codeBytes = new TextEncoder('utf-8').encode(fmt); + const tok = modifyCode(instance, fmt, 'tok'); + target.innerHTML = ''; + let start = 0; + let kind = tok[0]; + for (let ii = 1; ii <= tok.length; ii += 1) { + // split over same tokens and buffer end + if (tok[ii] === kind && ii < tok.length) { + continue; + } + const text = decoder.decode(codeBytes.subarray(start, ii)); + const textNode = document.createTextNode(text);; + if (kind === 0) { + target.appendChild(textNode); + } else { + const el = document.createElement('span'); + el.classList.add('syn'); + el.classList.add(TOK_CLASSES[kind]); + el.appendChild(textNode); + target.appendChild(el); + } + if (ii == tok.length) { + break; + } + start = ii; + kind = tok[ii]; + } +} + /** @param {HTMLElement} target */ -function execApply(target) { +async function execApply(target) { for (const elem of target.querySelectorAll('[apply]')) { if (!(elem instanceof HTMLElement)) continue; const funcname = elem.getAttribute('apply') ?? never(); - let res = applyFns[funcname](elem.textContent ?? 
""); - if (res instanceof Promise) res.then(c => elem.textContent = c); - else elem.textContent = res; + applyFns[funcname](elem); } } diff --git a/depell/wasm-fmt/src/lib.rs b/depell/wasm-fmt/src/lib.rs index f39e8bda..700760d6 100644 --- a/depell/wasm-fmt/src/lib.rs +++ b/depell/wasm-fmt/src/lib.rs @@ -27,8 +27,16 @@ unsafe extern "C" fn fmt() { OUTPUT_LEN = MAX_OUTPUT_SIZE - f.0.len(); } +#[no_mangle] +unsafe extern "C" fn tok() { + let code = core::slice::from_raw_parts_mut( + core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN); + OUTPUT_LEN = fmt::get_token_kinds(code); +} + #[no_mangle] unsafe extern "C" fn minify() { - let code = core::str::from_raw_parts_mut(core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN); + let code = core::str::from_raw_parts_mut( + core::ptr::addr_of_mut!(OUTPUT).cast(), OUTPUT_LEN); OUTPUT_LEN = fmt::minify(code); } diff --git a/lang/src/fmt.rs b/lang/src/fmt.rs index e8c18628..08e6ef66 100644 --- a/lang/src/fmt.rs +++ b/lang/src/fmt.rs @@ -26,6 +26,75 @@ pub fn display_radix(radix: Radix, mut value: u64, buf: &mut [u8; 64]) -> &str { unreachable!() } +#[repr(u8)] +enum TokenGroup { + Blank = 0, + Comment = 1, + Keyword = 2, + Identifier = 3, + Directive = 4, + Number = 5, + String = 6, + Op = 7, + Assign = 8, + Paren = 9, + Bracket = 10, + Colon = 11, + Comma = 12, + Dot = 13, + Ctor = 14, +} + +fn token_group(kind: TokenKind) -> TokenGroup { + use crate::lexer::TokenKind::*; + match kind { + // unused/unimplemented + | BSlash | Pound | Eof | Ct => TokenGroup::Blank, + | Comment => TokenGroup::Comment, + | Directive => TokenGroup::Directive, + | Colon => TokenGroup::Colon, + | Semi | Comma => TokenGroup::Comma, + | Dot => TokenGroup::Dot, + | Ctor | Tupl => TokenGroup::Ctor, + | LParen | RParen => TokenGroup::Paren, + | LBrace | RBrace | LBrack | RBrack => TokenGroup::Bracket, + | Number | Float => TokenGroup::Number, + | Under | CtIdent | Ident => TokenGroup::Identifier, + | Tick | Tilde | Que + | Not | Mod | Band | Bor | Xor + | 
Mul | Add | Sub | Div + | Shl | Shr | Or | And + | Lt | Gt | Eq | Le | Ge | Ne => TokenGroup::Op, + | Decl | Assign + | BorAss | XorAss | BandAss + | AddAss | SubAss | MulAss | DivAss | ModAss + | ShrAss | ShlAss => TokenGroup::Assign, + | DQuote | Quote => TokenGroup::String, + | Return | If | Else | Loop | Break | Continue | Fn | Idk | Die + | Struct | Packed | True | False | Null => TokenGroup::Keyword, + } +} + +pub fn get_token_kinds(mut source: &mut [u8]) -> usize { + let len = source.len(); + loop { + let src = unsafe { core::str::from_utf8_unchecked(source) }; + let mut token = lexer::Lexer::new(src).eat(); + match token.kind { + TokenKind::Eof => break, + // NOTE(review): the token start is moved back one byte for these kinds, presumably to cover the leading sigil; confirm against the lexer + TokenKind::CtIdent | TokenKind::Directive => token.start -= 1, + _ => {} + } + let start = token.start as usize; + let end = token.end as usize; + source[..start].fill(0); + source[start..end].fill(token_group(token.kind) as u8); + source = &mut source[end..]; + } + len +} + pub fn minify(source: &mut str) -> usize { fn needs_space(c: u8) -> bool { matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | 127..)