From 4dfd6f2fc0de004e540153509b9dc36f5edd1daa Mon Sep 17 00:00:00 2001
From: Igor M
Date: Sun, 17 Mar 2024 12:42:11 +0200
Subject: [PATCH] implemented .db .dw .dd .dq .align

---
 examples/example.S |  10 ++-
 src/error.h        |  18 ++++
 src/hbas.c         | 188 ++++++++++++++++++++++++++++++++++++++++++++-
 src/token.c        |  68 ++++++++++++++++
 4 files changed, 279 insertions(+), 5 deletions(-)

diff --git a/examples/example.S b/examples/example.S
index 6fe78f1..c28a4a0 100644
--- a/examples/example.S
+++ b/examples/example.S
@@ -8,10 +8,18 @@ start:
     jmp end
     un
 
-    ; .db "hello world\n"
     add16 r1, r2, r255
     addi8 r1, r2, -128
     lra r1, r0, start
     jmp start
 end:
     tx
+hello_string:
+    .db "Hello,", " world\n", 0
+hello_string_end:
+    .db 42, "hi", 43
+    ; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
+    .align 4
+    .dw 42
+    .dd 42
+    .dq 42
diff --git a/src/error.h b/src/error.h
index d0b293d..6dd2760 100644
--- a/src/error.h
+++ b/src/error.h
@@ -18,6 +18,15 @@ typedef enum AsmError_e {
     ErrDirectiveNotImplemented,
     ErrUnexpectedToken,
     ErrTriedNegateNonNumber,
+    ErrInvalidDirective,
+    ErrStringNewLine,
+    ErrDanglingEscape,
+    ErrStringBadHex,
+    ErrBadStringEscape,
+    ErrStringDataNotByte,
+    ErrAlignNeedsNumber,
+    ErrAlignNeedsPow2,
+    ErrStringUnterminated,
 } AsmError;
 char *ERRORS[] = {
     "Success",
@@ -39,4 +48,13 @@ char *ERRORS[] = {
     "Directive is not implemented",
     "Unexpected token",
     "Negation only works on numbers",
+    "Invalid directive",
+    "String contains a raw newline (did you forget to close the quote?)",
+    "Dangling escape in string literal",
+    "Bad hex in string literal",
+    "Bad escape sequence in string literal",
+    "String literals can be used only in .db directive",
+    ".align requires a number",
+    ".align requires a power of two as an argument",
+    "Unterminated string literal",
 };
diff --git a/src/hbas.c b/src/hbas.c
index 038419d..c9326a3 100644
--- a/src/hbas.c
+++ b/src/hbas.c
@@ -178,6 +178,69 @@ AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
     return ErrOk;
 }
 
+/*
+ * Decodes the body of a string literal into buf.
+ *
+ * input/len is the text between the quotes, escapes included. The caller
+ * must reserve room in buf for the decoded bytes (token_string() counts
+ * them into the token's num field). Understood escapes: \\ \" \r \n \0 \t
+ * and \xHH.
+ *
+ * Returns ErrOk on success, otherwise ErrDanglingEscape, ErrStringBadHex or
+ * ErrBadStringEscape.
+ */
+AsmError push_string(char *buf, char *input, size_t len) {
+    size_t ndata = 0;
+    for (size_t pos = 0; pos < len; pos += 1) {
+        char chr = input[pos];
+        if (chr == '\\') {
+            pos += 1;
+            if (pos >= len) {
+                return ErrDanglingEscape;
+            }
+            chr = input[pos];
+            switch (chr) {
+            case '\\':
+                chr = '\\';
+                break;
+            case '"':
+                chr = '"';
+                break;
+            case 'r':
+                chr = '\r';
+                break;
+            case 'n':
+                chr = '\n';
+                break;
+            case '0':
+                chr = '\0';
+                break;
+            case 't':
+                chr = '\t';
+                break;
+            case 'x':
+                if (pos + 2 >= len) {
+                    return ErrDanglingEscape;
+                }
+                char high = get_hex(input[pos + 1]);
+                char low = get_hex(input[pos + 2]);
+                if (high > 15 || low > 15) {
+                    return ErrStringBadHex;
+                }
+                chr = high << 4 | low;
+                /* Skip both hex digits so they are not emitted a second time. */
+                pos += 2;
+                break;
+            default:
+                return ErrBadStringEscape;
+            }
+        }
+        buf[ndata] = chr;
+        ndata += 1;
+    }
+    return ErrOk;
+}
+
 static
 AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
                         ByteVec *rv, HoleVec *holes) {
@@ -265,6 +328,8 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
                     return ErrBadNumOverflow;
                 }
                 num_to_write = (uint64_t)tmp;
+            } else if (meta.sign == 2 && (int)num_to_write < 0) {
+                return ErrBadNumOverflow;
             }
             AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
                                        meta.size, meta.sign);
@@ -278,6 +343,117 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
     return ErrOk;
 }
 
+/*
+ * Assembles the operands of a .db/.dw/.dd/.dq directive into out.
+ *
+ * On entry tok is the directive name. Operands are separated by commas and
+ * end at a newline or at the end of input. Numbers are written with
+ * push_int_le() as word_size bytes each; string literals are accepted only
+ * for word_size == 1. On return tok is the last token that was read.
+ */
+static
+AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok, size_t word_size) {
+    while (1) {
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind == TokNumber) {
+            if (ensure_push(out, 1, word_size) != 0) {
+                return ErrOutOfMemory;
+            }
+            AsmError err = push_int_le(&out->buf[out->len], tok->num, word_size, 3);
+            if (err != ErrOk) {
+                return err;
+            }
+            out->len += word_size;
+        } else if (tok->kind == TokString) {
+            if (word_size != 1) {
+                return ErrStringDataNotByte;
+            }
+            if (ensure_push(out, 1, tok->num) != 0) {
+                return ErrOutOfMemory;
+            }
+
+            char *str = &input[tok->start + 1];
+            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
+            if (err != ErrOk) {
+                return err;
+            }
+            out->len += tok->num;
+        } else {
+            return ErrUnexpectedToken;
+        }
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind == TokNewline || tok->kind == TokEOF) {
+            return ErrOk;
+        }
+        if (tok->kind == TokComma) {
+            continue;
+        }
+        return ErrInvalidToken;
+    }
+}
+
+/*
+ * Assembles a single directive; tok is the identifier following the dot.
+ *
+ * db, dw, dd and dq emit data words of 1, 2, 4 and 8 bytes. "align N" pads
+ * the output with zero bytes up to the next multiple of N, which must be a
+ * non-zero power of two. Any other name yields ErrInvalidDirective. On
+ * return tok is the last token that was consumed.
+ */
+AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
+    if (tok->len < 2) {
+        return ErrInvalidDirective;
+    }
+    size_t pos = tok->start;
+    char byte0 = input[pos];
+    char byte1 = input[pos + 1];
+    /* Exactly two characters, so that e.g. ".dbx" is not taken for ".db". */
+    if (tok->len == 2 && byte0 == 'd') {
+        size_t word_size;
+        switch (byte1) {
+        case 'b':
+            word_size = 1;
+            break;
+        case 'w':
+            word_size = 2;
+            break;
+        case 'd':
+            word_size = 4;
+            break;
+        case 'q':
+            word_size = 8;
+            break;
+        default:
+            return ErrInvalidDirective;
+        }
+        return push_data(input, len, out, tok, word_size);
+    }
+    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind != TokNumber) {
+            return ErrAlignNeedsNumber;
+        }
+        /* Zero would pass the mask test below and wrap the padding size. */
+        if (tok->num == 0) {
+            return ErrAlignNeedsPow2;
+        }
+        size_t mask = tok->num - 1;
+        if ((tok->num & mask) != 0) {
+            return ErrAlignNeedsPow2;
+        }
+        size_t aligned = (out->len + mask) & ~mask;
+        size_t padding = aligned - out->len;
+        if (ensure_push(out, 1, padding) != 0) {
+            return ErrOutOfMemory;
+        }
+        /* The output buffer starts out as a plain malloc, so clear the gap. */
+        memset(&out->buf[out->len], 0, padding);
+        out->len = aligned;
+        return ErrOk;
+    }
+    return ErrInvalidDirective;
+}
+
 AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
                   EInfo *einfo) {
     ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
@@ -317,13 +493,17 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
         }
         if (tok.kind == TokDot) {
             Token next = token(input, len, pos);
-            if (next.kind == TokIdent) {
-                err = ErrDirectiveNotImplemented;
-                goto end;
-            } else {
+            einfo->token = next;
+            if (next.kind != TokIdent) {
                 err = ErrNeedDirectiveAfterDot;
                 goto end;
             }
+            err = assemble_directive(input, len, &rv, &next);
+            pos = next.start + next.len;
+            einfo->token = next;
+            if (err != ErrOk) {
+                goto end;
+            }
             continue;
         }
         if (tok.kind == TokIdent) {
diff --git a/src/token.c b/src/token.c
index 8612073..4ce15eb 100644
--- a/src/token.c
+++ b/src/token.c
@@ -10,6 +10,7 @@ typedef enum TokenKind_e {
     TokColon = ':',
     TokComment = ';',
     TokNewline = 'n',
+    TokString = 's',
 } TokenKind;
 typedef struct Token_s {
     TokenKind kind;
@@ -111,6 +112,70 @@ Token token_number(char *input, size_t len, size_t pos) {
     }
 }
 
+/* Returns the value (0..15) of a hex digit, or 16 if chr is not one. */
+static
+char get_hex(char chr) {
+    char chru = chr & ~0x20;
+    if (chr >= '0' && chr <= '9') {
+        return chr - '0';
+    }
+    if (chru >= 'A' && chru <= 'F') {
+        return chru - ('A' - 10);
+    }
+    return 16;
+}
+
+/*
+ * Scans a double-quoted string literal that starts at input[pos].
+ *
+ * Returns a TokString token that spans both quotes, with num set to the
+ * number of bytes the literal decodes to. On a raw newline, a malformed
+ * escape or a missing closing quote it returns TokInvalid with the error
+ * code in num.
+ */
+static
+Token token_string(char *input, size_t len, size_t pos) {
+    size_t start = pos;
+    size_t ndata = 0;
+    for (pos += 1; pos < len; pos += 1) {
+        if (input[pos] == '"') {
+            return (Token){TokString, start, pos + 1 - start, ndata};
+        }
+        if (input[pos] == '\n' || input[pos] == '\r') {
+            return (Token){TokInvalid, start, pos + 1 - start, ErrStringNewLine};
+        }
+        if (input[pos] == '\\') {
+            if (pos + 1 >= len) {
+                return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
+            }
+            pos += 1;
+            switch (input[pos]) {
+            case '\\':
+            case '"':
+            case 'r':
+            case 'n':
+            case '0':
+            case 't':
+                break;
+            case 'x':
+                if (pos + 2 >= len) {
+                    return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
+                }
+                if (get_hex(input[pos + 1]) > 15 || get_hex(input[pos + 2]) > 15) {
+                    return (Token){TokInvalid, start, pos - start, ErrStringBadHex};
+                }
+                pos += 2;
+                break;
+            default:
+                return (Token){TokInvalid, start, pos - start, ErrBadStringEscape};
+            }
+        }
+        ndata += 1;
+    }
+    /* End of input reached without a closing quote: not a usable string. */
+    return (Token){TokInvalid, start, pos - start, ErrStringUnterminated};
+}
+
 static
 Token token(char *input, size_t len, size_t pos) {
     char chr, chru;
@@ -142,6 +207,9 @@ Token token(char *input, size_t len, size_t pos) {
         }
         return (Token){TokComment, pos, clen, 0};
     }
+    if (chr == '"') {
+        return token_string(input, len, pos);
+    }
     if (chr >= '0' && chr <= '9') {
         return token_number(input, len, pos);
     }