implemented .db .dw .dd .dq .align

This commit is contained in:
Igor M 2024-03-17 12:42:11 +02:00
parent 930cd4c020
commit 4dfd6f2fc0
4 changed files with 223 additions and 5 deletions

View file

@ -8,10 +8,18 @@
start: start:
jmp end jmp end
un un
; .db "hello world\n"
add16 r1, r2, r255 add16 r1, r2, r255
addi8 r1, r2, -128 addi8 r1, r2, -128
lra r1, r0, start lra r1, r0, start
jmp start jmp start
end: end:
tx tx
hello_string:
.db "Hello,", " world\n", 0
hello_string_end:
.db 42, "hi", 43
; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
.align 4
.dw 42
.dd 42
.dq 42

View file

@ -18,6 +18,14 @@ typedef enum AsmError_e {
ErrDirectiveNotImplemented, ErrDirectiveNotImplemented,
ErrUnexpectedToken, ErrUnexpectedToken,
ErrTriedNegateNonNumber, ErrTriedNegateNonNumber,
ErrInvalidDirective,
ErrStringNewLine,
ErrDanglingEscape,
ErrStringBadHex,
ErrBadStringEscape,
ErrStringDataNotByte,
ErrAlignNeedsNumber,
ErrAlignNeedsPow2,
} AsmError; } AsmError;
char *ERRORS[] = { char *ERRORS[] = {
"Success", "Success",
@ -39,4 +47,12 @@ char *ERRORS[] = {
"Directive is not implemented", "Directive is not implemented",
"Unexpected token", "Unexpected token",
"Negation only works on numbers", "Negation only works on numbers",
"Invalid directive",
"String contains a raw newline (did you forget to close the quote?)",
"Dangling escape in string literal",
"Bad hex in string literal",
"Bad escape sequence in string literal",
"String literals can be used only in .db directive",
".align requires a number",
".align requires a power of two as an argument",
}; };

View file

@ -178,6 +178,53 @@ AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
return ErrOk; return ErrOk;
} }
/*
 * Decode the body of a string literal into raw bytes.
 *
 * buf   - destination buffer; no bounds checking is done on it here, so the
 *         caller must have reserved one byte per decoded character
 * input - first source character of the literal body (after the opening quote)
 * len   - number of source characters in the body (closing quote excluded)
 *
 * Supported escapes: \\  \"  \r  \n  \0  \t  and \xHH (exactly two hex digits).
 *
 * Returns ErrOk on success, ErrDanglingEscape if an escape is cut short by the
 * end of the body, ErrStringBadHex if \x is not followed by two hex digits,
 * and ErrBadStringEscape for any other escape character.
 */
AsmError push_string(char *buf, char *input, size_t len) {
    size_t ndata = 0;
    for (size_t pos = 0; pos < len; pos += 1) {
        char chr = input[pos];
        if (chr == '\\') {
            pos += 1;
            // A backslash as the very last character has nothing to escape;
            // do not read past the end of the body.
            if (pos >= len) {
                return ErrDanglingEscape;
            }
            chr = input[pos];
            switch (chr) {
            case '\\':
            case '"':
                // These two escapes stand for themselves.
                break;
            case 'r':
                chr = '\r';
                break;
            case 'n':
                chr = '\n';
                break;
            case '0':
                chr = '\0';
                break;
            case 't':
                chr = '\t';
                break;
            case 'x': {
                if (pos + 2 >= len) {
                    return ErrDanglingEscape;
                }
                char high = get_hex(input[pos + 1]);
                char low = get_hex(input[pos + 2]);
                if (high > 15 || low > 15) {
                    return ErrStringBadHex;
                }
                chr = (char)((high << 4) | low);
                // Consume both hex digits; otherwise the loop would emit
                // them again as literal characters and write more bytes
                // than one per escape.
                pos += 2;
                break;
            }
            default:
                return ErrBadStringEscape;
            }
        }
        buf[ndata] = chr;
        ndata += 1;
    }
    return ErrOk;
}
static static
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok, AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
ByteVec *rv, HoleVec *holes) { ByteVec *rv, HoleVec *holes) {
@ -265,6 +312,8 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
return ErrBadNumOverflow; return ErrBadNumOverflow;
} }
num_to_write = (uint64_t)tmp; num_to_write = (uint64_t)tmp;
} else if (meta.sign == 2 && (int)num_to_write < 0) {
return ErrBadNumOverflow;
} }
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write, AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
meta.size, meta.sign); meta.size, meta.sign);
@ -278,6 +327,89 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
return ErrOk; return ErrOk;
} }
/*
 * Emit the comma-separated operand list of a data directive.
 *
 * input, len - the whole source text
 * out        - output byte vector; grown through ensure_push as needed
 * tok        - on entry the directive name token; updated to the last token
 *              read, so the caller can resume after it
 * word_size  - bytes written per numeric operand
 *
 * Numbers are written little-endian via push_int_le with word_size bytes.
 * String literals are accepted only when word_size is 1. Operands are
 * separated by commas and the list ends at a newline or end of input.
 *
 * Returns ErrOk, ErrOutOfMemory, ErrStringDataNotByte, ErrUnexpectedToken
 * (operand is neither number nor string), ErrInvalidToken (bad separator),
 * or whatever push_int_le / push_string report.
 */
static
AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok, size_t word_size) {
    while (1) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNumber) {
            if (ensure_push(out, 1, word_size) != 0) {
                return ErrOutOfMemory;
            }
            // Propagate encoding failures instead of silently dropping them,
            // matching how assemble_instr treats push_int_le.
            AsmError err = push_int_le(&out->buf[out->len], tok->num, word_size, 3);
            if (err != ErrOk) {
                return err;
            }
            out->len += word_size;
        } else if (tok->kind == TokString) {
            if (word_size != 1) {
                return ErrStringDataNotByte;
            }
            // A string token must span at least both quotes; anything
            // shorter would make `tok->len - 2` wrap around below.
            if (tok->len < 2) {
                return ErrUnexpectedToken;
            }
            // For string tokens, num holds the decoded byte count.
            if (ensure_push(out, 1, tok->num) != 0) {
                return ErrOutOfMemory;
            }
            char *str = &input[tok->start + 1];
            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
            if (err != ErrOk) {
                return err;
            }
            out->len += tok->num;
        } else {
            // NOTE(review): an invalid string token carries a specific error
            // code in tok->num, which is lost here — confirm whether all
            // TokInvalid tokens do, then consider returning it instead.
            return ErrUnexpectedToken;
        }
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNewline || tok->kind == TokEOF) {
            return ErrOk;
        }
        if (tok->kind == TokComma) {
            continue;
        }
        return ErrInvalidToken;
    }
}
/*
 * Assemble one directive; tok is the identifier that follows the dot.
 *
 * Supported directives:
 *   db / dw / dd / dq  - emit 1 / 2 / 4 / 8 byte data items (see push_data)
 *   align N            - pad the output with zero bytes up to the next
 *                        multiple of N; N must be a non-zero power of two
 *
 * input, len - the whole source text
 * out        - output byte vector
 * tok        - directive name on entry; updated to the last token consumed
 *
 * Returns ErrOk on success, ErrInvalidDirective for an unknown directive
 * name, ErrAlignNeedsNumber / ErrAlignNeedsPow2 for a bad align operand,
 * ErrOutOfMemory if the output cannot grow, or an error from push_data.
 */
AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
    size_t pos = tok->start;

    // Data directives are exactly two characters long; requiring that keeps
    // longer identifiers such as "dbfoo" from being treated as "db".
    if (tok->len == 2 && input[pos] == 'd') {
        size_t word_size;
        switch (input[pos + 1]) {
        case 'b':
            word_size = 1;
            break;
        case 'w':
            word_size = 2;
            break;
        case 'd':
            word_size = 4;
            break;
        case 'q':
            word_size = 8;
            break;
        default:
            return ErrInvalidDirective;
        }
        return push_data(input, len, out, tok, word_size);
    }

    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind != TokNumber) {
            return ErrAlignNeedsNumber;
        }
        size_t mask = tok->num - 1;
        // Zero passes the `n & (n - 1)` test but is not a power of two; with
        // an all-ones mask the rounding below would reset the length to 0.
        if (tok->num == 0 || (tok->num & mask) != 0) {
            return ErrAlignNeedsPow2;
        }
        size_t aligned = (out->len + mask) & ~mask;
        size_t padding = aligned - out->len;
        if (ensure_push(out, 1, padding) != 0) {
            return ErrOutOfMemory;
        }
        // Give the padding a defined value rather than leaving whatever the
        // buffer happened to contain.
        memset(&out->buf[out->len], 0, padding);
        out->len = aligned;
        return ErrOk;
    }

    // Unknown (or too short) directive names are an error, not a no-op.
    return ErrInvalidDirective;
}
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
EInfo *einfo) { EInfo *einfo) {
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
@ -317,13 +449,17 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
} }
if (tok.kind == TokDot) { if (tok.kind == TokDot) {
Token next = token(input, len, pos); Token next = token(input, len, pos);
if (next.kind == TokIdent) { einfo->token = next;
err = ErrDirectiveNotImplemented; if (next.kind != TokIdent) {
goto end;
} else {
err = ErrNeedDirectiveAfterDot; err = ErrNeedDirectiveAfterDot;
goto end; goto end;
} }
err = assemble_directive(input, len, &rv, &next);
pos = next.start + next.len;
einfo->token = next;
if (err != ErrOk) {
goto end;
}
continue; continue;
} }
if (tok.kind == TokIdent) { if (tok.kind == TokIdent) {

View file

@ -10,6 +10,7 @@ typedef enum TokenKind_e {
TokColon = ':', TokColon = ':',
TokComment = ';', TokComment = ';',
TokNewline = 'n', TokNewline = 'n',
TokString = 's',
} TokenKind; } TokenKind;
typedef struct Token_s { typedef struct Token_s {
TokenKind kind; TokenKind kind;
@ -111,6 +112,60 @@ Token token_number(char *input, size_t len, size_t pos) {
} }
} }
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F' and returns 0..15.
 * Any other character yields 16, which callers treat as "not a hex digit".
 */
static
char get_hex(char chr) {
    if (chr >= '0' && chr <= '9') {
        return (char)(chr - '0');
    }
    if (chr >= 'a' && chr <= 'f') {
        return (char)(chr - 'a' + 10);
    }
    if (chr >= 'A' && chr <= 'F') {
        return (char)(chr - 'A' + 10);
    }
    return 16;
}
/*
 * Scan a double-quoted string literal starting at input[pos] (the opening
 * quote) and validate its escape sequences.
 *
 * On success returns a TokString token that spans both quotes and whose num
 * field is the number of bytes the literal decodes to (each escape, including
 * \xHH, counts as one byte).
 *
 * On failure returns a TokInvalid token whose num field carries the error:
 *   ErrStringNewLine   - raw CR/LF inside the literal, or the input ended
 *                        before a closing quote was found
 *   ErrDanglingEscape  - escape cut short by the end of input
 *   ErrStringBadHex    - \x not followed by two hex digits
 *   ErrBadStringEscape - unknown escape character
 */
static
Token token_string(char *input, size_t len, size_t pos) {
    size_t start = pos;
    size_t ndata = 0;
    for (pos += 1; pos < len; pos += 1) {
        if (input[pos] == '"') {
            return (Token){TokString, start, pos + 1 - start, ndata};
        }
        if (input[pos] == '\n' || input[pos] == '\r') {
            return (Token){TokInvalid, start, pos + 1 - start, ErrStringNewLine};
        }
        if (input[pos] == '\\') {
            if (pos + 1 >= len) {
                return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
            }
            pos += 1;
            switch (input[pos]) {
            case '\\':
            case '"':
            case 'r':
            case 'n':
            case '0':
            case 't':
                break;
            case 'x':
                if (pos + 2 >= len) {
                    return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
                }
                if (get_hex(input[pos + 1]) > 15 || get_hex(input[pos + 2]) > 15) {
                    return (Token){TokInvalid, start, pos - start, ErrStringBadHex};
                }
                // Skip the two hex digits; the escape is one decoded byte.
                pos += 2;
                break;
            default:
                return (Token){TokInvalid, start, pos - start, ErrBadStringEscape};
            }
        }
        ndata += 1;
    }
    // Ran off the end of the input without a closing quote. Reporting this
    // as a valid string would let consumers assume the token includes both
    // quotes; the "did you forget to close the quote?" error fits best.
    return (Token){TokInvalid, start, pos - start, ErrStringNewLine};
}
static static
Token token(char *input, size_t len, size_t pos) { Token token(char *input, size_t len, size_t pos) {
char chr, chru; char chr, chru;
@ -142,6 +197,9 @@ Token token(char *input, size_t len, size_t pos) {
} }
return (Token){TokComment, pos, clen, 0}; return (Token){TokComment, pos, clen, 0};
} }
if (chr == '"') {
return token_string(input, len, pos);
}
if (chr >= '0' && chr <= '9') { if (chr >= '0' && chr <= '9') {
return token_number(input, len, pos); return token_number(input, len, pos);
} }