implemented .db .dw .dd .dq .align
This commit is contained in:
parent
930cd4c020
commit
4dfd6f2fc0
|
@ -8,10 +8,18 @@
|
|||
start:
|
||||
jmp end
|
||||
un
|
||||
; .db "hello world\n"
|
||||
add16 r1, r2, r255
|
||||
addi8 r1, r2, -128
|
||||
lra r1, r0, start
|
||||
jmp start
|
||||
end:
|
||||
tx
|
||||
hello_string:
|
||||
.db "Hello,", " world\n", 0
|
||||
hello_string_end:
|
||||
.db 42, "hi", 43
|
||||
; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
|
||||
.align 4
|
||||
.dw 42
|
||||
.dd 42
|
||||
.dq 42
|
||||
|
|
16
src/error.h
16
src/error.h
|
@ -18,6 +18,14 @@ typedef enum AsmError_e {
|
|||
ErrDirectiveNotImplemented,
|
||||
ErrUnexpectedToken,
|
||||
ErrTriedNegateNonNumber,
|
||||
ErrInvalidDirective,
|
||||
ErrStringNewLine,
|
||||
ErrDanglingEscape,
|
||||
ErrStringBadHex,
|
||||
ErrBadStringEscape,
|
||||
ErrStringDataNotByte,
|
||||
ErrAlignNeedsNumber,
|
||||
ErrAlignNeedsPow2,
|
||||
} AsmError;
|
||||
char *ERRORS[] = {
|
||||
"Success",
|
||||
|
@ -39,4 +47,12 @@ char *ERRORS[] = {
|
|||
"Directive is not implemented",
|
||||
"Unexpected token",
|
||||
"Negation only works on numbers",
|
||||
"Invalid directive",
|
||||
"String contains a raw newline (did you forget to close the quote?)",
|
||||
"Dangling escape in string literal",
|
||||
"Bad hex in string literal",
|
||||
"Bad escape sequence in string literal",
|
||||
"String literals can be used only in .db directive",
|
||||
".align requires a number",
|
||||
".align requires a power of two as an argument",
|
||||
};
|
||||
|
|
144
src/hbas.c
144
src/hbas.c
|
@ -178,6 +178,53 @@ AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
|
|||
return ErrOk;
|
||||
}
|
||||
|
||||
AsmError push_string(char *buf, char *input, size_t len) {
|
||||
size_t ndata = 0;
|
||||
for (size_t pos = 0; pos < len; pos += 1) {
|
||||
char chr = input[pos];
|
||||
if (chr == '\\') {
|
||||
pos += 1;
|
||||
chr = input[pos];
|
||||
switch (chr) {
|
||||
case '\\':
|
||||
chr = '\\';
|
||||
break;
|
||||
case '"':
|
||||
chr = '"';
|
||||
break;
|
||||
case 'r':
|
||||
chr = '\r';
|
||||
break;
|
||||
case 'n':
|
||||
chr = '\n';
|
||||
break;
|
||||
case '0':
|
||||
chr = '\0';
|
||||
break;
|
||||
case 't':
|
||||
chr = '\t';
|
||||
break;
|
||||
case 'x':
|
||||
if (pos + 2 >= len) {
|
||||
return ErrDanglingEscape;
|
||||
}
|
||||
char high = get_hex(input[pos + 1]);
|
||||
char low = get_hex(input[pos + 2]);
|
||||
if (high > 15 || low > 15) {
|
||||
return ErrStringBadHex;
|
||||
}
|
||||
chr = high << 4 | low;
|
||||
break;
|
||||
default:
|
||||
return ErrBadStringEscape;
|
||||
}
|
||||
}
|
||||
buf[ndata] = chr;
|
||||
ndata += 1;
|
||||
}
|
||||
return ErrOk;
|
||||
}
|
||||
|
||||
static
|
||||
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
||||
ByteVec *rv, HoleVec *holes) {
|
||||
|
@ -265,6 +312,8 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
|||
return ErrBadNumOverflow;
|
||||
}
|
||||
num_to_write = (uint64_t)tmp;
|
||||
} else if (meta.sign == 2 && (int)num_to_write < 0) {
|
||||
return ErrBadNumOverflow;
|
||||
}
|
||||
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
|
||||
meta.size, meta.sign);
|
||||
|
@ -278,6 +327,89 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
|||
return ErrOk;
|
||||
}
|
||||
|
||||
static
|
||||
AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok, size_t word_size) {
|
||||
while (1) {
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind == TokNumber) {
|
||||
if (ensure_push(out, 1, word_size) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
push_int_le(&out->buf[out->len], tok->num, word_size, 3);
|
||||
out->len += word_size;
|
||||
} else if (tok->kind == TokString) {
|
||||
if (word_size != 1) {
|
||||
return ErrStringDataNotByte;
|
||||
}
|
||||
if (ensure_push(out, 1, tok->num) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
|
||||
char *str = &input[tok->start + 1];
|
||||
AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
|
||||
if (err != ErrOk) {
|
||||
return err;
|
||||
}
|
||||
out->len += tok->num;
|
||||
} else {
|
||||
return ErrUnexpectedToken;
|
||||
}
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind == TokNewline || tok->kind == TokEOF) {
|
||||
return ErrOk;
|
||||
}
|
||||
if (tok->kind == TokComma) {
|
||||
continue;
|
||||
}
|
||||
return ErrInvalidToken;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Assembles one directive. `tok` must be the identifier that follows the dot.
 *
 * Recognised directives:
 *   db / dw / dd / dq  - emit data items of 1 / 2 / 4 / 8 bytes (see push_data)
 *   align N            - pad the output with zero bytes up to a multiple of N,
 *                        where N is a non-zero power of two
 *
 * @param input  whole source text
 * @param len    length of `input`
 * @param out    output byte vector
 * @param tok    in: the directive name token; out: the last token consumed
 * @return ErrOk on success;
 *         ErrInvalidDirective for an unknown directive name;
 *         ErrAlignNeedsNumber / ErrAlignNeedsPow2 for a bad align argument;
 *         ErrOutOfMemory if the output could not be grown;
 *         otherwise whatever push_data reports.
 */
AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
    if (tok->len < 2) {
        return ErrInvalidDirective;
    }
    size_t pos = tok->start;
    char byte0 = input[pos];
    char byte1 = input[pos + 1];

    // Data directives are exactly two characters long; requiring that keeps
    // names such as "dbfoo" from being accepted as "db".
    if (tok->len == 2 && byte0 == 'd') {
        size_t word_size;
        switch (byte1) {
        case 'b':
            word_size = 1;
            break;
        case 'w':
            word_size = 2;
            break;
        case 'd':
            word_size = 4;
            break;
        case 'q':
            word_size = 8;
            break;
        default:
            return ErrInvalidDirective;
        }
        return push_data(input, len, out, tok, word_size);
    }

    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind != TokNumber) {
            return ErrAlignNeedsNumber;
        }
        // Zero slips through the `n & (n - 1)` test below, but is not a power
        // of two and would make the rounding collapse the output length to 0.
        if (tok->num == 0) {
            return ErrAlignNeedsPow2;
        }
        size_t mask = tok->num - 1;
        if ((tok->num & mask) != 0) {
            return ErrAlignNeedsPow2;
        }
        // Round the current length up to the next multiple of the alignment.
        size_t aligned = (out->len + mask) & ~mask;
        size_t padding = aligned - out->len;
        if (ensure_push(out, 1, padding) != 0) {
            return ErrOutOfMemory;
        }
        // Give the padding a defined value instead of leaving it as whatever
        // the buffer happened to contain.
        memset(&out->buf[out->len], 0, padding);
        out->len = aligned;
        return ErrOk;
    }

    // Anything else is not a directive we know about.
    return ErrInvalidDirective;
}
|
||||
|
||||
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
|
||||
EInfo *einfo) {
|
||||
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
||||
|
@ -317,13 +449,17 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
|
|||
}
|
||||
if (tok.kind == TokDot) {
|
||||
Token next = token(input, len, pos);
|
||||
if (next.kind == TokIdent) {
|
||||
err = ErrDirectiveNotImplemented;
|
||||
goto end;
|
||||
} else {
|
||||
einfo->token = next;
|
||||
if (next.kind != TokIdent) {
|
||||
err = ErrNeedDirectiveAfterDot;
|
||||
goto end;
|
||||
}
|
||||
err = assemble_directive(input, len, &rv, &next);
|
||||
pos = next.start + next.len;
|
||||
einfo->token = next;
|
||||
if (err != ErrOk) {
|
||||
goto end;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (tok.kind == TokIdent) {
|
||||
|
|
58
src/token.c
58
src/token.c
|
@ -10,6 +10,7 @@ typedef enum TokenKind_e {
|
|||
TokColon = ':',
|
||||
TokComment = ';',
|
||||
TokNewline = 'n',
|
||||
TokString = 's',
|
||||
} TokenKind;
|
||||
typedef struct Token_s {
|
||||
TokenKind kind;
|
||||
|
@ -111,6 +112,60 @@ Token token_number(char *input, size_t len, size_t pos) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.
 *
 * @param chr  character to convert
 * @return the digit value 0..15, or 16 if `chr` is not a hex digit
 */
static
char get_hex(char chr) {
    if ('0' <= chr && chr <= '9') {
        return chr - '0';
    }

    // Clearing bit 0x20 folds ASCII lowercase letters onto uppercase, so one
    // range test covers both cases.
    char upper = chr & ~0x20;
    if ('A' <= upper && upper <= 'F') {
        return upper - ('A' - 10);
    }

    // Sentinel: one past the largest valid digit value.
    return 16;
}
|
||||
|
||||
static
|
||||
Token token_string(char *input, size_t len, size_t pos) {
|
||||
size_t start = pos;
|
||||
size_t ndata = 0;
|
||||
for (pos += 1; pos < len; pos += 1) {
|
||||
if (input[pos] == '"') {
|
||||
return (Token){TokString, start, pos + 1 - start, ndata};
|
||||
}
|
||||
if (input[pos] == '\n' || input[pos] == '\r') {
|
||||
return (Token){TokInvalid, start, pos + 1 - start, ErrStringNewLine};
|
||||
}
|
||||
if (input[pos] == '\\') {
|
||||
if (pos + 1 >= len) {
|
||||
return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
|
||||
}
|
||||
pos += 1;
|
||||
switch (input[pos]) {
|
||||
case '\\':
|
||||
case '"':
|
||||
case 'r':
|
||||
case 'n':
|
||||
case '0':
|
||||
case 't':
|
||||
break;
|
||||
case 'x':
|
||||
if (pos + 2 >= len) {
|
||||
return (Token){TokInvalid, start, pos - start, ErrDanglingEscape};
|
||||
}
|
||||
if (get_hex(input[pos + 1]) > 15 || get_hex(input[pos + 2]) > 15) {
|
||||
return (Token){TokInvalid, start, pos - start, ErrStringBadHex};
|
||||
}
|
||||
pos += 2;
|
||||
break;
|
||||
default:
|
||||
return (Token){TokInvalid, start, pos - start, ErrBadStringEscape};
|
||||
}
|
||||
}
|
||||
ndata += 1;
|
||||
}
|
||||
return (Token){TokString, start, pos - start, ndata};
|
||||
}
|
||||
|
||||
static
|
||||
Token token(char *input, size_t len, size_t pos) {
|
||||
char chr, chru;
|
||||
|
@ -142,6 +197,9 @@ Token token(char *input, size_t len, size_t pos) {
|
|||
}
|
||||
return (Token){TokComment, pos, clen, 0};
|
||||
}
|
||||
if (chr == '"') {
|
||||
return token_string(input, len, pos);
|
||||
}
|
||||
if (chr >= '0' && chr <= '9') {
|
||||
return token_number(input, len, pos);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue