Merge pull request #10 from m1el/db-align
Implement db, align, fix some bugs
This commit is contained in:
commit
b745c4621c
2
Makefile
2
Makefile
|
@ -17,7 +17,7 @@ check-format:
|
||||||
build:
|
build:
|
||||||
mkdir -p build
|
mkdir -p build
|
||||||
|
|
||||||
build/hbas: build src/hbas.c
|
build/hbas: build $(wildcard src/*.h src/*.c)
|
||||||
${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas
|
${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas
|
||||||
|
|
||||||
build/example.hbf: build build/hbas examples/example.S
|
build/example.hbf: build build/hbas examples/example.S
|
||||||
|
|
|
@ -2,16 +2,25 @@
|
||||||
; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md
|
; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md
|
||||||
; TODO:
|
; TODO:
|
||||||
; .origin 0x1000
|
; .origin 0x1000
|
||||||
; .align 0x100
|
; 'c' char literals
|
||||||
; .db "hello"
|
|
||||||
; .struct
|
; .struct
|
||||||
start:
|
start:
|
||||||
jmp end
|
jmp end
|
||||||
un
|
un
|
||||||
; .db "hello world\n"
|
|
||||||
add16 r1, r2, r255
|
add16 r1, r2, r255
|
||||||
addi8 r1, r2, -128
|
addi8 r1, r2, -128
|
||||||
lra r1, r0, start
|
lra r1, r0, start
|
||||||
jmp start
|
jmp start
|
||||||
end:
|
end:
|
||||||
tx
|
tx
|
||||||
|
hello_string:
|
||||||
|
.db "Hello, w\x6frld\n", 0
|
||||||
|
hello_string_end:
|
||||||
|
.db "hi"
|
||||||
|
; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
|
||||||
|
.align 2
|
||||||
|
.dw 0x4546
|
||||||
|
.align 4
|
||||||
|
.dd 0x4748494a
|
||||||
|
.align 8
|
||||||
|
.dq 0x5051525354555657
|
||||||
|
|
|
@ -56,8 +56,7 @@ const char *TYPE_STR[] = {
|
||||||
|
|
||||||
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
|
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
|
||||||
|
|
||||||
static
|
static ArgMeta arg_meta(char arg) {
|
||||||
ArgMeta arg_meta(char arg) {
|
|
||||||
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
||||||
ArgMeta meta = ARGS[ii];
|
ArgMeta meta = ARGS[ii];
|
||||||
if (meta.chr == arg) {
|
if (meta.chr == arg) {
|
||||||
|
|
|
@ -6,8 +6,7 @@ typedef struct ByteVec_s {
|
||||||
size_t len;
|
size_t len;
|
||||||
} ByteVec;
|
} ByteVec;
|
||||||
|
|
||||||
static
|
static AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
|
||||||
AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
|
|
||||||
if (vec->len + extra < vec->len) {
|
if (vec->len + extra < vec->len) {
|
||||||
return ErrOutOfMemory;
|
return ErrOutOfMemory;
|
||||||
}
|
}
|
||||||
|
|
140
src/directive.c
Normal file
140
src/directive.c
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
AsmError push_string(char *buf, char *input, size_t len) {
|
||||||
|
size_t ndata = 0;
|
||||||
|
for (size_t pos = 0; pos < len; pos += 1) {
|
||||||
|
char chr = input[pos];
|
||||||
|
if (chr == '\\') {
|
||||||
|
if (pos + 1 >= len) {
|
||||||
|
return ErrDanglingEscape;
|
||||||
|
}
|
||||||
|
pos += 1;
|
||||||
|
chr = input[pos];
|
||||||
|
size_t offset = 1;
|
||||||
|
switch (chr) {
|
||||||
|
case '\\':
|
||||||
|
chr = '\\';
|
||||||
|
break;
|
||||||
|
case '"':
|
||||||
|
chr = '"';
|
||||||
|
break;
|
||||||
|
case 'r':
|
||||||
|
chr = '\r';
|
||||||
|
break;
|
||||||
|
case 'n':
|
||||||
|
chr = '\n';
|
||||||
|
break;
|
||||||
|
case '0':
|
||||||
|
chr = '\0';
|
||||||
|
break;
|
||||||
|
case 't':
|
||||||
|
chr = '\t';
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
if (pos + 2 >= len) {
|
||||||
|
return ErrDanglingEscape;
|
||||||
|
}
|
||||||
|
char high = get_hex(input[pos + 1]);
|
||||||
|
char low = get_hex(input[pos + 2]);
|
||||||
|
offset = 2;
|
||||||
|
if (high > 15 || low > 15) {
|
||||||
|
return ErrStringBadHex;
|
||||||
|
}
|
||||||
|
chr = high << 4 | low;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return ErrBadStringEscape;
|
||||||
|
}
|
||||||
|
pos += offset;
|
||||||
|
}
|
||||||
|
buf[ndata] = chr;
|
||||||
|
ndata += 1;
|
||||||
|
}
|
||||||
|
return ErrOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parses the comma-separated operand list of a data directive and appends the
// encoded bytes to `out`.
//
// Each operand is either a number, written little-endian in `word_size` bytes,
// or a string literal, which is accepted only when `word_size` is 1.
// Tokenising resumes right after `*tok`; on return `*tok` holds the last token
// that was read.
//
// Returns ErrOk once the list is terminated by a newline or the end of input,
// otherwise the error describing the first problem encountered.
static AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok,
                          size_t word_size) {
    while (1) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNumber) {
            if (ensure_push(out, 1, word_size) != 0) {
                return ErrOutOfMemory;
            }
            // `3` accepts the literal as either a signed or an unsigned value.
            // The result must be checked: on overflow nothing is written, and
            // advancing `len` anyway would emit unwritten bytes.
            AsmError err =
                push_int_le(&out->buf[out->len], tok->num, word_size, 3);
            if (err != ErrOk) {
                return err;
            }
            out->len += word_size;
        } else if (tok->kind == TokString) {
            if (word_size != 1) {
                return ErrStringDataNotByte;
            }
            // The token must carry both quotes before they are stripped below;
            // otherwise `tok->len - 2` would wrap around or drop a real
            // character of an unterminated literal.
            if (tok->len < 2 || input[tok->start + tok->len - 1] != '"') {
                return ErrStringNewLine;
            }
            // For string tokens `num` is the decoded byte count.
            if (ensure_push(out, 1, tok->num) != 0) {
                return ErrOutOfMemory;
            }

            char *str = &input[tok->start + 1];
            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
            if (err != ErrOk) {
                return err;
            }
            out->len += tok->num;
        } else {
            return ErrNeedsDataLiteral;
        }

        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNewline || tok->kind == TokEOF) {
            return ErrOk;
        }
        if (tok->kind == TokComma) {
            continue;
        }
        return ErrNeedCommaOrNewline;
    }
}
|
||||||
|
|
||||||
|
// Assembles one directive. On entry `*tok` is the identifier that follows the
// dot; on return it is the last token consumed while processing the directive.
//
// Supported directives:
//   db / dw / dd / dq  - emit data in 1 / 2 / 4 / 8 byte words
//   align N            - pad the output with zero bytes up to the next
//                        multiple of N, where N must be a power of two
//
// Returns ErrOk on success, ErrInvalidDirective for an unknown name, or the
// error reported while handling the directive's operands.
AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
    if (tok->len < 2) {
        return ErrInvalidDirective;
    }
    size_t pos = tok->start;
    char byte0 = input[pos];
    char byte1 = input[pos + 1];
    if (tok->len == 2 && byte0 == 'd') {
        size_t word_size;
        switch (byte1) {
        case 'b':
            word_size = 1;
            break;
        case 'w':
            word_size = 2;
            break;
        case 'd':
            word_size = 4;
            break;
        case 'q':
            word_size = 8;
            break;
        default:
            return ErrInvalidDirective;
        }
        return push_data(input, len, out, tok, word_size);
    }
    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind != TokNumber) {
            return ErrAlignNeedsNumber;
        }
        // Test for a power of two at full 64-bit width, before narrowing to
        // size_t, so that a narrower size_t cannot hide extra set bits.
        uint64_t align = tok->num;
        if (align == 0 || (align & (align - 1)) != 0) {
            return ErrAlignNeedsPow2;
        }
        if (align - 1 > (uint64_t)(~(size_t)0)) {
            return ErrOutOfMemory;
        }
        size_t mask = (size_t)(align - 1);
        // Rounding up must not wrap around the size_t range.
        if ((~(size_t)0) - mask < out->len) {
            return ErrOutOfMemory;
        }
        size_t aligned = (out->len + mask) & ~mask;
        size_t padding = aligned - out->len;
        if (ensure_push(out, 1, padding) != 0) {
            return ErrOutOfMemory;
        }
        // Zero-fill the gap so the output never contains indeterminate bytes.
        memset(&out->buf[out->len], 0, padding);
        out->len = aligned;
        return ErrOk;
    }
    return ErrInvalidDirective;
}
|
20
src/error.h
20
src/error.h
|
@ -18,6 +18,16 @@ typedef enum AsmError_e {
|
||||||
ErrDirectiveNotImplemented,
|
ErrDirectiveNotImplemented,
|
||||||
ErrUnexpectedToken,
|
ErrUnexpectedToken,
|
||||||
ErrTriedNegateNonNumber,
|
ErrTriedNegateNonNumber,
|
||||||
|
ErrInvalidDirective,
|
||||||
|
ErrStringNewLine,
|
||||||
|
ErrDanglingEscape,
|
||||||
|
ErrStringBadHex,
|
||||||
|
ErrBadStringEscape,
|
||||||
|
ErrStringDataNotByte,
|
||||||
|
ErrAlignNeedsNumber,
|
||||||
|
ErrAlignNeedsPow2,
|
||||||
|
ErrNeedCommaOrNewline,
|
||||||
|
ErrNeedsDataLiteral,
|
||||||
} AsmError;
|
} AsmError;
|
||||||
char *ERRORS[] = {
|
char *ERRORS[] = {
|
||||||
"Success",
|
"Success",
|
||||||
|
@ -39,4 +49,14 @@ char *ERRORS[] = {
|
||||||
"Directive is not implemented",
|
"Directive is not implemented",
|
||||||
"Unexpected token",
|
"Unexpected token",
|
||||||
"Negation only works on numbers",
|
"Negation only works on numbers",
|
||||||
|
"Invalid directive",
|
||||||
|
"String contains a raw newline (did you forget to close the quote?)",
|
||||||
|
"Dangling escape in string literal",
|
||||||
|
"Bad hex in string literal",
|
||||||
|
"Bad escape sequence in string literal",
|
||||||
|
"String literals can be used only in .db directive",
|
||||||
|
".align requires a number",
|
||||||
|
".align requires a power of two as an argument",
|
||||||
|
"Need comma or newline after data literal",
|
||||||
|
"Data literal expects a number or a string",
|
||||||
};
|
};
|
||||||
|
|
|
@ -5,8 +5,7 @@ typedef struct InstHtNode_s {
|
||||||
} InstHtNode;
|
} InstHtNode;
|
||||||
typedef InstHtNode *InstHt;
|
typedef InstHtNode *InstHt;
|
||||||
|
|
||||||
static
|
static uint32_t inst_hash(const char *s, size_t len) {
|
||||||
uint32_t inst_hash(const char *s, size_t len) {
|
|
||||||
uint32_t hash = 0;
|
uint32_t hash = 0;
|
||||||
uint32_t mul = 75;
|
uint32_t mul = 75;
|
||||||
for (size_t ii = 0; ii < len; ii += 1) {
|
for (size_t ii = 0; ii < len; ii += 1) {
|
||||||
|
@ -16,8 +15,7 @@ uint32_t inst_hash(const char *s, size_t len) {
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static InstHt build_lookup(void) {
|
||||||
InstHt build_lookup(void) {
|
|
||||||
const size_t size = 256;
|
const size_t size = 256;
|
||||||
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
||||||
if (table == NULL) {
|
if (table == NULL) {
|
||||||
|
@ -42,8 +40,7 @@ InstHt build_lookup(void) {
|
||||||
return table;
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static size_t inst_lookup(InstHt ht, const char *s, size_t len) {
|
||||||
size_t inst_lookup(InstHt ht, const char *s, size_t len) {
|
|
||||||
uint32_t hash = inst_hash(s, len);
|
uint32_t hash = inst_hash(s, len);
|
||||||
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
|
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
|
||||||
for (size_t ii = 0; ii < 2; ii += 1) {
|
for (size_t ii = 0; ii < 2; ii += 1) {
|
||||||
|
|
91
src/hbas.c
91
src/hbas.c
|
@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
@ -35,15 +35,16 @@ SOFTWARE.
|
||||||
//
|
//
|
||||||
#include "hash.c"
|
#include "hash.c"
|
||||||
//
|
//
|
||||||
|
#include "push_int.c"
|
||||||
#include "register.c"
|
#include "register.c"
|
||||||
#include "token.c"
|
#include "token.c"
|
||||||
//
|
//
|
||||||
|
#include "directive.c"
|
||||||
#include "einfo.h"
|
#include "einfo.h"
|
||||||
|
|
||||||
// Print space-separated hex dump of each byte, 16 bytes per line.
|
// Print space-separated hex dump of each byte, 16 bytes per line.
|
||||||
// Can be reversed with `xxd -p -r`.
|
// Can be reversed with `xxd -p -r`.
|
||||||
static
|
static void hex_dump(char *data, size_t len) {
|
||||||
void hex_dump(char *data, size_t len) {
|
|
||||||
char buf[48];
|
char buf[48];
|
||||||
const char *alphabet = "0123456789abcdef";
|
const char *alphabet = "0123456789abcdef";
|
||||||
for (size_t ii = 0; ii < len; ii += 1) {
|
for (size_t ii = 0; ii < len; ii += 1) {
|
||||||
|
@ -61,8 +62,7 @@ void hex_dump(char *data, size_t len) {
|
||||||
|
|
||||||
#define MIN_SIZE 4096
|
#define MIN_SIZE 4096
|
||||||
|
|
||||||
static
|
static int slurp(FILE *fd, ByteVec *out) {
|
||||||
int slurp(FILE *fd, ByteVec *out) {
|
|
||||||
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
||||||
size_t bread = 1;
|
size_t bread = 1;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
@ -109,8 +109,7 @@ typedef struct LabelVec_s {
|
||||||
size_t len;
|
size_t len;
|
||||||
} LabelVec;
|
} LabelVec;
|
||||||
|
|
||||||
static
|
static size_t label_lookup(LabelVec *labels, char *name, size_t len) {
|
||||||
size_t label_lookup(LabelVec *labels, char *name, size_t len) {
|
|
||||||
size_t nlabels = labels->len;
|
size_t nlabels = labels->len;
|
||||||
Label *buf = labels->buf;
|
Label *buf = labels->buf;
|
||||||
for (size_t ii = 0; ii < nlabels; ii += 1) {
|
for (size_t ii = 0; ii < nlabels; ii += 1) {
|
||||||
|
@ -122,65 +121,8 @@ size_t label_lookup(LabelVec *labels, char *name, size_t len) {
|
||||||
return INVALID;
|
return INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
||||||
bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
|
ByteVec *rv, HoleVec *holes) {
|
||||||
// All 64-bit values are considered valid.
|
|
||||||
if (size == 8) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// Unsigned integers must have all upper bits set to zero. To check this,
|
|
||||||
// we shift the value right by the integer size and verify it equals zero.
|
|
||||||
int valid_uint = (val >> (size * 8)) == 0;
|
|
||||||
|
|
||||||
// For signed integers, the sign-extended high bits must match the sign bit.
|
|
||||||
// By shifting right by one less than the total bit size (size * 8 - 1),
|
|
||||||
// we isolate the sign bit and any sign-extended bits. For a value fitting
|
|
||||||
// in the signed range, this operation results in either 0 (for non-negative
|
|
||||||
// values) or -1 (for negative values due to sign extension).
|
|
||||||
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
|
|
||||||
|
|
||||||
// To unify the check for both positive and negative cases, we adjust
|
|
||||||
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
|
|
||||||
// enabling a single check for 0 to validate both cases. This adjustment
|
|
||||||
// simplifies the validation logic, allowing us to use a single condition to
|
|
||||||
// check for proper sign extension or zero extension in the original value.
|
|
||||||
int_shifted += int_shifted != 0;
|
|
||||||
|
|
||||||
// A valid signed integer will have `int_shifted` equal to 0
|
|
||||||
// after adjustment, indicating proper sign extension.
|
|
||||||
int valid_int = int_shifted == 0;
|
|
||||||
|
|
||||||
// Validity bitmask to represents whether the value
|
|
||||||
// fits as signed, unsigned, or both.
|
|
||||||
int validity = valid_int | (valid_uint << 1);
|
|
||||||
|
|
||||||
// If the value's validity doesn't match the `sign` requirements,
|
|
||||||
// we report an overflow.
|
|
||||||
return (validity & sign) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// safety: assumes the buffer has enough place for specified integer size.
|
|
||||||
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
|
|
||||||
// and bit `2` indicates that value accepts an unsigned int.
|
|
||||||
static
|
|
||||||
AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
|
|
||||||
if (!check_valid_int(val, size, sign)) {
|
|
||||||
return ErrImmediateOverflow;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write out the bytes of the integer to the buffer in little-endian order,
|
|
||||||
// starting with the lowest byte first.
|
|
||||||
for (size_t ii = 0; ii < size; ii += 1) {
|
|
||||||
buf[ii] = val & 0xff;
|
|
||||||
val >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ErrOk;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
|
||||||
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
|
||||||
ByteVec *rv, HoleVec *holes) {
|
|
||||||
const InstDesc *inst;
|
const InstDesc *inst;
|
||||||
const char *type_str;
|
const char *type_str;
|
||||||
size_t nargs;
|
size_t nargs;
|
||||||
|
@ -265,6 +207,8 @@ AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
||||||
return ErrBadNumOverflow;
|
return ErrBadNumOverflow;
|
||||||
}
|
}
|
||||||
num_to_write = (uint64_t)tmp;
|
num_to_write = (uint64_t)tmp;
|
||||||
|
} else if (meta.sign == 2 && (int)num_to_write < 0) {
|
||||||
|
return ErrBadNumOverflow;
|
||||||
}
|
}
|
||||||
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
|
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
|
||||||
meta.size, meta.sign);
|
meta.size, meta.sign);
|
||||||
|
@ -283,6 +227,9 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
|
||||||
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
||||||
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
|
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
|
||||||
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
|
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
|
||||||
|
if (rv.buf == NULL || holes.buf == NULL || labels.buf == NULL) {
|
||||||
|
return ErrOutOfMemory;
|
||||||
|
}
|
||||||
size_t line = 0;
|
size_t line = 0;
|
||||||
size_t line_start = 0;
|
size_t line_start = 0;
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
|
@ -317,13 +264,17 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
|
||||||
}
|
}
|
||||||
if (tok.kind == TokDot) {
|
if (tok.kind == TokDot) {
|
||||||
Token next = token(input, len, pos);
|
Token next = token(input, len, pos);
|
||||||
if (next.kind == TokIdent) {
|
einfo->token = next;
|
||||||
err = ErrDirectiveNotImplemented;
|
if (next.kind != TokIdent) {
|
||||||
goto end;
|
|
||||||
} else {
|
|
||||||
err = ErrNeedDirectiveAfterDot;
|
err = ErrNeedDirectiveAfterDot;
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
err = assemble_directive(input, len, &rv, &next);
|
||||||
|
pos = next.start + next.len;
|
||||||
|
einfo->token = next;
|
||||||
|
if (err != ErrOk) {
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (tok.kind == TokIdent) {
|
if (tok.kind == TokIdent) {
|
||||||
|
|
55
src/push_int.c
Normal file
55
src/push_int.c
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
|
||||||
|
// Reports whether `val` can be stored in an integer field of `size` bytes.
//
// `sign` is a bitset selecting the accepted interpretations:
//   bit 1 - `val` may be a sign-extended (two's complement) signed value
//   bit 2 - `val` may be an unsigned value
// The result is true when `val` fits under at least one accepted
// interpretation.
//
// Fields of 8 bytes or more accept every 64-bit value regardless of `sign`.
// A zero-byte field can represent only zero.
//
// The range tests use unsigned comparisons only, so they do not depend on how
// the compiler right-shifts negative signed values and never shift by the
// full width of the type.
static bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
    // All 64-bit values are considered valid.
    if (size >= 8) {
        return true;
    }
    if (size == 0) {
        return val == 0 && (sign & 3) != 0;
    }

    // 8..56 bits, so the shift below is always in range.
    const unsigned bits = (unsigned)(size * 8);

    // Largest unsigned value: the low `bits` bits all set.
    const uint64_t umax = (UINT64_C(1) << bits) - 1;
    // Largest non-negative signed value: one bit fewer.
    const uint64_t smax = umax >> 1;
    // Smallest signed value, sign-extended to 64 bits. Every valid negative
    // value is numerically >= this when viewed as unsigned.
    const uint64_t smin = ~smax;

    const int valid_uint = val <= umax;
    const int valid_int = val <= smax || val >= smin;

    // Validity bitmask laid out the same way as `sign`.
    const int validity = valid_int | (valid_uint << 1);

    return (validity & sign) != 0;
}
|
||||||
|
|
||||||
|
// safety: assumes the buffer has enough place for specified integer size.
|
||||||
|
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
|
||||||
|
// and bit `2` indicates that value accepts an unsigned int.
|
||||||
|
static AsmError push_int_le(char *buf, uint64_t val, size_t size,
|
||||||
|
uint8_t sign) {
|
||||||
|
if (!check_valid_int(val, size, sign)) {
|
||||||
|
return ErrImmediateOverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write out the bytes of the integer to the buffer in little-endian order,
|
||||||
|
// starting with the lowest byte first.
|
||||||
|
for (size_t ii = 0; ii < size; ii += 1) {
|
||||||
|
buf[ii] = val & 0xff;
|
||||||
|
val >>= 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
return ErrOk;
|
||||||
|
}
|
|
@ -1,5 +1,4 @@
|
||||||
static
|
static int parse_register(char *name, size_t len) {
|
||||||
int parse_register(char *name, size_t len) {
|
|
||||||
if (name[0] != 'r') {
|
if (name[0] != 'r') {
|
||||||
return 256; // Register name should start with 'r'
|
return 256; // Register name should start with 'r'
|
||||||
}
|
}
|
||||||
|
|
71
src/token.c
71
src/token.c
|
@ -10,6 +10,7 @@ typedef enum TokenKind_e {
|
||||||
TokColon = ':',
|
TokColon = ':',
|
||||||
TokComment = ';',
|
TokComment = ';',
|
||||||
TokNewline = 'n',
|
TokNewline = 'n',
|
||||||
|
TokString = 's',
|
||||||
} TokenKind;
|
} TokenKind;
|
||||||
typedef struct Token_s {
|
typedef struct Token_s {
|
||||||
TokenKind kind;
|
TokenKind kind;
|
||||||
|
@ -18,8 +19,7 @@ typedef struct Token_s {
|
||||||
uint64_t num;
|
uint64_t num;
|
||||||
} Token;
|
} Token;
|
||||||
|
|
||||||
static
|
static Token token_ident(char *input, size_t len, size_t pos) {
|
||||||
Token token_ident(char *input, size_t len, size_t pos) {
|
|
||||||
size_t start = pos;
|
size_t start = pos;
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
char chr = input[pos];
|
char chr = input[pos];
|
||||||
|
@ -34,8 +34,7 @@ Token token_ident(char *input, size_t len, size_t pos) {
|
||||||
return (Token){TokIdent, start, pos - start, 0};
|
return (Token){TokIdent, start, pos - start, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static Token token_number(char *input, size_t len, size_t pos) {
|
||||||
Token token_number(char *input, size_t len, size_t pos) {
|
|
||||||
char *ptr = &input[pos];
|
char *ptr = &input[pos];
|
||||||
char next = '\0';
|
char next = '\0';
|
||||||
size_t start = pos;
|
size_t start = pos;
|
||||||
|
@ -111,8 +110,65 @@ Token token_number(char *input, size_t len, size_t pos) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
// Maps one hexadecimal digit character to its numeric value.
// Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and 16 for anything else.
static char get_hex(char chr) {
    if ('0' <= chr && chr <= '9') {
        return chr - '0';
    }
    // Clearing bit 0x20 folds ASCII lowercase letters onto uppercase.
    char upper = chr & ~0x20;
    if (upper < 'A' || upper > 'F') {
        return 16;
    }
    return upper - ('A' - 10);
}
|
||||||
|
|
||||||
|
static Token token_string(char *input, size_t len, size_t pos) {
|
||||||
|
size_t start = pos;
|
||||||
|
size_t ndata = 0;
|
||||||
|
for (pos += 1; pos < len; pos += 1) {
|
||||||
|
if (input[pos] == '"') {
|
||||||
|
return (Token){TokString, start, pos + 1 - start, ndata};
|
||||||
|
}
|
||||||
|
if (input[pos] == '\n' || input[pos] == '\r') {
|
||||||
|
return (Token){TokInvalid, start, pos + 1 - start,
|
||||||
|
ErrStringNewLine};
|
||||||
|
}
|
||||||
|
if (input[pos] == '\\') {
|
||||||
|
if (pos + 1 >= len) {
|
||||||
|
return (Token){TokInvalid, start, pos - start,
|
||||||
|
ErrDanglingEscape};
|
||||||
|
}
|
||||||
|
pos += 1;
|
||||||
|
switch (input[pos]) {
|
||||||
|
case '\\':
|
||||||
|
case '"':
|
||||||
|
case 'r':
|
||||||
|
case 'n':
|
||||||
|
case '0':
|
||||||
|
case 't':
|
||||||
|
break;
|
||||||
|
case 'x':
|
||||||
|
if (pos + 2 >= len) {
|
||||||
|
return (Token){TokInvalid, start, pos - start,
|
||||||
|
ErrDanglingEscape};
|
||||||
|
}
|
||||||
|
if (get_hex(input[pos + 1]) > 15 ||
|
||||||
|
get_hex(input[pos + 2]) > 15) {
|
||||||
|
return (Token){TokInvalid, start, pos - start,
|
||||||
|
ErrStringBadHex};
|
||||||
|
}
|
||||||
|
pos += 2;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return (Token){TokInvalid, start, pos - start,
|
||||||
|
ErrBadStringEscape};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ndata += 1;
|
||||||
|
}
|
||||||
|
return (Token){TokString, start, pos - start, ndata};
|
||||||
|
}
|
||||||
|
|
||||||
|
static Token token(char *input, size_t len, size_t pos) {
|
||||||
char chr, chru;
|
char chr, chru;
|
||||||
char *ptr = &input[pos];
|
char *ptr = &input[pos];
|
||||||
while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) {
|
while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) {
|
||||||
|
@ -142,6 +198,9 @@ Token token(char *input, size_t len, size_t pos) {
|
||||||
}
|
}
|
||||||
return (Token){TokComment, pos, clen, 0};
|
return (Token){TokComment, pos, clen, 0};
|
||||||
}
|
}
|
||||||
|
if (chr == '"') {
|
||||||
|
return token_string(input, len, pos);
|
||||||
|
}
|
||||||
if (chr >= '0' && chr <= '9') {
|
if (chr >= '0' && chr <= '9') {
|
||||||
return token_number(input, len, pos);
|
return token_number(input, len, pos);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue