Compare commits

..

No commits in common. "main" and "int_comments_label_deref" have entirely different histories.

16 changed files with 625 additions and 1061 deletions

View file

@ -1,19 +0,0 @@
name: Cee-lang CI
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: make
run: make
- name: example
run: make example

View file

@ -1,36 +1,18 @@
CC = gcc CC = gcc
CFLAGS_EXTRA = CFLAGS_EXTRA =
CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3
CLANG_FORMAT_STYLE = '{ BasedOnStyle: Google, IndentWidth: 4 }'
.PHONY: clean hbas example format check-format .PHONY: clean
hbas: build/hbas hbas: src/hbas.c
example: build/example.hbf
hello: build/hello.hbf
format:
clang-format --style=${CLANG_FORMAT_STYLE} -i src/*
check-format:
clang-format --style=${CLANG_FORMAT_STYLE} -i --dry-run -Werror src/*
build:
mkdir -p build
build/hbas: build $(wildcard src/*.h src/*.c)
${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas ${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas
build/example.hbf: build build/hbas examples/example.S example: hbas example.S
./build/hbas < examples/example.S > build/example.hbf ./hbas < example.S > example
xxd build/example.hbf xxd example
build/hello.hbf: build build/hbas examples/hello.S
./build/hbas < examples/hello.S > build/hello.hbf
xxd build/hello.hbf
clean: clean:
rm -rf build rm -f example hbas
all: all:
hbas hbas

View file

@ -2,25 +2,16 @@
; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md ; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md
; TODO: ; TODO:
; .origin 0x1000 ; .origin 0x1000
; 'c' char literals ; .align 0x100
; .db "hello"
; .struct ; .struct
start: start:
jmp end jmp end
un un
; .db "hello world\n"
add16 r1, r2, r255 add16 r1, r2, r255
addi8 r1, r2, -128 addi8 r1, r2, -128
lra r1, r0, start lra r1, r0, start
jmp start jmp start
end: end:
tx tx
hello_string:
.db "Hello, w\x6frld\n", 0
hello_string_end:
.db "hi"
; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
.align 2
.dw 0x4546
.align 4
.dd 0x4748494a
.align 8
.dq 0x5051525354555657

View file

@ -1,103 +0,0 @@
; Jump over the subroutines defined next (puts, gets, alloc_pages) so that
; execution begins at the `entry` label.
jmp entry
puts:
; Write string to console
; r2: [IN] *const u8 String pointer
; r3: [IN] usize String length
; Callers in this file reach this routine with `jal r31, r0, puts`; the final
; `jal r0, r31, 0` presumably returns through the address saved in r31
; (confirm against the ISA spec).
li8 r1, 0x1 ; Write syscall
; Presumably shifts the two parameters from r2,r3 up to r3,r4 so that r2 is
; free to hold the file descriptor (TODO confirm brc operand order).
brc r2, r3, 2 ; Copy parameters
li8 r2, 0x1 ; STDOUT
eca
jal r0, r31, 0
gets:
; Read string until end of buffer or LF
; r2: [IN] *mut u8 Buffer
; r3: [IN] usize Buffer length
; r1: holds the value computed at `end` on return; `entry` copies it into r36
;     and uses it as the length of the text that was read.
; Register allocations:
; r33: *mut u8 Buffer end
; r34: u8 Immediate char
; r35: u8 Const [0x0A = LF]
li8 r35, 0x0A
; Buffer end = buffer pointer + buffer length (computed before r2/r3 are reused).
add64 r33, r2, r3
; Setup syscall
; NOTE(review): puts loads the same value 0x1 and labels it STDOUT; confirm
; which descriptor is intended here.
li8 r2, 0x1 ; Stdin
; NOTE(review): r2 was overwritten with 0x1 on the previous line, so if cp is
; `cp dst, src` (as its other uses in this file suggest) r3 receives 0x1 rather
; than the buffer pointer. Verify the intended order of these two lines.
cp r3, r2
li8 r4, 0x1 ; Read one char
; Skip the loop entirely when the cursor already equals the buffer end.
jeq r3, r33, end
loop:
; NOTE(review): 0x1 is also the number puts uses for its write syscall; confirm.
li8 r1, 0x1 ; Read syscall
eca
; Advance the cursor, then load one byte at the cursor into r34.
; NOTE(review): the load happens after the increment; confirm this reads the
; byte that was just stored rather than the one after it.
addi64 r3, r3, 1
ld r34, r3, 0, 1
; Stop on LF, otherwise continue until the cursor reaches the buffer end.
jeq r34, r35, end
jne r3, r33, loop
end:
; Set copied amount
; r1 = (buffer end - cursor) - 1.
; NOTE(review): this is derived from the distance to the buffer end; confirm it
; really yields the number of bytes copied.
sub64 r1, r33, r3
addi64 r1, r1, -1
jal r0, r31, 0
alloc_pages:
; Allocate pages
; r1: [OUT] *mut u8 Pointer to page
; r2: [IN] u16 Page count
; The remaining registers (r2..r7) are loaded as the arguments of the mmap
; syscall before `eca`; the result is expected in r1 (see `entry`, which copies
; r1 right after calling this routine).
muli16 r3, r2, 4096 ; r3 = page count * 4096, presumably the mapping length in bytes (TODO confirm)
li8 r1, 0x9 ; mmap syscall
li8 r2, 0x0 ; no address set, kernel chosen
li8 r4, 0x2 ; PROT_WRITE
li8 r5, 0x20 ; MAP_ANONYMOUS
li64 r6, -1 ; Doesn't map file
li8 r7, 0x0 ; Doesn't map file
eca
jal r0, r31, 0
entry:
; Program entrypoint
; Prompts for a name, reads it into a freshly allocated buffer and prints it
; back. Subroutines are called with `jal r31, r0, <label>`.
; Register allocations:
; r32: *mut u8 Buffer
; r36: usize Read buffer length
; Allocate one page (alloc_pages multiplies the count by 4096, and 4096 is the
; buffer length passed to gets below, so presumably 4096 bytes = 4 KiB)
li8 r2, 1
jal r31, r0, alloc_pages
cp r32, r1
; Print message
lra16 r2, r0, enter_your_name
li8 r3, 17 ; length of the enter_your_name string
jal r31, r0, puts
; Read name
cp r2, r32
li16 r3, 4096
jal r31, r0, gets
cp r36, r1
; Print your name is
lra16 r2, r0, your_name_is
li8 r3, 15 ; length of the your_name_is string
jal r31, r0, puts
; And now print the name
cp r2, r32
cp r3, r36
jal r31, r0, puts
tx
; String data printed by `entry`. No terminator is emitted; the lengths
; (17 and 15 bytes) are hard-coded where the strings are passed to puts.
enter_your_name:
.db "Enter your name: "
your_name_is:
.db "\nYour name is: "

View file

@ -1,8 +0,0 @@
; Minimal example: loads the arguments for a write of hello_string to stdout,
; issues the environment call, then executes `tx`.
li8 r1, 1 ; 1->sys::write
li8 r2, 1 ; fildes=stdout
lra16 r3, r0, hello_string ; buf=hello_string
li8 r4, 0x11 ; nbyte=0x11
eca ; sys::write(stdout, hello_string, 0x11)
tx
; 17 (0x11) bytes including the trailing newline; matches nbyte above.
hello_string:
.db "Hello, AbleCorp!\n"

View file

@ -1,11 +1,11 @@
typedef struct ArgMeta_s { typedef struct ArgMeta_s {
char chr; char chr;
uint8_t size; uint8_t size;
// This is a bitset of acceptable overflow states, // This is a bitset of acceptable overflow states,
// where accept signed = 1, accept unsigned = 2. // where accept signed = 1, accept unsigned = 2.
// 1 -> signed, 2 -> unsigned, 3 -> whatever // 1 -> signed, 2 -> unsigned, 3 -> whatever
uint8_t sign; uint8_t sign;
uint8_t rel; uint8_t rel;
} ArgMeta; } ArgMeta;
const ArgMeta ARGS[] = { const ArgMeta ARGS[] = {
{'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0}, {'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
@ -15,31 +15,31 @@ const ArgMeta ARGS[] = {
}; };
typedef enum Operands_e { typedef enum Operands_e {
Empty = 0, Empty = 0,
R, R,
RR, RR,
RRR, RRR,
RRRR, RRRR,
Rx8, Rx8,
Rx16, Rx16,
Rx32, Rx32,
Rx64, Rx64,
RRx8, RRx8,
RRx16, RRx16,
RRx32, RRx32,
RRx64, RRx64,
RRs32, RRs32,
RRs64, RRs64,
RRu8, RRu8,
RRu16, RRu16,
RRu64, RRu64,
r16, r16,
r32, r32,
RRr16, RRr16,
RRr32, RRr32,
RRr16u16, RRr16u16,
RRr32u16, RRr32u16,
RRu64u16, RRu64u16,
} Operands; } Operands;
// R -> register, // R -> register,
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, // 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
@ -55,13 +55,12 @@ const char *TYPE_STR[] = {
}; };
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
ArgMeta arg_meta(char arg) {
static ArgMeta arg_meta(char arg) { for (size_t ii = 0; ii < NARGS; ii += 1) {
for (size_t ii = 0; ii < NARGS; ii += 1) { ArgMeta meta = ARGS[ii];
ArgMeta meta = ARGS[ii]; if (meta.chr == arg) {
if (meta.chr == arg) { return meta;
return meta;
}
} }
return ARGS[NARGS - 1]; }
return ARGS[NARGS - 1];
} }

View file

@ -1,29 +1,29 @@
const size_t INVALID = ~(size_t)0; const size_t INVALID = ~(size_t)0;
typedef struct ByteVec_s { typedef struct ByteVec_s {
char *buf; char *buf;
size_t cap; size_t cap;
size_t len; size_t len;
} ByteVec; } ByteVec;
static AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) { AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
if (vec->len + extra < vec->len) { if (vec->len + extra < vec->len) {
return ErrOutOfMemory; return ErrOutOfMemory;
}
while (vec->len + extra > vec->cap) {
if ((~(size_t)0) / 2 < vec->cap) {
return ErrOutOfMemory;
} }
while (vec->len + extra > vec->cap) { vec->cap *= 2;
if ((~(size_t)0) / 2 < vec->cap) { // multiply overflow
return ErrOutOfMemory; if ((~(size_t)0) / el_size < vec->cap) {
} return ErrOutOfMemory;
vec->cap *= 2;
// multiply overflow
if ((~(size_t)0) / el_size < vec->cap) {
return ErrOutOfMemory;
}
vec->buf = realloc(vec->buf, el_size * vec->cap);
if (vec->buf == NULL) {
vec->cap = 0;
return ErrOutOfMemory;
}
} }
return 0; vec->buf = realloc(vec->buf, el_size * vec->cap);
if (vec->buf == NULL) {
vec->cap = 0;
return ErrOutOfMemory;
}
}
return 0;
} }

View file

@ -1,138 +0,0 @@
// Decodes the body of a string literal into `buf`, replacing backslash
// escapes with the bytes they denote.
//
// buf:   destination. The caller must already have reserved enough room;
//        decoding never writes more than `len` bytes.
// input: first source character to decode (the caller in this file passes
//        the character right after the opening quote).
// len:   number of source characters to decode.
//
// Supported escapes: \\ \" \r \n \0 \t and \xHH (exactly two hex digits).
//
// Returns ErrOk on success, ErrDanglingEscape when an escape is cut short by
// the end of the input, ErrStringBadHex when a \x digit is not hexadecimal,
// and ErrBadStringEscape for any other escape character.
AsmError push_string(char *buf, char *input, size_t len) {
    size_t ndata = 0;
    for (size_t pos = 0; pos < len; pos += 1) {
        char chr = input[pos];
        if (chr == '\\') {
            // `pos < len` holds here, so `len - pos` cannot underflow; this
            // form also avoids wrap-around in `pos + 1`.
            if (len - pos <= 1) {
                return ErrDanglingEscape;
            }
            pos += 1;
            chr = input[pos];
            switch (chr) {
                case '\\':
                case '"':
                    // The escaped character stands for itself.
                    break;
                case 'r':
                    chr = '\r';
                    break;
                case 'n':
                    chr = '\n';
                    break;
                case '0':
                    chr = '\0';
                    break;
                case 't':
                    chr = '\t';
                    break;
                case 'x': {
                    // Two more characters are required after the 'x'.
                    if (len - pos <= 2) {
                        return ErrDanglingEscape;
                    }
                    // Hold the nibbles as unsigned so the range check below
                    // does not depend on whether plain `char` is signed: a
                    // sentinel such as 0xff must compare greater than 15.
                    unsigned char high = (unsigned char)get_hex(input[pos + 1]);
                    unsigned char low = (unsigned char)get_hex(input[pos + 2]);
                    if (high > 15 || low > 15) {
                        return ErrStringBadHex;
                    }
                    pos += 2;
                    chr = (char)((high << 4) | low);
                    break;
                }
                default:
                    return ErrBadStringEscape;
            }
        }
        buf[ndata] = chr;
        ndata += 1;
    }
    return ErrOk;
}
// Assembles the operand list of a data directive (.db/.dw/.dd/.dq): a
// comma-separated sequence of literals terminated by a newline or EOF.
//
// input/len: the whole source buffer being tokenized.
// out:       output byte vector; literals are appended at out->len.
// tok:       in: the directive name token; out: the last token consumed
//            (the terminating newline/EOF on success, the offending token
//            on error), so the caller can resume or report from it.
// word_size: bytes emitted per numeric literal (1, 2, 4 or 8).
//
// Numeric literals are written little-endian and may fit either the signed
// or the unsigned range of `word_size` bytes. String literals are accepted
// only when word_size == 1.
//
// Returns ErrOk, or the first error encountered.
static AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok,
                          size_t word_size) {
    for (;;) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNumber) {
            if (ensure_push(out, 1, word_size) != 0) {
                return ErrOutOfMemory;
            }
            // Sign mask 3 accepts both signed and unsigned encodings. The
            // result must be checked: on overflow nothing is written, and
            // advancing out->len anyway would emit uninitialized bytes.
            AsmError err =
                push_int_le(&out->buf[out->len], tok->num, word_size, 3);
            if (err != ErrOk) {
                return err;
            }
            out->len += word_size;
        } else if (tok->kind == TokString) {
            if (word_size != 1) {
                return ErrStringDataNotByte;
            }
            // tok->num is used as the number of bytes the literal occupies
            // in the output (presumably its decoded length, as computed by
            // the tokenizer -- confirm in token.c).
            if (ensure_push(out, 1, tok->num) != 0) {
                return ErrOutOfMemory;
            }
            // Skip the opening quote and drop both quotes from the length.
            char *str = &input[tok->start + 1];
            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
            if (err != ErrOk) {
                return err;
            }
            out->len += tok->num;
        } else {
            return ErrNeedsDataLiteral;
        }
        // After a literal: a comma continues the list, newline/EOF ends it.
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNewline || tok->kind == TokEOF) {
            return ErrOk;
        }
        if (tok->kind != TokComma) {
            return ErrNeedCommaOrNewline;
        }
    }
}
// Assembles one directive. `*tok` must be the directive name token.
//
// Recognized directives:
//   db / dw / dd / dq  -- emit 1/2/4/8-byte data literals (see push_data)
//   align N            -- pad the output with zero bytes up to the next
//                         multiple of N, where N is a non-zero power of two
//
// input/len: the whole source buffer being tokenized.
// out:       output byte vector that data and padding are appended to.
// tok:       in: directive name; out: the last token consumed, so the caller
//            can continue tokenizing after it or report it in an error.
//
// Returns ErrOk on success, ErrInvalidDirective for an unknown name,
// ErrAlignNeedsNumber / ErrAlignNeedsPow2 for a bad .align argument,
// ErrOutOfMemory when the output cannot grow, or any error from push_data.
AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
    // Every known directive name is at least two characters long; this also
    // makes reading byte1 below safe.
    if (tok->len < 2) {
        return ErrInvalidDirective;
    }
    size_t pos = tok->start;
    char byte0 = input[pos];
    char byte1 = input[pos + 1];
    if (tok->len == 2 && byte0 == 'd') {
        size_t word_size;
        switch (byte1) {
            case 'b':
                word_size = 1;
                break;
            case 'w':
                word_size = 2;
                break;
            case 'd':
                word_size = 4;
                break;
            case 'q':
                word_size = 8;
                break;
            default:
                return ErrInvalidDirective;
        }
        return push_data(input, len, out, tok, word_size);
    }
    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind != TokNumber) {
            return ErrAlignNeedsNumber;
        }
        // A power of two has exactly one bit set, so (n & (n - 1)) == 0.
        size_t mask = tok->num - 1;
        if (tok->num == 0 || (tok->num & mask) != 0) {
            return ErrAlignNeedsPow2;
        }
        // Reject alignments for which `out->len + mask` would wrap around.
        if ((~(size_t)0) - mask < out->len) {
            return ErrOutOfMemory;
        }
        size_t aligned = (out->len + mask) & ~mask;
        size_t padding = aligned - out->len;
        if (ensure_push(out, 1, padding) != 0) {
            return ErrOutOfMemory;
        }
        // Zero-fill the gap: the buffer comes from malloc/realloc, so
        // skipping this would leak uninitialized heap bytes into the output
        // and make the assembled image non-deterministic.
        if (padding > 0) {
            memset(&out->buf[out->len], 0, padding);
        }
        out->len = aligned;
        return ErrOk;
    }
    return ErrInvalidDirective;
}

View file

@ -1,5 +1,5 @@
typedef struct EInfo_s { typedef struct EInfo_s {
Token token; Token token;
size_t line; size_t line;
size_t line_start; size_t line_start;
} EInfo; } EInfo;

View file

@ -1,33 +1,22 @@
typedef enum AsmError_e { typedef enum AsmError_e {
ErrOk = 0, ErrOk = 0,
ErrBadRegister, ErrBadRegister,
ErrImmediateOverflow, ErrImmediateOverflow,
ErrInvalidToken, ErrInvalidToken,
ErrBadArgumentMeta, ErrBadArgumentMeta,
ErrNeedCommaAfterArgument, ErrNeedCommaAfterArgument,
ErrLabelImmediate, ErrLabelImmediate,
ErrNumberImmediate, ErrNumberImmediate,
ErrBadNumOverflow, ErrBadNumOverflow,
ErrBadNumDigit, ErrBadNumDigit,
ErrBadNumNoDigit, ErrBadNumNoDigit,
ErrLabelAfterLabel, ErrLabelAfterLabel,
ErrOutOfMemory, ErrOutOfMemory,
ErrDuplicateLabel, ErrDuplicateLabel,
ErrTrailingLine, ErrTrailingLine,
ErrNeedDirectiveAfterDot, ErrNeedDirectiveAfterDot,
ErrDirectiveNotImplemented, ErrDirectiveNotImplemented,
ErrUnexpectedToken, ErrUnexpectedToken,
ErrTriedNegateNonNumber,
ErrInvalidDirective,
ErrStringNewLine,
ErrDanglingEscape,
ErrStringBadHex,
ErrBadStringEscape,
ErrStringDataNotByte,
ErrAlignNeedsNumber,
ErrAlignNeedsPow2,
ErrNeedCommaOrNewline,
ErrNeedsDataLiteral,
} AsmError; } AsmError;
char *ERRORS[] = { char *ERRORS[] = {
"Success", "Success",
@ -48,15 +37,4 @@ char *ERRORS[] = {
"Expected directive after dot", "Expected directive after dot",
"Directive is not implemented", "Directive is not implemented",
"Unexpected token", "Unexpected token",
"Negation only works on numbers",
"Invalid directive",
"String contains a raw newline (did you forget to close the quote?)",
"Dangling escape in string literal",
"Bad hex in string literal",
"Bad escape sequence in string literal",
"String literals can be used only in .db directive",
".align requires a number",
".align requires a power of two as an argument",
"Need comma or newline after data literal",
"Data literal expects a number or a string",
}; };

View file

@ -1,57 +1,57 @@
// Instruction Hash table, for faster lookups // Instruction Hash table, for faster lookups
typedef struct InstHtNode_s { typedef struct InstHtNode_s {
uint8_t index1; uint8_t index1;
uint8_t index2; uint8_t index2;
} InstHtNode; } InstHtNode;
typedef InstHtNode *InstHt; typedef InstHtNode *InstHt;
static uint32_t inst_hash(const char *s, size_t len) { uint32_t inst_hash(const char *s, size_t len) {
uint32_t hash = 0; uint32_t hash = 0;
uint32_t mul = 75; uint32_t mul = 75;
for (size_t ii = 0; ii < len; ii += 1) { for (size_t ii = 0; ii < len; ii += 1) {
hash ^= s[ii] * mul; hash ^= s[ii] * mul;
hash *= mul; hash *= mul;
} }
return hash; return hash;
} }
static InstHt build_lookup(void) { InstHt build_lookup(void) {
const size_t size = 256; const size_t size = 256;
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
if (table == NULL) { if (table == NULL) {
return table;
}
for (size_t ii = 0; ii < size; ii += 1) {
table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff) {
node->index1 = ii;
} else if (node->index2 == 0xff) {
node->index2 = ii;
} else {
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
}
}
return table; return table;
}
for (size_t ii = 0; ii < size; ii += 1) {
table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff) {
node->index1 = ii;
} else if (node->index2 == 0xff) {
node->index2 = ii;
} else {
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
}
}
return table;
} }
static size_t inst_lookup(InstHt ht, const char *s, size_t len) { size_t inst_lookup(InstHt ht, const char *s, size_t len) {
uint32_t hash = inst_hash(s, len); uint32_t hash = inst_hash(s, len);
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
for (size_t ii = 0; ii < 2; ii += 1) { for (size_t ii = 0; ii < 2; ii += 1) {
size_t idx = (size_t)node[ii]; size_t idx = (size_t)node[ii];
if (idx == 0xff) { if (idx == 0xff) {
break; break;
}
const char *mnemonic = INST[idx].mnemonic;
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
return idx;
}
} }
return INVALID; const char *mnemonic = INST[idx].mnemonic;
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
return idx;
}
}
return INVALID;
} }

View file

@ -20,7 +20,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. SOFTWARE.
*/ */
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -35,359 +34,382 @@ SOFTWARE.
// //
#include "hash.c" #include "hash.c"
// //
#include "push_int.c"
#include "register.c" #include "register.c"
#include "token.c" #include "token.c"
// //
#include "directive.c"
#include "einfo.h" #include "einfo.h"
// Print space-separated hex dump of each byte, 16 bytes per line. void hd(char *data, size_t len) {
// Can be reversed with `xxd -p -r`. for (size_t ii = 0; ii < len; ii += 1) {
static void hex_dump(char *data, size_t len) { if (ii > 0 && (ii & 15) == 0) {
char buf[48]; printf("\n");
const char *alphabet = "0123456789abcdef";
for (size_t ii = 0; ii < len; ii += 1) {
size_t val = (uint8_t)data[ii];
size_t pos = (ii & 0x0f) * 3;
buf[pos] = alphabet[val >> 4];
buf[pos + 1] = alphabet[val & 0x0f];
buf[pos + 2] = ' ';
if (((ii & 0x0f) == 0x0f) || ii + 1 == len) {
buf[pos + 2] = '\n';
fwrite(&buf[0], 1, pos + 3, stdout);
}
} }
printf("%02x", (uint8_t)data[ii]);
}
printf("\n");
} }
#define MIN_SIZE 4096 #define MIN_SIZE 4096
static int slurp(FILE *fd, ByteVec *out) { int slurp(FILE *fd, ByteVec *out) {
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
size_t bread = 1; size_t bread = 1;
int err = 0; int err = 0;
if (rv.buf == NULL) { if (rv.buf == NULL) {
rv.cap = 0; rv.cap = 0;
err = ErrOutOfMemory; err = ErrOutOfMemory;
bread = 0; bread = 0;
}
while (bread > 0) {
if (ensure_push(&rv, 1, 1) != 0) {
err = ErrOutOfMemory;
break;
} }
while (bread > 0) { bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
if (ensure_push(&rv, 1, 1) != 0) { rv.len += bread;
err = ErrOutOfMemory; }
break; *out = rv;
} if (err == 0) {
bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); err = ferror(fd);
rv.len += bread; }
} return err;
*out = rv;
if (err == 0) {
err = ferror(fd);
}
return err;
} }
typedef struct Hole_s { typedef struct Hole_s {
size_t location; size_t location;
size_t origin; size_t origin;
char *str; char *str;
size_t len; size_t len;
size_t size; size_t size;
} Hole; } Hole;
typedef struct HoleVec_s { typedef struct HoleVec_s {
Hole *buf; Hole *buf;
size_t cap; size_t cap;
size_t len; size_t len;
} HoleVec; } HoleVec;
typedef struct Label_s { typedef struct Label_s {
size_t location; size_t location;
char *str; char *str;
size_t len; size_t len;
} Label; } Label;
typedef struct LabelVec_s { typedef struct LabelVec_s {
Label *buf; Label *buf;
size_t cap; size_t cap;
size_t len; size_t len;
} LabelVec; } LabelVec;
static size_t label_lookup(LabelVec *labels, char *name, size_t len) { size_t label_lookup(LabelVec *labels, char *name, size_t len) {
size_t nlabels = labels->len; size_t nlabels = labels->len;
Label *buf = labels->buf; Label *buf = labels->buf;
for (size_t ii = 0; ii < nlabels; ii += 1) { for (size_t ii = 0; ii < nlabels; ii += 1) {
if (len == buf->len && strncmp(buf->str, name, len) == 0) { if (len == buf->len && strncmp(buf->str, name, len) == 0) {
return ii; return ii;
}
buf += 1;
} }
return INVALID; buf += 1;
}
return INVALID;
} }
static AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok, // safety: assumes the buffer has enough place for specified integer size.
ByteVec *rv, HoleVec *holes) { // `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
const InstDesc *inst; // and bit `2` indicates that value accepts an unsigned int.
const char *type_str; AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
size_t nargs; // Unsigned integers must have all upper bits set to zero. To check this,
size_t size; // we shift the value right by the integer size and verify it equals zero.
size_t idx = inst_lookup(ht, &input[tok->start], tok->len); int valid_uint = (val >> (size * 8)) == 0;
size_t inst_start = rv->len;
if (idx == INVALID) { // For signed integers, the sign-extended high bits must match the sign bit.
return ErrInvalidToken; // By shifting right by one less than the total bit size (size * 8 - 1),
// we isolate the sign bit and any sign-extended bits. For a value fitting
// in the signed range, this operation results in either 0 (for non-negative
// values) or -1 (for negative values due to sign extension).
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
// To unify the check for both positive and negative cases, we adjust
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
// enabling a single check for 0 to validate both cases. This adjustment
// simplifies the validation logic, allowing us to use a single condition to
// check for proper sign extension or zero extension in the original value.
int_shifted += int_shifted != 0;
// A valid signed integer will have `int_shifted` equal to 0
// after adjustment, indicating proper sign extension.
int valid_int = int_shifted == 0;
// Validity bitmask to represents whether the value
// fits as signed, unsigned, or both.
int validity = valid_int | (valid_uint << 1);
// If the value's validity doesn't match the `sign` requirements,
// we report an overflow.
if ((validity & sign) == 0) {
return ErrImmediateOverflow;
}
// Write out the bytes of the integer to the buffer in little-endian order,
// starting with the lowest byte first.
for (size_t ii = 0; ii < size; ii += 1) {
buf[ii] = val & 0xff;
val >>= 8;
}
return ErrOk;
}
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
ByteVec *rv, HoleVec *holes) {
const InstDesc *inst;
const char *type_str;
size_t nargs;
size_t size;
size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
size_t inst_start = rv->len;
if (idx == INVALID) {
return ErrInvalidToken;
}
inst = &INST[idx];
type_str = TYPE_STR[inst->type];
nargs = strlen(type_str);
size = 1;
for (size_t ii = 0; ii < nargs; ii += 1) {
char chr = type_str[ii];
ArgMeta meta = arg_meta(chr);
if (meta.chr == 0) {
return ErrBadArgumentMeta;
} }
inst = &INST[idx]; size += meta.size;
type_str = TYPE_STR[inst->type]; }
nargs = strlen(type_str); if (ensure_push(rv, 1, size) != 0) {
size = 1; return ErrOutOfMemory;
for (size_t ii = 0; ii < nargs; ii += 1) { }
char chr = type_str[ii]; rv->buf[rv->len] = inst->opcode;
ArgMeta meta = arg_meta(chr); rv->len += 1;
if (meta.chr == 0) { for (size_t ii = 0; ii < nargs; ii += 1) {
return ErrBadArgumentMeta; if (ii > 0) {
} *tok = token(input, len, tok->start + tok->len);
size += meta.size; if (tok->kind != TokComma) {
return ErrNeedCommaAfterArgument;
}
} }
if (ensure_push(rv, 1, size) != 0) { char chr = type_str[ii];
return ErrOutOfMemory; ArgMeta meta = arg_meta(chr);
uint64_t is_negative = 0;
*tok = token(input, len, tok->start + tok->len);
if (tok->kind == TokNeg) {
*tok = token(input, len, tok->start + tok->len);
if (tok->kind != TokNumber) {
return ErrTriedNegateNonNumber;
}
is_negative -= 1;
} }
rv->buf[rv->len] = inst->opcode; if (chr == 'R') {
rv->len += 1; int reg = parse_register(&input[tok->start], tok->len);
for (size_t ii = 0; ii < nargs; ii += 1) { if (reg > 255) {
if (ii > 0) { return ErrBadRegister;
*tok = token(input, len, tok->start + tok->len); }
if (tok->kind != TokComma) { rv->buf[rv->len] = (char)(reg & 0xff);
return ErrNeedCommaAfterArgument; rv->len += 1;
} } else {
} uint64_t num_to_write;
char chr = type_str[ii]; if (meta.rel == 1 || meta.size == 8) {
ArgMeta meta = arg_meta(chr); if (tok->kind == TokIdent) {
uint64_t is_negative = 0; if (ensure_push((ByteVec*)holes, sizeof(Hole), 1) != 0) {
*tok = token(input, len, tok->start + tok->len); return ErrOutOfMemory;
if (tok->kind == TokNeg) { }
*tok = token(input, len, tok->start + tok->len); holes->buf[holes->len] = (Hole) {
if (tok->kind != TokNumber) { .location = rv->len,
return ErrTriedNegateNonNumber; .origin = inst_start,
} .str = &input[tok->start],
is_negative -= 1; .len = tok->len,
} .size = (size_t)meta.size,
if (chr == 'R') { };
int reg = parse_register(&input[tok->start], tok->len); holes->len += 1;
if (reg > 255) { num_to_write = 0;
return ErrBadRegister; } else if (tok->kind == TokNumber) {
} num_to_write = tok->num;
rv->buf[rv->len] = (char)(reg & 0xff);
rv->len += 1;
} else { } else {
uint64_t num_to_write; return ErrLabelImmediate;
if (meta.rel == 1 || meta.size == 8) {
if (tok->kind == TokIdent) {
if (ensure_push((ByteVec *)holes, sizeof(Hole), 1) != 0) {
return ErrOutOfMemory;
}
holes->buf[holes->len] = (Hole){
.location = rv->len,
.origin = inst_start,
.str = &input[tok->start],
.len = tok->len,
.size = (size_t)meta.size,
};
holes->len += 1;
num_to_write = 0;
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrLabelImmediate;
}
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrNumberImmediate;
}
// num_to_write = num_to_write ^ is_negative - is_negative;
if (is_negative) {
int64_t tmp = -(int64_t)num_to_write;
if (tmp > 0) {
return ErrBadNumOverflow;
}
num_to_write = (uint64_t)tmp;
} else if (meta.sign == 2 && (int64_t)num_to_write < 0) {
return ErrBadNumOverflow;
}
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
meta.size, meta.sign);
if (err != ErrOk) {
return err;
}
rv->len += meta.size;
} }
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrNumberImmediate;
}
// num_to_write = num_to_write ^ is_negative - is_negative;
if (is_negative) {
int64_t tmp = -(int64_t)num_to_write;
if (tmp > 0) {
return ErrBadNumOverflow;
}
num_to_write = (uint64_t)tmp;
}
AsmError err =
push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
if (err != ErrOk) {
return err;
}
rv->len += meta.size;
} }
}
return ErrOk; return ErrOk;
} }
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) {
EInfo *einfo) { ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; size_t line = 0;
if (rv.buf == NULL || holes.buf == NULL || labels.buf == NULL) { size_t line_start = 0;
return ErrOutOfMemory; size_t pos = 0;
} // init=0, label=1, instruction=2, comment=3, newline -> 0
size_t line = 0; size_t line_state = 0;
size_t line_start = 0; AsmError err = ErrOk;
size_t pos = 0;
// init=0, label=1, instruction=2, comment=3, newline -> 0
size_t line_state = 0;
AsmError err = ErrOk;
while (1) { while (1) {
Token tok = token(input, len, pos); Token tok = token(input, len, pos);
einfo->token = tok; einfo->token = tok;
pos = tok.start + tok.len; pos = tok.start + tok.len;
if (tok.kind == TokInvalid || tok.kind == TokBadNumber) { if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
if (tok.num) { if (tok.num) {
err = (AsmError)tok.num; err = (AsmError)tok.num;
} else { } else {
err = ErrInvalidToken; err = ErrInvalidToken;
} }
break; break;
} }
if (tok.kind == TokEOF) { if (tok.kind == TokEOF) {
break; break;
} }
if (tok.kind == TokComment) { if (tok.kind == TokComment) {
line_state = 3; line_state = 3;
continue; continue;
} }
if (tok.kind == TokNewline) { if (tok.kind == TokNewline) {
line += 1; line += 1;
line_start = tok.start + tok.len; line_start = tok.start + tok.len;
line_state = 0; line_state = 0;
continue; continue;
} }
if (tok.kind == TokDot) { if (tok.kind == TokDot) {
Token next = token(input, len, pos); Token next = token(input, len, pos);
einfo->token = next; if (next.kind == TokIdent) {
if (next.kind != TokIdent) { err = ErrDirectiveNotImplemented;
err = ErrNeedDirectiveAfterDot;
goto end;
}
err = assemble_directive(input, len, &rv, &next);
pos = next.start + next.len;
einfo->token = next;
if (err != ErrOk) {
goto end;
}
continue;
}
if (tok.kind == TokIdent) {
Token next = token(input, len, pos);
if (next.kind == TokColon) {
// Label
pos = next.start + next.len;
if (line_state >= 1) {
err = ErrLabelAfterLabel;
einfo->token = next;
goto end;
}
line_state = 1;
if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
err = ErrOutOfMemory;
goto end;
}
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
if (idx != INVALID) {
err = ErrDuplicateLabel;
goto end;
}
labels.buf[labels.len] = (Label){
.location = rv.len,
.str = &input[tok.start],
.len = tok.len,
};
labels.len += 1;
} else {
// Instruction
if (line_state >= 2) {
err = ErrTrailingLine;
goto end;
}
line_state = 2;
err = assemble_instr(ht, input, len, &tok, &rv, &holes);
pos = tok.start + tok.len;
if (err != 0) {
goto end;
}
}
continue;
}
err = ErrUnexpectedToken;
goto end; goto end;
} else {
err = ErrNeedDirectiveAfterDot;
goto end;
}
continue;
} }
if (tok.kind == TokIdent) {
for (size_t ii = 0; ii < holes.len; ii += 1) { Token next = token(input, len, pos);
Hole *hole = &holes.buf[ii]; if (next.kind == TokColon) {
size_t idx = label_lookup(&labels, hole->str, hole->len); // Label
uint64_t num_to_write = labels.buf[idx].location; pos = next.start + next.len;
uint8_t sign = 2; if (line_state >= 1) {
if (hole->size != 8) { err = ErrLabelAfterLabel;
sign = 1; einfo->token = next;
num_to_write -= hole->origin; goto end;
} }
err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, line_state = 1;
sign); if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
err = ErrOutOfMemory;
goto end;
}
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
if (idx != INVALID) {
err = ErrDuplicateLabel;
goto end;
}
labels.buf[labels.len] = (Label){
.location = rv.len,
.str = &input[tok.start],
.len = tok.len,
};
labels.len += 1;
} else {
// Instruction
if (line_state >= 2) {
err = ErrTrailingLine;
goto end;
}
line_state = 2;
err = assemble_instr(ht, input, len, &tok, &rv, &holes);
pos = tok.start + tok.len;
if (err != 0) { if (err != 0) {
goto end; goto end;
} }
}
continue;
} }
err = ErrUnexpectedToken;
goto end;
}
for (size_t ii = 0; ii < holes.len; ii += 1) {
Hole *hole = &holes.buf[ii];
size_t idx = label_lookup(&labels, hole->str, hole->len);
uint64_t num_to_write = labels.buf[idx].location;
uint8_t sign = 2;
if (hole->size != 8) {
sign = 1;
num_to_write -= hole->origin;
}
err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, sign);
if (err != 0) {
goto end;
}
}
end: end:
free(holes.buf); free(holes.buf);
free(labels.buf); free(labels.buf);
*out = rv; *out = rv;
einfo->line = line + 1; einfo->line = line + 1;
einfo->line_start = line_start; einfo->line_start = line_start;
return err; return err;
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
int hex_out = 0; int hex_out = 0;
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) { if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
hex_out = 1; hex_out = 1;
} }
int err = 0; int err = 0;
InstHt ht = NULL; InstHt ht = NULL;
ByteVec input; ByteVec input;
err = slurp(stdin, &input); err = slurp(stdin, &input);
if (err != 0) { if (err != 0) {
fprintf(stderr, "failed to read the file: %d\n", err); fprintf(stderr, "failed to read the file: %d\n", err);
goto done; goto done;
} }
ht = build_lookup(); ht = build_lookup();
if (ht == NULL) { if (ht == NULL) {
err = ErrOutOfMemory; err = ErrOutOfMemory;
fprintf(stderr, "failed to init hash table: %d\n", err); fprintf(stderr, "failed to init hash table: %d\n", err);
goto done; goto done;
} }
ByteVec out; ByteVec out;
EInfo einfo; EInfo einfo;
err = assemble(ht, input.buf, input.len, &out, &einfo); err = assemble(ht, input.buf, input.len, &out, &einfo);
if (err != 0) { if (err != 0) {
size_t column = einfo.token.start - einfo.line_start + 1; size_t column = einfo.token.start - einfo.line_start + 1;
fprintf(stderr, fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
"failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", ERRORS[err], einfo.line, column, (int)einfo.token.len,
ERRORS[err], einfo.line, column, (int)einfo.token.len, &input.buf[einfo.token.start]);
&input.buf[einfo.token.start]); goto done;
goto done; }
} if (hex_out) {
if (hex_out) { hd(out.buf, out.len);
hex_dump(out.buf, out.len); } else {
} else { fwrite(out.buf, 1, out.len, stdout);
fwrite(out.buf, 1, out.len, stdout); }
}
done: done:
free(ht); free(ht);
free(input.buf); free(input.buf);
free(out.buf); free(out.buf);
return err; return err;
} }

View file

@ -1,138 +1,78 @@
typedef struct InstDesc_s { typedef struct InstDesc_s {
char *mnemonic; char *mnemonic;
unsigned char opcode; unsigned char opcode;
Operands type; Operands type;
} InstDesc; } InstDesc;
const InstDesc INST[] = { const InstDesc INST[] = {
{.opcode = 0x00, .mnemonic = "un", .type = Empty}, {"un", 0x00, Empty}, {"tx", 0x01, Empty},
{.opcode = 0x01, .mnemonic = "tx", .type = Empty}, {"nop", 0x02, Empty}, {"add8", 0x03, RRR},
{.opcode = 0x02, .mnemonic = "nop", .type = Empty}, {"add16", 0x04, RRR}, {"add32", 0x05, RRR},
{.opcode = 0x03, .mnemonic = "add8", .type = RRR}, {"add64", 0x06, RRR}, {"sub8", 0x07, RRR},
{.opcode = 0x04, .mnemonic = "add16", .type = RRR}, {"sub16", 0x08, RRR}, {"sub32", 0x09, RRR},
{.opcode = 0x05, .mnemonic = "add32", .type = RRR}, {"sub64", 0x0A, RRR}, {"mul8", 0x0B, RRR},
{.opcode = 0x06, .mnemonic = "add64", .type = RRR}, {"mul16", 0x0C, RRR}, {"mul32", 0x0D, RRR},
{.opcode = 0x07, .mnemonic = "sub8", .type = RRR}, {"mul64", 0x0E, RRR}, {"and", 0x0F, RRR},
{.opcode = 0x08, .mnemonic = "sub16", .type = RRR}, {"or", 0x10, RRR}, {"xor", 0x11, RRR},
{.opcode = 0x09, .mnemonic = "sub32", .type = RRR}, {"slu8", 0x12, RRR}, {"slu16", 0x13, RRR},
{.opcode = 0x0A, .mnemonic = "sub64", .type = RRR}, {"slu32", 0x14, RRR}, {"slu64", 0x15, RRR},
{.opcode = 0x0B, .mnemonic = "mul8", .type = RRR}, {"sru8", 0x16, RRR}, {"sru16", 0x17, RRR},
{.opcode = 0x0C, .mnemonic = "mul16", .type = RRR}, {"sru32", 0x18, RRR}, {"sru64", 0x19, RRR},
{.opcode = 0x0D, .mnemonic = "mul32", .type = RRR}, {"srs8", 0x1A, RRR}, {"srs16", 0x1B, RRR},
{.opcode = 0x0E, .mnemonic = "mul64", .type = RRR}, {"srs32", 0x1C, RRR}, {"srs64", 0x1D, RRR},
{.opcode = 0x0F, .mnemonic = "and", .type = RRR}, {"cmpu", 0x1E, RRR}, {"cmps", 0x1F, RRR},
{.opcode = 0x10, .mnemonic = "or", .type = RRR}, {"diru8", 0x20, RRRR}, {"diru16", 0x21, RRRR},
{.opcode = 0x11, .mnemonic = "xor", .type = RRR}, {"diru32", 0x22, RRRR}, {"diru64", 0x23, RRRR},
{.opcode = 0x12, .mnemonic = "slu8", .type = RRR}, {"dirs8", 0x24, RRRR}, {"dirs16", 0x25, RRRR},
{.opcode = 0x13, .mnemonic = "slu16", .type = RRR}, {"dirs32", 0x26, RRRR}, {"dirs64", 0x27, RRRR},
{.opcode = 0x14, .mnemonic = "slu32", .type = RRR}, {"neg", 0x28, RR}, {"not", 0x29, RR},
{.opcode = 0x15, .mnemonic = "slu64", .type = RRR}, {"sxt8", 0x2A, RR}, {"sxt16", 0x2B, RR},
{.opcode = 0x16, .mnemonic = "sru8", .type = RRR}, {"sxt32", 0x2C, RR}, {"addi8", 0x2D, RRx8},
{.opcode = 0x17, .mnemonic = "sru16", .type = RRR}, {"addi16", 0x2E, RRx16}, {"addi32", 0x2F, RRx32},
{.opcode = 0x18, .mnemonic = "sru32", .type = RRR}, {"addi64", 0x30, RRx64}, {"muli8", 0x31, RRx8},
{.opcode = 0x19, .mnemonic = "sru64", .type = RRR}, {"muli16", 0x32, RRx16}, {"muli32", 0x33, RRx32},
{.opcode = 0x1A, .mnemonic = "srs8", .type = RRR}, {"muli64", 0x34, RRx64}, {"andi", 0x35, RRx64},
{.opcode = 0x1B, .mnemonic = "srs16", .type = RRR}, {"ori", 0x36, RRx64}, {"xori", 0x37, RRx64},
{.opcode = 0x1C, .mnemonic = "srs32", .type = RRR}, {"slui8", 0x38, RRu8}, {"slui16", 0x39, RRu8},
{.opcode = 0x1D, .mnemonic = "srs64", .type = RRR}, {"slui32", 0x3A, RRu8}, {"slui64", 0x3B, RRu8},
{.opcode = 0x1E, .mnemonic = "cmpu", .type = RRR}, {"srui8", 0x3C, RRu8}, {"srui16", 0x3D, RRu8},
{.opcode = 0x1F, .mnemonic = "cmps", .type = RRR}, {"srui32", 0x3E, RRu8}, {"srui64", 0x3F, RRu8},
{.opcode = 0x20, .mnemonic = "diru8", .type = RRRR}, {"srsi8", 0x40, RRu8}, {"srsi16", 0x41, RRu8},
{.opcode = 0x21, .mnemonic = "diru16", .type = RRRR}, {"srsi32", 0x42, RRu8}, {"srsi64", 0x43, RRu8},
{.opcode = 0x22, .mnemonic = "diru32", .type = RRRR}, {"cmpui", 0x44, RRu64}, {"cmpsi", 0x45, RRs64},
{.opcode = 0x23, .mnemonic = "diru64", .type = RRRR}, {"cp", 0x46, RR}, {"swa", 0x47, RR},
{.opcode = 0x24, .mnemonic = "dirs8", .type = RRRR}, {"li8", 0x48, Rx8}, {"li16", 0x49, Rx16},
{.opcode = 0x25, .mnemonic = "dirs16", .type = RRRR}, {"li32", 0x4A, Rx32}, {"li64", 0x4B, Rx64},
{.opcode = 0x26, .mnemonic = "dirs32", .type = RRRR}, {"lra", 0x4C, RRr32}, {"ld", 0x4D, RRu64u16},
{.opcode = 0x27, .mnemonic = "dirs64", .type = RRRR}, {"st", 0x4E, RRu64u16}, {"ldr", 0x4F, RRr32u16},
{.opcode = 0x28, .mnemonic = "neg", .type = RR}, {"str", 0x50, RRr32u16}, {"bmc", 0x51, RRu16},
{.opcode = 0x29, .mnemonic = "not", .type = RR}, {"brc", 0x52, RRu8}, {"jmp", 0x53, r32},
{.opcode = 0x2A, .mnemonic = "sxt8", .type = RR}, {"jal", 0x54, RRr32}, {"jala", 0x55, RRu64},
{.opcode = 0x2B, .mnemonic = "sxt16", .type = RR}, {"jeq", 0x56, RRr16}, {"jne", 0x57, RRr16},
{.opcode = 0x2C, .mnemonic = "sxt32", .type = RR}, {"jltu", 0x58, RRr16}, {"jgtu", 0x59, RRr16},
{.opcode = 0x2D, .mnemonic = "addi8", .type = RRx8}, {"jlts", 0x5A, RRr16}, {"jgts", 0x5B, RRr16},
{.opcode = 0x2E, .mnemonic = "addi16", .type = RRx16}, {"eca", 0x5C, Empty}, {"ebp", 0x5D, Empty},
{.opcode = 0x2F, .mnemonic = "addi32", .type = RRx32}, {"fadd32", 0x5E, RRR}, {"fadd64", 0x5F, RRR},
{.opcode = 0x30, .mnemonic = "addi64", .type = RRx64}, {"fsub32", 0x60, RRR}, {"fsub64", 0x61, RRR},
{.opcode = 0x31, .mnemonic = "muli8", .type = RRx8}, {"fmul32", 0x62, RRR}, {"fmul64", 0x63, RRR},
{.opcode = 0x32, .mnemonic = "muli16", .type = RRx16}, {"fdiv32", 0x64, RRR}, {"fdiv64", 0x65, RRR},
{.opcode = 0x33, .mnemonic = "muli32", .type = RRx32}, {"fma32", 0x66, RRRR}, {"fma64", 0x67, RRRR},
{.opcode = 0x34, .mnemonic = "muli64", .type = RRx64}, {"fcmplt32", 0x6A, RRR}, {"fcmplt64", 0x6B, RRR},
{.opcode = 0x35, .mnemonic = "andi", .type = RRx64}, {"fcmpgt32", 0x6C, RRR}, {"fcmpgt64", 0x6D, RRR},
{.opcode = 0x36, .mnemonic = "ori", .type = RRx64}, {"itf32", 0x6E, RR}, {"itf64", 0x6F, RR},
{.opcode = 0x37, .mnemonic = "xori", .type = RRx64}, {"fti32", 0x70, RRu8}, {"fti64", 0x71, RRu8},
{.opcode = 0x38, .mnemonic = "slui8", .type = RRu8}, {"fc32t64", 0x72, RR}, {"fc64t32", 0x73, RR},
{.opcode = 0x39, .mnemonic = "slui16", .type = RRu8}, {"lra16", 0x74, RRr16}, {"ldr16", 0x75, RRr16u16},
{.opcode = 0x3A, .mnemonic = "slui32", .type = RRu8}, {"str16", 0x76, RRr16u16}, {"jmp16", 0x77, r16},
{.opcode = 0x3B, .mnemonic = "slui64", .type = RRu8},
{.opcode = 0x3C, .mnemonic = "srui8", .type = RRu8},
{.opcode = 0x3D, .mnemonic = "srui16", .type = RRu8},
{.opcode = 0x3E, .mnemonic = "srui32", .type = RRu8},
{.opcode = 0x3F, .mnemonic = "srui64", .type = RRu8},
{.opcode = 0x40, .mnemonic = "srsi8", .type = RRu8},
{.opcode = 0x41, .mnemonic = "srsi16", .type = RRu8},
{.opcode = 0x42, .mnemonic = "srsi32", .type = RRu8},
{.opcode = 0x43, .mnemonic = "srsi64", .type = RRu8},
{.opcode = 0x44, .mnemonic = "cmpui", .type = RRu64},
{.opcode = 0x45, .mnemonic = "cmpsi", .type = RRs64},
{.opcode = 0x46, .mnemonic = "cp", .type = RR},
{.opcode = 0x47, .mnemonic = "swa", .type = RR},
{.opcode = 0x48, .mnemonic = "li8", .type = Rx8},
{.opcode = 0x49, .mnemonic = "li16", .type = Rx16},
{.opcode = 0x4A, .mnemonic = "li32", .type = Rx32},
{.opcode = 0x4B, .mnemonic = "li64", .type = Rx64},
{.opcode = 0x4C, .mnemonic = "lra", .type = RRr32},
{.opcode = 0x4D, .mnemonic = "ld", .type = RRu64u16},
{.opcode = 0x4E, .mnemonic = "st", .type = RRu64u16},
{.opcode = 0x4F, .mnemonic = "ldr", .type = RRr32u16},
{.opcode = 0x50, .mnemonic = "str", .type = RRr32u16},
{.opcode = 0x51, .mnemonic = "bmc", .type = RRu16},
{.opcode = 0x52, .mnemonic = "brc", .type = RRu8},
{.opcode = 0x53, .mnemonic = "jmp", .type = r32},
{.opcode = 0x54, .mnemonic = "jal", .type = RRr32},
{.opcode = 0x55, .mnemonic = "jala", .type = RRu64},
{.opcode = 0x56, .mnemonic = "jeq", .type = RRr16},
{.opcode = 0x57, .mnemonic = "jne", .type = RRr16},
{.opcode = 0x58, .mnemonic = "jltu", .type = RRr16},
{.opcode = 0x59, .mnemonic = "jgtu", .type = RRr16},
{.opcode = 0x5A, .mnemonic = "jlts", .type = RRr16},
{.opcode = 0x5B, .mnemonic = "jgts", .type = RRr16},
{.opcode = 0x5C, .mnemonic = "eca", .type = Empty},
{.opcode = 0x5D, .mnemonic = "ebp", .type = Empty},
{.opcode = 0x5E, .mnemonic = "fadd32", .type = RRR},
{.opcode = 0x5F, .mnemonic = "fadd64", .type = RRR},
{.opcode = 0x60, .mnemonic = "fsub32", .type = RRR},
{.opcode = 0x61, .mnemonic = "fsub64", .type = RRR},
{.opcode = 0x62, .mnemonic = "fmul32", .type = RRR},
{.opcode = 0x63, .mnemonic = "fmul64", .type = RRR},
{.opcode = 0x64, .mnemonic = "fdiv32", .type = RRR},
{.opcode = 0x65, .mnemonic = "fdiv64", .type = RRR},
{.opcode = 0x66, .mnemonic = "fma32", .type = RRRR},
{.opcode = 0x67, .mnemonic = "fma64", .type = RRRR},
// 68, 69?
{.opcode = 0x6A, .mnemonic = "fcmplt32", .type = RRR},
{.opcode = 0x6B, .mnemonic = "fcmplt64", .type = RRR},
{.opcode = 0x6C, .mnemonic = "fcmpgt32", .type = RRR},
{.opcode = 0x6D, .mnemonic = "fcmpgt64", .type = RRR},
{.opcode = 0x6E, .mnemonic = "itf32", .type = RR},
{.opcode = 0x6F, .mnemonic = "itf64", .type = RR},
{.opcode = 0x70, .mnemonic = "fti32", .type = RRu8},
{.opcode = 0x71, .mnemonic = "fti64", .type = RRu8},
{.opcode = 0x72, .mnemonic = "fc32t64", .type = RR},
{.opcode = 0x73, .mnemonic = "fc64t32", .type = RR},
{.opcode = 0x74, .mnemonic = "lra16", .type = RRr16},
{.opcode = 0x75, .mnemonic = "ldr16", .type = RRr16u16},
{.opcode = 0x76, .mnemonic = "str16", .type = RRr16u16},
{.opcode = 0x77, .mnemonic = "jmp16", .type = r16},
}; };
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);
size_t inst_find(const char *mnemonic, size_t len) { size_t inst_find(const char *mnemonic, size_t len) {
for (size_t ii = 0; ii < INST_CNT; ii += 1) { for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *entry = INST[ii].mnemonic; const char *entry = INST[ii].mnemonic;
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') { if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
return ii; return ii;
}
} }
return INVALID; }
return INVALID;
} }

View file

@ -1,55 +0,0 @@
// Reports whether `val` can be encoded in `size` bytes under the signedness
// rules given by `sign`.
//
// `sign` is a bitset: bit 1 accepts values that fit as a signed integer of
// the requested width, bit 2 accepts values that fit as an unsigned integer.
// Negative values are expected in two's-complement form, sign-extended to the
// full 64 bits of `val`.
//
// Widths of 8 bytes or more accept every value; a width of 0 accepts none.
// Returns true when the value fits in at least one of the accepted forms.
static bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
    // Everything fits in 64 bits. Handling this up front (together with any
    // oversized width) also keeps every shift count below strictly under 64.
    if (size >= 8) {
        return true;
    }
    // A zero-width field cannot hold a value, and `bits - 1` would wrap.
    if (size == 0) {
        return false;
    }
    const unsigned bits = (unsigned)size * 8;

    // Unsigned fit: all bits above the field must be zero.
    const bool fits_unsigned = (val >> bits) == 0;

    // Signed fit: the sign bit and every bit above it must be identical,
    // i.e. all zeros (non-negative) or all ones (negative). Comparing the
    // unsigned upper part against both patterns avoids right-shifting a
    // negative signed value, whose result is implementation-defined.
    const uint64_t upper = val >> (bits - 1);
    const uint64_t upper_all_ones = (~(uint64_t)0) >> (bits - 1);
    const bool fits_signed = upper == 0 || upper == upper_all_ones;

    // Same bit layout as `sign`: bit 1 = signed fit, bit 2 = unsigned fit.
    const unsigned validity =
        (fits_signed ? 1u : 0u) | (fits_unsigned ? 2u : 0u);
    return (validity & sign) != 0;
}
// Stores the low `size` bytes of `val` into `buf`, least significant byte
// first.
//
// Safety: the caller must guarantee that `buf` has room for `size` bytes.
// `sign` is a bitset forwarded to check_valid_int: bit `1` means the value
// may be a signed int of that width, bit `2` means it may be an unsigned int.
//
// Returns ErrImmediateOverflow, leaving `buf` untouched, when the value does
// not pass check_valid_int; otherwise writes the bytes and returns ErrOk.
static AsmError push_int_le(char *buf, uint64_t val, size_t size,
                            uint8_t sign) {
    if (check_valid_int(val, size, sign)) {
        char *out = buf;
        char *const end = buf + size;
        // Peel off one byte per step, lowest byte first.
        while (out != end) {
            *out = (char)(val & 0xff);
            out += 1;
            val >>= 8;
        }
        return ErrOk;
    }
    return ErrImmediateOverflow;
}

View file

@ -1,23 +1,23 @@
static int parse_register(char *name, size_t len) { int parse_register(char *name, size_t len) {
if (name[0] != 'r') { if (name[0] != 'r') {
return 256; // Register name should start with 'r' return 256; // Register name should start with 'r'
}
if (len > 4) {
return 256; // Register name too long
}
uint16_t rv = 0;
if (len > 2 && name[1] == '0') {
return 256; // Extra zero suffix
}
for (size_t ii = 1; ii < len; ii += 1) {
char chr = name[ii];
if (!(chr >= '0' && chr <= '9')) {
return 256; // Register name must only contain numbers
} }
if (len > 4) { rv = rv * 10 + (chr - '0');
return 256; // Register name too long }
} if (rv > 255) {
uint16_t rv = 0; return 256; // Register number too large
if (len > 2 && name[1] == '0') { }
return 256; // Extra zero suffix return (int)rv;
}
for (size_t ii = 1; ii < len; ii += 1) {
char chr = name[ii];
if (!(chr >= '0' && chr <= '9')) {
return 256; // Register name must only contain numbers
}
rv = rv * 10 + (chr - '0');
}
if (rv > 255) {
return 256; // Register number too large
}
return (int)rv;
} }

View file

@ -1,4 +1,5 @@
typedef enum TokenKind_e { typedef enum TokenKind_e
{
TokInvalid = '!', TokInvalid = '!',
TokEOF = '$', TokEOF = '$',
TokIdent = 'A', TokIdent = 'A',
@ -10,23 +11,25 @@ typedef enum TokenKind_e {
TokColon = ':', TokColon = ':',
TokComment = ';', TokComment = ';',
TokNewline = 'n', TokNewline = 'n',
TokString = 's',
} TokenKind; } TokenKind;
typedef struct Token_s { typedef struct Token_s
{
TokenKind kind; TokenKind kind;
size_t start; size_t start;
size_t len; size_t len;
uint64_t num; uint64_t num;
} Token; } Token;
static Token token_ident(char *input, size_t len, size_t pos) { Token token_ident(char *input, size_t len, size_t pos)
{
size_t start = pos; size_t start = pos;
while (pos < len) { while (pos < len)
{
char chr = input[pos]; char chr = input[pos];
char chru = chr & ~0x20; char chru = chr & ~0x20;
int good = chr == '_' || (chr >= '0' && chr <= '9') || int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z');
(chru >= 'A' && chru <= 'Z'); if (!good)
if (!good) { {
break; break;
} }
pos += 1; pos += 1;
@ -34,7 +37,8 @@ static Token token_ident(char *input, size_t len, size_t pos) {
return (Token){TokIdent, start, pos - start, 0}; return (Token){TokIdent, start, pos - start, 0};
} }
static Token token_number(char *input, size_t len, size_t pos) { Token token_number(char *input, size_t len, size_t pos)
{
char *ptr = &input[pos]; char *ptr = &input[pos];
char next = '\0'; char next = '\0';
size_t start = pos; size_t start = pos;
@ -44,21 +48,30 @@ static Token token_number(char *input, size_t len, size_t pos) {
uint64_t pre_overflow; uint64_t pre_overflow;
AsmError bad_num = ErrOk; AsmError bad_num = ErrOk;
if (pos + 1 < len) { if (pos + 1 < len)
{
next = ptr[1] & ~0x20; next = ptr[1] & ~0x20;
} }
if (input[pos] == '0') { if (input[pos] == '0')
if (next == 'X') { {
if (next == 'X')
{
base = 16; base = 16;
pos += 2; pos += 2;
} else if (next == 'D') { }
else if (next == 'D')
{
base = 10; base = 10;
pos += 2; pos += 2;
} else if (next == 'O') { }
else if (next == 'O')
{
base = 8; base = 8;
pos += 2; pos += 2;
} else if (next == 'B') { }
else if (next == 'B')
{
base = 2; base = 2;
pos += 2; pos += 2;
} }
@ -66,23 +79,30 @@ static Token token_number(char *input, size_t len, size_t pos) {
pre_overflow = (~(size_t)0) / base; pre_overflow = (~(size_t)0) / base;
// valid: "0x_0", "0_" // valid: "0x_0", "0_"
// invalid: "0x_" // invalid: "0x_"
while (pos < len) { while (pos < len)
{
uint64_t digit; uint64_t digit;
uint64_t next; uint64_t next;
char chr = input[pos]; char chr = input[pos];
char chru = chr & ~0x20; char chru = chr & ~0x20;
if (chr == '_') { if (chr == '_')
{
pos += 1; pos += 1;
continue; continue;
} }
digit = (uint64_t)chr - (uint64_t)'0'; digit = (uint64_t)chr - (uint64_t)'0';
if (digit >= 10) { if (digit >= 10)
{
digit = (uint64_t)chru - (uint64_t)('A' - 10); digit = (uint64_t)chru - (uint64_t)('A' - 10);
} }
if (digit >= base) { if (digit >= base)
if (chr >= '0' && chr <= '9') { {
if (chr >= '0' && chr <= '9')
{
bad_num = ErrBadNumDigit; bad_num = ErrBadNumDigit;
} else if (chru >= 'A' && chru <= 'Z') { }
else if (chru >= 'A' && chru <= 'Z')
{
bad_num = ErrBadNumDigit; bad_num = ErrBadNumDigit;
} }
break; break;
@ -92,120 +112,75 @@ static Token token_number(char *input, size_t len, size_t pos) {
digits += 1; digits += 1;
next = rv * base + digit; next = rv * base + digit;
if (rv > pre_overflow || next < rv) { if (rv > pre_overflow || next < rv)
{
bad_num = ErrBadNumOverflow; bad_num = ErrBadNumOverflow;
break; break;
} }
rv = next; rv = next;
} }
if (digits == 0) { if (digits == 0)
{
bad_num = ErrBadNumNoDigit; bad_num = ErrBadNumNoDigit;
} }
if (bad_num) { if (bad_num)
{
return (Token){TokBadNumber, start, pos - start, bad_num}; return (Token){TokBadNumber, start, pos - start, bad_num};
} else { }
else
{
return (Token){TokNumber, start, pos - start, rv}; return (Token){TokNumber, start, pos - start, rv};
} }
} }
static char get_hex(char chr) { Token token(char *input, size_t len, size_t pos)
char chru = chr & ~0x20; {
if (chr >= '0' && chr <= '9') {
return chr - '0';
}
if (chru >= 'A' && chru <= 'F') {
return chru - ('A' - 10);
}
return 16;
}
// Scans a double-quoted string literal whose opening quote is at
// `input[pos]`.
//
// Accepted escapes are \\ \" \r \n \0 \t and \xHH (exactly two hex digits).
// On success the token kind is TokString, its span covers both quotes, and
// `num` holds the count of plain characters plus escape sequences seen.
// On failure the kind is TokInvalid and `num` carries the error code:
//   ErrStringNewLine   - a raw '\n' or '\r' appeared inside the literal
//   ErrDanglingEscape  - the input ended in the middle of an escape
//   ErrStringBadHex    - \x was not followed by two hex digits
//   ErrBadStringEscape - unknown character after the backslash
//
// Note: if the input ends before a closing quote is found, the function
// still returns TokString, spanning from the opening quote to end of input.
static Token token_string(char *input, size_t len, size_t pos) {
    const size_t begin = pos;
    size_t cur = pos + 1;  // first character after the opening quote
    size_t count = 0;

    while (cur < len) {
        const char chr = input[cur];

        if (chr == '"') {
            return (Token){TokString, begin, cur + 1 - begin, count};
        }
        if (chr == '\n' || chr == '\r') {
            return (Token){TokInvalid, begin, cur + 1 - begin,
                           ErrStringNewLine};
        }
        if (chr == '\\') {
            if (cur + 1 >= len) {
                return (Token){TokInvalid, begin, cur - begin,
                               ErrDanglingEscape};
            }
            cur += 1;  // now on the character that selects the escape
            const char esc = input[cur];
            if (esc == 'x') {
                // Both hex digits must lie inside the input.
                if (cur + 2 >= len) {
                    return (Token){TokInvalid, begin, cur - begin,
                                   ErrDanglingEscape};
                }
                // get_hex yields a value above 15 for a non-hex character.
                if (get_hex(input[cur + 1]) > 15 ||
                    get_hex(input[cur + 2]) > 15) {
                    return (Token){TokInvalid, begin, cur - begin,
                                   ErrStringBadHex};
                }
                cur += 2;
            } else {
                const int simple = esc == '\\' || esc == '"' || esc == 'r' ||
                                   esc == 'n' || esc == '0' || esc == 't';
                if (!simple) {
                    return (Token){TokInvalid, begin, cur - begin,
                                   ErrBadStringEscape};
                }
            }
        }
        count += 1;
        cur += 1;
    }
    // Ran off the end of the input without seeing a closing quote.
    return (Token){TokString, begin, cur - begin, count};
}
static Token token(char *input, size_t len, size_t pos) {
char chr, chru; char chr, chru;
char *ptr = &input[pos]; char *ptr = &input[pos];
while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) { while (pos < len && (input[pos] == ' ' || input[pos] == '\t'))
{
pos += 1; pos += 1;
} }
if (pos == len) { if (pos == len)
{
return (Token){TokEOF, pos, 0, 0}; return (Token){TokEOF, pos, 0, 0};
} }
ptr = &input[pos]; ptr = &input[pos];
chr = *ptr; chr = *ptr;
if (chr == ',' || chr == '-' || chr == '.' || chr == ':') { if (chr == ',' || chr == '-' || chr == '.' || chr == ':')
{
return (Token){(TokenKind)chr, pos, 1, 0}; return (Token){(TokenKind)chr, pos, 1, 0};
} }
if (chr == '\n') { if (chr == '\n')
{
return (Token){TokNewline, pos, 1, 0}; return (Token){TokNewline, pos, 1, 0};
} }
if (chr == '\r') { if (chr == '\r')
if (pos + 1 < len && ptr[1] == '\n') { {
if (pos + 1 < len && ptr[1] == '\n')
{
return (Token){TokNewline, pos, 2, 0}; return (Token){TokNewline, pos, 2, 0};
} }
return (Token){TokNewline, pos, 1, 0}; return (Token){TokNewline, pos, 1, 0};
} }
if (chr == ';') { if (chr == ';')
{
size_t clen = 1; size_t clen = 1;
while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') { while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r')
{
clen += 1; clen += 1;
} }
return (Token){TokComment, pos, clen, 0}; return (Token){TokComment, pos, clen, 0};
} }
if (chr == '"') { if (chr >= '0' && chr <= '9')
return token_string(input, len, pos); {
}
if (chr >= '0' && chr <= '9') {
return token_number(input, len, pos); return token_number(input, len, pos);
} }
chru = chr & ~0x20; chru = chr & ~0x20;
if (chr == '_' || (chru >= 'A' && chru <= 'Z')) { if (chr == '_' || (chru >= 'A' && chru <= 'Z'))
{
return token_ident(input, len, pos); return token_ident(input, len, pos);
} }
return (Token){TokInvalid, pos, 1, 0}; return (Token){TokInvalid, pos, 1, 0};