Compare commits
No commits in common. "main" and "m1el-patch-1" have entirely different histories.
main
...
m1el-patch
4
.github/workflows/c-cpp.yml
vendored
4
.github/workflows/c-cpp.yml
vendored
|
@ -1,4 +1,4 @@
|
||||||
name: Cee-lang CI
|
name: C CI
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
|
@ -12,7 +12,7 @@ jobs:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v3
|
||||||
- name: make
|
- name: make
|
||||||
run: make
|
run: make
|
||||||
- name: example
|
- name: example
|
||||||
|
|
16
Makefile
16
Makefile
|
@ -1,34 +1,22 @@
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS_EXTRA =
|
CFLAGS_EXTRA =
|
||||||
CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3
|
CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3
|
||||||
CLANG_FORMAT_STYLE = '{ BasedOnStyle: Google, IndentWidth: 4 }'
|
|
||||||
|
|
||||||
.PHONY: clean hbas example format check-format
|
.PHONY: clean build-dir hbas example
|
||||||
|
|
||||||
hbas: build/hbas
|
hbas: build/hbas
|
||||||
example: build/example.hbf
|
example: build/example.hbf
|
||||||
hello: build/hello.hbf
|
|
||||||
|
|
||||||
format:
|
|
||||||
clang-format --style=${CLANG_FORMAT_STYLE} -i src/*
|
|
||||||
|
|
||||||
check-format:
|
|
||||||
clang-format --style=${CLANG_FORMAT_STYLE} -i --dry-run -Werror src/*
|
|
||||||
|
|
||||||
build:
|
build:
|
||||||
mkdir -p build
|
mkdir -p build
|
||||||
|
|
||||||
build/hbas: build $(wildcard src/*.h src/*.c)
|
build/hbas: build src/hbas.c
|
||||||
${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas
|
${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas
|
||||||
|
|
||||||
build/example.hbf: build build/hbas examples/example.S
|
build/example.hbf: build build/hbas examples/example.S
|
||||||
./build/hbas < examples/example.S > build/example.hbf
|
./build/hbas < examples/example.S > build/example.hbf
|
||||||
xxd build/example.hbf
|
xxd build/example.hbf
|
||||||
|
|
||||||
build/hello.hbf: build build/hbas examples/hello.S
|
|
||||||
./build/hbas < examples/hello.S > build/hello.hbf
|
|
||||||
xxd build/hello.hbf
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -rf build
|
rm -rf build
|
||||||
|
|
||||||
|
|
|
@ -2,25 +2,16 @@
|
||||||
; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md
|
; https://git.ablecorp.us/AbleOS/holey-bytes/src/branch/trunk/spec.md
|
||||||
; TODO:
|
; TODO:
|
||||||
; .origin 0x1000
|
; .origin 0x1000
|
||||||
; 'c' char literals
|
; .align 0x100
|
||||||
|
; .db "hello"
|
||||||
; .struct
|
; .struct
|
||||||
start:
|
start:
|
||||||
jmp end
|
jmp end
|
||||||
un
|
un
|
||||||
|
; .db "hello world\n"
|
||||||
add16 r1, r2, r255
|
add16 r1, r2, r255
|
||||||
addi8 r1, r2, -128
|
addi8 r1, r2, -128
|
||||||
lra r1, r0, start
|
lra r1, r0, start
|
||||||
jmp start
|
jmp start
|
||||||
end:
|
end:
|
||||||
tx
|
tx
|
||||||
hello_string:
|
|
||||||
.db "Hello, w\x6frld\n", 0
|
|
||||||
hello_string_end:
|
|
||||||
.db "hi"
|
|
||||||
; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
|
|
||||||
.align 2
|
|
||||||
.dw 0x4546
|
|
||||||
.align 4
|
|
||||||
.dd 0x4748494a
|
|
||||||
.align 8
|
|
||||||
.dq 0x5051525354555657
|
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
jmp entry
|
|
||||||
|
|
||||||
puts:
|
|
||||||
; Write string to console
|
|
||||||
; r2: [IN] *const u8 String pointer
|
|
||||||
; r3: [IN] usize String length
|
|
||||||
|
|
||||||
li8 r1, 0x1 ; Write syscall
|
|
||||||
brc r2, r3, 2 ; Copy parameters
|
|
||||||
li8 r2, 0x1 ; STDOUT
|
|
||||||
eca
|
|
||||||
|
|
||||||
jal r0, r31, 0
|
|
||||||
|
|
||||||
gets:
|
|
||||||
; Read string until end of buffer or LF
|
|
||||||
; r2: [IN] *mut u8 Buffer
|
|
||||||
; r3: [IN] usize Buffer length
|
|
||||||
|
|
||||||
; Register allocations:
|
|
||||||
; r33: *mut u8 Buffer end
|
|
||||||
; r34: u8 Immediate char
|
|
||||||
; r35: u8 Const [0x0A = LF]
|
|
||||||
|
|
||||||
li8 r35, 0x0A
|
|
||||||
add64 r33, r2, r3
|
|
||||||
|
|
||||||
; Setup syscall
|
|
||||||
li8 r2, 0x1 ; Stdin
|
|
||||||
cp r3, r2
|
|
||||||
li8 r4, 0x1 ; Read one char
|
|
||||||
|
|
||||||
jeq r3, r33, end
|
|
||||||
loop:
|
|
||||||
li8 r1, 0x1 ; Read syscall
|
|
||||||
eca
|
|
||||||
addi64 r3, r3, 1
|
|
||||||
ld r34, r3, 0, 1
|
|
||||||
jeq r34, r35, end
|
|
||||||
jne r3, r33, loop
|
|
||||||
|
|
||||||
end:
|
|
||||||
; Set copied amount
|
|
||||||
sub64 r1, r33, r3
|
|
||||||
addi64 r1, r1, -1
|
|
||||||
jal r0, r31, 0
|
|
||||||
|
|
||||||
alloc_pages:
|
|
||||||
; Allocate pages
|
|
||||||
; r1: [OUT] *mut u8 Pointer to page
|
|
||||||
; r2: [IN] u16 Page count
|
|
||||||
|
|
||||||
muli16 r3, r2, 4096 ; page count
|
|
||||||
li8 r1, 0x9 ; mmap syscall
|
|
||||||
li8 r2, 0x0 ; no address set, kernel chosen
|
|
||||||
li8 r4, 0x2 ; PROT_WRITE
|
|
||||||
li8 r5, 0x20 ; MAP_ANONYMOUS
|
|
||||||
li64 r6, -1 ; Doesn't map file
|
|
||||||
li8 r7, 0x0 ; Doesn't map file
|
|
||||||
eca
|
|
||||||
|
|
||||||
jal r0, r31, 0
|
|
||||||
|
|
||||||
entry:
|
|
||||||
; Program entrypoint
|
|
||||||
|
|
||||||
; Register allocations:
|
|
||||||
; r32: *mut u8 Buffer
|
|
||||||
; r36: usize Read buffer length
|
|
||||||
|
|
||||||
; Allocate one page (4096 bytes = 4 KiB)
|
|
||||||
li8 r2, 1
|
|
||||||
jal r31, r0, alloc_pages
|
|
||||||
cp r32, r1
|
|
||||||
|
|
||||||
; Print message
|
|
||||||
lra16 r2, r0, enter_your_name
|
|
||||||
li8 r3, 17
|
|
||||||
jal r31, r0, puts
|
|
||||||
|
|
||||||
; Read name
|
|
||||||
cp r2, r32
|
|
||||||
li16 r3, 4096
|
|
||||||
jal r31, r0, gets
|
|
||||||
cp r36, r1
|
|
||||||
|
|
||||||
; Print your name is
|
|
||||||
lra16 r2, r0, your_name_is
|
|
||||||
li8 r3, 15
|
|
||||||
jal r31, r0, puts
|
|
||||||
|
|
||||||
; And now print the name
|
|
||||||
cp r2, r32
|
|
||||||
cp r3, r36
|
|
||||||
jal r31, r0, puts
|
|
||||||
|
|
||||||
tx
|
|
||||||
|
|
||||||
|
|
||||||
enter_your_name:
|
|
||||||
.db "Enter your name: "
|
|
||||||
your_name_is:
|
|
||||||
.db "\nYour name is: "
|
|
|
@ -1,8 +0,0 @@
|
||||||
li8 r1, 1 ; 1->sys::write
|
|
||||||
li8 r2, 1 ; fildes=stdout
|
|
||||||
lra16 r3, r0, hello_string ; buf=hello_string
|
|
||||||
li8 r4, 0x11 ; nbyte=0x11
|
|
||||||
eca ; sys::write(stdout, hello_string, 0x11)
|
|
||||||
tx
|
|
||||||
hello_string:
|
|
||||||
.db "Hello, AbleCorp!\n"
|
|
79
src/args.c
79
src/args.c
|
@ -1,11 +1,11 @@
|
||||||
typedef struct ArgMeta_s {
|
typedef struct ArgMeta_s {
|
||||||
char chr;
|
char chr;
|
||||||
uint8_t size;
|
uint8_t size;
|
||||||
// This is a bitset of acceptable overflow states,
|
// This is a bitset of acceptable overflow states,
|
||||||
// where accept signed = 1, accept unsigned = 2.
|
// where accept signed = 1, accept unsigned = 2.
|
||||||
// 1 -> signed, 2 -> unsigned, 3 -> whatever
|
// 1 -> signed, 2 -> unsigned, 3 -> whatever
|
||||||
uint8_t sign;
|
uint8_t sign;
|
||||||
uint8_t rel;
|
uint8_t rel;
|
||||||
} ArgMeta;
|
} ArgMeta;
|
||||||
const ArgMeta ARGS[] = {
|
const ArgMeta ARGS[] = {
|
||||||
{'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
|
{'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
|
||||||
|
@ -15,31 +15,31 @@ const ArgMeta ARGS[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef enum Operands_e {
|
typedef enum Operands_e {
|
||||||
Empty = 0,
|
Empty = 0,
|
||||||
R,
|
R,
|
||||||
RR,
|
RR,
|
||||||
RRR,
|
RRR,
|
||||||
RRRR,
|
RRRR,
|
||||||
Rx8,
|
Rx8,
|
||||||
Rx16,
|
Rx16,
|
||||||
Rx32,
|
Rx32,
|
||||||
Rx64,
|
Rx64,
|
||||||
RRx8,
|
RRx8,
|
||||||
RRx16,
|
RRx16,
|
||||||
RRx32,
|
RRx32,
|
||||||
RRx64,
|
RRx64,
|
||||||
RRs32,
|
RRs32,
|
||||||
RRs64,
|
RRs64,
|
||||||
RRu8,
|
RRu8,
|
||||||
RRu16,
|
RRu16,
|
||||||
RRu64,
|
RRu64,
|
||||||
r16,
|
r16,
|
||||||
r32,
|
r32,
|
||||||
RRr16,
|
RRr16,
|
||||||
RRr32,
|
RRr32,
|
||||||
RRr16u16,
|
RRr16u16,
|
||||||
RRr32u16,
|
RRr32u16,
|
||||||
RRu64u16,
|
RRu64u16,
|
||||||
} Operands;
|
} Operands;
|
||||||
// R -> register,
|
// R -> register,
|
||||||
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
|
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
|
||||||
|
@ -55,13 +55,12 @@ const char *TYPE_STR[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
|
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
|
||||||
|
ArgMeta arg_meta(char arg) {
|
||||||
static ArgMeta arg_meta(char arg) {
|
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
||||||
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
ArgMeta meta = ARGS[ii];
|
||||||
ArgMeta meta = ARGS[ii];
|
if (meta.chr == arg) {
|
||||||
if (meta.chr == arg) {
|
return meta;
|
||||||
return meta;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return ARGS[NARGS - 1];
|
}
|
||||||
|
return ARGS[NARGS - 1];
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,29 @@
|
||||||
const size_t INVALID = ~(size_t)0;
|
const size_t INVALID = ~(size_t)0;
|
||||||
|
|
||||||
typedef struct ByteVec_s {
|
typedef struct ByteVec_s {
|
||||||
char *buf;
|
char *buf;
|
||||||
size_t cap;
|
size_t cap;
|
||||||
size_t len;
|
size_t len;
|
||||||
} ByteVec;
|
} ByteVec;
|
||||||
|
|
||||||
static AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
|
AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
|
||||||
if (vec->len + extra < vec->len) {
|
if (vec->len + extra < vec->len) {
|
||||||
return ErrOutOfMemory;
|
return ErrOutOfMemory;
|
||||||
|
}
|
||||||
|
while (vec->len + extra > vec->cap) {
|
||||||
|
if ((~(size_t)0) / 2 < vec->cap) {
|
||||||
|
return ErrOutOfMemory;
|
||||||
}
|
}
|
||||||
while (vec->len + extra > vec->cap) {
|
vec->cap *= 2;
|
||||||
if ((~(size_t)0) / 2 < vec->cap) {
|
// multiply overflow
|
||||||
return ErrOutOfMemory;
|
if ((~(size_t)0) / el_size < vec->cap) {
|
||||||
}
|
return ErrOutOfMemory;
|
||||||
vec->cap *= 2;
|
|
||||||
// multiply overflow
|
|
||||||
if ((~(size_t)0) / el_size < vec->cap) {
|
|
||||||
return ErrOutOfMemory;
|
|
||||||
}
|
|
||||||
vec->buf = realloc(vec->buf, el_size * vec->cap);
|
|
||||||
if (vec->buf == NULL) {
|
|
||||||
vec->cap = 0;
|
|
||||||
return ErrOutOfMemory;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return 0;
|
vec->buf = realloc(vec->buf, el_size * vec->cap);
|
||||||
|
if (vec->buf == NULL) {
|
||||||
|
vec->cap = 0;
|
||||||
|
return ErrOutOfMemory;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
138
src/directive.c
138
src/directive.c
|
@ -1,138 +0,0 @@
|
||||||
AsmError push_string(char *buf, char *input, size_t len) {
|
|
||||||
size_t ndata = 0;
|
|
||||||
for (size_t pos = 0; pos < len; pos += 1) {
|
|
||||||
char chr = input[pos];
|
|
||||||
if (chr == '\\') {
|
|
||||||
if (pos + 1 >= len) {
|
|
||||||
return ErrDanglingEscape;
|
|
||||||
}
|
|
||||||
pos += 1;
|
|
||||||
chr = input[pos];
|
|
||||||
switch (chr) {
|
|
||||||
case '\\':
|
|
||||||
chr = '\\';
|
|
||||||
break;
|
|
||||||
case '"':
|
|
||||||
chr = '"';
|
|
||||||
break;
|
|
||||||
case 'r':
|
|
||||||
chr = '\r';
|
|
||||||
break;
|
|
||||||
case 'n':
|
|
||||||
chr = '\n';
|
|
||||||
break;
|
|
||||||
case '0':
|
|
||||||
chr = '\0';
|
|
||||||
break;
|
|
||||||
case 't':
|
|
||||||
chr = '\t';
|
|
||||||
break;
|
|
||||||
case 'x':
|
|
||||||
if (pos + 2 >= len) {
|
|
||||||
return ErrDanglingEscape;
|
|
||||||
}
|
|
||||||
char high = get_hex(input[pos + 1]);
|
|
||||||
char low = get_hex(input[pos + 2]);
|
|
||||||
if (high > 15 || low > 15) {
|
|
||||||
return ErrStringBadHex;
|
|
||||||
}
|
|
||||||
pos += 2;
|
|
||||||
chr = high << 4 | low;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return ErrBadStringEscape;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buf[ndata] = chr;
|
|
||||||
ndata += 1;
|
|
||||||
}
|
|
||||||
return ErrOk;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Assembles the operand list of a data directive: a comma-separated sequence
// of number literals and (for byte-sized data only) string literals, ended by
// a newline or end of input. Each value is appended to `out`.
//
// input, len: the whole source text being assembled.
// out:        output byte vector; grown as needed via ensure_push.
// tok:        in: the token preceding the first operand;
//             out: the last token consumed (the terminator on success, or the
//             offending token on error).
// word_size:  size in bytes of each numeric element.
//
// Returns ErrOk on success, or:
//   ErrOutOfMemory        - the output buffer could not be grown,
//   ErrStringDataNotByte  - a string literal was used with word_size != 1,
//   ErrNeedsDataLiteral   - an operand is neither a number nor a string,
//   ErrNeedCommaOrNewline - an operand is not followed by ',' or end of line,
//   any error reported by push_int_le (value does not fit in word_size bytes)
//   or by push_string (malformed escape sequence).
static AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok,
                          size_t word_size) {
    while (1) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNumber) {
            if (ensure_push(out, 1, word_size) != 0) {
                return ErrOutOfMemory;
            }
            // sign == 3: accept the value if it fits either as a signed or as
            // an unsigned integer of `word_size` bytes. push_int_le writes
            // nothing when the value does not fit, so its error must be
            // propagated instead of advancing over unwritten bytes.
            AsmError err =
                push_int_le(&out->buf[out->len], tok->num, word_size, 3);
            if (err != ErrOk) {
                return err;
            }
            out->len += word_size;
        } else if (tok->kind == TokString) {
            if (word_size != 1) {
                return ErrStringDataNotByte;
            }
            // NOTE(review): tok->num is used as the decoded byte count of the
            // string; presumably the tokenizer computes it -- confirm.
            if (ensure_push(out, 1, tok->num) != 0) {
                return ErrOutOfMemory;
            }

            // Skip the opening quote; tok->len - 2 drops both quotes.
            char *str = &input[tok->start + 1];
            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
            if (err != ErrOk) {
                return err;
            }
            out->len += tok->num;
        } else {
            return ErrNeedsDataLiteral;
        }
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind == TokNewline || tok->kind == TokEOF) {
            return ErrOk;
        }
        if (tok->kind == TokComma) {
            continue;
        }
        return ErrNeedCommaOrNewline;
    }
}
|
|
||||||
|
|
||||||
// Assembles a single directive whose name is the token `tok`.
//
// Recognised directives:
//   db / dw / dd / dq  - emit 1/2/4/8-byte data elements (see push_data),
//   align N            - pad the output with zero bytes up to the next
//                        multiple of N, where N must be a power of two.
//
// input, len: the whole source text being assembled.
// out:        output byte vector; grown as needed via ensure_push.
// tok:        in: the directive-name token; out: the last token consumed.
//
// Returns ErrOk on success, ErrInvalidDirective for an unknown name,
// ErrAlignNeedsNumber / ErrAlignNeedsPow2 for a bad `align` argument,
// ErrOutOfMemory when the output cannot be grown (or the aligned length
// would overflow size_t), or any error reported by push_data.
AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
    if (tok->len < 2) {
        return ErrInvalidDirective;
    }
    size_t pos = tok->start;
    char byte0 = input[pos];
    char byte1 = input[pos + 1];
    if (tok->len == 2 && byte0 == 'd') {
        // Second letter selects the element width.
        size_t word_size;
        switch (byte1) {
            case 'b':
                word_size = 1;
                break;
            case 'w':
                word_size = 2;
                break;
            case 'd':
                word_size = 4;
                break;
            case 'q':
                word_size = 8;
                break;
            default:
                return ErrInvalidDirective;
        }
        return push_data(input, len, out, tok, word_size);
    }
    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
        *tok = token(input, len, tok->start + tok->len);
        if (tok->kind != TokNumber) {
            return ErrAlignNeedsNumber;
        }
        // For a power of two N, N - 1 has exactly the low bits set.
        size_t mask = tok->num - 1;
        if (tok->num == 0 || (tok->num & mask) != 0) {
            return ErrAlignNeedsPow2;
        }
        // Reject lengths for which `out->len + mask` would wrap around.
        if ((~(size_t)0) - mask < out->len) {
            return ErrOutOfMemory;
        }
        size_t aligned = (out->len + mask) & ~mask;
        if (ensure_push(out, 1, aligned - out->len) != 0) {
            return ErrOutOfMemory;
        }
        // Zero-fill the padding so the assembled output is deterministic and
        // never contains uninitialised heap contents.
        for (size_t ii = out->len; ii < aligned; ii += 1) {
            out->buf[ii] = 0;
        }
        out->len = aligned;
        return ErrOk;
    }
    return ErrInvalidDirective;
}
|
|
|
@ -1,5 +1,5 @@
|
||||||
typedef struct EInfo_s {
|
typedef struct EInfo_s {
|
||||||
Token token;
|
Token token;
|
||||||
size_t line;
|
size_t line;
|
||||||
size_t line_start;
|
size_t line_start;
|
||||||
} EInfo;
|
} EInfo;
|
||||||
|
|
58
src/error.h
58
src/error.h
|
@ -1,33 +1,23 @@
|
||||||
typedef enum AsmError_e {
|
typedef enum AsmError_e {
|
||||||
ErrOk = 0,
|
ErrOk = 0,
|
||||||
ErrBadRegister,
|
ErrBadRegister,
|
||||||
ErrImmediateOverflow,
|
ErrImmediateOverflow,
|
||||||
ErrInvalidToken,
|
ErrInvalidToken,
|
||||||
ErrBadArgumentMeta,
|
ErrBadArgumentMeta,
|
||||||
ErrNeedCommaAfterArgument,
|
ErrNeedCommaAfterArgument,
|
||||||
ErrLabelImmediate,
|
ErrLabelImmediate,
|
||||||
ErrNumberImmediate,
|
ErrNumberImmediate,
|
||||||
ErrBadNumOverflow,
|
ErrBadNumOverflow,
|
||||||
ErrBadNumDigit,
|
ErrBadNumDigit,
|
||||||
ErrBadNumNoDigit,
|
ErrBadNumNoDigit,
|
||||||
ErrLabelAfterLabel,
|
ErrLabelAfterLabel,
|
||||||
ErrOutOfMemory,
|
ErrOutOfMemory,
|
||||||
ErrDuplicateLabel,
|
ErrDuplicateLabel,
|
||||||
ErrTrailingLine,
|
ErrTrailingLine,
|
||||||
ErrNeedDirectiveAfterDot,
|
ErrNeedDirectiveAfterDot,
|
||||||
ErrDirectiveNotImplemented,
|
ErrDirectiveNotImplemented,
|
||||||
ErrUnexpectedToken,
|
ErrUnexpectedToken,
|
||||||
ErrTriedNegateNonNumber,
|
ErrTriedNegateNonNumber,
|
||||||
ErrInvalidDirective,
|
|
||||||
ErrStringNewLine,
|
|
||||||
ErrDanglingEscape,
|
|
||||||
ErrStringBadHex,
|
|
||||||
ErrBadStringEscape,
|
|
||||||
ErrStringDataNotByte,
|
|
||||||
ErrAlignNeedsNumber,
|
|
||||||
ErrAlignNeedsPow2,
|
|
||||||
ErrNeedCommaOrNewline,
|
|
||||||
ErrNeedsDataLiteral,
|
|
||||||
} AsmError;
|
} AsmError;
|
||||||
char *ERRORS[] = {
|
char *ERRORS[] = {
|
||||||
"Success",
|
"Success",
|
||||||
|
@ -49,14 +39,4 @@ char *ERRORS[] = {
|
||||||
"Directive is not implemented",
|
"Directive is not implemented",
|
||||||
"Unexpected token",
|
"Unexpected token",
|
||||||
"Negation only works on numbers",
|
"Negation only works on numbers",
|
||||||
"Invalid directive",
|
|
||||||
"String contains a raw newline (did you forget to close the quote?)",
|
|
||||||
"Dangling escape in string literal",
|
|
||||||
"Bad hex in string literal",
|
|
||||||
"Bad escape sequence in string literal",
|
|
||||||
"String literals can be used only in .db directive",
|
|
||||||
".align requires a number",
|
|
||||||
".align requires a power of two as an argument",
|
|
||||||
"Need comma or newline after data literal",
|
|
||||||
"Data literal expects a number or a string",
|
|
||||||
};
|
};
|
||||||
|
|
90
src/hash.c
90
src/hash.c
|
@ -1,57 +1,57 @@
|
||||||
// Instruction Hash table, for faster lookups
|
// Instruction Hash table, for faster lookups
|
||||||
typedef struct InstHtNode_s {
|
typedef struct InstHtNode_s {
|
||||||
uint8_t index1;
|
uint8_t index1;
|
||||||
uint8_t index2;
|
uint8_t index2;
|
||||||
} InstHtNode;
|
} InstHtNode;
|
||||||
typedef InstHtNode *InstHt;
|
typedef InstHtNode *InstHt;
|
||||||
|
|
||||||
static uint32_t inst_hash(const char *s, size_t len) {
|
uint32_t inst_hash(const char *s, size_t len) {
|
||||||
uint32_t hash = 0;
|
uint32_t hash = 0;
|
||||||
uint32_t mul = 75;
|
uint32_t mul = 75;
|
||||||
for (size_t ii = 0; ii < len; ii += 1) {
|
for (size_t ii = 0; ii < len; ii += 1) {
|
||||||
hash ^= s[ii] * mul;
|
hash ^= s[ii] * mul;
|
||||||
hash *= mul;
|
hash *= mul;
|
||||||
}
|
}
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
static InstHt build_lookup(void) {
|
InstHt build_lookup(void) {
|
||||||
const size_t size = 256;
|
const size_t size = 256;
|
||||||
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
||||||
if (table == NULL) {
|
if (table == NULL) {
|
||||||
return table;
|
|
||||||
}
|
|
||||||
for (size_t ii = 0; ii < size; ii += 1) {
|
|
||||||
table[ii] = (InstHtNode){0xff, 0xff};
|
|
||||||
}
|
|
||||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
|
||||||
const char *mnemonic = INST[ii].mnemonic;
|
|
||||||
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
|
|
||||||
InstHtNode *node = &table[hash & 0xff];
|
|
||||||
if (node->index1 == 0xff) {
|
|
||||||
node->index1 = ii;
|
|
||||||
} else if (node->index2 == 0xff) {
|
|
||||||
node->index2 = ii;
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "more than 1 collision in hash table\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return table;
|
return table;
|
||||||
|
}
|
||||||
|
for (size_t ii = 0; ii < size; ii += 1) {
|
||||||
|
table[ii] = (InstHtNode){0xff, 0xff};
|
||||||
|
}
|
||||||
|
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||||
|
const char *mnemonic = INST[ii].mnemonic;
|
||||||
|
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
|
||||||
|
InstHtNode *node = &table[hash & 0xff];
|
||||||
|
if (node->index1 == 0xff) {
|
||||||
|
node->index1 = ii;
|
||||||
|
} else if (node->index2 == 0xff) {
|
||||||
|
node->index2 = ii;
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "more than 1 collision in hash table\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t inst_lookup(InstHt ht, const char *s, size_t len) {
|
size_t inst_lookup(InstHt ht, const char *s, size_t len) {
|
||||||
uint32_t hash = inst_hash(s, len);
|
uint32_t hash = inst_hash(s, len);
|
||||||
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
|
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
|
||||||
for (size_t ii = 0; ii < 2; ii += 1) {
|
for (size_t ii = 0; ii < 2; ii += 1) {
|
||||||
size_t idx = (size_t)node[ii];
|
size_t idx = (size_t)node[ii];
|
||||||
if (idx == 0xff) {
|
if (idx == 0xff) {
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
const char *mnemonic = INST[idx].mnemonic;
|
|
||||||
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
|
|
||||||
return idx;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return INVALID;
|
const char *mnemonic = INST[idx].mnemonic;
|
||||||
|
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return INVALID;
|
||||||
}
|
}
|
||||||
|
|
634
src/hbas.c
634
src/hbas.c
|
@ -20,7 +20,6 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
@ -35,359 +34,382 @@ SOFTWARE.
|
||||||
//
|
//
|
||||||
#include "hash.c"
|
#include "hash.c"
|
||||||
//
|
//
|
||||||
#include "push_int.c"
|
|
||||||
#include "register.c"
|
#include "register.c"
|
||||||
#include "token.c"
|
#include "token.c"
|
||||||
//
|
//
|
||||||
#include "directive.c"
|
|
||||||
#include "einfo.h"
|
#include "einfo.h"
|
||||||
|
|
||||||
// Print space-separated hex dump of each byte, 16 bytes per line.
|
void hd(char *data, size_t len) {
|
||||||
// Can be reversed with `xxd -p -r`.
|
for (size_t ii = 0; ii < len; ii += 1) {
|
||||||
static void hex_dump(char *data, size_t len) {
|
if (ii > 0 && (ii & 15) == 0) {
|
||||||
char buf[48];
|
printf("\n");
|
||||||
const char *alphabet = "0123456789abcdef";
|
|
||||||
for (size_t ii = 0; ii < len; ii += 1) {
|
|
||||||
size_t val = (uint8_t)data[ii];
|
|
||||||
size_t pos = (ii & 0x0f) * 3;
|
|
||||||
buf[pos] = alphabet[val >> 4];
|
|
||||||
buf[pos + 1] = alphabet[val & 0x0f];
|
|
||||||
buf[pos + 2] = ' ';
|
|
||||||
if (((ii & 0x0f) == 0x0f) || ii + 1 == len) {
|
|
||||||
buf[pos + 2] = '\n';
|
|
||||||
fwrite(&buf[0], 1, pos + 3, stdout);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
printf("%02x", (uint8_t)data[ii]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MIN_SIZE 4096
|
#define MIN_SIZE 4096
|
||||||
|
|
||||||
static int slurp(FILE *fd, ByteVec *out) {
|
int slurp(FILE *fd, ByteVec *out) {
|
||||||
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
||||||
size_t bread = 1;
|
size_t bread = 1;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
if (rv.buf == NULL) {
|
if (rv.buf == NULL) {
|
||||||
rv.cap = 0;
|
rv.cap = 0;
|
||||||
err = ErrOutOfMemory;
|
err = ErrOutOfMemory;
|
||||||
bread = 0;
|
bread = 0;
|
||||||
|
}
|
||||||
|
while (bread > 0) {
|
||||||
|
if (ensure_push(&rv, 1, 1) != 0) {
|
||||||
|
err = ErrOutOfMemory;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
while (bread > 0) {
|
bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
|
||||||
if (ensure_push(&rv, 1, 1) != 0) {
|
rv.len += bread;
|
||||||
err = ErrOutOfMemory;
|
}
|
||||||
break;
|
*out = rv;
|
||||||
}
|
if (err == 0) {
|
||||||
bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
|
err = ferror(fd);
|
||||||
rv.len += bread;
|
}
|
||||||
}
|
return err;
|
||||||
*out = rv;
|
|
||||||
if (err == 0) {
|
|
||||||
err = ferror(fd);
|
|
||||||
}
|
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef struct Hole_s {
|
typedef struct Hole_s {
|
||||||
size_t location;
|
size_t location;
|
||||||
size_t origin;
|
size_t origin;
|
||||||
char *str;
|
char *str;
|
||||||
size_t len;
|
size_t len;
|
||||||
size_t size;
|
size_t size;
|
||||||
} Hole;
|
} Hole;
|
||||||
typedef struct HoleVec_s {
|
typedef struct HoleVec_s {
|
||||||
Hole *buf;
|
Hole *buf;
|
||||||
size_t cap;
|
size_t cap;
|
||||||
size_t len;
|
size_t len;
|
||||||
} HoleVec;
|
} HoleVec;
|
||||||
typedef struct Label_s {
|
typedef struct Label_s {
|
||||||
size_t location;
|
size_t location;
|
||||||
char *str;
|
char *str;
|
||||||
size_t len;
|
size_t len;
|
||||||
} Label;
|
} Label;
|
||||||
typedef struct LabelVec_s {
|
typedef struct LabelVec_s {
|
||||||
Label *buf;
|
Label *buf;
|
||||||
size_t cap;
|
size_t cap;
|
||||||
size_t len;
|
size_t len;
|
||||||
} LabelVec;
|
} LabelVec;
|
||||||
|
|
||||||
static size_t label_lookup(LabelVec *labels, char *name, size_t len) {
|
size_t label_lookup(LabelVec *labels, char *name, size_t len) {
|
||||||
size_t nlabels = labels->len;
|
size_t nlabels = labels->len;
|
||||||
Label *buf = labels->buf;
|
Label *buf = labels->buf;
|
||||||
for (size_t ii = 0; ii < nlabels; ii += 1) {
|
for (size_t ii = 0; ii < nlabels; ii += 1) {
|
||||||
if (len == buf->len && strncmp(buf->str, name, len) == 0) {
|
if (len == buf->len && strncmp(buf->str, name, len) == 0) {
|
||||||
return ii;
|
return ii;
|
||||||
}
|
|
||||||
buf += 1;
|
|
||||||
}
|
}
|
||||||
return INVALID;
|
buf += 1;
|
||||||
|
}
|
||||||
|
return INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
static AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
// safety: assumes the buffer has enough space for the specified integer size.
|
||||||
ByteVec *rv, HoleVec *holes) {
|
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
|
||||||
const InstDesc *inst;
|
// and bit `2` indicates that value accepts an unsigned int.
|
||||||
const char *type_str;
|
AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
|
||||||
size_t nargs;
|
// Unsigned integers must have all upper bits set to zero. To check this,
|
||||||
size_t size;
|
// we shift the value right by the integer size and verify it equals zero.
|
||||||
size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
|
int valid_uint = (val >> (size * 8)) == 0;
|
||||||
size_t inst_start = rv->len;
|
|
||||||
if (idx == INVALID) {
|
// For signed integers, the sign-extended high bits must match the sign bit.
|
||||||
return ErrInvalidToken;
|
// By shifting right by one less than the total bit size (size * 8 - 1),
|
||||||
|
// we isolate the sign bit and any sign-extended bits. For a value fitting
|
||||||
|
// in the signed range, this operation results in either 0 (for non-negative
|
||||||
|
// values) or -1 (for negative values due to sign extension).
|
||||||
|
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
|
||||||
|
|
||||||
|
// To unify the check for both positive and negative cases, we adjust
|
||||||
|
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
|
||||||
|
// enabling a single check for 0 to validate both cases. This adjustment
|
||||||
|
// simplifies the validation logic, allowing us to use a single condition to
|
||||||
|
// check for proper sign extension or zero extension in the original value.
|
||||||
|
int_shifted += int_shifted != 0;
|
||||||
|
|
||||||
|
// A valid signed integer will have `int_shifted` equal to 0
|
||||||
|
// after adjustment, indicating proper sign extension.
|
||||||
|
int valid_int = int_shifted == 0;
|
||||||
|
|
||||||
|
// Validity bitmask representing whether the value
|
||||||
|
// fits as signed, unsigned, or both.
|
||||||
|
int validity = valid_int | (valid_uint << 1);
|
||||||
|
|
||||||
|
// If the value's validity doesn't match the `sign` requirements,
|
||||||
|
// we report an overflow.
|
||||||
|
if ((validity & sign) == 0) {
|
||||||
|
return ErrImmediateOverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write out the bytes of the integer to the buffer in little-endian order,
|
||||||
|
// starting with the lowest byte first.
|
||||||
|
for (size_t ii = 0; ii < size; ii += 1) {
|
||||||
|
buf[ii] = val & 0xff;
|
||||||
|
val >>= 8;
|
||||||
|
}
|
||||||
|
return ErrOk;
|
||||||
|
}
|
||||||
|
|
||||||
|
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
||||||
|
ByteVec *rv, HoleVec *holes) {
|
||||||
|
const InstDesc *inst;
|
||||||
|
const char *type_str;
|
||||||
|
size_t nargs;
|
||||||
|
size_t size;
|
||||||
|
size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
|
||||||
|
size_t inst_start = rv->len;
|
||||||
|
if (idx == INVALID) {
|
||||||
|
return ErrInvalidToken;
|
||||||
|
}
|
||||||
|
inst = &INST[idx];
|
||||||
|
type_str = TYPE_STR[inst->type];
|
||||||
|
nargs = strlen(type_str);
|
||||||
|
size = 1;
|
||||||
|
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||||
|
char chr = type_str[ii];
|
||||||
|
ArgMeta meta = arg_meta(chr);
|
||||||
|
if (meta.chr == 0) {
|
||||||
|
return ErrBadArgumentMeta;
|
||||||
}
|
}
|
||||||
inst = &INST[idx];
|
size += meta.size;
|
||||||
type_str = TYPE_STR[inst->type];
|
}
|
||||||
nargs = strlen(type_str);
|
if (ensure_push(rv, 1, size) != 0) {
|
||||||
size = 1;
|
return ErrOutOfMemory;
|
||||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
}
|
||||||
char chr = type_str[ii];
|
rv->buf[rv->len] = inst->opcode;
|
||||||
ArgMeta meta = arg_meta(chr);
|
rv->len += 1;
|
||||||
if (meta.chr == 0) {
|
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||||
return ErrBadArgumentMeta;
|
if (ii > 0) {
|
||||||
}
|
*tok = token(input, len, tok->start + tok->len);
|
||||||
size += meta.size;
|
if (tok->kind != TokComma) {
|
||||||
|
return ErrNeedCommaAfterArgument;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (ensure_push(rv, 1, size) != 0) {
|
char chr = type_str[ii];
|
||||||
return ErrOutOfMemory;
|
ArgMeta meta = arg_meta(chr);
|
||||||
|
uint64_t is_negative = 0;
|
||||||
|
*tok = token(input, len, tok->start + tok->len);
|
||||||
|
if (tok->kind == TokNeg) {
|
||||||
|
*tok = token(input, len, tok->start + tok->len);
|
||||||
|
if (tok->kind != TokNumber) {
|
||||||
|
return ErrTriedNegateNonNumber;
|
||||||
|
}
|
||||||
|
is_negative -= 1;
|
||||||
}
|
}
|
||||||
rv->buf[rv->len] = inst->opcode;
|
if (chr == 'R') {
|
||||||
rv->len += 1;
|
int reg = parse_register(&input[tok->start], tok->len);
|
||||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
if (reg > 255) {
|
||||||
if (ii > 0) {
|
return ErrBadRegister;
|
||||||
*tok = token(input, len, tok->start + tok->len);
|
}
|
||||||
if (tok->kind != TokComma) {
|
rv->buf[rv->len] = (char)(reg & 0xff);
|
||||||
return ErrNeedCommaAfterArgument;
|
rv->len += 1;
|
||||||
}
|
} else {
|
||||||
}
|
uint64_t num_to_write;
|
||||||
char chr = type_str[ii];
|
if (meta.rel == 1 || meta.size == 8) {
|
||||||
ArgMeta meta = arg_meta(chr);
|
if (tok->kind == TokIdent) {
|
||||||
uint64_t is_negative = 0;
|
if (ensure_push((ByteVec*)holes, sizeof(Hole), 1) != 0) {
|
||||||
*tok = token(input, len, tok->start + tok->len);
|
return ErrOutOfMemory;
|
||||||
if (tok->kind == TokNeg) {
|
}
|
||||||
*tok = token(input, len, tok->start + tok->len);
|
holes->buf[holes->len] = (Hole) {
|
||||||
if (tok->kind != TokNumber) {
|
.location = rv->len,
|
||||||
return ErrTriedNegateNonNumber;
|
.origin = inst_start,
|
||||||
}
|
.str = &input[tok->start],
|
||||||
is_negative -= 1;
|
.len = tok->len,
|
||||||
}
|
.size = (size_t)meta.size,
|
||||||
if (chr == 'R') {
|
};
|
||||||
int reg = parse_register(&input[tok->start], tok->len);
|
holes->len += 1;
|
||||||
if (reg > 255) {
|
num_to_write = 0;
|
||||||
return ErrBadRegister;
|
} else if (tok->kind == TokNumber) {
|
||||||
}
|
num_to_write = tok->num;
|
||||||
rv->buf[rv->len] = (char)(reg & 0xff);
|
|
||||||
rv->len += 1;
|
|
||||||
} else {
|
} else {
|
||||||
uint64_t num_to_write;
|
return ErrLabelImmediate;
|
||||||
if (meta.rel == 1 || meta.size == 8) {
|
|
||||||
if (tok->kind == TokIdent) {
|
|
||||||
if (ensure_push((ByteVec *)holes, sizeof(Hole), 1) != 0) {
|
|
||||||
return ErrOutOfMemory;
|
|
||||||
}
|
|
||||||
holes->buf[holes->len] = (Hole){
|
|
||||||
.location = rv->len,
|
|
||||||
.origin = inst_start,
|
|
||||||
.str = &input[tok->start],
|
|
||||||
.len = tok->len,
|
|
||||||
.size = (size_t)meta.size,
|
|
||||||
};
|
|
||||||
holes->len += 1;
|
|
||||||
num_to_write = 0;
|
|
||||||
} else if (tok->kind == TokNumber) {
|
|
||||||
num_to_write = tok->num;
|
|
||||||
} else {
|
|
||||||
return ErrLabelImmediate;
|
|
||||||
}
|
|
||||||
} else if (tok->kind == TokNumber) {
|
|
||||||
num_to_write = tok->num;
|
|
||||||
} else {
|
|
||||||
return ErrNumberImmediate;
|
|
||||||
}
|
|
||||||
// num_to_write = num_to_write ^ is_negative - is_negative;
|
|
||||||
if (is_negative) {
|
|
||||||
int64_t tmp = -(int64_t)num_to_write;
|
|
||||||
if (tmp > 0) {
|
|
||||||
return ErrBadNumOverflow;
|
|
||||||
}
|
|
||||||
num_to_write = (uint64_t)tmp;
|
|
||||||
} else if (meta.sign == 2 && (int64_t)num_to_write < 0) {
|
|
||||||
return ErrBadNumOverflow;
|
|
||||||
}
|
|
||||||
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
|
|
||||||
meta.size, meta.sign);
|
|
||||||
if (err != ErrOk) {
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
rv->len += meta.size;
|
|
||||||
}
|
}
|
||||||
|
} else if (tok->kind == TokNumber) {
|
||||||
|
num_to_write = tok->num;
|
||||||
|
} else {
|
||||||
|
return ErrNumberImmediate;
|
||||||
|
}
|
||||||
|
// num_to_write = num_to_write ^ is_negative - is_negative;
|
||||||
|
if (is_negative) {
|
||||||
|
int64_t tmp = -(int64_t)num_to_write;
|
||||||
|
if (tmp > 0) {
|
||||||
|
return ErrBadNumOverflow;
|
||||||
|
}
|
||||||
|
num_to_write = (uint64_t)tmp;
|
||||||
|
}
|
||||||
|
AsmError err =
|
||||||
|
push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
|
||||||
|
if (err != ErrOk) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
rv->len += meta.size;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return ErrOk;
|
return ErrOk;
|
||||||
}
|
}
|
||||||
|
|
||||||
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
|
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) {
|
||||||
EInfo *einfo) {
|
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
||||||
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
|
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
|
||||||
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
|
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
|
||||||
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
|
size_t line = 0;
|
||||||
if (rv.buf == NULL || holes.buf == NULL || labels.buf == NULL) {
|
size_t line_start = 0;
|
||||||
return ErrOutOfMemory;
|
size_t pos = 0;
|
||||||
}
|
// init=0, label=1, instruction=2, comment=3, newline -> 0
|
||||||
size_t line = 0;
|
size_t line_state = 0;
|
||||||
size_t line_start = 0;
|
AsmError err = ErrOk;
|
||||||
size_t pos = 0;
|
|
||||||
// init=0, label=1, instruction=2, comment=3, newline -> 0
|
|
||||||
size_t line_state = 0;
|
|
||||||
AsmError err = ErrOk;
|
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
Token tok = token(input, len, pos);
|
Token tok = token(input, len, pos);
|
||||||
einfo->token = tok;
|
einfo->token = tok;
|
||||||
pos = tok.start + tok.len;
|
pos = tok.start + tok.len;
|
||||||
if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
|
if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
|
||||||
if (tok.num) {
|
if (tok.num) {
|
||||||
err = (AsmError)tok.num;
|
err = (AsmError)tok.num;
|
||||||
} else {
|
} else {
|
||||||
err = ErrInvalidToken;
|
err = ErrInvalidToken;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tok.kind == TokEOF) {
|
if (tok.kind == TokEOF) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (tok.kind == TokComment) {
|
if (tok.kind == TokComment) {
|
||||||
line_state = 3;
|
line_state = 3;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (tok.kind == TokNewline) {
|
if (tok.kind == TokNewline) {
|
||||||
line += 1;
|
line += 1;
|
||||||
line_start = tok.start + tok.len;
|
line_start = tok.start + tok.len;
|
||||||
line_state = 0;
|
line_state = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (tok.kind == TokDot) {
|
if (tok.kind == TokDot) {
|
||||||
Token next = token(input, len, pos);
|
Token next = token(input, len, pos);
|
||||||
einfo->token = next;
|
if (next.kind == TokIdent) {
|
||||||
if (next.kind != TokIdent) {
|
err = ErrDirectiveNotImplemented;
|
||||||
err = ErrNeedDirectiveAfterDot;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
err = assemble_directive(input, len, &rv, &next);
|
|
||||||
pos = next.start + next.len;
|
|
||||||
einfo->token = next;
|
|
||||||
if (err != ErrOk) {
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (tok.kind == TokIdent) {
|
|
||||||
Token next = token(input, len, pos);
|
|
||||||
if (next.kind == TokColon) {
|
|
||||||
// Label
|
|
||||||
pos = next.start + next.len;
|
|
||||||
if (line_state >= 1) {
|
|
||||||
err = ErrLabelAfterLabel;
|
|
||||||
einfo->token = next;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
line_state = 1;
|
|
||||||
if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
|
|
||||||
err = ErrOutOfMemory;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
|
|
||||||
if (idx != INVALID) {
|
|
||||||
err = ErrDuplicateLabel;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
labels.buf[labels.len] = (Label){
|
|
||||||
.location = rv.len,
|
|
||||||
.str = &input[tok.start],
|
|
||||||
.len = tok.len,
|
|
||||||
};
|
|
||||||
labels.len += 1;
|
|
||||||
} else {
|
|
||||||
// Instruction
|
|
||||||
if (line_state >= 2) {
|
|
||||||
err = ErrTrailingLine;
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
line_state = 2;
|
|
||||||
err = assemble_instr(ht, input, len, &tok, &rv, &holes);
|
|
||||||
pos = tok.start + tok.len;
|
|
||||||
if (err != 0) {
|
|
||||||
goto end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
err = ErrUnexpectedToken;
|
|
||||||
goto end;
|
goto end;
|
||||||
|
} else {
|
||||||
|
err = ErrNeedDirectiveAfterDot;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
if (tok.kind == TokIdent) {
|
||||||
for (size_t ii = 0; ii < holes.len; ii += 1) {
|
Token next = token(input, len, pos);
|
||||||
Hole *hole = &holes.buf[ii];
|
if (next.kind == TokColon) {
|
||||||
size_t idx = label_lookup(&labels, hole->str, hole->len);
|
// Label
|
||||||
uint64_t num_to_write = labels.buf[idx].location;
|
pos = next.start + next.len;
|
||||||
uint8_t sign = 2;
|
if (line_state >= 1) {
|
||||||
if (hole->size != 8) {
|
err = ErrLabelAfterLabel;
|
||||||
sign = 1;
|
einfo->token = next;
|
||||||
num_to_write -= hole->origin;
|
goto end;
|
||||||
}
|
}
|
||||||
err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size,
|
line_state = 1;
|
||||||
sign);
|
if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
|
||||||
|
err = ErrOutOfMemory;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
|
||||||
|
if (idx != INVALID) {
|
||||||
|
err = ErrDuplicateLabel;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
labels.buf[labels.len] = (Label){
|
||||||
|
.location = rv.len,
|
||||||
|
.str = &input[tok.start],
|
||||||
|
.len = tok.len,
|
||||||
|
};
|
||||||
|
labels.len += 1;
|
||||||
|
} else {
|
||||||
|
// Instruction
|
||||||
|
if (line_state >= 2) {
|
||||||
|
err = ErrTrailingLine;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
line_state = 2;
|
||||||
|
err = assemble_instr(ht, input, len, &tok, &rv, &holes);
|
||||||
|
pos = tok.start + tok.len;
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
err = ErrUnexpectedToken;
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t ii = 0; ii < holes.len; ii += 1) {
|
||||||
|
Hole *hole = &holes.buf[ii];
|
||||||
|
size_t idx = label_lookup(&labels, hole->str, hole->len);
|
||||||
|
uint64_t num_to_write = labels.buf[idx].location;
|
||||||
|
uint8_t sign = 2;
|
||||||
|
if (hole->size != 8) {
|
||||||
|
sign = 1;
|
||||||
|
num_to_write -= hole->origin;
|
||||||
|
}
|
||||||
|
err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, sign);
|
||||||
|
if (err != 0) {
|
||||||
|
goto end;
|
||||||
|
}
|
||||||
|
}
|
||||||
end:
|
end:
|
||||||
free(holes.buf);
|
free(holes.buf);
|
||||||
free(labels.buf);
|
free(labels.buf);
|
||||||
*out = rv;
|
*out = rv;
|
||||||
einfo->line = line + 1;
|
einfo->line = line + 1;
|
||||||
einfo->line_start = line_start;
|
einfo->line_start = line_start;
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
int hex_out = 0;
|
int hex_out = 0;
|
||||||
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
|
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
|
||||||
hex_out = 1;
|
hex_out = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int err = 0;
|
int err = 0;
|
||||||
InstHt ht = NULL;
|
InstHt ht = NULL;
|
||||||
ByteVec input;
|
ByteVec input;
|
||||||
|
|
||||||
err = slurp(stdin, &input);
|
err = slurp(stdin, &input);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
fprintf(stderr, "failed to read the file: %d\n", err);
|
fprintf(stderr, "failed to read the file: %d\n", err);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
ht = build_lookup();
|
ht = build_lookup();
|
||||||
if (ht == NULL) {
|
if (ht == NULL) {
|
||||||
err = ErrOutOfMemory;
|
err = ErrOutOfMemory;
|
||||||
fprintf(stderr, "failed to init hash table: %d\n", err);
|
fprintf(stderr, "failed to init hash table: %d\n", err);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
ByteVec out;
|
ByteVec out;
|
||||||
EInfo einfo;
|
EInfo einfo;
|
||||||
err = assemble(ht, input.buf, input.len, &out, &einfo);
|
err = assemble(ht, input.buf, input.len, &out, &einfo);
|
||||||
if (err != 0) {
|
if (err != 0) {
|
||||||
size_t column = einfo.token.start - einfo.line_start + 1;
|
size_t column = einfo.token.start - einfo.line_start + 1;
|
||||||
fprintf(stderr,
|
fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
|
||||||
"failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
|
ERRORS[err], einfo.line, column, (int)einfo.token.len,
|
||||||
ERRORS[err], einfo.line, column, (int)einfo.token.len,
|
&input.buf[einfo.token.start]);
|
||||||
&input.buf[einfo.token.start]);
|
goto done;
|
||||||
goto done;
|
}
|
||||||
}
|
if (hex_out) {
|
||||||
if (hex_out) {
|
hd(out.buf, out.len);
|
||||||
hex_dump(out.buf, out.len);
|
} else {
|
||||||
} else {
|
fwrite(out.buf, 1, out.len, stdout);
|
||||||
fwrite(out.buf, 1, out.len, stdout);
|
}
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
free(ht);
|
free(ht);
|
||||||
free(input.buf);
|
free(input.buf);
|
||||||
free(out.buf);
|
free(out.buf);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,138 +1,78 @@
|
||||||
typedef struct InstDesc_s {
|
typedef struct InstDesc_s {
|
||||||
char *mnemonic;
|
char *mnemonic;
|
||||||
unsigned char opcode;
|
unsigned char opcode;
|
||||||
Operands type;
|
Operands type;
|
||||||
} InstDesc;
|
} InstDesc;
|
||||||
|
|
||||||
const InstDesc INST[] = {
|
const InstDesc INST[] = {
|
||||||
{.opcode = 0x00, .mnemonic = "un", .type = Empty},
|
{"un", 0x00, Empty}, {"tx", 0x01, Empty},
|
||||||
{.opcode = 0x01, .mnemonic = "tx", .type = Empty},
|
{"nop", 0x02, Empty}, {"add8", 0x03, RRR},
|
||||||
{.opcode = 0x02, .mnemonic = "nop", .type = Empty},
|
{"add16", 0x04, RRR}, {"add32", 0x05, RRR},
|
||||||
{.opcode = 0x03, .mnemonic = "add8", .type = RRR},
|
{"add64", 0x06, RRR}, {"sub8", 0x07, RRR},
|
||||||
{.opcode = 0x04, .mnemonic = "add16", .type = RRR},
|
{"sub16", 0x08, RRR}, {"sub32", 0x09, RRR},
|
||||||
{.opcode = 0x05, .mnemonic = "add32", .type = RRR},
|
{"sub64", 0x0A, RRR}, {"mul8", 0x0B, RRR},
|
||||||
{.opcode = 0x06, .mnemonic = "add64", .type = RRR},
|
{"mul16", 0x0C, RRR}, {"mul32", 0x0D, RRR},
|
||||||
{.opcode = 0x07, .mnemonic = "sub8", .type = RRR},
|
{"mul64", 0x0E, RRR}, {"and", 0x0F, RRR},
|
||||||
{.opcode = 0x08, .mnemonic = "sub16", .type = RRR},
|
{"or", 0x10, RRR}, {"xor", 0x11, RRR},
|
||||||
{.opcode = 0x09, .mnemonic = "sub32", .type = RRR},
|
{"slu8", 0x12, RRR}, {"slu16", 0x13, RRR},
|
||||||
{.opcode = 0x0A, .mnemonic = "sub64", .type = RRR},
|
{"slu32", 0x14, RRR}, {"slu64", 0x15, RRR},
|
||||||
{.opcode = 0x0B, .mnemonic = "mul8", .type = RRR},
|
{"sru8", 0x16, RRR}, {"sru16", 0x17, RRR},
|
||||||
{.opcode = 0x0C, .mnemonic = "mul16", .type = RRR},
|
{"sru32", 0x18, RRR}, {"sru64", 0x19, RRR},
|
||||||
{.opcode = 0x0D, .mnemonic = "mul32", .type = RRR},
|
{"srs8", 0x1A, RRR}, {"srs16", 0x1B, RRR},
|
||||||
{.opcode = 0x0E, .mnemonic = "mul64", .type = RRR},
|
{"srs32", 0x1C, RRR}, {"srs64", 0x1D, RRR},
|
||||||
{.opcode = 0x0F, .mnemonic = "and", .type = RRR},
|
{"cmpu", 0x1E, RRR}, {"cmps", 0x1F, RRR},
|
||||||
{.opcode = 0x10, .mnemonic = "or", .type = RRR},
|
{"diru8", 0x20, RRRR}, {"diru16", 0x21, RRRR},
|
||||||
{.opcode = 0x11, .mnemonic = "xor", .type = RRR},
|
{"diru32", 0x22, RRRR}, {"diru64", 0x23, RRRR},
|
||||||
{.opcode = 0x12, .mnemonic = "slu8", .type = RRR},
|
{"dirs8", 0x24, RRRR}, {"dirs16", 0x25, RRRR},
|
||||||
{.opcode = 0x13, .mnemonic = "slu16", .type = RRR},
|
{"dirs32", 0x26, RRRR}, {"dirs64", 0x27, RRRR},
|
||||||
{.opcode = 0x14, .mnemonic = "slu32", .type = RRR},
|
{"neg", 0x28, RR}, {"not", 0x29, RR},
|
||||||
{.opcode = 0x15, .mnemonic = "slu64", .type = RRR},
|
{"sxt8", 0x2A, RR}, {"sxt16", 0x2B, RR},
|
||||||
{.opcode = 0x16, .mnemonic = "sru8", .type = RRR},
|
{"sxt32", 0x2C, RR}, {"addi8", 0x2D, RRx8},
|
||||||
{.opcode = 0x17, .mnemonic = "sru16", .type = RRR},
|
{"addi16", 0x2E, RRx16}, {"addi32", 0x2F, RRx32},
|
||||||
{.opcode = 0x18, .mnemonic = "sru32", .type = RRR},
|
{"addi64", 0x30, RRx64}, {"muli8", 0x31, RRx8},
|
||||||
{.opcode = 0x19, .mnemonic = "sru64", .type = RRR},
|
{"muli16", 0x32, RRx16}, {"muli32", 0x33, RRx32},
|
||||||
{.opcode = 0x1A, .mnemonic = "srs8", .type = RRR},
|
{"muli64", 0x34, RRx64}, {"andi", 0x35, RRx64},
|
||||||
{.opcode = 0x1B, .mnemonic = "srs16", .type = RRR},
|
{"ori", 0x36, RRx64}, {"xori", 0x37, RRx64},
|
||||||
{.opcode = 0x1C, .mnemonic = "srs32", .type = RRR},
|
{"slui8", 0x38, RRu8}, {"slui16", 0x39, RRu8},
|
||||||
{.opcode = 0x1D, .mnemonic = "srs64", .type = RRR},
|
{"slui32", 0x3A, RRu8}, {"slui64", 0x3B, RRu8},
|
||||||
{.opcode = 0x1E, .mnemonic = "cmpu", .type = RRR},
|
{"srui8", 0x3C, RRu8}, {"srui16", 0x3D, RRu8},
|
||||||
{.opcode = 0x1F, .mnemonic = "cmps", .type = RRR},
|
{"srui32", 0x3E, RRu8}, {"srui64", 0x3F, RRu8},
|
||||||
{.opcode = 0x20, .mnemonic = "diru8", .type = RRRR},
|
{"srsi8", 0x40, RRu8}, {"srsi16", 0x41, RRu8},
|
||||||
{.opcode = 0x21, .mnemonic = "diru16", .type = RRRR},
|
{"srsi32", 0x42, RRu8}, {"srsi64", 0x43, RRu8},
|
||||||
{.opcode = 0x22, .mnemonic = "diru32", .type = RRRR},
|
{"cmpui", 0x44, RRu64}, {"cmpsi", 0x45, RRs64},
|
||||||
{.opcode = 0x23, .mnemonic = "diru64", .type = RRRR},
|
{"cp", 0x46, RR}, {"swa", 0x47, RR},
|
||||||
{.opcode = 0x24, .mnemonic = "dirs8", .type = RRRR},
|
{"li8", 0x48, Rx8}, {"li16", 0x49, Rx16},
|
||||||
{.opcode = 0x25, .mnemonic = "dirs16", .type = RRRR},
|
{"li32", 0x4A, Rx32}, {"li64", 0x4B, Rx64},
|
||||||
{.opcode = 0x26, .mnemonic = "dirs32", .type = RRRR},
|
{"lra", 0x4C, RRr32}, {"ld", 0x4D, RRu64u16},
|
||||||
{.opcode = 0x27, .mnemonic = "dirs64", .type = RRRR},
|
{"st", 0x4E, RRu64u16}, {"ldr", 0x4F, RRr32u16},
|
||||||
{.opcode = 0x28, .mnemonic = "neg", .type = RR},
|
{"str", 0x50, RRr32u16}, {"bmc", 0x51, RRu16},
|
||||||
{.opcode = 0x29, .mnemonic = "not", .type = RR},
|
{"brc", 0x52, RRu8}, {"jmp", 0x53, r32},
|
||||||
{.opcode = 0x2A, .mnemonic = "sxt8", .type = RR},
|
{"jal", 0x54, RRr32}, {"jala", 0x55, RRu64},
|
||||||
{.opcode = 0x2B, .mnemonic = "sxt16", .type = RR},
|
{"jeq", 0x56, RRr16}, {"jne", 0x57, RRr16},
|
||||||
{.opcode = 0x2C, .mnemonic = "sxt32", .type = RR},
|
{"jltu", 0x58, RRr16}, {"jgtu", 0x59, RRr16},
|
||||||
{.opcode = 0x2D, .mnemonic = "addi8", .type = RRx8},
|
{"jlts", 0x5A, RRr16}, {"jgts", 0x5B, RRr16},
|
||||||
{.opcode = 0x2E, .mnemonic = "addi16", .type = RRx16},
|
{"eca", 0x5C, Empty}, {"ebp", 0x5D, Empty},
|
||||||
{.opcode = 0x2F, .mnemonic = "addi32", .type = RRx32},
|
{"fadd32", 0x5E, RRR}, {"fadd64", 0x5F, RRR},
|
||||||
{.opcode = 0x30, .mnemonic = "addi64", .type = RRx64},
|
{"fsub32", 0x60, RRR}, {"fsub64", 0x61, RRR},
|
||||||
{.opcode = 0x31, .mnemonic = "muli8", .type = RRx8},
|
{"fmul32", 0x62, RRR}, {"fmul64", 0x63, RRR},
|
||||||
{.opcode = 0x32, .mnemonic = "muli16", .type = RRx16},
|
{"fdiv32", 0x64, RRR}, {"fdiv64", 0x65, RRR},
|
||||||
{.opcode = 0x33, .mnemonic = "muli32", .type = RRx32},
|
{"fma32", 0x66, RRRR}, {"fma64", 0x67, RRRR},
|
||||||
{.opcode = 0x34, .mnemonic = "muli64", .type = RRx64},
|
{"fcmplt32", 0x6A, RRR}, {"fcmplt64", 0x6B, RRR},
|
||||||
{.opcode = 0x35, .mnemonic = "andi", .type = RRx64},
|
{"fcmpgt32", 0x6C, RRR}, {"fcmpgt64", 0x6D, RRR},
|
||||||
{.opcode = 0x36, .mnemonic = "ori", .type = RRx64},
|
{"itf32", 0x6E, RR}, {"itf64", 0x6F, RR},
|
||||||
{.opcode = 0x37, .mnemonic = "xori", .type = RRx64},
|
{"fti32", 0x70, RRu8}, {"fti64", 0x71, RRu8},
|
||||||
{.opcode = 0x38, .mnemonic = "slui8", .type = RRu8},
|
{"fc32t64", 0x72, RR}, {"fc64t32", 0x73, RR},
|
||||||
{.opcode = 0x39, .mnemonic = "slui16", .type = RRu8},
|
{"lra16", 0x74, RRr16}, {"ldr16", 0x75, RRr16u16},
|
||||||
{.opcode = 0x3A, .mnemonic = "slui32", .type = RRu8},
|
{"str16", 0x76, RRr16u16}, {"jmp16", 0x77, r16},
|
||||||
{.opcode = 0x3B, .mnemonic = "slui64", .type = RRu8},
|
|
||||||
{.opcode = 0x3C, .mnemonic = "srui8", .type = RRu8},
|
|
||||||
{.opcode = 0x3D, .mnemonic = "srui16", .type = RRu8},
|
|
||||||
{.opcode = 0x3E, .mnemonic = "srui32", .type = RRu8},
|
|
||||||
{.opcode = 0x3F, .mnemonic = "srui64", .type = RRu8},
|
|
||||||
{.opcode = 0x40, .mnemonic = "srsi8", .type = RRu8},
|
|
||||||
{.opcode = 0x41, .mnemonic = "srsi16", .type = RRu8},
|
|
||||||
{.opcode = 0x42, .mnemonic = "srsi32", .type = RRu8},
|
|
||||||
{.opcode = 0x43, .mnemonic = "srsi64", .type = RRu8},
|
|
||||||
{.opcode = 0x44, .mnemonic = "cmpui", .type = RRu64},
|
|
||||||
{.opcode = 0x45, .mnemonic = "cmpsi", .type = RRs64},
|
|
||||||
{.opcode = 0x46, .mnemonic = "cp", .type = RR},
|
|
||||||
{.opcode = 0x47, .mnemonic = "swa", .type = RR},
|
|
||||||
{.opcode = 0x48, .mnemonic = "li8", .type = Rx8},
|
|
||||||
{.opcode = 0x49, .mnemonic = "li16", .type = Rx16},
|
|
||||||
{.opcode = 0x4A, .mnemonic = "li32", .type = Rx32},
|
|
||||||
{.opcode = 0x4B, .mnemonic = "li64", .type = Rx64},
|
|
||||||
{.opcode = 0x4C, .mnemonic = "lra", .type = RRr32},
|
|
||||||
{.opcode = 0x4D, .mnemonic = "ld", .type = RRu64u16},
|
|
||||||
{.opcode = 0x4E, .mnemonic = "st", .type = RRu64u16},
|
|
||||||
{.opcode = 0x4F, .mnemonic = "ldr", .type = RRr32u16},
|
|
||||||
{.opcode = 0x50, .mnemonic = "str", .type = RRr32u16},
|
|
||||||
{.opcode = 0x51, .mnemonic = "bmc", .type = RRu16},
|
|
||||||
{.opcode = 0x52, .mnemonic = "brc", .type = RRu8},
|
|
||||||
{.opcode = 0x53, .mnemonic = "jmp", .type = r32},
|
|
||||||
{.opcode = 0x54, .mnemonic = "jal", .type = RRr32},
|
|
||||||
{.opcode = 0x55, .mnemonic = "jala", .type = RRu64},
|
|
||||||
{.opcode = 0x56, .mnemonic = "jeq", .type = RRr16},
|
|
||||||
{.opcode = 0x57, .mnemonic = "jne", .type = RRr16},
|
|
||||||
{.opcode = 0x58, .mnemonic = "jltu", .type = RRr16},
|
|
||||||
{.opcode = 0x59, .mnemonic = "jgtu", .type = RRr16},
|
|
||||||
{.opcode = 0x5A, .mnemonic = "jlts", .type = RRr16},
|
|
||||||
{.opcode = 0x5B, .mnemonic = "jgts", .type = RRr16},
|
|
||||||
{.opcode = 0x5C, .mnemonic = "eca", .type = Empty},
|
|
||||||
{.opcode = 0x5D, .mnemonic = "ebp", .type = Empty},
|
|
||||||
{.opcode = 0x5E, .mnemonic = "fadd32", .type = RRR},
|
|
||||||
{.opcode = 0x5F, .mnemonic = "fadd64", .type = RRR},
|
|
||||||
{.opcode = 0x60, .mnemonic = "fsub32", .type = RRR},
|
|
||||||
{.opcode = 0x61, .mnemonic = "fsub64", .type = RRR},
|
|
||||||
{.opcode = 0x62, .mnemonic = "fmul32", .type = RRR},
|
|
||||||
{.opcode = 0x63, .mnemonic = "fmul64", .type = RRR},
|
|
||||||
{.opcode = 0x64, .mnemonic = "fdiv32", .type = RRR},
|
|
||||||
{.opcode = 0x65, .mnemonic = "fdiv64", .type = RRR},
|
|
||||||
{.opcode = 0x66, .mnemonic = "fma32", .type = RRRR},
|
|
||||||
{.opcode = 0x67, .mnemonic = "fma64", .type = RRRR},
|
|
||||||
// 68, 69?
|
|
||||||
{.opcode = 0x6A, .mnemonic = "fcmplt32", .type = RRR},
|
|
||||||
{.opcode = 0x6B, .mnemonic = "fcmplt64", .type = RRR},
|
|
||||||
{.opcode = 0x6C, .mnemonic = "fcmpgt32", .type = RRR},
|
|
||||||
{.opcode = 0x6D, .mnemonic = "fcmpgt64", .type = RRR},
|
|
||||||
{.opcode = 0x6E, .mnemonic = "itf32", .type = RR},
|
|
||||||
{.opcode = 0x6F, .mnemonic = "itf64", .type = RR},
|
|
||||||
{.opcode = 0x70, .mnemonic = "fti32", .type = RRu8},
|
|
||||||
{.opcode = 0x71, .mnemonic = "fti64", .type = RRu8},
|
|
||||||
{.opcode = 0x72, .mnemonic = "fc32t64", .type = RR},
|
|
||||||
{.opcode = 0x73, .mnemonic = "fc64t32", .type = RR},
|
|
||||||
{.opcode = 0x74, .mnemonic = "lra16", .type = RRr16},
|
|
||||||
{.opcode = 0x75, .mnemonic = "ldr16", .type = RRr16u16},
|
|
||||||
{.opcode = 0x76, .mnemonic = "str16", .type = RRr16u16},
|
|
||||||
{.opcode = 0x77, .mnemonic = "jmp16", .type = r16},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);
|
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);
|
||||||
size_t inst_find(const char *mnemonic, size_t len) {
|
size_t inst_find(const char *mnemonic, size_t len) {
|
||||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||||
const char *entry = INST[ii].mnemonic;
|
const char *entry = INST[ii].mnemonic;
|
||||||
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
|
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
|
||||||
return ii;
|
return ii;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return INVALID;
|
}
|
||||||
|
return INVALID;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,55 +0,0 @@
|
||||||
|
|
||||||
static bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
|
|
||||||
// All 64-bit values are considered valid.
|
|
||||||
if (size == 8) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// Unsigned integers must have all upper bits set to zero. To check this,
|
|
||||||
// we shift the value right by the integer size and verify it equals zero.
|
|
||||||
int valid_uint = (val >> (size * 8)) == 0;
|
|
||||||
|
|
||||||
// For signed integers, the sign-extended high bits must match the sign bit.
|
|
||||||
// By shifting right by one less than the total bit size (size * 8 - 1),
|
|
||||||
// we isolate the sign bit and any sign-extended bits. For a value fitting
|
|
||||||
// in the signed range, this operation results in either 0 (for non-negative
|
|
||||||
// values) or -1 (for negative values due to sign extension).
|
|
||||||
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
|
|
||||||
|
|
||||||
// To unify the check for both positive and negative cases, we adjust
|
|
||||||
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
|
|
||||||
// enabling a single check for 0 to validate both cases. This adjustment
|
|
||||||
// simplifies the validation logic, allowing us to use a single condition to
|
|
||||||
// check for proper sign extension or zero extension in the original value.
|
|
||||||
int_shifted += int_shifted != 0;
|
|
||||||
|
|
||||||
// A valid signed integer will have `int_shifted` equal to 0
|
|
||||||
// after adjustment, indicating proper sign extension.
|
|
||||||
int valid_int = int_shifted == 0;
|
|
||||||
|
|
||||||
// Validity bitmask that represents whether the value
|
|
||||||
// fits as signed, unsigned, or both.
|
|
||||||
int validity = valid_int | (valid_uint << 1);
|
|
||||||
|
|
||||||
// If the value's validity doesn't match the `sign` requirements,
|
|
||||||
// we report an overflow.
|
|
||||||
return (validity & sign) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// safety: assumes the buffer has enough room for the specified integer size.
|
|
||||||
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
|
|
||||||
// and bit `2` indicates that value accepts an unsigned int.
|
|
||||||
static AsmError push_int_le(char *buf, uint64_t val, size_t size,
|
|
||||||
uint8_t sign) {
|
|
||||||
if (!check_valid_int(val, size, sign)) {
|
|
||||||
return ErrImmediateOverflow;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write out the bytes of the integer to the buffer in little-endian order,
|
|
||||||
// starting with the lowest byte first.
|
|
||||||
for (size_t ii = 0; ii < size; ii += 1) {
|
|
||||||
buf[ii] = val & 0xff;
|
|
||||||
val >>= 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ErrOk;
|
|
||||||
}
|
|
|
@ -1,23 +1,23 @@
|
||||||
static int parse_register(char *name, size_t len) {
|
int parse_register(char *name, size_t len) {
|
||||||
if (name[0] != 'r') {
|
if (name[0] != 'r') {
|
||||||
return 256; // Register name should start with 'r'
|
return 256; // Register name should start with 'r'
|
||||||
|
}
|
||||||
|
if (len > 4) {
|
||||||
|
return 256; // Register name too long
|
||||||
|
}
|
||||||
|
uint16_t rv = 0;
|
||||||
|
if (len > 2 && name[1] == '0') {
|
||||||
|
return 256; // Extra zero suffix
|
||||||
|
}
|
||||||
|
for (size_t ii = 1; ii < len; ii += 1) {
|
||||||
|
char chr = name[ii];
|
||||||
|
if (!(chr >= '0' && chr <= '9')) {
|
||||||
|
return 256; // Register name must only contain numbers
|
||||||
}
|
}
|
||||||
if (len > 4) {
|
rv = rv * 10 + (chr - '0');
|
||||||
return 256; // Register name too long
|
}
|
||||||
}
|
if (rv > 255) {
|
||||||
uint16_t rv = 0;
|
return 256; // Register number too large
|
||||||
if (len > 2 && name[1] == '0') {
|
}
|
||||||
return 256; // Extra zero suffix
|
return (int)rv;
|
||||||
}
|
|
||||||
for (size_t ii = 1; ii < len; ii += 1) {
|
|
||||||
char chr = name[ii];
|
|
||||||
if (!(chr >= '0' && chr <= '9')) {
|
|
||||||
return 256; // Register name must only contain numbers
|
|
||||||
}
|
|
||||||
rv = rv * 10 + (chr - '0');
|
|
||||||
}
|
|
||||||
if (rv > 255) {
|
|
||||||
return 256; // Register number too large
|
|
||||||
}
|
|
||||||
return (int)rv;
|
|
||||||
}
|
}
|
||||||
|
|
71
src/token.c
71
src/token.c
|
@ -10,7 +10,6 @@ typedef enum TokenKind_e {
|
||||||
TokColon = ':',
|
TokColon = ':',
|
||||||
TokComment = ';',
|
TokComment = ';',
|
||||||
TokNewline = 'n',
|
TokNewline = 'n',
|
||||||
TokString = 's',
|
|
||||||
} TokenKind;
|
} TokenKind;
|
||||||
typedef struct Token_s {
|
typedef struct Token_s {
|
||||||
TokenKind kind;
|
TokenKind kind;
|
||||||
|
@ -19,13 +18,12 @@ typedef struct Token_s {
|
||||||
uint64_t num;
|
uint64_t num;
|
||||||
} Token;
|
} Token;
|
||||||
|
|
||||||
static Token token_ident(char *input, size_t len, size_t pos) {
|
Token token_ident(char *input, size_t len, size_t pos) {
|
||||||
size_t start = pos;
|
size_t start = pos;
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
char chr = input[pos];
|
char chr = input[pos];
|
||||||
char chru = chr & ~0x20;
|
char chru = chr & ~0x20;
|
||||||
int good = chr == '_' || (chr >= '0' && chr <= '9') ||
|
int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z');
|
||||||
(chru >= 'A' && chru <= 'Z');
|
|
||||||
if (!good) {
|
if (!good) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -34,7 +32,7 @@ static Token token_ident(char *input, size_t len, size_t pos) {
|
||||||
return (Token){TokIdent, start, pos - start, 0};
|
return (Token){TokIdent, start, pos - start, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
static Token token_number(char *input, size_t len, size_t pos) {
|
Token token_number(char *input, size_t len, size_t pos) {
|
||||||
char *ptr = &input[pos];
|
char *ptr = &input[pos];
|
||||||
char next = '\0';
|
char next = '\0';
|
||||||
size_t start = pos;
|
size_t start = pos;
|
||||||
|
@ -110,65 +108,7 @@ static Token token_number(char *input, size_t len, size_t pos) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static char get_hex(char chr) {
|
Token token(char *input, size_t len, size_t pos) {
|
||||||
char chru = chr & ~0x20;
|
|
||||||
if (chr >= '0' && chr <= '9') {
|
|
||||||
return chr - '0';
|
|
||||||
}
|
|
||||||
if (chru >= 'A' && chru <= 'F') {
|
|
||||||
return chru - ('A' - 10);
|
|
||||||
}
|
|
||||||
return 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
static Token token_string(char *input, size_t len, size_t pos) {
|
|
||||||
size_t start = pos;
|
|
||||||
size_t ndata = 0;
|
|
||||||
for (pos += 1; pos < len; pos += 1) {
|
|
||||||
if (input[pos] == '"') {
|
|
||||||
return (Token){TokString, start, pos + 1 - start, ndata};
|
|
||||||
}
|
|
||||||
if (input[pos] == '\n' || input[pos] == '\r') {
|
|
||||||
return (Token){TokInvalid, start, pos + 1 - start,
|
|
||||||
ErrStringNewLine};
|
|
||||||
}
|
|
||||||
if (input[pos] == '\\') {
|
|
||||||
if (pos + 1 >= len) {
|
|
||||||
return (Token){TokInvalid, start, pos - start,
|
|
||||||
ErrDanglingEscape};
|
|
||||||
}
|
|
||||||
pos += 1;
|
|
||||||
switch (input[pos]) {
|
|
||||||
case '\\':
|
|
||||||
case '"':
|
|
||||||
case 'r':
|
|
||||||
case 'n':
|
|
||||||
case '0':
|
|
||||||
case 't':
|
|
||||||
break;
|
|
||||||
case 'x':
|
|
||||||
if (pos + 2 >= len) {
|
|
||||||
return (Token){TokInvalid, start, pos - start,
|
|
||||||
ErrDanglingEscape};
|
|
||||||
}
|
|
||||||
if (get_hex(input[pos + 1]) > 15 ||
|
|
||||||
get_hex(input[pos + 2]) > 15) {
|
|
||||||
return (Token){TokInvalid, start, pos - start,
|
|
||||||
ErrStringBadHex};
|
|
||||||
}
|
|
||||||
pos += 2;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return (Token){TokInvalid, start, pos - start,
|
|
||||||
ErrBadStringEscape};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ndata += 1;
|
|
||||||
}
|
|
||||||
return (Token){TokString, start, pos - start, ndata};
|
|
||||||
}
|
|
||||||
|
|
||||||
static Token token(char *input, size_t len, size_t pos) {
|
|
||||||
char chr, chru;
|
char chr, chru;
|
||||||
char *ptr = &input[pos];
|
char *ptr = &input[pos];
|
||||||
while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) {
|
while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) {
|
||||||
|
@ -198,9 +138,6 @@ static Token token(char *input, size_t len, size_t pos) {
|
||||||
}
|
}
|
||||||
return (Token){TokComment, pos, clen, 0};
|
return (Token){TokComment, pos, clen, 0};
|
||||||
}
|
}
|
||||||
if (chr == '"') {
|
|
||||||
return token_string(input, len, pos);
|
|
||||||
}
|
|
||||||
if (chr >= '0' && chr <= '9') {
|
if (chr >= '0' && chr <= '9') {
|
||||||
return token_number(input, len, pos);
|
return token_number(input, len, pos);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue