From 3dff79adc5de0433c433d22ffec272471ca96b34 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 06:58:28 -0600 Subject: [PATCH 1/8] include stdint --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 8fd1bae..89675ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ hbas -example \ No newline at end of file +*.hbf \ No newline at end of file From ed28936be162aca6f9b2e167d4c48aaa57890188 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 08:09:18 -0600 Subject: [PATCH 2/8] modularize --- hash.c | 73 +++++ hbas.c | 719 +++++++++++++++++++++---------------------------- instructions.c | 142 ++++++++++ op.h | 62 +++++ 4 files changed, 591 insertions(+), 405 deletions(-) create mode 100644 hash.c create mode 100644 instructions.c create mode 100644 op.h diff --git a/hash.c b/hash.c new file mode 100644 index 0000000..a28d6ca --- /dev/null +++ b/hash.c @@ -0,0 +1,73 @@ +// Instruction Hash table, for faster lookups +typedef struct InstHtNode_s +{ + uint8_t index1; + uint8_t index2; +} InstHtNode; +typedef InstHtNode *InstHt; + +uint32_t inst_hash(const char *s, size_t len) +{ + uint32_t hash = 0; + uint32_t mul = 75; + for (size_t ii = 0; ii < len; ii += 1) + { + hash ^= s[ii] * mul; + hash *= mul; + } + return hash; +} + +InstHt build_lookup(void) +{ + const size_t size = 256; + InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); + if (table == NULL) + { + return table; + } + for (size_t ii = 0; ii < size; ii += 1) + { + table[ii] = (InstHtNode){0xff, 0xff}; + } + for (size_t ii = 0; ii < INST_CNT; ii += 1) + { + const char *mnemonic = INST[ii].mnemonic; + uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); + InstHtNode *node = &table[hash & 0xff]; + if (node->index1 == 0xff) + { + node->index1 = ii; + } + else if (node->index2 == 0xff) + { + node->index2 = ii; + } + else + { + fprintf(stderr, "more than 1 collision in hash table\n"); + exit(1); + } + } + return table; +} + +size_t inst_lookup(InstHt ht, const char *s, size_t len) +{ + uint32_t hash = inst_hash(s, len); + uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; + for (size_t ii = 0; ii < 2; ii += 1) + { + size_t idx = (size_t)node[ii]; + if (idx == 0xff) + { + break; + } + const char *mnemonic = INST[idx].mnemonic; + if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) + { + return idx; + } + } + return INVALID; +} diff --git a/hbas.c b/hbas.c index 88a9750..1fda60f 100644 --- a/hbas.c +++ b/hbas.c @@ -25,9 +25,16 @@ SOFTWARE. #include #include -void hd(char *data, size_t len) { - for (size_t ii = 0; ii < len; ii += 1) { - if (ii > 0 && (ii & 15) == 0) { +#include "op.h" +#include "instructions.c" +#include "hash.c" + +void hd(char *data, size_t len) +{ + for (size_t ii = 0; ii < len; ii += 1) + { + if (ii > 0 && (ii & 15) == 0) + { printf("\n"); } printf("%02x", (uint8_t)data[ii]); @@ -35,41 +42,8 @@ void hd(char *data, size_t len) { printf("\n"); } -typedef enum OpType_e { - Empty = 0, - R, RR, RRR, RRRR, - Rx8, Rx16, Rx32, Rx64, - RRx8, RRx16, RRx32, RRx64, - RRs32, RRs64, - RRu8, RRu16, RRu64, - r16, r32, - RRr16, RRr32, - RRr16u16, - RRr32u16, - RRu64u16, -} OpType; -// R -> register, -// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, -// b -> Si8, h -> Si16, w -> Si32, d -> Si64, -// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64, -// o -> 16 bit relative offset, -// O -> 32 bit relative offset, - -const char *TYPE_STR[] = { - "", - "R", "RR", "RRR", "RRRR", - "R1", "R2", "R4", "R8", - "RR1", "RR2", "RR4", "RR8", - "RRw", "RRd", - "RRB", "RRH", "RRD", - "o", "O", - "RRo", "RRO", - "RRoH", - "RROH", - "RRDH", -}; - -typedef struct ArgMeta_s { +typedef struct ArgMeta_s +{ char chr; uint8_t size; // This is a bitset of acceptable overflow states, @@ -79,232 +53,39 @@ typedef struct ArgMeta_s { uint8_t rel; } ArgMeta; const ArgMeta ARGS[] = { - { 'R', 1, 2, 0 }, - { '1', 1, 3, 0 }, - { 'b', 1, 1, 0 }, - { 'B', 1, 2, 0 }, - { '2', 2, 3, 0 }, - { 'o', 2, 1, 1 }, - { 'h', 2, 1, 0 }, - { 'H', 2, 2, 0 }, - { '4', 4, 3, 0 }, - { 'w', 4, 1, 0 }, - { 'O', 4, 1, 1 }, - { 'W', 4, 2, 0 }, - { '8', 8, 3, 0 }, - { 'd', 8, 1, 0 }, - { 'D', 8, 2, 0 }, - { 0 }, + {'R', 1, 2, 0}, + {'1', 1, 3, 0}, + {'b', 1, 1, 0}, + {'B', 1, 2, 0}, + {'2', 2, 3, 0}, + {'o', 2, 1, 1}, + {'h', 2, 1, 0}, + {'H', 2, 2, 0}, + {'4', 4, 3, 0}, + {'w', 4, 1, 0}, + {'O', 4, 1, 1}, + {'W', 4, 2, 0}, + {'8', 8, 3, 0}, + {'d', 8, 1, 0}, + {'D', 8, 2, 0}, + {0}, }; const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); -ArgMeta arg_meta(char arg) { - for (size_t ii = 0; ii < NARGS; ii += 1) { +ArgMeta arg_meta(char arg) +{ + for (size_t ii = 0; ii < NARGS; ii += 1) + { ArgMeta meta = ARGS[ii]; - if (meta.chr == arg) { + if (meta.chr == arg) + { return meta; - } + } } return ARGS[NARGS - 1]; } -typedef struct InstDesc_s { - char *mnemonic; - unsigned char opcode; - OpType type; -} InstDesc; - -const InstDesc INST[] = { - { "un", 0x00, Empty }, - { "tx", 0x01, Empty }, - { "nop", 0x02, Empty }, - { "add8", 0x03, RRR }, - { "add16", 0x04, RRR }, - { "add32", 0x05, RRR }, - { "add64", 0x06, RRR }, - { "sub8", 0x07, RRR }, - { "sub16", 0x08, RRR }, - { "sub32", 0x09, RRR }, - { "sub64", 0x0A, RRR }, - { "mul8", 0x0B, RRR }, - { "mul16", 0x0C, RRR }, - { "mul32", 0x0D, RRR }, - { "mul64", 0x0E, RRR }, - { "and", 0x0F, RRR }, - { "or", 0x10, RRR }, - { "xor", 0x11, RRR }, - { "slu8", 0x12, RRR }, - { "slu16", 0x13, RRR }, - { "slu32", 0x14, RRR }, - { "slu64", 0x15, RRR }, - { "sru8", 0x16, RRR }, - { "sru16", 0x17, RRR }, - { "sru32", 0x18, RRR }, - { "sru64", 0x19, RRR }, - { "srs8", 0x1A, RRR }, - { "srs16", 0x1B, RRR }, - { "srs32", 0x1C, RRR }, - { "srs64", 0x1D, RRR }, - { "cmpu", 0x1E, RRR }, - { "cmps", 0x1F, RRR }, - { "diru8", 0x20, RRRR }, - { "diru16", 0x21, RRRR }, - { "diru32", 0x22, RRRR }, - { "diru64", 0x23, RRRR }, - { "dirs8", 0x24, RRRR }, - { "dirs16", 0x25, RRRR }, - { "dirs32", 0x26, RRRR }, - { "dirs64", 0x27, RRRR }, - { "neg", 0x28, RR }, - { "not", 0x29, RR }, - { "sxt8", 0x2A, RR }, - { "sxt16", 0x2B, RR }, - { "sxt32", 0x2C, RR }, - { "addi8", 0x2D, RRx8 }, - { "addi16", 0x2E, RRx16 }, - { "addi32", 0x2F, RRx32 }, - { "addi64", 0x30, RRx64 }, - { "muli8", 0x31, RRx8 }, - { "muli16", 0x32, RRx16 }, - { "muli32", 0x33, RRx32 }, - { "muli64", 0x34, RRx64 }, - { "andi", 0x35, RRx64 }, - { "ori", 0x36, RRx64 }, - { "xori", 0x37, RRx64 }, - { "slui8", 0x38, RRu8 }, - { "slui16", 0x39, RRu8 }, - { "slui32", 0x3A, RRu8 }, - { "slui64", 0x3B, RRu8 }, - { "srui8", 0x3C, RRu8 }, - { "srui16", 0x3D, RRu8 }, - { "srui32", 0x3E, RRu8 }, - { "srui64", 0x3F, RRu8 }, - { "srsi8", 0x40, RRu8 }, - { "srsi16", 0x41, RRu8 }, - { "srsi32", 0x42, RRu8 }, - { "srsi64", 0x43, RRu8 }, - { "cmpui", 0x44, RRu64 }, - { "cmpsi", 0x45, RRs64 }, - { "cp", 0x46, RR }, - { "swa", 0x47, RR }, - { "li8", 0x48, Rx8 }, - { "li16", 0x49, Rx16 }, - { "li32", 0x4A, Rx32 }, - { "li64", 0x4B, Rx64 }, - { "lra", 0x4C, RRr32 }, - { "ld", 0x4D, RRu64u16 }, - { "st", 0x4E, RRu64u16 }, - { "ldr", 0x4F, RRr32u16 }, - { "str", 0x50, RRr32u16 }, - { "bmc", 0x51, RRu16 }, - { "brc", 0x52, RRu8 }, - { "jmp", 0x53, r32 }, - { "jal", 0x54, RRr32 }, - { "jala", 0x55, RRu64 }, - { "jeq", 0x56, RRr16 }, - { "jne", 0x57, RRr16 }, - { "jltu", 0x58, RRr16 }, - { "jgtu", 0x59, RRr16 }, - { "jlts", 0x5A, RRr16 }, - { "jgts", 0x5B, RRr16 }, - { "eca", 0x5C, Empty }, - { "ebp", 0x5D, Empty }, - { "fadd32", 0x5E, RRR }, - { "fadd64", 0x5F, RRR }, - { "fsub32", 0x60, RRR }, - { "fsub64", 0x61, RRR }, - { "fmul32", 0x62, RRR }, - { "fmul64", 0x63, RRR }, - { "fdiv32", 0x64, RRR }, - { "fdiv64", 0x65, RRR }, - { "fma32", 0x66, RRRR }, - { "fma64", 0x67, RRRR }, - { "fcmplt32", 0x6A, RRR }, - { "fcmplt64", 0x6B, RRR }, - { "fcmpgt32", 0x6C, RRR }, - { "fcmpgt64", 0x6D, RRR }, - { "itf32", 0x6E, RR }, - { "itf64", 0x6F, RR }, - { "fti32", 0x70, RRu8 }, - { "fti64", 0x71, RRu8 }, - { "fc32t64", 0x72, RR }, - { "fc64t32", 0x73, RR }, - { "lra16", 0x74, RRr16 }, - { "ldr16", 0x75, RRr16u16 }, - { "str16", 0x76, RRr16u16 }, - { "jmp16", 0x77, r16 }, -}; - -const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); -const size_t INVALID = ~(size_t)0; -size_t inst_find(const char *mnemonic, size_t len) { - for (size_t ii = 0; ii < INST_CNT; ii += 1) { - const char *entry = INST[ii].mnemonic; - if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') { - return ii; - } - } - return INVALID; -} - -// Instruction Hash table, for faster lookups -typedef struct InstHtNode_s { - uint8_t index1; - uint8_t index2; -} InstHtNode; -typedef InstHtNode *InstHt; - -uint32_t inst_hash(const char *s, size_t len) { - uint32_t hash = 0; - uint32_t mul = 75; - for (size_t ii = 0; ii < len; ii += 1) { - hash ^= s[ii] * mul; - hash *= mul; - } - return hash; -} - -InstHt build_lookup(void) { - const size_t size = 256; - InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); - if (table == NULL) { - return table; - } - for (size_t ii = 0; ii < size; ii += 1) { - table[ii] = (InstHtNode) { 0xff, 0xff }; - } - for (size_t ii = 0; ii < INST_CNT; ii += 1) { - const char *mnemonic = INST[ii].mnemonic; - uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); - InstHtNode *node = &table[hash & 0xff]; - if (node->index1 == 0xff) { - node->index1 = ii; - } else if (node->index2 == 0xff) { - node->index2 = ii; - } else { - fprintf(stderr, "more than 1 collision in hash table\n"); - exit(1); - } - } - return table; -} - -size_t inst_lookup(InstHt ht, const char *s, size_t len) { - uint32_t hash = inst_hash(s, len); - uint8_t *node = (uint8_t*)&ht[(size_t)(hash & 0xff)]; - for (size_t ii = 0; ii < 2; ii += 1) { - size_t idx = (size_t)node[ii]; - if (idx == 0xff) { - break; - } - const char *mnemonic = INST[idx].mnemonic; - if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) { - return idx; - } - } - return INVALID; -} - -typedef enum AsmError_e { +typedef enum AsmError_e +{ ErrOk = 0, ErrBadRegister, ErrImmediateOverflow, @@ -345,27 +126,34 @@ char *ERRORS[] = { "Unexpected token", }; -typedef struct ByteVec_s { +typedef struct ByteVec_s +{ char *buf; size_t cap; size_t len; } ByteVec; -AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) { - if (vec->len + extra < vec->len) { +AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) +{ + if (vec->len + extra < vec->len) + { return ErrOutOfMemory; } - while (vec->len + extra > vec->cap) { - if ((~(size_t)0) / 2 < vec->cap) { + while (vec->len + extra > vec->cap) + { + if ((~(size_t)0) / 2 < vec->cap) + { return ErrOutOfMemory; } vec->cap *= 2; // multiply overflow - if ((~(size_t)0) / el_size < vec->cap) { + if ((~(size_t)0) / el_size < vec->cap) + { return ErrOutOfMemory; } vec->buf = realloc(vec->buf, el_size * vec->cap); - if (vec->buf == NULL) { + if (vec->buf == NULL) + { vec->cap = 0; return ErrOutOfMemory; } @@ -375,17 +163,21 @@ AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) { #define MIN_SIZE 4096 -int slurp(FILE *fd, ByteVec *out) { - ByteVec rv = { malloc(MIN_SIZE), MIN_SIZE, 0 }; +int slurp(FILE *fd, ByteVec *out) +{ + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; size_t bread = 1; int err = 0; - if (rv.buf == NULL) { + if (rv.buf == NULL) + { rv.cap = 0; err = ErrOutOfMemory; bread = 0; } - while (bread > 0) { - if (ensure_push(&rv, 1, 1) != 0) { + while (bread > 0) + { + if (ensure_push(&rv, 1, 1) != 0) + { err = ErrOutOfMemory; break; } @@ -393,13 +185,15 @@ int slurp(FILE *fd, ByteVec *out) { rv.len += bread; } *out = rv; - if (err == 0) { + if (err == 0) + { err = ferror(fd); } return err; } -typedef enum TokenKind_e { +typedef enum TokenKind_e +{ TokInvalid = '!', TokEOF = '$', TokIdent = 'A', @@ -412,30 +206,33 @@ typedef enum TokenKind_e { TokComment = ';', TokNewline = 'n', } TokenKind; -typedef struct Token_s { +typedef struct Token_s +{ TokenKind kind; size_t start; size_t len; uint64_t num; } Token; -Token token_ident(char *input, size_t len, size_t pos) { +Token token_ident(char *input, size_t len, size_t pos) +{ size_t start = pos; - while (pos < len) { + while (pos < len) + { char chr = input[pos]; char chru = chr & ~0x20; - int good = chr == '_' - || (chr >= '0' && chr <= '9') - || (chru >= 'A' && chru <= 'Z'); - if (!good) { + int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); + if (!good) + { break; } pos += 1; } - return (Token) { TokIdent, start, pos - start, 0 }; + return (Token){TokIdent, start, pos - start, 0}; } -Token token_number(char *input, size_t len, size_t pos) { +Token token_number(char *input, size_t len, size_t pos) +{ char *ptr = &input[pos]; char next = '\0'; size_t start = pos; @@ -445,21 +242,30 @@ Token token_number(char *input, size_t len, size_t pos) { uint64_t pre_overflow; AsmError bad_num = ErrOk; - if (pos + 1 < len) { + if (pos + 1 < len) + { next = ptr[1] & ~0x20; } - if (input[pos] == '0') { - if (next == 'X') { + if (input[pos] == '0') + { + if (next == 'X') + { base = 16; pos += 2; - } else if (next == 'D') { + } + else if (next == 'D') + { base = 10; pos += 2; - } else if (next == 'O') { + } + else if (next == 'O') + { base = 8; pos += 2; - } else if (next == 'B') { + } + else if (next == 'B') + { base = 2; pos += 2; } @@ -467,20 +273,30 @@ Token token_number(char *input, size_t len, size_t pos) { pre_overflow = (~(size_t)0) / base; // valid: "0x_0", "0_" // invalid: "0x_" - while (pos < len) { + while (pos < len) + { uint64_t digit; uint64_t next; char chr = input[pos]; char chru = chr & ~0x20; - if (chr == '_') { pos += 1; continue; } + if (chr == '_') + { + pos += 1; + continue; + } digit = (uint64_t)chr - (uint64_t)'0'; - if (digit >= 10) { + if (digit >= 10) + { digit = (uint64_t)chru - (uint64_t)('A' - 10); } - if (digit >= base) { - if (chr >= '0' && chr <= '9') { + if (digit >= base) + { + if (chr >= '0' && chr <= '9') + { bad_num = ErrBadNumDigit; - } else if (chru >= 'A' && chru <= 'Z') { + } + else if (chru >= 'A' && chru <= 'Z') + { bad_num = ErrBadNumDigit; } break; @@ -490,134 +306,167 @@ Token token_number(char *input, size_t len, size_t pos) { digits += 1; next = rv * base + digit; - if (rv > pre_overflow || next < rv) { + if (rv > pre_overflow || next < rv) + { bad_num = ErrBadNumOverflow; break; } rv = next; } - if (digits == 0) { + if (digits == 0) + { bad_num = ErrBadNumNoDigit; } - if (bad_num) { - return (Token) { TokBadNumber, start, pos - start, bad_num }; - } else { - return (Token) { TokNumber, start, pos - start, rv }; + if (bad_num) + { + return (Token){TokBadNumber, start, pos - start, bad_num}; + } + else + { + return (Token){TokNumber, start, pos - start, rv}; } } -Token token(char *input, size_t len, size_t pos) { +Token token(char *input, size_t len, size_t pos) +{ char chr, chru; char *ptr = &input[pos]; - while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) { + while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) + { pos += 1; } - if (pos == len) { - return (Token) { TokEOF, pos, 0, 0 }; + if (pos == len) + { + return (Token){TokEOF, pos, 0, 0}; } ptr = &input[pos]; chr = *ptr; - if (chr == ',' || chr == '-' || chr == '.' || chr == ':') { - return (Token) { (TokenKind)chr, pos, 1, 0 }; + if (chr == ',' || chr == '-' || chr == '.' || chr == ':') + { + return (Token){(TokenKind)chr, pos, 1, 0}; } - if (chr == '\n') { - return (Token) { TokNewline, pos, 1, 0 }; + if (chr == '\n') + { + return (Token){TokNewline, pos, 1, 0}; } - if (chr == '\r') { - if (pos + 1 < len && ptr[1] == '\n') { - return (Token) { TokNewline, pos, 2, 0 }; + if (chr == '\r') + { + if (pos + 1 < len && ptr[1] == '\n') + { + return (Token){TokNewline, pos, 2, 0}; } - return (Token) { TokNewline, pos, 1, 0 }; + return (Token){TokNewline, pos, 1, 0}; } - if (chr == ';') { + if (chr == ';') + { size_t clen = 1; - while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') { + while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') + { clen += 1; } - return (Token) { TokComment, pos, clen, 0 }; + return (Token){TokComment, pos, clen, 0}; } - if (chr >= '0' && chr <= '9') { + if (chr >= '0' && chr <= '9') + { return token_number(input, len, pos); } chru = chr & ~0x20; - if (chr == '_' || (chru >= 'A' && chru <= 'Z')) { + if (chr == '_' || (chru >= 'A' && chru <= 'Z')) + { return token_ident(input, len, pos); } - return (Token) { TokInvalid, pos, 1, 0 }; + return (Token){TokInvalid, pos, 1, 0}; } -typedef struct Hole_s { +typedef struct Hole_s +{ size_t location; size_t origin; char *str; size_t len; size_t size; } Hole; -typedef struct HoleVec_s { +typedef struct HoleVec_s +{ Hole *buf; size_t cap; size_t len; } HoleVec; -typedef struct Label_s { +typedef struct Label_s +{ size_t location; char *str; size_t len; } Label; -typedef struct LabelVec_s { +typedef struct LabelVec_s +{ Label *buf; size_t cap; size_t len; } LabelVec; -size_t label_lookup(LabelVec *labels, char* name, size_t len) { +size_t label_lookup(LabelVec *labels, char *name, size_t len) +{ size_t nlabels = labels->len; Label *buf = labels->buf; - for (size_t ii = 0; ii < nlabels; ii += 1) { - if (len == buf->len && strncmp(buf->str, name, len) == 0) { + for (size_t ii = 0; ii < nlabels; ii += 1) + { + if (len == buf->len && strncmp(buf->str, name, len) == 0) + { return ii; - } + } buf += 1; } return INVALID; } -int parse_register(char *name, size_t len) { - if (name[0] != 'r') { +int parse_register(char *name, size_t len) +{ + if (name[0] != 'r') + { return 256; // Register name should start with 'r' } - if (len > 4) { + if (len > 4) + { return 256; // Register name too long } uint16_t rv = 0; - if (len > 2 && name[1] == '0') { + if (len > 2 && name[1] == '0') + { return 256; // Extra zero suffix } - for (size_t ii = 1; ii < len; ii += 1) { + for (size_t ii = 1; ii < len; ii += 1) + { char chr = name[ii]; - if (!(chr >= '0' && chr <= '9')) { + if (!(chr >= '0' && chr <= '9')) + { return 256; // Register name must only contain numbers } rv = rv * 10 + (chr - '0'); } - if (rv > 255) { + if (rv > 255) + { return 256; // Register number too large } return (int)rv; } // safety: assumes the buffer has enough place for specified integer size -AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { +AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) +{ int valid_uint = val >> (size * 8) == 0; int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); int valid_int = int_shifted == 0 || (~int_shifted) == 0; // Note: this assumes the format for `sign` is a bitset. int validity = valid_int | (valid_uint << 1); - if ((validity & sign) == 0) { + if ((validity & sign) == 0) + { return ErrImmediateOverflow; } - for (size_t ii = 0; ii < size; ii += 1) { + for (size_t ii = 0; ii < size; ii += 1) + { buf[ii] = val & 0xff; val >>= 8; } @@ -626,38 +475,45 @@ AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { AsmError assemble_instr( InstHt ht, char *input, size_t len, Token *tok, - ByteVec *rv, HoleVec *holes, LabelVec *labels -) { + ByteVec *rv, HoleVec *holes, LabelVec *labels) +{ const InstDesc *inst; const char *type_str; size_t nargs; size_t size; size_t idx = inst_lookup(ht, &input[tok->start], tok->len); size_t inst_start = rv->len; - if (idx == INVALID) { + if (idx == INVALID) + { return ErrInvalidToken; } inst = &INST[idx]; type_str = TYPE_STR[inst->type]; nargs = strlen(type_str); size = 1; - for (size_t ii = 0; ii < nargs; ii += 1) { + for (size_t ii = 0; ii < nargs; ii += 1) + { char chr = type_str[ii]; ArgMeta meta = arg_meta(chr); - if (meta.chr == 0) { + if (meta.chr == 0) + { return ErrBadArgumentMeta; } size += meta.size; } - if (ensure_push(rv, 1, size) != 0) { + if (ensure_push(rv, 1, size) != 0) + { return ErrOutOfMemory; } rv->buf[rv->len] = inst->opcode; rv->len += 1; - for (size_t ii = 0; ii < nargs; ii += 1) { - if (ii > 0) { + for (size_t ii = 0; ii < nargs; ii += 1) + { + if (ii > 0) + { *tok = token(input, len, tok->start + tok->len); - if (tok->kind != TokComma) { + if (tok->kind != TokComma) + { return ErrNeedCommaAfterArgument; } } @@ -665,27 +521,36 @@ AsmError assemble_instr( ArgMeta meta = arg_meta(chr); uint64_t is_negative = 0; *tok = token(input, len, tok->start + tok->len); - if (tok->kind == TokNeg) { + if (tok->kind == TokNeg) + { *tok = token(input, len, tok->start + tok->len); is_negative = ~(uint64_t)0; } - if (chr == 'R') { + if (chr == 'R') + { int reg = parse_register(&input[tok->start], tok->len); - if (reg > 255) { + if (reg > 255) + { return ErrBadRegister; } rv->buf[rv->len] = (char)(reg & 0xff); rv->len += 1; - } else { + } + else + { uint64_t num_to_write; - if (meta.rel == 1 || meta.size == 8) { - if (tok->kind == TokIdent) { + if (meta.rel == 1 || meta.size == 8) + { + if (tok->kind == TokIdent) + { size_t idx = label_lookup(labels, &input[tok->start], tok->len); - if (idx == INVALID) { - if (ensure_push((ByteVec*)holes, 1, sizeof(Hole)) != 0) { + if (idx == INVALID) + { + if (ensure_push((ByteVec *)holes, 1, sizeof(Hole)) != 0) + { return ErrOutOfMemory; } - holes->buf[holes->len] = (Hole) { + holes->buf[holes->len] = (Hole){ .location = rv->len, .origin = inst_start, .str = &input[tok->start], @@ -694,34 +559,47 @@ AsmError assemble_instr( }; holes->len += 1; num_to_write = 0; - } else { + } + else + { num_to_write = labels->buf[idx].location; - if (meta.size != 8) { + if (meta.size != 8) + { num_to_write -= inst_start; } } - } else if (tok->kind == TokNumber) { + } + else if (tok->kind == TokNumber) + { num_to_write = tok->num; - } else { + } + else + { return ErrLabelImmediate; } - } else if (tok->kind == TokNumber) { + } + else if (tok->kind == TokNumber) + { num_to_write = tok->num; - } else { + } + else + { return ErrNumberImmediate; } // num_to_write = num_to_write ^ is_negative - is_negative; - if (is_negative) { + if (is_negative) + { int64_t tmp = -(int64_t)num_to_write; - if (tmp > 0) { + if (tmp > 0) + { return ErrBadNumOverflow; } num_to_write = (uint64_t)tmp; } AsmError err = push_int_le( - &rv->buf[rv->len], num_to_write, meta.size, meta.sign - ); - if (err != 0) { + &rv->buf[rv->len], num_to_write, meta.size, meta.sign); + if (err != 0) + { return err; } rv->len += meta.size; @@ -731,16 +609,18 @@ AsmError assemble_instr( return 0; } -typedef struct EInfo_s { +typedef struct EInfo_s +{ Token token; size_t line; size_t line_start; } EInfo; -AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) { - ByteVec rv = { malloc(MIN_SIZE), MIN_SIZE, 0 }; - HoleVec holes = { malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0 }; - LabelVec labels = { malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0 }; +AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) +{ + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; + HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; + LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; size_t line = 0; size_t line_start = 0; size_t pos = 0; @@ -748,81 +628,101 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo size_t line_state = 0; AsmError err = ErrOk; - while (1) { + while (1) + { Token tok = token(input, len, pos); einfo->token = tok; pos = tok.start + tok.len; - if (tok.kind == TokInvalid || tok.kind == TokBadNumber) { - if (tok.num) { + if (tok.kind == TokInvalid || tok.kind == TokBadNumber) + { + if (tok.num) + { err = (AsmError)tok.num; - } else { + } + else + { err = ErrInvalidToken; } break; } - if (tok.kind == TokEOF) { + if (tok.kind == TokEOF) + { break; } - if (tok.kind == TokComment) { + if (tok.kind == TokComment) + { line_state = 3; continue; } - if (tok.kind == TokNewline) { + if (tok.kind == TokNewline) + { line += 1; line_start = tok.start + tok.len; line_state = 0; continue; } - if (tok.kind == TokDot) { + if (tok.kind == TokDot) + { Token next = token(input, len, pos); - if (next.kind == TokIdent) { + if (next.kind == TokIdent) + { err = ErrDirectiveNotImplemented; goto end; - } else { + } + else + { err = ErrNeedDirectiveAfterDot; goto end; } continue; } - if (tok.kind == TokIdent) { + if (tok.kind == TokIdent) + { Token next = token(input, len, pos); - if (next.kind == TokColon) { + if (next.kind == TokColon) + { // Label pos = next.start + next.len; - if (line_state >= 1) { + if (line_state >= 1) + { err = ErrLabelAfterLabel; einfo->token = next; goto end; } line_state = 1; - if (ensure_push((ByteVec*)&labels, sizeof(Label), 1) != 0) { + if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) + { err = ErrOutOfMemory; goto end; } size_t idx = label_lookup(&labels, &input[tok.start], tok.len); - if (idx != INVALID) { + if (idx != INVALID) + { err = ErrDuplicateLabel; goto end; } - labels.buf[labels.len] = (Label) { + labels.buf[labels.len] = (Label){ .location = rv.len, .str = &input[tok.start], .len = tok.len, }; labels.len += 1; - } else { + } + else + { // Instruction - if (line_state >= 2) { + if (line_state >= 2) + { err = ErrTrailingLine; goto end; } line_state = 2; err = assemble_instr( ht, input, len, &tok, - &rv, &holes, &labels - ); + &rv, &holes, &labels); pos = tok.start + tok.len; - if (err != 0) { + if (err != 0) + { goto end; } } @@ -832,23 +732,25 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo goto end; } - for (size_t ii = 0; ii < holes.len; ii += 1) { + for (size_t ii = 0; ii < holes.len; ii += 1) + { Hole *hole = &holes.buf[ii]; size_t idx = label_lookup(&labels, hole->str, hole->len); uint64_t num_to_write = labels.buf[idx].location; uint8_t sign = 1; - if (hole->size != 8) { + if (hole->size != 8) + { sign = 2; num_to_write -= hole->origin; } err = push_int_le( - &rv.buf[hole->location], num_to_write, hole->size, sign - ); - if (err != 0) { + &rv.buf[hole->location], num_to_write, hole->size, sign); + if (err != 0) + { goto end; } } - end: +end: free(holes.buf); free(labels.buf); *out = rv; @@ -857,9 +759,11 @@ AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo return err; } -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ int hex_out = 0; - if (argc >= 2 && strcmp(argv[1], "--hex") == 0) { + if (argc >= 2 && strcmp(argv[1], "--hex") == 0) + { hex_out = 1; } @@ -867,14 +771,15 @@ int main(int argc, char **argv) { InstHt ht = NULL; ByteVec input; - err = slurp(stdin, &input); - if (err != 0) { + if (err != 0) + { fprintf(stderr, "failed to read the file: %d\n", err); goto done; } ht = build_lookup(); - if (ht == NULL) { + if (ht == NULL) + { err = ErrOutOfMemory; fprintf(stderr, "failed to init hash table: %d\n", err); goto done; @@ -883,20 +788,24 @@ int main(int argc, char **argv) { ByteVec out; EInfo einfo; err = assemble(ht, input.buf, input.len, &out, &einfo); - if (err != 0) { + if (err != 0) + { size_t column = einfo.token.start - einfo.line_start + 1; fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", - ERRORS[err], einfo.line, column, - (int)einfo.token.len, &input.buf[einfo.token.start]); + ERRORS[err], einfo.line, column, + (int)einfo.token.len, &input.buf[einfo.token.start]); goto done; } - if (hex_out) { + if (hex_out) + { hd(out.buf, out.len); - } else { + } + else + { fwrite(out.buf, 1, out.len, stdout); } - done: +done: free(ht); free(input.buf); free(out.buf); diff --git a/instructions.c b/instructions.c new file mode 100644 index 0000000..77a1566 --- /dev/null +++ b/instructions.c @@ -0,0 +1,142 @@ +typedef struct InstDesc_s +{ + char *mnemonic; + unsigned char opcode; + OpType type; +} InstDesc; + +const InstDesc INST[] = { + {"un", 0x00, Empty}, + {"tx", 0x01, Empty}, + {"nop", 0x02, Empty}, + {"add8", 0x03, RRR}, + {"add16", 0x04, RRR}, + {"add32", 0x05, RRR}, + {"add64", 0x06, RRR}, + {"sub8", 0x07, RRR}, + {"sub16", 0x08, RRR}, + {"sub32", 0x09, RRR}, + {"sub64", 0x0A, RRR}, + {"mul8", 0x0B, RRR}, + {"mul16", 0x0C, RRR}, + {"mul32", 0x0D, RRR}, + {"mul64", 0x0E, RRR}, + {"and", 0x0F, RRR}, + {"or", 0x10, RRR}, + {"xor", 0x11, RRR}, + {"slu8", 0x12, RRR}, + {"slu16", 0x13, RRR}, + {"slu32", 0x14, RRR}, + {"slu64", 0x15, RRR}, + {"sru8", 0x16, RRR}, + {"sru16", 0x17, RRR}, + {"sru32", 0x18, RRR}, + {"sru64", 0x19, RRR}, + {"srs8", 0x1A, RRR}, + {"srs16", 0x1B, RRR}, + {"srs32", 0x1C, RRR}, + {"srs64", 0x1D, RRR}, + {"cmpu", 0x1E, RRR}, + {"cmps", 0x1F, RRR}, + {"diru8", 0x20, RRRR}, + {"diru16", 0x21, RRRR}, + {"diru32", 0x22, RRRR}, + {"diru64", 0x23, RRRR}, + {"dirs8", 0x24, RRRR}, + {"dirs16", 0x25, RRRR}, + {"dirs32", 0x26, RRRR}, + {"dirs64", 0x27, RRRR}, + {"neg", 0x28, RR}, + {"not", 0x29, RR}, + {"sxt8", 0x2A, RR}, + {"sxt16", 0x2B, RR}, + {"sxt32", 0x2C, RR}, + {"addi8", 0x2D, RRx8}, + {"addi16", 0x2E, RRx16}, + {"addi32", 0x2F, RRx32}, + {"addi64", 0x30, RRx64}, + {"muli8", 0x31, RRx8}, + {"muli16", 0x32, RRx16}, + {"muli32", 0x33, RRx32}, + {"muli64", 0x34, RRx64}, + {"andi", 0x35, RRx64}, + {"ori", 0x36, RRx64}, + {"xori", 0x37, RRx64}, + {"slui8", 0x38, RRu8}, + {"slui16", 0x39, RRu8}, + {"slui32", 0x3A, RRu8}, + {"slui64", 0x3B, RRu8}, + {"srui8", 0x3C, RRu8}, + {"srui16", 0x3D, RRu8}, + {"srui32", 0x3E, RRu8}, + {"srui64", 0x3F, RRu8}, + {"srsi8", 0x40, RRu8}, + {"srsi16", 0x41, RRu8}, + {"srsi32", 0x42, RRu8}, + {"srsi64", 0x43, RRu8}, + {"cmpui", 0x44, RRu64}, + {"cmpsi", 0x45, RRs64}, + {"cp", 0x46, RR}, + {"swa", 0x47, RR}, + {"li8", 0x48, Rx8}, + {"li16", 0x49, Rx16}, + {"li32", 0x4A, Rx32}, + {"li64", 0x4B, Rx64}, + {"lra", 0x4C, RRr32}, + {"ld", 0x4D, RRu64u16}, + {"st", 0x4E, RRu64u16}, + {"ldr", 0x4F, RRr32u16}, + {"str", 0x50, RRr32u16}, + {"bmc", 0x51, RRu16}, + {"brc", 0x52, RRu8}, + {"jmp", 0x53, r32}, + {"jal", 0x54, RRr32}, + {"jala", 0x55, RRu64}, + {"jeq", 0x56, RRr16}, + {"jne", 0x57, RRr16}, + {"jltu", 0x58, RRr16}, + {"jgtu", 0x59, RRr16}, + {"jlts", 0x5A, RRr16}, + {"jgts", 0x5B, RRr16}, + {"eca", 0x5C, Empty}, + {"ebp", 0x5D, Empty}, + {"fadd32", 0x5E, RRR}, + {"fadd64", 0x5F, RRR}, + {"fsub32", 0x60, RRR}, + {"fsub64", 0x61, RRR}, + {"fmul32", 0x62, RRR}, + {"fmul64", 0x63, RRR}, + {"fdiv32", 0x64, RRR}, + {"fdiv64", 0x65, RRR}, + {"fma32", 0x66, RRRR}, + {"fma64", 0x67, RRRR}, + {"fcmplt32", 0x6A, RRR}, + {"fcmplt64", 0x6B, RRR}, + {"fcmpgt32", 0x6C, RRR}, + {"fcmpgt64", 0x6D, RRR}, + {"itf32", 0x6E, RR}, + {"itf64", 0x6F, RR}, + {"fti32", 0x70, RRu8}, + {"fti64", 0x71, RRu8}, + {"fc32t64", 0x72, RR}, + {"fc64t32", 0x73, RR}, + {"lra16", 0x74, RRr16}, + {"ldr16", 0x75, RRr16u16}, + {"str16", 0x76, RRr16u16}, + {"jmp16", 0x77, r16}, +}; + +const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); +const size_t INVALID = ~(size_t)0; +size_t inst_find(const char *mnemonic, size_t len) +{ + for (size_t ii = 0; ii < INST_CNT; ii += 1) + { + const char *entry = INST[ii].mnemonic; + if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') + { + return ii; + } + } + return INVALID; +} diff --git a/op.h b/op.h new file mode 100644 index 0000000..685e028 --- /dev/null +++ b/op.h @@ -0,0 +1,62 @@ +typedef enum OpType_e +{ + Empty = 0, + R, + RR, + RRR, + RRRR, + Rx8, + Rx16, + Rx32, + Rx64, + RRx8, + RRx16, + RRx32, + RRx64, + RRs32, + RRs64, + RRu8, + RRu16, + RRu64, + r16, + r32, + RRr16, + RRr32, + RRr16u16, + RRr32u16, + RRu64u16, +} OpType; +// R -> register, +// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, +// b -> Si8, h -> Si16, w -> Si32, d -> Si64, +// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64, +// o -> 16 bit relative offset, +// O -> 32 bit relative offset, + +const char *TYPE_STR[] = { + "", + "R", + "RR", + "RRR", + "RRRR", + "R1", + "R2", + "R4", + "R8", + "RR1", + "RR2", + "RR4", + "RR8", + "RRw", + "RRd", + "RRB", + "RRH", + "RRD", + "o", + "O", + "RRo", + "RRO", + "RRoH", + "RROH", + "RRDH", +}; From 45babd1afe3cac3a2981cf5e9465ce9e2e4de326 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 08:35:44 -0600 Subject: [PATCH 3/8] modulize error --- error.h | 41 +++++++++++++++++++++++++++++++++++++++++ hbas.c | 43 +------------------------------------------ 2 files changed, 42 insertions(+), 42 deletions(-) create mode 100644 error.h diff --git a/error.h b/error.h new file mode 100644 index 0000000..5f2dc4a --- /dev/null +++ b/error.h @@ -0,0 +1,41 @@ +typedef enum AsmError_e +{ + ErrOk = 0, + ErrBadRegister, + ErrImmediateOverflow, + ErrInvalidToken, + ErrBadArgumentMeta, + ErrNeedCommaAfterArgument, + ErrLabelImmediate, + ErrNumberImmediate, + ErrBadNumOverflow, + ErrBadNumDigit, + ErrBadNumNoDigit, + ErrLabelAfterLabel, + ErrOutOfMemory, + ErrDuplicateLabel, + ErrTrailingLine, + ErrNeedDirectiveAfterDot, + ErrDirectiveNotImplemented, + ErrUnexpectedToken, +} AsmError; +char *ERRORS[] = { + "Success", + "Bad register name", + "Immediate integer OR relative offset overflow", + "Invalid token", + "Bad argument char? (blame developer of this program)", + "Expected comma after the argument, got something else", + "Label immediate needs label or number", + "Immediate needs to be a number", + "Bad number: u64 overflow", + "Bad number: encountered bad gidit", + "Bad number: no digits presented after the suffix", + "Encountered label after label", + "Out of Memory", + "Duplicate label", + "Encountered trailing identifier after instruction", + "Expected directive after dot", + "Directive is not implemented", + "Unexpected token", +}; diff --git a/hbas.c b/hbas.c index 1fda60f..5e810fe 100644 --- a/hbas.c +++ b/hbas.c @@ -26,6 +26,7 @@ SOFTWARE. #include #include "op.h" +#include "error.h" #include "instructions.c" #include "hash.c" @@ -84,48 +85,6 @@ ArgMeta arg_meta(char arg) return ARGS[NARGS - 1]; } -typedef enum AsmError_e -{ - ErrOk = 0, - ErrBadRegister, - ErrImmediateOverflow, - ErrInvalidToken, - ErrBadArgumentMeta, - ErrNeedCommaAfterArgument, - ErrLabelImmediate, - ErrNumberImmediate, - ErrBadNumOverflow, - ErrBadNumDigit, - ErrBadNumNoDigit, - ErrLabelAfterLabel, - ErrOutOfMemory, - ErrDuplicateLabel, - ErrTrailingLine, - ErrNeedDirectiveAfterDot, - ErrDirectiveNotImplemented, - ErrUnexpectedToken, -} AsmError; -char *ERRORS[] = { - "Success", - "Bad register name", - "Immediate integer OR relative offset overflow", - "Invalid token", - "Bad argument char? (blame developer of this program)", - "Expected comma after the argument, got something else", - "Label immediate needs label or number", - "Immediate needs to be a number", - "Bad number: u64 overflow", - "Bad number: encountered bad gidit", - "Bad number: no digits presented after the suffix", - "Encountered label after label", - "Out of Memory", - "Duplicate label", - "Encountered trailing identifier after instruction", - "Expected directive after dot", - "Directive is not implemented", - "Unexpected token", -}; - typedef struct ByteVec_s { char *buf; From 8a7639d6ba403ff4d8378f0f45599c78fbc81d34 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 10:34:39 -0600 Subject: [PATCH 4/8] organization --- Makefile | 2 +- error.h => src/error.h | 0 hash.c => src/hash.c | 0 hbas.c => src/hbas.c | 0 instructions.c => src/instructions.c | 0 op.h => src/op.h | 0 6 files changed, 1 insertion(+), 1 deletion(-) rename error.h => src/error.h (100%) rename hash.c => src/hash.c (100%) rename hbas.c => src/hbas.c (100%) rename instructions.c => src/instructions.c (100%) rename op.h => src/op.h (100%) diff --git a/Makefile b/Makefile index 10c499e..ab7f06b 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 .PHONY: clean -hbas: hbas.c +hbas: src/hbas.c ${CC} ${CFLAGS} ${CFLAGS_EXTRA} hbas.c -o hbas example: hbas example.S diff --git a/error.h b/src/error.h similarity index 100% rename from error.h rename to src/error.h diff --git a/hash.c b/src/hash.c similarity index 100% rename from hash.c rename to src/hash.c diff --git a/hbas.c b/src/hbas.c similarity index 100% rename from hbas.c rename to src/hbas.c diff --git a/instructions.c b/src/instructions.c similarity index 100% rename from instructions.c rename to src/instructions.c diff --git a/op.h b/src/op.h similarity index 100% rename from op.h rename to src/op.h From 8597d97566832169925dd5608f1fd3f6e1664df1 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 10:39:55 -0600 Subject: [PATCH 5/8] register ripout --- src/hbas.c | 32 +------------------------------- src/register.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 31 deletions(-) create mode 100644 src/register.c diff --git a/src/hbas.c b/src/hbas.c index 5e810fe..bd8ae6e 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -29,6 +29,7 @@ SOFTWARE. #include "error.h" #include "instructions.c" #include "hash.c" +#include "register.c" void hd(char *data, size_t len) { @@ -381,37 +382,6 @@ size_t label_lookup(LabelVec *labels, char *name, size_t len) return INVALID; } -int parse_register(char *name, size_t len) -{ - if (name[0] != 'r') - { - return 256; // Register name should start with 'r' - } - if (len > 4) - { - return 256; // Register name too long - } - uint16_t rv = 0; - if (len > 2 && name[1] == '0') - { - return 256; // Extra zero suffix - } - for (size_t ii = 1; ii < len; ii += 1) - { - char chr = name[ii]; - if (!(chr >= '0' && chr <= '9')) - { - return 256; // Register name must only contain numbers - } - rv = rv * 10 + (chr - '0'); - } - if (rv > 255) - { - return 256; // Register number too large - } - return (int)rv; -} - // safety: assumes the buffer has enough place for specified integer size AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { diff --git a/src/register.c b/src/register.c new file mode 100644 index 0000000..ee8637c --- /dev/null +++ b/src/register.c @@ -0,0 +1,30 @@ +int parse_register(char *name, size_t len) +{ + if (name[0] != 'r') + { + return 256; // Register name should start with 'r' + } + if (len > 4) + { + return 256; // Register name too long + } + uint16_t rv = 0; + if (len > 2 && name[1] == '0') + { + return 256; // Extra zero suffix + } + for (size_t ii = 1; ii < len; ii += 1) + { + char chr = name[ii]; + if (!(chr >= '0' && chr <= '9')) + { + return 256; // Register name must only contain numbers + } + rv = rv * 10 + (chr - '0'); + } + if (rv > 255) + { + return 256; // Register number too large + } + return (int)rv; +} \ No newline at end of file From 99a385752f595bceb2fa8fd797f3f7aec0f61ea6 Mon Sep 17 00:00:00 2001 From: able Date: Thu, 7 Mar 2024 11:04:12 -0600 Subject: [PATCH 6/8] continued modularization --- Makefile | 2 +- src/einfo.h | 6 ++ src/hbas.c | 197 +------------------------------------------------ src/register.c | 2 +- src/token.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 197 insertions(+), 197 deletions(-) create mode 100644 src/einfo.h create mode 100644 src/token.c diff --git a/Makefile b/Makefile index ab7f06b..5b7b14a 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 .PHONY: clean hbas: src/hbas.c - ${CC} ${CFLAGS} ${CFLAGS_EXTRA} hbas.c -o hbas + ${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o hbas example: hbas example.S ./hbas < example.S > example diff --git a/src/einfo.h b/src/einfo.h new file mode 100644 index 0000000..d6de8b3 --- /dev/null +++ b/src/einfo.h @@ -0,0 +1,6 @@ +typedef struct EInfo_s +{ + Token token; + size_t line; + size_t line_start; +} EInfo; diff --git a/src/hbas.c b/src/hbas.c index bd8ae6e..4fd08a6 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -30,6 +30,8 @@ SOFTWARE. #include "instructions.c" #include "hash.c" #include "register.c" +#include "token.c" +#include "einfo.h" void hd(char *data, size_t len) { @@ -152,194 +154,6 @@ int slurp(FILE *fd, ByteVec *out) return err; } -typedef enum TokenKind_e -{ - TokInvalid = '!', - TokEOF = '$', - TokIdent = 'A', - TokNeg = '-', - TokNumber = '0', - TokBadNumber = '9', - TokComma = ',', - TokDot = '.', - TokColon = ':', - TokComment = ';', - TokNewline = 'n', -} TokenKind; -typedef struct Token_s -{ - TokenKind kind; - size_t start; - size_t len; - uint64_t num; -} Token; - -Token token_ident(char *input, size_t len, size_t pos) -{ - size_t start = pos; - while (pos < len) - { - char chr = input[pos]; - char chru = chr & ~0x20; - int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); - if (!good) - { - break; - } - pos += 1; - } - return (Token){TokIdent, start, pos - start, 0}; -} - -Token token_number(char *input, size_t len, size_t pos) -{ - char *ptr = &input[pos]; - char next = '\0'; - size_t start = pos; - size_t digits = 0; - uint64_t base = 10; - uint64_t rv = 0; - uint64_t pre_overflow; - AsmError bad_num = ErrOk; - - if (pos + 1 < len) - { - next = ptr[1] & ~0x20; - } - - if (input[pos] == '0') - { - if (next == 'X') - { - base = 16; - pos += 2; - } - else if (next == 'D') - { - base = 10; - pos += 2; - } - else if (next == 'O') - { - base = 8; - pos += 2; - } - else if (next == 'B') - { - base = 2; - pos += 2; - } - } - pre_overflow = (~(size_t)0) / base; - // valid: "0x_0", "0_" - // invalid: "0x_" - while (pos < len) - { - uint64_t digit; - uint64_t next; - char chr = input[pos]; - char chru = chr & ~0x20; - if (chr == '_') - { - pos += 1; - continue; - } - digit = (uint64_t)chr - (uint64_t)'0'; - if (digit >= 10) - { - digit = (uint64_t)chru - (uint64_t)('A' - 10); - } - if (digit >= base) - { - if (chr >= '0' && chr <= '9') - { - bad_num = ErrBadNumDigit; - } - else if (chru >= 'A' && chru <= 'Z') - { - bad_num = ErrBadNumDigit; - } - break; - } - - pos += 1; - digits += 1; - - next = rv * base + digit; - if (rv > pre_overflow || next < rv) - { - bad_num = ErrBadNumOverflow; - break; - } - rv = next; - } - - if (digits == 0) - { - bad_num = ErrBadNumNoDigit; - } - - if (bad_num) - { - return (Token){TokBadNumber, start, pos - start, bad_num}; - } - else - { - return (Token){TokNumber, start, pos - start, rv}; - } -} - -Token token(char *input, size_t len, size_t pos) -{ - char chr, chru; - char *ptr = &input[pos]; - while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) - { - pos += 1; - } - if (pos == len) - { - return (Token){TokEOF, pos, 0, 0}; - } - ptr = &input[pos]; - chr = *ptr; - if (chr == ',' || chr == '-' || chr == '.' || chr == ':') - { - return (Token){(TokenKind)chr, pos, 1, 0}; - } - if (chr == '\n') - { - return (Token){TokNewline, pos, 1, 0}; - } - if (chr == '\r') - { - if (pos + 1 < len && ptr[1] == '\n') - { - return (Token){TokNewline, pos, 2, 0}; - } - return (Token){TokNewline, pos, 1, 0}; - } - if (chr == ';') - { - size_t clen = 1; - while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') - { - clen += 1; - } - return (Token){TokComment, pos, clen, 0}; - } - if (chr >= '0' && chr <= '9') - { - return token_number(input, len, pos); - } - chru = chr & ~0x20; - if (chr == '_' || (chru >= 'A' && chru <= 'Z')) - { - return token_ident(input, len, pos); - } - return (Token){TokInvalid, pos, 1, 0}; -} - typedef struct Hole_s { size_t location; @@ -538,13 +352,6 @@ AsmError assemble_instr( return 0; } -typedef struct EInfo_s -{ - Token token; - size_t line; - size_t line_start; -} EInfo; - AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) { ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; diff --git a/src/register.c b/src/register.c index ee8637c..0083a6f 100644 --- a/src/register.c +++ b/src/register.c @@ -27,4 +27,4 @@ int parse_register(char *name, size_t len) return 256; // Register number too large } return (int)rv; -} \ No newline at end of file +} diff --git a/src/token.c b/src/token.c new file mode 100644 index 0000000..dec4919 --- /dev/null +++ b/src/token.c @@ -0,0 +1,187 @@ +typedef enum TokenKind_e +{ + TokInvalid = '!', + TokEOF = '$', + TokIdent = 'A', + TokNeg = '-', + TokNumber = '0', + TokBadNumber = '9', + TokComma = ',', + TokDot = '.', + TokColon = ':', + TokComment = ';', + TokNewline = 'n', +} TokenKind; +typedef struct Token_s +{ + TokenKind kind; + size_t start; + size_t len; + uint64_t num; +} Token; + +Token token_ident(char *input, size_t len, size_t pos) +{ + size_t start = pos; + while (pos < len) + { + char chr = input[pos]; + char chru = chr & ~0x20; + int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); + if (!good) + { + break; + } + pos += 1; + } + return (Token){TokIdent, start, pos - start, 0}; +} + +Token token_number(char *input, size_t len, size_t pos) +{ + char *ptr = &input[pos]; + char next = '\0'; + size_t start = pos; + size_t digits = 0; + uint64_t base = 10; + uint64_t rv = 0; + uint64_t pre_overflow; + AsmError bad_num = ErrOk; + + if (pos + 1 < len) + { + next = ptr[1] & ~0x20; + } + + if (input[pos] == '0') + { + if (next == 'X') + { + base = 16; + pos += 2; + } + else if (next == 'D') + { + base = 10; + pos += 2; + } + else if (next == 'O') + { + base = 8; + pos += 2; + } + else if (next == 'B') + { + base = 2; + pos += 2; + } + } + pre_overflow = (~(size_t)0) / base; + // valid: "0x_0", "0_" + // invalid: "0x_" + while (pos < len) + { + uint64_t digit; + uint64_t next; + char chr = input[pos]; + char chru = chr & ~0x20; + if (chr == '_') + { + pos += 1; + continue; + } + digit = (uint64_t)chr - (uint64_t)'0'; + if (digit >= 10) + { + digit = (uint64_t)chru - (uint64_t)('A' - 10); + } + if (digit >= base) + { + if (chr >= '0' && chr <= '9') + { + bad_num = ErrBadNumDigit; + } + else if (chru >= 'A' && chru <= 'Z') + { + bad_num = ErrBadNumDigit; + } + break; + } + + pos += 1; + digits += 1; + + next = rv * base + digit; + if (rv > pre_overflow || next < rv) + { + bad_num = ErrBadNumOverflow; + break; + } + rv = next; + } + + if (digits == 0) + { + bad_num = ErrBadNumNoDigit; + } + + if (bad_num) + { + return (Token){TokBadNumber, start, pos - start, bad_num}; + } + else + { + return (Token){TokNumber, start, pos - start, rv}; + } +} + +Token token(char *input, size_t len, size_t pos) +{ + char chr, chru; + char *ptr = &input[pos]; + while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) + { + pos += 1; + } + if (pos == len) + { + return (Token){TokEOF, pos, 0, 0}; + } + ptr = &input[pos]; + chr = *ptr; + if (chr == ',' || chr == '-' || chr == '.' || chr == ':') + { + return (Token){(TokenKind)chr, pos, 1, 0}; + } + if (chr == '\n') + { + return (Token){TokNewline, pos, 1, 0}; + } + if (chr == '\r') + { + if (pos + 1 < len && ptr[1] == '\n') + { + return (Token){TokNewline, pos, 2, 0}; + } + return (Token){TokNewline, pos, 1, 0}; + } + if (chr == ';') + { + size_t clen = 1; + while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') + { + clen += 1; + } + return (Token){TokComment, pos, clen, 0}; + } + if (chr >= '0' && chr <= '9') + { + return token_number(input, len, pos); + } + chru = chr & ~0x20; + if (chr == '_' || (chru >= 'A' && chru <= 'Z')) + { + return token_ident(input, len, pos); + } + return (Token){TokInvalid, pos, 1, 0}; +} From 78b9b99ab3d1351cb28c7385698819a3a1d6354a Mon Sep 17 00:00:00 2001 From: able Date: Fri, 8 Mar 2024 05:23:47 -0600 Subject: [PATCH 7/8] modularization --- .gitignore | 3 +- Makefile | 2 +- example.S => examples/example.S | 0 run.sh | 2 +- src/args.c | 41 +++++++++++++++++ src/bytevec.c | 35 +++++++++++++++ src/hbas.c | 79 +-------------------------------- 7 files changed, 82 insertions(+), 80 deletions(-) rename example.S => examples/example.S (100%) create mode 100644 src/args.c create mode 100644 src/bytevec.c diff --git a/.gitignore b/.gitignore index 89675ce..b448513 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ hbas -*.hbf \ No newline at end of file +*.hbf +build/* diff --git a/Makefile b/Makefile index 5b7b14a..932107e 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 .PHONY: clean hbas: src/hbas.c - ${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o hbas + ${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o build/hbas example: hbas example.S ./hbas < example.S > example diff --git a/example.S b/examples/example.S similarity index 100% rename from example.S rename to examples/example.S diff --git a/run.sh b/run.sh index e5e8659..0ac3fb3 100755 --- a/run.sh +++ b/run.sh @@ -1,2 +1,2 @@ make CC=clang -./hbas < example.S > example.hbf +./build/hbas < examples/example.S > build/example.hbf diff --git a/src/args.c b/src/args.c new file mode 100644 index 0000000..f126ab8 --- /dev/null +++ b/src/args.c @@ -0,0 +1,41 @@ +typedef struct ArgMeta_s +{ + char chr; + uint8_t size; + // This is a bitset of acceptable overflow states, + // where accept signed = 1, accept unsigned = 2. + // 1 -> signed, 2 -> unsigned, 3 -> whatever + uint8_t sign; + uint8_t rel; +} ArgMeta; +const ArgMeta ARGS[] = { + {'R', 1, 2, 0}, + {'1', 1, 3, 0}, + {'b', 1, 1, 0}, + {'B', 1, 2, 0}, + {'2', 2, 3, 0}, + {'o', 2, 1, 1}, + {'h', 2, 1, 0}, + {'H', 2, 2, 0}, + {'4', 4, 3, 0}, + {'w', 4, 1, 0}, + {'O', 4, 1, 1}, + {'W', 4, 2, 0}, + {'8', 8, 3, 0}, + {'d', 8, 1, 0}, + {'D', 8, 2, 0}, + {0}, +}; +const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); +ArgMeta arg_meta(char arg) +{ + for (size_t ii = 0; ii < NARGS; ii += 1) + { + ArgMeta meta = ARGS[ii]; + if (meta.chr == arg) + { + return meta; + } + } + return ARGS[NARGS - 1]; +} diff --git a/src/bytevec.c b/src/bytevec.c new file mode 100644 index 0000000..2df7718 --- /dev/null +++ b/src/bytevec.c @@ -0,0 +1,35 @@ + +typedef struct ByteVec_s +{ + char *buf; + size_t cap; + size_t len; +} ByteVec; + +AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) +{ + if (vec->len + extra < vec->len) + { + return ErrOutOfMemory; + } + while (vec->len + extra > vec->cap) + { + if ((~(size_t)0) / 2 < vec->cap) + { + return ErrOutOfMemory; + } + vec->cap *= 2; + // multiply overflow + if ((~(size_t)0) / el_size < vec->cap) + { + return ErrOutOfMemory; + } + vec->buf = realloc(vec->buf, el_size * vec->cap); + if (vec->buf == NULL) + { + vec->cap = 0; + return ErrOutOfMemory; + } + } + return 0; +} diff --git a/src/hbas.c b/src/hbas.c index 4fd08a6..d187d26 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -25,6 +25,7 @@ SOFTWARE. #include #include +#include "args.c" #include "op.h" #include "error.h" #include "instructions.c" @@ -32,6 +33,7 @@ SOFTWARE. #include "register.c" #include "token.c" #include "einfo.h" +#include "bytevec.c" void hd(char *data, size_t len) { @@ -46,83 +48,6 @@ void hd(char *data, size_t len) printf("\n"); } -typedef struct ArgMeta_s -{ - char chr; - uint8_t size; - // This is a bitset of acceptable overflow states, - // where accept signed = 1, accept unsigned = 2. - // 1 -> signed, 2 -> unsigned, 3 -> whatever - uint8_t sign; - uint8_t rel; -} ArgMeta; -const ArgMeta ARGS[] = { - {'R', 1, 2, 0}, - {'1', 1, 3, 0}, - {'b', 1, 1, 0}, - {'B', 1, 2, 0}, - {'2', 2, 3, 0}, - {'o', 2, 1, 1}, - {'h', 2, 1, 0}, - {'H', 2, 2, 0}, - {'4', 4, 3, 0}, - {'w', 4, 1, 0}, - {'O', 4, 1, 1}, - {'W', 4, 2, 0}, - {'8', 8, 3, 0}, - {'d', 8, 1, 0}, - {'D', 8, 2, 0}, - {0}, -}; -const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); -ArgMeta arg_meta(char arg) -{ - for (size_t ii = 0; ii < NARGS; ii += 1) - { - ArgMeta meta = ARGS[ii]; - if (meta.chr == arg) - { - return meta; - } - } - return ARGS[NARGS - 1]; -} - -typedef struct ByteVec_s -{ - char *buf; - size_t cap; - size_t len; -} ByteVec; - -AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) -{ - if (vec->len + extra < vec->len) - { - return ErrOutOfMemory; - } - while (vec->len + extra > vec->cap) - { - if ((~(size_t)0) / 2 < vec->cap) - { - return ErrOutOfMemory; - } - vec->cap *= 2; - // multiply overflow - if ((~(size_t)0) / el_size < vec->cap) - { - return ErrOutOfMemory; - } - vec->buf = realloc(vec->buf, el_size * vec->cap); - if (vec->buf == NULL) - { - vec->cap = 0; - return ErrOutOfMemory; - } - } - return 0; -} - #define MIN_SIZE 4096 int slurp(FILE *fd, ByteVec *out) From 2575e7fcede6f6a7b7762e00d32cc9c5eda118be Mon Sep 17 00:00:00 2001 From: able Date: Mon, 11 Mar 2024 05:15:06 -0500 Subject: [PATCH 8/8] Addressing the haters(comments) --- src/args.c | 95 +++--- src/bytevec.c | 52 ++-- src/einfo.h | 9 +- src/error.h | 39 ++- src/hash.c | 108 +++---- src/hbas.c | 715 ++++++++++++++++++++------------------------- src/instructions.c | 204 +++++-------- src/op.h | 62 ---- src/register.c | 49 ++-- 9 files changed, 560 insertions(+), 773 deletions(-) delete mode 100644 src/op.h diff --git a/src/args.c b/src/args.c index f126ab8..11668dd 100644 --- a/src/args.c +++ b/src/args.c @@ -1,41 +1,66 @@ -typedef struct ArgMeta_s -{ - char chr; - uint8_t size; - // This is a bitset of acceptable overflow states, - // where accept signed = 1, accept unsigned = 2. - // 1 -> signed, 2 -> unsigned, 3 -> whatever - uint8_t sign; - uint8_t rel; +typedef struct ArgMeta_s { + char chr; + uint8_t size; + // This is a bitset of acceptable overflow states, + // where accept signed = 1, accept unsigned = 2. + // 1 -> signed, 2 -> unsigned, 3 -> whatever + uint8_t sign; + uint8_t rel; } ArgMeta; const ArgMeta ARGS[] = { - {'R', 1, 2, 0}, - {'1', 1, 3, 0}, - {'b', 1, 1, 0}, - {'B', 1, 2, 0}, - {'2', 2, 3, 0}, - {'o', 2, 1, 1}, - {'h', 2, 1, 0}, - {'H', 2, 2, 0}, - {'4', 4, 3, 0}, - {'w', 4, 1, 0}, - {'O', 4, 1, 1}, - {'W', 4, 2, 0}, - {'8', 8, 3, 0}, - {'d', 8, 1, 0}, - {'D', 8, 2, 0}, - {0}, + {'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0}, + {'2', 2, 3, 0}, {'o', 2, 1, 1}, {'h', 2, 1, 0}, {'H', 2, 2, 0}, + {'4', 4, 3, 0}, {'w', 4, 1, 0}, {'O', 4, 1, 1}, {'W', 4, 2, 0}, + {'8', 8, 3, 0}, {'d', 8, 1, 0}, {'D', 8, 2, 0}, {0}, }; + +typedef enum Operands_e { + Empty = 0, + R, + RR, + RRR, + RRRR, + Rx8, + Rx16, + Rx32, + Rx64, + RRx8, + RRx16, + RRx32, + RRx64, + RRs32, + RRs64, + RRu8, + RRu16, + RRu64, + r16, + r32, + RRr16, + RRr32, + RRr16u16, + RRr32u16, + RRu64u16, +} Operands; +// R -> register, +// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, +// b -> Si8, h -> Si16, w -> Si32, d -> Si64, +// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64, +// o -> 16 bit relative offset, +// O -> 32 bit relative offset, + +const char *TYPE_STR[] = { + "", "R", "RR", "RRR", "RRRR", "R1", "R2", "R4", "R8", + "RR1", "RR2", "RR4", "RR8", "RRw", "RRd", "RRB", "RRH", "RRD", + "o", "O", "RRo", "RRO", "RRoH", "RROH", "RRDH", +}; + const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); -ArgMeta arg_meta(char arg) -{ - for (size_t ii = 0; ii < NARGS; ii += 1) - { - ArgMeta meta = ARGS[ii]; - if (meta.chr == arg) - { - return meta; - } +ArgMeta arg_meta(char arg) { + for (size_t ii = 0; ii < NARGS; ii += 1) { + ArgMeta meta = ARGS[ii]; + if (meta.chr == arg) { + return meta; } - return ARGS[NARGS - 1]; + } + return ARGS[NARGS - 1]; } diff --git a/src/bytevec.c b/src/bytevec.c index 2df7718..ed774e8 100644 --- a/src/bytevec.c +++ b/src/bytevec.c @@ -1,35 +1,29 @@ +const size_t INVALID = ~(size_t)0; -typedef struct ByteVec_s -{ - char *buf; - size_t cap; - size_t len; +typedef struct ByteVec_s { + char *buf; + size_t cap; + size_t len; } ByteVec; -AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) -{ - if (vec->len + extra < vec->len) - { - return ErrOutOfMemory; +AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) { + if (vec->len + extra < vec->len) { + return ErrOutOfMemory; + } + while (vec->len + extra > vec->cap) { + if ((~(size_t)0) / 2 < vec->cap) { + return ErrOutOfMemory; } - while (vec->len + extra > vec->cap) - { - if ((~(size_t)0) / 2 < vec->cap) - { - return ErrOutOfMemory; - } - vec->cap *= 2; - // multiply overflow - if ((~(size_t)0) / el_size < vec->cap) - { - return ErrOutOfMemory; - } - vec->buf = realloc(vec->buf, el_size * vec->cap); - if (vec->buf == NULL) - { - vec->cap = 0; - return ErrOutOfMemory; - } + vec->cap *= 2; + // multiply overflow + if ((~(size_t)0) / el_size < vec->cap) { + return ErrOutOfMemory; } - return 0; + vec->buf = realloc(vec->buf, el_size * vec->cap); + if (vec->buf == NULL) { + vec->cap = 0; + return ErrOutOfMemory; + } + } + return 0; } diff --git a/src/einfo.h b/src/einfo.h index d6de8b3..d52021c 100644 --- a/src/einfo.h +++ b/src/einfo.h @@ -1,6 +1,5 @@ -typedef struct EInfo_s -{ - Token token; - size_t line; - size_t line_start; +typedef struct EInfo_s { + Token token; + size_t line; + size_t line_start; } EInfo; diff --git a/src/error.h b/src/error.h index 5f2dc4a..eb61c40 100644 --- a/src/error.h +++ b/src/error.h @@ -1,23 +1,22 @@ -typedef enum AsmError_e -{ - ErrOk = 0, - ErrBadRegister, - ErrImmediateOverflow, - ErrInvalidToken, - ErrBadArgumentMeta, - ErrNeedCommaAfterArgument, - ErrLabelImmediate, - ErrNumberImmediate, - ErrBadNumOverflow, - ErrBadNumDigit, - ErrBadNumNoDigit, - ErrLabelAfterLabel, - ErrOutOfMemory, - ErrDuplicateLabel, - ErrTrailingLine, - ErrNeedDirectiveAfterDot, - ErrDirectiveNotImplemented, - ErrUnexpectedToken, +typedef enum AsmError_e { + ErrOk = 0, + ErrBadRegister, + ErrImmediateOverflow, + ErrInvalidToken, + ErrBadArgumentMeta, + ErrNeedCommaAfterArgument, + ErrLabelImmediate, + ErrNumberImmediate, + ErrBadNumOverflow, + ErrBadNumDigit, + ErrBadNumNoDigit, + ErrLabelAfterLabel, + ErrOutOfMemory, + ErrDuplicateLabel, + ErrTrailingLine, + ErrNeedDirectiveAfterDot, + ErrDirectiveNotImplemented, + ErrUnexpectedToken, } AsmError; char *ERRORS[] = { "Success", diff --git a/src/hash.c b/src/hash.c index a28d6ca..34d74ef 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,73 +1,57 @@ // Instruction Hash table, for faster lookups -typedef struct InstHtNode_s -{ - uint8_t index1; - uint8_t index2; +typedef struct InstHtNode_s { + uint8_t index1; + uint8_t index2; } InstHtNode; typedef InstHtNode *InstHt; -uint32_t inst_hash(const char *s, size_t len) -{ - uint32_t hash = 0; - uint32_t mul = 75; - for (size_t ii = 0; ii < len; ii += 1) - { - hash ^= s[ii] * mul; - hash *= mul; - } - return hash; +uint32_t inst_hash(const char *s, size_t len) { + uint32_t hash = 0; + uint32_t mul = 75; + for (size_t ii = 0; ii < len; ii += 1) { + hash ^= s[ii] * mul; + hash *= mul; + } + return hash; } -InstHt build_lookup(void) -{ - const size_t size = 256; - InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); - if (table == NULL) - { - return table; - } - for (size_t ii = 0; ii < size; ii += 1) - { - table[ii] = (InstHtNode){0xff, 0xff}; - } - for (size_t ii = 0; ii < INST_CNT; ii += 1) - { - const char *mnemonic = INST[ii].mnemonic; - uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); - InstHtNode *node = &table[hash & 0xff]; - if (node->index1 == 0xff) - { - node->index1 = ii; - } - else if (node->index2 == 0xff) - { - node->index2 = ii; - } - else - { - fprintf(stderr, "more than 1 collision in hash table\n"); - exit(1); - } - } +InstHt build_lookup(void) { + const size_t size = 256; + InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); + if (table == NULL) { return table; + } + for (size_t ii = 0; ii < size; ii += 1) { + table[ii] = (InstHtNode){0xff, 0xff}; + } + for (size_t ii = 0; ii < INST_CNT; ii += 1) { + const char *mnemonic = INST[ii].mnemonic; + uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); + InstHtNode *node = &table[hash & 0xff]; + if (node->index1 == 0xff) { + node->index1 = ii; + } else if (node->index2 == 0xff) { + node->index2 = ii; + } else { + fprintf(stderr, "more than 1 collision in hash table\n"); + exit(1); + } + } + return table; } -size_t inst_lookup(InstHt ht, const char *s, size_t len) -{ - uint32_t hash = inst_hash(s, len); - uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; - for (size_t ii = 0; ii < 2; ii += 1) - { - size_t idx = (size_t)node[ii]; - if (idx == 0xff) - { - break; - } - const char *mnemonic = INST[idx].mnemonic; - if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) - { - return idx; - } +size_t inst_lookup(InstHt ht, const char *s, size_t len) { + uint32_t hash = inst_hash(s, len); + uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; + for (size_t ii = 0; ii < 2; ii += 1) { + size_t idx = (size_t)node[ii]; + if (idx == 0xff) { + break; } - return INVALID; + const char *mnemonic = INST[idx].mnemonic; + if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) { + return idx; + } + } + return INVALID; } diff --git a/src/hbas.c b/src/hbas.c index d187d26..eb3b3e9 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -20,455 +20,374 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#include #include #include #include -#include -#include "args.c" -#include "op.h" #include "error.h" +// +#include "bytevec.c" +// +#include "args.c" #include "instructions.c" +// #include "hash.c" +// #include "register.c" #include "token.c" +// #include "einfo.h" -#include "bytevec.c" -void hd(char *data, size_t len) -{ - for (size_t ii = 0; ii < len; ii += 1) - { - if (ii > 0 && (ii & 15) == 0) - { - printf("\n"); - } - printf("%02x", (uint8_t)data[ii]); +void hd(char *data, size_t len) { + for (size_t ii = 0; ii < len; ii += 1) { + if (ii > 0 && (ii & 15) == 0) { + printf("\n"); } - printf("\n"); + printf("%02x", (uint8_t)data[ii]); + } + printf("\n"); } #define MIN_SIZE 4096 -int slurp(FILE *fd, ByteVec *out) -{ - ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; - size_t bread = 1; - int err = 0; - if (rv.buf == NULL) - { - rv.cap = 0; - err = ErrOutOfMemory; - bread = 0; +int slurp(FILE *fd, ByteVec *out) { + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; + size_t bread = 1; + int err = 0; + if (rv.buf == NULL) { + rv.cap = 0; + err = ErrOutOfMemory; + bread = 0; + } + while (bread > 0) { + if (ensure_push(&rv, 1, 1) != 0) { + err = ErrOutOfMemory; + break; } - while (bread > 0) - { - if (ensure_push(&rv, 1, 1) != 0) - { - err = ErrOutOfMemory; - break; - } - bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); - rv.len += bread; - } - *out = rv; - if (err == 0) - { - err = ferror(fd); - } - return err; + bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); + rv.len += bread; + } + *out = rv; + if (err == 0) { + err = ferror(fd); + } + return err; } -typedef struct Hole_s -{ - size_t location; - size_t origin; - char *str; - size_t len; - size_t size; +typedef struct Hole_s { + size_t location; + size_t origin; + char *str; + size_t len; + size_t size; } Hole; -typedef struct HoleVec_s -{ - Hole *buf; - size_t cap; - size_t len; +typedef struct HoleVec_s { + Hole *buf; + size_t cap; + size_t len; } HoleVec; -typedef struct Label_s -{ - size_t location; - char *str; - size_t len; +typedef struct Label_s { + size_t location; + char *str; + size_t len; } Label; -typedef struct LabelVec_s -{ - Label *buf; - size_t cap; - size_t len; +typedef struct LabelVec_s { + Label *buf; + size_t cap; + size_t len; } LabelVec; -size_t label_lookup(LabelVec *labels, char *name, size_t len) -{ - size_t nlabels = labels->len; - Label *buf = labels->buf; - for (size_t ii = 0; ii < nlabels; ii += 1) - { - if (len == buf->len && strncmp(buf->str, name, len) == 0) - { - return ii; - } - buf += 1; +size_t label_lookup(LabelVec *labels, char *name, size_t len) { + size_t nlabels = labels->len; + Label *buf = labels->buf; + for (size_t ii = 0; ii < nlabels; ii += 1) { + if (len == buf->len && strncmp(buf->str, name, len) == 0) { + return ii; } - return INVALID; + buf += 1; + } + return INVALID; } // safety: assumes the buffer has enough place for specified integer size -AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) -{ - int valid_uint = val >> (size * 8) == 0; - int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); - int valid_int = int_shifted == 0 || (~int_shifted) == 0; - // Note: this assumes the format for `sign` is a bitset. - int validity = valid_int | (valid_uint << 1); - if ((validity & sign) == 0) - { - return ErrImmediateOverflow; - } - for (size_t ii = 0; ii < size; ii += 1) - { - buf[ii] = val & 0xff; - val >>= 8; - } - return ErrOk; +AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { + int valid_uint = val >> (size * 8) == 0; + int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); + int valid_int = int_shifted == 0 || (~int_shifted) == 0; + // Note: this assumes the format for `sign` is a bitset. + int validity = valid_int | (valid_uint << 1); + if ((validity & sign) == 0) { + return ErrImmediateOverflow; + } + for (size_t ii = 0; ii < size; ii += 1) { + buf[ii] = val & 0xff; + val >>= 8; + } + return ErrOk; } -AsmError assemble_instr( - InstHt ht, char *input, size_t len, Token *tok, - ByteVec *rv, HoleVec *holes, LabelVec *labels) -{ - const InstDesc *inst; - const char *type_str; - size_t nargs; - size_t size; - size_t idx = inst_lookup(ht, &input[tok->start], tok->len); - size_t inst_start = rv->len; - if (idx == INVALID) - { - return ErrInvalidToken; +AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok, + ByteVec *rv, HoleVec *holes, LabelVec *labels) { + const InstDesc *inst; + const char *type_str; + size_t nargs; + size_t size; + size_t idx = inst_lookup(ht, &input[tok->start], tok->len); + size_t inst_start = rv->len; + if (idx == INVALID) { + return ErrInvalidToken; + } + inst = &INST[idx]; + type_str = TYPE_STR[inst->type]; + nargs = strlen(type_str); + size = 1; + for (size_t ii = 0; ii < nargs; ii += 1) { + char chr = type_str[ii]; + ArgMeta meta = arg_meta(chr); + if (meta.chr == 0) { + return ErrBadArgumentMeta; } - inst = &INST[idx]; - type_str = TYPE_STR[inst->type]; - nargs = strlen(type_str); - size = 1; - for (size_t ii = 0; ii < nargs; ii += 1) - { - char chr = type_str[ii]; - ArgMeta meta = arg_meta(chr); - if (meta.chr == 0) - { - return ErrBadArgumentMeta; - } - size += meta.size; + size += meta.size; + } + if (ensure_push(rv, 1, size) != 0) { + return ErrOutOfMemory; + } + rv->buf[rv->len] = inst->opcode; + rv->len += 1; + for (size_t ii = 0; ii < nargs; ii += 1) { + if (ii > 0) { + *tok = token(input, len, tok->start + tok->len); + if (tok->kind != TokComma) { + return ErrNeedCommaAfterArgument; + } } - if (ensure_push(rv, 1, size) != 0) - { - return ErrOutOfMemory; + char chr = type_str[ii]; + ArgMeta meta = arg_meta(chr); + uint64_t is_negative = 0; + *tok = token(input, len, tok->start + tok->len); + if (tok->kind == TokNeg) { + *tok = token(input, len, tok->start + tok->len); + is_negative = ~(uint64_t)0; } - rv->buf[rv->len] = inst->opcode; - rv->len += 1; - for (size_t ii = 0; ii < nargs; ii += 1) - { - if (ii > 0) - { - *tok = token(input, len, tok->start + tok->len); - if (tok->kind != TokComma) - { - return ErrNeedCommaAfterArgument; + if (chr == 'R') { + int reg = parse_register(&input[tok->start], tok->len); + if (reg > 255) { + return ErrBadRegister; + } + rv->buf[rv->len] = (char)(reg & 0xff); + rv->len += 1; + } else { + uint64_t num_to_write; + if (meta.rel == 1 || meta.size == 8) { + if (tok->kind == TokIdent) { + size_t idx = label_lookup(labels, &input[tok->start], tok->len); + if (idx == INVALID) { + if (ensure_push((ByteVec *)holes, 1, sizeof(Hole)) != 0) { + return ErrOutOfMemory; } + holes->buf[holes->len] = (Hole){ + .location = rv->len, + .origin = inst_start, + .str = &input[tok->start], + .len = tok->len, + .size = (size_t)meta.size, + }; + holes->len += 1; + num_to_write = 0; + } else { + num_to_write = labels->buf[idx].location; + if (meta.size != 8) { + num_to_write -= inst_start; + } + } + } else if (tok->kind == TokNumber) { + num_to_write = tok->num; + } else { + return ErrLabelImmediate; } - char chr = type_str[ii]; - ArgMeta meta = arg_meta(chr); - uint64_t is_negative = 0; - *tok = token(input, len, tok->start + tok->len); - if (tok->kind == TokNeg) - { - *tok = token(input, len, tok->start + tok->len); - is_negative = ~(uint64_t)0; - } - if (chr == 'R') - { - int reg = parse_register(&input[tok->start], tok->len); - if (reg > 255) - { - return ErrBadRegister; - } - rv->buf[rv->len] = (char)(reg & 0xff); - rv->len += 1; - } - else - { - uint64_t num_to_write; - if (meta.rel == 1 || meta.size == 8) - { - if (tok->kind == TokIdent) - { - size_t idx = label_lookup(labels, &input[tok->start], tok->len); - if (idx == INVALID) - { - if (ensure_push((ByteVec *)holes, 1, sizeof(Hole)) != 0) - { - return ErrOutOfMemory; - } - holes->buf[holes->len] = (Hole){ - .location = rv->len, - .origin = inst_start, - .str = &input[tok->start], - .len = tok->len, - .size = (size_t)meta.size, - }; - holes->len += 1; - num_to_write = 0; - } - else - { - num_to_write = labels->buf[idx].location; - if (meta.size != 8) - { - num_to_write -= inst_start; - } - } - } - else if (tok->kind == TokNumber) - { - num_to_write = tok->num; - } - else - { - return ErrLabelImmediate; - } - } - else if (tok->kind == TokNumber) - { - num_to_write = tok->num; - } - else - { - return ErrNumberImmediate; - } - // num_to_write = num_to_write ^ is_negative - is_negative; - if (is_negative) - { - int64_t tmp = -(int64_t)num_to_write; - if (tmp > 0) - { - return ErrBadNumOverflow; - } - num_to_write = (uint64_t)tmp; - } - AsmError err = push_int_le( - &rv->buf[rv->len], num_to_write, meta.size, meta.sign); - if (err != 0) - { - return err; - } - rv->len += meta.size; + } else if (tok->kind == TokNumber) { + num_to_write = tok->num; + } else { + return ErrNumberImmediate; + } + // num_to_write = num_to_write ^ is_negative - is_negative; + if (is_negative) { + int64_t tmp = -(int64_t)num_to_write; + if (tmp > 0) { + return ErrBadNumOverflow; } + num_to_write = (uint64_t)tmp; + } + AsmError err = + push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign); + if (err != 0) { + return err; + } + rv->len += meta.size; } + } - return 0; + return 0; } -AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) -{ - ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; - HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; - LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; - size_t line = 0; - size_t line_start = 0; - size_t pos = 0; - // init=0, label=1, instruction=2, comment=3, newline -> 0 - size_t line_state = 0; - AsmError err = ErrOk; +AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, + EInfo *einfo) { + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; + HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; + LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; + size_t line = 0; + size_t line_start = 0; + size_t pos = 0; + // init=0, label=1, instruction=2, comment=3, newline -> 0 + size_t line_state = 0; + AsmError err = ErrOk; - while (1) - { - Token tok = token(input, len, pos); - einfo->token = tok; - pos = tok.start + tok.len; - if (tok.kind == TokInvalid || tok.kind == TokBadNumber) - { - if (tok.num) - { - err = (AsmError)tok.num; - } - else - { - err = ErrInvalidToken; - } - break; - } - if (tok.kind == TokEOF) - { - break; - } - if (tok.kind == TokComment) - { - line_state = 3; - continue; - } - if (tok.kind == TokNewline) - { - line += 1; - line_start = tok.start + tok.len; - line_state = 0; - continue; - } - if (tok.kind == TokDot) - { - Token next = token(input, len, pos); - if (next.kind == TokIdent) - { - err = ErrDirectiveNotImplemented; - goto end; - } - else - { - err = ErrNeedDirectiveAfterDot; - goto end; - } - continue; - } - if (tok.kind == TokIdent) - { - Token next = token(input, len, pos); - if (next.kind == TokColon) - { - // Label - pos = next.start + next.len; - if (line_state >= 1) - { - err = ErrLabelAfterLabel; - einfo->token = next; - goto end; - } - line_state = 1; - if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) - { - err = ErrOutOfMemory; - goto end; - } - size_t idx = label_lookup(&labels, &input[tok.start], tok.len); - if (idx != INVALID) - { - err = ErrDuplicateLabel; - goto end; - } - labels.buf[labels.len] = (Label){ - .location = rv.len, - .str = &input[tok.start], - .len = tok.len, - }; - labels.len += 1; - } - else - { - // Instruction - if (line_state >= 2) - { - err = ErrTrailingLine; - goto end; - } - line_state = 2; - err = assemble_instr( - ht, input, len, &tok, - &rv, &holes, &labels); - pos = tok.start + tok.len; - if (err != 0) - { - goto end; - } - } - continue; - } - err = ErrUnexpectedToken; + while (1) { + Token tok = token(input, len, pos); + einfo->token = tok; + pos = tok.start + tok.len; + if (tok.kind == TokInvalid || tok.kind == TokBadNumber) { + if (tok.num) { + err = (AsmError)tok.num; + } else { + err = ErrInvalidToken; + } + break; + } + if (tok.kind == TokEOF) { + break; + } + if (tok.kind == TokComment) { + line_state = 3; + continue; + } + if (tok.kind == TokNewline) { + line += 1; + line_start = tok.start + tok.len; + line_state = 0; + continue; + } + if (tok.kind == TokDot) { + Token next = token(input, len, pos); + if (next.kind == TokIdent) { + err = ErrDirectiveNotImplemented; goto end; + } else { + err = ErrNeedDirectiveAfterDot; + goto end; + } + continue; } + if (tok.kind == TokIdent) { + Token next = token(input, len, pos); + if (next.kind == TokColon) { + // Label + pos = next.start + next.len; + if (line_state >= 1) { + err = ErrLabelAfterLabel; + einfo->token = next; + goto end; + } + line_state = 1; + if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) { + err = ErrOutOfMemory; + goto end; + } + size_t idx = label_lookup(&labels, &input[tok.start], tok.len); + if (idx != INVALID) { + err = ErrDuplicateLabel; + goto end; + } + labels.buf[labels.len] = (Label){ + .location = rv.len, + .str = &input[tok.start], + .len = tok.len, + }; + labels.len += 1; + } else { + // Instruction + if (line_state >= 2) { + err = ErrTrailingLine; + goto end; + } + line_state = 2; + err = assemble_instr(ht, input, len, &tok, &rv, &holes, &labels); + pos = tok.start + tok.len; + if (err != 0) { + goto end; + } + } + continue; + } + err = ErrUnexpectedToken; + goto end; + } - for (size_t ii = 0; ii < holes.len; ii += 1) - { - Hole *hole = &holes.buf[ii]; - size_t idx = label_lookup(&labels, hole->str, hole->len); - uint64_t num_to_write = labels.buf[idx].location; - uint8_t sign = 1; - if (hole->size != 8) - { - sign = 2; - num_to_write -= hole->origin; - } - err = push_int_le( - &rv.buf[hole->location], num_to_write, hole->size, sign); - if (err != 0) - { - goto end; - } + for (size_t ii = 0; ii < holes.len; ii += 1) { + Hole *hole = &holes.buf[ii]; + size_t idx = label_lookup(&labels, hole->str, hole->len); + uint64_t num_to_write = labels.buf[idx].location; + uint8_t sign = 1; + if (hole->size != 8) { + sign = 2; + num_to_write -= hole->origin; } + err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, sign); + if (err != 0) { + goto end; + } + } end: - free(holes.buf); - free(labels.buf); - *out = rv; - einfo->line = line + 1; - einfo->line_start = line_start; - return err; + free(holes.buf); + free(labels.buf); + *out = rv; + einfo->line = line + 1; + einfo->line_start = line_start; + return err; } -int main(int argc, char **argv) -{ - int hex_out = 0; - if (argc >= 2 && strcmp(argv[1], "--hex") == 0) - { - hex_out = 1; - } +int main(int argc, char **argv) { + int hex_out = 0; + if (argc >= 2 && strcmp(argv[1], "--hex") == 0) { + hex_out = 1; + } - int err = 0; - InstHt ht = NULL; - ByteVec input; + int err = 0; + InstHt ht = NULL; + ByteVec input; - err = slurp(stdin, &input); - if (err != 0) - { - fprintf(stderr, "failed to read the file: %d\n", err); - goto done; - } - ht = build_lookup(); - if (ht == NULL) - { - err = ErrOutOfMemory; - fprintf(stderr, "failed to init hash table: %d\n", err); - goto done; - } + err = slurp(stdin, &input); + if (err != 0) { + fprintf(stderr, "failed to read the file: %d\n", err); + goto done; + } + ht = build_lookup(); + if (ht == NULL) { + err = ErrOutOfMemory; + fprintf(stderr, "failed to init hash table: %d\n", err); + goto done; + } - ByteVec out; - EInfo einfo; - err = assemble(ht, input.buf, input.len, &out, &einfo); - if (err != 0) - { - size_t column = einfo.token.start - einfo.line_start + 1; - fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", - ERRORS[err], einfo.line, column, - (int)einfo.token.len, &input.buf[einfo.token.start]); - goto done; - } - if (hex_out) - { - hd(out.buf, out.len); - } - else - { - fwrite(out.buf, 1, out.len, stdout); - } + ByteVec out; + EInfo einfo; + err = assemble(ht, input.buf, input.len, &out, &einfo); + if (err != 0) { + size_t column = einfo.token.start - einfo.line_start + 1; + fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", + ERRORS[err], einfo.line, column, (int)einfo.token.len, + &input.buf[einfo.token.start]); + goto done; + } + if (hex_out) { + hd(out.buf, out.len); + } else { + fwrite(out.buf, 1, out.len, stdout); + } done: - free(ht); - free(input.buf); - free(out.buf); - return err; + free(ht); + free(input.buf); + free(out.buf); + return err; } diff --git a/src/instructions.c b/src/instructions.c index 77a1566..44f2e02 100644 --- a/src/instructions.c +++ b/src/instructions.c @@ -1,142 +1,78 @@ -typedef struct InstDesc_s -{ - char *mnemonic; - unsigned char opcode; - OpType type; +typedef struct InstDesc_s { + char *mnemonic; + unsigned char opcode; + Operands type; } InstDesc; const InstDesc INST[] = { - {"un", 0x00, Empty}, - {"tx", 0x01, Empty}, - {"nop", 0x02, Empty}, - {"add8", 0x03, RRR}, - {"add16", 0x04, RRR}, - {"add32", 0x05, RRR}, - {"add64", 0x06, RRR}, - {"sub8", 0x07, RRR}, - {"sub16", 0x08, RRR}, - {"sub32", 0x09, RRR}, - {"sub64", 0x0A, RRR}, - {"mul8", 0x0B, RRR}, - {"mul16", 0x0C, RRR}, - {"mul32", 0x0D, RRR}, - {"mul64", 0x0E, RRR}, - {"and", 0x0F, RRR}, - {"or", 0x10, RRR}, - {"xor", 0x11, RRR}, - {"slu8", 0x12, RRR}, - {"slu16", 0x13, RRR}, - {"slu32", 0x14, RRR}, - {"slu64", 0x15, RRR}, - {"sru8", 0x16, RRR}, - {"sru16", 0x17, RRR}, - {"sru32", 0x18, RRR}, - {"sru64", 0x19, RRR}, - {"srs8", 0x1A, RRR}, - {"srs16", 0x1B, RRR}, - {"srs32", 0x1C, RRR}, - {"srs64", 0x1D, RRR}, - {"cmpu", 0x1E, RRR}, - {"cmps", 0x1F, RRR}, - {"diru8", 0x20, RRRR}, - {"diru16", 0x21, RRRR}, - {"diru32", 0x22, RRRR}, - {"diru64", 0x23, RRRR}, - {"dirs8", 0x24, RRRR}, - {"dirs16", 0x25, RRRR}, - {"dirs32", 0x26, RRRR}, - {"dirs64", 0x27, RRRR}, - {"neg", 0x28, RR}, - {"not", 0x29, RR}, - {"sxt8", 0x2A, RR}, - {"sxt16", 0x2B, RR}, - {"sxt32", 0x2C, RR}, - {"addi8", 0x2D, RRx8}, - {"addi16", 0x2E, RRx16}, - {"addi32", 0x2F, RRx32}, - {"addi64", 0x30, RRx64}, - {"muli8", 0x31, RRx8}, - {"muli16", 0x32, RRx16}, - {"muli32", 0x33, RRx32}, - {"muli64", 0x34, RRx64}, - {"andi", 0x35, RRx64}, - {"ori", 0x36, RRx64}, - {"xori", 0x37, RRx64}, - {"slui8", 0x38, RRu8}, - {"slui16", 0x39, RRu8}, - {"slui32", 0x3A, RRu8}, - {"slui64", 0x3B, RRu8}, - {"srui8", 0x3C, RRu8}, - {"srui16", 0x3D, RRu8}, - {"srui32", 0x3E, RRu8}, - {"srui64", 0x3F, RRu8}, - {"srsi8", 0x40, RRu8}, - {"srsi16", 0x41, RRu8}, - {"srsi32", 0x42, RRu8}, - {"srsi64", 0x43, RRu8}, - {"cmpui", 0x44, RRu64}, - {"cmpsi", 0x45, RRs64}, - {"cp", 0x46, RR}, - {"swa", 0x47, RR}, - {"li8", 0x48, Rx8}, - {"li16", 0x49, Rx16}, - {"li32", 0x4A, Rx32}, - {"li64", 0x4B, Rx64}, - {"lra", 0x4C, RRr32}, - {"ld", 0x4D, RRu64u16}, - {"st", 0x4E, RRu64u16}, - {"ldr", 0x4F, RRr32u16}, - {"str", 0x50, RRr32u16}, - {"bmc", 0x51, RRu16}, - {"brc", 0x52, RRu8}, - {"jmp", 0x53, r32}, - {"jal", 0x54, RRr32}, - {"jala", 0x55, RRu64}, - {"jeq", 0x56, RRr16}, - {"jne", 0x57, RRr16}, - {"jltu", 0x58, RRr16}, - {"jgtu", 0x59, RRr16}, - {"jlts", 0x5A, RRr16}, - {"jgts", 0x5B, RRr16}, - {"eca", 0x5C, Empty}, - {"ebp", 0x5D, Empty}, - {"fadd32", 0x5E, RRR}, - {"fadd64", 0x5F, RRR}, - {"fsub32", 0x60, RRR}, - {"fsub64", 0x61, RRR}, - {"fmul32", 0x62, RRR}, - {"fmul64", 0x63, RRR}, - {"fdiv32", 0x64, RRR}, - {"fdiv64", 0x65, RRR}, - {"fma32", 0x66, RRRR}, - {"fma64", 0x67, RRRR}, - {"fcmplt32", 0x6A, RRR}, - {"fcmplt64", 0x6B, RRR}, - {"fcmpgt32", 0x6C, RRR}, - {"fcmpgt64", 0x6D, RRR}, - {"itf32", 0x6E, RR}, - {"itf64", 0x6F, RR}, - {"fti32", 0x70, RRu8}, - {"fti64", 0x71, RRu8}, - {"fc32t64", 0x72, RR}, - {"fc64t32", 0x73, RR}, - {"lra16", 0x74, RRr16}, - {"ldr16", 0x75, RRr16u16}, - {"str16", 0x76, RRr16u16}, - {"jmp16", 0x77, r16}, + {"un", 0x00, Empty}, {"tx", 0x01, Empty}, + {"nop", 0x02, Empty}, {"add8", 0x03, RRR}, + {"add16", 0x04, RRR}, {"add32", 0x05, RRR}, + {"add64", 0x06, RRR}, {"sub8", 0x07, RRR}, + {"sub16", 0x08, RRR}, {"sub32", 0x09, RRR}, + {"sub64", 0x0A, RRR}, {"mul8", 0x0B, RRR}, + {"mul16", 0x0C, RRR}, {"mul32", 0x0D, RRR}, + {"mul64", 0x0E, RRR}, {"and", 0x0F, RRR}, + {"or", 0x10, RRR}, {"xor", 0x11, RRR}, + {"slu8", 0x12, RRR}, {"slu16", 0x13, RRR}, + {"slu32", 0x14, RRR}, {"slu64", 0x15, RRR}, + {"sru8", 0x16, RRR}, {"sru16", 0x17, RRR}, + {"sru32", 0x18, RRR}, {"sru64", 0x19, RRR}, + {"srs8", 0x1A, RRR}, {"srs16", 0x1B, RRR}, + {"srs32", 0x1C, RRR}, {"srs64", 0x1D, RRR}, + {"cmpu", 0x1E, RRR}, {"cmps", 0x1F, RRR}, + {"diru8", 0x20, RRRR}, {"diru16", 0x21, RRRR}, + {"diru32", 0x22, RRRR}, {"diru64", 0x23, RRRR}, + {"dirs8", 0x24, RRRR}, {"dirs16", 0x25, RRRR}, + {"dirs32", 0x26, RRRR}, {"dirs64", 0x27, RRRR}, + {"neg", 0x28, RR}, {"not", 0x29, RR}, + {"sxt8", 0x2A, RR}, {"sxt16", 0x2B, RR}, + {"sxt32", 0x2C, RR}, {"addi8", 0x2D, RRx8}, + {"addi16", 0x2E, RRx16}, {"addi32", 0x2F, RRx32}, + {"addi64", 0x30, RRx64}, {"muli8", 0x31, RRx8}, + {"muli16", 0x32, RRx16}, {"muli32", 0x33, RRx32}, + {"muli64", 0x34, RRx64}, {"andi", 0x35, RRx64}, + {"ori", 0x36, RRx64}, {"xori", 0x37, RRx64}, + {"slui8", 0x38, RRu8}, {"slui16", 0x39, RRu8}, + {"slui32", 0x3A, RRu8}, {"slui64", 0x3B, RRu8}, + {"srui8", 0x3C, RRu8}, {"srui16", 0x3D, RRu8}, + {"srui32", 0x3E, RRu8}, {"srui64", 0x3F, RRu8}, + {"srsi8", 0x40, RRu8}, {"srsi16", 0x41, RRu8}, + {"srsi32", 0x42, RRu8}, {"srsi64", 0x43, RRu8}, + {"cmpui", 0x44, RRu64}, {"cmpsi", 0x45, RRs64}, + {"cp", 0x46, RR}, {"swa", 0x47, RR}, + {"li8", 0x48, Rx8}, {"li16", 0x49, Rx16}, + {"li32", 0x4A, Rx32}, {"li64", 0x4B, Rx64}, + {"lra", 0x4C, RRr32}, {"ld", 0x4D, RRu64u16}, + {"st", 0x4E, RRu64u16}, {"ldr", 0x4F, RRr32u16}, + {"str", 0x50, RRr32u16}, {"bmc", 0x51, RRu16}, + {"brc", 0x52, RRu8}, {"jmp", 0x53, r32}, + {"jal", 0x54, RRr32}, {"jala", 0x55, RRu64}, + {"jeq", 0x56, RRr16}, {"jne", 0x57, RRr16}, + {"jltu", 0x58, RRr16}, {"jgtu", 0x59, RRr16}, + {"jlts", 0x5A, RRr16}, {"jgts", 0x5B, RRr16}, + {"eca", 0x5C, Empty}, {"ebp", 0x5D, Empty}, + {"fadd32", 0x5E, RRR}, {"fadd64", 0x5F, RRR}, + {"fsub32", 0x60, RRR}, {"fsub64", 0x61, RRR}, + {"fmul32", 0x62, RRR}, {"fmul64", 0x63, RRR}, + {"fdiv32", 0x64, RRR}, {"fdiv64", 0x65, RRR}, + {"fma32", 0x66, RRRR}, {"fma64", 0x67, RRRR}, + {"fcmplt32", 0x6A, RRR}, {"fcmplt64", 0x6B, RRR}, + {"fcmpgt32", 0x6C, RRR}, {"fcmpgt64", 0x6D, RRR}, + {"itf32", 0x6E, RR}, {"itf64", 0x6F, RR}, + {"fti32", 0x70, RRu8}, {"fti64", 0x71, RRu8}, + {"fc32t64", 0x72, RR}, {"fc64t32", 0x73, RR}, + {"lra16", 0x74, RRr16}, {"ldr16", 0x75, RRr16u16}, + {"str16", 0x76, RRr16u16}, {"jmp16", 0x77, r16}, }; const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); -const size_t INVALID = ~(size_t)0; -size_t inst_find(const char *mnemonic, size_t len) -{ - for (size_t ii = 0; ii < INST_CNT; ii += 1) - { - const char *entry = INST[ii].mnemonic; - if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') - { - return ii; - } +size_t inst_find(const char *mnemonic, size_t len) { + for (size_t ii = 0; ii < INST_CNT; ii += 1) { + const char *entry = INST[ii].mnemonic; + if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') { + return ii; } - return INVALID; + } + return INVALID; } diff --git a/src/op.h b/src/op.h deleted file mode 100644 index 685e028..0000000 --- a/src/op.h +++ /dev/null @@ -1,62 +0,0 @@ -typedef enum OpType_e -{ - Empty = 0, - R, - RR, - RRR, - RRRR, - Rx8, - Rx16, - Rx32, - Rx64, - RRx8, - RRx16, - RRx32, - RRx64, - RRs32, - RRs64, - RRu8, - RRu16, - RRu64, - r16, - r32, - RRr16, - RRr32, - RRr16u16, - RRr32u16, - RRu64u16, -} OpType; -// R -> register, -// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, -// b -> Si8, h -> Si16, w -> Si32, d -> Si64, -// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64, -// o -> 16 bit relative offset, -// O -> 32 bit relative offset, - -const char *TYPE_STR[] = { - "", - "R", - "RR", - "RRR", - "RRRR", - "R1", - "R2", - "R4", - "R8", - "RR1", - "RR2", - "RR4", - "RR8", - "RRw", - "RRd", - "RRB", - "RRH", - "RRD", - "o", - "O", - "RRo", - "RRO", - "RRoH", - "RROH", - "RRDH", -}; diff --git a/src/register.c b/src/register.c index 0083a6f..15047d2 100644 --- a/src/register.c +++ b/src/register.c @@ -1,30 +1,23 @@ -int parse_register(char *name, size_t len) -{ - if (name[0] != 'r') - { - return 256; // Register name should start with 'r' +int parse_register(char *name, size_t len) { + if (name[0] != 'r') { + return 256; // Register name should start with 'r' + } + if (len > 4) { + return 256; // Register name too long + } + uint16_t rv = 0; + if (len > 2 && name[1] == '0') { + return 256; // Extra zero suffix + } + for (size_t ii = 1; ii < len; ii += 1) { + char chr = name[ii]; + if (!(chr >= '0' && chr <= '9')) { + return 256; // Register name must only contain numbers } - if (len > 4) - { - return 256; // Register name too long - } - uint16_t rv = 0; - if (len > 2 && name[1] == '0') - { - return 256; // Extra zero suffix - } - for (size_t ii = 1; ii < len; ii += 1) - { - char chr = name[ii]; - if (!(chr >= '0' && chr <= '9')) - { - return 256; // Register name must only contain numbers - } - rv = rv * 10 + (chr - '0'); - } - if (rv > 255) - { - return 256; // Register number too large - } - return (int)rv; + rv = rv * 10 + (chr - '0'); + } + if (rv > 255) { + return 256; // Register number too large + } + return (int)rv; }