diff --git a/Makefile b/Makefile index c3346c6..cd9954b 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,19 @@ CC = gcc CFLAGS_EXTRA = CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 +CLANG_FORMAT_STYLE = '{ BasedOnStyle: Google, IndentWidth: 4 }' -.PHONY: clean build-dir hbas example +.PHONY: clean hbas example format check-format hbas: build/hbas example: build/example.hbf +format: + clang-format --style=${CLANG_FORMAT_STYLE} -i src/* + +check-format: + clang-format --style=${CLANG_FORMAT_STYLE} -i --dry-run -Werror src/* + build: mkdir -p build diff --git a/src/args.c b/src/args.c index 11668dd..be74e1e 100644 --- a/src/args.c +++ b/src/args.c @@ -1,11 +1,11 @@ typedef struct ArgMeta_s { - char chr; - uint8_t size; - // This is a bitset of acceptable overflow states, - // where accept signed = 1, accept unsigned = 2. - // 1 -> signed, 2 -> unsigned, 3 -> whatever - uint8_t sign; - uint8_t rel; + char chr; + uint8_t size; + // This is a bitset of acceptable overflow states, + // where accept signed = 1, accept unsigned = 2. + // 1 -> signed, 2 -> unsigned, 3 -> whatever + uint8_t sign; + uint8_t rel; } ArgMeta; const ArgMeta ARGS[] = { {'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0}, @@ -15,31 +15,31 @@ const ArgMeta ARGS[] = { }; typedef enum Operands_e { - Empty = 0, - R, - RR, - RRR, - RRRR, - Rx8, - Rx16, - Rx32, - Rx64, - RRx8, - RRx16, - RRx32, - RRx64, - RRs32, - RRs64, - RRu8, - RRu16, - RRu64, - r16, - r32, - RRr16, - RRr32, - RRr16u16, - RRr32u16, - RRu64u16, + Empty = 0, + R, + RR, + RRR, + RRRR, + Rx8, + Rx16, + Rx32, + Rx64, + RRx8, + RRx16, + RRx32, + RRx64, + RRs32, + RRs64, + RRu8, + RRu16, + RRu64, + r16, + r32, + RRr16, + RRr32, + RRr16u16, + RRr32u16, + RRu64u16, } Operands; // R -> register, // 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, @@ -56,11 +56,11 @@ const char *TYPE_STR[] = { const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); ArgMeta arg_meta(char arg) { - for (size_t ii = 0; ii < NARGS; ii += 1) { - ArgMeta meta = ARGS[ii]; - if (meta.chr == arg) { - return meta; + for (size_t ii = 0; ii < NARGS; ii += 1) { + ArgMeta meta = ARGS[ii]; + if (meta.chr == arg) { + return meta; + } } - } - return ARGS[NARGS - 1]; + return ARGS[NARGS - 1]; } diff --git a/src/bytevec.c b/src/bytevec.c index ed774e8..0d87620 100644 --- a/src/bytevec.c +++ b/src/bytevec.c @@ -1,29 +1,29 @@ const size_t INVALID = ~(size_t)0; typedef struct ByteVec_s { - char *buf; - size_t cap; - size_t len; + char *buf; + size_t cap; + size_t len; } ByteVec; AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) { - if (vec->len + extra < vec->len) { - return ErrOutOfMemory; - } - while (vec->len + extra > vec->cap) { - if ((~(size_t)0) / 2 < vec->cap) { - return ErrOutOfMemory; + if (vec->len + extra < vec->len) { + return ErrOutOfMemory; } - vec->cap *= 2; - // multiply overflow - if ((~(size_t)0) / el_size < vec->cap) { - return ErrOutOfMemory; + while (vec->len + extra > vec->cap) { + if ((~(size_t)0) / 2 < vec->cap) { + return ErrOutOfMemory; + } + vec->cap *= 2; + // multiply overflow + if ((~(size_t)0) / el_size < vec->cap) { + return ErrOutOfMemory; + } + vec->buf = realloc(vec->buf, el_size * vec->cap); + if (vec->buf == NULL) { + vec->cap = 0; + return ErrOutOfMemory; + } } - vec->buf = realloc(vec->buf, el_size * vec->cap); - if (vec->buf == NULL) { - vec->cap = 0; - return ErrOutOfMemory; - } - } - return 0; + return 0; } diff --git a/src/einfo.h b/src/einfo.h index d52021c..1dcad3f 100644 --- a/src/einfo.h +++ b/src/einfo.h @@ -1,5 +1,5 @@ typedef struct EInfo_s { - Token token; - size_t line; - size_t line_start; + Token token; + size_t line; + size_t line_start; } EInfo; diff --git a/src/error.h b/src/error.h index f1d6e35..d0b293d 100644 --- a/src/error.h +++ b/src/error.h @@ -1,23 +1,23 @@ typedef enum AsmError_e { - ErrOk = 0, - ErrBadRegister, - ErrImmediateOverflow, - ErrInvalidToken, - ErrBadArgumentMeta, - ErrNeedCommaAfterArgument, - ErrLabelImmediate, - ErrNumberImmediate, - ErrBadNumOverflow, - ErrBadNumDigit, - ErrBadNumNoDigit, - ErrLabelAfterLabel, - ErrOutOfMemory, - ErrDuplicateLabel, - ErrTrailingLine, - ErrNeedDirectiveAfterDot, - ErrDirectiveNotImplemented, - ErrUnexpectedToken, - ErrTriedNegateNonNumber, + ErrOk = 0, + ErrBadRegister, + ErrImmediateOverflow, + ErrInvalidToken, + ErrBadArgumentMeta, + ErrNeedCommaAfterArgument, + ErrLabelImmediate, + ErrNumberImmediate, + ErrBadNumOverflow, + ErrBadNumDigit, + ErrBadNumNoDigit, + ErrLabelAfterLabel, + ErrOutOfMemory, + ErrDuplicateLabel, + ErrTrailingLine, + ErrNeedDirectiveAfterDot, + ErrDirectiveNotImplemented, + ErrUnexpectedToken, + ErrTriedNegateNonNumber, } AsmError; char *ERRORS[] = { "Success", diff --git a/src/hash.c b/src/hash.c index 34d74ef..21d1d0f 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,57 +1,57 @@ // Instruction Hash table, for faster lookups typedef struct InstHtNode_s { - uint8_t index1; - uint8_t index2; + uint8_t index1; + uint8_t index2; } InstHtNode; typedef InstHtNode *InstHt; uint32_t inst_hash(const char *s, size_t len) { - uint32_t hash = 0; - uint32_t mul = 75; - for (size_t ii = 0; ii < len; ii += 1) { - hash ^= s[ii] * mul; - hash *= mul; - } - return hash; + uint32_t hash = 0; + uint32_t mul = 75; + for (size_t ii = 0; ii < len; ii += 1) { + hash ^= s[ii] * mul; + hash *= mul; + } + return hash; } InstHt build_lookup(void) { - const size_t size = 256; - InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); - if (table == NULL) { - return table; - } - for (size_t ii = 0; ii < size; ii += 1) { - table[ii] = (InstHtNode){0xff, 0xff}; - } - for (size_t ii = 0; ii < INST_CNT; ii += 1) { - const char *mnemonic = INST[ii].mnemonic; - uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); - InstHtNode *node = &table[hash & 0xff]; - if (node->index1 == 0xff) { - node->index1 = ii; - } else if (node->index2 == 0xff) { - node->index2 = ii; - } else { - fprintf(stderr, "more than 1 collision in hash table\n"); - exit(1); + const size_t size = 256; + InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); + if (table == NULL) { + return table; } - } - return table; + for (size_t ii = 0; ii < size; ii += 1) { + table[ii] = (InstHtNode){0xff, 0xff}; + } + for (size_t ii = 0; ii < INST_CNT; ii += 1) { + const char *mnemonic = INST[ii].mnemonic; + uint32_t hash = inst_hash(mnemonic, strlen(mnemonic)); + InstHtNode *node = &table[hash & 0xff]; + if (node->index1 == 0xff) { + node->index1 = ii; + } else if (node->index2 == 0xff) { + node->index2 = ii; + } else { + fprintf(stderr, "more than 1 collision in hash table\n"); + exit(1); + } + } + return table; } size_t inst_lookup(InstHt ht, const char *s, size_t len) { - uint32_t hash = inst_hash(s, len); - uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; - for (size_t ii = 0; ii < 2; ii += 1) { - size_t idx = (size_t)node[ii]; - if (idx == 0xff) { - break; + uint32_t hash = inst_hash(s, len); + uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; + for (size_t ii = 0; ii < 2; ii += 1) { + size_t idx = (size_t)node[ii]; + if (idx == 0xff) { + break; + } + const char *mnemonic = INST[idx].mnemonic; + if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) { + return idx; + } } - const char *mnemonic = INST[idx].mnemonic; - if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) { - return idx; - } - } - return INVALID; + return INVALID; } diff --git a/src/hbas.c b/src/hbas.c index 453c446..7c08e11 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -40,376 +40,379 @@ SOFTWARE. #include "einfo.h" void hd(char *data, size_t len) { - for (size_t ii = 0; ii < len; ii += 1) { - if (ii > 0 && (ii & 15) == 0) { - printf("\n"); + for (size_t ii = 0; ii < len; ii += 1) { + if (ii > 0 && (ii & 15) == 0) { + printf("\n"); + } + printf("%02x", (uint8_t)data[ii]); } - printf("%02x", (uint8_t)data[ii]); - } - printf("\n"); + printf("\n"); } #define MIN_SIZE 4096 int slurp(FILE *fd, ByteVec *out) { - ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; - size_t bread = 1; - int err = 0; - if (rv.buf == NULL) { - rv.cap = 0; - err = ErrOutOfMemory; - bread = 0; - } - while (bread > 0) { - if (ensure_push(&rv, 1, 1) != 0) { - err = ErrOutOfMemory; - break; + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; + size_t bread = 1; + int err = 0; + if (rv.buf == NULL) { + rv.cap = 0; + err = ErrOutOfMemory; + bread = 0; } - bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); - rv.len += bread; - } - *out = rv; - if (err == 0) { - err = ferror(fd); - } - return err; + while (bread > 0) { + if (ensure_push(&rv, 1, 1) != 0) { + err = ErrOutOfMemory; + break; + } + bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); + rv.len += bread; + } + *out = rv; + if (err == 0) { + err = ferror(fd); + } + return err; } typedef struct Hole_s { - size_t location; - size_t origin; - char *str; - size_t len; - size_t size; + size_t location; + size_t origin; + char *str; + size_t len; + size_t size; } Hole; typedef struct HoleVec_s { - Hole *buf; - size_t cap; - size_t len; + Hole *buf; + size_t cap; + size_t len; } HoleVec; typedef struct Label_s { - size_t location; - char *str; - size_t len; + size_t location; + char *str; + size_t len; } Label; typedef struct LabelVec_s { - Label *buf; - size_t cap; - size_t len; + Label *buf; + size_t cap; + size_t len; } LabelVec; size_t label_lookup(LabelVec *labels, char *name, size_t len) { - size_t nlabels = labels->len; - Label *buf = labels->buf; - for (size_t ii = 0; ii < nlabels; ii += 1) { - if (len == buf->len && strncmp(buf->str, name, len) == 0) { - return ii; + size_t nlabels = labels->len; + Label *buf = labels->buf; + for (size_t ii = 0; ii < nlabels; ii += 1) { + if (len == buf->len && strncmp(buf->str, name, len) == 0) { + return ii; + } + buf += 1; } - buf += 1; - } - return INVALID; + return INVALID; } // safety: assumes the buffer has enough place for specified integer size. // `sign` is a bitset, where bit `1` indicates that value accepts a signed int, // and bit `2` indicates that value accepts an unsigned int. AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { - // Unsigned integers must have all upper bits set to zero. To check this, - // we shift the value right by the integer size and verify it equals zero. - int valid_uint = (val >> (size * 8)) == 0; + // Unsigned integers must have all upper bits set to zero. To check this, + // we shift the value right by the integer size and verify it equals zero. + int valid_uint = (val >> (size * 8)) == 0; - // For signed integers, the sign-extended high bits must match the sign bit. - // By shifting right by one less than the total bit size (size * 8 - 1), - // we isolate the sign bit and any sign-extended bits. For a value fitting - // in the signed range, this operation results in either 0 (for non-negative - // values) or -1 (for negative values due to sign extension). - int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); + // For signed integers, the sign-extended high bits must match the sign bit. + // By shifting right by one less than the total bit size (size * 8 - 1), + // we isolate the sign bit and any sign-extended bits. For a value fitting + // in the signed range, this operation results in either 0 (for non-negative + // values) or -1 (for negative values due to sign extension). + int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); - // To unify the check for both positive and negative cases, we adjust - // non-zero values (-1) by incrementing by 1. This turns -1 into 0, - // enabling a single check for 0 to validate both cases. This adjustment - // simplifies the validation logic, allowing us to use a single condition to - // check for proper sign extension or zero extension in the original value. - int_shifted += int_shifted != 0; + // To unify the check for both positive and negative cases, we adjust + // non-zero values (-1) by incrementing by 1. This turns -1 into 0, + // enabling a single check for 0 to validate both cases. This adjustment + // simplifies the validation logic, allowing us to use a single condition to + // check for proper sign extension or zero extension in the original value. + int_shifted += int_shifted != 0; - // A valid signed integer will have `int_shifted` equal to 0 - // after adjustment, indicating proper sign extension. - int valid_int = int_shifted == 0; + // A valid signed integer will have `int_shifted` equal to 0 + // after adjustment, indicating proper sign extension. + int valid_int = int_shifted == 0; - // Validity bitmask to represents whether the value - // fits as signed, unsigned, or both. - int validity = valid_int | (valid_uint << 1); + // Validity bitmask to represents whether the value + // fits as signed, unsigned, or both. + int validity = valid_int | (valid_uint << 1); - // If the value's validity doesn't match the `sign` requirements, - // we report an overflow. - if ((validity & sign) == 0) { - return ErrImmediateOverflow; - } + // If the value's validity doesn't match the `sign` requirements, + // we report an overflow. + if ((validity & sign) == 0) { + return ErrImmediateOverflow; + } - // Write out the bytes of the integer to the buffer in little-endian order, - // starting with the lowest byte first. - for (size_t ii = 0; ii < size; ii += 1) { - buf[ii] = val & 0xff; - val >>= 8; - } - return ErrOk; + // Write out the bytes of the integer to the buffer in little-endian order, + // starting with the lowest byte first. + for (size_t ii = 0; ii < size; ii += 1) { + buf[ii] = val & 0xff; + val >>= 8; + } + return ErrOk; } AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok, ByteVec *rv, HoleVec *holes) { - const InstDesc *inst; - const char *type_str; - size_t nargs; - size_t size; - size_t idx = inst_lookup(ht, &input[tok->start], tok->len); - size_t inst_start = rv->len; - if (idx == INVALID) { - return ErrInvalidToken; - } - inst = &INST[idx]; - type_str = TYPE_STR[inst->type]; - nargs = strlen(type_str); - size = 1; - for (size_t ii = 0; ii < nargs; ii += 1) { - char chr = type_str[ii]; - ArgMeta meta = arg_meta(chr); - if (meta.chr == 0) { - return ErrBadArgumentMeta; + const InstDesc *inst; + const char *type_str; + size_t nargs; + size_t size; + size_t idx = inst_lookup(ht, &input[tok->start], tok->len); + size_t inst_start = rv->len; + if (idx == INVALID) { + return ErrInvalidToken; } - size += meta.size; - } - if (ensure_push(rv, 1, size) != 0) { - return ErrOutOfMemory; - } - rv->buf[rv->len] = inst->opcode; - rv->len += 1; - for (size_t ii = 0; ii < nargs; ii += 1) { - if (ii > 0) { - *tok = token(input, len, tok->start + tok->len); - if (tok->kind != TokComma) { - return ErrNeedCommaAfterArgument; - } + inst = &INST[idx]; + type_str = TYPE_STR[inst->type]; + nargs = strlen(type_str); + size = 1; + for (size_t ii = 0; ii < nargs; ii += 1) { + char chr = type_str[ii]; + ArgMeta meta = arg_meta(chr); + if (meta.chr == 0) { + return ErrBadArgumentMeta; + } + size += meta.size; } - char chr = type_str[ii]; - ArgMeta meta = arg_meta(chr); - uint64_t is_negative = 0; - *tok = token(input, len, tok->start + tok->len); - if (tok->kind == TokNeg) { - *tok = token(input, len, tok->start + tok->len); - if (tok->kind != TokNumber) { - return ErrTriedNegateNonNumber; - } - is_negative -= 1; + if (ensure_push(rv, 1, size) != 0) { + return ErrOutOfMemory; } - if (chr == 'R') { - int reg = parse_register(&input[tok->start], tok->len); - if (reg > 255) { - return ErrBadRegister; - } - rv->buf[rv->len] = (char)(reg & 0xff); - rv->len += 1; - } else { - uint64_t num_to_write; - if (meta.rel == 1 || meta.size == 8) { - if (tok->kind == TokIdent) { - if (ensure_push((ByteVec*)holes, sizeof(Hole), 1) != 0) { - return ErrOutOfMemory; - } - holes->buf[holes->len] = (Hole) { - .location = rv->len, - .origin = inst_start, - .str = &input[tok->start], - .len = tok->len, - .size = (size_t)meta.size, - }; - holes->len += 1; - num_to_write = 0; - } else if (tok->kind == TokNumber) { - num_to_write = tok->num; + rv->buf[rv->len] = inst->opcode; + rv->len += 1; + for (size_t ii = 0; ii < nargs; ii += 1) { + if (ii > 0) { + *tok = token(input, len, tok->start + tok->len); + if (tok->kind != TokComma) { + return ErrNeedCommaAfterArgument; + } + } + char chr = type_str[ii]; + ArgMeta meta = arg_meta(chr); + uint64_t is_negative = 0; + *tok = token(input, len, tok->start + tok->len); + if (tok->kind == TokNeg) { + *tok = token(input, len, tok->start + tok->len); + if (tok->kind != TokNumber) { + return ErrTriedNegateNonNumber; + } + is_negative -= 1; + } + if (chr == 'R') { + int reg = parse_register(&input[tok->start], tok->len); + if (reg > 255) { + return ErrBadRegister; + } + rv->buf[rv->len] = (char)(reg & 0xff); + rv->len += 1; } else { - return ErrLabelImmediate; + uint64_t num_to_write; + if (meta.rel == 1 || meta.size == 8) { + if (tok->kind == TokIdent) { + if (ensure_push((ByteVec *)holes, sizeof(Hole), 1) != 0) { + return ErrOutOfMemory; + } + holes->buf[holes->len] = (Hole){ + .location = rv->len, + .origin = inst_start, + .str = &input[tok->start], + .len = tok->len, + .size = (size_t)meta.size, + }; + holes->len += 1; + num_to_write = 0; + } else if (tok->kind == TokNumber) { + num_to_write = tok->num; + } else { + return ErrLabelImmediate; + } + } else if (tok->kind == TokNumber) { + num_to_write = tok->num; + } else { + return ErrNumberImmediate; + } + // num_to_write = num_to_write ^ is_negative - is_negative; + if (is_negative) { + int64_t tmp = -(int64_t)num_to_write; + if (tmp > 0) { + return ErrBadNumOverflow; + } + num_to_write = (uint64_t)tmp; + } + AsmError err = push_int_le(&rv->buf[rv->len], num_to_write, + meta.size, meta.sign); + if (err != ErrOk) { + return err; + } + rv->len += meta.size; } - } else if (tok->kind == TokNumber) { - num_to_write = tok->num; - } else { - return ErrNumberImmediate; - } - // num_to_write = num_to_write ^ is_negative - is_negative; - if (is_negative) { - int64_t tmp = -(int64_t)num_to_write; - if (tmp > 0) { - return ErrBadNumOverflow; - } - num_to_write = (uint64_t)tmp; - } - AsmError err = - push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign); - if (err != ErrOk) { - return err; - } - rv->len += meta.size; } - } - return ErrOk; + return ErrOk; } -AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) { - ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; - HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; - LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; - size_t line = 0; - size_t line_start = 0; - size_t pos = 0; - // init=0, label=1, instruction=2, comment=3, newline -> 0 - size_t line_state = 0; - AsmError err = ErrOk; +AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, + EInfo *einfo) { + ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; + HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; + LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; + size_t line = 0; + size_t line_start = 0; + size_t pos = 0; + // init=0, label=1, instruction=2, comment=3, newline -> 0 + size_t line_state = 0; + AsmError err = ErrOk; - while (1) { - Token tok = token(input, len, pos); - einfo->token = tok; - pos = tok.start + tok.len; - if (tok.kind == TokInvalid || tok.kind == TokBadNumber) { - if (tok.num) { - err = (AsmError)tok.num; - } else { - err = ErrInvalidToken; - } - break; - } - if (tok.kind == TokEOF) { - break; - } - if (tok.kind == TokComment) { - line_state = 3; - continue; - } - if (tok.kind == TokNewline) { - line += 1; - line_start = tok.start + tok.len; - line_state = 0; - continue; - } - if (tok.kind == TokDot) { - Token next = token(input, len, pos); - if (next.kind == TokIdent) { - err = ErrDirectiveNotImplemented; - goto end; - } else { - err = ErrNeedDirectiveAfterDot; - goto end; - } - continue; - } - if (tok.kind == TokIdent) { - Token next = token(input, len, pos); - if (next.kind == TokColon) { - // Label - pos = next.start + next.len; - if (line_state >= 1) { - err = ErrLabelAfterLabel; - einfo->token = next; - goto end; - } - line_state = 1; - if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) { - err = ErrOutOfMemory; - goto end; - } - size_t idx = label_lookup(&labels, &input[tok.start], tok.len); - if (idx != INVALID) { - err = ErrDuplicateLabel; - goto end; - } - labels.buf[labels.len] = (Label){ - .location = rv.len, - .str = &input[tok.start], - .len = tok.len, - }; - labels.len += 1; - } else { - // Instruction - if (line_state >= 2) { - err = ErrTrailingLine; - goto end; - } - line_state = 2; - err = assemble_instr(ht, input, len, &tok, &rv, &holes); + while (1) { + Token tok = token(input, len, pos); + einfo->token = tok; pos = tok.start + tok.len; - if (err != 0) { - goto end; + if (tok.kind == TokInvalid || tok.kind == TokBadNumber) { + if (tok.num) { + err = (AsmError)tok.num; + } else { + err = ErrInvalidToken; + } + break; } - } - continue; + if (tok.kind == TokEOF) { + break; + } + if (tok.kind == TokComment) { + line_state = 3; + continue; + } + if (tok.kind == TokNewline) { + line += 1; + line_start = tok.start + tok.len; + line_state = 0; + continue; + } + if (tok.kind == TokDot) { + Token next = token(input, len, pos); + if (next.kind == TokIdent) { + err = ErrDirectiveNotImplemented; + goto end; + } else { + err = ErrNeedDirectiveAfterDot; + goto end; + } + continue; + } + if (tok.kind == TokIdent) { + Token next = token(input, len, pos); + if (next.kind == TokColon) { + // Label + pos = next.start + next.len; + if (line_state >= 1) { + err = ErrLabelAfterLabel; + einfo->token = next; + goto end; + } + line_state = 1; + if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) { + err = ErrOutOfMemory; + goto end; + } + size_t idx = label_lookup(&labels, &input[tok.start], tok.len); + if (idx != INVALID) { + err = ErrDuplicateLabel; + goto end; + } + labels.buf[labels.len] = (Label){ + .location = rv.len, + .str = &input[tok.start], + .len = tok.len, + }; + labels.len += 1; + } else { + // Instruction + if (line_state >= 2) { + err = ErrTrailingLine; + goto end; + } + line_state = 2; + err = assemble_instr(ht, input, len, &tok, &rv, &holes); + pos = tok.start + tok.len; + if (err != 0) { + goto end; + } + } + continue; + } + err = ErrUnexpectedToken; + goto end; } - err = ErrUnexpectedToken; - goto end; - } - for (size_t ii = 0; ii < holes.len; ii += 1) { - Hole *hole = &holes.buf[ii]; - size_t idx = label_lookup(&labels, hole->str, hole->len); - uint64_t num_to_write = labels.buf[idx].location; - uint8_t sign = 2; - if (hole->size != 8) { - sign = 1; - num_to_write -= hole->origin; + for (size_t ii = 0; ii < holes.len; ii += 1) { + Hole *hole = &holes.buf[ii]; + size_t idx = label_lookup(&labels, hole->str, hole->len); + uint64_t num_to_write = labels.buf[idx].location; + uint8_t sign = 2; + if (hole->size != 8) { + sign = 1; + num_to_write -= hole->origin; + } + err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, + sign); + if (err != 0) { + goto end; + } } - err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, sign); - if (err != 0) { - goto end; - } - } end: - free(holes.buf); - free(labels.buf); - *out = rv; - einfo->line = line + 1; - einfo->line_start = line_start; - return err; + free(holes.buf); + free(labels.buf); + *out = rv; + einfo->line = line + 1; + einfo->line_start = line_start; + return err; } int main(int argc, char **argv) { - int hex_out = 0; - if (argc >= 2 && strcmp(argv[1], "--hex") == 0) { - hex_out = 1; - } + int hex_out = 0; + if (argc >= 2 && strcmp(argv[1], "--hex") == 0) { + hex_out = 1; + } - int err = 0; - InstHt ht = NULL; - ByteVec input; + int err = 0; + InstHt ht = NULL; + ByteVec input; - err = slurp(stdin, &input); - if (err != 0) { - fprintf(stderr, "failed to read the file: %d\n", err); - goto done; - } - ht = build_lookup(); - if (ht == NULL) { - err = ErrOutOfMemory; - fprintf(stderr, "failed to init hash table: %d\n", err); - goto done; - } + err = slurp(stdin, &input); + if (err != 0) { + fprintf(stderr, "failed to read the file: %d\n", err); + goto done; + } + ht = build_lookup(); + if (ht == NULL) { + err = ErrOutOfMemory; + fprintf(stderr, "failed to init hash table: %d\n", err); + goto done; + } - ByteVec out; - EInfo einfo; - err = assemble(ht, input.buf, input.len, &out, &einfo); - if (err != 0) { - size_t column = einfo.token.start - einfo.line_start + 1; - fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", - ERRORS[err], einfo.line, column, (int)einfo.token.len, - &input.buf[einfo.token.start]); - goto done; - } - if (hex_out) { - hd(out.buf, out.len); - } else { - fwrite(out.buf, 1, out.len, stdout); - } + ByteVec out; + EInfo einfo; + err = assemble(ht, input.buf, input.len, &out, &einfo); + if (err != 0) { + size_t column = einfo.token.start - einfo.line_start + 1; + fprintf(stderr, + "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", + ERRORS[err], einfo.line, column, (int)einfo.token.len, + &input.buf[einfo.token.start]); + goto done; + } + if (hex_out) { + hd(out.buf, out.len); + } else { + fwrite(out.buf, 1, out.len, stdout); + } done: - free(ht); - free(input.buf); - free(out.buf); - return err; + free(ht); + free(input.buf); + free(out.buf); + return err; } diff --git a/src/instructions.c b/src/instructions.c index 44f2e02..1af9d08 100644 --- a/src/instructions.c +++ b/src/instructions.c @@ -1,7 +1,7 @@ typedef struct InstDesc_s { - char *mnemonic; - unsigned char opcode; - Operands type; + char *mnemonic; + unsigned char opcode; + Operands type; } InstDesc; const InstDesc INST[] = { @@ -68,11 +68,11 @@ const InstDesc INST[] = { const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); size_t inst_find(const char *mnemonic, size_t len) { - for (size_t ii = 0; ii < INST_CNT; ii += 1) { - const char *entry = INST[ii].mnemonic; - if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') { - return ii; + for (size_t ii = 0; ii < INST_CNT; ii += 1) { + const char *entry = INST[ii].mnemonic; + if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') { + return ii; + } } - } - return INVALID; + return INVALID; } diff --git a/src/register.c b/src/register.c index 15047d2..b393d8b 100644 --- a/src/register.c +++ b/src/register.c @@ -1,23 +1,23 @@ int parse_register(char *name, size_t len) { - if (name[0] != 'r') { - return 256; // Register name should start with 'r' - } - if (len > 4) { - return 256; // Register name too long - } - uint16_t rv = 0; - if (len > 2 && name[1] == '0') { - return 256; // Extra zero suffix - } - for (size_t ii = 1; ii < len; ii += 1) { - char chr = name[ii]; - if (!(chr >= '0' && chr <= '9')) { - return 256; // Register name must only contain numbers + if (name[0] != 'r') { + return 256; // Register name should start with 'r' } - rv = rv * 10 + (chr - '0'); - } - if (rv > 255) { - return 256; // Register number too large - } - return (int)rv; + if (len > 4) { + return 256; // Register name too long + } + uint16_t rv = 0; + if (len > 2 && name[1] == '0') { + return 256; // Extra zero suffix + } + for (size_t ii = 1; ii < len; ii += 1) { + char chr = name[ii]; + if (!(chr >= '0' && chr <= '9')) { + return 256; // Register name must only contain numbers + } + rv = rv * 10 + (chr - '0'); + } + if (rv > 255) { + return 256; // Register number too large + } + return (int)rv; } diff --git a/src/token.c b/src/token.c index d8ee1a2..c8bf3ae 100644 --- a/src/token.c +++ b/src/token.c @@ -23,7 +23,8 @@ Token token_ident(char *input, size_t len, size_t pos) { while (pos < len) { char chr = input[pos]; char chru = chr & ~0x20; - int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); + int good = chr == '_' || (chr >= '0' && chr <= '9') || + (chru >= 'A' && chru <= 'Z'); if (!good) { break; }