Merge pull request #8 from m1el/clang-format

added clang format
This commit is contained in:
m1el 2024-03-11 16:01:52 +02:00 committed by GitHub
commit aa45a8eba4
10 changed files with 476 additions and 465 deletions

View file

@ -1,12 +1,19 @@
# Toolchain configuration.
CC = gcc
CFLAGS_EXTRA =
CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3
# Style passed to clang-format by the `format` and `check-format` targets.
CLANG_FORMAT_STYLE = '{ BasedOnStyle: Google, IndentWidth: 4 }'

.PHONY: clean hbas example format check-format

hbas: build/hbas
example: build/example.hbf

# Rewrite every file under src/ in place using CLANG_FORMAT_STYLE.
format:
	clang-format --style=${CLANG_FORMAT_STYLE} -i src/*

# Run clang-format in dry-run mode with -Werror over src/.
check-format:
	clang-format --style=${CLANG_FORMAT_STYLE} -i --dry-run -Werror src/*

# Create the output directory.
build:
	mkdir -p build

View file

@ -1,11 +1,11 @@
// Metadata for one operand kind. Entries are looked up by `chr`
// through arg_meta().
typedef struct ArgMeta_s {
    // Character that identifies this operand kind.
    char chr;
    // Number of bytes the operand occupies in the encoded instruction.
    uint8_t size;
    // This is a bitset of acceptable overflow states,
    // where accept signed = 1, accept unsigned = 2.
    // 1 -> signed, 2 -> unsigned, 3 -> whatever
    uint8_t sign;
    // assemble_instr accepts a label for operands with rel == 1.
    // NOTE(review): presumably marks a relative offset -- confirm.
    uint8_t rel;
} ArgMeta;
const ArgMeta ARGS[] = { const ArgMeta ARGS[] = {
{'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0}, {'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
@ -15,31 +15,31 @@ const ArgMeta ARGS[] = {
}; };
// Operand layout of an instruction. The numeric value is used as an index
// into TYPE_STR (see assemble_instr), so the order of the enumerators must
// stay in sync with that table.
typedef enum Operands_e {
    Empty = 0,
    R,
    RR,
    RRR,
    RRRR,
    Rx8,
    Rx16,
    Rx32,
    Rx64,
    RRx8,
    RRx16,
    RRx32,
    RRx64,
    RRs32,
    RRs64,
    RRu8,
    RRu16,
    RRu64,
    r16,
    r32,
    RRr16,
    RRr32,
    RRr16u16,
    RRr32u16,
    RRu64u16,
} Operands;
// R -> register, // R -> register,
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64, // 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
@ -56,11 +56,11 @@ const char *TYPE_STR[] = {
// Number of entries in ARGS.
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);

// Returns the ARGS entry whose `chr` equals `arg`.
// When no entry matches, the last entry of ARGS is returned as a fallback;
// assemble_instr treats a result with chr == 0 as "unknown operand kind".
// NOTE(review): this relies on the final ARGS entry having chr == 0 --
// confirm against the table definition.
ArgMeta arg_meta(char arg) {
    for (size_t ii = 0; ii < NARGS; ii += 1) {
        if (ARGS[ii].chr == arg) {
            return ARGS[ii];
        }
    }
    return ARGS[NARGS - 1];
}

View file

@ -1,29 +1,29 @@
// Sentinel index (all bits set) returned by lookup functions on a miss.
const size_t INVALID = ~(size_t)0;

// Growable buffer: `buf` holds `cap` allocated elements, of which the
// first `len` are in use. Grown through ensure_push().
typedef struct ByteVec_s {
    char *buf;
    size_t cap;
    size_t len;
} ByteVec;
// Makes sure `vec` has room for `extra` more elements of `el_size` bytes.
//
// `vec` may really be another vector type with the same {buf, cap, len}
// layout (callers cast HoleVec/LabelVec to ByteVec); `el_size` is then the
// size of that vector's element. `el_size` must be non-zero.
//
// Returns 0 (ErrOk) on success and ErrOutOfMemory when the required size
// overflows size_t or the allocation fails. On failure `vec` is left
// untouched, so the caller still owns (and must free) the old buffer.
AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
    const size_t max = ~(size_t)0;
    size_t need = vec->len + extra;
    // Addition overflow.
    if (need < vec->len) {
        return ErrOutOfMemory;
    }
    if (need <= vec->cap) {
        return 0;
    }
    // Double until large enough. Start from 1 so that an empty vector
    // (cap == 0) can grow instead of looping forever on 0 * 2.
    size_t cap = vec->cap == 0 ? 1 : vec->cap;
    while (cap < need) {
        if (max / 2 < cap) {
            return ErrOutOfMemory;
        }
        cap *= 2;
    }
    // Multiply overflow.
    if (max / el_size < cap) {
        return ErrOutOfMemory;
    }
    // Keep the old pointer until realloc succeeds; assigning the result
    // straight to vec->buf would leak the buffer on failure.
    char *grown = realloc(vec->buf, el_size * cap);
    if (grown == NULL) {
        return ErrOutOfMemory;
    }
    vec->buf = grown;
    vec->cap = cap;
    return 0;
}

View file

@ -1,5 +1,5 @@
typedef struct EInfo_s { typedef struct EInfo_s {
Token token; Token token;
size_t line; size_t line;
size_t line_start; size_t line_start;
} EInfo; } EInfo;

View file

@ -1,23 +1,23 @@
// Assembler error codes. The numeric value is used as an index into ERRORS
// (see main), so the order of the enumerators must stay in sync with that
// table.
typedef enum AsmError_e {
    ErrOk = 0,
    ErrBadRegister,
    ErrImmediateOverflow,
    ErrInvalidToken,
    ErrBadArgumentMeta,
    ErrNeedCommaAfterArgument,
    ErrLabelImmediate,
    ErrNumberImmediate,
    ErrBadNumOverflow,
    ErrBadNumDigit,
    ErrBadNumNoDigit,
    ErrLabelAfterLabel,
    ErrOutOfMemory,
    ErrDuplicateLabel,
    ErrTrailingLine,
    ErrNeedDirectiveAfterDot,
    ErrDirectiveNotImplemented,
    ErrUnexpectedToken,
    ErrTriedNegateNonNumber,
} AsmError;
char *ERRORS[] = { char *ERRORS[] = {
"Success", "Success",

View file

@ -1,57 +1,57 @@
// Instruction Hash table, for faster lookups
//
// Each bucket stores up to two indices into INST; 0xff marks an empty slot.
typedef struct InstHtNode_s {
    uint8_t index1;
    uint8_t index2;
} InstHtNode;

// A table is a heap array of buckets, indexed by the low 8 bits of
// inst_hash().
typedef InstHtNode *InstHt;
// Hashes the first `len` bytes of `s`; arithmetic wraps modulo 2^32.
// Used to pick a bucket in the instruction hash table.
uint32_t inst_hash(const char *s, size_t len) {
    const uint32_t mul = 75;
    uint32_t hash = 0;
    for (size_t ii = 0; ii < len; ii += 1) {
        // Go through unsigned char so that bytes >= 0x80 hash identically
        // whether plain `char` is signed or unsigned on this platform.
        hash ^= (uint32_t)(unsigned char)s[ii] * mul;
        hash *= mul;
    }
    return hash;
}
InstHt build_lookup(void) { InstHt build_lookup(void) {
const size_t size = 256; const size_t size = 256;
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
if (table == NULL) { if (table == NULL) {
return table; return table;
}
for (size_t ii = 0; ii < size; ii += 1) {
table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff) {
node->index1 = ii;
} else if (node->index2 == 0xff) {
node->index2 = ii;
} else {
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
} }
} for (size_t ii = 0; ii < size; ii += 1) {
return table; table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff) {
node->index1 = ii;
} else if (node->index2 == 0xff) {
node->index2 = ii;
} else {
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
}
}
return table;
} }
// Looks up the mnemonic given by the first `len` bytes of `s` (not
// NUL-terminated) in table `ht`.
// Returns its index into INST, or INVALID when there is no such mnemonic.
size_t inst_lookup(InstHt ht, const char *s, size_t len) {
    const InstHtNode *node = &ht[(size_t)(inst_hash(s, len) & 0xff)];
    const uint8_t slots[2] = {node->index1, node->index2};
    for (size_t ii = 0; ii < 2; ii += 1) {
        size_t idx = (size_t)slots[ii];
        // Slots are filled in order, so the first empty one ends the search.
        if (idx == 0xff) {
            break;
        }
        const char *mnemonic = INST[idx].mnemonic;
        // Compare lengths first: this never reads past the mnemonic's
        // terminator, even if `s` contains a NUL byte within `len`.
        if (strlen(mnemonic) == len && memcmp(s, mnemonic, len) == 0) {
            return idx;
        }
    }
    return INVALID;
}

View file

@ -40,376 +40,379 @@ SOFTWARE.
#include "einfo.h" #include "einfo.h"
// Prints `len` bytes of `data` to stdout as lowercase two-digit hex,
// 16 bytes per line, followed by a final newline.
void hd(char *data, size_t len) {
    for (size_t ii = 0; ii < len; ii += 1) {
        // Break the line before every 16th byte except the very first.
        if (ii > 0 && (ii & 15) == 0) {
            printf("\n");
        }
        printf("%02x", (uint8_t)data[ii]);
    }
    printf("\n");
}
// Initial capacity (in elements) of the vectors allocated in this file.
#define MIN_SIZE 4096

// Reads `fd` until fread() returns 0 and stores the data in `*out`.
//
// `*out` is always written; the caller owns `out->buf` and must free it
// (it may be NULL when the first allocation failed).
// Returns 0 on success, ErrOutOfMemory when the buffer could not be
// allocated or grown, otherwise the result of ferror(fd).
int slurp(FILE *fd, ByteVec *out) {
    ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
    int err = 0;
    if (rv.buf == NULL) {
        rv.cap = 0;
        err = ErrOutOfMemory;
    } else {
        size_t bread;
        do {
            // Guarantee at least one free byte so fread can make progress.
            if (ensure_push(&rv, 1, 1) != 0) {
                err = ErrOutOfMemory;
                break;
            }
            bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
            rv.len += bread;
        } while (bread > 0);
    }
    *out = rv;
    if (err == 0) {
        err = ferror(fd);
    }
    return err;
}
// A label reference that is patched once all labels are known.
typedef struct Hole_s {
    // Offset in the output where the value is written.
    size_t location;
    // Output offset of the instruction that contains the hole; subtracted
    // from the label location for holes that are not 8 bytes wide.
    size_t origin;
    // Label name: `len` bytes pointing into the input (not NUL-terminated).
    char *str;
    size_t len;
    // Width of the patched value in bytes.
    size_t size;
} Hole;

// Growable array of Hole. The {buf, cap, len} layout must match ByteVec
// because the vector is grown through ensure_push() via a pointer cast.
typedef struct HoleVec_s {
    Hole *buf;
    size_t cap;
    size_t len;
} HoleVec;

// A label definition.
typedef struct Label_s {
    // Output offset the label refers to.
    size_t location;
    // Label name: `len` bytes pointing into the input (not NUL-terminated).
    char *str;
    size_t len;
} Label;

// Growable array of Label. The {buf, cap, len} layout must match ByteVec
// because the vector is grown through ensure_push() via a pointer cast.
typedef struct LabelVec_s {
    Label *buf;
    size_t cap;
    size_t len;
} LabelVec;
// Searches `labels` for a label whose name equals the `len` bytes at `name`.
// Returns the index of the first match, or INVALID when there is none.
size_t label_lookup(LabelVec *labels, char *name, size_t len) {
    for (size_t ii = 0; ii < labels->len; ii += 1) {
        const Label *label = &labels->buf[ii];
        // Names are length-delimited slices of the input, so compare raw
        // bytes rather than relying on a NUL terminator.
        if (label->len == len && memcmp(label->str, name, len) == 0) {
            return ii;
        }
    }
    return INVALID;
}
// safety: assumes the buffer has enough place for specified integer size. // safety: assumes the buffer has enough place for specified integer size.
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int, // `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
// and bit `2` indicates that value accepts an unsigned int. // and bit `2` indicates that value accepts an unsigned int.
AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) { AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
// Unsigned integers must have all upper bits set to zero. To check this, // Unsigned integers must have all upper bits set to zero. To check this,
// we shift the value right by the integer size and verify it equals zero. // we shift the value right by the integer size and verify it equals zero.
int valid_uint = (val >> (size * 8)) == 0; int valid_uint = (val >> (size * 8)) == 0;
// For signed integers, the sign-extended high bits must match the sign bit. // For signed integers, the sign-extended high bits must match the sign bit.
// By shifting right by one less than the total bit size (size * 8 - 1), // By shifting right by one less than the total bit size (size * 8 - 1),
// we isolate the sign bit and any sign-extended bits. For a value fitting // we isolate the sign bit and any sign-extended bits. For a value fitting
// in the signed range, this operation results in either 0 (for non-negative // in the signed range, this operation results in either 0 (for non-negative
// values) or -1 (for negative values due to sign extension). // values) or -1 (for negative values due to sign extension).
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
// To unify the check for both positive and negative cases, we adjust // To unify the check for both positive and negative cases, we adjust
// non-zero values (-1) by incrementing by 1. This turns -1 into 0, // non-zero values (-1) by incrementing by 1. This turns -1 into 0,
// enabling a single check for 0 to validate both cases. This adjustment // enabling a single check for 0 to validate both cases. This adjustment
// simplifies the validation logic, allowing us to use a single condition to // simplifies the validation logic, allowing us to use a single condition to
// check for proper sign extension or zero extension in the original value. // check for proper sign extension or zero extension in the original value.
int_shifted += int_shifted != 0; int_shifted += int_shifted != 0;
// A valid signed integer will have `int_shifted` equal to 0 // A valid signed integer will have `int_shifted` equal to 0
// after adjustment, indicating proper sign extension. // after adjustment, indicating proper sign extension.
int valid_int = int_shifted == 0; int valid_int = int_shifted == 0;
// Validity bitmask to represents whether the value // Validity bitmask to represents whether the value
// fits as signed, unsigned, or both. // fits as signed, unsigned, or both.
int validity = valid_int | (valid_uint << 1); int validity = valid_int | (valid_uint << 1);
// If the value's validity doesn't match the `sign` requirements, // If the value's validity doesn't match the `sign` requirements,
// we report an overflow. // we report an overflow.
if ((validity & sign) == 0) { if ((validity & sign) == 0) {
return ErrImmediateOverflow; return ErrImmediateOverflow;
} }
// Write out the bytes of the integer to the buffer in little-endian order, // Write out the bytes of the integer to the buffer in little-endian order,
// starting with the lowest byte first. // starting with the lowest byte first.
for (size_t ii = 0; ii < size; ii += 1) { for (size_t ii = 0; ii < size; ii += 1) {
buf[ii] = val & 0xff; buf[ii] = val & 0xff;
val >>= 8; val >>= 8;
} }
return ErrOk; return ErrOk;
} }
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok, AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
ByteVec *rv, HoleVec *holes) { ByteVec *rv, HoleVec *holes) {
const InstDesc *inst; const InstDesc *inst;
const char *type_str; const char *type_str;
size_t nargs; size_t nargs;
size_t size; size_t size;
size_t idx = inst_lookup(ht, &input[tok->start], tok->len); size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
size_t inst_start = rv->len; size_t inst_start = rv->len;
if (idx == INVALID) { if (idx == INVALID) {
return ErrInvalidToken; return ErrInvalidToken;
}
inst = &INST[idx];
type_str = TYPE_STR[inst->type];
nargs = strlen(type_str);
size = 1;
for (size_t ii = 0; ii < nargs; ii += 1) {
char chr = type_str[ii];
ArgMeta meta = arg_meta(chr);
if (meta.chr == 0) {
return ErrBadArgumentMeta;
} }
size += meta.size; inst = &INST[idx];
} type_str = TYPE_STR[inst->type];
if (ensure_push(rv, 1, size) != 0) { nargs = strlen(type_str);
return ErrOutOfMemory; size = 1;
} for (size_t ii = 0; ii < nargs; ii += 1) {
rv->buf[rv->len] = inst->opcode; char chr = type_str[ii];
rv->len += 1; ArgMeta meta = arg_meta(chr);
for (size_t ii = 0; ii < nargs; ii += 1) { if (meta.chr == 0) {
if (ii > 0) { return ErrBadArgumentMeta;
*tok = token(input, len, tok->start + tok->len); }
if (tok->kind != TokComma) { size += meta.size;
return ErrNeedCommaAfterArgument;
}
} }
char chr = type_str[ii]; if (ensure_push(rv, 1, size) != 0) {
ArgMeta meta = arg_meta(chr); return ErrOutOfMemory;
uint64_t is_negative = 0;
*tok = token(input, len, tok->start + tok->len);
if (tok->kind == TokNeg) {
*tok = token(input, len, tok->start + tok->len);
if (tok->kind != TokNumber) {
return ErrTriedNegateNonNumber;
}
is_negative -= 1;
} }
if (chr == 'R') { rv->buf[rv->len] = inst->opcode;
int reg = parse_register(&input[tok->start], tok->len); rv->len += 1;
if (reg > 255) { for (size_t ii = 0; ii < nargs; ii += 1) {
return ErrBadRegister; if (ii > 0) {
} *tok = token(input, len, tok->start + tok->len);
rv->buf[rv->len] = (char)(reg & 0xff); if (tok->kind != TokComma) {
rv->len += 1; return ErrNeedCommaAfterArgument;
} else { }
uint64_t num_to_write; }
if (meta.rel == 1 || meta.size == 8) { char chr = type_str[ii];
if (tok->kind == TokIdent) { ArgMeta meta = arg_meta(chr);
if (ensure_push((ByteVec*)holes, sizeof(Hole), 1) != 0) { uint64_t is_negative = 0;
return ErrOutOfMemory; *tok = token(input, len, tok->start + tok->len);
} if (tok->kind == TokNeg) {
holes->buf[holes->len] = (Hole) { *tok = token(input, len, tok->start + tok->len);
.location = rv->len, if (tok->kind != TokNumber) {
.origin = inst_start, return ErrTriedNegateNonNumber;
.str = &input[tok->start], }
.len = tok->len, is_negative -= 1;
.size = (size_t)meta.size, }
}; if (chr == 'R') {
holes->len += 1; int reg = parse_register(&input[tok->start], tok->len);
num_to_write = 0; if (reg > 255) {
} else if (tok->kind == TokNumber) { return ErrBadRegister;
num_to_write = tok->num; }
rv->buf[rv->len] = (char)(reg & 0xff);
rv->len += 1;
} else { } else {
return ErrLabelImmediate; uint64_t num_to_write;
if (meta.rel == 1 || meta.size == 8) {
if (tok->kind == TokIdent) {
if (ensure_push((ByteVec *)holes, sizeof(Hole), 1) != 0) {
return ErrOutOfMemory;
}
holes->buf[holes->len] = (Hole){
.location = rv->len,
.origin = inst_start,
.str = &input[tok->start],
.len = tok->len,
.size = (size_t)meta.size,
};
holes->len += 1;
num_to_write = 0;
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrLabelImmediate;
}
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrNumberImmediate;
}
// num_to_write = num_to_write ^ is_negative - is_negative;
if (is_negative) {
int64_t tmp = -(int64_t)num_to_write;
if (tmp > 0) {
return ErrBadNumOverflow;
}
num_to_write = (uint64_t)tmp;
}
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
meta.size, meta.sign);
if (err != ErrOk) {
return err;
}
rv->len += meta.size;
} }
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrNumberImmediate;
}
// num_to_write = num_to_write ^ is_negative - is_negative;
if (is_negative) {
int64_t tmp = -(int64_t)num_to_write;
if (tmp > 0) {
return ErrBadNumOverflow;
}
num_to_write = (uint64_t)tmp;
}
AsmError err =
push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
if (err != ErrOk) {
return err;
}
rv->len += meta.size;
} }
}
return ErrOk; return ErrOk;
} }
// Assembles `len` bytes of source text in `input` into machine code.
//
// `*out` always receives the output buffer (possibly partial, possibly with
// a NULL `buf`); the caller must free `out->buf`. `*einfo` always receives
// the location at which processing stopped, for error reporting.
// Returns ErrOk on success, otherwise the first error encountered.
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
                  EInfo *einfo) {
    ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
    HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
    LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
    size_t line = 0;
    size_t line_start = 0;
    size_t pos = 0;
    // init=0, label=1, instruction=2, comment=3, newline -> 0
    size_t line_state = 0;
    AsmError err = ErrOk;

    // Give the caller a well-defined token even if we fail before reading
    // the first one.
    einfo->token = (Token){0};
    if (rv.buf == NULL || holes.buf == NULL || labels.buf == NULL) {
        if (rv.buf == NULL) {
            rv.cap = 0;
        }
        err = ErrOutOfMemory;
        goto end;
    }

    while (1) {
        Token tok = token(input, len, pos);
        einfo->token = tok;
        pos = tok.start + tok.len;
        if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
            // The tokenizer may carry a specific error code in `num`.
            if (tok.num) {
                err = (AsmError)tok.num;
            } else {
                err = ErrInvalidToken;
            }
            // Skip hole patching: it would overwrite `err`.
            goto end;
        }
        if (tok.kind == TokEOF) {
            break;
        }
        if (tok.kind == TokComment) {
            line_state = 3;
            continue;
        }
        if (tok.kind == TokNewline) {
            line += 1;
            line_start = tok.start + tok.len;
            line_state = 0;
            continue;
        }
        if (tok.kind == TokDot) {
            Token next = token(input, len, pos);
            if (next.kind == TokIdent) {
                err = ErrDirectiveNotImplemented;
            } else {
                err = ErrNeedDirectiveAfterDot;
            }
            goto end;
        }
        if (tok.kind == TokIdent) {
            Token next = token(input, len, pos);
            if (next.kind == TokColon) {
                // Label
                pos = next.start + next.len;
                if (line_state >= 1) {
                    err = ErrLabelAfterLabel;
                    einfo->token = next;
                    goto end;
                }
                line_state = 1;
                if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
                    err = ErrOutOfMemory;
                    goto end;
                }
                size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
                if (idx != INVALID) {
                    err = ErrDuplicateLabel;
                    goto end;
                }
                labels.buf[labels.len] = (Label){
                    .location = rv.len,
                    .str = &input[tok.start],
                    .len = tok.len,
                };
                labels.len += 1;
            } else {
                // Instruction
                if (line_state >= 2) {
                    err = ErrTrailingLine;
                    goto end;
                }
                line_state = 2;
                err = assemble_instr(ht, input, len, &tok, &rv, &holes);
                pos = tok.start + tok.len;
                if (err != 0) {
                    goto end;
                }
            }
            continue;
        }
        err = ErrUnexpectedToken;
        goto end;
    }

    // All labels are known now: patch every recorded reference.
    for (size_t ii = 0; ii < holes.len; ii += 1) {
        Hole *hole = &holes.buf[ii];
        size_t idx = label_lookup(&labels, hole->str, hole->len);
        if (idx == INVALID) {
            // Reference to a label that was never defined.
            // NOTE(review): AsmError has no dedicated "undefined label"
            // code; ErrInvalidToken is the closest existing one.
            err = ErrInvalidToken;
            // Point the error at the reference itself. Hole names point
            // into `input`, so the offset can be recovered.
            size_t off = (size_t)(hole->str - input);
            einfo->token.start = off;
            einfo->token.len = hole->len;
            // Recompute the line position of that offset.
            // Assumes lines are separated by '\n' -- TODO confirm this
            // matches how the tokenizer produces TokNewline.
            line = 0;
            line_start = 0;
            for (size_t jj = 0; jj < off; jj += 1) {
                if (input[jj] == '\n') {
                    line += 1;
                    line_start = jj + 1;
                }
            }
            goto end;
        }
        uint64_t num_to_write = labels.buf[idx].location;
        uint8_t sign = 2;
        if (hole->size != 8) {
            // Narrower holes hold a signed offset from the instruction.
            sign = 1;
            num_to_write -= hole->origin;
        }
        err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size,
                          sign);
        if (err != 0) {
            goto end;
        }
    }
end:
    free(holes.buf);
    free(labels.buf);
    *out = rv;
    einfo->line = line + 1;
    einfo->line_start = line_start;
    return err;
}
// Reads assembly source from stdin and writes the assembled bytes to
// stdout, or a hex dump of them when the first argument is `--hex`.
// Returns 0 on success and a non-zero error code otherwise.
int main(int argc, char **argv) {
    int hex_out = 0;
    if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
        hex_out = 1;
    }

    int err = 0;
    InstHt ht = NULL;
    // Initialised up front so the cleanup at `done` is valid on every path.
    ByteVec input = {NULL, 0, 0};
    ByteVec out = {NULL, 0, 0};
    EInfo einfo;

    err = slurp(stdin, &input);
    if (err != 0) {
        fprintf(stderr, "failed to read the file: %d\n", err);
        goto done;
    }
    ht = build_lookup();
    if (ht == NULL) {
        err = ErrOutOfMemory;
        fprintf(stderr, "failed to init hash table: %d\n", err);
        goto done;
    }

    err = assemble(ht, input.buf, input.len, &out, &einfo);
    if (err != 0) {
        size_t column = einfo.token.start - einfo.line_start + 1;
        fprintf(stderr,
                "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
                ERRORS[err], einfo.line, column, (int)einfo.token.len,
                &input.buf[einfo.token.start]);
        goto done;
    }
    if (hex_out) {
        hd(out.buf, out.len);
    } else if (fwrite(out.buf, 1, out.len, stdout) != out.len) {
        fprintf(stderr, "failed to write the output\n");
        err = 1;
    }

done:
    free(ht);
    free(input.buf);
    free(out.buf);
    return err;
}

View file

@ -1,7 +1,7 @@
typedef struct InstDesc_s { typedef struct InstDesc_s {
char *mnemonic; char *mnemonic;
unsigned char opcode; unsigned char opcode;
Operands type; Operands type;
} InstDesc; } InstDesc;
const InstDesc INST[] = { const InstDesc INST[] = {
@ -68,11 +68,11 @@ const InstDesc INST[] = {
// Number of entries in INST.
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);

// Linear search of INST for the mnemonic given by the first `len` bytes of
// `mnemonic` (not NUL-terminated).
// Returns its index into INST, or INVALID when there is no such mnemonic.
size_t inst_find(const char *mnemonic, size_t len) {
    for (size_t ii = 0; ii < INST_CNT; ii += 1) {
        const char *entry = INST[ii].mnemonic;
        // Compare lengths first: this never reads past the entry's
        // terminator, even if `mnemonic` contains a NUL within `len`.
        if (strlen(entry) == len && memcmp(entry, mnemonic, len) == 0) {
            return ii;
        }
    }
    return INVALID;
}

View file

@ -1,23 +1,23 @@
// Parses a register name of the form `r<N>` from the first `len` bytes of
// `name` (not NUL-terminated), where N is a decimal number in 0..255
// written without leading zeros.
// Returns N, or 256 when the name is not a valid register.
int parse_register(char *name, size_t len) {
    if (len < 2) {
        return 256;  // Need 'r' followed by at least one digit
    }
    if (name[0] != 'r') {
        return 256;  // Register name should start with 'r'
    }
    if (len > 4) {
        return 256;  // Register name too long
    }
    if (len > 2 && name[1] == '0') {
        return 256;  // Extra zero prefix
    }
    // At most three digits, so the value cannot exceed 999.
    uint16_t rv = 0;
    for (size_t ii = 1; ii < len; ii += 1) {
        char chr = name[ii];
        if (!(chr >= '0' && chr <= '9')) {
            return 256;  // Register name must only contain numbers
        }
        rv = (uint16_t)(rv * 10 + (chr - '0'));
    }
    if (rv > 255) {
        return 256;  // Register number too large
    }
    return (int)rv;
}

View file

@ -23,7 +23,8 @@ Token token_ident(char *input, size_t len, size_t pos) {
while (pos < len) { while (pos < len) {
char chr = input[pos]; char chr = input[pos];
char chru = chr & ~0x20; char chru = chr & ~0x20;
int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); int good = chr == '_' || (chr >= '0' && chr <= '9') ||
(chru >= 'A' && chru <= 'Z');
if (!good) { if (!good) {
break; break;
} }