commit
aa45a8eba4
9
Makefile
9
Makefile
|
@ -1,12 +1,19 @@
|
|||
# Toolchain and flags; CFLAGS_EXTRA is left empty here.
CC = gcc
CFLAGS_EXTRA =
CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3
# Inline clang-format style shared by the `format` and `check-format` targets.
CLANG_FORMAT_STYLE = '{ BasedOnStyle: Google, IndentWidth: 4 }'

.PHONY: clean hbas example format check-format

hbas: build/hbas
example: build/example.hbf

# Rewrite the sources in place.
format:
	clang-format --style=${CLANG_FORMAT_STYLE} -i src/*

# Report formatting violations without touching the sources
# (no -i: --dry-run only diagnoses, -Werror makes violations fail the target).
check-format:
	clang-format --style=${CLANG_FORMAT_STYLE} --dry-run -Werror src/*

build:
	mkdir -p build
|
||||
|
||||
|
|
76
src/args.c
76
src/args.c
|
@ -1,11 +1,11 @@
|
|||
// Encoding rules for one operand-type character.
typedef struct ArgMeta_s {
    // Operand-type character this entry describes; arg_meta() matches on it.
    char chr;
    // Number of bytes the operand occupies in the encoded instruction.
    uint8_t size;
    // This is a bitset of acceptable overflow states,
    // where accept signed = 1, accept unsigned = 2.
    // 1 -> signed, 2 -> unsigned, 3 -> whatever
    uint8_t sign;
    // When 1, assemble_instr accepts a label name for this operand.
    uint8_t rel;
} ArgMeta;
|
||||
const ArgMeta ARGS[] = {
|
||||
{'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
|
||||
|
@ -15,31 +15,31 @@ const ArgMeta ARGS[] = {
|
|||
};
|
||||
|
||||
// Operand layout of an instruction. The value is used as an index into
// TYPE_STR (see assemble_instr), so the enumerator order must stay in sync
// with that table.
typedef enum Operands_e {
    Empty = 0,
    R,
    RR,
    RRR,
    RRRR,
    Rx8,
    Rx16,
    Rx32,
    Rx64,
    RRx8,
    RRx16,
    RRx32,
    RRx64,
    RRs32,
    RRs64,
    RRu8,
    RRu16,
    RRu64,
    r16,
    r32,
    RRr16,
    RRr32,
    RRr16u16,
    RRr32u16,
    RRu64u16,
} Operands;
|
||||
// R -> register,
|
||||
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
|
||||
|
@ -56,11 +56,11 @@ const char *TYPE_STR[] = {
|
|||
|
||||
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
|
||||
ArgMeta arg_meta(char arg) {
|
||||
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
||||
ArgMeta meta = ARGS[ii];
|
||||
if (meta.chr == arg) {
|
||||
return meta;
|
||||
for (size_t ii = 0; ii < NARGS; ii += 1) {
|
||||
ArgMeta meta = ARGS[ii];
|
||||
if (meta.chr == arg) {
|
||||
return meta;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGS[NARGS - 1];
|
||||
return ARGS[NARGS - 1];
|
||||
}
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
const size_t INVALID = ~(size_t)0;
|
||||
|
||||
// Growable buffer. `cap` and `len` are counted in elements of the size the
// caller passes to ensure_push (1 for plain byte buffers).
typedef struct ByteVec_s {
    char *buf;   // heap storage, released with free()
    size_t cap;  // number of elements the storage can hold
    size_t len;  // number of elements currently in use
} ByteVec;
|
||||
|
||||
// Makes sure `vec` has room for `extra` more elements of `el_size` bytes
// after the current `len`, doubling the capacity as often as needed.
//
// Callers in this file also pass HoleVec* / LabelVec* cast to ByteVec*,
// relying on those structs having the same {buf, cap, len} layout.
//
// Returns ErrOk on success. On failure returns ErrOutOfMemory and leaves
// `vec` untouched: the old buffer stays valid and is still owned by the
// caller (it is neither freed nor leaked here).
AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
    const size_t size_max = ~(size_t)0;
    size_t needed = vec->len + extra;

    // len + extra wrapped around.
    if (needed < vec->len) {
        return ErrOutOfMemory;
    }
    if (needed <= vec->cap) {
        return ErrOk;
    }

    // Doubling zero would never make progress, so start from one element.
    size_t new_cap = vec->cap == 0 ? 1 : vec->cap;
    while (new_cap < needed) {
        if (size_max / 2 < new_cap) {
            return ErrOutOfMemory;
        }
        new_cap *= 2;
    }
    // el_size * new_cap must not overflow.
    if (size_max / el_size < new_cap) {
        return ErrOutOfMemory;
    }

    // Keep the old pointer until realloc has succeeded.
    void *grown = realloc(vec->buf, el_size * new_cap);
    if (grown == NULL) {
        return ErrOutOfMemory;
    }
    vec->buf = grown;
    vec->cap = new_cap;
    return ErrOk;
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
typedef struct EInfo_s {
|
||||
Token token;
|
||||
size_t line;
|
||||
size_t line_start;
|
||||
Token token;
|
||||
size_t line;
|
||||
size_t line_start;
|
||||
} EInfo;
|
||||
|
|
38
src/error.h
38
src/error.h
|
@ -1,23 +1,23 @@
|
|||
// Assembler status codes. main() uses the value as an index into ERRORS,
// so the enumerator order must stay in sync with that table.
typedef enum AsmError_e {
    ErrOk = 0,
    ErrBadRegister,
    ErrImmediateOverflow,
    ErrInvalidToken,
    ErrBadArgumentMeta,
    ErrNeedCommaAfterArgument,
    ErrLabelImmediate,
    ErrNumberImmediate,
    ErrBadNumOverflow,
    ErrBadNumDigit,
    ErrBadNumNoDigit,
    ErrLabelAfterLabel,
    ErrOutOfMemory,
    ErrDuplicateLabel,
    ErrTrailingLine,
    ErrNeedDirectiveAfterDot,
    ErrDirectiveNotImplemented,
    ErrUnexpectedToken,
    ErrTriedNegateNonNumber,
} AsmError;
|
||||
char *ERRORS[] = {
|
||||
"Success",
|
||||
|
|
84
src/hash.c
84
src/hash.c
|
@ -1,57 +1,57 @@
|
|||
// Instruction Hash table, for faster lookups.
// Each bucket holds up to two indices into INST; 0xff marks an empty slot.
typedef struct InstHtNode_s {
    uint8_t index1;
    uint8_t index2;
} InstHtNode;
// A table is a heap-allocated array of buckets (see build_lookup).
typedef InstHtNode *InstHt;
|
||||
|
||||
// Hashes the first `len` bytes of `s` (no NUL terminator required).
// Per byte: xor in byte * 75, then multiply the running hash by 75;
// arithmetic wraps modulo 2^32.
uint32_t inst_hash(const char *s, size_t len) {
    const uint32_t mul = 75;
    uint32_t hash = 0;
    for (size_t ii = 0; ii < len; ii += 1) {
        hash ^= s[ii] * mul;
        hash *= mul;
    }
    return hash;
}
|
||||
|
||||
InstHt build_lookup(void) {
|
||||
const size_t size = 256;
|
||||
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
||||
if (table == NULL) {
|
||||
return table;
|
||||
}
|
||||
for (size_t ii = 0; ii < size; ii += 1) {
|
||||
table[ii] = (InstHtNode){0xff, 0xff};
|
||||
}
|
||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||
const char *mnemonic = INST[ii].mnemonic;
|
||||
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
|
||||
InstHtNode *node = &table[hash & 0xff];
|
||||
if (node->index1 == 0xff) {
|
||||
node->index1 = ii;
|
||||
} else if (node->index2 == 0xff) {
|
||||
node->index2 = ii;
|
||||
} else {
|
||||
fprintf(stderr, "more than 1 collision in hash table\n");
|
||||
exit(1);
|
||||
const size_t size = 256;
|
||||
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
|
||||
if (table == NULL) {
|
||||
return table;
|
||||
}
|
||||
}
|
||||
return table;
|
||||
for (size_t ii = 0; ii < size; ii += 1) {
|
||||
table[ii] = (InstHtNode){0xff, 0xff};
|
||||
}
|
||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||
const char *mnemonic = INST[ii].mnemonic;
|
||||
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
|
||||
InstHtNode *node = &table[hash & 0xff];
|
||||
if (node->index1 == 0xff) {
|
||||
node->index1 = ii;
|
||||
} else if (node->index2 == 0xff) {
|
||||
node->index2 = ii;
|
||||
} else {
|
||||
fprintf(stderr, "more than 1 collision in hash table\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
// Looks up the mnemonic given by the first `len` bytes of `s` in `ht`.
// Returns its index into INST, or INVALID when there is no such mnemonic.
size_t inst_lookup(InstHt ht, const char *s, size_t len) {
    const InstHtNode *node = &ht[inst_hash(s, len) & 0xff];
    // Read the two slots through their fields rather than by casting the
    // struct to a byte array.
    const uint8_t slots[2] = {node->index1, node->index2};
    for (size_t ii = 0; ii < 2; ii += 1) {
        size_t idx = slots[ii];
        if (idx == 0xff) {
            break;  // empty slot: build_lookup fills index1 before index2
        }
        const char *mnemonic = INST[idx].mnemonic;
        // Same first `len` bytes and the mnemonic ends right there.
        if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
            return idx;
        }
    }
    return INVALID;
}
|
||||
|
|
627
src/hbas.c
627
src/hbas.c
|
@ -40,376 +40,379 @@ SOFTWARE.
|
|||
#include "einfo.h"
|
||||
|
||||
// Prints `len` bytes of `data` to stdout as two-digit lowercase hex,
// 16 bytes per line with no separators, followed by a final newline.
void hd(char *data, size_t len) {
    for (size_t ii = 0; ii < len; ii += 1) {
        // Break the line before every 16th byte except the very first.
        if (ii > 0 && (ii & 15) == 0) {
            printf("\n");
        }
        printf("%02x", (uint8_t)data[ii]);
    }
    printf("\n");
}
|
||||
|
||||
#define MIN_SIZE 4096
|
||||
|
||||
// Reads `fd` until fread returns 0 into a freshly allocated buffer.
// `*out` is always written and its `buf` must be free()d by the caller
// (it is NULL when the initial allocation failed).
// Returns 0 on success, ErrOutOfMemory when the buffer could not be
// allocated or grown, otherwise the value of ferror(fd).
int slurp(FILE *fd, ByteVec *out) {
    ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
    int err = 0;
    if (rv.buf == NULL) {
        rv.cap = 0;
        err = ErrOutOfMemory;
    } else {
        size_t bread;
        do {
            // Always keep at least one free byte to read into.
            if (ensure_push(&rv, 1, 1) != 0) {
                err = ErrOutOfMemory;
                break;
            }
            bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
            rv.len += bread;
        } while (bread > 0);
    }
    *out = rv;
    if (err == 0) {
        err = ferror(fd);
    }
    return err;
}
|
||||
|
||||
// A label reference whose value is patched in after all labels are known.
typedef struct Hole_s {
    size_t location;  // offset in the output where the operand bytes go
    size_t origin;    // offset in the output where the instruction starts
    char *str;        // label name; points into the input, not NUL-terminated
    size_t len;       // length of the label name
    size_t size;      // operand width in bytes
} Hole;
|
||||
typedef struct HoleVec_s {
|
||||
Hole *buf;
|
||||
size_t cap;
|
||||
size_t len;
|
||||
Hole *buf;
|
||||
size_t cap;
|
||||
size_t len;
|
||||
} HoleVec;
|
||||
// A defined label.
typedef struct Label_s {
    size_t location;  // output offset at the point the label was defined
    char *str;        // label name; points into the input, not NUL-terminated
    size_t len;       // length of the label name
} Label;
|
||||
typedef struct LabelVec_s {
|
||||
Label *buf;
|
||||
size_t cap;
|
||||
size_t len;
|
||||
Label *buf;
|
||||
size_t cap;
|
||||
size_t len;
|
||||
} LabelVec;
|
||||
|
||||
// Linear search for the label whose name is the `len` bytes at `name`.
// Returns its index in `labels->buf`, or INVALID when it is not defined.
size_t label_lookup(LabelVec *labels, char *name, size_t len) {
    for (size_t ii = 0; ii < labels->len; ii += 1) {
        const Label *label = &labels->buf[ii];
        // Names are not NUL-terminated, so compare lengths first.
        if (label->len == len && strncmp(label->str, name, len) == 0) {
            return ii;
        }
    }
    return INVALID;
}
|
||||
|
||||
// safety: assumes the buffer has enough place for specified integer size.
|
||||
// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
|
||||
// and bit `2` indicates that value accepts an unsigned int.
|
||||
AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
|
||||
// Unsigned integers must have all upper bits set to zero. To check this,
|
||||
// we shift the value right by the integer size and verify it equals zero.
|
||||
int valid_uint = (val >> (size * 8)) == 0;
|
||||
// Unsigned integers must have all upper bits set to zero. To check this,
|
||||
// we shift the value right by the integer size and verify it equals zero.
|
||||
int valid_uint = (val >> (size * 8)) == 0;
|
||||
|
||||
// For signed integers, the sign-extended high bits must match the sign bit.
|
||||
// By shifting right by one less than the total bit size (size * 8 - 1),
|
||||
// we isolate the sign bit and any sign-extended bits. For a value fitting
|
||||
// in the signed range, this operation results in either 0 (for non-negative
|
||||
// values) or -1 (for negative values due to sign extension).
|
||||
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
|
||||
// For signed integers, the sign-extended high bits must match the sign bit.
|
||||
// By shifting right by one less than the total bit size (size * 8 - 1),
|
||||
// we isolate the sign bit and any sign-extended bits. For a value fitting
|
||||
// in the signed range, this operation results in either 0 (for non-negative
|
||||
// values) or -1 (for negative values due to sign extension).
|
||||
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
|
||||
|
||||
// To unify the check for both positive and negative cases, we adjust
|
||||
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
|
||||
// enabling a single check for 0 to validate both cases. This adjustment
|
||||
// simplifies the validation logic, allowing us to use a single condition to
|
||||
// check for proper sign extension or zero extension in the original value.
|
||||
int_shifted += int_shifted != 0;
|
||||
// To unify the check for both positive and negative cases, we adjust
|
||||
// non-zero values (-1) by incrementing by 1. This turns -1 into 0,
|
||||
// enabling a single check for 0 to validate both cases. This adjustment
|
||||
// simplifies the validation logic, allowing us to use a single condition to
|
||||
// check for proper sign extension or zero extension in the original value.
|
||||
int_shifted += int_shifted != 0;
|
||||
|
||||
// A valid signed integer will have `int_shifted` equal to 0
|
||||
// after adjustment, indicating proper sign extension.
|
||||
int valid_int = int_shifted == 0;
|
||||
// A valid signed integer will have `int_shifted` equal to 0
|
||||
// after adjustment, indicating proper sign extension.
|
||||
int valid_int = int_shifted == 0;
|
||||
|
||||
// Validity bitmask to represents whether the value
|
||||
// fits as signed, unsigned, or both.
|
||||
int validity = valid_int | (valid_uint << 1);
|
||||
// Validity bitmask to represents whether the value
|
||||
// fits as signed, unsigned, or both.
|
||||
int validity = valid_int | (valid_uint << 1);
|
||||
|
||||
// If the value's validity doesn't match the `sign` requirements,
|
||||
// we report an overflow.
|
||||
if ((validity & sign) == 0) {
|
||||
return ErrImmediateOverflow;
|
||||
}
|
||||
// If the value's validity doesn't match the `sign` requirements,
|
||||
// we report an overflow.
|
||||
if ((validity & sign) == 0) {
|
||||
return ErrImmediateOverflow;
|
||||
}
|
||||
|
||||
// Write out the bytes of the integer to the buffer in little-endian order,
|
||||
// starting with the lowest byte first.
|
||||
for (size_t ii = 0; ii < size; ii += 1) {
|
||||
buf[ii] = val & 0xff;
|
||||
val >>= 8;
|
||||
}
|
||||
return ErrOk;
|
||||
// Write out the bytes of the integer to the buffer in little-endian order,
|
||||
// starting with the lowest byte first.
|
||||
for (size_t ii = 0; ii < size; ii += 1) {
|
||||
buf[ii] = val & 0xff;
|
||||
val >>= 8;
|
||||
}
|
||||
return ErrOk;
|
||||
}
|
||||
|
||||
AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
|
||||
ByteVec *rv, HoleVec *holes) {
|
||||
const InstDesc *inst;
|
||||
const char *type_str;
|
||||
size_t nargs;
|
||||
size_t size;
|
||||
size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
|
||||
size_t inst_start = rv->len;
|
||||
if (idx == INVALID) {
|
||||
return ErrInvalidToken;
|
||||
}
|
||||
inst = &INST[idx];
|
||||
type_str = TYPE_STR[inst->type];
|
||||
nargs = strlen(type_str);
|
||||
size = 1;
|
||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||
char chr = type_str[ii];
|
||||
ArgMeta meta = arg_meta(chr);
|
||||
if (meta.chr == 0) {
|
||||
return ErrBadArgumentMeta;
|
||||
const InstDesc *inst;
|
||||
const char *type_str;
|
||||
size_t nargs;
|
||||
size_t size;
|
||||
size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
|
||||
size_t inst_start = rv->len;
|
||||
if (idx == INVALID) {
|
||||
return ErrInvalidToken;
|
||||
}
|
||||
size += meta.size;
|
||||
}
|
||||
if (ensure_push(rv, 1, size) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
rv->buf[rv->len] = inst->opcode;
|
||||
rv->len += 1;
|
||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||
if (ii > 0) {
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind != TokComma) {
|
||||
return ErrNeedCommaAfterArgument;
|
||||
}
|
||||
inst = &INST[idx];
|
||||
type_str = TYPE_STR[inst->type];
|
||||
nargs = strlen(type_str);
|
||||
size = 1;
|
||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||
char chr = type_str[ii];
|
||||
ArgMeta meta = arg_meta(chr);
|
||||
if (meta.chr == 0) {
|
||||
return ErrBadArgumentMeta;
|
||||
}
|
||||
size += meta.size;
|
||||
}
|
||||
char chr = type_str[ii];
|
||||
ArgMeta meta = arg_meta(chr);
|
||||
uint64_t is_negative = 0;
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind == TokNeg) {
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind != TokNumber) {
|
||||
return ErrTriedNegateNonNumber;
|
||||
}
|
||||
is_negative -= 1;
|
||||
if (ensure_push(rv, 1, size) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
if (chr == 'R') {
|
||||
int reg = parse_register(&input[tok->start], tok->len);
|
||||
if (reg > 255) {
|
||||
return ErrBadRegister;
|
||||
}
|
||||
rv->buf[rv->len] = (char)(reg & 0xff);
|
||||
rv->len += 1;
|
||||
} else {
|
||||
uint64_t num_to_write;
|
||||
if (meta.rel == 1 || meta.size == 8) {
|
||||
if (tok->kind == TokIdent) {
|
||||
if (ensure_push((ByteVec*)holes, sizeof(Hole), 1) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
holes->buf[holes->len] = (Hole) {
|
||||
.location = rv->len,
|
||||
.origin = inst_start,
|
||||
.str = &input[tok->start],
|
||||
.len = tok->len,
|
||||
.size = (size_t)meta.size,
|
||||
};
|
||||
holes->len += 1;
|
||||
num_to_write = 0;
|
||||
} else if (tok->kind == TokNumber) {
|
||||
num_to_write = tok->num;
|
||||
rv->buf[rv->len] = inst->opcode;
|
||||
rv->len += 1;
|
||||
for (size_t ii = 0; ii < nargs; ii += 1) {
|
||||
if (ii > 0) {
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind != TokComma) {
|
||||
return ErrNeedCommaAfterArgument;
|
||||
}
|
||||
}
|
||||
char chr = type_str[ii];
|
||||
ArgMeta meta = arg_meta(chr);
|
||||
uint64_t is_negative = 0;
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind == TokNeg) {
|
||||
*tok = token(input, len, tok->start + tok->len);
|
||||
if (tok->kind != TokNumber) {
|
||||
return ErrTriedNegateNonNumber;
|
||||
}
|
||||
is_negative -= 1;
|
||||
}
|
||||
if (chr == 'R') {
|
||||
int reg = parse_register(&input[tok->start], tok->len);
|
||||
if (reg > 255) {
|
||||
return ErrBadRegister;
|
||||
}
|
||||
rv->buf[rv->len] = (char)(reg & 0xff);
|
||||
rv->len += 1;
|
||||
} else {
|
||||
return ErrLabelImmediate;
|
||||
uint64_t num_to_write;
|
||||
if (meta.rel == 1 || meta.size == 8) {
|
||||
if (tok->kind == TokIdent) {
|
||||
if (ensure_push((ByteVec *)holes, sizeof(Hole), 1) != 0) {
|
||||
return ErrOutOfMemory;
|
||||
}
|
||||
holes->buf[holes->len] = (Hole){
|
||||
.location = rv->len,
|
||||
.origin = inst_start,
|
||||
.str = &input[tok->start],
|
||||
.len = tok->len,
|
||||
.size = (size_t)meta.size,
|
||||
};
|
||||
holes->len += 1;
|
||||
num_to_write = 0;
|
||||
} else if (tok->kind == TokNumber) {
|
||||
num_to_write = tok->num;
|
||||
} else {
|
||||
return ErrLabelImmediate;
|
||||
}
|
||||
} else if (tok->kind == TokNumber) {
|
||||
num_to_write = tok->num;
|
||||
} else {
|
||||
return ErrNumberImmediate;
|
||||
}
|
||||
// num_to_write = num_to_write ^ is_negative - is_negative;
|
||||
if (is_negative) {
|
||||
int64_t tmp = -(int64_t)num_to_write;
|
||||
if (tmp > 0) {
|
||||
return ErrBadNumOverflow;
|
||||
}
|
||||
num_to_write = (uint64_t)tmp;
|
||||
}
|
||||
AsmError err = push_int_le(&rv->buf[rv->len], num_to_write,
|
||||
meta.size, meta.sign);
|
||||
if (err != ErrOk) {
|
||||
return err;
|
||||
}
|
||||
rv->len += meta.size;
|
||||
}
|
||||
} else if (tok->kind == TokNumber) {
|
||||
num_to_write = tok->num;
|
||||
} else {
|
||||
return ErrNumberImmediate;
|
||||
}
|
||||
// num_to_write = num_to_write ^ is_negative - is_negative;
|
||||
if (is_negative) {
|
||||
int64_t tmp = -(int64_t)num_to_write;
|
||||
if (tmp > 0) {
|
||||
return ErrBadNumOverflow;
|
||||
}
|
||||
num_to_write = (uint64_t)tmp;
|
||||
}
|
||||
AsmError err =
|
||||
push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
|
||||
if (err != ErrOk) {
|
||||
return err;
|
||||
}
|
||||
rv->len += meta.size;
|
||||
}
|
||||
}
|
||||
|
||||
return ErrOk;
|
||||
return ErrOk;
|
||||
}
|
||||
|
||||
// Assembles the `len` bytes of source text at `input`.
//
// `*out` always receives the output buffer (possibly partial on error, NULL
// `buf` if it could not be allocated); the caller must free() `out->buf`.
// `*einfo` always receives the location assembly stopped at. For errors
// found while patching label references the reported token is the label
// name, but the line information is that of the end of the input.
//
// Returns ErrOk or the first error encountered.
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
                  EInfo *einfo) {
    ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
    HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
    LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
    size_t line = 0;
    size_t line_start = 0;
    size_t pos = 0;
    // init=0, label=1, instruction=2, comment=3, newline -> 0
    size_t line_state = 0;
    AsmError err = ErrOk;

    if (rv.buf == NULL || holes.buf == NULL || labels.buf == NULL) {
        // No token has been read yet; give the caller a defined one.
        einfo->token = (Token){0};
        err = ErrOutOfMemory;
        goto end;
    }

    while (1) {
        Token tok = token(input, len, pos);
        einfo->token = tok;
        pos = tok.start + tok.len;
        if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
            // The tokenizer may carry a specific error code in `num`.
            err = tok.num ? (AsmError)tok.num : ErrInvalidToken;
            // Skip label patching so it cannot overwrite this error.
            goto end;
        }
        if (tok.kind == TokEOF) {
            break;
        }
        if (tok.kind == TokComment) {
            line_state = 3;
            continue;
        }
        if (tok.kind == TokNewline) {
            line += 1;
            line_start = tok.start + tok.len;
            line_state = 0;
            continue;
        }
        if (tok.kind == TokDot) {
            Token next = token(input, len, pos);
            err = next.kind == TokIdent ? ErrDirectiveNotImplemented
                                        : ErrNeedDirectiveAfterDot;
            goto end;
        }
        if (tok.kind == TokIdent) {
            Token next = token(input, len, pos);
            if (next.kind == TokColon) {
                // Label
                pos = next.start + next.len;
                if (line_state >= 1) {
                    err = ErrLabelAfterLabel;
                    einfo->token = next;
                    goto end;
                }
                line_state = 1;
                if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
                    err = ErrOutOfMemory;
                    goto end;
                }
                size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
                if (idx != INVALID) {
                    err = ErrDuplicateLabel;
                    goto end;
                }
                labels.buf[labels.len] = (Label){
                    .location = rv.len,
                    .str = &input[tok.start],
                    .len = tok.len,
                };
                labels.len += 1;
            } else {
                // Instruction
                if (line_state >= 2) {
                    err = ErrTrailingLine;
                    goto end;
                }
                line_state = 2;
                err = assemble_instr(ht, input, len, &tok, &rv, &holes);
                // assemble_instr advanced `tok` past the operands it read.
                einfo->token = tok;
                pos = tok.start + tok.len;
                if (err != ErrOk) {
                    goto end;
                }
            }
            continue;
        }
        err = ErrUnexpectedToken;
        goto end;
    }

    // Patch every recorded label reference now that all labels are known.
    for (size_t ii = 0; ii < holes.len; ii += 1) {
        Hole *hole = &holes.buf[ii];
        // Report errors below against the label name.
        einfo->token.start = (size_t)(hole->str - input);
        einfo->token.len = hole->len;

        size_t idx = label_lookup(&labels, hole->str, hole->len);
        if (idx == INVALID) {
            // NOTE(review): AsmError has no dedicated "undefined label"
            // code; ErrInvalidToken is the closest existing one.
            err = ErrInvalidToken;
            goto end;
        }
        // 8-byte operands take the label's absolute output offset (unsigned);
        // narrower ones take the signed distance from the instruction start.
        uint64_t num_to_write = labels.buf[idx].location;
        uint8_t sign = 2;
        if (hole->size != 8) {
            sign = 1;
            num_to_write -= hole->origin;
        }
        err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size,
                          sign);
        if (err != ErrOk) {
            goto end;
        }
    }

end:
    free(holes.buf);
    free(labels.buf);
    *out = rv;
    einfo->line = line + 1;
    einfo->line_start = line_start;
    return err;
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int hex_out = 0;
|
||||
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
|
||||
hex_out = 1;
|
||||
}
|
||||
int hex_out = 0;
|
||||
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
|
||||
hex_out = 1;
|
||||
}
|
||||
|
||||
int err = 0;
|
||||
InstHt ht = NULL;
|
||||
ByteVec input;
|
||||
int err = 0;
|
||||
InstHt ht = NULL;
|
||||
ByteVec input;
|
||||
|
||||
err = slurp(stdin, &input);
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "failed to read the file: %d\n", err);
|
||||
goto done;
|
||||
}
|
||||
ht = build_lookup();
|
||||
if (ht == NULL) {
|
||||
err = ErrOutOfMemory;
|
||||
fprintf(stderr, "failed to init hash table: %d\n", err);
|
||||
goto done;
|
||||
}
|
||||
err = slurp(stdin, &input);
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "failed to read the file: %d\n", err);
|
||||
goto done;
|
||||
}
|
||||
ht = build_lookup();
|
||||
if (ht == NULL) {
|
||||
err = ErrOutOfMemory;
|
||||
fprintf(stderr, "failed to init hash table: %d\n", err);
|
||||
goto done;
|
||||
}
|
||||
|
||||
ByteVec out;
|
||||
EInfo einfo;
|
||||
err = assemble(ht, input.buf, input.len, &out, &einfo);
|
||||
if (err != 0) {
|
||||
size_t column = einfo.token.start - einfo.line_start + 1;
|
||||
fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
|
||||
ERRORS[err], einfo.line, column, (int)einfo.token.len,
|
||||
&input.buf[einfo.token.start]);
|
||||
goto done;
|
||||
}
|
||||
if (hex_out) {
|
||||
hd(out.buf, out.len);
|
||||
} else {
|
||||
fwrite(out.buf, 1, out.len, stdout);
|
||||
}
|
||||
ByteVec out;
|
||||
EInfo einfo;
|
||||
err = assemble(ht, input.buf, input.len, &out, &einfo);
|
||||
if (err != 0) {
|
||||
size_t column = einfo.token.start - einfo.line_start + 1;
|
||||
fprintf(stderr,
|
||||
"failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
|
||||
ERRORS[err], einfo.line, column, (int)einfo.token.len,
|
||||
&input.buf[einfo.token.start]);
|
||||
goto done;
|
||||
}
|
||||
if (hex_out) {
|
||||
hd(out.buf, out.len);
|
||||
} else {
|
||||
fwrite(out.buf, 1, out.len, stdout);
|
||||
}
|
||||
|
||||
done:
|
||||
free(ht);
|
||||
free(input.buf);
|
||||
free(out.buf);
|
||||
return err;
|
||||
free(ht);
|
||||
free(input.buf);
|
||||
free(out.buf);
|
||||
return err;
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
typedef struct InstDesc_s {
|
||||
char *mnemonic;
|
||||
unsigned char opcode;
|
||||
Operands type;
|
||||
char *mnemonic;
|
||||
unsigned char opcode;
|
||||
Operands type;
|
||||
} InstDesc;
|
||||
|
||||
const InstDesc INST[] = {
|
||||
|
@ -68,11 +68,11 @@ const InstDesc INST[] = {
|
|||
|
||||
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);
|
||||
size_t inst_find(const char *mnemonic, size_t len) {
|
||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||
const char *entry = INST[ii].mnemonic;
|
||||
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
|
||||
return ii;
|
||||
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
|
||||
const char *entry = INST[ii].mnemonic;
|
||||
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
|
||||
return ii;
|
||||
}
|
||||
}
|
||||
}
|
||||
return INVALID;
|
||||
return INVALID;
|
||||
}
|
||||
|
|
|
@ -1,23 +1,23 @@
|
|||
// Parses a register name of the form "r<N>" with N in 0..255, written
// without leading zeros, from the first `len` bytes of `name`.
// Returns the register number, or 256 when the name is not a valid register.
int parse_register(char *name, size_t len) {
    if (len < 2) {
        return 256;  // Need the 'r' plus at least one digit
    }
    if (name[0] != 'r') {
        return 256;  // Register name should start with 'r'
    }
    if (len > 4) {
        return 256;  // Register name too long
    }
    if (len > 2 && name[1] == '0') {
        return 256;  // Extra zero prefix
    }
    uint16_t rv = 0;
    for (size_t ii = 1; ii < len; ii += 1) {
        char chr = name[ii];
        if (!(chr >= '0' && chr <= '9')) {
            return 256;  // Register name must only contain numbers
        }
        rv = (uint16_t)(rv * 10 + (chr - '0'));
    }
    if (rv > 255) {
        return 256;  // Register number too large
    }
    return (int)rv;
}
|
||||
|
|
|
@ -23,7 +23,8 @@ Token token_ident(char *input, size_t len, size_t pos) {
|
|||
while (pos < len) {
|
||||
char chr = input[pos];
|
||||
char chru = chr & ~0x20;
|
||||
int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z');
|
||||
int good = chr == '_' || (chr >= '0' && chr <= '9') ||
|
||||
(chru >= 'A' && chru <= 'Z');
|
||||
if (!good) {
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue