Addressing the haters(comments)

This commit is contained in:
able 2024-03-11 05:15:06 -05:00
parent 78b9b99ab3
commit 2575e7fced
9 changed files with 560 additions and 773 deletions

View file

@ -1,41 +1,66 @@
typedef struct ArgMeta_s typedef struct ArgMeta_s {
{ char chr;
char chr; uint8_t size;
uint8_t size; // This is a bitset of acceptable overflow states,
// This is a bitset of acceptable overflow states, // where accept signed = 1, accept unsigned = 2.
// where accept signed = 1, accept unsigned = 2. // 1 -> signed, 2 -> unsigned, 3 -> whatever
// 1 -> signed, 2 -> unsigned, 3 -> whatever uint8_t sign;
uint8_t sign; uint8_t rel;
uint8_t rel;
} ArgMeta; } ArgMeta;
const ArgMeta ARGS[] = { const ArgMeta ARGS[] = {
{'R', 1, 2, 0}, {'R', 1, 2, 0}, {'1', 1, 3, 0}, {'b', 1, 1, 0}, {'B', 1, 2, 0},
{'1', 1, 3, 0}, {'2', 2, 3, 0}, {'o', 2, 1, 1}, {'h', 2, 1, 0}, {'H', 2, 2, 0},
{'b', 1, 1, 0}, {'4', 4, 3, 0}, {'w', 4, 1, 0}, {'O', 4, 1, 1}, {'W', 4, 2, 0},
{'B', 1, 2, 0}, {'8', 8, 3, 0}, {'d', 8, 1, 0}, {'D', 8, 2, 0}, {0},
{'2', 2, 3, 0},
{'o', 2, 1, 1},
{'h', 2, 1, 0},
{'H', 2, 2, 0},
{'4', 4, 3, 0},
{'w', 4, 1, 0},
{'O', 4, 1, 1},
{'W', 4, 2, 0},
{'8', 8, 3, 0},
{'d', 8, 1, 0},
{'D', 8, 2, 0},
{0},
}; };
typedef enum Operands_e {
Empty = 0,
R,
RR,
RRR,
RRRR,
Rx8,
Rx16,
Rx32,
Rx64,
RRx8,
RRx16,
RRx32,
RRx64,
RRs32,
RRs64,
RRu8,
RRu16,
RRu64,
r16,
r32,
RRr16,
RRr32,
RRr16u16,
RRr32u16,
RRu64u16,
} Operands;
// R -> register,
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
// b -> Si8, h -> Si16, w -> Si32, d -> Si64,
// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64,
// o -> 16 bit relative offset,
// O -> 32 bit relative offset,
const char *TYPE_STR[] = {
"", "R", "RR", "RRR", "RRRR", "R1", "R2", "R4", "R8",
"RR1", "RR2", "RR4", "RR8", "RRw", "RRd", "RRB", "RRH", "RRD",
"o", "O", "RRo", "RRO", "RRoH", "RROH", "RRDH",
};
const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]); const size_t NARGS = sizeof(ARGS) / sizeof(ARGS[0]);
ArgMeta arg_meta(char arg) ArgMeta arg_meta(char arg) {
{ for (size_t ii = 0; ii < NARGS; ii += 1) {
for (size_t ii = 0; ii < NARGS; ii += 1) ArgMeta meta = ARGS[ii];
{ if (meta.chr == arg) {
ArgMeta meta = ARGS[ii]; return meta;
if (meta.chr == arg)
{
return meta;
}
} }
return ARGS[NARGS - 1]; }
return ARGS[NARGS - 1];
} }

View file

@ -1,35 +1,29 @@
const size_t INVALID = ~(size_t)0;
typedef struct ByteVec_s typedef struct ByteVec_s {
{ char *buf;
char *buf; size_t cap;
size_t cap; size_t len;
size_t len;
} ByteVec; } ByteVec;
AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) AsmError ensure_push(ByteVec *vec, size_t el_size, size_t extra) {
{ if (vec->len + extra < vec->len) {
if (vec->len + extra < vec->len) return ErrOutOfMemory;
{ }
return ErrOutOfMemory; while (vec->len + extra > vec->cap) {
if ((~(size_t)0) / 2 < vec->cap) {
return ErrOutOfMemory;
} }
while (vec->len + extra > vec->cap) vec->cap *= 2;
{ // multiply overflow
if ((~(size_t)0) / 2 < vec->cap) if ((~(size_t)0) / el_size < vec->cap) {
{ return ErrOutOfMemory;
return ErrOutOfMemory;
}
vec->cap *= 2;
// multiply overflow
if ((~(size_t)0) / el_size < vec->cap)
{
return ErrOutOfMemory;
}
vec->buf = realloc(vec->buf, el_size * vec->cap);
if (vec->buf == NULL)
{
vec->cap = 0;
return ErrOutOfMemory;
}
} }
return 0; vec->buf = realloc(vec->buf, el_size * vec->cap);
if (vec->buf == NULL) {
vec->cap = 0;
return ErrOutOfMemory;
}
}
return 0;
} }

View file

@ -1,6 +1,5 @@
typedef struct EInfo_s typedef struct EInfo_s {
{ Token token;
Token token; size_t line;
size_t line; size_t line_start;
size_t line_start;
} EInfo; } EInfo;

View file

@ -1,23 +1,22 @@
typedef enum AsmError_e typedef enum AsmError_e {
{ ErrOk = 0,
ErrOk = 0, ErrBadRegister,
ErrBadRegister, ErrImmediateOverflow,
ErrImmediateOverflow, ErrInvalidToken,
ErrInvalidToken, ErrBadArgumentMeta,
ErrBadArgumentMeta, ErrNeedCommaAfterArgument,
ErrNeedCommaAfterArgument, ErrLabelImmediate,
ErrLabelImmediate, ErrNumberImmediate,
ErrNumberImmediate, ErrBadNumOverflow,
ErrBadNumOverflow, ErrBadNumDigit,
ErrBadNumDigit, ErrBadNumNoDigit,
ErrBadNumNoDigit, ErrLabelAfterLabel,
ErrLabelAfterLabel, ErrOutOfMemory,
ErrOutOfMemory, ErrDuplicateLabel,
ErrDuplicateLabel, ErrTrailingLine,
ErrTrailingLine, ErrNeedDirectiveAfterDot,
ErrNeedDirectiveAfterDot, ErrDirectiveNotImplemented,
ErrDirectiveNotImplemented, ErrUnexpectedToken,
ErrUnexpectedToken,
} AsmError; } AsmError;
char *ERRORS[] = { char *ERRORS[] = {
"Success", "Success",

View file

@ -1,73 +1,57 @@
// Instruction Hash table, for faster lookups // Instruction Hash table, for faster lookups
typedef struct InstHtNode_s typedef struct InstHtNode_s {
{ uint8_t index1;
uint8_t index1; uint8_t index2;
uint8_t index2;
} InstHtNode; } InstHtNode;
typedef InstHtNode *InstHt; typedef InstHtNode *InstHt;
uint32_t inst_hash(const char *s, size_t len) uint32_t inst_hash(const char *s, size_t len) {
{ uint32_t hash = 0;
uint32_t hash = 0; uint32_t mul = 75;
uint32_t mul = 75; for (size_t ii = 0; ii < len; ii += 1) {
for (size_t ii = 0; ii < len; ii += 1) hash ^= s[ii] * mul;
{ hash *= mul;
hash ^= s[ii] * mul; }
hash *= mul; return hash;
}
return hash;
} }
InstHt build_lookup(void) InstHt build_lookup(void) {
{ const size_t size = 256;
const size_t size = 256; InstHt table = (InstHt)malloc(size * sizeof(InstHtNode));
InstHt table = (InstHt)malloc(size * sizeof(InstHtNode)); if (table == NULL) {
if (table == NULL)
{
return table;
}
for (size_t ii = 0; ii < size; ii += 1)
{
table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1)
{
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff)
{
node->index1 = ii;
}
else if (node->index2 == 0xff)
{
node->index2 = ii;
}
else
{
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
}
}
return table; return table;
}
for (size_t ii = 0; ii < size; ii += 1) {
table[ii] = (InstHtNode){0xff, 0xff};
}
for (size_t ii = 0; ii < INST_CNT; ii += 1) {
const char *mnemonic = INST[ii].mnemonic;
uint32_t hash = inst_hash(mnemonic, strlen(mnemonic));
InstHtNode *node = &table[hash & 0xff];
if (node->index1 == 0xff) {
node->index1 = ii;
} else if (node->index2 == 0xff) {
node->index2 = ii;
} else {
fprintf(stderr, "more than 1 collision in hash table\n");
exit(1);
}
}
return table;
} }
size_t inst_lookup(InstHt ht, const char *s, size_t len) size_t inst_lookup(InstHt ht, const char *s, size_t len) {
{ uint32_t hash = inst_hash(s, len);
uint32_t hash = inst_hash(s, len); uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)];
uint8_t *node = (uint8_t *)&ht[(size_t)(hash & 0xff)]; for (size_t ii = 0; ii < 2; ii += 1) {
for (size_t ii = 0; ii < 2; ii += 1) size_t idx = (size_t)node[ii];
{ if (idx == 0xff) {
size_t idx = (size_t)node[ii]; break;
if (idx == 0xff)
{
break;
}
const char *mnemonic = INST[idx].mnemonic;
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0)
{
return idx;
}
} }
return INVALID; const char *mnemonic = INST[idx].mnemonic;
if (strncmp(s, mnemonic, len) == 0 && mnemonic[len] == 0) {
return idx;
}
}
return INVALID;
} }

View file

@ -20,455 +20,374 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE. SOFTWARE.
*/ */
#include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdint.h>
#include "args.c"
#include "op.h"
#include "error.h" #include "error.h"
//
#include "bytevec.c"
//
#include "args.c"
#include "instructions.c" #include "instructions.c"
//
#include "hash.c" #include "hash.c"
//
#include "register.c" #include "register.c"
#include "token.c" #include "token.c"
//
#include "einfo.h" #include "einfo.h"
#include "bytevec.c"
void hd(char *data, size_t len) void hd(char *data, size_t len) {
{ for (size_t ii = 0; ii < len; ii += 1) {
for (size_t ii = 0; ii < len; ii += 1) if (ii > 0 && (ii & 15) == 0) {
{ printf("\n");
if (ii > 0 && (ii & 15) == 0)
{
printf("\n");
}
printf("%02x", (uint8_t)data[ii]);
} }
printf("\n"); printf("%02x", (uint8_t)data[ii]);
}
printf("\n");
} }
#define MIN_SIZE 4096 #define MIN_SIZE 4096
int slurp(FILE *fd, ByteVec *out) int slurp(FILE *fd, ByteVec *out) {
{ ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; size_t bread = 1;
size_t bread = 1; int err = 0;
int err = 0; if (rv.buf == NULL) {
if (rv.buf == NULL) rv.cap = 0;
{ err = ErrOutOfMemory;
rv.cap = 0; bread = 0;
err = ErrOutOfMemory; }
bread = 0; while (bread > 0) {
if (ensure_push(&rv, 1, 1) != 0) {
err = ErrOutOfMemory;
break;
} }
while (bread > 0) bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd);
{ rv.len += bread;
if (ensure_push(&rv, 1, 1) != 0) }
{ *out = rv;
err = ErrOutOfMemory; if (err == 0) {
break; err = ferror(fd);
} }
bread = fread(&rv.buf[rv.len], 1, rv.cap - rv.len, fd); return err;
rv.len += bread;
}
*out = rv;
if (err == 0)
{
err = ferror(fd);
}
return err;
} }
typedef struct Hole_s typedef struct Hole_s {
{ size_t location;
size_t location; size_t origin;
size_t origin; char *str;
char *str; size_t len;
size_t len; size_t size;
size_t size;
} Hole; } Hole;
typedef struct HoleVec_s typedef struct HoleVec_s {
{ Hole *buf;
Hole *buf; size_t cap;
size_t cap; size_t len;
size_t len;
} HoleVec; } HoleVec;
typedef struct Label_s typedef struct Label_s {
{ size_t location;
size_t location; char *str;
char *str; size_t len;
size_t len;
} Label; } Label;
typedef struct LabelVec_s typedef struct LabelVec_s {
{ Label *buf;
Label *buf; size_t cap;
size_t cap; size_t len;
size_t len;
} LabelVec; } LabelVec;
size_t label_lookup(LabelVec *labels, char *name, size_t len) size_t label_lookup(LabelVec *labels, char *name, size_t len) {
{ size_t nlabels = labels->len;
size_t nlabels = labels->len; Label *buf = labels->buf;
Label *buf = labels->buf; for (size_t ii = 0; ii < nlabels; ii += 1) {
for (size_t ii = 0; ii < nlabels; ii += 1) if (len == buf->len && strncmp(buf->str, name, len) == 0) {
{ return ii;
if (len == buf->len && strncmp(buf->str, name, len) == 0)
{
return ii;
}
buf += 1;
} }
return INVALID; buf += 1;
}
return INVALID;
} }
// safety: assumes the buffer has enough place for specified integer size // safety: assumes the buffer has enough place for specified integer size
AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) AsmError push_int_le(char *buf, uint64_t val, size_t size, uint8_t sign) {
{ int valid_uint = val >> (size * 8) == 0;
int valid_uint = val >> (size * 8) == 0; int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1); int valid_int = int_shifted == 0 || (~int_shifted) == 0;
int valid_int = int_shifted == 0 || (~int_shifted) == 0; // Note: this assumes the format for `sign` is a bitset.
// Note: this assumes the format for `sign` is a bitset. int validity = valid_int | (valid_uint << 1);
int validity = valid_int | (valid_uint << 1); if ((validity & sign) == 0) {
if ((validity & sign) == 0) return ErrImmediateOverflow;
{ }
return ErrImmediateOverflow; for (size_t ii = 0; ii < size; ii += 1) {
} buf[ii] = val & 0xff;
for (size_t ii = 0; ii < size; ii += 1) val >>= 8;
{ }
buf[ii] = val & 0xff; return ErrOk;
val >>= 8;
}
return ErrOk;
} }
AsmError assemble_instr( AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
InstHt ht, char *input, size_t len, Token *tok, ByteVec *rv, HoleVec *holes, LabelVec *labels) {
ByteVec *rv, HoleVec *holes, LabelVec *labels) const InstDesc *inst;
{ const char *type_str;
const InstDesc *inst; size_t nargs;
const char *type_str; size_t size;
size_t nargs; size_t idx = inst_lookup(ht, &input[tok->start], tok->len);
size_t size; size_t inst_start = rv->len;
size_t idx = inst_lookup(ht, &input[tok->start], tok->len); if (idx == INVALID) {
size_t inst_start = rv->len; return ErrInvalidToken;
if (idx == INVALID) }
{ inst = &INST[idx];
return ErrInvalidToken; type_str = TYPE_STR[inst->type];
nargs = strlen(type_str);
size = 1;
for (size_t ii = 0; ii < nargs; ii += 1) {
char chr = type_str[ii];
ArgMeta meta = arg_meta(chr);
if (meta.chr == 0) {
return ErrBadArgumentMeta;
} }
inst = &INST[idx]; size += meta.size;
type_str = TYPE_STR[inst->type]; }
nargs = strlen(type_str); if (ensure_push(rv, 1, size) != 0) {
size = 1; return ErrOutOfMemory;
for (size_t ii = 0; ii < nargs; ii += 1) }
{ rv->buf[rv->len] = inst->opcode;
char chr = type_str[ii]; rv->len += 1;
ArgMeta meta = arg_meta(chr); for (size_t ii = 0; ii < nargs; ii += 1) {
if (meta.chr == 0) if (ii > 0) {
{ *tok = token(input, len, tok->start + tok->len);
return ErrBadArgumentMeta; if (tok->kind != TokComma) {
} return ErrNeedCommaAfterArgument;
size += meta.size; }
} }
if (ensure_push(rv, 1, size) != 0) char chr = type_str[ii];
{ ArgMeta meta = arg_meta(chr);
return ErrOutOfMemory; uint64_t is_negative = 0;
*tok = token(input, len, tok->start + tok->len);
if (tok->kind == TokNeg) {
*tok = token(input, len, tok->start + tok->len);
is_negative = ~(uint64_t)0;
} }
rv->buf[rv->len] = inst->opcode; if (chr == 'R') {
rv->len += 1; int reg = parse_register(&input[tok->start], tok->len);
for (size_t ii = 0; ii < nargs; ii += 1) if (reg > 255) {
{ return ErrBadRegister;
if (ii > 0) }
{ rv->buf[rv->len] = (char)(reg & 0xff);
*tok = token(input, len, tok->start + tok->len); rv->len += 1;
if (tok->kind != TokComma) } else {
{ uint64_t num_to_write;
return ErrNeedCommaAfterArgument; if (meta.rel == 1 || meta.size == 8) {
if (tok->kind == TokIdent) {
size_t idx = label_lookup(labels, &input[tok->start], tok->len);
if (idx == INVALID) {
if (ensure_push((ByteVec *)holes, 1, sizeof(Hole)) != 0) {
return ErrOutOfMemory;
} }
holes->buf[holes->len] = (Hole){
.location = rv->len,
.origin = inst_start,
.str = &input[tok->start],
.len = tok->len,
.size = (size_t)meta.size,
};
holes->len += 1;
num_to_write = 0;
} else {
num_to_write = labels->buf[idx].location;
if (meta.size != 8) {
num_to_write -= inst_start;
}
}
} else if (tok->kind == TokNumber) {
num_to_write = tok->num;
} else {
return ErrLabelImmediate;
} }
char chr = type_str[ii]; } else if (tok->kind == TokNumber) {
ArgMeta meta = arg_meta(chr); num_to_write = tok->num;
uint64_t is_negative = 0; } else {
*tok = token(input, len, tok->start + tok->len); return ErrNumberImmediate;
if (tok->kind == TokNeg) }
{ // num_to_write = num_to_write ^ is_negative - is_negative;
*tok = token(input, len, tok->start + tok->len); if (is_negative) {
is_negative = ~(uint64_t)0; int64_t tmp = -(int64_t)num_to_write;
} if (tmp > 0) {
if (chr == 'R') return ErrBadNumOverflow;
{
int reg = parse_register(&input[tok->start], tok->len);
if (reg > 255)
{
return ErrBadRegister;
}
rv->buf[rv->len] = (char)(reg & 0xff);
rv->len += 1;
}
else
{
uint64_t num_to_write;
if (meta.rel == 1 || meta.size == 8)
{
if (tok->kind == TokIdent)
{
size_t idx = label_lookup(labels, &input[tok->start], tok->len);
if (idx == INVALID)
{
if (ensure_push((ByteVec *)holes, 1, sizeof(Hole)) != 0)
{
return ErrOutOfMemory;
}
holes->buf[holes->len] = (Hole){
.location = rv->len,
.origin = inst_start,
.str = &input[tok->start],
.len = tok->len,
.size = (size_t)meta.size,
};
holes->len += 1;
num_to_write = 0;
}
else
{
num_to_write = labels->buf[idx].location;
if (meta.size != 8)
{
num_to_write -= inst_start;
}
}
}
else if (tok->kind == TokNumber)
{
num_to_write = tok->num;
}
else
{
return ErrLabelImmediate;
}
}
else if (tok->kind == TokNumber)
{
num_to_write = tok->num;
}
else
{
return ErrNumberImmediate;
}
// num_to_write = num_to_write ^ is_negative - is_negative;
if (is_negative)
{
int64_t tmp = -(int64_t)num_to_write;
if (tmp > 0)
{
return ErrBadNumOverflow;
}
num_to_write = (uint64_t)tmp;
}
AsmError err = push_int_le(
&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
if (err != 0)
{
return err;
}
rv->len += meta.size;
} }
num_to_write = (uint64_t)tmp;
}
AsmError err =
push_int_le(&rv->buf[rv->len], num_to_write, meta.size, meta.sign);
if (err != 0) {
return err;
}
rv->len += meta.size;
} }
}
return 0; return 0;
} }
AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
{ EInfo *einfo) {
ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0}; HoleVec holes = {malloc(MIN_SIZE * sizeof(Hole)), MIN_SIZE, 0};
LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0}; LabelVec labels = {malloc(MIN_SIZE * sizeof(Label)), MIN_SIZE, 0};
size_t line = 0; size_t line = 0;
size_t line_start = 0; size_t line_start = 0;
size_t pos = 0; size_t pos = 0;
// init=0, label=1, instruction=2, comment=3, newline -> 0 // init=0, label=1, instruction=2, comment=3, newline -> 0
size_t line_state = 0; size_t line_state = 0;
AsmError err = ErrOk; AsmError err = ErrOk;
while (1) while (1) {
{ Token tok = token(input, len, pos);
Token tok = token(input, len, pos); einfo->token = tok;
einfo->token = tok; pos = tok.start + tok.len;
pos = tok.start + tok.len; if (tok.kind == TokInvalid || tok.kind == TokBadNumber) {
if (tok.kind == TokInvalid || tok.kind == TokBadNumber) if (tok.num) {
{ err = (AsmError)tok.num;
if (tok.num) } else {
{ err = ErrInvalidToken;
err = (AsmError)tok.num; }
} break;
else }
{ if (tok.kind == TokEOF) {
err = ErrInvalidToken; break;
} }
break; if (tok.kind == TokComment) {
} line_state = 3;
if (tok.kind == TokEOF) continue;
{ }
break; if (tok.kind == TokNewline) {
} line += 1;
if (tok.kind == TokComment) line_start = tok.start + tok.len;
{ line_state = 0;
line_state = 3; continue;
continue; }
} if (tok.kind == TokDot) {
if (tok.kind == TokNewline) Token next = token(input, len, pos);
{ if (next.kind == TokIdent) {
line += 1; err = ErrDirectiveNotImplemented;
line_start = tok.start + tok.len;
line_state = 0;
continue;
}
if (tok.kind == TokDot)
{
Token next = token(input, len, pos);
if (next.kind == TokIdent)
{
err = ErrDirectiveNotImplemented;
goto end;
}
else
{
err = ErrNeedDirectiveAfterDot;
goto end;
}
continue;
}
if (tok.kind == TokIdent)
{
Token next = token(input, len, pos);
if (next.kind == TokColon)
{
// Label
pos = next.start + next.len;
if (line_state >= 1)
{
err = ErrLabelAfterLabel;
einfo->token = next;
goto end;
}
line_state = 1;
if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0)
{
err = ErrOutOfMemory;
goto end;
}
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
if (idx != INVALID)
{
err = ErrDuplicateLabel;
goto end;
}
labels.buf[labels.len] = (Label){
.location = rv.len,
.str = &input[tok.start],
.len = tok.len,
};
labels.len += 1;
}
else
{
// Instruction
if (line_state >= 2)
{
err = ErrTrailingLine;
goto end;
}
line_state = 2;
err = assemble_instr(
ht, input, len, &tok,
&rv, &holes, &labels);
pos = tok.start + tok.len;
if (err != 0)
{
goto end;
}
}
continue;
}
err = ErrUnexpectedToken;
goto end; goto end;
} else {
err = ErrNeedDirectiveAfterDot;
goto end;
}
continue;
} }
if (tok.kind == TokIdent) {
Token next = token(input, len, pos);
if (next.kind == TokColon) {
// Label
pos = next.start + next.len;
if (line_state >= 1) {
err = ErrLabelAfterLabel;
einfo->token = next;
goto end;
}
line_state = 1;
if (ensure_push((ByteVec *)&labels, sizeof(Label), 1) != 0) {
err = ErrOutOfMemory;
goto end;
}
size_t idx = label_lookup(&labels, &input[tok.start], tok.len);
if (idx != INVALID) {
err = ErrDuplicateLabel;
goto end;
}
labels.buf[labels.len] = (Label){
.location = rv.len,
.str = &input[tok.start],
.len = tok.len,
};
labels.len += 1;
} else {
// Instruction
if (line_state >= 2) {
err = ErrTrailingLine;
goto end;
}
line_state = 2;
err = assemble_instr(ht, input, len, &tok, &rv, &holes, &labels);
pos = tok.start + tok.len;
if (err != 0) {
goto end;
}
}
continue;
}
err = ErrUnexpectedToken;
goto end;
}
for (size_t ii = 0; ii < holes.len; ii += 1) for (size_t ii = 0; ii < holes.len; ii += 1) {
{ Hole *hole = &holes.buf[ii];
Hole *hole = &holes.buf[ii]; size_t idx = label_lookup(&labels, hole->str, hole->len);
size_t idx = label_lookup(&labels, hole->str, hole->len); uint64_t num_to_write = labels.buf[idx].location;
uint64_t num_to_write = labels.buf[idx].location; uint8_t sign = 1;
uint8_t sign = 1; if (hole->size != 8) {
if (hole->size != 8) sign = 2;
{ num_to_write -= hole->origin;
sign = 2;
num_to_write -= hole->origin;
}
err = push_int_le(
&rv.buf[hole->location], num_to_write, hole->size, sign);
if (err != 0)
{
goto end;
}
} }
err = push_int_le(&rv.buf[hole->location], num_to_write, hole->size, sign);
if (err != 0) {
goto end;
}
}
end: end:
free(holes.buf); free(holes.buf);
free(labels.buf); free(labels.buf);
*out = rv; *out = rv;
einfo->line = line + 1; einfo->line = line + 1;
einfo->line_start = line_start; einfo->line_start = line_start;
return err; return err;
} }
int main(int argc, char **argv) int main(int argc, char **argv) {
{ int hex_out = 0;
int hex_out = 0; if (argc >= 2 && strcmp(argv[1], "--hex") == 0) {
if (argc >= 2 && strcmp(argv[1], "--hex") == 0) hex_out = 1;
{ }
hex_out = 1;
}
int err = 0; int err = 0;
InstHt ht = NULL; InstHt ht = NULL;
ByteVec input; ByteVec input;
err = slurp(stdin, &input); err = slurp(stdin, &input);
if (err != 0) if (err != 0) {
{ fprintf(stderr, "failed to read the file: %d\n", err);
fprintf(stderr, "failed to read the file: %d\n", err); goto done;
goto done; }
} ht = build_lookup();
ht = build_lookup(); if (ht == NULL) {
if (ht == NULL) err = ErrOutOfMemory;
{ fprintf(stderr, "failed to init hash table: %d\n", err);
err = ErrOutOfMemory; goto done;
fprintf(stderr, "failed to init hash table: %d\n", err); }
goto done;
}
ByteVec out; ByteVec out;
EInfo einfo; EInfo einfo;
err = assemble(ht, input.buf, input.len, &out, &einfo); err = assemble(ht, input.buf, input.len, &out, &einfo);
if (err != 0) if (err != 0) {
{ size_t column = einfo.token.start - einfo.line_start + 1;
size_t column = einfo.token.start - einfo.line_start + 1; fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n",
fprintf(stderr, "failed to assemble, %s, line=%zu, col=%zu token=%.*s\n", ERRORS[err], einfo.line, column, (int)einfo.token.len,
ERRORS[err], einfo.line, column, &input.buf[einfo.token.start]);
(int)einfo.token.len, &input.buf[einfo.token.start]); goto done;
goto done; }
} if (hex_out) {
if (hex_out) hd(out.buf, out.len);
{ } else {
hd(out.buf, out.len); fwrite(out.buf, 1, out.len, stdout);
} }
else
{
fwrite(out.buf, 1, out.len, stdout);
}
done: done:
free(ht); free(ht);
free(input.buf); free(input.buf);
free(out.buf); free(out.buf);
return err; return err;
} }

View file

@ -1,142 +1,78 @@
typedef struct InstDesc_s typedef struct InstDesc_s {
{ char *mnemonic;
char *mnemonic; unsigned char opcode;
unsigned char opcode; Operands type;
OpType type;
} InstDesc; } InstDesc;
const InstDesc INST[] = { const InstDesc INST[] = {
{"un", 0x00, Empty}, {"un", 0x00, Empty}, {"tx", 0x01, Empty},
{"tx", 0x01, Empty}, {"nop", 0x02, Empty}, {"add8", 0x03, RRR},
{"nop", 0x02, Empty}, {"add16", 0x04, RRR}, {"add32", 0x05, RRR},
{"add8", 0x03, RRR}, {"add64", 0x06, RRR}, {"sub8", 0x07, RRR},
{"add16", 0x04, RRR}, {"sub16", 0x08, RRR}, {"sub32", 0x09, RRR},
{"add32", 0x05, RRR}, {"sub64", 0x0A, RRR}, {"mul8", 0x0B, RRR},
{"add64", 0x06, RRR}, {"mul16", 0x0C, RRR}, {"mul32", 0x0D, RRR},
{"sub8", 0x07, RRR}, {"mul64", 0x0E, RRR}, {"and", 0x0F, RRR},
{"sub16", 0x08, RRR}, {"or", 0x10, RRR}, {"xor", 0x11, RRR},
{"sub32", 0x09, RRR}, {"slu8", 0x12, RRR}, {"slu16", 0x13, RRR},
{"sub64", 0x0A, RRR}, {"slu32", 0x14, RRR}, {"slu64", 0x15, RRR},
{"mul8", 0x0B, RRR}, {"sru8", 0x16, RRR}, {"sru16", 0x17, RRR},
{"mul16", 0x0C, RRR}, {"sru32", 0x18, RRR}, {"sru64", 0x19, RRR},
{"mul32", 0x0D, RRR}, {"srs8", 0x1A, RRR}, {"srs16", 0x1B, RRR},
{"mul64", 0x0E, RRR}, {"srs32", 0x1C, RRR}, {"srs64", 0x1D, RRR},
{"and", 0x0F, RRR}, {"cmpu", 0x1E, RRR}, {"cmps", 0x1F, RRR},
{"or", 0x10, RRR}, {"diru8", 0x20, RRRR}, {"diru16", 0x21, RRRR},
{"xor", 0x11, RRR}, {"diru32", 0x22, RRRR}, {"diru64", 0x23, RRRR},
{"slu8", 0x12, RRR}, {"dirs8", 0x24, RRRR}, {"dirs16", 0x25, RRRR},
{"slu16", 0x13, RRR}, {"dirs32", 0x26, RRRR}, {"dirs64", 0x27, RRRR},
{"slu32", 0x14, RRR}, {"neg", 0x28, RR}, {"not", 0x29, RR},
{"slu64", 0x15, RRR}, {"sxt8", 0x2A, RR}, {"sxt16", 0x2B, RR},
{"sru8", 0x16, RRR}, {"sxt32", 0x2C, RR}, {"addi8", 0x2D, RRx8},
{"sru16", 0x17, RRR}, {"addi16", 0x2E, RRx16}, {"addi32", 0x2F, RRx32},
{"sru32", 0x18, RRR}, {"addi64", 0x30, RRx64}, {"muli8", 0x31, RRx8},
{"sru64", 0x19, RRR}, {"muli16", 0x32, RRx16}, {"muli32", 0x33, RRx32},
{"srs8", 0x1A, RRR}, {"muli64", 0x34, RRx64}, {"andi", 0x35, RRx64},
{"srs16", 0x1B, RRR}, {"ori", 0x36, RRx64}, {"xori", 0x37, RRx64},
{"srs32", 0x1C, RRR}, {"slui8", 0x38, RRu8}, {"slui16", 0x39, RRu8},
{"srs64", 0x1D, RRR}, {"slui32", 0x3A, RRu8}, {"slui64", 0x3B, RRu8},
{"cmpu", 0x1E, RRR}, {"srui8", 0x3C, RRu8}, {"srui16", 0x3D, RRu8},
{"cmps", 0x1F, RRR}, {"srui32", 0x3E, RRu8}, {"srui64", 0x3F, RRu8},
{"diru8", 0x20, RRRR}, {"srsi8", 0x40, RRu8}, {"srsi16", 0x41, RRu8},
{"diru16", 0x21, RRRR}, {"srsi32", 0x42, RRu8}, {"srsi64", 0x43, RRu8},
{"diru32", 0x22, RRRR}, {"cmpui", 0x44, RRu64}, {"cmpsi", 0x45, RRs64},
{"diru64", 0x23, RRRR}, {"cp", 0x46, RR}, {"swa", 0x47, RR},
{"dirs8", 0x24, RRRR}, {"li8", 0x48, Rx8}, {"li16", 0x49, Rx16},
{"dirs16", 0x25, RRRR}, {"li32", 0x4A, Rx32}, {"li64", 0x4B, Rx64},
{"dirs32", 0x26, RRRR}, {"lra", 0x4C, RRr32}, {"ld", 0x4D, RRu64u16},
{"dirs64", 0x27, RRRR}, {"st", 0x4E, RRu64u16}, {"ldr", 0x4F, RRr32u16},
{"neg", 0x28, RR}, {"str", 0x50, RRr32u16}, {"bmc", 0x51, RRu16},
{"not", 0x29, RR}, {"brc", 0x52, RRu8}, {"jmp", 0x53, r32},
{"sxt8", 0x2A, RR}, {"jal", 0x54, RRr32}, {"jala", 0x55, RRu64},
{"sxt16", 0x2B, RR}, {"jeq", 0x56, RRr16}, {"jne", 0x57, RRr16},
{"sxt32", 0x2C, RR}, {"jltu", 0x58, RRr16}, {"jgtu", 0x59, RRr16},
{"addi8", 0x2D, RRx8}, {"jlts", 0x5A, RRr16}, {"jgts", 0x5B, RRr16},
{"addi16", 0x2E, RRx16}, {"eca", 0x5C, Empty}, {"ebp", 0x5D, Empty},
{"addi32", 0x2F, RRx32}, {"fadd32", 0x5E, RRR}, {"fadd64", 0x5F, RRR},
{"addi64", 0x30, RRx64}, {"fsub32", 0x60, RRR}, {"fsub64", 0x61, RRR},
{"muli8", 0x31, RRx8}, {"fmul32", 0x62, RRR}, {"fmul64", 0x63, RRR},
{"muli16", 0x32, RRx16}, {"fdiv32", 0x64, RRR}, {"fdiv64", 0x65, RRR},
{"muli32", 0x33, RRx32}, {"fma32", 0x66, RRRR}, {"fma64", 0x67, RRRR},
{"muli64", 0x34, RRx64}, {"fcmplt32", 0x6A, RRR}, {"fcmplt64", 0x6B, RRR},
{"andi", 0x35, RRx64}, {"fcmpgt32", 0x6C, RRR}, {"fcmpgt64", 0x6D, RRR},
{"ori", 0x36, RRx64}, {"itf32", 0x6E, RR}, {"itf64", 0x6F, RR},
{"xori", 0x37, RRx64}, {"fti32", 0x70, RRu8}, {"fti64", 0x71, RRu8},
{"slui8", 0x38, RRu8}, {"fc32t64", 0x72, RR}, {"fc64t32", 0x73, RR},
{"slui16", 0x39, RRu8}, {"lra16", 0x74, RRr16}, {"ldr16", 0x75, RRr16u16},
{"slui32", 0x3A, RRu8}, {"str16", 0x76, RRr16u16}, {"jmp16", 0x77, r16},
{"slui64", 0x3B, RRu8},
{"srui8", 0x3C, RRu8},
{"srui16", 0x3D, RRu8},
{"srui32", 0x3E, RRu8},
{"srui64", 0x3F, RRu8},
{"srsi8", 0x40, RRu8},
{"srsi16", 0x41, RRu8},
{"srsi32", 0x42, RRu8},
{"srsi64", 0x43, RRu8},
{"cmpui", 0x44, RRu64},
{"cmpsi", 0x45, RRs64},
{"cp", 0x46, RR},
{"swa", 0x47, RR},
{"li8", 0x48, Rx8},
{"li16", 0x49, Rx16},
{"li32", 0x4A, Rx32},
{"li64", 0x4B, Rx64},
{"lra", 0x4C, RRr32},
{"ld", 0x4D, RRu64u16},
{"st", 0x4E, RRu64u16},
{"ldr", 0x4F, RRr32u16},
{"str", 0x50, RRr32u16},
{"bmc", 0x51, RRu16},
{"brc", 0x52, RRu8},
{"jmp", 0x53, r32},
{"jal", 0x54, RRr32},
{"jala", 0x55, RRu64},
{"jeq", 0x56, RRr16},
{"jne", 0x57, RRr16},
{"jltu", 0x58, RRr16},
{"jgtu", 0x59, RRr16},
{"jlts", 0x5A, RRr16},
{"jgts", 0x5B, RRr16},
{"eca", 0x5C, Empty},
{"ebp", 0x5D, Empty},
{"fadd32", 0x5E, RRR},
{"fadd64", 0x5F, RRR},
{"fsub32", 0x60, RRR},
{"fsub64", 0x61, RRR},
{"fmul32", 0x62, RRR},
{"fmul64", 0x63, RRR},
{"fdiv32", 0x64, RRR},
{"fdiv64", 0x65, RRR},
{"fma32", 0x66, RRRR},
{"fma64", 0x67, RRRR},
{"fcmplt32", 0x6A, RRR},
{"fcmplt64", 0x6B, RRR},
{"fcmpgt32", 0x6C, RRR},
{"fcmpgt64", 0x6D, RRR},
{"itf32", 0x6E, RR},
{"itf64", 0x6F, RR},
{"fti32", 0x70, RRu8},
{"fti64", 0x71, RRu8},
{"fc32t64", 0x72, RR},
{"fc64t32", 0x73, RR},
{"lra16", 0x74, RRr16},
{"ldr16", 0x75, RRr16u16},
{"str16", 0x76, RRr16u16},
{"jmp16", 0x77, r16},
}; };
const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]); const size_t INST_CNT = sizeof(INST) / sizeof(INST[0]);
const size_t INVALID = ~(size_t)0; size_t inst_find(const char *mnemonic, size_t len) {
size_t inst_find(const char *mnemonic, size_t len) for (size_t ii = 0; ii < INST_CNT; ii += 1) {
{ const char *entry = INST[ii].mnemonic;
for (size_t ii = 0; ii < INST_CNT; ii += 1) if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0') {
{ return ii;
const char *entry = INST[ii].mnemonic;
if (strncmp(entry, mnemonic, len) == 0 && entry[len] == '\0')
{
return ii;
}
} }
return INVALID; }
return INVALID;
} }

View file

@ -1,62 +0,0 @@
typedef enum OpType_e
{
Empty = 0,
R,
RR,
RRR,
RRRR,
Rx8,
Rx16,
Rx32,
Rx64,
RRx8,
RRx16,
RRx32,
RRx64,
RRs32,
RRs64,
RRu8,
RRu16,
RRu64,
r16,
r32,
RRr16,
RRr32,
RRr16u16,
RRr32u16,
RRu64u16,
} OpType;
// R -> register,
// 1 -> Xi8, 2 -> Xi16, 4 -> Xi32, 8 -> Xi64,
// b -> Si8, h -> Si16, w -> Si32, d -> Si64,
// B -> Ui8, H -> Ui16, W -> Ui32, D -> Ui64,
// o -> 16 bit relative offset,
// O -> 32 bit relative offset,
const char *TYPE_STR[] = {
"",
"R",
"RR",
"RRR",
"RRRR",
"R1",
"R2",
"R4",
"R8",
"RR1",
"RR2",
"RR4",
"RR8",
"RRw",
"RRd",
"RRB",
"RRH",
"RRD",
"o",
"O",
"RRo",
"RRO",
"RRoH",
"RROH",
"RRDH",
};

View file

@ -1,30 +1,23 @@
int parse_register(char *name, size_t len) int parse_register(char *name, size_t len) {
{ if (name[0] != 'r') {
if (name[0] != 'r') return 256; // Register name should start with 'r'
{ }
return 256; // Register name should start with 'r' if (len > 4) {
return 256; // Register name too long
}
uint16_t rv = 0;
if (len > 2 && name[1] == '0') {
return 256; // Extra zero suffix
}
for (size_t ii = 1; ii < len; ii += 1) {
char chr = name[ii];
if (!(chr >= '0' && chr <= '9')) {
return 256; // Register name must only contain numbers
} }
if (len > 4) rv = rv * 10 + (chr - '0');
{ }
return 256; // Register name too long if (rv > 255) {
} return 256; // Register number too large
uint16_t rv = 0; }
if (len > 2 && name[1] == '0') return (int)rv;
{
return 256; // Extra zero suffix
}
for (size_t ii = 1; ii < len; ii += 1)
{
char chr = name[ii];
if (!(chr >= '0' && chr <= '9'))
{
return 256; // Register name must only contain numbers
}
rv = rv * 10 + (chr - '0');
}
if (rv > 255)
{
return 256; // Register number too large
}
return (int)rv;
} }