diff --git a/Makefile b/Makefile index ab7f06b..5b7b14a 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CFLAGS = -Wall -Wextra -Wpedantic -std=c17 -O3 .PHONY: clean hbas: src/hbas.c - ${CC} ${CFLAGS} ${CFLAGS_EXTRA} hbas.c -o hbas + ${CC} ${CFLAGS} ${CFLAGS_EXTRA} src/hbas.c -o hbas example: hbas example.S ./hbas < example.S > example diff --git a/src/einfo.h b/src/einfo.h new file mode 100644 index 0000000..d6de8b3 --- /dev/null +++ b/src/einfo.h @@ -0,0 +1,6 @@ +typedef struct EInfo_s +{ + Token token; + size_t line; + size_t line_start; +} EInfo; diff --git a/src/hbas.c b/src/hbas.c index bd8ae6e..4fd08a6 100644 --- a/src/hbas.c +++ b/src/hbas.c @@ -30,6 +30,8 @@ SOFTWARE. #include "instructions.c" #include "hash.c" #include "register.c" +#include "token.c" +#include "einfo.h" void hd(char *data, size_t len) { @@ -152,194 +154,6 @@ int slurp(FILE *fd, ByteVec *out) return err; } -typedef enum TokenKind_e -{ - TokInvalid = '!', - TokEOF = '$', - TokIdent = 'A', - TokNeg = '-', - TokNumber = '0', - TokBadNumber = '9', - TokComma = ',', - TokDot = '.', - TokColon = ':', - TokComment = ';', - TokNewline = 'n', -} TokenKind; -typedef struct Token_s -{ - TokenKind kind; - size_t start; - size_t len; - uint64_t num; -} Token; - -Token token_ident(char *input, size_t len, size_t pos) -{ - size_t start = pos; - while (pos < len) - { - char chr = input[pos]; - char chru = chr & ~0x20; - int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); - if (!good) - { - break; - } - pos += 1; - } - return (Token){TokIdent, start, pos - start, 0}; -} - -Token token_number(char *input, size_t len, size_t pos) -{ - char *ptr = &input[pos]; - char next = '\0'; - size_t start = pos; - size_t digits = 0; - uint64_t base = 10; - uint64_t rv = 0; - uint64_t pre_overflow; - AsmError bad_num = ErrOk; - - if (pos + 1 < len) - { - next = ptr[1] & ~0x20; - } - - if (input[pos] == '0') - { - if (next == 'X') - { - base = 16; - pos += 2; - } - else if (next == 'D') - { - base = 10; - pos += 2; - } - else if (next == 'O') - { - base = 8; - pos += 2; - } - else if (next == 'B') - { - base = 2; - pos += 2; - } - } - pre_overflow = (~(size_t)0) / base; - // valid: "0x_0", "0_" - // invalid: "0x_" - while (pos < len) - { - uint64_t digit; - uint64_t next; - char chr = input[pos]; - char chru = chr & ~0x20; - if (chr == '_') - { - pos += 1; - continue; - } - digit = (uint64_t)chr - (uint64_t)'0'; - if (digit >= 10) - { - digit = (uint64_t)chru - (uint64_t)('A' - 10); - } - if (digit >= base) - { - if (chr >= '0' && chr <= '9') - { - bad_num = ErrBadNumDigit; - } - else if (chru >= 'A' && chru <= 'Z') - { - bad_num = ErrBadNumDigit; - } - break; - } - - pos += 1; - digits += 1; - - next = rv * base + digit; - if (rv > pre_overflow || next < rv) - { - bad_num = ErrBadNumOverflow; - break; - } - rv = next; - } - - if (digits == 0) - { - bad_num = ErrBadNumNoDigit; - } - - if (bad_num) - { - return (Token){TokBadNumber, start, pos - start, bad_num}; - } - else - { - return (Token){TokNumber, start, pos - start, rv}; - } -} - -Token token(char *input, size_t len, size_t pos) -{ - char chr, chru; - char *ptr = &input[pos]; - while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) - { - pos += 1; - } - if (pos == len) - { - return (Token){TokEOF, pos, 0, 0}; - } - ptr = &input[pos]; - chr = *ptr; - if (chr == ',' || chr == '-' || chr == '.' || chr == ':') - { - return (Token){(TokenKind)chr, pos, 1, 0}; - } - if (chr == '\n') - { - return (Token){TokNewline, pos, 1, 0}; - } - if (chr == '\r') - { - if (pos + 1 < len && ptr[1] == '\n') - { - return (Token){TokNewline, pos, 2, 0}; - } - return (Token){TokNewline, pos, 1, 0}; - } - if (chr == ';') - { - size_t clen = 1; - while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') - { - clen += 1; - } - return (Token){TokComment, pos, clen, 0}; - } - if (chr >= '0' && chr <= '9') - { - return token_number(input, len, pos); - } - chru = chr & ~0x20; - if (chr == '_' || (chru >= 'A' && chru <= 'Z')) - { - return token_ident(input, len, pos); - } - return (Token){TokInvalid, pos, 1, 0}; -} - typedef struct Hole_s { size_t location; @@ -538,13 +352,6 @@ AsmError assemble_instr( return 0; } -typedef struct EInfo_s -{ - Token token; - size_t line; - size_t line_start; -} EInfo; - AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out, EInfo *einfo) { ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0}; diff --git a/src/register.c b/src/register.c index ee8637c..0083a6f 100644 --- a/src/register.c +++ b/src/register.c @@ -27,4 +27,4 @@ int parse_register(char *name, size_t len) return 256; // Register number too large } return (int)rv; -} \ No newline at end of file +} diff --git a/src/token.c b/src/token.c new file mode 100644 index 0000000..dec4919 --- /dev/null +++ b/src/token.c @@ -0,0 +1,187 @@ +typedef enum TokenKind_e +{ + TokInvalid = '!', + TokEOF = '$', + TokIdent = 'A', + TokNeg = '-', + TokNumber = '0', + TokBadNumber = '9', + TokComma = ',', + TokDot = '.', + TokColon = ':', + TokComment = ';', + TokNewline = 'n', +} TokenKind; +typedef struct Token_s +{ + TokenKind kind; + size_t start; + size_t len; + uint64_t num; +} Token; + +Token token_ident(char *input, size_t len, size_t pos) +{ + size_t start = pos; + while (pos < len) + { + char chr = input[pos]; + char chru = chr & ~0x20; + int good = chr == '_' || (chr >= '0' && chr <= '9') || (chru >= 'A' && chru <= 'Z'); + if (!good) + { + break; + } + pos += 1; + } + return (Token){TokIdent, start, pos - start, 0}; +} + +Token token_number(char *input, size_t len, size_t pos) +{ + char *ptr = &input[pos]; + char next = '\0'; + size_t start = pos; + size_t digits = 0; + uint64_t base = 10; + uint64_t rv = 0; + uint64_t pre_overflow; + AsmError bad_num = ErrOk; + + if (pos + 1 < len) + { + next = ptr[1] & ~0x20; + } + + if (input[pos] == '0') + { + if (next == 'X') + { + base = 16; + pos += 2; + } + else if (next == 'D') + { + base = 10; + pos += 2; + } + else if (next == 'O') + { + base = 8; + pos += 2; + } + else if (next == 'B') + { + base = 2; + pos += 2; + } + } + pre_overflow = (~(size_t)0) / base; + // valid: "0x_0", "0_" + // invalid: "0x_" + while (pos < len) + { + uint64_t digit; + uint64_t next; + char chr = input[pos]; + char chru = chr & ~0x20; + if (chr == '_') + { + pos += 1; + continue; + } + digit = (uint64_t)chr - (uint64_t)'0'; + if (digit >= 10) + { + digit = (uint64_t)chru - (uint64_t)('A' - 10); + } + if (digit >= base) + { + if (chr >= '0' && chr <= '9') + { + bad_num = ErrBadNumDigit; + } + else if (chru >= 'A' && chru <= 'Z') + { + bad_num = ErrBadNumDigit; + } + break; + } + + pos += 1; + digits += 1; + + next = rv * base + digit; + if (rv > pre_overflow || next < rv) + { + bad_num = ErrBadNumOverflow; + break; + } + rv = next; + } + + if (digits == 0) + { + bad_num = ErrBadNumNoDigit; + } + + if (bad_num) + { + return (Token){TokBadNumber, start, pos - start, bad_num}; + } + else + { + return (Token){TokNumber, start, pos - start, rv}; + } +} + +Token token(char *input, size_t len, size_t pos) +{ + char chr, chru; + char *ptr = &input[pos]; + while (pos < len && (input[pos] == ' ' || input[pos] == '\t')) + { + pos += 1; + } + if (pos == len) + { + return (Token){TokEOF, pos, 0, 0}; + } + ptr = &input[pos]; + chr = *ptr; + if (chr == ',' || chr == '-' || chr == '.' || chr == ':') + { + return (Token){(TokenKind)chr, pos, 1, 0}; + } + if (chr == '\n') + { + return (Token){TokNewline, pos, 1, 0}; + } + if (chr == '\r') + { + if (pos + 1 < len && ptr[1] == '\n') + { + return (Token){TokNewline, pos, 2, 0}; + } + return (Token){TokNewline, pos, 1, 0}; + } + if (chr == ';') + { + size_t clen = 1; + while (pos + clen < len && ptr[clen] != '\n' && ptr[clen] != '\r') + { + clen += 1; + } + return (Token){TokComment, pos, clen, 0}; + } + if (chr >= '0' && chr <= '9') + { + return token_number(input, len, pos); + } + chru = chr & ~0x20; + if (chr == '_' || (chru >= 'A' && chru <= 'Z')) + { + return token_ident(input, len, pos); + } + return (Token){TokInvalid, pos, 1, 0}; +}