From 084766029300c49ab3888aacfc226a820e035e09 Mon Sep 17 00:00:00 2001
From: Igor M <m1el.2027@gmail.com>
Date: Sun, 17 Mar 2024 12:49:23 +0200
Subject: [PATCH] Move push_int and directive code into separate files

---
 examples/example.S |   2 +-
 src/directive.c    | 133 ++++++++++++++++++++++++++++++++
 src/hbas.c         | 187 +--------------------------------------------
 src/push_int.c     |  55 +++++++++++++
 4 files changed, 191 insertions(+), 186 deletions(-)
 create mode 100644 src/directive.c
 create mode 100644 src/push_int.c

diff --git a/examples/example.S b/examples/example.S
index c28a4a0..e920e85 100644
--- a/examples/example.S
+++ b/examples/example.S
@@ -15,7 +15,7 @@ start:
 end:
     tx
 hello_string:
-    .db "Hello,", " world\n", 0
+    .db "Hello,\x20world\n", 0
 hello_string_end:
     .db 42, "hi", 43
     ; TODO .db 'H', 'e', 'l', 'l', 'o', '\0'
diff --git a/src/directive.c b/src/directive.c
new file mode 100644
index 0000000..0804981
--- /dev/null
+++ b/src/directive.c
@@ -0,0 +1,188 @@
+// Decodes the body of a string literal into raw bytes.
+//
+// `input` and `len` delimit the text between the quotes. Decoded bytes are
+// written to `buf`; every output byte consumes at least one input byte, so at
+// most `len` bytes are produced. The caller must have reserved that space.
+//
+// Recognised escapes: \\ \" \r \n \0 \t and \xHH.
+//
+// Returns ErrOk on success, ErrDanglingEscape when an escape runs past the end
+// of the text, ErrStringBadHex when get_hex() yields a value above 15 for
+// either \x digit, and ErrBadStringEscape for any other escape character.
+AsmError push_string(char *buf, char *input, size_t len) {
+    size_t ndata = 0;
+    for (size_t pos = 0; pos < len; pos += 1) {
+        char chr = input[pos];
+        if (chr == '\\') {
+            pos += 1;
+            // A backslash in the last position has nothing to escape; reading
+            // input[pos] would go past the `len` bytes we were given.
+            if (pos >= len) {
+                return ErrDanglingEscape;
+            }
+            chr = input[pos];
+            switch (chr) {
+                case '\\':
+                case '"':
+                    // These escapes stand for the character itself.
+                    break;
+                case 'r':
+                    chr = '\r';
+                    break;
+                case 'n':
+                    chr = '\n';
+                    break;
+                case '0':
+                    chr = '\0';
+                    break;
+                case 't':
+                    chr = '\t';
+                    break;
+                case 'x':
+                    // Both hex digits must lie inside the text.
+                    if (pos + 2 >= len) {
+                        return ErrDanglingEscape;
+                    }
+                    char high = get_hex(input[pos + 1]);
+                    char low = get_hex(input[pos + 2]);
+                    pos += 2;
+                    if (high > 15 || low > 15) {
+                        return ErrStringBadHex;
+                    }
+                    chr = high << 4 | low;
+                    break;
+                default:
+                    return ErrBadStringEscape;
+            }
+        }
+        buf[ndata] = chr;
+        ndata += 1;
+    }
+    return ErrOk;
+}
+
+// Parses the comma-separated operands of a data directive and appends their
+// encoding to `out`.
+//
+// On entry `tok` is the token preceding the first operand; tokens are read
+// from `input` until a newline or the end of input, and `tok` is left on the
+// last token read. Numbers are stored as `word_size`-byte little-endian
+// integers and may lie in either the signed or the unsigned range. String
+// literals are accepted only when `word_size` is 1.
+static AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok,
+                          size_t word_size) {
+    while (1) {
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind == TokNumber) {
+            if (ensure_push(out, 1, word_size) != 0) {
+                return ErrOutOfMemory;
+            }
+            // Sign mask 3 accepts signed and unsigned values. The result must
+            // be checked: on overflow push_int_le() writes nothing, so
+            // advancing `out->len` anyway would emit unwritten bytes.
+            AsmError err =
+                push_int_le(&out->buf[out->len], tok->num, word_size, 3);
+            if (err != ErrOk) {
+                return err;
+            }
+            out->len += word_size;
+        } else if (tok->kind == TokString) {
+            if (word_size != 1) {
+                return ErrStringDataNotByte;
+            }
+            // NOTE(review): assumes `tok->num` is the decoded length of the
+            // string as computed by the tokenizer -- confirm in token.c.
+            if (ensure_push(out, 1, tok->num) != 0) {
+                return ErrOutOfMemory;
+            }
+
+            // Skip the first and last byte of the token (the quotes).
+            char *str = &input[tok->start + 1];
+            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
+            if (err != ErrOk) {
+                return err;
+            }
+            out->len += tok->num;
+        } else {
+            return ErrUnexpectedToken;
+        }
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind == TokNewline || tok->kind == TokEOF) {
+            return ErrOk;
+        }
+        if (tok->kind == TokComma) {
+            continue;
+        }
+        return ErrInvalidToken;
+    }
+}
+
+// Assembles the directive named by `tok`, appending any output to `out`.
+//
+// Handled names are the data directives `db`, `dw`, `dd` and `dq` (1-, 2-, 4-
+// and 8-byte items, see push_data) and `align N`, which pads `out` with zero
+// bytes up to the next multiple of N; N must be a power of two. `tok` is
+// advanced over the operands that were consumed.
+AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
+    if (tok->len < 2) {
+        return ErrInvalidDirective;
+    }
+    size_t pos = tok->start;
+    char byte0 = input[pos];
+    char byte1 = input[pos + 1];
+    if (byte0 == 'd') {
+        // Data directives are exactly two characters long; without this
+        // check a longer name such as `dword` would be taken for `dw`.
+        if (tok->len != 2) {
+            return ErrInvalidDirective;
+        }
+        size_t word_size;
+        switch (byte1) {
+            case 'b':
+                word_size = 1;
+                break;
+            case 'w':
+                word_size = 2;
+                break;
+            case 'd':
+                word_size = 4;
+                break;
+            case 'q':
+                word_size = 8;
+                break;
+            default:
+                return ErrInvalidDirective;
+        }
+        return push_data(input, len, out, tok, word_size);
+    }
+    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
+        *tok = token(input, len, tok->start + tok->len);
+        if (tok->kind != TokNumber) {
+            return ErrAlignNeedsNumber;
+        }
+        // Zero has to be rejected explicitly: `0 & (0 - 1)` is 0, so the
+        // usual power-of-two test alone would let it through.
+        size_t mask = tok->num - 1;
+        if (tok->num == 0 || (tok->num & mask) != 0) {
+            return ErrAlignNeedsPow2;
+        }
+        // `out->len + mask` below must not wrap around.
+        if ((~(size_t)0) - mask < out->len) {
+            return ErrOutOfMemory;
+        }
+        size_t aligned = (out->len + mask) & ~mask;
+        size_t padding = aligned - out->len;
+        if (ensure_push(out, 1, padding) != 0) {
+            return ErrOutOfMemory;
+        }
+        // Zero-fill the gap so the padding bytes are deterministic instead
+        // of depending on the buffer's previous contents.
+        memset(&out->buf[out->len], 0, padding);
+        out->len = aligned;
+        return ErrOk;
+    }
+    // NOTE(review): any other directive name is accepted and ignored. That is
+    // the pre-existing behaviour and is kept as is; returning
+    // ErrInvalidDirective may be what was intended -- confirm with callers.
+    return ErrOk;
+}
diff --git a/src/hbas.c b/src/hbas.c
index c387d42..da14a4b 100644
--- a/src/hbas.c
+++ b/src/hbas.c
@@ -37,6 +37,8 @@ SOFTWARE.
 //
 #include "register.c"
 #include "token.c"
+#include "push_int.c"
+#include "directive.c"
 //
 #include "einfo.h"
 
@@ -119,108 +121,6 @@ static size_t label_lookup(LabelVec *labels, char *name, size_t len) {
     return INVALID;
 }
 
-static bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
-    // All 64-bit values are considered valid.
-    if (size == 8) {
-        return true;
-    }
-    // Unsigned integers must have all upper bits set to zero. To check this,
-    // we shift the value right by the integer size and verify it equals zero.
-    int valid_uint = (val >> (size * 8)) == 0;
-
-    // For signed integers, the sign-extended high bits must match the sign bit.
-    // By shifting right by one less than the total bit size (size * 8 - 1),
-    // we isolate the sign bit and any sign-extended bits. For a value fitting
-    // in the signed range, this operation results in either 0 (for non-negative
-    // values) or -1 (for negative values due to sign extension).
-    int64_t int_shifted = ((int64_t)val) >> (size * 8 - 1);
-
-    // To unify the check for both positive and negative cases, we adjust
-    // non-zero values (-1) by incrementing by 1.  This turns -1 into 0,
-    // enabling a single check for 0 to validate both cases.  This adjustment
-    // simplifies the validation logic, allowing us to use a single condition to
-    // check for proper sign extension or zero extension in the original value.
-    int_shifted += int_shifted != 0;
-
-    // A valid signed integer will have `int_shifted` equal to 0
-    // after adjustment, indicating proper sign extension.
-    int valid_int = int_shifted == 0;
-
-    // Validity bitmask to represents whether the value
-    // fits as signed, unsigned, or both.
-    int validity = valid_int | (valid_uint << 1);
-
-    // If the value's validity doesn't match the `sign` requirements,
-    // we report an overflow.
-    return (validity & sign) != 0;
-}
-
-// safety: assumes the buffer has enough place for specified integer size.
-// `sign` is a bitset, where bit `1` indicates that value accepts a signed int,
-// and bit `2` indicates that value accepts an unsigned int.
-static AsmError push_int_le(char *buf, uint64_t val, size_t size,
-                            uint8_t sign) {
-    if (!check_valid_int(val, size, sign)) {
-        return ErrImmediateOverflow;
-    }
-
-    // Write out the bytes of the integer to the buffer in little-endian order,
-    // starting with the lowest byte first.
-    for (size_t ii = 0; ii < size; ii += 1) {
-        buf[ii] = val & 0xff;
-        val >>= 8;
-    }
-
-    return ErrOk;
-}
-
-AsmError push_string(char *buf, char *input, size_t len) {
-    size_t ndata = 0;
-    for (size_t pos = 0; pos < len; pos += 1) {
-        char chr = input[pos];
-        if (chr == '\\') {
-            pos += 1;
-            chr = input[pos];
-            switch (chr) {
-                case '\\':
-                    chr = '\\';
-                    break;
-                case '"':
-                    chr = '"';
-                    break;
-                case 'r':
-                    chr = '\r';
-                    break;
-                case 'n':
-                    chr = '\n';
-                    break;
-                case '0':
-                    chr = '\0';
-                    break;
-                case 't':
-                    chr = '\t';
-                    break;
-                case 'x':
-                    if (pos + 2 >= len) {
-                        return ErrDanglingEscape;
-                    }
-                    char high = get_hex(input[pos + 1]);
-                    char low = get_hex(input[pos + 2]);
-                    if (high > 15 || low > 15) {
-                        return ErrStringBadHex;
-                    }
-                    chr = high << 4 | low;
-                    break;
-                default:
-                    return ErrBadStringEscape;
-            }
-        }
-        buf[ndata] = chr;
-        ndata += 1;
-    }
-    return ErrOk;
-}
-
 static AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
                                ByteVec *rv, HoleVec *holes) {
     const InstDesc *inst;
@@ -322,89 +222,6 @@ static AsmError assemble_instr(InstHt ht, char *input, size_t len, Token *tok,
     return ErrOk;
 }
 
-static AsmError push_data(char *input, size_t len, ByteVec *out, Token *tok,
-                          size_t word_size) {
-    while (1) {
-        *tok = token(input, len, tok->start + tok->len);
-        if (tok->kind == TokNumber) {
-            if (ensure_push(out, 1, word_size) != 0) {
-                return ErrOutOfMemory;
-            }
-            push_int_le(&out->buf[out->len], tok->num, word_size, 3);
-            out->len += word_size;
-        } else if (tok->kind == TokString) {
-            if (word_size != 1) {
-                return ErrStringDataNotByte;
-            }
-            if (ensure_push(out, 1, tok->num) != 0) {
-                return ErrOutOfMemory;
-            }
-
-            char *str = &input[tok->start + 1];
-            AsmError err = push_string(&out->buf[out->len], str, tok->len - 2);
-            if (err != ErrOk) {
-                return err;
-            }
-            out->len += tok->num;
-        } else {
-            return ErrUnexpectedToken;
-        }
-        *tok = token(input, len, tok->start + tok->len);
-        if (tok->kind == TokNewline || tok->kind == TokEOF) {
-            return ErrOk;
-        }
-        if (tok->kind == TokComma) {
-            continue;
-        }
-        return ErrInvalidToken;
-    }
-}
-
-AsmError assemble_directive(char *input, size_t len, ByteVec *out, Token *tok) {
-    if (tok->len < 2) {
-        return ErrInvalidDirective;
-    }
-    size_t pos = tok->start;
-    char byte0 = input[pos];
-    char byte1 = input[pos + 1];
-    if (byte0 == 'd') {
-        size_t word_size;
-        switch (byte1) {
-            case 'b':
-                word_size = 1;
-                break;
-            case 'w':
-                word_size = 2;
-                break;
-            case 'd':
-                word_size = 4;
-                break;
-            case 'q':
-                word_size = 8;
-                break;
-            default:
-                return ErrInvalidDirective;
-        }
-        return push_data(input, len, out, tok, word_size);
-    }
-    if (tok->len == 5 && strncmp("align", &input[pos], 5) == 0) {
-        *tok = token(input, len, tok->start + tok->len);
-        if (tok->kind != TokNumber) {
-            return ErrAlignNeedsNumber;
-        }
-        size_t mask = tok->num - 1;
-        if ((tok->num & mask) != 0) {
-            return ErrAlignNeedsPow2;
-        }
-        size_t aligned = (out->len + mask) & ~mask;
-        if (ensure_push(out, 1, aligned - out->len) != 0) {
-            return ErrOutOfMemory;
-        }
-        out->len = aligned;
-    }
-    return ErrOk;
-}
-
 AsmError assemble(InstHt ht, char *input, size_t len, ByteVec *out,
                   EInfo *einfo) {
     ByteVec rv = {malloc(MIN_SIZE), MIN_SIZE, 0};
diff --git a/src/push_int.c b/src/push_int.c
new file mode 100644
index 0000000..2d07dda
--- /dev/null
+++ b/src/push_int.c
@@ -0,0 +1,59 @@
+
+// Reports whether `val` can be stored in a `size`-byte integer field.
+//
+// `sign` is a bitset: bit 1 means a value in the signed range is acceptable,
+// bit 2 means a value in the unsigned range is acceptable. The value is valid
+// when it fits at least one of the accepted interpretations.
+static bool check_valid_int(uint64_t val, size_t size, uint8_t sign) {
+    // A zero-width field cannot hold anything. Rejecting it here also keeps
+    // the shift count `bits - 1` below from wrapping around.
+    if (size == 0) {
+        return false;
+    }
+    // Every 64-bit value fits in eight bytes or more, and shifting a 64-bit
+    // operand by 64 or more bits would be undefined behaviour.
+    if (size >= 8) {
+        return true;
+    }
+    size_t bits = size * 8;
+
+    // Unsigned range: every bit above the field must be zero.
+    bool valid_uint = (val >> bits) == 0;
+
+    // Signed range: the field's sign bit and every bit above it must agree,
+    // i.e. be all zeros (non-negative) or all ones (negative). Comparing the
+    // logically shifted value against both patterns avoids right-shifting a
+    // negative signed integer, whose result is implementation-defined.
+    uint64_t upper = val >> (bits - 1);
+    uint64_t all_ones = ~(uint64_t)0 >> (bits - 1);
+    bool valid_int = upper == 0 || upper == all_ones;
+
+    // Bit 1: fits as signed, bit 2: fits as unsigned -- same layout as `sign`.
+    int validity = (int)valid_int | ((int)valid_uint << 1);
+
+    // Valid when at least one accepted interpretation fits.
+    return (validity & sign) != 0;
+}
+
+// Writes `val` to `buf` as a `size`-byte little-endian integer.
+//
+// safety: assumes the buffer has room for `size` bytes.
+// `sign` is a bitset, where bit `1` indicates that the value may be a signed
+// int, and bit `2` indicates that it may be an unsigned int.
+//
+// Returns ErrImmediateOverflow, leaving `buf` untouched, when `val` does not
+// fit; ErrOk otherwise.
+static AsmError push_int_le(char *buf, uint64_t val, size_t size,
+                            uint8_t sign) {
+    if (!check_valid_int(val, size, sign)) {
+        return ErrImmediateOverflow;
+    }
+
+    // Emit the least significant byte first.
+    for (size_t ii = 0; ii < size; ii += 1) {
+        buf[ii] = val & 0xff;
+        val >>= 8;
+    }
+
+    return ErrOk;
+}