Updated spec!

2023-10-22 18:18:50 +02:00 · 2023-10-22 18:18:50 +02:00 · cb557d1361
parent 2715bc9107
commit cb557d1361
3 changed files with 475 additions and 395 deletions
--- a/hbbytecode/instructions.in
+++ b/hbbytecode/instructions.in
@ -28,8 +28,8 @@
 0x1B, SRS16,    RRR,  "Signed right bitshift (16b)"                 ;
 0x1C, SRS32,    RRR,  "Signed right bitshift (32b)"                 ;
 0x1D, SRS64,    RRR,  "Signed right bitshift (64b)"                 ;
-0x1E, CMP,      RRR,  "Signed comparsion"                           ;
+0x1E, CMPU,     RRR,  "Unsigned comparsion"                         ;
-0x1F, CMPU,     RRR,  "Unsigned comparsion"                         ;
+0x1F, CMPS,     RRR,  "Signed comparsion"                           ;
 0x20, DIRU8,    RRRR, "Merged divide-remainder (unsigned 8b)"       ;
 0x21, DIRU16,   RRRR, "Merged divide-remainder (unsigned 16b)"      ;
 0x22, DIRU32,   RRRR, "Merged divide-remainder (unsigned 32b)"      ;
@ -66,8 +66,8 @@
 0x41, SRSI16,   RRW,  "Signed right bitshift with immediate"        ;
 0x42, SRSI32,   RRW,  "Signed right bitshift with immediate"        ;
 0x43, SRSI64,   RRW,  "Signed right bitshift with immediate"        ;
-0x44, CMPI,     RRD,  "Signed compare with immediate"               ;
+0x44, CMPUI,    RRD,  "Unsigned compare with immediate"             ;
-0x45, CMPUI,    RRD,  "Unsigned compare with immediate"             ;
+0x45, CMPSI,    RRD,  "Signed compare with immediate"               ;
 0x46, CP,       RR,   "Copy register"                               ;
 0x47, SWA,      RR,   "Swap registers"                              ;
 0x48, LI8,      RB,   "Load immediate (8b)"                         ;
@ -86,10 +86,10 @@
 0x55, JALA,     RRA,  "Linking absolute jump"                       ;
 0x56, JEQ,      RRP,  "Branch on equal"                             ;
 0x57, JNE,      RRP,  "Branch on nonequal"                          ;
-0x58, JLT,      RRP,  "Branch on lesser-than (signed)"              ;
+0x58, JLTU,     RRP,  "Branch on lesser-than (unsigned)"            ;
-0x59, JGT,      RRP,  "Branch on greater-than (signed)"             ;
+0x59, JGTU,     RRP,  "Branch on greater-than (unsigned)"           ;
-0x5A, JLTU,     RRP,  "Branch on lesser-than (unsigned)"            ;
+0x5A, JLTS,     RRP,  "Branch on lesser-than (signed)"              ;
-0x5B, JGTU,     RRP,  "Branch on greater-than (unsigned)"           ;
+0x5B, JGTS,     RRP,  "Branch on greater-than (signed)"             ;
 0x5C, ECA,      N,    "Environment call trap"                       ;
 0x5D, EBP,      N,    "Environment breakpoint"                      ;
 0x5E, FADD32,   RRR,  "Floating point addition (32b)"               ;
@ -100,8 +100,8 @@
 0x63, FMUL64,   RRR,  "Floating point multiply (64b)"               ;
 0x64, FDIV32,   RRR,  "Floating point division (32b)"               ;
 0x65, FDIV64,   RRR,  "Floating point division (64b)"               ;
-0x66, FMA32,    RRR,  "Float fused multiply-add (32b)"              ;
+0x66, FMA32,    RRRR, "Float fused multiply-add (32b)"              ;
-0x67, FMA64,    RRR,  "Float fused multiply-add (64b)"              ;
+0x67, FMA64,    RRRR, "Float fused multiply-add (64b)"              ;
 0x68, FINV32,   RR,   "Float reciprocal (32b)"                      ;
 0x69, FINV64,   RR,   "Float reciprocal (64b)"                      ;
 0x6A, FCMPLT32, RRR,  "Flaot compare less than (32b)"               ;
--- a/hbvm/src/vmrun.rs
+++ b/hbvm/src/vmrun.rs
@ -96,32 +96,16 @@ where
                    SRS16 => self.binary_op(|l: i16, r| i16::wrapping_shl(l, r as u32)),
                    SRS32 => self.binary_op(|l: i32, r| i32::wrapping_shl(l, r as u32)),
                    SRS64 => self.binary_op(|l: i64, r| i64::wrapping_shl(l, r as u32)),
-                    CMP => handler!(self, |OpsRRR(tg, a0, a1)| {
+                    CMPU => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp(
-                        // Compare a0 <=> a1
+                        tg,
-                        // < →  0
+                        a0,
-                        // > →  1
+                        self.read_reg(a1).cast::<u64>()
-                        // = →  2
+                    )),
-
+                    CMPS => handler!(self, |OpsRRR(tg, a0, a1)| self.cmp(
-                        self.write_reg(
+                        tg,
-                            tg,
+                        a0,
-                            self.read_reg(a0)
+                        self.read_reg(a1).cast::<i64>()
-                                .cast::<i64>()
+                    )),
                                .cmp(&self.read_reg(a1).cast::<i64>())
                                as i64
                                + 1,
                        );
                    }),
                    CMPU => handler!(self, |OpsRRR(tg, a0, a1)| {
                        // Unsigned comparsion
                        self.write_reg(
                            tg,
                            self.read_reg(a0)
                                .cast::<u64>()
                                .cmp(&self.read_reg(a1).cast::<u64>())
                                as i64
                                + 1,
                        );
                    }),
                    DIRU8 => self.dir::<u8>(),
                    DIRU16 => self.dir::<u16>(),
                    DIRU32 => self.dir::<u32>(),
@ -170,21 +154,9 @@ where
                    SRSI16 => self.binary_op_ims::<i16>(ops::Shr::shr),
                    SRSI32 => self.binary_op_ims::<i32>(ops::Shr::shr),
                    SRSI64 => self.binary_op_ims::<i64>(ops::Shr::shr),
-                    CMPI => handler!(self, |OpsRRD(tg, a0, imm)| {
+                    CMPUI => handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm) }),
-                        self.write_reg(
+                    CMPSI => handler!(self, |OpsRRD(tg, a0, imm)| { self.cmp(tg, a0, imm as i64) }),
-                            tg,
+                    CP => handler!(self, |OpsRR(tg, a0)| self.write_reg(tg, self.read_reg(a0))),
                            self.read_reg(a0)
                                .cast::<i64>()
                                .cmp(&Value::from(imm).cast::<i64>())
                                as i64,
                        );
                    }),
                    CMPUI => handler!(self, |OpsRRD(tg, a0, imm)| {
                        self.write_reg(tg, self.read_reg(a0).cast::<u64>().cmp(&imm) as i64);
                    }),
                    CP => handler!(self, |OpsRR(tg, a0)| {
                        self.write_reg(tg, self.read_reg(a0));
                    }),
                    SWA => handler!(self, |OpsRR(r0, r1)| {
                        // Swap registers
                        match (r0, r1) {
@ -202,28 +174,30 @@ where
                    LI16 => handler!(self, |OpsRH(tg, imm)| self.write_reg(tg, imm)),
                    LI32 => handler!(self, |OpsRW(tg, imm)| self.write_reg(tg, imm)),
                    LI64 => handler!(self, |OpsRD(tg, imm)| self.write_reg(tg, imm)),
-                    LRA => handler!(self, |OpsRRO(tg, reg, off)| {
+                    LRA => handler!(self, |OpsRRO(tg, reg, off)| self.write_reg(
-                        self.write_reg(
+                        tg,
-                            tg,
+                        self.pcrel(off, 3)
-                            self.pcrel(off, 3)
+                            .wrapping_add(self.read_reg(reg).cast::<i64>())
-                                .wrapping_add(self.read_reg(reg).cast::<i64>())
+                            .get(),
-                                .get(),
+                    )),
-                        );
+                    // Load. If loading more than register size, continue on adjecent registers
-                    }),
+                    LD => handler!(self, |OpsRRAH(dst, base, off, count)| self
-                    LD => handler!(self, |OpsRRAH(dst, base, off, count)| {
+                        .load(dst, base, off, count)?),
-                        // Load. If loading more than register size, continue on adjecent registers
+                    // Store. Same rules apply as to LD
-                        self.load(dst, base, off, count)?;
+                    ST => handler!(self, |OpsRRAH(dst, base, off, count)| self
-                    }),
+                        .store(dst, base, off, count)?),
-                    ST => handler!(self, |OpsRRAH(dst, base, off, count)| {
+                    LDR => handler!(self, |OpsRROH(dst, base, off, count)| self.load(
-                        // Store. Same rules apply as to LD
+                        dst,
-                        self.store(dst, base, off, count)?;
+                        base,
-                    }),
+                        self.pcrel(off, 3).get(),
-                    LDR => handler!(self, |OpsRROH(dst, base, off, count)| {
+                        count
-                        self.load(dst, base, self.pcrel(off, 3).get(), count)?;
+                    )?),
-                    }),
+                    STR => handler!(self, |OpsRROH(dst, base, off, count)| self.store(
-                    STR => handler!(self, |OpsRROH(dst, base, off, count)| {
+                        dst,
-                        self.store(dst, base, self.pcrel(off, 3).get(), count)?;
+                        base,
-                    }),
+                        self.pcrel(off, 3).get(),
                        count
                    )?),
                    BMC => {
                        // Block memory copy
                        match if let Some(copier) = &mut self.copier {
@ -295,13 +269,12 @@ where
                    }
                    // Conditional jumps, jump only to immediates
                    JEQ => self.cond_jmp::<u64>(Ordering::Equal),
-                    JNE => handler!(self, |OpsRRP(a0, a1, ja)| {
+                    JNE => {
                        let OpsRRP(a0, a1, ja) = self.decode();
                        if self.read_reg(a0).cast::<u64>() != self.read_reg(a1).cast::<u64>() {
-                            self.pc = Address::new(
+                            self.pc = self.pcrel(ja, 3);
                                ((self.pc.get() as i64).wrapping_add(ja as i64)) as u64,
                            )
                        }
-                    }),
+                    }
                    JLT => self.cond_jmp::<u64>(Ordering::Less),
                    JGT => self.cond_jmp::<u64>(Ordering::Greater),
                    JLTU => self.cond_jmp::<i64>(Ordering::Less),
@ -329,67 +302,60 @@ where
                    FDIV64 => self.binary_op::<f64>(ops::Div::div),
                    FMA32 => self.fma::<f32>(),
                    FMA64 => self.fma::<f64>(),
-                    FINV32 => handler!(self, |OpsRR(tg, reg)| {
+                    FINV32 => handler!(self, |OpsRR(tg, reg)| self
-                        self.write_reg(tg, 1. / self.read_reg(reg).cast::<f32>())
+                        .write_reg(tg, 1. / self.read_reg(reg).cast::<f32>())),
-                    }),
+                    FINV64 => handler!(self, |OpsRR(tg, reg)| self
-                    FINV64 => handler!(self, |OpsRR(tg, reg)| {
+                        .write_reg(tg, 1. / self.read_reg(reg).cast::<f64>())),
                        self.write_reg(tg, 1. / self.read_reg(reg).cast::<f64>())
                    }),
                    FCMPLT32 => self.fcmp::<f32>(Ordering::Less),
                    FCMPLT64 => self.fcmp::<f64>(Ordering::Less),
                    FCMPGT32 => self.fcmp::<f32>(Ordering::Greater),
                    FCMPGT64 => self.fcmp::<f64>(Ordering::Greater),
-                    ITF32 => handler!(self, |OpsRR(tg, reg)| {
+                    ITF32 => handler!(self, |OpsRR(tg, reg)| self
-                        self.write_reg(tg, self.read_reg(reg).cast::<i64>() as f32);
+                        .write_reg(tg, self.read_reg(reg).cast::<i64>() as f32)),
-                    }),
+                    ITF64 => handler!(self, |OpsRR(tg, reg)| self
-                    ITF64 => handler!(self, |OpsRR(tg, reg)| {
+                        .write_reg(tg, self.read_reg(reg).cast::<i64>() as f64)),
-                        self.write_reg(tg, self.read_reg(reg).cast::<i64>() as f64);
+                    FTI32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg(
-                    }),
+                        tg,
-                    FTI32 => handler!(self, |OpsRRB(tg, reg, mode)| {
+                        crate::float::f32toint(
-                        self.write_reg(
+                            self.read_reg(reg).cast::<f32>(),
-                            tg,
+                            RoundingMode::try_from(mode)
-                            crate::float::f32toint(
+                                .map_err(|()| VmRunError::InvalidOperand)?,
-                                self.read_reg(reg).cast::<f32>(),
+                        ),
-                                RoundingMode::try_from(mode)
+                    )),
-                                    .map_err(|()| VmRunError::InvalidOperand)?,
+                    FTI64 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg(
-                            ),
+                        tg,
-                        );
+                        crate::float::f64toint(
-                    }),
+                            self.read_reg(reg).cast::<f64>(),
-                    FTI64 => handler!(self, |OpsRRB(tg, reg, mode)| {
+                            RoundingMode::try_from(mode)
-                        self.write_reg(
+                                .map_err(|()| VmRunError::InvalidOperand)?,
-                            tg,
+                        ),
-                            crate::float::f64toint(
+                    )),
-                                self.read_reg(reg).cast::<f64>(),
+                    FC32T64 => handler!(self, |OpsRR(tg, reg)| self
-                                RoundingMode::try_from(mode)
+                        .write_reg(tg, self.read_reg(reg).cast::<f32>() as f64)),
-                                    .map_err(|()| VmRunError::InvalidOperand)?,
+                    FC64T32 => handler!(self, |OpsRRB(tg, reg, mode)| self.write_reg(
-                            ),
+                        tg,
-                        );
+                        crate::float::conv64to32(
-                    }),
+                            self.read_reg(reg).cast(),
-                    FC32T64 => handler!(self, |OpsRR(tg, reg)| {
+                            RoundingMode::try_from(mode)
-                        self.write_reg(tg, self.read_reg(reg).cast::<f32>() as f64);
+                                .map_err(|()| VmRunError::InvalidOperand)?,
-                    }),
+                        ),
-                    FC64T32 => handler!(self, |OpsRRB(tg, reg, mode)| {
+                    )),
-                        self.write_reg(
+                    LRA16 => handler!(self, |OpsRRP(tg, reg, imm)| self.write_reg(
-                            tg,
+                        tg,
-                            crate::float::conv64to32(
+                        (self.pc + self.read_reg(reg).cast::<u64>() + imm + 3_u16).get(),
-                                self.read_reg(reg).cast(),
+                    )),
-                                RoundingMode::try_from(mode)
+                    LDR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.load(
-                                    .map_err(|()| VmRunError::InvalidOperand)?,
+                        dst,
-                            ),
+                        base,
-                        )
+                        self.pcrel(off, 3).get(),
-                    }),
+                        count
-                    LRA16 => handler!(self, |OpsRRP(tg, reg, imm)| {
+                    )?),
-                        self.write_reg(
+                    STR16 => handler!(self, |OpsRRPH(dst, base, off, count)| self.store(
-                            tg,
+                        dst,
-                            (self.pc + self.read_reg(reg).cast::<u64>() + imm + 3_u16).get(),
+                        base,
-                        );
+                        self.pcrel(off, 3).get(),
-                    }),
+                        count
-                    LDR16 => handler!(self, |OpsRRPH(dst, base, off, count)| {
+                    )?),
                        self.load(dst, base, self.pcrel(off, 3).get(), count)?;
                    }),
                    STR16 => handler!(self, |OpsRRPH(dst, base, off, count)| {
                        self.store(dst, base, self.pcrel(off, 3).get(), count)?;
                    }),
                    JMP16 => {
                        let OpsP(off) = self.decode();
                        self.pc = self.pcrel(off, 1);
@ -464,6 +430,12 @@ where
        Ok(())
    }
    /// Three-way comparison of a register against a value.
    ///
    /// Reads register `reg`, casts it to `T`, orders it against `val` and
    /// writes the outcome to register `to` as an `i64`: `-1` when the
    /// register value is less than `val`, `0` when equal, `1` when greater.
    ///
    /// # Safety
    /// NOTE(review): presumably inherits whatever requirements `read_reg` and
    /// `write_reg` place on their callers — confirm against their definitions.
    #[inline(always)]
    unsafe fn cmp<T: ValueVariant + Ord>(&mut self, to: u8, reg: u8, val: T) {
        let lhs = self.read_reg(reg).cast::<T>();
        // `Ordering` is `repr(i8)` with Less = -1, Equal = 0, Greater = 1.
        let ordering = Ord::cmp(&lhs, &val);
        self.write_reg(to, ordering as i64);
    }
    /// Perform binary operating over two registers
    #[inline(always)]
    unsafe fn binary_op<T: ValueVariant>(&mut self, op: impl Fn(T, T) -> T) {
--- a/spec.md
+++ b/spec.md
@ -1,332 +1,440 @@
 # HoleyBytes ISA Specification
 # Bytecode format
- Holey Bytes program should start with following magic: `[0xAB, 0x1E, 0x0B]`
+- Image format is not specified, though ELF is recommended
 - All numbers are encoded little-endian
 - There are 256 registers; each is represented by a single byte
- Immediate values are 64 bit
+- Immediate values are 8, 16, 32 or 64 bit
 - Program is by spec required to be terminated with 12 zero bytes
-### Instruction encoding
+## Instruction encoding
- Instruction parameters are packed (no alignment)
+- Instruction operands are packed (no alignment)
- [opcode, …parameters…]
+- [opcode, operand 0, operand 1, …]
-### Instruction parameter types
+## Instruction parameter types
- B = Byte
+- `R`: Register (8 bits)
- D = Doubleword (64 bits)
+- Relative program-counter offset immediates:
- H = Halfword (16 bits)
+    - `O`: 32 bit (Si32)
    - `P`: 16 bit (Si16)
 - Immediates:
    - `B`: Byte, 8 bit (Xi8)
    - `H`: Half-word, 16 bit (Xi16)
    - `W`: Word, 32 bit (Xi32)
    - `D`: Double-word, 64 bit (Xi64)
 - `A`: Absolute address immediate, 64 bit (Ui64)
-| Name | Size    |
+## Types
-|:----:|:--------|
+- Si*n*: Signed integer of size *n* bits (Si8, Si16, Si32, Si64)
-| BBBB | 32 bits |
+- Ui*n*: Unsigned integer of size *n* bits (Ui8, Ui16, Ui32, Ui64)
-| BBB  | 24 bits |
+- Xi*n*: Sign-agnostic integer of size *n* bits (Xi8, Xi16, Xi32, Xi64)
-| BBDH | 96 bits |
+- Fl*n*: Floating point number of size *n* bits (Fl32, Fl64)
-| BBD  | 80 bits |
+
-| BBW  | 48 bits |
+# Behaviours
-|  BB  | 16 bits |
+- Integer operations are always wrapping, including signed numbers
-|  BD  | 72 bits |
+- Two's complement
-|  D   | 64 bits |
+- Floats as specified by IEEE 754
-|  N   | 0  bits |
+
 ## Relative addressing
 Relative addresses are computed from the address of the first byte
 of the offset in the code, not from the beginning of the current or following instruction.
 ## Zero register
 - Register 0
 - Cannot be clobbered
    - Write is no-op
 - Load always yields 0
 ## Rounding modes
 | Rounding mode            | Value |
 |:-------------------------|:------|
 | To nearest, ties to even | 0b00  |
 | Towards 0 (truncate)     | 0b01  |
 | Towards +∞ (up)          | 0b10  |
 | Towards -∞ (down)        | 0b11  |
 # Instructions
 - `#n`: register in parameter *n*
- `imm #n`: for immediate in parameter *n*
+- `$n`: for immediate in parameter *n*
- `P ← V`: Set register P to value V
+- `#P ← V`: Set register P to value V
 - `[x]`: Address x
 - `XY`: X bytes from location Y
 - `pc`: Program counter
 - `<XYZ>`: Placeholder
 - `Type(X)`: Cast
 ## Program execution control
- N type
+- Type `N`
-| Opcode | Name |            Action             |
+| Opcode | Mnemonic | Action                                      |
-|:------:|:----:|:-----------------------------:|
+|:-------|:---------|:--------------------------------------------|
-|   0    |  UN  | Trigger unreachable code trap |
+| 0x00   | UN       | Throw unreachable code exception            |
-|   1    |  TX  |      Terminate execution      |
+| 0x01   | TX       | Terminate execution (eg. on end of program) |
-|   2    | NOP  |          Do nothing           |
+| 0x02   | NOP      | Do nothing                                  |
-## Integer binary ops.
+## Binary register-immediate ops
- BBB type
+- Type `RR<IMM>`
- `#0 ← #1 <op> #2`
+- Action: `#0 ← #1 <OP> #2`
-| Opcode | Name |         Action          |
+## Addition (`+`)
-|:------:|:----:|:-----------------------:|
+| Opcode | Mnemonic | Type |
-|   3    | ADD  |    Wrapping addition    |
+|:-------|:---------|:-----|
-|   4    | SUB  |  Wrapping subtraction   |
+| 0x03   | ADD8     | Xi8  |
-|   5    | MUL  | Wrapping multiplication |
+| 0x04   | ADD16    | Xi16 |
-|   6    | AND  |         Bitand          |
+| 0x05   | ADD32    | Xi32 |
-|   7    |  OR  |          Bitor          |
+| 0x06   | ADD64    | Xi64 |
 |   8    | XOR  |         Bitxor          |
 |   9    |  SL  | Unsigned left bitshift  |
 |   10   |  SR  | Unsigned right bitshift |
 |   11   | SRS  |  Signed right bitshift  |
-### Comparsion
+## Subtraction (`-`)
-| Opcode | Name |       Action        |
+| Opcode | Mnemonic | Type |
-|:------:|:----:|:-------------------:|
+|:-------|:---------|:-----|
-|   12   | CMP  |  Signed comparsion  |
+| 0x07   | SUB8     | Xi8  |
-|   13   | CMPU | Unsigned comparsion |
+| 0x08   | SUB16    | Xi16 |
 | 0x09   | SUB32    | Xi32 |
 | 0x0A   | SUB64    | Xi64 |
-#### Comparsion table
+## Multiplication (`*`)
-| #1 *op* #2 | Result |
+| Opcode | Mnemonic | Type |
-|:----------:|:------:|
+|:-------|:---------|:-----|
-|     <      |   0    |
+| 0x0B   | MUL8     | Xi8  |
-|     =      |   1    |
+| 0x0C   | MUL16    | Xi16 |
-|     >      |   2    |
+| 0x0D   | MUL32    | Xi32 |
 | 0x0E   | MUL64    | Xi64 |
-### Division-remainder
+## Bitwise ops (type: Xi64)
- Type BBBB
+| Opcode | Mnemonic | Operation           |
- In case of `#3` is zero, the resulting value is all-ones
+|:-------|:---------|:--------------------|
- `#0 ← #2 ÷ #3`
+| 0x0F   | AND      | Conjunction (&)     |
- `#1 ← #2 % #3`
+| 0x10   | OR       | Disjunction (\|)    |
 | 0x11   | XOR      | Non-equivalence (^) |
-| Opcode | Name |             Action              |
+## Unsigned left bitshift (`<<`)
-|:------:|:----:|:-------------------------------:|
+| Opcode | Mnemonic | Type |
-|   14   | DIR  | Divide and remainder combinated |
+|:-------|:---------|:-----|
 | 0x12   | SLU8     | Ui8  |
 | 0x13   | SLU16    | Ui16 |
 | 0x14   | SLU32    | Ui32 |
 | 0x15   | SLU64    | Ui64 |
-### Negations
+## Unsigned right bitshift (`>>`)
- Type BB
+| Opcode | Mnemonic | Type |
- `#0 ← #1 <op> #2`
+|:-------|:---------|:-----|
 | 0x16   | SRU8     | Ui8  |
 | 0x17   | SRU16    | Ui16 |
 | 0x18   | SRU32    | Ui32 |
 | 0x19   | SRU64    | Ui64 |
-| Opcode | Name |      Action      |
+## Signed right bitshift (`>>`)
-|:------:|:----:|:----------------:|
+| Opcode | Mnemonic | Type |
-|   15   | NEG  |   Bit negation   |
+|:-------|:---------|:-----|
-|   16   | NOT  | Logical negation |
+| 0x1A   | SRS8     | Si8  |
 | 0x1B   | SRS16    | Si16 |
 | 0x1C   | SRS32    | Si32 |
 | 0x1D   | SRS64    | Si64 |
-## Integer immediate binary ops.
+## Comparison
- Type BBD
+- Compares two numbers, saves result to register
- `#0 ← #1 <op> imm #2`
+- Operation: `#0 ← #1 <=> #2`
-| Opcode | Name |        Action        |
+| Ordering | Number |
-|:------:|:----:|:--------------------:|
+|:---------|:-------|
-|   17   | ADDI |  Wrapping addition   |
+| <        | -1     |
-|   18   | MULI | Wrapping subtraction |
+| =        | 0      |
-|   19   | ANDI |        Bitand        |
+| >        | 1      |
 |   20   | ORI  |        Bitor         |
 |   21   | XORI |        Bitxor        |
-### Bitshifts
+| Opcode | Mnemonic | Type |
- Type BBW
+|:-------|:---------|:-----|
-| Opcode | Name |         Action          |
+| 0x1E   | CMPU     | Ui64 |
-|:------:|:----:|:-----------------------:|
+| 0x1F   | CMPS     | Si64 |
 |   22   | SLI  | Unsigned left bitshift  |
 |   23   | SRI  | Unsigned right bitshift |
 |   24   | SRSI |  Signed right bitshift  |
-### Comparsion
+# Merged divide-remainder
- Comparsion is the same as when RRR type
+- Type `RRRR`
 - Operation:
    - `#0 ← #2 / #3`
    - `#1 ← #2 % #3`
-| Opcode | Name  |       Action        |
+- If dividing by zero:
-|:------:|:-----:|:-------------------:|
+    - `#0 ← Ui64(-1)`
-|   25   | CMPI  |  Signed comparsion  |
+    - `#1 ← #2`
 |   26   | CMPUI | Unsigned comparsion |
-## Register value set / copy
+| Opcode | Mnemonic | Type |
 |:-------|:---------|:-----|
 | 0x20   | DIRU8    | Ui8  |
 | 0x21   | DIRU16   | Ui16 |
 | 0x22   | DIRU32   | Ui32 |
 | 0x23   | DIRU64   | Ui64 |
 | 0x24   | DIRS8    | Si8  |
 | 0x25   | DIRS16   | Si16 |
 | 0x26   | DIRS32   | Si32 |
 | 0x27   | DIRS64   | Si64 |
-### Copy
+# Unary register operations (type: Xi64)
- Type BB
+- Type: `RR`
- `#0 ← #1`
+- Operation: `#0 ← <OP> #1`
-| Opcode | Name | Action |
+| Opcode | Mnemonic | Operation                |
-|:------:|:----:|:------:|
+|:-------|:---------|:-------------------------|
-|   27   |  CP  |  Copy  |
+| 0x28   | NEG      | Bitwise complement (`~`) |
 | 0x29   | NOT      | Logical negation (`!`)   |
-### Swap
+## Sign extensions
- Type BB
+- Operation: `#0 ← Si64(#1)`
 - Swap #0 and #1
 - Zero register rules:
    - Both: no-op
    - One: Copy zero to the non-zero register
-| Opcode | Name | Action |
+| Opcode | Mnemonic | Source type |
-|:------:|:----:|:------:|
+|:-------|:---------|:------------|
-|   28   | SWA  |  Swap  |
+| 0x2A   | SXT8     | Si8         |
 | 0x2B   | SXT16    | Si16        |
 | 0x2C   | SXT32    | Si32        |
-### Load immediate
+# Binary register-immediate operations
- Type BD
+- Type: `RR<IMM>`
- `#0 ← #1`
+- Operation: `#0 ← #1 <OP> $2`
-| Opcode | Name |     Action     |
+## Addition (`+`)
-|:------:|:----:|:--------------:|
+| Opcode | Mnemonic | Type |
-|   29   |  LI  | Load immediate |
+|:-------|:---------|:-----|
 | 0x2D   | ADDI8    | Xi8  |
 | 0x2E   | ADDI16   | Xi16 |
 | 0x2F   | ADDI32   | Xi32 |
 | 0x30   | ADDI64   | Xi64 |
-### Load relative address
+## Multiplication (`*`)
- Type BBW
+| Opcode | Mnemonic | Type |
-| Opcode | Name |         Action          |
+|:-------|:---------|:-----|
-|:------:|:----:|:-----------------------:|
+| 0x31   | MULI8    | Xi8  |
-|   30   | LRA  | `#0 ← #1 + imm #2 + PC` |
+| 0x32   | MULI16   | Xi16 |
 | 0x33   | MULI32   | Xi32 |
 | 0x34   | MULI64   | Xi64 |
-## Memory operations
+## Bitwise ops (type: Xi64)
- Type BBDH
+| Opcode | Mnemonic | Operation           |
- If loaded/store value exceeds one register size, continue accessing following registers
+|:-------|:---------|:--------------------|
 | 0x35   | ANDI     | Conjunction (&)     |
 | 0x36   | ORI      | Disjunction (\|)    |
 | 0x37   | XORI     | Non-equivalence (^) |
-### Load / Store
+## Unsigned left bitshift (`<<`)
-| Opcode | Name |                 Action                  |
+| Opcode | Mnemonic | Type |
-|:------:|:----:|:---------------------------------------:|
+|:-------|:---------|:-----|
-|   31   |  LD  | `#0 ← [#1 + imm #2], copy imm #3 bytes` |
+| 0x38   | SLUI8    | Ui8  |
-|   32   |  ST  | `[#1 + imm #2] ← #0, copy imm #3 bytes` |
+| 0x39   | SLUI16   | Ui16 |
 | 0x3A   | SLUI32   | Ui32 |
 | 0x3B   | SLUI64   | Ui64 |
-### PC relative Load / Store
+## Unsigned right bitshift (`>>`)
- Type BBDW
+| Opcode | Mnemonic | Type |
-| Opcode | Name |                    Action                    |
+|:-------|:---------|:-----|
-|:------:|:----:|:--------------------------------------------:|
+| 0x3C   | SRUI8    | Ui8  |
-|   33   | LDR  | `#0 ← [#1 + imm #2 + PC], copy imm #3 bytes` |
+| 0x3D   | SRUI16   | Ui16 |
-|   34   | STR  | `[#1 + imm #2 + PC] ← #0, copy imm #3 bytes` |
+| 0x3E   | SRUI32   | Ui32 |
 | 0x3F   | SRUI64   | Ui64 |
-## Block copy
+## Signed right bitshift (`>>`)
- Block copy source and target can overlap
+| Opcode | Mnemonic | Type |
 |:-------|:---------|:-----|
 | 0x40   | SRSI8    | Si8  |
 | 0x41   | SRSI16   | Si16 |
 | 0x42   | SRSI32   | Si32 |
 | 0x43   | SRSI64   | Si64 |
-### Memory copy
+## Comparison
- Type BBD
+- Compares two numbers, saves result to register
 - Operation: `#0 ← #1 <=> $2`
 - The comparison table is the same as for the register-register variant
-| Opcode | Name |              Action              |
+| Opcode | Mnemonic | Type |
-|:------:|:----:|:--------------------------------:|
+|:-------|:---------|:-----|
-|   35   | BMC  | `[#1] ← [#0], copy imm #2 bytes` |
+| 0x44   | CMPUI    | Ui64 |
 | 0x45   | CMPSI    | Si64 |
-### Register copy
+# Register copies
- Type BBB
+- Type: `RR`
 - Copy a block a register to another location (again, overflowing to following registers)
-| Opcode | Name |              Action              |
+| Opcode | Mnemonic | Operation                        |
-|:------:|:----:|:--------------------------------:|
+|:-------|:---------|:---------------------------------|
-|   36   | BRC  | `#1 ← #0, copy imm #2 registers` |
+| 0x46   | CP       | Copy register value (`#0 ← #1`)  |
 | 0x47   | SWA      | Swap register values (`#0 ⇆ #1`) |
-## Control flow
+# Load immediate
 - Load immediate value from code to register
 - Type: `R<IMM>`
 - Operation: `#0 ← $1`
-### Unconditional jump
+| Opcode | Mnemonic | Type |
- Type D
+|:-------|:---------|:-----|
-| Opcode | Name |                   Action                    |
+| 0x48   | LI8      | Xi8  |
-|:------:|:----:|:-------------------------------------------:|
+| 0x49   | LI16     | Xi16 |
-|   37   | JMPR | Jump at address relative to program counter |
+| 0x4A   | LI32     | Xi32 |
 | 0x4B   | LI64     | Xi64 |
-### Unconditional linking jump
+# Load relative address
- Type BBD
+- Compute value from program counter, register value and offset
 - Type: `RRO`
 - Operation: `#0 ← pc + #1 + $2`
-| Opcode | Name |                         Action                          |
+| Opcode | Mnemonic |
-|:------:|:----:|:-------------------------------------------------------:|
+|:-------|:---------|
-|   38   | JAL  |   Save PC past JAL to `#0` and jump at `#1 + imm #2`    |
+| 0x4C   | LRA      |
 |   39   | JALR | Save PC past JAL to `#0` and jump at `#1 + imm #2 + PC` |
-### Conditional jumps
+# Memory access operations
- Type BBH
+- Immediate `$3` specifies size
- Jump at `PC + imm #2` if `#0 <op> #1`
+- If size is greater than register size,
    it overflows to the adjacent register
    (eg. copying 16 bytes to register `r1` copies first 8 bytes to it
         and the remaining to `r2`)
-| Opcode | Name |  Comparsion  |
+## Absolute addressing
-|:------:|:----:|:------------:|
+- Type: `RRAH`
-|   40   | JEQ  |      =       |
+- Computes address from base register and absolute offset
 |   41   | JNE  |      ≠       |
 |   42   | JLT  |  < (signed)  |
 |   43   | JGT  |  > (signed)  |
 |   44   | JLTU | < (unsigned) |
 |   45   | JGTU | > (unsigned) |
-### Environment call
+| Opcode | Mnemonic | Operation          |
- Type N
+|:-------|:---------|:-------------------|
 | 0x4D   | LD       | `#0 ← $3[#1 + $2]` |
 | 0x4E   | ST       | `$3[#1 + $2] ← #0` |
-| Opcode | Name |                Action                 |
+## Relative addressing
-|:------:|:----:|:-------------------------------------:|
+- Type: `RROH`
-|   46   | ECA  | Cause an trap to the host environment |
+- Computes address from register and offset from program counter
 |   47   | EBP  | Cause breakproint trap to environment |
-## Floating point operations
+| Opcode | Mnemonic | Operation               |
- Type BBB
+|:-------|:---------|:------------------------|
- `#0 ← #1 <op> #2`
+| 0x4F   | LDR      | `#0 ← $3[pc + #1 + $2]` |
 | 0x50   | STR      | `$3[pc + #1 + $2] ← #0` |
-| Opcode | Name |     Action     |
+# Block memory copy
-|:------:|:----:|:--------------:|
+- Type: `RRH`
-|   48   | ADDF |    Addition    |
+- Copies a block of `$3` bytes from the memory address held in `#0` to the memory address held in `#1`
 |   49   | SUBF |  Subtraction   |
 |   50   | MULF | Multiplication |
-### Division-remainder
+| Opcode | Mnemonic | Operation         |
- Type BBBB
+|:-------|:---------|:------------------|
 | 0x51   | BMC      | `$3[#1] ← $3[#0]` |
-| Opcode | Name |          Action           |
+# Block register copy
-|:------:|:----:|:-------------------------:|
+- Type: `RRB`
-|   51   | DIRF | Same as for integer `DIR` |
+- Copy block of `$3` registers starting with `#0` to `#1`
 - Copying over the 256 registers causes an exception
-### Fused Multiply-Add
+| Opcode | Mnemonic | Operation     |
- Type BBBB
+|:-------|:---------|:--------------|
 | 0x52   | BRC      | `$3#1 ← $3#0` |
-| Opcode | Name |        Action         |
+# Relative jump
-|:------:|:----:|:---------------------:|
+- Type: `O`
 |   52   | FMAF | `#0 ← (#1 * #2) + #3` |
-### Negation
+| Opcode | Mnemonic | Operation      |
- Type BB
+|:-------|:---------|:---------------|
-| Opcode | Name |   Action   |
+| 0x53   | JMP      | `pc ← pc + $0` |
 |:------:|:----:|:----------:|
 |   53   | NEGF | `#0 ← -#1` |
-### Conversion
+# Linking jump
- Type BB
+- Operation:
- Signed
+    - Save address of following instruction to `#0`
- `#0 ← #1 as _`
+        - `#0 ← pc+<instruction size>`
    - Jump to specified address 
-| Opcode | Name |    Action    |
+| Opcode | Mnemonic | Instruction type  | Address                  |
-|:------:|:----:|:------------:|
+|:-------|:---------|:------------------|:-------------------------|
-|   54   | ITF  | Int to Float |
+| 0x54   | JAL      | RRO (size = 6 B)  | Relative, `pc + #1 + $2` |
-|   55   | FTI  | Float to Int |
+| 0x55   | JALA     | RRA (size = 10 B) | Absolute, `#1 + $2`      |
-## Floating point immediate operations
+# Conditional jump
- Type BBD
+- Perform comparison; if the condition is met, jump to relative address
- `#0 ← #1 <op> imm #2`
+- Type: `RRP`
 - Operation: `if #0 <CMP> #1 { pc ← pc + $2 }`
-| Opcode | Name  |     Action     |
+| Opcode | Mnemonic | Condition          | Type |
-|:------:|:-----:|:--------------:|
+|:-------|:---------|:-------------------|:-----|
-|   56   | ADDFI |    Addition    |
+| 0x56   | JEQ      | Equals (`=`)       | Xi64 |
-|   57   | MULFI | Multiplication |
+| 0x57   | JNE      | Not-equals (`≠`)   | Xi64 |
 | 0x58   | JLTU     | Less-than (`<`)    | Ui64 |
 | 0x59   | JGTU     | Greater-than (`>`) | Ui64 |
 | 0x5A   | JLTS     | Less-than (`<`)    | Si64 |
 | 0x5B   | JGTS     | Greater-than (`>`) | Si64 |
-# Registers
+# Environment traps
- There is 255 registers + one zero register (with index 0)
+- Traps to the environment
- Reading from zero register yields zero
+- Type: `N`
 - Writing to zero register is a no-op
-# Memory
+| Opcode | Mnemonic | Trap type        |
- Addresses are 64 bit
+|:-------|:---------|:-----------------|
- Program should be in the same address space as all other data
+| 0x5C   | ECA      | Environment call |
- Memory implementation is arbitrary
+| 0x5D   | EBP      | Breakpoint       |
    - Address `0x0` may or may not be valid. Count with compilers
      considering it invalid!
 - In case of accessing invalid address:
    - Program shall trap (LoadAccessEx, StoreAccessEx) with parameter of accessed address
    - Value of register when trapped is undefined
-## Recommendations
+# Floating point binary operations
- If paging used:
+- Type: `RRR`
-    - Leave first page invalid
+- Operation: `#0 ← #1 <OP> #2`
    - Pages should be at least 4 KiB
-# Program execution
+| Opcode | Mnemonic | Operation            | Type |
- The way of program execution is implementation defined
+|:-------|:---------|:---------------------|:-----|
- The execution is arbitrary, as long all effects are obervable
+| 0x5E   | FADD32   | Addition (`+`)       | Fl32 |
-    in the way as program was executed literally, in order.
+| 0x5F   | FADD64   | Addition (`+`)       | Fl64 |
 | 0x60   | FSUB32   | Subtraction (`-`)    | Fl32 |
 | 0x61   | FSUB64   | Subtraction (`-`)    | Fl64 |
 | 0x62   | FMUL32   | Multiplication (`*`) | Fl32 |
 | 0x63   | FMUL64   | Multiplication (`*`) | Fl64 |
 | 0x64   | FDIV32   | Division (`/`)       | Fl32 |
 | 0x65   | FDIV64   | Division (`/`)       | Fl64 |
-# Program validation
+# Fused multiply-add
- Invalid program should cause runtime error:
+- Type: `RRRR`
-    - The form of error is arbitrary. Can be a trap or an interpreter-specified error
+- Operation: `#0 ← (#1 * #2) + #3`
    - It shall not be handleable from within the program
 - Executing invalid opcode should trap
 - Program can be validated either before execution or when executing
-# Traps
+| Opcode | Mnemonic | Type |
-Program should at least implement these traps:
+|:-------|:---------|:-----|
- Environment call
+| 0x66   | FMA32    | Fl32 |
- Invalid instruction exception
+| 0x67   | FMA64    | Fl64 |
 - Load address exception
 - Store address exception
 - Unreachable instruction
-and executing environment should be able to get information about them,
+# Comparisons
-like the opcode of invalid instruction or attempted address to load/store.
+- Type: `RRR`
-Details about these are left as an implementation detail.
+- Operation: `#0 ← #1 <=> #2`
 - Comparison table is the same as for `CMPx`/`CMPxI`
 - Whether NaN is treated as less-than or greater-than depends on the variant
-# Assembly
+| Opcode | Mnemonic | Type | NaN is |
-HoleyBytes assembly format is not defined, this is just a weak description
+|:-------|:---------|:-----|:-------|
-of `hbasm` syntax.
+| 0x6A   | FCMPLT32 | Fl32 | <      |
 | 0x6B   | FCMPLT64 | Fl64 | <      |
 | 0x6C   | FCMPGT32 | Fl32 | >      |
 | 0x6D   | FCMPGT64 | Fl64 | >      |
- Opcode names correspond to specified opcode names, lowercase (`nop`)
+# Int to float
- Parameters are separated by comma (`addi r0, r0, 1`)
+- Type: `RR`
- Instructions are separated by either line feed or semicolon
+- Converts from `Si64`
- Registers are represented by `r` followed by the number (`r10`)
+- Operation: `#0 ← Fl<SIZE>(#1)`
- Labels are defined by label name followed with colon (`loop:`)
+
- Labels are references simply by their name (`print`)
+| Opcode | Mnemonic | Type |
- Immediates are entered plainly. Negative numbers supported.
+|:-------|:---------|:-----|
 | 0x6E   | ITF32    | Fl32 |
 | 0x6F   | ITF64    | Fl64 |
 # Float to int
 - Type: `RRB`
 - Operation: `#0 ← Si64(#1)`
 - Immediate `$2` specifies rounding mode
 | Opcode | Mnemonic | Type |
 |:-------|:---------|:-----|
 | 0x70   | FTI32    | Fl32 |
 | 0x71   | FTI64    | Fl64 |
 # Fl32 to Fl64
 - Type: `RR`
 - Operation: `#0 ← Fl64(#1)`
 | Opcode | Mnemonic |
 |:-------|:---------|
 | 0x72   | FC32T64  |
 # Fl64 to Fl32
 - Type: `RRB`
 - Operation: `#0 ← Fl32(#1)`
 - Immediate `$2` specifies rounding mode
 | Opcode | Mnemonic |
 |:-------|:---------|
 | 0x73   | FC64T32  |
 # 16-bit relative address instruction variants
 | Opcode | Mnemonic | Type | Variant of |
 |:-------|:---------|:-----|:-----------|
 | 0x74   | LRA16    | RRP  | LRA        |
 | 0x75   | LDR16    | RRPH | LDR        |
 | 0x76   | STR16    | RRPH | STR        |
 | 0x77   | JMP16    | P    | JMP        |