From 7b5ae34bfd3b56041667820cc18e608506ba1913 Mon Sep 17 00:00:00 2001 From: tdark Date: Sat, 15 May 2021 13:21:28 +0200 Subject: [PATCH] Implemented brainfuck interpreter --- Cargo.lock | 143 ++------------------ Cargo.toml | 3 +- src/brian.rs | 374 +++++++++++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 1 - 4 files changed, 374 insertions(+), 147 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e7086bf..3594179 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,29 +6,19 @@ version = 3 name = "able-script" version = "0.1.0" dependencies = [ - "brainfuck", "clap", "logos", "rand", "rustyline", ] -[[package]] -name = "aho-corasick" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" -dependencies = [ - "memchr 0.1.11", -] - [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ - "winapi 0.3.9", + "winapi", ] [[package]] @@ -39,7 +29,7 @@ checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -54,16 +44,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" -[[package]] -name = "brainfuck" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f8c57b0ea922157799312b2fa42b4ea14bb5d77ddf6699e846f6c8b787b3f8" -dependencies = [ - "docopt", - "rustc-serialize", -] - [[package]] name = "cc" version = "1.0.67" @@ -85,7 +65,7 @@ dependencies = [ "ansi_term", "atty", "bitflags", - "strsim 0.8.0", + "strsim", "textwrap", "unicode-width", "vec_map", @@ -109,19 +89,7 @@ checksum = 
"4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", "redox_users", - "winapi 0.3.9", -] - -[[package]] -name = "docopt" -version = "0.6.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9" -dependencies = [ - "lazy_static", - "regex", - "rustc-serialize", - "strsim 0.5.2", + "winapi", ] [[package]] @@ -143,7 +111,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" dependencies = [ "libc", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -166,22 +134,6 @@ dependencies = [ "libc", ] -[[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -dependencies = [ - "winapi 0.2.8", - "winapi-build", -] - -[[package]] -name = "lazy_static" -version = "0.2.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" - [[package]] name = "libc" version = "0.2.93" @@ -216,18 +168,9 @@ dependencies = [ "fnv", "proc-macro2", "quote", - "regex-syntax 0.6.23", + "regex-syntax", "syn", - "utf8-ranges 1.0.4", -] - -[[package]] -name = "memchr" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" -dependencies = [ - "libc", + "utf8-ranges", ] [[package]] @@ -350,37 +293,12 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "regex" -version = "0.1.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" -dependencies = [ - "aho-corasick", - "memchr 0.1.11", - "regex-syntax 0.3.9", - "thread_local", - "utf8-ranges 0.1.3", -] - 
-[[package]] -name = "regex-syntax" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" - [[package]] name = "regex-syntax" version = "0.6.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5f089152e60f62d28b835fbff2cd2e8dc0baf1ac13343bef92ab7eed84548" -[[package]] -name = "rustc-serialize" -version = "0.3.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda" - [[package]] name = "rustyline" version = "8.0.0" @@ -393,7 +311,7 @@ dependencies = [ "fs2", "libc", "log", - "memchr 2.3.4", + "memchr", "nix", "radix_trie", "scopeguard", @@ -401,7 +319,7 @@ dependencies = [ "unicode-segmentation", "unicode-width", "utf8parse", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -416,12 +334,6 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" -[[package]] -name = "strsim" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67f84c44fbb2f91db7fef94554e6b2ac05909c9c0b0bc23bb98d3a1aebfe7f7c" - [[package]] name = "strsim" version = "0.8.0" @@ -448,25 +360,6 @@ dependencies = [ "unicode-width", ] -[[package]] -name = "thread-id" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" -dependencies = [ - "kernel32-sys", - "libc", -] - -[[package]] -name = "thread_local" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" -dependencies = [ - "thread-id", -] - [[package]] name = "unicode-segmentation" version = "1.7.1" @@ -485,12 +378,6 @@ version = "0.2.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" -[[package]] -name = "utf8-ranges" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" - [[package]] name = "utf8-ranges" version = "1.0.4" @@ -515,12 +402,6 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" -[[package]] -name = "winapi" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" - [[package]] name = "winapi" version = "0.3.9" @@ -531,12 +412,6 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu", ] -[[package]] -name = "winapi-build" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" - [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 132e1ca..3ed504f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,7 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap="*" +clap = "*" logos = "0.12" rand = "*" rustyline = "8.0.0" -brainfuck = "0.2.1" diff --git a/src/brian.rs b/src/brian.rs index 2b21080..dfcfada 100644 --- a/src/brian.rs +++ b/src/brian.rs @@ -1,13 +1,367 @@ +//! A brainfuck interpreter capable of executing arbitrary code, with arbitrary inputs and outputs. +//! +//! If you just want to execute some simple brainfuck, check the [`interpret_with_io`] function. +//! +//! To construct the interpreter, use the [`from_ascii`] or [`from_ascii_with_input_buffer`] methods. The latter grants access to +//! 
the method [`add_input`], which allows for the addition of input while the interpreter is running. +//! +//! [`from_ascii`]: Interpreter::from_ascii +//! [`from_ascii_with_input_buffer`]: Interpreter::from_ascii_with_input_buffer +//! [`add_input`]: Interpreter::add_input +//! +//! Finally, to run the interpreter, you can use the [`advance`], [`advance_until_io`], or [`interpret_with_output`] methods. +//! +//! [`advance`]: Interpreter::advance +//! [`advance_until_io`]: Interpreter::advance_until_io +//! [`interpret_with_output`]: Interpreter::interpret_with_output + +#![deny(missing_docs)] + +use std::{ + collections::VecDeque, + io::{Read, Write}, +}; + // NOTE(Able): This is the brain fuck interface -use brainfuck::program::Program; -use brainfuck::tape::ArrayTape; -use brainfuck::Interpreter; - -pub fn bff_eval(function: String, args: String) { - let mut stdout = ""; - let program = Program::parse("++>+.").unwrap(); - /* This failes currently I guess - let mut interp = Interpreter::::new(program, &mut stdin, &mut stdout); - */ +#[derive(Debug, Clone, PartialEq, Eq)] +/// A brainfuck interpreter. 
Read the [module level documentation](self) for more +pub struct Interpreter<'a, I> { + code: &'a [u8], + instr_ptr: usize, + tape: Vec, + data_ptr: usize, + input: I, +} + +impl<'a> Interpreter<'a, InputBuffer> { + /// Construct an `Interpreter` from an ASCII string of code with an empty input buffer + pub fn from_ascii_with_input_buffer(code: &'a [u8]) -> Self { + Self { + code, + instr_ptr: 0, + tape: Vec::new(), + data_ptr: 0, + input: InputBuffer(VecDeque::new()), + } + } + + /// Add a byte to the input buffer of this interpreter + pub fn add_input(&mut self, input: i8) { + self.input.0.push_back(input); + } +} + +impl<'a, I: BootlegRead> Interpreter<'a, I> { + /// Construct an interpreter from an ASCII string of code and a source of input bytes + pub fn from_ascii(code: &'a [u8], input: I) -> Self { + Self { + code, + instr_ptr: 0, + tape: Vec::new(), + data_ptr: 0, + input, + } + } + + /// Advance the interpreter by one instruction. + /// A return value of Ok(None) indicates successful termination of the interpreter + pub fn advance(&mut self) -> Result, ProgramError> { + let &opcode = match self.code.get(self.instr_ptr) { + Some(opcode) => opcode, + None => return Ok(None), + }; + + match opcode { + b'>' => self.data_ptr += 1, + + b'<' => { + self.data_ptr = self + .data_ptr + .checked_sub(1) + .ok_or(ProgramError::DataPointerUnderflow)?; + } + + b'+' => { + let val = self.get_or_resize_tape_mut(); + *val = val.checked_add(1).ok_or(ProgramError::IntegerOverflow)?; + } + + b'-' => { + let val = self.get_or_resize_tape_mut(); + *val = val.checked_sub(1).ok_or(ProgramError::IntegerUnderflow)?; + } + + b'.' 
=> { + self.instr_ptr += 1; + return Ok(Some(Status::Output(self.get_at_data_ptr()))); + } + + b',' => match self.input.bootleg_read() { + Ok(Some(num)) => *self.get_or_resize_tape_mut() = num, + Ok(None) => return Ok(Some(Status::NeedsInput)), + Err(_) => return Err(ProgramError::InputReadError), + }, + + b'[' => { + if self.get_at_data_ptr() == 0 { + self.instr_ptr = self + .get_matching_closing_bracket(self.instr_ptr) + .ok_or(ProgramError::UnmatchedOpeningBracket)? + //Instruction pointer will be incremented by 1 after the match + } + } + + b']' => { + if self.get_at_data_ptr() != 0 { + self.instr_ptr = self + .get_matching_opening_bracket(self.instr_ptr) + .ok_or(ProgramError::UnmatchedClosingBracket)? + //Instruction pointer will be incremented by 1 after the match + } + } + + _ => {} //brainfuck treats all characters it doesn't understand as comments + } + + self.instr_ptr += 1; + + Ok(Some(Status::Continue)) + } + + /// Advances the interpreter until the next IO operation. See [`advance`](Interpreter::advance) + pub fn advance_until_io(&mut self) -> Result, ProgramError> { + while let Some(status) = self.advance()? { + match status { + Status::NeedsInput => return Ok(Some(IoStatus::NeedsInput)), + Status::Output(out) => return Ok(Some(IoStatus::Output(out))), + Status::Continue => continue, + } + } + Ok(None) + } + + /// Executes the interpreter until it halts, writing all return values to the provided `Write` type. + /// For more granular control, use [`advance`](Interpreter::advance) + pub fn interpret_with_output(&mut self, mut output: O) -> Result<(), InterpretError> { + while let Some(status) = self.advance_until_io()? 
{ + match status { + IoStatus::NeedsInput => return Err(InterpretError::EndOfInput), + IoStatus::Output(out) => match output.write(&[out as u8]) { + Ok(0) => return Err(InterpretError::OutputBufferFull), + Ok(_) => continue, + Err(_) => return Err(InterpretError::OutputWriteError), + }, + } + } + Ok(()) + } + + fn get_or_resize_tape_mut(&mut self) -> &mut i8 { + if self.data_ptr >= self.tape.len() { + self.tape.resize(self.data_ptr + 1, 0); + } + &mut self.tape[self.data_ptr] + } + + fn get_at_data_ptr(&self) -> i8 { + //No need to resize the tape to read: if the tape doesn't extend that far already, it holds a value of 0 + self.tape.get(self.data_ptr).copied().unwrap_or(0) + } + + fn get_matching_closing_bracket(&mut self, opening: usize) -> Option { + self.code[opening..] + .iter() + .zip(opening..) + .scan(0, |counter, (char, index)| { + match char { + b'[' => *counter += 1, + b']' => *counter -= 1, + _ => {} + }; + Some((*counter, index)) + }) + .find_map( + |(counter, index)| { + if counter == 0 { + Some(index) + } else { + None + } + }, + ) + } + + fn get_matching_opening_bracket(&mut self, closing: usize) -> Option { + self.code[..closing + 1] + .iter() + .zip(0..closing + 1) + .rev() + .scan(0, |counter, (char, index)| { + match char { + b']' => *counter += 1, + b'[' => *counter -= 1, + _ => {} + }; + Some((*counter, index)) + }) + .find_map( + |(counter, index)| { + if counter == 0 { + Some(index) + } else { + None + } + }, + ) + } +} + +/// A convenience function for interpreting brainfuck code with a given input and output source. 
+/// For more information, consult [the module level documentation](self) +pub fn interpret_with_io( + code: &[u8], + input: I, + output: O, +) -> Result<(), InterpretError> { + Interpreter::from_ascii(code, input).interpret_with_output(output) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +///The result of advancing the interpreter by one step, assuming it didn't terminate +pub enum Status { + NeedsInput, + Output(i8), + Continue, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// The result of advancing the interpreter until the next IO operation, assuming it didn't terminate +pub enum IoStatus { + NeedsInput, + Output(i8), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// An error that occurred while the interpreter was advancing +pub enum ProgramError { + DataPointerUnderflow, + IntegerOverflow, + IntegerUnderflow, + InputReadError, + UnmatchedOpeningBracket, + UnmatchedClosingBracket, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// An error that occurred while the interpreter was being run start-to-end all in one go +pub enum InterpretError { + ProgramError(ProgramError), + EndOfInput, + OutputBufferFull, + OutputWriteError, +} + +impl From for InterpretError { + fn from(e: ProgramError) -> Self { + InterpretError::ProgramError(e) + } +} + +/// A bootlegged version of the standard library's read trait, so as to allow the interpreter to be generic over any `Read` +/// type, as well as over an input buffer. 
+pub trait BootlegRead { + type Error; + fn bootleg_read(&mut self) -> Result, Self::Error>; +} + +impl BootlegRead for T { + type Error = std::io::Error; + fn bootleg_read(&mut self) -> Result, Self::Error> { + let mut buffer = [0]; + match self.read(&mut buffer) { + Ok(0) => Ok(None), + Ok(_) => Ok(Some(buffer[0] as i8)), + Err(e) => Err(e), + } + } +} + +/// A wrapper around a `VecDeque`, to be able to implement `BootlegRead` for it +struct InputBuffer(VecDeque); + +impl BootlegRead for InputBuffer { + type Error = std::convert::Infallible; + fn bootleg_read(&mut self) -> Result, Self::Error> { + Ok(self.0.pop_front()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn adder() { + let mut interpreter = Interpreter { + code: b"[->+<]", //Source: https://en.wikipedia.org/wiki/Brainfuck + instr_ptr: 0, + tape: vec![10, 5], + data_ptr: 0, + input: std::io::empty(), + }; + + while let Some(status) = interpreter.advance_until_io().expect("Unexpected error") { + match status { + IoStatus::NeedsInput => panic!("Requested input in an IO-less program"), + IoStatus::Output(_) => panic!("Produced output in an IO-less program"), + } + } + + assert_eq!(interpreter.tape, vec![0, 15]); + } + + #[test] + fn hello_world() { + let mut interpreter = Interpreter { + //Source: https://en.wikipedia.org/wiki/Brainfuck + code: b"++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.", + instr_ptr: 0, + tape: vec![], + data_ptr: 0, + input: std::io::empty(), + }; + + let mut string = Vec::new(); + interpreter + .interpret_with_output(&mut string) + .expect("Failed to write to output buffer"); + assert_eq!(string, b"Hello World!\n"); + } + + #[test] + fn with_input_buffer() { + let mut interpreter = Interpreter::from_ascii_with_input_buffer(b"+++++.>,[-<->]."); + let output = match interpreter + .advance_until_io() + .expect("Unexpected error") + .expect("Unexpected termination") + { + IoStatus::NeedsInput => 
panic!("Unexpected input request"), + IoStatus::Output(out) => out, + }; + + assert_eq!( + interpreter.advance_until_io(), + Ok(Some(IoStatus::NeedsInput)) + ); + + interpreter.add_input(output); + + assert_eq!( + interpreter.advance_until_io(), + Ok(Some(IoStatus::Output(0))) + ); + assert_eq!(interpreter.advance_until_io(), Ok(None)); + } } diff --git a/src/main.rs b/src/main.rs index f590830..e72027f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,6 @@ use parser::Parser; fn main() { // variables::test(); // NOTE(Able): Add this as a test case - brian::bff_eval("hi".to_string(), "hello".to_string()); let matches = App::new("AbleScript") .version(env!("CARGO_PKG_VERSION")) .author("Able ")