From 0d1f59bf6c1020f6eedda2ddd383be3cff8388cf Mon Sep 17 00:00:00 2001 From: Nathan Fisher Date: Fri, 10 Jan 2025 19:02:07 -0500 Subject: [PATCH] Account for whitespace and line breaks when decoding --- src/decode.rs | 55 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/decode.rs b/src/decode.rs index 6074401..dff83bd 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -1,5 +1,5 @@ use super::*; -use std::io::{self, Read, Write}; +use std::io::{self, ErrorKind, Read, Write}; #[derive(Debug)] pub enum DecoderError { @@ -17,40 +17,57 @@ pub struct Decoder { reader: R, writer: W, alphabet: B32Alphabet, + ignore_whitespace: bool } impl Decoder { - pub fn new(reader: R, writer: W, alphabet: Option) -> Self { + pub fn new(reader: R, writer: W, alphabet: Option, ignore_whitespace: bool) -> Self { Self { reader, writer, alphabet: alphabet.unwrap_or_default(), + ignore_whitespace, } } - pub fn decode(&mut self) -> Result<(), DecoderError> { - let mut buf = [0; 8]; - let mut num: u64 = 0; + pub fn decode(mut self) -> Result { + let mut byte_reader = self.reader.bytes(); 'outer: loop { - if let Err(e) = self.reader.read_exact(&mut buf) { - if e.kind() == io::ErrorKind::UnexpectedEof { - break; + let mut in_buf = [0_u8; 8]; + let mut out_buf = [0_u8; 5]; + let mut num: u64 = 0; + let mut n_bytes = 0; + while n_bytes < 8 { + match byte_reader.next() { + Some(Ok(b)) => { + if self.ignore_whitespace && b.is_ascii_whitespace() { + continue; + } else if b == b'\n' || b == b'\r' { + continue; + } else { + in_buf[n_bytes] = b; + n_bytes += 1; + } + } + Some(Err(e)) if e.kind() == ErrorKind::UnexpectedEof => break, + Some(Err(e)) if e.kind() == ErrorKind::Interrupted => continue, + Some(Err(e)) => return Err(e.into()), + None => break, } } - for c in &buf { + for c in &in_buf { num <<= 5; if !matches!(self.alphabet.pad(), Some(ch) if ch == *c) { let idx = self.alphabet.idx(*c).ok_or(DecoderError::IllegalChar)?; num |= idx as u64; } } - let mut buf = [0; 5]; for i in (0..5).rev() { let b = (num & 0xff) as u8; - buf[i] = b; + out_buf[i] = b; num >>= 8; } - for c in &buf { + for c in &out_buf { if *c == b'\0' { break 'outer; } else { @@ -59,13 +76,7 @@ impl Decoder { } } self.writer.flush()?; - Ok(()) - } -} - -impl Decoder> { - pub fn bytes(self) -> Vec { - self.writer + Ok(self.writer) } } @@ -86,8 +97,8 @@ mod tests { fn hello() { let reader = ENCODED.as_bytes(); let writer = Vec::::new(); - let mut decoder = Decoder::new(reader, writer, None); - decoder.decode().unwrap(); - assert_eq!(HELLO, String::from_utf8(decoder.bytes()).unwrap()); + let decoder = Decoder::new(reader, writer, None, false); + let output = decoder.decode().unwrap(); + assert_eq!(HELLO.as_bytes(), output); } }