Account for whitespace and line breaks when decoding

This commit is contained in:
Nathan Fisher 2025-01-10 19:02:07 -05:00
parent 2e2ac8154e
commit 0d1f59bf6c

View file

@@ -1,5 +1,5 @@
use super::*; use super::*;
use std::io::{self, Read, Write}; use std::io::{self, ErrorKind, Read, Write};
#[derive(Debug)] #[derive(Debug)]
pub enum DecoderError { pub enum DecoderError {
@@ -17,40 +17,57 @@ pub struct Decoder<R: Read, W: Write> {
reader: R, reader: R,
writer: W, writer: W,
alphabet: B32Alphabet, alphabet: B32Alphabet,
ignore_whitespace: bool
} }
impl<R: Read, W: Write> Decoder<R, W> { impl<R: Read, W: Write> Decoder<R, W> {
pub fn new(reader: R, writer: W, alphabet: Option<B32Alphabet>) -> Self { pub fn new(reader: R, writer: W, alphabet: Option<B32Alphabet>, ignore_whitespace: bool) -> Self {
Self { Self {
reader, reader,
writer, writer,
alphabet: alphabet.unwrap_or_default(), alphabet: alphabet.unwrap_or_default(),
ignore_whitespace,
} }
} }
pub fn decode(&mut self) -> Result<(), DecoderError> { pub fn decode(mut self) -> Result<W, DecoderError> {
let mut buf = [0; 8]; let mut byte_reader = self.reader.bytes();
let mut num: u64 = 0;
'outer: loop { 'outer: loop {
if let Err(e) = self.reader.read_exact(&mut buf) { let mut in_buf = [0_u8; 8];
if e.kind() == io::ErrorKind::UnexpectedEof { let mut out_buf = [0_u8; 5];
break; let mut num: u64 = 0;
let mut n_bytes = 0;
while n_bytes < 8 {
match byte_reader.next() {
Some(Ok(b)) => {
if self.ignore_whitespace && b.is_ascii_whitespace() {
continue;
} else if b == b'\n' || b == b'\r' {
continue;
} else {
in_buf[n_bytes] = b;
n_bytes += 1;
}
}
Some(Err(e)) if e.kind() == ErrorKind::UnexpectedEof => break,
Some(Err(e)) if e.kind() == ErrorKind::Interrupted => continue,
Some(Err(e)) => return Err(e.into()),
None => break,
} }
} }
for c in &buf { for c in &in_buf {
num <<= 5; num <<= 5;
if !matches!(self.alphabet.pad(), Some(ch) if ch == *c) { if !matches!(self.alphabet.pad(), Some(ch) if ch == *c) {
let idx = self.alphabet.idx(*c).ok_or(DecoderError::IllegalChar)?; let idx = self.alphabet.idx(*c).ok_or(DecoderError::IllegalChar)?;
num |= idx as u64; num |= idx as u64;
} }
} }
let mut buf = [0; 5];
for i in (0..5).rev() { for i in (0..5).rev() {
let b = (num & 0xff) as u8; let b = (num & 0xff) as u8;
buf[i] = b; out_buf[i] = b;
num >>= 8; num >>= 8;
} }
for c in &buf { for c in &out_buf {
if *c == b'\0' { if *c == b'\0' {
break 'outer; break 'outer;
} else { } else {
@@ -59,13 +76,7 @@ impl<R: Read, W: Write> Decoder<R, W> {
} }
} }
self.writer.flush()?; self.writer.flush()?;
Ok(()) Ok(self.writer)
}
}
impl<R: Read> Decoder<R, Vec<u8>> {
pub fn bytes(self) -> Vec<u8> {
self.writer
} }
} }
@@ -86,8 +97,8 @@ mod tests {
fn hello() { fn hello() {
let reader = ENCODED.as_bytes(); let reader = ENCODED.as_bytes();
let writer = Vec::<u8>::new(); let writer = Vec::<u8>::new();
let mut decoder = Decoder::new(reader, writer, None); let decoder = Decoder::new(reader, writer, None, false);
decoder.decode().unwrap(); let output = decoder.decode().unwrap();
assert_eq!(HELLO, String::from_utf8(decoder.bytes()).unwrap()); assert_eq!(HELLO.as_bytes(), output);
} }
} }