Account for whitespace and line breaks when decoding

This commit is contained in:
Nathan Fisher 2025-01-10 19:02:07 -05:00
parent 2e2ac8154e
commit 0d1f59bf6c

View file

@@ -1,5 +1,5 @@
use super::*;
use std::io::{self, Read, Write};
use std::io::{self, ErrorKind, Read, Write};
#[derive(Debug)]
pub enum DecoderError {
@@ -17,40 +17,57 @@ pub struct Decoder<R: Read, W: Write> {
reader: R,
writer: W,
alphabet: B32Alphabet,
ignore_whitespace: bool
}
impl<R: Read, W: Write> Decoder<R, W> {
pub fn new(reader: R, writer: W, alphabet: Option<B32Alphabet>) -> Self {
pub fn new(reader: R, writer: W, alphabet: Option<B32Alphabet>, ignore_whitespace: bool) -> Self {
Self {
reader,
writer,
alphabet: alphabet.unwrap_or_default(),
ignore_whitespace,
}
}
pub fn decode(&mut self) -> Result<(), DecoderError> {
let mut buf = [0; 8];
let mut num: u64 = 0;
pub fn decode(mut self) -> Result<W, DecoderError> {
let mut byte_reader = self.reader.bytes();
'outer: loop {
if let Err(e) = self.reader.read_exact(&mut buf) {
if e.kind() == io::ErrorKind::UnexpectedEof {
break;
let mut in_buf = [0_u8; 8];
let mut out_buf = [0_u8; 5];
let mut num: u64 = 0;
let mut n_bytes = 0;
while n_bytes < 8 {
match byte_reader.next() {
Some(Ok(b)) => {
if self.ignore_whitespace && b.is_ascii_whitespace() {
continue;
} else if b == b'\n' || b == b'\r' {
continue;
} else {
in_buf[n_bytes] = b;
n_bytes += 1;
}
}
for c in &buf {
Some(Err(e)) if e.kind() == ErrorKind::UnexpectedEof => break,
Some(Err(e)) if e.kind() == ErrorKind::Interrupted => continue,
Some(Err(e)) => return Err(e.into()),
None => break,
}
}
for c in &in_buf {
num <<= 5;
if !matches!(self.alphabet.pad(), Some(ch) if ch == *c) {
let idx = self.alphabet.idx(*c).ok_or(DecoderError::IllegalChar)?;
num |= idx as u64;
}
}
let mut buf = [0; 5];
for i in (0..5).rev() {
let b = (num & 0xff) as u8;
buf[i] = b;
out_buf[i] = b;
num >>= 8;
}
for c in &buf {
for c in &out_buf {
if *c == b'\0' {
break 'outer;
} else {
@@ -59,13 +76,7 @@ impl<R: Read, W: Write> Decoder<R, W> {
}
}
self.writer.flush()?;
Ok(())
}
}
impl<R: Read> Decoder<R, Vec<u8>> {
pub fn bytes(self) -> Vec<u8> {
self.writer
Ok(self.writer)
}
}
@@ -86,8 +97,8 @@ mod tests {
fn hello() {
let reader = ENCODED.as_bytes();
let writer = Vec::<u8>::new();
let mut decoder = Decoder::new(reader, writer, None);
decoder.decode().unwrap();
assert_eq!(HELLO, String::from_utf8(decoder.bytes()).unwrap());
let decoder = Decoder::new(reader, writer, None, false);
let output = decoder.decode().unwrap();
assert_eq!(HELLO.as_bytes(), output);
}
}