From db1680d46f0f2967ef9ecad180df87e1162f8e58 Mon Sep 17 00:00:00 2001 From: Nathan Fisher Date: Fri, 9 Jun 2023 10:31:58 -0400 Subject: [PATCH] Rethink `Time` parser, iterating over characters --- src/time/error.rs | 3 + src/time/parser.rs | 393 +++++++++++++++++++---------------------- src/time/parser.rs.bak | 286 ++++++++++++++++++++++++++++++ 3 files changed, 474 insertions(+), 208 deletions(-) create mode 100644 src/time/parser.rs.bak diff --git a/src/time/error.rs b/src/time/error.rs index 366f66b..08c9493 100644 --- a/src/time/error.rs +++ b/src/time/error.rs @@ -1,8 +1,10 @@ use std::{fmt, num::ParseIntError}; +use super::parser::Mode; #[derive(Debug)] pub enum Error { ParseInt, + UnexpectedChar(Mode, char), InvalidMonth, InvalidDay, InvalidHour, @@ -12,6 +14,7 @@ pub enum Error { InvalidOffset, TrailingGarbage, Truncated, + MissingSeparator, } impl fmt::Display for Error { diff --git a/src/time/parser.rs b/src/time/parser.rs index 37055c8..96f6125 100644 --- a/src/time/parser.rs +++ b/src/time/parser.rs @@ -7,8 +7,8 @@ enum Format { Extended, } -#[derive(Debug, PartialEq)] -enum Mode { +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Mode { Year, Month, Day, @@ -24,9 +24,11 @@ pub struct Parser<'a> { text: &'a str, format: Format, mode: Mode, + buffer: String, year: Option, month: Option, day: Option, + sep: bool, hour: Option, minute: Option, second: Option, @@ -37,11 +39,13 @@ impl<'a> Parser<'a> { pub fn new(text: &'a str) -> Self { Self { text, - format: Format::Extended, + format: Format::Basic, + buffer: String::new(), mode: Mode::Year, year: None, month: None, day: None, + sep: false, hour: None, minute: None, second: None, @@ -49,230 +53,203 @@ impl<'a> Parser<'a> { } } - fn parse_year(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Year); - let year = self.text.get(..3).ok_or(Error::Truncated)?.parse()?; - self.year = Some(year); - if let Some(c) = self.text.chars().nth(5) { - match c { - '-' => self.format = Format::Extended, - _ => self.format = Format::Basic, - } - } - self.mode = Mode::Month; - Ok(()) - } - - fn parse_month(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Month); - let month = match self.format { - Format::Basic => self.text.get(4..6), - Format::Extended => self.text.get(5..7), - }; - if let Some(m) = month { - let month = match m.parse() { - Ok(m) => m, - Err(_) => { - self.mode = Mode::TimeZone; - return Ok(()); + fn number(&mut self, c: char) -> Result<(), Error> { + self.buffer.push(c); + match self.mode { + Mode::Year => { + if self.buffer.len() == 4 { + let y: u32 = self.buffer.parse()?; + self.buffer.clear(); + self.year = Some(y); + self.mode = Mode::Month; } - }; - if month > 12 { - return Err(Error::InvalidMonth); } - self.month = Some(month); - self.mode = Mode::Day; - } else { - self.mode = Mode::TimeZone; - } - Ok(()) - } - - fn parse_day(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Day); - let day = match self.format { - Format::Basic => self.text.get(6..8), - Format::Extended => self.text.get(8..10), - }; - if let Some(d) = day { - let day = match d.parse() { - Ok(d) => d, - Err(_) => { - self.mode = Mode::TimeZone; - return Ok(()); + Mode::Month => { + if self.buffer.len() == 2 { + let m: u8 = self.buffer.parse()?; + self.buffer.clear(); + self.month = Some(m); + self.mode = Mode::Day; + self.sep = false; } - }; - let max = match self.month { - Some(1|3|5|7|10|12) => 31, - Some(2) => { - if self.year.unwrap() % 4 == 0 { - 29 - } else { - 28 + } + Mode::Day => { + if self.buffer.len() == 2 { + let d: u8 = self.buffer.parse()?; + self.buffer.clear(); + self.day = Some(d); + self.mode = Mode::Hour; + self.sep = false; + } else if self.format == Format::Extended && !self.sep { + return Err(Error::MissingSeparator); + } + } + Mode::Hour => { + if self.buffer.len() == 2 { + let h: u8 = self.buffer.parse()?; + self.buffer.clear(); + self.hour = Some(h); + self.mode = Mode::Minute; + self.sep = false; + } else if self.format == Format::Extended && !self.sep { + return Err(Error::MissingSeparator); + } + } + Mode::Minute => { + if self.buffer.len() == 2 { + let m = self.buffer.parse()?; + self.buffer.clear(); + self.minute = Some(m); + self.mode = Mode::Second; + self.sep = false; + } else if self.format == Format::Extended && !self.sep { + return Err(Error::MissingSeparator); + } + } + Mode::Second => { + if self.buffer.len() == 2 { + let s = self.buffer.parse()?; + self.buffer.clear(); + self.second = Some(s); + self.mode = Mode::TimeZone; + self.sep = false; + } else if self.format == Format::Extended && !self.sep { + return Err(Error::MissingSeparator); + } + } + Mode::TimeZone => match self.format { + Format::Basic => { + if self.buffer.len() == 5 { + self.mode = Mode::Finish; } }, - Some(4|6|9|11) => 30, - _ => return Err(Error::InvalidMonth), - }; - if day > max { - return Err(Error::InvalidDay); - } - self.day = Some(day); - let tidx = match self.format { - Format::Basic => 8, - Format::Extended => 10, - }; - if let Some(c) = self.text.chars().nth(tidx) { - match c { - 'T' => self.mode = Mode::Hour, - 'Z' | '-' | '+' => self.mode = Mode::TimeZone, - _ => return Err(Error::InvalidHour), - } - } else { - self.mode = Mode::Finish; - } - } else { - self.mode = Mode::TimeZone; + Format::Extended => { + if self.buffer.len() > 3 && !self.sep { + return Err(Error::MissingSeparator); + } else if self.buffer.len() == 5 { + self.mode = Mode::Finish; + } + }, + }, + Mode::Finish => return Err(Error::TrailingGarbage), } Ok(()) } - fn parse_hour(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Hour); - let hour = match self.format { - Format::Basic => self.text.get(9..11), - Format::Extended => self.text.get(11..13), - }; - if let Some(h) = hour { - let hour = match h.parse() { - Ok(h) => h, - Err(_) => { - self.mode = Mode::TimeZone; - return Ok(()); - } - }; - if hour > 24 { - return Err(Error::InvalidHour); - } - self.hour = Some(hour); - self.mode = Mode::Minute; - } else { - self.mode = Mode::TimeZone; - } - Ok(()) - } - - fn parse_minute(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Minute); - let minute = match self.format { - Format::Basic => self.text.get(11..13), - Format::Extended => self.text.get(14..16), - }; - if let Some(m) = minute { - let minute = match m.parse() { - Ok(m) => m, - Err(_) => { - self.mode = Mode::TimeZone; - return Ok(()); - } - }; - if minute > 60 { - return Err(Error::InvalidMinute); - } - self.minute = Some(minute); - self.mode = Mode::Second; - } else { - self.mode = Mode::TimeZone; - } - Ok(()) - } - - fn parse_second(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::Second); - let second = match self.format { - Format::Basic => self.text.get(13..15), - Format::Extended => self.text.get(17..19), - }; - if let Some(s) = second { - let second = match s.parse() { - Ok(s) => s, - Err(_) => { - self.mode = Mode::TimeZone; - return Ok(()); - } - }; - if second > 60 { - return Err(Error::InvalidSecond); - } - self.second = Some(second); - } - self.mode = Mode::TimeZone; - Ok(()) - } - - fn parse_timezone(&mut self) -> Result<(), Error> { - assert_eq!(self.mode, Mode::TimeZone); - let idx = if self.second.is_some() { - match self.format { - Format::Basic => 17, - Format::Extended => 21, - } - } else if self.minute.is_some() { - match self.format { - Format::Basic => 15, - Format::Extended => 19, - } - } else if self.hour.is_some() { - match self.format { - Format::Basic => 13, - Format::Extended => 16, - } - } else if self.day.is_some() { - match self.format { - Format::Basic => 9, - Format::Extended => 11, - } - } else if self.month.is_some() { - match self.format { - Format::Basic => 6, - Format::Extended => 8, - } - } else { - 4 - }; - match self.text.chars().nth(idx) { - Some('Z') => { - if self.text.len() > idx + 2 { - return Err(Error::TrailingGarbage); + fn dash(&mut self, c: char) -> Result<(), Error> { + match self.mode { + Mode::Year => return Err(Error::UnexpectedChar(Mode::Year, '-')), + Mode::Month => { + if self.month.is_some() { + return Err(Error::UnexpectedChar(self.mode, '-')); } else { - self.tz = Some(TimeZone::UTC); - return Ok(()); + self.format = Format::Extended; + self.sep = true; } }, - Some('-') => todo!(), - Some('+') => todo!(), - None => self.mode = Mode::Finish, - _ => return Err(Error::InvalidTimezone), + Mode::Day => { + if self.day.is_some() || self.format == Format::Basic { + return Err(Error::UnexpectedChar(self.mode, '-')); + } else { + self.sep = true; + } + }, + Mode::Hour | Mode::Minute | Mode::Second => { + if !self.buffer.is_empty() { + return Err(Error::UnexpectedChar(self.mode, '-')); + } else { + self.buffer.push('-'); + self.mode = Mode::TimeZone; + } + }, + Mode::TimeZone => { + if self.buffer.is_empty() { + self.buffer.push('-'); + } else { + return Err(Error::UnexpectedChar(self.mode, '-')); + } + }, + Mode::Finish => return Err(Error::TrailingGarbage), } + Ok(()) + } + + fn colon(&mut self, c: char) -> Result<(), Error> { + match self.mode { + Mode::Year | Mode::Month | Mode::Day => return Err(Error::UnexpectedChar(self.mode, ':')), + Mode::Hour | Mode::Minute | Mode::Second => { + if !self.buffer.is_empty() || self.format == Format::Basic { + return Err(Error::UnexpectedChar(self.mode, ':')); + } else { + self.sep = true; + } + }, + Mode::TimeZone => { + if !self.buffer.len() == 2 || self.format == Format::Basic { + return Err(Error::UnexpectedChar(self.mode, ':')); + } else { + self.sep = true; + } + }, + Mode::Finish => return Err(Error::TrailingGarbage), + } + Ok(()) + } + + fn tee(&mut self, c: char) -> Result<(), Error> { + if self.mode != Mode::Hour || !self.buffer.is_empty() { + Err(Error::UnexpectedChar(self.mode, 'T')) + } else { + self.sep = true; + Ok(()) + } + } + + fn zed(&mut self, c: char) -> Result<(), Error> { + if self.mode == Mode::Year || !self.buffer.is_empty() { + return Err(Error::UnexpectedChar(self.mode, 'Z')); + } else { + self.tz = Some(TimeZone::UTC); + self.mode = Mode::Finish; + } + Ok(()) + } + + fn parse_tz_basic(&mut self) -> Result<(), Error> { todo!() } + fn parse_tz_extended(&mut self) -> Result<(), Error> { + if !self.sep { + Err(Error::MissingSeparator) + } else { + self.parse_tz_basic() + } + } + pub fn parse(mut self) -> Result { - loop { - match self.mode { - Mode::Year => self.parse_year()?, - Mode::Month => self.parse_month()?, - Mode::Day => self.parse_day()?, - Mode::Hour => self.parse_hour()?, - Mode::Minute => self.parse_minute()?, - Mode::Second => self.parse_second()?, - Mode::TimeZone => { - self.parse_timezone()?; - break; - } - Mode::Finish => break, + for c in self.text.chars() { + match c { + x if x.is_numeric() => self.number(c)?, + '-' => self.dash(c)?, + ':' => self.colon(c)?, + 'T' => self.tee(c)?, + 'Z' => self.zed(c)?, + _ => return Err(Error::UnexpectedChar(self.mode, c)), } } + match self.mode { + Mode::TimeZone | Mode::Finish => { + if self.tz == Some(TimeZone::UTC) && !self.buffer.is_empty() { + return Err(Error::TrailingGarbage); + } + match self.format { + Format::Basic => self.parse_tz_basic()?, + Format::Extended => self.parse_tz_extended()?, + } + }, + _ => return Err(Error::Truncated), + } Ok(DateTime { year: self.year.unwrap(), month: self.month, diff --git a/src/time/parser.rs.bak b/src/time/parser.rs.bak new file mode 100644 index 0000000..37055c8 --- /dev/null +++ b/src/time/parser.rs.bak @@ -0,0 +1,286 @@ +//! Implements a parser for ISO-8601 format Time +use super::{DateTime, Error, TimeZone}; + +#[derive(Debug, PartialEq)] +enum Format { + Basic, + Extended, +} + +#[derive(Debug, PartialEq)] +enum Mode { + Year, + Month, + Day, + Hour, + Minute, + Second, + TimeZone, + Finish, +} + +#[derive(Debug)] +pub struct Parser<'a> { + text: &'a str, + format: Format, + mode: Mode, + year: Option, + month: Option, + day: Option, + hour: Option, + minute: Option, + second: Option, + tz: Option, +} + +impl<'a> Parser<'a> { + pub fn new(text: &'a str) -> Self { + Self { + text, + format: Format::Extended, + mode: Mode::Year, + year: None, + month: None, + day: None, + hour: None, + minute: None, + second: None, + tz: None, + } + } + + fn parse_year(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Year); + let year = self.text.get(..3).ok_or(Error::Truncated)?.parse()?; + self.year = Some(year); + if let Some(c) = self.text.chars().nth(5) { + match c { + '-' => self.format = Format::Extended, + _ => self.format = Format::Basic, + } + } + self.mode = Mode::Month; + Ok(()) + } + + fn parse_month(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Month); + let month = match self.format { + Format::Basic => self.text.get(4..6), + Format::Extended => self.text.get(5..7), + }; + if let Some(m) = month { + let month = match m.parse() { + Ok(m) => m, + Err(_) => { + self.mode = Mode::TimeZone; + return Ok(()); + } + }; + if month > 12 { + return Err(Error::InvalidMonth); + } + self.month = Some(month); + self.mode = Mode::Day; + } else { + self.mode = Mode::TimeZone; + } + Ok(()) + } + + fn parse_day(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Day); + let day = match self.format { + Format::Basic => self.text.get(6..8), + Format::Extended => self.text.get(8..10), + }; + if let Some(d) = day { + let day = match d.parse() { + Ok(d) => d, + Err(_) => { + self.mode = Mode::TimeZone; + return Ok(()); + } + }; + let max = match self.month { + Some(1|3|5|7|10|12) => 31, + Some(2) => { + if self.year.unwrap() % 4 == 0 { + 29 + } else { + 28 + } + }, + Some(4|6|9|11) => 30, + _ => return Err(Error::InvalidMonth), + }; + if day > max { + return Err(Error::InvalidDay); + } + self.day = Some(day); + let tidx = match self.format { + Format::Basic => 8, + Format::Extended => 10, + }; + if let Some(c) = self.text.chars().nth(tidx) { + match c { + 'T' => self.mode = Mode::Hour, + 'Z' | '-' | '+' => self.mode = Mode::TimeZone, + _ => return Err(Error::InvalidHour), + } + } else { + self.mode = Mode::Finish; + } + } else { + self.mode = Mode::TimeZone; + } + Ok(()) + } + + fn parse_hour(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Hour); + let hour = match self.format { + Format::Basic => self.text.get(9..11), + Format::Extended => self.text.get(11..13), + }; + if let Some(h) = hour { + let hour = match h.parse() { + Ok(h) => h, + Err(_) => { + self.mode = Mode::TimeZone; + return Ok(()); + } + }; + if hour > 24 { + return Err(Error::InvalidHour); + } + self.hour = Some(hour); + self.mode = Mode::Minute; + } else { + self.mode = Mode::TimeZone; + } + Ok(()) + } + + fn parse_minute(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Minute); + let minute = match self.format { + Format::Basic => self.text.get(11..13), + Format::Extended => self.text.get(14..16), + }; + if let Some(m) = minute { + let minute = match m.parse() { + Ok(m) => m, + Err(_) => { + self.mode = Mode::TimeZone; + return Ok(()); + } + }; + if minute > 60 { + return Err(Error::InvalidMinute); + } + self.minute = Some(minute); + self.mode = Mode::Second; + } else { + self.mode = Mode::TimeZone; + } + Ok(()) + } + + fn parse_second(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::Second); + let second = match self.format { + Format::Basic => self.text.get(13..15), + Format::Extended => self.text.get(17..19), + }; + if let Some(s) = second { + let second = match s.parse() { + Ok(s) => s, + Err(_) => { + self.mode = Mode::TimeZone; + return Ok(()); + } + }; + if second > 60 { + return Err(Error::InvalidSecond); + } + self.second = Some(second); + } + self.mode = Mode::TimeZone; + Ok(()) + } + + fn parse_timezone(&mut self) -> Result<(), Error> { + assert_eq!(self.mode, Mode::TimeZone); + let idx = if self.second.is_some() { + match self.format { + Format::Basic => 17, + Format::Extended => 21, + } + } else if self.minute.is_some() { + match self.format { + Format::Basic => 15, + Format::Extended => 19, + } + } else if self.hour.is_some() { + match self.format { + Format::Basic => 13, + Format::Extended => 16, + } + } else if self.day.is_some() { + match self.format { + Format::Basic => 9, + Format::Extended => 11, + } + } else if self.month.is_some() { + match self.format { + Format::Basic => 6, + Format::Extended => 8, + } + } else { + 4 + }; + match self.text.chars().nth(idx) { + Some('Z') => { + if self.text.len() > idx + 2 { + return Err(Error::TrailingGarbage); + } else { + self.tz = Some(TimeZone::UTC); + return Ok(()); + } + }, + Some('-') => todo!(), + Some('+') => todo!(), + None => self.mode = Mode::Finish, + _ => return Err(Error::InvalidTimezone), + } + todo!() + } + + pub fn parse(mut self) -> Result { + loop { + match self.mode { + Mode::Year => self.parse_year()?, + Mode::Month => self.parse_month()?, + Mode::Day => self.parse_day()?, + Mode::Hour => self.parse_hour()?, + Mode::Minute => self.parse_minute()?, + Mode::Second => self.parse_second()?, + Mode::TimeZone => { + self.parse_timezone()?; + break; + } + Mode::Finish => break, + } + } + Ok(DateTime { + year: self.year.unwrap(), + month: self.month, + day: self.day, + hour: self.hour, + minute: self.minute, + second: self.second, + tz: self.tz, + }) + } +}