Rethink Time parser, iterating over characters

This commit is contained in:
Nathan Fisher 2023-06-09 10:31:58 -04:00
parent 230b8ac48a
commit db1680d46f
3 changed files with 474 additions and 208 deletions

View file

@ -1,8 +1,10 @@
use std::{fmt, num::ParseIntError}; use std::{fmt, num::ParseIntError};
use super::parser::Mode;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
ParseInt, ParseInt,
UnexpectedChar(Mode, char),
InvalidMonth, InvalidMonth,
InvalidDay, InvalidDay,
InvalidHour, InvalidHour,
@ -12,6 +14,7 @@ pub enum Error {
InvalidOffset, InvalidOffset,
TrailingGarbage, TrailingGarbage,
Truncated, Truncated,
MissingSeparator,
} }
impl fmt::Display for Error { impl fmt::Display for Error {

View file

@ -7,8 +7,8 @@ enum Format {
Extended, Extended,
} }
#[derive(Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
enum Mode { pub enum Mode {
Year, Year,
Month, Month,
Day, Day,
@ -24,9 +24,11 @@ pub struct Parser<'a> {
text: &'a str, text: &'a str,
format: Format, format: Format,
mode: Mode, mode: Mode,
buffer: String,
year: Option<u32>, year: Option<u32>,
month: Option<u8>, month: Option<u8>,
day: Option<u8>, day: Option<u8>,
sep: bool,
hour: Option<u8>, hour: Option<u8>,
minute: Option<u8>, minute: Option<u8>,
second: Option<u8>, second: Option<u8>,
@ -37,11 +39,13 @@ impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self { pub fn new(text: &'a str) -> Self {
Self { Self {
text, text,
format: Format::Extended, format: Format::Basic,
buffer: String::new(),
mode: Mode::Year, mode: Mode::Year,
year: None, year: None,
month: None, month: None,
day: None, day: None,
sep: false,
hour: None, hour: None,
minute: None, minute: None,
second: None, second: None,
@ -49,230 +53,203 @@ impl<'a> Parser<'a> {
} }
} }
fn parse_year(&mut self) -> Result<(), Error> { fn number(&mut self, c: char) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Year); self.buffer.push(c);
let year = self.text.get(..3).ok_or(Error::Truncated)?.parse()?; match self.mode {
self.year = Some(year); Mode::Year => {
if let Some(c) = self.text.chars().nth(5) { if self.buffer.len() == 4 {
match c { let y: u32 = self.buffer.parse()?;
'-' => self.format = Format::Extended, self.buffer.clear();
_ => self.format = Format::Basic, self.year = Some(y);
} self.mode = Mode::Month;
}
self.mode = Mode::Month;
Ok(())
}
fn parse_month(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Month);
let month = match self.format {
Format::Basic => self.text.get(4..6),
Format::Extended => self.text.get(5..7),
};
if let Some(m) = month {
let month = match m.parse() {
Ok(m) => m,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
} }
};
if month > 12 {
return Err(Error::InvalidMonth);
} }
self.month = Some(month); Mode::Month => {
self.mode = Mode::Day; if self.buffer.len() == 2 {
} else { let m: u8 = self.buffer.parse()?;
self.mode = Mode::TimeZone; self.buffer.clear();
} self.month = Some(m);
Ok(()) self.mode = Mode::Day;
} self.sep = false;
fn parse_day(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Day);
let day = match self.format {
Format::Basic => self.text.get(6..8),
Format::Extended => self.text.get(8..10),
};
if let Some(d) = day {
let day = match d.parse() {
Ok(d) => d,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
} }
}; }
let max = match self.month { Mode::Day => {
Some(1|3|5|7|10|12) => 31, if self.buffer.len() == 2 {
Some(2) => { let d: u8 = self.buffer.parse()?;
if self.year.unwrap() % 4 == 0 { self.buffer.clear();
29 self.day = Some(d);
} else { self.mode = Mode::Hour;
28 self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Hour => {
if self.buffer.len() == 2 {
let h: u8 = self.buffer.parse()?;
self.buffer.clear();
self.hour = Some(h);
self.mode = Mode::Minute;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Minute => {
if self.buffer.len() == 2 {
let m = self.buffer.parse()?;
self.buffer.clear();
self.minute = Some(m);
self.mode = Mode::Second;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Second => {
if self.buffer.len() == 2 {
let s = self.buffer.parse()?;
self.buffer.clear();
self.second = Some(s);
self.mode = Mode::TimeZone;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::TimeZone => match self.format {
Format::Basic => {
if self.buffer.len() == 5 {
self.mode = Mode::Finish;
} }
}, },
Some(4|6|9|11) => 30, Format::Extended => {
_ => return Err(Error::InvalidMonth), if self.buffer.len() > 3 && !self.sep {
}; return Err(Error::MissingSeparator);
if day > max { } else if self.buffer.len() == 5 {
return Err(Error::InvalidDay); self.mode = Mode::Finish;
} }
self.day = Some(day); },
let tidx = match self.format { },
Format::Basic => 8, Mode::Finish => return Err(Error::TrailingGarbage),
Format::Extended => 10,
};
if let Some(c) = self.text.chars().nth(tidx) {
match c {
'T' => self.mode = Mode::Hour,
'Z' | '-' | '+' => self.mode = Mode::TimeZone,
_ => return Err(Error::InvalidHour),
}
} else {
self.mode = Mode::Finish;
}
} else {
self.mode = Mode::TimeZone;
} }
Ok(()) Ok(())
} }
fn parse_hour(&mut self) -> Result<(), Error> { fn dash(&mut self, c: char) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Hour); match self.mode {
let hour = match self.format { Mode::Year => return Err(Error::UnexpectedChar(Mode::Year, '-')),
Format::Basic => self.text.get(9..11), Mode::Month => {
Format::Extended => self.text.get(11..13), if self.month.is_some() {
}; return Err(Error::UnexpectedChar(self.mode, '-'));
if let Some(h) = hour {
let hour = match h.parse() {
Ok(h) => h,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if hour > 24 {
return Err(Error::InvalidHour);
}
self.hour = Some(hour);
self.mode = Mode::Minute;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_minute(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Minute);
let minute = match self.format {
Format::Basic => self.text.get(11..13),
Format::Extended => self.text.get(14..16),
};
if let Some(m) = minute {
let minute = match m.parse() {
Ok(m) => m,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if minute > 60 {
return Err(Error::InvalidMinute);
}
self.minute = Some(minute);
self.mode = Mode::Second;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_second(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Second);
let second = match self.format {
Format::Basic => self.text.get(13..15),
Format::Extended => self.text.get(17..19),
};
if let Some(s) = second {
let second = match s.parse() {
Ok(s) => s,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if second > 60 {
return Err(Error::InvalidSecond);
}
self.second = Some(second);
}
self.mode = Mode::TimeZone;
Ok(())
}
fn parse_timezone(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::TimeZone);
let idx = if self.second.is_some() {
match self.format {
Format::Basic => 17,
Format::Extended => 21,
}
} else if self.minute.is_some() {
match self.format {
Format::Basic => 15,
Format::Extended => 19,
}
} else if self.hour.is_some() {
match self.format {
Format::Basic => 13,
Format::Extended => 16,
}
} else if self.day.is_some() {
match self.format {
Format::Basic => 9,
Format::Extended => 11,
}
} else if self.month.is_some() {
match self.format {
Format::Basic => 6,
Format::Extended => 8,
}
} else {
4
};
match self.text.chars().nth(idx) {
Some('Z') => {
if self.text.len() > idx + 2 {
return Err(Error::TrailingGarbage);
} else { } else {
self.tz = Some(TimeZone::UTC); self.format = Format::Extended;
return Ok(()); self.sep = true;
} }
}, },
Some('-') => todo!(), Mode::Day => {
Some('+') => todo!(), if self.day.is_some() || self.format == Format::Basic {
None => self.mode = Mode::Finish, return Err(Error::UnexpectedChar(self.mode, '-'));
_ => return Err(Error::InvalidTimezone), } else {
self.sep = true;
}
},
Mode::Hour | Mode::Minute | Mode::Second => {
if !self.buffer.is_empty() {
return Err(Error::UnexpectedChar(self.mode, '-'));
} else {
self.buffer.push('-');
self.mode = Mode::TimeZone;
}
},
Mode::TimeZone => {
if self.buffer.is_empty() {
self.buffer.push('-');
} else {
return Err(Error::UnexpectedChar(self.mode, '-'));
}
},
Mode::Finish => return Err(Error::TrailingGarbage),
} }
Ok(())
}
fn colon(&mut self, c: char) -> Result<(), Error> {
match self.mode {
Mode::Year | Mode::Month | Mode::Day => return Err(Error::UnexpectedChar(self.mode, ':')),
Mode::Hour | Mode::Minute | Mode::Second => {
if !self.buffer.is_empty() || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, ':'));
} else {
self.sep = true;
}
},
Mode::TimeZone => {
if !self.buffer.len() == 2 || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, ':'));
} else {
self.sep = true;
}
},
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
fn tee(&mut self, c: char) -> Result<(), Error> {
if self.mode != Mode::Hour || !self.buffer.is_empty() {
Err(Error::UnexpectedChar(self.mode, 'T'))
} else {
self.sep = true;
Ok(())
}
}
fn zed(&mut self, c: char) -> Result<(), Error> {
if self.mode == Mode::Year || !self.buffer.is_empty() {
return Err(Error::UnexpectedChar(self.mode, 'Z'));
} else {
self.tz = Some(TimeZone::UTC);
self.mode = Mode::Finish;
}
Ok(())
}
fn parse_tz_basic(&mut self) -> Result<(), Error> {
todo!() todo!()
} }
fn parse_tz_extended(&mut self) -> Result<(), Error> {
if !self.sep {
Err(Error::MissingSeparator)
} else {
self.parse_tz_basic()
}
}
pub fn parse(mut self) -> Result<DateTime, Error> { pub fn parse(mut self) -> Result<DateTime, Error> {
loop { for c in self.text.chars() {
match self.mode { match c {
Mode::Year => self.parse_year()?, x if x.is_numeric() => self.number(c)?,
Mode::Month => self.parse_month()?, '-' => self.dash(c)?,
Mode::Day => self.parse_day()?, ':' => self.colon(c)?,
Mode::Hour => self.parse_hour()?, 'T' => self.tee(c)?,
Mode::Minute => self.parse_minute()?, 'Z' => self.zed(c)?,
Mode::Second => self.parse_second()?, _ => return Err(Error::UnexpectedChar(self.mode, c)),
Mode::TimeZone => {
self.parse_timezone()?;
break;
}
Mode::Finish => break,
} }
} }
match self.mode {
Mode::TimeZone | Mode::Finish => {
if self.tz == Some(TimeZone::UTC) && !self.buffer.is_empty() {
return Err(Error::TrailingGarbage);
}
match self.format {
Format::Basic => self.parse_tz_basic()?,
Format::Extended => self.parse_tz_extended()?,
}
},
_ => return Err(Error::Truncated),
}
Ok(DateTime { Ok(DateTime {
year: self.year.unwrap(), year: self.year.unwrap(),
month: self.month, month: self.month,

286
src/time/parser.rs.bak Normal file
View file

@ -0,0 +1,286 @@
//! Implements a parser for date and time strings in ISO 8601 format.
use super::{DateTime, Error, TimeZone};
/// Layout of the ISO 8601 text being parsed.
///
/// The parser reads each component from a fixed byte range, and the range
/// depends on whether separator characters sit between the components.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Format {
    /// Components follow each other directly (month at bytes `4..6`).
    Basic,
    /// Components are separated by a delimiter (month at bytes `5..7`).
    Extended,
}
/// The component the parser will try to read next.
///
/// `Parser::parse` dispatches on this value in a loop; each step stores the
/// next mode before returning.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Mode {
    /// Read the year.
    Year,
    /// Read the month.
    Month,
    /// Read the day of the month.
    Day,
    /// Read the hour.
    Hour,
    /// Read the minute.
    Minute,
    /// Read the second.
    Second,
    /// Read the timezone designator; this is the last parsing step.
    TimeZone,
    /// Nothing left to read; the parse loop stops.
    Finish,
}
/// Step-wise parser state for a single date/time string.
///
/// Create it with `Parser::new` and consume it with `Parser::parse`. Every
/// component starts as `None` and is filled in by the matching `parse_*`
/// step when that component is present in the input.
#[derive(Debug)]
pub struct Parser<'a> {
// The input text; components are sliced out of it by byte range.
text: &'a str,
// Layout of the input; selects which byte ranges the steps read.
format: Format,
// The component that the next step will read.
mode: Mode,
// Parsed year, set by `parse_year`.
year: Option<u32>,
// Parsed month, set by `parse_month`.
month: Option<u8>,
// Parsed day of the month, set by `parse_day`.
day: Option<u8>,
// Parsed hour, set by `parse_hour`.
hour: Option<u8>,
// Parsed minute, set by `parse_minute`.
minute: Option<u8>,
// Parsed second, set by `parse_second`.
second: Option<u8>,
// Parsed timezone; `parse_timezone` stores `TimeZone::UTC` for a `Z` designator.
tz: Option<TimeZone>,
}
impl<'a> Parser<'a> {
/// Builds a parser over `text` with nothing parsed yet.
///
/// The parser starts in `Mode::Year` and assumes `Format::Extended` until
/// the year step inspects the input. No parsing happens here.
pub fn new(text: &'a str) -> Self {
    Self {
        // Control state first: where to start and which layout to assume.
        mode: Mode::Year,
        format: Format::Extended,
        text,
        // Every component is unknown until its step has run.
        tz: None,
        second: None,
        minute: None,
        hour: None,
        day: None,
        month: None,
        year: None,
    }
}
fn parse_year(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Year);
let year = self.text.get(..3).ok_or(Error::Truncated)?.parse()?;
self.year = Some(year);
if let Some(c) = self.text.chars().nth(5) {
match c {
'-' => self.format = Format::Extended,
_ => self.format = Format::Basic,
}
}
self.mode = Mode::Month;
Ok(())
}
/// Reads the two-digit month, if the input has one.
///
/// The month is taken from bytes `4..6` in basic format and `5..7` in
/// extended format. When that range is missing or does not parse as a
/// number, no month is stored and the parser skips ahead to
/// `Mode::TimeZone`. Otherwise the month is stored and the parser moves on
/// to `Mode::Day`.
///
/// # Errors
///
/// Returns `Error::InvalidMonth` when the value is outside `1..=12`.
///
/// # Panics
///
/// Panics if the parser is not in `Mode::Month`.
fn parse_month(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Month);
    let field = match self.format {
        Format::Basic => self.text.get(4..6),
        Format::Extended => self.text.get(5..7),
    };
    let month: u8 = match field.and_then(|m| m.parse().ok()) {
        Some(m) => m,
        None => {
            // Absent or non-numeric: whatever follows can only be a timezone.
            self.mode = Mode::TimeZone;
            return Ok(());
        }
    };
    // Month 0 is as invalid as month 13.
    if !(1..=12).contains(&month) {
        return Err(Error::InvalidMonth);
    }
    self.month = Some(month);
    self.mode = Mode::Day;
    Ok(())
}
fn parse_day(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Day);
let day = match self.format {
Format::Basic => self.text.get(6..8),
Format::Extended => self.text.get(8..10),
};
if let Some(d) = day {
let day = match d.parse() {
Ok(d) => d,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
let max = match self.month {
Some(1|3|5|7|10|12) => 31,
Some(2) => {
if self.year.unwrap() % 4 == 0 {
29
} else {
28
}
},
Some(4|6|9|11) => 30,
_ => return Err(Error::InvalidMonth),
};
if day > max {
return Err(Error::InvalidDay);
}
self.day = Some(day);
let tidx = match self.format {
Format::Basic => 8,
Format::Extended => 10,
};
if let Some(c) = self.text.chars().nth(tidx) {
match c {
'T' => self.mode = Mode::Hour,
'Z' | '-' | '+' => self.mode = Mode::TimeZone,
_ => return Err(Error::InvalidHour),
}
} else {
self.mode = Mode::Finish;
}
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_hour(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Hour);
let hour = match self.format {
Format::Basic => self.text.get(9..11),
Format::Extended => self.text.get(11..13),
};
if let Some(h) = hour {
let hour = match h.parse() {
Ok(h) => h,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if hour > 24 {
return Err(Error::InvalidHour);
}
self.hour = Some(hour);
self.mode = Mode::Minute;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
/// Reads the two-digit minute, if the input has one.
///
/// The minute is taken from bytes `11..13` in basic format and `14..16` in
/// extended format. A missing or non-numeric field stores nothing and sends
/// the parser to `Mode::TimeZone`; a valid minute is stored and the parser
/// moves on to `Mode::Second`.
///
/// # Errors
///
/// Returns `Error::InvalidMinute` when the value is greater than 59.
///
/// # Panics
///
/// Panics if the parser is not in `Mode::Minute`.
fn parse_minute(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Minute);
    let field = match self.format {
        Format::Basic => self.text.get(11..13),
        Format::Extended => self.text.get(14..16),
    };
    let minute: u8 = match field.and_then(|m| m.parse().ok()) {
        Some(m) => m,
        None => {
            // Absent or non-numeric: whatever follows can only be a timezone.
            self.mode = Mode::TimeZone;
            return Ok(());
        }
    };
    // Minutes run from 00 to 59; 60 is not a valid minute.
    if minute > 59 {
        return Err(Error::InvalidMinute);
    }
    self.minute = Some(minute);
    self.mode = Mode::Second;
    Ok(())
}
/// Reads the two-digit second, if the input has one.
///
/// The second is taken from bytes `13..15` in basic format and `17..19` in
/// extended format. A missing or non-numeric field stores nothing. Unless
/// an error is returned, the parser always continues with `Mode::TimeZone`.
///
/// # Errors
///
/// Returns `Error::InvalidSecond` when the value is greater than 60
/// (60 itself is accepted — presumably to admit a leap second; confirm).
///
/// # Panics
///
/// Panics if the parser is not in `Mode::Second`.
fn parse_second(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Second);
    let span = match self.format {
        Format::Basic => 13..15,
        Format::Extended => 17..19,
    };
    // Only a field that exists *and* parses is validated and stored.
    if let Some(Ok(value)) = self.text.get(span).map(|digits| digits.parse::<u8>()) {
        if value > 60 {
            return Err(Error::InvalidSecond);
        }
        self.second = Some(value);
    }
    self.mode = Mode::TimeZone;
    Ok(())
}
fn parse_timezone(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::TimeZone);
let idx = if self.second.is_some() {
match self.format {
Format::Basic => 17,
Format::Extended => 21,
}
} else if self.minute.is_some() {
match self.format {
Format::Basic => 15,
Format::Extended => 19,
}
} else if self.hour.is_some() {
match self.format {
Format::Basic => 13,
Format::Extended => 16,
}
} else if self.day.is_some() {
match self.format {
Format::Basic => 9,
Format::Extended => 11,
}
} else if self.month.is_some() {
match self.format {
Format::Basic => 6,
Format::Extended => 8,
}
} else {
4
};
match self.text.chars().nth(idx) {
Some('Z') => {
if self.text.len() > idx + 2 {
return Err(Error::TrailingGarbage);
} else {
self.tz = Some(TimeZone::UTC);
return Ok(());
}
},
Some('-') => todo!(),
Some('+') => todo!(),
None => self.mode = Mode::Finish,
_ => return Err(Error::InvalidTimezone),
}
todo!()
}
pub fn parse(mut self) -> Result<DateTime, Error> {
loop {
match self.mode {
Mode::Year => self.parse_year()?,
Mode::Month => self.parse_month()?,
Mode::Day => self.parse_day()?,
Mode::Hour => self.parse_hour()?,
Mode::Minute => self.parse_minute()?,
Mode::Second => self.parse_second()?,
Mode::TimeZone => {
self.parse_timezone()?;
break;
}
Mode::Finish => break,
}
}
Ok(DateTime {
year: self.year.unwrap(),
month: self.month,
day: self.day,
hour: self.hour,
minute: self.minute,
second: self.second,
tz: self.tz,
})
}
}