Rethink Time parser, iterating over characters

This commit is contained in:
Nathan Fisher 2023-06-09 10:31:58 -04:00
parent 230b8ac48a
commit db1680d46f
3 changed files with 474 additions and 208 deletions

View file

@ -1,8 +1,10 @@
use std::{fmt, num::ParseIntError};
use super::parser::Mode;
#[derive(Debug)]
pub enum Error {
ParseInt,
UnexpectedChar(Mode, char),
InvalidMonth,
InvalidDay,
InvalidHour,
@ -12,6 +14,7 @@ pub enum Error {
InvalidOffset,
TrailingGarbage,
Truncated,
MissingSeparator,
}
impl fmt::Display for Error {

View file

@ -7,8 +7,8 @@ enum Format {
Extended,
}
#[derive(Debug, PartialEq)]
enum Mode {
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Mode {
Year,
Month,
Day,
@ -24,9 +24,11 @@ pub struct Parser<'a> {
text: &'a str,
format: Format,
mode: Mode,
buffer: String,
year: Option<u32>,
month: Option<u8>,
day: Option<u8>,
sep: bool,
hour: Option<u8>,
minute: Option<u8>,
second: Option<u8>,
@ -37,11 +39,13 @@ impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self {
Self {
text,
format: Format::Extended,
format: Format::Basic,
buffer: String::new(),
mode: Mode::Year,
year: None,
month: None,
day: None,
sep: false,
hour: None,
minute: None,
second: None,
@ -49,230 +53,203 @@ impl<'a> Parser<'a> {
}
}
fn parse_year(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Year);
let year = self.text.get(..3).ok_or(Error::Truncated)?.parse()?;
self.year = Some(year);
if let Some(c) = self.text.chars().nth(5) {
match c {
'-' => self.format = Format::Extended,
_ => self.format = Format::Basic,
}
}
self.mode = Mode::Month;
Ok(())
}
fn parse_month(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Month);
let month = match self.format {
Format::Basic => self.text.get(4..6),
Format::Extended => self.text.get(5..7),
};
if let Some(m) = month {
let month = match m.parse() {
Ok(m) => m,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
fn number(&mut self, c: char) -> Result<(), Error> {
self.buffer.push(c);
match self.mode {
Mode::Year => {
if self.buffer.len() == 4 {
let y: u32 = self.buffer.parse()?;
self.buffer.clear();
self.year = Some(y);
self.mode = Mode::Month;
}
};
if month > 12 {
return Err(Error::InvalidMonth);
}
self.month = Some(month);
self.mode = Mode::Day;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_day(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Day);
let day = match self.format {
Format::Basic => self.text.get(6..8),
Format::Extended => self.text.get(8..10),
};
if let Some(d) = day {
let day = match d.parse() {
Ok(d) => d,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
Mode::Month => {
if self.buffer.len() == 2 {
let m: u8 = self.buffer.parse()?;
self.buffer.clear();
self.month = Some(m);
self.mode = Mode::Day;
self.sep = false;
}
};
let max = match self.month {
Some(1|3|5|7|10|12) => 31,
Some(2) => {
if self.year.unwrap() % 4 == 0 {
29
} else {
28
}
Mode::Day => {
if self.buffer.len() == 2 {
let d: u8 = self.buffer.parse()?;
self.buffer.clear();
self.day = Some(d);
self.mode = Mode::Hour;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Hour => {
if self.buffer.len() == 2 {
let h: u8 = self.buffer.parse()?;
self.buffer.clear();
self.hour = Some(h);
self.mode = Mode::Minute;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Minute => {
if self.buffer.len() == 2 {
let m = self.buffer.parse()?;
self.buffer.clear();
self.minute = Some(m);
self.mode = Mode::Second;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Second => {
if self.buffer.len() == 2 {
let s = self.buffer.parse()?;
self.buffer.clear();
self.second = Some(s);
self.mode = Mode::TimeZone;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::TimeZone => match self.format {
Format::Basic => {
if self.buffer.len() == 5 {
self.mode = Mode::Finish;
}
},
Some(4|6|9|11) => 30,
_ => return Err(Error::InvalidMonth),
};
if day > max {
return Err(Error::InvalidDay);
}
self.day = Some(day);
let tidx = match self.format {
Format::Basic => 8,
Format::Extended => 10,
};
if let Some(c) = self.text.chars().nth(tidx) {
match c {
'T' => self.mode = Mode::Hour,
'Z' | '-' | '+' => self.mode = Mode::TimeZone,
_ => return Err(Error::InvalidHour),
}
} else {
self.mode = Mode::Finish;
}
} else {
self.mode = Mode::TimeZone;
Format::Extended => {
if self.buffer.len() > 3 && !self.sep {
return Err(Error::MissingSeparator);
} else if self.buffer.len() == 5 {
self.mode = Mode::Finish;
}
},
},
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
fn parse_hour(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Hour);
let hour = match self.format {
Format::Basic => self.text.get(9..11),
Format::Extended => self.text.get(11..13),
};
if let Some(h) = hour {
let hour = match h.parse() {
Ok(h) => h,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if hour > 24 {
return Err(Error::InvalidHour);
}
self.hour = Some(hour);
self.mode = Mode::Minute;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_minute(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Minute);
let minute = match self.format {
Format::Basic => self.text.get(11..13),
Format::Extended => self.text.get(14..16),
};
if let Some(m) = minute {
let minute = match m.parse() {
Ok(m) => m,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if minute > 60 {
return Err(Error::InvalidMinute);
}
self.minute = Some(minute);
self.mode = Mode::Second;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_second(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Second);
let second = match self.format {
Format::Basic => self.text.get(13..15),
Format::Extended => self.text.get(17..19),
};
if let Some(s) = second {
let second = match s.parse() {
Ok(s) => s,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if second > 60 {
return Err(Error::InvalidSecond);
}
self.second = Some(second);
}
self.mode = Mode::TimeZone;
Ok(())
}
fn parse_timezone(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::TimeZone);
let idx = if self.second.is_some() {
match self.format {
Format::Basic => 17,
Format::Extended => 21,
}
} else if self.minute.is_some() {
match self.format {
Format::Basic => 15,
Format::Extended => 19,
}
} else if self.hour.is_some() {
match self.format {
Format::Basic => 13,
Format::Extended => 16,
}
} else if self.day.is_some() {
match self.format {
Format::Basic => 9,
Format::Extended => 11,
}
} else if self.month.is_some() {
match self.format {
Format::Basic => 6,
Format::Extended => 8,
}
} else {
4
};
match self.text.chars().nth(idx) {
Some('Z') => {
if self.text.len() > idx + 2 {
return Err(Error::TrailingGarbage);
fn dash(&mut self, c: char) -> Result<(), Error> {
match self.mode {
Mode::Year => return Err(Error::UnexpectedChar(Mode::Year, '-')),
Mode::Month => {
if self.month.is_some() {
return Err(Error::UnexpectedChar(self.mode, '-'));
} else {
self.tz = Some(TimeZone::UTC);
return Ok(());
self.format = Format::Extended;
self.sep = true;
}
},
Some('-') => todo!(),
Some('+') => todo!(),
None => self.mode = Mode::Finish,
_ => return Err(Error::InvalidTimezone),
Mode::Day => {
if self.day.is_some() || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, '-'));
} else {
self.sep = true;
}
},
Mode::Hour | Mode::Minute | Mode::Second => {
if !self.buffer.is_empty() {
return Err(Error::UnexpectedChar(self.mode, '-'));
} else {
self.buffer.push('-');
self.mode = Mode::TimeZone;
}
},
Mode::TimeZone => {
if self.buffer.is_empty() {
self.buffer.push('-');
} else {
return Err(Error::UnexpectedChar(self.mode, '-'));
}
},
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
/// Handles a `:` separator encountered while scanning the input.
///
/// A colon is only legal in extended format, and only at a field boundary:
/// before the digits of the hour, minute or second, or between the hour and
/// minute of a time-zone offset.
///
/// # Errors
/// Returns `Error::UnexpectedChar` when the colon appears in the date part,
/// in basic format, or in the middle of a field, and
/// `Error::TrailingGarbage` once parsing has already finished.
fn colon(&mut self, c: char) -> Result<(), Error> {
    match self.mode {
        Mode::Year | Mode::Month | Mode::Day => {
            return Err(Error::UnexpectedChar(self.mode, ':'));
        }
        Mode::Hour | Mode::Minute | Mode::Second => {
            if !self.buffer.is_empty() || self.format == Format::Basic {
                return Err(Error::UnexpectedChar(self.mode, ':'));
            }
            self.sep = true;
        }
        Mode::TimeZone => {
            // The offset buffer holds the sign pushed by `dash` plus the two
            // hour digits ("-hh") when the separator is due, i.e. 3 bytes.
            // The previous `!self.buffer.len() == 2` was a bitwise NOT of the
            // length and therefore never rejected anything.
            if self.buffer.len() != 3 || self.format == Format::Basic {
                return Err(Error::UnexpectedChar(self.mode, ':'));
            }
            self.sep = true;
        }
        Mode::Finish => return Err(Error::TrailingGarbage),
    }
    Ok(())
}
fn tee(&mut self, c: char) -> Result<(), Error> {
if self.mode != Mode::Hour || !self.buffer.is_empty() {
Err(Error::UnexpectedChar(self.mode, 'T'))
} else {
self.sep = true;
Ok(())
}
}
fn zed(&mut self, c: char) -> Result<(), Error> {
if self.mode == Mode::Year || !self.buffer.is_empty() {
return Err(Error::UnexpectedChar(self.mode, 'Z'));
} else {
self.tz = Some(TimeZone::UTC);
self.mode = Mode::Finish;
}
Ok(())
}
/// Placeholder for time-zone parsing in basic format.
///
/// Not implemented yet: the body is `todo!()`, so calling this panics.
// NOTE(review): presumably meant to turn the buffered offset into `self.tz` — confirm.
fn parse_tz_basic(&mut self) -> Result<(), Error> {
todo!()
}
/// Time-zone parsing for extended format.
///
/// Extended format requires a separator to have been seen; once that is
/// confirmed the work is delegated to `parse_tz_basic`.
///
/// # Errors
/// Returns `Error::MissingSeparator` when no separator was recorded, otherwise
/// whatever `parse_tz_basic` returns.
fn parse_tz_extended(&mut self) -> Result<(), Error> {
    if self.sep {
        return self.parse_tz_basic();
    }
    Err(Error::MissingSeparator)
}
pub fn parse(mut self) -> Result<DateTime, Error> {
loop {
match self.mode {
Mode::Year => self.parse_year()?,
Mode::Month => self.parse_month()?,
Mode::Day => self.parse_day()?,
Mode::Hour => self.parse_hour()?,
Mode::Minute => self.parse_minute()?,
Mode::Second => self.parse_second()?,
Mode::TimeZone => {
self.parse_timezone()?;
break;
}
Mode::Finish => break,
for c in self.text.chars() {
match c {
x if x.is_numeric() => self.number(c)?,
'-' => self.dash(c)?,
':' => self.colon(c)?,
'T' => self.tee(c)?,
'Z' => self.zed(c)?,
_ => return Err(Error::UnexpectedChar(self.mode, c)),
}
}
match self.mode {
Mode::TimeZone | Mode::Finish => {
if self.tz == Some(TimeZone::UTC) && !self.buffer.is_empty() {
return Err(Error::TrailingGarbage);
}
match self.format {
Format::Basic => self.parse_tz_basic()?,
Format::Extended => self.parse_tz_extended()?,
}
},
_ => return Err(Error::Truncated),
}
Ok(DateTime {
year: self.year.unwrap(),
month: self.month,

286
src/time/parser.rs.bak Normal file
View file

@ -0,0 +1,286 @@
//! Implements a parser for ISO-8601 format Time
use super::{DateTime, Error, TimeZone};
/// Which ISO-8601 representation the input text uses.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Format {
    /// Compact form: fields follow each other directly (month at bytes 4..6).
    Basic,
    /// Separated form: one separator character between fields (month at bytes 5..7).
    Extended,
}
/// The field the parser will read next.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Mode {
    /// Reading the year.
    Year,
    /// Reading the month.
    Month,
    /// Reading the day of the month.
    Day,
    /// Reading the hour.
    Hour,
    /// Reading the minute.
    Minute,
    /// Reading the second.
    Second,
    /// Reading the time-zone designator.
    TimeZone,
    /// Nothing left to read.
    Finish,
}
/// State for parsing one ISO-8601 timestamp out of a borrowed string.
#[derive(Debug)]
pub struct Parser<'a> {
// The text being parsed.
text: &'a str,
// Basic or extended representation; selects the byte offsets of each field.
format: Format,
// The field that will be read next.
mode: Mode,
// Fields parsed so far; each stays `None` until its step succeeds.
year: Option<u32>,
month: Option<u8>,
day: Option<u8>,
hour: Option<u8>,
minute: Option<u8>,
second: Option<u8>,
// Time zone, when one was recognised.
tz: Option<TimeZone>,
}
impl<'a> Parser<'a> {
/// Creates a parser over `text`, positioned to read the year first.
///
/// The format starts out as `Format::Extended` and every parsed field
/// starts as `None`.
pub fn new(text: &'a str) -> Self {
Self {
text,
format: Format::Extended,
mode: Mode::Year,
year: None,
month: None,
day: None,
hour: None,
minute: None,
second: None,
tz: None,
}
}
/// Parses the four-digit year at the start of the text and detects the format.
///
/// The character right after the year (index 4) decides the format: `-`
/// selects `Format::Extended`, anything else selects `Format::Basic`. When the
/// text ends after the year the format is left as it was.
///
/// # Errors
/// Returns `Error::Truncated` when the first four bytes cannot be sliced out
/// of the text, and propagates the integer parse error when they are not a
/// number.
///
/// # Panics
/// Panics if the parser is not in `Mode::Year`.
fn parse_year(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Year);
    // The year occupies bytes 0..4; slicing `..3` dropped its last digit.
    let year = self.text.get(..4).ok_or(Error::Truncated)?.parse()?;
    self.year = Some(year);
    // Index 4 is the first character after the year in both formats; looking
    // at index 5 inspected a month digit instead of the separator.
    match self.text.chars().nth(4) {
        Some('-') => self.format = Format::Extended,
        Some(_) => self.format = Format::Basic,
        None => {}
    }
    self.mode = Mode::Month;
    Ok(())
}
/// Parses the two-digit month that follows the year.
///
/// The month is read from bytes 4..6 in basic format and 5..7 in extended
/// format. When those bytes are missing or are not a number the parser
/// assumes the date ended after the year and moves on to the time zone.
///
/// # Errors
/// Returns `Error::InvalidMonth` when the value is outside `1..=12`.
///
/// # Panics
/// Panics if the parser is not in `Mode::Month`.
fn parse_month(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Month);
    let digits = match self.format {
        Format::Basic => self.text.get(4..6),
        Format::Extended => self.text.get(5..7),
    };
    let month = match digits.and_then(|d| d.parse::<u8>().ok()) {
        Some(m) => m,
        None => {
            self.mode = Mode::TimeZone;
            return Ok(());
        }
    };
    // Month 00 does not exist; the old `> 12` check let it through.
    if !(1..=12).contains(&month) {
        return Err(Error::InvalidMonth);
    }
    self.month = Some(month);
    self.mode = Mode::Day;
    Ok(())
}
fn parse_day(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Day);
let day = match self.format {
Format::Basic => self.text.get(6..8),
Format::Extended => self.text.get(8..10),
};
if let Some(d) = day {
let day = match d.parse() {
Ok(d) => d,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
let max = match self.month {
Some(1|3|5|7|10|12) => 31,
Some(2) => {
if self.year.unwrap() % 4 == 0 {
29
} else {
28
}
},
Some(4|6|9|11) => 30,
_ => return Err(Error::InvalidMonth),
};
if day > max {
return Err(Error::InvalidDay);
}
self.day = Some(day);
let tidx = match self.format {
Format::Basic => 8,
Format::Extended => 10,
};
if let Some(c) = self.text.chars().nth(tidx) {
match c {
'T' => self.mode = Mode::Hour,
'Z' | '-' | '+' => self.mode = Mode::TimeZone,
_ => return Err(Error::InvalidHour),
}
} else {
self.mode = Mode::Finish;
}
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
fn parse_hour(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::Hour);
let hour = match self.format {
Format::Basic => self.text.get(9..11),
Format::Extended => self.text.get(11..13),
};
if let Some(h) = hour {
let hour = match h.parse() {
Ok(h) => h,
Err(_) => {
self.mode = Mode::TimeZone;
return Ok(());
}
};
if hour > 24 {
return Err(Error::InvalidHour);
}
self.hour = Some(hour);
self.mode = Mode::Minute;
} else {
self.mode = Mode::TimeZone;
}
Ok(())
}
/// Parses the two-digit minute that follows the hour.
///
/// The minute is read from bytes 11..13 in basic format and 14..16 in
/// extended format. When those bytes are missing or are not a number the
/// parser moves on to the time zone instead.
///
/// # Errors
/// Returns `Error::InvalidMinute` when the value is greater than 59.
///
/// # Panics
/// Panics if the parser is not in `Mode::Minute`.
fn parse_minute(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Minute);
    let span = match self.format {
        Format::Basic => 11..13,
        Format::Extended => 14..16,
    };
    let minute = match self.text.get(span).and_then(|d| d.parse::<u8>().ok()) {
        Some(m) => m,
        None => {
            self.mode = Mode::TimeZone;
            return Ok(());
        }
    };
    // Minutes run 00..=59; the old `> 60` check accepted minute 60.
    if minute > 59 {
        return Err(Error::InvalidMinute);
    }
    self.minute = Some(minute);
    self.mode = Mode::Second;
    Ok(())
}
/// Parses the optional two-digit second that follows the minute.
///
/// The second is read from bytes 13..15 in basic format and 17..19 in
/// extended format. Whether or not a second is present, the parser moves on
/// to the time zone afterwards.
///
/// # Errors
/// Returns `Error::InvalidSecond` when the value is greater than 60.
///
/// # Panics
/// Panics if the parser is not in `Mode::Second`.
fn parse_second(&mut self) -> Result<(), Error> {
    assert_eq!(self.mode, Mode::Second);
    let span = match self.format {
        Format::Basic => 13..15,
        Format::Extended => 17..19,
    };
    if let Some(value) = self.text.get(span).and_then(|d| d.parse::<u8>().ok()) {
        if value > 60 {
            return Err(Error::InvalidSecond);
        }
        self.second = Some(value);
    }
    self.mode = Mode::TimeZone;
    Ok(())
}
fn parse_timezone(&mut self) -> Result<(), Error> {
assert_eq!(self.mode, Mode::TimeZone);
let idx = if self.second.is_some() {
match self.format {
Format::Basic => 17,
Format::Extended => 21,
}
} else if self.minute.is_some() {
match self.format {
Format::Basic => 15,
Format::Extended => 19,
}
} else if self.hour.is_some() {
match self.format {
Format::Basic => 13,
Format::Extended => 16,
}
} else if self.day.is_some() {
match self.format {
Format::Basic => 9,
Format::Extended => 11,
}
} else if self.month.is_some() {
match self.format {
Format::Basic => 6,
Format::Extended => 8,
}
} else {
4
};
match self.text.chars().nth(idx) {
Some('Z') => {
if self.text.len() > idx + 2 {
return Err(Error::TrailingGarbage);
} else {
self.tz = Some(TimeZone::UTC);
return Ok(());
}
},
Some('-') => todo!(),
Some('+') => todo!(),
None => self.mode = Mode::Finish,
_ => return Err(Error::InvalidTimezone),
}
todo!()
}
/// Runs the parser to completion and assembles the resulting `DateTime`.
///
/// Each step handles the field named by the current mode and selects the next
/// mode; the time-zone step is always the last one.
///
/// # Errors
/// Propagates the first error reported by any field parser.
pub fn parse(mut self) -> Result<DateTime, Error> {
    while self.mode != Mode::Finish {
        match self.mode {
            Mode::TimeZone => {
                self.parse_timezone()?;
                break;
            }
            Mode::Year => self.parse_year()?,
            Mode::Month => self.parse_month()?,
            Mode::Day => self.parse_day()?,
            Mode::Hour => self.parse_hour()?,
            Mode::Minute => self.parse_minute()?,
            Mode::Second => self.parse_second()?,
            Mode::Finish => break,
        }
    }
    let year = self.year.unwrap();
    Ok(DateTime {
        year,
        month: self.month,
        day: self.day,
        hour: self.hour,
        minute: self.minute,
        second: self.second,
        tz: self.tz,
    })
}
}