dory/src/time/parser.rs
Nathan Fisher af49f327ec Add Id type for creating unique message identifiers; Change DateTime
to only deal with instants after the Unix epoch and simplify the math.
2023-06-26 00:16:05 -04:00

512 lines
15 KiB
Rust

//! Implements a parser for dates and times written in ISO-8601 format
use super::{days_in_month, DateTime, Error, Offset, Sign, TimeZone};
/// The ISO-8601 layout the parser believes the input is written in.
///
/// A new parser starts in `Basic` and switches to `Extended` when a `-`
/// is seen while the month is expected.
#[derive(Debug, PartialEq)]
enum Format {
/// Fields run together without `-` or `:`, e.g. `20230407T114230Z`.
Basic,
/// Fields are separated by `-` and `:`, e.g. `2023-05-09T19:39:15Z`.
Extended,
}
/// The field the parser is currently expecting to read.
///
/// The parser steps through these in declaration order as each field is
/// completed; the value is also attached to `Error::UnexpectedChar` so the
/// caller can see where parsing stopped.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Mode {
/// Reading the four digit year.
Year,
/// Reading the two digit month.
Month,
/// Reading the two digit day of the month.
Day,
/// Reading the two digit hour.
Hour,
/// Reading the two digit minute.
Minute,
/// Reading the two digit second.
Second,
/// Reading a UTC offset (a sign followed by digits).
TimeZone,
/// The time zone is complete; further digits, dashes and colons are
/// reported as trailing garbage.
Finish,
}
/// A single-use, character-at-a-time parser for an ISO-8601 date/time string.
///
/// Create one with `Parser::new` and consume it with `Parser::parse`.
#[derive(Debug)]
pub struct Parser<'a> {
// The complete input string being parsed.
text: &'a str,
// Basic or extended layout; starts as `Basic`.
format: Format,
// The field currently being read.
mode: Mode,
// Characters collected so far for the field in progress.
buffer: String,
// Parsed year, once four digits have been read.
year: Option<u32>,
// Parsed month, once two digits have been read.
month: Option<u8>,
// Parsed day of the month, once two digits have been read.
day: Option<u8>,
// Set when a separator (`-`, `:` or `T`) has been seen for the field in
// progress; cleared again when a two digit field completes.
sep: bool,
// Parsed hour, if the input contains one.
hour: Option<u8>,
// Parsed minute, if the input contains one.
minute: Option<u8>,
// Parsed second, if the input contains one.
second: Option<u8>,
// Parsed time zone, if the input contains one.
tz: Option<TimeZone>,
}
impl<'a> Parser<'a> {
pub fn new(text: &'a str) -> Self {
Self {
text,
format: Format::Basic,
buffer: String::new(),
mode: Mode::Year,
year: None,
month: None,
day: None,
sep: false,
hour: None,
minute: None,
second: None,
tz: None,
}
}
#[allow(clippy::missing_panics_doc)]
fn validate_day(&self, day: u8) -> Result<(), Error> {
let max = days_in_month(self.month.unwrap(), self.year.unwrap());
if day > max {
Err(Error::InvalidDay)
} else {
Ok(())
}
}
fn number(&mut self, c: char) -> Result<(), Error> {
self.buffer.push(c);
match self.mode {
Mode::Year => {
if self.buffer.len() == 4 {
let y: u32 = self.buffer.parse()?;
self.buffer.clear();
self.year = Some(y);
self.mode = Mode::Month;
}
}
Mode::Month => {
if self.buffer.len() == 2 {
let m: u8 = self.buffer.parse()?;
if m > 12 {
return Err(Error::InvalidMonth);
}
self.buffer.clear();
self.month = Some(m);
self.mode = Mode::Day;
self.sep = false;
}
}
Mode::Day => {
if self.buffer.len() == 2 {
let d: u8 = self.buffer.parse()?;
self.validate_day(d)?;
self.buffer.clear();
self.day = Some(d);
self.mode = Mode::Hour;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Hour => {
if self.buffer.len() == 2 {
let h: u8 = self.buffer.parse()?;
if h > 23 {
return Err(Error::InvalidHour);
}
self.buffer.clear();
self.hour = Some(h);
self.mode = Mode::Minute;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Minute => {
if self.buffer.len() == 2 {
let m = self.buffer.parse()?;
if m > 59 {
return Err(Error::InvalidMinute);
}
self.buffer.clear();
self.minute = Some(m);
self.mode = Mode::Second;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::Second => {
if self.buffer.len() == 2 {
let s = self.buffer.parse()?;
if s > 59 {
return Err(Error::InvalidSecond);
}
self.buffer.clear();
self.second = Some(s);
self.mode = Mode::TimeZone;
self.sep = false;
} else if self.format == Format::Extended && !self.sep {
return Err(Error::MissingSeparator);
}
}
Mode::TimeZone => match self.format {
Format::Basic => {
if self.buffer.len() == 5 {
self.mode = Mode::Finish;
}
}
Format::Extended => {
if self.buffer.len() > 3 && !self.sep {
return Err(Error::MissingSeparator);
} else if self.buffer.len() == 5 {
self.mode = Mode::Finish;
}
}
},
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
fn dash(&mut self) -> Result<(), Error> {
match self.mode {
Mode::Year => return Err(Error::UnexpectedChar(Mode::Year, '-')),
Mode::Month => {
if self.month.is_some() {
return Err(Error::UnexpectedChar(self.mode, '-'));
}
self.format = Format::Extended;
self.sep = true;
}
Mode::Day => {
if self.day.is_some() || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, '-'));
} else {
self.sep = true;
}
}
Mode::Hour | Mode::Minute | Mode::Second => {
if self.buffer.is_empty() {
self.buffer.push('-');
self.mode = Mode::TimeZone;
} else {
return Err(Error::UnexpectedChar(self.mode, '-'));
}
}
Mode::TimeZone => {
if self.buffer.is_empty() {
self.buffer.push('-');
} else {
return Err(Error::UnexpectedChar(self.mode, '-'));
}
}
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
fn colon(&mut self) -> Result<(), Error> {
match self.mode {
Mode::Year | Mode::Month | Mode::Day => {
return Err(Error::UnexpectedChar(self.mode, ':'))
}
Mode::Hour | Mode::Minute | Mode::Second => {
if !self.buffer.is_empty() || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, ':'));
}
self.sep = true;
}
Mode::TimeZone => {
if !self.buffer.len() == 2 || self.format == Format::Basic {
return Err(Error::UnexpectedChar(self.mode, ':'));
}
self.sep = true;
}
Mode::Finish => return Err(Error::TrailingGarbage),
}
Ok(())
}
fn tee(&mut self) -> Result<(), Error> {
if self.mode != Mode::Hour || !self.buffer.is_empty() {
return Err(Error::UnexpectedChar(self.mode, 'T'));
}
self.sep = true;
Ok(())
}
fn zed(&mut self) -> Result<(), Error> {
if self.mode == Mode::Year
|| self.mode == Mode::Month
|| self.mode == Mode::Day
|| !self.buffer.is_empty()
{
return Err(Error::UnexpectedChar(self.mode, 'Z'));
}
self.tz = Some(TimeZone::UTC);
self.mode = Mode::Finish;
Ok(())
}
fn plus(&mut self) -> Result<(), Error> {
if self.mode != Mode::TimeZone && self.mode != Mode::Finish
|| !self.buffer.is_empty()
|| self.mode == Mode::Year
|| self.mode == Mode::Month
|| self.mode == Mode::Day
{
return Err(Error::UnexpectedChar(self.mode, '+'));
}
self.buffer.push('+');
Ok(())
}
fn parse_tz_basic(&mut self) -> Result<(), Error> {
let sign = match self.buffer.chars().next() {
Some('+') => Sign::Positive,
Some('-') => Sign::Negative,
None => return Ok(()),
_ => return Err(Error::InvalidOffset),
};
let Some(n) = self.buffer.get(1..3) else {
return Err(Error::InvalidOffset);
};
let hours: u8 = n.parse()?;
let minutes: Option<u8> = if let Some(n) = self.buffer.get(3..5) {
Some(n.parse()?)
} else {
None
};
match (hours, minutes) {
(h, None) if h > 12 => return Err(Error::InvalidOffset),
(h, Some(m)) if h == 12 && m > 0 || m > 59 => return Err(Error::InvalidOffset),
_ => {
self.tz = Some(TimeZone::Offset(Offset {
sign,
hours,
minutes,
}));
}
}
Ok(())
}
fn parse_tz_extended(&mut self) -> Result<(), Error> {
if self.buffer.len() > 3 && !self.sep {
Err(Error::MissingSeparator)
} else {
self.parse_tz_basic()
}
}
pub fn parse(mut self) -> Result<DateTime, Error> {
for c in self.text.chars() {
match c {
x if x.is_numeric() => self.number(c)?,
'-' => self.dash()?,
':' => self.colon()?,
'T' => self.tee()?,
'Z' => self.zed()?,
'+' => self.plus()?,
_ => return Err(Error::UnexpectedChar(self.mode, c)),
}
}
match self.mode {
Mode::TimeZone | Mode::Finish => {
if self.tz == Some(TimeZone::UTC) && !self.buffer.is_empty() {
return Err(Error::TrailingGarbage);
}
if self.tz.is_none() {
match self.format {
Format::Basic => self.parse_tz_basic()?,
Format::Extended => self.parse_tz_extended()?,
}
}
}
_ => {}
}
if let Some(year) = self.year {
if let Some(month) = self.month {
if let Some(day) = self.day {
return Ok(DateTime {
year,
month,
day,
hour: self.hour,
minute: self.minute,
second: self.second,
tz: self.tz,
});
}
}
}
Err(Error::Truncated)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text`, panicking if the parser rejects it.
    fn parsed(text: &str) -> DateTime {
        Parser::new(text).parse().unwrap()
    }

    /// Builds the `tz` value expected for a numeric UTC offset.
    fn offset(sign: Sign, hours: u8, minutes: Option<u8>) -> Option<TimeZone> {
        Some(TimeZone::Offset(Offset {
            sign,
            hours,
            minutes,
        }))
    }

    // Extended format: fields separated by `-` and `:`.

    #[test]
    fn parse_extended_utc() {
        let dt = parsed("2023-05-09T19:39:15Z");
        assert_eq!((dt.year, dt.month, dt.day), (2023, 5, 9));
        assert_eq!(
            (dt.hour, dt.minute, dt.second),
            (Some(19), Some(39), Some(15))
        );
        assert_eq!(dt.tz, Some(TimeZone::UTC));
    }

    #[test]
    fn parse_extended_positive() {
        let dt = parsed("2023-05-09T19:39:15+05:15");
        assert_eq!((dt.year, dt.month, dt.day), (2023, 5, 9));
        assert_eq!(
            (dt.hour, dt.minute, dt.second),
            (Some(19), Some(39), Some(15))
        );
        assert_eq!(dt.tz, offset(Sign::Positive, 5, Some(15)));
    }

    #[test]
    fn parse_extended_negative() {
        let dt = parsed("2023-05-09T19:39:15-05");
        assert_eq!((dt.year, dt.month, dt.day), (2023, 5, 9));
        assert_eq!(
            (dt.hour, dt.minute, dt.second),
            (Some(19), Some(39), Some(15))
        );
        assert_eq!(dt.tz, offset(Sign::Negative, 5, None));
    }

    #[test]
    fn parse_extended_no_seconds() {
        let dt = parsed("2023-05-09T19:39-05");
        assert_eq!(dt.second, None);
        assert_eq!(dt.tz, offset(Sign::Negative, 5, None));
    }

    #[test]
    fn parse_extended_no_minutes() {
        let dt = parsed("2023-05-09T19-05");
        assert_eq!(dt.second, None);
        assert_eq!(dt.minute, None);
        assert_eq!(dt.tz, offset(Sign::Negative, 5, None));
    }

    #[test]
    fn parse_extended_no_time() {
        let dt = parsed("2023-05-09");
        assert_eq!(dt.second, None);
        assert_eq!(dt.minute, None);
        assert_eq!(dt.tz, None);
    }

    // Basic format: fields run together without separators.

    #[test]
    fn parse_basic_utc() {
        let dt = parsed("20230407T114230Z");
        assert_eq!((dt.year, dt.month, dt.day), (2023, 4, 7));
        assert_eq!(
            (dt.hour, dt.minute, dt.second),
            (Some(11), Some(42), Some(30))
        );
        assert_eq!(dt.tz, Some(TimeZone::UTC));
    }

    #[test]
    fn parse_basic_positive() {
        let dt = parsed("20230407T114230+0520");
        assert_eq!(dt.tz, offset(Sign::Positive, 5, Some(20)));
    }

    #[test]
    fn parse_basic_negative() {
        let dt = parsed("20230407T114230-0520");
        assert_eq!(dt.tz, offset(Sign::Negative, 5, Some(20)));
    }

    #[test]
    fn parse_basic_no_seconds() {
        let dt = parsed("20230407T1142-0520");
        assert_eq!(dt.second, None);
        assert_eq!(dt.tz, offset(Sign::Negative, 5, Some(20)));
    }

    #[test]
    fn parse_basic_no_minutes() {
        let dt = parsed("20230407T11-05");
        assert_eq!(dt.second, None);
        assert_eq!(dt.minute, None);
        assert_eq!(dt.tz, offset(Sign::Negative, 5, None));
    }

    #[test]
    fn parse_no_tz() {
        let dt = parsed("2023-06-11T22:41:31");
        assert_eq!(dt.tz, None);
    }
}