commit e9ff1c9e87c97c3854a12197e469e66cc270a3aa Author: Nathan Fisher Date: Thu Feb 6 11:48:43 2025 -0500 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..de82492 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,95 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "gemtext-rs" +version = "0.1.0" +dependencies = [ + "serde", + "textwrap", +] + +[[package]] +name = "proc-macro2" +version = "1.0.79" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smawk" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" + +[[package]] +name = "syn" +version = "2.0.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "textwrap" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-linebreak" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..36f398a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "gemtext-rs" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +serde = ["dep:serde"] + +[dependencies] +serde = { version = "1.0.197", optional = true } +textwrap = "0.16.1" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4f9388f --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,6 @@ +#![warn(clippy::all, clippy::pedantic)] + +mod node; +mod parser; + +pub use {node::Node, parser::Parser}; diff --git a/src/node.rs b/src/node.rs new file mode 100644 index 0000000..9489381 --- /dev/null +++ b/src/node.rs @@ -0,0 +1,103 @@ +use std::fmt::{self, Write}; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum Node { + Text(String), + H1(String), + H2(String), + H3(String), + ListItem(String), + Quote(String), + PreStart { + alt: Option, + }, + PreBody(String), + PreEnd, + Link { + url: String, + display: Option, + }, + Prompt { + url: String, + display: Option, + }, +} + +impl fmt::Display for Node { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Text(s) | Self::PreBody(s) => writeln!(f, "{s}"), + Self::H1(s) => writeln!(f, "# {s}"), + Self::H2(s) => writeln!(f, "## {s}"), + Self::H3(s) => writeln!(f, "### {s}"), + Self::ListItem(s) => writeln!(f, "* {s}"), + Self::Quote(s) => writeln!(f, "> {s}"), + Self::PreStart { alt } => match alt { + Some(s) => writeln!(f, "```{s}"), + None => writeln!(f, "```"), + }, + Self::PreEnd => writeln!(f, "```"), + Self::Link { + url, + display: Some(display), + } => writeln!(f, "=> {url} {display}"), + Self::Link { url, display: None } => writeln!(f, "=> {url}"), + Self::Prompt { + url, + display: Some(display), + } => writeln!(f, "=: {url} {display}"), + Self::Prompt { url, display: None } => writeln!(f, "=: {url}"), + } + } +} + +impl Node { + pub fn write_html(&self, mut writer: W) -> fmt::Result { + match self { + Self::Text(s) => { + writeln!(writer, "

")?; + textwrap::wrap(s, 80) + .into_iter() + .try_for_each(|line| writeln!(writer, "{line}"))?; + writeln!(writer, "

") + } + Self::H1(s) => writeln!(writer, "

{s}

"), + Self::H2(s) => writeln!(writer, "

{s}

"), + Self::H3(s) => writeln!(writer, "

{s}

"), + Self::ListItem(s) => { + write!(writer, "
  • ")?; + textwrap::wrap(s, 80) + .iter() + .try_for_each(|line| write!(writer, "{line}"))?; + writeln!(writer, "
  • ") + } + Self::Quote(s) => { + write!(writer, "
    ")?; + textwrap::wrap(s, 80) + .iter() + .try_for_each(|line| writeln!(writer, "{line}"))?; + writeln!(writer, "
    ") + } + Self::PreStart { alt: Some(s) } => { + writeln!(writer, "
    ")
    +            }
    +            Self::PreStart { alt: None } => writeln!(writer, "
    "),
    +            Self::PreBody(s) => writeln!(writer, "{s}"),
    +            Self::PreEnd => writeln!(writer, "
    "), + Self::Link { url, display } | Self::Prompt { url, display } => { + writeln!( + writer, + "{}", + match display { + Some(d) => d, + None => url, + } + ) + } + } + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..60b663b --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,118 @@ +use { + crate::Node, + std::{ + io::{self, BufRead, Write}, + string::ToString, + }, +}; + +#[derive(Debug, Default)] +enum Mode { + #[default] + Normal, + Preformatted, +} + +#[derive(Debug)] +pub struct Parser { + reader: B, + writer: Option, + mode: Mode, +} + +impl Iterator for Parser { + type Item = io::Result; + + fn next(&mut self) -> Option { + let mut s = String::new(); + let node = match self.reader.read_line(&mut s) { + Ok(0) => None, + Err(e) => Some(Err(e)), + Ok(_) if self.is_preformatted() => Some(Ok(self.parse_preformatted(&s))), + Ok(_) => Some(Ok(self.parse_normal(&s))), + }; + if let Some(ref mut w) = self.writer { + if let Some(Ok(_)) = node { + // We throw away the possible error here if writing to our cache + // fails. TODO: Log this somehow + let _s = writeln!(w, "{s}"); + } + } + node + } +} + +impl Parser { + pub fn new(reader: B, writer: Option) -> Self { + Self { + reader, + writer, + mode: Mode::default(), + } + } + + fn is_preformatted(&self) -> bool { + matches!(self.mode, Mode::Preformatted) + } + + fn parse_normal(&mut self, s: &str) -> Node { + if let Some(s) = s.strip_prefix("```").map(str::trim) { + self.mode = Mode::Preformatted; + Self::parse_pre_alt(s) + } else if let Some(s) = s.strip_prefix("=>").map(str::trim) { + Self::parse_link(s) + } else if let Some(s) = s.strip_prefix("=:").map(str::trim) { + Self::parse_prompt(s) + } else if let Some(s) = s + .strip_prefix("###") + .map(str::trim) + .map(ToString::to_string) + { + Node::H3(s) + } else if let Some(s) = s.strip_prefix("##").map(str::trim).map(ToString::to_string) { + Node::H2(s) + } else if let Some(s) = s.strip_prefix('#').map(str::trim).map(ToString::to_string) { + Node::H1(s) + } else if let Some(s) = s.strip_prefix('*').map(str::trim).map(ToString::to_string) { + Node::ListItem(s) + } else if let Some(s) = s.strip_prefix('>').map(str::trim).map(ToString::to_string) { + Node::Quote(s) + } else { + Node::Text(s.to_string()) + } + } + + fn parse_preformatted(&mut self, s: &str) -> Node { + if s.starts_with("```") { + self.mode = Mode::Normal; + Node::PreEnd + } else { + Node::PreBody(s.to_string()) + } + } + + fn parse_link(s: &str) -> Node { + let (url, display) = s + .split_once(|x: char| x.is_ascii_whitespace()) + .map(|(u, d)| (u.trim().to_string(), Some(d.trim().to_string()))) + .unwrap_or((s.to_string(), None)); + Node::Link { url, display } + } + + fn parse_prompt(s: &str) -> Node { + let (url, display) = s + .split_once(|x: char| x.is_ascii_whitespace()) + .map(|(u, d)| (u.trim().to_string(), Some(d.trim().to_string()))) + .unwrap_or((s.to_string(), None)); + Node::Prompt { url, display } + } + + fn parse_pre_alt(s: &str) -> Node { + let alt = if s.is_empty() { + None + } else { + Some(s.to_string()) + }; + Node::PreStart { alt } + } +}