From 3ffcdb54ed236766dcc3e3df28ec345f83868979 Mon Sep 17 00:00:00 2001
From: Nathan Fisher
Date: Sat, 14 Jan 2023 02:08:14 -0500
Subject: [PATCH] Add `cut` applet

---
 src/cmd/cut/mod.rs  | 294 ++++++++++++++++++++++++++++++++++++++++++++
 src/cmd/mod.rs      |  15 ++-
 src/cmd/sync/mod.rs |  10 +-
 3 files changed, 309 insertions(+), 10 deletions(-)
 create mode 100644 src/cmd/cut/mod.rs

diff --git a/src/cmd/cut/mod.rs b/src/cmd/cut/mod.rs
new file mode 100644
index 0000000..b918cc8
--- /dev/null
+++ b/src/cmd/cut/mod.rs
@@ -0,0 +1,294 @@
+use super::Cmd;
+use clap::{parser::ValuesRef, Arg, ArgAction, ArgGroup, Command};
+use std::{
+    error::Error,
+    fs::File,
+    io::{self, BufRead, BufReader, BufWriter, Write},
+    ops::{Range, RangeFrom, RangeTo},
+    usize, vec,
+};
+
+/// The `cut` applet: prints selected bytes, characters or fields of each input line.
+#[derive(Debug, Default)]
+pub struct Cut;
+
+impl Cmd for Cut {
+    /// Builds the command line definition; exactly one of `-b`, `-c` or `-f` is required.
+    fn cli(&self) -> clap::Command {
+        Command::new("cut")
+            .about("cut out selected fields of each line of a file")
+            .author("Nathan Fisher")
+            .version(env!("CARGO_PKG_VERSION"))
+            .args([
+                Arg::new("bytes")
+                    .short('b')
+                    .long("bytes")
+                    .help("select only these bytes")
+                    .num_args(1)
+                    .allow_hyphen_values(true)
+                    .value_name("LIST")
+                    .value_delimiter(','),
+                Arg::new("characters")
+                    .short('c')
+                    .long("characters")
+                    .help("select only these characters")
+                    .num_args(1)
+                    .allow_hyphen_values(true)
+                    .value_name("LIST")
+                    .value_delimiter(','),
+                Arg::new("fields")
+                    .short('f')
+                    .long("fields")
+                    .help("select only these fields")
+                    .num_args(1)
+                    .allow_hyphen_values(true)
+                    .value_name("LIST")
+                    .value_delimiter(','),
+                Arg::new("delimiter")
+                    .short('d')
+                    .long("delimiter")
+                    .help("use DELIM instead of TAB for field delimiter")
+                    .num_args(1)
+                    .value_name("DELIM"),
+                // `-n` is accepted for compatibility only. Without an explicit flag action
+                // clap 4 would treat it as an option that takes a value.
+                Arg::new("nosplit")
+                    .help("ignored")
+                    .short('n')
+                    .action(ArgAction::SetTrue),
+                Arg::new("suppress")
+                    .short('s')
+                    .long("only-delimited")
+                    .help("do not print lines that contain no delimiter")
+                    .requires("fields")
+                    .action(ArgAction::SetTrue),
+                Arg::new("file").value_name("FILE").num_args(0..),
+            ])
+            .group(
+                ArgGroup::new("flags")
+                    .args(["bytes", "characters", "fields"])
+                    .required(true),
+            )
+    }
+
+    /// Reads every FILE (`-` or no FILE means stdin) line by line and prints the selection.
+    fn run(&self, matches: Option<&clap::ArgMatches>) -> Result<(), Box<dyn Error>> {
+        let Some(matches) = matches else {
+            return Err(Box::new(io::Error::new(io::ErrorKind::Other, "no input")));
+        };
+        let files: Vec<String> = match matches.get_many::<String>("file") {
+            Some(f) => f.cloned().collect(),
+            None => vec!["-".to_string()],
+        };
+        let (ranges, operator) = if let Some(raw_ranges) = matches.get_many::<String>("bytes") {
+            (get_ranges(raw_ranges)?, Operator::Bytes)
+        } else if let Some(raw_ranges) = matches.get_many::<String>("characters") {
+            (get_ranges(raw_ranges)?, Operator::Characters)
+        } else if let Some(raw_ranges) = matches.get_many::<String>("fields") {
+            (get_ranges(raw_ranges)?, Operator::Fields)
+        } else {
+            unreachable!()
+        };
+        // These do not change between lines, so look them up once.
+        let suppress = matches.get_flag("suppress");
+        let delimiter = matches.get_one::<String>("delimiter");
+        for file in files {
+            let reader: Box<dyn BufRead> = if file.as_str() == "-" {
+                Box::new(BufReader::new(io::stdin()))
+            } else {
+                let fd = File::open(&file)?;
+                Box::new(BufReader::new(fd))
+            };
+            for line in reader.lines() {
+                let line = line?;
+                match operator {
+                    Operator::Bytes => print_bytes(&line, &ranges)?,
+                    Operator::Characters => print_chars(&line, &ranges)?,
+                    Operator::Fields => print_fields(&line, &ranges, suppress, delimiter)?,
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Reports `crate::Path::UsrBin` as the path for this applet.
+    fn path(&self) -> Option<crate::Path> {
+        Some(crate::Path::UsrBin)
+    }
+}
+
+/// Which unit the selected positions refer to.
+enum Operator {
+    Bytes,
+    Characters,
+    Fields,
+}
+
+/// Parses every element of a LIST argument into a [`ParsedRange`].
+fn get_ranges(raw_ranges: ValuesRef<String>) -> Result<Vec<ParsedRange>, Box<dyn Error>> {
+    raw_ranges
+        .map(|range| parse_range(range.as_str()))
+        .collect()
+}
+
+/// Writes the selected bytes of `line` to stdout, followed by a newline.
+///
+/// Bytes are written raw instead of as `char`s, so non-ASCII bytes are not re-encoded.
+fn print_bytes(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
+    let combined_ranges = combine_ranges(ranges, line.len());
+    let mut writer = BufWriter::new(std::io::stdout());
+    let selected: Vec<u8> = line
+        .bytes()
+        .enumerate()
+        .filter(|(idx, _)| combined_ranges.binary_search(idx).is_ok())
+        .map(|(_, byte)| byte)
+        .collect();
+    writer.write_all(&selected)?;
+    writer.write_all(b"\n")?;
+    writer.flush()?;
+    Ok(())
+}
+
+/// Writes the selected characters of `line` to stdout, followed by a newline.
+fn print_chars(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
+    let combined_ranges = combine_ranges(ranges, line.len());
+    let mut writer = BufWriter::new(std::io::stdout());
+    let selected: String = line
+        .chars()
+        .enumerate()
+        .filter(|(idx, _)| combined_ranges.binary_search(idx).is_ok())
+        .map(|(_, ch)| ch)
+        .collect();
+    writeln!(writer, "{selected}")?;
+    writer.flush()?;
+    Ok(())
+}
+
+/// Writes the selected `delimiter`-separated fields of `line` to stdout.
+///
+/// Selected fields are joined with the delimiter again. A line without any delimiter is
+/// printed unchanged, or skipped entirely when `suppress` is set.
+fn print_fields(
+    line: &str,
+    ranges: &[ParsedRange],
+    suppress: bool,
+    delimiter: Option<&String>,
+) -> Result<(), Box<dyn Error>> {
+    let mut writer = BufWriter::new(std::io::stdout());
+    let delimiter = delimiter.map_or("\t", String::as_str);
+    if !line.contains(delimiter) {
+        if !suppress {
+            writeln!(writer, "{line}")?;
+            writer.flush()?;
+        }
+        return Ok(());
+    }
+    let combined_ranges = combine_ranges(ranges, line.len());
+    let selected: Vec<&str> = line
+        .split(delimiter)
+        .enumerate()
+        .filter(|(idx, _)| combined_ranges.binary_search(idx).is_ok())
+        .map(|(_, field)| field)
+        .collect();
+    writeln!(writer, "{}", selected.join(delimiter))?;
+    writer.flush()?;
+    Ok(())
+}
+
+/// One element of a LIST, stored with zero-based positions.
+enum ParsedRange {
+    /// `N-M`: half-open range of zero-based positions.
+    Bounded(Range<usize>),
+    /// `N-`: from position N to the end of the line.
+    LowerBounded(RangeFrom<usize>),
+    /// `-M`: from the start of the line; `end` is the last position, inclusive.
+    UpperBounded(RangeTo<usize>),
+    /// `N`: a single position.
+    Single(usize),
+}
+
+impl From<Range<usize>> for ParsedRange {
+    fn from(value: Range<usize>) -> Self {
+        Self::Bounded(value)
+    }
+}
+
+impl From<RangeFrom<usize>> for ParsedRange {
+    fn from(value: RangeFrom<usize>) -> Self {
+        Self::LowerBounded(value)
+    }
+}
+
+impl From<RangeTo<usize>> for ParsedRange {
+    fn from(value: RangeTo<usize>) -> Self {
+        Self::UpperBounded(value)
+    }
+}
+
+impl From<usize> for ParsedRange {
+    fn from(value: usize) -> Self {
+        Self::Single(value)
+    }
+}
+
+/// Expands `ranges` into a sorted, duplicate-free list of zero-based positions.
+///
+/// Positions greater than `max` are dropped so that a huge range does not allocate
+/// far more entries than a line can ever use.
+fn combine_ranges(ranges: &[ParsedRange], max: usize) -> Vec<usize> {
+    let mut outrange: Vec<usize> = vec![];
+    let limit = max.saturating_add(1);
+    for range in ranges {
+        match range {
+            ParsedRange::Bounded(r) => outrange.extend(r.start..r.end.min(limit)),
+            ParsedRange::LowerBounded(r) => outrange.extend(r.start..=max),
+            ParsedRange::UpperBounded(r) => outrange.extend(0..=r.end.min(max)),
+            ParsedRange::Single(u) => {
+                if *u <= max {
+                    outrange.push(*u);
+                }
+            }
+        }
+    }
+    // `dedup` only removes adjacent duplicates, so sort first.
+    outrange.sort_unstable();
+    outrange.dedup();
+    outrange
+}
+
+/// Builds the error reported for a malformed LIST element.
+fn invalid_range() -> Box<dyn Error> {
+    Box::new(io::Error::new(io::ErrorKind::Other, "invalid range"))
+}
+
+/// Parses a one-based position, rejecting the empty string and zero.
+fn parse_position(text: &str) -> Result<usize, Box<dyn Error>> {
+    if text.is_empty() {
+        return Err(invalid_range());
+    }
+    match text.parse::<usize>()? {
+        0 => Err(invalid_range()),
+        position => Ok(position),
+    }
+}
+
+/// Parses one LIST element: `N`, `N-`, `-M` or `N-M` (one-based, inclusive).
+fn parse_range(range: &str) -> Result<ParsedRange, Box<dyn Error>> {
+    if let Some(end) = range.strip_prefix('-') {
+        let end = parse_position(end)?;
+        Ok((..end - 1).into())
+    } else if let Some(start) = range.strip_suffix('-') {
+        // Strip the trailing '-' before parsing; parsing it along with the number always fails.
+        let start = parse_position(start)?;
+        Ok((start - 1..).into())
+    } else if let Some((start, end)) = range.split_once('-') {
+        let start = parse_position(start)?;
+        let end = parse_position(end)?;
+        if end < start {
+            return Err(invalid_range());
+        }
+        Ok((start - 1..end).into())
+    } else {
+        Ok((parse_position(range)? - 1).into())
+    }
+}
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
index 08b25d7..7c26704 100644
--- a/src/cmd/mod.rs
+++ b/src/cmd/mod.rs
@@ -9,6 +9,7 @@ mod cat;
 mod chmod;
 pub mod clear;
 mod cp;
+pub mod cut;
 mod date;
 mod dd;
 pub mod dirname;
@@ -41,10 +42,10 @@ pub mod yes;
 
 pub use {
     self::hostname::Hostname, base32::Base32, base64::Base64, basename::Basename,
-    bootstrap::Bootstrap, clear::Clear, dirname::Dirname, echo::Echo, factor::Factor, fold::Fold,
-    groups::Groups, head::Head, link::Link, mountpoint::Mountpoint, nologin::Nologin, nproc::Nproc,
-    r#false::False, r#true::True, rev::Rev, shitbox::Shitbox, sleep::Sleep, sync::SYnc,
-    which::Which, whoami::Whoami, yes::Yes,
+    bootstrap::Bootstrap, clear::Clear, cut::Cut, dirname::Dirname, echo::Echo, factor::Factor,
+    fold::Fold, groups::Groups, head::Head, link::Link, mountpoint::Mountpoint, nologin::Nologin,
+    nproc::Nproc, r#false::False, r#true::True, rev::Rev, shitbox::Shitbox, sleep::Sleep,
+    sync::Sync as SyncCmd, which::Which, whoami::Whoami, yes::Yes,
 };
 
 pub trait Cmd: fmt::Debug + Sync {
@@ -61,6 +62,7 @@ pub fn get(name: &str) -> Option<Box<dyn Cmd>> {
         "basename" => Some(Box::new(Basename::default())),
         "bootstrap" => Some(Box::new(Bootstrap::default())),
         "clear" => Some(Box::new(Clear::default())),
+        "cut" => Some(Box::new(Cut::default())),
         "dirname" => Some(Box::new(Dirname::default())),
         "echo" => Some(Box::new(Echo::default())),
         "factor" => Some(Box::new(Factor::default())),
@@ -75,7 +77,7 @@ pub fn get(name: &str) -> Option<Box<dyn Cmd>> {
         "rev" => Some(Box::new(Rev::default())),
         "shitbox" => Some(Box::new(Shitbox::default())),
         "sleep" => Some(Box::new(Sleep::default())),
-        "sync" => Some(Box::new(SYnc::default())),
+        "sync" => Some(Box::new(SyncCmd::default())),
         "true" => Some(Box::new(True::default())),
         "which" => Some(Box::new(Which::default())),
         "whoami" => Some(Box::new(Whoami::default())),
@@ -84,12 +86,13 @@ pub fn get(name: &str) -> Option<Box<dyn Cmd>> {
     }
 }
 
-pub static COMMANDS: [&'static str; 25] = [
+pub static COMMANDS: [&'static str; 26] = [
     "base32",
     "base64",
     "basename",
     "bootstrap",
     "clear",
+    "cut",
     "dirname",
     "echo",
     "false",
diff --git a/src/cmd/sync/mod.rs b/src/cmd/sync/mod.rs
index 86fe70f..95442e5 100644
--- a/src/cmd/sync/mod.rs
+++ b/src/cmd/sync/mod.rs
@@ -1,11 +1,11 @@
 use super::Cmd;
 use clap::{Arg, ArgAction, Command};
-use std::{error::Error, io, ffi::CString};
+use std::{error::Error, ffi::CString, io};
 
 #[derive(Debug, Default)]
-pub struct SYnc;
+pub struct Sync;
 
-impl Cmd for SYnc {
+impl Cmd for Sync {
     fn cli(&self) -> clap::Command {
         Command::new("sync")
             .about("force completion of pending disk writes (flush cache)")
@@ -66,7 +66,9 @@ impl Cmd for SYnc {
                 }
             }
         } else {
-            unsafe { libc::sync(); }
+            unsafe {
+                libc::sync();
+            }
         }
         Ok(())
     }