use super::Cmd; use clap::{parser::ValuesRef, Arg, ArgAction, ArgGroup, Command}; use std::{ error::Error, fs::File, io::{self, BufRead, BufReader, BufWriter, Write}, ops::{Range, RangeFrom, RangeTo}, usize, vec, }; #[derive(Debug, Default)] pub struct Cut; impl Cmd for Cut { fn cli(&self) -> clap::Command { Command::new("cut") .about("cut out selected fields of each line of a file") .author("Nathan Fisher") .version(env!("CARGO_PKG_VERSION")) .args([ Arg::new("bytes") .short('b') .long("bytes") .help("select only these bytes") .num_args(1) .allow_hyphen_values(true) .value_name("LIST") .value_delimiter(','), Arg::new("characters") .short('c') .long("characters") .help("select only these characters") .num_args(1) .allow_hyphen_values(true) .value_name("LIST") .value_delimiter(','), Arg::new("fields") .short('f') .long("fields") .help("select only these fields") .num_args(1) .allow_hyphen_values(true) .value_name("LIST") .value_delimiter(','), Arg::new("delimiter") .short('d') .long("delimiter") .help("use DELIM instead of TAB for field delimiter") .num_args(1) .value_name("DELIM"), Arg::new("nosplit").help("ignored").short('n'), Arg::new("suppress") .short('s') .long("only-delimited") .requires("fields") .action(ArgAction::SetTrue), Arg::new("file").value_name("FILE").num_args(0..), ]) .group( ArgGroup::new("flags") .args(["bytes", "characters", "fields"]) .required(true), ) } fn run(&self, matches: Option<&clap::ArgMatches>) -> Result<(), Box> { let Some(matches) = matches else { return Err(Box::new(io::Error::new(io::ErrorKind::Other, "no input"))); }; let files: Vec = match matches.get_many::("file") { Some(f) => f.cloned().collect(), None => vec!["-".to_string()], }; let (ranges, operator) = if let Some(raw_ranges) = matches.get_many::("bytes") { (get_ranges(raw_ranges)?, Operator::Bytes) } else if let Some(raw_ranges) = matches.get_many::("characters") { (get_ranges(raw_ranges)?, Operator::Characters) } else if let Some(raw_ranges) = matches.get_many::("fields") { (get_ranges(raw_ranges)?, Operator::Fields) } else { unreachable!() }; for file in files { let reader: Box = if file.as_str() == "-" { Box::new(BufReader::new(io::stdin())) } else { let fd = File::open(&file)?; Box::new(BufReader::new(fd)) }; for line in reader.lines() { match operator { Operator::Bytes => print_bytes(&line?, &ranges)?, Operator::Characters => print_chars(&line?, &ranges)?, Operator::Fields => print_fields( &line?, &ranges, matches.get_flag("suppress"), matches.get_one::("delimiter"), )?, } } } Ok(()) } fn path(&self) -> Option { Some(crate::Path::UsrBin) } } enum Operator { Bytes, Characters, Fields, } fn get_ranges(raw_ranges: ValuesRef) -> Result, Box> { let mut ranges = vec![]; for range in raw_ranges { let parsed_range = parse_range(range)?; ranges.push(parsed_range); } Ok(ranges) } enum ParsedRange { Bounded(Range), LowerBounded(RangeFrom), UpperBounded(RangeTo), Single(usize), } impl From> for ParsedRange { fn from(value: Range) -> Self { Self::Bounded(value) } } impl From> for ParsedRange { fn from(value: RangeFrom) -> Self { Self::LowerBounded(value) } } impl From> for ParsedRange { fn from(value: RangeTo) -> Self { Self::UpperBounded(value) } } impl From for ParsedRange { fn from(value: usize) -> Self { Self::Single(value) } } fn combine_ranges(ranges: &[ParsedRange], max: usize) -> Vec { let mut outrange: Vec = vec![]; for range in ranges { match range { ParsedRange::Bounded(r) => { for n in r.start..r.end { outrange.push(n); } } ParsedRange::LowerBounded(r) => { for n in r.start..=max { outrange.push(n); } } ParsedRange::UpperBounded(r) => { for n in 0..=r.end { outrange.push(n); } } ParsedRange::Single(u) => outrange.push(*u), } } outrange.dedup(); outrange.sort_unstable(); outrange } fn parse_range(range: &str) -> Result> { if let Some(stripped) = range.strip_prefix('-') { if range.len() > 1 { let end: usize = stripped.parse()?; if end >= 1 { Ok((..end - 1).into()) } else { Err(Box::new(io::Error::new( io::ErrorKind::Other, "invalid range", ))) } } else { Err(Box::new(io::Error::new( io::ErrorKind::Other, "invalid range", ))) } } else if range.ends_with('-') { if range.len() > 1 { let start: usize = range[0..range.len()].parse()?; if start >= 1 { Ok((start - 1..).into()) } else { Err(Box::new(io::Error::new( io::ErrorKind::Other, "invalid range", ))) } } else { Err(Box::new(io::Error::new( io::ErrorKind::Other, "invalid range", ))) } } else if let Some((start, end)) = range.split_once('-') { let start: usize = start.parse()?; let end: usize = end.parse()?; if start >= 1 && end > start { Ok((start - 1..end).into()) } else { Err(Box::new(io::Error::new( io::ErrorKind::Other, "invalid range", ))) } } else { let range: usize = range.parse::()? - 1; Ok(range.into()) } } fn print_bytes(line: &str, ranges: &[ParsedRange]) -> Result<(), Box> { let combined_ranges = combine_ranges(ranges, line.len()); let mut writer = BufWriter::new(std::io::stdout()); for (idx, byte) in line.as_bytes().iter().enumerate() { if combined_ranges.contains(&idx) { write!(writer, "{}", *byte as char)?; } } writeln!(writer)?; Ok(()) } fn print_chars(line: &str, ranges: &[ParsedRange]) -> Result<(), Box> { let combined_ranges = combine_ranges(ranges, line.len()); let mut writer = BufWriter::new(std::io::stdout()); for (idx, ch) in line.chars().enumerate() { if combined_ranges.contains(&idx) { write!(writer, "{}", &ch)?; } } writeln!(writer)?; Ok(()) } fn print_fields( line: &str, ranges: &[ParsedRange], suppress: bool, delimiter: Option<&String>, ) -> Result<(), Box> { let combined_ranges = combine_ranges(ranges, line.len()); let mut writer = BufWriter::new(std::io::stdout()); let delimiter = match delimiter { Some(d) => d, None => "\t", }; if suppress && !line.contains(delimiter) { return Ok(()); } let fields = line.split(delimiter); for (idx, field) in fields.enumerate() { if combined_ranges.contains(&idx) { write!(writer, "{field}")?; } } writeln!(writer)?; Ok(()) }