shitbox/src/cmd/cut/mod.rs

281 lines
8.5 KiB
Rust
Raw Normal View History

2023-01-14 02:08:14 -05:00
use super::Cmd;
use clap::{parser::ValuesRef, Arg, ArgAction, ArgGroup, Command};
use std::{
error::Error,
fs::File,
io::{self, BufRead, BufReader, BufWriter, Write},
ops::{Range, RangeFrom, RangeTo},
usize, vec,
};
/// The `cut` applet: cuts out selected bytes, characters, or fields of each
/// input line. All behaviour lives in its [`Cmd`] implementation.
#[derive(Debug, Default)]
pub struct Cut;
impl Cmd for Cut {
fn cli(&self) -> clap::Command {
Command::new("cut")
.about("cut out selected fields of each line of a file")
.author("Nathan Fisher")
.version(env!("CARGO_PKG_VERSION"))
.args([
Arg::new("bytes")
.short('b')
.long("bytes")
.help("select only these bytes")
.num_args(1)
.allow_hyphen_values(true)
.value_name("LIST")
.value_delimiter(','),
Arg::new("characters")
.short('c')
.long("characters")
.help("select only these characters")
.num_args(1)
.allow_hyphen_values(true)
.value_name("LIST")
.value_delimiter(','),
Arg::new("fields")
.short('f')
.long("fields")
.help("select only these fields")
.num_args(1)
.allow_hyphen_values(true)
.value_name("LIST")
.value_delimiter(','),
Arg::new("delimiter")
.short('d')
.long("delimiter")
.help("use DELIM instead of TAB for field delimiter")
.num_args(1)
.value_name("DELIM"),
Arg::new("nosplit").help("ignored").short('n'),
Arg::new("suppress")
.short('s')
.long("only-delimited")
.requires("fields")
.action(ArgAction::SetTrue),
Arg::new("file").value_name("FILE").num_args(0..),
])
.group(
ArgGroup::new("flags")
.args(["bytes", "characters", "fields"])
.required(true),
)
}
fn run(&self, matches: &clap::ArgMatches) -> Result<(), Box<dyn Error>> {
2023-01-14 02:08:14 -05:00
let files: Vec<String> = match matches.get_many::<String>("file") {
Some(f) => f.cloned().collect(),
None => vec!["-".to_string()],
};
let (ranges, operator) = if let Some(raw_ranges) = matches.get_many::<String>("bytes") {
(get_ranges(raw_ranges)?, Operator::Bytes)
} else if let Some(raw_ranges) = matches.get_many::<String>("characters") {
(get_ranges(raw_ranges)?, Operator::Characters)
} else if let Some(raw_ranges) = matches.get_many::<String>("fields") {
(get_ranges(raw_ranges)?, Operator::Fields)
} else {
unreachable!()
};
for file in files {
let reader: Box<dyn BufRead> = if file.as_str() == "-" {
Box::new(BufReader::new(io::stdin()))
} else {
let fd = File::open(&file)?;
Box::new(BufReader::new(fd))
};
for line in reader.lines() {
match operator {
Operator::Bytes => print_bytes(&line?, &ranges)?,
Operator::Characters => print_chars(&line?, &ranges)?,
Operator::Fields => print_fields(
&line?,
&ranges,
matches.get_flag("suppress"),
matches.get_one::<String>("delimiter"),
)?,
}
}
}
Ok(())
}
fn path(&self) -> Option<crate::Path> {
Some(crate::Path::UsrBin)
}
}
/// Which unit of the input line the selected ranges index into.
enum Operator {
    /// Ranges select bytes (`-b`).
    Bytes,
    /// Ranges select characters (`-c`).
    Characters,
    /// Ranges select delimiter-separated fields (`-f`).
    Fields,
}
/// Parses each raw LIST element into a [`ParsedRange`], in order.
///
/// Parsing stops at the first element that `parse_range` rejects, and that
/// error is returned.
fn get_ranges(raw_ranges: ValuesRef<String>) -> Result<Vec<ParsedRange>, Box<dyn Error>> {
    raw_ranges.map(|raw| parse_range(raw)).collect()
}
/// One element of a `cut` LIST, already converted to zero-based indices.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedRange {
    /// `N-M`: the half-open index range `start..end`.
    Bounded(Range<usize>),
    /// `N-`: every index from `start` up to the end of the line.
    LowerBounded(RangeFrom<usize>),
    /// `-M`: every index from 0 through `end`. Unlike a normal `RangeTo`,
    /// `end` is treated as inclusive when the range is expanded.
    UpperBounded(RangeTo<usize>),
    /// `N`: exactly one index.
    Single(usize),
}
impl From<Range<usize>> for ParsedRange {
fn from(value: Range<usize>) -> Self {
Self::Bounded(value)
}
}
impl From<RangeFrom<usize>> for ParsedRange {
fn from(value: RangeFrom<usize>) -> Self {
Self::LowerBounded(value)
}
}
impl From<RangeTo<usize>> for ParsedRange {
fn from(value: RangeTo<usize>) -> Self {
Self::UpperBounded(value)
}
}
impl From<usize> for ParsedRange {
fn from(value: usize) -> Self {
Self::Single(value)
}
}
/// Expands `ranges` into a sorted, duplicate-free list of zero-based indices.
///
/// `max` is the highest index the caller can be interested in. Open-ended
/// ranges stop there, and `Bounded`/`UpperBounded` ranges are clamped to it so
/// that an enormous user-supplied bound (e.g. `1-18446744073709551615`) does
/// not materialise billions of indices. `Single` values are kept as given.
fn combine_ranges(ranges: &[ParsedRange], max: usize) -> Vec<usize> {
    let mut outrange: Vec<usize> = Vec::new();
    for range in ranges {
        match range {
            ParsedRange::Bounded(r) => {
                // `end` is exclusive, so the clamp is `max + 1`.
                outrange.extend(r.start..r.end.min(max.saturating_add(1)));
            }
            ParsedRange::LowerBounded(r) => outrange.extend(r.start..=max),
            // `end` of an `UpperBounded` range is inclusive by convention.
            ParsedRange::UpperBounded(r) => outrange.extend(0..=r.end.min(max)),
            ParsedRange::Single(u) => outrange.push(*u),
        }
    }
    // `dedup` only removes *adjacent* duplicates, so it must run after sorting.
    outrange.sort_unstable();
    outrange.dedup();
    outrange
}
fn parse_range(range: &str) -> Result<ParsedRange, Box<dyn Error>> {
2023-01-14 02:34:27 -05:00
if let Some(stripped) = range.strip_prefix('-') {
2023-01-14 02:08:14 -05:00
if range.len() > 1 {
2023-01-14 02:34:27 -05:00
let end: usize = stripped.parse()?;
2023-01-14 02:08:14 -05:00
if end >= 1 {
Ok((..end - 1).into())
} else {
Err(Box::new(io::Error::new(
io::ErrorKind::Other,
"invalid range",
)))
}
} else {
Err(Box::new(io::Error::new(
io::ErrorKind::Other,
"invalid range",
)))
}
} else if range.ends_with('-') {
if range.len() > 1 {
let start: usize = range[0..range.len()].parse()?;
if start >= 1 {
Ok((start - 1..).into())
} else {
Err(Box::new(io::Error::new(
io::ErrorKind::Other,
"invalid range",
)))
}
} else {
Err(Box::new(io::Error::new(
io::ErrorKind::Other,
"invalid range",
)))
}
} else if let Some((start, end)) = range.split_once('-') {
let start: usize = start.parse()?;
let end: usize = end.parse()?;
if start >= 1 && end > start {
Ok((start - 1..end).into())
} else {
Err(Box::new(io::Error::new(
io::ErrorKind::Other,
"invalid range",
)))
}
} else {
let range: usize = range.parse::<usize>()? - 1;
Ok(range.into())
}
}
2023-01-14 02:34:27 -05:00
fn print_bytes(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
let combined_ranges = combine_ranges(ranges, line.len());
let mut writer = BufWriter::new(std::io::stdout());
for (idx, byte) in line.as_bytes().iter().enumerate() {
if combined_ranges.contains(&idx) {
write!(writer, "{}", *byte as char)?;
}
}
writeln!(writer)?;
Ok(())
}
fn print_chars(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
let combined_ranges = combine_ranges(ranges, line.len());
let mut writer = BufWriter::new(std::io::stdout());
for (idx, ch) in line.chars().enumerate() {
if combined_ranges.contains(&idx) {
write!(writer, "{}", &ch)?;
}
}
writeln!(writer)?;
Ok(())
}
fn print_fields(
line: &str,
ranges: &[ParsedRange],
suppress: bool,
delimiter: Option<&String>,
) -> Result<(), Box<dyn Error>> {
let combined_ranges = combine_ranges(ranges, line.len());
let mut writer = BufWriter::new(std::io::stdout());
let delimiter = match delimiter {
Some(d) => d,
None => "\t",
};
if suppress && !line.contains(delimiter) {
return Ok(());
}
let fields = line.split(delimiter);
for (idx, field) in fields.enumerate() {
if combined_ranges.contains(&idx) {
write!(writer, "{field}")?;
}
}
writeln!(writer)?;
Ok(())
}