2023-01-14 02:08:14 -05:00
|
|
|
use super::Cmd;
|
|
|
|
use clap::{parser::ValuesRef, Arg, ArgAction, ArgGroup, Command};
|
|
|
|
use std::{
|
|
|
|
error::Error,
|
|
|
|
fs::File,
|
|
|
|
io::{self, BufRead, BufReader, BufWriter, Write},
|
|
|
|
ops::{Range, RangeFrom, RangeTo},
|
|
|
|
usize, vec,
|
|
|
|
};
|
|
|
|
|
|
|
|
/// The `cut` applet: prints selected bytes, characters or fields of each
/// input line. It carries no state; all behaviour comes from the parsed
/// command-line arguments.
#[derive(Debug, Default, Clone, Copy)]
pub struct Cut;
|
|
|
|
|
|
|
|
impl Cmd for Cut {
|
|
|
|
fn cli(&self) -> clap::Command {
|
|
|
|
Command::new("cut")
|
|
|
|
.about("cut out selected fields of each line of a file")
|
|
|
|
.author("Nathan Fisher")
|
|
|
|
.version(env!("CARGO_PKG_VERSION"))
|
|
|
|
.args([
|
|
|
|
Arg::new("bytes")
|
|
|
|
.short('b')
|
|
|
|
.long("bytes")
|
|
|
|
.help("select only these bytes")
|
|
|
|
.num_args(1)
|
|
|
|
.allow_hyphen_values(true)
|
|
|
|
.value_name("LIST")
|
|
|
|
.value_delimiter(','),
|
|
|
|
Arg::new("characters")
|
|
|
|
.short('c')
|
|
|
|
.long("characters")
|
|
|
|
.help("select only these characters")
|
|
|
|
.num_args(1)
|
|
|
|
.allow_hyphen_values(true)
|
|
|
|
.value_name("LIST")
|
|
|
|
.value_delimiter(','),
|
|
|
|
Arg::new("fields")
|
|
|
|
.short('f')
|
|
|
|
.long("fields")
|
|
|
|
.help("select only these fields")
|
|
|
|
.num_args(1)
|
|
|
|
.allow_hyphen_values(true)
|
|
|
|
.value_name("LIST")
|
|
|
|
.value_delimiter(','),
|
|
|
|
Arg::new("delimiter")
|
|
|
|
.short('d')
|
|
|
|
.long("delimiter")
|
|
|
|
.help("use DELIM instead of TAB for field delimiter")
|
|
|
|
.num_args(1)
|
|
|
|
.value_name("DELIM"),
|
|
|
|
Arg::new("nosplit").help("ignored").short('n'),
|
|
|
|
Arg::new("suppress")
|
|
|
|
.short('s')
|
|
|
|
.long("only-delimited")
|
|
|
|
.requires("fields")
|
|
|
|
.action(ArgAction::SetTrue),
|
|
|
|
Arg::new("file").value_name("FILE").num_args(0..),
|
|
|
|
])
|
|
|
|
.group(
|
|
|
|
ArgGroup::new("flags")
|
|
|
|
.args(["bytes", "characters", "fields"])
|
|
|
|
.required(true),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn run(&self, matches: Option<&clap::ArgMatches>) -> Result<(), Box<dyn Error>> {
|
|
|
|
let Some(matches) = matches else {
|
|
|
|
return Err(Box::new(io::Error::new(io::ErrorKind::Other, "no input")));
|
|
|
|
};
|
|
|
|
let files: Vec<String> = match matches.get_many::<String>("file") {
|
|
|
|
Some(f) => f.cloned().collect(),
|
|
|
|
None => vec!["-".to_string()],
|
|
|
|
};
|
|
|
|
let (ranges, operator) = if let Some(raw_ranges) = matches.get_many::<String>("bytes") {
|
|
|
|
(get_ranges(raw_ranges)?, Operator::Bytes)
|
|
|
|
} else if let Some(raw_ranges) = matches.get_many::<String>("characters") {
|
|
|
|
(get_ranges(raw_ranges)?, Operator::Characters)
|
|
|
|
} else if let Some(raw_ranges) = matches.get_many::<String>("fields") {
|
|
|
|
(get_ranges(raw_ranges)?, Operator::Fields)
|
|
|
|
} else {
|
|
|
|
unreachable!()
|
|
|
|
};
|
|
|
|
for file in files {
|
|
|
|
let reader: Box<dyn BufRead> = if file.as_str() == "-" {
|
|
|
|
Box::new(BufReader::new(io::stdin()))
|
|
|
|
} else {
|
|
|
|
let fd = File::open(&file)?;
|
|
|
|
Box::new(BufReader::new(fd))
|
|
|
|
};
|
|
|
|
for line in reader.lines() {
|
|
|
|
match operator {
|
|
|
|
Operator::Bytes => print_bytes(&line?, &ranges)?,
|
|
|
|
Operator::Characters => print_chars(&line?, &ranges)?,
|
|
|
|
Operator::Fields => print_fields(
|
|
|
|
&line?,
|
|
|
|
&ranges,
|
|
|
|
matches.get_flag("suppress"),
|
|
|
|
matches.get_one::<String>("delimiter"),
|
|
|
|
)?,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn path(&self) -> Option<crate::Path> {
|
|
|
|
Some(crate::Path::UsrBin)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Unit of selection applied to each input line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Operator {
    /// Select by byte position.
    Bytes,
    /// Select by character position.
    Characters,
    /// Select by delimiter-separated field.
    Fields,
}
|
|
|
|
|
|
|
|
fn get_ranges(raw_ranges: ValuesRef<String>) -> Result<Vec<ParsedRange>, Box<dyn Error>> {
|
|
|
|
let mut ranges = vec![];
|
|
|
|
for range in raw_ranges {
|
|
|
|
let parsed_range = parse_range(range)?;
|
|
|
|
ranges.push(parsed_range);
|
|
|
|
}
|
|
|
|
Ok(ranges)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// One parsed LIST entry, expressed in zero-based indices.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ParsedRange {
    /// `N-M`: half-open range of zero-based indices.
    Bounded(Range<usize>),
    /// `N-`: from the given zero-based index to the end of the line.
    LowerBounded(RangeFrom<usize>),
    /// `-M`: from the start of the line. Note that `end` holds the last
    /// selected zero-based index and is treated as *inclusive* when the
    /// range is expanded.
    UpperBounded(RangeTo<usize>),
    /// `N`: a single zero-based index.
    Single(usize),
}
|
|
|
|
|
|
|
|
impl From<Range<usize>> for ParsedRange {
|
|
|
|
fn from(value: Range<usize>) -> Self {
|
|
|
|
Self::Bounded(value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<RangeFrom<usize>> for ParsedRange {
|
|
|
|
fn from(value: RangeFrom<usize>) -> Self {
|
|
|
|
Self::LowerBounded(value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<RangeTo<usize>> for ParsedRange {
|
|
|
|
fn from(value: RangeTo<usize>) -> Self {
|
|
|
|
Self::UpperBounded(value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<usize> for ParsedRange {
|
|
|
|
fn from(value: usize) -> Self {
|
|
|
|
Self::Single(value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn combine_ranges(ranges: &[ParsedRange], max: usize) -> Vec<usize> {
|
|
|
|
let mut outrange: Vec<usize> = vec![];
|
|
|
|
for range in ranges {
|
|
|
|
match range {
|
|
|
|
ParsedRange::Bounded(r) => {
|
|
|
|
for n in r.start..r.end {
|
|
|
|
outrange.push(n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ParsedRange::LowerBounded(r) => {
|
|
|
|
for n in r.start..=max {
|
|
|
|
outrange.push(n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ParsedRange::UpperBounded(r) => {
|
|
|
|
for n in 0..=r.end {
|
|
|
|
outrange.push(n);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ParsedRange::Single(u) => outrange.push(*u),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
outrange.dedup();
|
|
|
|
outrange.sort_unstable();
|
|
|
|
outrange
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_range(range: &str) -> Result<ParsedRange, Box<dyn Error>> {
|
2023-01-14 02:34:27 -05:00
|
|
|
if let Some(stripped) = range.strip_prefix('-') {
|
2023-01-14 02:08:14 -05:00
|
|
|
if range.len() > 1 {
|
2023-01-14 02:34:27 -05:00
|
|
|
let end: usize = stripped.parse()?;
|
2023-01-14 02:08:14 -05:00
|
|
|
if end >= 1 {
|
|
|
|
Ok((..end - 1).into())
|
|
|
|
} else {
|
|
|
|
Err(Box::new(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
"invalid range",
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Err(Box::new(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
"invalid range",
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
} else if range.ends_with('-') {
|
|
|
|
if range.len() > 1 {
|
|
|
|
let start: usize = range[0..range.len()].parse()?;
|
|
|
|
if start >= 1 {
|
|
|
|
Ok((start - 1..).into())
|
|
|
|
} else {
|
|
|
|
Err(Box::new(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
"invalid range",
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Err(Box::new(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
"invalid range",
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
} else if let Some((start, end)) = range.split_once('-') {
|
|
|
|
let start: usize = start.parse()?;
|
|
|
|
let end: usize = end.parse()?;
|
|
|
|
if start >= 1 && end > start {
|
|
|
|
Ok((start - 1..end).into())
|
|
|
|
} else {
|
|
|
|
Err(Box::new(io::Error::new(
|
|
|
|
io::ErrorKind::Other,
|
|
|
|
"invalid range",
|
|
|
|
)))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
let range: usize = range.parse::<usize>()? - 1;
|
|
|
|
Ok(range.into())
|
|
|
|
}
|
|
|
|
}
|
2023-01-14 02:34:27 -05:00
|
|
|
|
|
|
|
fn print_bytes(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
|
|
|
|
let combined_ranges = combine_ranges(ranges, line.len());
|
|
|
|
let mut writer = BufWriter::new(std::io::stdout());
|
|
|
|
for (idx, byte) in line.as_bytes().iter().enumerate() {
|
|
|
|
if combined_ranges.contains(&idx) {
|
|
|
|
write!(writer, "{}", *byte as char)?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
writeln!(writer)?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn print_chars(line: &str, ranges: &[ParsedRange]) -> Result<(), Box<dyn Error>> {
|
|
|
|
let combined_ranges = combine_ranges(ranges, line.len());
|
|
|
|
let mut writer = BufWriter::new(std::io::stdout());
|
|
|
|
for (idx, ch) in line.chars().enumerate() {
|
|
|
|
if combined_ranges.contains(&idx) {
|
|
|
|
write!(writer, "{}", &ch)?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
writeln!(writer)?;
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn print_fields(
|
|
|
|
line: &str,
|
|
|
|
ranges: &[ParsedRange],
|
|
|
|
suppress: bool,
|
|
|
|
delimiter: Option<&String>,
|
|
|
|
) -> Result<(), Box<dyn Error>> {
|
|
|
|
let combined_ranges = combine_ranges(ranges, line.len());
|
|
|
|
let mut writer = BufWriter::new(std::io::stdout());
|
|
|
|
let delimiter = match delimiter {
|
|
|
|
Some(d) => d,
|
|
|
|
None => "\t",
|
|
|
|
};
|
|
|
|
if suppress && !line.contains(delimiter) {
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
let fields = line.split(delimiter);
|
|
|
|
for (idx, field) in fields.enumerate() {
|
|
|
|
if combined_ranges.contains(&idx) {
|
|
|
|
write!(writer, "{field}")?;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
writeln!(writer)?;
|
|
|
|
Ok(())
|
|
|
|
}
|