diff --git a/Cargo.lock b/Cargo.lock index 330acdb..ce28614 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,60 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "autocfg" version = "1.1.0" @@ -44,6 +98,7 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ + "jobserver", "libc", ] @@ -67,6 +122,81 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "clap" +version = "4.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_complete" +version = "4.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "885e4d7d5af40bfb99ae6f9433e292feac98d452dcb3ec3d25dfe7552b77da8c" +dependencies = [ + "clap", +] + +[[package]] +name = "clap_complete_nushell" +version = "4.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0e48e026ce7df2040239117d25e4e79714907420c70294a5ce4b6bbe6a7b6" +dependencies = [ + "clap", + "clap_complete", +] + +[[package]] +name = "clap_lex" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" + +[[package]] +name = "clap_mangen" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1dd95b5ebb5c1c54581dd6346f3ed6a79a3eef95dd372fc2ac13d535535300e" +dependencies = [ + "clap", + "roff", +] + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "unicode-width", + "windows-sys", +] + [[package]] name = "core-foundation-sys" version = "0.8.6" @@ -133,6 +263,12 @@ version = "1.9.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "generic-array" version = "0.14.7" @@ -148,12 +284,17 @@ name = "haggis" version = "0.1.0" dependencies = [ "chrono", + "clap", + "indicatif", "libc", "md-5", + "package-bootstrap", "rayon", "sha1", "sha2", "termcolor", + "walkdir", + "zstd", ] [[package]] @@ -179,6 +320,45 @@ dependencies = [ "cc", ] +[[package]] +name = "indicatif" +version = "0.17.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +dependencies = [ + "console", + "instant", + "number_prefix", + "portable-atomic", + "unicode-segmentation", + "unicode-width", + "vt100", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jobserver" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.67" @@ -188,6 +368,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = 
"0.2.152" @@ -219,12 +405,42 @@ dependencies = [ "autocfg", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "package-bootstrap" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c7bb771dcab88f8b1b124981ccae34802608bbcc2456bbf6cd4d862c4f5deb" +dependencies = [ + "clap", + "clap_complete", + "clap_complete_nushell", + "clap_mangen", +] + +[[package]] +name = "pkg-config" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "proc-macro2" version = "1.0.76" @@ -263,6 +479,21 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "roff" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b833d8d034ea094b1ea68aa6d5c740e0d04bad9d16568d08ba6f76823a114316" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "sha1" version = "0.10.6" @@ -285,6 +516,12 @@ dependencies = [ "digest", ] +[[package]] +name = "strsim" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" + 
[[package]] name = "syn" version = "2.0.48" @@ -317,12 +554,73 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-segmentation" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vt100" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de" +dependencies = [ + "itoa", + "log", + "unicode-width", + "vte", +] + +[[package]] +name = "vte" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197" +dependencies = [ + "arrayvec", + "utf8parse", + "vte_generate_state_changes", +] + +[[package]] +name = "vte_generate_state_changes" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasm-bindgen" version = "0.2.90" @@ -417,6 +715,15 @@ dependencies = [ "windows-targets 0.52.0", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -530,3 +837,31 @@ name = "windows_x86_64_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index fd5ec08..6c6d26f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,12 +7,57 @@ edition = "2021" [features] parallel = ["dep:rayon"] color = ["dep:termcolor"] +bin = ["dep:clap", "dep:indicatif", "dep:walkdir", "dep:zstd", "parallel", "color"] +bootstrap = ["bin", "dep:package-bootstrap"] + +[[bin]] +name = "haggis" +path = "src/haggis.rs" +required-features = ["bin"] + +[[bin]] +name = "bootstrap" +path = "src/bootstrap.rs" +required-features = ["bootstrap"] [dependencies] 
chrono = "0.4" libc = "0.2" md-5 = "0.10" -rayon = { version = "1.8", optional = true } sha1 = "0.10" sha2 = "0.10" -termcolor = { version = "1.4", optional = true } + +[dependencies.clap] +version = "4.3" +optional = true + +[dependencies.indicatif] +version = "0.17" +features = ["improved_unicode", "vt100"] +optional = true + +[dependencies.package-bootstrap] +version = "0.4" +features = ["mangen"] +optional = true + +[dependencies.rayon] +version = "1.8" +optional = true + +[dependencies.termcolor] +version = "1.4" +optional = true + +[dependencies.walkdir] +version = "2.3" +optional = true + +[dependencies.zstd] +version = "0.13" +optional = true + +[profile.release] +codegen-units = 1 +lto = true +strip = true diff --git a/README.md b/README.md index 974630d..849c246 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,19 @@ -A modern archive format for serializing files, like Tar or Zip +A modern archive format for Unix, like Tar or Zip, designed for high performance +and data integrity. Contents ======== - [Features](#features) - [Building](#building) +- [Crate Features](#crate-features) + - [Parallel execution](#parallel-execution) + - [Colored output](#colored-output) + - [Reference binary](#reference-binary) + - [Distribution](#distribution) +- [Comparison with Tar](#comparison-with-tar) +- [On Compression](#on-compression) - [Contributing](#contributing) -- [Raodmap](#roadmap) +- [Roadmap](#roadmap) ## Features For a more full specification of the format, please see [Format.md](Format.md) @@ -26,6 +34,104 @@ rather than from the crates.io package registry. [dependencies.haggis] git = "https://codeberg.org/jeang3nie/haggis.git" ``` +## Crate Features +### Parallel execution +The `parallel` feature enables parallel file operations via +[Rayon](https://crates.io/crates/rayon). When creating an archive, files will be +read and checksummed in separate threads and the data passed back to the main +thread for writing an archive. 
During extraction, the main thread reads the +archive and passes each node to a worker thread to verify its checksum and write +the file to disk. + +### Colored output +The `color` feature enables colored output when listing archive members, using +the [termcolor](https://crates.io/crates/termcolor) crate. + +### Reference binary +The reference binary application can be built by running `cargo build` with the +`bin` feature enabled. The binary enables both parallel and color features. Data +can be in compressed form with [zstd](https://github.com/facebook/zstd) compression. +```Sh +cargo build --features bin +``` + +The reference binary has been designed to closely parallel the functionality of +**tar** while being a little nicer to use overall. Progress bars are provided by +default, output is colorized, and a long listing format of archive members (similar +to running `ls -l` in a directory) is available which will print various metadata +about archive members. Quick help is available with the `--help` option. + +### Distribution +A *bootstrap* binary can be built with the `bootstrap` feature enabled. This +binary can then be run to install the binary and generate and install Unix man +pages and shell completions to a given prefix. This can be used to install all +of the above into the filesystem, or to install into a staging directory for +easy packaging. This feature leverages the +[package-bootstrap](https://crates.io/crates/package-bootstrap) crate. + +## Comparison with Tar +The venerable Unix archiver, Tar, has the benefit of being ubiquitous on every Unix +and Unix-like operating system. Beyond that, tar is a rather clunky format with a +number of design flaws and quirks. +- The original Tar specification had a hard limit in path names of 100 bytes +- The Ustar revision of the original Tar specification only partially fixed the + 100 byte filename limit by adding a separate field in which to store the directory + component of the pathname.
Pathnames are still limited in size to 350 bytes. +- GNU tar fixed the filename limitation with GNU tar headers. GNU tar headers are + not documented anywhere other than the GNU tar source code, so other implementations + have ignored the GNU format and it never caught on. +- All metadata in a Tar header is stored in ascii. This means that things like numbers + must be parsed from ascii. +- Tar stores all metadata fields based on offsets from the start of the header, + often leading to significant padding between fields. +- File data in a Tar archive is split into 512 byte blocks. Since the final block + must also be 512 bytes, there is yet more padding. +- The same filename may be repeated later in a Tar archive, overwriting the first file + during extraction. +- All potential metadata fields always exist in a header, even if that particular field + makes no sense in context. Example - device major and minor numbers are stored for + regular files, directories and symlinks. This is wasted space. + +Compared with Tar, Haggis takes a different approach. All integer values are stored +as little endian byte arrays, exactly the same as the in memory representation of a +little endian computer. All metadata strings are preceded by their length, requiring +no padding between fields. The actual contents of regular files are written as a byte +array, and again preceded by the length in bytes, so once again no padding is required. + +If you've gotten this far, you might be noticing some differences in design philosophy. +- Ascii is great for humans to read but terrible for computers. Since archives are + read by computers, not humans, ascii is bad. +- Padding is extra bytes. Sure, that overhead tends to get squashed after compressing + an archive, but it requires more memory to create the extra zeroes and more memory + to extract them. Better to not use padding everywhere. 
+- Using offsets would always have led to embarrassingly shortsighted limitations + such as the filename length limitation that has plagued Tar from day one. Variable + length fields are easily handled by storing their length first. +- By using a flag to tell the archiver what **kind** of file is being stored, the + archiver can expect different metadata fields for different filetypes, again saving + on space in the file header. + +## On Compression +The author performed some very non-scientific testing of various archive formats +and settled on [zstd](https://github.com/facebook/zstd) as being so superior as to +make all other common compression schemes irrelevant for **general** usage. Gzip and +Bzip2 have woefully lower compression ratios and terrible performance. The +[xz](https://tukaani.org/xz/) compression algorithm offers much better compression at +the cost of poor performance. Meta may be evil overall, but zstd offers compression +ratios on par with xz and performance that is higher than all three major competitors. +Zstd now comes pre-installed on virtually every Linux system and is easily installed +on BSD and other Unix-like systems. It is the new standard. + +Other compression schemes could have been implemented into the library code, but +that would add to the maintenance burden while not adding significantly useful +functionality. You need to be able to open gzip compressed Tar archives because there +are literally millions of them out there. Not so for a greenfield project such as +Haggis. Better to encourage the use of one good compression format and discourage +the continued use of legacy software. + +If you absolutely **must** compress a haggis archive using gzip or bzip2, you can +do so manually. The *haggis* binary does not provide this functionality. Don't ask. + ## Contributing Contributions are always welcome.
Please run `cargo fmt` and `cargo clippy` and fix any issues before sending pull requests on Codeberg or patches via `git send-email`. diff --git a/src/bootstrap.rs b/src/bootstrap.rs new file mode 100644 index 0000000..134facb --- /dev/null +++ b/src/bootstrap.rs @@ -0,0 +1,49 @@ +#![allow(dead_code)] + +use std::path::Path; + +use clap::ArgAction; +mod cli; +use { + clap::{Arg, Command}, + package_bootstrap::Bootstrap, + std::{error::Error, path::PathBuf}, +}; + +fn main() -> Result<(), Box> { + let matches = Command::new("bootstrap") + .about("install the software") + .author("Nathan Fisher") + .version(env!("CARGO_PKG_VERSION")) + .args([ + Arg::new("target-dir") + .help("the directory where the 'hpk' binary is located") + .short('t') + .long("target-dir") + .num_args(1), + Arg::new("meta") + .help("Install License and Readme files in doc subdirectory") + .short('m') + .long("meta") + .action(ArgAction::SetTrue), + Arg::new("output") + .help("the output directory for the installation") + .required(true) + .num_args(1), + ]) + .get_matches(); + let outdir = matches.get_one::("output").unwrap().to_string(); + let outdir = PathBuf::from(&outdir); + let target_dir = matches + .get_one::("target-dir") + .map(|x| x.to_string()); + let bs = Bootstrap::new("haggis", cli::haggis(), &outdir); + bs.install(target_dir, 1)?; + if matches.get_flag("meta") { + bs.docfiles(&["README.md", "LICENSE.md"], &Path::new("haggis"))?; + } + Bootstrap::new("haggis-create", cli::create(), &outdir).manpage(1)?; + Bootstrap::new("haggis-extract", cli::extract(), &outdir).manpage(1)?; + Bootstrap::new("haggis-list", cli::list(), &outdir).manpage(1)?; + Ok(()) +} diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..a00d9cc --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,162 @@ +use clap::{value_parser, Arg, ArgAction, Command, ValueHint}; + +pub fn haggis() -> Command { + Command::new("haggis") + .about("Create and extract Haggis archives") + .author("Nathan Fisher") + 
.version(env!("CARGO_PKG_VERSION")) + .propagate_version(true) + .arg_required_else_help(true) + .subcommands([create(), extract(), list()]) +} + +pub fn extract() -> Command { + Command::new("extract") + .about("Extract a Haggis archive") + .author("Nathan Fisher") + .visible_alias("ex") + .args([ + Arg::new("zstd") + .help("Filter data through zstd") + .short('z') + .long("zstd") + .action(ArgAction::SetTrue), + Arg::new("quiet") + .help("Do not show progress") + .short('q') + .long("quiet") + .visible_alias("silent") + .action(ArgAction::SetFalse), + Arg::new("stdin") + .help("Read archive from stdin") + .short('i') + .long("stdin") + .action(ArgAction::SetTrue) + .conflicts_with("archive"), + Arg::new("change") + .help("Change to another working directory before performing the operation") + .value_name("directory") + .visible_alias("directory") + .visible_alias("root") + .short('c') + .long("change") + .num_args(1), + Arg::new("uid") + .help("Set the user ID to the specified number") + .short('u') + .long("uid") + .value_parser(clap::value_parser!(u32)) + .num_args(1), + Arg::new("gid") + .help("Set the group ID to the specified number") + .short('g') + .long("gid") + .value_parser(clap::value_parser!(u32)) + .num_args(1), + Arg::new("archive") + .num_args(1) + .required_unless_present("stdin") + .value_hint(ValueHint::FilePath), + ]) +} + +pub fn create() -> Command { + Command::new("create") + .about("Create a Haggis archive") + .author("Nathan Fisher") + .visible_alias("cr") + .allow_missing_positional(true) + .args([ + Arg::new("algorithm") + .help("the checksum algorithm to use") + .short('a') + .long("algorithm") + .value_parser(["md5", "sha1", "sha256", "skip"]) + .default_value("skip") + .num_args(1) + .required(false), + Arg::new("zstd") + .help("Filter data through zstd") + .short('z') + .long("zstd") + .action(ArgAction::SetTrue), + Arg::new("level") + .help("set the compression level for zstd, from 0-21") + .short('l') + .long("level") + 
.requires("zstd") + .num_args(1) + .default_value("3") + .value_parser(value_parser!(i32).range(0..=21)), + Arg::new("quiet") + .help("Do not show progress") + .short('q') + .long("quiet") + .visible_alias("silent") + .action(ArgAction::SetFalse), + Arg::new("stdout") + .help("Write archive to stdout") + .short('o') + .long("stdout") + .conflicts_with("output") + .action(ArgAction::SetTrue), + Arg::new("uid") + .help("Set the user ID to the specified number") + .short('u') + .long("uid") + .value_parser(clap::value_parser!(u32)) + .num_args(1), + Arg::new("gid") + .help("Set the group ID to the specified number") + .short('g') + .long("gid") + .value_parser(clap::value_parser!(u32)) + .num_args(1), + Arg::new("output") + .num_args(1) + .required_unless_present("stdout") + .value_hint(ValueHint::FilePath), + Arg::new("files") + .num_args(1..) + .required(true) + .value_hint(ValueHint::AnyPath), + ]) +} + +pub fn list() -> Command { + Command::new("list") + .about("List files in a Haggis archive") + .author("Nathan Fisher") + .visible_alias("ls") + .args([ + Arg::new("long") + .help("Display a long listing with file properties and permissions") + .short('l') + .long("long") + .action(ArgAction::SetTrue), + Arg::new("files") + .help("Omit displaying directory entries") + .short('f') + .long("files") + .action(ArgAction::SetTrue), + Arg::new("zstd") + .help("Filter data through zstd") + .short('z') + .long("zstd") + .action(ArgAction::SetTrue), + Arg::new("color") + .help("Colorize output") + .short('c') + .long("color") + .action(ArgAction::SetTrue), + Arg::new("nosort") + .help("Display archive nodes in the order they appear rather than sorted") + .short('n') + .long("no-sort") + .action(ArgAction::SetTrue), + Arg::new("archive") + .num_args(1) + .required(true) + .value_hint(ValueHint::FilePath), + ]) +} diff --git a/src/haggis.rs b/src/haggis.rs new file mode 100644 index 0000000..4e5d1a7 --- /dev/null +++ b/src/haggis.rs @@ -0,0 +1,325 @@ +#![warn(clippy::all, 
clippy::pedantic)] +use { + clap::ArgMatches, + haggis::{Algorithm, Listing, ListingKind, ListingStream, Message, Stream, StreamMessage}, + indicatif::{ProgressBar, ProgressStyle}, + std::{ + fs::{self, File}, + io::{self, BufReader, BufWriter}, + os::fd::{AsRawFd, FromRawFd}, + process, + sync::mpsc, + thread, + }, + walkdir::WalkDir, + zstd::{Decoder, Encoder}, +}; + +mod cli; + +static TEMPLATE: &str = "[ {prefix:^30!} ] {wide_bar}{pos:>5.cyan}/{len:5.green}"; + +fn main() { + let matches = cli::haggis().get_matches(); + match matches.subcommand() { + Some(("create", matches)) => { + if let Err(e) = create(matches) { + eprintln!("Error: {e}"); + process::exit(1); + } + } + Some(("extract", matches)) => { + if let Err(e) = extract(matches) { + eprintln!("Error: {e}"); + process::exit(1); + } + } + Some(("list", matches)) => { + if matches.get_flag("nosort") { + if let Err(e) = list_unsorted(matches) { + eprintln!("Error: {e}"); + process::exit(1); + } + } else if let Err(e) = list(matches) { + eprintln!("Error: {e}"); + process::exit(1); + } + } + _ => {} + } +} + +#[allow(clippy::similar_names)] +fn create(matches: &ArgMatches) -> Result<(), haggis::Error> { + let verbose = !matches.get_flag("stdout") || matches.get_flag("quiet"); + let algorithm: Algorithm = matches.get_one::("algorithm").unwrap().parse()?; + let uid = matches.get_one::("uid").copied(); + let gid = matches.get_one::("gid").copied(); + let mut files = vec![]; + if let Some(f) = matches.get_many::("files") { + for f in f { + if let Ok(meta) = fs::metadata(f) { + if meta.is_dir() { + let walker = WalkDir::new(f); + walker.into_iter().for_each(|x| { + if let Ok(x) = x { + let path = x.path().to_str().unwrap().to_string(); + if !path.is_empty() { + files.push(path); + } + } + }); + } else { + files.push(f.to_string()); + } + } + } + } + let output = matches.get_one::("output"); + let (sender, receiver) = mpsc::channel(); + let len = files.len(); + let mut handle = None; + if verbose { + let pb = 
ProgressBar::new(len as u64); + pb.set_style(ProgressStyle::with_template(TEMPLATE).unwrap()); + pb.set_prefix("Adding files"); + if let Some(o) = output { + pb.println(format!("Creating archive {o}")); + } + handle = Some(thread::spawn(move || { + for msg in &receiver { + match msg { + Message::NodeCreated(s) => { + pb.set_prefix(s.split('/').last().unwrap().to_string()); + pb.inc(1); + } + Message::NodeSaved { name, size } => { + let name = name.split('/').last().unwrap(); + pb.set_prefix(format!("{name} added, {size} bytes")); + pb.inc(1); + } + Message::Eof => { + pb.finish_and_clear(); + break; + } + Message::Err { name, error } => { + pb.println(format!("Error creating node {name}: {error}")); + } + } + } + })); + } + if matches.get_flag("zstd") { + let level = matches.get_one::("level").copied().unwrap_or(3); + if matches.get_flag("stdout") { + let stdout = io::stdout(); + let mut writer = Encoder::new(stdout, level)?; + haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?; + let _fd = writer.finish(); + } else if let Some(o) = output { + let fd = File::create(o)?; + let mut writer = Encoder::new(fd, level)?; + haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?; + let _fd = writer.finish()?; + } else { + unreachable!(); + } + } else if matches.get_flag("stdout") { + let stdout = io::stdout(); + let mut writer = BufWriter::new(stdout); + haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?; + } else if let Some(o) = output { + haggis::par_create_archive(o, &files, algorithm, &sender, uid, gid)?; + } else { + unreachable!(); + } + if let Some(handle) = handle { + match handle.join() { + Ok(()) => { + if verbose { + println!("Archive created successfully"); + } + Ok(()) + } + Err(e) => { + eprintln!("Error: {e:?}"); + process::exit(1); + } + } + } else { + Ok(()) + } +} + +#[allow(clippy::similar_names)] +fn extract(matches: &ArgMatches) -> Result<(), haggis::Error> { + let file = 
matches.get_one::("archive"); + let uid = matches.get_one::("uid").copied(); + let gid = matches.get_one::("gid").copied(); + let mut fd = if let Some(f) = file { + File::open(f)? + } else if matches.get_flag("stdin") { + let stdin = io::stdin(); + let raw = stdin.as_raw_fd(); + unsafe { File::from_raw_fd(raw) } + } else { + unreachable!() + }; + let zst = matches.get_flag("zstd") + || if matches.get_flag("stdin") { + false + } else { + haggis::detect_zstd(&mut fd)? + }; + let dir = matches.get_one::("change"); + let (sender, receiver) = mpsc::channel(); + let file = file.cloned().unwrap_or("stdin".to_string()); + let handle = if zst { + let reader = Decoder::new(fd)?; + let mut stream = Stream::new(reader)?; + let handle = if matches.get_flag("quiet") { + Some(thread::spawn(move || { + progress(&file, &receiver, u64::from(stream.length)); + Ok::<(), haggis::Error>(()) + })) + } else { + None + }; + stream.par_extract(dir.map(String::as_str), uid, gid, &sender)?; + handle + } else { + let reader = BufReader::new(fd); + let mut stream = Stream::new(reader)?; + let handle = if matches.get_flag("quiet") { + Some(thread::spawn(move || { + progress(&file, &receiver, u64::from(stream.length)); + Ok::<(), haggis::Error>(()) + })) + } else { + None + }; + stream.par_extract(dir.map(String::as_str), uid, gid, &sender)?; + handle + }; + if let Some(handle) = handle { + match handle.join() { + Ok(_) => { + if matches.get_flag("quiet") { + println!("Archive extracted successfully"); + } + Ok(()) + } + Err(e) => { + eprintln!("Error: {e:?}"); + process::exit(1); + } + } + } else { + Ok(()) + } +} + +fn progress(file: &str, receiver: &mpsc::Receiver, len: u64) { + let pb = ProgressBar::new(len); + pb.set_style(ProgressStyle::with_template(TEMPLATE).unwrap()); + pb.set_prefix("Extracting files"); + pb.println(format!("Extracting archive {file}")); + for msg in receiver { + match msg { + StreamMessage::FileExtracted { name, size } => { + let name = name.split('/').last().unwrap(); 
+ pb.set_prefix(format!("{name} extracted, {size} bytes")); + pb.inc(1); + } + StreamMessage::LinkCreated { name, target } => { + let name = name.split('/').last().unwrap(); + let target = target.split('/').last().unwrap(); + pb.set_prefix(format!("{name} -> {target}")); + pb.inc(1); + } + StreamMessage::DirectoryCreated { name } => { + let name = name.split('/').last().unwrap(); + pb.set_prefix(format!("mkdir {name}")); + pb.inc(1); + } + StreamMessage::DeviceCreated { name } => { + let name = name.split('/').last().unwrap(); + pb.set_prefix(format!("mknod {name}")); + pb.inc(1); + } + StreamMessage::Eof => { + pb.finish_and_clear(); + break; + } + StreamMessage::Err { name, error } => { + pb.println(format!("Error with node {name}: {error}")); + } + } + } +} + +fn print_listing(li: &Listing, matches: &ArgMatches) -> Result<(), haggis::Error> { + if matches.get_flag("files") && li.kind == ListingKind::Directory { + return Ok(()); + } + if matches.get_flag("color") { + if matches.get_flag("long") { + li.print_color()?; + } else { + li.print_color_simple()?; + } + } else if matches.get_flag("long") { + println!("{li}"); + } else { + println!("{}", li.name); + } + Ok(()) +} + +fn list_unsorted(matches: &ArgMatches) -> Result<(), haggis::Error> { + let file = matches.get_one::("archive").unwrap(); + let fd = File::open(file)?; + if matches.get_flag("zstd") { + let reader = Decoder::new(fd)?; + let stream = Stream::new(reader)?; + for node in stream { + let node = node?; + let li = Listing::from(node); + print_listing(&li, matches)?; + } + } else { + let reader = BufReader::new(fd); + let stream = ListingStream::new(reader)?; + for li in stream { + let li = li?; + print_listing(&li, matches)?; + } + } + Ok(()) +} + +fn list(matches: &ArgMatches) -> Result<(), haggis::Error> { + let file = matches.get_one::("archive").unwrap(); + let mut fd = File::open(file)?; + let zst = matches.get_flag("zstd") || haggis::detect_zstd(&mut fd)?; + let list = if zst { + let reader = 
Decoder::new(fd)?; + let stream = Stream::new(reader)?; + let mut list = vec![]; + for node in stream { + let node = node?; + let listing = Listing::from(node); + list.push(listing); + } + list.sort_unstable(); + list + } else { + let reader = BufReader::new(fd); + let mut stream = ListingStream::new(reader)?; + stream.list()? + }; + for li in list { + print_listing(&li, matches)?; + } + Ok(()) +}