Merge binary into project

This commit is contained in:
Nathan Fisher 2024-02-22 19:15:04 -05:00
parent 0f4ca019d2
commit 30fcaa8e07
6 changed files with 1026 additions and 4 deletions

335
Cargo.lock generated
View File

@ -17,6 +17,60 @@ dependencies = [
"libc",
]
[[package]]
name = "anstream"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96b09b5178381e0874812a9b157f7fe84982617e48f71f4e3235482775e5b540"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "arrayvec"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "autocfg"
version = "1.1.0"
@ -44,6 +98,7 @@ version = "1.0.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
dependencies = [
"jobserver",
"libc",
]
@ -67,6 +122,81 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "clap"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_complete"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "885e4d7d5af40bfb99ae6f9433e292feac98d452dcb3ec3d25dfe7552b77da8c"
dependencies = [
"clap",
]
[[package]]
name = "clap_complete_nushell"
version = "4.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d0e48e026ce7df2040239117d25e4e79714907420c70294a5ce4b6bbe6a7b6"
dependencies = [
"clap",
"clap_complete",
]
[[package]]
name = "clap_lex"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
[[package]]
name = "clap_mangen"
version = "0.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1dd95b5ebb5c1c54581dd6346f3ed6a79a3eef95dd372fc2ac13d535535300e"
dependencies = [
"clap",
"roff",
]
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"unicode-width",
"windows-sys",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.6"
@ -133,6 +263,12 @@ version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "generic-array"
version = "0.14.7"
@ -148,12 +284,17 @@ name = "haggis"
version = "0.1.0"
dependencies = [
"chrono",
"clap",
"indicatif",
"libc",
"md-5",
"package-bootstrap",
"rayon",
"sha1",
"sha2",
"termcolor",
"walkdir",
"zstd",
]
[[package]]
@ -179,6 +320,45 @@ dependencies = [
"cc",
]
[[package]]
name = "indicatif"
version = "0.17.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3"
dependencies = [
"console",
"instant",
"number_prefix",
"portable-atomic",
"unicode-segmentation",
"unicode-width",
"vt100",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "itoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "jobserver"
version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
dependencies = [
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.67"
@ -188,6 +368,12 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.152"
@ -219,12 +405,42 @@ dependencies = [
"autocfg",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "package-bootstrap"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29c7bb771dcab88f8b1b124981ccae34802608bbcc2456bbf6cd4d862c4f5deb"
dependencies = [
"clap",
"clap_complete",
"clap_complete_nushell",
"clap_mangen",
]
[[package]]
name = "pkg-config"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "portable-atomic"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "proc-macro2"
version = "1.0.76"
@ -263,6 +479,21 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "roff"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b833d8d034ea094b1ea68aa6d5c740e0d04bad9d16568d08ba6f76823a114316"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "sha1"
version = "0.10.6"
@ -285,6 +516,12 @@ dependencies = [
"digest",
]
[[package]]
name = "strsim"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
[[package]]
name = "syn"
version = "2.0.48"
@ -317,12 +554,73 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-segmentation"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
[[package]]
name = "unicode-width"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vt100"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de"
dependencies = [
"itoa",
"log",
"unicode-width",
"vte",
]
[[package]]
name = "vte"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197"
dependencies = [
"arrayvec",
"utf8parse",
"vte_generate_state_changes",
]
[[package]]
name = "vte_generate_state_changes"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "walkdir"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.90"
@ -417,6 +715,15 @@ dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets 0.52.0",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
@ -530,3 +837,31 @@ name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "zstd"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.9+zstd.1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656"
dependencies = [
"cc",
"pkg-config",
]

View File

@ -7,12 +7,57 @@ edition = "2021"
[features]
parallel = ["dep:rayon"]
color = ["dep:termcolor"]
bin = ["dep:clap", "dep:indicatif", "dep:walkdir", "dep:zstd", "parallel", "color"]
bootstrap = ["bin", "dep:package-bootstrap"]
[[bin]]
name = "haggis"
path = "src/haggis.rs"
required-features = ["bin"]
[[bin]]
name = "bootstrap"
path = "src/bootstrap.rs"
required-features = ["bootstrap"]
[dependencies]
chrono = "0.4"
libc = "0.2"
md-5 = "0.10"
rayon = { version = "1.8", optional = true }
sha1 = "0.10"
sha2 = "0.10"
termcolor = { version = "1.4", optional = true }
[dependencies.clap]
version = "4.3"
optional = true
[dependencies.indicatif]
version = "0.17"
features = ["improved_unicode", "vt100"]
optional = true
[dependencies.package-bootstrap]
version = "0.4"
features = ["mangen"]
optional = true
[dependencies.rayon]
version = "1.8"
optional = true
[dependencies.termcolor]
version = "1.4"
optional = true
[dependencies.walkdir]
version = "2.3"
optional = true
[dependencies.zstd]
version = "0.13"
optional = true
[profile.release]
codegen-units = 1
lto = true
strip = true

110
README.md
View File

@ -1,11 +1,19 @@
A modern archive format for serializing files, like Tar or Zip
A modern archive format for Unix, like Tar or Zip, designed for high performance
and data integrity.
Contents
========
- [Features](#features)
- [Building](#building)
- [Crate Features](#crate-features)
- [Parallel execution](#parallel-execution)
- [Colored output](#colored-output)
- [Reference binary](#reference-binary)
- [Distribution](#distribution)
- [Comparison with Tar](#comparison-with-tar)
- [On Compression](#on-compression)
- [Contributing](#contributing)
- [Raodmap](#roadmap)
- [Roadmap](#roadmap)
## Features
For a more full specification of the format, please see [Format.md](Format.md)
@ -26,6 +34,104 @@ rather than from the crates.io package registry.
[dependencies.haggis]
git = "https://codeberg.org/jeang3nie/haggis.git"
```
## Crate Features
### Parallel execution
The `parallel` feature enables parallel file operations via
[Rayon](https://crates.io/crates/rayon). When creating an archive, files will be
read and checksummed in separate threads and the data passed back to the main
thread for writing an archive. During extraction, the main thread reads the
archive and passes each node to a worker thread to verify its checksum and write
the file to disk.
### Colored output
The `color` feature enables colored output when listing archive members, using
the [termcolor](https://crates.io/crates/termcolor) crate.
### Reference binary
The reference binary application can be built by running `cargo build` with the
`bin` feature enabled. The binary enables both parallel and color features.
Archives can optionally be compressed with [zstd](https://github.com/facebook/zstd).
```Sh
cargo build --features bin
```
The reference binary has been designed to closely parallel the functionality of
**tar** while being a little nicer to use overall. Progress bars are provided by
default, output is colorized, and a long listing format of archive members (similar
to running `ls -l` in a directory) is available which will print various metadata
about archive members. Quick help is available with the `--help` option.
### Distribution
A *bootstrap* binary can be built with the `bootstrap` feature enabled. This
binary can then be run to install the binary and generate and install Unix man
pages and shell completions to a given prefix. This can be used to install all
of the above into the filesystem, or to install into a staging directory for
easy packaging. This feature leverages the
[package-bootstrap](https://crates.io/crates/package-bootstrap) crate.
## Comparison with Tar
The venerable Unix archiver, Tar, has the benefit of being ubiquitous on every Unix
and Unix-like operating system. Beyond that, tar is a rather clunky format with a
number of design flaws and quirks.
- The original Tar specification had a hard limit in path names of 100 bytes
- The Ustar revision of the original Tar specification only partially fixed the
100 byte filename limit by adding a separate field in which to store the directory
component of the pathname. Pathnames are still limited in size to 350 bytes.
- GNU tar fixed the filename limitation with GNU tar headers. GNU tar headers are
not documented anywhere other than the GNU tar source code, so other implementations
have ignored the GNU format and it never caught on.
- All metadata in a Tar header is stored in ascii. This means that things like numbers
must be parsed from ascii.
- Tar stores all metadata fields based on offsets from the start of the header,
often leading to significant padding between fields.
- File data in a Tar archive is split into 512 byte blocks. Since the final block
must also be 512 bytes, there is yet more padding.
- The same filename may be repeated later in a Tar archive, overwriting the first file
during extraction.
- All potential metadata fields always exist in a header, even if that particular field
makes no sense in context. Example - device major and minor numbers are stored for
regular files, directories and symlinks. This is wasted space.
Compared with Tar, Haggis takes a different approach. All integer values are stored
as little endian byte arrays, exactly the same as the in memory representation of a
little endian computer. All metadata strings are preceded by their length, requiring
no padding between fields. The actual contents of regular files are written as a byte
array, and again preceded by the length in bytes, so once again no padding is required.
If you've gotten this far, you might be noticing some differences in design philosophy.
- Ascii is great for humans to read but terrible for computers. Since archives are
read by computers, not humans, ascii is bad.
- Padding is extra bytes. Sure, that overhead tends to get squashed after compressing
an archive, but it requires more memory to create the extra zeroes and more memory
to extract them. Better to not use padding everywhere.
- Using offsets would always have led to embarrassingly shortsighted limitations
such as the filename length limitation that has plagued Tar from day one. Variable
length fields are easily handled by storing their length first.
- By using a flag to tell the archiver what **kind** of file is being stored, the
archiver can expect different metadata fields for different filetypes, again saving
on space in the file header.
## On compression
The author performed some very non-scientific testing of various archive formats
and settled on [zstd](https://github.com/facebook/zstd) as being so superior as to
make all other common compression schemes irrelevant for **general** usage. Gzip and
Bzip2 have woefully lower compression ratios and terrible performance. The
[xz](https://tukaani.org/xz/) compression algorithm offers much better compression at
the cost of poor performance. Meta may be evil overall, but zstd offers compression
ratios on par with xz and performance that is higher than all three major competitors.
Zstd now comes pre-installed on virtually every Linux system and is easily installed
on BSD and other Unix-like systems. It is the new standard.
Other compression schemes could have been implemented into the library code, but
that would add to the maintenance burden while not adding significantly useful
functionality. You need to be able to open gzip compressed Tar archives because there
are literally millions of them out there. Not so for a greenfield project such as
Haggis. Better to encourage the use of one good compression format and discourage
the continued use of legacy software.
If you absolutely **must** compress a haggis archive using gzip or bzip2, you can
do so manually. The *haggis* binary does not provide this functionality. Don't ask.
## Contributing
Contributions are always welcome. Please run `cargo fmt` and `cargo clippy` and
fix any issues before sending pull requests on Codeberg or patches via `git send-email`.

49
src/bootstrap.rs Normal file
View File

@ -0,0 +1,49 @@
#![allow(dead_code)]
use std::path::Path;
use clap::ArgAction;
mod cli;
use {
clap::{Arg, Command},
package_bootstrap::Bootstrap,
std::{error::Error, path::PathBuf},
};
/// Entry point of the `bootstrap` installer.
///
/// Parses the installer's own command line, then uses
/// [`package_bootstrap::Bootstrap`] to install the `haggis` program into the
/// directory given as the required `output` argument. When `--meta` is passed
/// the README and license files are installed as well. Finally a man page is
/// requested for each of the `create`, `extract` and `list` subcommands.
///
/// # Errors
///
/// Propagates any error returned by the `Bootstrap` install, docfile or
/// man page steps.
fn main() -> Result<(), Box<dyn Error>> {
    let matches = Command::new("bootstrap")
        .about("install the software")
        .author("Nathan Fisher")
        .version(env!("CARGO_PKG_VERSION"))
        .args([
            Arg::new("target-dir")
                // This installer handles the `haggis` binary; the previous
                // text named a different program ('hpk').
                .help("the directory where the 'haggis' binary is located")
                .short('t')
                .long("target-dir")
                .num_args(1),
            Arg::new("meta")
                .help("Install License and Readme files in doc subdirectory")
                .short('m')
                .long("meta")
                .action(ArgAction::SetTrue),
            Arg::new("output")
                .help("the output directory for the installation")
                .required(true)
                .num_args(1),
        ])
        .get_matches();

    // `output` is declared `required(true)` above, so clap has already
    // rejected any invocation that lacks it.
    let outdir = PathBuf::from(
        matches
            .get_one::<String>("output")
            .expect("`output` is a required argument"),
    );
    let target_dir = matches.get_one::<String>("target-dir").cloned();

    let bs = Bootstrap::new("haggis", cli::haggis(), &outdir);
    // NOTE(review): the `1` is presumably the man page section - confirm
    // against the package-bootstrap documentation.
    bs.install(target_dir, 1)?;
    if matches.get_flag("meta") {
        bs.docfiles(&["README.md", "LICENSE.md"], &Path::new("haggis"))?;
    }

    // Each subcommand gets its own page, named `haggis-<subcommand>`.
    Bootstrap::new("haggis-create", cli::create(), &outdir).manpage(1)?;
    Bootstrap::new("haggis-extract", cli::extract(), &outdir).manpage(1)?;
    Bootstrap::new("haggis-list", cli::list(), &outdir).manpage(1)?;
    Ok(())
}

162
src/cli.rs Normal file
View File

@ -0,0 +1,162 @@
use clap::{value_parser, Arg, ArgAction, Command, ValueHint};
/// Builds the top-level `haggis` command.
///
/// The command carries the crate version and propagates it to its
/// subcommands, prints help when invoked without any argument, and registers
/// the `create`, `extract` and `list` subcommands in that order.
pub fn haggis() -> Command {
    let root = Command::new("haggis")
        .version(env!("CARGO_PKG_VERSION"))
        .author("Nathan Fisher")
        .about("Create and extract Haggis archives");

    root.arg_required_else_help(true)
        .propagate_version(true)
        .subcommand(create())
        .subcommand(extract())
        .subcommand(list())
}
pub fn extract() -> Command {
Command::new("extract")
.about("Extract a Haggis archive")
.author("Nathan Fisher")
.visible_alias("ex")
.args([
Arg::new("zstd")
.help("Filter data through zstd")
.short('z')
.long("zstd")
.action(ArgAction::SetTrue),
Arg::new("quiet")
.help("Do not show progress")
.short('q')
.long("quiet")
.visible_alias("silent")
.action(ArgAction::SetFalse),
Arg::new("stdin")
.help("Read archive from stdin")
.short('i')
.long("stdin")
.action(ArgAction::SetTrue)
.conflicts_with("archive"),
Arg::new("change")
.help("Change to another working directory before performing the operation")
.value_name("directory")
.visible_alias("directory")
.visible_alias("root")
.short('c')
.long("change")
.num_args(1),
Arg::new("uid")
.help("Set the user ID to the specified number")
.short('u')
.long("uid")
.value_parser(clap::value_parser!(u32))
.num_args(1),
Arg::new("gid")
.help("Set the group ID to the specified number")
.short('g')
.long("gid")
.value_parser(clap::value_parser!(u32))
.num_args(1),
Arg::new("archive")
.num_args(1)
.required_unless_present("stdin")
.value_hint(ValueHint::FilePath),
])
}
pub fn create() -> Command {
Command::new("create")
.about("Create a Haggis archive")
.author("Nathan Fisher")
.visible_alias("cr")
.allow_missing_positional(true)
.args([
Arg::new("algorithm")
.help("the checksum algorithm to use")
.short('a')
.long("algorithm")
.value_parser(["md5", "sha1", "sha256", "skip"])
.default_value("skip")
.num_args(1)
.required(false),
Arg::new("zstd")
.help("Filter data through zstd")
.short('z')
.long("zstd")
.action(ArgAction::SetTrue),
Arg::new("level")
.help("set the compression level for zstd, from 0-21")
.short('l')
.long("level")
.requires("zstd")
.num_args(1)
.default_value("3")
.value_parser(value_parser!(i32).range(0..=21)),
Arg::new("quiet")
.help("Do not show progress")
.short('q')
.long("quiet")
.visible_alias("silent")
.action(ArgAction::SetFalse),
Arg::new("stdout")
.help("Write archive to stdout")
.short('o')
.long("stdout")
.conflicts_with("output")
.action(ArgAction::SetTrue),
Arg::new("uid")
.help("Set the user ID to the specified number")
.short('u')
.long("uid")
.value_parser(clap::value_parser!(u32))
.num_args(1),
Arg::new("gid")
.help("Set the group ID to the specified number")
.short('g')
.long("gid")
.value_parser(clap::value_parser!(u32))
.num_args(1),
Arg::new("output")
.num_args(1)
.required_unless_present("stdout")
.value_hint(ValueHint::FilePath),
Arg::new("files")
.num_args(1..)
.required(true)
.value_hint(ValueHint::AnyPath),
])
}
pub fn list() -> Command {
Command::new("list")
.about("List files in a Haggis archive")
.author("Nathan Fisher")
.visible_alias("ls")
.args([
Arg::new("long")
.help("Display a long listing with file properties and permissions")
.short('l')
.long("long")
.action(ArgAction::SetTrue),
Arg::new("files")
.help("Omit displaying directory entries")
.short('f')
.long("files")
.action(ArgAction::SetTrue),
Arg::new("zstd")
.help("Filter data through zstd")
.short('z')
.long("zstd")
.action(ArgAction::SetTrue),
Arg::new("color")
.help("Colorize output")
.short('c')
.long("color")
.action(ArgAction::SetTrue),
Arg::new("nosort")
.help("Display archive nodes in the order they appear rather than sorted")
.short('n')
.long("no-sort")
.action(ArgAction::SetTrue),
Arg::new("archive")
.num_args(1)
.required(true)
.value_hint(ValueHint::FilePath),
])
}

325
src/haggis.rs Normal file
View File

@ -0,0 +1,325 @@
#![warn(clippy::all, clippy::pedantic)]
use {
clap::ArgMatches,
haggis::{Algorithm, Listing, ListingKind, ListingStream, Message, Stream, StreamMessage},
indicatif::{ProgressBar, ProgressStyle},
std::{
fs::{self, File},
io::{self, BufReader, BufWriter},
os::fd::{AsRawFd, FromRawFd},
process,
sync::mpsc,
thread,
},
walkdir::WalkDir,
zstd::{Decoder, Encoder},
};
mod cli;
static TEMPLATE: &str = "[ {prefix:^30!} ] {wide_bar}{pos:>5.cyan}/{len:5.green}";
/// Entry point of the `haggis` binary.
///
/// Dispatches to the handler of the selected subcommand. `list` goes to
/// `list_unsorted` when `--no-sort` was given and to `list` otherwise. Any
/// error is printed to stderr as `Error: <error>` and the process exits with
/// status 1. No subcommand (or an unknown one) does nothing.
fn main() {
    let matches = cli::haggis().get_matches();
    let outcome = match matches.subcommand() {
        Some(("create", sub)) => create(sub),
        Some(("extract", sub)) => extract(sub),
        Some(("list", sub)) if sub.get_flag("nosort") => list_unsorted(sub),
        Some(("list", sub)) => list(sub),
        _ => Ok(()),
    };
    if let Err(e) = outcome {
        eprintln!("Error: {e}");
        process::exit(1);
    }
}
/// Handles the `create` subcommand.
///
/// Collects the paths named by the `files` argument (directories are walked
/// recursively), then writes them as a Haggis archive either to the `output`
/// file or to stdout, optionally wrapped in a zstd encoder. Unless the
/// archive goes to stdout or `-q` was passed, a progress bar is driven from a
/// background thread that consumes the [`Message`]s emitted during creation.
///
/// # Errors
///
/// Returns an error when the checksum algorithm cannot be parsed, when the
/// output cannot be created, written, finished or flushed, or when the
/// archive routines fail.
#[allow(clippy::similar_names)] // `uid` and `gid` are the conventional names
fn create(matches: &ArgMatches) -> Result<(), haggis::Error> {
    let to_stdout = matches.get_flag("stdout");
    // `quiet` is declared with `ArgAction::SetFalse`, so the flag reads `true`
    // unless `-q` was passed. Progress and the success message are only wanted
    // when the user did not ask for silence AND stdout is not carrying the
    // archive itself (the success message is printed to stdout).
    let verbose = !to_stdout && matches.get_flag("quiet");
    let algorithm: Algorithm = matches
        .get_one::<String>("algorithm")
        .expect("`algorithm` has a default value")
        .parse()?;
    let uid = matches.get_one::<u32>("uid").copied();
    let gid = matches.get_one::<u32>("gid").copied();

    let mut files = vec![];
    if let Some(args) = matches.get_many::<String>("files") {
        for arg in args {
            // Best effort: arguments that cannot be stat'ed are skipped.
            let Ok(meta) = fs::metadata(arg) else {
                continue;
            };
            if !meta.is_dir() {
                files.push(arg.clone());
                continue;
            }
            // Entries the walker fails to read are skipped as well.
            for entry in WalkDir::new(arg).into_iter().flatten() {
                match entry.path().to_str() {
                    Some(path) if !path.is_empty() => files.push(path.to_string()),
                    Some(_) => {}
                    // The file list is made of `String`s, so a path that is
                    // not valid UTF-8 cannot be represented; report it
                    // instead of panicking.
                    None => eprintln!(
                        "Warning: skipping non UTF-8 path {}",
                        entry.path().display()
                    ),
                }
            }
        }
    }

    let output = matches.get_one::<String>("output");
    let (sender, receiver) = mpsc::channel();
    let len = files.len();
    // When no reporter is spawned `receiver` stays alive until this function
    // returns, so the channel remains connected while the archive is written.
    let handle = if verbose {
        let pb = ProgressBar::new(len as u64);
        pb.set_style(ProgressStyle::with_template(TEMPLATE).unwrap());
        pb.set_prefix("Adding files");
        if let Some(o) = output {
            pb.println(format!("Creating archive {o}"));
        }
        Some(thread::spawn(move || {
            for msg in &receiver {
                match msg {
                    Message::NodeCreated(s) => {
                        let name = s.rsplit('/').next().unwrap_or(&s);
                        pb.set_prefix(name.to_string());
                        pb.inc(1);
                    }
                    Message::NodeSaved { name, size } => {
                        let name = name.rsplit('/').next().unwrap_or(&name);
                        pb.set_prefix(format!("{name} added, {size} bytes"));
                        pb.inc(1);
                    }
                    Message::Eof => {
                        pb.finish_and_clear();
                        break;
                    }
                    Message::Err { name, error } => {
                        pb.println(format!("Error creating node {name}: {error}"));
                    }
                }
            }
        }))
    } else {
        None
    };

    if matches.get_flag("zstd") {
        let level = matches.get_one::<i32>("level").copied().unwrap_or(3);
        if to_stdout {
            let mut writer = Encoder::new(io::stdout(), level)?;
            haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?;
            // Finishing the encoder can fail; propagate that error exactly
            // like the file branch below does.
            writer.finish()?;
        } else if let Some(o) = output {
            let fd = File::create(o)?;
            let mut writer = Encoder::new(fd, level)?;
            haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?;
            writer.finish()?;
        } else {
            // clap enforces that either `output` or `--stdout` is present.
            unreachable!();
        }
    } else if to_stdout {
        let mut writer = BufWriter::new(io::stdout());
        haggis::par_stream_archive(&mut writer, &files, algorithm, &sender, uid, gid)?;
        // Dropping a `BufWriter` discards flush errors, so flush explicitly.
        io::Write::flush(&mut writer)?;
    } else if let Some(o) = output {
        haggis::par_create_archive(o, &files, algorithm, &sender, uid, gid)?;
    } else {
        // clap enforces that either `output` or `--stdout` is present.
        unreachable!();
    }

    // Nothing sends past this point. Closing our end of the channel lets the
    // reporter loop terminate even if it never saw `Message::Eof`.
    drop(sender);
    let Some(handle) = handle else {
        return Ok(());
    };
    if let Err(e) = handle.join() {
        eprintln!("Error: {e:?}");
        process::exit(1);
    }
    // A reporter thread only exists in verbose mode.
    println!("Archive created successfully");
    Ok(())
}
/// Handles the `extract` subcommand.
///
/// Opens the archive named by the `archive` argument, or stdin when
/// `--stdin` was given, decides whether it is zstd compressed, and extracts
/// it with `Stream::par_extract`, optionally into the directory given with
/// `--change`. Unless `-q` was passed, a background thread renders progress
/// from the [`StreamMessage`]s emitted during extraction.
///
/// Compression is auto-detected with `haggis::detect_zstd` for regular
/// files. Stdin is only treated as compressed when `--zstd` is passed.
///
/// # Errors
///
/// Returns an error when the archive cannot be opened, when zstd detection
/// or decoder setup fails, or when the stream cannot be read or extracted.
#[allow(clippy::similar_names)] // `uid` and `gid` are the conventional names
fn extract(matches: &ArgMatches) -> Result<(), haggis::Error> {
    let file = matches.get_one::<String>("archive");
    let uid = matches.get_one::<u32>("uid").copied();
    let gid = matches.get_one::<u32>("gid").copied();
    let from_stdin = matches.get_flag("stdin");
    // `quiet` is declared with `ArgAction::SetFalse`, so the flag reads `true`
    // unless `-q` was passed.
    let show_progress = matches.get_flag("quiet");

    let mut fd = if let Some(f) = file {
        File::open(f)?
    } else if from_stdin {
        let raw = io::stdin().as_raw_fd();
        // SAFETY: descriptor 0 is open for the lifetime of the process. The
        // resulting `File` becomes its owner and closes it when dropped;
        // nothing in this function touches stdin through another handle
        // afterwards.
        unsafe { File::from_raw_fd(raw) }
    } else {
        // clap enforces that either `archive` or `--stdin` is present.
        unreachable!()
    };

    let zst = matches.get_flag("zstd") || (!from_stdin && haggis::detect_zstd(&mut fd)?);
    let dir = matches.get_one::<String>("change");
    let (sender, receiver) = mpsc::channel();
    let label = file.cloned().unwrap_or_else(|| "stdin".to_string());
    // Starts the progress reporter. When it is never called, the closure and
    // the receiver it owns stay alive until this function returns, so the
    // channel remains connected for the whole extraction.
    let spawn_progress =
        move |len: u64| thread::spawn(move || progress(&label, &receiver, len));

    // The two branches differ only in the reader type wrapped by `Stream`.
    let handle = if zst {
        let mut stream = Stream::new(Decoder::new(fd)?)?;
        let handle = if show_progress {
            Some(spawn_progress(u64::from(stream.length)))
        } else {
            None
        };
        stream.par_extract(dir.map(String::as_str), uid, gid, &sender)?;
        handle
    } else {
        let mut stream = Stream::new(BufReader::new(fd))?;
        let handle = if show_progress {
            Some(spawn_progress(u64::from(stream.length)))
        } else {
            None
        };
        stream.par_extract(dir.map(String::as_str), uid, gid, &sender)?;
        handle
    };

    // Nothing sends past this point. Closing our end of the channel lets the
    // reporter loop terminate even if it never saw `StreamMessage::Eof`.
    drop(sender);
    let Some(handle) = handle else {
        return Ok(());
    };
    if let Err(e) = handle.join() {
        eprintln!("Error: {e:?}");
        process::exit(1);
    }
    // A reporter thread only exists when progress output was requested.
    println!("Archive extracted successfully");
    Ok(())
}
/// Renders extraction progress until the stream reports `Eof` or the channel
/// closes.
///
/// `file` is only used for the "Extracting archive" headline and `len` is the
/// total the bar counts towards. Each extracted file, link, directory or
/// device advances the bar by one and updates its prefix. Errors are printed
/// above the bar without advancing it.
fn progress(file: &str, receiver: &mpsc::Receiver<StreamMessage>, len: u64) {
    /// Returns the text after the last `/` of `path`.
    fn leaf(path: &str) -> &str {
        // `rsplit` always yields at least one item, so the fallback is never used.
        path.rsplit('/').next().unwrap_or(path)
    }

    let bar = ProgressBar::new(len);
    bar.set_style(ProgressStyle::with_template(TEMPLATE).unwrap());
    bar.set_prefix("Extracting files");
    bar.println(format!("Extracting archive {file}"));

    for msg in receiver {
        // Every arm that represents a finished node yields the new prefix;
        // the two remaining arms leave the loop body early.
        let prefix = match msg {
            StreamMessage::FileExtracted { name, size } => {
                let name = leaf(&name);
                format!("{name} extracted, {size} bytes")
            }
            StreamMessage::LinkCreated { name, target } => {
                let name = leaf(&name);
                let target = leaf(&target);
                format!("{name} -> {target}")
            }
            StreamMessage::DirectoryCreated { name } => {
                let name = leaf(&name);
                format!("mkdir {name}")
            }
            StreamMessage::DeviceCreated { name } => {
                let name = leaf(&name);
                format!("mknod {name}")
            }
            StreamMessage::Eof => {
                bar.finish_and_clear();
                break;
            }
            StreamMessage::Err { name, error } => {
                bar.println(format!("Error with node {name}: {error}"));
                continue;
            }
        };
        bar.set_prefix(prefix);
        bar.inc(1);
    }
}
/// Prints a single archive entry according to the `list` flags.
///
/// Directories are skipped when `--files` is set. Otherwise the entry is
/// printed in one of four forms selected by `--color` and `--long`.
///
/// # Errors
///
/// Propagates errors from the colored printing routines.
fn print_listing(li: &Listing, matches: &ArgMatches) -> Result<(), haggis::Error> {
    let files_only = matches.get_flag("files");
    if files_only && li.kind == ListingKind::Directory {
        return Ok(());
    }
    match (matches.get_flag("color"), matches.get_flag("long")) {
        (true, true) => {
            li.print_color()?;
        }
        (true, false) => {
            li.print_color_simple()?;
        }
        (false, true) => println!("{li}"),
        (false, false) => println!("{}", li.name),
    }
    Ok(())
}
/// Handles `list --no-sort`: prints the archive members in the order they
/// are stored.
///
/// The archive is treated as zstd compressed when `--zstd` was passed or
/// when `haggis::detect_zstd` recognises it, matching what the sorted `list`
/// handler does. Entries are printed as they are read, so output produced
/// before a read error stays on screen.
///
/// # Errors
///
/// Returns an error when the archive cannot be opened, when zstd detection
/// or decoder setup fails, or when a node cannot be read or printed.
fn list_unsorted(matches: &ArgMatches) -> Result<(), haggis::Error> {
    let file = matches
        .get_one::<String>("archive")
        .expect("`archive` is a required argument");
    let mut fd = File::open(file)?;
    // Auto-detect compression instead of relying on `-z` alone, so sorted and
    // unsorted listings accept the same archives.
    let zst = matches.get_flag("zstd") || haggis::detect_zstd(&mut fd)?;
    if zst {
        let reader = Decoder::new(fd)?;
        let stream = Stream::new(reader)?;
        for node in stream {
            let li = Listing::from(node?);
            print_listing(&li, matches)?;
        }
    } else {
        let reader = BufReader::new(fd);
        let stream = ListingStream::new(reader)?;
        for li in stream {
            print_listing(&li?, matches)?;
        }
    }
    Ok(())
}
/// Handles `list` without `--no-sort`: prints the archive members sorted.
///
/// The archive is treated as zstd compressed when `--zstd` was passed or
/// when `haggis::detect_zstd` recognises it. Compressed archives are read
/// node by node and sorted here. Uncompressed ones use
/// `ListingStream::list`.
///
/// # Errors
///
/// Returns an error when the archive cannot be opened, when zstd detection
/// or decoder setup fails, or when a node cannot be read or printed.
fn list(matches: &ArgMatches) -> Result<(), haggis::Error> {
    let path = matches.get_one::<String>("archive").unwrap();
    let mut fd = File::open(path)?;
    let compressed = matches.get_flag("zstd") || haggis::detect_zstd(&mut fd)?;

    let entries = if compressed {
        let mut entries = Vec::new();
        for node in Stream::new(Decoder::new(fd)?)? {
            entries.push(Listing::from(node?));
        }
        entries.sort_unstable();
        entries
    } else {
        let mut listing_stream = ListingStream::new(BufReader::new(fd))?;
        listing_stream.list()?
    };

    entries
        .iter()
        .try_for_each(|entry| print_listing(entry, matches))
}