Implement tar::Node creation from raw data + file metadata

This commit is contained in:
Nathan Fisher 2023-03-23 12:11:02 -04:00
parent faf574364a
commit a53cb726f9
9 changed files with 621 additions and 23 deletions

210
Cargo.lock generated
View File

@ -26,6 +26,18 @@ version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "487f1e0fcbe47deb8b0574e646def1c903389d95241dd1bbcc6ce4a715dfc0c1"
[[package]]
name = "bitvec"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c"
dependencies = [
"funty",
"radium",
"tap",
"wyz",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
@ -131,6 +143,64 @@ dependencies = [
"typenum",
]
[[package]]
name = "darling"
version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850"
dependencies = [
"darling_core",
"darling_macro",
]
[[package]]
name = "darling_core"
version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0"
dependencies = [
"fnv",
"ident_case",
"proc-macro2",
"quote",
"strsim",
"syn 1.0.109",
]
[[package]]
name = "darling_macro"
version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e"
dependencies = [
"darling_core",
"quote",
"syn 1.0.109",
]
[[package]]
name = "deku"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819b87cc7a05b3abe3fc38e59b3980a5fd3162f25a247116441a9171d3e84481"
dependencies = [
"bitvec",
"deku_derive",
]
[[package]]
name = "deku_derive"
version = "0.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e2ca12572239215a352a74ad7c776d7e8a914f8a23511c6cbedddd887e5009e"
dependencies = [
"darling",
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "digest"
version = "0.10.6"
@ -168,6 +238,18 @@ dependencies = [
"libc",
]
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "generic-array"
version = "0.14.6"
@ -178,6 +260,12 @@ dependencies = [
"version_check",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
[[package]]
name = "hermit-abi"
version = "0.2.6"
@ -206,6 +294,22 @@ dependencies = [
"walkdir",
]
[[package]]
name = "ident_case"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "indexmap"
version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "io-lifetimes"
version = "1.0.9"
@ -241,6 +345,12 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.8.0"
@ -260,12 +370,28 @@ dependencies = [
"libc",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "os_str_bytes"
version = "6.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267"
[[package]]
name = "proc-macro-crate"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
dependencies = [
"once_cell",
"toml_edit",
]
[[package]]
name = "proc-macro2"
version = "1.0.52"
@ -284,6 +410,12 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "radium"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"
[[package]]
name = "rayon"
version = "1.7.0"
@ -363,7 +495,7 @@ checksum = "e801c1712f48475582b7696ac71e0ca34ebb30e09338425384269d9717c62cad"
dependencies = [
"proc-macro2",
"quote",
"syn",
"syn 2.0.4",
]
[[package]]
@ -383,6 +515,17 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.4"
@ -394,9 +537,19 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "tap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "tar"
version = "0.1.0"
dependencies = [
"deku",
"thiserror",
]
[[package]]
name = "termcolor"
@ -407,6 +560,43 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.4",
]
[[package]]
name = "toml_datetime"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622"
[[package]]
name = "toml_edit"
version = "0.19.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13"
dependencies = [
"indexmap",
"toml_datetime",
"winnow",
]
[[package]]
name = "typenum"
version = "1.16.0"
@ -531,3 +721,21 @@ name = "windows_x86_64_msvc"
version = "0.42.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
[[package]]
name = "winnow"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "deac0939bd6e4f24ab5919fbf751c97a8cfc8543bb083a305ed5c0c10bb241d1"
dependencies = [
"memchr",
]
[[package]]
name = "wyz"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed"
dependencies = [
"tap",
]

View File

@ -2,6 +2,7 @@
name = "hpk"
version = "0.1.0"
edition = "2021"
license = "GPL-3.0-only"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -27,7 +27,9 @@ impl TryFrom<&Path> for Plist {
.collect::<Vec<_>>()
.par_iter()
.filter(|x| x.is_ok())
.filter_map(|x| Entry::try_from(x.as_ref().unwrap().path().to_path_buf().as_path()).ok())
.filter_map(|x| {
Entry::try_from(x.as_ref().unwrap().path().to_path_buf().as_path()).ok()
})
.collect();
Ok(Self { entries })
}

View File

@ -2,4 +2,8 @@
name = "tar"
version = "0.1.0"
edition = "2021"
license = "GPL-3.0-only"
[dependencies]
deku = "0.16"
thiserror = "1.0.40"

8
tar/README.md Normal file
View File

@ -0,0 +1,8 @@
## About
Derived originally from [minitar](https://github.com/genonullfree/minitar), this
crate implements basic functionality for creating and extracting tar archives. It
has been adapted to allow creation of a Node (Tar header + 512byte blocks of data)
from the raw data plus file metadata. This allows for better efficiency when it
is embedded into another application (such as a package manager), as the raw data
and metadata about each file can be extracted once and reused for purposes such
as generating checksums, getting file sizes and creating packing lists.

23
tar/src/error.rs Normal file
View File

@ -0,0 +1,23 @@
use std::io;
use std::num::ParseIntError;
use std::str::Utf8Error;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum Error {
#[error("DekuError: {0}")]
Deku(#[from] deku::DekuError),
#[error("IoError: {0}")]
Io(#[from] io::Error),
#[error("Error in conversion of oct_to_dev")]
Utf8Error(#[from] Utf8Error),
#[error("Error in conversion of oct_to_dev")]
ParseIntError(#[from] ParseIntError),
#[error("End of tar")]
EndOfTar,
#[error("Invalid magic")]
InvalidMagic,
#[error("Invalid Checksum")]
InvalidChecksum,
}

View File

@ -1,21 +1,246 @@
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Header {
pub fname: [u8; 100],
pub mode: [u8; 8],
pub uid: [u8; 8],
pub gid: [u8; 8],
pub size: [u8; 12],
pub mtime: [u8; 12],
pub header_checksum: [u8; 8],
pub link_indicator: [u8; 1],
pub link_name: [u8; 100],
pub ustar_magic: [u8; 6],
pub ustar_version: [u8; 2],
pub username: [u8; 32],
pub groupname: [u8; 32],
pub device_major: [u8; 8],
pub device_minor: [u8; 8],
pub file_prefix: [u8; 155],
pub reserved: [u8; 12],
use crate::Error;
use deku::prelude::*;
use std::{
env,
fmt::{self, Write},
fs::{self, Metadata},
ops::Deref,
os::{linux::fs::MetadataExt, unix::fs::FileTypeExt},
};
#[repr(u8)]
pub enum FileType {
Normal = 0x30,
Hardlink = 0x31,
Symlink = 0x32,
Char = 0x33,
Block = 0x34,
Dir = 0x35,
FIFO = 0x36,
Unknown = 0x00,
}
impl<T> From<T> for FileType
where
T: Deref<Target = Metadata>,
{
fn from(meta: T) -> Self {
if meta.is_dir() {
return FileType::Dir;
}
let file_type = meta.file_type();
if file_type.is_fifo() {
return FileType::FIFO;
} else if file_type.is_char_device() {
return FileType::Char;
} else if file_type.is_block_device() {
return FileType::Block;
} else if file_type.is_fifo() {
return FileType::FIFO;
} else if file_type.is_symlink() {
return FileType::Symlink;
} else if file_type.is_file() {
return FileType::Normal;
}
FileType::Unknown
}
}
#[derive(Clone, Copy, Debug, PartialEq, DekuRead, DekuWrite)]
#[deku(endian = "little")]
pub struct Header {
pub(crate) fname: [u8; 100],
pub(crate) mode: [u8; 8],
pub(crate) uid: [u8; 8],
pub(crate) gid: [u8; 8],
pub(crate) size: [u8; 12],
pub(crate) mtime: [u8; 12],
pub(crate) header_checksum: [u8; 8],
pub(crate) link_indicator: [u8; 1],
pub(crate) link_name: [u8; 100],
pub(crate) ustar_magic: [u8; 6],
pub(crate) ustar_version: [u8; 2],
pub(crate) username: [u8; 32],
pub(crate) groupname: [u8; 32],
pub(crate) device_major: [u8; 8],
pub(crate) device_minor: [u8; 8],
pub(crate) file_prefix: [u8; 155],
pub(crate) reserved: [u8; 12],
}
impl Default for Header {
fn default() -> Self {
Self {
fname: [0; 100],
mode: [0; 8],
uid: [0; 8],
gid: [0; 8],
size: [0; 12],
mtime: [0; 12],
header_checksum: [0x20; 8],
link_indicator: [0; 1],
link_name: [0; 100],
ustar_magic: [0x75, 0x73, 0x74, 0x61, 0x72, 0x20],
ustar_version: [0x20, 0x00],
username: [0; 32],
groupname: [0; 32],
device_major: [0; 8],
device_minor: [0; 8],
file_prefix: [0; 155],
reserved: [0; 12],
}
}
}
impl Header {
pub fn filename(&self) -> Result<String, fmt::Error> {
let mut s = String::new();
for c in self.fname {
if c != b'\0' {
write!(s, "{c}")?;
} else {
break;
}
}
Ok(s)
}
pub fn new(filename: &str) -> Result<Self, Error> {
let mut header = Header::default();
let meta = fs::symlink_metadata(filename)?;
/* Fill in metadata */
header.fname[..filename.len()].copy_from_slice(filename.as_bytes());
let mode = format!("{:07o}", (meta.st_mode() & 0o777));
header.mode[..mode.len()].copy_from_slice(mode.as_bytes());
let user = format!("{:07o}", meta.st_uid());
header.uid[..user.len()].copy_from_slice(user.as_bytes());
let group = format!("{:07o}", meta.st_gid());
header.gid[..group.len()].copy_from_slice(group.as_bytes());
let size = format!("{:011o}", meta.st_size());
header.size[..size.len()].copy_from_slice(size.as_bytes());
let mtime = format!("{:011o}", meta.st_mtime());
header.mtime[..mtime.len()].copy_from_slice(mtime.as_bytes());
/* Get the file type and conditional metadata */
header.link_indicator[0] = FileType::from(&meta) as u8;
if header.link_indicator[0] == FileType::Symlink as u8 {
let link = fs::read_link(filename)?.to_str().unwrap().to_string();
header.link_name[..link.len()].copy_from_slice(link.as_bytes());
} else if header.link_indicator[0] == FileType::Block as u8 {
let major = format!("{:07o}", meta.st_dev());
header.device_major[..major.len()].copy_from_slice(major.as_bytes());
let minor = format!("{:07o}", meta.st_rdev());
header.device_minor[..minor.len()].copy_from_slice(minor.as_bytes());
}
/* TODO: Find better way to get username */
let key = "USER";
if let Ok(val) = env::var(key) {
header.username[..val.len()].copy_from_slice(val.as_bytes())
}
/* TODO: Find way to get groupname */
/* Update the header checksum value */
header.update_checksum()?;
Ok(header)
}
pub fn new_from_meta(filename: &str, meta: &Metadata) -> Result<Self, Error> {
let mut header = Header::default();
header.fname[..filename.len()].copy_from_slice(filename.as_bytes());
let mode = format!("{:07o}", meta.st_mode());
header.mode[..mode.len()].copy_from_slice(mode.as_bytes());
let uid = format!("{:07o}", 0);
header.uid[..uid.len()].copy_from_slice(uid.as_bytes());
let gid = format!("{:07o}", 0);
header.gid[..gid.len()].copy_from_slice(gid.as_bytes());
let size = format!("{:011o}", meta.len());
header.size[..size.len()].copy_from_slice(size.as_bytes());
let mtime = format!("{:011o}", meta.st_mtime());
header.mtime[..mtime.len()].copy_from_slice(mtime.as_bytes());
header.link_indicator[0] = FileType::from(meta) as u8;
if header.link_indicator[0] == FileType::Symlink as u8 {
let link = fs::read_link(&filename)?.to_str().unwrap().to_string();
header.link_name[..link.len()].copy_from_slice(link.as_bytes());
} else if header.link_indicator[0] == FileType::Block as u8 {
let major = format!("{:07o}", meta.st_dev());
header.device_major[..major.len()].copy_from_slice(major.as_bytes());
let minor = format!("{:07o}", meta.st_rdev());
header.device_minor[..minor.len()].copy_from_slice(minor.as_bytes());
}
let user = "root";
header.username[..user.len()].copy_from_slice(user.as_bytes());
let group = "root";
header.groupname[..group.len()].copy_from_slice(group.as_bytes());
header.update_checksum()?;
Ok(header)
}
/// Validates that the magic value received matches the magic value required in the Tar specification.
///
/// # Example
///
/// ```
/// use tar::tar::Header;
/// let header = Header::default();
/// if !header.validate_magic() {
/// println!("Magic value is invalid");
/// }
/// ```
pub fn validate_magic(self) -> bool {
self.ustar_magic == "ustar ".as_bytes()
}
/// Validates the header checksum computes to the expected value.
///
/// # Example
///
/// ```
/// use tar::tar::Header;
/// let header = Header::default();
/// if header.validate_checksum().unwrap() {
/// println!("Checksum is valid");
/// }
/// ```
pub fn validate_checksum(self) -> Result<bool, Error> {
let mut test = self;
let mut new = [0x20u8; 8];
test.header_checksum.copy_from_slice(&[0x20; 8]);
let tmp = format!("{:06o}\x00", test.calc_checksum()?);
new[..tmp.len()].copy_from_slice(tmp.as_bytes());
Ok(self.header_checksum == new)
}
/// Updates the header checksum value.
///
/// # Example
///
/// ```
/// use tar::tar::Header;
/// let mut header = Header::default();
///
/// /* Fill in header information */
///
/// header.update_checksum();
/// ```
pub fn update_checksum(&mut self) -> Result<(), Error> {
let checksum = format!("{:06o}\x00", self.calc_checksum()?);
self.header_checksum[..checksum.len()].copy_from_slice(checksum.as_bytes());
Ok(())
}
fn calc_checksum(self) -> Result<usize, Error> {
let out = self.to_bytes()?;
let mut checksum = 0;
for i in out {
checksum += i as usize;
}
Ok(checksum)
}
}

View File

@ -1,6 +1,11 @@
mod error;
mod header;
mod node;
pub use {header::Header, node::Node};
pub use {
error::Error,
header::{FileType, Header},
node::Node,
};
pub struct Archive {
pub nodes: Vec<Node>,

View File

@ -1,6 +1,128 @@
use crate::Header;
use crate::{Error, FileType, Header};
use deku::prelude::*;
use std::{
fs::{File, Metadata},
io::BufReader,
str,
};
#[derive(Clone, Debug, Default)]
pub struct Node {
pub header: Header,
pub data: Vec<[u8; 512]>,
}
impl Node {
/// Write out a single file within the tar to a file or something with a ``std::io::Write`` trait.
pub fn write<T: std::io::Write>(self, mut input: T) -> Result<usize, Error> {
input.write_all(&self.header.to_bytes()?)?;
let mut written = 512;
for d in self.data {
input.write_all(&d)?;
written += d.len();
}
Ok(written)
}
/// Read a TarNode in from a file or something with a ``std::io::Read`` trait.
pub fn read<T: std::io::Read>(mut input: T) -> Result<Node, Error> {
let mut h = vec![0u8; 512];
input.read_exact(&mut h)?;
let (_, header) = Header::from_bytes((&h, 0))?;
if header == Header::default() {
return Err(Error::EndOfTar);
}
if !header.validate_magic() {
return Err(Error::InvalidMagic);
}
if !header.validate_checksum()? {
return Err(Error::InvalidChecksum);
}
let chunks = (oct_to_dec(&header.size)? / 512) + 1;
Ok(Node {
header,
data: Node::chunk_file(&mut input, Some(chunks))?,
})
}
/// Open and read a file from the ``filename`` argument to a TarNode.
pub fn read_file_to_tar(filename: String) -> Result<Node, Error> {
let header = Header::new(&filename)?;
if header.link_indicator[0] != FileType::Normal as u8 {
return Ok(Node {
header,
data: Vec::<[u8; 512]>::new(),
});
}
let file = File::open(&filename)?;
let mut reader = BufReader::new(file);
Ok(Node {
header,
data: Node::chunk_file(&mut reader, None)?,
})
}
/// Create a Node from in memory data, given the filename and metadata
pub fn read_data_to_tar(
data: &[u8],
filename: &str,
meta: &Metadata,
) -> Result<Node, Error> {
let header = Header::new_from_meta(filename, meta)?;
if header.link_indicator[0] != FileType::Normal as u8 {
return Ok(Node {
header,
data: Vec::<[u8; 512]>::new(),
});
}
let mut reader = BufReader::new(data);
Ok(Node {
header,
data: Node::chunk_file(&mut reader, None)?,
})
}
/// Read in and split a file into ``512`` byte chunks.
fn chunk_file<T: std::io::Read>(
file: &mut T,
max_chunks: Option<usize>,
) -> Result<Vec<[u8; 512]>, Error> {
/* Extract the file data from the tar file */
let mut out = Vec::<[u8; 512]>::new();
let mut n = if let Some(max) = max_chunks {
max
} else {
usize::MAX
};
/* Carve out 512 bytes at a time */
let mut buf: [u8; 512] = [0; 512];
loop {
let len = file.read(&mut buf)?;
n -= 1;
/* If read len == 0, we've hit the EOF */
if len == 0 || n == 0 {
break;
}
/* Save this chunk */
out.push(buf);
}
Ok(out)
}
}
fn oct_to_dec(input: &[u8]) -> Result<usize, Error> {
/* Convert the &[u8] to string and remove the null byte */
let mut s = str::from_utf8(input)?.to_string();
s.pop();
/* Convert to usize from octal */
Ok(usize::from_str_radix(&s, 8)?)
}