Add "magic" to file start

This commit is contained in:
Nathan Fisher 2023-07-07 19:20:21 -04:00
parent f2fff3ce76
commit ce7fe5f019
4 changed files with 30 additions and 13 deletions

View File

@ -68,10 +68,15 @@ by 8, the third byte by 16 and the fourth byte by 24, and the bits combined
into a single 32-bit uint. This is dramatically more efficient than storing into a single 32-bit uint. This is dramatically more efficient than storing
those numbers as ascii text, as is done by Tar. those numbers as ascii text, as is done by Tar.
The first 7 bytes of a haggis file make up the "magic" identifier, consisting
of the byte string "\x89haggis". To be clear, that is the single byte `0x89`
followed by the six ASCII bytes of "haggis". The first file header immediately follows this identifier.
| **bytes** | **meaning** | | **bytes** | **meaning** |
| ----- | ------- | | ----- | ------- |
| 0-8 | The length of the filename (64 bit unsigned int) | | 0-6 | "\x89haggis" - the haggis "magic" identifier |
| 8 to the length specified above | The bytes making up the filename | | 7-14 | The **length** of the filename (64 bit unsigned int) |
| the next **length** bytes | The bytes making up the filename |
| the next 4 bytes | The file's Unix permissions mode (32 bit unsigned int) | | the next 4 bytes | The file's Unix permissions mode (32 bit unsigned int) |
| the next 4 bytes | The uid of the file's owner (32 bit unsigned int) | | the next 4 bytes | The uid of the file's owner (32 bit unsigned int) |
| the next 4 bytes | the gid of the file's owner (32 bit unsigned int) | | the next 4 bytes | the gid of the file's owner (32 bit unsigned int) |

View File

@ -10,6 +10,7 @@ pub enum Error {
Slice(TryFromSliceError), Slice(TryFromSliceError),
InvalidChecksum, InvalidChecksum,
InvalidAlgorithm, InvalidAlgorithm,
InvalidMagic,
MissingData, MissingData,
MutexError, MutexError,
NulError, NulError,
@ -28,6 +29,7 @@ impl fmt::Display for Error {
Self::Io(e) => write!(f, "{e}"), Self::Io(e) => write!(f, "{e}"),
Self::InvalidAlgorithm => write!(f, "invalid algorithm"), Self::InvalidAlgorithm => write!(f, "invalid algorithm"),
Self::InvalidChecksum => write!(f, "invalid checksum"), Self::InvalidChecksum => write!(f, "invalid checksum"),
Self::InvalidMagic => write!(f, "invalid magic"),
Self::MissingData => write!(f, "missing data"), Self::MissingData => write!(f, "missing data"),
Self::MutexError => write!(f, "mutex error"), Self::MutexError => write!(f, "mutex error"),
Self::NulError => write!(f, "nul error"), Self::NulError => write!(f, "nul error"),

View File

@ -30,6 +30,8 @@ pub use {
stream::Stream, stream::Stream,
}; };
/// The 7-byte "magic" identifier written at the very start of a haggis
/// archive: the byte `0x89` followed by the ASCII bytes of `haggis`.
pub static MAGIC: [u8; 7] = *b"\x89haggis";
/// Creates a haggis archive from a list of files /// Creates a haggis archive from a list of files
pub fn create_archive(path: &str, files: Vec<String>, algorithm: Algorithm) -> Result<(), Error> { pub fn create_archive(path: &str, files: Vec<String>, algorithm: Algorithm) -> Result<(), Error> {
let fd = fs::OpenOptions::new() let fd = fs::OpenOptions::new()
@ -48,6 +50,7 @@ pub fn stream_archive<W: Write>(
algorithm: Algorithm, algorithm: Algorithm,
) -> Result<(), Error> { ) -> Result<(), Error> {
let links = Mutex::new(HashMap::new()); let links = Mutex::new(HashMap::new());
writer.write_all(&MAGIC)?;
for f in &files { for f in &files {
let node = Node::from_path(f, algorithm, &links)?; let node = Node::from_path(f, algorithm, &links)?;
node.write(&mut writer)?; node.write(&mut writer)?;
@ -82,11 +85,12 @@ pub fn par_create_archive(
/// Streams a Haggis archive from a list of files, processing each file in parallel /// Streams a Haggis archive from a list of files, processing each file in parallel
#[cfg(feature = "parallel")] #[cfg(feature = "parallel")]
pub fn par_stream_archive<W: Write + Send>( pub fn par_stream_archive<W: Write + Send>(
writer: W, mut writer: W,
files: Vec<String>, files: Vec<String>,
algorithm: Algorithm, algorithm: Algorithm,
sender: &Sender<Message>, sender: &Sender<Message>,
) -> Result<(), Error> { ) -> Result<(), Error> {
writer.write_all(&MAGIC)?;
let links = Mutex::new(HashMap::<u64, String>::new()); let links = Mutex::new(HashMap::<u64, String>::new());
let writer = Mutex::new(writer); let writer = Mutex::new(writer);
let s = sender.clone(); let s = sender.clone();

View File

@ -1,3 +1,5 @@
use crate::MAGIC;
use { use {
crate::{Error, Node}, crate::{Error, Node},
std::{ std::{
@ -14,17 +16,11 @@ use {
/// An iterator over a series of archive `Node`'s. This struct is generic over any /// An iterator over a series of archive `Node`'s. This struct is generic over any
/// type which implements `Read`, such as a file or a network stream. /// type which implements `Read`, such as a file or a network stream.
#[derive(Debug)] #[derive(Debug)]
pub struct Stream<T: Read + Send> { pub struct Stream<R: Read + Send> {
reader: T, reader: R,
} }
impl<T: Read + Send> From<T> for Stream<T> { impl<R: Read + Send> Iterator for Stream<R> {
fn from(value: T) -> Self {
Self { reader: value }
}
}
impl<T: Read + Send> Iterator for Stream<T> {
type Item = Result<Node, Error>; type Item = Result<Node, Error>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -50,7 +46,17 @@ pub enum Message {
Eof, Eof,
} }
impl<T: Read + Send> Stream<T> { impl<R: Read + Send> Stream<R> {
pub fn new(mut reader: R) -> Result<Self, Error> {
let mut buf = [0; 7];
reader.read_exact(&mut buf)?;
if buf == MAGIC {
Ok(Self { reader })
} else {
Err(Error::InvalidMagic)
}
}
pub fn extract(&mut self, prefix: Option<&str>) -> Result<(), Error> { pub fn extract(&mut self, prefix: Option<&str>) -> Result<(), Error> {
for node in self { for node in self {
node?.extract(prefix)?; node?.extract(prefix)?;