From 64a8269b9df20f526382e5f9f92bccea2f7b50ff Mon Sep 17 00:00:00 2001 From: Nathan Fisher Date: Sat, 15 Jul 2023 11:36:21 -0400 Subject: [PATCH] Partial port to new spec revision (won't compile yet); --- Format.md | 26 +++++++++++++------ src/checksum.rs | 5 +++- src/filetype.rs | 67 ++++++++++++++++++++++++++++++++++--------------- src/lib.rs | 4 +-- src/node.rs | 50 ++++++++++++++++++++++-------------- src/stream.rs | 50 +++++++++++++++++++++--------------- 6 files changed, 133 insertions(+), 69 deletions(-) diff --git a/Format.md b/Format.md index 84feee5..dea3408 100644 --- a/Format.md +++ b/Format.md @@ -76,21 +76,33 @@ The following four bytes store the number of nodes making up the archive. | **bytes** | **meaning** | | ----- | ------- | | 0-6 | "\x89haggis" - the haggis "magic" identifier | -| 7-11 | The number of files in the archive (32 bit unsigned int) | +| 7-10 | The number of files in the archive (32 bit unsigned int) | ## Nodes | **bytes** | **meaning** | | ----- | ------- | -| the next 8 bytes | The **length** of the filename (64 bit unsigned int) | +| the next 2 bytes | The **length** of the filename (16 bit unsigned int) | | the next **length** bytes | The bytes making up the filename | -| the next 4 bytes | The files Unix permissions mode (32 bit unsigned int) | | the next 4 bytes | The uid of the file's owner (32 bit unsigned int) | | the next 4 bytes | the gid of the file's owner (32 bit unsigned int) | | the next 8 bytes | The most recent modification time (64 bit unsigned int) | -| the next byte | a flag representing the file's type | +| the next 2 bytes | The file's Unix permissions mode and file type (see next section) | -## File types -The file types represented by the final flag in the previous table are as follows: +## File mode and type +To recreate the Unix permissions and file type flag, the two bytes making up this field +are first interpreted as a 16-bit integer, which has been stored in little endian format +like all of the 
previous integers. To derive the mode, we `&` the three most significant +bits out and cast it to an appropriately sized uint for the platform. The file type flag +is made up of the three bits that we previously removed. In pseudo-code: +```Rust +let bits = [42, 69]; +let raw = u16::fromLittleEndianBytes(bits); +let mask = 0b111 << 13; +let mode = raw & !mask; +let flag = (raw & mask) >> 13; +``` + +The file type flag is then interpreted as follows: | **flag** | **file type** | | ---- | --------- | | 0 | Normal file | @@ -136,7 +148,7 @@ after the last byte of the file. ### Hard and soft links | **bytes** | **meaning** | | ----- | ------- | -| next 8 | the **length** of the link target's file name | +| next 2 | the **length** of the link target's file name (16 bit unsigned int) | | the next **length** bytes | the link target's file name | The next byte will be the beginning of the following archive node. diff --git a/src/checksum.rs b/src/checksum.rs index f54f86c..ec03276 100644 --- a/src/checksum.rs +++ b/src/checksum.rs @@ -1,6 +1,9 @@ use { crate::Error, - std::{io::{Read, Write}, str::FromStr}, + std::{ + io::{Read, Write}, + str::FromStr, + }, }; #[derive(Clone, Copy, Debug)] diff --git a/src/filetype.rs b/src/filetype.rs index ffeafb8..cde8413 100644 --- a/src/filetype.rs +++ b/src/filetype.rs @@ -3,6 +3,35 @@ use { std::io::{Read, Write}, }; +pub(crate) enum Flag { + Normal, + HardLink, + SoftLink, + Directory, + Character, + Block, + Fifo, + Eof, +} + +impl TryFrom for Flag { + type Error = Error; + + fn try_from(value: u8) -> Result { + match value { + 0 => Ok(Self::Normal), + 1 => Ok(Self::HardLink), + 2 => Ok(Self::SoftLink), + 3 => Ok(Self::Directory), + 4 => Ok(Self::Character), + 5 => Ok(Self::Block), + 6 => Ok(Self::Fifo), + 7 => Ok(Self::Eof), + _ => Err(Error::UnknownFileType), // any value outside 0..=7 + } + } +} + /// An enum representing the type of file of an archive member #[derive(Debug)] pub enum FileType { @@ -25,45 +54,43 @@ pub enum FileType { } impl FileType { - 
pub(crate) fn read(reader: &mut T) -> Result { - let mut buf = [0; 1]; - reader.read_exact(&mut buf)?; - match buf[0] { - 0 => { + pub(crate) fn read(reader: &mut T, flag: Flag) -> Result { + match flag { + Flag::Normal => { let file = File::read(reader)?; Ok(Self::Normal(file)) } - 1 => { - let mut len = [0; 8]; + Flag::HardLink => { + let mut len = [0; 2]; reader.read_exact(&mut len)?; - let len = u64::from_le_bytes(len); - let mut buf = Vec::with_capacity(len.try_into()?); - let mut handle = reader.take(len); + let len = u16::from_le_bytes(len); + let mut buf = Vec::with_capacity(len.into()); + let mut handle = reader.take(len.into()); - handle.read_exact(&mut buf)?; + handle.read_to_end(&mut buf)?; // `buf` has length 0 here, so `read_exact` would read nothing let s = String::from_utf8(buf)?; Ok(Self::HardLink(s)) } - 2 => { - let mut len = [0; 8]; + Flag::SoftLink => { + let mut len = [0; 2]; reader.read_exact(&mut len)?; - let len = u64::from_le_bytes(len); - let mut buf = Vec::with_capacity(len.try_into()?); - let mut handle = reader.take(len); + let len = u16::from_le_bytes(len); + let mut buf = Vec::with_capacity(len.into()); + let mut handle = reader.take(len.into()); - handle.read_exact(&mut buf)?; + handle.read_to_end(&mut buf)?; // `buf` has length 0 here, so `read_exact` would read nothing let s = String::from_utf8(buf)?; Ok(Self::SoftLink(s)) } - 3 => Ok(Self::Directory), - 4 => { + Flag::Directory => Ok(Self::Directory), + Flag::Character => { let sp = Special::read(reader)?; Ok(Self::Character(sp)) } - 5 => { + Flag::Block => { let sp = Special::read(reader)?; Ok(Self::Block(sp)) } - 6 => Ok(Self::Fifo), - _ => Err(Error::UnknownFileType), + Flag::Fifo => Ok(Self::Fifo), + Flag::Eof => Ok(Self::Eof), } } diff --git a/src/lib.rs b/src/lib.rs index 8fb6e94..51e8719 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,13 +27,13 @@ pub use { filetype::FileType, node::Node, special::Special, - stream::Stream + stream::Stream, }; #[cfg(feature = "parallel")] pub use stream::Message as StreamMessage; -pub static MAGIC: [u8; 7] = [0x89, b'h', b'a', b'g', b'g', b'i', b's' ]; +pub static MAGIC: [u8; 7] = [0x89, b'h', b'a', b'g', b'g', b'i', b's']; /// 
Creates a haggis archive from a list of files pub fn create_archive(path: &str, files: Vec, algorithm: Algorithm) -> Result<(), Error> { diff --git a/src/node.rs b/src/node.rs index 70740d7..f2d497d 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,5 @@ use { - crate::{nix, Algorithm, Checksum, Error, File, FileType, Special}, + crate::{filetype::Flag, nix, Algorithm, Checksum, Error, File, FileType, Special}, md5::{Digest, Md5}, sha1::Sha1, sha2::Sha256, @@ -45,14 +45,14 @@ impl From for Kind { pub struct Node { /// The filesystem path to this file pub name: String, - /// The Unix permissions bits of this file - pub mode: u32, /// The user id of this file's owner pub uid: u32, /// The group id of this file's owner pub gid: u32, /// The most recent modification time of this file pub mtime: u64, + /// The Unix permissions bits of this file + pub mode: u16, /// The type of file this node represents pub filetype: FileType, } @@ -62,9 +62,9 @@ impl Node { /// > Note: this function reads an already created node. To create a new node /// > from a file, use the `from_path` method. 
pub fn read(reader: &mut T) -> Result { - let mut len = [0; 8]; + let mut len = [0; 2]; reader.read_exact(&mut len)?; - let len = u64::from_le_bytes(len); + let len = u16::from_le_bytes(len); if len == 0 { return Ok(Self { name: "".to_string(), @@ -75,22 +75,27 @@ impl Node { filetype: FileType::Eof, }); } - let mut name = Vec::with_capacity(len.try_into()?); - let mut handle = reader.take(len); + let mut name = Vec::with_capacity(len.into()); + let mut handle = reader.take(len.into()); handle.read_to_end(&mut name)?; - let mut buf = [0; 20]; + let mut buf = [0; 18]; reader.read_exact(&mut buf)?; - let mode: [u8; 4] = buf[..4].try_into()?; - let uid: [u8; 4] = buf[4..8].try_into()?; - let gid: [u8; 4] = buf[8..12].try_into()?; - let mtime: [u8; 8] = buf[12..].try_into()?; - let filetype = FileType::read(reader)?; + let uid: [u8; 4] = buf[0..4].try_into()?; + let gid: [u8; 4] = buf[4..8].try_into()?; + let mtime: [u8; 8] = buf[8..16].try_into()?; + let raw_mode: [u8; 2] = buf[16..18].try_into()?; + let raw_mode = u16::from_le_bytes(raw_mode); + let mask: u16 = 0b111 << 13; // selects the three most significant bits (the file type flag) + let mode = raw_mode & !mask; // low 13 bits: the permissions mode + let flag: u8 = ((raw_mode & mask) >> 13).try_into()?; // top three bits shifted down to 0..=7 + let flag: Flag = flag.try_into()?; + let filetype = FileType::read(reader, flag)?; Ok(Self { name: String::from_utf8(name)?, - mode: u32::from_le_bytes(mode), uid: u32::from_le_bytes(uid), gid: u32::from_le_bytes(gid), mtime: u64::from_le_bytes(mtime), + mode, filetype, }) } @@ -100,10 +105,11 @@ impl Node { /// > representation. To extract the contents of a `Node` and write out the /// > file it represents, use the `extract` method instead. 
pub fn write(&self, writer: &mut T) -> Result<(), Error> { - let len = self.name.len() as u64; + let len: u16 = self.name.len().try_into()?; writer.write_all(&len.to_le_bytes())?; writer.write_all(self.name.as_bytes())?; - [self.mode, self.uid, self.gid] + writer.write_all(&self.mode.to_le_bytes())?; + [self.uid, self.gid] .iter() .try_for_each(|f| writer.write_all(&f.to_le_bytes()))?; writer.write_all(&self.mtime.to_le_bytes())?; @@ -237,7 +243,7 @@ impl Node { } } match self.filetype { - FileType::Eof => {}, + FileType::Eof => {} FileType::Fifo => { nix::mkfifo(&path, self.mode)?; if euid == 0 { @@ -294,7 +300,10 @@ impl Node { if nix::geteuid() == 0 { nix::chown(dir.to_str().ok_or(Error::NulError)?, self.uid, self.gid)?; } - nix::chmod(dir.to_str().ok_or(Error::BadPath)?, self.mode & 0o7777 | 0o100)?; + nix::chmod( + dir.to_str().ok_or(Error::BadPath)?, + self.mode & 0o7777 | 0o100, + )?; Ok(()) } } @@ -317,7 +326,10 @@ mod tests { for c in &sum { write!(s, "{c:02x}").unwrap(); } - assert_eq!(s, "5f1b6e6e31682fb6683db2e78db11e624527c897618f1a5b0a0b5256f557c22d"); + assert_eq!( + s, + "5f1b6e6e31682fb6683db2e78db11e624527c897618f1a5b0a0b5256f557c22d" + ); } #[test] diff --git a/src/stream.rs b/src/stream.rs index 317989a..2307ea4 100644 --- a/src/stream.rs +++ b/src/stream.rs @@ -1,5 +1,11 @@ use crate::MAGIC; +#[cfg(feature = "parallel")] +use { + crate::FileType, + rayon::{iter::ParallelBridge, prelude::ParallelIterator}, + std::sync::mpsc::Sender, +}; use { crate::{Error, Node}, std::{ @@ -7,12 +13,6 @@ use { iter::Iterator, }, }; -#[cfg(feature = "parallel")] -use { - crate::FileType, - rayon::{iter::ParallelBridge, prelude::ParallelIterator}, - std::sync::mpsc::Sender, -}; /// An iterator over a series of archive `Node`'s. This struct is generic over any /// type which implements `Read`, such as a file or a network stream. 
@@ -54,7 +54,7 @@ impl Stream { reader.read_exact(&mut buf)?; let length = u32::from_le_bytes(buf[7..].try_into()?); if buf[0..7] == MAGIC { - Ok(Self { length ,reader }) + Ok(Self { length, reader }) } else { Err(Error::InvalidMagic) } @@ -79,24 +79,34 @@ impl Stream { n.extract(prefix)?; match n.filetype { FileType::Normal(f) => { - s.send(Message::FileExtracted { name: n.name.clone(), size: f.len }) - .map_err(|_| Error::SenderError)?; - }, + s.send(Message::FileExtracted { + name: n.name.clone(), + size: f.len, + }) + .map_err(|_| Error::SenderError)?; + } FileType::SoftLink(t) | FileType::HardLink(t) => { - s.send(Message::LinkCreated { name: n.name.clone(), target: t.clone() }) - .map_err(|_| Error::SenderError)?; - }, + s.send(Message::LinkCreated { + name: n.name.clone(), + target: t.clone(), + }) + .map_err(|_| Error::SenderError)?; + } FileType::Directory => { - s.send(Message::DirectoryCreated { name: n.name.clone() }) - .map_err(|_| Error::SenderError)?; - }, + s.send(Message::DirectoryCreated { + name: n.name.clone(), + }) + .map_err(|_| Error::SenderError)?; + } FileType::Block(_) | FileType::Character(_) | FileType::Fifo => { - s.send(Message::DeviceCreated { name: n.name.clone() }) - .map_err(|_| Error::SenderError)?; - }, + s.send(Message::DeviceCreated { + name: n.name.clone(), + }) + .map_err(|_| Error::SenderError)?; + } FileType::Eof => { s.send(Message::Eof).map_err(|_| Error::SenderError)?; - }, + } } Ok::<(), Error>(()) })?;