commit dc1c6d6d6626aa321295fa6cb7673e8732a19ec6
Author: Nathan Fisher
Date:   Sat Jun 1 18:47:22 2024 -0400

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ee7098f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+zig-out/
+zig-cache/
diff --git a/build.zig b/build.zig
new file mode 100644
index 0000000..c731541
--- /dev/null
+++ b/build.zig
@@ -0,0 +1,91 @@
+const std = @import("std");
+
+// Although this function looks imperative, note that its job is to
+// declaratively construct a build graph that will be executed by an external
+// runner.
+pub fn build(b: *std.Build) void {
+    // Standard target options allows the person running `zig build` to choose
+    // what target to build for. Here we do not override the defaults, which
+    // means any target is allowed, and the default is native. Other options
+    // for restricting supported target set are available.
+    const target = b.standardTargetOptions(.{});
+
+    // Standard optimization options allow the person running `zig build` to select
+    // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
+    // set a preferred release mode, allowing the user to decide how to optimize.
+    const optimize = b.standardOptimizeOption(.{});
+
+    const lib = b.addStaticLibrary(.{
+        .name = "b64_zig",
+        // In this case the main source file is merely a path, however, in more
+        // complicated build scripts, this could be a generated file.
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // This declares intent for the library to be installed into the standard
+    // location when the user invokes the "install" step (the default step when
+    // running `zig build`).
+    b.installArtifact(lib);
+
+    const exe = b.addExecutable(.{
+        .name = "b64_zig",
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    // This declares intent for the executable to be installed into the
+    // standard location when the user invokes the "install" step (the default
+    // step when running `zig build`).
+    b.installArtifact(exe);
+
+    // This *creates* a Run step in the build graph, to be executed when another
+    // step is evaluated that depends on it. The next line below will establish
+    // such a dependency.
+    const run_cmd = b.addRunArtifact(exe);
+
+    // By making the run step depend on the install step, it will be run from the
+    // installation directory rather than directly from within the cache directory.
+    // This is not necessary, however, if the application depends on other installed
+    // files, this ensures they will be present and in the expected location.
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    // This allows the user to pass arguments to the application in the build
+    // command itself, like this: `zig build run -- arg1 arg2 etc`
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    // This creates a build step. It will be visible in the `zig build --help` menu,
+    // and can be selected like this: `zig build run`
+    // This will evaluate the `run` step rather than the default, which is "install".
+    const run_step = b.step("run", "Run the app");
+    run_step.dependOn(&run_cmd.step);
+
+    // Creates a step for unit testing. This only builds the test executable
+    // but does not run it.
+    const lib_unit_tests = b.addTest(.{
+        .root_source_file = b.path("src/root.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
+
+    const exe_unit_tests = b.addTest(.{
+        .root_source_file = b.path("src/main.zig"),
+        .target = target,
+        .optimize = optimize,
+    });
+
+    const run_exe_unit_tests = b.addRunArtifact(exe_unit_tests);
+
+    // Similar to creating the run step earlier, this exposes a `test` step to
+    // the `zig build --help` menu, providing a way for the user to request
+    // running the unit tests.
+    const test_step = b.step("test", "Run unit tests");
+    test_step.dependOn(&run_lib_unit_tests.step);
+    test_step.dependOn(&run_exe_unit_tests.step);
+}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..0fc02ad
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,67 @@
+.{
+    .name = "b64_zig",
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+
+    // This field is optional.
+    // This is currently advisory only; Zig does not yet do anything
+    // with this value.
+    //.minimum_zig_version = "0.11.0",
+
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save ` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package.
+    // Paths are relative to the build root. Use the empty string (`""`) to refer to
+    // the build root itself.
+    // A directory listed here means that all files within, recursively, are included.
+    .paths = .{
+        // This makes *all* files, recursively, included in this package. It is generally
+        // better to explicitly list the files and directories instead, to insure that
+        // fetching from tarballs, file system paths, and version control all result
+        // in the same contents hash.
+        "",
+        // For example...
+        //"build.zig",
+        //"build.zig.zon",
+        //"src",
+        //"LICENSE",
+        //"README.md",
+    },
+}
diff --git a/src/main.zig b/src/main.zig
new file mode 100644
index 0000000..c8a3f67
--- /dev/null
+++ b/src/main.zig
@@ -0,0 +1,24 @@
+const std = @import("std");
+
+pub fn main() !void {
+    // Prints to stderr (it's a shortcut based on `std.io.getStdErr()`)
+    std.debug.print("All your {s} are belong to us.\n", .{"codebase"});
+
+    // stdout is for the actual output of your application, for example if you
+    // are implementing gzip, then only the compressed bytes should be sent to
+    // stdout, not any debugging messages.
+    const stdout_file = std.io.getStdOut().writer();
+    var bw = std.io.bufferedWriter(stdout_file);
+    const stdout = bw.writer();
+
+    try stdout.print("Run `zig build test` to run the tests.\n", .{});
+
+    try bw.flush(); // don't forget to flush!
+}
+
+test "simple test" {
+    var list = std.ArrayList(i32).init(std.testing.allocator);
+    defer list.deinit(); // try commenting this out and see if zig detects the memory leak!
+    try list.append(42);
+    try std.testing.expectEqual(@as(i32, 42), list.pop());
+}
diff --git a/src/root.zig b/src/root.zig
new file mode 100644
index 0000000..3ff4c26
--- /dev/null
+++ b/src/root.zig
@@ -0,0 +1,277 @@
+const std = @import("std");
+const testing = std.testing;
+const GenericReader = std.io.GenericReader;
+const GenericWriter = std.io.GenericWriter;
+const AnyReader = std.io.AnyReader;
+const AnyWriter = std.io.AnyWriter;
+
+/// Errors reported while decoding base64 symbols.
+const b64Error = error{
+    /// The symbol is not one of the 64 characters of the alphabet.
+    InvalidChar,
+    /// Declared for missing padding; nothing in this file returns it yet.
+    MissingPad,
+};
+
+/// A base64 alphabet: the 64 symbols that stand for the 6-bit values 0..63
+/// and the character used for padding.
+pub const b64Alphabet = struct {
+    chars: [64]u8,
+    pad: u8,
+
+    const Self = @This();
+
+    /// Returns the 6-bit value of symbol `c`, or `InvalidChar` when `c` is
+    /// not one of the 64 alphabet characters.
+    fn getIdx(self: Self, c: u8) b64Error!u6 {
+        for (self.chars, 0..) |b, i| {
+            if (c == b) return @intCast(i);
+        }
+        return b64Error.InvalidChar;
+    }
+
+    /// Returns the alphabet character that stands for the 6-bit value `idx`.
+    fn getchar(self: Self, idx: u6) u8 {
+        return self.chars[idx];
+    }
+
+    /// Encodes the first `len` bytes of `chunk` as four base64 symbols, using
+    /// the padding character for symbols that carry no data. The result is
+    /// stored in `out` and also returned by value.
+    fn encodeChunk(self: Self, chunk: [3]u8, len: usize, out: *[4]u8) [4]u8 {
+        // Pack the input into one 24-bit group; bytes past `len` count as zero.
+        var num: u24 = 0;
+        for (0..3) |i| {
+            num <<= 8;
+            if (len > i) num |= chunk[i];
+        }
+        // Symbols that carry data: ceil(len * 8 / 6).
+        const outlen = (len * 8 + 5) / 6;
+        // Fill the symbols last to first, consuming six bits per step.
+        var outbuf: [4]u8 = undefined;
+        var idx: usize = 4;
+        while (idx > 0) : ({
+            idx -= 1;
+            num >>= 6;
+        }) {
+            const n: u6 = @truncate(num);
+            outbuf[idx - 1] = if (idx <= outlen) self.chars[n] else self.pad;
+        }
+        out.* = outbuf;
+        return outbuf;
+    }
+
+    /// Encodes a twelve element array of bytes and returns a sixteen element
+    /// array encoded in base64. The four three-byte groups are handled as the
+    /// four lanes of a vector so the bitwise operations run in parallel. The
+    /// result is stored in `out` and also returned by value.
+    fn encodeChunksVectored(self: Self, in: [12]u8, out: *[16]u8) [16]u8 {
+        var numvec: @Vector(4, u24) = @splat(0);
+        // Shift amounts for a u24 are u5 values.
+        const shlvec: @Vector(4, u5) = @splat(8);
+        for (0..3) |i| {
+            numvec <<= shlvec;
+            // Lane k takes byte i of the k-th three-byte group.
+            const invec: @Vector(4, u24) = .{ in[i], in[i + 3], in[i + 6], in[i + 9] };
+            numvec |= invec;
+        }
+        const shrvec: @Vector(4, u5) = @splat(6);
+        var outbuf: [16]u8 = undefined;
+        var idx: usize = 4;
+        while (idx > 0) : ({
+            idx -= 1;
+            numvec >>= shrvec;
+        }) {
+            // Keep the low six bits of every lane.
+            const idxvec: @Vector(4, u6) = @truncate(numvec);
+            outbuf[idx - 1] = self.chars[idxvec[0]];
+            outbuf[idx + 3] = self.chars[idxvec[1]];
+            outbuf[idx + 7] = self.chars[idxvec[2]];
+            outbuf[idx + 11] = self.chars[idxvec[3]];
+        }
+        out.* = outbuf;
+        return outbuf;
+    }
+
+    /// Decodes four base64 symbols into three bytes. Padding characters are
+    /// accepted and carry no data; bytes not fully covered by data symbols
+    /// are zero. Any other character outside the alphabet yields
+    /// `InvalidChar`. On success the result is stored in `out` and also
+    /// returned by value.
+    fn decodeChunk(self: Self, chunk: [4]u8, out: *[3]u8) b64Error![3]u8 {
+        var num: u24 = 0;
+        // Counts the symbols that carry data, i.e. everything but padding.
+        var len: usize = 0;
+        for (chunk) |c| {
+            num <<= 6;
+            if (self.getIdx(c)) |idx| {
+                num |= idx;
+                len += 1;
+            } else |err| {
+                if (c != self.pad) return err;
+            }
+        }
+        // Whole bytes covered by the data symbols: floor(len * 6 / 8).
+        const outlen = len * 6 / 8;
+        var outbuf: [3]u8 = undefined;
+        var i: usize = 3;
+        while (i > 0) : ({
+            i -= 1;
+            num >>= 8;
+        }) {
+            outbuf[i - 1] = if (i <= outlen) @truncate(num) else 0;
+        }
+        out.* = outbuf;
+        return outbuf;
+    }
+
+    /// Decodes sixteen base64 symbols into twelve bytes, handling the four
+    /// four-symbol groups as the four lanes of a vector. Padding is not
+    /// accepted here: every symbol must belong to the alphabet, otherwise
+    /// `InvalidChar` is returned. On success the result is stored in `out`
+    /// and also returned by value.
+    fn decodeChunksVectored(self: Self, in: [16]u8, out: *[12]u8) b64Error![12]u8 {
+        var numvec: @Vector(4, u24) = @splat(0);
+        const shlvec: @Vector(4, u5) = @splat(6);
+        for (0..4) |i| {
+            numvec <<= shlvec;
+            // Lane k takes symbol i of the k-th four-symbol group.
+            const invec: @Vector(4, u24) = .{
+                try self.getIdx(in[i]),
+                try self.getIdx(in[i + 4]),
+                try self.getIdx(in[i + 8]),
+                try self.getIdx(in[i + 12]),
+            };
+            numvec |= invec;
+        }
+        const shrvec: @Vector(4, u5) = @splat(8);
+        var outbuf: [12]u8 = undefined;
+        var idx: usize = 3;
+        while (idx > 0) : ({
+            idx -= 1;
+            numvec >>= shrvec;
+        }) {
+            // Keep the low byte of every lane.
+            const cvec: @Vector(4, u8) = @truncate(numvec);
+            outbuf[idx - 1] = cvec[0];
+            outbuf[idx + 2] = cvec[1];
+            outbuf[idx + 5] = cvec[2];
+            outbuf[idx + 8] = cvec[3];
+        }
+        out.* = outbuf;
+        return outbuf;
+    }
+};
+
+/// The standard base64 alphabet of RFC 4648 with `=` as padding.
+pub const rfc4648B64Alphabet = b64Alphabet{
+    .chars = [64]u8{ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' },
+    .pad = '=',
+};
+
+test "alphabet getIdx" {
+    try testing.expect(try rfc4648B64Alphabet.getIdx('a') == 26);
+    try testing.expect(try rfc4648B64Alphabet.getIdx('+') == 62);
+    try testing.expectError(b64Error.InvalidChar, rfc4648B64Alphabet.getIdx('='));
+}
+
+test "alphabet getChar" {
+    try testing.expect(rfc4648B64Alphabet.getchar(4) == 'E');
+}
+
+test "encode chunk" {
+    const chunk = [3]u8{ 'H', 'e', 'l' };
+    var out: [4]u8 = undefined;
+    const obuf = rfc4648B64Alphabet.encodeChunk(chunk, 3, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], "SGVs"));
+    try testing.expect(std.mem.eql(u8, out[0..], "SGVs"));
+}
+
+test "encode chunk 2long" {
+    const chunk = [3]u8{ 'H', 'e', 0 };
+    var out: [4]u8 = undefined;
+    const obuf = rfc4648B64Alphabet.encodeChunk(chunk, 2, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], "SGU="));
+}
+
+test "encode chunks vectored" {
+    const chunk = [12]u8{ 'H', 'e', 'l', 'l', 'o', ',', ' ', 'W', 'o', 'r', 'l', 'd' };
+    var out: [16]u8 = undefined;
+    const obuf = rfc4648B64Alphabet.encodeChunksVectored(chunk, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], "SGVsbG8sIFdvcmxk"));
+}
+
+test "decode chunk" {
+    const chunk = [4]u8{ 'S', 'G', 'V', 's' };
+    var out: [3]u8 = undefined;
+    const obuf = try rfc4648B64Alphabet.decodeChunk(chunk, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], "Hel"));
+}
+
+test "decode chunk padded" {
+    const chunk = [4]u8{ 'S', 'G', 'U', '=' };
+    var out: [3]u8 = undefined;
+    const obuf = try rfc4648B64Alphabet.decodeChunk(chunk, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], &[3]u8{ 'H', 'e', 0 }));
+}
+
+test "decode chunks vectored" {
+    const chunk = [16]u8{ 'S', 'G', 'V', 's', 'b', 'G', '8', 's', 'I', 'F', 'd', 'v', 'c', 'm', 'x', 'k' };
+    var out: [12]u8 = undefined;
+    const obuf = try rfc4648B64Alphabet.decodeChunksVectored(chunk, &out);
+    try testing.expect(std.mem.eql(u8, obuf[0..], "Hello, World"));
+}
+
+/// Base64 encoder holding a source reader, a destination writer and the
+/// alphabet to use. `encode` is not implemented yet.
+pub const b64Encoder = struct {
+    // `std.io.GenericReader`/`GenericWriter` are functions that build types,
+    // so the type-erased `AnyReader`/`AnyWriter` are stored instead.
+    reader: AnyReader,
+    writer: AnyWriter,
+    alphabet: b64Alphabet,
+
+    const Self = @This();
+
+    /// Creates an encoder; a null `alphabet` selects `rfc4648B64Alphabet`.
+    pub fn init(reader: AnyReader, writer: AnyWriter, alphabet: ?b64Alphabet) Self {
+        return Self{
+            .reader = reader,
+            .writer = writer,
+            .alphabet = alphabet orelse rfc4648B64Alphabet,
+        };
+    }
+
+    /// Placeholder: currently does nothing.
+    pub fn encode(self: Self) !void {
+        _ = self;
+        // todo
+    }
+};
+
+/// Base64 decoder holding a source reader, a destination writer and the
+/// alphabet to use. `decode` is not implemented yet.
+pub const b64Decoder = struct {
+    // `std.io.GenericReader`/`GenericWriter` are functions that build types,
+    // so the type-erased `AnyReader`/`AnyWriter` are stored instead.
+    reader: AnyReader,
+    writer: AnyWriter,
+    alphabet: b64Alphabet,
+
+    const Self = @This();
+
+    /// Creates a decoder; a null `alphabet` selects `rfc4648B64Alphabet`.
+    pub fn init(reader: AnyReader, writer: AnyWriter, alphabet: ?b64Alphabet) Self {
+        return Self{
+            .reader = reader,
+            .writer = writer,
+            .alphabet = alphabet orelse rfc4648B64Alphabet,
+        };
+    }
+
+    /// Placeholder: currently does nothing.
+    pub fn decode(self: Self) !void {
+        _ = self;
+        // todo
+    }
+};