概览
Zig 将压缩 API 精简为务实内核:高质量的解压器可直接接入新的 std.Io.Reader/Writer 接口,为 TAR、ZIP 等格式提供数据,且没有隐藏的副作用(参见“重构后的 std.compress.flate”与 flate.zig)。将这些部件组合起来,你就能复原日志、打包资源,或把注册表直接读入内存,同时保持一贯的显式资源管理纪律。
由于 Zig 将归档视为简单字节流,挑战从“魔法助手函数”转移到组合合适的迭代器、缓冲与元数据校验。掌握此处的解压构件,可为你的打包流水线与部署工具打好基础(参见 tar.zig、zip.zig)。
学习目标
- 直接驱动 std.compress.flate.Decompress、std.compress.lzma2.decompress 及其同类,使其直接对接 std.Io.Reader/Writer 端点(参见 Decompress.zig、lzma2.zig、Writer.zig)。
- 选择历史缓冲区、流限制和分配器,使解压缩在调试构建和发布构建下都保持内存安全。10
- 动态生成小型 TAR 档案并迭代它们,而不接触磁盘状态。28
- 检查和提取 ZIP 中央目录条目,同时强制执行文件系统卫生和压缩方法约束。36
流式解压接口
Zig 的解压缩器使用相同的流式方言:您将任何读取器交给它们,可选择提供临时缓冲区,它们将有效负载发射到您已经拥有的写入器中。这种设计让您可以完全控制分配、错误传播和刷新行为。22
实践中的 Flate 容器
Deflate 样式有效负载(原始、zlib、gzip)依赖最多 32 KiB 的历史窗口。Zig 0.15.2 允许您在直接将数据管道传输到另一个写入器时跳过分配该窗口——传递 &.{},解码器将使用最小缓冲调用 streamRemaining。
const std = @import("std");
/// Inflates an embedded zlib stream into a fixed stack buffer and prints the
/// decoded text together with its length.
pub fn main() !void {
    // Buffered stdout: nothing is emitted until the explicit flush at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // Compressed payload kept inline so the example needs no input file.
    const zlib_bytes = [_]u8{
        0x78, 0x9c, 0x0b, 0x2e, 0x29, 0x4a, 0x4d, 0xcc, 0xcd, 0xcc, 0x4b, 0x57, 0x48, 0x49,
        0x4d, 0xce, 0xcf, 0x2d, 0x28, 0x4a, 0x2d, 0x2e, 0xce, 0xcc, 0xcf, 0x53, 0xc8, 0x4e,
        0x4d, 0x2d, 0x28, 0x56, 0x28, 0xc9, 0xcf, 0xcf, 0x29, 0x56, 0x00, 0x0a, 0xa6, 0x64,
        0x26, 0x97, 0x24, 0x26, 0xe5, 0xa4, 0xea, 0x71, 0x01, 0x00, 0xdf, 0xba, 0x12, 0xa6,
    };
    var input: std.Io.Reader = .fixed(&zlib_bytes);

    // The decoder is given the `.zlib` container tag and an empty scratch
    // buffer (`&.{}`); output is pulled with `streamRemaining` below.
    var decoder = std.compress.flate.Decompress.init(&input, .zlib, &.{});

    // Fixed-capacity destination for the decoded bytes.
    var text_storage: [128]u8 = undefined;
    var text_sink = std.Io.Writer.fixed(&text_storage);

    // `streamRemaining` reports how many bytes were written into the sink.
    const text_len = try decoder.reader.streamRemaining(&text_sink);
    const text = text_storage[0..text_len];

    try out.print("decoded ({d} bytes): {s}\n", .{ text.len, text });
    try out.flush();
}
$ zig run inflate_greeting.zig
decoded (49 bytes): Streaming decompression keeps tools predictable.
std.Io.Writer.fixed 提供容量确定的栈上接收器;之后务必刷新手动管理的 stdout 缓冲区,以免进程退出时丢失输出。1
无外部工具的 LZMA2
一些注册表仍使用 LZMA2 帧来提供确定性的逐字节有效负载。Zig 将解码器封装在单个辅助函数中,为您扩展一个 std.Io.Writer.Allocating——非常适合短配置包或固件块。12
const std = @import("std");
/// Decodes an embedded LZMA2 stream into a growable in-memory writer and
/// prints the decoded bytes together with their length.
pub fn main() !void {
    // Buffered stdout: nothing is emitted until the explicit flush at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // The deferred assert checks that `deinit` reports `.ok` on exit.
    var debug_allocator = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(debug_allocator.deinit() == .ok);
    const gpa = debug_allocator.allocator();

    // Inline LZMA2 stream. The ASCII for "Hello\n" and "World!\n" is visible
    // in the bytes, each run preceded by a three-byte prefix; the final byte
    // is 0x00.
    const lzma2_bytes = [_]u8{
        0x01, 0x00, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x02, 0x00, 0x06, 0x57, 0x6f,
        0x72, 0x6c, 0x64, 0x21, 0x0a, 0x00,
    };
    var input = std.io.fixedBufferStream(&lzma2_bytes);

    // Allocator-backed writer that collects the decoded output.
    var output = std.Io.Writer.Allocating.init(gpa);
    defer output.deinit();

    try std.compress.lzma2.decompress(gpa, input.reader(), &output.writer);

    // Everything written so far lives in `buffer[0..end]` of the inner writer.
    const text = output.writer.buffer[0..output.writer.end];

    try out.print("lzma2 decoded ({d} bytes):\n{s}\n", .{ text.len, text });
    try out.flush();
}
$ zig run lzma2_memory_decode.zig
lzma2 decoded (13 bytes):
Hello
World!
归档工作流
掌握了这些解压缩原语后,归档变成了组合练习:特定格式的迭代器为您提供元数据,而您决定是缓冲、丢弃还是流式传输到磁盘。28
在内存中完成 TAR 的往返
std.tar.Writer 发出确定性 512 字节块,因此您可以在 RAM 中组装小束,检查它们,然后才决定是否持久化它们。24
const std = @import("std");
/// Builds a small TAR archive in a stack buffer, then iterates it from memory,
/// listing every entry and printing the contents of regular files.
pub fn main() !void {
    // Buffered stdout: nothing is emitted until the explicit flush at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // --- Write phase: the archive is assembled entirely in `tar_storage`. ---
    var tar_storage: [4096]u8 = undefined;
    var tar_sink = std.Io.Writer.fixed(&tar_storage);
    var builder = std.tar.Writer{ .underlying_writer = &tar_sink };

    try builder.writeDir("reports", .{ .mode = 0o755 });
    try builder.writeFileBytes("reports/summary.txt", "cpu=28%\nmem=512MiB\n", .{ .mode = 0o644 });

    // Bytes emitted so far by the fixed writer.
    const tar_bytes = tar_sink.buffered();
    try out.print("tar archive is {d} bytes and holds:\n", .{tar_bytes.len});

    // --- Read phase: walk the same bytes back through the iterator. ---
    var tar_input: std.Io.Reader = .fixed(tar_bytes);
    var path_storage: [std.fs.max_path_bytes]u8 = undefined;
    var link_storage: [std.fs.max_path_bytes]u8 = undefined;
    var entries = std.tar.Iterator.init(&tar_input, .{
        .file_name_buffer = &path_storage,
        .link_name_buffer = &link_storage,
    });

    while (try entries.next()) |item| {
        try out.print("- {s} ({s}, {d} bytes)\n", .{ item.name, @tagName(item.kind), item.size });

        // Only regular files have a payload to print.
        if (item.kind != .file) continue;

        // Copy this entry's payload into a small scratch buffer.
        var body_storage: [128]u8 = undefined;
        var body_sink = std.Io.Writer.fixed(&body_storage);
        try entries.streamRemaining(item, &body_sink);

        try out.print(" contents: {s}\n", .{body_sink.buffered()});
    }

    try out.flush();
}
$ zig run tar_roundtrip.zig
tar archive is 1536 bytes and holds:
- reports (directory, 0 bytes)
- reports/summary.txt (file, 19 bytes)
contents: cpu=28%
mem=512MiB
在常规文件上调用 Iterator.next 后,您必须用 streamRemaining 清空有效负载;否则,下一个标头将对齐错误,迭代器会抛出 error.UnexpectedEndOfStream。
安全窥视 ZIP 中央目录
ZIP 支持通过 std.zip.Iterator 公开中央目录,提取策略则由您决定。让条目经由 std.testing.tmpDir 落盘可使产物保持隔离,同时您可以验证压缩方法并检查内容(参见 testing.zig)。
const std = @import("std");
/// Copies an embedded ZIP archive into a temporary directory, extracts every
/// entry there, and prints each entry's name, compression method, size and
/// contents (as text for `.txt` names, otherwise as a hex dump).
pub fn main() !void {
    // Buffered stdout: nothing is emitted until the explicit flush at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // The archive is baked into the binary at compile time.
    const zip_bytes = @embedFile("demo.zip");

    // The deferred assert checks that `deinit` reports `.ok` on exit.
    var debug_allocator = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(debug_allocator.deinit() == .ok);
    const gpa = debug_allocator.allocator();

    // All on-disk artifacts live under a temporary directory.
    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();

    // Write the embedded bytes to a real file and rewind it, so the archive
    // can be read back through a file reader.
    var archive_file = try scratch.dir.createFile("demo.zip", .{ .read = true, .truncate = true });
    defer {
        archive_file.close();
        scratch.dir.deleteFile("demo.zip") catch {};
    }
    try archive_file.writeAll(zip_bytes);
    try archive_file.seekTo(0);

    var io_storage: [4096]u8 = undefined;
    var zip_reader = archive_file.reader(&io_storage);
    var entries = try std.zip.Iterator.init(&zip_reader);
    var path_storage: [std.fs.max_path_bytes]u8 = undefined;

    // Heading printed before the per-entry listing.
    try out.writeAll("zip archive contains:\n");

    while (try entries.next()) |item| {
        // `extract` is handed `path_storage`; the name is then read back from
        // its first `filename_len` bytes.
        try item.extract(&zip_reader, .{}, &path_storage, scratch.dir);
        const path = path_storage[0..item.filename_len];

        try out.print(
            "- {s} ({s}, {d} bytes)\n",
            .{ path, @tagName(item.compression_method), item.uncompressed_size },
        );

        // Names ending in '/' are skipped: there is nothing to open and print.
        if (std.mem.endsWith(u8, path, "/")) continue;

        // Read the extracted file back in full.
        var extracted = try scratch.dir.openFile(path, .{});
        defer extracted.close();

        const meta = try extracted.stat();
        const byte_count: usize = @intCast(meta.size);
        const storage = try gpa.alloc(u8, byte_count);
        defer gpa.free(storage);

        const got = try extracted.readAll(storage);
        const data = storage[0..got];

        if (!std.mem.endsWith(u8, path, ".txt")) {
            // Hex dump, starting a new row every 16 bytes.
            try out.writeAll(" bytes:");
            for (data, 0..) |b, i| {
                const lead: []const u8 = if (i % 16 != 0) " " else "\n ";
                try out.print("{s}{X:0>2}", .{ lead, b });
            }
            try out.writeAll("\n");
        } else {
            try out.print(" text: {s}\n", .{data});
        }
    }

    try out.flush();
}
$ zig run zip_iterator_preview.zig
zip archive contains:
- demo/readme.txt (store, 34 bytes)
text: Decompression from Zig streaming.
- demo/raw.bin (store, 4 bytes)
bytes:
00 01 02 03
std.zip.Entry.extract 仅支持 store 和 deflate;提前拒绝其他方法,或在互操作性需要时调用第三方库。
混合来源的模式目录
混合这些技术可以从包注册表补充清单,在签名检查之前解压缩发布版本工件,或为 GPU 上传准备二进制 blob——所有这些都无需离开 Zig 的标准工具箱。35
注意与警示
- 向 std.compress.flate.Decompress.init 传递零长度缓冲区会禁用历史记录重用;但处理大型归档时,重用一个 [flate.max_window_len]u8 临时数组更有利。
- TAR 迭代器维护未读文件字节的状态;在前进到下一个标头之前,始终流式传输或丢弃它们。
- ZIP 提取仅在 allow_backslashes = true 时规范化反斜杠;强制使用正斜杠以避免 Windows 上的目录遍历错误。33