From 63fb1bafea069c8d0b8eba70eeef48a9fc72422b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 18 Mar 2011 23:18:36 +1100 Subject: [PATCH] Modify the file format further to make all block header information only encode the number of bytes determined in chunk_bytes instead of 8 full bytes all the time. --- doc/magic.header.txt | 36 +++++++++--------- lrzip.c | 66 ++++++++++++++++---------------- runzip.c | 2 +- stream.c | 90 +++++++++++++++++++++++++------------------- stream.h | 2 +- 5 files changed, 104 insertions(+), 92 deletions(-) diff --git a/doc/magic.header.txt b/doc/magic.header.txt index 89d8627..29243c0 100644 --- a/doc/magic.header.txt +++ b/doc/magic.header.txt @@ -5,8 +5,8 @@ Con Kolivas Byte Content 0-23 Magic --- -24->83 Rzip chunk data -84+ Data blocks +24+ Rzip Chunk Data (RCD) +RCD+ Data blocks --- repeat (end-MD5_DIGEST_SIZE)->(end) md5 hash @@ -24,19 +24,19 @@ Encrypted salt (bytes 6->14 in magic if encrypted): 0->1 Encoded number of loops to hash password 2->7 Random data -Rzip chunk data: -0 Data offsets byte width -1 Flag that there is another chunk beyond this -2->9 Chunk decompressed size -10->34 Stream 0 header data -35->59 Stream 1 header data +Rzip Chunk Data: +0 Data offsets byte width (meaning length is < 2^(8 * RCD0)) +1 Flag that there is no chunk beyond this +(RCD0 bytes) Chunk decompressed size +XX Stream 0 header data +XX Stream 1 header data Stream Header Data: Byte: 0 Compressed data type -1->8 Compressed data length -9->16 Uncompressed data length -17->24 Next block head +(RCD0 bytes) Compressed data length +(RCD0 bytes) Uncompressed data length +(RCD0 bytes) Next block head Data blocks: 0->(end-2) data @@ -48,9 +48,9 @@ March 2011 Con Kolivas Byte Content -0->22 Magic +0->23 Magic -- -23->74 Rzip chunk data +24->74 Rzip chunk data 75+ Data blocks -- repeat (end-MD5_DIGEST_SIZE)->(end) md5 hash @@ -62,7 +62,7 @@ Magic data: 6->14 Source File Size 16->20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) 21 Flag
that md5sum hash is stored at the end of the archive -22 not used +22-23 not used Rzip chunk data: 0 Data offsets byte width @@ -91,8 +91,8 @@ Byte Content 5 LRZIP Minor Version Number 6-14 Source File Size 16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) -21-22 not used -23-48 Stream 1 header data +21-23 not used +24-48 Stream 1 header data 49-74 Stream 2 header data Block Data: @@ -118,7 +118,7 @@ Byte Content 6-9 Source File Size (no HAVE_LARGE_FILES) 6-14 Source File Size 16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) -21-22 not used -23-36 Stream 1 header data +21-23 not used +24-36 Stream 1 header data 37-50 Stream 2 header data 51 Compressed data type diff --git a/lrzip.c b/lrzip.c index 931bbf7..75ac8b8 100644 --- a/lrzip.c +++ b/lrzip.c @@ -668,11 +668,13 @@ void decompress_file(rzip_control *control) free(infilecopy); } -void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, i64 *u_len, i64 *last_head) +void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, + i64 *u_len, i64 *last_head, int chunk_bytes) { if (unlikely(read(fd_in, ctype, 1) != 1)) fatal("Failed to read in get_header_info\n"); + *c_len = *u_len = *last_head = 0; if (control->major_version == 0 && control->minor_version < 4) { u32 c_len32, u_len32, last_head32; @@ -686,20 +688,26 @@ void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, *u_len = u_len32; *last_head = last_head32; } else { - if (unlikely(read(fd_in, c_len, 8) != 8)) + int read_len; + + if (control->major_version == 0 && control->minor_version == 5) + read_len = 8; + else + read_len = chunk_bytes; + if (unlikely(read(fd_in, c_len, read_len) != read_len)) fatal("Failed to read in get_header_info"); - if (unlikely(read(fd_in, u_len, 8) != 8)) + if (unlikely(read(fd_in, u_len, read_len) != read_len)) fatal("Failed to read in get_header_info"); - if (unlikely(read(fd_in,
last_head, read_len) != read_len)) fatal("Failed to read_i64 in get_header_info"); } } void get_fileinfo(rzip_control *control) { - i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 34, stream_head[2]; + i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 25, stream_head[2]; i64 expected_size, infile_size, chunk_size = 0, chunk_total = 0; - int header_length = 25, stream = 0, chunk = 0; + int header_length, stream = 0, chunk = 0; char *tmp, *infilecopy = NULL; int seekspot, fd_in; char chunk_byte = 0; @@ -742,36 +750,24 @@ void get_fileinfo(rzip_control *control) if (control->major_version == 0 && control->minor_version > 5) { if (unlikely(read(fd_in, &control->eof, 1) != 1)) fatal("Failed to read eof in get_fileinfo\n"); - if (unlikely(read(fd_in, &chunk_size, 8) != 8)) + if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte)) fatal("Failed to read chunk_size in get_fileinfo\n"); } } - /* Versions 0.3-0.6 had different file formats */ - if (control->major_version == 0 && control->minor_version < 4) - seekspot = 50; - else if (control->major_version == 0 && control->minor_version == 4) - seekspot = 74; - else if (control->major_version == 0 && control->minor_version == 5) - seekspot = 75; - else - seekspot = 84; - if (unlikely(lseek(fd_in, seekspot, SEEK_SET) == -1)) - fatal("Failed to lseek in get_fileinfo\n"); - - /* Read the compression type of the first block. It's possible that - not all blocks are compressed so this may not be accurate. 
*/ - if (unlikely(read(fd_in, &ctype, 1) != 1)) - fatal("Failed to read in get_fileinfo\n"); - if (control->major_version == 0 && control->minor_version < 4) { ofs = 24; header_length = 13; - } - if (control->major_version == 0 && control->minor_version == 4) + } else if (control->major_version == 0 && control->minor_version == 4) { ofs = 24; - if (control->major_version == 0 && control->minor_version == 5) + header_length = 25; + } else if (control->major_version == 0 && control->minor_version == 5) { ofs = 25; + header_length = 25; + } else { + ofs = 26 + chunk_byte; + header_length = 1 + (chunk_byte * 3); + } next_chunk: stream = 0; stream_head[0] = 0; @@ -789,7 +785,7 @@ next_chunk: if (unlikely(lseek(fd_in, stream_head[stream] + ofs, SEEK_SET)) == -1) fatal("Failed to seek to header data in get_fileinfo\n"); - get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head); + get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head, chunk_byte); print_verbose("Stream: %d\n", stream); print_maxverbose("Offset: %lld\n", ofs); @@ -801,7 +797,8 @@ next_chunk: failure("Offset greater than archive size, likely corrupted/truncated archive.\n"); if (unlikely(head_off = lseek(fd_in, last_head + ofs, SEEK_SET)) == -1) fatal("Failed to seek to header data in get_fileinfo\n"); - get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head); + get_header_info(control, fd_in, &ctype, &c_len, &u_len, + &last_head, chunk_byte); if (unlikely(last_head < 0 || c_len < 0 || u_len < 0)) failure("Entry negative, likely corrupted archive.\n"); print_verbose("%d\t", block); @@ -837,6 +834,8 @@ next_chunk: fatal("Failed to lseek c_len in get_fileinfo\n"); } + if (ofs >= infile_size - (HAS_MD5 ? 
MD5_DIGEST_SIZE : 0)) + goto done; /* Chunk byte entry */ if (control->major_version == 0 && control->minor_version > 4) { if (unlikely(read(fd_in, &chunk_byte, 1) != 1)) @@ -845,13 +844,14 @@ next_chunk: if (control->major_version == 0 && control->minor_version > 5) { if (unlikely(read(fd_in, &control->eof, 1) != 1)) fatal("Failed to read eof in get_fileinfo\n"); - if (unlikely(read(fd_in, &chunk_size, 8) != 8)) + if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte)) fatal("Failed to read chunk_size in get_fileinfo\n"); - ofs += 9; + ofs += 1 + chunk_byte; + header_length = 1 + (chunk_byte * 3); } } - if (ofs < infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0)) - goto next_chunk; + goto next_chunk; +done: if (unlikely(ofs > infile_size)) failure("Offset greater than archive size, likely corrupted/truncated archive.\n"); if (chunk_total > expected_size) diff --git a/runzip.c b/runzip.c index 38859d6..3f848c9 100644 --- a/runzip.c +++ b/runzip.c @@ -294,7 +294,7 @@ static i64 runzip_chunk(rzip_control *control, int fd_in, int fd_out, int fd_his if (fstat(fd_in, &st) || st.st_size - ofs == 0) return 0; - ss = open_stream_in(control, fd_in, NUM_STREAMS); + ss = open_stream_in(control, fd_in, NUM_STREAMS, chunk_bytes); if (unlikely(!ss)) fatal("Failed to open_stream_in in runzip_chunk\n"); diff --git a/stream.c b/stream.c index cf4bbd7..8bf50c3 100644 --- a/stream.c +++ b/stream.c @@ -768,18 +768,14 @@ static int write_buf(rzip_control *control, int f, uchar *p, i64 len) } /* write a byte */ -static int write_u8(rzip_control *control, int f, uchar v) +static inline int write_u8(rzip_control *control, int f, uchar v) { return write_buf(control, f, &v, 1); } -/* write a i64 */ -static int write_i64(rzip_control *control, int f, i64 v) +static inline int write_val(rzip_control *control, int f, i64 v, int len) { - if (unlikely(write_buf(control, f, (uchar *)&v, 8))) - return -1; - - return 0; + return write_buf(control, f, (uchar *)&v, len); } static int 
read_buf(rzip_control *control, int f, uchar *p, i64 len) @@ -798,23 +794,26 @@ static int read_buf(rzip_control *control, int f, uchar *p, i64 len) return 0; } -static int read_u8(rzip_control *control, int f, uchar *v) +static inline int read_u8(rzip_control *control, int f, uchar *v) { return read_buf(control, f, v, 1); } -static int read_u32(rzip_control *control, int f, u32 *v) +static inline int read_u32(rzip_control *control, int f, u32 *v) { - if (unlikely(read_buf(control, f, (uchar *)v, 4))) - return -1; - return 0; + return read_buf(control, f, (uchar *)v, 4); } -static int read_i64(rzip_control *control, int f, i64 *v) +static inline int read_i64(rzip_control *control, int f, i64 *v) { - if (unlikely(read_buf(control, f, (uchar *)v, 8))) - return -1; - return 0; + return read_buf(control, f, (uchar *)v, 8); +} + +static inline int read_val(rzip_control *control, int f, i64 *v, int len) +{ + /* We only partially read all 8 bytes so have to zero v here */ + *v = 0; + return read_buf(control, f, (uchar *)v, len); } static int fd_seekto(rzip_control *control, struct stream_info *sinfo, i64 spos, i64 pos) @@ -1012,7 +1011,7 @@ retest_malloc: } /* prepare a set of n streams for reading on file descriptor f */ -void *open_stream_in(rzip_control *control, int f, int n) +void *open_stream_in(rzip_control *control, int f, int n, int chunk_bytes) { struct stream_info *sinfo; int total_threads, i; @@ -1038,6 +1037,7 @@ void *open_stream_in(rzip_control *control, int f, int n) sinfo->num_streams = n; sinfo->fd = f; + sinfo->chunk_bytes = chunk_bytes; sinfo->s = calloc(sizeof(struct stream), n); if (unlikely(!sinfo->s)) { @@ -1056,7 +1056,7 @@ void *open_stream_in(rzip_control *control, int f, int n) goto failed; } /* Read in the expected chunk size */ - if (unlikely(read_i64(control, f, &sinfo->size))) { + if (unlikely(read_val(control, f, &sinfo->size, sinfo->chunk_bytes))) { print_err("Failed to read in chunk size in open_stream_in\n"); goto failed; } @@ -1093,13 
+1093,19 @@ again: sinfo->s[i].last_head = last_head32; header_length = 13; } else { - if (unlikely(read_i64(control, f, &v1))) + int read_len; + + if (control->major_version == 0 && control->minor_version < 6) + read_len = 8; + else + read_len = sinfo->chunk_bytes; + if (unlikely(read_val(control, f, &v1, read_len))) goto failed; - if (unlikely(read_i64(control, f, &v2))) + if (unlikely(read_val(control, f, &v2, read_len))) goto failed; - if (unlikely(read_i64(control, f, &sinfo->s[i].last_head))) + if (unlikely(read_val(control, f, &sinfo->s[i].last_head, read_len))) goto failed; - header_length = 25; + header_length = 1 + (read_len * 3); } if (unlikely(c == CTYPE_NONE && v1 == 0 && v2 == 0 && sinfo->s[i].last_head == 0 && i == 0)) { print_err("Enabling stream close workaround\n"); @@ -1225,40 +1231,40 @@ retry: /* Write whether this is the last chunk, followed by the size * of this chunk */ write_u8(control, ctis->fd, control->eof); - write_i64(control, ctis->fd, ctis->size); + write_val(control, ctis->fd, ctis->size, ctis->chunk_bytes); /* First chunk of this stream, write headers */ ctis->initial_pos = get_seek(control, ctis->fd); for (j = 0; j < ctis->num_streams; j++) { - ctis->s[j].last_head = ctis->cur_pos + 17; + ctis->s[j].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2); write_u8(control, ctis->fd, CTYPE_NONE); - write_i64(control, ctis->fd, 0); - write_i64(control, ctis->fd, 0); - write_i64(control, ctis->fd, 0); - ctis->cur_pos += 25; + write_val(control, ctis->fd, 0, ctis->chunk_bytes); + write_val(control, ctis->fd, 0, ctis->chunk_bytes); + write_val(control, ctis->fd, 0, ctis->chunk_bytes); + ctis->cur_pos += 1 + (ctis->chunk_bytes * 3); } } if (unlikely(seekto(control, ctis, ctis->s[cti->streamno].last_head))) fatal("Failed to seekto in compthread %d\n", i); - if (unlikely(write_i64(control, ctis->fd, ctis->cur_pos))) - fatal("Failed to write_i64 in compthread %d\n", i); + if (unlikely(write_val(control, ctis->fd, ctis->cur_pos, 
ctis->chunk_bytes))) + fatal("Failed to write_val cur_pos in compthread %d\n", i); - ctis->s[cti->streamno].last_head = ctis->cur_pos + 17; + ctis->s[cti->streamno].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2); if (unlikely(seekto(control, ctis, ctis->cur_pos))) fatal("Failed to seekto cur_pos in compthread %d\n", i); print_maxverbose("Thread %ld writing %lld compressed bytes from stream %d\n", i, padded_len, cti->streamno); /* We store the actual c_len even though we might pad it out */ if (unlikely(write_u8(control, ctis->fd, cti->c_type) || - write_i64(control, ctis->fd, cti->c_len) || - write_i64(control, ctis->fd, cti->s_len) || - write_i64(control, ctis->fd, 0))) { + write_val(control, ctis->fd, cti->c_len, ctis->chunk_bytes) || + write_val(control, ctis->fd, cti->s_len, ctis->chunk_bytes) || + write_val(control, ctis->fd, 0, ctis->chunk_bytes))) { fatal("Failed write in compthread %d\n", i); } - ctis->cur_pos += 25; + ctis->cur_pos += 1 + (ctis->chunk_bytes * 3); if (ENCRYPT) { ctis->cur_pos += 8; @@ -1423,13 +1429,19 @@ fill_another: last_head = last_head32; header_length = 13; } else { - if (unlikely(read_i64(control, sinfo->fd, &c_len))) + int read_len; + + if (control->major_version == 0 && control->minor_version < 6) + read_len = 8; + else + read_len = sinfo->chunk_bytes; + if (unlikely(read_val(control, sinfo->fd, &c_len, read_len))) return -1; - if (unlikely(read_i64(control, sinfo->fd, &u_len))) + if (unlikely(read_val(control, sinfo->fd, &u_len, read_len))) return -1; - if (unlikely(read_i64(control, sinfo->fd, &last_head))) + if (unlikely(read_val(control, sinfo->fd, &last_head, read_len))) return -1; - header_length = 25; + header_length = 1 + (read_len * 3); } if (ENCRYPT) { diff --git a/stream.h b/stream.h index 09d3b26..95454b2 100644 --- a/stream.h +++ b/stream.h @@ -31,7 +31,7 @@ ssize_t read_1g(rzip_control *control, int fd, void *buf, i64 len); void prepare_streamout_threads(rzip_control *control); void 
close_streamout_threads(rzip_control *control); void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_limit, char cbytes); -void *open_stream_in(rzip_control *control, int f, int n); +void *open_stream_in(rzip_control *control, int f, int n, char cbytes); void flush_buffer(rzip_control *control, struct stream_info *sinfo, int stream); int write_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len); i64 read_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);