Modify the file format further so that every length and offset field in the block headers is encoded using only chunk_bytes bytes instead of always using a full 8 bytes.

This commit is contained in:
Con Kolivas 2011-03-18 23:18:36 +11:00
parent 5005c2dff5
commit 63fb1bafea
5 changed files with 104 additions and 92 deletions

View file

@ -5,8 +5,8 @@ Con Kolivas
Byte Content
0-23 Magic
---
24->83 Rzip chunk data
84+ Data blocks
24+ Rzip Chunk Data (RCD)
RCD+ Data blocks
--- repeat
(end-MD5_DIGEST_SIZE)->(end) md5 hash
@ -24,19 +24,19 @@ Encrypted salt (bytes 6->14 in magic if encrypted):
0->1 Encoded number of loops to hash password
2->7 Random data
Rzip chunk data:
0 Data offsets byte width
1 Flag that there is another chunk beyond this
2->9 Chunk decompressed size
10->34 Stream 0 header data
35->59 Stream 1 header data
Rzip Chunk Data:
0 Data offsets byte width (meaning length is < 2^(8 * RCD0))
1 Flag that there is no chunk beyond this
(RCD0 bytes) Chunk decompressed size
XX Stream 0 header data
XX Stream 1 header data
Stream Header Data:
Byte:
0 Compressed data type
1->8 Compressed data length
9->16 Uncompressed data length
17->24 Next block head
(RCD0 bytes) Compressed data length
(RCD0 bytes) Uncompressed data length
(RCD0 bytes) Next block head
Data blocks:
0->(end-2) data
@ -48,9 +48,9 @@ March 2011
Con Kolivas
Byte Content
0->22 Magic
0->23 Magic
--
23->74 Rzip chunk data
24->74 Rzip chunk data
75+ Data blocks
-- repeat
(end-MD5_DIGEST_SIZE)->(end) md5 hash
@ -62,7 +62,7 @@ Magic data:
6->14 Source File Size
16->20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
21 Flag that md5sum hash is stored at the end of the archive
22 not used
22-23 not used
Rzip chunk data:
0 Data offsets byte width
@ -91,8 +91,8 @@ Byte Content
5 LRZIP Minor Version Number
6-14 Source File Size
16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
21-22 not used
23-48 Stream 1 header data
21-23 not used
24-48 Stream 1 header data
49-74 Stream 2 header data
Block Data:
@ -118,7 +118,7 @@ Byte Content
6-9 Source File Size (no HAVE_LARGE_FILES)
6-14 Source File Size
16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
21-22 not used
23-36 Stream 1 header data
21-23 not used
24-36 Stream 1 header data
37-50 Stream 2 header data
51 Compressed data type

66
lrzip.c
View file

@ -668,11 +668,13 @@ void decompress_file(rzip_control *control)
free(infilecopy);
}
void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, i64 *u_len, i64 *last_head)
void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len,
i64 *u_len, i64 *last_head, int chunk_bytes)
{
if (unlikely(read(fd_in, ctype, 1) != 1))
fatal("Failed to read in get_header_info\n");
*c_len = *u_len = *last_head = 0;
if (control->major_version == 0 && control->minor_version < 4) {
u32 c_len32, u_len32, last_head32;
@ -686,20 +688,26 @@ void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len,
*u_len = u_len32;
*last_head = last_head32;
} else {
if (unlikely(read(fd_in, c_len, 8) != 8))
int read_len;
if (control->major_version == 0 && control->minor_version == 5)
read_len = 8;
else
read_len = chunk_bytes;
if (unlikely(read(fd_in, c_len, read_len) != read_len))
fatal("Failed to read in get_header_info");
if (unlikely(read(fd_in, u_len, 8) != 8))
if (unlikely(read(fd_in, u_len, read_len) != read_len))
fatal("Failed to read in get_header_info");
if (unlikely(read(fd_in, last_head, 8) != 8))
if (unlikely(read(fd_in, last_head, read_len) != read_len))
fatal("Failed to read_i64 in get_header_info");
}
}
void get_fileinfo(rzip_control *control)
{
i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 34, stream_head[2];
i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 25, stream_head[2];
i64 expected_size, infile_size, chunk_size = 0, chunk_total = 0;
int header_length = 25, stream = 0, chunk = 0;
int header_length, stream = 0, chunk = 0;
char *tmp, *infilecopy = NULL;
int seekspot, fd_in;
char chunk_byte = 0;
@ -742,36 +750,24 @@ void get_fileinfo(rzip_control *control)
if (control->major_version == 0 && control->minor_version > 5) {
if (unlikely(read(fd_in, &control->eof, 1) != 1))
fatal("Failed to read eof in get_fileinfo\n");
if (unlikely(read(fd_in, &chunk_size, 8) != 8))
if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte))
fatal("Failed to read chunk_size in get_fileinfo\n");
}
}
/* Versions 0.3-0.6 had different file formats */
if (control->major_version == 0 && control->minor_version < 4)
seekspot = 50;
else if (control->major_version == 0 && control->minor_version == 4)
seekspot = 74;
else if (control->major_version == 0 && control->minor_version == 5)
seekspot = 75;
else
seekspot = 84;
if (unlikely(lseek(fd_in, seekspot, SEEK_SET) == -1))
fatal("Failed to lseek in get_fileinfo\n");
/* Read the compression type of the first block. It's possible that
not all blocks are compressed so this may not be accurate. */
if (unlikely(read(fd_in, &ctype, 1) != 1))
fatal("Failed to read in get_fileinfo\n");
if (control->major_version == 0 && control->minor_version < 4) {
ofs = 24;
header_length = 13;
}
if (control->major_version == 0 && control->minor_version == 4)
} else if (control->major_version == 0 && control->minor_version == 4) {
ofs = 24;
if (control->major_version == 0 && control->minor_version == 5)
header_length = 25;
} else if (control->major_version == 0 && control->minor_version == 5) {
ofs = 25;
header_length = 25;
} else {
ofs = 26 + chunk_byte;
header_length = 1 + (chunk_byte * 3);
}
next_chunk:
stream = 0;
stream_head[0] = 0;
@ -789,7 +785,7 @@ next_chunk:
if (unlikely(lseek(fd_in, stream_head[stream] + ofs, SEEK_SET)) == -1)
fatal("Failed to seek to header data in get_fileinfo\n");
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head);
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head, chunk_byte);
print_verbose("Stream: %d\n", stream);
print_maxverbose("Offset: %lld\n", ofs);
@ -801,7 +797,8 @@ next_chunk:
failure("Offset greater than archive size, likely corrupted/truncated archive.\n");
if (unlikely(head_off = lseek(fd_in, last_head + ofs, SEEK_SET)) == -1)
fatal("Failed to seek to header data in get_fileinfo\n");
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head);
get_header_info(control, fd_in, &ctype, &c_len, &u_len,
&last_head, chunk_byte);
if (unlikely(last_head < 0 || c_len < 0 || u_len < 0))
failure("Entry negative, likely corrupted archive.\n");
print_verbose("%d\t", block);
@ -837,6 +834,8 @@ next_chunk:
fatal("Failed to lseek c_len in get_fileinfo\n");
}
if (ofs >= infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0))
goto done;
/* Chunk byte entry */
if (control->major_version == 0 && control->minor_version > 4) {
if (unlikely(read(fd_in, &chunk_byte, 1) != 1))
@ -845,13 +844,14 @@ next_chunk:
if (control->major_version == 0 && control->minor_version > 5) {
if (unlikely(read(fd_in, &control->eof, 1) != 1))
fatal("Failed to read eof in get_fileinfo\n");
if (unlikely(read(fd_in, &chunk_size, 8) != 8))
if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte))
fatal("Failed to read chunk_size in get_fileinfo\n");
ofs += 9;
ofs += 1 + chunk_byte;
header_length = 1 + (chunk_byte * 3);
}
}
if (ofs < infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0))
goto next_chunk;
goto next_chunk;
done:
if (unlikely(ofs > infile_size))
failure("Offset greater than archive size, likely corrupted/truncated archive.\n");
if (chunk_total > expected_size)

View file

@ -294,7 +294,7 @@ static i64 runzip_chunk(rzip_control *control, int fd_in, int fd_out, int fd_his
if (fstat(fd_in, &st) || st.st_size - ofs == 0)
return 0;
ss = open_stream_in(control, fd_in, NUM_STREAMS);
ss = open_stream_in(control, fd_in, NUM_STREAMS, chunk_bytes);
if (unlikely(!ss))
fatal("Failed to open_stream_in in runzip_chunk\n");

View file

@ -768,18 +768,14 @@ static int write_buf(rzip_control *control, int f, uchar *p, i64 len)
}
/* write a byte */
static int write_u8(rzip_control *control, int f, uchar v)
static inline int write_u8(rzip_control *control, int f, uchar v)
{
return write_buf(control, f, &v, 1);
}
/* write a i64 */
static int write_i64(rzip_control *control, int f, i64 v)
static inline int write_val(rzip_control *control, int f, i64 v, int len)
{
if (unlikely(write_buf(control, f, (uchar *)&v, 8)))
return -1;
return 0;
return write_buf(control, f, (uchar *)&v, len);
}
static int read_buf(rzip_control *control, int f, uchar *p, i64 len)
@ -798,23 +794,26 @@ static int read_buf(rzip_control *control, int f, uchar *p, i64 len)
return 0;
}
static int read_u8(rzip_control *control, int f, uchar *v)
static inline int read_u8(rzip_control *control, int f, uchar *v)
{
return read_buf(control, f, v, 1);
}
static int read_u32(rzip_control *control, int f, u32 *v)
static inline int read_u32(rzip_control *control, int f, u32 *v)
{
if (unlikely(read_buf(control, f, (uchar *)v, 4)))
return -1;
return 0;
return read_buf(control, f, (uchar *)v, 4);
}
static int read_i64(rzip_control *control, int f, i64 *v)
static inline int read_i64(rzip_control *control, int f, i64 *v)
{
if (unlikely(read_buf(control, f, (uchar *)v, 8)))
return -1;
return 0;
return read_buf(control, f, (uchar *)v, 8);
}
/*
 * Read a len-byte value from descriptor f into *v.
 *
 * read_buf only fills the first len bytes of *v, so *v is cleared first to
 * leave the remaining bytes zero.
 * NOTE(review): filling the leading bytes of an i64 yields the intended
 * number only on a little-endian host — confirm how other byte orders are
 * meant to be handled.
 *
 * len is supplied by the caller (e.g. a chunk byte width); a value that is
 * negative or larger than the destination is rejected instead of letting
 * read_buf write past the end of *v.
 *
 * Returns the result of read_buf, or -1 without reading when len is out of
 * range.
 */
static inline int read_val(rzip_control *control, int f, i64 *v, int len)
{
	/* We only partially read all 8 bytes so have to zero v here */
	*v = 0;
	if (len < 0 || len > (int)sizeof(*v))
		return -1;
	return read_buf(control, f, (uchar *)v, len);
}
static int fd_seekto(rzip_control *control, struct stream_info *sinfo, i64 spos, i64 pos)
@ -1012,7 +1011,7 @@ retest_malloc:
}
/* prepare a set of n streams for reading on file descriptor f */
void *open_stream_in(rzip_control *control, int f, int n)
void *open_stream_in(rzip_control *control, int f, int n, int chunk_bytes)
{
struct stream_info *sinfo;
int total_threads, i;
@ -1038,6 +1037,7 @@ void *open_stream_in(rzip_control *control, int f, int n)
sinfo->num_streams = n;
sinfo->fd = f;
sinfo->chunk_bytes = chunk_bytes;
sinfo->s = calloc(sizeof(struct stream), n);
if (unlikely(!sinfo->s)) {
@ -1056,7 +1056,7 @@ void *open_stream_in(rzip_control *control, int f, int n)
goto failed;
}
/* Read in the expected chunk size */
if (unlikely(read_i64(control, f, &sinfo->size))) {
if (unlikely(read_val(control, f, &sinfo->size, sinfo->chunk_bytes))) {
print_err("Failed to read in chunk size in open_stream_in\n");
goto failed;
}
@ -1093,13 +1093,19 @@ again:
sinfo->s[i].last_head = last_head32;
header_length = 13;
} else {
if (unlikely(read_i64(control, f, &v1)))
int read_len;
if (control->major_version == 0 && control->minor_version < 6)
read_len = 8;
else
read_len = sinfo->chunk_bytes;
if (unlikely(read_val(control, f, &v1, read_len)))
goto failed;
if (unlikely(read_i64(control, f, &v2)))
if (unlikely(read_val(control, f, &v2, read_len)))
goto failed;
if (unlikely(read_i64(control, f, &sinfo->s[i].last_head)))
if (unlikely(read_val(control, f, &sinfo->s[i].last_head, read_len)))
goto failed;
header_length = 25;
header_length = 1 + (read_len * 3);
}
if (unlikely(c == CTYPE_NONE && v1 == 0 && v2 == 0 && sinfo->s[i].last_head == 0 && i == 0)) {
print_err("Enabling stream close workaround\n");
@ -1225,40 +1231,40 @@ retry:
/* Write whether this is the last chunk, followed by the size
* of this chunk */
write_u8(control, ctis->fd, control->eof);
write_i64(control, ctis->fd, ctis->size);
write_val(control, ctis->fd, ctis->size, ctis->chunk_bytes);
/* First chunk of this stream, write headers */
ctis->initial_pos = get_seek(control, ctis->fd);
for (j = 0; j < ctis->num_streams; j++) {
ctis->s[j].last_head = ctis->cur_pos + 17;
ctis->s[j].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2);
write_u8(control, ctis->fd, CTYPE_NONE);
write_i64(control, ctis->fd, 0);
write_i64(control, ctis->fd, 0);
write_i64(control, ctis->fd, 0);
ctis->cur_pos += 25;
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
ctis->cur_pos += 1 + (ctis->chunk_bytes * 3);
}
}
if (unlikely(seekto(control, ctis, ctis->s[cti->streamno].last_head)))
fatal("Failed to seekto in compthread %d\n", i);
if (unlikely(write_i64(control, ctis->fd, ctis->cur_pos)))
fatal("Failed to write_i64 in compthread %d\n", i);
if (unlikely(write_val(control, ctis->fd, ctis->cur_pos, ctis->chunk_bytes)))
fatal("Failed to write_val cur_pos in compthread %d\n", i);
ctis->s[cti->streamno].last_head = ctis->cur_pos + 17;
ctis->s[cti->streamno].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2);
if (unlikely(seekto(control, ctis, ctis->cur_pos)))
fatal("Failed to seekto cur_pos in compthread %d\n", i);
print_maxverbose("Thread %ld writing %lld compressed bytes from stream %d\n", i, padded_len, cti->streamno);
/* We store the actual c_len even though we might pad it out */
if (unlikely(write_u8(control, ctis->fd, cti->c_type) ||
write_i64(control, ctis->fd, cti->c_len) ||
write_i64(control, ctis->fd, cti->s_len) ||
write_i64(control, ctis->fd, 0))) {
write_val(control, ctis->fd, cti->c_len, ctis->chunk_bytes) ||
write_val(control, ctis->fd, cti->s_len, ctis->chunk_bytes) ||
write_val(control, ctis->fd, 0, ctis->chunk_bytes))) {
fatal("Failed write in compthread %d\n", i);
}
ctis->cur_pos += 25;
ctis->cur_pos += 1 + (ctis->chunk_bytes * 3);
if (ENCRYPT) {
ctis->cur_pos += 8;
@ -1423,13 +1429,19 @@ fill_another:
last_head = last_head32;
header_length = 13;
} else {
if (unlikely(read_i64(control, sinfo->fd, &c_len)))
int read_len;
if (control->major_version == 0 && control->minor_version < 6)
read_len = 8;
else
read_len = sinfo->chunk_bytes;
if (unlikely(read_val(control, sinfo->fd, &c_len, read_len)))
return -1;
if (unlikely(read_i64(control, sinfo->fd, &u_len)))
if (unlikely(read_val(control, sinfo->fd, &u_len, read_len)))
return -1;
if (unlikely(read_i64(control, sinfo->fd, &last_head)))
if (unlikely(read_val(control, sinfo->fd, &last_head, read_len)))
return -1;
header_length = 25;
header_length = 1 + (read_len * 3);
}
if (ENCRYPT) {

View file

@ -31,7 +31,7 @@ ssize_t read_1g(rzip_control *control, int fd, void *buf, i64 len);
void prepare_streamout_threads(rzip_control *control);
void close_streamout_threads(rzip_control *control);
void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_limit, char cbytes);
void *open_stream_in(rzip_control *control, int f, int n);
void *open_stream_in(rzip_control *control, int f, int n, char cbytes);
void flush_buffer(rzip_control *control, struct stream_info *sinfo, int stream);
int write_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);
i64 read_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);