mirror of
https://github.com/ckolivas/lrzip.git
synced 2025-12-06 07:12:00 +01:00
Modify the file format further to make all block header information only encode the number of bytes determined in chunk_bytes instead of 8 full bytes all the time.
This commit is contained in:
parent
5005c2dff5
commit
63fb1bafea
|
|
@ -5,8 +5,8 @@ Con Kolivas
|
|||
Byte Content
|
||||
0-23 Magic
|
||||
---
|
||||
24->83 Rzip chunk data
|
||||
84+ Data blocks
|
||||
24+ Rzip Chunk Data (RCD)
|
||||
RCD+ Data blocks
|
||||
--- repeat
|
||||
(end-MD5_DIGEST_SIZE)->(end) md5 hash
|
||||
|
||||
|
|
@ -24,19 +24,19 @@ Encrypted salt (bytes 6->14 in magic if encrypted):
|
|||
0->1 Encoded number of loops to hash password
|
||||
2->7 Random data
|
||||
|
||||
Rzip chunk data:
|
||||
0 Data offsets byte width
|
||||
1 Flag that there is another chunk beyond this
|
||||
2->9 Chunk decompressed size
|
||||
10->34 Stream 0 header data
|
||||
35->59 Stream 1 header data
|
||||
Rzip Chunk Data:
|
||||
0 Data offsets byte width (meaning length is < 2^(8 * RCD0))
|
||||
1 Flag that there is no chunk beyond this
|
||||
(RCD0 bytes) Chunk decompressed size
|
||||
XX Stream 0 header data
|
||||
XX Stream 1 header data
|
||||
|
||||
Stream Header Data:
|
||||
Byte:
|
||||
0 Compressed data type
|
||||
1->8 Compressed data length
|
||||
9->16 Uncompressed data length
|
||||
17->24 Next block head
|
||||
(RCD0 bytes) Compressed data length
|
||||
(RCD0 bytes) Uncompressed data length
|
||||
(RCD0 bytes) Next block head
|
||||
|
||||
Data blocks:
|
||||
0->(end-2) data
|
||||
|
|
@ -48,9 +48,9 @@ March 2011
|
|||
Con Kolivas
|
||||
|
||||
Byte Content
|
||||
0->22 Magic
|
||||
0->23 Magic
|
||||
--
|
||||
23->74 Rzip chunk data
|
||||
24->74 Rzip chunk data
|
||||
75+ Data blocks
|
||||
-- repeat
|
||||
(end-MD5_DIGEST_SIZE)->(end) md5 hash
|
||||
|
|
@ -62,7 +62,7 @@ Magic data:
|
|||
6->14 Source File Size
|
||||
16->20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
|
||||
21 Flag that md5sum hash is stored at the end of the archive
|
||||
22 not used
|
||||
22-23 not used
|
||||
|
||||
Rzip chunk data:
|
||||
0 Data offsets byte width
|
||||
|
|
@ -91,8 +91,8 @@ Byte Content
|
|||
5 LRZIP Minor Version Number
|
||||
6-14 Source File Size
|
||||
16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
|
||||
21-22 not used
|
||||
23-48 Stream 1 header data
|
||||
21-23 not used
|
||||
24-48 Stream 1 header data
|
||||
49-74 Stream 2 header data
|
||||
|
||||
Block Data:
|
||||
|
|
@ -118,7 +118,7 @@ Byte Content
|
|||
6-9 Source File Size (no HAVE_LARGE_FILES)
|
||||
6-14 Source File Size
|
||||
16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size)
|
||||
21-22 not used
|
||||
23-36 Stream 1 header data
|
||||
21-23 not used
|
||||
24-36 Stream 1 header data
|
||||
37-50 Stream 2 header data
|
||||
51 Compressed data type
|
||||
|
|
|
|||
66
lrzip.c
66
lrzip.c
|
|
@ -668,11 +668,13 @@ void decompress_file(rzip_control *control)
|
|||
free(infilecopy);
|
||||
}
|
||||
|
||||
void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len, i64 *u_len, i64 *last_head)
|
||||
void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len,
|
||||
i64 *u_len, i64 *last_head, int chunk_bytes)
|
||||
{
|
||||
if (unlikely(read(fd_in, ctype, 1) != 1))
|
||||
fatal("Failed to read in get_header_info\n");
|
||||
|
||||
*c_len = *u_len = *last_head = 0;
|
||||
if (control->major_version == 0 && control->minor_version < 4) {
|
||||
u32 c_len32, u_len32, last_head32;
|
||||
|
||||
|
|
@ -686,20 +688,26 @@ void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len,
|
|||
*u_len = u_len32;
|
||||
*last_head = last_head32;
|
||||
} else {
|
||||
if (unlikely(read(fd_in, c_len, 8) != 8))
|
||||
int read_len;
|
||||
|
||||
if (control->major_version == 0 && control->minor_version == 5)
|
||||
read_len = 8;
|
||||
else
|
||||
read_len = chunk_bytes;
|
||||
if (unlikely(read(fd_in, c_len, read_len) != read_len))
|
||||
fatal("Failed to read in get_header_info");
|
||||
if (unlikely(read(fd_in, u_len, 8) != 8))
|
||||
if (unlikely(read(fd_in, u_len, read_len) != read_len))
|
||||
fatal("Failed to read in get_header_info");
|
||||
if (unlikely(read(fd_in, last_head, 8) != 8))
|
||||
if (unlikely(read(fd_in, last_head, read_len) != read_len))
|
||||
fatal("Failed to read_i64 in get_header_info");
|
||||
}
|
||||
}
|
||||
|
||||
void get_fileinfo(rzip_control *control)
|
||||
{
|
||||
i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 34, stream_head[2];
|
||||
i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 25, stream_head[2];
|
||||
i64 expected_size, infile_size, chunk_size = 0, chunk_total = 0;
|
||||
int header_length = 25, stream = 0, chunk = 0;
|
||||
int header_length, stream = 0, chunk = 0;
|
||||
char *tmp, *infilecopy = NULL;
|
||||
int seekspot, fd_in;
|
||||
char chunk_byte = 0;
|
||||
|
|
@ -742,36 +750,24 @@ void get_fileinfo(rzip_control *control)
|
|||
if (control->major_version == 0 && control->minor_version > 5) {
|
||||
if (unlikely(read(fd_in, &control->eof, 1) != 1))
|
||||
fatal("Failed to read eof in get_fileinfo\n");
|
||||
if (unlikely(read(fd_in, &chunk_size, 8) != 8))
|
||||
if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte))
|
||||
fatal("Failed to read chunk_size in get_fileinfo\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Versions 0.3-0.6 had different file formats */
|
||||
if (control->major_version == 0 && control->minor_version < 4)
|
||||
seekspot = 50;
|
||||
else if (control->major_version == 0 && control->minor_version == 4)
|
||||
seekspot = 74;
|
||||
else if (control->major_version == 0 && control->minor_version == 5)
|
||||
seekspot = 75;
|
||||
else
|
||||
seekspot = 84;
|
||||
if (unlikely(lseek(fd_in, seekspot, SEEK_SET) == -1))
|
||||
fatal("Failed to lseek in get_fileinfo\n");
|
||||
|
||||
/* Read the compression type of the first block. It's possible that
|
||||
not all blocks are compressed so this may not be accurate. */
|
||||
if (unlikely(read(fd_in, &ctype, 1) != 1))
|
||||
fatal("Failed to read in get_fileinfo\n");
|
||||
|
||||
if (control->major_version == 0 && control->minor_version < 4) {
|
||||
ofs = 24;
|
||||
header_length = 13;
|
||||
}
|
||||
if (control->major_version == 0 && control->minor_version == 4)
|
||||
} else if (control->major_version == 0 && control->minor_version == 4) {
|
||||
ofs = 24;
|
||||
if (control->major_version == 0 && control->minor_version == 5)
|
||||
header_length = 25;
|
||||
} else if (control->major_version == 0 && control->minor_version == 5) {
|
||||
ofs = 25;
|
||||
header_length = 25;
|
||||
} else {
|
||||
ofs = 26 + chunk_byte;
|
||||
header_length = 1 + (chunk_byte * 3);
|
||||
}
|
||||
next_chunk:
|
||||
stream = 0;
|
||||
stream_head[0] = 0;
|
||||
|
|
@ -789,7 +785,7 @@ next_chunk:
|
|||
|
||||
if (unlikely(lseek(fd_in, stream_head[stream] + ofs, SEEK_SET)) == -1)
|
||||
fatal("Failed to seek to header data in get_fileinfo\n");
|
||||
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head);
|
||||
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head, chunk_byte);
|
||||
|
||||
print_verbose("Stream: %d\n", stream);
|
||||
print_maxverbose("Offset: %lld\n", ofs);
|
||||
|
|
@ -801,7 +797,8 @@ next_chunk:
|
|||
failure("Offset greater than archive size, likely corrupted/truncated archive.\n");
|
||||
if (unlikely(head_off = lseek(fd_in, last_head + ofs, SEEK_SET)) == -1)
|
||||
fatal("Failed to seek to header data in get_fileinfo\n");
|
||||
get_header_info(control, fd_in, &ctype, &c_len, &u_len, &last_head);
|
||||
get_header_info(control, fd_in, &ctype, &c_len, &u_len,
|
||||
&last_head, chunk_byte);
|
||||
if (unlikely(last_head < 0 || c_len < 0 || u_len < 0))
|
||||
failure("Entry negative, likely corrupted archive.\n");
|
||||
print_verbose("%d\t", block);
|
||||
|
|
@ -837,6 +834,8 @@ next_chunk:
|
|||
fatal("Failed to lseek c_len in get_fileinfo\n");
|
||||
}
|
||||
|
||||
if (ofs >= infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0))
|
||||
goto done;
|
||||
/* Chunk byte entry */
|
||||
if (control->major_version == 0 && control->minor_version > 4) {
|
||||
if (unlikely(read(fd_in, &chunk_byte, 1) != 1))
|
||||
|
|
@ -845,13 +844,14 @@ next_chunk:
|
|||
if (control->major_version == 0 && control->minor_version > 5) {
|
||||
if (unlikely(read(fd_in, &control->eof, 1) != 1))
|
||||
fatal("Failed to read eof in get_fileinfo\n");
|
||||
if (unlikely(read(fd_in, &chunk_size, 8) != 8))
|
||||
if (unlikely(read(fd_in, &chunk_size, chunk_byte) != chunk_byte))
|
||||
fatal("Failed to read chunk_size in get_fileinfo\n");
|
||||
ofs += 9;
|
||||
ofs += 1 + chunk_byte;
|
||||
header_length = 1 + (chunk_byte * 3);
|
||||
}
|
||||
}
|
||||
if (ofs < infile_size - (HAS_MD5 ? MD5_DIGEST_SIZE : 0))
|
||||
goto next_chunk;
|
||||
goto next_chunk;
|
||||
done:
|
||||
if (unlikely(ofs > infile_size))
|
||||
failure("Offset greater than archive size, likely corrupted/truncated archive.\n");
|
||||
if (chunk_total > expected_size)
|
||||
|
|
|
|||
2
runzip.c
2
runzip.c
|
|
@ -294,7 +294,7 @@ static i64 runzip_chunk(rzip_control *control, int fd_in, int fd_out, int fd_his
|
|||
if (fstat(fd_in, &st) || st.st_size - ofs == 0)
|
||||
return 0;
|
||||
|
||||
ss = open_stream_in(control, fd_in, NUM_STREAMS);
|
||||
ss = open_stream_in(control, fd_in, NUM_STREAMS, chunk_bytes);
|
||||
if (unlikely(!ss))
|
||||
fatal("Failed to open_stream_in in runzip_chunk\n");
|
||||
|
||||
|
|
|
|||
90
stream.c
90
stream.c
|
|
@ -768,18 +768,14 @@ static int write_buf(rzip_control *control, int f, uchar *p, i64 len)
|
|||
}
|
||||
|
||||
/* write a byte */
|
||||
static int write_u8(rzip_control *control, int f, uchar v)
|
||||
static inline int write_u8(rzip_control *control, int f, uchar v)
|
||||
{
|
||||
return write_buf(control, f, &v, 1);
|
||||
}
|
||||
|
||||
/* write a i64 */
|
||||
static int write_i64(rzip_control *control, int f, i64 v)
|
||||
static inline int write_val(rzip_control *control, int f, i64 v, int len)
|
||||
{
|
||||
if (unlikely(write_buf(control, f, (uchar *)&v, 8)))
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
return write_buf(control, f, (uchar *)&v, len);
|
||||
}
|
||||
|
||||
static int read_buf(rzip_control *control, int f, uchar *p, i64 len)
|
||||
|
|
@ -798,23 +794,26 @@ static int read_buf(rzip_control *control, int f, uchar *p, i64 len)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int read_u8(rzip_control *control, int f, uchar *v)
|
||||
static inline int read_u8(rzip_control *control, int f, uchar *v)
|
||||
{
|
||||
return read_buf(control, f, v, 1);
|
||||
}
|
||||
|
||||
static int read_u32(rzip_control *control, int f, u32 *v)
|
||||
static inline int read_u32(rzip_control *control, int f, u32 *v)
|
||||
{
|
||||
if (unlikely(read_buf(control, f, (uchar *)v, 4)))
|
||||
return -1;
|
||||
return 0;
|
||||
return read_buf(control, f, (uchar *)v, 4);
|
||||
}
|
||||
|
||||
static int read_i64(rzip_control *control, int f, i64 *v)
|
||||
static inline int read_i64(rzip_control *control, int f, i64 *v)
|
||||
{
|
||||
if (unlikely(read_buf(control, f, (uchar *)v, 8)))
|
||||
return -1;
|
||||
return 0;
|
||||
return read_buf(control, f, (uchar *)v, 8);
|
||||
}
|
||||
|
||||
static inline int read_val(rzip_control *control, int f, i64 *v, int len)
|
||||
{
|
||||
/* We only partially read all 8 bytes so have to zero v here */
|
||||
*v = 0;
|
||||
return read_buf(control, f, (uchar *)v, len);
|
||||
}
|
||||
|
||||
static int fd_seekto(rzip_control *control, struct stream_info *sinfo, i64 spos, i64 pos)
|
||||
|
|
@ -1012,7 +1011,7 @@ retest_malloc:
|
|||
}
|
||||
|
||||
/* prepare a set of n streams for reading on file descriptor f */
|
||||
void *open_stream_in(rzip_control *control, int f, int n)
|
||||
void *open_stream_in(rzip_control *control, int f, int n, int chunk_bytes)
|
||||
{
|
||||
struct stream_info *sinfo;
|
||||
int total_threads, i;
|
||||
|
|
@ -1038,6 +1037,7 @@ void *open_stream_in(rzip_control *control, int f, int n)
|
|||
|
||||
sinfo->num_streams = n;
|
||||
sinfo->fd = f;
|
||||
sinfo->chunk_bytes = chunk_bytes;
|
||||
|
||||
sinfo->s = calloc(sizeof(struct stream), n);
|
||||
if (unlikely(!sinfo->s)) {
|
||||
|
|
@ -1056,7 +1056,7 @@ void *open_stream_in(rzip_control *control, int f, int n)
|
|||
goto failed;
|
||||
}
|
||||
/* Read in the expected chunk size */
|
||||
if (unlikely(read_i64(control, f, &sinfo->size))) {
|
||||
if (unlikely(read_val(control, f, &sinfo->size, sinfo->chunk_bytes))) {
|
||||
print_err("Failed to read in chunk size in open_stream_in\n");
|
||||
goto failed;
|
||||
}
|
||||
|
|
@ -1093,13 +1093,19 @@ again:
|
|||
sinfo->s[i].last_head = last_head32;
|
||||
header_length = 13;
|
||||
} else {
|
||||
if (unlikely(read_i64(control, f, &v1)))
|
||||
int read_len;
|
||||
|
||||
if (control->major_version == 0 && control->minor_version < 6)
|
||||
read_len = 8;
|
||||
else
|
||||
read_len = sinfo->chunk_bytes;
|
||||
if (unlikely(read_val(control, f, &v1, read_len)))
|
||||
goto failed;
|
||||
if (unlikely(read_i64(control, f, &v2)))
|
||||
if (unlikely(read_val(control, f, &v2, read_len)))
|
||||
goto failed;
|
||||
if (unlikely(read_i64(control, f, &sinfo->s[i].last_head)))
|
||||
if (unlikely(read_val(control, f, &sinfo->s[i].last_head, read_len)))
|
||||
goto failed;
|
||||
header_length = 25;
|
||||
header_length = 1 + (read_len * 3);
|
||||
}
|
||||
if (unlikely(c == CTYPE_NONE && v1 == 0 && v2 == 0 && sinfo->s[i].last_head == 0 && i == 0)) {
|
||||
print_err("Enabling stream close workaround\n");
|
||||
|
|
@ -1225,40 +1231,40 @@ retry:
|
|||
/* Write whether this is the last chunk, followed by the size
|
||||
* of this chunk */
|
||||
write_u8(control, ctis->fd, control->eof);
|
||||
write_i64(control, ctis->fd, ctis->size);
|
||||
write_val(control, ctis->fd, ctis->size, ctis->chunk_bytes);
|
||||
|
||||
/* First chunk of this stream, write headers */
|
||||
ctis->initial_pos = get_seek(control, ctis->fd);
|
||||
|
||||
for (j = 0; j < ctis->num_streams; j++) {
|
||||
ctis->s[j].last_head = ctis->cur_pos + 17;
|
||||
ctis->s[j].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2);
|
||||
write_u8(control, ctis->fd, CTYPE_NONE);
|
||||
write_i64(control, ctis->fd, 0);
|
||||
write_i64(control, ctis->fd, 0);
|
||||
write_i64(control, ctis->fd, 0);
|
||||
ctis->cur_pos += 25;
|
||||
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
|
||||
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
|
||||
write_val(control, ctis->fd, 0, ctis->chunk_bytes);
|
||||
ctis->cur_pos += 1 + (ctis->chunk_bytes * 3);
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(seekto(control, ctis, ctis->s[cti->streamno].last_head)))
|
||||
fatal("Failed to seekto in compthread %d\n", i);
|
||||
|
||||
if (unlikely(write_i64(control, ctis->fd, ctis->cur_pos)))
|
||||
fatal("Failed to write_i64 in compthread %d\n", i);
|
||||
if (unlikely(write_val(control, ctis->fd, ctis->cur_pos, ctis->chunk_bytes)))
|
||||
fatal("Failed to write_val cur_pos in compthread %d\n", i);
|
||||
|
||||
ctis->s[cti->streamno].last_head = ctis->cur_pos + 17;
|
||||
ctis->s[cti->streamno].last_head = ctis->cur_pos + 1 + (ctis->chunk_bytes * 2);
|
||||
if (unlikely(seekto(control, ctis, ctis->cur_pos)))
|
||||
fatal("Failed to seekto cur_pos in compthread %d\n", i);
|
||||
|
||||
print_maxverbose("Thread %ld writing %lld compressed bytes from stream %d\n", i, padded_len, cti->streamno);
|
||||
/* We store the actual c_len even though we might pad it out */
|
||||
if (unlikely(write_u8(control, ctis->fd, cti->c_type) ||
|
||||
write_i64(control, ctis->fd, cti->c_len) ||
|
||||
write_i64(control, ctis->fd, cti->s_len) ||
|
||||
write_i64(control, ctis->fd, 0))) {
|
||||
write_val(control, ctis->fd, cti->c_len, ctis->chunk_bytes) ||
|
||||
write_val(control, ctis->fd, cti->s_len, ctis->chunk_bytes) ||
|
||||
write_val(control, ctis->fd, 0, ctis->chunk_bytes))) {
|
||||
fatal("Failed write in compthread %d\n", i);
|
||||
}
|
||||
ctis->cur_pos += 25;
|
||||
ctis->cur_pos += 1 + (ctis->chunk_bytes * 3);
|
||||
|
||||
if (ENCRYPT) {
|
||||
ctis->cur_pos += 8;
|
||||
|
|
@ -1423,13 +1429,19 @@ fill_another:
|
|||
last_head = last_head32;
|
||||
header_length = 13;
|
||||
} else {
|
||||
if (unlikely(read_i64(control, sinfo->fd, &c_len)))
|
||||
int read_len;
|
||||
|
||||
if (control->major_version == 0 && control->minor_version < 6)
|
||||
read_len = 8;
|
||||
else
|
||||
read_len = sinfo->chunk_bytes;
|
||||
if (unlikely(read_val(control, sinfo->fd, &c_len, read_len)))
|
||||
return -1;
|
||||
if (unlikely(read_i64(control, sinfo->fd, &u_len)))
|
||||
if (unlikely(read_val(control, sinfo->fd, &u_len, read_len)))
|
||||
return -1;
|
||||
if (unlikely(read_i64(control, sinfo->fd, &last_head)))
|
||||
if (unlikely(read_val(control, sinfo->fd, &last_head, read_len)))
|
||||
return -1;
|
||||
header_length = 25;
|
||||
header_length = 1 + (read_len * 3);
|
||||
}
|
||||
|
||||
if (ENCRYPT) {
|
||||
|
|
|
|||
2
stream.h
2
stream.h
|
|
@ -31,7 +31,7 @@ ssize_t read_1g(rzip_control *control, int fd, void *buf, i64 len);
|
|||
void prepare_streamout_threads(rzip_control *control);
|
||||
void close_streamout_threads(rzip_control *control);
|
||||
void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_limit, char cbytes);
|
||||
void *open_stream_in(rzip_control *control, int f, int n);
|
||||
void *open_stream_in(rzip_control *control, int f, int n, char cbytes);
|
||||
void flush_buffer(rzip_control *control, struct stream_info *sinfo, int stream);
|
||||
int write_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);
|
||||
i64 read_stream(rzip_control *control, void *ss, int streamno, uchar *p, i64 len);
|
||||
|
|
|
|||
Loading…
Reference in a new issue