Add more robust checking.

Pre-malloc ram to detect early when that much ram cannot be allocated.
Make sure to always make chunk size a multiple of page size for mmap to work.
Begin changes to support variable byte width offsets in rzip chunks.
Decrease header entries to only 2 bytes wide as per original rzip.
Random other tidying.
This commit is contained in:
Con Kolivas 2010-10-31 10:35:04 +11:00
parent d396a8a360
commit c5da3a1adb
5 changed files with 87 additions and 95 deletions

View file

@ -1,3 +1,8 @@
lrzip-0.50 update
All files created with lrzip 0.50+ are not backward compatible with versions
prior to 0.50. v0.50 can read earlier generated files.
lrzip-0.41 update
Files created with lrzip 0.41 and selecting the -z option for
@ -17,7 +22,7 @@ Con Kolivas November 2009.
lrzip-0.24 update!
FILES CREATED WITH LRZIP 0.23 and earlier are NOT
BACKWARE COMPATIBLE if compressed with LZMA.
BACKWARD COMPATIBLE if compressed with LZMA.
All other compression schemes are compatible.

View file

@ -28,15 +28,6 @@ static inline uchar read_u8(void *ss, int stream)
return b;
}
/* Fetch a 2 byte value from the given stream, calling fatal() when fewer
 * than 2 bytes could be read. */
static inline u16 read_u16(void *ss, int stream)
{
	u16 value;

	if (read_stream(ss, stream, (uchar *)&value, 2) == 2)
		return value;

	fatal("Stream read u16 failed\n");
	return value;
}
static inline u32 read_u32(void *ss, int stream)
{
u32 ret;
@ -45,28 +36,30 @@ static inline u32 read_u32(void *ss, int stream)
fatal("Stream read u32 failed\n");
return ret;
}
static inline i64 read_i64(void *ss, int stream)
/* Read a variable length of chars dependent on how big the chunk was */
static inline i64 read_vchars(void *ss, int stream, int length)
{
i64 ret;
int bytes;
i64 s = 0;
if (read_stream(ss, stream, (uchar *)&ret, 8) != 8)
fatal("Stream read i64 failed\n");
return ret;
}
for (bytes = 0; bytes < length; bytes++) {
int bits = bytes * 8;
static u16 read_header_v03(void *ss, uchar *head)
{
*head = read_u8(ss, 0);
return read_u16(ss, 0);
uchar sb = read_u8(ss, stream);
s |= (i64)sb << bits;
}
return s;
}
static i64 read_header(void *ss, uchar *head)
{
if (control.major_version == 0 && control.minor_version < 4)
return read_header_v03(ss, head);
int chunk_bytes = 2;
/* All chunks were unnecessarily encoded 8 bytes wide version 0.4x */
if (control.major_version == 0 && control.minor_version == 4)
chunk_bytes = 8;
*head = read_u8(ss, 0);
return read_i64(ss, 0);
return read_vchars(ss, 0, chunk_bytes);
}
static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
@ -93,63 +86,29 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
return len;
}
/*
 * Copy a match of len bytes out of previously written output, using the
 * offset encoding read via read_u32() (the pre-0.4 format).
 *
 * The offset is read from stream 0 of ss; the match source starts that many
 * bytes before the current position of fd_out and is read through fd_hist.
 * Every copied byte is written to fd_out and folded into *cksum.
 *
 * Returns the number of bytes copied. All failures are reported via fatal().
 */
static i64 unzip_match_v03(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
{
	u32 offset;
	i64 n, total = 0;
	i64 cur_pos = lseek(fd_out, 0, SEEK_CUR);

	if (cur_pos == -1)
		fatal("Seek failed on out file in unzip_match.\n");

	offset = read_u32(ss, 0);
	/* With a zero offset every pass of the copy loop below would move zero
	 * bytes and len would never shrink, so refuse it up front. */
	if (len && !offset)
		fatal("Zero offset in unzip_match\n");

	/* Arguments are cast to long long so they match the %lld conversions. */
	if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
		fatal("Seek failed by %lld from %lld on history file in unzip_match - %s\n",
		      (long long)offset, (long long)cur_pos, strerror(errno));

	while (len) {
		uchar *buf;

		/* Copy at most offset bytes per pass - presumably so that the
		 * bytes read from fd_hist have already been written to fd_out. */
		n = MIN(len, offset);

		buf = malloc((size_t)n);
		if (!buf)
			fatal("Failed to allocate %lld bytes in unzip_match\n", (long long)n);

		if (read_1g(fd_hist, buf, (size_t)n) != (ssize_t)n)
			fatal("Failed to read %lld bytes in unzip_match\n", (long long)n);

		if (write_1g(fd_out, buf, (size_t)n) != (ssize_t)n)
			fatal("Failed to write %lld bytes in unzip_match\n", (long long)n);

		*cksum = CrcUpdate(*cksum, buf, n);

		len -= n;
		free(buf);
		total += n;
	}

	return total;
}
static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
{
i64 cur_pos;
i64 offset, n, total;
i64 offset, n, total, cur_pos;
int chunk_bytes = 8;
if (len < 0)
fatal("len %lld is negative in unzip_match!\n",len);
if (control.major_version == 0 && control.minor_version < 4)
return unzip_match_v03(ss, len, fd_out, fd_hist, cksum);
if (control.major_version == 0) {
/* Versions < 0.4 used 4 bytes for all offsets, version 0.4 used 8 bytes.
* Versions 0.5+ used a variable number of bytes depending on block size. */
if (control.minor_version < 4)
chunk_bytes = 4;
else if (control.minor_version == 4)
chunk_bytes = 8;
}
total = 0;
cur_pos = lseek(fd_out, 0, SEEK_CUR);
if (cur_pos == -1)
fatal("Seek failed on out file in unzip_match.\n");
/* Note the offset is in a different format v0.40+ */
offset = read_i64(ss, 0);
offset = read_vchars(ss, 0, chunk_bytes);
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
offset, cur_pos, strerror(errno));

74
rzip.c
View file

@ -20,7 +20,6 @@
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* rzip compression algorithm */
#include "rzip.h"
#define CHUNK_MULTIPLE (100 * 1024 * 1024)
@ -88,25 +87,32 @@ struct rzip_state {
static inline void put_u8(void *ss, int stream, uchar b)
{
if (write_stream(ss, stream, &b, 1) != 0)
fatal(NULL);
fatal("Failed to put_u8\n");
}
static inline void put_u32(void *ss, int stream, uint32_t s)
{
if (write_stream(ss, stream, (uchar *)&s, 4))
fatal(NULL);
fatal("Failed to put_u32\n");
}
static inline void put_i64(void *ss, int stream, i64 s)
/* Put a variable length of bytes dependent on how big the chunk is */
static inline void put_vchars(void *ss, int stream, i64 s, int length)
{
if (write_stream(ss, stream, (uchar *)&s, 8))
fatal(NULL);
int bytes;
for (bytes = 0; bytes < length; bytes++) {
int bits = bytes * 8;
uchar sb = (s >> bits) & (i64)0XFF;
put_u8(ss, stream, sb);
}
}
static void put_header(void *ss, uchar head, i64 len)
{
put_u8(ss, 0, head);
put_i64(ss, 0, len);
put_vchars(ss, 0, len, 2);
}
static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len)
@ -114,10 +120,11 @@ static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i
do {
i64 ofs;
i64 n = len;
if (n > 0xFFFF) n = 0xFFFF;
ofs = (p - (buf+offset));
ofs = (p - (buf + offset));
put_header(st->ss, 1, n);
put_i64(st->ss, 0, ofs);
put_vchars(st->ss, 0, ofs, 8);
st->stats.matches++;
st->stats.match_bytes += n;
@ -131,6 +138,7 @@ static void put_literal(struct rzip_state *st, uchar *last, uchar *p)
{
do {
i64 len = (i64)(p - last);
if (len > 0xFFFF) len = 0xFFFF;
st->stats.literals++;
st->stats.literal_bytes += len;
@ -174,7 +182,7 @@ static int lesser_bitness(tag a, tag b)
{
tag mask;
for (mask = 0; mask != (tag)-1; mask = ((mask<<1)|1)) {
for (mask = 0; mask != (tag) - 1; mask = ((mask << 1) | 1)) {
if ((a & b & mask) != mask)
break;
}
@ -491,7 +499,7 @@ static void hash_search(struct rzip_state *st, uchar *buf,
show_distrib(st);
if (st->last_match < buf + st->chunk_size)
put_literal(st, st->last_match,buf + st->chunk_size);
put_literal(st, st->last_match, buf + st->chunk_size);
if (st->chunk_size > cksum_limit) {
i64 n = st->chunk_size - cksum_limit;
@ -519,21 +527,41 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
{
uchar *buf;
buf = (uchar *)mmap(NULL, st->chunk_size, PROT_READ, MAP_SHARED, fd_in, offset);
/* Malloc'ing first will tell us if we can allocate this much ram
* faster than slowly reading in the file and then failing. Filling
* it with zeroes has a defragmenting effect on ram before the real
* read in. */
if (control.flags & FLAG_SHOW_PROGRESS)
fprintf(control.msgout, "Allocating ram...\n");
buf = malloc(st->chunk_size);
if (!buf)
fatal("Failed to premalloc in rzip_chunk\n");
if (!memset(buf, 0, st->chunk_size))
fatal("Failed to memset in rzip_chunk\n");
free(buf);
buf = (uchar *)mmap(buf, st->chunk_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd_in, offset);
if (buf == (uchar *)-1)
fatal("Failed to map buffer in rzip_fd\n");
fatal("Failed to map buffer in rzip_chunk\n");
st->ss = open_stream_out(fd_out, NUM_STREAMS, limit);
if (!st->ss)
fatal("Failed to open streams in rzip_fd\n");
fatal("Failed to open streams in rzip_chunk\n");
hash_search(st, buf, pct_base, pct_multiple);
/* unmap buffer before closing and reallocating streams */
munmap(buf, st->chunk_size);
if (close_stream_out(st->ss) != 0)
fatal("Failed to flush/close streams in rzip_fd\n");
fatal("Failed to flush/close streams in rzip_chunk\n");
}
/* Windows must be a multiple of _SC_PAGE_SIZE for offset to work in mmap.
 * Rounds *chunk up to the next multiple of the page size in place. A value
 * that is already a multiple is left unchanged rather than grown by another
 * page; anything below one page (including zero) becomes exactly one page.
 * A failing sysconf() call is not handled here. */
static void round_to_page_size(i64 *chunk)
{
	i64 page_size = sysconf(_SC_PAGE_SIZE);
	i64 pages = *chunk / page_size;

	/* Only add a page when there is a partial page left over. */
	if (*chunk % page_size)
		pages++;
	if (pages < 1)
		pages = 1;
	*chunk = pages * page_size;
}
/* compress a whole file chunks at a time */
void rzip_fd(int fd_in, int fd_out)
@ -549,9 +577,8 @@ void rzip_fd(int fd_in, int fd_out)
struct stat s, s2;
struct rzip_state *st;
i64 len, last_chunk = 0;
i64 chunk_window, pages;
i64 chunk_window;
int pass = 0, passes;
unsigned long page_size;
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours,
elapsed_minutes, elapsed_seconds;
double finish_time, elapsed_time, chunkmbs;
@ -572,9 +599,7 @@ void rzip_fd(int fd_in, int fd_out)
/* Windows must be a multiple of _SC_PAGE_SIZE for offset to work in mmap */
chunk_window = control.window * CHUNK_MULTIPLE;
page_size = sysconf(_SC_PAGE_SIZE);
pages = chunk_window / page_size;
chunk_window = pages * page_size;
round_to_page_size(&chunk_window);
st->level = &levels[MIN(9, control.window)];
st->fd_in = fd_in;
@ -588,14 +613,17 @@ void rzip_fd(int fd_in, int fd_out)
last.tv_sec = last.tv_usec = 0;
gettimeofday(&start, NULL);
while (len) {
while (len > 0) {
i64 chunk, limit = 0;
double pct_base, pct_multiple;
chunk = chunk_window;
if (chunk > len)
limit = chunk = len;
if (chunk > len) {
chunk = len;
round_to_page_size(&chunk);
limit = chunk;
}
pct_base = (100.0 * (s.st_size - len)) / s.st_size;
pct_multiple = ((double)chunk) / s.st_size;

4
rzip.h
View file

@ -18,8 +18,8 @@
*/
#define LRZIP_MAJOR_VERSION 0
#define LRZIP_MINOR_VERSION 4
#define LRZIP_MINOR_SUBVERSION 7
#define LRZIP_MINOR_VERSION 5
#define LRZIP_MINOR_SUBVERSION 0
#define NUM_STREAMS 2

View file

@ -802,7 +802,7 @@ static int flush_buffer(struct stream_info *sinfo, int stream)
free(sinfo->s[stream].buf);
sinfo->s[stream].buf = malloc(sinfo->bufsize);
if (!sinfo->s[stream].buf)
return -1;
fatal("Failed to malloc in flush_buffer\n");
return 0;
}