Add more robust checking.

Pre-malloc RAM so that being unable to allocate that much RAM is detected early.
Always make the chunk size a multiple of the page size so that mmap works.
Begin changes to support variable-byte-width offsets in rzip chunks.
Decrease header entries to only 2 bytes wide, as per the original rzip.
Random other tidying.
This commit is contained in:
Con Kolivas 2010-10-31 10:35:04 +11:00
parent d396a8a360
commit c5da3a1adb
5 changed files with 87 additions and 95 deletions

View file

@ -1,3 +1,8 @@
lrzip-0.50 update
All files created with lrzip 0.50+ are not backward compatible with versions
prior to 0.50. v0.50 can read earlier generated files.
lrzip-0.41 update lrzip-0.41 update
Files created with lrzip 0.41 and selecting the -z option for Files created with lrzip 0.41 and selecting the -z option for
@ -17,7 +22,7 @@ Con Kolivas November 2009.
lrzip-0.24 update! lrzip-0.24 update!
FILES CREATED WITH LRZIP 0.23 and earlier are NOT FILES CREATED WITH LRZIP 0.23 and earlier are NOT
BACKWARE COMPATIBLE if compressed with LZMA. BACKWARD COMPATIBLE if compressed with LZMA.
All other compression schemes are compatible. All other compression schemes are compatible.

View file

@ -28,15 +28,6 @@ static inline uchar read_u8(void *ss, int stream)
return b; return b;
} }
static inline u16 read_u16(void *ss, int stream)
{
u16 ret;
if (read_stream(ss, stream, (uchar *)&ret, 2) != 2)
fatal("Stream read u16 failed\n");
return ret;
}
static inline u32 read_u32(void *ss, int stream) static inline u32 read_u32(void *ss, int stream)
{ {
u32 ret; u32 ret;
@ -45,28 +36,30 @@ static inline u32 read_u32(void *ss, int stream)
fatal("Stream read u32 failed\n"); fatal("Stream read u32 failed\n");
return ret; return ret;
} }
/* Read a variable length of chars dependant on how big the chunk was */
static inline i64 read_i64(void *ss, int stream) static inline i64 read_vchars(void *ss, int stream, int length)
{ {
i64 ret; int bytes;
i64 s = 0;
if (read_stream(ss, stream, (uchar *)&ret, 8) != 8) for (bytes = 0; bytes < length; bytes++) {
fatal("Stream read i64 failed\n"); int bits = bytes * 8;
return ret;
uchar sb = read_u8(ss, stream);
s |= (i64)sb << bits;
} }
return s;
static u16 read_header_v03(void *ss, uchar *head)
{
*head = read_u8(ss, 0);
return read_u16(ss, 0);
} }
static i64 read_header(void *ss, uchar *head) static i64 read_header(void *ss, uchar *head)
{ {
if (control.major_version == 0 && control.minor_version < 4) int chunk_bytes = 2;
return read_header_v03(ss, head);
/* All chunks were unnecessarily encoded 8 bytes wide version 0.4x */
if (control.major_version == 0 && control.minor_version == 4)
chunk_bytes = 8;
*head = read_u8(ss, 0); *head = read_u8(ss, 0);
return read_i64(ss, 0); return read_vchars(ss, 0, chunk_bytes);
} }
static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum) static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
@ -93,63 +86,29 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
return len; return len;
} }
static i64 unzip_match_v03(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
{
u32 offset;
i64 n, total = 0;
i64 cur_pos = lseek(fd_out, 0, SEEK_CUR);
if (cur_pos == -1)
fatal("Seek failed on out file in unzip_match.\n");
offset = read_u32(ss, 0);
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
offset, cur_pos, strerror(errno));
while (len) {
uchar *buf;
n = MIN(len, offset);
buf = malloc((size_t)n);
if (!buf)
fatal("Failed to allocate %d bytes in unzip_match\n", n);
if (read_1g(fd_hist, buf, (size_t)n) != (ssize_t)n)
fatal("Failed to read %d bytes in unzip_match\n", n);
if (write_1g(fd_out, buf, (size_t)n) != (ssize_t)n)
fatal("Failed to write %d bytes in unzip_match\n", n);
*cksum = CrcUpdate(*cksum, buf, n);
len -= n;
free(buf);
total += n;
}
return total;
}
static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum) static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
{ {
i64 cur_pos; i64 offset, n, total, cur_pos;
i64 offset, n, total; int chunk_bytes = 8;
if (len < 0) if (len < 0)
fatal("len %lld is negative in unzip_match!\n",len); fatal("len %lld is negative in unzip_match!\n",len);
if (control.major_version == 0 && control.minor_version < 4) if (control.major_version == 0) {
return unzip_match_v03(ss, len, fd_out, fd_hist, cksum); /* Versions < 0.4 used 4 bytes for all offsets, version 0.4 used 8 bytes.
* Versions 0.5+ used a variable number of bytes depending on block size. */
if (control.minor_version < 4)
chunk_bytes = 4;
else if (control.minor_version == 4)
chunk_bytes = 8;
}
total = 0; total = 0;
cur_pos = lseek(fd_out, 0, SEEK_CUR); cur_pos = lseek(fd_out, 0, SEEK_CUR);
if (cur_pos == -1) if (cur_pos == -1)
fatal("Seek failed on out file in unzip_match.\n"); fatal("Seek failed on out file in unzip_match.\n");
/* Note the offset is in a different format v0.40+ */ /* Note the offset is in a different format v0.40+ */
offset = read_i64(ss, 0); offset = read_vchars(ss, 0, chunk_bytes);
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1) if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n", fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
offset, cur_pos, strerror(errno)); offset, cur_pos, strerror(errno));

68
rzip.c
View file

@ -20,7 +20,6 @@
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
/* rzip compression algorithm */ /* rzip compression algorithm */
#include "rzip.h" #include "rzip.h"
#define CHUNK_MULTIPLE (100 * 1024 * 1024) #define CHUNK_MULTIPLE (100 * 1024 * 1024)
@ -88,25 +87,32 @@ struct rzip_state {
static inline void put_u8(void *ss, int stream, uchar b) static inline void put_u8(void *ss, int stream, uchar b)
{ {
if (write_stream(ss, stream, &b, 1) != 0) if (write_stream(ss, stream, &b, 1) != 0)
fatal(NULL); fatal("Failed to put_u8\n");
} }
static inline void put_u32(void *ss, int stream, uint32_t s) static inline void put_u32(void *ss, int stream, uint32_t s)
{ {
if (write_stream(ss, stream, (uchar *)&s, 4)) if (write_stream(ss, stream, (uchar *)&s, 4))
fatal(NULL); fatal("Failed to put_u32\n");
} }
static inline void put_i64(void *ss, int stream, i64 s) /* Put a variable length of bytes dependant on how big the chunk is */
static inline void put_vchars(void *ss, int stream, i64 s, int length)
{ {
if (write_stream(ss, stream, (uchar *)&s, 8)) int bytes;
fatal(NULL);
for (bytes = 0; bytes < length; bytes++) {
int bits = bytes * 8;
uchar sb = (s >> bits) & (i64)0XFF;
put_u8(ss, stream, sb);
}
} }
static void put_header(void *ss, uchar head, i64 len) static void put_header(void *ss, uchar head, i64 len)
{ {
put_u8(ss, 0, head); put_u8(ss, 0, head);
put_i64(ss, 0, len); put_vchars(ss, 0, len, 2);
} }
static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len) static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len)
@ -114,10 +120,11 @@ static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i
do { do {
i64 ofs; i64 ofs;
i64 n = len; i64 n = len;
if (n > 0xFFFF) n = 0xFFFF;
ofs = (p - (buf + offset)); ofs = (p - (buf + offset));
put_header(st->ss, 1, n); put_header(st->ss, 1, n);
put_i64(st->ss, 0, ofs); put_vchars(st->ss, 0, ofs, 8);
st->stats.matches++; st->stats.matches++;
st->stats.match_bytes += n; st->stats.match_bytes += n;
@ -131,6 +138,7 @@ static void put_literal(struct rzip_state *st, uchar *last, uchar *p)
{ {
do { do {
i64 len = (i64)(p - last); i64 len = (i64)(p - last);
if (len > 0xFFFF) len = 0xFFFF;
st->stats.literals++; st->stats.literals++;
st->stats.literal_bytes += len; st->stats.literal_bytes += len;
@ -519,21 +527,41 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
{ {
uchar *buf; uchar *buf;
buf = (uchar *)mmap(NULL, st->chunk_size, PROT_READ, MAP_SHARED, fd_in, offset); /* Malloc'ing first will tell us if we can allocate this much ram
* faster than slowly reading in the file and then failing. Filling
* it with zeroes has a defragmenting effect on ram before the real
* read in. */
if (control.flags & FLAG_SHOW_PROGRESS)
fprintf(control.msgout, "Allocating ram...\n");
buf = malloc(st->chunk_size);
if (!buf)
fatal("Failed to premalloc in rzip_chunk\n");
if (!memset(buf, 0, st->chunk_size))
fatal("Failed to memset in rzip_chunk\n");
free(buf);
buf = (uchar *)mmap(buf, st->chunk_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd_in, offset);
if (buf == (uchar *)-1) if (buf == (uchar *)-1)
fatal("Failed to map buffer in rzip_fd\n"); fatal("Failed to map buffer in rzip_chunk\n");
st->ss = open_stream_out(fd_out, NUM_STREAMS, limit); st->ss = open_stream_out(fd_out, NUM_STREAMS, limit);
if (!st->ss) if (!st->ss)
fatal("Failed to open streams in rzip_fd\n"); fatal("Failed to open streams in rzip_chunk\n");
hash_search(st, buf, pct_base, pct_multiple); hash_search(st, buf, pct_base, pct_multiple);
/* unmap buffer before closing and reallocating streams */ /* unmap buffer before closing and reallocating streams */
munmap(buf, st->chunk_size); munmap(buf, st->chunk_size);
if (close_stream_out(st->ss) != 0) if (close_stream_out(st->ss) != 0)
fatal("Failed to flush/close streams in rzip_fd\n"); fatal("Failed to flush/close streams in rzip_chunk\n");
} }
/* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */
static void round_to_page_size(i64 *chunk)
{
unsigned long page_size = sysconf(_SC_PAGE_SIZE);
i64 pages = *chunk / page_size + 1;
*chunk = pages * page_size;
}
/* compress a whole file chunks at a time */ /* compress a whole file chunks at a time */
void rzip_fd(int fd_in, int fd_out) void rzip_fd(int fd_in, int fd_out)
@ -549,9 +577,8 @@ void rzip_fd(int fd_in, int fd_out)
struct stat s, s2; struct stat s, s2;
struct rzip_state *st; struct rzip_state *st;
i64 len, last_chunk = 0; i64 len, last_chunk = 0;
i64 chunk_window, pages; i64 chunk_window;
int pass = 0, passes; int pass = 0, passes;
unsigned long page_size;
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours, unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours,
elapsed_minutes, elapsed_seconds; elapsed_minutes, elapsed_seconds;
double finish_time, elapsed_time, chunkmbs; double finish_time, elapsed_time, chunkmbs;
@ -572,9 +599,7 @@ void rzip_fd(int fd_in, int fd_out)
/* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */ /* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */
chunk_window = control.window * CHUNK_MULTIPLE; chunk_window = control.window * CHUNK_MULTIPLE;
page_size = sysconf(_SC_PAGE_SIZE); round_to_page_size(&chunk_window);
pages = chunk_window / page_size;
chunk_window = pages * page_size;
st->level = &levels[MIN(9, control.window)]; st->level = &levels[MIN(9, control.window)];
st->fd_in = fd_in; st->fd_in = fd_in;
@ -588,14 +613,17 @@ void rzip_fd(int fd_in, int fd_out)
last.tv_sec = last.tv_usec = 0; last.tv_sec = last.tv_usec = 0;
gettimeofday(&start, NULL); gettimeofday(&start, NULL);
while (len) { while (len > 0) {
i64 chunk, limit = 0; i64 chunk, limit = 0;
double pct_base, pct_multiple; double pct_base, pct_multiple;
chunk = chunk_window; chunk = chunk_window;
if (chunk > len) if (chunk > len) {
limit = chunk = len; chunk = len;
round_to_page_size(&chunk);
limit = chunk;
}
pct_base = (100.0 * (s.st_size - len)) / s.st_size; pct_base = (100.0 * (s.st_size - len)) / s.st_size;
pct_multiple = ((double)chunk) / s.st_size; pct_multiple = ((double)chunk) / s.st_size;

4
rzip.h
View file

@ -18,8 +18,8 @@
*/ */
#define LRZIP_MAJOR_VERSION 0 #define LRZIP_MAJOR_VERSION 0
#define LRZIP_MINOR_VERSION 4 #define LRZIP_MINOR_VERSION 5
#define LRZIP_MINOR_SUBVERSION 7 #define LRZIP_MINOR_SUBVERSION 0
#define NUM_STREAMS 2 #define NUM_STREAMS 2

View file

@ -802,7 +802,7 @@ static int flush_buffer(struct stream_info *sinfo, int stream)
free(sinfo->s[stream].buf); free(sinfo->s[stream].buf);
sinfo->s[stream].buf = malloc(sinfo->bufsize); sinfo->s[stream].buf = malloc(sinfo->bufsize);
if (!sinfo->s[stream].buf) if (!sinfo->s[stream].buf)
return -1; fatal("Failed to malloc in flush_buffer\n");
return 0; return 0;
} }