diff --git a/README-NOT-BACKWARD-COMPATIBLE b/README-NOT-BACKWARD-COMPATIBLE index 3688be9..33e0d73 100644 --- a/README-NOT-BACKWARD-COMPATIBLE +++ b/README-NOT-BACKWARD-COMPATIBLE @@ -1,3 +1,8 @@ +lrzip-0.50 update + +All files created with lrzip 0.50+ are not backward compatible with versions +prior to 0.50. v0.50 can read earlier generated files. + lrzip-0.41 update Files created with lrzip 0.41 and selecting the -z option for @@ -17,7 +22,7 @@ Con Kolivas November 2009. lrzip-0.24 update! FILES CREATED WITH LRZIP 0.23 and earlier are NOT -BACKWARE COMPATIBLE if compressed with LZMA. +BACKWARD COMPATIBLE if compressed with LZMA. All other compression schemes are compatible. diff --git a/runzip.c b/runzip.c index 391ac44..27a3612 100644 --- a/runzip.c +++ b/runzip.c @@ -28,15 +28,6 @@ static inline uchar read_u8(void *ss, int stream) return b; } -static inline u16 read_u16(void *ss, int stream) -{ - u16 ret; - - if (read_stream(ss, stream, (uchar *)&ret, 2) != 2) - fatal("Stream read u16 failed\n"); - return ret; -} - static inline u32 read_u32(void *ss, int stream) { u32 ret; @@ -45,28 +36,30 @@ static inline u32 read_u32(void *ss, int stream) fatal("Stream read u32 failed\n"); return ret; } - -static inline i64 read_i64(void *ss, int stream) +/* Read a variable length of chars dependant on how big the chunk was */ +static inline i64 read_vchars(void *ss, int stream, int length) { - i64 ret; + int bytes; + i64 s = 0; - if (read_stream(ss, stream, (uchar *)&ret, 8) != 8) - fatal("Stream read i64 failed\n"); - return ret; -} + for (bytes = 0; bytes < length; bytes++) { + int bits = bytes * 8; -static u16 read_header_v03(void *ss, uchar *head) -{ - *head = read_u8(ss, 0); - return read_u16(ss, 0); + uchar sb = read_u8(ss, stream); + s |= (i64)sb << bits; + } + return s; } static i64 read_header(void *ss, uchar *head) { - if (control.major_version == 0 && control.minor_version < 4) - return read_header_v03(ss, head); + int chunk_bytes = 2; + + /* All chunks were unnecessarily encoded 8 bytes wide version 0.4x */ + if (control.major_version == 0 && control.minor_version == 4) + chunk_bytes = 8; *head = read_u8(ss, 0); - return read_i64(ss, 0); + return read_vchars(ss, 0, chunk_bytes); } static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum) @@ -93,63 +86,29 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum) return len; } -static i64 unzip_match_v03(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum) -{ - u32 offset; - i64 n, total = 0; - i64 cur_pos = lseek(fd_out, 0, SEEK_CUR); - - if (cur_pos == -1) - fatal("Seek failed on out file in unzip_match.\n"); - - offset = read_u32(ss, 0); - - if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1) - fatal("Seek failed by %d from %d on history file in unzip_match - %s\n", - offset, cur_pos, strerror(errno)); - - while (len) { - uchar *buf; - n = MIN(len, offset); - - buf = malloc((size_t)n); - if (!buf) - fatal("Failed to allocate %d bytes in unzip_match\n", n); - - if (read_1g(fd_hist, buf, (size_t)n) != (ssize_t)n) - fatal("Failed to read %d bytes in unzip_match\n", n); - - if (write_1g(fd_out, buf, (size_t)n) != (ssize_t)n) - fatal("Failed to write %d bytes in unzip_match\n", n); - - *cksum = CrcUpdate(*cksum, buf, n); - - len -= n; - free(buf); - total += n; - } - - return total; -} - static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum) { - i64 cur_pos; - i64 offset, n, total; + i64 offset, n, total, cur_pos; + int chunk_bytes = 8; if (len < 0) fatal("len %lld is negative in unzip_match!\n",len); - if (control.major_version == 0 && control.minor_version < 4) - return unzip_match_v03(ss, len, fd_out, fd_hist, cksum); - + if (control.major_version == 0) { + /* Versions < 0.4 used 4 bytes for all offsets, version 0.4 used 8 bytes. + * Versions 0.5+ used a variable number of bytes depending on block size. */ + if (control.minor_version < 4) + chunk_bytes = 4; + else if (control.minor_version == 4) + chunk_bytes = 8; + } total = 0; cur_pos = lseek(fd_out, 0, SEEK_CUR); if (cur_pos == -1) fatal("Seek failed on out file in unzip_match.\n"); /* Note the offset is in a different format v0.40+ */ - offset = read_i64(ss, 0); + offset = read_vchars(ss, 0, chunk_bytes); if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1) fatal("Seek failed by %d from %d on history file in unzip_match - %s\n", offset, cur_pos, strerror(errno)); diff --git a/rzip.c b/rzip.c index 9656462..42b8f34 100644 --- a/rzip.c +++ b/rzip.c @@ -20,7 +20,6 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* rzip compression algorithm */ - #include "rzip.h" #define CHUNK_MULTIPLE (100 * 1024 * 1024) @@ -88,25 +87,32 @@ struct rzip_state { static inline void put_u8(void *ss, int stream, uchar b) { if (write_stream(ss, stream, &b, 1) != 0) - fatal(NULL); + fatal("Failed to put_u8\n"); } static inline void put_u32(void *ss, int stream, uint32_t s) { if (write_stream(ss, stream, (uchar *)&s, 4)) - fatal(NULL); + fatal("Failed to put_u32\n"); } -static inline void put_i64(void *ss, int stream, i64 s) +/* Put a variable length of bytes dependant on how big the chunk is */ +static inline void put_vchars(void *ss, int stream, i64 s, int length) { - if (write_stream(ss, stream, (uchar *)&s, 8)) - fatal(NULL); + int bytes; + + for (bytes = 0; bytes < length; bytes++) { + int bits = bytes * 8; + uchar sb = (s >> bits) & (i64)0XFF; + + put_u8(ss, stream, sb); + } } static void put_header(void *ss, uchar head, i64 len) { put_u8(ss, 0, head); - put_i64(ss, 0, len); + put_vchars(ss, 0, len, 2); } static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len) @@ -114,10 +120,11 @@ static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i do { i64 ofs; i64 n = len; + if (n > 0xFFFF) n = 0xFFFF; - ofs = (p - (buf+offset)); + ofs = (p - (buf + offset)); put_header(st->ss, 1, n); - put_i64(st->ss, 0, ofs); + put_vchars(st->ss, 0, ofs, 8); st->stats.matches++; st->stats.match_bytes += n; @@ -131,6 +138,7 @@ static void put_literal(struct rzip_state *st, uchar *last, uchar *p) { do { i64 len = (i64)(p - last); + if (len > 0xFFFF) len = 0xFFFF; st->stats.literals++; st->stats.literal_bytes += len; @@ -174,7 +182,7 @@ static int lesser_bitness(tag a, tag b) { tag mask; - for (mask = 0; mask != (tag)-1; mask = ((mask<<1)|1)) { + for (mask = 0; mask != (tag) - 1; mask = ((mask << 1) | 1)) { if ((a & b & mask) != mask) break; } @@ -491,7 +499,7 @@ static void hash_search(struct rzip_state *st, uchar *buf, show_distrib(st); if (st->last_match < buf + st->chunk_size) - put_literal(st, st->last_match,buf + st->chunk_size); + put_literal(st, st->last_match, buf + st->chunk_size); if (st->chunk_size > cksum_limit) { i64 n = st->chunk_size - cksum_limit; @@ -519,21 +527,41 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset, { uchar *buf; - buf = (uchar *)mmap(NULL, st->chunk_size, PROT_READ, MAP_SHARED, fd_in, offset); + /* Malloc'ing first will tell us if we can allocate this much ram + * faster than slowly reading in the file and then failing. Filling + * it with zeroes has a defragmenting effect on ram before the real + * read in. */ + if (control.flags & FLAG_SHOW_PROGRESS) + fprintf(control.msgout, "Allocating ram...\n"); + buf = malloc(st->chunk_size); + if (!buf) + fatal("Failed to premalloc in rzip_chunk\n"); + if (!memset(buf, 0, st->chunk_size)) + fatal("Failed to memset in rzip_chunk\n"); + free(buf); + buf = (uchar *)mmap(buf, st->chunk_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd_in, offset); if (buf == (uchar *)-1) - fatal("Failed to map buffer in rzip_fd\n"); + fatal("Failed to map buffer in rzip_chunk\n"); st->ss = open_stream_out(fd_out, NUM_STREAMS, limit); if (!st->ss) - fatal("Failed to open streams in rzip_fd\n"); + fatal("Failed to open streams in rzip_chunk\n"); hash_search(st, buf, pct_base, pct_multiple); /* unmap buffer before closing and reallocating streams */ munmap(buf, st->chunk_size); if (close_stream_out(st->ss) != 0) - fatal("Failed to flush/close streams in rzip_fd\n"); + fatal("Failed to flush/close streams in rzip_chunk\n"); } +/* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */ +static void round_to_page_size(i64 *chunk) +{ + unsigned long page_size = sysconf(_SC_PAGE_SIZE); + i64 pages = *chunk / page_size + 1; + + *chunk = pages * page_size; +} /* compress a whole file chunks at a time */ void rzip_fd(int fd_in, int fd_out) @@ -549,9 +577,8 @@ void rzip_fd(int fd_in, int fd_out) struct stat s, s2; struct rzip_state *st; i64 len, last_chunk = 0; - i64 chunk_window, pages; + i64 chunk_window; int pass = 0, passes; - unsigned long page_size; unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours, elapsed_minutes, elapsed_seconds; double finish_time, elapsed_time, chunkmbs; @@ -572,9 +599,7 @@ void rzip_fd(int fd_in, int fd_out) /* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */ chunk_window = control.window * CHUNK_MULTIPLE; - page_size = sysconf(_SC_PAGE_SIZE); - pages = chunk_window / page_size; - chunk_window = pages * page_size; + round_to_page_size(&chunk_window); st->level = &levels[MIN(9, control.window)]; st->fd_in = fd_in; @@ -588,14 +613,17 @@ void rzip_fd(int fd_in, int fd_out) last.tv_sec = last.tv_usec = 0; gettimeofday(&start, NULL); - while (len) { + while (len > 0) { i64 chunk, limit = 0; double pct_base, pct_multiple; chunk = chunk_window; - if (chunk > len) - limit = chunk = len; + if (chunk > len) { + chunk = len; + round_to_page_size(&chunk); + limit = chunk; + } pct_base = (100.0 * (s.st_size - len)) / s.st_size; pct_multiple = ((double)chunk) / s.st_size; diff --git a/rzip.h b/rzip.h index aa4ec7f..b94a6b8 100644 --- a/rzip.h +++ b/rzip.h @@ -18,8 +18,8 @@ */ #define LRZIP_MAJOR_VERSION 0 -#define LRZIP_MINOR_VERSION 4 -#define LRZIP_MINOR_SUBVERSION 7 +#define LRZIP_MINOR_VERSION 5 +#define LRZIP_MINOR_SUBVERSION 0 #define NUM_STREAMS 2 diff --git a/stream.c b/stream.c index 7fb257d..31f4371 100644 --- a/stream.c +++ b/stream.c @@ -802,7 +802,7 @@ static int flush_buffer(struct stream_info *sinfo, int stream) free(sinfo->s[stream].buf); sinfo->s[stream].buf = malloc(sinfo->bufsize); if (!sinfo->s[stream].buf) - return -1; + fatal("Failed to malloc in flush_buffer\n"); return 0; }