mirror of
https://github.com/ckolivas/lrzip.git
synced 2025-12-06 07:12:00 +01:00
Add more robust checking.
Preallocate RAM with malloc so that being unable to allocate that much RAM is detected early. Always make the chunk size a multiple of the page size so that mmap works. Begin changes towards variable byte-width offsets in rzip chunks. Decrease header entries to only 2 bytes wide, as per original rzip. Other miscellaneous tidying.
This commit is contained in:
parent
d396a8a360
commit
c5da3a1adb
|
|
@ -1,3 +1,8 @@
|
|||
lrzip-0.50 update
|
||||
|
||||
Files created with lrzip 0.50 or later cannot be read by versions
|
||||
prior to 0.50. v0.50 can still read files generated by earlier versions.
|
||||
|
||||
lrzip-0.41 update
|
||||
|
||||
Files created with lrzip 0.41 and selecting the -z option for
|
||||
|
|
@ -17,7 +22,7 @@ Con Kolivas November 2009.
|
|||
lrzip-0.24 update!
|
||||
|
||||
FILES CREATED WITH LRZIP 0.23 and earlier are NOT
|
||||
BACKWARE COMPATIBLE if compressed with LZMA.
|
||||
BACKWARD COMPATIBLE if compressed with LZMA.
|
||||
|
||||
All other compression schemes are compatible.
|
||||
|
||||
|
|
|
|||
95
runzip.c
95
runzip.c
|
|
@ -28,15 +28,6 @@ static inline uchar read_u8(void *ss, int stream)
|
|||
return b;
|
||||
}
|
||||
|
||||
static inline u16 read_u16(void *ss, int stream)
|
||||
{
|
||||
u16 ret;
|
||||
|
||||
if (read_stream(ss, stream, (uchar *)&ret, 2) != 2)
|
||||
fatal("Stream read u16 failed\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline u32 read_u32(void *ss, int stream)
|
||||
{
|
||||
u32 ret;
|
||||
|
|
@ -45,28 +36,30 @@ static inline u32 read_u32(void *ss, int stream)
|
|||
fatal("Stream read u32 failed\n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline i64 read_i64(void *ss, int stream)
|
||||
/* Read a variable length of chars dependant on how big the chunk was */
|
||||
static inline i64 read_vchars(void *ss, int stream, int length)
|
||||
{
|
||||
i64 ret;
|
||||
int bytes;
|
||||
i64 s = 0;
|
||||
|
||||
if (read_stream(ss, stream, (uchar *)&ret, 8) != 8)
|
||||
fatal("Stream read i64 failed\n");
|
||||
return ret;
|
||||
}
|
||||
for (bytes = 0; bytes < length; bytes++) {
|
||||
int bits = bytes * 8;
|
||||
|
||||
static u16 read_header_v03(void *ss, uchar *head)
|
||||
{
|
||||
*head = read_u8(ss, 0);
|
||||
return read_u16(ss, 0);
|
||||
uchar sb = read_u8(ss, stream);
|
||||
s |= (i64)sb << bits;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Read one chunk header from stream 0: a single head byte, stored through
 * `head`, followed by a length field.
 *
 * The length field is 8 bytes wide for version 0.4x archives and 2 bytes
 * wide otherwise.
 *
 * Returns the length field. */
static i64 read_header(void *ss, uchar *head)
{
	int chunk_bytes = 2;

	/* All chunks were unnecessarily encoded 8 bytes wide version 0.4x */
	if (control.major_version == 0 && control.minor_version == 4)
		chunk_bytes = 8;

	*head = read_u8(ss, 0);
	return read_vchars(ss, 0, chunk_bytes);
}
|
||||
|
||||
static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
|
||||
|
|
@ -93,63 +86,29 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
|
|||
return len;
|
||||
}
|
||||
|
||||
/* Copy a match using the 4-byte offset format.
 *
 * Reads a u32 backwards offset from stream 0, seeks fd_hist to that many
 * bytes behind the current position of fd_out, then copies `len` bytes from
 * fd_hist to fd_out. Each pass copies at most `offset` bytes, so a match
 * longer than its offset is copied in several passes. Every copied byte is
 * folded into *cksum with CrcUpdate().
 *
 * Returns the number of bytes copied. Errors are reported through fatal(). */
static i64 unzip_match_v03(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
{
	u32 offset;
	i64 n, total = 0;
	i64 cur_pos = lseek(fd_out, 0, SEEK_CUR);
	uchar *buf;

	if (cur_pos == -1)
		fatal("Seek failed on out file in unzip_match.\n");

	offset = read_u32(ss, 0);

	/* With a zero offset every pass would copy 0 bytes and the loop
	 * below would never terminate. */
	if (len && !offset)
		fatal("Invalid zero offset in unzip_match\n");

	if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
		fatal("Seek failed by %u from %lld on history file in unzip_match - %s\n",
		      (unsigned int)offset, (long long)cur_pos, strerror(errno));

	if (!len)
		return total;

	/* No pass copies more than MIN(len, offset) bytes, so one buffer of
	 * that size serves the whole match. */
	n = MIN(len, offset);
	buf = malloc((size_t)n);
	if (!buf)
		fatal("Failed to allocate %lld bytes in unzip_match\n", (long long)n);

	while (len) {
		n = MIN(len, offset);

		if (read_1g(fd_hist, buf, (size_t)n) != (ssize_t)n)
			fatal("Failed to read %lld bytes in unzip_match\n", (long long)n);

		if (write_1g(fd_out, buf, (size_t)n) != (ssize_t)n)
			fatal("Failed to write %lld bytes in unzip_match\n", (long long)n);

		*cksum = CrcUpdate(*cksum, buf, n);

		len -= n;
		total += n;
	}
	free(buf);

	return total;
}
|
||||
|
||||
static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
|
||||
{
|
||||
i64 cur_pos;
|
||||
i64 offset, n, total;
|
||||
i64 offset, n, total, cur_pos;
|
||||
int chunk_bytes = 8;
|
||||
|
||||
if (len < 0)
|
||||
fatal("len %lld is negative in unzip_match!\n",len);
|
||||
|
||||
if (control.major_version == 0 && control.minor_version < 4)
|
||||
return unzip_match_v03(ss, len, fd_out, fd_hist, cksum);
|
||||
|
||||
if (control.major_version == 0) {
|
||||
/* Versions < 0.4 used 4 bytes for all offsets, version 0.4 used 8 bytes.
|
||||
* Versions 0.5+ used a variable number of bytes depending on block size. */
|
||||
if (control.minor_version < 4)
|
||||
chunk_bytes = 4;
|
||||
else if (control.minor_version == 4)
|
||||
chunk_bytes = 8;
|
||||
}
|
||||
total = 0;
|
||||
cur_pos = lseek(fd_out, 0, SEEK_CUR);
|
||||
if (cur_pos == -1)
|
||||
fatal("Seek failed on out file in unzip_match.\n");
|
||||
|
||||
/* Note the offset is in a different format v0.40+ */
|
||||
offset = read_i64(ss, 0);
|
||||
offset = read_vchars(ss, 0, chunk_bytes);
|
||||
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
|
||||
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
|
||||
offset, cur_pos, strerror(errno));
|
||||
|
|
|
|||
74
rzip.c
74
rzip.c
|
|
@ -20,7 +20,6 @@
|
|||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
/* rzip compression algorithm */
|
||||
|
||||
#include "rzip.h"
|
||||
|
||||
#define CHUNK_MULTIPLE (100 * 1024 * 1024)
|
||||
|
|
@ -88,25 +87,32 @@ struct rzip_state {
|
|||
/* Write a single byte to the given stream. Calls fatal() if write_stream()
 * returns non-zero. */
static inline void put_u8(void *ss, int stream, uchar b)
{
	if (write_stream(ss, stream, &b, 1) != 0)
		fatal("Failed to put_u8\n");
}
|
||||
|
||||
static inline void put_u32(void *ss, int stream, uint32_t s)
|
||||
{
|
||||
if (write_stream(ss, stream, (uchar *)&s, 4))
|
||||
fatal(NULL);
|
||||
fatal("Failed to put_u32\n");
|
||||
}
|
||||
|
||||
/* Put a variable length of bytes dependent on how big the chunk is.
 *
 * Writes the low `length` bytes of `s` to the given stream with put_u8(),
 * least-significant byte first.
 *
 * length: number of bytes to write; must be between 0 and 8, since a larger
 *         count would shift by 64 bits or more */
static inline void put_vchars(void *ss, int stream, i64 s, int length)
{
	/* Shift an unsigned copy so the result does not depend on how the
	 * compiler right-shifts a negative value. */
	unsigned long long v = (unsigned long long)s;
	int bytes;

	for (bytes = 0; bytes < length; bytes++) {
		int bits = bytes * 8;
		uchar sb = (uchar)((v >> bits) & 0xFF);

		put_u8(ss, stream, sb);
	}
}
|
||||
|
||||
/* Write one chunk header to stream 0: the head byte followed by a 2-byte
 * length field.
 *
 * Only the low 16 bits of `len` are stored, so callers must keep len within
 * 0xFFFF. */
static void put_header(void *ss, uchar head, i64 len)
{
	put_u8(ss, 0, head);
	put_vchars(ss, 0, len, 2);
}
|
||||
|
||||
static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len)
|
||||
|
|
@ -114,10 +120,11 @@ static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i
|
|||
do {
|
||||
i64 ofs;
|
||||
i64 n = len;
|
||||
if (n > 0xFFFF) n = 0xFFFF;
|
||||
|
||||
ofs = (p - (buf+offset));
|
||||
ofs = (p - (buf + offset));
|
||||
put_header(st->ss, 1, n);
|
||||
put_i64(st->ss, 0, ofs);
|
||||
put_vchars(st->ss, 0, ofs, 8);
|
||||
|
||||
st->stats.matches++;
|
||||
st->stats.match_bytes += n;
|
||||
|
|
@ -131,6 +138,7 @@ static void put_literal(struct rzip_state *st, uchar *last, uchar *p)
|
|||
{
|
||||
do {
|
||||
i64 len = (i64)(p - last);
|
||||
if (len > 0xFFFF) len = 0xFFFF;
|
||||
|
||||
st->stats.literals++;
|
||||
st->stats.literal_bytes += len;
|
||||
|
|
@ -174,7 +182,7 @@ static int lesser_bitness(tag a, tag b)
|
|||
{
|
||||
tag mask;
|
||||
|
||||
for (mask = 0; mask != (tag)-1; mask = ((mask<<1)|1)) {
|
||||
for (mask = 0; mask != (tag) - 1; mask = ((mask << 1) | 1)) {
|
||||
if ((a & b & mask) != mask)
|
||||
break;
|
||||
}
|
||||
|
|
@ -491,7 +499,7 @@ static void hash_search(struct rzip_state *st, uchar *buf,
|
|||
show_distrib(st);
|
||||
|
||||
if (st->last_match < buf + st->chunk_size)
|
||||
put_literal(st, st->last_match,buf + st->chunk_size);
|
||||
put_literal(st, st->last_match, buf + st->chunk_size);
|
||||
|
||||
if (st->chunk_size > cksum_limit) {
|
||||
i64 n = st->chunk_size - cksum_limit;
|
||||
|
|
@ -519,21 +527,41 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
|
|||
{
|
||||
uchar *buf;
|
||||
|
||||
buf = (uchar *)mmap(NULL, st->chunk_size, PROT_READ, MAP_SHARED, fd_in, offset);
|
||||
/* Malloc'ing first will tell us if we can allocate this much ram
|
||||
* faster than slowly reading in the file and then failing. Filling
|
||||
* it with zeroes has a defragmenting effect on ram before the real
|
||||
* read in. */
|
||||
if (control.flags & FLAG_SHOW_PROGRESS)
|
||||
fprintf(control.msgout, "Allocating ram...\n");
|
||||
buf = malloc(st->chunk_size);
|
||||
if (!buf)
|
||||
fatal("Failed to premalloc in rzip_chunk\n");
|
||||
if (!memset(buf, 0, st->chunk_size))
|
||||
fatal("Failed to memset in rzip_chunk\n");
|
||||
free(buf);
|
||||
buf = (uchar *)mmap(buf, st->chunk_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd_in, offset);
|
||||
if (buf == (uchar *)-1)
|
||||
fatal("Failed to map buffer in rzip_fd\n");
|
||||
fatal("Failed to map buffer in rzip_chunk\n");
|
||||
|
||||
st->ss = open_stream_out(fd_out, NUM_STREAMS, limit);
|
||||
if (!st->ss)
|
||||
fatal("Failed to open streams in rzip_fd\n");
|
||||
fatal("Failed to open streams in rzip_chunk\n");
|
||||
hash_search(st, buf, pct_base, pct_multiple);
|
||||
/* unmap buffer before closing and reallocating streams */
|
||||
munmap(buf, st->chunk_size);
|
||||
|
||||
if (close_stream_out(st->ss) != 0)
|
||||
fatal("Failed to flush/close streams in rzip_fd\n");
|
||||
fatal("Failed to flush/close streams in rzip_chunk\n");
|
||||
}
|
||||
|
||||
/* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */
|
||||
static void round_to_page_size(i64 *chunk)
|
||||
{
|
||||
unsigned long page_size = sysconf(_SC_PAGE_SIZE);
|
||||
i64 pages = *chunk / page_size + 1;
|
||||
|
||||
*chunk = pages * page_size;
|
||||
}
|
||||
|
||||
/* compress a whole file chunks at a time */
|
||||
void rzip_fd(int fd_in, int fd_out)
|
||||
|
|
@ -549,9 +577,8 @@ void rzip_fd(int fd_in, int fd_out)
|
|||
struct stat s, s2;
|
||||
struct rzip_state *st;
|
||||
i64 len, last_chunk = 0;
|
||||
i64 chunk_window, pages;
|
||||
i64 chunk_window;
|
||||
int pass = 0, passes;
|
||||
unsigned long page_size;
|
||||
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours,
|
||||
elapsed_minutes, elapsed_seconds;
|
||||
double finish_time, elapsed_time, chunkmbs;
|
||||
|
|
@ -572,9 +599,7 @@ void rzip_fd(int fd_in, int fd_out)
|
|||
|
||||
/* Windows must be the width of _SC_PAGE_SIZE for offset to work in mmap */
|
||||
chunk_window = control.window * CHUNK_MULTIPLE;
|
||||
page_size = sysconf(_SC_PAGE_SIZE);
|
||||
pages = chunk_window / page_size;
|
||||
chunk_window = pages * page_size;
|
||||
round_to_page_size(&chunk_window);
|
||||
|
||||
st->level = &levels[MIN(9, control.window)];
|
||||
st->fd_in = fd_in;
|
||||
|
|
@ -588,14 +613,17 @@ void rzip_fd(int fd_in, int fd_out)
|
|||
last.tv_sec = last.tv_usec = 0;
|
||||
gettimeofday(&start, NULL);
|
||||
|
||||
while (len) {
|
||||
while (len > 0) {
|
||||
i64 chunk, limit = 0;
|
||||
double pct_base, pct_multiple;
|
||||
|
||||
chunk = chunk_window;
|
||||
|
||||
if (chunk > len)
|
||||
limit = chunk = len;
|
||||
if (chunk > len) {
|
||||
chunk = len;
|
||||
round_to_page_size(&chunk);
|
||||
limit = chunk;
|
||||
}
|
||||
|
||||
pct_base = (100.0 * (s.st_size - len)) / s.st_size;
|
||||
pct_multiple = ((double)chunk) / s.st_size;
|
||||
|
|
|
|||
4
rzip.h
4
rzip.h
|
|
@ -18,8 +18,8 @@
|
|||
*/
|
||||
|
||||
/* lrzip release version: major.minor followed by the subversion digit (0.50) */
#define LRZIP_MAJOR_VERSION 0
#define LRZIP_MINOR_VERSION 5
#define LRZIP_MINOR_SUBVERSION 0

/* Number of streams passed to open_stream_out() */
#define NUM_STREAMS 2
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue