mirror of
https://github.com/ckolivas/lrzip.git
synced 2025-12-06 07:12:00 +01:00
Bump version number to 0.542.
Choose sane defaults for memory usage since linux ludicrously overcommits. Use sliding mmap for any compression windows greater than 2/3 ram. Consolidate and simplify testing of allocatable ram. Minor tweaks to output. Round up the size of the high buffer in sliding mmap to one page. Squeeze a little more out of 32 bit compression windows.
This commit is contained in:
parent
25e053ed49
commit
75e675e6dd
20
ChangeLog
20
ChangeLog
|
|
@ -1,4 +1,18 @@
|
|||
lrzip ChangeLog
|
||||
NOVEMBER 2010, version 0.542 Con Kolivas
|
||||
* Choose sane defaults for memory usage since linux ludicrously overcommits.
|
||||
* Use sliding mmap for any compression windows greater than 2/3 ram.
|
||||
* Consolidate and simplify testing of allocatable ram.
|
||||
* Minor tweaks to output.
|
||||
* Round up the size of the high buffer in sliding mmap to one page.
|
||||
* Squeeze a little more out of 32 bit compression windows.
|
||||
|
||||
NOVEMBER 2010, version 0.541 Con Kolivas
|
||||
* Fix wrong number of passes reported.
|
||||
* Re-fix the off-by-one that wasn't off-by-one.
|
||||
* Limit lzma compression windows to 300MB as per reports of failures with larger
|
||||
windows.
|
||||
|
||||
NOVEMBER 2010, version 0.540 Con Kolivas
|
||||
* Massive rewrite of backend decompression phase, implementing multithreading.
|
||||
This is done by taking each stream of data on read in into separate buffers for
|
||||
|
|
@ -7,9 +21,11 @@ into runzip once it is requests more of the stream. Provided there are enough
|
|||
chunks in the originally compressed data, this provides a massive speedup
|
||||
potentially proportional to the number of CPUs. The slower the backend
|
||||
compression, the better the speed up (i.e. zpaq is the best sped up).
|
||||
* Fix the output of zpaq compress and decompress from trampling on itself and racing and consuming a lot of CPU time printing to the console.
|
||||
* Fix the output of zpaq compress and decompress from trampling on itself and
|
||||
racing and consuming a lot of CPU time printing to the console.
|
||||
* When limiting cwindow to 6 on 32 bits, ensure that control.window is also set.
|
||||
* When testing for the maximum size of testmalloc, the multiple used was out by one, so increase it.
|
||||
* When testing for the maximum size of testmalloc, the multiple used was out by
|
||||
one, so increase it.
|
||||
* Minor output tweaks.
|
||||
* Build warning fixes.
|
||||
* Updated benchmarks.
|
||||
|
|
|
|||
11
WHATS-NEW
11
WHATS-NEW
|
|
@ -1,3 +1,14 @@
|
|||
lrzip-0.542
|
||||
|
||||
Lrzip will now try to select sane defaults for memory usage in cases where the
|
||||
virtual memory heavily overcommits (eg. Linux) as this seriously slows down
|
||||
compression.
|
||||
For compression windows larger than 2/3 ram, lrzip will now use a sliding mmap
|
||||
buffer for better performance.
|
||||
The progress output is more informative in max verbose mode, and will no longer
|
||||
do more passes than it estimates.
|
||||
32 bit machines should be able to use slightly larger windows.
|
||||
|
||||
lrzip-0.540
|
||||
|
||||
MASSIVE MULTITHREADING on the decompression phase. Provided there are enough
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
dnl Process this file with autoconf to produce a configure script.
|
||||
AC_INIT([lrzip],[0.541],[kernel@kolivas.org],[lrzip-0.541])
|
||||
AC_INIT([lrzip],[0.542],[kernel@kolivas.org],[lrzip-0.542])
|
||||
AC_CONFIG_HEADER(config.h)
|
||||
# see what our system is!
|
||||
AC_CANONICAL_HOST
|
||||
|
|
|
|||
134
rzip.c
134
rzip.c
|
|
@ -119,7 +119,7 @@ static void remap_low_sb(void)
|
|||
top = 1;
|
||||
}
|
||||
round_to_page(&new_offset);
|
||||
print_maxverbose("Sliding main buffer \n");
|
||||
print_maxverbose("Sliding main buffer to offset %lld\n", new_offset);
|
||||
if (unlikely(munmap(sb.buf_low, sb.size_low)))
|
||||
fatal("Failed to munmap in remap_low_sb\n");
|
||||
sb.offset_low = new_offset;
|
||||
|
|
@ -673,8 +673,11 @@ static void mmap_stdin(uchar *buf, struct rzip_state *st)
|
|||
static void init_sliding_mmap(struct rzip_state *st, int fd_in, i64 offset)
|
||||
{
|
||||
/* Initialise the high buffer */
|
||||
if (UNLIMITED) {
|
||||
if (!STDIN) {
|
||||
sb.high_length = 65536;
|
||||
/* Round up to the next biggest page size */
|
||||
if (sb.high_length % control.page_size)
|
||||
sb.high_length += control.page_size - (sb.high_length % control.page_size);
|
||||
sb.buf_high = (uchar *)mmap(NULL, sb.high_length, PROT_READ, MAP_SHARED, fd_in, offset);
|
||||
if (unlikely(sb.buf_high == MAP_FAILED))
|
||||
fatal("Unable to mmap buf_high in init_sliding_mmap\n");
|
||||
|
|
@ -699,13 +702,13 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
|
|||
if (unlikely(!st->ss))
|
||||
fatal("Failed to open streams in rzip_chunk\n");
|
||||
|
||||
print_verbose("Performing rzip pre-processing phase\n");
|
||||
print_verbose("Beginning rzip pre-processing phase\n");
|
||||
hash_search(st, pct_base, pct_multiple);
|
||||
|
||||
/* unmap buffer before closing and reallocating streams */
|
||||
if (unlikely(munmap(sb.buf_low, sb.size_low)))
|
||||
fatal("Failed to munmap in rzip_chunk\n");
|
||||
if (UNLIMITED) {
|
||||
if (!STDIN) {
|
||||
if (unlikely(munmap(sb.buf_high, sb.size_high)))
|
||||
fatal("Failed to munmap in rzip_chunk\n");
|
||||
}
|
||||
|
|
@ -753,17 +756,34 @@ void rzip_fd(int fd_in, int fd_out)
|
|||
} else
|
||||
control.st_size = 0;
|
||||
|
||||
/* Optimal use of ram involves no more than 2/3 of it, so if we
|
||||
* expressly request more with -M or -U, use a sliding mmap */
|
||||
control.max_mmap = control.ramsize / 3 * 2;
|
||||
if (MAXRAM)
|
||||
control.max_chunk = control.ramsize;
|
||||
else
|
||||
control.max_chunk = control.max_mmap;
|
||||
|
||||
/* On 32 bits we can have a big window with sliding mmap, but can
|
||||
* not enable much per mmap/malloc */
|
||||
if (BITS32)
|
||||
control.max_mmap = MIN(control.max_mmap, two_gig / 3);
|
||||
round_to_page(&control.max_chunk);
|
||||
round_to_page(&control.max_mmap);
|
||||
if (UNLIMITED)
|
||||
control.max_chunk = control.st_size;
|
||||
|
||||
if (control.window)
|
||||
chunk_window = control.window * CHUNK_MULTIPLE;
|
||||
else {
|
||||
if (STDIN)
|
||||
chunk_window = control.ramsize;
|
||||
else
|
||||
chunk_window = len;
|
||||
}
|
||||
if (chunk_window < len)
|
||||
chunk_window -= chunk_window % control.page_size;
|
||||
st->chunk_size = chunk_window;
|
||||
else
|
||||
chunk_window = control.max_chunk;
|
||||
|
||||
if (!STDIN)
|
||||
st->chunk_size = MIN(chunk_window, len);
|
||||
else
|
||||
st->chunk_size = chunk_window;
|
||||
if (st->chunk_size < len)
|
||||
round_to_page(&st->chunk_size);
|
||||
|
||||
st->level = &levels[control.compression_level];
|
||||
st->fd_in = fd_in;
|
||||
|
|
@ -783,69 +803,53 @@ void rzip_fd(int fd_in, int fd_out)
|
|||
i64 offset = s.st_size - len;
|
||||
int bits = 8;
|
||||
|
||||
/* Flushing the dirty data will decrease our chances of
|
||||
* running out of memory when we allocate ram again on the
|
||||
* next chunk. It will also prevent thrashing on-disk due to
|
||||
* concurrent reads and writes if we're on the same device. */
|
||||
if (last_chunk)
|
||||
print_verbose("Flushing data to disk.\n");
|
||||
fsync(fd_out);
|
||||
|
||||
if (st->chunk_size > len && !STDIN)
|
||||
st->chunk_size = len;
|
||||
st->mmap_size = st->chunk_size;
|
||||
if (BITS32 && st->mmap_size > two_gig) {
|
||||
print_verbose("Limiting to 2GB due to 32 bit limitations\n");
|
||||
st->mmap_size = two_gig;
|
||||
st->chunk_size = control.max_chunk;
|
||||
st->mmap_size = control.max_mmap;
|
||||
if (!STDIN) {
|
||||
st->chunk_size = MIN(st->chunk_size, len);
|
||||
st->mmap_size = MIN(st->mmap_size, len);
|
||||
}
|
||||
|
||||
retry:
|
||||
/* Mmapping anonymously first will tell us how much ram we can use in
|
||||
* advance and zeroes it which has a defragmenting effect on ram
|
||||
* before the real read in. */
|
||||
sb.buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
/* Better to shrink the window to the largest size that works than fail */
|
||||
if (sb.buf_low == MAP_FAILED) {
|
||||
st->mmap_size = st->mmap_size / 10 * 9;
|
||||
st->mmap_size -= st->mmap_size % control.page_size;
|
||||
if (unlikely(!st->mmap_size))
|
||||
fatal("Unable to mmap any ram\n");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* NOTE the buf is saved here for STDIN mode */
|
||||
if (!STDIN) {
|
||||
if (unlikely(munmap(sb.buf_low, st->mmap_size)))
|
||||
fatal("Failed to munmap\n");
|
||||
}
|
||||
|
||||
if (!MAXRAM) {
|
||||
print_maxverbose("Succeeded in allocating %lld sized mmap\n", st->mmap_size);
|
||||
if (!UNLIMITED)
|
||||
st->chunk_size = st->mmap_size;
|
||||
} else
|
||||
st->mmap_size = st->chunk_size;
|
||||
|
||||
if (!STDIN) {
|
||||
/* The buf is saved here for !STDIN mode */
|
||||
sb.buf_low = (uchar *)mmap(sb.buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset);
|
||||
if (STDIN) {
|
||||
/* NOTE the buf is saved here for STDIN mode */
|
||||
sb.buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
|
||||
/* Better to shrink the window to the largest size that works than fail */
|
||||
if (sb.buf_low == MAP_FAILED) {
|
||||
if (unlikely(!MAXRAM))
|
||||
fatal("Failed to remap ram\n");
|
||||
st->mmap_size = st->mmap_size / 10 * 9;
|
||||
st->mmap_size -= st->mmap_size % control.page_size;
|
||||
round_to_page(&st->mmap_size);
|
||||
if (unlikely(!st->mmap_size))
|
||||
fatal("Unable to mmap any ram\n");
|
||||
goto retry;
|
||||
}
|
||||
} else
|
||||
mmap_stdin(sb.buf_low, st);
|
||||
} else {
|
||||
/* NOTE the buf is saved here for !STDIN mode */
|
||||
if (st->mmap_size < st->chunk_size)
|
||||
print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size);
|
||||
|
||||
if (MAXRAM)
|
||||
print_maxverbose("Succeeded in allocating %lld sized mmap\n", st->mmap_size);
|
||||
/* The buf is saved here for !STDIN mode */
|
||||
sb.buf_low = (uchar *)mmap(sb.buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset);
|
||||
if (sb.buf_low == MAP_FAILED) {
|
||||
st->mmap_size = st->mmap_size / 10 * 9;
|
||||
round_to_page(&st->mmap_size);
|
||||
if (unlikely(!st->mmap_size))
|
||||
fatal("Unable to mmap any ram\n");
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size);
|
||||
|
||||
if (st->mmap_size < st->chunk_size)
|
||||
print_verbose("Compression window is larger than ram allocated, will proceed with unlimited mode possibly much slower\n");
|
||||
if (st->chunk_size > control.ramsize)
|
||||
print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n");
|
||||
|
||||
if (!passes && !STDIN) {
|
||||
passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size);
|
||||
if (passes == 1)
|
||||
print_verbose("Will take 1 pass\n");
|
||||
else
|
||||
print_verbose("Will take %d passes\n", passes);
|
||||
}
|
||||
|
||||
sb.orig_offset = offset;
|
||||
print_maxverbose("Chunk size: %lld\n", st->chunk_size);
|
||||
|
|
@ -871,8 +875,6 @@ retry:
|
|||
gettimeofday(¤t, NULL);
|
||||
/* this will count only when size > window */
|
||||
if (last.tv_sec > 0) {
|
||||
if (!passes)
|
||||
passes = s.st_size / st->chunk_size;
|
||||
elapsed_time = current.tv_sec - start.tv_sec;
|
||||
finish_time = elapsed_time / (pct_base / 100.0);
|
||||
elapsed_hours = (unsigned int)(elapsed_time) / 3600;
|
||||
|
|
|
|||
9
rzip.h
9
rzip.h
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
#define LRZIP_MAJOR_VERSION 0
|
||||
#define LRZIP_MINOR_VERSION 5
|
||||
#define LRZIP_MINOR_SUBVERSION 41
|
||||
#define LRZIP_MINOR_SUBVERSION 42
|
||||
|
||||
#define NUM_STREAMS 2
|
||||
|
||||
|
|
@ -121,7 +121,6 @@ extern int errno;
|
|||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
|
||||
typedef unsigned long long u64;
|
||||
typedef long long int i64;
|
||||
typedef uint16_t u16;
|
||||
typedef uint32_t u32;
|
||||
|
|
@ -231,9 +230,11 @@ struct rzip_control {
|
|||
int compression_level;
|
||||
unsigned char lzma_properties[5]; // lzma properties, encoded
|
||||
double threshold;
|
||||
unsigned long long window;
|
||||
i64 window;
|
||||
unsigned long flags;
|
||||
unsigned long long ramsize;
|
||||
i64 ramsize;
|
||||
i64 max_chunk;
|
||||
i64 max_mmap;
|
||||
int threads;
|
||||
int nice_val; // added for consistency
|
||||
int major_version;
|
||||
|
|
|
|||
31
stream.c
31
stream.c
|
|
@ -644,6 +644,7 @@ static int seekto(struct stream_info *sinfo, i64 pos)
|
|||
}
|
||||
|
||||
static pthread_t *threads;
|
||||
extern const i64 two_gig;
|
||||
|
||||
/* open a set of output streams, compressing with the given
|
||||
compression level and algorithm */
|
||||
|
|
@ -651,7 +652,6 @@ void *open_stream_out(int f, int n, i64 limit)
|
|||
{
|
||||
struct stream_info *sinfo;
|
||||
uchar *testmalloc;
|
||||
unsigned cwindow;
|
||||
int i;
|
||||
|
||||
sinfo = malloc(sizeof(*sinfo));
|
||||
|
|
@ -689,21 +689,9 @@ void *open_stream_out(int f, int n, i64 limit)
|
|||
sinfo->cur_pos = 0;
|
||||
sinfo->fd = f;
|
||||
|
||||
if (BITS32) {
|
||||
/* Largest window we can safely support on 32bit is 2GB */
|
||||
if (!control.window || control.window > 20)
|
||||
control.window = 20;
|
||||
/* Largest window supported by lzma is 300MB */
|
||||
if (LZMA_COMPRESS && control.window > 3)
|
||||
control.window = 3;
|
||||
}
|
||||
cwindow = control.window;
|
||||
|
||||
/* No point making the stream larger than the amount of data */
|
||||
if (cwindow)
|
||||
sinfo->bufsize = MIN(STREAM_BUFSIZE * 10 * cwindow, limit);
|
||||
else
|
||||
sinfo->bufsize = limit;
|
||||
/* Serious limits imposed on 32 bit capabilities */
|
||||
if (BITS32)
|
||||
limit = MIN(limit, two_gig / 3);
|
||||
|
||||
sinfo->initial_pos = lseek(f, 0, SEEK_CUR);
|
||||
|
||||
|
|
@ -717,12 +705,19 @@ void *open_stream_out(int f, int n, i64 limit)
|
|||
* ram. We need enough for the 2 streams and for the compression
|
||||
* backend at most, being conservative. */
|
||||
retest_malloc:
|
||||
testmalloc = malloc(sinfo->bufsize * (n + 1));
|
||||
testmalloc = malloc(limit * (n + 1));
|
||||
if (!testmalloc) {
|
||||
sinfo->bufsize = sinfo->bufsize / 10 * 9;
|
||||
limit = limit / 10 * 9;
|
||||
goto retest_malloc;
|
||||
}
|
||||
free(testmalloc);
|
||||
print_maxverbose("Succeeded in testing %lld sized malloc for back end compression\n", limit * (n + 1));
|
||||
|
||||
/* Largest window supported by lzma is 300MB */
|
||||
if (LZMA_COMPRESS)
|
||||
limit = MIN(limit, 3 * STREAM_BUFSIZE * 10);
|
||||
|
||||
sinfo->bufsize = limit;
|
||||
|
||||
/* Make the bufsize no smaller than STREAM_BUFSIZE. Round up the
|
||||
* bufsize to fit X threads into it */
|
||||
|
|
|
|||
Loading…
Reference in a new issue