Bump version number to 0.542.

Choose sane defaults for memory usage since Linux ludicrously overcommits.
Use sliding mmap for any compression windows greater than 2/3 ram.
Consolidate and simplify testing of allocatable ram.
Minor tweaks to output.
Round up the size of the high buffer in sliding mmap to one page.
Squeeze a little more out of 32 bit compression windows.
This commit is contained in:
Con Kolivas 2010-11-20 01:23:08 +11:00
parent 25e053ed49
commit 75e675e6dd
6 changed files with 116 additions and 91 deletions

View file

@ -1,4 +1,18 @@
lrzip ChangeLog
NOVEMBER 2010, version 0.542 Con Kolivas
* Choose sane defaults for memory usage since Linux ludicrously overcommits.
* Use sliding mmap for any compression windows greater than 2/3 ram.
* Consolidate and simplify testing of allocatable ram.
* Minor tweaks to output.
* Round up the size of the high buffer in sliding mmap to one page.
* Squeeze a little more out of 32 bit compression windows.
NOVEMBER 2010, version 0.541 Con Kolivas
* Fix wrong number of passes reported.
* Re-fix the off-by-one that wasn't off-by-one.
* Limit lzma compression windows to 300MB as per reports of failures with larger
windows.
NOVEMBER 2010, version 0.540 Con Kolivas
* Massive rewrite of backend decompression phase, implementing multithreading.
This is done by taking each stream of data on read in into separate buffers for
@ -7,9 +21,11 @@ into runzip once it is requests more of the stream. Provided there are enough
chunks in the originally compressed data, this provides a massive speedup
potentially proportional to the number of CPUs. The slower the backend
compression, the better the speed up (i.e. zpaq is the best sped up).
* Fix the output of zpaq compress and decompress from trampling on itself and racing and consuming a lot of CPU time printing to the console.
* Fix the output of zpaq compress and decompress from trampling on itself and
racing and consuming a lot of CPU time printing to the console.
* When limiting cwindow to 6 on 32 bits, ensure that control.window is also set.
* When testing for the maximum size of testmalloc, the multiple used was out by one, so increase it.
* When testing for the maximum size of testmalloc, the multiple used was out by
one, so increase it.
* Minor output tweaks.
* Build warning fixes.
* Updated benchmarks.

View file

@ -1,3 +1,14 @@
lrzip-0.542
Lrzip will now try to select sane defaults for memory usage in cases where the
virtual memory heavily overcommits (eg. Linux) as this seriously slows down
compression.
For compression windows larger than 2/3 ram, lrzip will now use a sliding mmap
buffer for better performance.
The progress output is more informative in max verbose mode, and will no longer
do more passes than it estimates.
32 bit machines should be able to use slightly larger windows.
lrzip-0.540
MASSIVE MULTITHREADING on the decompression phase. Provided there are enough

View file

@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
AC_INIT([lrzip],[0.541],[kernel@kolivas.org],[lrzip-0.541])
AC_INIT([lrzip],[0.542],[kernel@kolivas.org],[lrzip-0.542])
AC_CONFIG_HEADER(config.h)
# see what our system is!
AC_CANONICAL_HOST

134
rzip.c
View file

@ -119,7 +119,7 @@ static void remap_low_sb(void)
top = 1;
}
round_to_page(&new_offset);
print_maxverbose("Sliding main buffer \n");
print_maxverbose("Sliding main buffer to offset %lld\n", new_offset);
if (unlikely(munmap(sb.buf_low, sb.size_low)))
fatal("Failed to munmap in remap_low_sb\n");
sb.offset_low = new_offset;
@ -673,8 +673,11 @@ static void mmap_stdin(uchar *buf, struct rzip_state *st)
static void init_sliding_mmap(struct rzip_state *st, int fd_in, i64 offset)
{
/* Initialise the high buffer */
if (UNLIMITED) {
if (!STDIN) {
sb.high_length = 65536;
/* Round up to the next biggest page size */
if (sb.high_length % control.page_size)
sb.high_length += control.page_size - (sb.high_length % control.page_size);
sb.buf_high = (uchar *)mmap(NULL, sb.high_length, PROT_READ, MAP_SHARED, fd_in, offset);
if (unlikely(sb.buf_high == MAP_FAILED))
fatal("Unable to mmap buf_high in init_sliding_mmap\n");
@ -699,13 +702,13 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
if (unlikely(!st->ss))
fatal("Failed to open streams in rzip_chunk\n");
print_verbose("Performing rzip pre-processing phase\n");
print_verbose("Beginning rzip pre-processing phase\n");
hash_search(st, pct_base, pct_multiple);
/* unmap buffer before closing and reallocating streams */
if (unlikely(munmap(sb.buf_low, sb.size_low)))
fatal("Failed to munmap in rzip_chunk\n");
if (UNLIMITED) {
if (!STDIN) {
if (unlikely(munmap(sb.buf_high, sb.size_high)))
fatal("Failed to munmap in rzip_chunk\n");
}
@ -753,17 +756,34 @@ void rzip_fd(int fd_in, int fd_out)
} else
control.st_size = 0;
/* Optimal use of ram involves no more than 2/3 of it, so if we
* expressly request more with -M or -U, use a sliding mmap */
control.max_mmap = control.ramsize / 3 * 2;
if (MAXRAM)
control.max_chunk = control.ramsize;
else
control.max_chunk = control.max_mmap;
/* On 32 bits we can have a big window with sliding mmap, but can
* not enable much per mmap/malloc */
if (BITS32)
control.max_mmap = MIN(control.max_mmap, two_gig / 3);
round_to_page(&control.max_chunk);
round_to_page(&control.max_mmap);
if (UNLIMITED)
control.max_chunk = control.st_size;
if (control.window)
chunk_window = control.window * CHUNK_MULTIPLE;
else {
if (STDIN)
chunk_window = control.ramsize;
else
chunk_window = len;
}
if (chunk_window < len)
chunk_window -= chunk_window % control.page_size;
st->chunk_size = chunk_window;
else
chunk_window = control.max_chunk;
if (!STDIN)
st->chunk_size = MIN(chunk_window, len);
else
st->chunk_size = chunk_window;
if (st->chunk_size < len)
round_to_page(&st->chunk_size);
st->level = &levels[control.compression_level];
st->fd_in = fd_in;
@ -783,69 +803,53 @@ void rzip_fd(int fd_in, int fd_out)
i64 offset = s.st_size - len;
int bits = 8;
/* Flushing the dirty data will decrease our chances of
* running out of memory when we allocate ram again on the
* next chunk. It will also prevent thrashing on-disk due to
* concurrent reads and writes if we're on the same device. */
if (last_chunk)
print_verbose("Flushing data to disk.\n");
fsync(fd_out);
if (st->chunk_size > len && !STDIN)
st->chunk_size = len;
st->mmap_size = st->chunk_size;
if (BITS32 && st->mmap_size > two_gig) {
print_verbose("Limiting to 2GB due to 32 bit limitations\n");
st->mmap_size = two_gig;
st->chunk_size = control.max_chunk;
st->mmap_size = control.max_mmap;
if (!STDIN) {
st->chunk_size = MIN(st->chunk_size, len);
st->mmap_size = MIN(st->mmap_size, len);
}
retry:
/* Mmapping anonymously first will tell us how much ram we can use in
* advance and zeroes it which has a defragmenting effect on ram
* before the real read in. */
sb.buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
/* Better to shrink the window to the largest size that works than fail */
if (sb.buf_low == MAP_FAILED) {
st->mmap_size = st->mmap_size / 10 * 9;
st->mmap_size -= st->mmap_size % control.page_size;
if (unlikely(!st->mmap_size))
fatal("Unable to mmap any ram\n");
goto retry;
}
/* NOTE the buf is saved here for STDIN mode */
if (!STDIN) {
if (unlikely(munmap(sb.buf_low, st->mmap_size)))
fatal("Failed to munmap\n");
}
if (!MAXRAM) {
print_maxverbose("Succeeded in allocating %lld sized mmap\n", st->mmap_size);
if (!UNLIMITED)
st->chunk_size = st->mmap_size;
} else
st->mmap_size = st->chunk_size;
if (!STDIN) {
/* The buf is saved here for !STDIN mode */
sb.buf_low = (uchar *)mmap(sb.buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset);
if (STDIN) {
/* NOTE the buf is saved here for STDIN mode */
sb.buf_low = mmap(NULL, st->mmap_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
/* Better to shrink the window to the largest size that works than fail */
if (sb.buf_low == MAP_FAILED) {
if (unlikely(!MAXRAM))
fatal("Failed to remap ram\n");
st->mmap_size = st->mmap_size / 10 * 9;
st->mmap_size -= st->mmap_size % control.page_size;
round_to_page(&st->mmap_size);
if (unlikely(!st->mmap_size))
fatal("Unable to mmap any ram\n");
goto retry;
}
} else
mmap_stdin(sb.buf_low, st);
} else {
/* NOTE the buf is saved here for !STDIN mode */
if (st->mmap_size < st->chunk_size)
print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size);
if (MAXRAM)
print_maxverbose("Succeeded in allocating %lld sized mmap\n", st->mmap_size);
/* The buf is saved here for !STDIN mode */
sb.buf_low = (uchar *)mmap(sb.buf_low, st->mmap_size, PROT_READ, MAP_SHARED, fd_in, offset);
if (sb.buf_low == MAP_FAILED) {
st->mmap_size = st->mmap_size / 10 * 9;
round_to_page(&st->mmap_size);
if (unlikely(!st->mmap_size))
fatal("Unable to mmap any ram\n");
goto retry;
}
}
print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size);
if (st->mmap_size < st->chunk_size)
print_verbose("Compression window is larger than ram allocated, will proceed with unlimited mode possibly much slower\n");
if (st->chunk_size > control.ramsize)
print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n");
if (!passes && !STDIN) {
passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size);
if (passes == 1)
print_verbose("Will take 1 pass\n");
else
print_verbose("Will take %d passes\n", passes);
}
sb.orig_offset = offset;
print_maxverbose("Chunk size: %lld\n", st->chunk_size);
@ -871,8 +875,6 @@ retry:
gettimeofday(&current, NULL);
/* this will count only when size > window */
if (last.tv_sec > 0) {
if (!passes)
passes = s.st_size / st->chunk_size;
elapsed_time = current.tv_sec - start.tv_sec;
finish_time = elapsed_time / (pct_base / 100.0);
elapsed_hours = (unsigned int)(elapsed_time) / 3600;

9
rzip.h
View file

@ -19,7 +19,7 @@
#define LRZIP_MAJOR_VERSION 0
#define LRZIP_MINOR_VERSION 5
#define LRZIP_MINOR_SUBVERSION 41
#define LRZIP_MINOR_SUBVERSION 42
#define NUM_STREAMS 2
@ -121,7 +121,6 @@ extern int errno;
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
typedef unsigned long long u64;
typedef long long int i64;
typedef uint16_t u16;
typedef uint32_t u32;
@ -231,9 +230,11 @@ struct rzip_control {
int compression_level;
unsigned char lzma_properties[5]; // lzma properties, encoded
double threshold;
unsigned long long window;
i64 window;
unsigned long flags;
unsigned long long ramsize;
i64 ramsize;
i64 max_chunk;
i64 max_mmap;
int threads;
int nice_val; // added for consistency
int major_version;

View file

@ -644,6 +644,7 @@ static int seekto(struct stream_info *sinfo, i64 pos)
}
static pthread_t *threads;
extern const i64 two_gig;
/* open a set of output streams, compressing with the given
compression level and algorithm */
@ -651,7 +652,6 @@ void *open_stream_out(int f, int n, i64 limit)
{
struct stream_info *sinfo;
uchar *testmalloc;
unsigned cwindow;
int i;
sinfo = malloc(sizeof(*sinfo));
@ -689,21 +689,9 @@ void *open_stream_out(int f, int n, i64 limit)
sinfo->cur_pos = 0;
sinfo->fd = f;
if (BITS32) {
/* Largest window we can safely support on 32bit is 2GB */
if (!control.window || control.window > 20)
control.window = 20;
/* Largest window supported by lzma is 300MB */
if (LZMA_COMPRESS && control.window > 3)
control.window = 3;
}
cwindow = control.window;
/* No point making the stream larger than the amount of data */
if (cwindow)
sinfo->bufsize = MIN(STREAM_BUFSIZE * 10 * cwindow, limit);
else
sinfo->bufsize = limit;
/* Serious limits imposed on 32 bit capabilities */
if (BITS32)
limit = MIN(limit, two_gig / 3);
sinfo->initial_pos = lseek(f, 0, SEEK_CUR);
@ -717,12 +705,19 @@ void *open_stream_out(int f, int n, i64 limit)
* ram. We need enough for the 2 streams and for the compression
* backend at most, being conservative. */
retest_malloc:
testmalloc = malloc(sinfo->bufsize * (n + 1));
testmalloc = malloc(limit * (n + 1));
if (!testmalloc) {
sinfo->bufsize = sinfo->bufsize / 10 * 9;
limit = limit / 10 * 9;
goto retest_malloc;
}
free(testmalloc);
print_maxverbose("Succeeded in testing %lld sized malloc for back end compression\n", limit * (n + 1));
/* Largest window supported by lzma is 300MB */
if (LZMA_COMPRESS)
limit = MIN(limit, 3 * STREAM_BUFSIZE * 10);
sinfo->bufsize = limit;
/* Make the bufsize no smaller than STREAM_BUFSIZE. Round up the
* bufsize to fit X threads into it */