diff --git a/main.c b/main.c index 6b967e4..355460b 100644 --- a/main.c +++ b/main.c @@ -997,6 +997,7 @@ int main(int argc, char *argv[]) /* Decrease usable ram size on 32 bits due to kernel/userspace split */ if (BITS32) control.ramsize = MAX(control.ramsize - 900000000ll, 900000000ll); + control.maxram = control.ramsize / 3; /* Set the main nice value to half that of the backend threads since * the rzip stage is usually the rate limiting step */ diff --git a/rzip.c b/rzip.c index 9a3dda3..774d4d6 100644 --- a/rzip.c +++ b/rzip.c @@ -780,7 +780,7 @@ void rzip_fd(int fd_in, int fd_out) * allocate 1/3 of it to the main buffer and use a sliding mmap * buffer to work on 2/3 ram size, leaving enough ram for the * compression backends */ - control.max_mmap = control.ramsize / 3; + control.max_mmap = control.maxram; /* On 32 bits we can have a big window with sliding mmap, but can * not enable much per mmap/malloc */ diff --git a/rzip.h b/rzip.h index 175e2aa..5895752 100644 --- a/rzip.h +++ b/rzip.h @@ -270,6 +270,7 @@ struct rzip_control { const char *suffix; int compression_level; i64 overhead; // compressor overhead + i64 maxram; // the largest chunk of ram to allocate unsigned char lzma_properties[5]; // lzma properties, encoded i64 window; unsigned long flags; diff --git a/stream.c b/stream.c index ff3b2a5..dbcd001 100644 --- a/stream.c +++ b/stream.c @@ -763,18 +763,18 @@ void close_streamout_threads(void) /* open a set of output streams, compressing with the given compression level and algorithm */ -void *open_stream_out(int f, int n, i64 limit, char cbytes) +void *open_stream_out(int f, int n, i64 chunk_limit, char cbytes) { struct stream_info *sinfo; + i64 testsize, limit; uchar *testmalloc; - i64 testsize; int i, testbufs; sinfo = calloc(sizeof(struct stream_info), 1); if (unlikely(!sinfo)) return NULL; - sinfo->bufsize = limit; + sinfo->bufsize = limit = chunk_limit; sinfo->chunk_bytes = cbytes; sinfo->num_streams = n; @@ -802,8 +802,19 @@ void *open_stream_out(int f, int n, i64 limit, char cbytes) (control.overhead * control.threads)); testsize = (limit * testbufs) + (control.overhead * control.threads); - if (testsize > control.ramsize / 3) - limit = (control.ramsize / 3 - (control.overhead * control.threads)) / testbufs; + if (testsize > control.maxram) + limit = (control.maxram - (control.overhead * control.threads)) / testbufs; + + /* If we don't have enough ram for the number of threads, decrease the + * number of threads till we do, or only have one thread. */ + while (limit < STREAM_BUFSIZE && limit < chunk_limit) { + if (control.threads > 1) + --control.threads; + else + break; + limit = (control.maxram - (control.overhead * control.threads)) / testbufs; + limit = MIN(limit, chunk_limit); + } retest_malloc: testsize = (limit * testbufs) + (control.overhead * control.threads); testmalloc = malloc(testsize);