From 011344753ac63d0c724dcb3c24c1fc57e9f6d291 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 22 Feb 2011 15:19:31 +1100 Subject: [PATCH] With lzma and zpaq, the compression overhead per thread is significant. As we can work out what that compression overhead is, we can factor that into testing how much ram we can allocate. There is no advantage to running multiple threads when there is no compression back end so drop to 1 only. Limit ram for compression back end to 1/3 ram regardless for when OSs lie due to heavy overcommit. --- main.c | 10 ++++++++++ rzip.h | 1 + stream.c | 36 +++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/main.c b/main.c index 3b68893..2460acb 100644 --- a/main.c +++ b/main.c @@ -810,6 +810,16 @@ int main(int argc, char *argv[]) if (BITS32) control.ramsize = MAX(control.ramsize - 900000000ll, 900000000ll); + /* Work out the compression overhead per compression thread */ + if (LZMA_COMPRESS) { + int level = control.compression_level * 7 / 9 ? : 1; + i64 dictsize = (level <= 5 ? (1 << (level * 2 + 14)) : + (level == 6 ? (1 << 25) : (1 << 26))); + + control.overhead = (dictsize * 23/ 2) + (4 * 1024 * 1024); + } else if (ZPAQ_COMPRESS) + control.overhead = 112 * 1024 * 1024; + /* OK, if verbosity set, print summary of options selected */ if (!INFO) { if (!TEST_ONLY) diff --git a/rzip.h b/rzip.h index 349d6e7..4e5ca1c 100644 --- a/rzip.h +++ b/rzip.h @@ -266,6 +266,7 @@ struct rzip_control { FILE *msgout; //stream for output messages const char *suffix; int compression_level; + i64 overhead; // compressor overhead unsigned char lzma_properties[5]; // lzma properties, encoded double threshold; i64 window; diff --git a/stream.c b/stream.c index d9ee835..dc36d66 100644 --- a/stream.c +++ b/stream.c @@ -720,9 +720,12 @@ void prepare_streamout_threads(void) /* As we serialise the generation of threads during the rzip * pre-processing stage, it's faster to have one more thread available - * to keep all CPUs busy. */ + * to keep all CPUs busy. There is no point splitting up the chunks + * into multiple threads if there will be no compression back end. */ if (control.threads > 1) ++control.threads; + if (NO_COMPRESS) + control.threads = 1; threads = calloc(sizeof(pthread_t), control.threads); if (unlikely(!threads)) fatal("Unable to calloc threads in prepare_streamout_threads\n"); @@ -780,20 +783,25 @@ void *open_stream_out(int f, int n, i64 limit, char cbytes) } /* Find the largest we can make the window based on ability to malloc - * ram. We need enough for the 2 streams and for the compression - * backend at most, being conservative. We don't need any for the - * backend compression if we won't be doing any. - */ - testbufs = n; - if (!NO_COMPRESS) - testbufs++; + * ram. We need 2 buffers for each compression thread and the overhead + * of each compression back end. No 2nd buf is required when there is + * no back end compression. We limit the total regardless to 1/3 ram + * for when the OS lies due to heavy overcommit. */ + if (NO_COMPRESS) + testbufs = 1; + else + testbufs = 2; /* Serious limits imposed on 32 bit capabilities */ if (BITS32) - limit = MIN(limit, two_gig / testbufs); + limit = MIN(limit, (two_gig / testbufs) - + (control.overhead * control.threads)); + testsize = (limit * testbufs) + (control.overhead * control.threads); + if (testsize > control.ramsize / 3) + limit = (control.ramsize / 3 - (control.overhead * control.threads)) / testbufs; retest_malloc: - testsize = limit * testbufs; + testsize = (limit * testbufs) + (control.overhead * control.threads); testmalloc = malloc(testsize); if (!testmalloc) { limit = limit / 10 * 9; @@ -802,23 +810,25 @@ retest_malloc: free(testmalloc); print_maxverbose("Succeeded in testing %lld sized malloc for back end compression\n", testsize); + sinfo->max_bufsize = limit / control.threads; + /* We start with slightly smaller buffers to start loading CPUs as soon * as possible and make them exponentially larger approaching the * tested maximum size. We ensure the buffers are of a minimum size, * though, as compression efficency drops off dramatically with tiny * buffers. */ if (control.threads > 1) { - sinfo->max_bufsize = limit / control.threads; sinfo->bufsize = sinfo->max_bufsize * 63 / 100; round_to_page(&sinfo->bufsize); sinfo->bufsize = MAX(sinfo->bufsize, STREAM_BUFSIZE); - } + } else + sinfo->bufsize = sinfo->max_bufsize; if (control.threads > 1) print_maxverbose("Using up to %d threads to compress up to %lld bytes each.\n", control.threads, sinfo->max_bufsize); else - print_maxverbose("Using 1 thread to compress up to %lld bytes\n", + print_maxverbose("Using only 1 thread to compress up to %lld bytes\n", sinfo->bufsize); for (i = 0; i < n; i++) {