With lzma and zpaq, the compression overhead per thread is significant.

Because we can work out what that compression overhead is, we can factor it into testing how much RAM we can allocate. There is no advantage to running multiple threads when there is no compression back end, so drop to a single thread in that case. Regardless, limit the RAM used for the compression back end to 1/3 of RAM, since OSs can lie about available memory under heavy overcommit.
2025-12-06 07:12:00 +01:00 · 2011-02-22 15:19:31 +11:00 · 2011-02-22 15:19:31 +11:00 · 011344753a
parent bcb857d934
commit 011344753a
3 changed files with 34 additions and 13 deletions
--- a/main.c
+++ b/main.c
@ -810,6 +810,16 @@ int main(int argc, char *argv[])
 	if (BITS32)
 		control.ramsize = MAX(control.ramsize - 900000000ll, 900000000ll);

+	/* Work out the compression overhead per compression thread */
+	if (LZMA_COMPRESS) {
+		int level = control.compression_level * 7 / 9 ? : 1;
+		i64 dictsize = (level <= 5 ? (1 << (level * 2 + 14)) :
+				(level == 6 ? (1 << 25) : (1 << 26)));
+
+		control.overhead = (dictsize * 23/ 2) + (4 * 1024 * 1024);
+	} else if (ZPAQ_COMPRESS)
+		control.overhead = 112 * 1024 * 1024;
+
 	/* OK, if verbosity set, print summary of options selected */
 	if (!INFO) {
 		if (!TEST_ONLY)
--- a/rzip.h
+++ b/rzip.h
@ -266,6 +266,7 @@ struct rzip_control {
 	FILE *msgout; //stream for output messages
 	const char *suffix;
 	int compression_level;
+	i64 overhead; // compressor overhead
 	unsigned char lzma_properties[5]; // lzma properties, encoded
 	double threshold;
 	i64 window;
--- a/stream.c
+++ b/stream.c
@ -720,9 +720,12 @@ void prepare_streamout_threads(void)

 	/* As we serialise the generation of threads during the rzip
 	 * pre-processing stage, it's faster to have one more thread available
-	 * to keep all CPUs busy. */
+	 * to keep all CPUs busy. There is no point splitting up the chunks
+	 * into multiple threads if there will be no compression back end. */
 	if (control.threads > 1)
 		++control.threads;
+	if (NO_COMPRESS)
+		control.threads = 1;
 	threads = calloc(sizeof(pthread_t), control.threads);
 	if (unlikely(!threads))
 		fatal("Unable to calloc threads in prepare_streamout_threads\n");
@ -780,20 +783,25 @@ void *open_stream_out(int f, int n, i64 limit, char cbytes)
 	}

 	/* Find the largest we can make the window based on ability to malloc
-	 * ram. We need enough for the 2 streams and for the compression
-	 * backend at most, being conservative. We don't need any for the
-	 * backend compression if we won't be doing any.
-	 */
-	testbufs = n;
-	if (!NO_COMPRESS)
-		testbufs++;
+	 * ram. We need 2 buffers for each compression thread and the overhead
+	 * of each compression back end. No 2nd buf is required when there is
+	 * no back end compression. We limit the total regardless to 1/3 ram
+	 * for when the OS lies due to heavy overcommit. */
+	if (NO_COMPRESS)
+		testbufs = 1;
+	else
+		testbufs = 2;

 	/* Serious limits imposed on 32 bit capabilities */
 	if (BITS32)
-		limit = MIN(limit, two_gig / testbufs);
+		limit = MIN(limit, (two_gig / testbufs) -
+			(control.overhead * control.threads));

+	testsize = (limit * testbufs) + (control.overhead * control.threads);
+	if (testsize > control.ramsize / 3)
+		limit = (control.ramsize / 3 - (control.overhead * control.threads)) / testbufs;
 retest_malloc:
-	testsize = limit * testbufs;
+	testsize = (limit * testbufs) + (control.overhead * control.threads);
 	testmalloc = malloc(testsize);
 	if (!testmalloc) {
 		limit = limit / 10 * 9;
@ -802,23 +810,25 @@ retest_malloc:
 	free(testmalloc);
 	print_maxverbose("Succeeded in testing %lld sized malloc for back end compression\n", testsize);

+	sinfo->max_bufsize = limit / control.threads;
+
 	/* We start with slightly smaller buffers to start loading CPUs as soon
 	 * as possible and make them exponentially larger approaching the
 	 * tested maximum size. We ensure the buffers are of a minimum size,
 	 * though, as compression efficency drops off dramatically with tiny
 	 * buffers. */
 	if (control.threads > 1) {
-		sinfo->max_bufsize = limit / control.threads;
 		sinfo->bufsize = sinfo->max_bufsize * 63 / 100;
 		round_to_page(&sinfo->bufsize);
 		sinfo->bufsize = MAX(sinfo->bufsize, STREAM_BUFSIZE);
-	}
+	} else
+		sinfo->bufsize = sinfo->max_bufsize;

 	if (control.threads > 1)
 		print_maxverbose("Using up to %d threads to compress up to %lld bytes each.\n",
 			control.threads, sinfo->max_bufsize);
 	else
-		print_maxverbose("Using 1 thread to compress up to %lld bytes\n",
+		print_maxverbose("Using only 1 thread to compress up to %lld bytes\n",
 			sinfo->bufsize);

 	for (i = 0; i < n; i++) {