diff --git a/ChangeLog b/ChangeLog index 0199515..d9627a7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ lrzip ChangeLog FEBRUARY 2011, version 0.561 Con Kolivas +* Change the lzo testing to a bool on/off instead of taking a parameter. * Clean up the messy help output. * Refuse to read from or write to terminal in stdin/stdout mode. * Delete temporary files generated when testing from stdin. diff --git a/README b/README index de8fd21..1fd880f 100644 --- a/README +++ b/README @@ -262,41 +262,13 @@ cpu process scheduler how to prioritise workloads, and if your application is the _only_ thing running it will be no faster at nice -20 nor will it be any slower at +19. -Q. What is the Threshold option, -T ## (1-10)? -A. It is for adjusting the sensitivity of the LZO test that is used when LZMA -compression is selected. When highly random or already-compressed data chunks -are evaluated for LZMA compression, sometimes LZO compression actually will -create a larger chunk than the original. - -The Threshold is used to determine a minimum compression amount relative to -the size of the data being evaluated. A value of 1 is the default. This -means that the compression threshold amount is >0% of the size of the -original data. If the threshold is not achieved, the LZMA compression will not -be done and the chunk will not be compressed. Values can be from 0 (bypass the -test) to 10 (maximum compression efficiency expected). The following table can -be used. - -For LZO compressor test -T value Compression % Compression Ratio - 0 Ignored - 1 0-5% 1.00-1.05 very low compression expected - 2 5-10% 1.05-1.10 default value - 3 10-20% 1.12-1.25 - 4 20-30% 1.25-1.43 - 5 30-40% 1.43-1.66 - 6 40-50% 1.66-2.00 - 7 50-60% 2.00-2.50 - 8 60-70% 2.50-3.33 - 9 70-80% 3.33-5.00 - 10 80+% 5x+ - -Whenever the data chunk does not compress to the Threshold value, no LZMA -compression will be attempted. 
For example, if you select -T 5, LZMA -compression will be performed if the projected compression ratio is -less than 1.43. Otherwise, data will be written in rzip format. Setting -a very high T value will result in a lot of uncompressed data in the lrzip -file. However, a lot of time will be saved. For most people you shouldn't ever -need to touch this. +Q. What is the LZO Testing option, -T? +A. LZO testing is normally performed for the slower back-end compression of LZMA +and ZPAQ. The reasoning is that if a block is completely incompressible by LZO then +it will also be incompressible by those slower compressors. Thus if a block fails to be compressed +by the very fast LZO, lrzip will not attempt to compress that block with the +slower compressor, thereby saving time. If the -T option is given, lrzip will +bypass the LZO testing and attempt to compress each block regardless. Q. Compression and decompression progress on large archives slows down and speeds up. There's also a jump in the percentage at the end? diff --git a/WHATS-NEW b/WHATS-NEW index ab52012..41459c4 100644 --- a/WHATS-NEW +++ b/WHATS-NEW @@ -19,6 +19,8 @@ lrzip will no longer stupidly sit waiting to read from stdin/stdout when called from a terminal without other arguments. Executable size will be slightly smaller due to stripping symbols by default now. +The -T option no longer takes an argument. It simply denotes that lzo testing +should be disabled. 
lrzip-0.560 diff --git a/lrztar b/lrztar index c0de2b9..7f69321 100755 --- a/lrztar +++ b/lrztar @@ -30,7 +30,7 @@ function lrztar_local() { trap '[[ -z $tname ]] || rm -rf "$tname" &> /dev/null' 1 2 3 15 which tar &> /dev/null || { printf "lrztar: no tar in your path\n"; return 1; } which lrzip &> /dev/null || { printf "lrztar: no lrzip in your path\n"; return 1; } - while getopts w:O:S:DqL:nlbgzUT:N:p:vfodtVhHck x; do + while getopts w:O:S:DqL:nlbgzUTN:p:vfodtVhHck x; do [[ $x == [otV] ]] || ((v_$x=1)) &> /dev/null \ || { printf "lrztar: invalid option for lrztar %s\n" "$x"; return 1; } done diff --git a/main.c b/main.c index 21f6534..b7af53b 100644 --- a/main.c +++ b/main.c @@ -54,7 +54,7 @@ static void usage(void) print_output(" -L level set lzma/bzip2/gzip compression level (1-9, default 7)\n"); print_output(" -N value Set nice value to value (default 19)\n"); print_output(" -p value Set processor count to override number of threads\n"); - print_output(" -T value Compression threshold with LZO test. 
(0 (nil) - 10 (high), default 1)\n"); + print_output(" -T Disable LZO compressibility testing\n"); print_output(" -U Use unlimited window size beyond ramsize (potentially much slower)\n"); print_output(" -w size maximum compression window in hundreds of MB\n"); print_output(" default chosen by heuristic dependent on ram and chosen compression\n"); @@ -599,7 +599,7 @@ int main(int argc, char *argv[]) memset(&control, 0, sizeof(control)); control.msgout = stderr; - control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES; + control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES | FLAG_THRESHOLD; control.suffix = ".lrz"; control.outdir = NULL; control.tmpdir = NULL; @@ -610,7 +610,6 @@ int main(int argc, char *argv[]) control.compression_level = 7; control.ramsize = get_ram(); control.window = 0; - control.threshold = 1.0; /* default lzo test compression threshold (level 1) with LZMA compression */ /* for testing single CPU */ control.threads = PROCESSORS; /* get CPUs for LZMA */ control.page_size = PAGE_SIZE; @@ -642,7 +641,7 @@ int main(int argc, char *argv[]) else if (!strstr(eptr,"NOCONFIG")) read_config(&control); - while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:T:N:p:gziHck")) != -1) { + while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:TN:p:gziHck")) != -1) { switch (c) { case 'b': if (control.flags & FLAG_NOT_LZMA) @@ -734,13 +733,7 @@ int main(int argc, char *argv[]) control.flags |= FLAG_TEST_ONLY; break; case 'T': - /* invert argument, a threshold of 1 means that the compressed result can be - * 90%-100% of the sample size - */ - control.threshold = atoi(optarg); - if (control.threshold < 0 || control.threshold > 10) - failure("Threshold value must be between 0 and 10\n"); - control.threshold = 1.05 - control.threshold / 20; + control.flags &= ~FLAG_THRESHOLD; break; case 'U': control.flags |= FLAG_UNLIMITED; @@ -841,18 +834,15 @@ int main(int argc, char *argv[]) if (!DECOMPRESS && !TEST_ONLY) { print_verbose("Compression mode is: "); if 
(LZMA_COMPRESS) - print_verbose("LZMA. LZO Test Compression Threshold: %.f\n", - (control.threshold < 1.05 ? 21 - control.threshold * 20 : 0)); + print_verbose("LZMA. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled")); else if (LZO_COMPRESS) print_verbose("LZO\n"); else if (BZIP2_COMPRESS) - print_verbose("BZIP2. LZO Test Compression Threshold: %.f\n", - (control.threshold < 1.05 ? 21 - control.threshold * 20 : 0)); + print_verbose("BZIP2. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled")); else if (ZLIB_COMPRESS) print_verbose("GZIP\n"); else if (ZPAQ_COMPRESS) - print_verbose("ZPAQ. LZO Test Compression Threshold: %.f\n", - (control.threshold < 1.05 ? 21 - control.threshold * 20 : 0)); + print_verbose("ZPAQ. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled")); else if (NO_COMPRESS) print_verbose("RZIP pre-processing only\n"); if (control.window) diff --git a/man/lrzip.1 b/man/lrzip.1 index d1f117f..1dfc606 100644 --- a/man/lrzip.1 +++ b/man/lrzip.1 @@ -57,7 +57,7 @@ Low level options: \-L level set lzma/bzip2/gzip compression level (1\-9, default 7) \-N value Set nice value to value (default 19) \-p value Set processor count to override number of threads - \-T value Compression threshold with LZO test. (0 (nil) - 10 (high), default 1) + \-T Disable LZO compressibility testing \-U Use unlimited window size beyond ramsize (potentially much slower) \-w size maximum compression window in hundreds of MB default chosen by heuristic dependent on ram and chosen compression @@ -110,13 +110,15 @@ progressively slower the larger the difference between ram and the file size, so is best reserved for when the smallest possible size is desired on a very large file, and the time taken is not important. .IP -.IP "\fB-T 0\&.\&.10\fP" -Sets the LZO compression threshold when testing a data chunk when slower -compression is used. The threshold level can be from 0 to 10. 
-This option is used to speed up compression by avoiding doing the slow -compression pass. The reasoning is that if it is completely incompressible -by LZO then it will also be incompressible by them, thereby saving time. -The default is 1. +.IP "\fB-T\fP" +Disables the LZO compressibility testing when a slower compression +back-end is used. LZO testing is normally performed for the slower back-end +compression of LZMA and ZPAQ. The reasoning is that if a block is completely +incompressible by LZO then it will also be incompressible by those slower compressors. Thus if a +block fails to be compressed by the very fast LZO, lrzip will not attempt to +compress that block with the slower compressor, thereby saving time. If this +option is given, lrzip will bypass the LZO testing and attempt to compress each +block regardless. .IP .IP "\fB-d\fP" Decompress. If this option is not used then lrzip looks at diff --git a/rzip.h b/rzip.h index 36ae921..78eb8a6 100644 --- a/rzip.h +++ b/rzip.h @@ -217,6 +217,7 @@ static inline i64 get_ram(void) #define FLAG_MD5 (1 << 17) #define FLAG_CHECK (1 << 18) #define FLAG_KEEP_BROKEN (1 << 19) +#define FLAG_THRESHOLD (1 << 20) #define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX) #define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS) @@ -243,6 +244,7 @@ static inline i64 get_ram(void) #define HAS_MD5 (control.flags & FLAG_MD5) #define CHECK_FILE (control.flags & FLAG_CHECK) #define KEEP_BROKEN (control.flags & FLAG_KEEP_BROKEN) +#define LZO_TEST (control.flags & FLAG_THRESHOLD) #define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5)) @@ -266,7 +268,6 @@ struct rzip_control { int compression_level; i64 overhead; // compressor overhead unsigned char lzma_properties[5]; // lzma properties, encoded - double threshold; i64 window; unsigned long flags; i64 ramsize; diff --git a/stream.c b/stream.c index dc36d66..b4b6f39 100644 --- a/stream.c +++ b/stream.c @@ -1364,7 +1364,7 @@ static int 
lzo_compresses(uchar *s_buf, i64 s_len) int workcounter = 0; /* count # of passes */ lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */ - if (control.threshold > 1) + if (!LZO_TEST) return 1; wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS); if (unlikely(wrkmem == NULL)) @@ -1377,8 +1377,6 @@ static int lzo_compresses(uchar *s_buf, i64 s_len) if (unlikely(!c_buf)) fatal("Unable to allocate c_buf in lzo_compresses\n"); - print_verbose("lzo testing for incompressible data..."); - /* Test progressively larger blocks at a time and as soon as anything compressible is found, jump out as a success */ while (test_len > 0) { @@ -1388,7 +1386,7 @@ static int lzo_compresses(uchar *s_buf, i64 s_len) if (dlen < best_dlen) best_dlen = dlen; /* save best value */ - if ((double) dlen < (double)in_len * control.threshold) { + if (dlen < in_len) { ret = 1; break; } @@ -1401,12 +1399,9 @@ static int lzo_compresses(uchar *s_buf, i64 s_len) in_len = MIN(test_len, buftest_size); } } - if (MAX_VERBOSE) - print_output("%s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n", - (ret == 0? "FAILED - below threshold" : "OK"), save_len, + print_maxverbose("lzo testing %s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n", + (ret == 0? "FAILED" : "OK"), save_len, 100 * ((double) best_dlen / (double) in_len), workcounter); - else if (VERBOSE) - print_output("%s\n", (ret == 0? "FAILED - below threshold" : "OK")); free(wrkmem); free(c_buf); diff --git a/util.c b/util.c index d0c7b87..f565b15 100644 --- a/util.c +++ b/util.c @@ -172,10 +172,9 @@ void read_config( struct rzip_control *control ) else if (strcasecmp(parametervalue, "lzma")) failure("CONF.FILE error. Invalid compression method %s specified",parametervalue); } else if (!strcasecmp(parameter, "testthreshold")) { - control->threshold = atoi(parametervalue); - if (control->threshold < 1 || control->threshold > 10) - failure("CONF.FILE error. 
Threshold value out of range %d", parametervalue); - control->threshold = 1.05-control->threshold / 20; + /* true by default */ + if (!strcasecmp(parametervalue, "false") || !strcasecmp(parametervalue," 0")) + control->flags &= ~FLAG_THRESHOLD; } else if (!strcasecmp(parameter, "outputdirectory")) { control->outdir = malloc(strlen(parametervalue) + 2); if (!control->outdir)