Change the LZO testing option to be a bool on/off instead of taking a confusing parameter.

Make the LZO testing message simpler and show it only when max verbose mode is enabled.
This commit is contained in:
Con Kolivas 2011-02-23 01:15:18 +11:00
parent fa34905d9d
commit 94673d3fe3
9 changed files with 37 additions and 75 deletions

View file

@ -1,5 +1,6 @@
lrzip ChangeLog lrzip ChangeLog
FEBRUARY 2011, version 0.561 Con Kolivas FEBRUARY 2011, version 0.561 Con Kolivas
* Change the LZO testing to a bool on/off instead of taking a parameter.
* Clean up the messy help output. * Clean up the messy help output.
* Refuse to read from or write to terminal in stdin/stdout mode. * Refuse to read from or write to terminal in stdin/stdout mode.
* Delete temporary files generated when testing from stdin. * Delete temporary files generated when testing from stdin.

42
README
View file

@ -262,41 +262,13 @@ cpu process scheduler how to prioritise workloads, and if your application is
the _only_ thing running it will be no faster at nice -20 nor will it be any the _only_ thing running it will be no faster at nice -20 nor will it be any
slower at +19. slower at +19.
Q. What is the Threshold option, -T ## (1-10)? Q. What is the LZO Testing option, -T?
A. It is for adjusting the sensitivity of the LZO test that is used when LZMA A. LZO testing is normally performed for the slower back-end compression of LZMA
compression is selected. When highly random or already-compressed data chunks and ZPAQ. The reasoning is that if it is completely incompressible by LZO then
are evaluated for LZMA compression, sometimes LZO compression actually will it will also be incompressible by them. Thus if a block fails to be compressed
create a larger chunk than the original. by the very fast LZO, lrzip will not attempt to compress that block with the
slower compressor, thereby saving time. If this option is enabled, it will
The Threshold is used to determine a minimum compression amount relative to bypass the LZO testing and attempt to compress each block regardless.
the size of the data being evaluated. A value of 1 is the default. This
means that the compression threshold amount is >0% of the size of the
original data. If the threshold is not achieved, the LZMA compression will not
be done and the chunk will not be compressed. Values can be from 0 (bypass the
test) to 10 (maximum compression efficiency expected). The following table can
be used.
For LZO compressor test
T value Compression % Compression Ratio
0 Ignored
1 0-5% 1.00-1.05 very low compression expected
2 5-10% 1.05-1.10 default value
3 10-20% 1.12-1.25
4 20-30% 1.25-1.43
5 30-40% 1.43-1.66
6 40-50% 1.66-2.00
7 50-60% 2.00-2.50
8 60-70% 2.50-3.33
9 70-80% 3.33-5.00
10 80+% 5x+
Whenever the data chunk does not compress to the Threshold value, no LZMA
compression will be attempted. For example, if you select -T 5, LZMA
compression will be performed if the projected compression ratio is
less than 1.43. Otherwise, data will be written in rzip format. Setting
a very high T value will result in a lot of uncompressed data in the lrzip
file. However, a lot of time will be saved. For most people you shouldn't ever
need to touch this.
Q. Compression and decompression progress on large archives slows down and Q. Compression and decompression progress on large archives slows down and
speeds up. There's also a jump in the percentage at the end? speeds up. There's also a jump in the percentage at the end?

View file

@ -19,6 +19,8 @@ lrzip will no longer stupidly sit waiting to read from stdin/stdout when called
from a terminal without other arguments. from a terminal without other arguments.
Executable size will be slightly smaller due to stripping symbols by default Executable size will be slightly smaller due to stripping symbols by default
now. now.
The -T option no longer takes an argument. It simply denotes that LZO testing
should be disabled.
lrzip-0.560 lrzip-0.560

2
lrztar
View file

@ -30,7 +30,7 @@ function lrztar_local() {
trap '[[ -z $tname ]] || rm -rf "$tname" &> /dev/null' 1 2 3 15 trap '[[ -z $tname ]] || rm -rf "$tname" &> /dev/null' 1 2 3 15
which tar &> /dev/null || { printf "lrztar: no tar in your path\n"; return 1; } which tar &> /dev/null || { printf "lrztar: no tar in your path\n"; return 1; }
which lrzip &> /dev/null || { printf "lrztar: no lrzip in your path\n"; return 1; } which lrzip &> /dev/null || { printf "lrztar: no lrzip in your path\n"; return 1; }
while getopts w:O:S:DqL:nlbgzUT:N:p:vfodtVhHck x; do while getopts w:O:S:DqL:nlbgzUTN:p:vfodtVhHck x; do
[[ $x == [otV] ]] || ((v_$x=1)) &> /dev/null \ [[ $x == [otV] ]] || ((v_$x=1)) &> /dev/null \
|| { printf "lrztar: invalid option for lrztar %s\n" "$x"; return 1; } || { printf "lrztar: invalid option for lrztar %s\n" "$x"; return 1; }
done done

24
main.c
View file

@ -54,7 +54,7 @@ static void usage(void)
print_output(" -L level set lzma/bzip2/gzip compression level (1-9, default 7)\n"); print_output(" -L level set lzma/bzip2/gzip compression level (1-9, default 7)\n");
print_output(" -N value Set nice value to value (default 19)\n"); print_output(" -N value Set nice value to value (default 19)\n");
print_output(" -p value Set processor count to override number of threads\n"); print_output(" -p value Set processor count to override number of threads\n");
print_output(" -T value Compression threshold with LZO test. (0 (nil) - 10 (high), default 1)\n"); print_output(" -T Disable LZO compressibility testing\n");
print_output(" -U Use unlimited window size beyond ramsize (potentially much slower)\n"); print_output(" -U Use unlimited window size beyond ramsize (potentially much slower)\n");
print_output(" -w size maximum compression window in hundreds of MB\n"); print_output(" -w size maximum compression window in hundreds of MB\n");
print_output(" default chosen by heuristic dependent on ram and chosen compression\n"); print_output(" default chosen by heuristic dependent on ram and chosen compression\n");
@ -599,7 +599,7 @@ int main(int argc, char *argv[])
memset(&control, 0, sizeof(control)); memset(&control, 0, sizeof(control));
control.msgout = stderr; control.msgout = stderr;
control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES; control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES | FLAG_THRESHOLD;
control.suffix = ".lrz"; control.suffix = ".lrz";
control.outdir = NULL; control.outdir = NULL;
control.tmpdir = NULL; control.tmpdir = NULL;
@ -610,7 +610,6 @@ int main(int argc, char *argv[])
control.compression_level = 7; control.compression_level = 7;
control.ramsize = get_ram(); control.ramsize = get_ram();
control.window = 0; control.window = 0;
control.threshold = 1.0; /* default lzo test compression threshold (level 1) with LZMA compression */
/* for testing single CPU */ /* for testing single CPU */
control.threads = PROCESSORS; /* get CPUs for LZMA */ control.threads = PROCESSORS; /* get CPUs for LZMA */
control.page_size = PAGE_SIZE; control.page_size = PAGE_SIZE;
@ -642,7 +641,7 @@ int main(int argc, char *argv[])
else if (!strstr(eptr,"NOCONFIG")) else if (!strstr(eptr,"NOCONFIG"))
read_config(&control); read_config(&control);
while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:T:N:p:gziHck")) != -1) { while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:TN:p:gziHck")) != -1) {
switch (c) { switch (c) {
case 'b': case 'b':
if (control.flags & FLAG_NOT_LZMA) if (control.flags & FLAG_NOT_LZMA)
@ -734,13 +733,7 @@ int main(int argc, char *argv[])
control.flags |= FLAG_TEST_ONLY; control.flags |= FLAG_TEST_ONLY;
break; break;
case 'T': case 'T':
/* invert argument, a threshold of 1 means that the compressed result can be control.flags &= ~FLAG_THRESHOLD;
* 90%-100% of the sample size
*/
control.threshold = atoi(optarg);
if (control.threshold < 0 || control.threshold > 10)
failure("Threshold value must be between 0 and 10\n");
control.threshold = 1.05 - control.threshold / 20;
break; break;
case 'U': case 'U':
control.flags |= FLAG_UNLIMITED; control.flags |= FLAG_UNLIMITED;
@ -841,18 +834,15 @@ int main(int argc, char *argv[])
if (!DECOMPRESS && !TEST_ONLY) { if (!DECOMPRESS && !TEST_ONLY) {
print_verbose("Compression mode is: "); print_verbose("Compression mode is: ");
if (LZMA_COMPRESS) if (LZMA_COMPRESS)
print_verbose("LZMA. LZO Test Compression Threshold: %.f\n", print_verbose("LZMA. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
else if (LZO_COMPRESS) else if (LZO_COMPRESS)
print_verbose("LZO\n"); print_verbose("LZO\n");
else if (BZIP2_COMPRESS) else if (BZIP2_COMPRESS)
print_verbose("BZIP2. LZO Test Compression Threshold: %.f\n", print_verbose("BZIP2. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
else if (ZLIB_COMPRESS) else if (ZLIB_COMPRESS)
print_verbose("GZIP\n"); print_verbose("GZIP\n");
else if (ZPAQ_COMPRESS) else if (ZPAQ_COMPRESS)
print_verbose("ZPAQ. LZO Test Compression Threshold: %.f\n", print_verbose("ZPAQ. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
else if (NO_COMPRESS) else if (NO_COMPRESS)
print_verbose("RZIP pre-processing only\n"); print_verbose("RZIP pre-processing only\n");
if (control.window) if (control.window)

View file

@ -57,7 +57,7 @@ Low level options:
\-L level set lzma/bzip2/gzip compression level (1\-9, default 7) \-L level set lzma/bzip2/gzip compression level (1\-9, default 7)
\-N value Set nice value to value (default 19) \-N value Set nice value to value (default 19)
\-p value Set processor count to override number of threads \-p value Set processor count to override number of threads
\-T value Compression threshold with LZO test. (0 (nil) - 10 (high), default 1) \-T Disable LZO compressibility testing
\-U Use unlimited window size beyond ramsize (potentially much slower) \-U Use unlimited window size beyond ramsize (potentially much slower)
\-w size maximum compression window in hundreds of MB \-w size maximum compression window in hundreds of MB
default chosen by heuristic dependent on ram and chosen compression default chosen by heuristic dependent on ram and chosen compression
@ -110,13 +110,15 @@ progressively slower the larger the difference between ram and the file size,
so is best reserved for when the smallest possible size is desired on a very so is best reserved for when the smallest possible size is desired on a very
large file, and the time taken is not important. large file, and the time taken is not important.
.IP .IP
.IP "\fB-T 0\&.\&.10\fP" .IP "\fB-T\fP"
Sets the LZO compression threshold when testing a data chunk when slower Disables the LZO compressibility threshold testing when a slower compression
compression is used. The threshold level can be from 0 to 10. back-end is used. LZO testing is normally performed for the slower back-end
This option is used to speed up compression by avoiding doing the slow compression of LZMA and ZPAQ. The reasoning is that if it is completely
compression pass. The reasoning is that if it is completely incompressible incompressible by LZO then it will also be incompressible by them. Thus if a
by LZO then it will also be incompressible by them, thereby saving time. block fails to be compressed by the very fast LZO, lrzip will not attempt to
The default is 1. compress that block with the slower compressor, thereby saving time. If this
option is enabled, it will bypass the LZO testing and attempt to compress each
block regardless.
.IP .IP
.IP "\fB-d\fP" .IP "\fB-d\fP"
Decompress. If this option is not used then lrzip looks at Decompress. If this option is not used then lrzip looks at

3
rzip.h
View file

@ -217,6 +217,7 @@ static inline i64 get_ram(void)
#define FLAG_MD5 (1 << 17) #define FLAG_MD5 (1 << 17)
#define FLAG_CHECK (1 << 18) #define FLAG_CHECK (1 << 18)
#define FLAG_KEEP_BROKEN (1 << 19) #define FLAG_KEEP_BROKEN (1 << 19)
/* LZO compressibility pre-test switch: part of the default flags set in main(), cleared by the -T option. */
#define FLAG_THRESHOLD (1 << 20)
#define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX) #define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX)
#define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS) #define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS)
@ -243,6 +244,7 @@ static inline i64 get_ram(void)
#define HAS_MD5 (control.flags & FLAG_MD5) #define HAS_MD5 (control.flags & FLAG_MD5)
#define CHECK_FILE (control.flags & FLAG_CHECK) #define CHECK_FILE (control.flags & FLAG_CHECK)
#define KEEP_BROKEN (control.flags & FLAG_KEEP_BROKEN) #define KEEP_BROKEN (control.flags & FLAG_KEEP_BROKEN)
/* Non-zero while LZO compressibility testing is enabled, i.e. FLAG_THRESHOLD is still set in control.flags. */
#define LZO_TEST (control.flags & FLAG_THRESHOLD)
#define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5)) #define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5))
@ -266,7 +268,6 @@ struct rzip_control {
int compression_level; int compression_level;
i64 overhead; // compressor overhead i64 overhead; // compressor overhead
unsigned char lzma_properties[5]; // lzma properties, encoded unsigned char lzma_properties[5]; // lzma properties, encoded
double threshold;
i64 window; i64 window;
unsigned long flags; unsigned long flags;
i64 ramsize; i64 ramsize;

View file

@ -1364,7 +1364,7 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
int workcounter = 0; /* count # of passes */ int workcounter = 0; /* count # of passes */
lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */ lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */
if (control.threshold > 1) if (!LZO_TEST)
return 1; return 1;
wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS); wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS);
if (unlikely(wrkmem == NULL)) if (unlikely(wrkmem == NULL))
@ -1377,8 +1377,6 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
if (unlikely(!c_buf)) if (unlikely(!c_buf))
fatal("Unable to allocate c_buf in lzo_compresses\n"); fatal("Unable to allocate c_buf in lzo_compresses\n");
print_verbose("lzo testing for incompressible data...");
/* Test progressively larger blocks at a time and as soon as anything /* Test progressively larger blocks at a time and as soon as anything
compressible is found, jump out as a success */ compressible is found, jump out as a success */
while (test_len > 0) { while (test_len > 0) {
@ -1388,7 +1386,7 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
if (dlen < best_dlen) if (dlen < best_dlen)
best_dlen = dlen; /* save best value */ best_dlen = dlen; /* save best value */
if ((double) dlen < (double)in_len * control.threshold) { if (dlen < in_len) {
ret = 1; ret = 1;
break; break;
} }
@ -1401,12 +1399,9 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
in_len = MIN(test_len, buftest_size); in_len = MIN(test_len, buftest_size);
} }
} }
if (MAX_VERBOSE) print_maxverbose("lzo testing %s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
print_output("%s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n", (ret == 0? "FAILED" : "OK"), save_len,
(ret == 0? "FAILED - below threshold" : "OK"), save_len,
100 * ((double) best_dlen / (double) in_len), workcounter); 100 * ((double) best_dlen / (double) in_len), workcounter);
else if (VERBOSE)
print_output("%s\n", (ret == 0? "FAILED - below threshold" : "OK"));
free(wrkmem); free(wrkmem);
free(c_buf); free(c_buf);

7
util.c
View file

@ -172,10 +172,9 @@ void read_config( struct rzip_control *control )
else if (strcasecmp(parametervalue, "lzma")) else if (strcasecmp(parametervalue, "lzma"))
failure("CONF.FILE error. Invalid compression method %s specified",parametervalue); failure("CONF.FILE error. Invalid compression method %s specified",parametervalue);
} else if (!strcasecmp(parameter, "testthreshold")) { } else if (!strcasecmp(parameter, "testthreshold")) {
control->threshold = atoi(parametervalue); /* true by default */
if (control->threshold < 1 || control->threshold > 10) if (!strcasecmp(parametervalue, "false") || !strcasecmp(parametervalue," 0"))
failure("CONF.FILE error. Threshold value out of range %d", parametervalue); control->flags &= ~FLAG_THRESHOLD;
control->threshold = 1.05-control->threshold / 20;
} else if (!strcasecmp(parameter, "outputdirectory")) { } else if (!strcasecmp(parameter, "outputdirectory")) {
control->outdir = malloc(strlen(parametervalue) + 2); control->outdir = malloc(strlen(parametervalue) + 2);
if (!control->outdir) if (!control->outdir)