Change the LZO testing option to be a bool on/off instead of taking a confusing parameter.

Make the lzo testing message simpler and only appear when max verbose mode is enabled.
This commit is contained in:
Con Kolivas 2011-02-23 01:15:18 +11:00
parent fa34905d9d
commit 94673d3fe3
9 changed files with 37 additions and 75 deletions

View file

@ -1,5 +1,6 @@
lrzip ChangeLog
FEBRUARY 2011, version 0.561 Con Kolivas
* Change the lzo testing to a bool on/off instead of taking a parameter.
* Clean up the messy help output.
* Refuse to read from or write to terminal in stdin/stdout mode.
* Delete temporary files generated when testing from stdin.

42
README
View file

@ -262,41 +262,13 @@ cpu process scheduler how to prioritise workloads, and if your application is
the _only_ thing running it will be no faster at nice -20 nor will it be any
slower at +19.
Q. What is the Threshold option, -T ## (1-10)?
A. It is for adjusting the sensitivity of the LZO test that is used when LZMA
compression is selected. When highly random or already-compressed data chunks
are evaluated for LZMA compression, sometimes LZO compression actually will
create a larger chunk than the original.
The Threshold is used to determine a minimum compression amount relative to
the size of the data being evaluated. A value of 1 is the default. This
means that the compression threshold amount is >0% of the size of the
original data. If the threshold is not achieved, the LZMA compression will not
be done and the chunk will not be compressed. Values can be from 0 (bypass the
test) to 10 (maximum compression efficiency expected). The following table can
be used.
For LZO compressor test
T value Compression % Compression Ratio
0 Ignored
1 0-5% 1.00-1.05 very low compression expected
2 5-10% 1.05-1.10 default value
3 10-20% 1.12-1.25
4 20-30% 1.25-1.43
5 30-40% 1.43-1.66
6 40-50% 1.66-2.00
7 50-60% 2.00-2.50
8 60-70% 2.50-3.33
9 70-80% 3.33-5.00
10 80+% 5x+
Whenever the data chunk does not compress to the Threshold value, no LZMA
compression will be attempted. For example, if you select -T 5, LZMA
compression will be performed if the projected compression ratio is
less than 1.43. Otherwise, data will be written in rzip format. Setting
a very high T value will result in a lot of uncompressed data in the lrzip
file. However, a lot of time will be saved. For most people you shouldn't ever
need to touch this.
Q. What is the LZO Testing option, -T?
A. LZO testing is normally performed for the slower back-end compression of LZMA
and ZPAQ. The reasoning is that if a block is completely incompressible by LZO
then it will also be incompressible by the slower compressors. Thus if a block
fails to be compressed by the very fast LZO, lrzip will not attempt to compress
that block with the slower compressor, thereby saving time. If the -T option is
given, lrzip will bypass the LZO testing and attempt to compress each block
regardless.
Q. Compression and decompression progress on large archives slows down and
speeds up. There's also a jump in the percentage at the end?

View file

@ -19,6 +19,8 @@ lrzip will no longer stupidly sit waiting to read from stdin/stdout when called
from a terminal without other arguments.
Executable size will be slightly smaller due to stripping symbols by default
now.
The -T option no longer takes an argument. It simply denotes that LZO testing
should be disabled.
lrzip-0.560

2
lrztar
View file

@ -30,7 +30,7 @@ function lrztar_local() {
trap '[[ -z $tname ]] || rm -rf "$tname" &> /dev/null' 1 2 3 15
which tar &> /dev/null || { printf "lrztar: no tar in your path\n"; return 1; }
which lrzip &> /dev/null || { printf "lrztar: no lrzip in your path\n"; return 1; }
while getopts w:O:S:DqL:nlbgzUT:N:p:vfodtVhHck x; do
while getopts w:O:S:DqL:nlbgzUTN:p:vfodtVhHck x; do
[[ $x == [otV] ]] || ((v_$x=1)) &> /dev/null \
|| { printf "lrztar: invalid option for lrztar %s\n" "$x"; return 1; }
done

24
main.c
View file

@ -54,7 +54,7 @@ static void usage(void)
print_output(" -L level set lzma/bzip2/gzip compression level (1-9, default 7)\n");
print_output(" -N value Set nice value to value (default 19)\n");
print_output(" -p value Set processor count to override number of threads\n");
print_output(" -T value Compression threshold with LZO test. (0 (nil) - 10 (high), default 1)\n");
print_output(" -T Disable LZO compressibility testing\n");
print_output(" -U Use unlimited window size beyond ramsize (potentially much slower)\n");
print_output(" -w size maximum compression window in hundreds of MB\n");
print_output(" default chosen by heuristic dependent on ram and chosen compression\n");
@ -599,7 +599,7 @@ int main(int argc, char *argv[])
memset(&control, 0, sizeof(control));
control.msgout = stderr;
control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES;
control.flags = FLAG_SHOW_PROGRESS | FLAG_KEEP_FILES | FLAG_THRESHOLD;
control.suffix = ".lrz";
control.outdir = NULL;
control.tmpdir = NULL;
@ -610,7 +610,6 @@ int main(int argc, char *argv[])
control.compression_level = 7;
control.ramsize = get_ram();
control.window = 0;
control.threshold = 1.0; /* default lzo test compression threshold (level 1) with LZMA compression */
/* for testing single CPU */
control.threads = PROCESSORS; /* get CPUs for LZMA */
control.page_size = PAGE_SIZE;
@ -642,7 +641,7 @@ int main(int argc, char *argv[])
else if (!strstr(eptr,"NOCONFIG"))
read_config(&control);
while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:T:N:p:gziHck")) != -1) {
while ((c = getopt(argc, argv, "L:h?dS:tVvDfqo:w:nlbUO:TN:p:gziHck")) != -1) {
switch (c) {
case 'b':
if (control.flags & FLAG_NOT_LZMA)
@ -734,13 +733,7 @@ int main(int argc, char *argv[])
control.flags |= FLAG_TEST_ONLY;
break;
case 'T':
/* invert argument, a threshold of 1 means that the compressed result can be
* 90%-100% of the sample size
*/
control.threshold = atoi(optarg);
if (control.threshold < 0 || control.threshold > 10)
failure("Threshold value must be between 0 and 10\n");
control.threshold = 1.05 - control.threshold / 20;
control.flags &= ~FLAG_THRESHOLD;
break;
case 'U':
control.flags |= FLAG_UNLIMITED;
@ -841,18 +834,15 @@ int main(int argc, char *argv[])
if (!DECOMPRESS && !TEST_ONLY) {
print_verbose("Compression mode is: ");
if (LZMA_COMPRESS)
print_verbose("LZMA. LZO Test Compression Threshold: %.f\n",
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
print_verbose("LZMA. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
else if (LZO_COMPRESS)
print_verbose("LZO\n");
else if (BZIP2_COMPRESS)
print_verbose("BZIP2. LZO Test Compression Threshold: %.f\n",
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
print_verbose("BZIP2. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
else if (ZLIB_COMPRESS)
print_verbose("GZIP\n");
else if (ZPAQ_COMPRESS)
print_verbose("ZPAQ. LZO Test Compression Threshold: %.f\n",
(control.threshold < 1.05 ? 21 - control.threshold * 20 : 0));
print_verbose("ZPAQ. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
else if (NO_COMPRESS)
print_verbose("RZIP pre-processing only\n");
if (control.window)

View file

@ -57,7 +57,7 @@ Low level options:
\-L level set lzma/bzip2/gzip compression level (1\-9, default 7)
\-N value Set nice value to value (default 19)
\-p value Set processor count to override number of threads
\-T value Compression threshold with LZO test. (0 (nil) - 10 (high), default 1)
\-T Disable LZO compressibility testing
\-U Use unlimited window size beyond ramsize (potentially much slower)
\-w size maximum compression window in hundreds of MB
default chosen by heuristic dependent on ram and chosen compression
@ -110,13 +110,15 @@ progressively slower the larger the difference between ram and the file size,
so is best reserved for when the smallest possible size is desired on a very
large file, and the time taken is not important.
.IP
.IP "\fB-T 0\&.\&.10\fP"
Sets the LZO compression threshold when testing a data chunk when slower
compression is used. The threshold level can be from 0 to 10.
This option is used to speed up compression by avoiding doing the slow
compression pass. The reasoning is that if it is completely incompressible
by LZO then it will also be incompressible by them, thereby saving time.
The default is 1.
.IP "\fB-T\fP"
Disables the LZO compressibility testing that is normally performed when a
slower compression back-end is used. LZO testing is normally performed for the
slower back-end compression of LZMA and ZPAQ. The reasoning is that if a block
is completely incompressible by LZO then it will also be incompressible by the
slower compressors. Thus if a block fails to be compressed by the very fast
LZO, lrzip will not attempt to compress that block with the slower compressor,
thereby saving time. If this option is given, lrzip will bypass the LZO testing
and attempt to compress each block regardless.
.IP
.IP "\fB-d\fP"
Decompress. If this option is not used then lrzip looks at

3
rzip.h
View file

@ -217,6 +217,7 @@ static inline i64 get_ram(void)
#define FLAG_MD5 (1 << 17)
#define FLAG_CHECK (1 << 18)
#define FLAG_KEEP_BROKEN (1 << 19)
#define FLAG_THRESHOLD (1 << 20)
#define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX)
#define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS)
@ -243,6 +244,7 @@ static inline i64 get_ram(void)
#define HAS_MD5 (control.flags & FLAG_MD5)
#define CHECK_FILE (control.flags & FLAG_CHECK)
#define KEEP_BROKEN (control.flags & FLAG_KEEP_BROKEN)
#define LZO_TEST (control.flags & FLAG_THRESHOLD)
#define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5))
@ -266,7 +268,6 @@ struct rzip_control {
int compression_level;
i64 overhead; // compressor overhead
unsigned char lzma_properties[5]; // lzma properties, encoded
double threshold;
i64 window;
unsigned long flags;
i64 ramsize;

View file

@ -1364,7 +1364,7 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
int workcounter = 0; /* count # of passes */
lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */
if (control.threshold > 1)
if (!LZO_TEST)
return 1;
wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS);
if (unlikely(wrkmem == NULL))
@ -1377,8 +1377,6 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
if (unlikely(!c_buf))
fatal("Unable to allocate c_buf in lzo_compresses\n");
print_verbose("lzo testing for incompressible data...");
/* Test progressively larger blocks at a time and as soon as anything
compressible is found, jump out as a success */
while (test_len > 0) {
@ -1388,7 +1386,7 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
if (dlen < best_dlen)
best_dlen = dlen; /* save best value */
if ((double) dlen < (double)in_len * control.threshold) {
if (dlen < in_len) {
ret = 1;
break;
}
@ -1401,12 +1399,9 @@ static int lzo_compresses(uchar *s_buf, i64 s_len)
in_len = MIN(test_len, buftest_size);
}
}
if (MAX_VERBOSE)
print_output("%s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
(ret == 0? "FAILED - below threshold" : "OK"), save_len,
print_maxverbose("lzo testing %s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
(ret == 0? "FAILED" : "OK"), save_len,
100 * ((double) best_dlen / (double) in_len), workcounter);
else if (VERBOSE)
print_output("%s\n", (ret == 0? "FAILED - below threshold" : "OK"));
free(wrkmem);
free(c_buf);

7
util.c
View file

@ -172,10 +172,9 @@ void read_config( struct rzip_control *control )
else if (strcasecmp(parametervalue, "lzma"))
failure("CONF.FILE error. Invalid compression method %s specified",parametervalue);
} else if (!strcasecmp(parameter, "testthreshold")) {
control->threshold = atoi(parametervalue);
if (control->threshold < 1 || control->threshold > 10)
failure("CONF.FILE error. Threshold value out of range %d", parametervalue);
control->threshold = 1.05-control->threshold / 20;
/* true by default */
if (!strcasecmp(parametervalue, "false") || !strcasecmp(parametervalue," 0"))
control->flags &= ~FLAG_THRESHOLD;
} else if (!strcasecmp(parameter, "outputdirectory")) {
control->outdir = malloc(strlen(parametervalue) + 2);
if (!control->outdir)