diff --git a/ChangeLog b/ChangeLog index bbdcd25..44c2be5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,19 @@ lrzip ChangeLog +NOVEMBER 2010, version 0.540 Con Kolivas +* Massive rewrite of backend decompression phase, implementing multithreading. +This is done by taking each stream of data on read in into separate buffers for +up to as many threads as CPUs. As each thread's data becomes available, feed it +into runzip once it requests more of the stream. Provided there are enough +chunks in the originally compressed data, this provides a massive speedup +potentially proportional to the number of CPUs. The slower the backend +compression, the better the speed up (i.e. zpaq is the best sped up). +* Fix the output of zpaq compress and decompress from trampling on itself and racing and consuming a lot of CPU time printing to the console. +* When limiting cwindow to 6 on 32 bits, ensure that control.window is also set. +* When testing for the maximum size of testmalloc, the multiple used was out by one, so increase it. +* Minor output tweaks. +* Build warning fixes. +* Updated benchmarks. + NOVEMBER 2010, version 0.530 Con Kolivas * Massive rewrite of backend compression phase. Now the stream is split up into as many chunks as there are CPUs, of at least 10MB in size, that are @@ -20,7 +35,6 @@ phase. generates a warning now, not a failure. * Updated docs and benchmarks. - NOVEMBER 2010, version 0.520 Con Kolivas * Distros don't like 3 point version numbering so just repackaged as 0.520. diff --git a/WHATS-NEW b/WHATS-NEW index 20b409f..f82195b 100644 --- a/WHATS-NEW +++ b/WHATS-NEW @@ -1,3 +1,17 @@ +lrzip-0.540 + +MASSIVE MULTITHREADING on the decompression phase. Provided there are enough +chunks of data in the archived file, lrzip will use as many threads as there +are CPUs for the backend decompression. Much like the multithreading on the +compression side, it makes the slower compression algorithms speed up the most. 
+Fixed output from being scrambled and consuming a lot of CPU time on threaded +zpaq compression. +Further fixes to ensure window sizes work on 32 bit machines. +Be more careful about testing for how much ram lrzip can use. +Minor build warning fixes. +Minor tweaks to screen output. +Updated benchmarks. + lrzip-0.530 MASSIVE MULTITHREADING on the compression phase. Lrzip will now use as many diff --git a/configure b/configure index f3d2450..0b4bd9d 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.67 for lrzip 0.530. +# Generated by GNU Autoconf 2.67 for lrzip 0.540. # # Report bugs to . # @@ -551,9 +551,9 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='lrzip' -PACKAGE_TARNAME='lrzip-0.530' -PACKAGE_VERSION='0.530' -PACKAGE_STRING='lrzip 0.530' +PACKAGE_TARNAME='lrzip-0.540' +PACKAGE_VERSION='0.540' +PACKAGE_STRING='lrzip 0.540' PACKAGE_BUGREPORT='kernel@kolivas.org' PACKAGE_URL='' @@ -1221,7 +1221,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures lrzip 0.530 to adapt to many kinds of systems. +\`configure' configures lrzip 0.540 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1269,7 +1269,7 @@ Fine tuning of the installation directories: --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] - --docdir=DIR documentation root [DATAROOTDIR/doc/lrzip-0.530] + --docdir=DIR documentation root [DATAROOTDIR/doc/lrzip-0.540] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] @@ -1286,7 +1286,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of lrzip 0.530:";; + short | recursive ) echo "Configuration of lrzip 0.540:";; esac cat <<\_ACEOF @@ -1375,7 +1375,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -lrzip configure 0.530 +lrzip configure 0.540 generated by GNU Autoconf 2.67 Copyright (C) 2010 Free Software Foundation, Inc. @@ -2014,7 +2014,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by lrzip $as_me 0.530, which was +It was created by lrzip $as_me 0.540, which was generated by GNU Autoconf 2.67. Invocation command line was $ $0 $@ @@ -5324,7 +5324,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by lrzip $as_me 0.530, which was +This file was extended by lrzip $as_me 0.540, which was generated by GNU Autoconf 2.67. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -5386,7 +5386,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -lrzip config.status 0.530 +lrzip config.status 0.540 configured by $0, generated by GNU Autoconf 2.67, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index f642202..3796cb8 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT([lrzip],[0.530],[kernel@kolivas.org],[lrzip-0.530]) +AC_INIT([lrzip],[0.540],[kernel@kolivas.org],[lrzip-0.540]) AC_CONFIG_HEADER(config.h) # see what our system is! AC_CANONICAL_HOST diff --git a/doc/README.benchmarks b/doc/README.benchmarks index 7acaf9d..81b9e3d 100644 --- a/doc/README.benchmarks +++ b/doc/README.benchmarks @@ -13,23 +13,23 @@ backend. linux-2.6.31.tar These are benchmarks performed on a 3GHz quad core Intel Core2 with 8GB ram -using lrzip v0.530 +using lrzip v0.540 Compression Size Percentage Compress Decompress None 365711360 100 7z 53315279 14.6 1m58s 0m5.6s -lrzip 52724172 14.4 1m33s 0m15.6s -lrzip -z 43223954 11.8 3m42s 10m14s -lrzip -l 110893724 30.3 0m21s 0m13.4s -lrzip -g 72746424 19.9 0m25s 0m13.8s -lrzip -b 60774043 16.6 0m29s 0m19.8s +lrzip 52724172 14.4 1m33s 0m13.5s +lrzip -z 43223954 11.8 3m32s 3m40s +lrzip -l 110893724 30.3 0m21s 0m12.1s +lrzip -g 72746424 19.9 0m25s 0m12.3s +lrzip -b 60774043 16.6 0m29s 0m15.2s bzip2 62416571 17.1 0m44s 0m10.5s gzip 80563601 22.0 0m14s 0m3.0s These results are interesting to note the compression of lrzip by default is only slightly better than lzma, but it's significantly faster thanks to its -heavily multithreaded nature. Decompression is slower but I'm working on that. +heavily multithreaded nature. Decompression is slower because of the 2 stages. Zpaq offers by far the best compression but at the cost of extra time. 
However with the heavily threaded nature of lrzip, it's not a lot longer given how much better its compression is. @@ -102,11 +102,11 @@ gzip 2772899756 25.8 05m47s 2m46s bzip2 2704781700 25.2 16m15s 6m19s xz 2272322208 21.2 50m58s 3m52s 7z 2242897134 20.9 26m36s 5m41s -lrzip 1299228155 12.1 16m12s 4m32s -lrzip -M 1079682231 10.1 12m03s 4m05s -lrzip -l 1754694010 16.3 05m30s 3m12s -lrzip -lM 1414958844 13.2 05m15s 2m57s -lrzip -zM 1066902006 9.9 71m20s 04h08m +lrzip 1239219863 11.5 15m45s 3m07s +lrzip -M 1079682231 10.1 12m03s 2m50s +lrzip -l 1754694010 16.3 05m30s 2m23s +lrzip -lM 1414958844 13.2 04m38s 2m20s +lrzip -zM 1066902006 9.9 71m20s 72m0s At this end of the spectrum things really start to heat up. The compression @@ -114,8 +114,7 @@ advantage is massive, with the lzo backend even giving much better results than 7z, and over a ridiculously short time. The improvements in version 0.530 in scalability with multiple CPUs has a huge impact on compression time here, with zpaq almost being as fast on quad core as xz is, yet producing a file -less than half the size. Note that decompression was not multithreaded on -v0.530, hence why zpaq decompression was so slow. +less than half the size. What appears to be a big disappointment is actually zpaq here which takes more than 6 times longer than lzma for a measly .2% improvement. The reason is that most of the advantage here is achieved by the rzip first stage since there's a @@ -131,4 +130,4 @@ Or, to make things easier, just use the default settings all the time and be happy as lzma gives good results. 
:D Con Kolivas -Tue, 13th Nov 2010 +Tue, 16th Nov 2010 diff --git a/main.c b/main.c index 8561f34..072af80 100644 --- a/main.c +++ b/main.c @@ -376,8 +376,10 @@ static void get_fileinfo(void) /* Version < 0.4 had different file format */ if (control.major_version == 0 && control.minor_version < 4) seekspot = 50; - else + else if (control.major_version == 0 && control.minor_version == 4) seekspot = 74; + else + seekspot = 75; if (unlikely(lseek(fd_in, seekspot, SEEK_SET) == -1)) fatal("Failed to lseek in get_fileinfo: %s\n", strerror(errno)); @@ -403,6 +405,8 @@ static void get_fileinfo(void) print_output("rzip + gzip\n"); else if (ctype == CTYPE_ZPAQ) print_output("rzip + zpaq\n"); + else + print_output("Dunno wtf\n"); print_output("Decompressed file size: %llu\n", expected_size); print_output("Compressed file size: %llu\n", infile_size); print_output("Compression ratio: %.3Lf\n", cratio); @@ -704,11 +708,11 @@ int main(int argc, char *argv[]) /* OK, if verbosity set, print summary of options selected */ if (!INFO) { - print_verbose("The following options are in effect for this %s.\n", - DECOMPRESS ? "DECOMPRESSION" : "COMPRESSION"); - if (LZMA_COMPRESS) - print_verbose("Threading is %s. Number of CPUs detected: %d\n", control.threads > 1? "ENABLED" : "DISABLED", - control.threads); + if (!TEST_ONLY) + print_verbose("The following options are in effect for this %s.\n", + DECOMPRESS ? "DECOMPRESSION" : "COMPRESSION"); + print_verbose("Threading is %s. Number of CPUs detected: %d\n", control.threads > 1? 
"ENABLED" : "DISABLED", + control.threads); print_verbose("Detected %lld bytes ram\n", control.ramsize); print_verbose("Compression level %d\n", control.compression_level); print_verbose("Nice Value: %d\n", control.nice_val); @@ -727,7 +731,7 @@ int main(int argc, char *argv[]) print_verbose("Test file integrity\n"); /* show compression options */ - if (!DECOMPRESS) { + if (!DECOMPRESS && !TEST_ONLY) { print_verbose("Compression mode is: "); if (LZMA_COMPRESS) print_verbose("LZMA. LZO Test Compression Threshold: %.f\n", diff --git a/rzip.h b/rzip.h index 3db3237..b1b59f4 100644 --- a/rzip.h +++ b/rzip.h @@ -19,7 +19,7 @@ #define LRZIP_MAJOR_VERSION 0 #define LRZIP_MINOR_VERSION 5 -#define LRZIP_MINOR_SUBVERSION 30 +#define LRZIP_MINOR_SUBVERSION 40 #define NUM_STREAMS 2