mirror of
https://github.com/ckolivas/lrzip.git
synced 2026-02-05 23:14:16 +01:00
Use lz4 for compressibility testing only, which won't break existing archives, but speeds up testing slightly. This makes liblz4 a required library.
This commit is contained in:
parent
224a6306e9
commit
3345a239b7
21
README.md
21
README.md
|
|
@ -49,8 +49,9 @@ A quick guide on building and installing.
|
|||
- libz-dev
|
||||
- libbz2-dev
|
||||
- liblzo2-dev
|
||||
- liblz4-dev
|
||||
- coreutils
|
||||
- nasm on x86, not needed on x64
|
||||
- Optional nasm
|
||||
- git if you want a repo-fresh copy
|
||||
- an OS with the usual *nix headers and libraries
|
||||
|
||||
|
|
@ -247,17 +248,17 @@ lzma compression can't currently be tracked when handing over 100+MB chunks
|
|||
to the lzma library. Therefore you'll see progress percentage until
|
||||
each chunk is handed over to the lzma library.
|
||||
|
||||
> Q: What's this "lzo testing for incompressible data" message?
|
||||
> Q: What's this "lz4 testing for incompressible data" message?
|
||||
|
||||
> A: Other compression is much slower, and lzo is the fastest. To help speed up
|
||||
the process, lzo compression is performed on the data first to test that the
|
||||
> A: Other compression is much slower, and lz4 is the fastest. To help speed up
|
||||
the process, lz4 compression is performed on the data first to test whether the
|
||||
data is at all compressible. If a small block of data is not compressible, it
|
||||
tests progressively larger blocks until it has tested all the data (if it fails
|
||||
to compress at all). If no compressible data is found, then the subsequent
|
||||
compression is not even attempted. This can save a lot of time during the
|
||||
compression phase when there is incompressible data. Theoretically it may be
|
||||
possible that data is compressible by the other backend (zpaq, lzma etc) and
|
||||
not at all by lzo, but in practice such data achieves only minuscule amounts of
|
||||
not at all by lz4, but in practice such data achieves only minuscule amounts of
|
||||
compression which are not worth pursuing. Most of the time it is clear one way
|
||||
or the other that data is compressible or not. If you wish to disable this test
|
||||
and force it to try compressing it anyway, use -T.
|
||||
|
|
@ -357,14 +358,14 @@ cpu process scheduler how to prioritise workloads, and if your application is
|
|||
the _only_ thing running it will be no faster at nice -20 nor will it be any
|
||||
slower at +19.
|
||||
|
||||
> Q: What is the LZO Testing option, -T?
|
||||
> Q: What is the LZ4 Testing option, -T?
|
||||
|
||||
> A: LZO testing is normally performed for the slower back-end compression of
|
||||
LZMA and ZPAQ. The reasoning is that if it is completely incompressible by LZO
|
||||
> A: LZ4 testing is normally performed for the slower back-end compression of
|
||||
LZMA, BZIP2 and ZPAQ. The reasoning is that if a block is completely incompressible by LZ4
|
||||
then it will also be incompressible by them. Thus if a block fails to be
|
||||
compressed by the very fast LZO, lrzip will not attempt to compress that block
|
||||
compressed by the very fast LZ4, lrzip will not attempt to compress that block
|
||||
with the slower compressor, thereby saving time. If this option is enabled, it
|
||||
will bypass the LZO testing and attempt to compress each block regardless.
|
||||
will bypass the LZ4 testing and attempt to compress each block regardless.
|
||||
|
||||
> Q: Compression and decompression progress on large archives slows down and
|
||||
speeds up. There's also a jump in the percentage at the end?
|
||||
|
|
|
|||
|
|
@ -117,6 +117,8 @@ AC_CHECK_LIB(bz2, BZ2_bzBuffToBuffCompress, ,
|
|||
AC_MSG_ERROR([Could not find bz2 library - please install libbz2-dev]))
|
||||
AC_CHECK_LIB(lzo2, lzo1x_1_compress, ,
|
||||
AC_MSG_ERROR([Could not find lzo2 library - please install liblzo2-dev]))
|
||||
AC_CHECK_LIB(lz4, LZ4_compress_default, ,
|
||||
AC_MSG_ERROR([Could not find lz4 library - please install liblz4-dev]))
|
||||
|
||||
AC_CHECK_FUNCS(mmap strerror)
|
||||
AC_CHECK_FUNCS(getopt_long)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2006-2016,2018 Con Kolivas
|
||||
Copyright (C) 2006-2016,2018,2021 Con Kolivas
|
||||
Copyright (C) 2011 Peter Hyman
|
||||
Copyright (C) 1998-2003 Andrew Tridgell
|
||||
|
||||
|
|
@ -308,7 +308,7 @@ typedef sem_t cksem_t;
|
|||
#define HAS_MD5 (control->flags & FLAG_MD5)
|
||||
#define CHECK_FILE (control->flags & FLAG_CHECK)
|
||||
#define KEEP_BROKEN (control->flags & FLAG_KEEP_BROKEN)
|
||||
#define LZO_TEST (control->flags & FLAG_THRESHOLD)
|
||||
#define LZ4_TEST (control->flags & FLAG_THRESHOLD)
|
||||
#define TMP_OUTBUF (control->flags & FLAG_TMP_OUTBUF)
|
||||
#define TMP_INBUF (control->flags & FLAG_TMP_INBUF)
|
||||
#define ENCRYPT (control->flags & FLAG_ENCRYPT)
|
||||
|
|
|
|||
8
main.c
8
main.c
|
|
@ -121,7 +121,7 @@ static void usage(bool compat)
|
|||
print_output(" -p, --threads value Set processor count to override number of threads\n");
|
||||
print_output(" -m, --maxram size Set maximum available ram in hundreds of MB\n");
|
||||
print_output(" overrides detected amount of available ram\n");
|
||||
print_output(" -T, --threshold Disable LZO compressibility testing\n");
|
||||
print_output(" -T, --threshold Disable LZ4 compressibility testing\n");
|
||||
print_output(" -U, --unlimited Use unlimited window size beyond ramsize (potentially much slower)\n");
|
||||
print_output(" -w, --window size maximum compression window in hundreds of MB\n");
|
||||
print_output(" default chosen by heuristic dependent on ram and chosen compression\n");
|
||||
|
|
@ -185,15 +185,15 @@ static void show_summary(void)
|
|||
if (!DECOMPRESS && !TEST_ONLY) {
|
||||
print_verbose("Compression mode is: ");
|
||||
if (LZMA_COMPRESS)
|
||||
print_verbose("LZMA. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
|
||||
print_verbose("LZMA. LZ4 Compressibility testing %s\n", (LZ4_TEST? "enabled" : "disabled"));
|
||||
else if (LZO_COMPRESS)
|
||||
print_verbose("LZO\n");
|
||||
else if (BZIP2_COMPRESS)
|
||||
print_verbose("BZIP2. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
|
||||
print_verbose("BZIP2. LZ4 Compressibility testing %s\n", (LZ4_TEST? "enabled" : "disabled"));
|
||||
else if (ZLIB_COMPRESS)
|
||||
print_verbose("GZIP\n");
|
||||
else if (ZPAQ_COMPRESS)
|
||||
print_verbose("ZPAQ. LZO Compressibility testing %s\n", (LZO_TEST? "enabled" : "disabled"));
|
||||
print_verbose("ZPAQ. LZ4 Compressibility testing %s\n", (LZ4_TEST? "enabled" : "disabled"));
|
||||
else if (NO_COMPRESS)
|
||||
print_verbose("RZIP pre-processing only\n");
|
||||
if (control->window)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
# Copyright
|
||||
#
|
||||
# Copyright (C) 2016 Con Kolivas
|
||||
# Copyright (C) 2021 Con Kolivas
|
||||
#
|
||||
# License
|
||||
#
|
||||
|
|
@ -296,7 +296,7 @@ Overrides detected amount of available ram.
|
|||
|
||||
=item B<-T>
|
||||
|
||||
Disable LZO compressibility testing.
|
||||
Disable LZ4 compressibility testing.
|
||||
|
||||
=item B<--unlimited>
|
||||
|
||||
|
|
|
|||
14
man/lrzip.1
14
man/lrzip.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH "lrzip" "1" "June 2016" "" ""
|
||||
.TH "lrzip" "1" "February 2021" "" ""
|
||||
.SH "NAME"
|
||||
lrzip \- a large-file compression program
|
||||
.SH "SYNOPSIS"
|
||||
|
|
@ -66,7 +66,7 @@ Low level options:
|
|||
\-p, \-\-threads value Set processor count to override number of threads
|
||||
\-m, \-\-maxram size Set maximum available ram in hundreds of MB
|
||||
overrides detected amount of available ram
|
||||
\-T, \-\-threshold Disable LZO compressibility testing
|
||||
\-T, \-\-threshold Disable LZ4 compressibility testing
|
||||
\-U, \-\-unlimited Use unlimited window size beyond ramsize (potentially much slower)
|
||||
\-w, \-\-window size maximum compression window in hundreds of MB
|
||||
default chosen by heuristic dependent on ram and chosen compression
|
||||
|
|
@ -240,13 +240,13 @@ decrease the load on your machine, or to improve compression. Setting it to
|
|||
1 will maximise compression but will not attempt to use more than one CPU.
|
||||
.IP
|
||||
.IP "\fB-T\fP"
|
||||
Disables the LZO compressibility threshold testing when a slower compression
|
||||
back-end is used. LZO testing is normally performed for the slower back-end
|
||||
Disables the LZ4 compressibility threshold testing when a slower compression
|
||||
back-end is used. LZ4 testing is normally performed for the slower back-end
|
||||
compression of LZMA, BZIP2 and ZPAQ. The reasoning is that if a block is completely
|
||||
incompressible by LZO then it will also be incompressible by them. Thus if a
|
||||
block fails to be compressed by the very fast LZO, lrzip will not attempt to
|
||||
incompressible by LZ4 then it will also be incompressible by them. Thus if a
|
||||
block fails to be compressed by the very fast LZ4, lrzip will not attempt to
|
||||
compress that block with the slower compressor, thereby saving time. If this
|
||||
option is enabled, it will bypass the LZO testing and attempt to compress each
|
||||
option is enabled, it will bypass the LZ4 testing and attempt to compress each
|
||||
block regardless.
|
||||
.IP
|
||||
.IP "\fB-U \fP"
|
||||
|
|
|
|||
58
stream.c
58
stream.c
|
|
@ -42,6 +42,7 @@
|
|||
#include <zlib.h>
|
||||
#include <lzo/lzoconf.h>
|
||||
#include <lzo/lzo1x.h>
|
||||
#include <lz4.h>
|
||||
#ifdef HAVE_ERRNO_H
|
||||
# include <errno.h>
|
||||
#endif
|
||||
|
|
@ -143,7 +144,7 @@ bool join_pthread(rzip_control *control, pthread_t th, void **thread_return)
|
|||
/* just to keep things clean, declare function here
|
||||
* but move body to the end since it's a work function
|
||||
*/
|
||||
static int lzo_compresses(rzip_control *control, uchar *s_buf, i64 s_len);
|
||||
static int lz4_compresses(rzip_control *control, uchar *s_buf, i64 s_len);
|
||||
|
||||
/*
|
||||
***** COMPRESSION FUNCTIONS *****
|
||||
|
|
@ -160,7 +161,7 @@ static int zpaq_compress_buf(rzip_control *control, struct compress_thread *cthr
|
|||
i64 c_len, c_size;
|
||||
uchar *c_buf;
|
||||
|
||||
if (!lzo_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
if (!lz4_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
return 0;
|
||||
|
||||
c_size = round_up_page(control, cthread->s_len + 10000);
|
||||
|
|
@ -195,7 +196,7 @@ static int bzip2_compress_buf(rzip_control *control, struct compress_thread *cth
|
|||
int bzip2_ret;
|
||||
uchar *c_buf;
|
||||
|
||||
if (!lzo_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
if (!lz4_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
return 0;
|
||||
|
||||
c_buf = malloc(dlen);
|
||||
|
|
@ -291,7 +292,7 @@ static int lzma_compress_buf(rzip_control *control, struct compress_thread *cthr
|
|||
uchar *c_buf;
|
||||
size_t dlen;
|
||||
|
||||
if (!lzo_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
if (!lz4_compresses(control, cthread->s_buf, cthread->s_len))
|
||||
return 0;
|
||||
|
||||
/* only 7 levels with lzma, scale them */
|
||||
|
|
@ -1875,47 +1876,43 @@ int close_stream_in(rzip_control *control, void *ss)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* As others are slow and lzo very fast, it is worth doing a quick lzo pass
|
||||
to see if there is any compression at all with lzo first. It is unlikely
|
||||
that others will be able to compress if lzo is unable to drop a single byte
|
||||
so do not compress any block that is incompressible by lzo. */
|
||||
static int lzo_compresses(rzip_control *control, uchar *s_buf, i64 s_len)
|
||||
/* As others are slow and lz4 very fast, it is worth doing a quick lz4 pass
|
||||
to see if there is any compression at all with lz4 first. It is unlikely
|
||||
that others will be able to compress if lz4 is unable to drop a single byte
|
||||
so do not compress any block that is incompressible by lz4. */
|
||||
static int lz4_compresses(rzip_control *control, uchar *s_buf, i64 s_len)
|
||||
{
|
||||
lzo_bytep wrkmem = NULL;
|
||||
lzo_uint in_len, test_len = s_len, save_len = s_len;
|
||||
lzo_uint dlen;
|
||||
uchar *c_buf = NULL, *test_buf = s_buf;
|
||||
int in_len, test_len = s_len, save_len = s_len;
|
||||
int dlen;
|
||||
char *c_buf = NULL, *test_buf = (char *)s_buf;
|
||||
/* set minimum buffer test size based on the length of the test stream */
|
||||
unsigned long buftest_size = (test_len > 5 * STREAM_BUFSIZE ? STREAM_BUFSIZE : STREAM_BUFSIZE / 4096);
|
||||
int buftest_size = (test_len > 5 * STREAM_BUFSIZE ? STREAM_BUFSIZE : STREAM_BUFSIZE / 4096);
|
||||
int ret = 0;
|
||||
int workcounter = 0; /* count # of passes */
|
||||
lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */
|
||||
int best_dlen = INT_MAX; /* save best compression estimate */
|
||||
|
||||
if (!LZO_TEST)
|
||||
if (!LZ4_TEST)
|
||||
return 1;
|
||||
wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS);
|
||||
if (unlikely(wrkmem == NULL))
|
||||
fatal_return(("Unable to allocate wrkmem in lzo_compresses\n"), 0);
|
||||
|
||||
in_len = MIN(test_len, buftest_size);
|
||||
dlen = STREAM_BUFSIZE + STREAM_BUFSIZE / 16 + 64 + 3;
|
||||
|
||||
c_buf = malloc(dlen);
|
||||
if (unlikely(!c_buf)) {
|
||||
dealloc(wrkmem);
|
||||
fatal_return(("Unable to allocate c_buf in lzo_compresses\n"), 0);
|
||||
}
|
||||
if (unlikely(!c_buf))
|
||||
fatal_return(("Unable to allocate c_buf in lz4_compresses\n"), 0);
|
||||
|
||||
/* Test progressively larger blocks at a time and as soon as anything
|
||||
compressible is found, jump out as a success */
|
||||
while (test_len > 0) {
|
||||
int lz4_ret;
|
||||
|
||||
workcounter++;
|
||||
lzo1x_1_compress(test_buf, in_len, (uchar *)c_buf, &dlen, wrkmem);
|
||||
|
||||
if (dlen < best_dlen)
|
||||
best_dlen = dlen; /* save best value */
|
||||
|
||||
if (dlen < in_len) {
|
||||
lz4_ret = LZ4_compress_default((const char *)test_buf, c_buf, test_len, dlen);
|
||||
if (!lz4_ret) // Bigger than dlen, no point going further
|
||||
break;
|
||||
if (lz4_ret < best_dlen)
|
||||
best_dlen = lz4_ret;
|
||||
if (lz4_ret < test_len) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
|
|
@ -1928,11 +1925,10 @@ static int lzo_compresses(rzip_control *control, uchar *s_buf, i64 s_len)
|
|||
in_len = MIN(test_len, buftest_size);
|
||||
}
|
||||
}
|
||||
print_maxverbose("lzo testing %s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
|
||||
print_maxverbose("lz4 testing %s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
|
||||
(ret == 0? "FAILED" : "OK"), save_len,
|
||||
100 * ((double) best_dlen / (double) in_len), workcounter);
|
||||
|
||||
dealloc(wrkmem);
|
||||
dealloc(c_buf);
|
||||
|
||||
return ret;
|
||||
|
|
|
|||
Loading…
Reference in a new issue