From 8853f2e4499252c982c85ae4838a91474a7df564 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 10 Jun 2016 12:57:34 +1000 Subject: [PATCH 1/5] Add initial argument processing for recursive option --- main.c | 21 +++- regression_test.sh | 239 --------------------------------------------- 2 files changed, 16 insertions(+), 244 deletions(-) delete mode 100755 regression_test.sh diff --git a/main.c b/main.c index b149717..84bf75d 100644 --- a/main.c +++ b/main.c @@ -65,7 +65,7 @@ static rzip_control base_control, local_control, *control; static void usage(bool compat) { print_output("lrz%s version %s\n", compat ? "" : "ip", PACKAGE_VERSION); - print_output("Copyright (C) Con Kolivas 2006-2015\n"); + print_output("Copyright (C) Con Kolivas 2006-2016\n"); print_output("Based on rzip "); print_output("Copyright (C) Andrew Tridgell 1998-2003\n\n"); print_output("Usage: lrz%s [options] \n", compat ? "" : "ip"); @@ -85,6 +85,7 @@ static void usage(bool compat) print_output(" -P, --progress show compression progress\n"); } else print_output(" -q, --quiet don't show compression progress\n"); + print_output(" -r, --recursive operate recursively on directories\n"); print_output(" -t, --test test compressed file integrity\n"); print_output(" -v[v%s], --verbose Increase verbosity\n", compat ? 
"v" : ""); print_output(" -V, --version show version\n"); @@ -229,6 +230,7 @@ static struct option long_options[] = { {"threads", required_argument, 0, 'p'}, /* 20 */ {"progress", no_argument, 0, 'P'}, {"quiet", no_argument, 0, 'q'}, + {"recursive", no_argument, 0, 'r'}, {"suffix", required_argument, 0, 'S'}, {"test", no_argument, 0, 't'}, {"threshold", required_argument, 0, 'T'}, /* 25 */ @@ -252,7 +254,7 @@ static void set_stdout(struct rzip_control *control) int main(int argc, char *argv[]) { - bool lrzcat = false, compat = false; + bool lrzcat = false, compat = false, recurse = false; struct timeval start_time, end_time; struct sigaction handler; double seconds,total_time; // for timers @@ -294,7 +296,7 @@ int main(int argc, char *argv[]) else if (!strstr(eptr,"NOCONFIG")) read_config(control); - while ((c = getopt_long(argc, argv, "bcCdDefghHikKlL:nN:o:O:pPqS:tTUm:vVw:z?123456789", long_options, &i)) != -1) { + while ((c = getopt_long(argc, argv, "bcCdDefghHikKlL:nN:o:O:pPqrS:tTUm:vVw:z?123456789", long_options, &i)) != -1) { switch (c) { case 'b': if (control->flags & FLAG_NOT_LZMA) @@ -400,6 +402,9 @@ int main(int argc, char *argv[]) case 'q': control->flags &= ~FLAG_SHOW_PROGRESS; break; + case 'r': + recurse = true; + break; case 'S': if (control->outname) failure("Specified output filename already, can't specify an extension.\n"); @@ -462,8 +467,12 @@ int main(int argc, char *argv[]) argc -= optind; argv += optind; - if (control->outname && argc > 1) - failure("Cannot specify output filename with more than 1 file\n"); + if (control->outname) { + if (argc > 1) + failure("Cannot specify output filename with more than 1 file\n"); + if (recurse) + failure("Cannot specify output filename with recursive\n"); + } if (VERBOSE && !SHOW_PROGRESS) { print_err("Cannot have -v and -q options. 
-v wins.\n"); @@ -514,6 +523,8 @@ int main(int argc, char *argv[]) } } + if (recurse && (STDIN || STDOUT)) + failure("Cannot use -r recursive with STDIO\n"); if (INFO && STDIN) failure("Will not get file info from STDIN\n"); diff --git a/regression_test.sh b/regression_test.sh deleted file mode 100755 index 54c6c38..0000000 --- a/regression_test.sh +++ /dev/null @@ -1,239 +0,0 @@ -#!/bin/bash -#Very basic regression testing does a number of regular compression / -#decompression / test cycles +/- STDIN +/- STDOUT and with the different -#compression backends. -#Run it with -# regression_test.sh filename -#where filename is any random file to test with (big or small depending on -#what's being tested. - -infile=$1 - -end(){ - rm -f lrztest lrztest.lrz -} - -if [ ! -e $infile ]; then - echo $infile does not exist, exiting - exit 1 -fi - -if [ -f lrztest ]; then - echo lrztest file exists, exiting - exit 1 -fi - -if [ -f lrztest.lrz ]; then - echo lrztest.lrz file exists, exiting - exit 1 -fi - -trap 'echo "ABORTING";end;exit' 1 2 15 - -echo testing compression from stdin -./lrzip -vvlfo lrztest.lrz < $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing compression from stdin - end - exit 1 -fi -rm lrztest.lrz - -echo testing compression to stdout -./lrzip -vvlo - $infile > lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing compression to stdout - end - exit 1 -fi - -rm lrztest.lrz -echo testing compression from stdin to stdout -./lrzip -vvl < $infile > lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing compression from stdin to stdout - end - exit 1 -fi - -rm lrztest.lrz -echo testing standard compression -./lrzip -vvlfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing standard compression - end - exit 1 -fi - -echo testing standard decompression -./lrzip -vvdo lrztest lrztest.lrz - -if [ $? -ne 0 ] || [ ! 
-f lrztest ];then - echo FAILED testing standard decompression - end - exit 1 -fi - -rm lrztest -echo testing standard decompression with file checking -./lrzip -vvdfco lrztest lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest ];then - echo FAILED testing standard decompression with file checking - end - exit 1 -fi - -rm lrztest -echo testing decompression from stdin -./lrzip -vvfo lrztest -d < lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest ];then - echo FAILED testing decompression from stdin - end - exit 1 -fi - -rm lrztest -echo testing decompression to stdout -./lrzip -vvdo - lrztest.lrz > lrztest - -if [ $? -ne 0 ] || [ ! -f lrztest ];then - echo FAILED testing decompression to stdout - end - exit 1 -fi - -rm lrztest -echo testing decompression from stdin to stdout -./lrzip -vvd < lrztest.lrz > lrztest - -if [ $? -ne 0 ] || [ ! -f lrztest ];then - echo FAILED testing decompression from stdin to stdout - end - exit 1 -fi - -rm lrztest -echo testing testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing testing - end - exit 1 -fi - -echo testing testing from stdin -./lrzip -vvt < lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing testing from stdin - end - exit 1 -fi - -rm lrztest.lrz -echo testing rzip only compression -./lrzip -vvnfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing rzip only compression - end - exit 1 -fi - -echo testing rzip only testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing rzip only testing - end - exit 1 -fi - -rm lrztest.lrz -echo testing lzma compression -./lrzip -vvfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing lzma compression - end - exit 1 -fi - -echo testing lzma testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! 
-f lrztest.lrz ];then - echo FAILED testing lzma testing - end - exit 1 -fi - -rm lrztest.lrz -echo testing gzip compression -./lrzip -vvgfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing gzip compression - end - exit 1 -fi - -echo testing gzip testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing gzip testing - end - exit 1 -fi - -rm lrztest.lrz -echo testing bzip2 compression -./lrzip -vvbfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing bzip2 compression - end - exit 1 -fi - -echo testing bzip2 testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing bzip2 testing - end - exit 1 -fi - -rm lrztest.lrz -echo testing zpaq compression -./lrzip -vvzfo lrztest.lrz $infile - -if [ $? -ne 0 ] || [ ! -f lrztest.lrz ];then - echo FAILED testing zpaq compression - end - exit 1 -fi - -echo testing zpaq testing -./lrzip -vvt lrztest.lrz - -if [ $? -ne 0 ] || [ ! 
-f lrztest.lrz ];then - echo FAILED testing zpaq testing - end - exit 1 -fi - -end - -echo ALL TESTS SUCCESSFUL - -exit 0 From 7a03965214c89734cb80b4481adf11f419dd6ae4 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 10 Jun 2016 16:10:03 +1000 Subject: [PATCH 2/5] Implement gzip compatible -r recursive option --- main.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 11 deletions(-) diff --git a/main.c b/main.c index 84bf75d..e45595a 100644 --- a/main.c +++ b/main.c @@ -1,5 +1,5 @@ /* - Copyright (C) 2006-2015 Con Kolivas + Copyright (C) 2006-2016 Con Kolivas Copyright (C) 2011 Peter Hyman Copyright (C) 1998-2003 Andrew Tridgell @@ -49,6 +49,7 @@ # include #endif +#include #include #include @@ -60,6 +61,8 @@ /* needed for CRC routines */ #include "lzma/C/7zCrc.h" +#define MAX_PATH_LEN 4096 + static rzip_control base_control, local_control, *control; static void usage(bool compat) @@ -252,6 +255,38 @@ static void set_stdout(struct rzip_control *control) register_outputfile(control, control->msgout); } +/* Recursively enter all directories, adding all regular files to the dirlist array. indir is the directory to scan. *dirlist is a flat buffer of MAX_PATH_LEN-byte slots, one path per slot, grown by one slot with realloc for each file added; *entries is the number of slots in use and is incremented for each file added. The entries . and .. are skipped; anything that is neither a directory nor a regular file is reported with print_err and skipped. NOTE(review): sprintf into fname[MAX_PATH_LEN] is unbounded, so indir/d_name combinations that do not fit in MAX_PATH_LEN bytes overflow the stack buffer; the realloc result is stored and used without a NULL check; the slot size arithmetic is done in int. A bounded snprintf with a truncation check and a checked realloc would close these. Whether dirp leaks on the failure() paths depends on whether failure() returns; not visible here. */ +static void recurse_dirlist(char *indir, char **dirlist, int *entries) +{ + char fname[MAX_PATH_LEN]; + struct stat istat; + struct dirent *dp; + DIR *dirp; + + dirp = opendir(indir); + if (unlikely(!dirp)) + failure("Unable to open directory %s\n", indir); + while ((dp = readdir(dirp)) != NULL) { + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + sprintf(fname, "%s/%s", indir, dp->d_name); + if (unlikely(stat(fname, &istat))) + failure("Unable to stat file %s\n", fname); + if (S_ISDIR(istat.st_mode)) { + recurse_dirlist(fname, dirlist, entries); + continue; + } + if (!S_ISREG(istat.st_mode)) { + print_err("Not regular file %s\n", fname); + continue; + } + print_maxverbose("Added file %s\n", fname); + *dirlist = realloc(*dirlist, MAX_PATH_LEN * (*entries + 1)); + strcpy(*dirlist + MAX_PATH_LEN * 
(*entries)++, fname); + } + closedir(dirp); +} + int main(int argc, char *argv[]) { bool lrzcat = false, compat = false, recurse = false; @@ -506,27 +541,49 @@ int main(int argc, char *argv[]) /* One extra iteration for the case of no parameters means we will default to stdin/out */ for (i = 0; i <= argc; i++) { + char *dirlist = NULL, *infile = NULL; + int direntries = 0, curentry = 0; + if (i < argc) - control->infile = argv[i]; + infile = argv[i]; else if (!(i == 0 && STDIN)) break; - if (control->infile) { - if ((strcmp(control->infile, "-") == 0)) + if (infile) { + if ((strcmp(infile, "-") == 0)) control->flags |= FLAG_STDIN; else { - struct stat infile_stat; + bool isdir = false; + struct stat istat; - stat(control->infile, &infile_stat); - if (unlikely(S_ISDIR(infile_stat.st_mode))) - failure("lrzip only works directly on FILES.\n" - "Use lrztar or pipe through tar for compressing directories.\n"); + if (unlikely(stat(infile, &istat))) + failure("Failed to stat %s\n", infile); + isdir = S_ISDIR(istat.st_mode); + if (!recurse && (isdir || !S_ISREG(istat.st_mode))) { + failure("lrzip only works directly on regular FILES.\n" + "Use -r recursive, lrztar or pipe through tar for compressing directories.\n"); + } + if (recurse && !isdir) + failure("%s not a directory, -r recursive needs a directory\n", infile); } } - if (recurse && (STDIN || STDOUT)) - failure("Cannot use -r recursive with STDIO\n"); + if (recurse) { + if (unlikely(STDIN || STDOUT)) + failure("Cannot use -r recursive with STDIO\n"); + recurse_dirlist(infile, &dirlist, &direntries); + } + if (INFO && STDIN) failure("Will not get file info from STDIN\n"); +recursion: + if (recurse) { + if (curentry >= direntries) { + free(dirlist); + break; + } + infile = dirlist + MAX_PATH_LEN * curentry++; + } + control->infile = infile; /* If no output filename is specified, and we're using * stdin, use stdout */ @@ -602,6 +659,8 @@ int main(int argc, char *argv[]) seconds = total_time - hours * 3600 - minutes * 
60; if (!INFO) print_progress("Total time: %02d:%02d:%05.2f\n", hours, minutes, seconds); + if (recurse) + goto recursion; } return 0; From 3e178b4edecfacf1d75a8e566d659be17cddf313 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 10 Jun 2016 16:17:11 +1000 Subject: [PATCH 3/5] Add documentation for recursive mode --- man/lrz.1.pod | 9 +++++---- man/lrzip.1 | 10 +++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/man/lrz.1.pod b/man/lrz.1.pod index 2ac4746..3a795b9 100644 --- a/man/lrz.1.pod +++ b/man/lrz.1.pod @@ -1,6 +1,6 @@ # Copyright # -# Copyright (C) 2015 Con Kolivas +# Copyright (C) 2016 Con Kolivas # # License # @@ -48,8 +48,8 @@ lrz - gzip compatible command line variant of lrzip =head1 DESCRIPTION -lrz is identical to the lrzip application however its command line options are -made to be as compatible with gzip as possible. +lrz is identical to the lrzip application however its command line options and +behaviour are made to be as compatible with gzip as possible. 
=head1 OPTIONS @@ -65,8 +65,9 @@ General options: -i, --info show compressed file information -L, --license display software version and license -P, --progress show compression progress + -r, --recursive operate recursively on directories -t, --test test compressed file integrity - -v[v], --verbose Increase verbosity + -v[vv], --verbose Increase verbosity -V, --version show version Options affecting output: -f, --force force overwrite of any existing files diff --git a/man/lrzip.1 b/man/lrzip.1 index 741d461..1a12ce6 100644 --- a/man/lrzip.1 +++ b/man/lrzip.1 @@ -1,4 +1,4 @@ -.TH "lrzip" "1" "April 2015" "" "" +.TH "lrzip" "1" "June 2016" "" "" .SH "NAME" lrzip \- a large-file compression program .SH "SYNOPSIS" @@ -43,6 +43,7 @@ General options: \-H, \-\-hash display md5 hash integrity information \-i, \-\-info show compressed file information \-q, \-\-quiet don't show compression progress + \-r, \-\-recursive operate recursively on directories \-t, \-\-test test compressed file integrity \-v[v], \-\-verbose Increase verbosity \-V, \-\-version show version @@ -140,6 +141,13 @@ bursts with lzma compression which is the default compression. This means that it will progress very rapidly for short periods and then stop for long periods. .IP +.IP "\fB-r\fP" +If this option is specified, lrzip will recursively enter the directories +specified, compressing or decompressing every file individually in the same +directory. Note for better compression it is recommended to instead combine +files in a tar file rather than compress them separately, either manually +or with the lrztar helper. +.IP .IP "\fB-t\fP" This tests the compressed file integrity. It does this by decompressing it to a temporary file and then deleting it. 
From a2796607d9551ad58ea28f1e413546a96aa55a48 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 10 Jun 2016 16:23:00 +1000 Subject: [PATCH 4/5] Update README --- README.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 00b59ea..a7434a5 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ lrzip - Long Range ZIP or LZMA RZIP A compression utility that excels at compressing large files (usually > 10-50 MB). Larger files and/or more free RAM means that the utility will be able to more effectively compress your files (ie: faster / smaller size), especially if the -filesize(s) exceed 100 MB. You can either choose to optimize for speed (fast +filesize(s) exceed 100 MB. You can either choose to optimise for speed (fast compression / decompression) or size, but not both. @@ -89,6 +89,8 @@ Simple 'n Easy™: `sudo make install` |`lrzip -z filename`|An archive "filename.lrz" compressed with **ZPAQ** that can give extreme compression, but takes a bit longer than forever to compress and decompress.| |`lrzip -l filename`|An archive lightly compressed with **LZO**, meaning really, really fast compression and decompression.| |`lrunzip filename.lrz`|Decompress filename.lrz to filename.| +|`lrz filename`|As per lrzip above but with gzip compatible semantics (i.e. will be quiet and delete original file)| +|`lrz -d filename.lrz`|As per lrunzip above but with gzip compatible semantics (i.e. will be quiet and delete original file)| ### lrzip internals @@ -281,13 +283,13 @@ its very nature has very little redundancy. This means that there is not much that can actually be compressed. If your video/audio/picture is in a high bitrate, there will be more redundancy than a low bitrate one making it more suitable to compression. None of the compression techniques in lrzip are -optimised for this sort of dat +optimised for this sort of data. 
> A: However, the nature of rzip preparation means that you'll still get better compression than most normal compression algorithms give you if you have very large files. ISO images of dvds for example are best compressed directly instead of individual .VOB files. ZPAQ is the only compression format that can do any significant compression of -multimedi +multimedia. > A: > Q: Is this multithreaded? @@ -397,9 +399,14 @@ compression backend (lzma) needs to compress. > Q: This version is much slower than the old version? -> A: Make sure you have set CFLAGS and CXXFLAGS. An unoptimized build will be +> A: Make sure you have set CFLAGS and CXXFLAGS. An unoptimised build will be almost 3 times slower. +> Q: Why not update to the latest version of libzpaq? + +> A: For reasons that are unclear, the later versions of libzpaq create +corrupt archives when included with lrzip. + #### LIMITATIONS Due to mmap limitations the maximum size a window can be set to is currently 2GB on 32bit unless the -U option is specified. 
Files generated on 64 bit @@ -465,7 +472,7 @@ Persons above are listed in chronological order of first contribution to **lrzip #### README Authors Con Kolivas (`ckolivas` on GitHub) -Sat, 11 March 2011: README +Fri, 10 June 2016: README Also documented by Peter Hyman From d05334bd8612bfdef8fb61167226727a5709dd23 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Fri, 10 Jun 2016 21:04:42 +1000 Subject: [PATCH 5/5] checksum.buf should only be changed after the semaphore wait --- main.c | 4 ++-- rzip.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/main.c b/main.c index e45595a..3a4f7ed 100644 --- a/main.c +++ b/main.c @@ -578,8 +578,8 @@ int main(int argc, char *argv[]) recursion: if (recurse) { if (curentry >= direntries) { - free(dirlist); - break; + infile = NULL; + continue; } infile = dirlist + MAX_PATH_LEN * curentry++; } diff --git a/rzip.c b/rzip.c index 56a9291..2bd444f 100644 --- a/rzip.c +++ b/rzip.c @@ -744,11 +744,11 @@ static inline void hash_search(rzip_control *control, struct rzip_state *st, if (cksum_len < control->page_size) failure("Failed to malloc any ram for checksum ckbuf\n"); } - control->checksum.buf = buf; /* Compute checksum. If the entire chunk is longer than maxram, * do it "per-partes" */ cksem_wait(control, &control->cksumsem); + control->checksum.buf = buf; control->checksum.len = st->chunk_size - cksum_limit; cksum_chunks = control->checksum.len / cksum_len; cksum_remains = control->checksum.len % cksum_len;