From 38eca38743c38df1f64c53ee31395f1bf630cd8d Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:23:14 +1100 Subject: [PATCH 01/10] Unify maxram allocation and limit threads when there isn't enough ram. --- main.c | 1 + rzip.c | 2 +- rzip.h | 1 + stream.c | 21 ++++++++++++++++----- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/main.c b/main.c index 6b967e4..355460b 100644 --- a/main.c +++ b/main.c @@ -997,6 +997,7 @@ int main(int argc, char *argv[]) /* Decrease usable ram size on 32 bits due to kernel/userspace split */ if (BITS32) control.ramsize = MAX(control.ramsize - 900000000ll, 900000000ll); + control.maxram = control.ramsize / 3; /* Set the main nice value to half that of the backend threads since * the rzip stage is usually the rate limiting step */ diff --git a/rzip.c b/rzip.c index 9a3dda3..774d4d6 100644 --- a/rzip.c +++ b/rzip.c @@ -780,7 +780,7 @@ void rzip_fd(int fd_in, int fd_out) * allocate 1/3 of it to the main buffer and use a sliding mmap * buffer to work on 2/3 ram size, leaving enough ram for the * compression backends */ - control.max_mmap = control.ramsize / 3; + control.max_mmap = control.maxram; /* On 32 bits we can have a big window with sliding mmap, but can * not enable much per mmap/malloc */ diff --git a/rzip.h b/rzip.h index 175e2aa..5895752 100644 --- a/rzip.h +++ b/rzip.h @@ -270,6 +270,7 @@ struct rzip_control { const char *suffix; int compression_level; i64 overhead; // compressor overhead + i64 maxram; // the largest chunk of ram to allocate unsigned char lzma_properties[5]; // lzma properties, encoded i64 window; unsigned long flags; diff --git a/stream.c b/stream.c index ff3b2a5..dbcd001 100644 --- a/stream.c +++ b/stream.c @@ -763,18 +763,18 @@ void close_streamout_threads(void) /* open a set of output streams, compressing with the given compression level and algorithm */ -void *open_stream_out(int f, int n, i64 limit, char cbytes) +void *open_stream_out(int f, int n, i64 chunk_limit, char cbytes) { struct stream_info *sinfo; + i64 testsize, limit; uchar *testmalloc; - i64 testsize; int i, testbufs; sinfo = calloc(sizeof(struct stream_info), 1); if (unlikely(!sinfo)) return NULL; - sinfo->bufsize = limit; + sinfo->bufsize = limit = chunk_limit; sinfo->chunk_bytes = cbytes; sinfo->num_streams = n; @@ -802,8 +802,19 @@ void *open_stream_out(int f, int n, i64 limit, char cbytes) (control.overhead * control.threads)); testsize = (limit * testbufs) + (control.overhead * control.threads); - if (testsize > control.ramsize / 3) - limit = (control.ramsize / 3 - (control.overhead * control.threads)) / testbufs; + if (testsize > control.maxram) + limit = (control.maxram - (control.overhead * control.threads)) / testbufs; + + /* If we don't have enough ram for the number of threads, decrease the + * number of threads till we do, or only have one thread. */ + while (limit < STREAM_BUFSIZE && limit < chunk_limit) { + if (control.threads > 1) + --control.threads; + else + break; + limit = (control.maxram - (control.overhead * control.threads)) / testbufs; + limit = MIN(limit, chunk_limit); + } retest_malloc: testsize = (limit * testbufs) + (control.overhead * control.threads); testmalloc = malloc(testsize); From 8fa01248e7b7d594683cfcb2fffb964e04efba19 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:33:14 +1100 Subject: [PATCH 02/10] Unlink temporary files immediately to avoid files lying around in cases of unexpected/uncaught failure. --- main.c | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/main.c b/main.c index 355460b..f5322b5 100644 --- a/main.c +++ b/main.c @@ -167,7 +167,7 @@ static int open_tmpoutfile(void) if (STDOUT) print_verbose("Outputting to stdout.\n"); if (control.tmpdir) { - control.outfile = realloc(NULL, strlen(control.tmpdir)+16); + control.outfile = realloc(NULL, strlen(control.tmpdir) + 16); if (unlikely(!control.outfile)) fatal("Failed to allocate outfile name\n"); strcpy(control.outfile, control.tmpdir); @@ -181,7 +181,11 @@ static int open_tmpoutfile(void) fd_out = mkstemp(control.outfile); if (unlikely(fd_out == -1)) - fatal("Failed to create out tmpfile: %s\n", strerror(errno)); + fatal("Failed to create out tmpfile: %s\n", control.outfile); + /* Unlink temporary file immediately to minimise chance of files left + * lying around in cases of failure. */ + if (unlikely(unlink(control.outfile))) + fatal("Failed to unlink tmpfile: %s\n", control.outfile); return fd_out; } @@ -211,7 +215,7 @@ static int open_tmpinfile(void) int fd_in; if (control.tmpdir) { - control.infile = malloc(strlen(control.tmpdir)+15); + control.infile = malloc(strlen(control.tmpdir) + 15); if (unlikely(!control.infile)) fatal("Failed to allocate infile name\n"); strcpy(control.infile, control.tmpdir); @@ -225,7 +229,9 @@ static int open_tmpinfile(void) fd_in = mkstemp(control.infile); if (unlikely(fd_in == -1)) - fatal("Failed to create in tmpfile: %s\n", strerror(errno)); + fatal("Failed to create in tmpfile: %s\n", control.infile); + if (unlikely(unlink(control.infile))) + fatal("Failed to unlink tmpfile: %s\n", control.infile); return fd_in; } @@ -379,15 +385,9 @@ static void decompress_file(void) if (unlikely(close(fd_hist) || close(fd_out))) fatal("Failed to close files\n"); - if (TEST_ONLY | STDOUT) { - /* Delete temporary files generated for testing or faking stdout */ - if (unlikely(unlink(control.outfile))) - fatal("Failed to unlink tmpfile: %s\n", strerror(errno)); - } - close(fd_in); - if (!(KEEP_FILES | TEST_ONLY) || STDIN) { + if (!KEEP_FILES) { if (unlikely(unlink(control.infile))) fatal("Failed to unlink %s: %s\n", infilecopy, strerror(errno)); } @@ -592,12 +592,8 @@ next_chunk: ctotal, expected_size); } - if (STDIN) { - if (unlikely(unlink(control.infile))) - fatal("Failed to unlink %s: %s\n", infilecopy, strerror(errno)); - } else - if (unlikely(close(fd_in))) - fatal("Failed to close fd_in in get_fileinfo\n"); + if (unlikely(close(fd_in))) + fatal("Failed to close fd_in in get_fileinfo\n"); free(control.outfile); free(infilecopy); @@ -697,12 +693,6 @@ static void compress_file(void) if (unlikely(close(fd_in) || close(fd_out))) fatal("Failed to close files\n"); - if (STDOUT) { - /* Delete temporary files generated for testing or faking stdout */ - if (unlikely(unlink(control.outfile))) - fatal("Failed to unlink tmpfile: %s\n", strerror(errno)); - } - if (!KEEP_FILES) { if (unlikely(unlink(control.infile))) fatal("Failed to unlink %s: %s\n", control.infile, strerror(errno)); From f9046e675677cd21ee8f25ce0ece1d075296d585 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:36:03 +1100 Subject: [PATCH 03/10] Check free space after reading magic, and not when decompressing to stdout. --- main.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/main.c b/main.c index f5322b5..3bfe1e4 100644 --- a/main.c +++ b/main.c @@ -344,23 +344,26 @@ static void decompress_file(void) fd_out = open_tmpoutfile(); control.fd_out = fd_out; - /* Check if there's enough free space on the device chosen to fit the - * decompressed file. */ - if (unlikely(fstatvfs(fd_out, &fbuf))) - fatal("Failed to fstatvfs in decompress_file\n"); - free_space = fbuf.f_bsize * fbuf.f_bavail; - if (free_space < expected_size) { - if (FORCE_REPLACE) - print_err("Warning, inadequate free space detected, but attempting to decompress due to -f option being used.\n"); - else - failure("Inadequate free space to decompress file, use -f to override.\n"); + read_magic(fd_in, &expected_size); + + if (!STDOUT) { + /* Check if there's enough free space on the device chosen to fit the + * decompressed file. */ + if (unlikely(fstatvfs(fd_out, &fbuf))) + fatal("Failed to fstatvfs in decompress_file\n"); + free_space = fbuf.f_bsize * fbuf.f_bavail; + if (free_space < expected_size) { + if (FORCE_REPLACE) + print_err("Warning, inadequate free space detected, but attempting to decompress due to -f option being used.\n"); + else + failure("Inadequate free space to decompress file, use -f to override.\n"); + } } fd_hist = open(control.outfile, O_RDONLY); if (unlikely(fd_hist == -1)) fatal("Failed to open history file %s\n", control.outfile); - read_magic(fd_in, &expected_size); if (NO_MD5) print_verbose("Not performing MD5 hash check\n"); if (HAS_MD5) From 06fd0a352862290a05ca85972d81b36f564dabef Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:40:43 +1100 Subject: [PATCH 04/10] Unlink files in safe places. --- main.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/main.c b/main.c index 3bfe1e4..eec1e2a 100644 --- a/main.c +++ b/main.c @@ -182,10 +182,6 @@ static int open_tmpoutfile(void) fd_out = mkstemp(control.outfile); if (unlikely(fd_out == -1)) fatal("Failed to create out tmpfile: %s\n", control.outfile); - /* Unlink temporary file immediately to minimise chance of files left - * lying around in cases of failure. */ - if (unlikely(unlink(control.outfile))) - fatal("Failed to unlink tmpfile: %s\n", control.outfile); return fd_out; } @@ -230,6 +226,8 @@ static int open_tmpinfile(void) fd_in = mkstemp(control.infile); if (unlikely(fd_in == -1)) fatal("Failed to create in tmpfile: %s\n", control.infile); + /* Unlink temporary file immediately to minimise chance of files left + * lying around in cases of failure. */ if (unlikely(unlink(control.infile))) fatal("Failed to unlink tmpfile: %s\n", control.infile); return fd_in; @@ -364,6 +362,10 @@ static void decompress_file(void) if (unlikely(fd_hist == -1)) fatal("Failed to open history file %s\n", control.outfile); + /* Unlink temporary file as soon as possible */ + if (unlikely(STDOUT && unlink(control.outfile))) + fatal("Failed to unlink tmpfile: %s\n", control.outfile); + if (NO_MD5) print_verbose("Not performing MD5 hash check\n"); if (HAS_MD5) From 68469c2b6fa465038d1b4b5b41b4670397c6584b Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:41:00 +1100 Subject: [PATCH 05/10] Don't dump output to stdout when testing. --- main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.c b/main.c index eec1e2a..84c958a 100644 --- a/main.c +++ b/main.c @@ -378,7 +378,7 @@ static void decompress_file(void) runzip_fd(fd_in, fd_out, fd_hist, expected_size); - if (STDOUT) + if (STDOUT && !TEST_ONLY) dump_tmpoutfile(fd_out); /* if we get here, no fatal errors during decompression */ From 8a4df9774f6467166dd210e2bce2709731587469 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 13:44:29 +1100 Subject: [PATCH 06/10] Forgot two instances of temporary outfiles that need to be unlinked. --- main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/main.c b/main.c index 84c958a..158ccaa 100644 --- a/main.c +++ b/main.c @@ -363,7 +363,7 @@ static void decompress_file(void) fatal("Failed to open history file %s\n", control.outfile); /* Unlink temporary file as soon as possible */ - if (unlikely(STDOUT && unlink(control.outfile))) + if (unlikely((STDOUT || TEST_ONLY) && unlink(control.outfile))) fatal("Failed to unlink tmpfile: %s\n", control.outfile); if (NO_MD5) @@ -681,6 +681,9 @@ static void compress_file(void) fd_out = open_tmpoutfile(); control.fd_out = fd_out; + if (unlikely(STDOUT && unlink(control.outfile))) + fatal("Failed to unlink tmpfile: %s\n", control.outfile); + preserve_perms(fd_in, fd_out); /* write zeroes to 24 bytes at beginning of file */ From 7f45a1f024aab1acb4049133644fee1c82311ca4 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 16:57:48 +1100 Subject: [PATCH 07/10] Update changelog so far. --- ChangeLog | 20 ++++++++++++++++++++ WHATS-NEW | 17 ++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0e78f51..8cafbad 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,24 @@ lrzip ChangeLog +MARCH 2011, version 0.571 Con Kolivas +* Only retry mmaping if it's a memory error, otherwise it may give spurious +errors. +* Check for free space before compression/decompression and abort if there +is inadequate free space if the -f option is not passed. +* Fix the wrong check in rzip.c which was rounding down the page size and +making for one extra small chunk at the end. +* Check the correct stdout when refusing to pipe to a terminal. +* Fix windows EOL on lzma.txt. +* Ignore what stdout is going to when testing from stdin. +* More verbose summary after we know whether we have stdin/out to more +accurately reflect the window that will be used. +* Updated gitignore +* Unlink temporary files immediately to avoid files lying around. +* Check free space AFTER reading magic, and not when decompressing to stdout. +* Don't dump output to stdout when just testing a file. + +MARCH 2011, Michael Blumenkrantz +* Updated autotools/conf build system. + FEBRUARY 2011, version 0.570 Con Kolivas * Change the lzo testing to a bool on/off instead of taking a parameter. * Clean up the messy help output. diff --git a/WHATS-NEW b/WHATS-NEW index 534b6d9..93487eb 100644 --- a/WHATS-NEW +++ b/WHATS-NEW @@ -1,4 +1,19 @@ -lrzip-0.561 +lrzip-0.571 + +A new build configuration system. +Avoid spurious errors on failing to mmap a file. +Fee space will now be checked to ensure there is enough room for the +compressed or decompressed file and lrzip will abort unless the -f option is +passed to it. +The extra little chunk at the end of every large file should now be fixed. +The file lzma.txt now has unix end-of-lines. +There will be a more accurate summary of what compression window will be used +when lrzip is invoked with STDIN/STDOUT. +STDIN will now be able to show estimated time to completion and percentage +complete once lrzip knows how much file is left. +Temporary files are much less likely to be left lying around. + +lrzip-0.570 Multi-threaded performance has been improved with a significant speed-up on both compression and decompression. New benchmark results have been added to From 32e182c95a7f472967bb88253558ac9acaf554b6 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 17:00:28 +1100 Subject: [PATCH 08/10] Check for free space in the right place for compression and give the right message. --- rzip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rzip.c b/rzip.c index 774d4d6..7793257 100644 --- a/rzip.c +++ b/rzip.c @@ -766,8 +766,8 @@ void rzip_fd(int fd_in, int fd_out) /* Check if there's enough free space on the device chosen to fit the * compressed file, based on the compressed file being as large as the * uncompressed file. */ - if (unlikely(fstatvfs(fd_in, &fbuf))) - fatal("Failed to fstatvfs in decompress_file\n"); + if (unlikely(fstatvfs(fd_out, &fbuf))) + fatal("Failed to fstatvfs in compress_file\n"); free_space = fbuf.f_bsize * fbuf.f_bavail; if (free_space < control.st_size) { if (FORCE_REPLACE) From 13a6fb5b430998075eb63246a1a9debb8a330db4 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 17:14:07 +1100 Subject: [PATCH 09/10] Dump the temporary file generated to emulate stdout at the end of each chunk on decompression and then truncate the file instead of writing the whole file before dumping it. --- main.c | 17 +++++++++++------ runzip.c | 5 ++++- rzip.h | 1 + 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/main.c b/main.c index 158ccaa..00aef35 100644 --- a/main.c +++ b/main.c @@ -186,12 +186,12 @@ static int open_tmpoutfile(void) } /* Dump temporary outputfile to perform stdout */ -static void dump_tmpoutfile(int fd_out) +void dump_tmpoutfile(int fd_out) { FILE *tmpoutfp; int tmpchar; - print_progress("Dumping to stdout.\n"); + print_verbose("Dumping temporary file to stdout.\n"); /* flush anything not yet in the temporary file */ fsync(fd_out); tmpoutfp = fdopen(fd_out, "r"); @@ -199,10 +199,15 @@ static void dump_tmpoutfile(int fd_out) fatal("Failed to fdopen out tmpfile: %s\n", strerror(errno)); rewind(tmpoutfp); - while ((tmpchar = fgetc(tmpoutfp)) != EOF) - putchar(tmpchar); + if (!TEST_ONLY) { + while ((tmpchar = fgetc(tmpoutfp)) != EOF) + putchar(tmpchar); + } + fflush(stdout); - fflush(control.msgout); + rewind(tmpoutfp); + if (unlikely(ftruncate(fd_out, 0))) + fatal("Failed to ftruncate fd_out in dump_tmpoutfile\n"); } /* Open a temporary inputfile to perform stdin decompression */ @@ -378,7 +383,7 @@ static void decompress_file(void) runzip_fd(fd_in, fd_out, fd_hist, expected_size); - if (STDOUT && !TEST_ONLY) + if (STDOUT) dump_tmpoutfile(fd_out); /* if we get here, no fatal errors during decompression */ diff --git a/runzip.c b/runzip.c index 49f9c59..9922523 100644 --- a/runzip.c +++ b/runzip.c @@ -242,8 +242,11 @@ i64 runzip_fd(int fd_in, int fd_out, int fd_hist, i64 expected_size) md5_init_ctx (&control.ctx); gettimeofday(&start,NULL); - while (total < expected_size) + while (total < expected_size) { total += runzip_chunk(fd_in, fd_out, fd_hist, expected_size, total); + if (STDOUT) + dump_tmpoutfile(fd_out); + } gettimeofday(&end,NULL); print_progress("\nAverage DeCompression Speed: %6.3fMB/s\n", diff --git a/rzip.h b/rzip.h index 5895752..f2c5ceb 100644 --- a/rzip.h +++ b/rzip.h @@ -334,6 +334,7 @@ const i64 two_gig; void prepare_streamout_threads(void); void close_streamout_threads(void); void round_to_page(i64 *size); +void dump_tmpoutfile(int fd_out); #define print_err(format, args...) do {\ fprintf(stderr, format, ##args); \ From fa821fe19680ee8a9ab0bce46193285295af0c52 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Mon, 7 Mar 2011 17:15:59 +1100 Subject: [PATCH 10/10] Updated changelog. --- ChangeLog | 2 ++ WHATS-NEW | 1 + 2 files changed, 3 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8cafbad..1d5bd86 100644 --- a/ChangeLog +++ b/ChangeLog @@ -15,6 +15,8 @@ accurately reflect the window that will be used. * Unlink temporary files immediately to avoid files lying around. * Check free space AFTER reading magic, and not when decompressing to stdout. * Don't dump output to stdout when just testing a file. +* Dump the temporary file generated on emulating stdout on decompression after +every chunk is decompressed instead of after the whole file is decompressed. MARCH 2011, Michael Blumenkrantz * Updated autotools/conf build system. diff --git a/WHATS-NEW b/WHATS-NEW index 93487eb..4062126 100644 --- a/WHATS-NEW +++ b/WHATS-NEW @@ -12,6 +12,7 @@ when lrzip is invoked with STDIN/STDOUT. STDIN will now be able to show estimated time to completion and percentage complete once lrzip knows how much file is left. Temporary files are much less likely to be left lying around. +Less temporary file space will be used when decompressing to stdout. lrzip-0.570