From 8a27dc50573b00149c5f3bd0d6f5cb01caacc19e Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Sun, 20 Feb 2011 18:01:19 +1100 Subject: [PATCH] Changes to make md5 be used for integrity testing. Add the md5 value to the end of each archive. This can then be used for integrity testing instead of crc32. Keep crc in new archives to maintain compatibility with version 0.5 files. Use md5 integrity testing on decompression when available in preference, and disable calculation of crc32. Display the choice of integrity testing in verbose output and when -i is used. Display the md5 and crc values when max verbosity, file info, or display hash is enabled. Store a new flag in the magic header to show that the md5 value is stored at the end of the file. Update the magic header information document. --- doc/magic.header.txt | 3 ++- main.c | 37 ++++++++++++++++++++++++++- runzip.c | 59 +++++++++++++++++++++++++++++++++----------- rzip.c | 4 ++- rzip.h | 4 +++ 5 files changed, 89 insertions(+), 18 deletions(-) diff --git a/doc/magic.header.txt b/doc/magic.header.txt index 40c1949..0a2a5f1 100644 --- a/doc/magic.header.txt +++ b/doc/magic.header.txt @@ -8,7 +8,8 @@ Byte Content 5 LRZIP Minor Version Number 6-14 Source File Size 16-20 LZMA Properties Encoded (lc,lp,pb,fb, and dictionary size) -21-22 not used +21 Flag that md5sum hash is stored at the end of the archive +22 not used 23-48 Stream 1 header data 49-74 Stream 2 header data diff --git a/main.c b/main.c index 23ffb29..766007c 100644 --- a/main.c +++ b/main.c @@ -78,6 +78,12 @@ static void write_magic(int fd_in, int fd_out) magic[i + 16] = (char)control.lzma_properties[i]; } + /* This is a flag that the archive contains an md5 sum at the end + * which can be used as an integrity check instead of crc check. + * crc is still stored for compatibility with 0.5 versions. 
+ */ + magic[21] = 1; + if (unlikely(lseek(fd_out, 0, SEEK_SET))) fatal("Failed to seek to BOF to write Magic Header\n"); @@ -89,7 +95,7 @@ static void read_magic(int fd_in, i64 *expected_size) { char magic[24]; uint32_t v; - int i; + int md5, i; if (unlikely(read(fd_in, magic, sizeof(magic)) != sizeof(magic))) fatal("Failed to read magic header\n"); @@ -116,6 +122,12 @@ static void read_magic(int fd_in, i64 *expected_size) for (i = 0; i < 5; i++) control.lzma_properties[i] = magic[i + 16]; } + + /* Whether this archive contains md5 data at the end or not */ + md5 = magic[21]; + if (md5 == 1) + control.flags |= FLAG_MD5; + print_verbose("Detected lrzip version %d.%d file.\n", control.major_version, control.minor_version); if (control.major_version > LRZIP_MAJOR_VERSION || (control.major_version == LRZIP_MAJOR_VERSION && control.minor_version > LRZIP_MINOR_VERSION)) @@ -300,6 +312,14 @@ static void decompress_file(void) fatal("Failed to open history file %s\n", control.outfile); read_magic(fd_in, &expected_size); + if (NO_MD5) + print_verbose("Not performing MD5 hash check\n"); + if (HAS_MD5) + print_verbose("MD5 "); + else + print_verbose("CRC32 "); + print_verbose("being used for integrity testing.\n"); + print_progress("Decompressing..."); runzip_fd(fd_in, fd_out, fd_hist, expected_size); @@ -411,6 +431,21 @@ static void get_fileinfo(void) print_output("Decompressed file size: %llu\n", expected_size); print_output("Compressed file size: %llu\n", infile_size); print_output("Compression ratio: %.3Lf\n", cratio); + if (HAS_MD5) { + char md5_stored[MD5_DIGEST_SIZE]; + int i; + + print_output("MD5 used for integrity testing\n"); + if (unlikely(lseek(fd_in, -MD5_DIGEST_SIZE, SEEK_END) == -1)) + fatal("Failed to seek to md5 data in get_fileinfo\n"); + if (unlikely(read(fd_in, md5_stored, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) + fatal("Failed to read md5 data in get_fileinfo\n"); + print_output("MD5: "); + for (i = 0; i < MD5_DIGEST_SIZE; i++) + print_output("%02x", 
md5_stored[i] & 0xFF); + print_output("\n"); + } else + print_output("CRC32 used for integrity testing\n"); if (STDIN) { if (unlikely(unlink(control.infile))) diff --git a/runzip.c b/runzip.c index cad0750..98568a0 100644 --- a/runzip.c +++ b/runzip.c @@ -82,8 +82,10 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum) if (unlikely(write_1g(fd_out, buf, (size_t)stream_read) != (ssize_t)stream_read)) fatal("Failed to write literal buffer of size %lld\n", stream_read); - *cksum = CrcUpdate(*cksum, buf, stream_read); - md5_process_bytes(buf, stream_read, &control.ctx); + if (!HAS_MD5) + *cksum = CrcUpdate(*cksum, buf, stream_read); + if (!NO_MD5) + md5_process_bytes(buf, stream_read, &control.ctx); free(buf); return stream_read; @@ -122,8 +124,10 @@ static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum if (unlikely(write_1g(fd_out, off_buf, (size_t)n) != (ssize_t)n)) fatal("Failed to write %d bytes in unzip_match\n", n); - *cksum = CrcUpdate(*cksum, off_buf, n); - md5_process_bytes(off_buf, n, &control.ctx); + if (!HAS_MD5) + *cksum = CrcUpdate(*cksum, off_buf, n); + if (!NO_MD5) + md5_process_bytes(off_buf, n, &control.ctx); len -= n; off_buf += n; @@ -211,9 +215,12 @@ static i64 runzip_chunk(int fd_in, int fd_out, int fd_hist, i64 expected_size, i } } - good_cksum = read_u32(ss, 0); - if (unlikely(good_cksum != cksum)) - fatal("Bad checksum 0x%08x - expected 0x%08x\n", cksum, good_cksum); + if (!HAS_MD5) { + good_cksum = read_u32(ss, 0); + if (unlikely(good_cksum != cksum)) + fatal("Bad checksum: 0x%08x - expected: 0x%08x\n", cksum, good_cksum); + print_maxverbose("Checksum for block: 0x%08x\n", cksum); + } if (unlikely(close_stream_in(ss))) fatal("Failed to close stream!\n"); @@ -227,11 +234,12 @@ static i64 runzip_chunk(int fd_in, int fd_out, int fd_hist, i64 expected_size, i i64 runzip_fd(int fd_in, int fd_out, int fd_hist, i64 expected_size) { char md5_resblock[MD5_DIGEST_SIZE]; + char md5_stored[MD5_DIGEST_SIZE]; 
struct timeval start,end; i64 total = 0; - int j; - md5_init_ctx (&control.ctx); + if (!NO_MD5) + md5_init_ctx (&control.ctx); gettimeofday(&start,NULL); while (total < expected_size) @@ -241,12 +249,33 @@ i64 runzip_fd(int fd_in, int fd_out, int fd_hist, i64 expected_size) print_progress("\nAverage DeCompression Speed: %6.3fMB/s\n", (total / 1024 / 1024) / (double)((end.tv_sec-start.tv_sec)? : 1)); - md5_finish_ctx (&control.ctx, md5_resblock); - if (HASH_CHECK || VERBOSE) { - print_output("MD5: "); - for (j = 0; j < MD5_DIGEST_SIZE; j++) - print_output("%02x", md5_resblock[j] & 0xFF); - print_output("\n"); + if (!NO_MD5) { + int i,j; + + md5_finish_ctx (&control.ctx, md5_resblock); + if (HAS_MD5) { + if (unlikely(lseek(fd_in, -MD5_DIGEST_SIZE, SEEK_END) == -1)) + fatal("Failed to seek to md5 data in runzip_fd\n"); + if (unlikely(read(fd_in, md5_stored, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) + fatal("Failed to read md5 data in runzip_fd\n"); + for (i = 0; i < MD5_DIGEST_SIZE; i++) + if (md5_stored[i] != md5_resblock[i]) { + print_output("MD5 CHECK FAILED.\nStored:"); + for (j = 0; j < MD5_DIGEST_SIZE; j++) + print_output("%02x", md5_stored[j] & 0xFF); + print_output("\nOutput file:"); + for (j = 0; j < MD5_DIGEST_SIZE; j++) + print_output("%02x", md5_resblock[j] & 0xFF); + fatal("\n"); + } + } + + if (HASH_CHECK || MAX_VERBOSE) { + print_output("MD5: "); + for (i = 0; i < MD5_DIGEST_SIZE; i++) + print_output("%02x", md5_resblock[i] & 0xFF); + print_output("\n"); + } } return total; diff --git a/rzip.c b/rzip.c index ad5b7ea..3eacdb7 100644 --- a/rzip.c +++ b/rzip.c @@ -916,12 +916,14 @@ retry: close_streamout_threads(); md5_finish_ctx (&control.ctx, md5_resblock); - if (HASH_CHECK || VERBOSE) { + if (HASH_CHECK || MAX_VERBOSE) { print_output("MD5: "); for (j = 0; j < MD5_DIGEST_SIZE; j++) print_output("%02x", md5_resblock[j] & 0xFF); print_output("\n"); } + if (unlikely(write(control.fd_out, md5_resblock, MD5_DIGEST_SIZE) != MD5_DIGEST_SIZE)) + fatal("Failed to 
write md5 in rzip_fd\n"); gettimeofday(&current, NULL); if (STDIN) diff --git a/rzip.h b/rzip.h index 11de485..f3096c3 100644 --- a/rzip.h +++ b/rzip.h @@ -213,6 +213,7 @@ static inline i64 get_ram(void) #define FLAG_MAXRAM (1 << 15) #define FLAG_UNLIMITED (1 << 16) #define FLAG_HASH (1 << 17) +#define FLAG_MD5 (1 << 18) #define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX) #define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS) @@ -237,6 +238,9 @@ static inline i64 get_ram(void) #define MAXRAM (control.flags & FLAG_MAXRAM) #define UNLIMITED (control.flags & FLAG_UNLIMITED) #define HASH_CHECK (control.flags & FLAG_HASH) +#define HAS_MD5 (control.flags & FLAG_MD5) + +#define NO_MD5 (!(HASH_CHECK) && !(HAS_MD5)) #define BITS32 (sizeof(long) == 4)