lrzip/stream.c

1016 lines
25 KiB
C
Raw Normal View History

2010-03-29 01:07:08 +02:00
/*
Copyright (C) Andrew Tridgell 1998,
Con Kolivas 2006-2010
2010-03-29 01:07:08 +02:00
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* multiplex N streams into a file - the streams are passed
through different compressors */
#include "rzip.h"
#define STREAM_BUFSIZE (1024 * 1024 * 10)
/* just to keep things clean, declare function here
* but move body to the end since it's a work function
*/
static int lzo_compresses(struct stream *s);
/* Stand-in for fmemopen(buf, buflen, "r"): copies the buffer into an
 * anonymous temporary file and rewinds it so it can be read from the start.
 * Any mode other than "r" is reported through fatal().
 * Returns the stream, or NULL if the temporary file could not be created
 * or filled. The caller is responsible for fclose()ing the result. */
static inline FILE *fake_fmemopen(void *buf, size_t buflen, char *mode)
{
	FILE *in;

	if (unlikely(strcmp(mode, "r")))
		fatal("fake_fmemopen only supports mode \"r\".");

	in = tmpfile();
	if (unlikely(!in))
		return NULL;

	if (unlikely(fwrite(buf, buflen, 1, in) != 1)) {
		/* Do not leak the temporary file when it cannot be filled */
		fclose(in);
		return NULL;
	}

	rewind(in);
	return in;
}
/* Stand-in for open_memstream(): returns an anonymous temporary file to be
 * used as the output stream. buf and length are only checked for NULL here
 * (via fatal()); this function never writes through them.
 * Returns the stream, or NULL if the temporary file cannot be created. */
static inline FILE *fake_open_memstream(char **buf, size_t *length)
{
	if (unlikely(!buf || !length))
		fatal("NULL parameter to fake_open_memstream");

	/* tmpfile() yields NULL on failure, which is exactly what we report */
	return tmpfile();
}
/* Copy the entire contents of fp into a freshly malloc'd buffer.
 * On success *buf points at the copy (owned by the caller), *length holds
 * its size, the stream position is restored to where it was on entry and
 * 0 is returned.
 * On failure -1 is returned, nothing is leaked and *buf / *length are left
 * untouched. An empty stream is reported as a failure, because a
 * zero-sized fread() can never return the expected item count of 1. */
static inline int fake_open_memstream_update_buffer(FILE *fp, uchar **buf, size_t *length)
{
	long original_pos, end_pos;
	uchar *data;

	original_pos = ftell(fp);
	if (unlikely(original_pos < 0))
		return -1;
	if (unlikely(fseek(fp, 0, SEEK_END)))
		return -1;
	/* The offset of the end of the file is the amount of data to copy */
	end_pos = ftell(fp);
	if (unlikely(end_pos < 0))
		return -1;
	rewind(fp);

	data = (uchar *)malloc((size_t)end_pos);
	if (unlikely(!data))
		return -1;
	if (unlikely(fread(data, (size_t)end_pos, 1, fp) != 1))
		goto out_free;
	if (unlikely(fseek(fp, original_pos, SEEK_SET)))
		goto out_free;

	*length = (size_t)end_pos;
	*buf = data;
	return 0;

out_free:
	free(data);
	return -1;
}
/*
***** COMPRESSION FUNCTIONS *****
ZPAQ, BZIP, GZIP, LZMA, LZO
try to compress a buffer. If compression fails for whatever reason then
leave uncompressed. Return the compression type in c_type and resulting
length in c_len
*/
/* Try to compress s->buf with ZPAQ by piping it through zpipe_compress().
 * Nothing is attempted when lzo_compresses() reports the data as
 * incompressible. If the result is smaller than *c_len it replaces s->buf,
 * *c_len becomes the compressed size and *c_type becomes CTYPE_ZPAQ;
 * otherwise the stream and both outputs are left as they were.
 * Failure to set up either memory stream is fatal. */
static void zpaq_compress_buf(struct stream *s, int *c_type, i64 *c_len)
{
	FILE *src, *dst;
	uchar *packed = NULL;
	size_t packed_len = 0;

	if (!lzo_compresses(s))
		return;

	src = fmemopen(s->buf, s->buflen, "r");
	if (unlikely(src == NULL))
		fatal("Failed to fmemopen in zpaq_compress_buf\n");

	dst = open_memstream((char **)&packed, &packed_len);
	if (unlikely(dst == NULL))
		fatal("Failed to open_memstream in zpaq_compress_buf\n");

	zpipe_compress(src, dst, control.msgout, s->buflen, (int)(SHOW_PROGRESS));

	if (unlikely(memstream_update_buffer(dst, &packed, &packed_len)))
		fatal("Failed to memstream_update_buffer in zpaq_compress_buf");

	fclose(src);
	fclose(dst);

	if ((i64)packed_len < *c_len) {
		/* Worth keeping: swap the compressed data into the stream */
		*c_len = packed_len;
		free(s->buf);
		s->buf = packed;
		*c_type = CTYPE_ZPAQ;
	} else {
		/* Incompressible, leave as CTYPE_NONE */
		free(packed);
	}
}
/* Try to compress s->buf with bzip2 into a buffer of the same size.
 * Skipped when lzo_compresses() reports the data as incompressible.
 * On success with a result smaller than *c_len, s->buf is replaced by the
 * compressed data, *c_len is set to its size and *c_type to CTYPE_BZIP2.
 * On allocation failure, compressor error or no gain, everything is left
 * untouched. */
static void bzip2_compress_buf(struct stream *s, int *c_type, i64 *c_len)
{
	u32 packed_len = s->buflen;
	uchar *packed;
	int bzret;

	if (!lzo_compresses(s))
		return;

	packed = malloc(packed_len);
	if (!packed)
		return;

	bzret = BZ2_bzBuffToBuffCompress((char*)packed, &packed_len,
					 (char*)s->buf, s->buflen,
					 control.compression_level, 0,
					 control.compression_level * 10);

	/* Compressor error, or incompressible: leave as CTYPE_NONE */
	if (bzret != BZ_OK || packed_len >= *c_len) {
		free(packed);
		return;
	}

	free(s->buf);
	s->buf = packed;
	*c_len = packed_len;
	*c_type = CTYPE_BZIP2;
}
/* Try to compress s->buf with zlib's compress2() into a buffer of the same
 * size. When the result is smaller than *c_len it replaces s->buf, *c_len
 * is set to its size and *c_type to CTYPE_GZIP. On allocation failure,
 * zlib error or no gain, everything is left untouched. */
static void gzip_compress_buf(struct stream *s, int *c_type, i64 *c_len)
{
	unsigned long packed_len = s->buflen;
	uchar *packed = malloc(packed_len);
	int zret;

	if (!packed)
		return;

	zret = compress2(packed, &packed_len, s->buf, s->buflen, control.compression_level);

	/* zlib error, or incompressible: leave as CTYPE_NONE */
	if (zret != Z_OK || (i64)packed_len >= *c_len) {
		free(packed);
		return;
	}

	free(s->buf);
	s->buf = packed;
	*c_len = packed_len;
	*c_type = CTYPE_GZIP;
}
static void lzma_compress_buf(struct stream *s, int *c_type, i64 *c_len)
{
size_t prop_size = 5; /* return value for lzma_properties */
2010-03-29 01:07:08 +02:00
uchar *c_buf;
size_t dlen;
int lzma_ret;
if (!lzo_compresses(s))
goto out;
dlen = s->buflen;
c_buf = malloc(dlen);
if (!c_buf)
return;
print_progress("\tProgress percentage pausing during lzma compression...");
2010-03-29 01:07:08 +02:00
/* with LZMA SDK 4.63, we pass compression level and threads only
* and receive properties in control->lzma_properties */
lzma_ret = LzmaCompress(c_buf, &dlen, s->buf, (size_t)s->buflen, control.lzma_properties, &prop_size, control.compression_level,
0, /* dict size. set default */
-1, -1, -1, -1, /* lc, lp, pb, fb */
control.threads);
if (lzma_ret != SZ_OK) {
switch (lzma_ret) {
case SZ_ERROR_MEM:
print_err("\nLZMA ERROR: %d. Try a smaller compression window.\n", SZ_ERROR_MEM);
2010-03-29 01:07:08 +02:00
break;
case SZ_ERROR_PARAM:
print_err("\nLZMA Parameter ERROR: %d. This should not happen.\n", SZ_ERROR_PARAM);
2010-03-29 01:07:08 +02:00
break;
case SZ_ERROR_OUTPUT_EOF:
print_err("\nHarmless LZMA Output Buffer Overflow error: %d. Incompressible block.\n", SZ_ERROR_OUTPUT_EOF);
2010-03-29 01:07:08 +02:00
break;
case SZ_ERROR_THREAD:
print_err("\nLZMA Multi Thread ERROR: %d. This should not happen.\n", SZ_ERROR_THREAD);
2010-03-29 01:07:08 +02:00
break;
default:
print_err("Unidentified LZMA ERROR: %d. This should not happen.\n", lzma_ret);
2010-03-29 01:07:08 +02:00
break;
}
/* can pass -1 if not compressible! Thanks Lasse Collin */
free(c_buf);
goto out;
}
if ((i64)dlen >= *c_len) {
/* Incompressible, leave as CTYPE_NONE */
free(c_buf);
goto out;
}
*c_len = dlen;
free(s->buf);
s->buf = c_buf;
*c_type = CTYPE_LZMA;
out:
if (MAX_VERBOSE)
print_output("\n");
else if (SHOW_PROGRESS || VERBOSE)
print_output("\r\t \r");
2010-03-29 01:07:08 +02:00
}
/* Try to compress s->buf with lzo1x_1. The output buffer is sized at
 * in_len + in_len / 16 + 64 + 3 bytes to leave room for expansion.
 * When the result is smaller than the input it replaces s->buf, *c_len is
 * set to its size and *c_type to CTYPE_LZO. On allocation failure or no
 * gain the stream is left untouched. */
static void lzo_compress_buf(struct stream *s, int *c_type, i64 *c_len)
{
	lzo_uint src_len = s->buflen;
	lzo_uint packed_len = src_len + src_len / 16 + 64 + 3;
	lzo_bytep work;
	uchar *packed;

	work = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS);
	if (work == NULL)
		return;

	packed = malloc(packed_len);
	if (packed) {
		/* lzo1x_1_compress does not return anything but LZO_OK */
		(void)lzo1x_1_compress((uchar *)s->buf, src_len, (uchar *)packed,
				       &packed_len, work);

		if (packed_len < src_len) {
			*c_len = packed_len;
			free(s->buf);
			s->buf = packed;
			*c_type = CTYPE_LZO;
		} else {
			/* Incompressible, leave as CTYPE_NONE */
			free(packed);
		}
	}

	free(work);
}
/*
***** DECOMPRESSION FUNCTIONS *****
ZPAQ, BZIP, GZIP, LZMA, LZO
try to decompress a buffer. Return 0 on success and -1 on failure.
*/
/* Decompress the ZPAQ data in s->buf by piping it through
 * zpipe_decompress(). Once the pipe has run, s->buf is replaced by the
 * output buffer, whose size must equal s->buflen.
 * Returns 0 on success and -1 if a memory stream could not be opened or
 * the output length is wrong; failing to collect the output is fatal. */
static int zpaq_decompress_buf(struct stream *s)
{
	uchar *c_buf = NULL;
	size_t dlen = 0;
	FILE *in, *out;

	in = fmemopen(s->buf, s->buflen, "r");
	if (unlikely(!in)) {
		print_err("Failed to fmemopen in zpaq_decompress_buf\n");
		return -1;
	}
	out = open_memstream((char **)&c_buf, &dlen);
	if (unlikely(!out)) {
		print_err("Failed to open_memstream in zpaq_decompress_buf\n");
		/* Do not leak the input stream on this error path */
		fclose(in);
		return -1;
	}

	zpipe_decompress(in, out, control.msgout, s->buflen, (int)(SHOW_PROGRESS));

	if (unlikely(memstream_update_buffer(out, &c_buf, &dlen)))
		fatal("Failed to memstream_update_buffer in zpaq_decompress_buf");

	fclose(in);
	fclose(out);
	free(s->buf);
	s->buf = c_buf;

	if (unlikely((i64)dlen != s->buflen)) {
		/* dlen is a size_t: widen it explicitly to match the format */
		print_err("Inconsistent length after decompression. Got %lld bytes, expected %lld\n",
			  (long long)dlen, (long long)s->buflen);
		return -1;
	}

	return 0;
}
/* Decompress c_len bytes of bzip2 data held in s->buf into a new buffer of
 * s->buflen bytes, which then becomes s->buf.
 * Returns 0 on success, -1 on failure. If the output buffer cannot be
 * allocated, s->buf still holds the compressed data; on any later failure
 * s->buf holds the (unusable) output buffer. The compressed buffer is
 * released on every path that replaced s->buf. */
static int bzip2_decompress_buf(struct stream *s, i64 c_len)
{
	u32 dlen = s->buflen;
	uchar *c_buf = s->buf;
	uchar *u_buf;
	int ret = -1;
	int bzerr;

	u_buf = malloc(dlen);
	if (unlikely(!u_buf)) {
		print_err("Failed to allocate %u bytes for decompression\n", dlen);
		return -1;
	}
	s->buf = u_buf;

	bzerr = BZ2_bzBuffToBuffDecompress((char*)s->buf, &dlen, (char*)c_buf, c_len, 0, 0);
	if (unlikely(bzerr != BZ_OK)) {
		print_err("Failed to decompress buffer - bzerr=%d\n", bzerr);
		goto out;
	}

	if (unlikely(dlen != s->buflen)) {
		print_err("Inconsistent length after decompression. Got %u bytes, expected %lld\n",
			  dlen, (long long)s->buflen);
		goto out;
	}

	ret = 0;
out:
	/* The compressed input is no longer needed, success or not */
	free(c_buf);
	return ret;
}
/* Decompress c_len bytes of zlib data held in s->buf into a new buffer of
 * s->buflen bytes, which then becomes s->buf.
 * Returns 0 on success, -1 on failure. If the output buffer cannot be
 * allocated, s->buf still holds the compressed data; on any later failure
 * s->buf holds the (unusable) output buffer. The compressed buffer is
 * released on every path that replaced s->buf. */
static int gzip_decompress_buf(struct stream *s, i64 c_len)
{
	unsigned long dlen = s->buflen;
	uchar *c_buf = s->buf;
	uchar *u_buf;
	int ret = -1;
	int gzerr;

	u_buf = malloc(dlen);
	if (unlikely(!u_buf)) {
		print_err("Failed to allocate %lu bytes for decompression\n", dlen);
		return -1;
	}
	s->buf = u_buf;

	gzerr = uncompress(s->buf, &dlen, c_buf, c_len);
	if (unlikely(gzerr != Z_OK)) {
		print_err("Failed to decompress buffer - gzerr=%d\n", gzerr);
		goto out;
	}

	if (unlikely((i64)dlen != s->buflen)) {
		print_err("Inconsistent length after decompression. Got %lu bytes, expected %lld\n",
			  dlen, (long long)s->buflen);
		goto out;
	}

	ret = 0;
out:
	/* The compressed input is no longer needed, success or not */
	free(c_buf);
	return ret;
}
/* Decompress c_len bytes of LZMA data held in s->buf into a new buffer of
 * s->buflen bytes, which then becomes s->buf. The 5 byte properties block
 * is taken from control.lzma_properties.
 * Returns 0 on success, -1 on failure. If the output buffer cannot be
 * allocated, s->buf still holds the compressed data; on any later failure
 * s->buf holds the (unusable) output buffer. The compressed buffer is
 * released on every path that replaced s->buf. */
static int lzma_decompress_buf(struct stream *s, size_t c_len)
{
	size_t dlen = (size_t)s->buflen;
	uchar *c_buf = s->buf;
	uchar *u_buf;
	int ret = -1;
	int lzmaerr;

	u_buf = malloc(dlen);
	if (unlikely(!u_buf)) {
		print_err("Failed to allocate %lld bytes for decompression\n", (long long)dlen);
		return -1;
	}
	s->buf = u_buf;

	/* With LZMA SDK 4.63 we pass control.lzma_properties
	 * which is needed for proper uncompress */
	lzmaerr = LzmaUncompress(s->buf, &dlen, c_buf, &c_len, control.lzma_properties, 5);
	if (unlikely(lzmaerr)) {
		print_err("Failed to decompress buffer - lzmaerr=%d\n", lzmaerr);
		goto out;
	}

	if (unlikely((i64)dlen != s->buflen)) {
		print_err("Inconsistent length after decompression. Got %lld bytes, expected %lld\n",
			  (long long)dlen, (long long)s->buflen);
		goto out;
	}

	ret = 0;
out:
	/* The compressed input is no longer needed, success or not */
	free(c_buf);
	return ret;
}
/* Decompress c_len bytes of LZO data held in s->buf into a new buffer of
 * s->buflen bytes, which then becomes s->buf.
 * The bounds-checked lzo1x_decompress_safe() is used because the input
 * comes straight from the archive: corrupt data must not be able to write
 * past the end of the output buffer.
 * Returns 0 on success, -1 on failure. If the output buffer cannot be
 * allocated, s->buf still holds the compressed data; on any later failure
 * s->buf holds the (unusable) output buffer. The compressed buffer is
 * released on every path that replaced s->buf. */
static int lzo_decompress_buf(struct stream *s, i64 c_len)
{
	lzo_uint dlen = s->buflen;
	uchar *c_buf = s->buf;
	uchar *u_buf;
	int ret = -1;
	int lzerr;

	u_buf = malloc(dlen);
	if (unlikely(!u_buf)) {
		print_err("Failed to allocate %d bytes for decompression\n", (int)dlen);
		return -1;
	}
	s->buf = u_buf;

	/* dlen carries the output capacity in and the decompressed size out */
	lzerr = lzo1x_decompress_safe((uchar*)c_buf, c_len, (uchar*)s->buf, &dlen, NULL);
	if (unlikely(lzerr != LZO_E_OK)) {
		print_err("Failed to decompress buffer - lzerr=%d\n", lzerr);
		goto out;
	}

	if (unlikely((i64)dlen != s->buflen)) {
		print_err("Inconsistent length after decompression. Got %d bytes, expected %lld\n",
			  (int)dlen, (long long)s->buflen);
		goto out;
	}

	ret = 0;
out:
	/* The compressed input is no longer needed, success or not */
	free(c_buf);
	return ret;
}
/* WORK FUNCTIONS */
/* Largest number of bytes handed to a single read() or write() call by
 * read_1g() and write_1g(): 1000 * 1024 * 1024, i.e. slightly under 1GiB. */
const i64 one_g = 1000 * 1024 * 1024;
/* This is a custom version of write() which writes in 1GB chunks to avoid
the overflows at the >= 2GB mark thanks to 32bit fuckage. This should help
even on the rare occasion write() fails to write 1GB as well. */
2010-03-29 01:07:08 +02:00
ssize_t write_1g(int fd, void *buf, i64 len)
{
uchar *offset_buf = buf;
2010-03-29 01:07:08 +02:00
i64 total, offset;
ssize_t ret;
total = offset = 0;
while (len > 0) {
if (len > one_g)
ret = one_g;
else
ret = len;
ret = write(fd, offset_buf, (size_t)ret);
if (unlikely(ret < 0))
2010-03-29 01:07:08 +02:00
return ret;
len -= ret;
offset_buf += ret;
total += ret;
}
return total;
}
/* Custom version of read() which reads in chunks of at most one_g bytes to
 * avoid the overflows at the >= 2GB mark seen with 32 bit sizes. Short
 * reads are continued from where they stopped.
 * Returns the total number of bytes read, or the negative value returned
 * by a failing read(). The total is less than len when end of file is
 * reached first. */
ssize_t read_1g(int fd, void *buf, i64 len)
{
	uchar *offset_buf = buf;
	i64 total = 0;

	while (len > 0) {
		size_t chunk = (size_t)(len > one_g ? one_g : len);
		ssize_t ret = read(fd, offset_buf, chunk);

		if (unlikely(ret < 0))
			return ret;
		/* read() returns 0 at end of file; without this check a
		 * truncated file would make the loop spin forever */
		if (ret == 0)
			break;
		len -= ret;
		offset_buf += ret;
		total += ret;
	}

	return total;
}
/* Write exactly len bytes from p to file descriptor f using write_1g().
 * Returns 0 on success. Returns -1, after printing a diagnostic, when the
 * write fails or fewer than len bytes were written. */
static int write_buf(int f, uchar *p, i64 len)
{
	ssize_t ret;

	/* Pass the full 64 bit length: write_1g() does its own chunking, and
	 * narrowing to size_t first would truncate it on 32 bit systems */
	ret = write_1g(f, p, len);
	if (unlikely(ret == -1)) {
		print_err("Write of length %lld failed - %s\n", len, strerror(errno));
		return -1;
	}
	if (unlikely((i64)ret != len)) {
		print_err("Partial write!? asked for %lld bytes but got %lld\n", len, (i64)ret);
		return -1;
	}
	return 0;
}
/* Write the single byte v to file descriptor f.
 * Returns whatever write_buf() returns: 0 on success, -1 on failure. */
static int write_u8(int f, uchar v)
{
	uchar byte = v;

	return write_buf(f, &byte, 1);
}
/* Write the 8 bytes of v, exactly as they are laid out in memory, to file
 * descriptor f. Returns 0 on success and -1 on failure. */
static int write_i64(int f, i64 v)
{
	int failed = write_buf(f, (uchar *)&v, 8);

	return failed ? -1 : 0;
}
/* Read exactly len bytes from file descriptor f into p using read_1g().
 * Returns 0 on success. Returns -1, after printing a diagnostic, when the
 * read fails or fewer than len bytes were available. */
static int read_buf(int f, uchar *p, i64 len)
{
	ssize_t ret;

	/* Pass the full 64 bit length: read_1g() does its own chunking, and
	 * narrowing to size_t first would truncate it on 32 bit systems */
	ret = read_1g(f, p, len);
	if (unlikely(ret == -1)) {
		print_err("Read of length %lld failed - %s\n", len, strerror(errno));
		return -1;
	}
	if (unlikely((i64)ret != len)) {
		print_err("Partial read!? asked for %lld bytes but got %lld\n", len, (i64)ret);
		return -1;
	}
	return 0;
}
/* Read a single byte from file descriptor f into *v.
 * Returns whatever read_buf() returns: 0 on success, -1 on failure. */
static int read_u8(int f, uchar *v)
{
	int status = read_buf(f, v, 1);

	return status;
}
/* Read 4 bytes from file descriptor f straight into the storage of *v.
 * Returns 0 on success and -1 on failure. */
static int read_u32(int f, u32 *v)
{
	int failed = read_buf(f, (uchar *)v, 4);

	return failed ? -1 : 0;
}
/* Read 8 bytes from file descriptor f straight into the storage of *v.
 * Returns 0 on success and -1 on failure. */
static int read_i64(int f, i64 *v)
{
	int failed = read_buf(f, (uchar *)v, 8);

	return failed ? -1 : 0;
}
/* Move the file offset of sinfo->fd to pos, where pos is relative to the
 * start of this set of streams (sinfo->initial_pos).
 * Returns 0 on success; prints a diagnostic and returns -1 on failure. */
static int seekto(struct stream_info *sinfo, i64 pos)
{
	const i64 target = sinfo->initial_pos + pos;
	i64 landed = lseek(sinfo->fd, target, SEEK_SET);

	if (unlikely(landed != target)) {
		print_err("Failed to seek to %lld in stream\n", pos);
		return -1;
	}

	return 0;
}
/* open a set of output streams, compressing with the given
compression level and algorithm */
void *open_stream_out(int f, int n, i64 limit)
{
unsigned cwindow = control.window;
struct stream_info *sinfo;
uchar *testmalloc;
int i;
2010-03-29 01:07:08 +02:00
sinfo = malloc(sizeof(*sinfo));
if (unlikely(!sinfo))
2010-03-29 01:07:08 +02:00
return NULL;
Huge rewrite of buffer reading in rzip.c. We use a wrapper instead of accessing the buffer directly, thus allowing us to have window sizes larger than available ram. This is implemented through the use of a "sliding mmap" implementation. Sliding mmap uses two mmapped buffers, one large one as previously, and one page sized smaller one. When an attempt is made to read beyond the end of the large buffer, the small buffer is remapped to the file area that's being accessed. While this implementation is 100x slower than direct mmapping, it allows us to implement unlimited sized compression windows. Implement the -U option with unlimited sized windows. Rework the selection of compression windows. Instead of trying to guess how much ram the machine might be able to access, we try to safely buffer as much ram as we can, and then use that to determine the file buffer size. Do not choose an arbitrary upper window limit unless -w is specified. Rework the -M option to try to buffer the entire file, reducing the buffer size until we succeed. Align buffer sizes to page size. Clean up lots of unneeded variables. Fix lots of minor logic issues to do with window sizes accepted/passed to rzip and the compression backends. More error handling. Change -L to affect rzip compression level directly as well as backend compression level and use 9 by default now. More cleanups of information output. Use 3 point release numbering in case one minor version has many subversions. Numerous minor cleanups and tidying. Updated docs and manpages.
2010-11-04 11:14:55 +01:00
sinfo->bufsize = 0;
2010-03-29 01:07:08 +02:00
sinfo->num_streams = n;
sinfo->cur_pos = 0;
sinfo->fd = f;
/* 10MB streams for non lzma compress. There is virtually no gain
in lzo, gzip and bzip2 with larger streams. With lzma and zpaq,
however, the larger the buffer, the better the compression so we
make it as large as the window up to the limit the compressor
will take */
if (LZMA_COMPRESS) {
2010-03-29 01:07:08 +02:00
if (sizeof(long) == 4) {
/* Largest window supported on lzma 32bit is 600MB */
Huge rewrite of buffer reading in rzip.c. We use a wrapper instead of accessing the buffer directly, thus allowing us to have window sizes larger than available ram. This is implemented through the use of a "sliding mmap" implementation. Sliding mmap uses two mmapped buffers, one large one as previously, and one page sized smaller one. When an attempt is made to read beyond the end of the large buffer, the small buffer is remapped to the file area that's being accessed. While this implementation is 100x slower than direct mmapping, it allows us to implement unlimited sized compression windows. Implement the -U option with unlimited sized windows. Rework the selection of compression windows. Instead of trying to guess how much ram the machine might be able to access, we try to safely buffer as much ram as we can, and then use that to determine the file buffer size. Do not choose an arbitrary upper window limit unless -w is specified. Rework the -M option to try to buffer the entire file, reducing the buffer size until we succeed. Align buffer sizes to page size. Clean up lots of unneeded variables. Fix lots of minor logic issues to do with window sizes accepted/passed to rzip and the compression backends. More error handling. Change -L to affect rzip compression level directly as well as backend compression level and use 9 by default now. More cleanups of information output. Use 3 point release numbering in case one minor version has many subversions. Numerous minor cleanups and tidying. Updated docs and manpages.
2010-11-04 11:14:55 +01:00
if (!cwindow || cwindow > 6)
2010-03-29 01:07:08 +02:00
cwindow = 6;
}
/* Largest window supported on lzma 64bit is 4GB */
Huge rewrite of buffer reading in rzip.c. We use a wrapper instead of accessing the buffer directly, thus allowing us to have window sizes larger than available ram. This is implemented through the use of a "sliding mmap" implementation. Sliding mmap uses two mmapped buffers, one large one as previously, and one page sized smaller one. When an attempt is made to read beyond the end of the large buffer, the small buffer is remapped to the file area that's being accessed. While this implementation is 100x slower than direct mmapping, it allows us to implement unlimited sized compression windows. Implement the -U option with unlimited sized windows. Rework the selection of compression windows. Instead of trying to guess how much ram the machine might be able to access, we try to safely buffer as much ram as we can, and then use that to determine the file buffer size. Do not choose an arbitrary upper window limit unless -w is specified. Rework the -M option to try to buffer the entire file, reducing the buffer size until we succeed. Align buffer sizes to page size. Clean up lots of unneeded variables. Fix lots of minor logic issues to do with window sizes accepted/passed to rzip and the compression backends. More error handling. Change -L to affect rzip compression level directly as well as backend compression level and use 9 by default now. More cleanups of information output. Use 3 point release numbering in case one minor version has many subversions. Numerous minor cleanups and tidying. Updated docs and manpages.
2010-11-04 11:14:55 +01:00
if (!cwindow || cwindow > 40)
2010-03-29 01:07:08 +02:00
cwindow = 40;
}
if (LZMA_COMPRESS || ZPAQ_COMPRESS)
2010-03-29 01:07:08 +02:00
sinfo->bufsize = STREAM_BUFSIZE * 10 * cwindow;
else
sinfo->bufsize = STREAM_BUFSIZE;
/* No point making the stream larger than the amount of data */
Huge rewrite of buffer reading in rzip.c. We use a wrapper instead of accessing the buffer directly, thus allowing us to have window sizes larger than available ram. This is implemented through the use of a "sliding mmap" implementation. Sliding mmap uses two mmapped buffers, one large one as previously, and one page sized smaller one. When an attempt is made to read beyond the end of the large buffer, the small buffer is remapped to the file area that's being accessed. While this implementation is 100x slower than direct mmapping, it allows us to implement unlimited sized compression windows. Implement the -U option with unlimited sized windows. Rework the selection of compression windows. Instead of trying to guess how much ram the machine might be able to access, we try to safely buffer as much ram as we can, and then use that to determine the file buffer size. Do not choose an arbitrary upper window limit unless -w is specified. Rework the -M option to try to buffer the entire file, reducing the buffer size until we succeed. Align buffer sizes to page size. Clean up lots of unneeded variables. Fix lots of minor logic issues to do with window sizes accepted/passed to rzip and the compression backends. More error handling. Change -L to affect rzip compression level directly as well as backend compression level and use 9 by default now. More cleanups of information output. Use 3 point release numbering in case one minor version has many subversions. Numerous minor cleanups and tidying. Updated docs and manpages.
2010-11-04 11:14:55 +01:00
if (sinfo->bufsize)
sinfo->bufsize = MIN(sinfo->bufsize, limit);
else
2010-03-29 01:07:08 +02:00
sinfo->bufsize = limit;
sinfo->initial_pos = lseek(f, 0, SEEK_CUR);
sinfo->s = (struct stream *)calloc(sizeof(sinfo->s[0]), n);
if (unlikely(!sinfo->s)) {
2010-03-29 01:07:08 +02:00
free(sinfo);
return NULL;
}
/* Find the largest we can make the window based on ability to malloc
* ram. We need enough for the 2 streams and for the compression
* backend at most, being conservative. */
retest_malloc:
testmalloc = malloc(sinfo->bufsize * (n + 1));
if (!testmalloc) {
sinfo->bufsize = sinfo->bufsize / 10 * 9;
goto retest_malloc;
}
free(testmalloc);
print_maxverbose("Succeeded to malloc for compression bufsize of %lld\n", sinfo->bufsize);
2010-03-29 01:07:08 +02:00
for (i = 0; i < n; i++) {
sinfo->s[i].buf = malloc(sinfo->bufsize);
if (unlikely(!sinfo->s[i].buf))
fatal("Unable to malloc buffer of size %lld in open_stream_out\n", sinfo->bufsize);
2010-03-29 01:07:08 +02:00
}
/* write the initial headers */
for (i = 0; i < n; i++) {
sinfo->s[i].last_head = sinfo->cur_pos + 17;
write_u8(sinfo->fd, CTYPE_NONE);
write_i64(sinfo->fd, 0);
write_i64(sinfo->fd, 0);
write_i64(sinfo->fd, 0);
sinfo->cur_pos += 25;
}
return (void *)sinfo;
}
/* prepare a set of n streams for reading on file descriptor f.
 * One initial header is read per stream: a type byte followed by three
 * fields that are 4 bytes each for archive versions below 0.4 (13 byte
 * header) and 8 bytes each otherwise (25 byte header). The header must
 * have type CTYPE_NONE and both length fields zero.
 * If the very first header is entirely zero it is skipped by advancing
 * initial_pos past it and reading again ("stream close workaround").
 * Returns the new stream_info as an opaque pointer, or NULL on allocation
 * failure, read failure or an unexpected header. */
void *open_stream_in(int f, int n)
{
	struct stream_info *sinfo;
	int i;

	sinfo = calloc(sizeof(*sinfo), 1);
	if (unlikely(!sinfo))
		return NULL;

	sinfo->num_streams = n;
	sinfo->fd = f;
	sinfo->initial_pos = lseek(f, 0, SEEK_CUR);

	sinfo->s = (struct stream *)calloc(sizeof(sinfo->s[0]), n);
	if (unlikely(!sinfo->s)) {
		free(sinfo);
		return NULL;
	}

	for (i = 0; i < n; i++) {
		i64 header_length, c_len, u_len;
		uchar tag;

		/* Normally runs once; repeats only for the workaround below */
		for (;;) {
			if (unlikely(read_u8(f, &tag)))
				goto failed;

			/* Compatibility crap for versions < 0.40 */
			if (control.major_version == 0 && control.minor_version < 4) {
				u32 c_len32, u_len32, last_head32;

				if (unlikely(read_u32(f, &c_len32)))
					goto failed;
				if (unlikely(read_u32(f, &u_len32)))
					goto failed;
				if ((read_u32(f, &last_head32)))
					goto failed;

				c_len = c_len32;
				u_len = u_len32;
				sinfo->s[i].last_head = last_head32;
				header_length = 13;
			} else {
				if (unlikely(read_i64(f, &c_len)))
					goto failed;
				if (unlikely(read_i64(f, &u_len)))
					goto failed;
				if (unlikely(read_i64(f, &sinfo->s[i].last_head)))
					goto failed;
				header_length = 25;
			}

			if (!unlikely(tag == CTYPE_NONE && c_len == 0 && u_len == 0 &&
				      sinfo->s[i].last_head == 0 && i == 0))
				break;

			print_err("Enabling stream close workaround\n");
			sinfo->initial_pos += header_length;
		}

		sinfo->total_read += header_length;

		if (unlikely(tag != CTYPE_NONE)) {
			print_err("Unexpected initial tag %d in streams\n", tag);
			goto failed;
		}
		if (unlikely(c_len)) {
			print_err("Unexpected initial c_len %lld in streams %lld\n", c_len, u_len);
			goto failed;
		}
		if (unlikely(u_len)) {
			print_err("Unexpected initial u_len %lld in streams\n", u_len);
			goto failed;
		}
	}

	return (void *)sinfo;

failed:
	free(sinfo->s);
	free(sinfo);
	return NULL;
}
/* flush out any data in a stream buffer. Return -1 on failure */
Huge rewrite of buffer reading in rzip.c. We use a wrapper instead of accessing the buffer directly, thus allowing us to have window sizes larger than available ram. This is implemented through the use of a "sliding mmap" implementation. Sliding mmap uses two mmapped buffers, one large one as previously, and one page sized smaller one. When an attempt is made to read beyond the end of the large buffer, the small buffer is remapped to the file area that's being accessed. While this implementation is 100x slower than direct mmapping, it allows us to implement unlimited sized compression windows. Implement the -U option with unlimited sized windows. Rework the selection of compression windows. Instead of trying to guess how much ram the machine might be able to access, we try to safely buffer as much ram as we can, and then use that to determine the file buffer size. Do not choose an arbitrary upper window limit unless -w is specified. Rework the -M option to try to buffer the entire file, reducing the buffer size until we succeed. Align buffer sizes to page size. Clean up lots of unneeded variables. Fix lots of minor logic issues to do with window sizes accepted/passed to rzip and the compression backends. More error handling. Change -L to affect rzip compression level directly as well as backend compression level and use 9 by default now. More cleanups of information output. Use 3 point release numbering in case one minor version has many subversions. Numerous minor cleanups and tidying. Updated docs and manpages.
2010-11-04 11:14:55 +01:00
int flush_buffer(struct stream_info *sinfo, int stream)
2010-03-29 01:07:08 +02:00
{
i64 c_len = sinfo->s[stream].buflen;
int c_type = CTYPE_NONE;
2010-03-29 01:07:08 +02:00
if (unlikely(seekto(sinfo, sinfo->s[stream].last_head)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(write_i64(sinfo->fd, sinfo->cur_pos)))
2010-03-29 01:07:08 +02:00
return -1;
sinfo->s[stream].last_head = sinfo->cur_pos + 17;
if (unlikely(seekto(sinfo, sinfo->cur_pos)))
2010-03-29 01:07:08 +02:00
return -1;
if (!(NO_COMPRESS)) {
if (LZMA_COMPRESS)
2010-03-29 01:07:08 +02:00
lzma_compress_buf(&sinfo->s[stream], &c_type, &c_len);
else if (LZO_COMPRESS)
2010-03-29 01:07:08 +02:00
lzo_compress_buf(&sinfo->s[stream], &c_type, &c_len);
else if (BZIP2_COMPRESS)
2010-03-29 01:07:08 +02:00
bzip2_compress_buf(&sinfo->s[stream], &c_type, &c_len);
else if (ZLIB_COMPRESS)
2010-03-29 01:07:08 +02:00
gzip_compress_buf(&sinfo->s[stream], &c_type, &c_len);
else if (ZPAQ_COMPRESS)
2010-03-29 01:07:08 +02:00
zpaq_compress_buf(&sinfo->s[stream], &c_type, &c_len);
else fatal("Dunno wtf compression to use!\n");
}
if (unlikely(write_u8(sinfo->fd, c_type) ||
write_i64(sinfo->fd, c_len) ||
write_i64(sinfo->fd, sinfo->s[stream].buflen) ||
write_i64(sinfo->fd, 0))) {
2010-03-29 01:07:08 +02:00
return -1;
}
sinfo->cur_pos += 25;
if (unlikely(write_buf(sinfo->fd, sinfo->s[stream].buf, c_len)))
2010-03-29 01:07:08 +02:00
return -1;
fsync(sinfo->fd);
2010-03-29 01:07:08 +02:00
sinfo->cur_pos += c_len;
sinfo->s[stream].buflen = 0;
sinfo->s[stream].buf = realloc(sinfo->s[stream].buf, sinfo->bufsize);
if (unlikely(!sinfo->s[stream].buf))
fatal("Failed to realloc in flush_buffer\n");
2010-03-29 01:07:08 +02:00
return 0;
}
/* fill a buffer from a stream - return -1 on failure */
static int fill_buffer(struct stream_info *sinfo, int stream)
{
i64 header_length, u_len, c_len;
2010-03-29 01:07:08 +02:00
uchar c_type;
if (unlikely(seekto(sinfo, sinfo->s[stream].last_head)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(read_u8(sinfo->fd, &c_type)))
2010-03-29 01:07:08 +02:00
return -1;
/* Compatibility crap for versions < 0.4 */
if (control.major_version == 0 && control.minor_version < 4) {
u32 c_len32, u_len32, last_head32;
if (unlikely(read_u32(sinfo->fd, &c_len32)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(read_u32(sinfo->fd, &u_len32)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(read_u32(sinfo->fd, &last_head32)))
2010-03-29 01:07:08 +02:00
return -1;
c_len = c_len32;
u_len = u_len32;
sinfo->s[stream].last_head = last_head32;
header_length = 13;
} else {
if (unlikely(read_i64(sinfo->fd, &c_len)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(read_i64(sinfo->fd, &u_len)))
2010-03-29 01:07:08 +02:00
return -1;
if (unlikely(read_i64(sinfo->fd, &sinfo->s[stream].last_head)))
2010-03-29 01:07:08 +02:00
return -1;
header_length = 25;
}
sinfo->total_read += header_length;
if (sinfo->s[stream].buf)
2010-11-01 14:08:35 +01:00
sinfo->s[stream].buf = realloc(sinfo->s[stream].buf, u_len);
else
sinfo->s[stream].buf = malloc(u_len);
if (unlikely(!sinfo->s[stream].buf))
2010-11-01 14:08:35 +01:00
fatal("Unable to malloc buffer of size %lld in fill_buffer\n", u_len);
if (unlikely(read_buf(sinfo->fd, sinfo->s[stream].buf, c_len)))
2010-03-29 01:07:08 +02:00
return -1;
sinfo->total_read += c_len;
sinfo->s[stream].buflen = u_len;
sinfo->s[stream].bufp = 0;
if (c_type != CTYPE_NONE) {
if (c_type == CTYPE_LZMA) {
if (unlikely(lzma_decompress_buf(&sinfo->s[stream], (size_t)c_len)))
2010-03-29 01:07:08 +02:00
return -1;
} else if (c_type == CTYPE_LZO) {
if (unlikely(lzo_decompress_buf(&sinfo->s[stream], c_len)))
2010-03-29 01:07:08 +02:00
return -1;
} else if (c_type == CTYPE_BZIP2) {
if (unlikely(bzip2_decompress_buf(&sinfo->s[stream], c_len)))
2010-03-29 01:07:08 +02:00
return -1;
} else if (c_type == CTYPE_GZIP) {
if (unlikely(gzip_decompress_buf(&sinfo->s[stream], c_len)))
2010-03-29 01:07:08 +02:00
return -1;
} else if (c_type == CTYPE_ZPAQ) {
if (unlikely(zpaq_decompress_buf(&sinfo->s[stream])))
2010-03-29 01:07:08 +02:00
return -1;
} else fatal("Dunno wtf decompression type to use!\n");
}
return 0;
}
/* write some data to a stream. Return -1 on failure.
 * The len bytes at p are appended to the stream's buffer in pieces that
 * fit the remaining space; each time the buffer reaches sinfo->bufsize it
 * is flushed with flush_buffer(). */
int write_stream(void *ss, int stream, uchar *p, i64 len)
{
	struct stream_info *sinfo = ss;
	struct stream *s = &sinfo->s[stream];

	while (len) {
		/* As much as still fits in the buffer, at most what is left */
		i64 n = MIN(sinfo->bufsize - s->buflen, len);

		memcpy(s->buf + s->buflen, p, n);
		s->buflen += n;
		p += n;
		len -= n;

		if (s->buflen != sinfo->bufsize)
			continue;
		if (unlikely(flush_buffer(sinfo, stream)))
			return -1;
	}

	return 0;
}
/* read some data from a stream. Return number of bytes read, or -1
   on failure.
   Bytes are copied from the stream's buffer; when it runs dry and more is
   still wanted the next block is loaded with fill_buffer(). Reading stops
   early, returning fewer than len bytes, if a freshly loaded block turns
   out to be empty. */
i64 read_stream(void *ss, int stream, uchar *p, i64 len)
{
	struct stream_info *sinfo = ss;
	struct stream *s = &sinfo->s[stream];
	i64 copied = 0;

	while (len) {
		i64 n = MIN(s->buflen - s->bufp, len);

		if (n > 0) {
			memcpy(p, s->buf + s->bufp, n);
			s->bufp += n;
			p += n;
			len -= n;
			copied += n;
		}

		/* Refill only when more is wanted and the buffer is used up */
		if (!len || s->bufp != s->buflen)
			continue;
		if (unlikely(fill_buffer(sinfo, stream)))
			return -1;
		if (s->bufp == s->buflen)
			break;
	}

	return copied;
}
/* flush and close down a stream. return -1 on failure */
int close_stream_out(void *ss)
{
struct stream_info *sinfo = ss;
int i;
/* reallocate buffers to try and save space */
for (i = 0; i < sinfo->num_streams; i++) {
if (sinfo->s[i].buflen) {
if (unlikely(!realloc(sinfo->s[i].buf, sinfo->s[i].buflen)))
2010-03-29 01:07:08 +02:00
fatal("Error Reallocating Output Buffer %d\n", i);
}
}
for (i = 0; i < sinfo->num_streams; i++) {
if (unlikely(sinfo->s[i].buflen && flush_buffer(sinfo, i)))
2010-03-29 01:07:08 +02:00
return -1;
if (sinfo->s[i].buf)
free(sinfo->s[i].buf);
}
free(sinfo->s);
free(sinfo);
return 0;
}
/* close down an input stream */
int close_stream_in(void *ss)
{
struct stream_info *sinfo = ss;
int i;
if (unlikely(lseek(sinfo->fd, sinfo->initial_pos + sinfo->total_read,
SEEK_SET) != sinfo->initial_pos + sinfo->total_read))
2010-03-29 01:07:08 +02:00
return -1;
for (i = 0; i < sinfo->num_streams; i++) {
if (sinfo->s[i].buf)
free(sinfo->s[i].buf);
}
free(sinfo->s);
free(sinfo);
return 0;
}
/* As others are slow and lzo very fast, it is worth doing a quick lzo pass
to see if there is any compression at all with lzo first. It is unlikely
that others will be able to compress if lzo is unable to drop a single byte
so do not compress any block that is incompressible by lzo. */
static int lzo_compresses(struct stream *s)
{
lzo_bytep wrkmem=NULL;
lzo_uint in_len, test_len = s->buflen, save_len = s->buflen;
lzo_uint dlen;
lzo_int return_var; /* lzo1x_1_compress does not return anything but LZO_OK */
uchar *c_buf = NULL, *test_buf = s->buf;
/* set minimum buffer test size based on the length of the test stream */
unsigned long buftest_size = (test_len > 5 * STREAM_BUFSIZE ? STREAM_BUFSIZE : STREAM_BUFSIZE / 4096);
int ret = 0;
int workcounter = 0; /* count # of passes */
lzo_uint best_dlen = UINT_MAX; /* save best compression estimate */
if (control.threshold > 1)
return 1;
wrkmem = (lzo_bytep) malloc(LZO1X_1_MEM_COMPRESS);
if (unlikely(wrkmem == NULL))
2010-03-29 01:07:08 +02:00
fatal("Unable to allocate wrkmem in lzo_compresses\n");
in_len = MIN(test_len, buftest_size);
dlen = STREAM_BUFSIZE + STREAM_BUFSIZE / 16 + 64 + 3;
c_buf = malloc(dlen);
if (unlikely(!c_buf))
2010-03-29 01:07:08 +02:00
fatal("Unable to allocate c_buf in lzo_compresses\n");
print_progress("\tlzo testing for incompressible data...");
2010-03-29 01:07:08 +02:00
/* Test progressively larger blocks at a time and as soon as anything
compressible is found, jump out as a success */
while (test_len > 0) {
workcounter++;
return_var = lzo1x_1_compress(test_buf, in_len, (uchar *)c_buf, &dlen, wrkmem);
if (dlen < best_dlen)
best_dlen = dlen; /* save best value */
if ((double) dlen < (double)in_len * control.threshold) {
ret = 1;
break;
}
/* expand and move buffer */
test_len -= in_len;
if (test_len) {
test_buf += (ptrdiff_t)in_len;
if (buftest_size < STREAM_BUFSIZE)
buftest_size <<= 1;
in_len = MIN(test_len, buftest_size);
}
}
if (MAX_VERBOSE)
print_output("%s for chunk %ld. Compressed size = %5.2F%% of chunk, %d Passes\n",
2010-03-29 01:07:08 +02:00
(ret == 0? "FAILED - below threshold" : "OK"), save_len,
100 * ((double) best_dlen / (double) in_len), workcounter);
else if (VERBOSE)
print_output("%s\r", (ret == 0? "FAILED - below threshold" : "OK"));
else print_progress("\r\t \r");
2010-03-29 01:07:08 +02:00
free(wrkmem);
free(c_buf);
return ret;
}