mirror of
https://github.com/ckolivas/lrzip.git
synced 2025-12-06 07:12:00 +01:00
Add more robust checking.
Premalloc RAM to improve early detection of being unable to allocate that much RAM. Make sure to always make chunk size a multiple of page size for mmap to work. Begin changes to use variable byte-width offsets in rzip chunks. Decrease header entries to only 2 bytes wide as per original rzip. Random other tidying.
This commit is contained in:
parent
d396a8a360
commit
c5da3a1adb
|
|
@ -1,3 +1,8 @@
|
||||||
|
lrzip-0.50 update
|
||||||
|
|
||||||
|
Files created with lrzip 0.50+ are not backward compatible with versions
|
||||||
|
prior to 0.50. v0.50 can still read files generated by earlier versions.
|
||||||
|
|
||||||
lrzip-0.41 update
|
lrzip-0.41 update
|
||||||
|
|
||||||
Files created with lrzip 0.41 and selecting the -z option for
|
Files created with lrzip 0.41 and selecting the -z option for
|
||||||
|
|
@ -17,7 +22,7 @@ Con Kolivas November 2009.
|
||||||
lrzip-0.24 update!
|
lrzip-0.24 update!
|
||||||
|
|
||||||
FILES CREATED WITH LRZIP 0.23 and earlier are NOT
|
FILES CREATED WITH LRZIP 0.23 and earlier are NOT
|
||||||
BACKWARE COMPATIBLE if compressed with LZMA.
|
BACKWARD COMPATIBLE if compressed with LZMA.
|
||||||
|
|
||||||
All other compression schemes are compatible.
|
All other compression schemes are compatible.
|
||||||
|
|
||||||
|
|
|
||||||
95
runzip.c
95
runzip.c
|
|
@ -28,15 +28,6 @@ static inline uchar read_u8(void *ss, int stream)
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u16 read_u16(void *ss, int stream)
|
|
||||||
{
|
|
||||||
u16 ret;
|
|
||||||
|
|
||||||
if (read_stream(ss, stream, (uchar *)&ret, 2) != 2)
|
|
||||||
fatal("Stream read u16 failed\n");
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline u32 read_u32(void *ss, int stream)
|
static inline u32 read_u32(void *ss, int stream)
|
||||||
{
|
{
|
||||||
u32 ret;
|
u32 ret;
|
||||||
|
|
@ -45,28 +36,30 @@ static inline u32 read_u32(void *ss, int stream)
|
||||||
fatal("Stream read u32 failed\n");
|
fatal("Stream read u32 failed\n");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
/* Read a variable length of chars dependent on how big the chunk was */
|
||||||
static inline i64 read_i64(void *ss, int stream)
|
static inline i64 read_vchars(void *ss, int stream, int length)
|
||||||
{
|
{
|
||||||
i64 ret;
|
int bytes;
|
||||||
|
i64 s = 0;
|
||||||
|
|
||||||
if (read_stream(ss, stream, (uchar *)&ret, 8) != 8)
|
for (bytes = 0; bytes < length; bytes++) {
|
||||||
fatal("Stream read i64 failed\n");
|
int bits = bytes * 8;
|
||||||
return ret;
|
|
||||||
|
uchar sb = read_u8(ss, stream);
|
||||||
|
s |= (i64)sb << bits;
|
||||||
}
|
}
|
||||||
|
return s;
|
||||||
static u16 read_header_v03(void *ss, uchar *head)
|
|
||||||
{
|
|
||||||
*head = read_u8(ss, 0);
|
|
||||||
return read_u16(ss, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static i64 read_header(void *ss, uchar *head)
|
static i64 read_header(void *ss, uchar *head)
|
||||||
{
|
{
|
||||||
if (control.major_version == 0 && control.minor_version < 4)
|
int chunk_bytes = 2;
|
||||||
return read_header_v03(ss, head);
|
|
||||||
|
/* All chunks were unnecessarily encoded 8 bytes wide in version 0.4x */
|
||||||
|
if (control.major_version == 0 && control.minor_version == 4)
|
||||||
|
chunk_bytes = 8;
|
||||||
*head = read_u8(ss, 0);
|
*head = read_u8(ss, 0);
|
||||||
return read_i64(ss, 0);
|
return read_vchars(ss, 0, chunk_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
|
static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
|
||||||
|
|
@ -93,63 +86,29 @@ static i64 unzip_literal(void *ss, i64 len, int fd_out, uint32 *cksum)
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
static i64 unzip_match_v03(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
|
|
||||||
{
|
|
||||||
u32 offset;
|
|
||||||
i64 n, total = 0;
|
|
||||||
i64 cur_pos = lseek(fd_out, 0, SEEK_CUR);
|
|
||||||
|
|
||||||
if (cur_pos == -1)
|
|
||||||
fatal("Seek failed on out file in unzip_match.\n");
|
|
||||||
|
|
||||||
offset = read_u32(ss, 0);
|
|
||||||
|
|
||||||
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
|
|
||||||
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
|
|
||||||
offset, cur_pos, strerror(errno));
|
|
||||||
|
|
||||||
while (len) {
|
|
||||||
uchar *buf;
|
|
||||||
n = MIN(len, offset);
|
|
||||||
|
|
||||||
buf = malloc((size_t)n);
|
|
||||||
if (!buf)
|
|
||||||
fatal("Failed to allocate %d bytes in unzip_match\n", n);
|
|
||||||
|
|
||||||
if (read_1g(fd_hist, buf, (size_t)n) != (ssize_t)n)
|
|
||||||
fatal("Failed to read %d bytes in unzip_match\n", n);
|
|
||||||
|
|
||||||
if (write_1g(fd_out, buf, (size_t)n) != (ssize_t)n)
|
|
||||||
fatal("Failed to write %d bytes in unzip_match\n", n);
|
|
||||||
|
|
||||||
*cksum = CrcUpdate(*cksum, buf, n);
|
|
||||||
|
|
||||||
len -= n;
|
|
||||||
free(buf);
|
|
||||||
total += n;
|
|
||||||
}
|
|
||||||
|
|
||||||
return total;
|
|
||||||
}
|
|
||||||
|
|
||||||
static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
|
static i64 unzip_match(void *ss, i64 len, int fd_out, int fd_hist, uint32 *cksum)
|
||||||
{
|
{
|
||||||
i64 cur_pos;
|
i64 offset, n, total, cur_pos;
|
||||||
i64 offset, n, total;
|
int chunk_bytes = 8;
|
||||||
|
|
||||||
if (len < 0)
|
if (len < 0)
|
||||||
fatal("len %lld is negative in unzip_match!\n",len);
|
fatal("len %lld is negative in unzip_match!\n",len);
|
||||||
|
|
||||||
if (control.major_version == 0 && control.minor_version < 4)
|
if (control.major_version == 0) {
|
||||||
return unzip_match_v03(ss, len, fd_out, fd_hist, cksum);
|
/* Versions < 0.4 used 4 bytes for all offsets, version 0.4 used 8 bytes.
|
||||||
|
* Versions 0.5+ used a variable number of bytes depending on block size. */
|
||||||
|
if (control.minor_version < 4)
|
||||||
|
chunk_bytes = 4;
|
||||||
|
else if (control.minor_version == 4)
|
||||||
|
chunk_bytes = 8;
|
||||||
|
}
|
||||||
total = 0;
|
total = 0;
|
||||||
cur_pos = lseek(fd_out, 0, SEEK_CUR);
|
cur_pos = lseek(fd_out, 0, SEEK_CUR);
|
||||||
if (cur_pos == -1)
|
if (cur_pos == -1)
|
||||||
fatal("Seek failed on out file in unzip_match.\n");
|
fatal("Seek failed on out file in unzip_match.\n");
|
||||||
|
|
||||||
/* Note the offset is in a different format v0.40+ */
|
/* Note the offset is in a different format v0.40+ */
|
||||||
offset = read_i64(ss, 0);
|
offset = read_vchars(ss, 0, chunk_bytes);
|
||||||
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
|
if (lseek(fd_hist, cur_pos - offset, SEEK_SET) == -1)
|
||||||
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
|
fatal("Seek failed by %d from %d on history file in unzip_match - %s\n",
|
||||||
offset, cur_pos, strerror(errno));
|
offset, cur_pos, strerror(errno));
|
||||||
|
|
|
||||||
68
rzip.c
68
rzip.c
|
|
@ -20,7 +20,6 @@
|
||||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||||
*/
|
*/
|
||||||
/* rzip compression algorithm */
|
/* rzip compression algorithm */
|
||||||
|
|
||||||
#include "rzip.h"
|
#include "rzip.h"
|
||||||
|
|
||||||
#define CHUNK_MULTIPLE (100 * 1024 * 1024)
|
#define CHUNK_MULTIPLE (100 * 1024 * 1024)
|
||||||
|
|
@ -88,25 +87,32 @@ struct rzip_state {
|
||||||
static inline void put_u8(void *ss, int stream, uchar b)
|
static inline void put_u8(void *ss, int stream, uchar b)
|
||||||
{
|
{
|
||||||
if (write_stream(ss, stream, &b, 1) != 0)
|
if (write_stream(ss, stream, &b, 1) != 0)
|
||||||
fatal(NULL);
|
fatal("Failed to put_u8\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void put_u32(void *ss, int stream, uint32_t s)
|
static inline void put_u32(void *ss, int stream, uint32_t s)
|
||||||
{
|
{
|
||||||
if (write_stream(ss, stream, (uchar *)&s, 4))
|
if (write_stream(ss, stream, (uchar *)&s, 4))
|
||||||
fatal(NULL);
|
fatal("Failed to put_u32\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void put_i64(void *ss, int stream, i64 s)
|
/* Put a variable length of bytes dependent on how big the chunk is */
|
||||||
|
static inline void put_vchars(void *ss, int stream, i64 s, int length)
|
||||||
{
|
{
|
||||||
if (write_stream(ss, stream, (uchar *)&s, 8))
|
int bytes;
|
||||||
fatal(NULL);
|
|
||||||
|
for (bytes = 0; bytes < length; bytes++) {
|
||||||
|
int bits = bytes * 8;
|
||||||
|
uchar sb = (s >> bits) & (i64)0XFF;
|
||||||
|
|
||||||
|
put_u8(ss, stream, sb);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void put_header(void *ss, uchar head, i64 len)
|
static void put_header(void *ss, uchar head, i64 len)
|
||||||
{
|
{
|
||||||
put_u8(ss, 0, head);
|
put_u8(ss, 0, head);
|
||||||
put_i64(ss, 0, len);
|
put_vchars(ss, 0, len, 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len)
|
static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i64 len)
|
||||||
|
|
@ -114,10 +120,11 @@ static void put_match(struct rzip_state *st, uchar *p, uchar *buf, i64 offset, i
|
||||||
do {
|
do {
|
||||||
i64 ofs;
|
i64 ofs;
|
||||||
i64 n = len;
|
i64 n = len;
|
||||||
|
if (n > 0xFFFF) n = 0xFFFF;
|
||||||
|
|
||||||
ofs = (p - (buf + offset));
|
ofs = (p - (buf + offset));
|
||||||
put_header(st->ss, 1, n);
|
put_header(st->ss, 1, n);
|
||||||
put_i64(st->ss, 0, ofs);
|
put_vchars(st->ss, 0, ofs, 8);
|
||||||
|
|
||||||
st->stats.matches++;
|
st->stats.matches++;
|
||||||
st->stats.match_bytes += n;
|
st->stats.match_bytes += n;
|
||||||
|
|
@ -131,6 +138,7 @@ static void put_literal(struct rzip_state *st, uchar *last, uchar *p)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
i64 len = (i64)(p - last);
|
i64 len = (i64)(p - last);
|
||||||
|
if (len > 0xFFFF) len = 0xFFFF;
|
||||||
|
|
||||||
st->stats.literals++;
|
st->stats.literals++;
|
||||||
st->stats.literal_bytes += len;
|
st->stats.literal_bytes += len;
|
||||||
|
|
@ -519,21 +527,41 @@ static void rzip_chunk(struct rzip_state *st, int fd_in, int fd_out, i64 offset,
|
||||||
{
|
{
|
||||||
uchar *buf;
|
uchar *buf;
|
||||||
|
|
||||||
buf = (uchar *)mmap(NULL, st->chunk_size, PROT_READ, MAP_SHARED, fd_in, offset);
|
/* Malloc'ing first will tell us if we can allocate this much ram
|
||||||
|
* faster than slowly reading in the file and then failing. Filling
|
||||||
|
* it with zeroes has a defragmenting effect on ram before the real
|
||||||
|
* read in. */
|
||||||
|
if (control.flags & FLAG_SHOW_PROGRESS)
|
||||||
|
fprintf(control.msgout, "Allocating ram...\n");
|
||||||
|
buf = malloc(st->chunk_size);
|
||||||
|
if (!buf)
|
||||||
|
fatal("Failed to premalloc in rzip_chunk\n");
|
||||||
|
if (!memset(buf, 0, st->chunk_size))
|
||||||
|
fatal("Failed to memset in rzip_chunk\n");
|
||||||
|
free(buf);
|
||||||
|
buf = (uchar *)mmap(buf, st->chunk_size, PROT_READ, MAP_SHARED | MAP_POPULATE, fd_in, offset);
|
||||||
if (buf == (uchar *)-1)
|
if (buf == (uchar *)-1)
|
||||||
fatal("Failed to map buffer in rzip_fd\n");
|
fatal("Failed to map buffer in rzip_chunk\n");
|
||||||
|
|
||||||
st->ss = open_stream_out(fd_out, NUM_STREAMS, limit);
|
st->ss = open_stream_out(fd_out, NUM_STREAMS, limit);
|
||||||
if (!st->ss)
|
if (!st->ss)
|
||||||
fatal("Failed to open streams in rzip_fd\n");
|
fatal("Failed to open streams in rzip_chunk\n");
|
||||||
hash_search(st, buf, pct_base, pct_multiple);
|
hash_search(st, buf, pct_base, pct_multiple);
|
||||||
/* unmap buffer before closing and reallocating streams */
|
/* unmap buffer before closing and reallocating streams */
|
||||||
munmap(buf, st->chunk_size);
|
munmap(buf, st->chunk_size);
|
||||||
|
|
||||||
if (close_stream_out(st->ss) != 0)
|
if (close_stream_out(st->ss) != 0)
|
||||||
fatal("Failed to flush/close streams in rzip_fd\n");
|
fatal("Failed to flush/close streams in rzip_chunk\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Windows must be a multiple of _SC_PAGE_SIZE for offset to work in mmap */
|
||||||
|
static void round_to_page_size(i64 *chunk)
|
||||||
|
{
|
||||||
|
unsigned long page_size = sysconf(_SC_PAGE_SIZE);
|
||||||
|
i64 pages = *chunk / page_size + 1;
|
||||||
|
|
||||||
|
*chunk = pages * page_size;
|
||||||
|
}
|
||||||
|
|
||||||
/* compress a whole file chunks at a time */
|
/* compress a whole file chunks at a time */
|
||||||
void rzip_fd(int fd_in, int fd_out)
|
void rzip_fd(int fd_in, int fd_out)
|
||||||
|
|
@ -549,9 +577,8 @@ void rzip_fd(int fd_in, int fd_out)
|
||||||
struct stat s, s2;
|
struct stat s, s2;
|
||||||
struct rzip_state *st;
|
struct rzip_state *st;
|
||||||
i64 len, last_chunk = 0;
|
i64 len, last_chunk = 0;
|
||||||
i64 chunk_window, pages;
|
i64 chunk_window;
|
||||||
int pass = 0, passes;
|
int pass = 0, passes;
|
||||||
unsigned long page_size;
|
|
||||||
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours,
|
unsigned int eta_hours, eta_minutes, eta_seconds, elapsed_hours,
|
||||||
elapsed_minutes, elapsed_seconds;
|
elapsed_minutes, elapsed_seconds;
|
||||||
double finish_time, elapsed_time, chunkmbs;
|
double finish_time, elapsed_time, chunkmbs;
|
||||||
|
|
@ -572,9 +599,7 @@ void rzip_fd(int fd_in, int fd_out)
|
||||||
|
|
||||||
/* Windows must be a multiple of _SC_PAGE_SIZE for offset to work in mmap */
|
/* Windows must be a multiple of _SC_PAGE_SIZE for offset to work in mmap */
|
||||||
chunk_window = control.window * CHUNK_MULTIPLE;
|
chunk_window = control.window * CHUNK_MULTIPLE;
|
||||||
page_size = sysconf(_SC_PAGE_SIZE);
|
round_to_page_size(&chunk_window);
|
||||||
pages = chunk_window / page_size;
|
|
||||||
chunk_window = pages * page_size;
|
|
||||||
|
|
||||||
st->level = &levels[MIN(9, control.window)];
|
st->level = &levels[MIN(9, control.window)];
|
||||||
st->fd_in = fd_in;
|
st->fd_in = fd_in;
|
||||||
|
|
@ -588,14 +613,17 @@ void rzip_fd(int fd_in, int fd_out)
|
||||||
last.tv_sec = last.tv_usec = 0;
|
last.tv_sec = last.tv_usec = 0;
|
||||||
gettimeofday(&start, NULL);
|
gettimeofday(&start, NULL);
|
||||||
|
|
||||||
while (len) {
|
while (len > 0) {
|
||||||
i64 chunk, limit = 0;
|
i64 chunk, limit = 0;
|
||||||
double pct_base, pct_multiple;
|
double pct_base, pct_multiple;
|
||||||
|
|
||||||
chunk = chunk_window;
|
chunk = chunk_window;
|
||||||
|
|
||||||
if (chunk > len)
|
if (chunk > len) {
|
||||||
limit = chunk = len;
|
chunk = len;
|
||||||
|
round_to_page_size(&chunk);
|
||||||
|
limit = chunk;
|
||||||
|
}
|
||||||
|
|
||||||
pct_base = (100.0 * (s.st_size - len)) / s.st_size;
|
pct_base = (100.0 * (s.st_size - len)) / s.st_size;
|
||||||
pct_multiple = ((double)chunk) / s.st_size;
|
pct_multiple = ((double)chunk) / s.st_size;
|
||||||
|
|
|
||||||
4
rzip.h
4
rzip.h
|
|
@ -18,8 +18,8 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define LRZIP_MAJOR_VERSION 0
|
#define LRZIP_MAJOR_VERSION 0
|
||||||
#define LRZIP_MINOR_VERSION 4
|
#define LRZIP_MINOR_VERSION 5
|
||||||
#define LRZIP_MINOR_SUBVERSION 7
|
#define LRZIP_MINOR_SUBVERSION 0
|
||||||
|
|
||||||
#define NUM_STREAMS 2
|
#define NUM_STREAMS 2
|
||||||
|
|
||||||
|
|
|
||||||
2
stream.c
2
stream.c
|
|
@ -802,7 +802,7 @@ static int flush_buffer(struct stream_info *sinfo, int stream)
|
||||||
free(sinfo->s[stream].buf);
|
free(sinfo->s[stream].buf);
|
||||||
sinfo->s[stream].buf = malloc(sinfo->bufsize);
|
sinfo->s[stream].buf = malloc(sinfo->bufsize);
|
||||||
if (!sinfo->s[stream].buf)
|
if (!sinfo->s[stream].buf)
|
||||||
return -1;
|
fatal("Failed to malloc in flush_buffer\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue