Fix the arbitrary lower limit of 128 bytes by compressing zeroes beyond the size of the chunk.

Add special-case handling for zero-sized files.
Modify the percentage calculation to cope with small numbers and zeroes.
This commit is contained in:
Con Kolivas 2011-05-07 18:37:25 +10:00
parent 0662ce3dc6
commit 55dc9b0095
3 changed files with 39 additions and 16 deletions

22
lrzip.c
View file

@ -736,6 +736,20 @@ void get_header_info(rzip_control *control, int fd_in, uchar *ctype, i64 *c_len,
} }
} }
/*
 * Return num expressed as a percentage of den.
 *
 * The whole computation is done in floating point. Dividing den by 100 as
 * an integer first would truncate the divisor (150 / 100 == 1), inflating
 * the result for any den that is not a multiple of 100, and multiplying num
 * by 100 as an integer could overflow for very large values.
 *
 * A zero denominator (e.g. an empty file) is treated as 1 so that the call
 * never divides by zero; percentage(0, 0) is therefore 0.
 */
static double percentage(i64 num, i64 den)
{
	const double d_den = den ? (double)den : 1.0;

	return (double)num * 100.0 / d_den;
}
void get_fileinfo(rzip_control *control) void get_fileinfo(rzip_control *control)
{ {
i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 25, stream_head[2]; i64 u_len, c_len, last_head, utotal = 0, ctotal = 0, ofs = 25, stream_head[2];
@ -861,7 +875,7 @@ next_chunk:
print_verbose("Dunno wtf"); print_verbose("Dunno wtf");
utotal += u_len; utotal += u_len;
ctotal += c_len; ctotal += c_len;
print_verbose("\t%.1f%%\t%lld / %lld", (double)c_len / (double)(u_len / 100), c_len, u_len); print_verbose("\t%.1f%%\t%lld / %lld", percentage(c_len, u_len), c_len, u_len);
print_maxverbose("\tOffset: %lld\tHead: %lld", head_off, last_head); print_maxverbose("\tOffset: %lld\tHead: %lld", head_off, last_head);
print_verbose("\n"); print_verbose("\n");
block++; block++;
@ -896,13 +910,13 @@ done:
if (chunk_total > expected_size) if (chunk_total > expected_size)
expected_size = chunk_total; expected_size = chunk_total;
print_verbose("Rzip compression: %.1f%% %lld / %lld\n", print_verbose("Rzip compression: %.1f%% %lld / %lld\n",
(double)utotal / (double)(expected_size / 100), percentage (utotal, expected_size),
utotal, expected_size); utotal, expected_size);
print_verbose("Back end compression: %.1f%% %lld / %lld\n", print_verbose("Back end compression: %.1f%% %lld / %lld\n",
(double)ctotal / (double)(utotal / 100), percentage(ctotal, utotal),
ctotal, utotal); ctotal, utotal);
print_verbose("Overall compression: %.1f%% %lld / %lld\n", print_verbose("Overall compression: %.1f%% %lld / %lld\n",
(double)ctotal / (double)(expected_size / 100), percentage(ctotal, expected_size),
ctotal, expected_size); ctotal, expected_size);
cratio = (long double)expected_size / (long double)infile_size; cratio = (long double)expected_size / (long double)infile_size;

22
rzip.c
View file

@ -558,6 +558,7 @@ static void hash_search(rzip_control *control, struct rzip_state *st, double pct
current.p = p; current.p = p;
current.ofs = 0; current.ofs = 0;
if (likely(end > 0))
t = full_tag(control, st, p); t = full_tag(control, st, p);
while (p < end) { while (p < end) {
@ -693,13 +694,18 @@ static void mmap_stdin(rzip_control *control, uchar *buf, struct rzip_state *st)
total += ret; total += ret;
if (ret == 0) { if (ret == 0) {
/* Should be EOF */ /* Should be EOF */
if (total < 128)
failure("Will not compress a tiny file\n");
print_maxverbose("Shrinking chunk to %lld\n", total); print_maxverbose("Shrinking chunk to %lld\n", total);
if (likely(total)) {
buf = (uchar *)mremap(buf, st->chunk_size, total, 0); buf = (uchar *)mremap(buf, st->chunk_size, total, 0);
st->mmap_size = st->chunk_size = total;
} else {
/* Empty file */
buf = (uchar *)mremap(buf, st->chunk_size, control->page_size, 0);
st->mmap_size = control->page_size;
st->chunk_size = 0;
}
if (unlikely(buf == MAP_FAILED)) if (unlikely(buf == MAP_FAILED))
fatal("Failed to remap to smaller buf in mmap_stdin\n"); fatal("Failed to remap to smaller buf in mmap_stdin\n");
st->mmap_size = st->chunk_size = total;
control->eof = st->stdin_eof = 1; control->eof = st->stdin_eof = 1;
break; break;
} }
@ -792,8 +798,6 @@ void rzip_fd(rzip_control *control, int fd_in, int fd_out)
if (!STDIN) { if (!STDIN) {
len = control->st_size = s.st_size; len = control->st_size = s.st_size;
if (len < 128)
failure("Will not compress a tiny file\n");
print_verbose("File size: %lld\n", len); print_verbose("File size: %lld\n", len);
} else } else
control->st_size = 0; control->st_size = 0;
@ -855,7 +859,7 @@ void rzip_fd(rzip_control *control, int fd_in, int fd_out)
prepare_streamout_threads(control); prepare_streamout_threads(control);
while (len > 0 || (STDIN && !st->stdin_eof)) { while (!pass || len > 0 || (STDIN && !st->stdin_eof)) {
double pct_base, pct_multiple; double pct_base, pct_multiple;
i64 offset = s.st_size - len; i64 offset = s.st_size - len;
int bits = 8; int bits = 8;
@ -864,7 +868,10 @@ void rzip_fd(rzip_control *control, int fd_in, int fd_out)
st->mmap_size = control->max_mmap; st->mmap_size = control->max_mmap;
if (!STDIN) { if (!STDIN) {
st->chunk_size = MIN(st->chunk_size, len); st->chunk_size = MIN(st->chunk_size, len);
if (likely(st->chunk_size))
st->mmap_size = MIN(st->mmap_size, len); st->mmap_size = MIN(st->mmap_size, len);
else
st->mmap_size = control->page_size;
} }
retry: retry:
@ -903,7 +910,7 @@ retry:
if (st->chunk_size > control->ramsize) if (st->chunk_size > control->ramsize)
print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n"); print_verbose("Compression window is larger than ram, will proceed with unlimited mode possibly much slower\n");
if (!passes && !STDIN) { if (!passes && !STDIN && st->chunk_size) {
passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size); passes = s.st_size / st->chunk_size + !!(s.st_size % st->chunk_size);
if (passes == 1) if (passes == 1)
print_verbose("Will take 1 pass\n"); print_verbose("Will take 1 pass\n");
@ -973,6 +980,7 @@ retry:
if (unlikely(len > 0 && control->eof)) if (unlikely(len > 0 && control->eof))
failure("Wrote EOF to file yet chunk_size was shrunk, corrupting archive.\n"); failure("Wrote EOF to file yet chunk_size was shrunk, corrupting archive.\n");
} }
close_streams:
close_streamout_threads(control); close_streamout_threads(control);
if (likely(st->hash_table)) if (likely(st->hash_table))

View file

@ -966,7 +966,8 @@ void *open_stream_out(rzip_control *control, int f, unsigned int n, i64 chunk_li
sinfo = calloc(sizeof(struct stream_info), 1); sinfo = calloc(sizeof(struct stream_info), 1);
if (unlikely(!sinfo)) if (unlikely(!sinfo))
return NULL; return NULL;
if (chunk_limit < control->page_size)
chunk_limit = control->page_size;
sinfo->bufsize = sinfo->size = limit = chunk_limit; sinfo->bufsize = sinfo->size = limit = chunk_limit;
sinfo->chunk_bytes = cbytes; sinfo->chunk_bytes = cbytes;
@ -1044,7 +1045,7 @@ retest_malloc:
sinfo->bufsize); sinfo->bufsize);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
sinfo->s[i].buf = malloc(sinfo->bufsize); sinfo->s[i].buf = calloc(sinfo->bufsize , 1);
if (unlikely(!sinfo->s[i].buf)) if (unlikely(!sinfo->s[i].buf))
fatal("Unable to malloc buffer of size %lld in open_stream_out\n", sinfo->bufsize); fatal("Unable to malloc buffer of size %lld in open_stream_out\n", sinfo->bufsize);
} }