mirror of
https://github.com/ckolivas/lrzip.git
synced 2025-12-06 07:12:00 +01:00
get_sb only allows accessing one byte at a time, yet we don't need that functionality when sliding mmap is not in use.
Use different versions of the function, and larger memcpys, depending on whether sliding mmap is in use or not. This affords a substantial speedup in the rzip phase for files not requiring sliding mmap. A small optimisation, moving the check for remapping the low buffer out of get_sb and into hash_search, also speeds up the sliding mmap version slightly.
This commit is contained in:
parent
64be4018b4
commit
f496e0705d
63
rzip.c
63
rzip.c
|
|
@ -180,13 +180,9 @@ static inline void remap_high_sb(rzip_control *control, i64 p)
|
||||||
* it, and a 64k mmap block that slides up and down as is required for any
|
* it, and a 64k mmap block that slides up and down as is required for any
|
||||||
* offsets outside the range of the lower one. This is much slower than mmap
|
* offsets outside the range of the lower one. This is much slower than mmap
|
||||||
* but makes it possible to have unlimited sized compression windows. */
|
* but makes it possible to have unlimited sized compression windows. */
|
||||||
static uchar *get_sb(rzip_control *control, i64 p)
|
static uchar *sliding_get_sb(rzip_control *control, i64 p)
|
||||||
{
|
{
|
||||||
i64 low_end = sb.offset_low + sb.size_low;
|
if (p >= sb.offset_low && p < sb.offset_low + sb.size_low)
|
||||||
|
|
||||||
if (unlikely(sb.offset_search > low_end))
|
|
||||||
remap_low_sb(control);
|
|
||||||
if (p >= sb.offset_low && p < low_end)
|
|
||||||
return (sb.buf_low + p - sb.offset_low);
|
return (sb.buf_low + p - sb.offset_low);
|
||||||
if (p >= sb.offset_high && p < (sb.offset_high + sb.size_high))
|
if (p >= sb.offset_high && p < (sb.offset_high + sb.size_high))
|
||||||
return (sb.buf_high + (p - sb.offset_high));
|
return (sb.buf_high + (p - sb.offset_high));
|
||||||
|
|
@ -195,6 +191,34 @@ static uchar *get_sb(rzip_control *control, i64 p)
|
||||||
return (sb.buf_high + (p - sb.offset_high));
|
return (sb.buf_high + (p - sb.offset_high));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static uchar *single_get_sb(rzip_control *control, i64 p)
|
||||||
|
{
|
||||||
|
return (sb.buf_low + p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We use a pointer to the function we actually want to use and only enable
|
||||||
|
* the sliding mmap version if we need sliding mmap functionality as this is
|
||||||
|
* a hot function during the rzip phase */
|
||||||
|
static uchar *(*get_sb)(rzip_control *control, i64 p);
|
||||||
|
|
||||||
|
static void sliding_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
|
||||||
|
{
|
||||||
|
i64 i;
|
||||||
|
|
||||||
|
for (i = 0; i < len; i++)
|
||||||
|
memcpy(buf + i, sliding_get_sb(control, offset + i), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void single_mcpy(rzip_control *control, unsigned char *buf, i64 offset, i64 len)
|
||||||
|
{
|
||||||
|
memcpy(buf, sb.buf_low + offset, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since the sliding get_sb only allows us to access one byte at a time, we
|
||||||
|
* do the same as we did with get_sb with the memcpy since one memcpy is much
|
||||||
|
* faster than numerous memcpys 1 byte at a time */
|
||||||
|
static void (*do_mcpy)(rzip_control *control, unsigned char *buf, i64 offset, i64 len);
|
||||||
|
|
||||||
/* All put_u8/u32/vchars go to stream 0 */
|
/* All put_u8/u32/vchars go to stream 0 */
|
||||||
static inline void put_u8(rzip_control *control, void *ss, uchar b)
|
static inline void put_u8(rzip_control *control, void *ss, uchar b)
|
||||||
{
|
{
|
||||||
|
|
@ -249,14 +273,10 @@ static int write_sbstream(rzip_control *control, void *ss, int stream, i64 p, i6
|
||||||
struct stream_info *sinfo = ss;
|
struct stream_info *sinfo = ss;
|
||||||
|
|
||||||
while (len) {
|
while (len) {
|
||||||
i64 n, i;
|
i64 n = MIN(sinfo->bufsize - sinfo->s[stream].buflen, len);
|
||||||
|
|
||||||
n = MIN(sinfo->bufsize - sinfo->s[stream].buflen, len);
|
do_mcpy(control, sinfo->s[stream].buf + sinfo->s[stream].buflen, p, n);
|
||||||
|
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
memcpy(sinfo->s[stream].buf + sinfo->s[stream].buflen + i,
|
|
||||||
get_sb(control, p + i), 1);
|
|
||||||
}
|
|
||||||
sinfo->s[stream].buflen += n;
|
sinfo->s[stream].buflen += n;
|
||||||
p += n;
|
p += n;
|
||||||
len -= n;
|
len -= n;
|
||||||
|
|
@ -566,6 +586,8 @@ static void hash_search(rzip_control *control, struct rzip_state *st, double pct
|
||||||
|
|
||||||
p++;
|
p++;
|
||||||
sb.offset_search = p;
|
sb.offset_search = p;
|
||||||
|
if (unlikely(sb.offset_search > sb.offset_low + sb.size_low))
|
||||||
|
remap_low_sb(control);
|
||||||
t = next_tag(control, st, p, t);
|
t = next_tag(control, st, p, t);
|
||||||
|
|
||||||
/* Don't look for a match if there are no tags with
|
/* Don't look for a match if there are no tags with
|
||||||
|
|
@ -617,13 +639,12 @@ static void hash_search(rzip_control *control, struct rzip_state *st, double pct
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p > (i64)cksum_limit) {
|
if (p > (i64)cksum_limit) {
|
||||||
i64 i, n = MIN(st->chunk_size - p, control->page_size);
|
i64 n = MIN(st->chunk_size - p, control->page_size);
|
||||||
uchar *ckbuf = malloc(n);
|
uchar *ckbuf = malloc(n);
|
||||||
|
|
||||||
if (unlikely(!ckbuf))
|
if (unlikely(!ckbuf))
|
||||||
fatal("Failed to malloc ckbuf in hash_search\n");
|
fatal("Failed to malloc ckbuf in hash_search\n");
|
||||||
for (i = 0; i < n; i++)
|
do_mcpy(control, ckbuf, cksum_limit, n);
|
||||||
memcpy(ckbuf + i, get_sb(control, cksum_limit + i), 1);
|
|
||||||
st->cksum = CrcUpdate(st->cksum, ckbuf, n);
|
st->cksum = CrcUpdate(st->cksum, ckbuf, n);
|
||||||
if (!NO_MD5)
|
if (!NO_MD5)
|
||||||
md5_process_bytes(ckbuf, n, &control->ctx);
|
md5_process_bytes(ckbuf, n, &control->ctx);
|
||||||
|
|
@ -639,13 +660,12 @@ static void hash_search(rzip_control *control, struct rzip_state *st, double pct
|
||||||
put_literal(control, st, st->last_match, st->chunk_size);
|
put_literal(control, st, st->last_match, st->chunk_size);
|
||||||
|
|
||||||
if (st->chunk_size > cksum_limit) {
|
if (st->chunk_size > cksum_limit) {
|
||||||
i64 i, n = st->chunk_size - cksum_limit;
|
i64 n = st->chunk_size - cksum_limit;
|
||||||
uchar *ckbuf = malloc(n);
|
uchar *ckbuf = malloc(n);
|
||||||
|
|
||||||
if (unlikely(!ckbuf))
|
if (unlikely(!ckbuf))
|
||||||
fatal("Failed to malloc ckbuf in hash_search\n");
|
fatal("Failed to malloc ckbuf in hash_search\n");
|
||||||
for (i = 0; i < n; i++)
|
do_mcpy(control, ckbuf, cksum_limit, n);
|
||||||
memcpy(ckbuf + i, get_sb(control, cksum_limit + i), 1);
|
|
||||||
st->cksum = CrcUpdate(st->cksum, ckbuf, n);
|
st->cksum = CrcUpdate(st->cksum, ckbuf, n);
|
||||||
if (!NO_MD5)
|
if (!NO_MD5)
|
||||||
md5_process_bytes(ckbuf, n, &control->ctx);
|
md5_process_bytes(ckbuf, n, &control->ctx);
|
||||||
|
|
@ -856,6 +876,8 @@ void rzip_fd(rzip_control *control, int fd_in, int fd_out)
|
||||||
gettimeofday(&start, NULL);
|
gettimeofday(&start, NULL);
|
||||||
|
|
||||||
prepare_streamout_threads(control);
|
prepare_streamout_threads(control);
|
||||||
|
get_sb = single_get_sb;
|
||||||
|
do_mcpy = single_mcpy;
|
||||||
|
|
||||||
while (!pass || len > 0 || (STDIN && !st->stdin_eof)) {
|
while (!pass || len > 0 || (STDIN && !st->stdin_eof)) {
|
||||||
double pct_base, pct_multiple;
|
double pct_base, pct_multiple;
|
||||||
|
|
@ -900,8 +922,11 @@ retry:
|
||||||
fatal("Unable to mmap any ram\n");
|
fatal("Unable to mmap any ram\n");
|
||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
if (st->mmap_size < st->chunk_size)
|
if (st->mmap_size < st->chunk_size) {
|
||||||
print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size);
|
print_maxverbose("Enabling sliding mmap mode and using mmap of %lld bytes with window of %lld bytes\n", st->mmap_size, st->chunk_size);
|
||||||
|
get_sb = &sliding_get_sb;
|
||||||
|
do_mcpy = &sliding_mcpy;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size);
|
print_maxverbose("Succeeded in testing %lld sized mmap for rzip pre-processing\n", st->mmap_size);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue