mirror of
https://github.com/ckolivas/lrzip.git
synced 2026-01-09 18:10:21 +01:00
Import libzapq files.
This commit is contained in:
parent
88e2f80736
commit
2f3f01958d
737
libzpaq501/libzpaq.3.pod
Normal file
737
libzpaq501/libzpaq.3.pod
Normal file
|
|
@ -0,0 +1,737 @@
|
|||
# Documentation for libzpaq
|
||||
#
|
||||
# Copyright (C) 2012, Dell Inc. Written by Matt Mahoney.
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so without restriction.
|
||||
# This Software is provided "as is" without warranty.
|
||||
#
|
||||
# To create man page: pod2man libzpaq.3.pod > libzpaq.3
|
||||
# To create HTML documentation: pod2html libzpaq.3.pod > libzpaq.html
|
||||
|
||||
=pod
|
||||
|
||||
=head1 NAME
|
||||
|
||||
libzpaq - ZPAQ compression API
|
||||
|
||||
=head1 SYNOPSIS
|
||||
|
||||
#include "libzpaq.h"
|
||||
|
||||
namespace libzpaq {
|
||||
|
||||
extern void error(const char* msg);
|
||||
|
||||
class Reader {
|
||||
public:
|
||||
virtual int get() = 0;
|
||||
virtual int read(char* buf, int n); // optional
|
||||
virtual ~Reader() {}
|
||||
};
|
||||
|
||||
class Writer {
|
||||
public:
|
||||
virtual void put(int c) = 0;
|
||||
virtual void write(const char* buf, int n); // optional
|
||||
virtual ~Writer() {}
|
||||
};
|
||||
|
||||
class SHA1 {
|
||||
public:
|
||||
SHA1();
|
||||
void put(int c);
|
||||
double size() const;
|
||||
uint64_t usize() const;
|
||||
const char* result();
|
||||
};
|
||||
|
||||
class Compressor {
|
||||
public:
|
||||
Compressor();
|
||||
void setOutput(Writer* out);
|
||||
void writeTag();
|
||||
void startBlock(int level);
|
||||
void startBlock(const char* hcomp);
|
||||
void startSegment(const char* filename = 0,
|
||||
const char* comment = 0);
|
||||
void setInput(Reader* in);
|
||||
void postProcess(const char* pcomp = 0, int length = 0);
|
||||
bool compress(int n = -1);
|
||||
void endSegment(const char* sha1string = 0);
|
||||
void endBlock();
|
||||
};
|
||||
|
||||
class Decompresser {
|
||||
public:
|
||||
Decompresser();
|
||||
void setInput(Reader* in);
|
||||
bool findBlock(double* memptr = 0);
|
||||
void hcomp(Writer* out);
|
||||
bool findFilename(Writer* = 0);
|
||||
void readComment(Writer* = 0);
|
||||
void setOutput(Writer* out);
|
||||
void setSHA1(SHA1* sha1ptr);
|
||||
bool decompress(int n = -1);
|
||||
bool pcomp(Writer* out);
|
||||
void readSegmentEnd(char* sha1string = 0);
|
||||
};
|
||||
|
||||
void compress(Reader* in, Writer* out, int level);
|
||||
|
||||
void decompress(Reader* in, Writer* out);
|
||||
}
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
I<libzpaq> is a C++ API for compressing or decompressing
|
||||
files or objects in memory conforming to the ZPAQ level 1 and 2 standards
|
||||
(see I<availability>). This document describes version 5.00
|
||||
of the software. The software may be used without
|
||||
restriction under a modified MIT license.
|
||||
|
||||
ZPAQ provides a high level of data compression in a streaming
|
||||
(single pass) self-describing format that supports single or multiple
|
||||
named objects (such as archives) with optional integrity checking.
|
||||
|
||||
The library provides 3 default compression levels but supports
|
||||
custom algorithms. The performance of the default levels is
|
||||
shown in the table below for the 14 file Calgary corpus as
|
||||
a tar file. Compression and decompression times are in seconds
|
||||
on a 2 GHz T3200 running on one of two cores. Memory required
|
||||
to compress or decompress is in MB. Some popular formats
|
||||
are shown for comparison.
|
||||
|
||||
Program Format Size Time (C, D) Memory
|
||||
----------- ------ --------- ----------- ------
|
||||
Uncompressed .tar 3,152,896
|
||||
compress .tar.Z 1,319,521 1.6 0.2 .1 MB
|
||||
gzip -9 .tar.gz 1,022,810 0.7 0.1 .1 MB
|
||||
bzip2 -9 .tar.bz2 860,097 0.6 0.4 5 MB
|
||||
7zip .tar.7z 824,573 1.5 0.1 195 MB
|
||||
zpaq 1 (fast) .tar.zpaq 806,959 2 2 38 MB
|
||||
zpaq 2 (mid) .tar.zpaq 699,191 8 8 112 MB
|
||||
zpaq 3 (max) .tar.zpaq 644,190 20 20 246 MB
|
||||
|
||||
A ZPAQ stream consists of one or more blocks, possibly mixed with
|
||||
other data, that can be decompressed independently in any order.
|
||||
Each block consists of one or more segments that must be decompressed
|
||||
in order from the beginning of the block. Each block header contains
|
||||
a description of the decompression algorithm. Each segment consists
|
||||
of an optional filename string, an optional comment string,
|
||||
self delimiting compressed data, and an optional SHA-1 checksum.
|
||||
If ZPAQ blocks are mixed with other data, they must be
|
||||
preceded by an identifying 13 byte tag which does not otherwise
|
||||
appear in that data.
|
||||
|
||||
ZPAQ compression is based on the PAQ context mixing model.
|
||||
An array of components predict the probability of the next bit
|
||||
of input, either independently or depending on the predictions
|
||||
of earlier components. The final prediction is arithmetic coded.
|
||||
Each component inputs a context computed from earlier input
|
||||
by a program written in ZPAQL byte code which runs on a virtual
|
||||
machine. Both the component array description and the ZPAQL
|
||||
code are encoded in a string called HCOMP in each block header.
|
||||
Data can also be stored uncompressed.
|
||||
|
||||
A block may optionally specify a post-processor, a program
|
||||
(also in ZPAQL) which takes the decoded data as input and
|
||||
outputs the decompressed output. This program, if present,
|
||||
is encoded as a string called PCOMP which is compressed
|
||||
in the first segment prior to the compressed data. The first
|
||||
decoded byte from the first segment is a flag indicating
|
||||
whether a PCOMP string is present. The user is responsible
|
||||
for correctly pre-processing the data so that post-processing
|
||||
restores the original data.
|
||||
|
||||
=head2 API Organization
|
||||
|
||||
The I<libzpaq> API consists of 2 files.
|
||||
|
||||
=over
|
||||
|
||||
=item libzpaq.h
|
||||
|
||||
Header file to include in your application.
|
||||
|
||||
=item libzpaq.cpp
|
||||
|
||||
Source code file to link to your application.
|
||||
|
||||
=back
|
||||
|
||||
An application would have the line C<#include "libzpaq.h"> and
|
||||
link to libzpaq.cpp.
|
||||
The API provides two classes, C<Compressor> and C<Decompresser>
|
||||
which write or read respectively each of the syntactic elements
|
||||
of a ZPAQ stream. The two functions C<compress()> and
|
||||
C<decompress()> provide simple interfaces for the most common
|
||||
uses. In either case, the user must create classes derived
|
||||
from the abstract base classes C<Reader> and C<Writer> and
|
||||
define methods C<get()> and C<put()> which the code
|
||||
will use to read and write bytes. The user must also define
|
||||
a callback error handler.
|
||||
|
||||
By default, libzpaq(3) uses just-in-time (JIT) acceleration
|
||||
by translating ZPAQL code to x86-32 or x86-64 internally
|
||||
and executing it. This feature can be disabled by compiling
|
||||
with -DNOJIT. If enabled, it requires an x86 processor
|
||||
capable of executing SSE2 instructions. SSE2 is supported
|
||||
by most Intel processors since 2001 and AMD since 2003.
|
||||
|
||||
Run time checks (assertions) can be enabled with -DDEBUG
|
||||
for debugging purposes.
|
||||
|
||||
All of the API code is contained in the namespace C<libzpaq>.
|
||||
|
||||
=head2 Callback Functions
|
||||
|
||||
The following three functions must be defined by the user.
|
||||
|
||||
=over
|
||||
|
||||
=item C<extern void libzpaq::error(const char* msg);>
|
||||
|
||||
This function must be defined by the user to handle errors
|
||||
from libzpaq. The library will call the function with
|
||||
an English language message passed to C<msg>. Errors may
|
||||
result from bad input during decompression, out of memory,
|
||||
or illegal arguments or calling sequences to libzpaq
|
||||
functions. Errors should be considered unrecoverable.
|
||||
|
||||
=item C<int libzpaq::Reader::get() = 0;>
|
||||
|
||||
The user must create a class derived from Reader with an
|
||||
implementation for C<get()> that reads one byte of input
|
||||
and returns its value in the range 0...255, or returns
|
||||
EOF (-1) at end of input. Objects of the derived type
|
||||
would then be passed to functions that require a C<Reader>.
|
||||
|
||||
=item C<void libzpaq::Writer::put(int c) = 0;>
|
||||
|
||||
The user must create a class derived from Writer with
|
||||
an implementation of C<put()> which is expected to take
|
||||
a byte value C<c> in the range 0...255 and write it to
|
||||
output. Objects of the derived type
|
||||
would then be passed to functions that require a C<Writer>.
|
||||
|
||||
=back
|
||||
|
||||
The following two functions are optional. Defining them
|
||||
can improve performance slightly.
|
||||
|
||||
=over
|
||||
|
||||
=item C<virtual int read(char* buf, int n);>
|
||||
|
||||
If defined, this function should input up to C<n> bytes into
|
||||
the array C<buf> and return the number actually read, in
|
||||
the range 0..n. A return value of 0 indicates end of input.
|
||||
If C<read()> is not defined, then the default implementation
|
||||
will call C<get()> n times.
|
||||
|
||||
=item C<virtual void write(const char* buf, int n);>
|
||||
|
||||
If defined, this function should output the elements C<buf[0]>
|
||||
through C<buf[n-1]> in order. If not defined, then the default
|
||||
implementation will call C<put()> n times.
|
||||
|
||||
=back
|
||||
|
||||
=head2 Simple Compression
|
||||
|
||||
In the remainder of this document, all classes and
|
||||
functions are assumed to be in namespace C<libzpaq>.
|
||||
|
||||
=over
|
||||
|
||||
=item C<void compress(Reader* in, Writer* out, int mode);>
|
||||
|
||||
C<compress()> compresses from C<in> to C<out> until C<get()>
|
||||
returns EOF. It writes a single segment in a single block
|
||||
with empty filename, comment, and checksum fields. C<mode>
|
||||
must be 1, 2, or 3, to select models I<fast>, I<mid>, or
|
||||
I<max> respectively. Higher modes compress smaller but
|
||||
take longer to compress and subsequently decompress.
|
||||
|
||||
=item C<void decompress(Reader* in, Writer* out);>
|
||||
|
||||
C<decompress()> decompresses any valid ZPAQ stream from
|
||||
C<in> to C<out> until C<get()> returns EOF. Any
|
||||
non-ZPAQ data in the input is ignored. Any ZPAQ blocks
|
||||
following non-ZPAQ must be preceded by a marker tag
|
||||
to be recognized. Each block is decoded according to the
|
||||
instructions in the block header. The contents of the
|
||||
filename, comment, and checksum fields are ignored.
|
||||
Data with bad checksums will be decoded anyway. If there
|
||||
is more than one segment, then all of the output
|
||||
data will be concatenated.
|
||||
|
||||
=back
|
||||
|
||||
=head2 class SHA1
|
||||
|
||||
The SHA1 class is used to compute SHA-1 checksums for compression
|
||||
and verify them for decompression. It is believed to be
|
||||
computationally infeasible to find two different strings
|
||||
with the same hash value. Its member functions
|
||||
are as follows:
|
||||
|
||||
=over
|
||||
|
||||
=item C<SHA1();>
|
||||
|
||||
The constructor creates a new SHA1 object representing the
|
||||
hash of an empty string.
|
||||
|
||||
=item C<void put(int c);>
|
||||
|
||||
Appends one byte c (0...255) to the string whose hash is represented.
|
||||
|
||||
=item C<double size() const;>
|
||||
|
||||
Returns the length (so far) of the string whose hash is represented.
|
||||
The largest possible value returned is
|
||||
2^61 - 1 = 2305843009213693951.0, but values larger than 2^53 =
|
||||
9007199254740992.0
|
||||
will not be exact on systems using IEEE 64 bit floating point
|
||||
representation of type C<double>. The initial value is 0.0.
|
||||
|
||||
=item C<uint64_t usize() const;>
|
||||
|
||||
Returns the length (so far) as a 64 bit unsigned integer.
|
||||
|
||||
=item C<const char* result();>
|
||||
|
||||
Computes the 20 byte SHA-1 hash and resets the string back
|
||||
to a size of 0.0. The returned pointer points to an array
|
||||
inside the SHA1 object whose
|
||||
contents remain unchanged until the next call to C<result()>.
|
||||
|
||||
=back
|
||||
|
||||
=head2 class Compressor
|
||||
|
||||
The C<Compressor> class has member functions to write
|
||||
each of the syntactic elements of a ZPAQ stream and to specify
|
||||
their values. It will compress using either built-in or
|
||||
user supplied models.
|
||||
|
||||
=over
|
||||
|
||||
=item C<Compressor();>
|
||||
|
||||
The constructor creates a Compressor object. No input source,
|
||||
output destination, or compression model is specified.
|
||||
|
||||
=item C<void setOutput(Writer* out);>
|
||||
|
||||
Specifies a destination for output. Must be specified before
|
||||
calling any function that writes data.
|
||||
|
||||
=item C<void writeTag();>
|
||||
|
||||
Writes a 13 byte marker tag which can be used to identify
|
||||
the start of a block following non-ZPAQ data.
|
||||
|
||||
=item C<void startBlock(int level);>
|
||||
|
||||
Writes a block header and specifies a compression model.
|
||||
C<level> must be 1, 2, or 3
|
||||
to specify I<fast>, I<mid>, or I<max> respectively. Higher numbers
|
||||
compress smaller but more slowly. These models are compatible
|
||||
with both the ZPAQ level 1 and 2 standards.
|
||||
|
||||
=item C<void startBlock(const char* hcomp);>
|
||||
|
||||
Writes a block header and specifies the HCOMP portion of the
|
||||
compression model. The first two bytes of the string should
|
||||
encode the length of the rest of the string as a 16 bit unsigned
|
||||
number with the least significant byte first. The meaning of the
|
||||
rest of the string is defined in the ZPAQ level 2 standard.
|
||||
If the number of components (C<hcomp[8]>) is 0, then the block
|
||||
is saved in ZPAQ level 2 format, which cannot be read by
|
||||
older ZPAQ level 1 decoders. Otherwise the block is saved in
|
||||
ZPAQ level 1 format, which is compatible with all decoders.
|
||||
|
||||
=item C<void startSegment(const char* filename = 0, const char* comment = 0);>
|
||||
|
||||
Writes a segment header. C<filename> and
|
||||
C<comment> are NUL terminated strings. If specified, then their
|
||||
values are stored. Normally, C<filename> would be a file name
|
||||
when compressing to an archive or omitted otherwise. If a file
|
||||
is split among segments, then by convention only the first segment
|
||||
is named. C<comment> is normally the uncompressed size as a decimal
|
||||
number which is displayed when listing the contents of an archive.
|
||||
Omitting it does not affect decompression.
|
||||
|
||||
=item C<void postProcess(const char* pcomp = 0, int length = 0);>
|
||||
|
||||
Specifies the optional PCOMP string used for post-processing.
|
||||
It must be called from within the first segment
|
||||
of each block prior to compressing any data, but not from within
|
||||
any other segment.
|
||||
If C<pcomp> is 0 or no argument is passed, then the decompresser
|
||||
will not post-process the data. The effect is to compress a
|
||||
0 byte to indicate to the decompresser that no PCOMP string
|
||||
is present.
|
||||
|
||||
If C<pcomp> is not 0, then I<length> bytes of the string I<pcomp>
|
||||
are passed. If I<length> is 0 or omitted, then
|
||||
the first two bytes must encode
|
||||
the length of the rest of the string as a 16 bit unsigned number
|
||||
with the least significant byte first. The format of the remainder
|
||||
of the string is described in the ZPAQ level 2 standard.
|
||||
The effect is to compress a 1 byte
|
||||
to indicate the presence of PCOMP, followed by the two length
|
||||
bytes and the string as passed. For example, either
|
||||
C<postProcess("\x02\x00\x05\x08")> or C<postProcess("\x05\x08", 2)>
|
||||
would compress the 5 bytes 1, 2, 0, 5, 8.
|
||||
The user is responsible for pre-processing the input
|
||||
prior to compression so that PCOMP restores the original data.
|
||||
|
||||
=item C<void setInput(Reader* in);>
|
||||
|
||||
Specifies the input source for compression. It must be set
|
||||
prior to the first call to C<compress()>.
|
||||
|
||||
=item C<bool compress(int n = -1);>
|
||||
|
||||
Compress n bytes of data, or until EOF is input, whichever comes
|
||||
first. If n < 0 or omitted, then compress until EOF.
|
||||
Returns true if there is more input available, or false if EOF
|
||||
was read.
|
||||
|
||||
=item C<void endSegment(const char* sha1string = 0);>
|
||||
|
||||
Stop compressing and write the end of a segment. If
|
||||
C<sha1string> is specified, it should be a 20 byte string
|
||||
as returned by C<SHA1::result()> on the input data for
|
||||
this segment I<before> pre-processing.
|
||||
|
||||
=item C<void endBlock();>
|
||||
|
||||
Finish writing the current block.
|
||||
|
||||
=back
|
||||
|
||||
In order to create a valid ZPAQ stream, the components must
|
||||
be written in the following order:
|
||||
|
||||
for each block do {
|
||||
if any non-ZPAQ data then {
|
||||
write non-ZPAQ data
|
||||
writeTag()
|
||||
}
|
||||
startBlock()
|
||||
for each segment do {
|
||||
startSegment()
|
||||
if first segment in block then {
|
||||
postProcess()
|
||||
}
|
||||
while (compress(n)) ;
|
||||
endSegment()
|
||||
}
|
||||
endBlock()
|
||||
}
|
||||
|
||||
=head2 class Decompresser
|
||||
|
||||
The class Decompresser has member functions to read each of the
|
||||
syntactic elements of a ZPAQ stream.
|
||||
|
||||
=over
|
||||
|
||||
=item C<Decompresser()>
|
||||
|
||||
The constructor creates a Decompresser object. No input source or
|
||||
output destination is specified.
|
||||
|
||||
=item C<void setInput(Reader* in);>
|
||||
|
||||
Specifies where the ZPAQ stream will be read from. Must be called
|
||||
before any function that reads the stream.
|
||||
|
||||
=item C<bool findBlock(double* memptr = 0);>
|
||||
|
||||
Scan the input to find the start of the next block. If a block
|
||||
does not start immediately, then the block must be preceded by
|
||||
a marker tag (written with C<Compressor::writeTag()>) or it will
|
||||
not be found. If C<memptr> is not 0, then write the approximate
|
||||
memory requirement (in bytes) to decompress to C<*memptr>. The
|
||||
memory will be allocated by the first call to C<decompress()>.
|
||||
It returns true if a block is found, or false if it reads to EOF
|
||||
without finding a block.
|
||||
|
||||
=item C<void hcomp(Writer* out);>
|
||||
|
||||
Write the HCOMP string of the current block to C<out>.
|
||||
It will be in a format suitable
|
||||
for passing to C<Compressor::startBlock()>. The first 2 bytes will
|
||||
encode the length of the rest of the string as a 16 bit unsigned
|
||||
integer with the least significant byte first. The format of the
|
||||
remainder of the string is described in the ZPAQ level 1
|
||||
specification.
|
||||
|
||||
=item C<bool findFilename(Writer* out = 0);>
|
||||
|
||||
Find the start of the next segment. If another segment is found
|
||||
within the current block then return true. If the end of the block
|
||||
is found first, then return false. If a segment is found, the
|
||||
filename field is not empty, and C<out>
|
||||
is not 0, then write the filename (without a terminating NUL byte)
|
||||
to C<out>.
|
||||
|
||||
=item C<void readComment(Writer* out = 0);>
|
||||
|
||||
Read or skip past the comment field following the filename field
|
||||
in the segment header. If C<out> is not 0 and the comment field is
|
||||
not empty, then write the comment
|
||||
(without a terminating NUL byte) to C<out>.
|
||||
|
||||
=item C<void setOutput(Writer* out);>
|
||||
|
||||
Specify the destination for decompression. It must be set before
|
||||
any data can be decompressed.
|
||||
|
||||
=item C<void setSHA1(SHA1* sha1ptr);>
|
||||
|
||||
Specify the address of a SHA1 object for computing the checksum
|
||||
of the decompressed data (after post-processing). As each byte C<c>
|
||||
is output, it is also passed to C<sha1ptr-E<gt>put(c)>. In order to
|
||||
compute the correct checksum, the SHA1 object should be in its
|
||||
initial state, either newly created, or by calling C<SHA1::result()>,
|
||||
before the first call to C<decompress()>. When the end of the segment
|
||||
is reached, the value returned by C<sha1ptr-E<gt>result()> should match
|
||||
the stored checksum, if any.
|
||||
|
||||
=item C<bool decompress(int n = -1);>
|
||||
|
||||
Decode n bytes or until the end of segment, whichever comes
|
||||
first. Return false if the end of segment is reached first. If
|
||||
n < 0 or not specified, then decompress to the end of segment
|
||||
and return false. C<n> is the number of bytes prior to post-processing.
|
||||
If the data is post-processed, then the size of the output may
|
||||
be different.
|
||||
|
||||
=item C<bool pcomp(Writer* out);>
|
||||
|
||||
Write the PCOMP string, if any, for the current block to C<out>.
|
||||
If there is no PCOMP string (no post-processor) then return false.
|
||||
Otherwise write the string to C<out> in a format suitable for
|
||||
passing to C<Compressor::postProcess()> and return true. If written,
|
||||
then the first 2 bytes will encode the length of the rest of the
|
||||
string as a 16 bit unsigned integer with the least significant
|
||||
byte first. The format of the rest of the string is described in
|
||||
the ZPAQ level 1 standard.
|
||||
|
||||
C<pcomp()> is only valid after the first call to C<decompress()>
|
||||
in the current block. To read the PCOMP string without decompressing any
|
||||
data, call C<decompress(0)> first. It is not necessary to
|
||||
call C<setOutput()> in this case.
|
||||
|
||||
=item C<void readSegmentEnd(char* sha1string = 0);>
|
||||
|
||||
Skip any compressed data in the current segment that has not yet
|
||||
been decompressed and advance to the end of the segment.
|
||||
Then if C<sha1string> is not 0 then write into
|
||||
the 21 byte array that it points to. If a checksum is present,
|
||||
then write a 1 into C<sha1string[0]> and write the stored checksum
|
||||
in C<sha1string[1...20]>. Otherwise write a 0 in C<sha1string[0]>.
|
||||
|
||||
Note that it is not permitted to call decompress() if any compressed
|
||||
data has been skipped in any earlier segments in the same block.
|
||||
|
||||
=back
|
||||
|
||||
A valid sequence of calls is as follows:
|
||||
|
||||
while (findBlock()) {
|
||||
while (findFilename()) {
|
||||
readComment();
|
||||
if first segment in block then { (optional)
|
||||
decompress(0)
|
||||
pcomp()
|
||||
}
|
||||
while (decompress(n)) ; (optional)
|
||||
readSegmentEnd();
|
||||
}
|
||||
}
|
||||
|
||||
=head1 EXAMPLES
|
||||
|
||||
The following program F<listzpaq.cpp>
|
||||
lists the contents of a ZPAQ archive
|
||||
read from standard input.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "libzpaq.h"
|
||||
|
||||
// Implement Reader and Writer interfaces for file I/O
|
||||
class File: public libzpaq::Reader, public libzpaq::Writer {
|
||||
FILE* f;
|
||||
public:
|
||||
File(FILE* f_): f(f_) {}
|
||||
int get() {return getc(f);}
|
||||
void put(int c) {putc(c, f);}
|
||||
int read(char* buf, int n) {return fread(buf, 1, n, f);}
|
||||
void write(const char* buf, int n) {fwrite(buf, 1, n, f);}
|
||||
};
|
||||
|
||||
// Implement error handler
|
||||
namespace libzpaq {
|
||||
void error(const char* msg) {
|
||||
fprintf(stderr, "Error: %s\n", msg);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// List the contents of an archive. For each block, show
|
||||
// the memory required to decompress. For each segment,
|
||||
// show the filename and comment.
|
||||
void list(FILE* input, FILE* output) {
|
||||
libzpaq::Decompresser d;
|
||||
File in(input), out(output);
|
||||
double memory;
|
||||
d.setInput(&in);
|
||||
for (int block=1; d.findBlock(&memory); ++block) {
|
||||
printf("Block %d needs %1.0f MB\n", block, memory/1e6);
|
||||
while (d.findFilename(&out)) { // print filename
|
||||
printf("\t");
|
||||
d.readComment(&out); // print comment
|
||||
printf("\n");
|
||||
d.readSegmentEnd(); // skip compressed data
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
list(stdin, stdout);
|
||||
return 0;
|
||||
}
|
||||
|
||||
The program could be compiled as follows:
|
||||
|
||||
g++ listzpaq.cpp libzpaq.cpp
|
||||
|
||||
The following code compresses a list of files into one block
|
||||
written to stdout. Each file is compressed to a separate
|
||||
segment. For each segment, the filename, comment, and SHA-1
|
||||
checksum are stored. The comment, as conventional, is the
|
||||
file size as a decimal string.
|
||||
|
||||
// Compress one file to one segment
|
||||
void compress_file(libzpaq::Compressor& c,
|
||||
const char* filename,
|
||||
bool first_segment) {
|
||||
|
||||
// Open input file
|
||||
FILE* f;
|
||||
f=fopen(filename, "rb");
|
||||
if (!f) return;
|
||||
|
||||
// Compute SHA-1 checksum and file size
|
||||
libzpaq::SHA1 sha1;
|
||||
int ch;
|
||||
while ((ch=getc(f))!=EOF)
|
||||
sha1.put(ch);
|
||||
|
||||
// Write file size as a comment.
|
||||
// The size can have at most 19 digits.
|
||||
char comment[20];
|
||||
sprintf(comment, "%1.0f", sha1.size());
|
||||
|
||||
// Compress segment
|
||||
rewind(f);
|
||||
File in(f);
|
||||
c.startSegment(filename, comment);
|
||||
if (first_segment)
|
||||
c.postProcess();
|
||||
c.setInput(&in);
|
||||
c.compress();
|
||||
c.endSegment(sha1.result());
|
||||
|
||||
// Close input file
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
// Compress a list of argc files in argv[0...argc-1] into one
|
||||
// ZPAQ block to stdout at level 2.
|
||||
void compress_list(int argc, char** argv) {
|
||||
libzpaq::Compressor c;
|
||||
File out(stdout);
|
||||
c.setOutput(&out);
|
||||
c.startBlock(2);
|
||||
for (int i=0; i<argc; ++i)
|
||||
compress_file(c, argv[i], i==0);
|
||||
c.endBlock();
|
||||
}
|
||||
|
||||
The following function decompresses from stdin to stdout.
|
||||
Filenames and comments are ignored, but checksums are verified
|
||||
if present.
|
||||
|
||||
void decompress() {
|
||||
libzpaq::Decompresser d;
|
||||
File in(stdin), out(stdout);
|
||||
d.setInput(&in);
|
||||
while (d.findBlock()) {
|
||||
while (d.findFilename()) {
|
||||
d.readComment();
|
||||
libzpaq::SHA1 sha1;
|
||||
d.setSHA1(&sha1);
|
||||
d.setOutput(&out);
|
||||
d.decompress();
|
||||
char sha1string[21];
|
||||
d.readSegmentEnd(sha1string);
|
||||
const char* sha1result = sha1.result();
|
||||
if (sha1string[0]==1
|
||||
&& memcmp(sha1string+1, sha1result, 20))
|
||||
libzpaq::error("checksum verify error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
C<Compressor::compress()> and C<Decompresser::decompress()> can
|
||||
be passed an argument n to display progress every n bytes,
|
||||
for example:
|
||||
|
||||
for (int i=1; d.decompress(1000000); ++i)
|
||||
fprintf(stderr, "Decompressed %d MB\n", i);
|
||||
|
||||
To compress or decompress to and from objects in memory, derive
|
||||
appropriate classes from C<Reader> and C<Writer>. For example, it is
|
||||
possible to compress or decompress to a C<std::string> using
|
||||
the following class.
|
||||
|
||||
struct String: public libzpaq::Writer {
|
||||
std::string s;
|
||||
void put(int c) {s+=char(c);}
|
||||
};
|
||||
|
||||
This class is also useful for reading the filename and comment
|
||||
fields during decompression as follows:
|
||||
|
||||
String filename, comment;
|
||||
while (d.findFilename(&filename)) {
|
||||
d.readComment(&comment);
|
||||
// ...
|
||||
|
||||
=head1 AVAILABILITY
|
||||
|
||||
I<libzpaq>, I<zpaq>, and the ZPAQ level 1 and 2 specifications are
|
||||
available from L<http://mattmahoney.net/zpaq/>.
|
||||
|
||||
=head1 SEE ALSO
|
||||
|
||||
C<zpaq(1)>
|
||||
C<sha1(1SSL)>
|
||||
|
||||
=cut
|
||||
|
||||
|
||||
3181
libzpaq501/libzpaq.cpp
Normal file
3181
libzpaq501/libzpaq.cpp
Normal file
File diff suppressed because it is too large
Load diff
441
libzpaq501/libzpaq.h
Normal file
441
libzpaq501/libzpaq.h
Normal file
|
|
@ -0,0 +1,441 @@
|
|||
/* libzpaq.h - LIBZPAQ Version 5.00.
|
||||
|
||||
Copyright (C) 2011, Dell Inc. Written by Matt Mahoney.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so without restriction.
|
||||
This Software is provided "as is" without warranty.
|
||||
|
||||
LIBZPAQ is a C++ library for compression and decompression of data
|
||||
conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/
|
||||
|
||||
By default, LIBZPAQ uses JIT (just in time) acceleration. This only
|
||||
works on x86-32 and x86-64 processors that support the SSE2 instruction
|
||||
set. To disable JIT, compile with -DNOJIT. To enable run time checks,
|
||||
compile with -DDEBUG. Both options will decrease speed.
|
||||
|
||||
The decompression code, when compiled with -DDEBUG and -DNOJIT,
|
||||
comprises the reference decoder for the ZPAQ level 2 standard.
|
||||
*/
|
||||
|
||||
#ifndef LIBZPAQ_H
|
||||
#define LIBZPAQ_H
|
||||
|
||||
#ifndef DEBUG
|
||||
#define NDEBUG 1
|
||||
#endif
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace libzpaq {
|
||||
|
||||
// 1, 2, 4, 8 byte unsigned integers
|
||||
typedef uint8_t U8;
|
||||
typedef uint16_t U16;
|
||||
typedef uint32_t U32;
|
||||
typedef uint64_t U64;
|
||||
|
||||
// Standard library prototypes redirected to libzpaq.cpp
|
||||
void* calloc(size_t, size_t);
|
||||
void free(void*);
|
||||
|
||||
// Callback for error handling
|
||||
extern void error(const char* msg);
|
||||
|
||||
// Virtual base classes for input and output
|
||||
// get() and put() must be overridden to read or write 1 byte.
|
||||
// read() and write() may be overridden to read or write n bytes more
|
||||
// efficiently than calling get() or put() n times.
|
||||
// Abstract byte source. Derive from it and implement get(); read() may
// additionally be overridden when fetching n bytes at once is cheaper
// than n separate calls to get().
class Reader {
public:
  // Should return the next byte as 0..255, or -1 at EOF.
  virtual int get() = 0;

  // Read up to n bytes into buf and return the number read.
  // Only declared here; the default implementation lives outside this header.
  virtual int read(char* buf, int n);

  // Virtual so that derived readers are destroyed correctly through a Reader*.
  virtual ~Reader() {}
};
|
||||
|
||||
// Abstract byte sink. Derive from it and implement put(); write() may
// additionally be overridden when emitting n bytes at once is cheaper
// than n separate calls to put().
class Writer {
public:
  // Should output the low 8 bits of c.
  virtual void put(int c) = 0;

  // Write the n bytes buf[0..n-1].
  // Only declared here; the default implementation lives outside this header.
  virtual void write(const char* buf, int n);

  // Virtual so that derived writers are destroyed correctly through a Writer*.
  virtual ~Writer() {}
};
|
||||
|
||||
// Read 16 bit little-endian number
|
||||
int toU16(const char* p);
|
||||
|
||||
// An Array of T is cleared and aligned on a 64 byte address
// with no constructors called. No copy or assignment.
//   Array<T> a(n, ex=0);  - creates n<<ex elements of type T
//   a[i]                  - index (range checked by assert)
//   a(i)                  - index mod n, n must be a power of 2
//   a.size()              - gets n
template <typename T>
class Array {
  T *data;     // user location of [0] on a 64 byte boundary
  size_t n;    // user size (number of elements)
  int offset;  // distance back in bytes to start of actual allocation

  // Private and unusable from outside: an Array cannot be assigned or copied.
  void operator=(const Array&);
  Array(const Array&);

public:
  // Create sz<<ex zeroed elements; the default is an empty array.
  Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) {
    resize(sz, ex);
  }

  // Change size to sz<<ex elements and erase the content to zeros.
  void resize(size_t sz, int ex=0);

  // Release the memory by resizing to 0 elements.
  ~Array() {resize(0);}

  // Number of elements.
  size_t size() const {return n;}

  // Number of elements converted to int.
  int isize() const {return int(n);}

  // Plain indexing; i must be below size() (checked only by assert).
  T& operator[](size_t i) {assert(n>0 && i<n); return data[i];}

  // Indexing modulo size(); size() must be a power of 2 so that masking
  // with n-1 is the same as taking i mod n.
  T& operator()(size_t i) {assert(n>0 && (n&(n-1))==0); return data[i&(n-1)];}
};
|
||||
|
||||
// Change size to sz<<ex elements of 0
|
||||
template<typename T>
|
||||
void Array<T>::resize(size_t sz, int ex) {
|
||||
assert(size_t(-1)>0); // unsigned type?
|
||||
while (ex>0) {
|
||||
if (sz>sz*2) error("Array too big");
|
||||
sz*=2, --ex;
|
||||
}
|
||||
if (n>0) {
|
||||
assert(offset>0 && offset<=64);
|
||||
assert((char*)data-offset);
|
||||
free((char*)data-offset);
|
||||
}
|
||||
n=0;
|
||||
if (sz==0) return;
|
||||
n=sz;
|
||||
const size_t nb=128+n*sizeof(T); // test for overflow
|
||||
if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big");
|
||||
data=(T*)calloc(nb, 1);
|
||||
if (!data) error("Out of memory");
|
||||
offset=64-(((char*)data-(char*)0)&63);
|
||||
assert(offset>0 && offset<=64);
|
||||
data=(T*)((char*)data+offset);
|
||||
}
|
||||
|
||||
//////////////////////////// SHA1 ////////////////////////////
|
||||
|
||||
// For computing SHA-1 checksums
|
||||
class SHA1 {
|
||||
public:
|
||||
void put(int c) { // hash 1 byte
|
||||
U32& r=w[len0>>5&15];
|
||||
r=(r<<8)|(c&255);
|
||||
if (!(len0+=8)) ++len1;
|
||||
if ((len0&511)==0) process();
|
||||
}
|
||||
double size() const {return len0/8+len1*536870912.0;} // size in bytes
|
||||
uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes
|
||||
const char* result(); // get hash and reset
|
||||
SHA1() {init();}
|
||||
private:
|
||||
void init(); // reset, but don't clear hbuf
|
||||
U32 len0, len1; // length in bits (low, high)
|
||||
U32 h[5]; // hash state
|
||||
U32 w[80]; // input buffer
|
||||
char hbuf[20]; // result
|
||||
void process(); // hash 1 block
|
||||
};
|
||||
|
||||
//////////////////////////// ZPAQL ///////////////////////////
|
||||
|
||||
// Symbolic constants for the component types, numbered from NONE=0 upward,
// and the instruction size table (defined outside this header).
typedef enum {
  NONE, CONS, CM, ICM, MATCH, AVG, MIX2, MIX, ISSE, SSE
} CompType;
extern const int compsize[256];
|
||||
|
||||
// A ZPAQL machine COMP+HCOMP or PCOMP.
|
||||
class ZPAQL {
|
||||
public:
|
||||
ZPAQL();
|
||||
~ZPAQL();
|
||||
void clear(); // Free memory, erase program, reset machine state
|
||||
void inith(); // Initialize as HCOMP to run
|
||||
void initp(); // Initialize as PCOMP to run
|
||||
double memory(); // Return memory requirement in bytes
|
||||
void run(U32 input); // Execute with input
|
||||
int read(Reader* in2); // Read header
|
||||
bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header
|
||||
int step(U32 input, int mode); // Trace execution (defined externally)
|
||||
|
||||
Writer* output; // Destination for OUT instruction, or 0 to suppress
|
||||
SHA1* sha1; // Points to checksum computer
|
||||
U32 H(int i) {return h(i);} // get element of h
|
||||
|
||||
void flush(); // write outbuf[0..bufptr-1] to output and sha1
|
||||
void outc(int c) { // output byte c (0..255) or -1 at EOS
|
||||
if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush();
|
||||
}
|
||||
|
||||
// ZPAQ1 block header
|
||||
Array<U8> header; // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard)
|
||||
int cend; // COMP in header[7...cend-1]
|
||||
int hbegin, hend; // HCOMP/PCOMP in header[hbegin...hend-1]
|
||||
|
||||
private:
|
||||
// Machine state for executing HCOMP
|
||||
Array<U8> m; // memory array M for HCOMP
|
||||
Array<U32> h; // hash array H for HCOMP
|
||||
Array<U32> r; // 256 element register array
|
||||
Array<char> outbuf; // output buffer
|
||||
int bufptr; // number of bytes in outbuf
|
||||
U32 a, b, c, d; // machine registers
|
||||
int f; // condition flag
|
||||
int pc; // program counter
|
||||
int rcode_size; // length of rcode
|
||||
U8* rcode; // JIT code for run()
|
||||
|
||||
// Support code
|
||||
int assemble(); // put JIT code in rcode
|
||||
void init(int hbits, int mbits); // initialize H and M sizes
|
||||
int execute(); // execute 1 instruction, return 0 after HALT, else 1
|
||||
void run0(U32 input); // default run() when select==0
|
||||
void div(U32 x) {if (x) a/=x; else a=0;}
|
||||
void mod(U32 x) {if (x) a%=x; else a=0;}
|
||||
void swap(U32& x) {a^=x; x^=a; a^=x;}
|
||||
void swap(U8& x) {a^=x; x^=a; a^=x;}
|
||||
void err(); // exit with run time error
|
||||
};
|
||||
|
||||
///////////////////////// Component //////////////////////////
|
||||
|
||||
// A Component is a context model, indirect context model, match model,
|
||||
// fixed weight mixer, adaptive 2 input mixer without or with current
|
||||
// partial byte as context, adaptive m input mixer (without or with),
|
||||
// or SSE (without or with).
|
||||
|
||||
struct Component {
|
||||
size_t limit; // max count for cm
|
||||
size_t cxt; // saved context
|
||||
size_t a, b, c; // multi-purpose variables
|
||||
Array<U32> cm; // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index
|
||||
Array<U8> ht; // ICM/ISSE hash table[0..size1][0..15] and MATCH buf
|
||||
Array<U16> a16; // MIX weights
|
||||
void init(); // initialize to all 0
|
||||
Component() {init();}
|
||||
};
|
||||
|
||||
////////////////////////// StateTable ////////////////////////
|
||||
|
||||
// Next state table generator
|
||||
class StateTable {
|
||||
enum {N=64}; // sizes of b, t
|
||||
int num_states(int n0, int n1); // compute t[n0][n1][1]
|
||||
void discount(int& n0); // set new value of n0 after 1 or n1 after 0
|
||||
void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y
|
||||
public:
|
||||
U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1
|
||||
int next(int state, int y) { // next state for bit y
|
||||
assert(state>=0 && state<256);
|
||||
assert(y>=0 && y<4);
|
||||
return ns[state*4+y];
|
||||
}
|
||||
int cminit(int state) { // initial probability of 1 * 2^23
|
||||
assert(state>=0 && state<256);
|
||||
return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1);
|
||||
}
|
||||
StateTable();
|
||||
};
|
||||
|
||||
///////////////////////// Predictor //////////////////////////
|
||||
|
||||
// A predictor guesses the next bit
|
||||
class Predictor {
|
||||
public:
|
||||
Predictor(ZPAQL&);
|
||||
~Predictor();
|
||||
void init(); // build model
|
||||
int predict(); // probability that next bit is a 1 (0..4095)
|
||||
void update(int y); // train on bit y (0..1)
|
||||
int stat(int); // Defined externally
|
||||
bool isModeled() { // n>0 components?
|
||||
assert(z.header.isize()>6);
|
||||
return z.header[6]!=0;
|
||||
}
|
||||
private:
|
||||
|
||||
// Predictor state
|
||||
int c8; // last 0...7 bits.
|
||||
int hmap4; // c8 split into nibbles
|
||||
int p[256]; // predictions
|
||||
U32 h[256]; // unrolled copy of z.h
|
||||
ZPAQL& z; // VM to compute context hashes, includes H, n
|
||||
Component comp[256]; // the model, includes P
|
||||
|
||||
// Modeling support functions
|
||||
int predict0(); // default
|
||||
void update0(int y); // default
|
||||
int dt2k[256]; // division table for match: dt2k[i] = 2^12/i
|
||||
int dt[1024]; // division table for cm: dt[i] = 2^16/(i+1.5)
|
||||
U16 squasht[4096]; // squash() lookup table
|
||||
short stretcht[32768];// stretch() lookup table
|
||||
StateTable st; // next, cminit functions
|
||||
U8* pcode; // JIT code for predict() and update()
|
||||
int pcode_size; // length of pcode
|
||||
|
||||
// reduce prediction error in cr.cm
|
||||
void train(Component& cr, int y) {
|
||||
assert(y==0 || y==1);
|
||||
U32& pn=cr.cm(cr.cxt);
|
||||
U32 count=pn&0x3ff;
|
||||
int error=y*32767-(cr.cm(cr.cxt)>>17);
|
||||
pn+=(error*dt[count]&-1024)+(count<cr.limit);
|
||||
}
|
||||
|
||||
// x -> floor(32768/(1+exp(-x/64)))
|
||||
int squash(int x) {
|
||||
assert(x>=-2048 && x<=2047);
|
||||
return squasht[x+2048];
|
||||
}
|
||||
|
||||
// x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash
|
||||
int stretch(int x) {
|
||||
assert(x>=0 && x<=32767);
|
||||
return stretcht[x];
|
||||
}
|
||||
|
||||
// bound x to a 12 bit signed int
|
||||
int clamp2k(int x) {
|
||||
if (x<-2048) return -2048;
|
||||
else if (x>2047) return 2047;
|
||||
else return x;
|
||||
}
|
||||
|
||||
// bound x to a 20 bit signed int
|
||||
int clamp512k(int x) {
|
||||
if (x<-(1<<19)) return -(1<<19);
|
||||
else if (x>=(1<<19)) return (1<<19)-1;
|
||||
else return x;
|
||||
}
|
||||
|
||||
// Get cxt in ht, creating a new row if needed
|
||||
size_t find(Array<U8>& ht, int sizebits, U32 cxt);
|
||||
|
||||
// Put JIT code in pcode
|
||||
int assemble_p();
|
||||
};
|
||||
|
||||
//////////////////////////// Decoder /////////////////////////
|
||||
|
||||
// Decoder decompresses using an arithmetic code
|
||||
class Decoder {
|
||||
public:
|
||||
Reader* in; // destination
|
||||
Decoder(ZPAQL& z);
|
||||
int decompress(); // return a byte or EOF
|
||||
int skip(); // skip to the end of the segment, return next byte
|
||||
void init(); // initialize at start of block
|
||||
int stat(int x) {return pr.stat(x);}
|
||||
private:
|
||||
U32 low, high; // range
|
||||
U32 curr; // last 4 bytes of archive
|
||||
Predictor pr; // to get p
|
||||
enum {BUFSIZE=1<<16};
|
||||
Array<char> buf; // input buffer of size BUFSIZE bytes
|
||||
// of unmodeled data. buf[low..high-1] is input with curr
|
||||
// remaining in sub-block.
|
||||
int decode(int p); // return decoded bit (0..1) with prob. p (0..65535)
|
||||
void loadbuf(); // read unmodeled data into buf to EOS
|
||||
};
|
||||
|
||||
/////////////////////////// PostProcessor ////////////////////
|
||||
|
||||
class PostProcessor {
|
||||
int state; // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST
|
||||
int hsize; // header size
|
||||
int ph, pm; // sizes of H and M in z
|
||||
public:
|
||||
ZPAQL z; // holds PCOMP
|
||||
PostProcessor(): state(0), hsize(0), ph(0), pm(0) {}
|
||||
void init(int h, int m); // ph, pm sizes of H and M
|
||||
int write(int c); // Input a byte, return state
|
||||
int getState() const {return state;}
|
||||
void setOutput(Writer* out) {z.output=out;}
|
||||
void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;}
|
||||
};
|
||||
|
||||
//////////////////////// Decompresser ////////////////////////
|
||||
|
||||
// For decompression and listing archive contents
|
||||
class Decompresser {
|
||||
public:
|
||||
Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {}
|
||||
void setInput(Reader* in) {dec.in=in;}
|
||||
bool findBlock(double* memptr = 0);
|
||||
void hcomp(Writer* out2) {z.write(out2, false);}
|
||||
bool findFilename(Writer* = 0);
|
||||
void readComment(Writer* = 0);
|
||||
void setOutput(Writer* out) {pp.setOutput(out);}
|
||||
void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);}
|
||||
bool decompress(int n = -1); // n bytes, -1=all, return true until done
|
||||
bool pcomp(Writer* out2) {return pp.z.write(out2, true);}
|
||||
void readSegmentEnd(char* sha1string = 0);
|
||||
int stat(int x) {return dec.stat(x);}
|
||||
private:
|
||||
ZPAQL z;
|
||||
Decoder dec;
|
||||
PostProcessor pp;
|
||||
enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state; // expected next
|
||||
enum {FIRSTSEG, SEG, SKIP} decode_state; // which segment in block?
|
||||
};
|
||||
|
||||
/////////////////////////// decompress() /////////////////////
|
||||
|
||||
// NOTE(review): presumably decompresses everything read from in and writes
// the result to out; the definition is not in this header -- confirm.
void decompress(Reader* in, Writer* out);
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////
|
||||
|
||||
// Code following this point is not a part of the ZPAQ level 2 standard.
|
||||
|
||||
//////////////////////////// Encoder /////////////////////////
|
||||
|
||||
// Encoder compresses using an arithmetic code
|
||||
class Encoder {
|
||||
public:
|
||||
Encoder(ZPAQL& z, int size=0):
|
||||
out(0), low(1), high(0xFFFFFFFF), pr(z) {}
|
||||
void init();
|
||||
void compress(int c); // c is 0..255 or EOF
|
||||
int stat(int x) {return pr.stat(x);}
|
||||
Writer* out; // destination
|
||||
private:
|
||||
U32 low, high; // range
|
||||
Predictor pr; // to get p
|
||||
Array<char> buf; // unmodeled input
|
||||
void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535)
|
||||
};
|
||||
|
||||
//////////////////////// Compressor //////////////////////////
|
||||
|
||||
class Compressor {
|
||||
public:
|
||||
Compressor(): enc(z), in(0), state(INIT) {}
|
||||
void setOutput(Writer* out) {enc.out=out;}
|
||||
void writeTag();
|
||||
void startBlock(int level); // level=1,2,3
|
||||
void startBlock(const char* hcomp);
|
||||
void startSegment(const char* filename = 0, const char* comment = 0);
|
||||
void setInput(Reader* i) {in=i;}
|
||||
void postProcess(const char* pcomp = 0, int len = 0);
|
||||
bool compress(int n = -1); // n bytes, -1=all, return true until done
|
||||
void endSegment(const char* sha1string = 0);
|
||||
void endBlock();
|
||||
int stat(int x) {return enc.stat(x);}
|
||||
private:
|
||||
ZPAQL z;
|
||||
Encoder enc;
|
||||
Reader* in;
|
||||
enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state;
|
||||
};
|
||||
|
||||
/////////////////////////// compress() ///////////////////////
|
||||
|
||||
// NOTE(review): presumably compresses everything read from in and writes the
// result to out; level presumably takes the same values (1,2,3) as
// Compressor::startBlock(int level) -- confirm against the definition.
void compress(Reader* in, Writer* out, int level);
|
||||
|
||||
} // namespace libzpaq
|
||||
|
||||
#endif // LIBZPAQ_H
|
||||
Loading…
Reference in a new issue