2015-03-10 08:23:00 +01:00
|
|
|
/* libzpaq.h - LIBZPAQ Version 5.00.
|
|
|
|
|
|
|
|
|
|
Copyright (C) 2011, Dell Inc. Written by Matt Mahoney.
|
|
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
furnished to do so without restriction.
|
|
|
|
|
This Software is provided "as is" without warranty.
|
|
|
|
|
|
|
|
|
|
LIBZPAQ is a C++ library for compression and decompression of data
|
|
|
|
|
conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/
|
|
|
|
|
|
|
|
|
|
By default, LIBZPAQ uses JIT (just in time) acceleration. This only
|
|
|
|
|
works on x86-32 and x86-64 processors that support the SSE2 instruction
|
|
|
|
|
set. To disable JIT, compile with -DNOJIT. To enable run time checks,
|
|
|
|
|
compile with -DDEBUG. Both options will decrease speed.
|
|
|
|
|
|
|
|
|
|
The decompression code, when compiled with -DDEBUG and -DNOJIT,
|
|
|
|
|
comprises the reference decoder for the ZPAQ level 2 standard.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#ifndef LIBZPAQ_H
|
|
|
|
|
#define LIBZPAQ_H
|
|
|
|
|
|
|
|
|
|
#ifndef DEBUG
|
|
|
|
|
#define NDEBUG 1
|
|
|
|
|
#endif
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
|
|
namespace libzpaq {
|
|
|
|
|
|
|
|
|
|
// 1, 2, 4, 8 byte unsigned integers
|
|
|
|
|
typedef uint8_t  U8;   // 1 byte, unsigned
typedef uint16_t U16;  // 2 bytes, unsigned
typedef uint32_t U32;  // 4 bytes, unsigned
typedef uint64_t U64;  // 8 bytes, unsigned
|
|
|
|
|
|
|
|
|
|
// Standard library prototypes redirected to libzpaq.cpp
|
|
|
|
|
// Declared inside namespace libzpaq so that unqualified calloc() calls in
// this header (see Array<T>::resize) bind to the library's own definition.
// NOTE(review): presumably returns zero-filled memory like the standard
// calloc, which Array relies on for its "cleared" contract - confirm in
// libzpaq.cpp.
void* calloc(size_t, size_t);
|
|
|
|
|
// Counterpart of libzpaq::calloc; Array<T>::resize passes it the start of
// the raw (unaligned) allocation.
void free(void*);
|
|
|
|
|
|
|
|
|
|
// Callback for error handling
|
|
|
|
|
// Invoked by library code with a message describing a failure, e.g. by
// Array<T>::resize ("Array too big", "Out of memory"). Not defined in this
// header.
// NOTE(review): presumably expected not to return normally (throw or
// exit) - confirm against the definition supplied by the application.
extern void error(const char* msg);
|
|
|
|
|
|
|
|
|
|
// Virtual base classes for input and output
|
|
|
|
|
// get() and put() must be overridden to read or write 1 byte.
|
|
|
|
|
// read() and write() may be overridden to read or write n bytes more
|
|
|
|
|
// efficiently than calling get() or put() n times.
|
|
|
|
|
// Abstract byte source. Subclasses must implement get(); read() is virtual
// so a subclass can provide a faster bulk transfer.
class Reader {
public:
  // Next input byte as a value in 0..255, or -1 at end of input.
  virtual int get() = 0;

  // Read up to n bytes into buf; the result is the number of bytes read.
  virtual int read(char* buf, int n);

  virtual ~Reader() {}
};
|
|
|
|
|
|
|
|
|
|
// Abstract byte sink. Subclasses must implement put(); write() is virtual
// so a subclass can provide a faster bulk transfer.
class Writer {
public:
  // Output the low 8 bits of c.
  virtual void put(int c) = 0;

  // Output the n bytes buf[0..n-1].
  virtual void write(const char* buf, int n);

  virtual ~Writer() {}
};
|
|
|
|
|
|
|
|
|
|
// Read 16 bit little-endian number
|
|
|
|
|
// Declared here, defined elsewhere. Per the note above it decodes a 16 bit
// little-endian number starting at p; the result is returned as an int.
// NOTE(review): presumably reads exactly p[0] and p[1] - confirm in the
// definition.
int toU16(const char* p);
|
|
|
|
|
|
|
|
|
|
// An Array of T is cleared and aligned on a 64 byte address
|
|
|
|
|
// with no constructors called. No copy or assignment.
|
|
|
|
|
// Array<T> a(n, ex=0); - creates n<<ex elements of type T
|
|
|
|
|
// a[i] - index
|
|
|
|
|
// a(i) - index mod n, n must be a power of 2
|
|
|
|
|
// a.size() - gets n
|
|
|
|
|
template <class T>
class Array {
  T *data;     // element [0] as seen by the user, on a 64 byte boundary
  size_t n;    // number of elements visible to the user
  int offset;  // bytes from the start of the raw allocation to data

  // Copying is disabled: both members are declared but never defined.
  void operator=(const Array&);
  Array(const Array&);

public:
  // Start empty, then size to sz<<ex zeroed elements via resize().
  Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) {
    resize(sz, ex);
  }

  // Replace the content with sz<<ex zeroed elements.
  void resize(size_t sz, int ex=0);

  // Releases the storage by resizing to nothing.
  ~Array() {
    resize(0);
  }

  // Element count.
  size_t size() const {
    return n;
  }

  // Element count converted to int.
  int isize() const {
    return int(n);
  }

  // Plain indexing; the range is checked only by assert (DEBUG builds).
  T& operator[](size_t i) {
    assert(n>0 && i<n);
    return data[i];
  }

  // Indexing modulo n; n must be a power of 2 (checked only by assert).
  T& operator()(size_t i) {
    assert(n>0 && (n&(n-1))==0);
    const size_t mask=n-1;
    return data[i&mask];
  }
};
|
|
|
|
|
|
|
|
|
|
// Change size to sz<<ex elements of 0
|
|
|
|
|
template<typename T>
|
|
|
|
|
void Array<T>::resize(size_t sz, int ex) {
|
|
|
|
|
assert(size_t(-1)>0); // unsigned type?
|
|
|
|
|
while (ex>0) {
|
|
|
|
|
if (sz>sz*2) error("Array too big");
|
|
|
|
|
sz*=2, --ex;
|
|
|
|
|
}
|
|
|
|
|
if (n>0) {
|
|
|
|
|
assert(offset>0 && offset<=64);
|
|
|
|
|
assert((char*)data-offset);
|
|
|
|
|
free((char*)data-offset);
|
|
|
|
|
}
|
|
|
|
|
n=0;
|
|
|
|
|
if (sz==0) return;
|
|
|
|
|
n=sz;
|
|
|
|
|
const size_t nb=128+n*sizeof(T); // test for overflow
|
|
|
|
|
if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big");
|
|
|
|
|
data=(T*)calloc(nb, 1);
|
|
|
|
|
if (!data) error("Out of memory");
|
|
|
|
|
offset=64-(((char*)data-(char*)0)&63);
|
|
|
|
|
assert(offset>0 && offset<=64);
|
|
|
|
|
data=(T*)((char*)data+offset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//////////////////////////// SHA1 ////////////////////////////
|
|
|
|
|
|
|
|
|
|
// For computing SHA-1 checksums
|
|
|
|
|
class SHA1 {
|
|
|
|
|
public:
|
|
|
|
|
void put(int c) { // hash 1 byte
|
|
|
|
|
U32& r=w[len0>>5&15];
|
|
|
|
|
r=(r<<8)|(c&255);
|
|
|
|
|
if (!(len0+=8)) ++len1;
|
|
|
|
|
if ((len0&511)==0) process();
|
|
|
|
|
}
|
|
|
|
|
double size() const {return len0/8+len1*536870912.0;} // size in bytes
|
|
|
|
|
uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes
|
|
|
|
|
const char* result(); // get hash and reset
|
|
|
|
|
SHA1() {init();}
|
|
|
|
|
private:
|
|
|
|
|
void init(); // reset, but don't clear hbuf
|
|
|
|
|
U32 len0, len1; // length in bits (low, high)
|
|
|
|
|
U32 h[5]; // hash state
|
|
|
|
|
U32 w[80]; // input buffer
|
|
|
|
|
char hbuf[20]; // result
|
|
|
|
|
void process(); // hash 1 block
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//////////////////////////// ZPAQL ///////////////////////////
|
|
|
|
|
|
|
|
|
|
// Symbolic constants, instruction size, and names
|
|
|
|
|
// Component type codes; the enumerators number consecutively from 0.
typedef enum {
  NONE,   // 0
  CONS,   // 1
  CM,     // 2
  ICM,    // 3
  MATCH,  // 4
  AVG,    // 5
  MIX2,   // 6
  MIX,    // 7
  ISSE,   // 8
  SSE     // 9
} CompType;

// Size table with 256 entries, defined elsewhere.
// NOTE(review): presumably indexed by component type code - confirm.
extern const int compsize[256];
|
|
|
|
|
|
|
|
|
|
// A ZPAQL machine COMP+HCOMP or PCOMP.
|
|
|
|
|
class ZPAQL {
|
|
|
|
|
public:
|
|
|
|
|
ZPAQL();
|
|
|
|
|
~ZPAQL();
|
|
|
|
|
void clear(); // Free memory, erase program, reset machine state
|
|
|
|
|
void inith(); // Initialize as HCOMP to run
|
|
|
|
|
void initp(); // Initialize as PCOMP to run
|
|
|
|
|
double memory(); // Return memory requirement in bytes
|
|
|
|
|
void run(U32 input); // Execute with input
|
|
|
|
|
int read(Reader* in2); // Read header
|
|
|
|
|
bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header
|
|
|
|
|
int step(U32 input, int mode); // Trace execution (defined externally)
|
|
|
|
|
|
|
|
|
|
Writer* output; // Destination for OUT instruction, or 0 to suppress
|
|
|
|
|
SHA1* sha1; // Points to checksum computer
|
|
|
|
|
U32 H(int i) {return h(i);} // get element of h
|
|
|
|
|
|
|
|
|
|
void flush(); // write outbuf[0..bufptr-1] to output and sha1
|
|
|
|
|
void outc(int c) { // output byte c (0..255) or -1 at EOS
|
|
|
|
|
if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ZPAQ1 block header
|
|
|
|
|
Array<U8> header; // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard)
|
|
|
|
|
int cend; // COMP in header[7...cend-1]
|
|
|
|
|
int hbegin, hend; // HCOMP/PCOMP in header[hbegin...hend-1]
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
// Machine state for executing HCOMP
|
|
|
|
|
Array<U8> m; // memory array M for HCOMP
|
|
|
|
|
Array<U32> h; // hash array H for HCOMP
|
|
|
|
|
Array<U32> r; // 256 element register array
|
|
|
|
|
Array<char> outbuf; // output buffer
|
|
|
|
|
int bufptr; // number of bytes in outbuf
|
|
|
|
|
U32 a, b, c, d; // machine registers
|
|
|
|
|
int f; // condition flag
|
|
|
|
|
int pc; // program counter
|
|
|
|
|
int rcode_size; // length of rcode
|
|
|
|
|
U8* rcode; // JIT code for run()
|
|
|
|
|
|
|
|
|
|
// Support code
|
|
|
|
|
int assemble(); // put JIT code in rcode
|
|
|
|
|
void init(int hbits, int mbits); // initialize H and M sizes
|
|
|
|
|
int execute(); // execute 1 instruction, return 0 after HALT, else 1
|
|
|
|
|
void run0(U32 input); // default run() when select==0
|
|
|
|
|
void div(U32 x) {if (x) a/=x; else a=0;}
|
|
|
|
|
void mod(U32 x) {if (x) a%=x; else a=0;}
|
|
|
|
|
void swap(U32& x) {a^=x; x^=a; a^=x;}
|
|
|
|
|
void swap(U8& x) {a^=x; x^=a; a^=x;}
|
|
|
|
|
void err(); // exit with run time error
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
///////////////////////// Component //////////////////////////
|
|
|
|
|
|
|
|
|
|
// A Component is a context model, indirect context model, match model,
|
|
|
|
|
// fixed weight mixer, adaptive 2 input mixer without or with current
|
|
|
|
|
// partial byte as context, adaptive m input mixer (without or with),
|
|
|
|
|
// or SSE (without or with).
|
|
|
|
|
|
|
|
|
|
struct Component {
|
|
|
|
|
size_t limit; // max count for cm
|
|
|
|
|
size_t cxt; // saved context
|
|
|
|
|
size_t a, b, c; // multi-purpose variables
|
|
|
|
|
Array<U32> cm; // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index
|
|
|
|
|
Array<U8> ht; // ICM/ISSE hash table[0..size1][0..15] and MATCH buf
|
|
|
|
|
Array<U16> a16; // MIX weights
|
|
|
|
|
void init(); // initialize to all 0
|
|
|
|
|
Component() {init();}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
////////////////////////// StateTable ////////////////////////
|
|
|
|
|
|
|
|
|
|
// Next state table generator
|
|
|
|
|
class StateTable {
|
|
|
|
|
enum {N=64}; // sizes of b, t
|
|
|
|
|
int num_states(int n0, int n1); // compute t[n0][n1][1]
|
|
|
|
|
void discount(int& n0); // set new value of n0 after 1 or n1 after 0
|
|
|
|
|
void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y
|
|
|
|
|
public:
|
|
|
|
|
U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1
|
|
|
|
|
int next(int state, int y) { // next state for bit y
|
|
|
|
|
assert(state>=0 && state<256);
|
|
|
|
|
assert(y>=0 && y<4);
|
|
|
|
|
return ns[state*4+y];
|
|
|
|
|
}
|
|
|
|
|
int cminit(int state) { // initial probability of 1 * 2^23
|
|
|
|
|
assert(state>=0 && state<256);
|
|
|
|
|
return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1);
|
|
|
|
|
}
|
|
|
|
|
StateTable();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
///////////////////////// Predictor //////////////////////////
|
|
|
|
|
|
|
|
|
|
// A predictor guesses the next bit
|
|
|
|
|
class Predictor {
|
|
|
|
|
public:
|
|
|
|
|
Predictor(ZPAQL&);
|
|
|
|
|
~Predictor();
|
|
|
|
|
void init(); // build model
|
|
|
|
|
int predict(); // probability that next bit is a 1 (0..4095)
|
|
|
|
|
void update(int y); // train on bit y (0..1)
|
|
|
|
|
int stat(int); // Defined externally
|
|
|
|
|
bool isModeled() { // n>0 components?
|
|
|
|
|
assert(z.header.isize()>6);
|
|
|
|
|
return z.header[6]!=0;
|
|
|
|
|
}
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
|
|
// Predictor state
|
|
|
|
|
int c8; // last 0...7 bits.
|
|
|
|
|
int hmap4; // c8 split into nibbles
|
|
|
|
|
int p[256]; // predictions
|
|
|
|
|
U32 h[256]; // unrolled copy of z.h
|
|
|
|
|
ZPAQL& z; // VM to compute context hashes, includes H, n
|
|
|
|
|
Component comp[256]; // the model, includes P
|
|
|
|
|
|
|
|
|
|
// Modeling support functions
|
|
|
|
|
int predict0(); // default
|
|
|
|
|
void update0(int y); // default
|
|
|
|
|
int dt2k[256]; // division table for match: dt2k[i] = 2^12/i
|
|
|
|
|
int dt[1024]; // division table for cm: dt[i] = 2^16/(i+1.5)
|
|
|
|
|
U16 squasht[4096]; // squash() lookup table
|
|
|
|
|
short stretcht[32768];// stretch() lookup table
|
|
|
|
|
StateTable st; // next, cminit functions
|
|
|
|
|
U8* pcode; // JIT code for predict() and update()
|
|
|
|
|
int pcode_size; // length of pcode
|
|
|
|
|
|
|
|
|
|
// reduce prediction error in cr.cm
|
|
|
|
|
void train(Component& cr, int y) {
|
|
|
|
|
assert(y==0 || y==1);
|
|
|
|
|
U32& pn=cr.cm(cr.cxt);
|
|
|
|
|
U32 count=pn&0x3ff;
|
|
|
|
|
int error=y*32767-(cr.cm(cr.cxt)>>17);
|
|
|
|
|
pn+=(error*dt[count]&-1024)+(count<cr.limit);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// x -> floor(32768/(1+exp(-x/64)))
|
|
|
|
|
int squash(int x) {
|
|
|
|
|
assert(x>=-2048 && x<=2047);
|
|
|
|
|
return squasht[x+2048];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash
|
|
|
|
|
int stretch(int x) {
|
|
|
|
|
assert(x>=0 && x<=32767);
|
|
|
|
|
return stretcht[x];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bound x to a 12 bit signed int
|
|
|
|
|
int clamp2k(int x) {
|
|
|
|
|
if (x<-2048) return -2048;
|
|
|
|
|
else if (x>2047) return 2047;
|
|
|
|
|
else return x;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bound x to a 20 bit signed int
|
|
|
|
|
int clamp512k(int x) {
|
|
|
|
|
if (x<-(1<<19)) return -(1<<19);
|
|
|
|
|
else if (x>=(1<<19)) return (1<<19)-1;
|
|
|
|
|
else return x;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get cxt in ht, creating a new row if needed
|
|
|
|
|
size_t find(Array<U8>& ht, int sizebits, U32 cxt);
|
|
|
|
|
|
|
|
|
|
// Put JIT code in pcode
|
|
|
|
|
int assemble_p();
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//////////////////////////// Decoder /////////////////////////
|
|
|
|
|
|
|
|
|
|
// Decoder decompresses using an arithmetic code
|
|
|
|
|
class Decoder {
|
|
|
|
|
public:
|
|
|
|
|
Reader* in; // destination
|
|
|
|
|
Decoder(ZPAQL& z);
|
|
|
|
|
int decompress(); // return a byte or EOF
|
|
|
|
|
int skip(); // skip to the end of the segment, return next byte
|
|
|
|
|
void init(); // initialize at start of block
|
|
|
|
|
int stat(int x) {return pr.stat(x);}
|
|
|
|
|
private:
|
|
|
|
|
U32 low, high; // range
|
|
|
|
|
U32 curr; // last 4 bytes of archive
|
|
|
|
|
Predictor pr; // to get p
|
|
|
|
|
enum {BUFSIZE=1<<16};
|
|
|
|
|
Array<char> buf; // input buffer of size BUFSIZE bytes
|
|
|
|
|
// of unmodeled data. buf[low..high-1] is input with curr
|
|
|
|
|
// remaining in sub-block.
|
|
|
|
|
int decode(int p); // return decoded bit (0..1) with prob. p (0..65535)
|
|
|
|
|
void loadbuf(); // read unmodeled data into buf to EOS
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/////////////////////////// PostProcessor ////////////////////
|
|
|
|
|
|
|
|
|
|
class PostProcessor {
|
|
|
|
|
int state; // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST
|
|
|
|
|
int hsize; // header size
|
|
|
|
|
int ph, pm; // sizes of H and M in z
|
|
|
|
|
public:
|
|
|
|
|
ZPAQL z; // holds PCOMP
|
|
|
|
|
PostProcessor(): state(0), hsize(0), ph(0), pm(0) {}
|
|
|
|
|
void init(int h, int m); // ph, pm sizes of H and M
|
|
|
|
|
int write(int c); // Input a byte, return state
|
|
|
|
|
int getState() const {return state;}
|
|
|
|
|
void setOutput(Writer* out) {z.output=out;}
|
|
|
|
|
void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//////////////////////// Decompresser ////////////////////////
|
|
|
|
|
|
|
|
|
|
// For decompression and listing archive contents
|
|
|
|
|
class Decompresser {
|
|
|
|
|
public:
|
|
|
|
|
Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {}
|
|
|
|
|
void setInput(Reader* in) {dec.in=in;}
|
|
|
|
|
bool findBlock(double* memptr = 0);
|
|
|
|
|
void hcomp(Writer* out2) {z.write(out2, false);}
|
|
|
|
|
bool findFilename(Writer* = 0);
|
|
|
|
|
void readComment(Writer* = 0);
|
|
|
|
|
void setOutput(Writer* out) {pp.setOutput(out);}
|
|
|
|
|
void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);}
|
|
|
|
|
bool decompress(int n = -1); // n bytes, -1=all, return true until done
|
|
|
|
|
bool pcomp(Writer* out2) {return pp.z.write(out2, true);}
|
|
|
|
|
void readSegmentEnd(char* sha1string = 0);
|
|
|
|
|
int stat(int x) {return dec.stat(x);}
|
|
|
|
|
private:
|
|
|
|
|
ZPAQL z;
|
|
|
|
|
Decoder dec;
|
|
|
|
|
PostProcessor pp;
|
|
|
|
|
enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state; // expected next
|
|
|
|
|
enum {FIRSTSEG, SEG, SKIP} decode_state; // which segment in block?
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/////////////////////////// decompress() /////////////////////
|
|
|
|
|
|
|
|
|
|
// Free-function decompression entry point taking an input Reader and an
// output Writer; declared here, defined elsewhere. zpaq_decompress() in
// this header is one caller.
void decompress(Reader* in, Writer* out);
|
|
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////
|
|
|
|
|
//////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
|
|
// Code following this point is not a part of the ZPAQ level 2 standard.
|
|
|
|
|
|
|
|
|
|
//////////////////////////// Encoder /////////////////////////
|
|
|
|
|
|
|
|
|
|
// Encoder compresses using an arithmetic code
|
|
|
|
|
class Encoder {
|
|
|
|
|
public:
|
|
|
|
|
Encoder(ZPAQL& z):
|
|
|
|
|
out(0), low(1), high(0xFFFFFFFF), pr(z) {}
|
|
|
|
|
void init();
|
|
|
|
|
void compress(int c); // c is 0..255 or EOF
|
|
|
|
|
int stat(int x) {return pr.stat(x);}
|
|
|
|
|
Writer* out; // destination
|
|
|
|
|
private:
|
|
|
|
|
U32 low, high; // range
|
|
|
|
|
Predictor pr; // to get p
|
|
|
|
|
Array<char> buf; // unmodeled input
|
|
|
|
|
void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
//////////////////////// Compressor //////////////////////////
|
|
|
|
|
|
|
|
|
|
class Compressor {
|
|
|
|
|
public:
|
|
|
|
|
Compressor(): enc(z), in(0), state(INIT) {}
|
|
|
|
|
void setOutput(Writer* out) {enc.out=out;}
|
|
|
|
|
void writeTag();
|
|
|
|
|
void startBlock(int level); // level=1,2,3
|
|
|
|
|
void startBlock(const char* hcomp);
|
|
|
|
|
void startSegment(const char* filename = 0, const char* comment = 0);
|
|
|
|
|
void setInput(Reader* i) {in=i;}
|
|
|
|
|
void postProcess(const char* pcomp = 0, int len = 0);
|
|
|
|
|
bool compress(int n = -1); // n bytes, -1=all, return true until done
|
|
|
|
|
void endSegment(const char* sha1string = 0);
|
|
|
|
|
void endBlock();
|
|
|
|
|
int stat(int x) {return enc.stat(x);}
|
|
|
|
|
private:
|
|
|
|
|
ZPAQL z;
|
|
|
|
|
Encoder enc;
|
|
|
|
|
Reader* in;
|
|
|
|
|
enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/////////////////////////// compress() ///////////////////////
|
|
|
|
|
|
|
|
|
|
// Free-function compression entry point taking an input Reader, an output
// Writer and a level; declared here, defined elsewhere. zpaq_compress() in
// this header is one caller.
// NOTE(review): level presumably accepts the same 1..3 range as
// Compressor::startBlock(int) - confirm in the definition.
void compress(Reader* in, Writer* out, int level);
|
|
|
|
|
|
|
|
|
|
} // namespace libzpaq
|
|
|
|
|
|
|
|
|
|
/////////////////////////// lrzip functions //////////////////
|
|
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
/* Helper macros and types for the lrzip glue below.
 *
 * Each macro is defined only if it is not already defined, so this header
 * can be combined with other headers that provide the same helpers without
 * triggering redefinition diagnostics. */
#ifndef uchar
#define uchar unsigned char
#endif

#if defined(__GNUC__) || defined(__clang__)
/* Branch prediction hints and the "unused" attribute (GCC/Clang). */
# ifndef likely
#  define likely(x)	__builtin_expect(!!(x), 1)
# endif
# ifndef unlikely
#  define unlikely(x)	__builtin_expect(!!(x), 0)
# endif
# ifndef __maybe_unused
#  define __maybe_unused	__attribute__((unused))
# endif
#else
/* No such builtins: keep the truth value, drop the hint. */
# ifndef likely
#  define likely(x)	(!!(x))
# endif
# ifndef unlikely
#  define unlikely(x)	(!!(x))
# endif
# ifndef __maybe_unused
#  define __maybe_unused
# endif
#endif

/* 64 bit signed type used for buffer lengths. */
typedef int64_t i64;
|
2015-03-10 08:23:00 +01:00
|
|
|
|
|
|
|
|
struct bufRead: public libzpaq::Reader {
|
|
|
|
|
uchar *s_buf;
|
|
|
|
|
i64 *s_len;
|
|
|
|
|
i64 total_len;
|
|
|
|
|
int *last_pct;
|
|
|
|
|
bool progress;
|
|
|
|
|
long thread;
|
|
|
|
|
FILE *msgout;
|
|
|
|
|
|
|
|
|
|
bufRead(uchar *buf_, i64 *n_, i64 total_len_, int *last_pct_, bool progress_, long thread_, FILE *msgout_):
|
|
|
|
|
s_buf(buf_), s_len(n_), total_len(total_len_), last_pct(last_pct_), progress(progress_), thread(thread_), msgout(msgout_) {}
|
|
|
|
|
|
|
|
|
|
int get() {
|
|
|
|
|
if (progress && !(*s_len % 128)) {
|
|
|
|
|
int pct = (total_len - *s_len) * 100 / total_len;
|
|
|
|
|
|
|
|
|
|
if (pct / 10 != *last_pct / 10) {
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
fprintf(msgout, "\r\t\t\tZPAQ\t");
|
|
|
|
|
for (i = 0; i < thread; i++)
|
|
|
|
|
fprintf(msgout, "\t");
|
|
|
|
|
fprintf(msgout, "%ld:%i%% \r",
|
|
|
|
|
thread + 1, pct);
|
|
|
|
|
fflush(msgout);
|
|
|
|
|
*last_pct = pct;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (likely(*s_len > 0)) {
|
|
|
|
|
(*s_len)--;
|
|
|
|
|
return ((int)(uchar)*s_buf++);
|
|
|
|
|
}
|
|
|
|
|
return -1;
|
|
|
|
|
} // read and return byte 0..255, or -1 at EOF
|
|
|
|
|
|
|
|
|
|
int read(char *buf, int n) {
|
|
|
|
|
if (unlikely(n > *s_len))
|
|
|
|
|
n = *s_len;
|
|
|
|
|
|
|
|
|
|
if (likely(n > 0)) {
|
|
|
|
|
*s_len -= n;
|
|
|
|
|
memcpy(buf, s_buf, n);
|
|
|
|
|
}
|
|
|
|
|
return n;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct bufWrite: public libzpaq::Writer {
|
|
|
|
|
uchar *c_buf;
|
|
|
|
|
i64 *c_len;
|
|
|
|
|
bufWrite(uchar *buf_, i64 *n_): c_buf(buf_), c_len(n_) {}
|
|
|
|
|
|
|
|
|
|
void put(int c) {
|
|
|
|
|
c_buf[(*c_len)++] = (uchar)c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void write(const char *buf, int n) {
|
|
|
|
|
memcpy(c_buf + *c_len, buf, n);
|
|
|
|
|
*c_len += n;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// C entry point: compress the s_len bytes at s_buf into c_buf.
// Output is appended at c_buf[*c_len] and *c_len is advanced by the number
// of bytes produced. msgout, progress and thread are handed to the reader
// for progress reporting.
extern "C" void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, int level,
			      FILE *msgout, bool progress, long thread)
{
	const i64 input_size = s_len;	// s_len itself is counted down by the reader
	int pct_shown = 100;

	bufRead source(s_buf, &s_len, input_size, &pct_shown, progress, thread, msgout);
	bufWrite sink(c_buf, c_len);

	libzpaq::compress(&source, &sink, level);
}
|
|
|
|
|
|
|
|
|
|
// C entry point: decompress the c_len bytes at c_buf into s_buf.
// Output is appended at s_buf[*d_len] and *d_len is advanced by the number
// of bytes produced. msgout, progress and thread are handed to the reader
// for progress reporting.
extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len,
				FILE *msgout, bool progress, long thread)
{
	const i64 input_size = c_len;	// c_len itself is counted down by the reader
	int pct_shown = 100;

	bufRead source(c_buf, &c_len, input_size, &pct_shown, progress, thread, msgout);
	bufWrite sink(s_buf, d_len);

	libzpaq::decompress(&source, &sink);
}
|
|
|
|
|
|
|
|
|
|
#endif // LIBZPAQ_H
|