diff --git a/libzpaq/libzpaq.3.pod b/libzpaq/libzpaq.3.pod index 5726755..3ea1b95 100644 --- a/libzpaq/libzpaq.3.pod +++ b/libzpaq/libzpaq.3.pod @@ -1,737 +1,737 @@ -# Documentation for libzpaq -# -# Copyright (C) 2012, Dell Inc. Written by Matt Mahoney. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so without restriction. -# This Software is provided "as is" without warranty. -# -# To create man page: pod2man libzpaq.3.pod > libzpaq.3 -# To create HTML documentation: pod2html libzpaq.3.pod > libzpaq.html - -=pod - -=head1 NAME - -libzpaq - ZPAQ compression API - -=head1 SYNOPSIS - - #include "libzpaq.h" - - namespace libzpaq { - - extern void error(const char* msg); - - class Reader { - public: - virtual int get() = 0; - virtual int read(char* buf, int n); // optional - virtual ~Reader() {} - }; - - class Writer { - public: - virtual void put(int c) = 0; - virtual void write(const char* buf, int n); // optional - virtual ~Writer() {} - }; - - class SHA1 { - public: - SHA1(); - void put(int c); - double size() const; - uint64_t usize() const - const char* result(); - }; - - class Compressor { - public: - Compressor(); - void setOutput(Writer* out); - void writeTag(); - void startBlock(int level); - void startBlock(const char* hcomp); - void startSegment(const char* filename = 0, - const char* comment = 0); - void setInput(Reader* i); - void postProcess(const char* pcomp = 0, int length = 0); - bool compress(int n = -1); - void endSegment(const char* sha1string = 0); - void endBlock(); - }; - - class Decompresser { - public: - Decompresser(); - void setInput(Reader* in); - bool findBlock(double* memptr = 
0); - void hcomp(Writer* out); - bool findFilename(Writer* = 0); - void readComment(Writer* = 0); - void setOutput(Writer* out); - void setSHA1(SHA1* sha1ptr); - bool decompress(int n = -1); - bool pcomp(Writer* out); - void readSegmentEnd(char* sha1string = 0); - }; - - void compress(Reader* in, Writer* out, int level); - - void decompress(Reader* in, Writer* out); - } - -=head1 DESCRIPTION - -I is a C++ API for compressing or decompressing -files or objects in memory comforming to the ZPAQ level 1 and 2 standards -(see I). This document describes version 5.00 -of the software. The software may be used without -restriction under a modified MIT license. - -ZPAQ provides a high level of data compression in a streaming -(single pass) self-describing format that supports single or multiple -named objects (such as archives) with optional integrity checking. - -The library provides 3 default compression levels but supports -custom algorithms. The performance of the default levels is -shown in the table below for the 14 file Calgary corpus as -a tar file. Compression and decompression times are in seconds -on a 2 GHz T3200 running on one of two cores. Memory required -to compress or decompress is in MB. Some popular formats -are shown for comparison. - - Program Format Size Time (C, D) Memory - ----------- ------ --------- ----------- ------ - Uncompresed .tar 3,152,896 - compress .tar.Z 1,319,521 1.6 0.2 .1 MB - gzip -9 .tar.gz 1,022,810 0.7 0.1 .1 MB - bzip2 -9 .tar.bz2 860,097 0.6 0.4 5 MB - 7zip .tar.7z 824,573 1.5 0.1 195 MB - zpaq 1 (fast) .tar.zpaq 806,959 2 2 38 MB - zpaq 2 (mid) .tar.zpaq 699,191 8 8 112 MB - zpaq 3 (max) .tar.zpaq 644,190 20 20 246 MB - -A ZPAQ stream consists of one or more blocks, possibly mixed with -other data, that can be decompressed independently in any order. -Each block consists of one or more segments that must be decompressed -in order from the beginning of the block. 
Each block header contains -a description of the decompression algorithm. Each segment consists -of an optional filename string, an optional comment string, -self delimiting compressed data, and an optional SHA-1 checksum. -If ZPAQ blocks are mixed with other data, they must be -preceded by an identifying 13 byte tag which does not otherwise -appear in that data. - -ZPAQ compression is based on the PAQ context mixing model. -An array of components predict the probability of the next bit -of input, either independently or depending on the predictions -of earlier components. The final prediction is arithmetic coded. -Each component inputs a context computed from earlier input -by a program written in ZPAQL byte code which runs on a virtual -machine. Both the component array description and the ZPAQL -code are encoded in a string called HCOMP in each block header. -Data can also be stored uncompressed. - -A block may optionally specify a post-processor, a program -(also in ZPAQL) which takes the decoded data as input and -outputs the decompressed output. This program, if present, -is encoded as a string called PCOMP which is compressed -in the first segment prior to the compressed data. The first -decoded byte from the first segment is a flag indicating -whether a PCOMP string is present. The user is responsible -for correctly pre-processing the data so that post-processing -restores the original data. - -=head2 API Organization - -The I API consists of 2 files. - -=over - -=item libzpaq.h - -Header file to include in your application. - -=item libzpaq.cpp - -Source code file to link to your application. - -=back - -An application would have the line C<#include "libzpaq.h"> and -link to libzpaq.cpp. -The API provides two classes, C and C -which write or read respectively each of the syntactic elements -of a ZPAQ stream. The two functions C and -C provide simple interfaces for the most common -uses. 
In either case, the user must create classes derived -from the abstract base classes C and C and -define methods C and C which the code -will use to read and write bytes. The user must also define -a callback error handler. - -By default, libzpaq(3) uses just-in-time (JIT) acceleration -by translating ZPAQL code to x86-32 or x86-64 internally -and executing it. This feature can be disabled by compiling -with -DNOJIT. If enabled, it requires an x86 processor -capable of executing SSE2 instructions. SSE2 is supported -by most Intel processors since 2001 and AMD since 2003. - -Run time checks (assertions) can be enabled with -DDEBUG -for debugging purposes. - -All of the API code is contained in the namespace C. - -=head2 Callback Functions - -The following three functions must be defined by the user. - -=over - -=item C - -This function must be defined by the user to handle errors -from libzpaq. The library will call the function with -an English language message passed to C. Errors may -result from bad input during decompression, out of memory, -or illegal arguments or calling sequences to libzpaq -functions. Errors should be considered unrecoverable. - -=item C - -The user must create a class derived from Reader with an -implementation for C that reads one byte of input -and returns its value in the range 0...255, or returns -EOF (-1) at end of input. Objects of the derived type -would then be passed to functions that require a C. - -=item C - -The user must create a class derived from Writer with -an implemenation of C which is expected to take -a byte value C in the range 0...255 and write it to -output. Objects of the derived type -would then be passed to functions that require a C. - -=back - -The following two functions are optional. Defining them -can improve performance slightly. - -=over - -=item C - -If defined, this function should input up to C bytes into -the array C and return the number actually read, in -the range 0..n. 
A return value of 0 indicates end of input. -If C is not defined, then the default implementation -will call C n times. - -=item C - -If defined, this function should output the elements C -through C in order. If not defined, then the default -implementation will call C n times. - -=back - -=head2 Simple Compression - -In the remainder of this document, all classes and -functions are assumed to be in namespace C. - -=over - -=item C - -C compresses from C to C until C -returns EOF. It writes a single segment in a single block -with empty filename, comment, and checksum fields. C -must be 1, 2, or 3, to select models I, I, or -I respectively. Higher modes compress smaller but -take longer to compress and subsequently decompress. - -=item C - -C decompresses any valid ZPAQ stream from -C to C until C returns EOF. Any -non-ZPAQ data in the input is ignored. Any ZPAQ blocks -following non-ZPAQ must be preceded by a marker tag -to be recognized. Each block is decoded according to the -instructions in the block header. The contents of the -filename, comment, and checksum fields are ignored. -Data with bad checksums will be decoded anyway. If there -is more than one segment, then all of the output -data will be concatenated. - -=back - -=head2 class SHA1 - -The SHA1 class is used to compute SHA-1 checksums for compression -and verify them for decompression. It is believed to be -computationally infeasible to find two different strings -with the same hash value. Its member functions -are as follows: - -=over - -=item C - -The constructor creates a new SHA1 object representing the -hash of an empty string. - -=item C - -Appends one byte c (0...255) to the string whose hash is represented. - -=item C - -Returns the length (so far) of the string whose hash is represented. 
-The largest possible value returned is -2^61 - 1 = 2305843009213693951.0, but values larger than 2^53 = -9007199254740992.0 -will not be exact on systems using IEEE 64 bit floating point -representation of type C. The initial value is 0.0. - -=item C - -Returns the length (so far) as a 64 bit unsigned integer. - -=item C - -Computes the 20 byte SHA-1 hash and resets the string back -to a size of 0.0. The returned pointer points to an array -inside the SHA1 object whose -contents remain unchanged until the next call to C. - -=back - -=head2 class Compressor - -The C class has member functions to write -each of the syntactic elements of a ZPAQ stream and to specify -their values. It will compress using either built-in or -user supplied models. - -=over - -=item C - -The constructor creates a Compression object. No input source, -output destination, or compression model is specified. - -=item C - -Specifies a destination for output. Must be specified before -calling any function that writes data. - -=item C - -Writes a 13 byte marker tag which can be used to identify -the start of a block following non-ZPAQ data. - -=item C - -Writes a block header and specifies a compression model. -If linked with F, then C must be 1, 2, or 3 -to specify I, I, or I respectively. Higher numbers -compress smaller but more slowly. These models are compatible -with both the ZPAQ level 1 and 2 standards. - -=item C - -Writes a block header and specifies the HCOMP portion of the -compression model. The first two bytes of the string should -encode the length of the rest of the string as a 16 bit unsigned -number with the least significant bit first. The meaning of the -rest of the string is defined in the ZPAQ level 2 standard. -If the number of components (C) is 0, then the block -is saved in ZPAQ level 2 format, which cannot be read by -older ZPAQ level 1 decoders. Otherwise the block is saved in -ZPAQ level 1 format, which is compatible with all decoders. 
- -=item C - -Writes a segment header. C and -C are NUL terminated strings. If specified, then their -values are stored. Normally, C would be a file name -when compressing to an archive or omitted otherwise. If a file -is split among segments, then by convention only the first segment -is named. C is normally the uncompressed size as a decimal -number which is displayed when listing the contents of an archive. -Omitting it does not affect decompression. - -=item C - -Specifies the optional PCOMP string used for post-processing. -It must be called from within the first segment -of each block prior to compressing any data, but not from within -any other segment. -If C is 0 or no argument is passed, then the decompresser -will not post-process the data. The effect is to compress a -0 byte to indicate to the decompresser that no PCOMP string -is present. - -If C is not 0, then I bytes of the string I -are passed. If I is 0 or omitted, then -the first two bytes must encode -the length of the rest of the string as a 16 bit unsigned number -with the least significant byte first. The format of the remainder -of the string is described in the ZPAQ level 2 standard. -The effect is to compress a 1 byte -to indicate the presence of PCOMP, followed by the two length -bytes and the string as passed. For example, either -C or C -would compress the 5 bytes 1, 2, 0, 5, 8. -The user is responsible for pre-processing the input -prior to compression so that PCOMP restores the original data. - -=item C - -Specifies the input source for compression. It must be set -prior to the first call to C. - -=item C - -Compress n bytes of data, or until EOF is input, whichever comes -first. If n < 0 or omitted, then compress until EOF. -Returns true if there is more input available, or false if EOF -was read. - -=item C - -Stop compressing and write the end of a segment. If -C is specified, it should be a 20 byte string -as returned by C on the input data for -this segment I pre-processing. 
- -=item C - -Finish writing the current block. - -=back - -In order to create a valid ZPAQ stream, the components must -be written in the following order: - - for each block do { - if any non-ZPAQ data then { - write non-ZPAQ data - writeTag() - } - startBlock() - for each segment do { - startSegment() - if first segment in block then { - postProcess() - } - while (compress(n)) ; - endSegment() - } - endBlock() - } - -=head2 class Decompresser - -The class Decompresser has member functions to read each of the -syntactic elements of a ZPAQ stream. - -=over - -=item C - -The constructor creates a Decompresser object. No input source or -output destination is specified. - -=item C - -Specifies where the ZPAQ stream will be read from. Must be called -before any function that reads the stream. - -=item C - -Scan the input to find the start of the next block. If a block -does not start immediately, then the block must be preceded by -a marker tag (written with C) or it will -not be found. If C is not 0, then write the approximate -memory requirement (in bytes) to decompress to C<*memptr>). The -memory will be allocated by the first call to C. -It returns true if a block is found, or false if it reads to EOF -without finding a block. - -=item C - -Write the HCOMP string of the current block to C. -It will be in a format suitable -for passing to C. The first 2 bytes will -encode the length of the rest of the string as a 16 bit unsigned -integer with the least significant byte first. The format of the -remainder of the string is described in the ZPAQ level 1 -specification. - -=item C - -Find the start of the next segment. If another segment is found -within the current block then return true. If the end of the block -is found first, then return false. If a segment is found, the -filename field is not empty, and C -is not 0, then write the filename (without a terminating NUL byte) -to C. 
- -=item C - -Read or skip past the comment field following the filename field -in the segment header. If C is not 0 and the comment field is -not empty, then write the comment -(without a terminating NUL byte) to C. - -=item C - -Specify the destination for decompression. It must be set before -any data can be decompressed. - -=item C - -Specify the address of a SHA1 object for computing the checksum -of the decompressed data (after post-processing). As each byte C -is output, it is also passed to Cput(c)>. In order to -compute the correct checksum, the SHA1 object should be in its -initial state, either newly created, or by calling C, -before the first call to C. When the end of the segment -is reached, the value returned by Cresult()> should match -the stored checksum, if any. - -=item C - -Decode n bytes or until the end of segment, whichever comes -first. Return false if the end of segment is reached first. If -n < 0 or not specified, then decompress to the end of segment -and return false. C is the number of bytes prior to post-processing. -If the data is post-processed, then the size of the output may -be different. - -=item C - -Write the PCOMP string, if any, for the current block to C. -If there is no PCOMP string (no post-processor) then return false. -Otherwise write the string to C in a format suitable for -passing to C and return true. If written, -then the first 2 bytes will encode the length of the rest of the -string as a 16 bit unsigned integer with the least significant -bit first. The format of the rest of the string is descibed in -the ZPAQ level 1 standard. - -C is only valid after the first call to C -in the current block. To read the PCOMP string without decompressing any -data, then call C first. It is not necessary to -call C in this case. - -=item C - -Skip any compressed data in the current segment that has not yet -been decompressed and advance to the end of the segment. 
-Then if C is not 0 then write into -the 21 byte array that it points to. If a checksum is present, -then write a 1 into C and write the stored checksum -in C. Otherwise write a 0 in C. - -Note that it is not permitted to call decompress() if any compressed -data has been skipped in any earlier segments in the same block. - -=back - -A valid sequence of calls is as follows: - - while (findBlock()) { - while (findFilename()) { - readComment(); - if first segment in block then { (optional) - decompress(0) - pcomp() - } - while (decompress(n)) ; (optional) - readSegmentEnd(); - } - } - -=head1 EXAMPLES - -The following program F -lists the contents of a ZPAQ archive -read from standard input. - - #include - #include - #include "libzpaq.h" - - // Implement Reader and Writer interfaces for file I/O - class File: public libzpaq::Reader, public libzpaq::Writer { - FILE* f; - public: - File(FILE* f_): f(f_) {} - int get() {return getc(f);} - void put(int c) {putc(c, f);} - int read(char* buf, int n) {return fread(buf, 1, n, f);} - void write(const char* buf, int n) {fwrite(buf, 1, n, f);} - }; - - // Implement error handler - namespace libzpaq { - void error(const char* msg) { - fprintf(stderr, "Error: %s\n", msg); - exit(1); - } - } - - // List the contents of an archive. For each block, show - // the memory required to decompress. For each segment, - // show the filename and comment. 
- void list(FILE* input, FILE* output) { - libzpaq::Decompresser d; - File in(input), out(output); - double memory; - d.setInput(&in); - for (int block=1; d.findBlock(&memory); ++block) { - printf("Block %d needs %1.0f MB\n", block, memory/1e6); - while (d.findFilename(&out)) { // print filename - printf("\t"); - d.readComment(&out); // print comment - printf("\n"); - d.readSegmentEnd(); // skip compressed data - } - } - } - - int main() { - list(stdin, stdout); - return 0; - } - -The program could be compiled as follows: - - g++ listzpaq.cpp libzpaq.cpp - -The following code compresses a list of files into one block -written to stdout. Each file is compressed to a separate -segment. For each segment, the filename, comment, and SHA-1 -checksum are stored. The comment, as conventional, is the -file size as a decimal string. - - // Compress one file to one segment - void compress_file(libzpaq::Compressor& c, - const char* filename, - bool first_segment) { - - // Open input file - FILE* f; - f=fopen(filename, "rb"); - if (!f) return; - - // Compute SHA-1 checksum and file size - libzpaq::SHA1 sha1; - int ch; - while ((ch=getc(f))!=EOF) - sha1.put(ch); - - // Write file size as a comment. - // The size can have at most 19 digits. - char comment[20]; - sprintf(comment, "%1.0f", sha1.size()); - - // Compress segment - rewind(f); - File in(f); - c.startSegment(filename, comment); - if (first_segment) - c.postProcess(); - c.setInput(&in); - c.compress(); - c.endSegment(sha1.result()); - - // Close input file - fclose(f); - } - - // Compress a list of argc files in argv[0...argc-1] into one - // ZPAQ block to stdout at level 2. 
- void compress_list(int argc, char** argv) { - libzpaq::Compressor c; - File out(stdout); - c.setOutput(&out); - c.startBlock(2); - for (int i=0; i and C can -be passed an argument n to display progress every n bytes, -for example: - - for (int i=1; d.decompress(1000000); ++i) - fprintf(stderr, "Decompressed %d MB\n", i); - -To compress or decompress to and from objects in memory, derive -appropriate classes from C and C. For example, it is -possible to compress or decompress to a C using -the following class. - - struct String: public libzpaq::Writer { - std::string s; - void put(int c) {s+=char(c);} - }; - -This class is also useful for reading the filename and comment -fields during decompression as follows: - - String filename, comment; - while (d.findFilename(&filename)) { - d.readComment(&comment); - // ... - -=head1 AVAILABILITY - -I, I, and the ZPAQ level 1 and 2 specifications are -available from L. - -=head1 SEE ALSO - -C -C - -=cut - - +# Documentation for libzpaq +# +# Copyright (C) 2012, Dell Inc. Written by Matt Mahoney. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so without restriction. +# This Software is provided "as is" without warranty. 
+# +# To create man page: pod2man libzpaq.3.pod > libzpaq.3 +# To create HTML documentation: pod2html libzpaq.3.pod > libzpaq.html + +=pod + +=head1 NAME + +libzpaq - ZPAQ compression API + +=head1 SYNOPSIS + + #include "libzpaq.h" + + namespace libzpaq { + + extern void error(const char* msg); + + class Reader { + public: + virtual int get() = 0; + virtual int read(char* buf, int n); // optional + virtual ~Reader() {} + }; + + class Writer { + public: + virtual void put(int c) = 0; + virtual void write(const char* buf, int n); // optional + virtual ~Writer() {} + }; + + class SHA1 { + public: + SHA1(); + void put(int c); + double size() const; + uint64_t usize() const; + const char* result(); + }; + + class Compressor { + public: + Compressor(); + void setOutput(Writer* out); + void writeTag(); + void startBlock(int level); + void startBlock(const char* hcomp); + void startSegment(const char* filename = 0, + const char* comment = 0); + void setInput(Reader* i); + void postProcess(const char* pcomp = 0, int length = 0); + bool compress(int n = -1); + void endSegment(const char* sha1string = 0); + void endBlock(); + }; + + class Decompresser { + public: + Decompresser(); + void setInput(Reader* in); + bool findBlock(double* memptr = 0); + void hcomp(Writer* out); + bool findFilename(Writer* = 0); + void readComment(Writer* = 0); + void setOutput(Writer* out); + void setSHA1(SHA1* sha1ptr); + bool decompress(int n = -1); + bool pcomp(Writer* out); + void readSegmentEnd(char* sha1string = 0); + }; + + void compress(Reader* in, Writer* out, int level); + + void decompress(Reader* in, Writer* out); + } + +=head1 DESCRIPTION + +I is a C++ API for compressing or decompressing +files or objects in memory conforming to the ZPAQ level 1 and 2 standards +(see I). This document describes version 5.00 +of the software. The software may be used without +restriction under a modified MIT license.
+ +ZPAQ provides a high level of data compression in a streaming +(single pass) self-describing format that supports single or multiple +named objects (such as archives) with optional integrity checking. + +The library provides 3 default compression levels but supports +custom algorithms. The performance of the default levels is +shown in the table below for the 14 file Calgary corpus as +a tar file. Compression and decompression times are in seconds +on a 2 GHz T3200 running on one of two cores. Memory required +to compress or decompress is in MB. Some popular formats +are shown for comparison. + + Program Format Size Time (C, D) Memory + ----------- ------ --------- ----------- ------ + Uncompressed .tar 3,152,896 + compress .tar.Z 1,319,521 1.6 0.2 .1 MB + gzip -9 .tar.gz 1,022,810 0.7 0.1 .1 MB + bzip2 -9 .tar.bz2 860,097 0.6 0.4 5 MB + 7zip .tar.7z 824,573 1.5 0.1 195 MB + zpaq 1 (fast) .tar.zpaq 806,959 2 2 38 MB + zpaq 2 (mid) .tar.zpaq 699,191 8 8 112 MB + zpaq 3 (max) .tar.zpaq 644,190 20 20 246 MB + +A ZPAQ stream consists of one or more blocks, possibly mixed with +other data, that can be decompressed independently in any order. +Each block consists of one or more segments that must be decompressed +in order from the beginning of the block. Each block header contains +a description of the decompression algorithm. Each segment consists +of an optional filename string, an optional comment string, +self delimiting compressed data, and an optional SHA-1 checksum. +If ZPAQ blocks are mixed with other data, they must be +preceded by an identifying 13 byte tag which does not otherwise +appear in that data. + +ZPAQ compression is based on the PAQ context mixing model. +An array of components predicts the probability of the next bit +of input, either independently or depending on the predictions +of earlier components. The final prediction is arithmetic coded.
+Each component inputs a context computed from earlier input +by a program written in ZPAQL byte code which runs on a virtual +machine. Both the component array description and the ZPAQL +code are encoded in a string called HCOMP in each block header. +Data can also be stored uncompressed. + +A block may optionally specify a post-processor, a program +(also in ZPAQL) which takes the decoded data as input and +outputs the decompressed output. This program, if present, +is encoded as a string called PCOMP which is compressed +in the first segment prior to the compressed data. The first +decoded byte from the first segment is a flag indicating +whether a PCOMP string is present. The user is responsible +for correctly pre-processing the data so that post-processing +restores the original data. + +=head2 API Organization + +The I API consists of 2 files. + +=over + +=item libzpaq.h + +Header file to include in your application. + +=item libzpaq.cpp + +Source code file to link to your application. + +=back + +An application would have the line C<#include "libzpaq.h"> and +link to libzpaq.cpp. +The API provides two classes, C and C +which write or read respectively each of the syntactic elements +of a ZPAQ stream. The two functions C and +C provide simple interfaces for the most common +uses. In either case, the user must create classes derived +from the abstract base classes C and C and +define methods C and C which the code +will use to read and write bytes. The user must also define +a callback error handler. + +By default, libzpaq(3) uses just-in-time (JIT) acceleration +by translating ZPAQL code to x86-32 or x86-64 internally +and executing it. This feature can be disabled by compiling +with -DNOJIT. If enabled, it requires an x86 processor +capable of executing SSE2 instructions. SSE2 is supported +by most Intel processors since 2001 and AMD since 2003. + +Run time checks (assertions) can be enabled with -DDEBUG +for debugging purposes. 
+ +All of the API code is contained in the namespace C. + +=head2 Callback Functions + +The following three functions must be defined by the user. + +=over + +=item C + +This function must be defined by the user to handle errors +from libzpaq. The library will call the function with +an English language message passed to C. Errors may +result from bad input during decompression, out of memory, +or illegal arguments or calling sequences to libzpaq +functions. Errors should be considered unrecoverable. + +=item C + +The user must create a class derived from Reader with an +implementation for C that reads one byte of input +and returns its value in the range 0...255, or returns +EOF (-1) at end of input. Objects of the derived type +would then be passed to functions that require a C. + +=item C + +The user must create a class derived from Writer with +an implementation of C which is expected to take +a byte value C in the range 0...255 and write it to +output. Objects of the derived type +would then be passed to functions that require a C. + +=back + +The following two functions are optional. Defining them +can improve performance slightly. + +=over + +=item C + +If defined, this function should input up to C bytes into +the array C and return the number actually read, in +the range 0..n. A return value of 0 indicates end of input. +If C is not defined, then the default implementation +will call C n times. + +=item C + +If defined, this function should output the elements C +through C in order. If not defined, then the default +implementation will call C n times. + +=back + +=head2 Simple Compression + +In the remainder of this document, all classes and +functions are assumed to be in namespace C. + +=over + +=item C + +C compresses from C to C until C +returns EOF. It writes a single segment in a single block +with empty filename, comment, and checksum fields. C +must be 1, 2, or 3, to select models I, I, or +I respectively.
Higher modes compress smaller but +take longer to compress and subsequently decompress. + +=item C + +C decompresses any valid ZPAQ stream from +C to C until C returns EOF. Any +non-ZPAQ data in the input is ignored. Any ZPAQ blocks +following non-ZPAQ data must be preceded by a marker tag +to be recognized. Each block is decoded according to the +instructions in the block header. The contents of the +filename, comment, and checksum fields are ignored. +Data with bad checksums will be decoded anyway. If there +is more than one segment, then all of the output +data will be concatenated. + +=back + +=head2 class SHA1 + +The SHA1 class is used to compute SHA-1 checksums for compression +and verify them for decompression. It is believed to be +computationally infeasible to find two different strings +with the same hash value. Its member functions +are as follows: + +=over + +=item C + +The constructor creates a new SHA1 object representing the +hash of an empty string. + +=item C + +Appends one byte c (0...255) to the string whose hash is represented. + +=item C + +Returns the length (so far) of the string whose hash is represented. +The largest possible value returned is +2^61 - 1 = 2305843009213693951.0, but values larger than 2^53 = +9007199254740992.0 +will not be exact on systems using IEEE 64 bit floating point +representation of type C. The initial value is 0.0. + +=item C + +Returns the length (so far) as a 64 bit unsigned integer. + +=item C + +Computes the 20 byte SHA-1 hash and resets the string back +to a size of 0.0. The returned pointer points to an array +inside the SHA1 object whose +contents remain unchanged until the next call to C. + +=back + +=head2 class Compressor + +The C class has member functions to write +each of the syntactic elements of a ZPAQ stream and to specify +their values. It will compress using either built-in or +user supplied models. + +=over + +=item C + +The constructor creates a Compressor object.
No input source, +output destination, or compression model is specified. + +=item C + +Specifies a destination for output. Must be specified before +calling any function that writes data. + +=item C + +Writes a 13 byte marker tag which can be used to identify +the start of a block following non-ZPAQ data. + +=item C + +Writes a block header and specifies a compression model. +If linked with F, then C must be 1, 2, or 3 +to specify I, I, or I respectively. Higher numbers +compress smaller but more slowly. These models are compatible +with both the ZPAQ level 1 and 2 standards. + +=item C + +Writes a block header and specifies the HCOMP portion of the +compression model. The first two bytes of the string should +encode the length of the rest of the string as a 16 bit unsigned +number with the least significant byte first. The meaning of the +rest of the string is defined in the ZPAQ level 2 standard. +If the number of components (C) is 0, then the block +is saved in ZPAQ level 2 format, which cannot be read by +older ZPAQ level 1 decoders. Otherwise the block is saved in +ZPAQ level 1 format, which is compatible with all decoders. + +=item C + +Writes a segment header. C and +C are NUL terminated strings. If specified, then their +values are stored. Normally, C would be a file name +when compressing to an archive or omitted otherwise. If a file +is split among segments, then by convention only the first segment +is named. C is normally the uncompressed size as a decimal +number which is displayed when listing the contents of an archive. +Omitting it does not affect decompression. + +=item C + +Specifies the optional PCOMP string used for post-processing. +It must be called from within the first segment +of each block prior to compressing any data, but not from within +any other segment. +If C is 0 or no argument is passed, then the decompresser +will not post-process the data.
The effect is to compress a +0 byte to indicate to the decompresser that no PCOMP string +is present. + +If C is not 0, then I bytes of the string I +are passed. If I is 0 or omitted, then +the first two bytes must encode +the length of the rest of the string as a 16 bit unsigned number +with the least significant byte first. The format of the remainder +of the string is described in the ZPAQ level 2 standard. +The effect is to compress a 1 byte +to indicate the presence of PCOMP, followed by the two length +bytes and the string as passed. For example, either +C or C +would compress the 5 bytes 1, 2, 0, 5, 8. +The user is responsible for pre-processing the input +prior to compression so that PCOMP restores the original data. + +=item C + +Specifies the input source for compression. It must be set +prior to the first call to C. + +=item C + +Compress n bytes of data, or until EOF is input, whichever comes +first. If n < 0 or omitted, then compress until EOF. +Returns true if there is more input available, or false if EOF +was read. + +=item C + +Stop compressing and write the end of a segment. If +C is specified, it should be a 20 byte string +as returned by C on the input data for +this segment I pre-processing. + +=item C + +Finish writing the current block. + +=back + +In order to create a valid ZPAQ stream, the components must +be written in the following order: + + for each block do { + if any non-ZPAQ data then { + write non-ZPAQ data + writeTag() + } + startBlock() + for each segment do { + startSegment() + if first segment in block then { + postProcess() + } + while (compress(n)) ; + endSegment() + } + endBlock() + } + +=head2 class Decompresser + +The class Decompresser has member functions to read each of the +syntactic elements of a ZPAQ stream. + +=over + +=item C + +The constructor creates a Decompresser object. No input source or +output destination is specified. + +=item C + +Specifies where the ZPAQ stream will be read from. 
Must be called +before any function that reads the stream. + +=item C + +Scan the input to find the start of the next block. If a block +does not start immediately, then the block must be preceded by +a marker tag (written with C) or it will +not be found. If C is not 0, then write the approximate +memory requirement (in bytes) to decompress to C<*memptr>). The +memory will be allocated by the first call to C. +It returns true if a block is found, or false if it reads to EOF +without finding a block. + +=item C + +Write the HCOMP string of the current block to C. +It will be in a format suitable +for passing to C. The first 2 bytes will +encode the length of the rest of the string as a 16 bit unsigned +integer with the least significant byte first. The format of the +remainder of the string is described in the ZPAQ level 1 +specification. + +=item C + +Find the start of the next segment. If another segment is found +within the current block then return true. If the end of the block +is found first, then return false. If a segment is found, the +filename field is not empty, and C +is not 0, then write the filename (without a terminating NUL byte) +to C. + +=item C + +Read or skip past the comment field following the filename field +in the segment header. If C is not 0 and the comment field is +not empty, then write the comment +(without a terminating NUL byte) to C. + +=item C + +Specify the destination for decompression. It must be set before +any data can be decompressed. + +=item C + +Specify the address of a SHA1 object for computing the checksum +of the decompressed data (after post-processing). As each byte C +is output, it is also passed to Cput(c)>. In order to +compute the correct checksum, the SHA1 object should be in its +initial state, either newly created, or by calling C, +before the first call to C. When the end of the segment +is reached, the value returned by Cresult()> should match +the stored checksum, if any. 
+ +=item C + +Decode n bytes or until the end of segment, whichever comes +first. Return false if the end of segment is reached first. If +n < 0 or not specified, then decompress to the end of segment +and return false. C is the number of bytes prior to post-processing. +If the data is post-processed, then the size of the output may +be different. + +=item C + +Write the PCOMP string, if any, for the current block to C. +If there is no PCOMP string (no post-processor) then return false. +Otherwise write the string to C in a format suitable for +passing to C and return true. If written, +then the first 2 bytes will encode the length of the rest of the +string as a 16 bit unsigned integer with the least significant +bit first. The format of the rest of the string is descibed in +the ZPAQ level 1 standard. + +C is only valid after the first call to C +in the current block. To read the PCOMP string without decompressing any +data, then call C first. It is not necessary to +call C in this case. + +=item C + +Skip any compressed data in the current segment that has not yet +been decompressed and advance to the end of the segment. +Then if C is not 0 then write into +the 21 byte array that it points to. If a checksum is present, +then write a 1 into C and write the stored checksum +in C. Otherwise write a 0 in C. + +Note that it is not permitted to call decompress() if any compressed +data has been skipped in any earlier segments in the same block. + +=back + +A valid sequence of calls is as follows: + + while (findBlock()) { + while (findFilename()) { + readComment(); + if first segment in block then { (optional) + decompress(0) + pcomp() + } + while (decompress(n)) ; (optional) + readSegmentEnd(); + } + } + +=head1 EXAMPLES + +The following program F +lists the contents of a ZPAQ archive +read from standard input. 
+ + #include + #include + #include "libzpaq.h" + + // Implement Reader and Writer interfaces for file I/O + class File: public libzpaq::Reader, public libzpaq::Writer { + FILE* f; + public: + File(FILE* f_): f(f_) {} + int get() {return getc(f);} + void put(int c) {putc(c, f);} + int read(char* buf, int n) {return fread(buf, 1, n, f);} + void write(const char* buf, int n) {fwrite(buf, 1, n, f);} + }; + + // Implement error handler + namespace libzpaq { + void error(const char* msg) { + fprintf(stderr, "Error: %s\n", msg); + exit(1); + } + } + + // List the contents of an archive. For each block, show + // the memory required to decompress. For each segment, + // show the filename and comment. + void list(FILE* input, FILE* output) { + libzpaq::Decompresser d; + File in(input), out(output); + double memory; + d.setInput(&in); + for (int block=1; d.findBlock(&memory); ++block) { + printf("Block %d needs %1.0f MB\n", block, memory/1e6); + while (d.findFilename(&out)) { // print filename + printf("\t"); + d.readComment(&out); // print comment + printf("\n"); + d.readSegmentEnd(); // skip compressed data + } + } + } + + int main() { + list(stdin, stdout); + return 0; + } + +The program could be compiled as follows: + + g++ listzpaq.cpp libzpaq.cpp + +The following code compresses a list of files into one block +written to stdout. Each file is compressed to a separate +segment. For each segment, the filename, comment, and SHA-1 +checksum are stored. The comment, as conventional, is the +file size as a decimal string. + + // Compress one file to one segment + void compress_file(libzpaq::Compressor& c, + const char* filename, + bool first_segment) { + + // Open input file + FILE* f; + f=fopen(filename, "rb"); + if (!f) return; + + // Compute SHA-1 checksum and file size + libzpaq::SHA1 sha1; + int ch; + while ((ch=getc(f))!=EOF) + sha1.put(ch); + + // Write file size as a comment. + // The size can have at most 19 digits. 
+ char comment[20]; + sprintf(comment, "%1.0f", sha1.size()); + + // Compress segment + rewind(f); + File in(f); + c.startSegment(filename, comment); + if (first_segment) + c.postProcess(); + c.setInput(&in); + c.compress(); + c.endSegment(sha1.result()); + + // Close input file + fclose(f); + } + + // Compress a list of argc files in argv[0...argc-1] into one + // ZPAQ block to stdout at level 2. + void compress_list(int argc, char** argv) { + libzpaq::Compressor c; + File out(stdout); + c.setOutput(&out); + c.startBlock(2); + for (int i=0; i and C can +be passed an argument n to display progress every n bytes, +for example: + + for (int i=1; d.decompress(1000000); ++i) + fprintf(stderr, "Decompressed %d MB\n", i); + +To compress or decompress to and from objects in memory, derive +appropriate classes from C and C. For example, it is +possible to compress or decompress to a C using +the following class. + + struct String: public libzpaq::Writer { + std::string s; + void put(int c) {s+=char(c);} + }; + +This class is also useful for reading the filename and comment +fields during decompression as follows: + + String filename, comment; + while (d.findFilename(&filename)) { + d.readComment(&comment); + // ... + +=head1 AVAILABILITY + +I, I, and the ZPAQ level 1 and 2 specifications are +available from L. + +=head1 SEE ALSO + +C +C + +=cut + + diff --git a/libzpaq/libzpaq.cpp b/libzpaq/libzpaq.cpp index 50bc59b..cf871c1 100644 --- a/libzpaq/libzpaq.cpp +++ b/libzpaq/libzpaq.cpp @@ -1,3186 +1,3186 @@ -/* libzpaq.cpp - Part of LIBZPAQ Version 5.01 - - Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so without restriction. - This Software is provided "as is" without warranty. - -LIBZPAQ is a C++ library for compression and decompression of data -conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ -*/ - -#include "libzpaq.h" -#include -#include -#include -#include - -#ifndef NOJIT -#ifndef _WIN32 -#include -#else -#include -#endif -#endif - -namespace libzpaq { - -// Standard library redirections -void* calloc(size_t a, size_t b) {return ::calloc(a, b);} -void free(void* p) {::free(p);} -int memcmp(const void* d, const void* s, size_t n) { - return ::memcmp(d, s, n);} -void* memset(void* d, int c, size_t n) {return ::memset(d, c, n);} -double log(double x) {return ::log(x);} -double exp(double x) {return ::exp(x);} -double pow(double x, double y) {return ::pow(x, y);} - -// Read 16 bit little-endian number -int toU16(const char* p) { - return (p[0]&255)+256*(p[1]&255); -} - -// Default read() and write() -int Reader::read(char* buf, int n) { - int i=0, c; - while (i=0) - buf[i++]=c; - return i; -} - -void Writer::write(const char* buf, int n) { - for (int i=0; i 0 bytes of executable memory and update -// p to point to it and newsize = n. Free any previously -// allocated memory first. If newsize is 0 then free only. -// Call error in case of failure. If NOJIT, ignore newsize -// and set p=0, n=0 without allocating memory. 
-void allocx(U8* &p, int &n, int newsize) { -#ifdef NOJIT - p=0; - n=0; -#else - if (p || n) { - if (p) -#ifndef _WIN32 - munmap(p, n); -#else // Windows - VirtualFree(p, 0, MEM_RELEASE); -#endif - p=0; - n=0; - } - if (newsize>0) { -#ifndef _WIN32 - p=(U8*)mmap(0, newsize, PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_PRIVATE|MAP_ANON, -1, 0); - if ((void*)p==MAP_FAILED) p=0; -#else - p=(U8*)VirtualAlloc(0, newsize, MEM_RESERVE|MEM_COMMIT, - PAGE_EXECUTE_READWRITE); -#endif - if (p) - n=newsize; - else { - n=0; - error("allocx failed"); - } - } -#endif -} - -//////////////////////////// SHA1 //////////////////////////// - -// SHA1 code, see http://en.wikipedia.org/wiki/SHA-1 - -// Start a new hash -void SHA1::init() { - len0=len1=0; - h[0]=0x67452301; - h[1]=0xEFCDAB89; - h[2]=0x98BADCFE; - h[3]=0x10325476; - h[4]=0xC3D2E1F0; -} - -// Return old result and start a new hash -const char* SHA1::result() { - - // pad and append length - const U32 s1=len1, s0=len0; - put(0x80); - while ((len0&511)!=448) - put(0); - put(s1>>24); - put(s1>>16); - put(s1>>8); - put(s1); - put(s0>>24); - put(s0>>16); - put(s0>>8); - put(s0); - - // copy h to hbuf - for (int i=0; i<5; ++i) { - hbuf[4*i]=h[i]>>24; - hbuf[4*i+1]=h[i]>>16; - hbuf[4*i+2]=h[i]>>8; - hbuf[4*i+3]=h[i]; - } - - // return hash prior to clearing state - init(); - return hbuf; -} - -// Hash 1 block of 64 bytes -void SHA1::process() { - for (int i=16; i<80; ++i) { - w[i]=w[i-3]^w[i-8]^w[i-14]^w[i-16]; - w[i]=w[i]<<1|w[i]>>31; - } - U32 a=h[0]; - U32 b=h[1]; - U32 c=h[2]; - U32 d=h[3]; - U32 e=h[4]; - const U32 k1=0x5A827999, k2=0x6ED9EBA1, k3=0x8F1BBCDC, k4=0xCA62C1D6; -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(~b&d))+k1+w[i]; b=b<<30|b>>2; -#define f5(i) f1(a,b,c,d,e,i) f1(e,a,b,c,d,i+1) f1(d,e,a,b,c,i+2) \ - f1(c,d,e,a,b,i+3) f1(b,c,d,e,a,i+4) - f5(0) f5(5) f5(10) f5(15) -#undef f1 -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k2+w[i]; b=b<<30|b>>2; - f5(20) f5(25) f5(30) f5(35) -#undef f1 -#define f1(a,b,c,d,e,i) 
e+=(a<<5|a>>27)+((b&c)|(b&d)|(c&d))+k3+w[i]; b=b<<30|b>>2; - f5(40) f5(45) f5(50) f5(55) -#undef f1 -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k4+w[i]; b=b<<30|b>>2; - f5(60) f5(65) f5(70) f5(75) -#undef f1 -#undef f5 - h[0]+=a; - h[1]+=b; - h[2]+=c; - h[3]+=d; - h[4]+=e; -} - -//////////////////////////// Component /////////////////////// - -// A Component is a context model, indirect context model, match model, -// fixed weight mixer, adaptive 2 input mixer without or with current -// partial byte as context, adaptive m input mixer (without or with), -// or SSE (without or with). - -const int compsize[256]={0,2,3,2,3,4,6,6,3,5}; - -void Component::init() { - limit=cxt=a=b=c=0; - cm.resize(0); - ht.resize(0); - a16.resize(0); -} - -////////////////////////// StateTable ////////////////////////// - -// How many states with count of n0 zeros, n1 ones (0...2) -int StateTable::num_states(int n0, int n1) { - const int B=6; - const int bound[B]={20,48,15,8,6,5}; // n0 -> max n1, n1 -> max n0 - if (n0=B || n0>bound[n1]) return 0; - return 1+(n1>0 && n0+n1<=17); -} - -// New value of count n0 if 1 is observed (and vice versa) -void StateTable::discount(int& n0) { - n0=(n0>=1)+(n0>=2)+(n0>=3)+(n0>=4)+(n0>=5)+(n0>=7)+(n0>=8); -} - -// compute next n0,n1 (0 to N) given input y (0 or 1) -void StateTable::next_state(int& n0, int& n1, int y) { - if (n0 20,0 - // 48,1,0 -> 48,1 - // 15,2,0 -> 8,1 - // 8,3,0 -> 6,2 - // 8,3,1 -> 5,3 - // 6,4,0 -> 5,3 - // 5,5,0 -> 5,4 - // 5,5,1 -> 4,5 - while (!num_states(n0, n1)) { - if (n1<2) --n0; - else { - n0=(n0*(n1-1)+(n1/2))/n1; - --n1; - } - } - } -} - -// Initialize next state table ns[state*4] -> next if 0, next if 1, n0, n1 -StateTable::StateTable() { - - // Assign states by increasing priority - const int N=50; - U8 t[N][N][2]={{{0}}}; // (n0,n1,y) -> state number - int state=0; - for (int i=0; i=0 && n<=2); - if (n) { - t[n0][n1][0]=state; - t[n0][n1][1]=state+n-1; - state+=n; - } - } - } - - // Generate next state table - 
memset(ns, 0, sizeof(ns)); - for (int n0=0; n0=0 && s<256); - int s0=n0, s1=n1; - next_state(s0, s1, 0); - assert(s0>=0 && s0=0 && s1=0 && s0=0 && s1=7); - assert(hbegin>=cend); - assert(hend>=hbegin); - assert(out2); - if (!pp) { // if not a postprocessor then write COMP - for (int i=0; iput(header[i]); - } - else { // write PCOMP size only - out2->put((hend-hbegin)&255); - out2->put((hend-hbegin)>>8); - } - for (int i=hbegin; iput(header[i]); - return true; -} - -// Read header from in2 -int ZPAQL::read(Reader* in2) { - - // Get header size and allocate - int hsize=in2->get(); - hsize+=in2->get()*256; - header.resize(hsize+300); - cend=hbegin=hend=0; - header[cend++]=hsize&255; - header[cend++]=hsize>>8; - while (cend<7) header[cend++]=in2->get(); // hh hm ph pm n - - // Read COMP - int n=header[cend-1]; - for (int i=0; iget(); // component type - if (type==-1) error("unexpected end of file"); - header[cend++]=type; // component type - int size=compsize[type]; - if (size<1) error("Invalid component type"); - if (cend+size>header.isize()-8) error("COMP list too big"); - for (int j=1; jget(); - } - if ((header[cend++]=in2->get())!=0) error("missing COMP END"); - - // Insert a guard gap and read HCOMP - hbegin=hend=cend+128; - while (hendget(); - if (op==-1) error("unexpected end of file"); - header[hend++]=op; - } - if ((header[hend++]=in2->get())!=0) error("missing HCOMP END"); - assert(cend>=7 && cendhbegin && hend6); - assert(output==0); - assert(sha1==0); - init(header[2], header[3]); // hh, hm -} - -// Initialize machine state as PCOMP -void ZPAQL::initp() { - assert(header.isize()>6); - init(header[4], header[5]); // ph, pm -} - -// Flush pending output -void ZPAQL::flush() { - if (output) output->write(&outbuf[0], bufptr); - if (sha1) for (int i=0; iput(U8(outbuf[i])); - bufptr=0; -} - -// Return memory requirement in bytes -double ZPAQL::memory() { - double mem=pow(2.0,header[2]+2)+pow(2.0,header[3]) // hh hm - +pow(2.0,header[4]+2)+pow(2.0,header[5]) // ph 
pm - +header.size(); - int cp=7; // start of comp list - for (int i=0; i0); - assert(cend>=7); - assert(hbegin>=cend+128); - assert(hend>=hbegin); - assert(hend0); - h.resize(1, hbits); - m.resize(1, mbits); - r.resize(256); - a=b=c=d=pc=f=0; -} - -// Run program on input by interpreting header -void ZPAQL::run0(U32 input) { - assert(cend>6); - assert(hbegin>=cend+128); - assert(hend>=hbegin); - assert(hend0); - assert(h.size()>0); - assert(header[0]+256*header[1]==cend+hend-hbegin-2); - pc=hbegin; - a=input; - while (execute()) ; -} - -// Execute one instruction, return 0 after HALT else 1 -int ZPAQL::execute() { - switch(header[pc++]) { - case 0: err(); break; // ERROR - case 1: ++a; break; // A++ - case 2: --a; break; // A-- - case 3: a = ~a; break; // A! - case 4: a = 0; break; // A=0 - case 7: a = r[header[pc++]]; break; // A=R N - case 8: swap(b); break; // B<>A - case 9: ++b; break; // B++ - case 10: --b; break; // B-- - case 11: b = ~b; break; // B! - case 12: b = 0; break; // B=0 - case 15: b = r[header[pc++]]; break; // B=R N - case 16: swap(c); break; // C<>A - case 17: ++c; break; // C++ - case 18: --c; break; // C-- - case 19: c = ~c; break; // C! - case 20: c = 0; break; // C=0 - case 23: c = r[header[pc++]]; break; // C=R N - case 24: swap(d); break; // D<>A - case 25: ++d; break; // D++ - case 26: --d; break; // D-- - case 27: d = ~d; break; // D! - case 28: d = 0; break; // D=0 - case 31: d = r[header[pc++]]; break; // D=R N - case 32: swap(m(b)); break; // *B<>A - case 33: ++m(b); break; // *B++ - case 34: --m(b); break; // *B-- - case 35: m(b) = ~m(b); break; // *B! - case 36: m(b) = 0; break; // *B=0 - case 39: if (f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JT N - case 40: swap(m(c)); break; // *C<>A - case 41: ++m(c); break; // *C++ - case 42: --m(c); break; // *C-- - case 43: m(c) = ~m(c); break; // *C! 
- case 44: m(c) = 0; break; // *C=0 - case 47: if (!f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JF N - case 48: swap(h(d)); break; // *D<>A - case 49: ++h(d); break; // *D++ - case 50: --h(d); break; // *D-- - case 51: h(d) = ~h(d); break; // *D! - case 52: h(d) = 0; break; // *D=0 - case 55: r[header[pc++]] = a; break; // R=A N - case 56: return 0 ; // HALT - case 57: outc(a&255); break; // OUT - case 59: a = (a+m(b)+512)*773; break; // HASH - case 60: h(d) = (h(d)+a+512)*773; break; // HASHD - case 63: pc+=((header[pc]+128)&255)-127; break; // JMP N - case 64: a = a; break; // A=A - case 65: a = b; break; // A=B - case 66: a = c; break; // A=C - case 67: a = d; break; // A=D - case 68: a = m(b); break; // A=*B - case 69: a = m(c); break; // A=*C - case 70: a = h(d); break; // A=*D - case 71: a = header[pc++]; break; // A= N - case 72: b = a; break; // B=A - case 73: b = b; break; // B=B - case 74: b = c; break; // B=C - case 75: b = d; break; // B=D - case 76: b = m(b); break; // B=*B - case 77: b = m(c); break; // B=*C - case 78: b = h(d); break; // B=*D - case 79: b = header[pc++]; break; // B= N - case 80: c = a; break; // C=A - case 81: c = b; break; // C=B - case 82: c = c; break; // C=C - case 83: c = d; break; // C=D - case 84: c = m(b); break; // C=*B - case 85: c = m(c); break; // C=*C - case 86: c = h(d); break; // C=*D - case 87: c = header[pc++]; break; // C= N - case 88: d = a; break; // D=A - case 89: d = b; break; // D=B - case 90: d = c; break; // D=C - case 91: d = d; break; // D=D - case 92: d = m(b); break; // D=*B - case 93: d = m(c); break; // D=*C - case 94: d = h(d); break; // D=*D - case 95: d = header[pc++]; break; // D= N - case 96: m(b) = a; break; // *B=A - case 97: m(b) = b; break; // *B=B - case 98: m(b) = c; break; // *B=C - case 99: m(b) = d; break; // *B=D - case 100: m(b) = m(b); break; // *B=*B - case 101: m(b) = m(c); break; // *B=*C - case 102: m(b) = h(d); break; // *B=*D - case 103: m(b) = header[pc++]; break; // 
*B= N - case 104: m(c) = a; break; // *C=A - case 105: m(c) = b; break; // *C=B - case 106: m(c) = c; break; // *C=C - case 107: m(c) = d; break; // *C=D - case 108: m(c) = m(b); break; // *C=*B - case 109: m(c) = m(c); break; // *C=*C - case 110: m(c) = h(d); break; // *C=*D - case 111: m(c) = header[pc++]; break; // *C= N - case 112: h(d) = a; break; // *D=A - case 113: h(d) = b; break; // *D=B - case 114: h(d) = c; break; // *D=C - case 115: h(d) = d; break; // *D=D - case 116: h(d) = m(b); break; // *D=*B - case 117: h(d) = m(c); break; // *D=*C - case 118: h(d) = h(d); break; // *D=*D - case 119: h(d) = header[pc++]; break; // *D= N - case 128: a += a; break; // A+=A - case 129: a += b; break; // A+=B - case 130: a += c; break; // A+=C - case 131: a += d; break; // A+=D - case 132: a += m(b); break; // A+=*B - case 133: a += m(c); break; // A+=*C - case 134: a += h(d); break; // A+=*D - case 135: a += header[pc++]; break; // A+= N - case 136: a -= a; break; // A-=A - case 137: a -= b; break; // A-=B - case 138: a -= c; break; // A-=C - case 139: a -= d; break; // A-=D - case 140: a -= m(b); break; // A-=*B - case 141: a -= m(c); break; // A-=*C - case 142: a -= h(d); break; // A-=*D - case 143: a -= header[pc++]; break; // A-= N - case 144: a *= a; break; // A*=A - case 145: a *= b; break; // A*=B - case 146: a *= c; break; // A*=C - case 147: a *= d; break; // A*=D - case 148: a *= m(b); break; // A*=*B - case 149: a *= m(c); break; // A*=*C - case 150: a *= h(d); break; // A*=*D - case 151: a *= header[pc++]; break; // A*= N - case 152: div(a); break; // A/=A - case 153: div(b); break; // A/=B - case 154: div(c); break; // A/=C - case 155: div(d); break; // A/=D - case 156: div(m(b)); break; // A/=*B - case 157: div(m(c)); break; // A/=*C - case 158: div(h(d)); break; // A/=*D - case 159: div(header[pc++]); break; // A/= N - case 160: mod(a); break; // A%=A - case 161: mod(b); break; // A%=B - case 162: mod(c); break; // A%=C - case 163: mod(d); break; // 
A%=D - case 164: mod(m(b)); break; // A%=*B - case 165: mod(m(c)); break; // A%=*C - case 166: mod(h(d)); break; // A%=*D - case 167: mod(header[pc++]); break; // A%= N - case 168: a &= a; break; // A&=A - case 169: a &= b; break; // A&=B - case 170: a &= c; break; // A&=C - case 171: a &= d; break; // A&=D - case 172: a &= m(b); break; // A&=*B - case 173: a &= m(c); break; // A&=*C - case 174: a &= h(d); break; // A&=*D - case 175: a &= header[pc++]; break; // A&= N - case 176: a &= ~ a; break; // A&~A - case 177: a &= ~ b; break; // A&~B - case 178: a &= ~ c; break; // A&~C - case 179: a &= ~ d; break; // A&~D - case 180: a &= ~ m(b); break; // A&~*B - case 181: a &= ~ m(c); break; // A&~*C - case 182: a &= ~ h(d); break; // A&~*D - case 183: a &= ~ header[pc++]; break; // A&~ N - case 184: a |= a; break; // A|=A - case 185: a |= b; break; // A|=B - case 186: a |= c; break; // A|=C - case 187: a |= d; break; // A|=D - case 188: a |= m(b); break; // A|=*B - case 189: a |= m(c); break; // A|=*C - case 190: a |= h(d); break; // A|=*D - case 191: a |= header[pc++]; break; // A|= N - case 192: a ^= a; break; // A^=A - case 193: a ^= b; break; // A^=B - case 194: a ^= c; break; // A^=C - case 195: a ^= d; break; // A^=D - case 196: a ^= m(b); break; // A^=*B - case 197: a ^= m(c); break; // A^=*C - case 198: a ^= h(d); break; // A^=*D - case 199: a ^= header[pc++]; break; // A^= N - case 200: a <<= (a&31); break; // A<<=A - case 201: a <<= (b&31); break; // A<<=B - case 202: a <<= (c&31); break; // A<<=C - case 203: a <<= (d&31); break; // A<<=D - case 204: a <<= (m(b)&31); break; // A<<=*B - case 205: a <<= (m(c)&31); break; // A<<=*C - case 206: a <<= (h(d)&31); break; // A<<=*D - case 207: a <<= (header[pc++]&31); break; // A<<= N - case 208: a >>= (a&31); break; // A>>=A - case 209: a >>= (b&31); break; // A>>=B - case 210: a >>= (c&31); break; // A>>=C - case 211: a >>= (d&31); break; // A>>=D - case 212: a >>= (m(b)&31); break; // A>>=*B - case 213: a >>= 
(m(c)&31); break; // A>>=*C - case 214: a >>= (h(d)&31); break; // A>>=*D - case 215: a >>= (header[pc++]&31); break; // A>>= N - case 216: f = (a == a); break; // A==A - case 217: f = (a == b); break; // A==B - case 218: f = (a == c); break; // A==C - case 219: f = (a == d); break; // A==D - case 220: f = (a == U32(m(b))); break; // A==*B - case 221: f = (a == U32(m(c))); break; // A==*C - case 222: f = (a == h(d)); break; // A==*D - case 223: f = (a == U32(header[pc++])); break; // A== N - case 224: f = (a < a); break; // A a); break; // A>A - case 233: f = (a > b); break; // A>B - case 234: f = (a > c); break; // A>C - case 235: f = (a > d); break; // A>D - case 236: f = (a > U32(m(b))); break; // A>*B - case 237: f = (a > U32(m(c))); break; // A>*C - case 238: f = (a > h(d)); break; // A>*D - case 239: f = (a > U32(header[pc++])); break; // A> N - case 255: if((pc=hbegin+header[pc]+256*header[pc+1])>=hend)err();break;//LJ - default: err(); - } - return 1; -} - -// Print illegal instruction error message and exit -void ZPAQL::err() { - error("ZPAQL execution error"); -} - -///////////////////////// Predictor ///////////////////////// - -// Initailize model-independent tables -Predictor::Predictor(ZPAQL& zr): - c8(1), hmap4(1), z(zr) { - assert(sizeof(U8)==1); - assert(sizeof(U16)==2); - assert(sizeof(U32)==4); - assert(sizeof(U64)==8); - assert(sizeof(short)==2); - assert(sizeof(int)==4); - - // Initialize tables - dt2k[0]=0; - for (int i=1; i<256; ++i) - dt2k[i]=2048/i; - for (int i=0; i<1024; ++i) - dt[i]=(1<<17)/(i*2+3)*2; - for (int i=0; i<32768; ++i) - stretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000; - for (int i=0; i<4096; ++i) - squasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); - - // Verify floating point math for squash() and stretch() - U32 sqsum=0, stsum=0; - for (int i=32767; i>=0; --i) - stsum=stsum*3+stretch(i); - for (int i=4095; i>=0; --i) - sqsum=sqsum*3+squash(i-2048); - assert(stsum==3887533746u); - 
assert(sqsum==2278286169u); - - pcode=0; - pcode_size=0; -} - -Predictor::~Predictor() { - allocx(pcode, pcode_size, 0); // free executable memory -} - -// Initialize the predictor with a new model in z -void Predictor::init() { - - // Clear old JIT code if any - allocx(pcode, pcode_size, 0); - - // Initialize context hash function - z.inith(); - - // Initialize predictions - for (int i=0; i<256; ++i) h[i]=p[i]=0; - - // Initialize components - for (int i=0; i<256; ++i) // clear old model - comp[i].init(); - int n=z.header[6]; // hsize[0..1] hh hm ph pm n (comp)[n] END 0[128] (hcomp) END - const U8* cp=&z.header[7]; // start of component list - for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); - Component& cr=comp[i]; - switch(cp[0]) { - case CONS: // c - p[i]=(cp[1]-128)*4; - break; - case CM: // sizebits limit - if (cp[1]>32) error("max size for CM is 32"); - cr.cm.resize(1, cp[1]); // packed CM (22 bits) + CMCOUNT (10 bits) - cr.limit=cp[2]*4; - for (size_t j=0; j26) error("max size for ICM is 26"); - cr.limit=1023; - cr.cm.resize(256); - cr.ht.resize(64, cp[1]); - for (size_t j=0; j32 || cp[2]>32) error("max size for MATCH is 32 32"); - cr.cm.resize(1, cp[1]); // index - cr.ht.resize(1, cp[2]); // buf - cr.ht(0)=1; - break; - case AVG: // j k wt - if (cp[1]>=i) error("AVG j >= i"); - if (cp[2]>=i) error("AVG k >= i"); - break; - case MIX2: // sizebits j k rate mask - if (cp[1]>32) error("max size for MIX2 is 32"); - if (cp[3]>=i) error("MIX2 k >= i"); - if (cp[2]>=i) error("MIX2 j >= i"); - cr.c=(size_t(1)<32) error("max size for MIX is 32"); - if (cp[2]>=i) error("MIX j >= i"); - if (cp[3]<1 || cp[3]>i-cp[2]) error("MIX m not in 1..i-j"); - int m=cp[3]; // number of inputs - assert(m>=1); - cr.c=(size_t(1)<32) error("max size for ISSE is 32"); - if (cp[2]>=i) error("ISSE j >= i"); - cr.ht.resize(64, cp[1]); - cr.cm.resize(512); - for (int j=0; j<256; ++j) { - cr.cm[j*2]=1<<15; - cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)<<10); - } - break; 
- case SSE: // sizebits j start limit - if (cp[1]>32) error("max size for SSE is 32"); - if (cp[2]>=i) error("SSE j >= i"); - if (cp[3]>cp[4]*4) error("SSE start > limit*4"); - cr.cm.resize(32, cp[1]); - cr.limit=cp[4]*4; - for (size_t j=0; j0); - cp+=compsize[*cp]; - assert(cp>=&z.header[7] && cp<&z.header[z.cend]); - } -} - -// Return next bit prediction using interpreted COMP code -int Predictor::predict0() { - assert(c8>=1 && c8<=255); - - // Predict next bit - int n=z.header[6]; - assert(n>0 && n<=255); - const U8* cp=&z.header[7]; - assert(cp[-1]==n); - for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); - Component& cr=comp[i]; - switch(cp[0]) { - case CONS: // c - break; - case CM: // sizebits limit - cr.cxt=h[i]^hmap4; - p[i]=stretch(cr.cm(cr.cxt)>>17); - break; - case ICM: // sizebits - assert((hmap4&15)>0); - if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); - cr.cxt=cr.ht[cr.c+(hmap4&15)]; - p[i]=stretch(cr.cm(cr.cxt)>>8); - break; - case MATCH: // sizebits bufbits: a=len, b=offset, c=bit, cxt=bitpos, - // ht=buf, limit=pos - assert(cr.cm.size()==(size_t(1)<>(7-cr.cxt))&1; // predicted bit - p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); - } - break; - case AVG: // j k wt - p[i]=(p[cp[1]]*cp[3]+p[cp[2]]*(256-cp[3]))>>8; - break; - case MIX2: { // sizebits j k rate mask - // c=size cm=wt[size] cxt=input - cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); - assert(cr.cxt=0 && w<65536); - p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; - assert(p[i]>=-2048 && p[i]<2048); - } - break; - case MIX: { // sizebits j m rate mask - // c=size cm=wt[size][m] cxt=index of wt in cm - int m=cp[3]; - assert(m>=1 && m<=i); - cr.cxt=h[i]+(c8&cp[5]); - cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights - assert(cr.cxt<=cr.cm.size()-m); - int* wt=(int*)&cr.cm[cr.cxt]; - p[i]=0; - for (int j=0; j>8)*p[cp[2]+j]; - p[i]=clamp2k(p[i]>>8); - } - break; - case ISSE: { // sizebits j -- c=hi, cxt=bh - assert((hmap4&15)>0); - if (c8==1 || (c8&0xf0)==16) - 
cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); - cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history - int *wt=(int*)&cr.cm[cr.cxt*2]; - p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); - } - break; - case SSE: { // sizebits j start limit - cr.cxt=(h[i]+c8)*32; - int pq=p[cp[2]]+992; - if (pq<0) pq=0; - if (pq>1983) pq=1983; - int wt=pq&63; - pq>>=6; - assert(pq>=0 && pq<=30); - cr.cxt+=pq; - p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt)+(cr.cm(cr.cxt+1)>>10)*wt)>>13); - cr.cxt+=wt>>5; - } - break; - default: - error("component predict not implemented"); - } - cp+=compsize[cp[0]]; - assert(cp<&z.header[z.cend]); - assert(p[i]>=-2048 && p[i]<2048); - } - assert(cp[0]==NONE); - return squash(p[n-1]); -} - -// Update model with decoded bit y (0...1) -void Predictor::update0(int y) { - assert(y==0 || y==1); - assert(c8>=1 && c8<=255); - assert(hmap4>=1 && hmap4<=511); - - // Update components - const U8* cp=&z.header[7]; - int n=z.header[6]; - assert(n>=1 && n<=255); - assert(cp[-1]==n); - for (int i=0; i>8))>>2; - } - break; - case MATCH: // sizebits bufbits: - // a=len, b=offset, c=bit, cm=index, cxt=bitpos - // ht=buf, limit=pos - { - assert(cr.a<=255); - assert(cr.c==0 || cr.c==1); - assert(cr.cxt<8); - assert(cr.cm.size()==(size_t(1)<>5; - int w=cr.a16[cr.cxt]; - w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; - if (w<0) w=0; - if (w>65535) w=65535; - cr.a16[cr.cxt]=w; - } - break; - case MIX: { // sizebits j m rate mask - // cm=wt[size][m], cxt=input - int m=cp[3]; - assert(m>0 && m<=i); - assert(cr.cm.size()==m*cr.c); - assert(cr.cxt+m<=cr.cm.size()); - int err=(y*32767-squash(p[i]))*cp[4]>>4; - int* wt=(int*)&cr.cm[cr.cxt]; - for (int j=0; j>13)); - } - break; - case ISSE: { // sizebits j -- c=hi, cxt=bh - assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); - int err=y*32767-squash(p[i]); - int *wt=(int*)&cr.cm[cr.cxt*2]; - wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); - wt[1]=clamp512k(wt[1]+((err+16)>>5)); - cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); - } - break; - case SSE: // sizebits j 
start limit - train(cr, y); - break; - default: - assert(0); - } - cp+=compsize[cp[0]]; - assert(cp>=&z.header[7] && cp<&z.header[z.cend] - && cp<&z.header[z.header.isize()-8]); - } - assert(cp[0]==NONE); - - // Save bit y in c8, hmap4 - c8+=c8+y; - if (c8>=256) { - z.run(c8-256); - hmap4=1; - c8=1; - for (int i=0; i=16 && c8<32) - hmap4=(hmap4&0xf)<<5|y<<4|1; - else - hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); -} - -// Find cxt row in hash table ht. ht has rows of 16 indexed by the -// low sizebits of cxt with element 0 having the next higher 8 bits for -// collision detection. If not found after 3 adjacent tries, replace the -// row with lowest element 1 as priority. Return index of row. -size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { - assert(ht.size()==size_t(16)<>sizebits&255; - size_t h0=(cxt*16)&(ht.size()-16); - if (ht[h0]==chk) return h0; - size_t h1=h0^16; - if (ht[h1]==chk) return h1; - size_t h2=h0^32; - if (ht[h2]==chk) return h2; - if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) - return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; - else if (ht[h1+1]get(); - if (c<0) error("unexpected end of input"); - curr=curr<<8|c; - } - } - U32 n=buf.size(); - if (n>curr) n=curr; - high=in->read(&buf[0], n); - curr-=high; - low=0; -} - -// Return next bit of decoded input, which has 16 bit probability p of being 1 -int Decoder::decode(int p) { - assert(p>=0 && p<65536); - assert(high>low && low>0); - if (currhigh) error("archive corrupted"); - assert(curr>=low && curr<=high); - U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range - assert(high>mid && mid>=low); - int y=curr<=mid; - if (y) high=mid; else low=mid+1; // pick half - while ((high^low)<0x1000000) { // shift out identical leading bytes - high=high<<8|255; - low=low<<8; - low+=(low==0); - int c=in->get(); - if (c<0) error("unexpected end of file"); - curr=curr<<8|c; - } - return y; -} - -// Decompress 1 byte or -1 at end of input -int Decoder::decompress() { - if (pr.isModeled()) { // n>0 
components? - if (curr==0) { // segment initialization - for (int i=0; i<4; ++i) - curr=curr<<8|in->get(); - } - if (decode(0)) { - if (curr!=0) error("decoding end of stream"); - return -1; - } - else { - int c=1; - while (c<256) { // get 8 bits - int p=pr.predict()*2+1; - c+=c+decode(p); - pr.update(c&1); - } - return c-256; - } - } - else { - if (low==high) loadbuf(); - if (low==high) return -1; - return buf[low++]&255; - } -} - -// Find end of compressed data and return next byte -int Decoder::skip() { - int c=-1; - if (pr.isModeled()) { - while (curr==0) // at start? - curr=in->get(); - while (curr && (c=in->get())>=0) // find 4 zeros - curr=curr<<8|c; - while ((c=in->get())==0) ; // might be more than 4 - return c; - } - else { - if (curr==0) // at start? - for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; - while (curr>0) { - U32 n=BUFSIZE; - if (n>curr) n=curr; - U32 n1=in->read(&buf[0], n); - curr-=n1; - if (n1!=n) return -1; - if (curr==0) - for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; - } - if (c>=0) c=in->get(); - return c; - } -} - -////////////////////// PostProcessor ////////////////////// - -// Copy ph, pm from block header -void PostProcessor::init(int h, int m) { - state=hsize=0; - ph=h; - pm=m; - z.clear(); -} - -// (PASS=0 | PROG=1 psize[0..1] pcomp[0..psize-1]) data... 
EOB=-1 -// Return state: 1=PASS, 2..4=loading PROG, 5=PROG loaded -int PostProcessor::write(int c) { - assert(c>=-1 && c<=255); - switch (state) { - case 0: // initial state - if (c<0) error("Unexpected EOS"); - state=c+1; // 1=PASS, 2=PROG - if (state>2) error("unknown post processing type"); - if (state==1) z.clear(); - break; - case 1: // PASS - z.outc(c); - break; - case 2: // PROG - if (c<0) error("Unexpected EOS"); - hsize=c; // low byte of size - state=3; - break; - case 3: // PROG psize[0] - if (c<0) error("Unexpected EOS"); - hsize+=c*256; // high byte of psize - z.header.resize(hsize+300); - z.cend=8; - z.hbegin=z.hend=z.cend+128; - z.header[4]=ph; - z.header[5]=pm; - state=4; - break; - case 4: // PROG psize[0..1] pcomp[0...] - if (c<0) error("Unexpected EOS"); - assert(z.hend>8; - z.initp(); - state=5; - } - break; - case 5: // PROG ... data - z.run(c); - if (c<0) z.flush(); - break; - } - return state; -} - -/////////////////////// Decompresser ///////////////////// - -// Find the start of a block and return true if found. Set memptr -// to memory used. -bool Decompresser::findBlock(double* memptr) { - assert(state==BLOCK); - - // Find start of block - U32 h1=0x3D49B113, h2=0x29EB7F93, h3=0x2614BE13, h4=0x3828EB13; - // Rolling hashes initialized to hash of first 13 bytes - int c; - while ((c=dec.in->get())!=-1) { - h1=h1*12+c; - h2=h2*20+c; - h3=h3*28+c; - h4=h4*44+c; - if (h1==0xB16B88F1 && h2==0xFF5376F1 && h3==0x72AC5BF1 && h4==0x2F909AF1) - break; // hash of 16 byte string - } - if (c==-1) return false; - - // Read header - if ((c=dec.in->get())!=1 && c!=2) error("unsupported ZPAQ level"); - if (dec.in->get()!=1) error("unsupported ZPAQL type"); - z.read(dec.in); - if (c==1 && z.header.isize()>6 && z.header[6]==0) - error("ZPAQ level 1 requires at least 1 component"); - if (memptr) *memptr=z.memory(); - state=FILENAME; - decode_state=FIRSTSEG; - return true; -} - -// Read the start of a segment (1) or end of block code (255). 
-// If a segment is found, write the filename and return true, else false. -bool Decompresser::findFilename(Writer* filename) { - assert(state==FILENAME); - int c=dec.in->get(); - if (c==1) { // segment found - while (true) { - c=dec.in->get(); - if (c==-1) error("unexpected EOF"); - if (c==0) { - state=COMMENT; - return true; - } - if (filename) filename->put(c); - } - } - else if (c==255) { // end of block found - state=BLOCK; - return false; - } - else - error("missing segment or end of block"); - return false; -} - -// Read the comment from the segment header -void Decompresser::readComment(Writer* comment) { - assert(state==COMMENT); - state=DATA; - while (true) { - int c=dec.in->get(); - if (c==-1) error("unexpected EOF"); - if (c==0) break; - if (comment) comment->put(c); - } - if (dec.in->get()!=0) error("missing reserved byte"); -} - -// Decompress n bytes, or all if n < 0. Return false if done -bool Decompresser::decompress(int n) { - assert(state==DATA); - assert(decode_state!=SKIP); - - // Initialize models to start decompressing block - if (decode_state==FIRSTSEG) { - dec.init(); - assert(z.header.size()>5); - pp.init(z.header[4], z.header[5]); - decode_state=SEG; - } - - // Decompress and load PCOMP into postprocessor - while ((pp.getState()&3)!=1) - pp.write(dec.decompress()); - - // Decompress n bytes, or all if n < 0 - while (n) { - int c=dec.decompress(); - pp.write(c); - if (c==-1) { - state=SEGEND; - return false; - } - if (n>0) --n; - } - return true; -} - -// Read end of block. If a SHA1 checksum is present, write 1 and the -// 20 byte checksum into sha1string, else write 0 in first byte. -// If sha1string is 0 then discard it. 
-void Decompresser::readSegmentEnd(char* sha1string) { - assert(state==DATA || state==SEGEND); - - // Skip remaining data if any and get next byte - int c=0; - if (state==DATA) { - c=dec.skip(); - decode_state=SKIP; - } - else if (state==SEGEND) - c=dec.in->get(); - state=FILENAME; - - // Read checksum - if (c==254) { - if (sha1string) sha1string[0]=0; // no checksum - } - else if (c==253) { - if (sha1string) sha1string[0]=1; - for (int i=1; i<=20; ++i) { - c=dec.in->get(); - if (sha1string) sha1string[i]=c; - } - } - else - error("missing end of segment marker"); -} - -/////////////////////////// decompress() ///////////////////// - -void decompress(Reader* in, Writer* out) { - Decompresser d; - d.setInput(in); - d.setOutput(out); - while (d.findBlock()) { // don't calculate memory - while (d.findFilename()) { // discard filename - d.readComment(); // discard comment - d.decompress(); // to end of segment - d.readSegmentEnd(); // discard sha1string - } - } -} - -////////////////////// Encoder //////////////////// - -// Initialize for start of block -void Encoder::init() { - low=1; - high=0xFFFFFFFF; - pr.init(); - if (!pr.isModeled()) low=0, buf.resize(1<<16); -} - -// compress bit y having probability p/64K -void Encoder::encode(int y, int p) { - assert(out); - assert(p>=0 && p<65536); - assert(y==0 || y==1); - assert(high>low && low>0); - U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range - assert(high>mid && mid>=low); - if (y) high=mid; else low=mid+1; // pick half - while ((high^low)<0x1000000) { // write identical leading bytes - out->put(high>>24); // same as low>>24 - high=high<<8|255; - low=low<<8; - low+=(low==0); // so we don't code 4 0 bytes in a row - } -} - -// compress byte c (0..255 or -1=EOS) -void Encoder::compress(int c) { - assert(out); - if (pr.isModeled()) { - if (c==-1) - encode(1, 0); - else { - assert(c>=0 && c<=255); - encode(0, 0); - for (int i=7; i>=0; --i) { - int p=pr.predict()*2+1; - assert(p>0 && p<65536); - int y=c>>i&1; 
- encode(y, p); - pr.update(y); - } - } - } - else { - if (c<0 || low==buf.size()) { - out->put((low>>24)&255); - out->put((low>>16)&255); - out->put((low>>8)&255); - out->put(low&255); - out->write(&buf[0], low); - low=0; - } - if (c>=0) buf[low++]=c; - } -} - -///////////////////// Compressor ////////////////////// - -// Write 13 byte start tag -// "\x37\x6B\x53\x74\xA0\x31\x83\xD3\x8C\xB2\x28\xB0\xD3" -void Compressor::writeTag() { - assert(state==INIT); - enc.out->put(0x37); - enc.out->put(0x6b); - enc.out->put(0x53); - enc.out->put(0x74); - enc.out->put(0xa0); - enc.out->put(0x31); - enc.out->put(0x83); - enc.out->put(0xd3); - enc.out->put(0x8c); - enc.out->put(0xb2); - enc.out->put(0x28); - enc.out->put(0xb0); - enc.out->put(0xd3); -} - -void Compressor::startBlock(int level) { - - // Model 1 - min.cfg - static const char models[]={ - 26,0,1,2,0,0,2,3,16,8,19,0,0,96,4,28, - 59,10,59,112,25,10,59,10,59,112,56,0, - - // Model 2 - mid.cfg - 69,0,3,3,0,0,8,3,5,8,13,0,8,17,1,8, - 18,2,8,18,3,8,19,4,4,22,24,7,16,0,7,24, - -1,0,17,104,74,4,95,1,59,112,10,25,59,112,10,25, - 59,112,10,25,59,112,10,25,59,112,10,25,59,10,59,112, - 25,69,-49,8,112,56,0, - - // Model 3 - max.cfg - -60,0,5,9,0,0,22,1,-96,3,5,8,13,1,8,16, - 2,8,18,3,8,19,4,8,19,5,8,20,6,4,22,24, - 3,17,8,19,9,3,13,3,13,3,13,3,14,7,16,0, - 15,24,-1,7,8,0,16,10,-1,6,0,15,16,24,0,9, - 8,17,32,-1,6,8,17,18,16,-1,9,16,19,32,-1,6, - 0,19,20,16,0,0,17,104,74,4,95,2,59,112,10,25, - 59,112,10,25,59,112,10,25,59,112,10,25,59,112,10,25, - 59,10,59,112,10,25,59,112,10,25,69,-73,32,-17,64,47, - 14,-25,91,47,10,25,60,26,48,-122,-105,20,112,63,9,70, - -33,0,39,3,25,112,26,52,25,25,74,10,4,59,112,25, - 10,4,59,112,25,10,4,59,112,25,65,-113,-44,72,4,59, - 112,8,-113,-40,8,68,-81,60,60,25,69,-49,9,112,25,25, - 25,25,25,112,56,0, - - 0,0}; // 0,0 = end of list - - if (level<1) error("compression level must be at least 1"); - const char* p=models; - int i; - for (i=1; iput('z'); - enc.out->put('P'); - enc.out->put('Q'); - 
enc.out->put(1+(len>6 && hcomp[6]==0)); // level 1 or 2 - enc.out->put(1); - for (int i=0; iput(hcomp[i]); - MemoryReader m(hcomp); - z.read(&m); - state=BLOCK1; -} - -// Write a segment header -void Compressor::startSegment(const char* filename, const char* comment) { - assert(state==BLOCK1 || state==BLOCK2); - enc.out->put(1); - while (filename && *filename) - enc.out->put(*filename++); - enc.out->put(0); - while (comment && *comment) - enc.out->put(*comment++); - enc.out->put(0); - enc.out->put(0); - if (state==BLOCK1) state=SEG1; - if (state==BLOCK2) state=SEG2; -} - -// Initialize encoding and write pcomp to first segment -// If len is 0 then length is encoded in pcomp[0..1] -void Compressor::postProcess(const char* pcomp, int len) { - assert(state==SEG1); - enc.init(); - if (pcomp) { - enc.compress(1); - if (len<=0) { - len=toU16(pcomp); - pcomp+=2; - } - enc.compress(len&255); - enc.compress((len>>8)&255); - for (int i=0; iget())>=0) { - enc.compress(ch); - if (n>0) --n; - } - return ch>=0; -} - -// End segment, write sha1string if present -void Compressor::endSegment(const char* sha1string) { - assert(state==SEG2); - enc.compress(-1); - enc.out->put(0); - enc.out->put(0); - enc.out->put(0); - enc.out->put(0); - if (sha1string) { - enc.out->put(253); - for (int i=0; i<20; ++i) - enc.out->put(sha1string[i]); - } - else - enc.out->put(254); - state=BLOCK2; -} - -// End block -void Compressor::endBlock() { - assert(state==BLOCK2); - enc.out->put(255); - state=INIT; -} - -/////////////////////////// compress() /////////////////////// - -void compress(Reader* in, Writer* out, int level) { - assert(level>=1); - Compressor c; - c.setInput(in); - c.setOutput(out); - c.startBlock(level); - c.startSegment(); - c.postProcess(); - c.compress(); - c.endSegment(); - c.endBlock(); -} - -//////////////////////// ZPAQL::assemble() //////////////////// - -#ifndef NOJIT -/* -assemble(); - -Assembles the ZPAQL code in hcomp[0..hlen-1] and stores x86-32 or x86-64 -code in 
rcode[0..rcode_size-1]. Execution begins at rcode[0]. It will not -write beyond the end of rcode, but in any case it returns the number of -bytes that would have been written. It returns 0 in case of error. - -The assembled code implements run() and returns 1 if successful or -0 if the ZPAQL code executes an invalid instruction or jumps out of -bounds. - -A ZPAQL virtual machine has the following state. All values are -unsigned and initially 0: - - a, b, c, d: 32 bit registers (pointed to by their respective parameters) - f: 1 bit flag register (pointed to) - r[0..255]: 32 bit registers - m[0..msize-1]: 8 bit registers, where msize is a power of 2 - h[0..hsize-1]: 32 bit registers, where hsize is a power of 2 - out: pointer to a Writer - sha1: pointer to a SHA1 - -Generally a ZPAQL machine is used to compute contexts which are -placed in h. A second machine might post-process, and write its -output to out and sha1. In either case, a machine is called with -its input in a, representing a single byte (0..255) or -(for a postprocessor) EOF (0xffffffff). Execution returs after a -ZPAQL halt instruction. - -ZPAQL instructions are 1 byte unless the last 3 bits are 1. -In this case, a second operand byte follows. Opcode 255 is -the only 3 byte instruction. They are organized: - - 00dddxxx = unary opcode xxx on destination ddd (ddd < 111) - 00111xxx = special instruction xxx - 01dddsss = assignment: ddd = sss (ddd < 111) - 1xxxxsss = operation sxxx from sss to a - -The meaning of sss and ddd are as follows: - - 000 = a (accumulator) - 001 = b - 010 = c - 011 = d - 100 = *b (means m[b mod msize]) - 101 = *c (means m[c mod msize]) - 110 = *d (means h[d mod hsize]) - 111 = n (constant 0..255 in second byte of instruction) - -For example, 01001110 assigns *d to b. 
The other instructions xxx -are as follows: - -Group 00dddxxx where ddd < 111 and xxx is: - 000 = ddd<>a, swap with a (except 00000000 is an error, and swap - with *b or *c leaves the high bits of a unchanged) - 001 = ddd++, increment - 010 = ddd--, decrement - 011 = ddd!, not (invert all bits) - 100 = ddd=0, clear (set all bits of ddd to 0) - 101 = not used (error) - 110 = not used - 111 = ddd=r n, assign from r[n] to ddd, n=0..255 in next opcode byte -Except: - 00100111 = jt n, jump if f is true (n = -128..127, relative to next opcode) - 00101111 = jf n, jump if f is false (n = -128..127) - 00110111 = r=a n, assign r[n] = a (n = 0..255) - -Group 00111xxx where xxx is: - 000 = halt (return) - 001 = output a - 010 = not used - 011 = hash: a = (a + *b + 512) * 773 - 100 = hashd: *d = (*d + a + 512) * 773 - 101 = not used - 110 = not used - 111 = unconditional jump (n = -128 to 127, relative to next opcode) - -Group 1xxxxsss where xxxx is: - 0000 = a += sss (add, subtract, multiply, divide sss to a) - 0001 = a -= sss - 0010 = a *= sss - 0011 = a /= sss (unsigned, except set a = 0 if sss is 0) - 0100 = a %= sss (remainder, except set a = 0 if sss is 0) - 0101 = a &= sss (bitwise AND) - 0110 = a &= ~sss (bitwise AND with complement of sss) - 0111 = a |= sss (bitwise OR) - 1000 = a ^= sss (bitwise XOR) - 1001 = a <<= (sss % 32) (left shift by low 5 bits of sss) - 1010 = a >>= (sss % 32) (unsigned, zero bits shifted in) - 1011 = a == sss (compare, set f = true if equal or false otherwise) - 1100 = a < sss (unsigned compare, result in f) - 1101 = a > sss (unsigned compare) - 1110 = not used - 1111 = not used except 11111111 is a 3 byte jump to the absolute address - in the next 2 bytes in little-endian (LSB first) order. - -assemble() translates ZPAQL to 32 bit x86 code to be executed by run(). 
-Registers are mapped as follows: - - eax = source sss from *b, *c, *d or sometimes n - ecx = pointer to destination *b, *c, *d, or spare - edx = a - ebx = f (1 for true, 0 for false) - esp = stack pointer - ebp = d - esi = b - edi = c - -run() saves non-volatile registers (ebp, esi, edi, ebx) on the stack, -loads a, b, c, d, f, and executes the translated instructions. -A halt instruction saves a, b, c, d, f, pops the saved registers -and returns. Invalid instructions or jumps outside of the range -of the ZPAQL code call libzpaq::error(). - -In 64 bit mode, the following additional registers are used: - - r12 = h - r14 = r - r15 = m - -*/ - -// Called by out -static void flush1(ZPAQL* z) { - z->flush(); -} - -// return true if op is an undefined ZPAQL instruction -static bool iserr(int op) { - return op==0 || (op>=120 && op<=127) || (op>=240 && op<=254) - || op==58 || (op<64 && (op%8==5 || op%8==6)); -} - -// Write k bytes of x to rcode[o++] MSB first -static void put(U8* rcode, int n, int& o, U32 x, int k) { - while (k-->0) { - if (o>(k*8))&255; - ++o; - } -} - -// Write 4 bytes of x to rcode[o++] LSB first -static void put4lsb(U8* rcode, int n, int& o, U32 x) { - for (int k=0; k<4; ++k) { - if (o>(k*8))&255; - ++o; - } -} - -// Write a 1-4 byte x86 opcode without or with an 4 byte operand -// to rcode[o...] 
-#define put1(x) put(rcode, rcode_size, o, (x), 1) -#define put2(x) put(rcode, rcode_size, o, (x), 2) -#define put3(x) put(rcode, rcode_size, o, (x), 3) -#define put4(x) put(rcode, rcode_size, o, (x), 4) -#define put5(x,y) put4(x), put1(y) -#define put6(x,y) put4(x), put2(y) -#define put4r(x) put4lsb(rcode, rcode_size, o, x) -#define puta(x) t=U32(size_t(x)), put4r(t) -#define put1a(x,y) put1(x), puta(y) -#define put2a(x,y) put2(x), puta(y) -#define put3a(x,y) put3(x), puta(y) -#define put4a(x,y) put4(x), puta(y) -#define put5a(x,y,z) put4(x), put1(y), puta(z) -#define put2l(x,y) put2(x), t=U32(size_t(y)), put4r(t), \ - t=U32(size_t(y)>>(S*4)), put4r(t) - -// Assemble ZPAQL in in the HCOMP section of header to rcode, -// but do not write beyond rcode_size. Return the number of -// bytes output or that would have been output. -// Execution starts at rcode[0] and returns 1 if successful or 0 -// in case of a ZPAQL execution error. -int ZPAQL::assemble() { - - // x86? (not foolproof) - const int S=sizeof(char*); // 4 = x86, 8 = x86-64 - U32 t=0x12345678; - if (*(char*)&t!=0x78 || (S!=4 && S!=8)) - error("JIT supported only for x86-32 and x86-64"); - - const U8* hcomp=&header[hbegin]; - const int hlen=hend-hbegin+1; - const int msize=m.size(); - const int hsize=h.size(); - const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax.. 
- Array it(hlen); // hcomp -> rcode locations - int done=0; // number of instructions assembled (0..hlen) - int o=5; // rcode output index, reserve space for jmp - - // Code for the halt instruction (restore registers and return) - const int halt=o; - if (S==8) { - put2l(0x48b9, &a); // mov rcx, a - put2(0x8911); // mov [rcx], edx - put2l(0x48b9, &b); // mov rcx, b - put2(0x8931); // mov [rcx], esi - put2l(0x48b9, &c); // mov rcx, c - put2(0x8939); // mov [rcx], edi - put2l(0x48b9, &d); // mov rcx, d - put2(0x8929); // mov [rcx], ebp - put2l(0x48b9, &f); // mov rcx, f - put2(0x8919); // mov [rcx], ebx - put4(0x4883c438); // add rsp, 56 - put2(0x415f); // pop r15 - put2(0x415e); // pop r14 - put2(0x415d); // pop r13 - put2(0x415c); // pop r12 - } - else { - put2a(0x8915, &a); // mov [a], edx - put2a(0x8935, &b); // mov [b], esi - put2a(0x893d, &c); // mov [c], edi - put2a(0x892d, &d); // mov [d], ebp - put2a(0x891d, &f); // mov [f], ebx - put3(0x83c43c); // add esp, 60 - } - put1(0x5d); // pop ebp - put1(0x5b); // pop ebx - put1(0x5f); // pop edi - put1(0x5e); // pop esi - put1(0xc3); // ret - - // Code for the out instruction. - // Store a=edx at outbuf[bufptr++]. If full, call flush1(). 
- const int outlabel=o; - if (S==8) { - put2l(0x48b8, &outbuf[0]);// mov rax, outbuf.p - put2l(0x49ba, &bufptr); // mov r10, &bufptr - put3(0x418b0a); // mov ecx, [r10] - put3(0x891408); // mov [rax+rcx], edx - put2(0xffc1); // inc ecx - put3(0x41890a); // mov [r10], ecx - put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() - put2(0x7401); // jz L1 - put1(0xc3); // ret - put4(0x4883ec30); // L1: sub esp, 48 ; call flush1(this) - put4(0x48893c24); // mov [rsp], rdi - put5(0x48897424,8); // mov [rsp+8], rsi - put5(0x48895424,16); // mov [rsp+16], rdx - put5(0x48894c24,24); // mov [rsp+24], rcx -#ifndef _WIN32 - put2l(0x48bf, this); // mov rdi, this -#else // Windows - put2l(0x48b9, this); // mov rcx, this -#endif - put2l(0x49bb, &flush1); // mov r11, &flush1 - put3(0x41ffd3); // call r11 - put5(0x488b4c24,24); // mov rcx, [rsp+24] - put5(0x488b5424,16); // mov rdx, [rsp+16] - put5(0x488b7424,8); // mov rsi, [rsp+8] - put4(0x488b3c24); // mov rdi, [rsp] - put4(0x4883c430); // add esp, 48 - put1(0xc3); // ret - } - else { - put1a(0xb8, &outbuf[0]); // mov eax, outbuf.p - put2a(0x8b0d, &bufptr); // mov ecx, [bufptr] - put3(0x891408); // mov [eax+ecx], edx - put2(0xffc1); // inc ecx - put2a(0x890d, &bufptr); // mov [bufptr], ecx - put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() - put2(0x7401); // jz L1 - put1(0xc3); // ret - put3(0x83ec08); // L1: sub esp, 8 - put4(0x89542404); // mov [esp+4], edx - put3a(0xc70424, this); // mov [esp], this - put1a(0xb8, &flush1); // mov eax, &flush1 - put2(0xffd0); // call eax - put4(0x8b542404); // mov edx, [esp+4] - put3(0x83c408); // add esp, 8 - put1(0xc3); // ret - } - - // Set it[i]=1 for each ZPAQL instruction reachable from the previous - // instruction + 2 if reachable by a jump (or 3 if both). 
- it[0]=2; - assert(hlen>0 && hcomp[hlen-1]==0); // ends with error - do { - done=0; - const int NONE=0x80000000; - for (int i=0; i>24);// jt,jf,jmp - if (op==63) next1=NONE; // jmp - if ((next2<0 || next2>=hlen) && next2!=NONE) next2=hlen-1; // error - if (next1!=NONE && !(it[next1]&1)) it[next1]|=1, ++done; - if (next2!=NONE && !(it[next2]&2)) it[next2]|=2, ++done; - } - } - } while (done>0); - - // Set it[i] bits 2-3 to 4, 8, or 12 if a comparison - // (<, >, == respectively) does not need to save the result in f, - // or if a conditional jump (jt, jf) does not need to read f. - // This is true if a comparison is followed directly by a jt/jf, - // the jt/jf is not a jump target, the byte before is not a jump - // target (for a 2 byte comparison), and for the comparison instruction - // if both paths after the jt/jf lead to another comparison or error - // before another jt/jf. At most hlen steps are traced because after - // that it must be an infinite loop. - for (int i=0; i=216 && op1<240 && (op2==39 || op2==47) - && it[i2]==1 && (i2==i+1 || it[i+1]==0)) { - int code=(op1-208)/8*4; // 4,8,12 is ==,<,> - it[i2]+=code; // OK to test CF, ZF instead of f - for (int j=0; j<2 && code; ++j) { // trace each path from i2 - int k=i2+2; // branch not taken - if (j==1) k=i2+2+(hcomp[i2+1]<<24>>24); // branch taken - for (int l=0; l=hlen) break; // out of bounds, pass - const int op=hcomp[k]; - if (op==39 || op==47) code=0; // jt,jf, fail - else if (op>=216 && op<240) break; // ==,<,>, pass - else if (iserr(op)) break; // error, pass - else if (op==255) k=hcomp[k+1]+256*hcomp[k+2]; // lj - else if (op==63) k=k+2+(hcomp[k+1]<<24>>24); // jmp - else if (op==56) k=0; // halt - else k=k+1+(op%8==7); // ordinary instruction - } - } - it[i]+=code; // if > 0 then OK to not save flags in f (bl) - } - } - - // Start of run(): Save x86 and load ZPAQL registers - const int start=o; - assert(start>=16); - put1(0x56); // push esi/rsi - put1(0x57); // push edi/rdi - put1(0x53); // push 
ebx/rbx - put1(0x55); // push ebp/rbp - if (S==8) { - put2(0x4154); // push r12 - put2(0x4155); // push r13 - put2(0x4156); // push r14 - put2(0x4157); // push r15 - put4(0x4883ec38); // sub rsp, 56 - put2l(0x48b8, &a); // mov rax, a - put2(0x8b10); // mov edx, [rax] - put2l(0x48b8, &b); // mov rax, b - put2(0x8b30); // mov esi, [rax] - put2l(0x48b8, &c); // mov rax, c - put2(0x8b38); // mov edi, [rax] - put2l(0x48b8, &d); // mov rax, d - put2(0x8b28); // mov ebp, [rax] - put2l(0x48b8, &f); // mov rax, f - put2(0x8b18); // mov ebx, [rax] - put2l(0x49bc, &h[0]); // mov r12, h - put2l(0x49bd, &outbuf[0]); // mov r13, outbuf.p - put2l(0x49be, &r[0]); // mov r14, r - put2l(0x49bf, &m[0]); // mov r15, m - } - else { - put3(0x83ec3c); // sub esp, 60 - put2a(0x8b15, &a); // mov edx, [a] - put2a(0x8b35, &b); // mov esi, [b] - put2a(0x8b3d, &c); // mov edi, [c] - put2a(0x8b2d, &d); // mov ebp, [d] - put2a(0x8b1d, &f); // mov ebx, [f] - } - - // Assemble in multiple passes until every byte of hcomp has a translation - for (int istart=0; istarti); - assert(i>=0 && i=16) { - if (i>istart) { - int a=code-o; - if (a>-120 && a<120) - put2(0xeb00+((a-2)&255)); // jmp short o - else - put1a(0xe9, a-5); // jmp near o - } - break; - } - - // Else assemble the instruction at hcode[i] to rcode[o] - else { - assert(i>=0 && i0 && it[i]<16); - assert(o>=16); - it[i]=o; - ++done; - const int op=hcomp[i]; - const int arg=hcomp[i+1]+((op==255)?256*hcomp[i+2]:0); - const int ddd=op/8%8; - const int sss=op%8; - - // error instruction: return 0 - if (iserr(op)) { - put2(0x31c0); // xor eax, eax - put1a(0xe9, halt-o-4); // jmp near halt - continue; - } - - // Load source *b, *c, *d, or hash (*b) into eax except: - // {a,b,c,d}=*d, a{+,-,*,&,|,^,=,==,>,>}=*d: load address to eax - // {a,b,c,d}={*b,*c}: load source into ddd - if (op==59 || (op>=64 && op<240 && op%8>=4 && op%8<7)) { - put2(0x89c0+8*regcode[sss-3+(op==59)]); // mov eax, {esi,edi,ebp} - const int sz=(sss==6?hsize:msize)-1; - if 
(sz>=128) put1a(0x25, sz); // and eax, dword msize-1 - else put3(0x83e000+sz); // and eax, byte msize-1 - const int move=(op>=64 && op<112); // = or else ddd is eax - if (sss<6) { // ddd={a,b,c,d,*b,*c} - if (S==8) put5(0x410fb604+8*move*regcode[ddd],0x07); - // movzx ddd, byte [r15+rax] - else put3a(0x0fb680+8*move*regcode[ddd], &m[0]); - // movzx ddd, byte [m+eax] - } - else if ((0x06587000>>(op/8))&1) {// {*b,*c,*d,a/,a%,a&~,a<<,a>>}=*d - if (S==8) put4(0x418b0484); // mov eax, [r12+rax*4] - else put3a(0x8b0485, &h[0]); // mov eax, [h+eax*4] - } - } - - // Load destination address *b, *c, *d or hashd (*d) into ecx - if ((op>=32 && op<56 && op%8<5) || (op>=96 && op<120) || op==60) { - put2(0x89c1+8*regcode[op/8%8-3-(op==60)]);// mov ecx,{esi,edi,ebp} - const int sz=(ddd==6||op==60?hsize:msize)-1; - if (sz>=128) put2a(0x81e1, sz); // and ecx, dword sz - else put3(0x83e100+sz); // and ecx, byte sz - if (op/8%8==6 || op==60) { // *d - if (S==8) put4(0x498d0c8c); // lea rcx, [r12+rcx*4] - else put3a(0x8d0c8d, &h[0]); // lea ecx, [ecx*4+h] - } - else { // *b, *c - if (S==8) put4(0x498d0c0f); // lea rcx, [r15+rcx] - else put2a(0x8d89, &m[0]); // lea ecx, [ecx+h] - } - } - - // Translate by opcode - switch((op/8)&31) { - case 0: // ddd = a - case 1: // ddd = b - case 2: // ddd = c - case 3: // ddd = d - switch(sss) { - case 0: // ddd<>a (swap) - put2(0x87d0+regcode[ddd]); // xchg edx, ddd - break; - case 1: // ddd++ - put2(0xffc0+regcode[ddd]); // inc ddd - break; - case 2: // ddd-- - put2(0xffc8+regcode[ddd]); // dec ddd - break; - case 3: // ddd! 
- put2(0xf7d0+regcode[ddd]); // not ddd - break; - case 4: // ddd=0 - put2(0x31c0+9*regcode[ddd]); // xor ddd,ddd - break; - case 7: // ddd=r n - if (S==8) - put3a(0x418b86+8*regcode[ddd], arg*4); // mov ddd, [r14+n*4] - else - put2a(0x8b05+8*regcode[ddd], (&r[arg]));//mov ddd, [r+n] - break; - } - break; - case 4: // ddd = *b - case 5: // ddd = *c - switch(sss) { - case 0: // ddd<>a (swap) - put2(0x8611); // xchg dl, [ecx] - break; - case 1: // ddd++ - put2(0xfe01); // inc byte [ecx] - break; - case 2: // ddd-- - put2(0xfe09); // dec byte [ecx] - break; - case 3: // ddd! - put2(0xf611); // not byte [ecx] - break; - case 4: // ddd=0 - put2(0x31c0); // xor eax, eax - put2(0x8801); // mov [ecx], al - break; - case 7: // jt, jf - { - assert(code>=0 && code<16); - const int jtab[2][4]={{5,4,2,7},{4,5,3,6}}; - // jnz,je,jb,ja, jz,jne,jae,jbe - if (code<4) put2(0x84db); // test bl, bl - if (arg>=128 && arg-257-i>=0 && o-it[arg-257-i]<120) - put2(0x7000+256*jtab[op==47][code/4]); // jx short 0 - else - put2a(0x0f80+jtab[op==47][code/4], 0); // jx near 0 - break; - } - } - break; - case 6: // ddd = *d - switch(sss) { - case 0: // ddd<>a (swap) - put2(0x8711); // xchg edx, [ecx] - break; - case 1: // ddd++ - put2(0xff01); // inc dword [ecx] - break; - case 2: // ddd-- - put2(0xff09); // dec dword [ecx] - break; - case 3: // ddd! 
- put2(0xf711); // not dword [ecx] - break; - case 4: // ddd=0 - put2(0x31c0); // xor eax, eax - put2(0x8901); // mov [ecx], eax - break; - case 7: // ddd=r n - if (S==8) - put3a(0x418996, arg*4); // mov [r14+n*4], edx - else - put2a(0x8915, &r[arg]); // mov [r+n], edx - break; - } - break; - case 7: // special - switch(op) { - case 56: // halt - put1a(0xb8, 1); // mov eax, 1 - put1a(0xe9, halt-o-4); // jmp near halt - break; - case 57: // out - put1a(0xe8, outlabel-o-4);// call outlabel - break; - case 59: // hash: a = (a + *b + 512) * 773 - put3a(0x8d8410, 512); // lea edx, [eax+edx+512] - put2a(0x69d0, 773); // imul edx, eax, 773 - break; - case 60: // hashd: *d = (*d + a + 512) * 773 - put2(0x8b01); // mov eax, [ecx] - put3a(0x8d8410, 512); // lea eax, [eax+edx+512] - put2a(0x69c0, 773); // imul eax, eax, 773 - put2(0x8901); // mov [ecx], eax - break; - case 63: // jmp - put1a(0xe9, 0); // jmp near 0 (fill in target later) - break; - } - break; - case 8: // a= - case 9: // b= - case 10: // c= - case 11: // d= - if (sss==7) // n - put1a(0xb8+regcode[ddd], arg); // mov ddd, n - else if (sss==6) { // *d - if (S==8) - put4(0x418b0484+(regcode[ddd]<<11)); // mov ddd, [r12+rax*4] - else - put3a(0x8b0485+(regcode[ddd]<<11),&h[0]);// mov ddd, [h+eax*4] - } - else if (sss<4) // a, b, c, d - put2(0x89c0+regcode[ddd]+8*regcode[sss]);// mov ddd,sss - break; - case 12: // *b= - case 13: // *c= - if (sss==7) put3(0xc60100+arg); // mov byte [ecx], n - else if (sss==0) put2(0x8811); // mov byte [ecx], dl - else { - if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss - put2(0x8801); // mov byte [ecx], al - } - break; - case 14: // *d= - if (sss<7) put2(0x8901+8*regcode[sss]); // mov [ecx], sss - else put2a(0xc701, arg); // mov dword [ecx], n - break; - case 15: break; // not used - case 16: // a+= - if (sss==6) { - if (S==8) put4(0x41031484); // add edx, [r12+rax*4] - else put3a(0x031485, &h[0]); // add edx, [h+eax*4] - } - else if (sss<7) put2(0x01c2+8*regcode[sss]);// add 
edx, sss - else if (arg>128) put2a(0x81c2, arg); // add edx, n - else put3(0x83c200+arg); // add edx, byte n - break; - case 17: // a-= - if (sss==6) { - if (S==8) put4(0x412b1484); // sub edx, [r12+rax*4] - else put3a(0x2b1485, &h[0]); // sub edx, [h+eax*4] - } - else if (sss<7) put2(0x29c2+8*regcode[sss]);// sub edx, sss - else if (arg>=128) put2a(0x81ea, arg); // sub edx, n - else put3(0x83ea00+arg); // sub edx, byte n - break; - case 18: // a*= - if (sss==6) { - if (S==8) put5(0x410faf14,0x84); // imul edx, [r12+rax*4] - else put4a(0x0faf1485, &h[0]); // imul edx, [h+eax*4] - } - else if (sss<7) put3(0x0fafd0+regcode[sss]);// imul edx, sss - else if (arg>=128) put2a(0x69d2, arg); // imul edx, n - else put3(0x6bd200+arg); // imul edx, byte n - break; - case 19: // a/= - case 20: // a%= - if (sss<7) put2(0x89c1+8*regcode[sss]); // mov ecx, sss - else put1a(0xb9, arg); // mov ecx, n - put2(0x85c9); // test ecx, ecx - put3(0x0f44d1); // cmovz edx, ecx - put2(0x7408-2*(op/8==20)); // jz (over rest) - put2(0x89d0); // mov eax, edx - put2(0x31d2); // xor edx, edx - put2(0xf7f1); // div ecx - if (op/8==19) put2(0x89c2); // mov edx, eax - break; - case 21: // a&= - if (sss==6) { - if (S==8) put4(0x41231484); // and edx, [r12+rax*4] - else put3a(0x231485, &h[0]); // and edx, [h+eax*4] - } - else if (sss<7) put2(0x21c2+8*regcode[sss]);// and edx, sss - else if (arg>=128) put2a(0x81e2, arg); // and edx, n - else put3(0x83e200+arg); // and edx, byte n - break; - case 22: // a&~ - if (sss==7) { - if (arg<128) put3(0x83e200+(~arg&255));// and edx, byte ~n - else put2a(0x81e2, ~arg); // and edx, ~n - } - else { - if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss - put2(0xf7d0); // not eax - put2(0x21c2); // and edx, eax - } - break; - case 23: // a|= - if (sss==6) { - if (S==8) put4(0x410b1484); // or edx, [r12+rax*4] - else put3a(0x0b1485, &h[0]); // or edx, [h+eax*4] - } - else if (sss<7) put2(0x09c2+8*regcode[sss]);// or edx, sss - else if (arg>=128) put2a(0x81ca, 
arg); // or edx, n - else put3(0x83ca00+arg); // or edx, byte n - break; - case 24: // a^= - if (sss==6) { - if (S==8) put4(0x41331484); // xor edx, [r12+rax*4] - else put3a(0x331485, &h[0]); // xor edx, [h+eax*4] - } - else if (sss<7) put2(0x31c2+8*regcode[sss]);// xor edx, sss - else if (arg>=128) put2a(0x81f2, arg); // xor edx, byte n - else put3(0x83f200+arg); // xor edx, n - break; - case 25: // a<<= - case 26: // a>>= - if (sss==7) // sss = n - put3(0xc1e200+8*256*(op/8==26)+arg); // shl/shr n - else { - put2(0x89c1+8*regcode[sss]); // mov ecx, sss - put2(0xd3e2+8*(op/8==26)); // shl/shr edx, cl - } - break; - case 27: // a== - case 28: // a< - case 29: // a> - if (sss==6) { - if (S==8) put4(0x413b1484); // cmp edx, [r12+rax*4] - else put3a(0x3b1485, &h[0]); // cmp edx, [h+eax*4] - } - else if (sss==7) // sss = n - put2a(0x81fa, arg); // cmp edx, dword n - else - put2(0x39c2+8*regcode[sss]); // cmp edx, sss - if (code<4) { - if (op/8==27) put3(0x0f94c3); // setz bl - if (op/8==28) put3(0x0f92c3); // setc bl - if (op/8==29) put3(0x0f97c3); // seta bl - } - break; - case 30: // not used - case 31: // 255 = lj - if (op==255) put1a(0xe9, 0); // jmp near - break; - } - } - } - } - - // Finish first pass - const int rsize=o; - if (o>rcode_size) return rsize; - - // Fill in jump addresses (second pass) - for (int i=0; i=128) target-=256; - target+=i+2; - } - if (target<0 || target>=hlen) target=hlen-1; // runtime ZPAQL error - o=it[i]; - assert(o>=16 && o skip test - assert(o>=16 && o=0x72 && op<0x78) || op==0xeb) { // jx, jmp short - --target; - if (target<-128 || target>127) - error("Cannot code x86 short jump"); - assert(o=0x82 && op<0x88) || op==0xe9) // jx, jmp near - { - target-=4; - puta(target); - } - else assert(false); // not a x86 jump - } - } - - // Jump to start - o=0; - put1a(0xe9, start-5); // jmp near start - return rsize; -} - -//////////////////////// Predictor::assemble_p() ///////////////////// - -// Assemble the ZPAQL code in the HCOMP section 
of z.header to pcomp and -// return the number of bytes of x86 or x86-64 code written, or that would -// be written if pcomp were large enough. The code for predict() begins -// at pr.pcomp[0] and update() at pr.pcomp[5], both as jmp instructions. - -// The assembled code is equivalent to int predict(Predictor*) -// and void update(Predictor*, int y); The Preditor address is placed in -// edi/rdi. The update bit y is placed in ebp/rbp. - -int Predictor::assemble_p() { - Predictor& pr=*this; - U8* rcode=pr.pcode; // x86 output array - int rcode_size=pcode_size; // output size - int o=0; // output index in pcode - const int S=sizeof(char*); // 4 or 8 - U8* hcomp=&pr.z.header[0]; // The code to translate -#define off(x) ((char*)&(pr.x)-(char*)&pr) -#define offc(x) ((char*)&(pr.comp[i].x)-(char*)&pr) - - // test for little-endian (probably x86) - U32 t=0x12345678; - if (*(char*)&t!=0x78 || (S!=4 && S!=8)) - error("JIT supported only for x86-32 and x86-64"); - - // Initialize for predict(). Put predictor address in edi/rdi - put1a(0xe9, 5); // jmp predict - put1a(0, 0x90909000); // reserve space for jmp update - put1(0x53); // push ebx/rbx - put1(0x55); // push ebp/rbp - put1(0x56); // push esi/rsi - put1(0x57); // push edi/rdi - if (S==4) - put4(0x8b7c2414); // mov edi,[esp+0x14] ; pr - else { -#ifdef _WIN32 - put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) -#endif - } - - // Code predict() for each component - const int n=hcomp[6]; // number of components - U8* cp=hcomp+7; - for (int i=0; i=pr.z.cend) error("comp too big"); - if (cp[0]<1 || cp[0]>9) error("invalid component"); - assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); - switch (cp[0]) { - - case CONS: // c - break; - - case CM: // sizebits limit - // Component& cr=comp[i]; - // cr.cxt=h[i]^hmap4; - // p[i]=stretch(cr.cm(cr.cxt)>>17); - - put2a(0x8b87, off(h[i])); // mov eax, [edi+&h[i]] - put2a(0x3387, off(hmap4)); // xor eax, [edi+&hmap4] - put1a(0x25, (1<rsi) - put2a(0x8bb7, offc(cm)); // mov esi, 
[edi+&cm] - put3(0x8b0486); // mov eax, [esi+eax*4] - put3(0xc1e811); // shr eax, 17 - put4a(0x0fbf8447, off(stretcht)); // movsx eax,word[edi+eax*2+..] - put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax - break; - - case ISSE: // sizebits j -- c=hi, cxt=bh - // assert((hmap4&15)>0); - // if (c8==1 || (c8&0xf0)==16) - // cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); - // cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history - // int *wt=(int*)&cr.cm[cr.cxt*2]; - // p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); - - case ICM: // sizebits - // assert((hmap4&15)>0); - // if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); - // cr.cxt=cr.ht[cr.c+(hmap4&15)]; - // p[i]=stretch(cr.cm(cr.cxt)>>8); - // - // Find cxt row in hash table ht. ht has rows of 16 indexed by the low - // sizebits of cxt with element 0 having the next higher 8 bits for - // collision detection. If not found after 3 adjacent tries, replace - // row with lowest element 1 as priority. Return index of row. - // - // size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { - // assert(ht.size()==size_t(16)<>sizebits&255; - // size_t h0=(cxt*16)&(ht.size()-16); - // if (ht[h0]==chk) return h0; - // size_t h1=h0^16; - // if (ht[h1]==chk) return h1; - // size_t h2=h0^32; - // if (ht[h2]==chk) return h2; - // if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) - // return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; - // else if (ht[h1+1]>(7-cr.cxt))&1; // predicted bit - // p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); - // } - - if (S==8) put1(0x48); // rex.w - put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] - - // If match length (a) is 0 then p[i]=0 - put2a(0x8b87, offc(a)); // mov eax, [edi+&a] - put2(0x85c0); // test eax, eax - put2(0x7449); // jz L2 ; p[i]=0 - - // Else put predicted bit in c - put1a(0xb9, 7); // mov ecx, 7 - put2a(0x2b8f, offc(cxt)); // sub ecx, [edi+&cxt] - put2a(0x8b87, offc(limit)); // mov eax, [edi+&limit] - put2a(0x2b87, offc(b)); // sub eax, [edi+&b] - put1a(0x25, (1<>8; - - put2a(0x8b87, 
off(p[cp[1]])); // mov eax, [edi+&p[j]] - put2a(0x2b87, off(p[cp[2]])); // sub eax, [edi+&p[k]] - put2a(0x69c0, cp[3]); // imul eax, wt - put3(0xc1f808); // sar eax, 8 - put2a(0x0387, off(p[cp[2]])); // add eax, [edi+&p[k]] - put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax - break; - - case MIX2: // sizebits j k rate mask - // c=size cm=wt[size] cxt=input - // cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); - // assert(cr.cxt=0 && w<65536); - // p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; - // assert(p[i]>=-2048 && p[i]<2048); - - put2(0x8b07); // mov eax, [edi] ; c8 - put1a(0x25, cp[5]); // and eax, mask - put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] - put1a(0x25, (1<=1 && m<=i); - // cr.cxt=h[i]+(c8&cp[5]); - // cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights - // assert(cr.cxt<=cr.cm.size()-m); - // int* wt=(int*)&cr.cm[cr.cxt]; - // p[i]=0; - // for (int j=0; j>8)*p[cp[2]+j]; - // p[i]=clamp2k(p[i]>>8); - - put2(0x8b07); // mov eax, [edi] ; c8 - put1a(0x25, cp[5]); // and eax, mask - put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] - put1a(0x25, (1<3) put4a(0xf30f6f96, k*4+16);//movdqu xmm2, [esi+k*4+16] - put5(0x660f72e1,0x08); // psrad xmm1, 8 - if (tail>3) put5(0x660f72e2,0x08); // psrad xmm2, 8 - put4(0x660f6bca); // packssdw xmm1, xmm2 - put4a(0xf30f6f9f, off(p[cp[2]+k])); // movdqu xmm3, [edi+&p[j+k]] - if (tail>3) - put4a(0xf30f6fa7,off(p[cp[2]+k+4]));//movdqu xmm4, [edi+&p[j+k+4]] - put4(0x660f6bdc); // packssdw, xmm3, xmm4 - if (tail>0 && tail<8) { // last loop, mask extra weights - put4(0x660f76ed); // pcmpeqd xmm5, xmm5 ; -1 - put5(0x660f73dd, 16-tail*2); // psrldq xmm5, 16-tail*2 - put4(0x660fdbcd); // pand xmm1, xmm5 - } - if (k==0) { // first loop, initialize sum in xmm0 - put4(0xf30f6fc1); // movdqu xmm0, xmm1 - put4(0x660ff5c3); // pmaddwd xmm0, xmm3 - } - else { // accumulate sum in xmm0 - put4(0xf30f6fd1); // movdqu xmm2, xmm1 - put4(0x660ff5d3); // pmaddwd xmm2, xmm3 - put4(0x660ffec2); // paddd, xmm0, xmm2 - } - } - - // Add up the 4 
elements of xmm0 = p[i] in the first element - put4(0xf30f6fc8); // movdqu xmm1, xmm0 - put5(0x660f73d9,0x08); // psrldq xmm1, 8 - put4(0x660ffec1); // paddd xmm0, xmm1 - put4(0xf30f6fc8); // movdqu xmm1, xmm0 - put5(0x660f73d9,0x04); // psrldq xmm1, 4 - put4(0x660ffec1); // paddd xmm0, xmm1 - put4(0x660f7ec0); // movd eax, xmm0 ; p[i] - put3(0xc1f808); // sar eax, 8 - put1a(0xb9, 2047); // mov ecx, 2047 ; clamp2k - put2(0x39c8); // cmp eax, ecx - put3(0x0f4fc1); // cmovg eax, ecx - put2(0xf7d1); // not ecx ; -2048 - put2(0x39c8); // cmp eax, ecx - put3(0x0f4cc1); // cmovl eax, ecx - put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax - break; - - case SSE: // sizebits j start limit - // cr.cxt=(h[i]+c8)*32; - // int pq=p[cp[2]]+992; - // if (pq<0) pq=0; - // if (pq>1983) pq=1983; - // int wt=pq&63; - // pq>>=6; - // assert(pq>=0 && pq<=30); - // cr.cxt+=pq; - // p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt) // p0 - // +(cr.cm(cr.cxt+1)>>10)*wt)>>13); // p1 - // // p = p0*(64-wt)+p1*wt = (p1-p0)*wt + p0*64 - // cr.cxt+=wt>>5; - - put2a(0x8b8f, off(h[i])); // mov ecx, [edi+&h[i]] - put2(0x030f); // add ecx, [edi] ; c0 - put2a(0x81e1, (1<>5 - put2a(0x898f, offc(cxt)); // mov [edi+cxt], ecx ; cxt saved - put3(0xc1e80a); // shr eax, 10 ; p0 = cm[cxt]>>10 - put3(0xc1eb0a); // shr ebx, 10 ; p1 = cm[cxt+1]>>10 - put2(0x29c3); // sub ebx, eax, ; p1-p0 - put3(0x0fafda); // imul ebx, edx ; (p1-p0)*wt - put3(0xc1e006); // shr eax, 6 - put2(0x01d8); // add eax, ebx ; p in 0..2^28-1 - put3(0xc1e80d); // shr eax, 13 ; p in 0..32767 - put4a(0x0fbf8447, off(stretcht)); // movsx eax, word [edi+eax*2+...] - put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax - break; - - default: - error("invalid ZPAQ component"); - } - } - - // return squash(p[n-1]) - put2a(0x8b87, off(p[n-1])); // mov eax, [edi+...] - put1a(0x05, 0x800); // add eax, 2048 - put4a(0x0fbf8447, off(squasht[0])); // movsx eax, word [edi+eax*2+...] 
- put1(0x5f); // pop edi - put1(0x5e); // pop esi - put1(0x5d); // pop ebp - put1(0x5b); // pop ebx - put1(0xc3); // ret - - // Initialize for update() Put predictor address in edi/rdi - // and bit y=0..1 in ebp - int save_o=o; - o=5; - put1a(0xe9, save_o-10); // jmp update - o=save_o; - put1(0x53); // push ebx/rbx - put1(0x55); // push ebp/rbp - put1(0x56); // push esi/rsi - put1(0x57); // push edi/rdi - if (S==4) { - put4(0x8b7c2414); // mov edi,[esp+0x14] ; (1st arg = pr) - put4(0x8b6c2418); // mov ebp,[esp+0x18] ; (2nd arg = y) - } - else { -#ifndef _WIN32 - put3(0x4889f5); // mov rbp, rsi (2nd arg in Linux-64) -#else - put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) - put3(0x4889d5); // mov rbp, rdx (2nd arg) -#endif - } - - // Code update() for each component - cp=hcomp+7; - for (int i=0; i=1 && cp[0]<=9); - assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); - switch (cp[0]) { - - case CONS: // c - break; - - case SSE: // sizebits j start limit - case CM: // sizebits limit - // train(cr, y); - // - // reduce prediction error in cr.cm - // void train(Component& cr, int y) { - // assert(y==0 || y==1); - // U32& pn=cr.cm(cr.cxt); - // U32 count=pn&0x3ff; - // int error=y*32767-(cr.cm(cr.cxt)>>17); - // pn+=(error*dt[count]&-1024)+(countrsi) - put2a(0x8bb7, offc(cm)); // mov esi,[edi+cm] ; cm - put2a(0x8b87, offc(cxt)); // mov eax,[edi+cxt] ; cxt - put1a(0x25, pr.comp[i].cm.size()-1); // and eax, size-1 - if (S==8) put1(0x48); // rex.w - put3(0x8d3486); // lea esi,[esi+eax*4] ; &cm[cxt] - put2(0x8b06); // mov eax,[esi] ; cm[cxt] - put2(0x89c2); // mov edx, eax ; cm[cxt] - put3(0xc1e811); // shr eax, 17 ; cm[cxt]>>17 - put2(0x89e9); // mov ecx, ebp ; y - put3(0xc1e10f); // shl ecx, 15 ; y*32768 - put2(0x29e9); // sub ecx, ebp ; y*32767 - put2(0x29c1); // sub ecx, eax ; error - put2a(0x81e2, 0x3ff); // and edx, 1023 ; count - put3a(0x8b8497, off(dt)); // mov eax,[edi+edx*4+dt] ; dt[count] - put3(0x0fafc8); // imul ecx, eax ; error*dt[count] - put2a(0x81e1, 
0xfffffc00); // and ecx, -1024 - put2a(0x81fa, cp[2+2*(cp[0]==SSE)]*4); // cmp edx, limit*4 - put2(0x110e); // adc [esi], ecx ; pn+=... - break; - - case ICM: // sizebits: cxt=bh, ht[c][0..15]=bh row - // cr.ht[cr.c+(hmap4&15)]=st.next(cr.ht[cr.c+(hmap4&15)], y); - // U32& pn=cr.cm(cr.cxt); - // pn+=int(y*32767-(pn>>8))>>2; - - case ISSE: // sizebits j -- c=hi, cxt=bh - // assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); - // int err=y*32767-squash(p[i]); - // int *wt=(int*)&cr.cm[cr.cxt*2]; - // wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); - // wt[1]=clamp512k(wt[1]+((err+16)>>5)); - // cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); - - // update bit history bh to next(bh,y=ebp) in ht[c+(hmap4&15)] - put3(0x8b4700+off(hmap4)); // mov eax, [edi+&hmap4] - put3(0x83e00f); // and eax, 15 - put2a(0x0387, offc(c)); // add eax [edi+&c] ; cxt - if (S==8) put1(0x48); // rex.w - put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] - put4(0x0fb61406); // movzx edx, byte [esi+eax] ; bh - put4(0x8d5c9500); // lea ebx, [ebp+edx*4] ; index to st - put4a(0x0fb69c1f, off(st)); // movzx ebx,byte[edi+ebx+st]; next bh - put3(0x881c06); // mov [esi+eax], bl ; save next bh - if (S==8) put1(0x48); // rex.w - put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] - - // ICM: update cm[cxt=edx=bit history] to reduce prediction error - // esi = &cm - if (cp[0]==ICM) { - if (S==8) put1(0x48); // rex.w - put3(0x8d3496); // lea esi, [esi+edx*4] ; &cm[bh] - put2(0x8b06); // mov eax, [esi] ; pn - put3(0xc1e808); // shr eax, 8 ; pn>>8 - put2(0x89e9); // mov ecx, ebp ; y - put3(0xc1e10f); // shl ecx, 15 - put2(0x29e9); // sub ecx, ebp ; y*32767 - put2(0x29c1); // sub ecx, eax - put3(0xc1f902); // sar ecx, 2 - put2(0x010e); // add [esi], ecx - } - - // ISSE: update weights. edx=cxt=bit history (0..255), esi=cm[512] - else { - put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] - put1a(0x05, 2048); // add eax, 2048 - put4a(0x0fb78447, off(squasht)); // movzx eax, word [edi+eax*2+..] 
- put2(0x89e9); // mov ecx, ebp ; y - put3(0xc1e10f); // shl ecx, 15 - put2(0x29e9); // sub ecx, ebp ; y*32767 - put2(0x29c1); // sub ecx, eax ; err - put2a(0x8b87, off(p[cp[2]]));// mov eax, [edi+&p[j]] - put3(0x0fafc1); // imul eax, ecx - put1a(0x05, (1<<12)); // add eax, 4096 - put3(0xc1f80d); // sar eax, 13 - put3(0x0304d6); // add eax, [esi+edx*8] ; wt[0] - put1a(0xbb, (1<<19)-1); // mov ebx, 524287 - put2(0x39d8); // cmp eax, ebx - put3(0x0f4fc3); // cmovg eax, ebx - put2(0xf7d3); // not ebx ; -524288 - put2(0x39d8); // cmp eax, ebx - put3(0x0f4cc3); // cmovl eax, ebx - put3(0x8904d6); // mov [esi+edx*8], eax - put3(0x83c110); // add ecx, 16 ; err - put3(0xc1f905); // sar ecx, 5 - put4(0x034cd604); // add ecx, [esi+edx*8+4] ; wt[1] - put1a(0xb8, (1<<19)-1); // mov eax, 524287 - put2(0x39c1); // cmp ecx, eax - put3(0x0f4fc8); // cmovg ecx, eax - put2(0xf7d0); // not eax ; -524288 - put2(0x39c1); // cmp ecx, eax - put3(0x0f4cc8); // cmovl ecx, eax - put4(0x894cd604); // mov [esi+edx*8+4], ecx - } - break; - - case MATCH: // sizebits bufbits: - // a=len, b=offset, c=bit, cm=index, cxt=bitpos - // ht=buf, limit=pos - // assert(cr.a<=255); - // assert(cr.c==0 || cr.c==1); - // assert(cr.cxt<8); - // assert(cr.cm.size()==(size_t(1)<>5; - // int w=cr.a16[cr.cxt]; - // w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; - // if (w<0) w=0; - // if (w>65535) w=65535; - // cr.a16[cr.cxt]=w; - - // set ecx=err - put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] - put1a(0x05, 2048); // add eax, 2048 - put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] - put2(0x89e9); // mov ecx, ebp ; y - put3(0xc1e10f); // shl ecx, 15 - put2(0x29e9); // sub ecx, ebp ; y*32767 - put2(0x29c1); // sub ecx, eax - put2a(0x69c9, cp[4]); // imul ecx, rate - put3(0xc1f905); // sar ecx, 5 ; err - - // Update w - put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] - if (S==8) put1(0x48); // rex.w - put2a(0x8bb7, offc(a16)); // mov esi, [edi+&a16] - if (S==8) put1(0x48); // rex.w - 
put3(0x8d3446); // lea esi, [esi+eax*2] ; &w - put2a(0x8b87, off(p[cp[2]])); // mov eax, [edi+&p[j]] - put2a(0x2b87, off(p[cp[3]])); // sub eax, [edi+&p[k]] ; p[j]-p[k] - put3(0x0fafc1); // imul eax, ecx ; * err - put1a(0x05, 1<<12); // add eax, 4096 - put3(0xc1f80d); // sar eax, 13 - put3(0x0fb716); // movzx edx, word [esi] ; w - put2(0x01d0); // add eax, edx - put1a(0xba, 0xffff); // mov edx, 65535 - put2(0x39d0); // cmp eax, edx - put3(0x0f4fc2); // cmovg eax, edx - put2(0x31d2); // xor edx, edx - put2(0x39d0); // cmp eax, edx - put3(0x0f4cc2); // cmovl eax, edx - put3(0x668906); // mov word [esi], ax - break; - - case MIX: // sizebits j m rate mask - // cm=wt[size][m], cxt=input - // int m=cp[3]; - // assert(m>0 && m<=i); - // assert(cr.cm.size()==m*cr.c); - // assert(cr.cxt+m<=cr.cm.size()); - // int err=(y*32767-squash(p[i]))*cp[4]>>4; - // int* wt=(int*)&cr.cm[cr.cxt]; - // for (int j=0; j>13)); - - // set ecx=err - put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] - put1a(0x05, 2048); // add eax, 2048 - put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] - put2(0x89e9); // mov ecx, ebp ; y - put3(0xc1e10f); // shl ecx, 15 - put2(0x29e9); // sub ecx, ebp ; y*32767 - put2(0x29c1); // sub ecx, eax - put2a(0x69c9, cp[4]); // imul ecx, rate - put3(0xc1f904); // sar ecx, 4 ; err - - // set esi=wt - put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] ; cxt - if (S==8) put1(0x48); // rex.w - put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] - if (S==8) put1(0x48); // rex.w - put3(0x8d3486); // lea esi, [esi+eax*4] ; wt - - for (int k=0; k=256) { - z.run(c8-256); - hmap4=1; - c8=1; - for (int i=0; i=16 && c8<32) - hmap4=(hmap4&0xf)<<5|y<<4|1; - else - hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); -#endif -} - -// Execute the ZPAQL code with input byte or -1 for EOF. -// Use JIT code at rcode if available, or else create it. 
-void ZPAQL::run(U32 input) { -#ifdef NOJIT - run0(input); -#else - if (!rcode) { - int n=assemble(); - allocx(rcode, rcode_size, n); - if (!rcode || n<10 || rcode_size<10 || n!=assemble()) - error("run JIT failed"); - } - a=input; - if (!((int(*)())(&rcode[0]))()) - libzpaq::error("Bad ZPAQL opcode"); -#endif -} - -} // end namespace libzpaq +/* libzpaq.cpp - Part of LIBZPAQ Version 5.01 + + Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so without restriction. + This Software is provided "as is" without warranty. + +LIBZPAQ is a C++ library for compression and decompression of data +conforming to the ZPAQ level 2 standard. 
See http://mattmahoney.net/zpaq/ +*/ + +#include "libzpaq.h" +#include +#include +#include +#include + +#ifndef NOJIT +#ifndef _WIN32 +#include +#else +#include +#endif +#endif + +namespace libzpaq { + +// Standard library redirections +void* calloc(size_t a, size_t b) {return ::calloc(a, b);} +void free(void* p) {::free(p);} +int memcmp(const void* d, const void* s, size_t n) { + return ::memcmp(d, s, n);} +void* memset(void* d, int c, size_t n) {return ::memset(d, c, n);} +double log(double x) {return ::log(x);} +double exp(double x) {return ::exp(x);} +double pow(double x, double y) {return ::pow(x, y);} + +// Read 16 bit little-endian number +int toU16(const char* p) { + return (p[0]&255)+256*(p[1]&255); +} + +// Default read() and write() +int Reader::read(char* buf, int n) { + int i=0, c; + while (i=0) + buf[i++]=c; + return i; +} + +void Writer::write(const char* buf, int n) { + for (int i=0; i 0 bytes of executable memory and update +// p to point to it and newsize = n. Free any previously +// allocated memory first. If newsize is 0 then free only. +// Call error in case of failure. If NOJIT, ignore newsize +// and set p=0, n=0 without allocating memory. 
+void allocx(U8* &p, int &n, int newsize) { +#ifdef NOJIT + p=0; + n=0; +#else + if (p || n) { + if (p) +#ifndef _WIN32 + munmap(p, n); +#else // Windows + VirtualFree(p, 0, MEM_RELEASE); +#endif + p=0; + n=0; + } + if (newsize>0) { +#ifndef _WIN32 + p=(U8*)mmap(0, newsize, PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_PRIVATE|MAP_ANON, -1, 0); + if ((void*)p==MAP_FAILED) p=0; +#else + p=(U8*)VirtualAlloc(0, newsize, MEM_RESERVE|MEM_COMMIT, + PAGE_EXECUTE_READWRITE); +#endif + if (p) + n=newsize; + else { + n=0; + error("allocx failed"); + } + } +#endif +} + +//////////////////////////// SHA1 //////////////////////////// + +// SHA1 code, see http://en.wikipedia.org/wiki/SHA-1 + +// Start a new hash +void SHA1::init() { + len0=len1=0; + h[0]=0x67452301; + h[1]=0xEFCDAB89; + h[2]=0x98BADCFE; + h[3]=0x10325476; + h[4]=0xC3D2E1F0; +} + +// Return old result and start a new hash +const char* SHA1::result() { + + // pad and append length + const U32 s1=len1, s0=len0; + put(0x80); + while ((len0&511)!=448) + put(0); + put(s1>>24); + put(s1>>16); + put(s1>>8); + put(s1); + put(s0>>24); + put(s0>>16); + put(s0>>8); + put(s0); + + // copy h to hbuf + for (int i=0; i<5; ++i) { + hbuf[4*i]=h[i]>>24; + hbuf[4*i+1]=h[i]>>16; + hbuf[4*i+2]=h[i]>>8; + hbuf[4*i+3]=h[i]; + } + + // return hash prior to clearing state + init(); + return hbuf; +} + +// Hash 1 block of 64 bytes +void SHA1::process() { + for (int i=16; i<80; ++i) { + w[i]=w[i-3]^w[i-8]^w[i-14]^w[i-16]; + w[i]=w[i]<<1|w[i]>>31; + } + U32 a=h[0]; + U32 b=h[1]; + U32 c=h[2]; + U32 d=h[3]; + U32 e=h[4]; + const U32 k1=0x5A827999, k2=0x6ED9EBA1, k3=0x8F1BBCDC, k4=0xCA62C1D6; +#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(~b&d))+k1+w[i]; b=b<<30|b>>2; +#define f5(i) f1(a,b,c,d,e,i) f1(e,a,b,c,d,i+1) f1(d,e,a,b,c,i+2) \ + f1(c,d,e,a,b,i+3) f1(b,c,d,e,a,i+4) + f5(0) f5(5) f5(10) f5(15) +#undef f1 +#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k2+w[i]; b=b<<30|b>>2; + f5(20) f5(25) f5(30) f5(35) +#undef f1 +#define f1(a,b,c,d,e,i) 
e+=(a<<5|a>>27)+((b&c)|(b&d)|(c&d))+k3+w[i]; b=b<<30|b>>2; + f5(40) f5(45) f5(50) f5(55) +#undef f1 +#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k4+w[i]; b=b<<30|b>>2; + f5(60) f5(65) f5(70) f5(75) +#undef f1 +#undef f5 + h[0]+=a; + h[1]+=b; + h[2]+=c; + h[3]+=d; + h[4]+=e; +} + +//////////////////////////// Component /////////////////////// + +// A Component is a context model, indirect context model, match model, +// fixed weight mixer, adaptive 2 input mixer without or with current +// partial byte as context, adaptive m input mixer (without or with), +// or SSE (without or with). + +const int compsize[256]={0,2,3,2,3,4,6,6,3,5}; + +void Component::init() { + limit=cxt=a=b=c=0; + cm.resize(0); + ht.resize(0); + a16.resize(0); +} + +////////////////////////// StateTable ////////////////////////// + +// How many states with count of n0 zeros, n1 ones (0...2) +int StateTable::num_states(int n0, int n1) { + const int B=6; + const int bound[B]={20,48,15,8,6,5}; // n0 -> max n1, n1 -> max n0 + if (n0=B || n0>bound[n1]) return 0; + return 1+(n1>0 && n0+n1<=17); +} + +// New value of count n0 if 1 is observed (and vice versa) +void StateTable::discount(int& n0) { + n0=(n0>=1)+(n0>=2)+(n0>=3)+(n0>=4)+(n0>=5)+(n0>=7)+(n0>=8); +} + +// compute next n0,n1 (0 to N) given input y (0 or 1) +void StateTable::next_state(int& n0, int& n1, int y) { + if (n0 20,0 + // 48,1,0 -> 48,1 + // 15,2,0 -> 8,1 + // 8,3,0 -> 6,2 + // 8,3,1 -> 5,3 + // 6,4,0 -> 5,3 + // 5,5,0 -> 5,4 + // 5,5,1 -> 4,5 + while (!num_states(n0, n1)) { + if (n1<2) --n0; + else { + n0=(n0*(n1-1)+(n1/2))/n1; + --n1; + } + } + } +} + +// Initialize next state table ns[state*4] -> next if 0, next if 1, n0, n1 +StateTable::StateTable() { + + // Assign states by increasing priority + const int N=50; + U8 t[N][N][2]={{{0}}}; // (n0,n1,y) -> state number + int state=0; + for (int i=0; i=0 && n<=2); + if (n) { + t[n0][n1][0]=state; + t[n0][n1][1]=state+n-1; + state+=n; + } + } + } + + // Generate next state table + 
memset(ns, 0, sizeof(ns)); + for (int n0=0; n0=0 && s<256); + int s0=n0, s1=n1; + next_state(s0, s1, 0); + assert(s0>=0 && s0=0 && s1=0 && s0=0 && s1=7); + assert(hbegin>=cend); + assert(hend>=hbegin); + assert(out2); + if (!pp) { // if not a postprocessor then write COMP + for (int i=0; iput(header[i]); + } + else { // write PCOMP size only + out2->put((hend-hbegin)&255); + out2->put((hend-hbegin)>>8); + } + for (int i=hbegin; iput(header[i]); + return true; +} + +// Read header from in2 +int ZPAQL::read(Reader* in2) { + + // Get header size and allocate + int hsize=in2->get(); + hsize+=in2->get()*256; + header.resize(hsize+300); + cend=hbegin=hend=0; + header[cend++]=hsize&255; + header[cend++]=hsize>>8; + while (cend<7) header[cend++]=in2->get(); // hh hm ph pm n + + // Read COMP + int n=header[cend-1]; + for (int i=0; iget(); // component type + if (type==-1) error("unexpected end of file"); + header[cend++]=type; // component type + int size=compsize[type]; + if (size<1) error("Invalid component type"); + if (cend+size>header.isize()-8) error("COMP list too big"); + for (int j=1; jget(); + } + if ((header[cend++]=in2->get())!=0) error("missing COMP END"); + + // Insert a guard gap and read HCOMP + hbegin=hend=cend+128; + while (hendget(); + if (op==-1) error("unexpected end of file"); + header[hend++]=op; + } + if ((header[hend++]=in2->get())!=0) error("missing HCOMP END"); + assert(cend>=7 && cendhbegin && hend6); + assert(output==0); + assert(sha1==0); + init(header[2], header[3]); // hh, hm +} + +// Initialize machine state as PCOMP +void ZPAQL::initp() { + assert(header.isize()>6); + init(header[4], header[5]); // ph, pm +} + +// Flush pending output +void ZPAQL::flush() { + if (output) output->write(&outbuf[0], bufptr); + if (sha1) for (int i=0; iput(U8(outbuf[i])); + bufptr=0; +} + +// Return memory requirement in bytes +double ZPAQL::memory() { + double mem=pow(2.0,header[2]+2)+pow(2.0,header[3]) // hh hm + +pow(2.0,header[4]+2)+pow(2.0,header[5]) // ph 
pm + +header.size(); + int cp=7; // start of comp list + for (int i=0; i0); + assert(cend>=7); + assert(hbegin>=cend+128); + assert(hend>=hbegin); + assert(hend0); + h.resize(1, hbits); + m.resize(1, mbits); + r.resize(256); + a=b=c=d=pc=f=0; +} + +// Run program on input by interpreting header +void ZPAQL::run0(U32 input) { + assert(cend>6); + assert(hbegin>=cend+128); + assert(hend>=hbegin); + assert(hend0); + assert(h.size()>0); + assert(header[0]+256*header[1]==cend+hend-hbegin-2); + pc=hbegin; + a=input; + while (execute()) ; +} + +// Execute one instruction, return 0 after HALT else 1 +int ZPAQL::execute() { + switch(header[pc++]) { + case 0: err(); break; // ERROR + case 1: ++a; break; // A++ + case 2: --a; break; // A-- + case 3: a = ~a; break; // A! + case 4: a = 0; break; // A=0 + case 7: a = r[header[pc++]]; break; // A=R N + case 8: swap(b); break; // B<>A + case 9: ++b; break; // B++ + case 10: --b; break; // B-- + case 11: b = ~b; break; // B! + case 12: b = 0; break; // B=0 + case 15: b = r[header[pc++]]; break; // B=R N + case 16: swap(c); break; // C<>A + case 17: ++c; break; // C++ + case 18: --c; break; // C-- + case 19: c = ~c; break; // C! + case 20: c = 0; break; // C=0 + case 23: c = r[header[pc++]]; break; // C=R N + case 24: swap(d); break; // D<>A + case 25: ++d; break; // D++ + case 26: --d; break; // D-- + case 27: d = ~d; break; // D! + case 28: d = 0; break; // D=0 + case 31: d = r[header[pc++]]; break; // D=R N + case 32: swap(m(b)); break; // *B<>A + case 33: ++m(b); break; // *B++ + case 34: --m(b); break; // *B-- + case 35: m(b) = ~m(b); break; // *B! + case 36: m(b) = 0; break; // *B=0 + case 39: if (f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JT N + case 40: swap(m(c)); break; // *C<>A + case 41: ++m(c); break; // *C++ + case 42: --m(c); break; // *C-- + case 43: m(c) = ~m(c); break; // *C! 
+ case 44: m(c) = 0; break; // *C=0 + case 47: if (!f) pc+=((header[pc]+128)&255)-127; else ++pc; break; // JF N + case 48: swap(h(d)); break; // *D<>A + case 49: ++h(d); break; // *D++ + case 50: --h(d); break; // *D-- + case 51: h(d) = ~h(d); break; // *D! + case 52: h(d) = 0; break; // *D=0 + case 55: r[header[pc++]] = a; break; // R=A N + case 56: return 0 ; // HALT + case 57: outc(a&255); break; // OUT + case 59: a = (a+m(b)+512)*773; break; // HASH + case 60: h(d) = (h(d)+a+512)*773; break; // HASHD + case 63: pc+=((header[pc]+128)&255)-127; break; // JMP N + case 64: a = a; break; // A=A + case 65: a = b; break; // A=B + case 66: a = c; break; // A=C + case 67: a = d; break; // A=D + case 68: a = m(b); break; // A=*B + case 69: a = m(c); break; // A=*C + case 70: a = h(d); break; // A=*D + case 71: a = header[pc++]; break; // A= N + case 72: b = a; break; // B=A + case 73: b = b; break; // B=B + case 74: b = c; break; // B=C + case 75: b = d; break; // B=D + case 76: b = m(b); break; // B=*B + case 77: b = m(c); break; // B=*C + case 78: b = h(d); break; // B=*D + case 79: b = header[pc++]; break; // B= N + case 80: c = a; break; // C=A + case 81: c = b; break; // C=B + case 82: c = c; break; // C=C + case 83: c = d; break; // C=D + case 84: c = m(b); break; // C=*B + case 85: c = m(c); break; // C=*C + case 86: c = h(d); break; // C=*D + case 87: c = header[pc++]; break; // C= N + case 88: d = a; break; // D=A + case 89: d = b; break; // D=B + case 90: d = c; break; // D=C + case 91: d = d; break; // D=D + case 92: d = m(b); break; // D=*B + case 93: d = m(c); break; // D=*C + case 94: d = h(d); break; // D=*D + case 95: d = header[pc++]; break; // D= N + case 96: m(b) = a; break; // *B=A + case 97: m(b) = b; break; // *B=B + case 98: m(b) = c; break; // *B=C + case 99: m(b) = d; break; // *B=D + case 100: m(b) = m(b); break; // *B=*B + case 101: m(b) = m(c); break; // *B=*C + case 102: m(b) = h(d); break; // *B=*D + case 103: m(b) = header[pc++]; break; // 
*B= N + case 104: m(c) = a; break; // *C=A + case 105: m(c) = b; break; // *C=B + case 106: m(c) = c; break; // *C=C + case 107: m(c) = d; break; // *C=D + case 108: m(c) = m(b); break; // *C=*B + case 109: m(c) = m(c); break; // *C=*C + case 110: m(c) = h(d); break; // *C=*D + case 111: m(c) = header[pc++]; break; // *C= N + case 112: h(d) = a; break; // *D=A + case 113: h(d) = b; break; // *D=B + case 114: h(d) = c; break; // *D=C + case 115: h(d) = d; break; // *D=D + case 116: h(d) = m(b); break; // *D=*B + case 117: h(d) = m(c); break; // *D=*C + case 118: h(d) = h(d); break; // *D=*D + case 119: h(d) = header[pc++]; break; // *D= N + case 128: a += a; break; // A+=A + case 129: a += b; break; // A+=B + case 130: a += c; break; // A+=C + case 131: a += d; break; // A+=D + case 132: a += m(b); break; // A+=*B + case 133: a += m(c); break; // A+=*C + case 134: a += h(d); break; // A+=*D + case 135: a += header[pc++]; break; // A+= N + case 136: a -= a; break; // A-=A + case 137: a -= b; break; // A-=B + case 138: a -= c; break; // A-=C + case 139: a -= d; break; // A-=D + case 140: a -= m(b); break; // A-=*B + case 141: a -= m(c); break; // A-=*C + case 142: a -= h(d); break; // A-=*D + case 143: a -= header[pc++]; break; // A-= N + case 144: a *= a; break; // A*=A + case 145: a *= b; break; // A*=B + case 146: a *= c; break; // A*=C + case 147: a *= d; break; // A*=D + case 148: a *= m(b); break; // A*=*B + case 149: a *= m(c); break; // A*=*C + case 150: a *= h(d); break; // A*=*D + case 151: a *= header[pc++]; break; // A*= N + case 152: div(a); break; // A/=A + case 153: div(b); break; // A/=B + case 154: div(c); break; // A/=C + case 155: div(d); break; // A/=D + case 156: div(m(b)); break; // A/=*B + case 157: div(m(c)); break; // A/=*C + case 158: div(h(d)); break; // A/=*D + case 159: div(header[pc++]); break; // A/= N + case 160: mod(a); break; // A%=A + case 161: mod(b); break; // A%=B + case 162: mod(c); break; // A%=C + case 163: mod(d); break; // 
A%=D + case 164: mod(m(b)); break; // A%=*B + case 165: mod(m(c)); break; // A%=*C + case 166: mod(h(d)); break; // A%=*D + case 167: mod(header[pc++]); break; // A%= N + case 168: a &= a; break; // A&=A + case 169: a &= b; break; // A&=B + case 170: a &= c; break; // A&=C + case 171: a &= d; break; // A&=D + case 172: a &= m(b); break; // A&=*B + case 173: a &= m(c); break; // A&=*C + case 174: a &= h(d); break; // A&=*D + case 175: a &= header[pc++]; break; // A&= N + case 176: a &= ~ a; break; // A&~A + case 177: a &= ~ b; break; // A&~B + case 178: a &= ~ c; break; // A&~C + case 179: a &= ~ d; break; // A&~D + case 180: a &= ~ m(b); break; // A&~*B + case 181: a &= ~ m(c); break; // A&~*C + case 182: a &= ~ h(d); break; // A&~*D + case 183: a &= ~ header[pc++]; break; // A&~ N + case 184: a |= a; break; // A|=A + case 185: a |= b; break; // A|=B + case 186: a |= c; break; // A|=C + case 187: a |= d; break; // A|=D + case 188: a |= m(b); break; // A|=*B + case 189: a |= m(c); break; // A|=*C + case 190: a |= h(d); break; // A|=*D + case 191: a |= header[pc++]; break; // A|= N + case 192: a ^= a; break; // A^=A + case 193: a ^= b; break; // A^=B + case 194: a ^= c; break; // A^=C + case 195: a ^= d; break; // A^=D + case 196: a ^= m(b); break; // A^=*B + case 197: a ^= m(c); break; // A^=*C + case 198: a ^= h(d); break; // A^=*D + case 199: a ^= header[pc++]; break; // A^= N + case 200: a <<= (a&31); break; // A<<=A + case 201: a <<= (b&31); break; // A<<=B + case 202: a <<= (c&31); break; // A<<=C + case 203: a <<= (d&31); break; // A<<=D + case 204: a <<= (m(b)&31); break; // A<<=*B + case 205: a <<= (m(c)&31); break; // A<<=*C + case 206: a <<= (h(d)&31); break; // A<<=*D + case 207: a <<= (header[pc++]&31); break; // A<<= N + case 208: a >>= (a&31); break; // A>>=A + case 209: a >>= (b&31); break; // A>>=B + case 210: a >>= (c&31); break; // A>>=C + case 211: a >>= (d&31); break; // A>>=D + case 212: a >>= (m(b)&31); break; // A>>=*B + case 213: a >>= 
(m(c)&31); break; // A>>=*C + case 214: a >>= (h(d)&31); break; // A>>=*D + case 215: a >>= (header[pc++]&31); break; // A>>= N + case 216: f = (a == a); break; // A==A + case 217: f = (a == b); break; // A==B + case 218: f = (a == c); break; // A==C + case 219: f = (a == d); break; // A==D + case 220: f = (a == U32(m(b))); break; // A==*B + case 221: f = (a == U32(m(c))); break; // A==*C + case 222: f = (a == h(d)); break; // A==*D + case 223: f = (a == U32(header[pc++])); break; // A== N + case 224: f = (a < a); break; // A a); break; // A>A + case 233: f = (a > b); break; // A>B + case 234: f = (a > c); break; // A>C + case 235: f = (a > d); break; // A>D + case 236: f = (a > U32(m(b))); break; // A>*B + case 237: f = (a > U32(m(c))); break; // A>*C + case 238: f = (a > h(d)); break; // A>*D + case 239: f = (a > U32(header[pc++])); break; // A> N + case 255: if((pc=hbegin+header[pc]+256*header[pc+1])>=hend)err();break;//LJ + default: err(); + } + return 1; +} + +// Print illegal instruction error message and exit +void ZPAQL::err() { + error("ZPAQL execution error"); +} + +///////////////////////// Predictor ///////////////////////// + +// Initailize model-independent tables +Predictor::Predictor(ZPAQL& zr): + c8(1), hmap4(1), z(zr) { + assert(sizeof(U8)==1); + assert(sizeof(U16)==2); + assert(sizeof(U32)==4); + assert(sizeof(U64)==8); + assert(sizeof(short)==2); + assert(sizeof(int)==4); + + // Initialize tables + dt2k[0]=0; + for (int i=1; i<256; ++i) + dt2k[i]=2048/i; + for (int i=0; i<1024; ++i) + dt[i]=(1<<17)/(i*2+3)*2; + for (int i=0; i<32768; ++i) + stretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000; + for (int i=0; i<4096; ++i) + squasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); + + // Verify floating point math for squash() and stretch() + U32 sqsum=0, stsum=0; + for (int i=32767; i>=0; --i) + stsum=stsum*3+stretch(i); + for (int i=4095; i>=0; --i) + sqsum=sqsum*3+squash(i-2048); + assert(stsum==3887533746u); + 
assert(sqsum==2278286169u); + + pcode=0; + pcode_size=0; +} + +Predictor::~Predictor() { + allocx(pcode, pcode_size, 0); // free executable memory +} + +// Initialize the predictor with a new model in z +void Predictor::init() { + + // Clear old JIT code if any + allocx(pcode, pcode_size, 0); + + // Initialize context hash function + z.inith(); + + // Initialize predictions + for (int i=0; i<256; ++i) h[i]=p[i]=0; + + // Initialize components + for (int i=0; i<256; ++i) // clear old model + comp[i].init(); + int n=z.header[6]; // hsize[0..1] hh hm ph pm n (comp)[n] END 0[128] (hcomp) END + const U8* cp=&z.header[7]; // start of component list + for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); + Component& cr=comp[i]; + switch(cp[0]) { + case CONS: // c + p[i]=(cp[1]-128)*4; + break; + case CM: // sizebits limit + if (cp[1]>32) error("max size for CM is 32"); + cr.cm.resize(1, cp[1]); // packed CM (22 bits) + CMCOUNT (10 bits) + cr.limit=cp[2]*4; + for (size_t j=0; j26) error("max size for ICM is 26"); + cr.limit=1023; + cr.cm.resize(256); + cr.ht.resize(64, cp[1]); + for (size_t j=0; j32 || cp[2]>32) error("max size for MATCH is 32 32"); + cr.cm.resize(1, cp[1]); // index + cr.ht.resize(1, cp[2]); // buf + cr.ht(0)=1; + break; + case AVG: // j k wt + if (cp[1]>=i) error("AVG j >= i"); + if (cp[2]>=i) error("AVG k >= i"); + break; + case MIX2: // sizebits j k rate mask + if (cp[1]>32) error("max size for MIX2 is 32"); + if (cp[3]>=i) error("MIX2 k >= i"); + if (cp[2]>=i) error("MIX2 j >= i"); + cr.c=(size_t(1)<32) error("max size for MIX is 32"); + if (cp[2]>=i) error("MIX j >= i"); + if (cp[3]<1 || cp[3]>i-cp[2]) error("MIX m not in 1..i-j"); + int m=cp[3]; // number of inputs + assert(m>=1); + cr.c=(size_t(1)<32) error("max size for ISSE is 32"); + if (cp[2]>=i) error("ISSE j >= i"); + cr.ht.resize(64, cp[1]); + cr.cm.resize(512); + for (int j=0; j<256; ++j) { + cr.cm[j*2]=1<<15; + cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)<<10); + } + break; 
+ case SSE: // sizebits j start limit + if (cp[1]>32) error("max size for SSE is 32"); + if (cp[2]>=i) error("SSE j >= i"); + if (cp[3]>cp[4]*4) error("SSE start > limit*4"); + cr.cm.resize(32, cp[1]); + cr.limit=cp[4]*4; + for (size_t j=0; j0); + cp+=compsize[*cp]; + assert(cp>=&z.header[7] && cp<&z.header[z.cend]); + } +} + +// Return next bit prediction using interpreted COMP code +int Predictor::predict0() { + assert(c8>=1 && c8<=255); + + // Predict next bit + int n=z.header[6]; + assert(n>0 && n<=255); + const U8* cp=&z.header[7]; + assert(cp[-1]==n); + for (int i=0; i&z.header[0] && cp<&z.header[z.header.isize()-8]); + Component& cr=comp[i]; + switch(cp[0]) { + case CONS: // c + break; + case CM: // sizebits limit + cr.cxt=h[i]^hmap4; + p[i]=stretch(cr.cm(cr.cxt)>>17); + break; + case ICM: // sizebits + assert((hmap4&15)>0); + if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); + cr.cxt=cr.ht[cr.c+(hmap4&15)]; + p[i]=stretch(cr.cm(cr.cxt)>>8); + break; + case MATCH: // sizebits bufbits: a=len, b=offset, c=bit, cxt=bitpos, + // ht=buf, limit=pos + assert(cr.cm.size()==(size_t(1)<>(7-cr.cxt))&1; // predicted bit + p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); + } + break; + case AVG: // j k wt + p[i]=(p[cp[1]]*cp[3]+p[cp[2]]*(256-cp[3]))>>8; + break; + case MIX2: { // sizebits j k rate mask + // c=size cm=wt[size] cxt=input + cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); + assert(cr.cxt=0 && w<65536); + p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; + assert(p[i]>=-2048 && p[i]<2048); + } + break; + case MIX: { // sizebits j m rate mask + // c=size cm=wt[size][m] cxt=index of wt in cm + int m=cp[3]; + assert(m>=1 && m<=i); + cr.cxt=h[i]+(c8&cp[5]); + cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights + assert(cr.cxt<=cr.cm.size()-m); + int* wt=(int*)&cr.cm[cr.cxt]; + p[i]=0; + for (int j=0; j>8)*p[cp[2]+j]; + p[i]=clamp2k(p[i]>>8); + } + break; + case ISSE: { // sizebits j -- c=hi, cxt=bh + assert((hmap4&15)>0); + if (c8==1 || (c8&0xf0)==16) + 
cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); + cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history + int *wt=(int*)&cr.cm[cr.cxt*2]; + p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); + } + break; + case SSE: { // sizebits j start limit + cr.cxt=(h[i]+c8)*32; + int pq=p[cp[2]]+992; + if (pq<0) pq=0; + if (pq>1983) pq=1983; + int wt=pq&63; + pq>>=6; + assert(pq>=0 && pq<=30); + cr.cxt+=pq; + p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt)+(cr.cm(cr.cxt+1)>>10)*wt)>>13); + cr.cxt+=wt>>5; + } + break; + default: + error("component predict not implemented"); + } + cp+=compsize[cp[0]]; + assert(cp<&z.header[z.cend]); + assert(p[i]>=-2048 && p[i]<2048); + } + assert(cp[0]==NONE); + return squash(p[n-1]); +} + +// Update model with decoded bit y (0...1) +void Predictor::update0(int y) { + assert(y==0 || y==1); + assert(c8>=1 && c8<=255); + assert(hmap4>=1 && hmap4<=511); + + // Update components + const U8* cp=&z.header[7]; + int n=z.header[6]; + assert(n>=1 && n<=255); + assert(cp[-1]==n); + for (int i=0; i>8))>>2; + } + break; + case MATCH: // sizebits bufbits: + // a=len, b=offset, c=bit, cm=index, cxt=bitpos + // ht=buf, limit=pos + { + assert(cr.a<=255); + assert(cr.c==0 || cr.c==1); + assert(cr.cxt<8); + assert(cr.cm.size()==(size_t(1)<>5; + int w=cr.a16[cr.cxt]; + w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; + if (w<0) w=0; + if (w>65535) w=65535; + cr.a16[cr.cxt]=w; + } + break; + case MIX: { // sizebits j m rate mask + // cm=wt[size][m], cxt=input + int m=cp[3]; + assert(m>0 && m<=i); + assert(cr.cm.size()==m*cr.c); + assert(cr.cxt+m<=cr.cm.size()); + int err=(y*32767-squash(p[i]))*cp[4]>>4; + int* wt=(int*)&cr.cm[cr.cxt]; + for (int j=0; j>13)); + } + break; + case ISSE: { // sizebits j -- c=hi, cxt=bh + assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); + int err=y*32767-squash(p[i]); + int *wt=(int*)&cr.cm[cr.cxt*2]; + wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); + wt[1]=clamp512k(wt[1]+((err+16)>>5)); + cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); + } + break; + case SSE: // sizebits j 
start limit + train(cr, y); + break; + default: + assert(0); + } + cp+=compsize[cp[0]]; + assert(cp>=&z.header[7] && cp<&z.header[z.cend] + && cp<&z.header[z.header.isize()-8]); + } + assert(cp[0]==NONE); + + // Save bit y in c8, hmap4 + c8+=c8+y; + if (c8>=256) { + z.run(c8-256); + hmap4=1; + c8=1; + for (int i=0; i=16 && c8<32) + hmap4=(hmap4&0xf)<<5|y<<4|1; + else + hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); +} + +// Find cxt row in hash table ht. ht has rows of 16 indexed by the +// low sizebits of cxt with element 0 having the next higher 8 bits for +// collision detection. If not found after 3 adjacent tries, replace the +// row with lowest element 1 as priority. Return index of row. +size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { + assert(ht.size()==size_t(16)<>sizebits&255; + size_t h0=(cxt*16)&(ht.size()-16); + if (ht[h0]==chk) return h0; + size_t h1=h0^16; + if (ht[h1]==chk) return h1; + size_t h2=h0^32; + if (ht[h2]==chk) return h2; + if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) + return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; + else if (ht[h1+1]get(); + if (c<0) error("unexpected end of input"); + curr=curr<<8|c; + } + } + U32 n=buf.size(); + if (n>curr) n=curr; + high=in->read(&buf[0], n); + curr-=high; + low=0; +} + +// Return next bit of decoded input, which has 16 bit probability p of being 1 +int Decoder::decode(int p) { + assert(p>=0 && p<65536); + assert(high>low && low>0); + if (currhigh) error("archive corrupted"); + assert(curr>=low && curr<=high); + U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range + assert(high>mid && mid>=low); + int y=curr<=mid; + if (y) high=mid; else low=mid+1; // pick half + while ((high^low)<0x1000000) { // shift out identical leading bytes + high=high<<8|255; + low=low<<8; + low+=(low==0); + int c=in->get(); + if (c<0) error("unexpected end of file"); + curr=curr<<8|c; + } + return y; +} + +// Decompress 1 byte or -1 at end of input +int Decoder::decompress() { + if (pr.isModeled()) { // n>0 
components? + if (curr==0) { // segment initialization + for (int i=0; i<4; ++i) + curr=curr<<8|in->get(); + } + if (decode(0)) { + if (curr!=0) error("decoding end of stream"); + return -1; + } + else { + int c=1; + while (c<256) { // get 8 bits + int p=pr.predict()*2+1; + c+=c+decode(p); + pr.update(c&1); + } + return c-256; + } + } + else { + if (low==high) loadbuf(); + if (low==high) return -1; + return buf[low++]&255; + } +} + +// Find end of compressed data and return next byte +int Decoder::skip() { + int c=-1; + if (pr.isModeled()) { + while (curr==0) // at start? + curr=in->get(); + while (curr && (c=in->get())>=0) // find 4 zeros + curr=curr<<8|c; + while ((c=in->get())==0) ; // might be more than 4 + return c; + } + else { + if (curr==0) // at start? + for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; + while (curr>0) { + U32 n=BUFSIZE; + if (n>curr) n=curr; + U32 n1=in->read(&buf[0], n); + curr-=n1; + if (n1!=n) return -1; + if (curr==0) + for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; + } + if (c>=0) c=in->get(); + return c; + } +} + +////////////////////// PostProcessor ////////////////////// + +// Copy ph, pm from block header +void PostProcessor::init(int h, int m) { + state=hsize=0; + ph=h; + pm=m; + z.clear(); +} + +// (PASS=0 | PROG=1 psize[0..1] pcomp[0..psize-1]) data... 
EOB=-1 +// Return state: 1=PASS, 2..4=loading PROG, 5=PROG loaded +int PostProcessor::write(int c) { + assert(c>=-1 && c<=255); + switch (state) { + case 0: // initial state + if (c<0) error("Unexpected EOS"); + state=c+1; // 1=PASS, 2=PROG + if (state>2) error("unknown post processing type"); + if (state==1) z.clear(); + break; + case 1: // PASS + z.outc(c); + break; + case 2: // PROG + if (c<0) error("Unexpected EOS"); + hsize=c; // low byte of size + state=3; + break; + case 3: // PROG psize[0] + if (c<0) error("Unexpected EOS"); + hsize+=c*256; // high byte of psize + z.header.resize(hsize+300); + z.cend=8; + z.hbegin=z.hend=z.cend+128; + z.header[4]=ph; + z.header[5]=pm; + state=4; + break; + case 4: // PROG psize[0..1] pcomp[0...] + if (c<0) error("Unexpected EOS"); + assert(z.hend>8; + z.initp(); + state=5; + } + break; + case 5: // PROG ... data + z.run(c); + if (c<0) z.flush(); + break; + } + return state; +} + +/////////////////////// Decompresser ///////////////////// + +// Find the start of a block and return true if found. Set memptr +// to memory used. +bool Decompresser::findBlock(double* memptr) { + assert(state==BLOCK); + + // Find start of block + U32 h1=0x3D49B113, h2=0x29EB7F93, h3=0x2614BE13, h4=0x3828EB13; + // Rolling hashes initialized to hash of first 13 bytes + int c; + while ((c=dec.in->get())!=-1) { + h1=h1*12+c; + h2=h2*20+c; + h3=h3*28+c; + h4=h4*44+c; + if (h1==0xB16B88F1 && h2==0xFF5376F1 && h3==0x72AC5BF1 && h4==0x2F909AF1) + break; // hash of 16 byte string + } + if (c==-1) return false; + + // Read header + if ((c=dec.in->get())!=1 && c!=2) error("unsupported ZPAQ level"); + if (dec.in->get()!=1) error("unsupported ZPAQL type"); + z.read(dec.in); + if (c==1 && z.header.isize()>6 && z.header[6]==0) + error("ZPAQ level 1 requires at least 1 component"); + if (memptr) *memptr=z.memory(); + state=FILENAME; + decode_state=FIRSTSEG; + return true; +} + +// Read the start of a segment (1) or end of block code (255). 
+// If a segment is found, write the filename and return true, else false. +bool Decompresser::findFilename(Writer* filename) { + assert(state==FILENAME); + int c=dec.in->get(); + if (c==1) { // segment found + while (true) { + c=dec.in->get(); + if (c==-1) error("unexpected EOF"); + if (c==0) { + state=COMMENT; + return true; + } + if (filename) filename->put(c); + } + } + else if (c==255) { // end of block found + state=BLOCK; + return false; + } + else + error("missing segment or end of block"); + return false; +} + +// Read the comment from the segment header +void Decompresser::readComment(Writer* comment) { + assert(state==COMMENT); + state=DATA; + while (true) { + int c=dec.in->get(); + if (c==-1) error("unexpected EOF"); + if (c==0) break; + if (comment) comment->put(c); + } + if (dec.in->get()!=0) error("missing reserved byte"); +} + +// Decompress n bytes, or all if n < 0. Return false if done +bool Decompresser::decompress(int n) { + assert(state==DATA); + assert(decode_state!=SKIP); + + // Initialize models to start decompressing block + if (decode_state==FIRSTSEG) { + dec.init(); + assert(z.header.size()>5); + pp.init(z.header[4], z.header[5]); + decode_state=SEG; + } + + // Decompress and load PCOMP into postprocessor + while ((pp.getState()&3)!=1) + pp.write(dec.decompress()); + + // Decompress n bytes, or all if n < 0 + while (n) { + int c=dec.decompress(); + pp.write(c); + if (c==-1) { + state=SEGEND; + return false; + } + if (n>0) --n; + } + return true; +} + +// Read end of block. If a SHA1 checksum is present, write 1 and the +// 20 byte checksum into sha1string, else write 0 in first byte. +// If sha1string is 0 then discard it. 
+void Decompresser::readSegmentEnd(char* sha1string) { + assert(state==DATA || state==SEGEND); + + // Skip remaining data if any and get next byte + int c=0; + if (state==DATA) { + c=dec.skip(); + decode_state=SKIP; + } + else if (state==SEGEND) + c=dec.in->get(); + state=FILENAME; + + // Read checksum + if (c==254) { + if (sha1string) sha1string[0]=0; // no checksum + } + else if (c==253) { + if (sha1string) sha1string[0]=1; + for (int i=1; i<=20; ++i) { + c=dec.in->get(); + if (sha1string) sha1string[i]=c; + } + } + else + error("missing end of segment marker"); +} + +/////////////////////////// decompress() ///////////////////// + +void decompress(Reader* in, Writer* out) { + Decompresser d; + d.setInput(in); + d.setOutput(out); + while (d.findBlock()) { // don't calculate memory + while (d.findFilename()) { // discard filename + d.readComment(); // discard comment + d.decompress(); // to end of segment + d.readSegmentEnd(); // discard sha1string + } + } +} + +////////////////////// Encoder //////////////////// + +// Initialize for start of block +void Encoder::init() { + low=1; + high=0xFFFFFFFF; + pr.init(); + if (!pr.isModeled()) low=0, buf.resize(1<<16); +} + +// compress bit y having probability p/64K +void Encoder::encode(int y, int p) { + assert(out); + assert(p>=0 && p<65536); + assert(y==0 || y==1); + assert(high>low && low>0); + U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range + assert(high>mid && mid>=low); + if (y) high=mid; else low=mid+1; // pick half + while ((high^low)<0x1000000) { // write identical leading bytes + out->put(high>>24); // same as low>>24 + high=high<<8|255; + low=low<<8; + low+=(low==0); // so we don't code 4 0 bytes in a row + } +} + +// compress byte c (0..255 or -1=EOS) +void Encoder::compress(int c) { + assert(out); + if (pr.isModeled()) { + if (c==-1) + encode(1, 0); + else { + assert(c>=0 && c<=255); + encode(0, 0); + for (int i=7; i>=0; --i) { + int p=pr.predict()*2+1; + assert(p>0 && p<65536); + int y=c>>i&1; 
+ encode(y, p); + pr.update(y); + } + } + } + else { + if (c<0 || low==buf.size()) { + out->put((low>>24)&255); + out->put((low>>16)&255); + out->put((low>>8)&255); + out->put(low&255); + out->write(&buf[0], low); + low=0; + } + if (c>=0) buf[low++]=c; + } +} + +///////////////////// Compressor ////////////////////// + +// Write 13 byte start tag +// "\x37\x6B\x53\x74\xA0\x31\x83\xD3\x8C\xB2\x28\xB0\xD3" +void Compressor::writeTag() { + assert(state==INIT); + enc.out->put(0x37); + enc.out->put(0x6b); + enc.out->put(0x53); + enc.out->put(0x74); + enc.out->put(0xa0); + enc.out->put(0x31); + enc.out->put(0x83); + enc.out->put(0xd3); + enc.out->put(0x8c); + enc.out->put(0xb2); + enc.out->put(0x28); + enc.out->put(0xb0); + enc.out->put(0xd3); +} + +void Compressor::startBlock(int level) { + + // Model 1 - min.cfg + static const char models[]={ + 26,0,1,2,0,0,2,3,16,8,19,0,0,96,4,28, + 59,10,59,112,25,10,59,10,59,112,56,0, + + // Model 2 - mid.cfg + 69,0,3,3,0,0,8,3,5,8,13,0,8,17,1,8, + 18,2,8,18,3,8,19,4,4,22,24,7,16,0,7,24, + -1,0,17,104,74,4,95,1,59,112,10,25,59,112,10,25, + 59,112,10,25,59,112,10,25,59,112,10,25,59,10,59,112, + 25,69,-49,8,112,56,0, + + // Model 3 - max.cfg + -60,0,5,9,0,0,22,1,-96,3,5,8,13,1,8,16, + 2,8,18,3,8,19,4,8,19,5,8,20,6,4,22,24, + 3,17,8,19,9,3,13,3,13,3,13,3,14,7,16,0, + 15,24,-1,7,8,0,16,10,-1,6,0,15,16,24,0,9, + 8,17,32,-1,6,8,17,18,16,-1,9,16,19,32,-1,6, + 0,19,20,16,0,0,17,104,74,4,95,2,59,112,10,25, + 59,112,10,25,59,112,10,25,59,112,10,25,59,112,10,25, + 59,10,59,112,10,25,59,112,10,25,69,-73,32,-17,64,47, + 14,-25,91,47,10,25,60,26,48,-122,-105,20,112,63,9,70, + -33,0,39,3,25,112,26,52,25,25,74,10,4,59,112,25, + 10,4,59,112,25,10,4,59,112,25,65,-113,-44,72,4,59, + 112,8,-113,-40,8,68,-81,60,60,25,69,-49,9,112,25,25, + 25,25,25,112,56,0, + + 0,0}; // 0,0 = end of list + + if (level<1) error("compression level must be at least 1"); + const char* p=models; + int i; + for (i=1; iput('z'); + enc.out->put('P'); + enc.out->put('Q'); + 
enc.out->put(1+(len>6 && hcomp[6]==0)); // level 1 or 2 + enc.out->put(1); + for (int i=0; iput(hcomp[i]); + MemoryReader m(hcomp); + z.read(&m); + state=BLOCK1; +} + +// Write a segment header +void Compressor::startSegment(const char* filename, const char* comment) { + assert(state==BLOCK1 || state==BLOCK2); + enc.out->put(1); + while (filename && *filename) + enc.out->put(*filename++); + enc.out->put(0); + while (comment && *comment) + enc.out->put(*comment++); + enc.out->put(0); + enc.out->put(0); + if (state==BLOCK1) state=SEG1; + if (state==BLOCK2) state=SEG2; +} + +// Initialize encoding and write pcomp to first segment +// If len is 0 then length is encoded in pcomp[0..1] +void Compressor::postProcess(const char* pcomp, int len) { + assert(state==SEG1); + enc.init(); + if (pcomp) { + enc.compress(1); + if (len<=0) { + len=toU16(pcomp); + pcomp+=2; + } + enc.compress(len&255); + enc.compress((len>>8)&255); + for (int i=0; iget())>=0) { + enc.compress(ch); + if (n>0) --n; + } + return ch>=0; +} + +// End segment, write sha1string if present +void Compressor::endSegment(const char* sha1string) { + assert(state==SEG2); + enc.compress(-1); + enc.out->put(0); + enc.out->put(0); + enc.out->put(0); + enc.out->put(0); + if (sha1string) { + enc.out->put(253); + for (int i=0; i<20; ++i) + enc.out->put(sha1string[i]); + } + else + enc.out->put(254); + state=BLOCK2; +} + +// End block +void Compressor::endBlock() { + assert(state==BLOCK2); + enc.out->put(255); + state=INIT; +} + +/////////////////////////// compress() /////////////////////// + +void compress(Reader* in, Writer* out, int level) { + assert(level>=1); + Compressor c; + c.setInput(in); + c.setOutput(out); + c.startBlock(level); + c.startSegment(); + c.postProcess(); + c.compress(); + c.endSegment(); + c.endBlock(); +} + +//////////////////////// ZPAQL::assemble() //////////////////// + +#ifndef NOJIT +/* +assemble(); + +Assembles the ZPAQL code in hcomp[0..hlen-1] and stores x86-32 or x86-64 +code in 
rcode[0..rcode_size-1]. Execution begins at rcode[0]. It will not +write beyond the end of rcode, but in any case it returns the number of +bytes that would have been written. It returns 0 in case of error. + +The assembled code implements run() and returns 1 if successful or +0 if the ZPAQL code executes an invalid instruction or jumps out of +bounds. + +A ZPAQL virtual machine has the following state. All values are +unsigned and initially 0: + + a, b, c, d: 32 bit registers (pointed to by their respective parameters) + f: 1 bit flag register (pointed to) + r[0..255]: 32 bit registers + m[0..msize-1]: 8 bit registers, where msize is a power of 2 + h[0..hsize-1]: 32 bit registers, where hsize is a power of 2 + out: pointer to a Writer + sha1: pointer to a SHA1 + +Generally a ZPAQL machine is used to compute contexts which are +placed in h. A second machine might post-process, and write its +output to out and sha1. In either case, a machine is called with +its input in a, representing a single byte (0..255) or +(for a postprocessor) EOF (0xffffffff). Execution returns after a +ZPAQL halt instruction. + +ZPAQL instructions are 1 byte unless the last 3 bits are 1. +In this case, a second operand byte follows. Opcode 255 is +the only 3 byte instruction. They are organized: + + 00dddxxx = unary opcode xxx on destination ddd (ddd < 111) + 00111xxx = special instruction xxx + 01dddsss = assignment: ddd = sss (ddd < 111) + 1xxxxsss = operation xxxx from sss to a + +The meanings of sss and ddd are as follows: + + 000 = a (accumulator) + 001 = b + 010 = c + 011 = d + 100 = *b (means m[b mod msize]) + 101 = *c (means m[c mod msize]) + 110 = *d (means h[d mod hsize]) + 111 = n (constant 0..255 in second byte of instruction) + +For example, 01001110 assigns *d to b. 
The other instructions xxx +are as follows: + +Group 00dddxxx where ddd < 111 and xxx is: + 000 = ddd<>a, swap with a (except 00000000 is an error, and swap + with *b or *c leaves the high bits of a unchanged) + 001 = ddd++, increment + 010 = ddd--, decrement + 011 = ddd!, not (invert all bits) + 100 = ddd=0, clear (set all bits of ddd to 0) + 101 = not used (error) + 110 = not used + 111 = ddd=r n, assign from r[n] to ddd, n=0..255 in next opcode byte +Except: + 00100111 = jt n, jump if f is true (n = -128..127, relative to next opcode) + 00101111 = jf n, jump if f is false (n = -128..127) + 00110111 = r=a n, assign r[n] = a (n = 0..255) + +Group 00111xxx where xxx is: + 000 = halt (return) + 001 = output a + 010 = not used + 011 = hash: a = (a + *b + 512) * 773 + 100 = hashd: *d = (*d + a + 512) * 773 + 101 = not used + 110 = not used + 111 = unconditional jump (n = -128 to 127, relative to next opcode) + +Group 1xxxxsss where xxxx is: + 0000 = a += sss (add, subtract, multiply, divide sss to a) + 0001 = a -= sss + 0010 = a *= sss + 0011 = a /= sss (unsigned, except set a = 0 if sss is 0) + 0100 = a %= sss (remainder, except set a = 0 if sss is 0) + 0101 = a &= sss (bitwise AND) + 0110 = a &= ~sss (bitwise AND with complement of sss) + 0111 = a |= sss (bitwise OR) + 1000 = a ^= sss (bitwise XOR) + 1001 = a <<= (sss % 32) (left shift by low 5 bits of sss) + 1010 = a >>= (sss % 32) (unsigned, zero bits shifted in) + 1011 = a == sss (compare, set f = true if equal or false otherwise) + 1100 = a < sss (unsigned compare, result in f) + 1101 = a > sss (unsigned compare) + 1110 = not used + 1111 = not used except 11111111 is a 3 byte jump to the absolute address + in the next 2 bytes in little-endian (LSB first) order. + +assemble() translates ZPAQL to x86 code (32 or 64 bit) to be executed by run(). 
+Registers are mapped as follows: + + eax = source sss from *b, *c, *d or sometimes n + ecx = pointer to destination *b, *c, *d, or spare + edx = a + ebx = f (1 for true, 0 for false) + esp = stack pointer + ebp = d + esi = b + edi = c + +run() saves non-volatile registers (ebp, esi, edi, ebx) on the stack, +loads a, b, c, d, f, and executes the translated instructions. +A halt instruction saves a, b, c, d, f, pops the saved registers +and returns. Invalid instructions or jumps outside of the range +of the ZPAQL code call libzpaq::error(). + +In 64 bit mode, the following additional registers are used: + + r12 = h + r14 = r + r15 = m + +*/ + +// Called by out +static void flush1(ZPAQL* z) { + z->flush(); +} + +// return true if op is an undefined ZPAQL instruction +static bool iserr(int op) { + return op==0 || (op>=120 && op<=127) || (op>=240 && op<=254) + || op==58 || (op<64 && (op%8==5 || op%8==6)); +} + +// Write k bytes of x to rcode[o++] MSB first +static void put(U8* rcode, int n, int& o, U32 x, int k) { + while (k-->0) { + if (o>(k*8))&255; + ++o; + } +} + +// Write 4 bytes of x to rcode[o++] LSB first +static void put4lsb(U8* rcode, int n, int& o, U32 x) { + for (int k=0; k<4; ++k) { + if (o>(k*8))&255; + ++o; + } +} + +// Write a 1-4 byte x86 opcode without or with an 4 byte operand +// to rcode[o...] 
+#define put1(x) put(rcode, rcode_size, o, (x), 1) +#define put2(x) put(rcode, rcode_size, o, (x), 2) +#define put3(x) put(rcode, rcode_size, o, (x), 3) +#define put4(x) put(rcode, rcode_size, o, (x), 4) +#define put5(x,y) put4(x), put1(y) +#define put6(x,y) put4(x), put2(y) +#define put4r(x) put4lsb(rcode, rcode_size, o, x) +#define puta(x) t=U32(size_t(x)), put4r(t) +#define put1a(x,y) put1(x), puta(y) +#define put2a(x,y) put2(x), puta(y) +#define put3a(x,y) put3(x), puta(y) +#define put4a(x,y) put4(x), puta(y) +#define put5a(x,y,z) put4(x), put1(y), puta(z) +#define put2l(x,y) put2(x), t=U32(size_t(y)), put4r(t), \ + t=U32(size_t(y)>>(S*4)), put4r(t) + +// Assemble ZPAQL in in the HCOMP section of header to rcode, +// but do not write beyond rcode_size. Return the number of +// bytes output or that would have been output. +// Execution starts at rcode[0] and returns 1 if successful or 0 +// in case of a ZPAQL execution error. +int ZPAQL::assemble() { + + // x86? (not foolproof) + const int S=sizeof(char*); // 4 = x86, 8 = x86-64 + U32 t=0x12345678; + if (*(char*)&t!=0x78 || (S!=4 && S!=8)) + error("JIT supported only for x86-32 and x86-64"); + + const U8* hcomp=&header[hbegin]; + const int hlen=hend-hbegin+1; + const int msize=m.size(); + const int hsize=h.size(); + const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax.. 
+ Array it(hlen); // hcomp -> rcode locations + int done=0; // number of instructions assembled (0..hlen) + int o=5; // rcode output index, reserve space for jmp + + // Code for the halt instruction (restore registers and return) + const int halt=o; + if (S==8) { + put2l(0x48b9, &a); // mov rcx, a + put2(0x8911); // mov [rcx], edx + put2l(0x48b9, &b); // mov rcx, b + put2(0x8931); // mov [rcx], esi + put2l(0x48b9, &c); // mov rcx, c + put2(0x8939); // mov [rcx], edi + put2l(0x48b9, &d); // mov rcx, d + put2(0x8929); // mov [rcx], ebp + put2l(0x48b9, &f); // mov rcx, f + put2(0x8919); // mov [rcx], ebx + put4(0x4883c438); // add rsp, 56 + put2(0x415f); // pop r15 + put2(0x415e); // pop r14 + put2(0x415d); // pop r13 + put2(0x415c); // pop r12 + } + else { + put2a(0x8915, &a); // mov [a], edx + put2a(0x8935, &b); // mov [b], esi + put2a(0x893d, &c); // mov [c], edi + put2a(0x892d, &d); // mov [d], ebp + put2a(0x891d, &f); // mov [f], ebx + put3(0x83c43c); // add esp, 60 + } + put1(0x5d); // pop ebp + put1(0x5b); // pop ebx + put1(0x5f); // pop edi + put1(0x5e); // pop esi + put1(0xc3); // ret + + // Code for the out instruction. + // Store a=edx at outbuf[bufptr++]. If full, call flush1(). 
+ const int outlabel=o; + if (S==8) { + put2l(0x48b8, &outbuf[0]);// mov rax, outbuf.p + put2l(0x49ba, &bufptr); // mov r10, &bufptr + put3(0x418b0a); // mov ecx, [r10] + put3(0x891408); // mov [rax+rcx], edx + put2(0xffc1); // inc ecx + put3(0x41890a); // mov [r10], ecx + put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() + put2(0x7401); // jz L1 + put1(0xc3); // ret + put4(0x4883ec30); // L1: sub esp, 48 ; call flush1(this) + put4(0x48893c24); // mov [rsp], rdi + put5(0x48897424,8); // mov [rsp+8], rsi + put5(0x48895424,16); // mov [rsp+16], rdx + put5(0x48894c24,24); // mov [rsp+24], rcx +#ifndef _WIN32 + put2l(0x48bf, this); // mov rdi, this +#else // Windows + put2l(0x48b9, this); // mov rcx, this +#endif + put2l(0x49bb, &flush1); // mov r11, &flush1 + put3(0x41ffd3); // call r11 + put5(0x488b4c24,24); // mov rcx, [rsp+24] + put5(0x488b5424,16); // mov rdx, [rsp+16] + put5(0x488b7424,8); // mov rsi, [rsp+8] + put4(0x488b3c24); // mov rdi, [rsp] + put4(0x4883c430); // add esp, 48 + put1(0xc3); // ret + } + else { + put1a(0xb8, &outbuf[0]); // mov eax, outbuf.p + put2a(0x8b0d, &bufptr); // mov ecx, [bufptr] + put3(0x891408); // mov [eax+ecx], edx + put2(0xffc1); // inc ecx + put2a(0x890d, &bufptr); // mov [bufptr], ecx + put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() + put2(0x7401); // jz L1 + put1(0xc3); // ret + put3(0x83ec08); // L1: sub esp, 8 + put4(0x89542404); // mov [esp+4], edx + put3a(0xc70424, this); // mov [esp], this + put1a(0xb8, &flush1); // mov eax, &flush1 + put2(0xffd0); // call eax + put4(0x8b542404); // mov edx, [esp+4] + put3(0x83c408); // add esp, 8 + put1(0xc3); // ret + } + + // Set it[i]=1 for each ZPAQL instruction reachable from the previous + // instruction + 2 if reachable by a jump (or 3 if both). 
+ it[0]=2; + assert(hlen>0 && hcomp[hlen-1]==0); // ends with error + do { + done=0; + const int NONE=0x80000000; + for (int i=0; i>24);// jt,jf,jmp + if (op==63) next1=NONE; // jmp + if ((next2<0 || next2>=hlen) && next2!=NONE) next2=hlen-1; // error + if (next1!=NONE && !(it[next1]&1)) it[next1]|=1, ++done; + if (next2!=NONE && !(it[next2]&2)) it[next2]|=2, ++done; + } + } + } while (done>0); + + // Set it[i] bits 2-3 to 4, 8, or 12 if a comparison + // (<, >, == respectively) does not need to save the result in f, + // or if a conditional jump (jt, jf) does not need to read f. + // This is true if a comparison is followed directly by a jt/jf, + // the jt/jf is not a jump target, the byte before is not a jump + // target (for a 2 byte comparison), and for the comparison instruction + // if both paths after the jt/jf lead to another comparison or error + // before another jt/jf. At most hlen steps are traced because after + // that it must be an infinite loop. + for (int i=0; i=216 && op1<240 && (op2==39 || op2==47) + && it[i2]==1 && (i2==i+1 || it[i+1]==0)) { + int code=(op1-208)/8*4; // 4,8,12 is ==,<,> + it[i2]+=code; // OK to test CF, ZF instead of f + for (int j=0; j<2 && code; ++j) { // trace each path from i2 + int k=i2+2; // branch not taken + if (j==1) k=i2+2+(hcomp[i2+1]<<24>>24); // branch taken + for (int l=0; l=hlen) break; // out of bounds, pass + const int op=hcomp[k]; + if (op==39 || op==47) code=0; // jt,jf, fail + else if (op>=216 && op<240) break; // ==,<,>, pass + else if (iserr(op)) break; // error, pass + else if (op==255) k=hcomp[k+1]+256*hcomp[k+2]; // lj + else if (op==63) k=k+2+(hcomp[k+1]<<24>>24); // jmp + else if (op==56) k=0; // halt + else k=k+1+(op%8==7); // ordinary instruction + } + } + it[i]+=code; // if > 0 then OK to not save flags in f (bl) + } + } + + // Start of run(): Save x86 and load ZPAQL registers + const int start=o; + assert(start>=16); + put1(0x56); // push esi/rsi + put1(0x57); // push edi/rdi + put1(0x53); // push 
ebx/rbx + put1(0x55); // push ebp/rbp + if (S==8) { + put2(0x4154); // push r12 + put2(0x4155); // push r13 + put2(0x4156); // push r14 + put2(0x4157); // push r15 + put4(0x4883ec38); // sub rsp, 56 + put2l(0x48b8, &a); // mov rax, a + put2(0x8b10); // mov edx, [rax] + put2l(0x48b8, &b); // mov rax, b + put2(0x8b30); // mov esi, [rax] + put2l(0x48b8, &c); // mov rax, c + put2(0x8b38); // mov edi, [rax] + put2l(0x48b8, &d); // mov rax, d + put2(0x8b28); // mov ebp, [rax] + put2l(0x48b8, &f); // mov rax, f + put2(0x8b18); // mov ebx, [rax] + put2l(0x49bc, &h[0]); // mov r12, h + put2l(0x49bd, &outbuf[0]); // mov r13, outbuf.p + put2l(0x49be, &r[0]); // mov r14, r + put2l(0x49bf, &m[0]); // mov r15, m + } + else { + put3(0x83ec3c); // sub esp, 60 + put2a(0x8b15, &a); // mov edx, [a] + put2a(0x8b35, &b); // mov esi, [b] + put2a(0x8b3d, &c); // mov edi, [c] + put2a(0x8b2d, &d); // mov ebp, [d] + put2a(0x8b1d, &f); // mov ebx, [f] + } + + // Assemble in multiple passes until every byte of hcomp has a translation + for (int istart=0; istarti); + assert(i>=0 && i=16) { + if (i>istart) { + int a=code-o; + if (a>-120 && a<120) + put2(0xeb00+((a-2)&255)); // jmp short o + else + put1a(0xe9, a-5); // jmp near o + } + break; + } + + // Else assemble the instruction at hcode[i] to rcode[o] + else { + assert(i>=0 && i0 && it[i]<16); + assert(o>=16); + it[i]=o; + ++done; + const int op=hcomp[i]; + const int arg=hcomp[i+1]+((op==255)?256*hcomp[i+2]:0); + const int ddd=op/8%8; + const int sss=op%8; + + // error instruction: return 0 + if (iserr(op)) { + put2(0x31c0); // xor eax, eax + put1a(0xe9, halt-o-4); // jmp near halt + continue; + } + + // Load source *b, *c, *d, or hash (*b) into eax except: + // {a,b,c,d}=*d, a{+,-,*,&,|,^,=,==,>,>}=*d: load address to eax + // {a,b,c,d}={*b,*c}: load source into ddd + if (op==59 || (op>=64 && op<240 && op%8>=4 && op%8<7)) { + put2(0x89c0+8*regcode[sss-3+(op==59)]); // mov eax, {esi,edi,ebp} + const int sz=(sss==6?hsize:msize)-1; + if 
(sz>=128) put1a(0x25, sz); // and eax, dword msize-1 + else put3(0x83e000+sz); // and eax, byte msize-1 + const int move=(op>=64 && op<112); // = or else ddd is eax + if (sss<6) { // ddd={a,b,c,d,*b,*c} + if (S==8) put5(0x410fb604+8*move*regcode[ddd],0x07); + // movzx ddd, byte [r15+rax] + else put3a(0x0fb680+8*move*regcode[ddd], &m[0]); + // movzx ddd, byte [m+eax] + } + else if ((0x06587000>>(op/8))&1) {// {*b,*c,*d,a/,a%,a&~,a<<,a>>}=*d + if (S==8) put4(0x418b0484); // mov eax, [r12+rax*4] + else put3a(0x8b0485, &h[0]); // mov eax, [h+eax*4] + } + } + + // Load destination address *b, *c, *d or hashd (*d) into ecx + if ((op>=32 && op<56 && op%8<5) || (op>=96 && op<120) || op==60) { + put2(0x89c1+8*regcode[op/8%8-3-(op==60)]);// mov ecx,{esi,edi,ebp} + const int sz=(ddd==6||op==60?hsize:msize)-1; + if (sz>=128) put2a(0x81e1, sz); // and ecx, dword sz + else put3(0x83e100+sz); // and ecx, byte sz + if (op/8%8==6 || op==60) { // *d + if (S==8) put4(0x498d0c8c); // lea rcx, [r12+rcx*4] + else put3a(0x8d0c8d, &h[0]); // lea ecx, [ecx*4+h] + } + else { // *b, *c + if (S==8) put4(0x498d0c0f); // lea rcx, [r15+rcx] + else put2a(0x8d89, &m[0]); // lea ecx, [ecx+h] + } + } + + // Translate by opcode + switch((op/8)&31) { + case 0: // ddd = a + case 1: // ddd = b + case 2: // ddd = c + case 3: // ddd = d + switch(sss) { + case 0: // ddd<>a (swap) + put2(0x87d0+regcode[ddd]); // xchg edx, ddd + break; + case 1: // ddd++ + put2(0xffc0+regcode[ddd]); // inc ddd + break; + case 2: // ddd-- + put2(0xffc8+regcode[ddd]); // dec ddd + break; + case 3: // ddd! 
+ put2(0xf7d0+regcode[ddd]); // not ddd + break; + case 4: // ddd=0 + put2(0x31c0+9*regcode[ddd]); // xor ddd,ddd + break; + case 7: // ddd=r n + if (S==8) + put3a(0x418b86+8*regcode[ddd], arg*4); // mov ddd, [r14+n*4] + else + put2a(0x8b05+8*regcode[ddd], (&r[arg]));//mov ddd, [r+n] + break; + } + break; + case 4: // ddd = *b + case 5: // ddd = *c + switch(sss) { + case 0: // ddd<>a (swap) + put2(0x8611); // xchg dl, [ecx] + break; + case 1: // ddd++ + put2(0xfe01); // inc byte [ecx] + break; + case 2: // ddd-- + put2(0xfe09); // dec byte [ecx] + break; + case 3: // ddd! + put2(0xf611); // not byte [ecx] + break; + case 4: // ddd=0 + put2(0x31c0); // xor eax, eax + put2(0x8801); // mov [ecx], al + break; + case 7: // jt, jf + { + assert(code>=0 && code<16); + const int jtab[2][4]={{5,4,2,7},{4,5,3,6}}; + // jnz,je,jb,ja, jz,jne,jae,jbe + if (code<4) put2(0x84db); // test bl, bl + if (arg>=128 && arg-257-i>=0 && o-it[arg-257-i]<120) + put2(0x7000+256*jtab[op==47][code/4]); // jx short 0 + else + put2a(0x0f80+jtab[op==47][code/4], 0); // jx near 0 + break; + } + } + break; + case 6: // ddd = *d + switch(sss) { + case 0: // ddd<>a (swap) + put2(0x8711); // xchg edx, [ecx] + break; + case 1: // ddd++ + put2(0xff01); // inc dword [ecx] + break; + case 2: // ddd-- + put2(0xff09); // dec dword [ecx] + break; + case 3: // ddd! 
+ put2(0xf711); // not dword [ecx] + break; + case 4: // ddd=0 + put2(0x31c0); // xor eax, eax + put2(0x8901); // mov [ecx], eax + break; + case 7: // ddd=r n + if (S==8) + put3a(0x418996, arg*4); // mov [r14+n*4], edx + else + put2a(0x8915, &r[arg]); // mov [r+n], edx + break; + } + break; + case 7: // special + switch(op) { + case 56: // halt + put1a(0xb8, 1); // mov eax, 1 + put1a(0xe9, halt-o-4); // jmp near halt + break; + case 57: // out + put1a(0xe8, outlabel-o-4);// call outlabel + break; + case 59: // hash: a = (a + *b + 512) * 773 + put3a(0x8d8410, 512); // lea edx, [eax+edx+512] + put2a(0x69d0, 773); // imul edx, eax, 773 + break; + case 60: // hashd: *d = (*d + a + 512) * 773 + put2(0x8b01); // mov eax, [ecx] + put3a(0x8d8410, 512); // lea eax, [eax+edx+512] + put2a(0x69c0, 773); // imul eax, eax, 773 + put2(0x8901); // mov [ecx], eax + break; + case 63: // jmp + put1a(0xe9, 0); // jmp near 0 (fill in target later) + break; + } + break; + case 8: // a= + case 9: // b= + case 10: // c= + case 11: // d= + if (sss==7) // n + put1a(0xb8+regcode[ddd], arg); // mov ddd, n + else if (sss==6) { // *d + if (S==8) + put4(0x418b0484+(regcode[ddd]<<11)); // mov ddd, [r12+rax*4] + else + put3a(0x8b0485+(regcode[ddd]<<11),&h[0]);// mov ddd, [h+eax*4] + } + else if (sss<4) // a, b, c, d + put2(0x89c0+regcode[ddd]+8*regcode[sss]);// mov ddd,sss + break; + case 12: // *b= + case 13: // *c= + if (sss==7) put3(0xc60100+arg); // mov byte [ecx], n + else if (sss==0) put2(0x8811); // mov byte [ecx], dl + else { + if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss + put2(0x8801); // mov byte [ecx], al + } + break; + case 14: // *d= + if (sss<7) put2(0x8901+8*regcode[sss]); // mov [ecx], sss + else put2a(0xc701, arg); // mov dword [ecx], n + break; + case 15: break; // not used + case 16: // a+= + if (sss==6) { + if (S==8) put4(0x41031484); // add edx, [r12+rax*4] + else put3a(0x031485, &h[0]); // add edx, [h+eax*4] + } + else if (sss<7) put2(0x01c2+8*regcode[sss]);// add 
edx, sss + else if (arg>128) put2a(0x81c2, arg); // add edx, n + else put3(0x83c200+arg); // add edx, byte n + break; + case 17: // a-= + if (sss==6) { + if (S==8) put4(0x412b1484); // sub edx, [r12+rax*4] + else put3a(0x2b1485, &h[0]); // sub edx, [h+eax*4] + } + else if (sss<7) put2(0x29c2+8*regcode[sss]);// sub edx, sss + else if (arg>=128) put2a(0x81ea, arg); // sub edx, n + else put3(0x83ea00+arg); // sub edx, byte n + break; + case 18: // a*= + if (sss==6) { + if (S==8) put5(0x410faf14,0x84); // imul edx, [r12+rax*4] + else put4a(0x0faf1485, &h[0]); // imul edx, [h+eax*4] + } + else if (sss<7) put3(0x0fafd0+regcode[sss]);// imul edx, sss + else if (arg>=128) put2a(0x69d2, arg); // imul edx, n + else put3(0x6bd200+arg); // imul edx, byte n + break; + case 19: // a/= + case 20: // a%= + if (sss<7) put2(0x89c1+8*regcode[sss]); // mov ecx, sss + else put1a(0xb9, arg); // mov ecx, n + put2(0x85c9); // test ecx, ecx + put3(0x0f44d1); // cmovz edx, ecx + put2(0x7408-2*(op/8==20)); // jz (over rest) + put2(0x89d0); // mov eax, edx + put2(0x31d2); // xor edx, edx + put2(0xf7f1); // div ecx + if (op/8==19) put2(0x89c2); // mov edx, eax + break; + case 21: // a&= + if (sss==6) { + if (S==8) put4(0x41231484); // and edx, [r12+rax*4] + else put3a(0x231485, &h[0]); // and edx, [h+eax*4] + } + else if (sss<7) put2(0x21c2+8*regcode[sss]);// and edx, sss + else if (arg>=128) put2a(0x81e2, arg); // and edx, n + else put3(0x83e200+arg); // and edx, byte n + break; + case 22: // a&~ + if (sss==7) { + if (arg<128) put3(0x83e200+(~arg&255));// and edx, byte ~n + else put2a(0x81e2, ~arg); // and edx, ~n + } + else { + if (sss<4) put2(0x89c0+8*regcode[sss]);// mov eax, sss + put2(0xf7d0); // not eax + put2(0x21c2); // and edx, eax + } + break; + case 23: // a|= + if (sss==6) { + if (S==8) put4(0x410b1484); // or edx, [r12+rax*4] + else put3a(0x0b1485, &h[0]); // or edx, [h+eax*4] + } + else if (sss<7) put2(0x09c2+8*regcode[sss]);// or edx, sss + else if (arg>=128) put2a(0x81ca, 
arg); // or edx, n + else put3(0x83ca00+arg); // or edx, byte n + break; + case 24: // a^= + if (sss==6) { + if (S==8) put4(0x41331484); // xor edx, [r12+rax*4] + else put3a(0x331485, &h[0]); // xor edx, [h+eax*4] + } + else if (sss<7) put2(0x31c2+8*regcode[sss]);// xor edx, sss + else if (arg>=128) put2a(0x81f2, arg); // xor edx, byte n + else put3(0x83f200+arg); // xor edx, n + break; + case 25: // a<<= + case 26: // a>>= + if (sss==7) // sss = n + put3(0xc1e200+8*256*(op/8==26)+arg); // shl/shr n + else { + put2(0x89c1+8*regcode[sss]); // mov ecx, sss + put2(0xd3e2+8*(op/8==26)); // shl/shr edx, cl + } + break; + case 27: // a== + case 28: // a< + case 29: // a> + if (sss==6) { + if (S==8) put4(0x413b1484); // cmp edx, [r12+rax*4] + else put3a(0x3b1485, &h[0]); // cmp edx, [h+eax*4] + } + else if (sss==7) // sss = n + put2a(0x81fa, arg); // cmp edx, dword n + else + put2(0x39c2+8*regcode[sss]); // cmp edx, sss + if (code<4) { + if (op/8==27) put3(0x0f94c3); // setz bl + if (op/8==28) put3(0x0f92c3); // setc bl + if (op/8==29) put3(0x0f97c3); // seta bl + } + break; + case 30: // not used + case 31: // 255 = lj + if (op==255) put1a(0xe9, 0); // jmp near + break; + } + } + } + } + + // Finish first pass + const int rsize=o; + if (o>rcode_size) return rsize; + + // Fill in jump addresses (second pass) + for (int i=0; i=128) target-=256; + target+=i+2; + } + if (target<0 || target>=hlen) target=hlen-1; // runtime ZPAQL error + o=it[i]; + assert(o>=16 && o skip test + assert(o>=16 && o=0x72 && op<0x78) || op==0xeb) { // jx, jmp short + --target; + if (target<-128 || target>127) + error("Cannot code x86 short jump"); + assert(o=0x82 && op<0x88) || op==0xe9) // jx, jmp near + { + target-=4; + puta(target); + } + else assert(false); // not a x86 jump + } + } + + // Jump to start + o=0; + put1a(0xe9, start-5); // jmp near start + return rsize; +} + +//////////////////////// Predictor::assemble_p() ///////////////////// + +// Assemble the ZPAQL code in the HCOMP section 
of z.header to pcomp and +// return the number of bytes of x86 or x86-64 code written, or that would +// be written if pcomp were large enough. The code for predict() begins +// at pr.pcomp[0] and update() at pr.pcomp[5], both as jmp instructions. + +// The assembled code is equivalent to int predict(Predictor*) +// and void update(Predictor*, int y); The Preditor address is placed in +// edi/rdi. The update bit y is placed in ebp/rbp. + +int Predictor::assemble_p() { + Predictor& pr=*this; + U8* rcode=pr.pcode; // x86 output array + int rcode_size=pcode_size; // output size + int o=0; // output index in pcode + const int S=sizeof(char*); // 4 or 8 + U8* hcomp=&pr.z.header[0]; // The code to translate +#define off(x) ((char*)&(pr.x)-(char*)&pr) +#define offc(x) ((char*)&(pr.comp[i].x)-(char*)&pr) + + // test for little-endian (probably x86) + U32 t=0x12345678; + if (*(char*)&t!=0x78 || (S!=4 && S!=8)) + error("JIT supported only for x86-32 and x86-64"); + + // Initialize for predict(). Put predictor address in edi/rdi + put1a(0xe9, 5); // jmp predict + put1a(0, 0x90909000); // reserve space for jmp update + put1(0x53); // push ebx/rbx + put1(0x55); // push ebp/rbp + put1(0x56); // push esi/rsi + put1(0x57); // push edi/rdi + if (S==4) + put4(0x8b7c2414); // mov edi,[esp+0x14] ; pr + else { +#ifdef _WIN32 + put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) +#endif + } + + // Code predict() for each component + const int n=hcomp[6]; // number of components + U8* cp=hcomp+7; + for (int i=0; i=pr.z.cend) error("comp too big"); + if (cp[0]<1 || cp[0]>9) error("invalid component"); + assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); + switch (cp[0]) { + + case CONS: // c + break; + + case CM: // sizebits limit + // Component& cr=comp[i]; + // cr.cxt=h[i]^hmap4; + // p[i]=stretch(cr.cm(cr.cxt)>>17); + + put2a(0x8b87, off(h[i])); // mov eax, [edi+&h[i]] + put2a(0x3387, off(hmap4)); // xor eax, [edi+&hmap4] + put1a(0x25, (1<rsi) + put2a(0x8bb7, offc(cm)); // mov esi, 
[edi+&cm] + put3(0x8b0486); // mov eax, [esi+eax*4] + put3(0xc1e811); // shr eax, 17 + put4a(0x0fbf8447, off(stretcht)); // movsx eax,word[edi+eax*2+..] + put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax + break; + + case ISSE: // sizebits j -- c=hi, cxt=bh + // assert((hmap4&15)>0); + // if (c8==1 || (c8&0xf0)==16) + // cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); + // cr.cxt=cr.ht[cr.c+(hmap4&15)]; // bit history + // int *wt=(int*)&cr.cm[cr.cxt*2]; + // p[i]=clamp2k((wt[0]*p[cp[2]]+wt[1]*64)>>16); + + case ICM: // sizebits + // assert((hmap4&15)>0); + // if (c8==1 || (c8&0xf0)==16) cr.c=find(cr.ht, cp[1]+2, h[i]+16*c8); + // cr.cxt=cr.ht[cr.c+(hmap4&15)]; + // p[i]=stretch(cr.cm(cr.cxt)>>8); + // + // Find cxt row in hash table ht. ht has rows of 16 indexed by the low + // sizebits of cxt with element 0 having the next higher 8 bits for + // collision detection. If not found after 3 adjacent tries, replace + // row with lowest element 1 as priority. Return index of row. + // + // size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { + // assert(ht.size()==size_t(16)<>sizebits&255; + // size_t h0=(cxt*16)&(ht.size()-16); + // if (ht[h0]==chk) return h0; + // size_t h1=h0^16; + // if (ht[h1]==chk) return h1; + // size_t h2=h0^32; + // if (ht[h2]==chk) return h2; + // if (ht[h0+1]<=ht[h1+1] && ht[h0+1]<=ht[h2+1]) + // return memset(&ht[h0], 0, 16), ht[h0]=chk, h0; + // else if (ht[h1+1]>(7-cr.cxt))&1; // predicted bit + // p[i]=stretch(dt2k[cr.a]*(cr.c*-2+1)&32767); + // } + + if (S==8) put1(0x48); // rex.w + put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] + + // If match length (a) is 0 then p[i]=0 + put2a(0x8b87, offc(a)); // mov eax, [edi+&a] + put2(0x85c0); // test eax, eax + put2(0x7449); // jz L2 ; p[i]=0 + + // Else put predicted bit in c + put1a(0xb9, 7); // mov ecx, 7 + put2a(0x2b8f, offc(cxt)); // sub ecx, [edi+&cxt] + put2a(0x8b87, offc(limit)); // mov eax, [edi+&limit] + put2a(0x2b87, offc(b)); // sub eax, [edi+&b] + put1a(0x25, (1<>8; + + put2a(0x8b87, 
off(p[cp[1]])); // mov eax, [edi+&p[j]] + put2a(0x2b87, off(p[cp[2]])); // sub eax, [edi+&p[k]] + put2a(0x69c0, cp[3]); // imul eax, wt + put3(0xc1f808); // sar eax, 8 + put2a(0x0387, off(p[cp[2]])); // add eax, [edi+&p[k]] + put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax + break; + + case MIX2: // sizebits j k rate mask + // c=size cm=wt[size] cxt=input + // cr.cxt=((h[i]+(c8&cp[5]))&(cr.c-1)); + // assert(cr.cxt=0 && w<65536); + // p[i]=(w*p[cp[2]]+(65536-w)*p[cp[3]])>>16; + // assert(p[i]>=-2048 && p[i]<2048); + + put2(0x8b07); // mov eax, [edi] ; c8 + put1a(0x25, cp[5]); // and eax, mask + put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] + put1a(0x25, (1<=1 && m<=i); + // cr.cxt=h[i]+(c8&cp[5]); + // cr.cxt=(cr.cxt&(cr.c-1))*m; // pointer to row of weights + // assert(cr.cxt<=cr.cm.size()-m); + // int* wt=(int*)&cr.cm[cr.cxt]; + // p[i]=0; + // for (int j=0; j>8)*p[cp[2]+j]; + // p[i]=clamp2k(p[i]>>8); + + put2(0x8b07); // mov eax, [edi] ; c8 + put1a(0x25, cp[5]); // and eax, mask + put2a(0x0387, off(h[i])); // add eax, [edi+&h[i]] + put1a(0x25, (1<3) put4a(0xf30f6f96, k*4+16);//movdqu xmm2, [esi+k*4+16] + put5(0x660f72e1,0x08); // psrad xmm1, 8 + if (tail>3) put5(0x660f72e2,0x08); // psrad xmm2, 8 + put4(0x660f6bca); // packssdw xmm1, xmm2 + put4a(0xf30f6f9f, off(p[cp[2]+k])); // movdqu xmm3, [edi+&p[j+k]] + if (tail>3) + put4a(0xf30f6fa7,off(p[cp[2]+k+4]));//movdqu xmm4, [edi+&p[j+k+4]] + put4(0x660f6bdc); // packssdw, xmm3, xmm4 + if (tail>0 && tail<8) { // last loop, mask extra weights + put4(0x660f76ed); // pcmpeqd xmm5, xmm5 ; -1 + put5(0x660f73dd, 16-tail*2); // psrldq xmm5, 16-tail*2 + put4(0x660fdbcd); // pand xmm1, xmm5 + } + if (k==0) { // first loop, initialize sum in xmm0 + put4(0xf30f6fc1); // movdqu xmm0, xmm1 + put4(0x660ff5c3); // pmaddwd xmm0, xmm3 + } + else { // accumulate sum in xmm0 + put4(0xf30f6fd1); // movdqu xmm2, xmm1 + put4(0x660ff5d3); // pmaddwd xmm2, xmm3 + put4(0x660ffec2); // paddd, xmm0, xmm2 + } + } + + // Add up the 4 
elements of xmm0 = p[i] in the first element + put4(0xf30f6fc8); // movdqu xmm1, xmm0 + put5(0x660f73d9,0x08); // psrldq xmm1, 8 + put4(0x660ffec1); // paddd xmm0, xmm1 + put4(0xf30f6fc8); // movdqu xmm1, xmm0 + put5(0x660f73d9,0x04); // psrldq xmm1, 4 + put4(0x660ffec1); // paddd xmm0, xmm1 + put4(0x660f7ec0); // movd eax, xmm0 ; p[i] + put3(0xc1f808); // sar eax, 8 + put1a(0xb9, 2047); // mov ecx, 2047 ; clamp2k + put2(0x39c8); // cmp eax, ecx + put3(0x0f4fc1); // cmovg eax, ecx + put2(0xf7d1); // not ecx ; -2048 + put2(0x39c8); // cmp eax, ecx + put3(0x0f4cc1); // cmovl eax, ecx + put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax + break; + + case SSE: // sizebits j start limit + // cr.cxt=(h[i]+c8)*32; + // int pq=p[cp[2]]+992; + // if (pq<0) pq=0; + // if (pq>1983) pq=1983; + // int wt=pq&63; + // pq>>=6; + // assert(pq>=0 && pq<=30); + // cr.cxt+=pq; + // p[i]=stretch(((cr.cm(cr.cxt)>>10)*(64-wt) // p0 + // +(cr.cm(cr.cxt+1)>>10)*wt)>>13); // p1 + // // p = p0*(64-wt)+p1*wt = (p1-p0)*wt + p0*64 + // cr.cxt+=wt>>5; + + put2a(0x8b8f, off(h[i])); // mov ecx, [edi+&h[i]] + put2(0x030f); // add ecx, [edi] ; c0 + put2a(0x81e1, (1<>5 + put2a(0x898f, offc(cxt)); // mov [edi+cxt], ecx ; cxt saved + put3(0xc1e80a); // shr eax, 10 ; p0 = cm[cxt]>>10 + put3(0xc1eb0a); // shr ebx, 10 ; p1 = cm[cxt+1]>>10 + put2(0x29c3); // sub ebx, eax, ; p1-p0 + put3(0x0fafda); // imul ebx, edx ; (p1-p0)*wt + put3(0xc1e006); // shr eax, 6 + put2(0x01d8); // add eax, ebx ; p in 0..2^28-1 + put3(0xc1e80d); // shr eax, 13 ; p in 0..32767 + put4a(0x0fbf8447, off(stretcht)); // movsx eax, word [edi+eax*2+...] + put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax + break; + + default: + error("invalid ZPAQ component"); + } + } + + // return squash(p[n-1]) + put2a(0x8b87, off(p[n-1])); // mov eax, [edi+...] + put1a(0x05, 0x800); // add eax, 2048 + put4a(0x0fbf8447, off(squasht[0])); // movsx eax, word [edi+eax*2+...] 
+ put1(0x5f); // pop edi + put1(0x5e); // pop esi + put1(0x5d); // pop ebp + put1(0x5b); // pop ebx + put1(0xc3); // ret + + // Initialize for update() Put predictor address in edi/rdi + // and bit y=0..1 in ebp + int save_o=o; + o=5; + put1a(0xe9, save_o-10); // jmp update + o=save_o; + put1(0x53); // push ebx/rbx + put1(0x55); // push ebp/rbp + put1(0x56); // push esi/rsi + put1(0x57); // push edi/rdi + if (S==4) { + put4(0x8b7c2414); // mov edi,[esp+0x14] ; (1st arg = pr) + put4(0x8b6c2418); // mov ebp,[esp+0x18] ; (2nd arg = y) + } + else { +#ifndef _WIN32 + put3(0x4889f5); // mov rbp, rsi (2nd arg in Linux-64) +#else + put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) + put3(0x4889d5); // mov rbp, rdx (2nd arg) +#endif + } + + // Code update() for each component + cp=hcomp+7; + for (int i=0; i=1 && cp[0]<=9); + assert(compsize[cp[0]]>0 && compsize[cp[0]]<8); + switch (cp[0]) { + + case CONS: // c + break; + + case SSE: // sizebits j start limit + case CM: // sizebits limit + // train(cr, y); + // + // reduce prediction error in cr.cm + // void train(Component& cr, int y) { + // assert(y==0 || y==1); + // U32& pn=cr.cm(cr.cxt); + // U32 count=pn&0x3ff; + // int error=y*32767-(cr.cm(cr.cxt)>>17); + // pn+=(error*dt[count]&-1024)+(countrsi) + put2a(0x8bb7, offc(cm)); // mov esi,[edi+cm] ; cm + put2a(0x8b87, offc(cxt)); // mov eax,[edi+cxt] ; cxt + put1a(0x25, pr.comp[i].cm.size()-1); // and eax, size-1 + if (S==8) put1(0x48); // rex.w + put3(0x8d3486); // lea esi,[esi+eax*4] ; &cm[cxt] + put2(0x8b06); // mov eax,[esi] ; cm[cxt] + put2(0x89c2); // mov edx, eax ; cm[cxt] + put3(0xc1e811); // shr eax, 17 ; cm[cxt]>>17 + put2(0x89e9); // mov ecx, ebp ; y + put3(0xc1e10f); // shl ecx, 15 ; y*32768 + put2(0x29e9); // sub ecx, ebp ; y*32767 + put2(0x29c1); // sub ecx, eax ; error + put2a(0x81e2, 0x3ff); // and edx, 1023 ; count + put3a(0x8b8497, off(dt)); // mov eax,[edi+edx*4+dt] ; dt[count] + put3(0x0fafc8); // imul ecx, eax ; error*dt[count] + put2a(0x81e1, 
0xfffffc00); // and ecx, -1024 + put2a(0x81fa, cp[2+2*(cp[0]==SSE)]*4); // cmp edx, limit*4 + put2(0x110e); // adc [esi], ecx ; pn+=... + break; + + case ICM: // sizebits: cxt=bh, ht[c][0..15]=bh row + // cr.ht[cr.c+(hmap4&15)]=st.next(cr.ht[cr.c+(hmap4&15)], y); + // U32& pn=cr.cm(cr.cxt); + // pn+=int(y*32767-(pn>>8))>>2; + + case ISSE: // sizebits j -- c=hi, cxt=bh + // assert(cr.cxt==cr.ht[cr.c+(hmap4&15)]); + // int err=y*32767-squash(p[i]); + // int *wt=(int*)&cr.cm[cr.cxt*2]; + // wt[0]=clamp512k(wt[0]+((err*p[cp[2]]+(1<<12))>>13)); + // wt[1]=clamp512k(wt[1]+((err+16)>>5)); + // cr.ht[cr.c+(hmap4&15)]=st.next(cr.cxt, y); + + // update bit history bh to next(bh,y=ebp) in ht[c+(hmap4&15)] + put3(0x8b4700+off(hmap4)); // mov eax, [edi+&hmap4] + put3(0x83e00f); // and eax, 15 + put2a(0x0387, offc(c)); // add eax [edi+&c] ; cxt + if (S==8) put1(0x48); // rex.w + put2a(0x8bb7, offc(ht)); // mov esi, [edi+&ht] + put4(0x0fb61406); // movzx edx, byte [esi+eax] ; bh + put4(0x8d5c9500); // lea ebx, [ebp+edx*4] ; index to st + put4a(0x0fb69c1f, off(st)); // movzx ebx,byte[edi+ebx+st]; next bh + put3(0x881c06); // mov [esi+eax], bl ; save next bh + if (S==8) put1(0x48); // rex.w + put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] + + // ICM: update cm[cxt=edx=bit history] to reduce prediction error + // esi = &cm + if (cp[0]==ICM) { + if (S==8) put1(0x48); // rex.w + put3(0x8d3496); // lea esi, [esi+edx*4] ; &cm[bh] + put2(0x8b06); // mov eax, [esi] ; pn + put3(0xc1e808); // shr eax, 8 ; pn>>8 + put2(0x89e9); // mov ecx, ebp ; y + put3(0xc1e10f); // shl ecx, 15 + put2(0x29e9); // sub ecx, ebp ; y*32767 + put2(0x29c1); // sub ecx, eax + put3(0xc1f902); // sar ecx, 2 + put2(0x010e); // add [esi], ecx + } + + // ISSE: update weights. edx=cxt=bit history (0..255), esi=cm[512] + else { + put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] + put1a(0x05, 2048); // add eax, 2048 + put4a(0x0fb78447, off(squasht)); // movzx eax, word [edi+eax*2+..] 
+ put2(0x89e9); // mov ecx, ebp ; y + put3(0xc1e10f); // shl ecx, 15 + put2(0x29e9); // sub ecx, ebp ; y*32767 + put2(0x29c1); // sub ecx, eax ; err + put2a(0x8b87, off(p[cp[2]]));// mov eax, [edi+&p[j]] + put3(0x0fafc1); // imul eax, ecx + put1a(0x05, (1<<12)); // add eax, 4096 + put3(0xc1f80d); // sar eax, 13 + put3(0x0304d6); // add eax, [esi+edx*8] ; wt[0] + put1a(0xbb, (1<<19)-1); // mov ebx, 524287 + put2(0x39d8); // cmp eax, ebx + put3(0x0f4fc3); // cmovg eax, ebx + put2(0xf7d3); // not ebx ; -524288 + put2(0x39d8); // cmp eax, ebx + put3(0x0f4cc3); // cmovl eax, ebx + put3(0x8904d6); // mov [esi+edx*8], eax + put3(0x83c110); // add ecx, 16 ; err + put3(0xc1f905); // sar ecx, 5 + put4(0x034cd604); // add ecx, [esi+edx*8+4] ; wt[1] + put1a(0xb8, (1<<19)-1); // mov eax, 524287 + put2(0x39c1); // cmp ecx, eax + put3(0x0f4fc8); // cmovg ecx, eax + put2(0xf7d0); // not eax ; -524288 + put2(0x39c1); // cmp ecx, eax + put3(0x0f4cc8); // cmovl ecx, eax + put4(0x894cd604); // mov [esi+edx*8+4], ecx + } + break; + + case MATCH: // sizebits bufbits: + // a=len, b=offset, c=bit, cm=index, cxt=bitpos + // ht=buf, limit=pos + // assert(cr.a<=255); + // assert(cr.c==0 || cr.c==1); + // assert(cr.cxt<8); + // assert(cr.cm.size()==(size_t(1)<>5; + // int w=cr.a16[cr.cxt]; + // w+=(err*(p[cp[2]]-p[cp[3]])+(1<<12))>>13; + // if (w<0) w=0; + // if (w>65535) w=65535; + // cr.a16[cr.cxt]=w; + + // set ecx=err + put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] + put1a(0x05, 2048); // add eax, 2048 + put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] + put2(0x89e9); // mov ecx, ebp ; y + put3(0xc1e10f); // shl ecx, 15 + put2(0x29e9); // sub ecx, ebp ; y*32767 + put2(0x29c1); // sub ecx, eax + put2a(0x69c9, cp[4]); // imul ecx, rate + put3(0xc1f905); // sar ecx, 5 ; err + + // Update w + put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] + if (S==8) put1(0x48); // rex.w + put2a(0x8bb7, offc(a16)); // mov esi, [edi+&a16] + if (S==8) put1(0x48); // rex.w + 
put3(0x8d3446); // lea esi, [esi+eax*2] ; &w + put2a(0x8b87, off(p[cp[2]])); // mov eax, [edi+&p[j]] + put2a(0x2b87, off(p[cp[3]])); // sub eax, [edi+&p[k]] ; p[j]-p[k] + put3(0x0fafc1); // imul eax, ecx ; * err + put1a(0x05, 1<<12); // add eax, 4096 + put3(0xc1f80d); // sar eax, 13 + put3(0x0fb716); // movzx edx, word [esi] ; w + put2(0x01d0); // add eax, edx + put1a(0xba, 0xffff); // mov edx, 65535 + put2(0x39d0); // cmp eax, edx + put3(0x0f4fc2); // cmovg eax, edx + put2(0x31d2); // xor edx, edx + put2(0x39d0); // cmp eax, edx + put3(0x0f4cc2); // cmovl eax, edx + put3(0x668906); // mov word [esi], ax + break; + + case MIX: // sizebits j m rate mask + // cm=wt[size][m], cxt=input + // int m=cp[3]; + // assert(m>0 && m<=i); + // assert(cr.cm.size()==m*cr.c); + // assert(cr.cxt+m<=cr.cm.size()); + // int err=(y*32767-squash(p[i]))*cp[4]>>4; + // int* wt=(int*)&cr.cm[cr.cxt]; + // for (int j=0; j>13)); + + // set ecx=err + put2a(0x8b87, off(p[i])); // mov eax, [edi+&p[i]] + put1a(0x05, 2048); // add eax, 2048 + put4a(0x0fb78447, off(squasht));//movzx eax, word [edi+eax*2+&squasht] + put2(0x89e9); // mov ecx, ebp ; y + put3(0xc1e10f); // shl ecx, 15 + put2(0x29e9); // sub ecx, ebp ; y*32767 + put2(0x29c1); // sub ecx, eax + put2a(0x69c9, cp[4]); // imul ecx, rate + put3(0xc1f904); // sar ecx, 4 ; err + + // set esi=wt + put2a(0x8b87, offc(cxt)); // mov eax, [edi+&cxt] ; cxt + if (S==8) put1(0x48); // rex.w + put2a(0x8bb7, offc(cm)); // mov esi, [edi+&cm] + if (S==8) put1(0x48); // rex.w + put3(0x8d3486); // lea esi, [esi+eax*4] ; wt + + for (int k=0; k=256) { + z.run(c8-256); + hmap4=1; + c8=1; + for (int i=0; i=16 && c8<32) + hmap4=(hmap4&0xf)<<5|y<<4|1; + else + hmap4=(hmap4&0x1f0)|(((hmap4&0xf)*2+y)&0xf); +#endif +} + +// Execute the ZPAQL code with input byte or -1 for EOF. +// Use JIT code at rcode if available, or else create it. 
+void ZPAQL::run(U32 input) { +#ifdef NOJIT + run0(input); +#else + if (!rcode) { + int n=assemble(); + allocx(rcode, rcode_size, n); + if (!rcode || n<10 || rcode_size<10 || n!=assemble()) + error("run JIT failed"); + } + a=input; + if (!((int(*)())(&rcode[0]))()) + libzpaq::error("Bad ZPAQL opcode"); +#endif +} + +} // end namespace libzpaq diff --git a/libzpaq/libzpaq.h b/libzpaq/libzpaq.h index 1e2bf0e..e6109af 100644 --- a/libzpaq/libzpaq.h +++ b/libzpaq/libzpaq.h @@ -1,541 +1,541 @@ -/* libzpaq.h - LIBZPAQ Version 5.00. - - Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so without restriction. - This Software is provided "as is" without warranty. - -LIBZPAQ is a C++ library for compression and decompression of data -conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ - -By default, LIBZPAQ uses JIT (just in time) acceleration. This only -works on x86-32 and x86-64 processors that support the SSE2 instruction -set. To disable JIT, compile with -DNOJIT. To enable run time checks, -compile with -DDEBUG. Both options will decrease speed. - -The decompression code, when compiled with -DDEBUG and -DNOJIT, -comprises the reference decoder for the ZPAQ level 2 standard. 
-*/ - -#ifndef LIBZPAQ_H -#define LIBZPAQ_H - -#ifndef DEBUG -#define NDEBUG 1 -#endif -#include -#include -#include -#include - -namespace libzpaq { - -// 1, 2, 4, 8 byte unsigned integers -typedef uint8_t U8; -typedef uint16_t U16; -typedef uint32_t U32; -typedef uint64_t U64; - -// Standard library prototypes redirected to libzpaq.cpp -void* calloc(size_t, size_t); -void free(void*); - -// Callback for error handling -extern void error(const char* msg); - -// Virtual base classes for input and output -// get() and put() must be overridden to read or write 1 byte. -// read() and write() may be overridden to read or write n bytes more -// efficiently than calling get() or put() n times. -class Reader { -public: - virtual int get() = 0; // should return 0..255, or -1 at EOF - virtual int read(char* buf, int n); // read to buf[n], return no. read - virtual ~Reader() {} -}; - -class Writer { -public: - virtual void put(int c) = 0; // should output low 8 bits of c - virtual void write(const char* buf, int n); // write buf[n] - virtual ~Writer() {} -}; - -// Read 16 bit little-endian number -int toU16(const char* p); - -// An Array of T is cleared and aligned on a 64 byte address -// with no constructors called. No copy or assignment. 
-// Array a(n, ex=0); - creates n< -class Array { - T *data; // user location of [0] on a 64 byte boundary - size_t n; // user size - int offset; // distance back in bytes to start of actual allocation - void operator=(const Array&); // no assignment - Array(const Array&); // no copy -public: - Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) { - resize(sz, ex);} // [0..sz-1] = 0 - void resize(size_t sz, int ex=0); // change size, erase content to zeros - ~Array() {resize(0);} // free memory - size_t size() const {return n;} // get size - int isize() const {return int(n);} // get size as an int - T& operator[](size_t i) {assert(n>0 && i0 && (n&(n-1))==0); return data[i&(n-1)];} -}; - -// Change size to sz< -void Array::resize(size_t sz, int ex) { - assert(size_t(-1)>0); // unsigned type? - while (ex>0) { - if (sz>sz*2) error("Array too big"); - sz*=2, --ex; - } - if (n>0) { - assert(offset>0 && offset<=64); - assert((char*)data-offset); - free((char*)data-offset); - } - n=0; - if (sz==0) return; - n=sz; - const size_t nb=128+n*sizeof(T); // test for overflow - if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big"); - data=(T*)calloc(nb, 1); - if (!data) error("Out of memory"); - offset=64-(((char*)data-(char*)0)&63); - assert(offset>0 && offset<=64); - data=(T*)((char*)data+offset); -} - -//////////////////////////// SHA1 //////////////////////////// - -// For computing SHA-1 checksums -class SHA1 { -public: - void put(int c) { // hash 1 byte - U32& r=w[len0>>5&15]; - r=(r<<8)|(c&255); - if (!(len0+=8)) ++len1; - if ((len0&511)==0) process(); - } - double size() const {return len0/8+len1*536870912.0;} // size in bytes - uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes - const char* result(); // get hash and reset - SHA1() {init();} -private: - void init(); // reset, but don't clear hbuf - U32 len0, len1; // length in bits (low, high) - U32 h[5]; // hash state - U32 w[80]; // input buffer - char hbuf[20]; // result - void 
process(); // hash 1 block -}; - -//////////////////////////// ZPAQL /////////////////////////// - -// Symbolic constants, instruction size, and names -typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE} CompType; -extern const int compsize[256]; - -// A ZPAQL machine COMP+HCOMP or PCOMP. -class ZPAQL { -public: - ZPAQL(); - ~ZPAQL(); - void clear(); // Free memory, erase program, reset machine state - void inith(); // Initialize as HCOMP to run - void initp(); // Initialize as PCOMP to run - double memory(); // Return memory requirement in bytes - void run(U32 input); // Execute with input - int read(Reader* in2); // Read header - bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header - int step(U32 input, int mode); // Trace execution (defined externally) - - Writer* output; // Destination for OUT instruction, or 0 to suppress - SHA1* sha1; // Points to checksum computer - U32 H(int i) {return h(i);} // get element of h - - void flush(); // write outbuf[0..bufptr-1] to output and sha1 - void outc(int c) { // output byte c (0..255) or -1 at EOS - if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush(); - } - - // ZPAQ1 block header - Array header; // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard) - int cend; // COMP in header[7...cend-1] - int hbegin, hend; // HCOMP/PCOMP in header[hbegin...hend-1] - -private: - // Machine state for executing HCOMP - Array m; // memory array M for HCOMP - Array h; // hash array H for HCOMP - Array r; // 256 element register array - Array outbuf; // output buffer - int bufptr; // number of bytes in outbuf - U32 a, b, c, d; // machine registers - int f; // condition flag - int pc; // program counter - int rcode_size; // length of rcode - U8* rcode; // JIT code for run() - - // Support code - int assemble(); // put JIT code in rcode - void init(int hbits, int mbits); // initialize H and M sizes - int execute(); // execute 1 instruction, return 0 after HALT, else 1 - void run0(U32 input); // 
default run() when select==0 - void div(U32 x) {if (x) a/=x; else a=0;} - void mod(U32 x) {if (x) a%=x; else a=0;} - void swap(U32& x) {a^=x; x^=a; a^=x;} - void swap(U8& x) {a^=x; x^=a; a^=x;} - void err(); // exit with run time error -}; - -///////////////////////// Component ////////////////////////// - -// A Component is a context model, indirect context model, match model, -// fixed weight mixer, adaptive 2 input mixer without or with current -// partial byte as context, adaptive m input mixer (without or with), -// or SSE (without or with). - -struct Component { - size_t limit; // max count for cm - size_t cxt; // saved context - size_t a, b, c; // multi-purpose variables - Array cm; // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index - Array ht; // ICM/ISSE hash table[0..size1][0..15] and MATCH buf - Array a16; // MIX weights - void init(); // initialize to all 0 - Component() {init();} -}; - -////////////////////////// StateTable //////////////////////// - -// Next state table generator -class StateTable { - enum {N=64}; // sizes of b, t - int num_states(int n0, int n1); // compute t[n0][n1][1] - void discount(int& n0); // set new value of n0 after 1 or n1 after 0 - void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y -public: - U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1 - int next(int state, int y) { // next state for bit y - assert(state>=0 && state<256); - assert(y>=0 && y<4); - return ns[state*4+y]; - } - int cminit(int state) { // initial probability of 1 * 2^23 - assert(state>=0 && state<256); - return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1); - } - StateTable(); -}; - -///////////////////////// Predictor ////////////////////////// - -// A predictor guesses the next bit -class Predictor { -public: - Predictor(ZPAQL&); - ~Predictor(); - void init(); // build model - int predict(); // probability that next bit is a 1 (0..4095) - void update(int y); // train on bit y (0..1) - int stat(int); // Defined 
externally - bool isModeled() { // n>0 components? - assert(z.header.isize()>6); - return z.header[6]!=0; - } -private: - - // Predictor state - int c8; // last 0...7 bits. - int hmap4; // c8 split into nibbles - int p[256]; // predictions - U32 h[256]; // unrolled copy of z.h - ZPAQL& z; // VM to compute context hashes, includes H, n - Component comp[256]; // the model, includes P - - // Modeling support functions - int predict0(); // default - void update0(int y); // default - int dt2k[256]; // division table for match: dt2k[i] = 2^12/i - int dt[1024]; // division table for cm: dt[i] = 2^16/(i+1.5) - U16 squasht[4096]; // squash() lookup table - short stretcht[32768];// stretch() lookup table - StateTable st; // next, cminit functions - U8* pcode; // JIT code for predict() and update() - int pcode_size; // length of pcode - - // reduce prediction error in cr.cm - void train(Component& cr, int y) { - assert(y==0 || y==1); - U32& pn=cr.cm(cr.cxt); - U32 count=pn&0x3ff; - int error=y*32767-(cr.cm(cr.cxt)>>17); - pn+=(error*dt[count]&-1024)+(count floor(32768/(1+exp(-x/64))) - int squash(int x) { - assert(x>=-2048 && x<=2047); - return squasht[x+2048]; - } - - // x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash - int stretch(int x) { - assert(x>=0 && x<=32767); - return stretcht[x]; - } - - // bound x to a 12 bit signed int - int clamp2k(int x) { - if (x<-2048) return -2048; - else if (x>2047) return 2047; - else return x; - } - - // bound x to a 20 bit signed int - int clamp512k(int x) { - if (x<-(1<<19)) return -(1<<19); - else if (x>=(1<<19)) return (1<<19)-1; - else return x; - } - - // Get cxt in ht, creating a new row if needed - size_t find(Array& ht, int sizebits, U32 cxt); - - // Put JIT code in pcode - int assemble_p(); -}; - -//////////////////////////// Decoder ///////////////////////// - -// Decoder decompresses using an arithmetic code -class Decoder { -public: - Reader* in; // destination - Decoder(ZPAQL& z); - int decompress(); // 
return a byte or EOF - int skip(); // skip to the end of the segment, return next byte - void init(); // initialize at start of block - int stat(int x) {return pr.stat(x);} -private: - U32 low, high; // range - U32 curr; // last 4 bytes of archive - Predictor pr; // to get p - enum {BUFSIZE=1<<16}; - Array buf; // input buffer of size BUFSIZE bytes - // of unmodeled data. buf[low..high-1] is input with curr - // remaining in sub-block. - int decode(int p); // return decoded bit (0..1) with prob. p (0..65535) - void loadbuf(); // read unmodeled data into buf to EOS -}; - -/////////////////////////// PostProcessor //////////////////// - -class PostProcessor { - int state; // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST - int hsize; // header size - int ph, pm; // sizes of H and M in z -public: - ZPAQL z; // holds PCOMP - PostProcessor(): state(0), hsize(0), ph(0), pm(0) {} - void init(int h, int m); // ph, pm sizes of H and M - int write(int c); // Input a byte, return state - int getState() const {return state;} - void setOutput(Writer* out) {z.output=out;} - void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;} -}; - -//////////////////////// Decompresser //////////////////////// - -// For decompression and listing archive contents -class Decompresser { -public: - Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {} - void setInput(Reader* in) {dec.in=in;} - bool findBlock(double* memptr = 0); - void hcomp(Writer* out2) {z.write(out2, false);} - bool findFilename(Writer* = 0); - void readComment(Writer* = 0); - void setOutput(Writer* out) {pp.setOutput(out);} - void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);} - bool decompress(int n = -1); // n bytes, -1=all, return true until done - bool pcomp(Writer* out2) {return pp.z.write(out2, true);} - void readSegmentEnd(char* sha1string = 0); - int stat(int x) {return dec.stat(x);} -private: - ZPAQL z; - Decoder dec; - PostProcessor pp; - enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state; // 
expected next - enum {FIRSTSEG, SEG, SKIP} decode_state; // which segment in block? -}; - -/////////////////////////// decompress() ///////////////////// - -void decompress(Reader* in, Writer* out); - -////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////// - -// Code following this point is not a part of the ZPAQ level 2 standard. - -//////////////////////////// Encoder ///////////////////////// - -// Encoder compresses using an arithmetic code -class Encoder { -public: - Encoder(ZPAQL& z, int size=0): - out(0), low(1), high(0xFFFFFFFF), pr(z) {} - void init(); - void compress(int c); // c is 0..255 or EOF - int stat(int x) {return pr.stat(x);} - Writer* out; // destination -private: - U32 low, high; // range - Predictor pr; // to get p - Array buf; // unmodeled input - void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535) -}; - -//////////////////////// Compressor ////////////////////////// - -class Compressor { -public: - Compressor(): enc(z), in(0), state(INIT) {} - void setOutput(Writer* out) {enc.out=out;} - void writeTag(); - void startBlock(int level); // level=1,2,3 - void startBlock(const char* hcomp); - void startSegment(const char* filename = 0, const char* comment = 0); - void setInput(Reader* i) {in=i;} - void postProcess(const char* pcomp = 0, int len = 0); - bool compress(int n = -1); // n bytes, -1=all, return true until done - void endSegment(const char* sha1string = 0); - void endBlock(); - int stat(int x) {return enc.stat(x);} -private: - ZPAQL z; - Encoder enc; - Reader* in; - enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state; -}; - -/////////////////////////// compress() /////////////////////// - -void compress(Reader* in, Writer* out, int level); - -} // namespace libzpaq - -/////////////////////////// lrzip functions ////////////////// - -#include -#ifndef uchar -#define uchar unsigned char -#endif -#define likely(x) __builtin_expect(!!(x), 1) -#define 
unlikely(x) __builtin_expect(!!(x), 0) -#define __maybe_unused __attribute__((unused)) - -typedef long long int i64; - -struct bufRead: public libzpaq::Reader { - uchar *s_buf; - i64 *s_len; - i64 total_len; - int *last_pct; - bool progress; - long thread; - FILE *msgout; - - bufRead(uchar *buf_, i64 *n_, i64 total_len_, int *last_pct_, bool progress_, long thread_, FILE *msgout_): - s_buf(buf_), s_len(n_), total_len(total_len_), last_pct(last_pct_), progress(progress_), thread(thread_), msgout(msgout_) {} - - int get() { - if (progress && !(*s_len % 128)) { - int pct = (total_len - *s_len) * 100 / total_len; - - if (pct / 10 != *last_pct / 10) { - int i; - - fprintf(msgout, "\r\t\t\tZPAQ\t"); - for (i = 0; i < thread; i++) - fprintf(msgout, "\t"); - fprintf(msgout, "%ld:%i%% \r", - thread + 1, pct); - fflush(msgout); - *last_pct = pct; - } - } - - if (likely(*s_len > 0)) { - (*s_len)--; - return ((int)(uchar)*s_buf++); - } - return -1; - } // read and return byte 0..255, or -1 at EOF - - int read(char *buf, int n) { - if (unlikely(n > *s_len)) - n = *s_len; - - if (likely(n > 0)) { - *s_len -= n; - memcpy(buf, s_buf, n); - } - return n; - } -}; - -struct bufWrite: public libzpaq::Writer { - uchar *c_buf; - i64 *c_len; - bufWrite(uchar *buf_, i64 *n_): c_buf(buf_), c_len(n_) {} - - void put(int c) { - c_buf[(*c_len)++] = (uchar)c; - } - - void write(const char *buf, int n) { - memcpy(c_buf + *c_len, buf, n); - *c_len += n; - } -}; - -extern "C" void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, int level, - FILE *msgout, bool progress, long thread) -{ - i64 total_len = s_len; - int last_pct = 100; - - bufRead bufR(s_buf, &s_len, total_len, &last_pct, progress, thread, msgout); - bufWrite bufW(c_buf, c_len); - - compress (&bufR, &bufW, level); -} - -extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len, - FILE *msgout, bool progress, long thread) -{ - i64 total_len = c_len; - int last_pct = 100; - - bufRead bufR(c_buf, 
&c_len, total_len, &last_pct, progress, thread, msgout); - bufWrite bufW(s_buf, d_len); - - decompress(&bufR, &bufW); -} - -#endif // LIBZPAQ_H +/* libzpaq.h - LIBZPAQ Version 5.00. + + Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so without restriction. + This Software is provided "as is" without warranty. + +LIBZPAQ is a C++ library for compression and decompression of data +conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ + +By default, LIBZPAQ uses JIT (just in time) acceleration. This only +works on x86-32 and x86-64 processors that support the SSE2 instruction +set. To disable JIT, compile with -DNOJIT. To enable run time checks, +compile with -DDEBUG. Both options will decrease speed. + +The decompression code, when compiled with -DDEBUG and -DNOJIT, +comprises the reference decoder for the ZPAQ level 2 standard. +*/ + +#ifndef LIBZPAQ_H +#define LIBZPAQ_H + +#ifndef DEBUG +#define NDEBUG 1 +#endif +#include +#include +#include +#include + +namespace libzpaq { + +// 1, 2, 4, 8 byte unsigned integers +typedef uint8_t U8; +typedef uint16_t U16; +typedef uint32_t U32; +typedef uint64_t U64; + +// Standard library prototypes redirected to libzpaq.cpp +void* calloc(size_t, size_t); +void free(void*); + +// Callback for error handling +extern void error(const char* msg); + +// Virtual base classes for input and output +// get() and put() must be overridden to read or write 1 byte. +// read() and write() may be overridden to read or write n bytes more +// efficiently than calling get() or put() n times. 
+class Reader { +public: + virtual int get() = 0; // should return 0..255, or -1 at EOF + virtual int read(char* buf, int n); // read to buf[n], return no. read + virtual ~Reader() {} +}; + +class Writer { +public: + virtual void put(int c) = 0; // should output low 8 bits of c + virtual void write(const char* buf, int n); // write buf[n] + virtual ~Writer() {} +}; + +// Read 16 bit little-endian number +int toU16(const char* p); + +// An Array of T is cleared and aligned on a 64 byte address +// with no constructors called. No copy or assignment. +// Array a(n, ex=0); - creates n< +class Array { + T *data; // user location of [0] on a 64 byte boundary + size_t n; // user size + int offset; // distance back in bytes to start of actual allocation + void operator=(const Array&); // no assignment + Array(const Array&); // no copy +public: + Array(size_t sz=0, int ex=0): data(0), n(0), offset(0) { + resize(sz, ex);} // [0..sz-1] = 0 + void resize(size_t sz, int ex=0); // change size, erase content to zeros + ~Array() {resize(0);} // free memory + size_t size() const {return n;} // get size + int isize() const {return int(n);} // get size as an int + T& operator[](size_t i) {assert(n>0 && i0 && (n&(n-1))==0); return data[i&(n-1)];} +}; + +// Change size to sz< +void Array::resize(size_t sz, int ex) { + assert(size_t(-1)>0); // unsigned type? 
+ while (ex>0) { + if (sz>sz*2) error("Array too big"); + sz*=2, --ex; + } + if (n>0) { + assert(offset>0 && offset<=64); + assert((char*)data-offset); + free((char*)data-offset); + } + n=0; + if (sz==0) return; + n=sz; + const size_t nb=128+n*sizeof(T); // test for overflow + if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big"); + data=(T*)calloc(nb, 1); + if (!data) error("Out of memory"); + offset=64-(((char*)data-(char*)0)&63); + assert(offset>0 && offset<=64); + data=(T*)((char*)data+offset); +} + +//////////////////////////// SHA1 //////////////////////////// + +// For computing SHA-1 checksums +class SHA1 { +public: + void put(int c) { // hash 1 byte + U32& r=w[len0>>5&15]; + r=(r<<8)|(c&255); + if (!(len0+=8)) ++len1; + if ((len0&511)==0) process(); + } + double size() const {return len0/8+len1*536870912.0;} // size in bytes + uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes + const char* result(); // get hash and reset + SHA1() {init();} +private: + void init(); // reset, but don't clear hbuf + U32 len0, len1; // length in bits (low, high) + U32 h[5]; // hash state + U32 w[80]; // input buffer + char hbuf[20]; // result + void process(); // hash 1 block +}; + +//////////////////////////// ZPAQL /////////////////////////// + +// Symbolic constants, instruction size, and names +typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE} CompType; +extern const int compsize[256]; + +// A ZPAQL machine COMP+HCOMP or PCOMP. 
+class ZPAQL { +public: + ZPAQL(); + ~ZPAQL(); + void clear(); // Free memory, erase program, reset machine state + void inith(); // Initialize as HCOMP to run + void initp(); // Initialize as PCOMP to run + double memory(); // Return memory requirement in bytes + void run(U32 input); // Execute with input + int read(Reader* in2); // Read header + bool write(Writer* out2, bool pp); // If pp write PCOMP else HCOMP header + int step(U32 input, int mode); // Trace execution (defined externally) + + Writer* output; // Destination for OUT instruction, or 0 to suppress + SHA1* sha1; // Points to checksum computer + U32 H(int i) {return h(i);} // get element of h + + void flush(); // write outbuf[0..bufptr-1] to output and sha1 + void outc(int c) { // output byte c (0..255) or -1 at EOS + if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush(); + } + + // ZPAQ1 block header + Array header; // hsize[2] hh hm ph pm n COMP (guard) HCOMP (guard) + int cend; // COMP in header[7...cend-1] + int hbegin, hend; // HCOMP/PCOMP in header[hbegin...hend-1] + +private: + // Machine state for executing HCOMP + Array m; // memory array M for HCOMP + Array h; // hash array H for HCOMP + Array r; // 256 element register array + Array outbuf; // output buffer + int bufptr; // number of bytes in outbuf + U32 a, b, c, d; // machine registers + int f; // condition flag + int pc; // program counter + int rcode_size; // length of rcode + U8* rcode; // JIT code for run() + + // Support code + int assemble(); // put JIT code in rcode + void init(int hbits, int mbits); // initialize H and M sizes + int execute(); // execute 1 instruction, return 0 after HALT, else 1 + void run0(U32 input); // default run() when select==0 + void div(U32 x) {if (x) a/=x; else a=0;} + void mod(U32 x) {if (x) a%=x; else a=0;} + void swap(U32& x) {a^=x; x^=a; a^=x;} + void swap(U8& x) {a^=x; x^=a; a^=x;} + void err(); // exit with run time error +}; + +///////////////////////// Component 
////////////////////////// + +// A Component is a context model, indirect context model, match model, +// fixed weight mixer, adaptive 2 input mixer without or with current +// partial byte as context, adaptive m input mixer (without or with), +// or SSE (without or with). + +struct Component { + size_t limit; // max count for cm + size_t cxt; // saved context + size_t a, b, c; // multi-purpose variables + Array cm; // cm[cxt] -> p in bits 31..10, n in 9..0; MATCH index + Array ht; // ICM/ISSE hash table[0..size1][0..15] and MATCH buf + Array a16; // MIX weights + void init(); // initialize to all 0 + Component() {init();} +}; + +////////////////////////// StateTable //////////////////////// + +// Next state table generator +class StateTable { + enum {N=64}; // sizes of b, t + int num_states(int n0, int n1); // compute t[n0][n1][1] + void discount(int& n0); // set new value of n0 after 1 or n1 after 0 + void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y +public: + U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1 + int next(int state, int y) { // next state for bit y + assert(state>=0 && state<256); + assert(y>=0 && y<4); + return ns[state*4+y]; + } + int cminit(int state) { // initial probability of 1 * 2^23 + assert(state>=0 && state<256); + return ((ns[state*4+3]*2+1)<<22)/(ns[state*4+2]+ns[state*4+3]+1); + } + StateTable(); +}; + +///////////////////////// Predictor ////////////////////////// + +// A predictor guesses the next bit +class Predictor { +public: + Predictor(ZPAQL&); + ~Predictor(); + void init(); // build model + int predict(); // probability that next bit is a 1 (0..4095) + void update(int y); // train on bit y (0..1) + int stat(int); // Defined externally + bool isModeled() { // n>0 components? + assert(z.header.isize()>6); + return z.header[6]!=0; + } +private: + + // Predictor state + int c8; // last 0...7 bits. 
+ int hmap4; // c8 split into nibbles + int p[256]; // predictions + U32 h[256]; // unrolled copy of z.h + ZPAQL& z; // VM to compute context hashes, includes H, n + Component comp[256]; // the model, includes P + + // Modeling support functions + int predict0(); // default + void update0(int y); // default + int dt2k[256]; // division table for match: dt2k[i] = 2^12/i + int dt[1024]; // division table for cm: dt[i] = 2^16/(i+1.5) + U16 squasht[4096]; // squash() lookup table + short stretcht[32768];// stretch() lookup table + StateTable st; // next, cminit functions + U8* pcode; // JIT code for predict() and update() + int pcode_size; // length of pcode + + // reduce prediction error in cr.cm + void train(Component& cr, int y) { + assert(y==0 || y==1); + U32& pn=cr.cm(cr.cxt); + U32 count=pn&0x3ff; + int error=y*32767-(cr.cm(cr.cxt)>>17); + pn+=(error*dt[count]&-1024)+(count floor(32768/(1+exp(-x/64))) + int squash(int x) { + assert(x>=-2048 && x<=2047); + return squasht[x+2048]; + } + + // x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash + int stretch(int x) { + assert(x>=0 && x<=32767); + return stretcht[x]; + } + + // bound x to a 12 bit signed int + int clamp2k(int x) { + if (x<-2048) return -2048; + else if (x>2047) return 2047; + else return x; + } + + // bound x to a 20 bit signed int + int clamp512k(int x) { + if (x<-(1<<19)) return -(1<<19); + else if (x>=(1<<19)) return (1<<19)-1; + else return x; + } + + // Get cxt in ht, creating a new row if needed + size_t find(Array& ht, int sizebits, U32 cxt); + + // Put JIT code in pcode + int assemble_p(); +}; + +//////////////////////////// Decoder ///////////////////////// + +// Decoder decompresses using an arithmetic code +class Decoder { +public: + Reader* in; // destination + Decoder(ZPAQL& z); + int decompress(); // return a byte or EOF + int skip(); // skip to the end of the segment, return next byte + void init(); // initialize at start of block + int stat(int x) {return pr.stat(x);} 
+private: + U32 low, high; // range + U32 curr; // last 4 bytes of archive + Predictor pr; // to get p + enum {BUFSIZE=1<<16}; + Array buf; // input buffer of size BUFSIZE bytes + // of unmodeled data. buf[low..high-1] is input with curr + // remaining in sub-block. + int decode(int p); // return decoded bit (0..1) with prob. p (0..65535) + void loadbuf(); // read unmodeled data into buf to EOS +}; + +/////////////////////////// PostProcessor //////////////////// + +class PostProcessor { + int state; // input parse state: 0=INIT, 1=PASS, 2..4=loading, 5=POST + int hsize; // header size + int ph, pm; // sizes of H and M in z +public: + ZPAQL z; // holds PCOMP + PostProcessor(): state(0), hsize(0), ph(0), pm(0) {} + void init(int h, int m); // ph, pm sizes of H and M + int write(int c); // Input a byte, return state + int getState() const {return state;} + void setOutput(Writer* out) {z.output=out;} + void setSHA1(SHA1* sha1ptr) {z.sha1=sha1ptr;} +}; + +//////////////////////// Decompresser //////////////////////// + +// For decompression and listing archive contents +class Decompresser { +public: + Decompresser(): z(), dec(z), pp(), state(BLOCK), decode_state(FIRSTSEG) {} + void setInput(Reader* in) {dec.in=in;} + bool findBlock(double* memptr = 0); + void hcomp(Writer* out2) {z.write(out2, false);} + bool findFilename(Writer* = 0); + void readComment(Writer* = 0); + void setOutput(Writer* out) {pp.setOutput(out);} + void setSHA1(SHA1* sha1ptr) {pp.setSHA1(sha1ptr);} + bool decompress(int n = -1); // n bytes, -1=all, return true until done + bool pcomp(Writer* out2) {return pp.z.write(out2, true);} + void readSegmentEnd(char* sha1string = 0); + int stat(int x) {return dec.stat(x);} +private: + ZPAQL z; + Decoder dec; + PostProcessor pp; + enum {BLOCK, FILENAME, COMMENT, DATA, SEGEND} state; // expected next + enum {FIRSTSEG, SEG, SKIP} decode_state; // which segment in block? 
+}; + +/////////////////////////// decompress() ///////////////////// + +void decompress(Reader* in, Writer* out); + +////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////// + +// Code following this point is not a part of the ZPAQ level 2 standard. + +//////////////////////////// Encoder ///////////////////////// + +// Encoder compresses using an arithmetic code +class Encoder { +public: + Encoder(ZPAQL& z): + out(0), low(1), high(0xFFFFFFFF), pr(z) {} + void init(); + void compress(int c); // c is 0..255 or EOF + int stat(int x) {return pr.stat(x);} + Writer* out; // destination +private: + U32 low, high; // range + Predictor pr; // to get p + Array buf; // unmodeled input + void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535) +}; + +//////////////////////// Compressor ////////////////////////// + +class Compressor { +public: + Compressor(): enc(z), in(0), state(INIT) {} + void setOutput(Writer* out) {enc.out=out;} + void writeTag(); + void startBlock(int level); // level=1,2,3 + void startBlock(const char* hcomp); + void startSegment(const char* filename = 0, const char* comment = 0); + void setInput(Reader* i) {in=i;} + void postProcess(const char* pcomp = 0, int len = 0); + bool compress(int n = -1); // n bytes, -1=all, return true until done + void endSegment(const char* sha1string = 0); + void endBlock(); + int stat(int x) {return enc.stat(x);} +private: + ZPAQL z; + Encoder enc; + Reader* in; + enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state; +}; + +/////////////////////////// compress() /////////////////////// + +void compress(Reader* in, Writer* out, int level); + +} // namespace libzpaq + +/////////////////////////// lrzip functions ////////////////// + +#include +#ifndef uchar +#define uchar unsigned char +#endif +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define __maybe_unused __attribute__((unused)) + +typedef 
long long int i64; + +struct bufRead: public libzpaq::Reader { + uchar *s_buf; + i64 *s_len; + i64 total_len; + int *last_pct; + bool progress; + long thread; + FILE *msgout; + + bufRead(uchar *buf_, i64 *n_, i64 total_len_, int *last_pct_, bool progress_, long thread_, FILE *msgout_): + s_buf(buf_), s_len(n_), total_len(total_len_), last_pct(last_pct_), progress(progress_), thread(thread_), msgout(msgout_) {} + + int get() { + if (progress && !(*s_len % 128)) { + int pct = (total_len - *s_len) * 100 / total_len; + + if (pct / 10 != *last_pct / 10) { + int i; + + fprintf(msgout, "\r\t\t\tZPAQ\t"); + for (i = 0; i < thread; i++) + fprintf(msgout, "\t"); + fprintf(msgout, "%ld:%i%% \r", + thread + 1, pct); + fflush(msgout); + *last_pct = pct; + } + } + + if (likely(*s_len > 0)) { + (*s_len)--; + return ((int)(uchar)*s_buf++); + } + return -1; + } // read and return byte 0..255, or -1 at EOF + + int read(char *buf, int n) { + if (unlikely(n > *s_len)) + n = *s_len; + + if (likely(n > 0)) { + *s_len -= n; + memcpy(buf, s_buf, n); + } + return n; + } +}; + +struct bufWrite: public libzpaq::Writer { + uchar *c_buf; + i64 *c_len; + bufWrite(uchar *buf_, i64 *n_): c_buf(buf_), c_len(n_) {} + + void put(int c) { + c_buf[(*c_len)++] = (uchar)c; + } + + void write(const char *buf, int n) { + memcpy(c_buf + *c_len, buf, n); + *c_len += n; + } +}; + +extern "C" void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, int level, + FILE *msgout, bool progress, long thread) +{ + i64 total_len = s_len; + int last_pct = 100; + + bufRead bufR(s_buf, &s_len, total_len, &last_pct, progress, thread, msgout); + bufWrite bufW(c_buf, c_len); + + compress (&bufR, &bufW, level); +} + +extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len, + FILE *msgout, bool progress, long thread) +{ + i64 total_len = c_len; + int last_pct = 100; + + bufRead bufR(c_buf, &c_len, total_len, &last_pct, progress, thread, msgout); + bufWrite bufW(s_buf, d_len); + + 
decompress(&bufR, &bufW); +} + +#endif // LIBZPAQ_H