From c1ab298c965c78c2c0a8864a980b895667b0c8d7 Mon Sep 17 00:00:00 2001 From: Peter Hyman Date: Sun, 5 Apr 2020 15:09:58 -0500 Subject: [PATCH] zpaq 7.15 update --- configure.ac | 2 +- libzpaq/libzpaq.3.pod | 737 ------ libzpaq/libzpaq.cpp | 5239 ++++++++++++++++++++++++++++++++++++++--- libzpaq/libzpaq.h | 1213 +++++++++- stream.c | 3 +- 5 files changed, 6054 insertions(+), 1140 deletions(-) delete mode 100644 libzpaq/libzpaq.3.pod diff --git a/configure.ac b/configure.ac index 29e8d35..c3a96c8 100644 --- a/configure.ac +++ b/configure.ac @@ -124,7 +124,7 @@ AC_CHECK_FUNCS(getopt_long) AX_PTHREAD LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" -CXXFLAGS="$CXXFLAGS $PTHREAD_CXXFLAGS" +CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS" # final checks for assembler # ASM is back for x86_64 by using newer CRC code from p7zip-16.02 diff --git a/libzpaq/libzpaq.3.pod b/libzpaq/libzpaq.3.pod deleted file mode 100644 index 3ea1b95..0000000 --- a/libzpaq/libzpaq.3.pod +++ /dev/null @@ -1,737 +0,0 @@ -# Documentation for libzpaq -# -# Copyright (C) 2012, Dell Inc. Written by Matt Mahoney. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so without restriction. -# This Software is provided "as is" without warranty. -# -# To create man page: pod2man libzpaq.3.pod > libzpaq.3 -# To create HTML documentation: pod2html libzpaq.3.pod > libzpaq.html - -=pod - -=head1 NAME - -libzpaq - ZPAQ compression API - -=head1 SYNOPSIS - - #include "libzpaq.h" - - namespace libzpaq { - - extern void error(const char* msg); - - class Reader { - public: - virtual int get() = 0; - virtual int read(char* buf, int n); // optional - virtual ~Reader() {} - }; - - class Writer { - public: - virtual void put(int c) = 0; - virtual void write(const char* buf, int n); // optional - virtual ~Writer() {} - }; - - class SHA1 { - public: - SHA1(); - void put(int c); - double size() const; - uint64_t usize() const - const char* result(); - }; - - class Compressor { - public: - Compressor(); - void setOutput(Writer* out); - void writeTag(); - void startBlock(int level); - void startBlock(const char* hcomp); - void startSegment(const char* filename = 0, - const char* comment = 0); - void setInput(Reader* i); - void postProcess(const char* pcomp = 0, int length = 0); - bool compress(int n = -1); - void endSegment(const char* sha1string = 0); - void endBlock(); - }; - - class Decompresser { - public: - Decompresser(); - void setInput(Reader* in); - bool findBlock(double* memptr = 0); - void hcomp(Writer* out); - bool findFilename(Writer* = 0); - void readComment(Writer* = 0); - void setOutput(Writer* out); - void setSHA1(SHA1* sha1ptr); - bool decompress(int n = -1); - bool pcomp(Writer* out); - void readSegmentEnd(char* sha1string = 0); - }; - - void compress(Reader* in, Writer* out, int level); - - void decompress(Reader* in, Writer* out); - } - -=head1 DESCRIPTION - -I is a C++ API for compressing or decompressing -files or objects in memory comforming to the ZPAQ level 1 and 2 standards -(see I). This document describes version 5.00 -of the software. The software may be used without -restriction under a modified MIT license. - -ZPAQ provides a high level of data compression in a streaming -(single pass) self-describing format that supports single or multiple -named objects (such as archives) with optional integrity checking. - -The library provides 3 default compression levels but supports -custom algorithms. The performance of the default levels is -shown in the table below for the 14 file Calgary corpus as -a tar file. Compression and decompression times are in seconds -on a 2 GHz T3200 running on one of two cores. Memory required -to compress or decompress is in MB. Some popular formats -are shown for comparison. - - Program Format Size Time (C, D) Memory - ----------- ------ --------- ----------- ------ - Uncompresed .tar 3,152,896 - compress .tar.Z 1,319,521 1.6 0.2 .1 MB - gzip -9 .tar.gz 1,022,810 0.7 0.1 .1 MB - bzip2 -9 .tar.bz2 860,097 0.6 0.4 5 MB - 7zip .tar.7z 824,573 1.5 0.1 195 MB - zpaq 1 (fast) .tar.zpaq 806,959 2 2 38 MB - zpaq 2 (mid) .tar.zpaq 699,191 8 8 112 MB - zpaq 3 (max) .tar.zpaq 644,190 20 20 246 MB - -A ZPAQ stream consists of one or more blocks, possibly mixed with -other data, that can be decompressed independently in any order. -Each block consists of one or more segments that must be decompressed -in order from the beginning of the block. Each block header contains -a description of the decompression algorithm. Each segment consists -of an optional filename string, an optional comment string, -self delimiting compressed data, and an optional SHA-1 checksum. -If ZPAQ blocks are mixed with other data, they must be -preceded by an identifying 13 byte tag which does not otherwise -appear in that data. - -ZPAQ compression is based on the PAQ context mixing model. -An array of components predict the probability of the next bit -of input, either independently or depending on the predictions -of earlier components. The final prediction is arithmetic coded. -Each component inputs a context computed from earlier input -by a program written in ZPAQL byte code which runs on a virtual -machine. Both the component array description and the ZPAQL -code are encoded in a string called HCOMP in each block header. -Data can also be stored uncompressed. - -A block may optionally specify a post-processor, a program -(also in ZPAQL) which takes the decoded data as input and -outputs the decompressed output. This program, if present, -is encoded as a string called PCOMP which is compressed -in the first segment prior to the compressed data. The first -decoded byte from the first segment is a flag indicating -whether a PCOMP string is present. The user is responsible -for correctly pre-processing the data so that post-processing -restores the original data. - -=head2 API Organization - -The I API consists of 2 files. - -=over - -=item libzpaq.h - -Header file to include in your application. - -=item libzpaq.cpp - -Source code file to link to your application. - -=back - -An application would have the line C<#include "libzpaq.h"> and -link to libzpaq.cpp. -The API provides two classes, C and C -which write or read respectively each of the syntactic elements -of a ZPAQ stream. The two functions C and -C provide simple interfaces for the most common -uses. In either case, the user must create classes derived -from the abstract base classes C and C and -define methods C and C which the code -will use to read and write bytes. The user must also define -a callback error handler. - -By default, libzpaq(3) uses just-in-time (JIT) acceleration -by translating ZPAQL code to x86-32 or x86-64 internally -and executing it. This feature can be disabled by compiling -with -DNOJIT. If enabled, it requires an x86 processor -capable of executing SSE2 instructions. SSE2 is supported -by most Intel processors since 2001 and AMD since 2003. - -Run time checks (assertions) can be enabled with -DDEBUG -for debugging purposes. - -All of the API code is contained in the namespace C. - -=head2 Callback Functions - -The following three functions must be defined by the user. - -=over - -=item C - -This function must be defined by the user to handle errors -from libzpaq. The library will call the function with -an English language message passed to C. Errors may -result from bad input during decompression, out of memory, -or illegal arguments or calling sequences to libzpaq -functions. Errors should be considered unrecoverable. - -=item C - -The user must create a class derived from Reader with an -implementation for C that reads one byte of input -and returns its value in the range 0...255, or returns -EOF (-1) at end of input. Objects of the derived type -would then be passed to functions that require a C. - -=item C - -The user must create a class derived from Writer with -an implemenation of C which is expected to take -a byte value C in the range 0...255 and write it to -output. Objects of the derived type -would then be passed to functions that require a C. - -=back - -The following two functions are optional. Defining them -can improve performance slightly. - -=over - -=item C - -If defined, this function should input up to C bytes into -the array C and return the number actually read, in -the range 0..n. A return value of 0 indicates end of input. -If C is not defined, then the default implementation -will call C n times. - -=item C - -If defined, this function should output the elements C -through C in order. If not defined, then the default -implementation will call C n times. - -=back - -=head2 Simple Compression - -In the remainder of this document, all classes and -functions are assumed to be in namespace C. - -=over - -=item C - -C compresses from C to C until C -returns EOF. It writes a single segment in a single block -with empty filename, comment, and checksum fields. C -must be 1, 2, or 3, to select models I, I, or -I respectively. Higher modes compress smaller but -take longer to compress and subsequently decompress. - -=item C - -C decompresses any valid ZPAQ stream from -C to C until C returns EOF. Any -non-ZPAQ data in the input is ignored. Any ZPAQ blocks -following non-ZPAQ must be preceded by a marker tag -to be recognized. Each block is decoded according to the -instructions in the block header. The contents of the -filename, comment, and checksum fields are ignored. -Data with bad checksums will be decoded anyway. If there -is more than one segment, then all of the output -data will be concatenated. - -=back - -=head2 class SHA1 - -The SHA1 class is used to compute SHA-1 checksums for compression -and verify them for decompression. It is believed to be -computationally infeasible to find two different strings -with the same hash value. Its member functions -are as follows: - -=over - -=item C - -The constructor creates a new SHA1 object representing the -hash of an empty string. - -=item C - -Appends one byte c (0...255) to the string whose hash is represented. - -=item C - -Returns the length (so far) of the string whose hash is represented. -The largest possible value returned is -2^61 - 1 = 2305843009213693951.0, but values larger than 2^53 = -9007199254740992.0 -will not be exact on systems using IEEE 64 bit floating point -representation of type C. The initial value is 0.0. - -=item C - -Returns the length (so far) as a 64 bit unsigned integer. - -=item C - -Computes the 20 byte SHA-1 hash and resets the string back -to a size of 0.0. The returned pointer points to an array -inside the SHA1 object whose -contents remain unchanged until the next call to C. - -=back - -=head2 class Compressor - -The C class has member functions to write -each of the syntactic elements of a ZPAQ stream and to specify -their values. It will compress using either built-in or -user supplied models. - -=over - -=item C - -The constructor creates a Compression object. No input source, -output destination, or compression model is specified. - -=item C - -Specifies a destination for output. Must be specified before -calling any function that writes data. - -=item C - -Writes a 13 byte marker tag which can be used to identify -the start of a block following non-ZPAQ data. - -=item C - -Writes a block header and specifies a compression model. -If linked with F, then C must be 1, 2, or 3 -to specify I, I, or I respectively. Higher numbers -compress smaller but more slowly. These models are compatible -with both the ZPAQ level 1 and 2 standards. - -=item C - -Writes a block header and specifies the HCOMP portion of the -compression model. The first two bytes of the string should -encode the length of the rest of the string as a 16 bit unsigned -number with the least significant bit first. The meaning of the -rest of the string is defined in the ZPAQ level 2 standard. -If the number of components (C) is 0, then the block -is saved in ZPAQ level 2 format, which cannot be read by -older ZPAQ level 1 decoders. Otherwise the block is saved in -ZPAQ level 1 format, which is compatible with all decoders. - -=item C - -Writes a segment header. C and -C are NUL terminated strings. If specified, then their -values are stored. Normally, C would be a file name -when compressing to an archive or omitted otherwise. If a file -is split among segments, then by convention only the first segment -is named. C is normally the uncompressed size as a decimal -number which is displayed when listing the contents of an archive. -Omitting it does not affect decompression. - -=item C - -Specifies the optional PCOMP string used for post-processing. -It must be called from within the first segment -of each block prior to compressing any data, but not from within -any other segment. -If C is 0 or no argument is passed, then the decompresser -will not post-process the data. The effect is to compress a -0 byte to indicate to the decompresser that no PCOMP string -is present. - -If C is not 0, then I bytes of the string I -are passed. If I is 0 or omitted, then -the first two bytes must encode -the length of the rest of the string as a 16 bit unsigned number -with the least significant byte first. The format of the remainder -of the string is described in the ZPAQ level 2 standard. -The effect is to compress a 1 byte -to indicate the presence of PCOMP, followed by the two length -bytes and the string as passed. For example, either -C or C -would compress the 5 bytes 1, 2, 0, 5, 8. -The user is responsible for pre-processing the input -prior to compression so that PCOMP restores the original data. - -=item C - -Specifies the input source for compression. It must be set -prior to the first call to C. - -=item C - -Compress n bytes of data, or until EOF is input, whichever comes -first. If n < 0 or omitted, then compress until EOF. -Returns true if there is more input available, or false if EOF -was read. - -=item C - -Stop compressing and write the end of a segment. If -C is specified, it should be a 20 byte string -as returned by C on the input data for -this segment I pre-processing. - -=item C - -Finish writing the current block. - -=back - -In order to create a valid ZPAQ stream, the components must -be written in the following order: - - for each block do { - if any non-ZPAQ data then { - write non-ZPAQ data - writeTag() - } - startBlock() - for each segment do { - startSegment() - if first segment in block then { - postProcess() - } - while (compress(n)) ; - endSegment() - } - endBlock() - } - -=head2 class Decompresser - -The class Decompresser has member functions to read each of the -syntactic elements of a ZPAQ stream. - -=over - -=item C - -The constructor creates a Decompresser object. No input source or -output destination is specified. - -=item C - -Specifies where the ZPAQ stream will be read from. Must be called -before any function that reads the stream. - -=item C - -Scan the input to find the start of the next block. If a block -does not start immediately, then the block must be preceded by -a marker tag (written with C) or it will -not be found. If C is not 0, then write the approximate -memory requirement (in bytes) to decompress to C<*memptr>). The -memory will be allocated by the first call to C. -It returns true if a block is found, or false if it reads to EOF -without finding a block. - -=item C - -Write the HCOMP string of the current block to C. -It will be in a format suitable -for passing to C. The first 2 bytes will -encode the length of the rest of the string as a 16 bit unsigned -integer with the least significant byte first. The format of the -remainder of the string is described in the ZPAQ level 1 -specification. - -=item C - -Find the start of the next segment. If another segment is found -within the current block then return true. If the end of the block -is found first, then return false. If a segment is found, the -filename field is not empty, and C -is not 0, then write the filename (without a terminating NUL byte) -to C. - -=item C - -Read or skip past the comment field following the filename field -in the segment header. If C is not 0 and the comment field is -not empty, then write the comment -(without a terminating NUL byte) to C. - -=item C - -Specify the destination for decompression. It must be set before -any data can be decompressed. - -=item C - -Specify the address of a SHA1 object for computing the checksum -of the decompressed data (after post-processing). As each byte C -is output, it is also passed to Cput(c)>. In order to -compute the correct checksum, the SHA1 object should be in its -initial state, either newly created, or by calling C, -before the first call to C. When the end of the segment -is reached, the value returned by Cresult()> should match -the stored checksum, if any. - -=item C - -Decode n bytes or until the end of segment, whichever comes -first. Return false if the end of segment is reached first. If -n < 0 or not specified, then decompress to the end of segment -and return false. C is the number of bytes prior to post-processing. -If the data is post-processed, then the size of the output may -be different. - -=item C - -Write the PCOMP string, if any, for the current block to C. -If there is no PCOMP string (no post-processor) then return false. -Otherwise write the string to C in a format suitable for -passing to C and return true. If written, -then the first 2 bytes will encode the length of the rest of the -string as a 16 bit unsigned integer with the least significant -bit first. The format of the rest of the string is descibed in -the ZPAQ level 1 standard. - -C is only valid after the first call to C -in the current block. To read the PCOMP string without decompressing any -data, then call C first. It is not necessary to -call C in this case. - -=item C - -Skip any compressed data in the current segment that has not yet -been decompressed and advance to the end of the segment. -Then if C is not 0 then write into -the 21 byte array that it points to. If a checksum is present, -then write a 1 into C and write the stored checksum -in C. Otherwise write a 0 in C. - -Note that it is not permitted to call decompress() if any compressed -data has been skipped in any earlier segments in the same block. - -=back - -A valid sequence of calls is as follows: - - while (findBlock()) { - while (findFilename()) { - readComment(); - if first segment in block then { (optional) - decompress(0) - pcomp() - } - while (decompress(n)) ; (optional) - readSegmentEnd(); - } - } - -=head1 EXAMPLES - -The following program F -lists the contents of a ZPAQ archive -read from standard input. - - #include - #include - #include "libzpaq.h" - - // Implement Reader and Writer interfaces for file I/O - class File: public libzpaq::Reader, public libzpaq::Writer { - FILE* f; - public: - File(FILE* f_): f(f_) {} - int get() {return getc(f);} - void put(int c) {putc(c, f);} - int read(char* buf, int n) {return fread(buf, 1, n, f);} - void write(const char* buf, int n) {fwrite(buf, 1, n, f);} - }; - - // Implement error handler - namespace libzpaq { - void error(const char* msg) { - fprintf(stderr, "Error: %s\n", msg); - exit(1); - } - } - - // List the contents of an archive. For each block, show - // the memory required to decompress. For each segment, - // show the filename and comment. - void list(FILE* input, FILE* output) { - libzpaq::Decompresser d; - File in(input), out(output); - double memory; - d.setInput(&in); - for (int block=1; d.findBlock(&memory); ++block) { - printf("Block %d needs %1.0f MB\n", block, memory/1e6); - while (d.findFilename(&out)) { // print filename - printf("\t"); - d.readComment(&out); // print comment - printf("\n"); - d.readSegmentEnd(); // skip compressed data - } - } - } - - int main() { - list(stdin, stdout); - return 0; - } - -The program could be compiled as follows: - - g++ listzpaq.cpp libzpaq.cpp - -The following code compresses a list of files into one block -written to stdout. Each file is compressed to a separate -segment. For each segment, the filename, comment, and SHA-1 -checksum are stored. The comment, as conventional, is the -file size as a decimal string. - - // Compress one file to one segment - void compress_file(libzpaq::Compressor& c, - const char* filename, - bool first_segment) { - - // Open input file - FILE* f; - f=fopen(filename, "rb"); - if (!f) return; - - // Compute SHA-1 checksum and file size - libzpaq::SHA1 sha1; - int ch; - while ((ch=getc(f))!=EOF) - sha1.put(ch); - - // Write file size as a comment. - // The size can have at most 19 digits. - char comment[20]; - sprintf(comment, "%1.0f", sha1.size()); - - // Compress segment - rewind(f); - File in(f); - c.startSegment(filename, comment); - if (first_segment) - c.postProcess(); - c.setInput(&in); - c.compress(); - c.endSegment(sha1.result()); - - // Close input file - fclose(f); - } - - // Compress a list of argc files in argv[0...argc-1] into one - // ZPAQ block to stdout at level 2. - void compress_list(int argc, char** argv) { - libzpaq::Compressor c; - File out(stdout); - c.setOutput(&out); - c.startBlock(2); - for (int i=0; i and C can -be passed an argument n to display progress every n bytes, -for example: - - for (int i=1; d.decompress(1000000); ++i) - fprintf(stderr, "Decompressed %d MB\n", i); - -To compress or decompress to and from objects in memory, derive -appropriate classes from C and C. For example, it is -possible to compress or decompress to a C using -the following class. - - struct String: public libzpaq::Writer { - std::string s; - void put(int c) {s+=char(c);} - }; - -This class is also useful for reading the filename and comment -fields during decompression as follows: - - String filename, comment; - while (d.findFilename(&filename)) { - d.readComment(&comment); - // ... - -=head1 AVAILABILITY - -I, I, and the ZPAQ level 1 and 2 specifications are -available from L. - -=head1 SEE ALSO - -C -C - -=cut - - diff --git a/libzpaq/libzpaq.cpp b/libzpaq/libzpaq.cpp index 633c6f1..d82bacb 100644 --- a/libzpaq/libzpaq.cpp +++ b/libzpaq/libzpaq.cpp @@ -1,31 +1,41 @@ -/* libzpaq.cpp - Part of LIBZPAQ Version 5.01 +/* libzpaq.cpp - LIBZPAQ Version 7.15 implementation - Aug. 17, 2016. - Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. + libdivsufsort.c for divsufsort 2.00, included within, is + (C) 2003-2008 Yuta Mori, all rights reserved. + It is released under the MIT license as described in the comments + at the beginning of that section. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so without restriction. - This Software is provided "as is" without warranty. + Some of the code for AES is from libtomcrypt 1.17 by Tom St. Denis + and is public domain. + + The Salsa20/8 code for Scrypt is by D. Bernstein and is public domain. + + All of the remaining software is provided as-is, with no warranty. + I, Matt Mahoney, release this software into + the public domain. This applies worldwide. + In some countries this may not be legally possible; if so: + I grant anyone the right to use this software for any purpose, + without any conditions, unless such conditions are required by law. LIBZPAQ is a C++ library for compression and decompression of data conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ +See libzpaq.h for additional documentation. */ #include "libzpaq.h" -#include -#include #include +#include +#include +#include #include +#ifdef unix #ifndef NOJIT -#ifndef _WIN32 #include +#endif #else #include -#endif +#include #endif namespace libzpaq { @@ -58,10 +68,6 @@ void Writer::write(const char* buf, int n) { put(U8(buf[i])); } -void error(const char* msg) { - fprintf(stderr, "zpipe error: %s\n", msg); - exit(1); -} ///////////////////////// allocx ////////////////////// // Allocate newsize > 0 bytes of executable memory and update @@ -76,7 +82,7 @@ void allocx(U8* &p, int &n, int newsize) { #else if (p || n) { if (p) -#ifndef _WIN32 +#ifdef unix munmap(p, n); #else // Windows VirtualFree(p, 0, MEM_RELEASE); @@ -85,7 +91,7 @@ void allocx(U8* &p, int &n, int newsize) { n=0; } if (newsize>0) { -#ifndef _WIN32 +#ifdef unix p=(U8*)mmap(0, newsize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON, -1, 0); if ((void*)p==MAP_FAILED) p=0; @@ -109,30 +115,31 @@ void allocx(U8* &p, int &n, int newsize) { // Start a new hash void SHA1::init() { - len0=len1=0; + len=0; h[0]=0x67452301; h[1]=0xEFCDAB89; h[2]=0x98BADCFE; h[3]=0x10325476; h[4]=0xC3D2E1F0; + memset(w, 0, sizeof(w)); } // Return old result and start a new hash const char* SHA1::result() { // pad and append length - const U32 s1=len1, s0=len0; + const U64 s=len; put(0x80); - while ((len0&511)!=448) + while ((len&511)!=448) put(0); - put(s1>>24); - put(s1>>16); - put(s1>>8); - put(s1); - put(s0>>24); - put(s0>>16); - put(s0>>8); - put(s0); + put(s>>56); + put(s>>48); + put(s>>40); + put(s>>32); + put(s>>24); + put(s>>16); + put(s>>8); + put(s); // copy h to hbuf for (int i=0; i<5; ++i) { @@ -147,38 +154,565 @@ const char* SHA1::result() { return hbuf; } +// Hash buf[0..n-1] +void SHA1::write(const char* buf, int64_t n) { + const unsigned char* p=(const unsigned char*) buf; + for (; n>0 && (U32(len)&511)!=0; --n) put(*p++); + for (; n>=64; n-=64) { + for (int i=0; i<16; ++i) + w[i]=p[0]<<24|p[1]<<16|p[2]<<8|p[3], p+=4; + len+=512; + process(); + } + for (; n>0; --n) put(*p++); +} + // Hash 1 block of 64 bytes void SHA1::process() { - for (int i=16; i<80; ++i) { - w[i]=w[i-3]^w[i-8]^w[i-14]^w[i-16]; - w[i]=w[i]<<1|w[i]>>31; + U32 a=h[0], b=h[1], c=h[2], d=h[3], e=h[4]; + static const U32 k[4]={0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6}; + #define f(a,b,c,d,e,i) \ + if (i>=16) \ + w[(i)&15]^=w[(i-3)&15]^w[(i-8)&15]^w[(i-14)&15], \ + w[(i)&15]=w[(i)&15]<<1|w[(i)&15]>>31; \ + e+=(a<<5|a>>27)+k[(i)/20]+w[(i)&15] \ + +((i)%40>=20 ? b^c^d : i>=40 ? (b&c)|(d&(b|c)) : d^(b&(c^d))); \ + b=b<<30|b>>2; + #define r(i) f(a,b,c,d,e,i) f(e,a,b,c,d,i+1) f(d,e,a,b,c,i+2) \ + f(c,d,e,a,b,i+3) f(b,c,d,e,a,i+4) + r(0) r(5) r(10) r(15) r(20) r(25) r(30) r(35) + r(40) r(45) r(50) r(55) r(60) r(65) r(70) r(75) + #undef f + #undef r + h[0]+=a; h[1]+=b; h[2]+=c; h[3]+=d; h[4]+=e; +} + +//////////////////////////// SHA256 ////////////////////////// + +void SHA256::init() { + len0=len1=0; + s[0]=0x6a09e667; + s[1]=0xbb67ae85; + s[2]=0x3c6ef372; + s[3]=0xa54ff53a; + s[4]=0x510e527f; + s[5]=0x9b05688c; + s[6]=0x1f83d9ab; + s[7]=0x5be0cd19; + memset(w, 0, sizeof(w)); +} + +void SHA256::process() { + + #define ror(a,b) ((a)>>(b)|(a<<(32-(b)))) + + #define m(i) \ + w[(i)&15]+=w[(i-7)&15] \ + +(ror(w[(i-15)&15],7)^ror(w[(i-15)&15],18)^(w[(i-15)&15]>>3)) \ + +(ror(w[(i-2)&15],17)^ror(w[(i-2)&15],19)^(w[(i-2)&15]>>10)) + + #define r(a,b,c,d,e,f,g,h,i) { \ + unsigned t1=ror(e,14)^e; \ + t1=ror(t1,5)^e; \ + h+=ror(t1,6)+((e&f)^(~e&g))+k[i]+w[(i)&15]; } \ + d+=h; \ + {unsigned t1=ror(a,9)^a; \ + t1=ror(t1,11)^a; \ + h+=ror(t1,2)+((a&b)^(c&(a^b))); } + + #define mr(a,b,c,d,e,f,g,h,i) m(i); r(a,b,c,d,e,f,g,h,i); + + #define r8(i) \ + r(a,b,c,d,e,f,g,h,i); \ + r(h,a,b,c,d,e,f,g,i+1); \ + r(g,h,a,b,c,d,e,f,i+2); \ + r(f,g,h,a,b,c,d,e,i+3); \ + r(e,f,g,h,a,b,c,d,i+4); \ + r(d,e,f,g,h,a,b,c,i+5); \ + r(c,d,e,f,g,h,a,b,i+6); \ + r(b,c,d,e,f,g,h,a,i+7); + + #define mr8(i) \ + mr(a,b,c,d,e,f,g,h,i); \ + mr(h,a,b,c,d,e,f,g,i+1); \ + mr(g,h,a,b,c,d,e,f,i+2); \ + mr(f,g,h,a,b,c,d,e,i+3); \ + mr(e,f,g,h,a,b,c,d,i+4); \ + mr(d,e,f,g,h,a,b,c,i+5); \ + mr(c,d,e,f,g,h,a,b,i+6); \ + mr(b,c,d,e,f,g,h,a,i+7); + + static const unsigned k[64]={ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + + unsigned a=s[0]; + unsigned b=s[1]; + unsigned c=s[2]; + unsigned d=s[3]; + unsigned e=s[4]; + unsigned f=s[5]; + unsigned g=s[6]; + unsigned h=s[7]; + + r8(0); + r8(8); + mr8(16); + mr8(24); + mr8(32); + mr8(40); + mr8(48); + mr8(56); + + s[0]+=a; + s[1]+=b; + s[2]+=c; + s[3]+=d; + s[4]+=e; + s[5]+=f; + s[6]+=g; + s[7]+=h; + + #undef mr8 + #undef r8 + #undef mr + #undef r + #undef m + #undef ror +}; + +// Return old result and start a new hash +const char* SHA256::result() { + + // pad and append length + const unsigned s1=len1, s0=len0; + put(0x80); + while ((len0&511)!=448) put(0); + put(s1>>24); + put(s1>>16); + put(s1>>8); + put(s1); + put(s0>>24); + put(s0>>16); + put(s0>>8); + put(s0); + + // copy s to hbuf + for (int i=0; i<8; ++i) { + hbuf[4*i]=s[i]>>24; + hbuf[4*i+1]=s[i]>>16; + hbuf[4*i+2]=s[i]>>8; + hbuf[4*i+3]=s[i]; } - U32 a=h[0]; - U32 b=h[1]; - U32 c=h[2]; - U32 d=h[3]; - U32 e=h[4]; - const U32 k1=0x5A827999, k2=0x6ED9EBA1, k3=0x8F1BBCDC, k4=0xCA62C1D6; -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(~b&d))+k1+w[i]; b=b<<30|b>>2; -#define f5(i) f1(a,b,c,d,e,i) f1(e,a,b,c,d,i+1) f1(d,e,a,b,c,i+2) \ - f1(c,d,e,a,b,i+3) f1(b,c,d,e,a,i+4) - f5(0) f5(5) f5(10) f5(15) -#undef f1 -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k2+w[i]; b=b<<30|b>>2; - f5(20) f5(25) f5(30) f5(35) -#undef f1 -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+((b&c)|(b&d)|(c&d))+k3+w[i]; b=b<<30|b>>2; - f5(40) f5(45) f5(50) f5(55) -#undef f1 -#define f1(a,b,c,d,e,i) e+=(a<<5|a>>27)+(b^c^d)+k4+w[i]; b=b<<30|b>>2; - f5(60) f5(65) f5(70) f5(75) -#undef f1 -#undef f5 - h[0]+=a; - h[1]+=b; - h[2]+=c; - h[3]+=d; - h[4]+=e; + + // return hash prior to clearing state + init(); + return hbuf; +} + +//////////////////////////// AES ///////////////////////////// + +// Some AES code is derived from libtomcrypt 1.17 (public domain). + +#define Te4_0 0x000000FF & Te4 +#define Te4_1 0x0000FF00 & Te4 +#define Te4_2 0x00FF0000 & Te4 +#define Te4_3 0xFF000000 & Te4 + +// Extract byte n of x +static inline unsigned byte(unsigned x, unsigned n) {return (x>>(8*n))&255;} + +// x = y[0..3] MSB first +static inline void LOAD32H(U32& x, const char* y) { + const unsigned char* u=(const unsigned char*)y; + x=u[0]<<24|u[1]<<16|u[2]<<8|u[3]; +} + +// y[0..3] = x MSB first +static inline void STORE32H(U32& x, unsigned char* y) { + y[0]=x>>24; + y[1]=x>>16; + y[2]=x>>8; + y[3]=x; +} + +#define setup_mix(temp) \ + ((Te4_3[byte(temp, 2)]) ^ (Te4_2[byte(temp, 1)]) ^ \ + (Te4_1[byte(temp, 0)]) ^ (Te4_0[byte(temp, 3)])) + +// Initialize encryption tables and round key. keylen is 16, 24, or 32. +AES_CTR::AES_CTR(const char* key, int keylen, const char* iv) { + assert(key != NULL); + assert(keylen==16 || keylen==24 || keylen==32); + + // Initialize IV (default 0) + iv0=iv1=0; + if (iv) { + LOAD32H(iv0, iv); + LOAD32H(iv1, iv+4); + } + + // Initialize encryption tables + for (int i=0; i<256; ++i) { + unsigned s1= + "\x63\x7c\x77\x7b\xf2\x6b\x6f\xc5\x30\x01\x67\x2b\xfe\xd7\xab\x76" + "\xca\x82\xc9\x7d\xfa\x59\x47\xf0\xad\xd4\xa2\xaf\x9c\xa4\x72\xc0" + "\xb7\xfd\x93\x26\x36\x3f\xf7\xcc\x34\xa5\xe5\xf1\x71\xd8\x31\x15" + "\x04\xc7\x23\xc3\x18\x96\x05\x9a\x07\x12\x80\xe2\xeb\x27\xb2\x75" + "\x09\x83\x2c\x1a\x1b\x6e\x5a\xa0\x52\x3b\xd6\xb3\x29\xe3\x2f\x84" + "\x53\xd1\x00\xed\x20\xfc\xb1\x5b\x6a\xcb\xbe\x39\x4a\x4c\x58\xcf" + "\xd0\xef\xaa\xfb\x43\x4d\x33\x85\x45\xf9\x02\x7f\x50\x3c\x9f\xa8" + "\x51\xa3\x40\x8f\x92\x9d\x38\xf5\xbc\xb6\xda\x21\x10\xff\xf3\xd2" + "\xcd\x0c\x13\xec\x5f\x97\x44\x17\xc4\xa7\x7e\x3d\x64\x5d\x19\x73" + "\x60\x81\x4f\xdc\x22\x2a\x90\x88\x46\xee\xb8\x14\xde\x5e\x0b\xdb" + "\xe0\x32\x3a\x0a\x49\x06\x24\x5c\xc2\xd3\xac\x62\x91\x95\xe4\x79" + "\xe7\xc8\x37\x6d\x8d\xd5\x4e\xa9\x6c\x56\xf4\xea\x65\x7a\xae\x08" + "\xba\x78\x25\x2e\x1c\xa6\xb4\xc6\xe8\xdd\x74\x1f\x4b\xbd\x8b\x8a" + "\x70\x3e\xb5\x66\x48\x03\xf6\x0e\x61\x35\x57\xb9\x86\xc1\x1d\x9e" + "\xe1\xf8\x98\x11\x69\xd9\x8e\x94\x9b\x1e\x87\xe9\xce\x55\x28\xdf" + "\x8c\xa1\x89\x0d\xbf\xe6\x42\x68\x41\x99\x2d\x0f\xb0\x54\xbb\x16" + [i]&255; + unsigned s2=s1<<1; + if (s2>=0x100) s2^=0x11b; + unsigned s3=s1^s2; + Te0[i]=s2<<24|s1<<16|s1<<8|s3; + Te1[i]=s3<<24|s2<<16|s1<<8|s1; + Te2[i]=s1<<24|s3<<16|s2<<8|s1; + Te3[i]=s1<<24|s1<<16|s3<<8|s2; + Te4[i]=s1<<24|s1<<16|s1<<8|s1; + } + + // setup the forward key + Nr = 10 + ((keylen/8)-2)*2; // 10, 12, or 14 rounds + int i = 0; + U32* rk = &ek[0]; + U32 temp; + static const U32 rcon[10] = { + 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, + 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL, + 0x1B000000UL, 0x36000000UL}; // round constants + + LOAD32H(rk[0], key ); + LOAD32H(rk[1], key + 4); + LOAD32H(rk[2], key + 8); + LOAD32H(rk[3], key + 12); + if (keylen == 16) { + for (;;) { + temp = rk[3]; + rk[4] = rk[0] ^ setup_mix(temp) ^ rcon[i]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + if (++i == 10) { + break; + } + rk += 4; + } + } + else if (keylen == 24) { + LOAD32H(rk[4], key + 16); + LOAD32H(rk[5], key + 20); + for (;;) { + temp = rk[5]; + rk[ 6] = rk[ 0] ^ setup_mix(temp) ^ rcon[i]; + rk[ 7] = rk[ 1] ^ rk[ 6]; + rk[ 8] = rk[ 2] ^ rk[ 7]; + rk[ 9] = rk[ 3] ^ rk[ 8]; + if (++i == 8) { + break; + } + rk[10] = rk[ 4] ^ rk[ 9]; + rk[11] = rk[ 5] ^ rk[10]; + rk += 6; + } + } + else if (keylen == 32) { + LOAD32H(rk[4], key + 16); + LOAD32H(rk[5], key + 20); + LOAD32H(rk[6], key + 24); + LOAD32H(rk[7], key + 28); + for (;;) { + temp = rk[7]; + rk[ 8] = rk[ 0] ^ setup_mix(temp) ^ rcon[i]; + rk[ 9] = rk[ 1] ^ rk[ 8]; + rk[10] = rk[ 2] ^ rk[ 9]; + rk[11] = rk[ 3] ^ rk[10]; + if (++i == 7) { + break; + } + temp = rk[11]; + rk[12] = rk[ 4] ^ setup_mix(temp<<24|temp>>8); + rk[13] = rk[ 5] ^ rk[12]; + rk[14] = rk[ 6] ^ rk[13]; + rk[15] = rk[ 7] ^ rk[14]; + rk += 8; + } + } +} + +// Encrypt to ct[16] +void AES_CTR::encrypt(U32 s0, U32 s1, U32 s2, U32 s3, unsigned char* ct) { + int r = Nr >> 1; + U32 *rk = &ek[0]; + U32 t0=0, t1=0, t2=0, t3=0; + s0 ^= rk[0]; + s1 ^= rk[1]; + s2 ^= rk[2]; + s3 ^= rk[3]; + for (;;) { + t0 = + Te0[byte(s0, 3)] ^ + Te1[byte(s1, 2)] ^ + Te2[byte(s2, 1)] ^ + Te3[byte(s3, 0)] ^ + rk[4]; + t1 = + Te0[byte(s1, 3)] ^ + Te1[byte(s2, 2)] ^ + Te2[byte(s3, 1)] ^ + Te3[byte(s0, 0)] ^ + rk[5]; + t2 = + Te0[byte(s2, 3)] ^ + Te1[byte(s3, 2)] ^ + Te2[byte(s0, 1)] ^ + Te3[byte(s1, 0)] ^ + rk[6]; + t3 = + Te0[byte(s3, 3)] ^ + Te1[byte(s0, 2)] ^ + Te2[byte(s1, 1)] ^ + Te3[byte(s2, 0)] ^ + rk[7]; + + rk += 8; + if (--r == 0) { + break; + } + + s0 = + Te0[byte(t0, 3)] ^ + Te1[byte(t1, 2)] ^ + Te2[byte(t2, 1)] ^ + Te3[byte(t3, 0)] ^ + rk[0]; + s1 = + Te0[byte(t1, 3)] ^ + Te1[byte(t2, 2)] ^ + Te2[byte(t3, 1)] ^ + Te3[byte(t0, 0)] ^ + rk[1]; + s2 = + Te0[byte(t2, 3)] ^ + Te1[byte(t3, 2)] ^ + Te2[byte(t0, 1)] ^ + Te3[byte(t1, 0)] ^ + rk[2]; + s3 = + Te0[byte(t3, 3)] ^ + Te1[byte(t0, 2)] ^ + Te2[byte(t1, 1)] ^ + Te3[byte(t2, 0)] ^ + rk[3]; + } + + // apply last round and map cipher state to byte array block: + s0 = + (Te4_3[byte(t0, 3)]) ^ + (Te4_2[byte(t1, 2)]) ^ + (Te4_1[byte(t2, 1)]) ^ + (Te4_0[byte(t3, 0)]) ^ + rk[0]; + STORE32H(s0, ct); + s1 = + (Te4_3[byte(t1, 3)]) ^ + (Te4_2[byte(t2, 2)]) ^ + (Te4_1[byte(t3, 1)]) ^ + (Te4_0[byte(t0, 0)]) ^ + rk[1]; + STORE32H(s1, ct+4); + s2 = + (Te4_3[byte(t2, 3)]) ^ + (Te4_2[byte(t3, 2)]) ^ + (Te4_1[byte(t0, 1)]) ^ + (Te4_0[byte(t1, 0)]) ^ + rk[2]; + STORE32H(s2, ct+8); + s3 = + (Te4_3[byte(t3, 3)]) ^ + (Te4_2[byte(t0, 2)]) ^ + (Te4_1[byte(t1, 1)]) ^ + (Te4_0[byte(t2, 0)]) ^ + rk[3]; + STORE32H(s3, ct+12); +} + +// Encrypt or decrypt slice buf[0..n-1] at offset by XOR with AES(i) where +// i is the 128 bit big-endian distance from the start in 16 byte blocks. +void AES_CTR::encrypt(char* buf, int n, U64 offset) { + for (U64 i=offset/16; i<=(offset+n)/16; ++i) { + unsigned char ct[16]; + encrypt(iv0, iv1, i>>32, i, ct); + for (int j=0; j<16; ++j) { + const int k=i*16-offset+j; + if (k>=0 && k=0; j-=8) sha256.put(i>>j); + memcpy(b, sha256.result(), 32); + for (int j=0; j>(32-b))) + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); + #undef R + } + for (int i=0; i<16; ++i) b[i]+=x[i]; +} + +// BlockMix_{Salsa20/8, r} on b[0..128*r-1] +static void blockmix(U32* b, int r) { + assert(r<=8); + U32 x[16]; + U32 y[256]; + memcpy(x, b+32*r-16, 64); + for (int i=0; i<2*r; ++i) { + for (int j=0; j<16; ++j) x[j]^=b[i*16+j]; + salsa8(x); + memcpy(&y[i*16], x, 64); + } + for (int i=0; i x(32*r), v(32*r*n); + for (int i=0; i>(i%4*8); +} + +// Strengthen password pw[0..pwlen-1] and salt[0..saltlen-1] +// to produce key buf[0..buflen-1]. Uses O(n*r*p) time and 128*r*n bytes +// of memory. n must be a power of 2 and r <= 8. +void scrypt(const char* pw, int pwlen, + const char* salt, int saltlen, + int n, int r, int p, char* buf, int buflen) { + assert(r<=8); + assert(n>0 && (n&(n-1))==0); // power of 2? + libzpaq::Array b(p*r*128); + pbkdf2(pw, pwlen, salt, saltlen, 1, &b[0], p*r*128); + for (int i=0; i=1 && (buf[0]=='z' || buf[0]=='7')) + buf[0]^=0x80; } //////////////////////////// Component /////////////////////// @@ -197,93 +731,143 @@ void Component::init() { a16.resize(0); } -////////////////////////// StateTable ////////////////////////// +////////////////////////// StateTable //////////////////////// -// How many states with count of n0 zeros, n1 ones (0...2) -int StateTable::num_states(int n0, int n1) { - const int B=6; - const int bound[B]={20,48,15,8,6,5}; // n0 -> max n1, n1 -> max n0 - if (n0=B || n0>bound[n1]) return 0; - return 1+(n1>0 && n0+n1<=17); -} - -// New value of count n0 if 1 is observed (and vice versa) -void StateTable::discount(int& n0) { - n0=(n0>=1)+(n0>=2)+(n0>=3)+(n0>=4)+(n0>=5)+(n0>=7)+(n0>=8); -} - -// compute next n0,n1 (0 to N) given input y (0 or 1) -void StateTable::next_state(int& n0, int& n1, int y) { - if (n0 20,0 - // 48,1,0 -> 48,1 - // 15,2,0 -> 8,1 - // 8,3,0 -> 6,2 - // 8,3,1 -> 5,3 - // 6,4,0 -> 5,3 - // 5,5,0 -> 5,4 - // 5,5,1 -> 4,5 - while (!num_states(n0, n1)) { - if (n1<2) --n0; - else { - n0=(n0*(n1-1)+(n1/2))/n1; - --n1; - } - } - } -} +// sns[i*4] -> next state if 0, next state if 1, n0, n1 +static const U8 sns[1024]={ + 1, 2, 0, 0, 3, 5, 1, 0, + 4, 6, 0, 1, 7, 9, 2, 0, + 8, 11, 1, 1, 8, 11, 1, 1, + 10, 12, 0, 2, 13, 15, 3, 0, + 14, 17, 2, 1, 14, 17, 2, 1, + 16, 19, 1, 2, 16, 19, 1, 2, + 18, 20, 0, 3, 21, 23, 4, 0, + 22, 25, 3, 1, 22, 25, 3, 1, + 24, 27, 2, 2, 24, 27, 2, 2, + 26, 29, 1, 3, 26, 29, 1, 3, + 28, 30, 0, 4, 31, 33, 5, 0, + 32, 35, 4, 1, 32, 35, 4, 1, + 34, 37, 3, 2, 34, 37, 3, 2, + 36, 39, 2, 3, 36, 39, 2, 3, + 38, 41, 1, 4, 38, 41, 1, 4, + 40, 42, 0, 5, 43, 33, 6, 0, + 44, 47, 5, 1, 44, 47, 5, 1, + 46, 49, 4, 2, 46, 49, 4, 2, + 48, 51, 3, 3, 48, 51, 3, 3, + 50, 53, 2, 4, 50, 53, 2, 4, + 52, 55, 1, 5, 52, 55, 1, 5, + 40, 56, 0, 6, 57, 45, 7, 0, + 58, 47, 6, 1, 58, 47, 6, 1, + 60, 63, 5, 2, 60, 63, 5, 2, + 62, 65, 4, 3, 62, 65, 4, 3, + 64, 67, 3, 4, 64, 67, 3, 4, + 66, 69, 2, 5, 66, 69, 2, 5, + 52, 71, 1, 6, 52, 71, 1, 6, + 54, 72, 0, 7, 73, 59, 8, 0, + 74, 61, 7, 1, 74, 61, 7, 1, + 76, 63, 6, 2, 76, 63, 6, 2, + 78, 81, 5, 3, 78, 81, 5, 3, + 80, 83, 4, 4, 80, 83, 4, 4, + 82, 85, 3, 5, 82, 85, 3, 5, + 66, 87, 2, 6, 66, 87, 2, 6, + 68, 89, 1, 7, 68, 89, 1, 7, + 70, 90, 0, 8, 91, 59, 9, 0, + 92, 77, 8, 1, 92, 77, 8, 1, + 94, 79, 7, 2, 94, 79, 7, 2, + 96, 81, 6, 3, 96, 81, 6, 3, + 98, 101, 5, 4, 98, 101, 5, 4, + 100, 103, 4, 5, 100, 103, 4, 5, + 82, 105, 3, 6, 82, 105, 3, 6, + 84, 107, 2, 7, 84, 107, 2, 7, + 86, 109, 1, 8, 86, 109, 1, 8, + 70, 110, 0, 9, 111, 59, 10, 0, + 112, 77, 9, 1, 112, 77, 9, 1, + 114, 97, 8, 2, 114, 97, 8, 2, + 116, 99, 7, 3, 116, 99, 7, 3, + 62, 101, 6, 4, 62, 101, 6, 4, + 80, 83, 5, 5, 80, 83, 5, 5, + 100, 67, 4, 6, 100, 67, 4, 6, + 102, 119, 3, 7, 102, 119, 3, 7, + 104, 121, 2, 8, 104, 121, 2, 8, + 86, 123, 1, 9, 86, 123, 1, 9, + 70, 124, 0, 10, 125, 59, 11, 0, + 126, 77, 10, 1, 126, 77, 10, 1, + 128, 97, 9, 2, 128, 97, 9, 2, + 60, 63, 8, 3, 60, 63, 8, 3, + 66, 69, 3, 8, 66, 69, 3, 8, + 104, 131, 2, 9, 104, 131, 2, 9, + 86, 133, 1, 10, 86, 133, 1, 10, + 70, 134, 0, 11, 135, 59, 12, 0, + 136, 77, 11, 1, 136, 77, 11, 1, + 138, 97, 10, 2, 138, 97, 10, 2, + 104, 141, 2, 10, 104, 141, 2, 10, + 86, 143, 1, 11, 86, 143, 1, 11, + 70, 144, 0, 12, 145, 59, 13, 0, + 146, 77, 12, 1, 146, 77, 12, 1, + 148, 97, 11, 2, 148, 97, 11, 2, + 104, 151, 2, 11, 104, 151, 2, 11, + 86, 153, 1, 12, 86, 153, 1, 12, + 70, 154, 0, 13, 155, 59, 14, 0, + 156, 77, 13, 1, 156, 77, 13, 1, + 158, 97, 12, 2, 158, 97, 12, 2, + 104, 161, 2, 12, 104, 161, 2, 12, + 86, 163, 1, 13, 86, 163, 1, 13, + 70, 164, 0, 14, 165, 59, 15, 0, + 166, 77, 14, 1, 166, 77, 14, 1, + 168, 97, 13, 2, 168, 97, 13, 2, + 104, 171, 2, 13, 104, 171, 2, 13, + 86, 173, 1, 14, 86, 173, 1, 14, + 70, 174, 0, 15, 175, 59, 16, 0, + 176, 77, 15, 1, 176, 77, 15, 1, + 178, 97, 14, 2, 178, 97, 14, 2, + 104, 181, 2, 14, 104, 181, 2, 14, + 86, 183, 1, 15, 86, 183, 1, 15, + 70, 184, 0, 16, 185, 59, 17, 0, + 186, 77, 16, 1, 186, 77, 16, 1, + 74, 97, 15, 2, 74, 97, 15, 2, + 104, 89, 2, 15, 104, 89, 2, 15, + 86, 187, 1, 16, 86, 187, 1, 16, + 70, 188, 0, 17, 189, 59, 18, 0, + 190, 77, 17, 1, 86, 191, 1, 17, + 70, 192, 0, 18, 193, 59, 19, 0, + 194, 77, 18, 1, 86, 195, 1, 18, + 70, 196, 0, 19, 193, 59, 20, 0, + 197, 77, 19, 1, 86, 198, 1, 19, + 70, 196, 0, 20, 199, 77, 20, 1, + 86, 200, 1, 20, 201, 77, 21, 1, + 86, 202, 1, 21, 203, 77, 22, 1, + 86, 204, 1, 22, 205, 77, 23, 1, + 86, 206, 1, 23, 207, 77, 24, 1, + 86, 208, 1, 24, 209, 77, 25, 1, + 86, 210, 1, 25, 211, 77, 26, 1, + 86, 212, 1, 26, 213, 77, 27, 1, + 86, 214, 1, 27, 215, 77, 28, 1, + 86, 216, 1, 28, 217, 77, 29, 1, + 86, 218, 1, 29, 219, 77, 30, 1, + 86, 220, 1, 30, 221, 77, 31, 1, + 86, 222, 1, 31, 223, 77, 32, 1, + 86, 224, 1, 32, 225, 77, 33, 1, + 86, 226, 1, 33, 227, 77, 34, 1, + 86, 228, 1, 34, 229, 77, 35, 1, + 86, 230, 1, 35, 231, 77, 36, 1, + 86, 232, 1, 36, 233, 77, 37, 1, + 86, 234, 1, 37, 235, 77, 38, 1, + 86, 236, 1, 38, 237, 77, 39, 1, + 86, 238, 1, 39, 239, 77, 40, 1, + 86, 240, 1, 40, 241, 77, 41, 1, + 86, 242, 1, 41, 243, 77, 42, 1, + 86, 244, 1, 42, 245, 77, 43, 1, + 86, 246, 1, 43, 247, 77, 44, 1, + 86, 248, 1, 44, 249, 77, 45, 1, + 86, 250, 1, 45, 251, 77, 46, 1, + 86, 252, 1, 46, 253, 77, 47, 1, + 86, 254, 1, 47, 253, 77, 48, 1, + 86, 254, 1, 48, 0, 0, 0, 0 +}; // Initialize next state table ns[state*4] -> next if 0, next if 1, n0, n1 StateTable::StateTable() { - - // Assign states by increasing priority - const int N=50; - U8 t[N][N][2]={{{0}}}; // (n0,n1,y) -> state number - int state=0; - for (int i=0; i=0 && n<=2); - if (n) { - t[n0][n1][0]=state; - t[n0][n1][1]=state+n-1; - state+=n; - } - } - } - - // Generate next state table - memset(ns, 0, sizeof(ns)); - for (int n0=0; n0=0 && s<256); - int s0=n0, s1=n1; - next_state(s0, s1, 0); - assert(s0>=0 && s0=0 && s1=0 && s0=0 && s1get(); // component type - if (type==-1) error("unexpected end of file"); + if (type<0 || type>255) error("unexpected end of file"); header[cend++]=type; // component type int size=compsize[type]; if (size<1) error("Invalid component type"); - if (cend+size>header.isize()-8) error("COMP list too big"); + if (cend+size>hsize) error("COMP overflows header"); for (int j=1; jget(); } @@ -338,6 +922,7 @@ int ZPAQL::read(Reader* in2) { // Insert a guard gap and read HCOMP hbegin=hend=cend+128; + if (hend>hsize+129) error("missing HCOMP"); while (hendget(); @@ -397,23 +982,30 @@ void ZPAQL::initp() { // Flush pending output void ZPAQL::flush() { if (output) output->write(&outbuf[0], bufptr); - if (sha1) for (int i=0; iput(U8(outbuf[i])); + if (sha1) sha1->write(&outbuf[0], bufptr); bufptr=0; } +// pow(2, x) +static double pow2(int x) { + double r=1; + for (; x>0; x--) r+=r; + return r; +} + // Return memory requirement in bytes double ZPAQL::memory() { - double mem=pow(2.0,header[2]+2)+pow(2.0,header[3]) // hh hm - +pow(2.0,header[4]+2)+pow(2.0,header[5]) // ph pm + double mem=pow2(header[2]+2)+pow2(header[3]) // hh hm + +pow2(header[4]+2)+pow2(header[5]) // ph pm +header.size(); int cp=7; // start of comp list for (int i=0; i0); + if (hbits>32) error("H too big"); + if (mbits>32) error("M too big"); h.resize(1, hbits); m.resize(1, mbits); r.resize(256); @@ -540,7 +1134,7 @@ int ZPAQL::execute() { case 97: m(b) = b; break; // *B=B case 98: m(b) = c; break; // *B=C case 99: m(b) = d; break; // *B=D - case 100: m(b) = m(b); break; // *B=*B + case 100: break; // *B=*B case 101: m(b) = m(c); break; // *B=*C case 102: m(b) = h(d); break; // *B=*D case 103: m(b) = header[pc++]; break; // *B= N @@ -549,7 +1143,7 @@ int ZPAQL::execute() { case 106: m(c) = c; break; // *C=C case 107: m(c) = d; break; // *C=D case 108: m(c) = m(b); break; // *C=*B - case 109: m(c) = m(c); break; // *C=*C + case 109: break; // *C=*C case 110: m(c) = h(d); break; // *C=*D case 111: m(c) = header[pc++]; break; // *C= N case 112: h(d) = a; break; // *D=A @@ -558,7 +1152,7 @@ int ZPAQL::execute() { case 115: h(d) = d; break; // *D=D case 116: h(d) = m(b); break; // *D=*B case 117: h(d) = m(c); break; // *D=*C - case 118: h(d) = h(d); break; // *D=*D + case 118: break; // *D=*D case 119: h(d) = header[pc++]; break; // *D= N case 128: a += a; break; // A+=A case 129: a += b; break; // A+=B @@ -648,7 +1242,7 @@ int ZPAQL::execute() { case 213: a >>= (m(c)&31); break; // A>>=*C case 214: a >>= (h(d)&31); break; // A>>=*D case 215: a >>= (header[pc++]&31); break; // A>>= N - case 216: f = (true); break; // A==A + case 216: f = 1; break; // A==A case 217: f = (a == b); break; // A==B case 218: f = (a == c); break; // A==C case 219: f = (a == d); break; // A==D @@ -656,7 +1250,7 @@ int ZPAQL::execute() { case 221: f = (a == U32(m(c))); break; // A==*C case 222: f = (a == h(d)); break; // A==*D case 223: f = (a == U32(header[pc++])); break; // A== N - case 224: f = (false); break; // AA + case 232: f = 0; break; // A>A case 233: f = (a > b); break; // A>B case 234: f = (a > c); break; // A>C case 235: f = (a > d); break; // A>D @@ -685,7 +1279,440 @@ void ZPAQL::err() { ///////////////////////// Predictor ///////////////////////// -// Initailize model-independent tables +// sdt2k[i]=2048/i; +static const int sdt2k[256]={ + 0, 2048, 1024, 682, 512, 409, 341, 292, + 256, 227, 204, 186, 170, 157, 146, 136, + 128, 120, 113, 107, 102, 97, 93, 89, + 85, 81, 78, 75, 73, 70, 68, 66, + 64, 62, 60, 58, 56, 55, 53, 52, + 51, 49, 48, 47, 46, 45, 44, 43, + 42, 41, 40, 40, 39, 38, 37, 37, + 36, 35, 35, 34, 34, 33, 33, 32, + 32, 31, 31, 30, 30, 29, 29, 28, + 28, 28, 27, 27, 26, 26, 26, 25, + 25, 25, 24, 24, 24, 24, 23, 23, + 23, 23, 22, 22, 22, 22, 21, 21, + 21, 21, 20, 20, 20, 20, 20, 19, + 19, 19, 19, 19, 18, 18, 18, 18, + 18, 18, 17, 17, 17, 17, 17, 17, + 17, 16, 16, 16, 16, 16, 16, 16, + 16, 15, 15, 15, 15, 15, 15, 15, + 15, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8 +}; + +// sdt[i]=(1<<17)/(i*2+3)*2; +static const int sdt[1024]={ + 87380, 52428, 37448, 29126, 23830, 20164, 17476, 15420, + 13796, 12482, 11396, 10484, 9708, 9038, 8456, 7942, + 7488, 7084, 6720, 6392, 6096, 5824, 5576, 5348, + 5140, 4946, 4766, 4598, 4442, 4296, 4160, 4032, + 3912, 3798, 3692, 3590, 3494, 3404, 3318, 3236, + 3158, 3084, 3012, 2944, 2880, 2818, 2758, 2702, + 2646, 2594, 2544, 2496, 2448, 2404, 2360, 2318, + 2278, 2240, 2202, 2166, 2130, 2096, 2064, 2032, + 2000, 1970, 1940, 1912, 1884, 1858, 1832, 1806, + 1782, 1758, 1736, 1712, 1690, 1668, 1648, 1628, + 1608, 1588, 1568, 1550, 1532, 1514, 1496, 1480, + 1464, 1448, 1432, 1416, 1400, 1386, 1372, 1358, + 1344, 1330, 1316, 1304, 1290, 1278, 1266, 1254, + 1242, 1230, 1218, 1208, 1196, 1186, 1174, 1164, + 1154, 1144, 1134, 1124, 1114, 1106, 1096, 1086, + 1078, 1068, 1060, 1052, 1044, 1036, 1028, 1020, + 1012, 1004, 996, 988, 980, 974, 966, 960, + 952, 946, 938, 932, 926, 918, 912, 906, + 900, 894, 888, 882, 876, 870, 864, 858, + 852, 848, 842, 836, 832, 826, 820, 816, + 810, 806, 800, 796, 790, 786, 782, 776, + 772, 768, 764, 758, 754, 750, 746, 742, + 738, 734, 730, 726, 722, 718, 714, 710, + 706, 702, 698, 694, 690, 688, 684, 680, + 676, 672, 670, 666, 662, 660, 656, 652, + 650, 646, 644, 640, 636, 634, 630, 628, + 624, 622, 618, 616, 612, 610, 608, 604, + 602, 598, 596, 594, 590, 588, 586, 582, + 580, 578, 576, 572, 570, 568, 566, 562, + 560, 558, 556, 554, 550, 548, 546, 544, + 542, 540, 538, 536, 532, 530, 528, 526, + 524, 522, 520, 518, 516, 514, 512, 510, + 508, 506, 504, 502, 500, 498, 496, 494, + 492, 490, 488, 488, 486, 484, 482, 480, + 478, 476, 474, 474, 472, 470, 468, 466, + 464, 462, 462, 460, 458, 456, 454, 454, + 452, 450, 448, 448, 446, 444, 442, 442, + 440, 438, 436, 436, 434, 432, 430, 430, + 428, 426, 426, 424, 422, 422, 420, 418, + 418, 416, 414, 414, 412, 410, 410, 408, + 406, 406, 404, 402, 402, 400, 400, 398, + 396, 396, 394, 394, 392, 390, 390, 388, + 388, 386, 386, 384, 382, 382, 380, 380, + 378, 378, 376, 376, 374, 372, 372, 370, + 370, 368, 368, 366, 366, 364, 364, 362, + 362, 360, 360, 358, 358, 356, 356, 354, + 354, 352, 352, 350, 350, 348, 348, 348, + 346, 346, 344, 344, 342, 342, 340, 340, + 340, 338, 338, 336, 336, 334, 334, 332, + 332, 332, 330, 330, 328, 328, 328, 326, + 326, 324, 324, 324, 322, 322, 320, 320, + 320, 318, 318, 316, 316, 316, 314, 314, + 312, 312, 312, 310, 310, 310, 308, 308, + 308, 306, 306, 304, 304, 304, 302, 302, + 302, 300, 300, 300, 298, 298, 298, 296, + 296, 296, 294, 294, 294, 292, 292, 292, + 290, 290, 290, 288, 288, 288, 286, 286, + 286, 284, 284, 284, 284, 282, 282, 282, + 280, 280, 280, 278, 278, 278, 276, 276, + 276, 276, 274, 274, 274, 272, 272, 272, + 272, 270, 270, 270, 268, 268, 268, 268, + 266, 266, 266, 266, 264, 264, 264, 262, + 262, 262, 262, 260, 260, 260, 260, 258, + 258, 258, 258, 256, 256, 256, 256, 254, + 254, 254, 254, 252, 252, 252, 252, 250, + 250, 250, 250, 248, 248, 248, 248, 248, + 246, 246, 246, 246, 244, 244, 244, 244, + 242, 242, 242, 242, 242, 240, 240, 240, + 240, 238, 238, 238, 238, 238, 236, 236, + 236, 236, 234, 234, 234, 234, 234, 232, + 232, 232, 232, 232, 230, 230, 230, 230, + 230, 228, 228, 228, 228, 228, 226, 226, + 226, 226, 226, 224, 224, 224, 224, 224, + 222, 222, 222, 222, 222, 220, 220, 220, + 220, 220, 220, 218, 218, 218, 218, 218, + 216, 216, 216, 216, 216, 216, 214, 214, + 214, 214, 214, 212, 212, 212, 212, 212, + 212, 210, 210, 210, 210, 210, 210, 208, + 208, 208, 208, 208, 208, 206, 206, 206, + 206, 206, 206, 204, 204, 204, 204, 204, + 204, 204, 202, 202, 202, 202, 202, 202, + 200, 200, 200, 200, 200, 200, 198, 198, + 198, 198, 198, 198, 198, 196, 196, 196, + 196, 196, 196, 196, 194, 194, 194, 194, + 194, 194, 194, 192, 192, 192, 192, 192, + 192, 192, 190, 190, 190, 190, 190, 190, + 190, 188, 188, 188, 188, 188, 188, 188, + 186, 186, 186, 186, 186, 186, 186, 186, + 184, 184, 184, 184, 184, 184, 184, 182, + 182, 182, 182, 182, 182, 182, 182, 180, + 180, 180, 180, 180, 180, 180, 180, 178, + 178, 178, 178, 178, 178, 178, 178, 176, + 176, 176, 176, 176, 176, 176, 176, 176, + 174, 174, 174, 174, 174, 174, 174, 174, + 172, 172, 172, 172, 172, 172, 172, 172, + 172, 170, 170, 170, 170, 170, 170, 170, + 170, 170, 168, 168, 168, 168, 168, 168, + 168, 168, 168, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 164, 164, 164, + 164, 164, 164, 164, 164, 164, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 158, 158, 158, 158, 158, 158, + 158, 158, 158, 158, 158, 156, 156, 156, + 156, 156, 156, 156, 156, 156, 156, 154, + 154, 154, 154, 154, 154, 154, 154, 154, + 154, 154, 152, 152, 152, 152, 152, 152, + 152, 152, 152, 152, 152, 150, 150, 150, + 150, 150, 150, 150, 150, 150, 150, 150, + 150, 148, 148, 148, 148, 148, 148, 148, + 148, 148, 148, 148, 148, 146, 146, 146, + 146, 146, 146, 146, 146, 146, 146, 146, + 146, 144, 144, 144, 144, 144, 144, 144, + 144, 144, 144, 144, 144, 142, 142, 142, + 142, 142, 142, 142, 142, 142, 142, 142, + 142, 142, 140, 140, 140, 140, 140, 140, + 140, 140, 140, 140, 140, 140, 140, 138, + 138, 138, 138, 138, 138, 138, 138, 138, + 138, 138, 138, 138, 138, 136, 136, 136, + 136, 136, 136, 136, 136, 136, 136, 136, + 136, 136, 136, 134, 134, 134, 134, 134, + 134, 134, 134, 134, 134, 134, 134, 134, + 134, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 130, 130, 130, 130, 130, 130, 130, 130, + 130, 130, 130, 130, 130, 130, 130, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 126 +}; + +// ssquasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); +// Middle 1344 of 4096 entries only. +static const U16 ssquasht[1344]={ + 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 7, 7, 7, 7, + 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, + 9, 9, 10, 10, 10, 10, 10, 10, + 10, 11, 11, 11, 11, 11, 12, 12, + 12, 12, 12, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 15, 15, 15, 15, + 15, 16, 16, 16, 17, 17, 17, 17, + 18, 18, 18, 18, 19, 19, 19, 20, + 20, 20, 21, 21, 21, 22, 22, 22, + 23, 23, 23, 24, 24, 25, 25, 25, + 26, 26, 27, 27, 28, 28, 28, 29, + 29, 30, 30, 31, 31, 32, 32, 33, + 33, 34, 34, 35, 36, 36, 37, 37, + 38, 38, 39, 40, 40, 41, 42, 42, + 43, 44, 44, 45, 46, 46, 47, 48, + 49, 49, 50, 51, 52, 53, 54, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 76, 77, 78, 79, + 81, 82, 83, 84, 86, 87, 88, 90, + 91, 93, 94, 96, 97, 99, 100, 102, + 103, 105, 107, 108, 110, 112, 114, 115, + 117, 119, 121, 123, 125, 127, 129, 131, + 133, 135, 137, 139, 141, 144, 146, 148, + 151, 153, 155, 158, 160, 163, 165, 168, + 171, 173, 176, 179, 182, 184, 187, 190, + 193, 196, 199, 202, 206, 209, 212, 215, + 219, 222, 226, 229, 233, 237, 240, 244, + 248, 252, 256, 260, 264, 268, 272, 276, + 281, 285, 289, 294, 299, 303, 308, 313, + 318, 323, 328, 333, 338, 343, 349, 354, + 360, 365, 371, 377, 382, 388, 394, 401, + 407, 413, 420, 426, 433, 440, 446, 453, + 460, 467, 475, 482, 490, 497, 505, 513, + 521, 529, 537, 545, 554, 562, 571, 580, + 589, 598, 607, 617, 626, 636, 646, 656, + 666, 676, 686, 697, 708, 719, 730, 741, + 752, 764, 776, 788, 800, 812, 825, 837, + 850, 863, 876, 890, 903, 917, 931, 946, + 960, 975, 990, 1005, 1020, 1036, 1051, 1067, + 1084, 1100, 1117, 1134, 1151, 1169, 1186, 1204, + 1223, 1241, 1260, 1279, 1298, 1318, 1338, 1358, + 1379, 1399, 1421, 1442, 1464, 1486, 1508, 1531, + 1554, 1577, 1600, 1624, 1649, 1673, 1698, 1724, + 1749, 1775, 1802, 1829, 1856, 1883, 1911, 1940, + 1968, 1998, 2027, 2057, 2087, 2118, 2149, 2181, + 2213, 2245, 2278, 2312, 2345, 2380, 2414, 2450, + 2485, 2521, 2558, 2595, 2633, 2671, 2709, 2748, + 2788, 2828, 2869, 2910, 2952, 2994, 3037, 3080, + 3124, 3168, 3213, 3259, 3305, 3352, 3399, 3447, + 3496, 3545, 3594, 3645, 3696, 3747, 3799, 3852, + 3906, 3960, 4014, 4070, 4126, 4182, 4240, 4298, + 4356, 4416, 4476, 4537, 4598, 4660, 4723, 4786, + 4851, 4916, 4981, 5048, 5115, 5183, 5251, 5320, + 5390, 5461, 5533, 5605, 5678, 5752, 5826, 5901, + 5977, 6054, 6131, 6210, 6289, 6369, 6449, 6530, + 6613, 6695, 6779, 6863, 6949, 7035, 7121, 7209, + 7297, 7386, 7476, 7566, 7658, 7750, 7842, 7936, + 8030, 8126, 8221, 8318, 8415, 8513, 8612, 8712, + 8812, 8913, 9015, 9117, 9221, 9324, 9429, 9534, + 9640, 9747, 9854, 9962, 10071, 10180, 10290, 10401, + 10512, 10624, 10737, 10850, 10963, 11078, 11192, 11308, + 11424, 11540, 11658, 11775, 11893, 12012, 12131, 12251, + 12371, 12491, 12612, 12734, 12856, 12978, 13101, 13224, + 13347, 13471, 13595, 13719, 13844, 13969, 14095, 14220, + 14346, 14472, 14599, 14725, 14852, 14979, 15106, 15233, + 15361, 15488, 15616, 15744, 15872, 16000, 16128, 16256, + 16384, 16511, 16639, 16767, 16895, 17023, 17151, 17279, + 17406, 17534, 17661, 17788, 17915, 18042, 18168, 18295, + 18421, 18547, 18672, 18798, 18923, 19048, 19172, 19296, + 19420, 19543, 19666, 19789, 19911, 20033, 20155, 20276, + 20396, 20516, 20636, 20755, 20874, 20992, 21109, 21227, + 21343, 21459, 21575, 21689, 21804, 21917, 22030, 22143, + 22255, 22366, 22477, 22587, 22696, 22805, 22913, 23020, + 23127, 23233, 23338, 23443, 23546, 23650, 23752, 23854, + 23955, 24055, 24155, 24254, 24352, 24449, 24546, 24641, + 24737, 24831, 24925, 25017, 25109, 25201, 25291, 25381, + 25470, 25558, 25646, 25732, 25818, 25904, 25988, 26072, + 26154, 26237, 26318, 26398, 26478, 26557, 26636, 26713, + 26790, 26866, 26941, 27015, 27089, 27162, 27234, 27306, + 27377, 27447, 27516, 27584, 27652, 27719, 27786, 27851, + 27916, 27981, 28044, 28107, 28169, 28230, 28291, 28351, + 28411, 28469, 28527, 28585, 28641, 28697, 28753, 28807, + 28861, 28915, 28968, 29020, 29071, 29122, 29173, 29222, + 29271, 29320, 29368, 29415, 29462, 29508, 29554, 29599, + 29643, 29687, 29730, 29773, 29815, 29857, 29898, 29939, + 29979, 30019, 30058, 30096, 30134, 30172, 30209, 30246, + 30282, 30317, 30353, 30387, 30422, 30455, 30489, 30522, + 30554, 30586, 30618, 30649, 30680, 30710, 30740, 30769, + 30799, 30827, 30856, 30884, 30911, 30938, 30965, 30992, + 31018, 31043, 31069, 31094, 31118, 31143, 31167, 31190, + 31213, 31236, 31259, 31281, 31303, 31325, 31346, 31368, + 31388, 31409, 31429, 31449, 31469, 31488, 31507, 31526, + 31544, 31563, 31581, 31598, 31616, 31633, 31650, 31667, + 31683, 31700, 31716, 31731, 31747, 31762, 31777, 31792, + 31807, 31821, 31836, 31850, 31864, 31877, 31891, 31904, + 31917, 31930, 31942, 31955, 31967, 31979, 31991, 32003, + 32015, 32026, 32037, 32048, 32059, 32070, 32081, 32091, + 32101, 32111, 32121, 32131, 32141, 32150, 32160, 32169, + 32178, 32187, 32196, 32205, 32213, 32222, 32230, 32238, + 32246, 32254, 32262, 32270, 32277, 32285, 32292, 32300, + 32307, 32314, 32321, 32327, 32334, 32341, 32347, 32354, + 32360, 32366, 32373, 32379, 32385, 32390, 32396, 32402, + 32407, 32413, 32418, 32424, 32429, 32434, 32439, 32444, + 32449, 32454, 32459, 32464, 32468, 32473, 32478, 32482, + 32486, 32491, 32495, 32499, 32503, 32507, 32511, 32515, + 32519, 32523, 32527, 32530, 32534, 32538, 32541, 32545, + 32548, 32552, 32555, 32558, 32561, 32565, 32568, 32571, + 32574, 32577, 32580, 32583, 32585, 32588, 32591, 32594, + 32596, 32599, 32602, 32604, 32607, 32609, 32612, 32614, + 32616, 32619, 32621, 32623, 32626, 32628, 32630, 32632, + 32634, 32636, 32638, 32640, 32642, 32644, 32646, 32648, + 32650, 32652, 32653, 32655, 32657, 32659, 32660, 32662, + 32664, 32665, 32667, 32668, 32670, 32671, 32673, 32674, + 32676, 32677, 32679, 32680, 32681, 32683, 32684, 32685, + 32686, 32688, 32689, 32690, 32691, 32693, 32694, 32695, + 32696, 32697, 32698, 32699, 32700, 32701, 32702, 32703, + 32704, 32705, 32706, 32707, 32708, 32709, 32710, 32711, + 32712, 32713, 32713, 32714, 32715, 32716, 32717, 32718, + 32718, 32719, 32720, 32721, 32721, 32722, 32723, 32723, + 32724, 32725, 32725, 32726, 32727, 32727, 32728, 32729, + 32729, 32730, 32730, 32731, 32731, 32732, 32733, 32733, + 32734, 32734, 32735, 32735, 32736, 32736, 32737, 32737, + 32738, 32738, 32739, 32739, 32739, 32740, 32740, 32741, + 32741, 32742, 32742, 32742, 32743, 32743, 32744, 32744, + 32744, 32745, 32745, 32745, 32746, 32746, 32746, 32747, + 32747, 32747, 32748, 32748, 32748, 32749, 32749, 32749, + 32749, 32750, 32750, 32750, 32750, 32751, 32751, 32751, + 32752, 32752, 32752, 32752, 32752, 32753, 32753, 32753, + 32753, 32754, 32754, 32754, 32754, 32754, 32755, 32755, + 32755, 32755, 32755, 32756, 32756, 32756, 32756, 32756, + 32757, 32757, 32757, 32757, 32757, 32757, 32757, 32758, + 32758, 32758, 32758, 32758, 32758, 32759, 32759, 32759, + 32759, 32759, 32759, 32759, 32759, 32760, 32760, 32760, + 32760, 32760, 32760, 32760, 32760, 32761, 32761, 32761, + 32761, 32761, 32761, 32761, 32761, 32761, 32761, 32762, + 32762, 32762, 32762, 32762, 32762, 32762, 32762, 32762, + 32762, 32762, 32762, 32763, 32763, 32763, 32763, 32763, + 32763, 32763, 32763, 32763, 32763, 32763, 32763, 32763, + 32763, 32764, 32764, 32764, 32764, 32764, 32764, 32764, + 32764, 32764, 32764, 32764, 32764, 32764, 32764, 32764, + 32764, 32764, 32764, 32764, 32765, 32765, 32765, 32765, + 32765, 32765, 32765, 32765, 32765, 32765, 32765, 32765, + 32765, 32765, 32765, 32765, 32765, 32765, 32765, 32765, + 32765, 32765, 32765, 32765, 32765, 32765, 32766, 32766, + 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766, + 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766, + 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766, + 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766, + 32766, 32766, 32766, 32766, 32766, 32766, 32766, 32766, + 32766, 32766, 32767, 32767, 32767, 32767, 32767, 32767 +}; + +// stdt[i]=count of -i or i in botton or top of stretcht[] +static const U8 stdt[712]={ + 64, 128, 128, 128, 128, 128, 127, 128, + 127, 128, 127, 127, 127, 127, 126, 126, + 126, 126, 126, 125, 125, 124, 125, 124, + 123, 123, 123, 123, 122, 122, 121, 121, + 120, 120, 119, 119, 118, 118, 118, 116, + 117, 115, 116, 114, 114, 113, 113, 112, + 112, 111, 110, 110, 109, 108, 108, 107, + 106, 106, 105, 104, 104, 102, 103, 101, + 101, 100, 99, 98, 98, 97, 96, 96, + 94, 94, 94, 92, 92, 91, 90, 89, + 89, 88, 87, 86, 86, 84, 84, 84, + 82, 82, 81, 80, 79, 79, 78, 77, + 76, 76, 75, 74, 73, 73, 72, 71, + 70, 70, 69, 68, 67, 67, 66, 65, + 65, 64, 63, 62, 62, 61, 61, 59, + 59, 59, 57, 58, 56, 56, 55, 54, + 54, 53, 52, 52, 51, 51, 50, 49, + 49, 48, 48, 47, 47, 45, 46, 44, + 45, 43, 43, 43, 42, 41, 41, 40, + 40, 40, 39, 38, 38, 37, 37, 36, + 36, 36, 35, 34, 34, 34, 33, 32, + 33, 32, 31, 31, 30, 31, 29, 30, + 28, 29, 28, 28, 27, 27, 27, 26, + 26, 25, 26, 24, 25, 24, 24, 23, + 23, 23, 23, 22, 22, 21, 22, 21, + 20, 21, 20, 19, 20, 19, 19, 19, + 18, 18, 18, 18, 17, 17, 17, 17, + 16, 16, 16, 16, 15, 15, 15, 15, + 15, 14, 14, 14, 14, 13, 14, 13, + 13, 13, 12, 13, 12, 12, 12, 11, + 12, 11, 11, 11, 11, 11, 10, 11, + 10, 10, 10, 10, 9, 10, 9, 9, + 9, 9, 9, 8, 9, 8, 9, 8, + 8, 8, 7, 8, 8, 7, 7, 8, + 7, 7, 7, 6, 7, 7, 6, 6, + 7, 6, 6, 6, 6, 6, 6, 5, + 6, 5, 6, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 4, 5, 4, 5, + 4, 4, 5, 4, 4, 4, 4, 4, + 4, 3, 4, 4, 3, 4, 4, 3, + 3, 4, 3, 3, 3, 4, 3, 3, + 3, 3, 3, 3, 2, 3, 3, 3, + 2, 3, 2, 3, 3, 2, 2, 3, + 2, 2, 3, 2, 2, 2, 2, 3, + 2, 2, 2, 2, 2, 2, 1, 2, + 2, 2, 2, 1, 2, 2, 2, 1, + 2, 1, 2, 2, 1, 2, 1, 2, + 1, 1, 2, 1, 1, 2, 1, 1, + 2, 1, 1, 1, 1, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 1, 0, + 1, 1, 1, 0, 1, 1, 1, 0, + 1, 1, 0, 1, 1, 0, 1, 0, + 1, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 0, + 1, 0, 1, 0, 0, 1, 0, 1, + 0, 0, 1, 0, 0, 1, 0, 0, + 1, 0, 0, 1, 0, 0, 0, 1, + 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 1, 0, + 0, 0, 0, 1, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0 +}; + Predictor::Predictor(ZPAQL& zr): c8(1), hmap4(1), z(zr) { assert(sizeof(U8)==1); @@ -694,29 +1721,9 @@ Predictor::Predictor(ZPAQL& zr): assert(sizeof(U64)==8); assert(sizeof(short)==2); assert(sizeof(int)==4); - - // Initialize tables - dt2k[0]=0; - for (int i=1; i<256; ++i) - dt2k[i]=2048/i; - for (int i=0; i<1024; ++i) - dt[i]=(1<<17)/(i*2+3)*2; - for (int i=0; i<32768; ++i) - stretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000; - for (int i=0; i<4096; ++i) - squasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); - - // Verify floating point math for squash() and stretch() - U32 sqsum=0, stsum=0; - for (int i=32767; i>=0; --i) - stsum=stsum*3+stretch(i); - for (int i=4095; i>=0; --i) - sqsum=sqsum*3+squash(i-2048); - assert(stsum==3887533746u); - assert(sqsum==2278286169u); - pcode=0; pcode_size=0; + initTables=false; } Predictor::~Predictor() { @@ -732,6 +1739,39 @@ void Predictor::init() { // Initialize context hash function z.inith(); + // Initialize model independent tables + if (!initTables && isModeled()) { + initTables=true; + memcpy(dt2k, sdt2k, sizeof(dt2k)); + memcpy(dt, sdt, sizeof(dt)); + + // ssquasht[i]=int(32768.0/(1+exp((i-2048)*(-1.0/64)))); + // Copy middle 1344 of 4096 entries. + memset(squasht, 0, 1376*2); + memcpy(squasht+1376, ssquasht, 1344*2); + for (int i=2720; i<4096; ++i) squasht[i]=32767; + + // sstretcht[i]=int(log((i+0.5)/(32767.5-i))*64+0.5+100000)-100000; + int k=16384; + for (int i=0; i<712; ++i) + for (int j=stdt[i]; j>0; --j) + stretcht[k++]=i; + assert(k==32768); + for (int i=0; i<16384; ++i) + stretcht[i]=-stretcht[32767-i]; + +#ifndef NDEBUG + // Verify floating point math for squash() and stretch() + U32 sqsum=0, stsum=0; + for (int i=32767; i>=0; --i) + stsum=stsum*3+stretch(i); + for (int i=4095; i>=0; --i) + sqsum=sqsum*3+squash(i-2048); + assert(stsum==3887533746u); + assert(sqsum==2278286169u); +#endif + } + // Initialize predictions for (int i=0; i<256; ++i) h[i]=p[i]=0; @@ -801,7 +1841,7 @@ void Predictor::init() { cr.cm.resize(512); for (int j=0; j<256; ++j) { cr.cm[j*2]=1<<15; - cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)<<10); + cr.cm[j*2+1]=clamp512k(stretch(st.cminit(j)>>8)*1024); } break; case SSE: // sizebits j start limit @@ -823,6 +1863,7 @@ void Predictor::init() { // Return next bit prediction using interpreted COMP code int Predictor::predict0() { + assert(initTables); assert(c8>=1 && c8<=255); // Predict next bit @@ -922,6 +1963,7 @@ int Predictor::predict0() { // Update model with decoded bit y (0...1) void Predictor::update0(int y) { + assert(initTables); assert(y==0 || y==1); assert(c8>=1 && c8<=255); assert(hmap4>=1 && hmap4<=511); @@ -1039,6 +2081,7 @@ void Predictor::update0(int y) { // collision detection. If not found after 3 adjacent tries, replace the // row with lowest element 1 as priority. Return index of row. size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { + assert(initTables); assert(ht.size()==size_t(16)<>sizebits&255; size_t h0=(cxt*16)&(ht.size()-16); @@ -1058,7 +2101,8 @@ size_t Predictor::find(Array& ht, int sizebits, U32 cxt) { /////////////////////// Decoder /////////////////////// Decoder::Decoder(ZPAQL& z): - in(0), low(1), high(0xFFFFFFFF), curr(0), pr(z), buf(BUFSIZE) { + in(0), low(1), high(0xFFFFFFFF), curr(0), rpos(0), wpos(0), + pr(z), buf(BUFSIZE) { } void Decoder::init() { @@ -1067,35 +2111,18 @@ void Decoder::init() { else low=high=curr=0; } -// Read un-modeled input into buf[low=0..high-1] -// with curr remaining in subblock to read. -void Decoder::loadbuf() { - assert(!pr.isModeled()); - assert(low==high); - if (curr==0) { - for (int i=0; i<4; ++i) { - int c=in->get(); - if (c<0) error("unexpected end of input"); - curr=curr<<8|c; - } - } - U32 n=buf.size(); - if (n>curr) n=curr; - high=in->read(&buf[0], n); - curr-=high; - low=0; -} - // Return next bit of decoded input, which has 16 bit probability p of being 1 int Decoder::decode(int p) { + assert(pr.isModeled()); assert(p>=0 && p<65536); assert(high>low && low>0); if (currhigh) error("archive corrupted"); assert(curr>=low && curr<=high); U32 mid=low+U32(((high-low)*U64(U32(p)))>>16); // split range assert(high>mid && mid>=low); - int y=curr<=mid; - if (y) high=mid; else low=mid+1; // pick half + int y; + if (curr<=mid) y=1, high=mid; // pick half + else y=0, low=mid+1; while ((high^low)<0x1000000) { // shift out identical leading bytes high=high<<8|255; low=low<<8; @@ -1129,9 +2156,12 @@ int Decoder::decompress() { } } else { - if (low==high) loadbuf(); - if (low==high) return -1; - return buf[low++]&255; + if (curr==0) { + for (int i=0; i<4; ++i) curr=curr<<8|get(); + if (curr==0) return -1; + } + --curr; + return in->get(); } } @@ -1150,13 +2180,11 @@ int Decoder::skip() { if (curr==0) // at start? for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; while (curr>0) { - U32 n=BUFSIZE; - if (n>curr) n=curr; - U32 n1=in->read(&buf[0], n); - curr-=n1; - if (n1!=n) return -1; - if (curr==0) - for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; + while (curr>0) { + --curr; + if (in->get()<0) return error("skipped to EOF"), -1; + } + for (int i=0; i<4 && (c=in->get())>=0; ++i) curr=curr<<8|c; } if (c>=0) c=in->get(); return c; @@ -1195,6 +2223,7 @@ int PostProcessor::write(int c) { case 3: // PROG psize[0] if (c<0) error("Unexpected EOS"); hsize+=c*256; // high byte of psize + if (hsize<1) error("Empty PCOMP"); z.header.resize(hsize+300); z.cend=8; z.hbegin=z.hend=z.cend+128; @@ -1296,6 +2325,7 @@ void Decompresser::readComment(Writer* comment) { // Decompress n bytes, or all if n < 0. Return false if done bool Decompresser::decompress(int n) { assert(state==DATA); + if (decode_state==SKIP) error("decompression after skipped segment"); assert(decode_state!=SKIP); // Initialize models to start decompressing block @@ -1354,7 +2384,7 @@ void Decompresser::readSegmentEnd(char* sha1string) { error("missing end of segment marker"); } -/////////////////////////// decompress() ///////////////////// +/////////////////////////// decompress() ////////////////////// void decompress(Reader* in, Writer* out) { Decompresser d; @@ -1369,7 +2399,7 @@ void decompress(Reader* in, Writer* out) { } } -////////////////////// Encoder //////////////////// +/////////////////////////// Encoder /////////////////////////// // Initialize for start of block void Encoder::init() { @@ -1415,7 +2445,7 @@ void Encoder::compress(int c) { } } else { - if (c<0 || low==buf.size()) { + if (low && (c<0 || low==buf.size())) { out->put((low>>24)&255); out->put((low>>16)&255); out->put((low>>8)&255); @@ -1427,6 +2457,329 @@ void Encoder::compress(int c) { } } +//////////////////////////// Compiler ///////////////////////// + +// Component names +const char* compname[256]= + {"","const","cm","icm","match","avg","mix2","mix","isse","sse",0}; + +// Opcodes +const char* opcodelist[272]={ +"error","a++", "a--", "a!", "a=0", "", "", "a=r", +"b<>a", "b++", "b--", "b!", "b=0", "", "", "b=r", +"c<>a", "c++", "c--", "c!", "c=0", "", "", "c=r", +"d<>a", "d++", "d--", "d!", "d=0", "", "", "d=r", +"*b<>a","*b++", "*b--", "*b!", "*b=0", "", "", "jt", +"*c<>a","*c++", "*c--", "*c!", "*c=0", "", "", "jf", +"*d<>a","*d++", "*d--", "*d!", "*d=0", "", "", "r=a", +"halt", "out", "", "hash", "hashd","", "", "jmp", +"a=a", "a=b", "a=c", "a=d", "a=*b", "a=*c", "a=*d", "a=", +"b=a", "b=b", "b=c", "b=d", "b=*b", "b=*c", "b=*d", "b=", +"c=a", "c=b", "c=c", "c=d", "c=*b", "c=*c", "c=*d", "c=", +"d=a", "d=b", "d=c", "d=d", "d=*b", "d=*c", "d=*d", "d=", +"*b=a", "*b=b", "*b=c", "*b=d", "*b=*b","*b=*c","*b=*d","*b=", +"*c=a", "*c=b", "*c=c", "*c=d", "*c=*b","*c=*c","*c=*d","*c=", +"*d=a", "*d=b", "*d=c", "*d=d", "*d=*b","*d=*c","*d=*d","*d=", +"", "", "", "", "", "", "", "", +"a+=a", "a+=b", "a+=c", "a+=d", "a+=*b","a+=*c","a+=*d","a+=", +"a-=a", "a-=b", "a-=c", "a-=d", "a-=*b","a-=*c","a-=*d","a-=", +"a*=a", "a*=b", "a*=c", "a*=d", "a*=*b","a*=*c","a*=*d","a*=", +"a/=a", "a/=b", "a/=c", "a/=d", "a/=*b","a/=*c","a/=*d","a/=", +"a%=a", "a%=b", "a%=c", "a%=d", "a%=*b","a%=*c","a%=*d","a%=", +"a&=a", "a&=b", "a&=c", "a&=d", "a&=*b","a&=*c","a&=*d","a&=", +"a&~a", "a&~b", "a&~c", "a&~d", "a&~*b","a&~*c","a&~*d","a&~", +"a|=a", "a|=b", "a|=c", "a|=d", "a|=*b","a|=*c","a|=*d","a|=", +"a^=a", "a^=b", "a^=c", "a^=d", "a^=*b","a^=*c","a^=*d","a^=", +"a<<=a","a<<=b","a<<=c","a<<=d","a<<=*b","a<<=*c","a<<=*d","a<<=", +"a>>=a","a>>=b","a>>=c","a>>=d","a>>=*b","a>>=*c","a>>=*d","a>>=", +"a==a", "a==b", "a==c", "a==d", "a==*b","a==*c","a==*d","a==", +"aa", "a>b", "a>c", "a>d", "a>*b", "a>*c", "a>*d", "a>", +"", "", "", "", "", "", "", "", +"", "", "", "", "", "", "", "lj", +"post", "pcomp","end", "if", "ifnot","else", "endif","do", +"while","until","forever","ifl","ifnotl","elsel",";", 0}; + +// Advance in to start of next token. Tokens are delimited by white +// space. Comments inclosed in ((nested) parenthsis) are skipped. +void Compiler::next() { + assert(in); + for (; *in; ++in) { + if (*in=='\n') ++line; + if (*in=='(') state+=1+(state<0); + else if (state>0 && *in==')') --state; + else if (state<0 && *in<=' ') state=0; + else if (state==0 && *in>' ') {state=-1; break;} + } + if (!*in) error("unexpected end of config"); +} + +// convert to lower case +int tolower(int c) {return (c>='A' && c<='Z') ? c+'a'-'A' : c;} + +// return true if in==word up to white space or '(', case insensitive +bool Compiler::matchToken(const char* word) { + const char* a=in; + for (; (*a>' ' && *a!='(' && *word); ++a, ++word) + if (tolower(*a)!=tolower(*word)) return false; + return !*word && (*a<=' ' || *a=='('); +} + +// Print error message and exit +void Compiler::syntaxError(const char* msg, const char* expected) { + Array sbuf(128); // error message to report + char* s=&sbuf[0]; + strcat(s, "Config line "); + for (int i=strlen(s), r=1000000; r; r/=10) // append line number + if (line/r) s[i++]='0'+line/r%10; + strcat(s, " at "); + for (int i=strlen(s); i<40 && *in>' '; ++i) // append token found + s[i]=*in++; + strcat(s, ": "); + strncat(s, msg, 40); // append message + if (expected) { + strcat(s, ", expected: "); + strncat(s, expected, 20); // append expected token if any + } + error(s); +} + +// Read a token, which must be in the NULL terminated list or else +// exit with an error. If found, return its index. +int Compiler::rtoken(const char* list[]) { + assert(in); + assert(list); + next(); + for (int i=0; list[i]; ++i) + if (matchToken(list[i])) + return i; + syntaxError("unexpected"); + assert(0); + return -1; // not reached +} + +// Read a token which must be the specified value s +void Compiler::rtoken(const char* s) { + assert(s); + next(); + if (!matchToken(s)) syntaxError("expected", s); +} + +// Read a number in (low...high) or exit with an error +// For numbers like $N+M, return arg[N-1]+M +int Compiler::rtoken(int low, int high) { + next(); + int r=0; + if (in[0]=='$' && in[1]>='1' && in[1]<='9') { + if (in[2]=='+') r=atoi(in+3); + if (args) r+=args[in[1]-'1']; + } + else if (in[0]=='-' || (in[0]>='0' && in[0]<='9')) r=atoi(in); + else syntaxError("expected a number"); + if (rhigh) syntaxError("number too high"); + return r; +} + +// Compile HCOMP or PCOMP code. Exit on error. Return +// code for end token (POST, PCOMP, END) +int Compiler::compile_comp(ZPAQL& z) { + int op=0; + const int comp_begin=z.hend; + while (true) { + op=rtoken(opcodelist); + if (op==POST || op==PCOMP || op==END) break; + int operand=-1; // 0...255 if 2 bytes + int operand2=-1; // 0...255 if 3 bytes + if (op==IF) { + op=JF; + operand=0; // set later + if_stack.push(z.hend+1); // save jump target location + } + else if (op==IFNOT) { + op=JT; + operand=0; + if_stack.push(z.hend+1); // save jump target location + } + else if (op==IFL || op==IFNOTL) { // long if + if (op==IFL) z.header[z.hend++]=(JT); + if (op==IFNOTL) z.header[z.hend++]=(JF); + z.header[z.hend++]=(3); + op=LJ; + operand=operand2=0; + if_stack.push(z.hend+1); + } + else if (op==ELSE || op==ELSEL) { + if (op==ELSE) op=JMP, operand=0; + if (op==ELSEL) op=LJ, operand=operand2=0; + int a=if_stack.pop(); // conditional jump target location + assert(a>comp_begin && a=0); + if (j>127) syntaxError("IF too big, try IFL, IFNOTL"); + z.header[a]=j; + } + else { // IFL, IFNOTL + int j=z.hend-comp_begin+2+(op==LJ); + assert(j>=0); + z.header[a]=j&255; + z.header[a+1]=(j>>8)&255; + } + if_stack.push(z.hend+1); // save JMP target location + } + else if (op==ENDIF) { + int a=if_stack.pop(); // jump target address + assert(a>comp_begin && a=0); + if (z.header[a-1]!=LJ) { + assert(z.header[a-1]==JT || z.header[a-1]==JF || z.header[a-1]==JMP); + if (j>127) syntaxError("IF too big, try IFL, IFNOTL, ELSEL\n"); + z.header[a]=j; + } + else { + assert(a+1>8)&255; + } + } + else if (op==DO) { + do_stack.push(z.hend); + } + else if (op==WHILE || op==UNTIL || op==FOREVER) { + int a=do_stack.pop(); + assert(a>=comp_begin && a=-127) { // backward short jump + if (op==WHILE) op=JT; + if (op==UNTIL) op=JF; + if (op==FOREVER) op=JMP; + operand=j&255; + } + else { // backward long jump + j=a-comp_begin; + assert(j>=0 && j>8; + } + } + else if ((op&7)==7) { // 2 byte operand, read N + if (op==LJ) { + operand=rtoken(0, 65535); + operand2=operand>>8; + operand&=255; + } + else if (op==JT || op==JF || op==JMP) { + operand=rtoken(-128, 127); + operand&=255; + } + else + operand=rtoken(0, 255); + } + if (op>=0 && op<=255) + z.header[z.hend++]=(op); + if (operand>=0) + z.header[z.hend++]=(operand); + if (operand2>=0) + z.header[z.hend++]=(operand2); + if (z.hend>=z.header.isize()-130 || z.hend-z.hbegin+z.cend-2>65535) + syntaxError("program too big"); + } + z.header[z.hend++]=(0); // END + return op; +} + +// Compile a configuration file. Store COMP/HCOMP section in hcomp. +// If there is a PCOMP section, store it in pcomp and store the PCOMP +// command in pcomp_cmd. Replace "$1..$9+n" with args[0..8]+n + +Compiler::Compiler(const char* in_, int* args_, ZPAQL& hz_, ZPAQL& pz_, + Writer* out2_): in(in_), args(args_), hz(hz_), pz(pz_), + out2(out2_), if_stack(1000), do_stack(1000) { + line=1; + state=0; + hz.clear(); + pz.clear(); + hz.header.resize(68000); + + // Compile the COMP section of header + rtoken("comp"); + hz.header[2]=rtoken(0, 255); // hh + hz.header[3]=rtoken(0, 255); // hm + hz.header[4]=rtoken(0, 255); // ph + hz.header[5]=rtoken(0, 255); // pm + const int n=hz.header[6]=rtoken(0, 255); // n + hz.cend=7; + for (int i=0; i10) syntaxError("invalid component"); + for (int j=1; j>8; + + // Compile POST 0 END + if (op==POST) { + rtoken(0, 0); + rtoken("end"); + } + + // Compile PCOMP pcomp_cmd ; program... END + else if (op==PCOMP) { + pz.header.resize(68000); + pz.header[4]=hz.header[4]; // ph + pz.header[5]=hz.header[5]; // pm + pz.cend=8; + pz.hbegin=pz.hend=pz.cend+128; + + // get pcomp_cmd ending with ";" (case sensitive) + next(); + while (*in && *in!=';') { + if (out2) + out2->put(*in); + ++in; + } + if (*in) ++in; + + // Compile PCOMP + op=compile_comp(pz); + int len=pz.cend-2+pz.hend-pz.hbegin; // insert header size + assert(len>=0); + pz.header[0]=len&255; + pz.header[1]=len>>8; + if (op!=END) + syntaxError("expected END"); + } + else if (op!=END) + syntaxError("expected END or POST 0 END or PCOMP cmd ; ... END"); +} + ///////////////////// Compressor ////////////////////// // Write 13 byte start tag @@ -1496,20 +2849,32 @@ public: int get() {return *p++&255;} }; -// Write a block header void Compressor::startBlock(const char* hcomp) { assert(state==INIT); - assert(hcomp); - int len=toU16(hcomp)+2; + MemoryReader m(hcomp); + z.read(&m); + pz.sha1=&sha1; + assert(z.header.isize()>6); enc.out->put('z'); enc.out->put('P'); enc.out->put('Q'); - enc.out->put(1+(len>6 && hcomp[6]==0)); // level 1 or 2 + enc.out->put(1+(z.header[6]==0)); // level 1 or 2 enc.out->put(1); - for (int i=0; iput(hcomp[i]); - MemoryReader m(hcomp); - z.read(&m); + z.write(enc.out, false); + state=BLOCK1; +} + +void Compressor::startBlock(const char* config, int* args, Writer* pcomp_cmd) { + assert(state==INIT); + Compiler(config, args, z, pz, pcomp_cmd); + pz.sha1=&sha1; + assert(z.header.isize()>6); + enc.out->put('z'); + enc.out->put('P'); + enc.out->put('Q'); + enc.out->put(1+(z.header[6]==0)); // level 1 or 2 + enc.out->put(1); + z.write(enc.out, false); state=BLOCK1; } @@ -1530,40 +2895,82 @@ void Compressor::startSegment(const char* filename, const char* comment) { // Initialize encoding and write pcomp to first segment // If len is 0 then length is encoded in pcomp[0..1] +// if pcomp is 0 then get pcomp from pz.header void Compressor::postProcess(const char* pcomp, int len) { + if (state==SEG2) return; assert(state==SEG1); enc.init(); - if (pcomp) { - enc.compress(1); - if (len<=0) { - len=toU16(pcomp); - pcomp+=2; + if (!pcomp) { + len=pz.hend-pz.hbegin; + if (len>0) { + assert(pz.header.isize()>pz.hend); + assert(pz.hbegin>=0); + pcomp=(const char*)&pz.header[pz.hbegin]; } + assert(len>=0); + } + else if (len==0) { + len=toU16(pcomp); + pcomp+=2; + } + if (len>0) { + enc.compress(1); enc.compress(len&255); enc.compress((len>>8)&255); for (int i=0; iget())>=0) { - enc.compress(ch); - if (n>0) --n; + + const int BUFSIZE=1<<14; + char buf[BUFSIZE]; // input buffer + while (n) { + int nbuf=BUFSIZE; // bytes read into buf + if (n>=0 && nread(buf, nbuf); + if (nr<0 || nr>BUFSIZE || nr>nbuf) error("invalid read size"); + if (nr<=0) return false; + if (n>=0) n-=nr; + for (int i=0; i=0; + return true; } // End segment, write sha1string if present void Compressor::endSegment(const char* sha1string) { + if (state==SEG1) + postProcess(); assert(state==SEG2); enc.compress(-1); + if (verify && pz.hend) { + pz.run(-1); + pz.flush(); + } enc.out->put(0); enc.out->put(0); enc.out->put(0); @@ -1578,6 +2985,35 @@ void Compressor::endSegment(const char* sha1string) { state=BLOCK2; } +// End segment, write checksum and size is verify is true +char* Compressor::endSegmentChecksum(int64_t* size, bool dosha1) { + if (state==SEG1) + postProcess(); + assert(state==SEG2); + enc.compress(-1); + if (verify && pz.hend) { + pz.run(-1); + pz.flush(); + } + enc.out->put(0); + enc.out->put(0); + enc.out->put(0); + enc.out->put(0); + if (verify) { + if (size) *size=sha1.usize(); + memcpy(sha1result, sha1.result(), 20); + } + if (verify && dosha1) { + enc.out->put(253); + for (int i=0; i<20; ++i) + enc.out->put(sha1result[i]); + } + else + enc.out->put(254); + state=BLOCK2; + return verify ? sha1result : 0; +} + // End block void Compressor::endBlock() { assert(state==BLOCK2); @@ -1587,17 +3023,29 @@ void Compressor::endBlock() { /////////////////////////// compress() /////////////////////// -void compress(Reader* in, Writer* out, int level) { - assert(level>=1); - Compressor c; - c.setInput(in); - c.setOutput(out); - c.startBlock(level); - c.startSegment(); - c.postProcess(); - c.compress(); - c.endSegment(); - c.endBlock(); +void compress(Reader* in, Writer* out, const char* method, + const char* filename, const char* comment, bool dosha1) { + + // Get block size + int bs=4; + if (method && method[0] && method[1]>='0' && method[1]<='9') { + bs=method[1]-'0'; + if (method[2]>='0' && method[2]<='9') bs=bs*10+method[2]-'0'; + if (bs>11) bs=11; + } + bs=(0x100000<read((char*)sb.data(), bs))>0) { + sb.resize(n); + compressBlock(&sb, out, method, filename, comment, dosha1); + filename=0; + comment=0; + sb.resize(0); + } } //////////////////////// ZPAQL::assemble() //////////////////// @@ -1611,9 +3059,9 @@ code in rcode[0..rcode_size-1]. Execution begins at rcode[0]. It will not write beyond the end of rcode, but in any case it returns the number of bytes that would have been written. It returns 0 in case of error. -The assembled code implements run() and returns 1 if successful or -0 if the ZPAQL code executes an invalid instruction or jumps out of -bounds. +The assembled code implements int run() and returns 0 if successful, +1 if the ZPAQL code executes an invalid instruction or jumps out of +bounds, or 2 if OUT throws bad_alloc, or 3 for other OUT exceptions. A ZPAQL virtual machine has the following state. All values are unsigned and initially 0: @@ -1640,7 +3088,7 @@ the only 3 byte instruction. They are organized: 00dddxxx = unary opcode xxx on destination ddd (ddd < 111) 00111xxx = special instruction xxx 01dddsss = assignment: ddd = sss (ddd < 111) - 1xxxxsss = operation sxxx from sss to a + 1xxxxsss = operation xxxx from sss to a The meaning of sss and ddd are as follows: @@ -1727,8 +3175,17 @@ In 64 bit mode, the following additional registers are used: */ // Called by out -static void flush1(ZPAQL* z) { - z->flush(); +static int flush1(ZPAQL* z) { + try { + z->flush(); + return 0; + } + catch(std::bad_alloc& x) { + return 2; + } + catch(...) { + return 3; + } } // return true if op is an undefined ZPAQL instruction @@ -1737,6 +3194,19 @@ static bool iserr(int op) { || op==58 || (op<64 && (op%8==5 || op%8==6)); } +// Return length of ZPAQL instruction at hcomp[0]. Assume 0 padding at end. +// A run of identical ++ or -- is counted as 1 instruction. +static int oplen(const U8* hcomp) { + if (*hcomp==255) return 3; + if (*hcomp%8==7) return 2; + if (*hcomp<51 && (*hcomp%8-1)/2==0) { // ++ or -- opcode + int i; + for (i=1; i<127 && hcomp[i]==hcomp[0]; ++i); + return i; + } + return 1; +} + // Write k bytes of x to rcode[o++] MSB first static void put(U8* rcode, int n, int& o, U32 x, int k) { while (k-->0) { @@ -1785,10 +3255,10 @@ int ZPAQL::assemble() { error("JIT supported only for x86-32 and x86-64"); const U8* hcomp=&header[hbegin]; - const int hlen=hend-hbegin+1; + const int hlen=hend-hbegin+2; const int msize=m.size(); const int hsize=h.size(); - const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax.. + static const int regcode[8]={2,6,7,5}; // a,b,c,d.. -> edx,esi,edi,ebp,eax.. Array it(hlen); // hcomp -> rcode locations int done=0; // number of instructions assembled (0..hlen) int o=5; // rcode output index, reserve space for jmp @@ -1806,7 +3276,7 @@ int ZPAQL::assemble() { put2(0x8929); // mov [rcx], ebp put2l(0x48b9, &f); // mov rcx, f put2(0x8919); // mov [rcx], ebx - put4(0x4883c438); // add rsp, 56 + put4(0x4883c408); // add rsp, 8 put2(0x415f); // pop r15 put2(0x415e); // pop r14 put2(0x415d); // pop r13 @@ -1818,12 +3288,12 @@ int ZPAQL::assemble() { put2a(0x893d, &c); // mov [c], edi put2a(0x892d, &d); // mov [d], ebp put2a(0x891d, &f); // mov [f], ebx - put3(0x83c43c); // add esp, 60 + put3(0x83c40c); // add esp, 12 } - put1(0x5d); // pop ebp put1(0x5b); // pop ebx put1(0x5f); // pop edi put1(0x5e); // pop esi + put1(0x5d); // pop ebp put1(0xc3); // ret // Code for the out instruction. @@ -1832,50 +3302,54 @@ int ZPAQL::assemble() { if (S==8) { put2l(0x48b8, &outbuf[0]);// mov rax, outbuf.p put2l(0x49ba, &bufptr); // mov r10, &bufptr - put3(0x418b0a); // mov ecx, [r10] - put3(0x891408); // mov [rax+rcx], edx - put2(0xffc1); // inc ecx + put3(0x418b0a); // mov rcx, [r10] + put3(0x881408); // mov [rax+rcx], dl + put2(0xffc1); // inc rcx put3(0x41890a); // mov [r10], ecx - put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() - put2(0x7401); // jz L1 + put2a(0x81f9, outbuf.size()); // cmp rcx, outbuf.size() + put2(0x7403); // jz L1 + put2(0x31c0); // xor eax, eax put1(0xc3); // ret - put4(0x4883ec30); // L1: sub esp, 48 ; call flush1(this) - put4(0x48893c24); // mov [rsp], rdi - put5(0x48897424,8); // mov [rsp+8], rsi - put5(0x48895424,16); // mov [rsp+16], rdx - put5(0x48894c24,24); // mov [rsp+24], rcx -#ifndef _WIN32 + + put1(0x55); // L1: push rbp ; call flush1(this) + put1(0x57); // push rdi + put1(0x56); // push rsi + put1(0x52); // push rdx + put1(0x51); // push rcx + put3(0x4889e5); // mov rbp, rsp + put4(0x4883c570); // add rbp, 112 +#if defined(unix) && !defined(__CYGWIN__) put2l(0x48bf, this); // mov rdi, this #else // Windows put2l(0x48b9, this); // mov rcx, this #endif put2l(0x49bb, &flush1); // mov r11, &flush1 put3(0x41ffd3); // call r11 - put5(0x488b4c24,24); // mov rcx, [rsp+24] - put5(0x488b5424,16); // mov rdx, [rsp+16] - put5(0x488b7424,8); // mov rsi, [rsp+8] - put4(0x488b3c24); // mov rdi, [rsp] - put4(0x4883c430); // add esp, 48 - put1(0xc3); // ret + put1(0x59); // pop rcx + put1(0x5a); // pop rdx + put1(0x5e); // pop rsi + put1(0x5f); // pop rdi + put1(0x5d); // pop rbp } else { put1a(0xb8, &outbuf[0]); // mov eax, outbuf.p put2a(0x8b0d, &bufptr); // mov ecx, [bufptr] - put3(0x891408); // mov [eax+ecx], edx + put3(0x881408); // mov [eax+ecx], dl put2(0xffc1); // inc ecx put2a(0x890d, &bufptr); // mov [bufptr], ecx put2a(0x81f9, outbuf.size()); // cmp ecx, outbuf.size() - put2(0x7401); // jz L1 + put2(0x7403); // jz L1 + put2(0x31c0); // xor eax, eax put1(0xc3); // ret - put3(0x83ec08); // L1: sub esp, 8 + put3(0x83ec0c); // L1: sub esp, 12 put4(0x89542404); // mov [esp+4], edx put3a(0xc70424, this); // mov [esp], this put1a(0xb8, &flush1); // mov eax, &flush1 put2(0xffd0); // call eax put4(0x8b542404); // mov edx, [esp+4] - put3(0x83c408); // add esp, 8 - put1(0xc3); // ret + put3(0x83c40c); // add esp, 12 } + put1(0xc3); // ret // Set it[i]=1 for each ZPAQL instruction reachable from the previous // instruction + 2 if reachable by a jump (or 3 if both). @@ -1887,21 +3361,21 @@ int ZPAQL::assemble() { for (int i=0; i>24);// jt,jf,jmp if (op==63) next1=NONE; // jmp if ((next2<0 || next2>=hlen) && next2!=NONE) next2=hlen-1; // error - if (next1!=NONE && !(it[next1]&1)) it[next1]|=1, ++done; - if (next2!=NONE && !(it[next2]&2)) it[next2]|=2, ++done; + if (next1>=0 && next1=0 && next20); // Set it[i] bits 2-3 to 4, 8, or 12 if a comparison - // (<, >, == respectively) does not need to save the result in f, + // (==, <, > respectively) does not need to save the result in f, // or if a conditional jump (jt, jf) does not need to read f. // This is true if a comparison is followed directly by a jt/jf, // the jt/jf is not a jump target, the byte before is not a jump @@ -1939,16 +3413,16 @@ int ZPAQL::assemble() { // Start of run(): Save x86 and load ZPAQL registers const int start=o; assert(start>=16); + put1(0x55); // push ebp/rbp put1(0x56); // push esi/rsi put1(0x57); // push edi/rdi put1(0x53); // push ebx/rbx - put1(0x55); // push ebp/rbp if (S==8) { put2(0x4154); // push r12 put2(0x4155); // push r13 put2(0x4156); // push r14 put2(0x4157); // push r15 - put4(0x4883ec38); // sub rsp, 56 + put4(0x4883ec08); // sub rsp, 8 put2l(0x48b8, &a); // mov rax, a put2(0x8b10); // mov edx, [rax] put2l(0x48b8, &b); // mov rax, b @@ -1965,7 +3439,7 @@ int ZPAQL::assemble() { put2l(0x49bf, &m[0]); // mov r15, m } else { - put3(0x83ec3c); // sub esp, 60 + put3(0x83ec0c); // sub esp, 12 put2a(0x8b15, &a); // mov edx, [a] put2a(0x8b35, &b); // mov esi, [b] put2a(0x8b3d, &c); // mov edi, [c] @@ -1975,8 +3449,10 @@ int ZPAQL::assemble() { // Assemble in multiple passes until every byte of hcomp has a translation for (int istart=0; istart=0 && i0 && it[i]<16); @@ -2005,9 +3481,9 @@ int ZPAQL::assemble() { const int ddd=op/8%8; const int sss=op%8; - // error instruction: return 0 + // error instruction: return 1 if (iserr(op)) { - put2(0x31c0); // xor eax, eax + put1a(0xb8, 1); // mov eax, 1 put1a(0xe9, halt-o-4); // jmp near halt continue; } @@ -2060,10 +3536,10 @@ int ZPAQL::assemble() { put2(0x87d0+regcode[ddd]); // xchg edx, ddd break; case 1: // ddd++ - put2(0xffc0+regcode[ddd]); // inc ddd + put3(0x83c000+256*regcode[ddd]+inc); // add ddd, inc break; case 2: // ddd-- - put2(0xffc8+regcode[ddd]); // dec ddd + put3(0x83e800+256*regcode[ddd]+inc); // sub ddd, inc break; case 3: // ddd! put2(0xf7d0+regcode[ddd]); // not ddd @@ -2086,10 +3562,10 @@ int ZPAQL::assemble() { put2(0x8611); // xchg dl, [ecx] break; case 1: // ddd++ - put2(0xfe01); // inc byte [ecx] + put3(0x800100+inc); // add byte [ecx], inc break; case 2: // ddd-- - put2(0xfe09); // dec byte [ecx] + put3(0x802900+inc); // sub byte [ecx], inc break; case 3: // ddd! put2(0xf611); // not byte [ecx] @@ -2101,7 +3577,7 @@ int ZPAQL::assemble() { case 7: // jt, jf { assert(code>=0 && code<16); - const int jtab[2][4]={{5,4,2,7},{4,5,3,6}}; + static const unsigned char jtab[2][4]={{5,4,2,7},{4,5,3,6}}; // jnz,je,jb,ja, jz,jne,jae,jbe if (code<4) put2(0x84db); // test bl, bl if (arg>=128 && arg-257-i>=0 && o-it[arg-257-i]<120) @@ -2118,10 +3594,10 @@ int ZPAQL::assemble() { put2(0x8711); // xchg edx, [ecx] break; case 1: // ddd++ - put2(0xff01); // inc dword [ecx] + put3(0x830100+inc); // add dword [ecx], inc break; case 2: // ddd-- - put2(0xff09); // dec dword [ecx] + put3(0x832900+inc); // sub dword [ecx], inc break; case 3: // ddd! put2(0xf711); // not dword [ecx] @@ -2141,11 +3617,14 @@ int ZPAQL::assemble() { case 7: // special switch(op) { case 56: // halt - put1a(0xb8, 1); // mov eax, 1 + put2(0x31c0); // xor eax, eax ; return 0 put1a(0xe9, halt-o-4); // jmp near halt break; case 57: // out put1a(0xe8, outlabel-o-4);// call outlabel + put3(0x83f800); // cmp eax, 0 ; returned error code + put2(0x7405); // je L1: + put1a(0xe9, halt-o-4); // jmp near halt ; L1: break; case 59: // hash: a = (a + *b + 512) * 773 put3a(0x8d8410, 512); // lea edx, [eax+edx+512] @@ -2197,7 +3676,7 @@ int ZPAQL::assemble() { else put3a(0x031485, &h[0]); // add edx, [h+eax*4] } else if (sss<7) put2(0x01c2+8*regcode[sss]);// add edx, sss - else if (arg>128) put2a(0x81c2, arg); // add edx, n + else if (arg>=128) put2a(0x81c2, arg); // add edx, n else put3(0x83c200+arg); // add edx, byte n break; case 17: // a-= @@ -2385,7 +3864,7 @@ int Predictor::assemble_p() { if (S==4) put4(0x8b7c2414); // mov edi,[esp+0x14] ; pr else { -#ifdef _WIN32 +#if !defined(unix) || defined(__CYGWIN__) put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) #endif } @@ -2673,9 +4152,8 @@ int Predictor::assemble_p() { put4(0x660ff5c3); // pmaddwd xmm0, xmm3 } else { // accumulate sum in xmm0 - put4(0xf30f6fd1); // movdqu xmm2, xmm1 - put4(0x660ff5d3); // pmaddwd xmm2, xmm3 - put4(0x660ffec2); // paddd, xmm0, xmm2 + put4(0x660ff5cb); // pmaddwd xmm1, xmm3 + put4(0x660ffec1); // paddd xmm0, xmm1 } } @@ -2688,13 +4166,13 @@ int Predictor::assemble_p() { put4(0x660ffec1); // paddd xmm0, xmm1 put4(0x660f7ec0); // movd eax, xmm0 ; p[i] put3(0xc1f808); // sar eax, 8 - put1a(0xb9, 2047); // mov ecx, 2047 ; clamp2k - put2(0x39c8); // cmp eax, ecx - put3(0x0f4fc1); // cmovg eax, ecx - put2(0xf7d1); // not ecx ; -2048 - put2(0x39c8); // cmp eax, ecx - put3(0x0f4cc1); // cmovl eax, ecx - put2a(0x8987, off(p[i])); // mov [edi+&p[i]], eax + put1a(0x3d, 2047); // cmp eax, 2047 + put2(0x7e05); // jle L1 + put1a(0xb8, 2047); // mov eax, 2047 + put1a(0x3d, -2048); // L1: cmp eax, -2048 + put2(0x7d05); // jge, L2 + put1a(0xb8, -2048); // mov eax, -2048 + put2a(0x8987, off(p[i])); // L2: mov [edi+&p[i]], eax break; case SSE: // sizebits j start limit @@ -2775,7 +4253,7 @@ int Predictor::assemble_p() { put4(0x8b6c2418); // mov ebp,[esp+0x18] ; (2nd arg = y) } else { -#ifndef _WIN32 +#if defined(unix) && !defined(__CYGWIN__) // (1st arg already in rdi) put3(0x4889f5); // mov rbp, rsi (2nd arg in Linux-64) #else put3(0x4889cf); // mov rdi, rcx (1st arg in Win64) @@ -2882,23 +4360,23 @@ int Predictor::assemble_p() { put1a(0x05, (1<<12)); // add eax, 4096 put3(0xc1f80d); // sar eax, 13 put3(0x0304d6); // add eax, [esi+edx*8] ; wt[0] - put1a(0xbb, (1<<19)-1); // mov ebx, 524287 - put2(0x39d8); // cmp eax, ebx - put3(0x0f4fc3); // cmovg eax, ebx - put2(0xf7d3); // not ebx ; -524288 - put2(0x39d8); // cmp eax, ebx - put3(0x0f4cc3); // cmovl eax, ebx - put3(0x8904d6); // mov [esi+edx*8], eax + put1a(0x3d, (1<<19)-1); // cmp eax, (1<<19)-1 + put2(0x7e05); // jle L1 + put1a(0xb8, (1<<19)-1); // mov eax, (1<<19)-1 + put1a(0x3d, 0xfff80000); // cmp eax, -1<<19 + put2(0x7d05); // jge L2 + put1a(0xb8, 0xfff80000); // mov eax, -1<<19 + put3(0x8904d6); // L2: mov [esi+edx*8], eax put3(0x83c110); // add ecx, 16 ; err put3(0xc1f905); // sar ecx, 5 put4(0x034cd604); // add ecx, [esi+edx*8+4] ; wt[1] - put1a(0xb8, (1<<19)-1); // mov eax, 524287 - put2(0x39c1); // cmp ecx, eax - put3(0x0f4fc8); // cmovg ecx, eax - put2(0xf7d0); // not eax ; -524288 - put2(0x39c1); // cmp ecx, eax - put3(0x0f4cc8); // cmovl ecx, eax - put4(0x894cd604); // mov [esi+edx*8+4], ecx + put2a(0x81f9, (1<<19)-1); // cmp ecx, (1<<19)-1 + put2(0x7e05); // jle L3 + put1a(0xb9, (1<<19)-1); // mov ecx, (1<<19)-1 + put2a(0x81f9, 0xfff80000); // cmp ecx, -1<<19 + put2(0x7d05); // jge L4 + put1a(0xb9, 0xfff80000); // mov ecx, -1<<19 + put4(0x894cd604); // L4: mov [esi+edx*8+4], ecx } break; @@ -3093,13 +4571,13 @@ int Predictor::assemble_p() { put1a(0x05, 1<<12); // add eax, 1<<12 put3(0xc1f80d); // sar eax, 13 put2(0x0306); // add eax, [esi] - put1a(0xba, (1<<19)-1); // mov edx, (1<<19)-1 - put2(0x39d0); // cmp eax, edx - put3(0x0f4fc2); // cmovg eax, edx - put2(0xf7d2); // not edx - put2(0x39d0); // cmp eax, edx - put3(0x0f4cc2); // cmovl eax, edx - put2(0x8906); // mov [esi], eax + put1a(0x3d, (1<<19)-1); // cmp eax, (1<<19)-1 + put2(0x7e05); // jge L1 + put1a(0xb8, (1<<19)-1); // mov eax, (1<<19)-1 + put1a(0x3d, 0xfff80000); // cmp eax, -1<<19 + put2(0x7d05); // jle L2 + put1a(0xb8, 0xfff80000); // mov eax, -1<<19 + put2(0x8906); // L2: mov [esi], eax if (kpcode_size) { + allocx(pcode, pcode_size, n); + n=assemble_p(); + } + if (!pcode || n<15 || pcode_size<15) + error("run JIT failed"); } assert(pcode && pcode[0]); - return ((int(*)(Predictor*))&pcode[0])(this); + return ((int(*)(Predictor*))&pcode[10])(this); #endif } @@ -3172,15 +4654,3098 @@ void ZPAQL::run(U32 input) { run0(input); #else if (!rcode) { + allocx(rcode, rcode_size, (hend*10+4096)&-4096); int n=assemble(); - allocx(rcode, rcode_size, n); - if (!rcode || n<10 || rcode_size<10 || n!=assemble()) + if (n>rcode_size) { + allocx(rcode, rcode_size, n); + n=assemble(); + } + if (!rcode || n<10 || rcode_size<10) error("run JIT failed"); } a=input; - if (!((int(*)())(&rcode[0]))()) - libzpaq::error("Bad ZPAQL opcode"); + const U32 rc=((int(*)())(&rcode[0]))(); + if (rc==0) return; + else if (rc==1) libzpaq::error("Bad ZPAQL opcode"); + else if (rc==2) libzpaq::error("Out of memory"); + else if (rc==3) libzpaq::error("Write error"); + else libzpaq::error("ZPAQL execution error"); #endif } +////////////////////////// divsufsort /////////////////////////////// + +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*- Constants -*/ +#define INLINE __inline +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? -1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. */ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. */ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. */ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n) { + int *PAb, *ISAb, *buf; +#ifdef _OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef _OPENMP + int d0, d1; + int tmp; +#endif + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. */ +#ifdef _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) + { + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of tyoe B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n); + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + + /* Copy to output string. */ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} + +// End divsufsort.c + +/////////////////////////////// add /////////////////////////////////// + +// Convert non-negative decimal number x to string of at least n digits +std::string itos(int64_t x, int n=1) { + assert(x>=0); + assert(n>=0); + std::string r; + for (; x || n>0; x/=10, --n) r=std::string(1, '0'+x%10)+r; + return r; +} + +// E8E9 transform of buf[0..n-1] to improve compression of .exe and .dll. +// Patterns (E8|E9 xx xx xx 00|FF) at offset i replace the 3 middle +// bytes with x+i mod 2^24, LSB first, reading backward. +void e8e9(unsigned char* buf, int n) { + for (int i=n-5; i>=0; --i) { + if (((buf[i]&254)==0xe8) && ((buf[i+4]+1)&254)==0) { + unsigned a=(buf[i+1]|buf[i+2]<<8|buf[i+3]<<16)+i; + buf[i+1]=a; + buf[i+2]=a>>8; + buf[i+3]=a>>16; + } + } +} + +// Encode inbuf to buf using LZ77. args are as follows: +// args[0] is log2 buffer size in MB. +// args[1] is level (1=var. length, 2=byte aligned lz77, 3=bwt) + 4 if E8E9. +// args[2] is the lz77 minimum match length and context order. +// args[3] is the lz77 higher context order to search first, or else 0. +// args[4] is the log2 hash bucket size (number of searches). +// args[5] is the log2 hash table size. If 21+args[0] then use a suffix array. +// args[6] is the secondary context look ahead +// sap is pointer to external suffix array of inbuf or 0. If supplied and +// args[0]=5..7 then it is assumed that E8E9 was already applied to +// both the input and sap and the input buffer is not modified. + +class LZBuffer: public libzpaq::Reader { + libzpaq::Array ht;// hash table, confirm in low bits, or SA+ISA + const unsigned char* in; // input pointer + const int checkbits; // hash confirmation size or lg(ISA size) + const int level; // 1=var length LZ77, 2=byte aligned LZ77, 3=BWT + const unsigned htsize; // size of hash table + const unsigned n; // input length + unsigned i; // current location in in (0 <= i < n) + const unsigned minMatch; // minimum match length + const unsigned minMatch2; // second context order or 0 if not used + const unsigned maxMatch; // longest match length allowed + const unsigned maxLiteral; // longest literal length allowed + const unsigned lookahead; // second context look ahead + unsigned h1, h2; // low, high order context hashes of in[i..] + const unsigned bucket; // number of matches to search per hash - 1 + const unsigned shift1, shift2; // how far to shift h1, h2 per hash + const int minMatchBoth; // max(minMatch, minMatch2) + const unsigned rb; // number of level 1 r bits in match code + unsigned bits; // pending output bits (level 1) + unsigned nbits; // number of bits in bits + unsigned rpos, wpos; // read, write pointers + unsigned idx; // BWT index + const unsigned* sa; // suffix array for BWT or LZ77-SA + unsigned* isa; // inverse suffix array for LZ77-SA + enum {BUFSIZE=1<<14}; // output buffer size + unsigned char buf[BUFSIZE]; // output buffer + + void write_literal(unsigned i, unsigned& lit); + void write_match(unsigned len, unsigned off); + void fill(); // encode to buf + + // write k bits of x + void putb(unsigned x, int k) { + x&=(1<7) { + assert(wpos>=8, nbits-=8; + } + } + + // write last byte + void flush() { + assert(wpos0) buf[wpos++]=bits; + bits=nbits=0; + } + + // write 1 byte + void put(int c) { + assert(wpos 00) = match 4*n+ll at offset (q<=65536) r=16, x>>=16; + if (x>=256) r+=8, x>>=8; + if (x>=16) r+=4, x>>=4; + assert(x>=0 && x<16); + return + "\x00\x01\x02\x02\x03\x03\x03\x03\x04\x04\x04\x04\x04\x04\x04\x04"[x]+r; +} + +// return number of 1 bits in x +int nbits(unsigned x) { + int r; + for (r=0; x; x>>=1) r+=x&1; + return r; +} + +// Read n bytes of compressed output into p and return number of +// bytes read in 0..n. 0 signals EOF (overrides Reader). +int LZBuffer::read(char* p, int n) { + if (rpos==wpos) fill(); + int nr=n; + if (nr>int(wpos-rpos)) nr=wpos-rpos; + if (nr) memcpy(p, buf+rpos, nr); + rpos+=nr; + assert(rpos<=wpos); + if (rpos==wpos) rpos=wpos=0; + return nr; +} + +LZBuffer::LZBuffer(StringBuffer& inbuf, int args[], const unsigned* sap): + ht((args[1]&3)==3 ? (inbuf.size()+1)*!sap // for BWT suffix array + : args[5]-args[0]<21 ? 1u<0 ? (args[5]-1)/minMatch+1 : 1), + shift2(minMatch2>0 ? (args[5]-1)/minMatch2+1 : 0), + minMatchBoth(MAX(minMatch, minMatch2+lookahead)+4), + rb(args[0]>4 ? args[0]-4 : 0), + bits(0), nbits(0), rpos(0), wpos(0), + idx(0), sa(0), isa(0) { + assert(args[0]>=0); + assert(n<=(1u<<20<=1 && args[1]<=7 && args[1]!=4); + assert(level>=1 && level<=3); + if ((minMatch<4 && level==1) || (minMatch<1 && level==2)) + error("match length $3 too small"); + + // e8e9 transform + if (args[1]>4 && !sap) e8e9(inbuf.data(), n); + + // build suffix array if not supplied + if (args[5]-args[0]>=21 || level==3) { // LZ77-SA or BWT + if (sap) + sa=sap; + else { + assert(ht.size()>=n); + assert(ht.size()>0); + sa=&ht[0]; + if (n>0) divsufsort((const unsigned char*)in, (int*)sa, n); + } + if (level<3) { + assert(ht.size()>=(n*(sap==0))+(1u<<17<0 ? in[n-1] : 255); + else if (i>n) put(idx&255), idx>>=8; + else if (sa[i-1]==0) idx=i, put(255); + else put(in[sa[i-1]-1]); + } + return; + } + + // LZ77: scan the input + unsigned lit=0; // number of output literals pending + const unsigned mask=(1<0 && in[p+l1-1]==in[i+l1-1]; --l1); + int score=int(l-l1)*8-lg(i-p)-4*(lit==0 && l1>0)-11; + for (unsigned a=0; abscore) blen=l, bp=p, blit=l1, bscore=score; + if (l255) break; + } + } + } + if (bscore<=0 || blen0) { + for (unsigned k=0; k<=bucket; ++k) { + unsigned p=ht[h2^k]; + if (p && (p&mask)==(in[i+3]&mask)) { + p>>=checkbits; + if (p=minMatch2+lookahead) { + int l1; // length back from lookahead + for (l1=lookahead; l1>0 && in[p+l1-1]==in[i+l1-1]; --l1); + assert(l1>=0 && l1<=int(lookahead)); + int score=int(l-l1)*8-lg(i-p)-8*(lit==0 && l1>0)-11; + if (score>bscore) blen=l, bp=p, blit=l1, bscore=score; + } + } + } + if (blen>=128) break; + } + } + + // Search the lower order context + if (!minMatch2 || blen>=checkbits; + if (p0)-11; + if (score>bscore) blen=l, bp=p, blit=0, bscore=score; + } + } + if (blen>=128) break; + } + } + } + + // If match is long enough, then output any pending literals first, + // and then the match. blen is the length of the match. + assert(i>=bp); + const unsigned off=i-bp; // offset + if (off>0 && bscore>0 + && blen-blit>=minMatch+(level==2)*((off>=(1<<16))+(off>=(1<<24)))) { + lit+=blit; + write_literal(i+blit, lit); + write_match(blen-blit, off); + } + + // Otherwise add to literal length + else { + blen=1; + ++lit; + } + + // Update index, advance blen bytes + if (isa) + i+=blen; + else { + while (blen--) { + if (i+minMatchBoth>19)&bucket; + const unsigned p=(i<=maxLiteral) + write_literal(i, lit); + } + + // Write pending literals at end of input + assert(i<=n); + if (i==n) { + write_literal(n, lit); + flush(); + } +} + +// Write literal sequence in[i-lit..i-1], set lit=0 +void LZBuffer::write_literal(unsigned i, unsigned& lit) { + assert(lit>=0); + assert(i>=0 && i<=n); + assert(i>=lit); + if (level==1) { + if (lit<1) return; + int ll=lg(lit); + assert(ll>=1 && ll<=24); + putb(0, 2); + --ll; + while (--ll>=0) { + putb(1, 1); + putb((lit>>ll)&1, 1); + } + putb(0, 1); + while (lit) putb(in[i-lit--], 8); + } + else { + assert(level==2); + while (lit>0) { + unsigned lit1=lit; + if (lit1>64) lit1=64; + put(lit1-1); + for (unsigned j=i-lit; j=minMatch && len<=maxMatch); + assert(off>0); + assert(len>=4); + assert(rb>=0 && rb<=8); + int ll=lg(len)-1; + assert(ll>=2); + off+=(1<=0 && lo<=23); + putb((lo+8)>>3, 2);// mm + putb(lo&7, 3); // mmm + while (--ll>=2) { // n + putb(1, 1); + putb((len>>ll)&1, 1); + } + putb(0, 1); + putb(len&3, 2); // ll + putb(off, rb); // r + putb(off>>rb, lo); // q + } + + // x[2]:len[6] off[x-1] + else { + assert(level==2); + assert(minMatch>=1 && minMatch<=64); + --off; + while (len>0) { // Split long matches to len1=minMatch..minMatch+63 + const unsigned len1=len>minMatch*2+63 ? minMatch+63 : + len>minMatch+63 ? len-minMatch : len; + assert(wpos=minMatch && len1>8); + put(off); + } + else if (off<(1<<24)) { + put(128+len1-minMatch); + put(off>>16); + put(off>>8); + put(off); + } + else { + put(192+len1-minMatch); + put(off>>24); + put(off>>16); + put(off>>8); + put(off); + } + len-=len1; + } + } +} + +// Generate a config file from the method argument with syntax: +// {0|x|s|i}[N1[,N2]...][{ciamtswf}[N1[,N2]]...]... +std::string makeConfig(const char* method, int args[]) { + assert(method); + const char type=method[0]; + assert(type=='x' || type=='s' || type=='0' || type=='i'); + + // Read "{x|s|i|0}N1,N2...N9" into args[0..8] ($1..$9) + args[0]=0; // log block size in MiB + args[1]=0; // 0=none, 1=var-LZ77, 2=byte-LZ77, 3=BWT, 4..7 adds E8E9 + args[2]=0; // lz77 minimum match length + args[3]=0; // secondary context length + args[4]=0; // log searches + args[5]=0; // lz77 hash table size or SA if args[0]+21 + args[6]=0; // secondary context look ahead + args[7]=0; // not used + args[8]=0; // not used + if (isdigit(*++method)) args[0]=0; + for (int i=0; i<9 && (isdigit(*method) || *method==',' || *method=='.');) { + if (isdigit(*method)) + args[i]=args[i]*10+*method-'0'; + else if (++i<9) + args[i]=0; + ++method; + } + + // "0..." = No compression + if (type=='0') + return "comp 0 0 0 0 0 hcomp end\n"; + + // Generate the postprocessor + std::string hdr, pcomp; + const int level=args[1]&3; + const bool doe8=args[1]>=4 && args[1]<=7; + + // LZ77+Huffman, with or without E8E9 + if (level==1) { + const int rb=args[0]>4 ? args[0]-4 : 0; + hdr="comp 9 16 0 $1+20 "; + pcomp= + "pcomp lazy2 3 ;\n" + " (r1 = state\n" + " r2 = len - match or literal length\n" + " r3 = m - number of offset bits expected\n" + " r4 = ptr to buf\n" + " r5 = r - low bits of offset\n" + " c = bits - input buffer\n" + " d = n - number of bits in c)\n" + "\n" + " a> 255 if\n"; + if (doe8) + pcomp+= + " b=0 d=r 4 do (for b=0..d-1, d = end of buf)\n" + " a=b a==d ifnot\n" + " a+= 4 a>= 8 b++\n" + " *b=a a>>= 8 b++\n" + " *b=a b++\n" + " endif\n" + " b=c\n" + " endif\n" + " endif\n" + " a=*b out b++\n" + " forever\n" + " endif\n" + "\n"; + pcomp+= + " (reset state)\n" + " a=0 b=0 c=0 d=0 r=a 1 r=a 2 r=a 3 r=a 4\n" + " halt\n" + " endif\n" + "\n" + " a<<=d a+=c c=a (bits+=a< 0 if (if (bits&3))\n" + " a-- a<<= 3 r=a 3 (m=((bits&3)-1)*8)\n" + " a=c a>>= 2 c=a (bits>>=2)\n" + " b=r 3 a&= 7 a+=b r=a 3 (m+=bits&7)\n" + " a=c a>>= 3 c=a (bits>>=3)\n" + " a=d a-= 5 d=a (n-=5)\n" + " a= 1 r=a 1 (state=1)\n" + " else (literal, discard 00)\n" + " a=c a>>= 2 c=a (bits>>=2)\n" + " d-- d-- (n-=2)\n" + " a= 3 r=a 1 (state=3)\n" + " endif\n" + " endif\n" + "\n" + " (while state==1 && n>=3 (expect match length n*4+ll -> r2))\n" + " do a=r 1 a== 1 if a=d a> 2 if\n" + " a=c a&= 1 a== 1 if (if bits&1)\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " b=r 2 a=c a&= 1 a+=b a+=b r=a 2 (len+=len+(bits&1))\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " d-- d-- (n-=2)\n" + " else\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " a=r 2 a<<= 2 b=a (len<<=2)\n" + " a=c a&= 3 a+=b r=a 2 (len+=bits&3)\n" + " a=c a>>= 2 c=a (bits>>=2)\n" + " d-- d-- d-- (n-=3)\n"; + if (rb) + pcomp+=" a= 5 r=a 1 (state=5)\n"; + else + pcomp+=" a= 2 r=a 1 (state=2)\n"; + pcomp+= + " endif\n" + " forever endif endif\n" + "\n"; + if (rb) pcomp+= // save r in r5 + " (if state==5 && n>=8) (expect low bits of offset to put in r5)\n" + " a=r 1 a== 5 if a=d a> "+itos(rb-1)+" if\n" + " a=c a&= "+itos((1<>= "+itos(rb)+" c=a\n" + " a=d a-= "+itos(rb)+ " d=a\n" + " a= 2 r=a 1 (go to state 2)\n" + " endif endif\n" + "\n"; + pcomp+= + " (if state==2 && n>=m) (expect m offset bits)\n" + " a=r 1 a== 2 if a=r 3 a>d ifnot\n" + " a=c r=a 6 a=d r=a 7 (save c=bits, d=n in r6,r7)\n" + " b=r 3 a= 1 a<<=b d=a (d=1< 0 if d--\n" + " a=*c *b=a c++ b++ (buf[ptr++]-buf[p++])\n"; + if (!doe8) pcomp+=" out\n"; + pcomp+= + " forever endif\n" + " a=b r=a 4\n" + "\n" + " a=r 6 b=r 3 a>>=b c=a (bits>>=m)\n" + " a=r 7 a-=b d=a (n-=m)\n" + " a=0 r=a 1 (state=0)\n" + " endif endif\n" + "\n" + " (while state==3 && n>=2 (expect literal length))\n" + " do a=r 1 a== 3 if a=d a> 1 if\n" + " a=c a&= 1 a== 1 if (if bits&1)\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " b=r 2 a&= 1 a+=b a+=b r=a 2 (len+=len+(bits&1))\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " d-- d-- (n-=2)\n" + " else\n" + " a=c a>>= 1 c=a (bits>>=1)\n" + " d-- (--n)\n" + " a= 4 r=a 1 (state=4)\n" + " endif\n" + " forever endif endif\n" + "\n" + " (if state==4 && n>=8 (expect len literals))\n" + " a=r 1 a== 4 if a=d a> 7 if\n" + " b=r 4 a=c *b=a\n"; + if (!doe8) pcomp+=" out\n"; + pcomp+= + " b++ a=b r=a 4 (buf[ptr++]=bits)\n" + " a=c a>>= 8 c=a (bits>>=8)\n" + " a=d a-= 8 d=a (n-=8)\n" + " a=r 2 a-- r=a 2 a== 0 if (if --len<1)\n" + " a=0 r=a 1 (state=0)\n" + " endif\n" + " endif endif\n" + " halt\n" + "end\n"; + } + + // Byte aligned LZ77, with or without E8E9 + else if (level==2) { + hdr="comp 9 16 0 $1+20 "; + pcomp= + "pcomp lzpre c ;\n" + " (Decode LZ77: d=state, M=output buffer, b=size)\n" + " a> 255 if (at EOF decode e8e9 and output)\n"; + if (doe8) + pcomp+= + " d=b b=0 do (for b=0..d-1, d = end of buf)\n" + " a=b a==d ifnot\n" + " a+= 4 a>= 8 b++\n" + " *b=a a>>= 8 b++\n" + " *b=a b++\n" + " endif\n" + " b=c\n" + " endif\n" + " endif\n" + " a=*b out b++\n" + " forever\n" + " endif\n"; + pcomp+= + " b=0 c=0 d=0 a=0 r=a 1 r=a 2 (reset state)\n" + " halt\n" + " endif\n" + "\n" + " (in state d==0, expect a new code)\n" + " (put length in r1 and inital part of offset in r2)\n" + " c=a a=d a== 0 if\n" + " a=c a>>= 6 a++ d=a\n" + " a== 1 if (literal?)\n" + " a+=c r=a 1 a=0 r=a 2\n" + " else (3 to 5 byte match)\n" + " d++ a=c a&= 63 a+= $3 r=a 1 a=0 r=a 2\n" + " endif\n" + " else\n" + " a== 1 if (writing literal)\n" + " a=c *b=a b++\n"; + if (!doe8) pcomp+=" out\n"; + pcomp+= + " a=r 1 a-- a== 0 if d=0 endif r=a 1 (if (--len==0) state=0)\n" + " else\n" + " a> 2 if (reading offset)\n" + " a=r 2 a<<= 8 a|=c r=a 2 d-- (off=off<<8|c, --state)\n" + " else (state==2, write match)\n" + " a=r 2 a<<= 8 a|=c c=a a=b a-=c a-- c=a (c=i-off-1)\n" + " d=r 1 (d=len)\n" + " do (copy and output d=len bytes)\n" + " a=*c *b=a c++ b++\n"; + if (!doe8) pcomp+=" out\n"; + pcomp+= + " d-- a=d a> 0 while\n" + " (d=state=0. off, len don\'t matter)\n" + " endif\n" + " endif\n" + " endif\n" + " halt\n" + "end\n"; + } + + // BWT with or without E8E9 + else if (level==3) { // IBWT + hdr="comp 9 16 $1+20 $1+20 "; // 2^$1 = block size in MB + pcomp= + "pcomp bwtrle c ;\n" + "\n" + " (read BWT, index into M, size in b)\n" + " a> 255 ifnot\n" + " *b=a b++\n" + "\n" + " (inverse BWT)\n" + " elsel\n" + "\n" + " (index in last 4 bytes, put in c and R1)\n" + " b-- a=*b\n" + " b-- a<<= 8 a+=*b\n" + " b-- a<<= 8 a+=*b\n" + " b-- a<<= 8 a+=*b c=a r=a 1\n" + "\n" + " (save size in R2)\n" + " a=b r=a 2\n" + "\n" + " (count bytes in H[~1..~255, ~0])\n" + " do\n" + " a=b a> 0 if\n" + " b-- a=*b a++ a&= 255 d=a d! *d++\n" + " forever\n" + " endif\n" + "\n" + " (cumulative counts: H[~i=0..255] = count of bytes before i)\n" + " d=0 d! *d= 1 a=0\n" + " do\n" + " a+=*d *d=a d--\n" + " d<>a a! a> 255 a! d<>a until\n" + "\n" + " (build first part of linked list in H[0..idx-1])\n" + " b=0 do\n" + " a=c a>b if\n" + " d=*b d! *d++ d=*d d-- *d=b\n" + " b++ forever\n" + " endif\n" + "\n" + " (rest of list in H[idx+1..n-1])\n" + " b=c b++ c=r 2 do\n" + " a=c a>b if\n" + " d=*b d! *d++ d=*d d-- *d=b\n" + " b++ forever\n" + " endif\n" + "\n"; + if (args[0]<=4) { // faster IBWT list traversal limited to 16 MB blocks + pcomp+= + " (copy M to low 8 bits of H to reduce cache misses in next loop)\n" + " b=0 do\n" + " a=c a>b if\n" + " d=b a=*d a<<= 8 a+=*b *d=a\n" + " b++ forever\n" + " endif\n" + "\n" + " (traverse list and output or copy to M)\n" + " d=r 1 b=0 do\n" + " a=d a== 0 ifnot\n" + " a=*d a>>= 8 d=a\n"; + if (doe8) pcomp+=" *b=*d b++\n"; + else pcomp+=" a=*d out\n"; + pcomp+= + " forever\n" + " endif\n" + "\n"; + if (doe8) // IBWT+E8E9 + pcomp+= + " (e8e9 transform to out)\n" + " d=b b=0 do (for b=0..d-1, d = end of buf)\n" + " a=b a==d ifnot\n" + " a+= 4 a>= 8 b++\n" + " *b=a a>>= 8 b++\n" + " *b=a b++\n" + " endif\n" + " b=c\n" + " endif\n" + " endif\n" + " a=*b out b++\n" + " forever\n" + " endif\n"; + pcomp+= + " endif\n" + " halt\n" + "end\n"; + } + else { // slower IBWT list traversal for all sized blocks + if (doe8) { // E8E9 after IBWT + pcomp+= + " (R2 = output size without EOS)\n" + " a=r 2 a-- r=a 2\n" + "\n" + " (traverse list (d = IBWT pointer) and output inverse e8e9)\n" + " (C = offset = 0..R2-1)\n" + " (R4 = last 4 bytes shifted in from MSB end)\n" + " (R5 = temp pending output byte)\n" + " c=0 d=r 1 do\n" + " a=d a== 0 ifnot\n" + " d=*d\n" + "\n" + " (store byte in R4 and shift out to R5)\n" + " b=d a=*b a<<= 24 b=a\n" + " a=r 4 r=a 5 a>>= 8 a|=b r=a 4\n" + "\n" + " (if E8|E9 xx xx xx 00|FF in R4:R5 then subtract c from x)\n" + " a=c a> 3 if\n" + " a=r 5 a&= 254 a== 232 if\n" + " a=r 4 a>>= 24 b=a a++ a&= 254 a< 2 if\n" + " a=r 4 a-=c a+= 4 a<<= 8 a>>= 8 \n" + " b<>a a<<= 24 a+=b r=a 4\n" + " endif\n" + " endif\n" + " endif\n" + "\n" + " (output buffered byte)\n" + " a=c a> 3 if a=r 5 out endif c++\n" + "\n" + " forever\n" + " endif\n" + "\n" + " (output up to 4 pending bytes in R4)\n" + " b=r 4\n" + " a=c a> 3 a=b if out endif a>>= 8 b=a\n" + " a=c a> 2 a=b if out endif a>>= 8 b=a\n" + " a=c a> 1 a=b if out endif a>>= 8 b=a\n" + " a=c a> 0 a=b if out endif\n" + "\n" + " endif\n" + " halt\n" + "end\n"; + } + else { + pcomp+= + " (traverse list and output)\n" + " d=r 1 do\n" + " a=d a== 0 ifnot\n" + " d=*d\n" + " b=d a=*b out\n" + " forever\n" + " endif\n" + " endif\n" + " halt\n" + "end\n"; + } + } + } + + // E8E9 or no preprocessing + else if (level==0) { + hdr="comp 9 16 0 0 "; + if (doe8) { // E8E9? + pcomp= + "pcomp e8e9 d ;\n" + " a> 255 if\n" + " a=c a> 4 if\n" + " c= 4\n" + " else\n" + " a! a+= 5 a<<= 3 d=a a=b a>>=d b=a\n" + " endif\n" + " do a=c a> 0 if\n" + " a=b out a>>= 8 b=a c--\n" + " forever endif\n" + " else\n" + " *b=b a<<= 24 d=a a=b a>>= 8 a+=d b=a c++\n" + " a=c a> 4 if\n" + " a=*b out\n" + " a&= 254 a== 232 if\n" + " a=b a>>= 24 a++ a&= 254 a== 0 if\n" + " a=b a>>= 24 a<<= 24 d=a\n" + " a=b a-=c a+= 5\n" + " a<<= 8 a>>= 8 a|=d b=a\n" + " endif\n" + " endif\n" + " endif\n" + " endif\n" + " halt\n" + "end\n"; + } + else + pcomp="end\n"; + } + else + error("Unsupported method"); + + // Build context model (comp, hcomp) assuming: + // H[0..254] = contexts + // H[255..511] = location of last byte i-255 + // M = last 64K bytes, filling backward + // C = pointer to most recent byte + // R1 = level 2 lz77 1+bytes expected until next code, 0=init + // R2 = level 2 lz77 first byte of code + int ncomp=0; // number of components + const int membits=args[0]+20; + int sb=5; // bits in last context + std::string comp; + std::string hcomp="hcomp\n" + "c-- *c=a a+= 255 d=a *d=c\n"; + if (level==2) { // put level 2 lz77 parse state in R1, R2 + hcomp+= + " (decode lz77 into M. Codes:\n" + " 00xxxxxx = literal length xxxxxx+1\n" + " xx......, xx > 0 = match with xx offset bytes to follow)\n" + "\n" + " a=r 1 a== 0 if (init)\n" + " a= "+itos(111+57*doe8)+" (skip post code)\n" + " else a== 1 if (new code?)\n" + " a=*c r=a 2 (save code in R2)\n" + " a> 63 if a>>= 6 a++ a++ (match)\n" + " else a++ a++ endif (literal)\n" + " else (read rest of code)\n" + " a--\n" + " endif endif\n" + " r=a 1 (R1 = 1+expected bytes to next code)\n"; + } + + // Generate the context model + while (*method && ncomp<254) { + + // parse command C[N1[,N2]...] into v = {C, N1, N2...} + std::vector v; + v.push_back(*method++); + if (isdigit(*method)) { + v.push_back(*method++-'0'); + while (isdigit(*method) || *method==',' || *method=='.') { + if (isdigit(*method)) + v.back()=v.back()*10+*method++-'0'; + else { + v.push_back(0); + ++method; + } + } + } + + // c: context model + // N1%1000: 0=ICM 1..256=CM limit N1-1 + // N1/1000: number of times to halve memory + // N2: 1..255=offset mod N2. 1000..1255=distance to N2-1000 + // N3...: 0..255=byte mask + 256=lz77 state. 1000+=run of N3-1000 zeros. + if (v[0]=='c') { + while (v.size()<3) v.push_back(0); + comp+=itos(ncomp)+" "; + sb=11; // count context bits + if (v[2]<256) sb+=lg(v[2]); + else sb+=6; + for (unsigned i=3; imembits) sb=membits; + if (v[1]%1000==0) comp+="icm "+itos(sb-6-v[1]/1000)+"\n"; + else comp+="cm "+itos(sb-2-v[1]/1000)+" "+itos(v[1]%1000-1)+"\n"; + + // special contexts + hcomp+="d= "+itos(ncomp)+" *d=0\n"; + if (v[2]>1 && v[2]<=255) { // periodic context + if (lg(v[2])!=lg(v[2]-1)) + hcomp+="a=c a&= "+itos(v[2]-1)+" hashd\n"; + else + hcomp+="a=c a%= "+itos(v[2])+" hashd\n"; + } + else if (v[2]>=1000 && v[2]<=1255) // distance context + hcomp+="a= 255 a+= "+itos(v[2]-1000)+ + " d=a a=*d a-=c a> 255 if a= 255 endif d= "+ + itos(ncomp)+" hashd\n"; + + // Masked context + for (unsigned i=3; i0 && v[i]<255) + hcomp+="a=*b a&= "+itos(v[i])+" hashd\n"; // masked byte + else if (v[i]>=256 && v[i]<512) { // lz77 state or masked literal byte + hcomp+= + "a=r 1 a> 1 if\n" // expect literal or offset + " a=r 2 a< 64 if\n" // expect literal + " a=*b "; + if (v[i]<511) hcomp+="a&= "+itos(v[i]-256); + hcomp+=" hashd\n" + " else\n" // expect match offset byte + " a>>= 6 hashd a=r 1 hashd\n" + " endif\n" + "else\n" // expect new code + " a= 255 hashd a=r 2 hashd\n" + "endif\n"; + } + else if (v[i]>=1256) // skip v[i]-1000 bytes + hcomp+="a= "+itos(((v[i]-1000)>>8)&255)+" a<<= 8 a+= " + +itos((v[i]-1000)&255)+ + " a+=b b=a\n"; + else if (v[i]>1000) + hcomp+="a= "+itos(v[i]-1000)+" a+=b b=a\n"; + if (v[i]<512 && iint(v[0]=='t')) { + if (v.size()<=1) v.push_back(8); + if (v.size()<=2) v.push_back(24+8*(v[0]=='s')); + if (v[0]=='s' && v.size()<=3) v.push_back(255); + comp+=itos(ncomp); + sb=5+v[1]*3/4; + if (v[0]=='m') + comp+=" mix "+itos(v[1])+" 0 "+itos(ncomp)+" "+itos(v[2])+" 255\n"; + else if (v[0]=='t') + comp+=" mix2 "+itos(v[1])+" "+itos(ncomp-1)+" "+itos(ncomp-2) + +" "+itos(v[2])+" 255\n"; + else // s + comp+=" sse "+itos(v[1])+" "+itos(ncomp-1)+" "+itos(v[2])+" " + +itos(v[3])+"\n"; + if (v[1]>8) { + hcomp+="d= "+itos(ncomp)+" *d=0 b=c a=0\n"; + for (; v[1]>=16; v[1]-=8) { + hcomp+="a<<= 8 a+=*b"; + if (v[1]>16) hcomp+=" b++"; + hcomp+="\n"; + } + if (v[1]>8) + hcomp+="a<<= 8 a+=*b a>>= "+itos(16-v[1])+"\n"; + hcomp+="a<<= 8 *d=a\n"; + } + ++ncomp; + } + + // i: ISSE chain with order increasing by N1,N2... + if (v[0]=='i' && ncomp>0) { + assert(sb>=5); + hcomp+="d= "+itos(ncomp-1)+" b=c a=*d d++\n"; + for (unsigned i=1; imembits) sb=membits; + comp+=itos(ncomp)+" isse "+itos(sb-6-v[i]/10)+" "+itos(ncomp-1)+"\n"; + ++ncomp; + } + } + + // a24,0,0: MATCH. N1=hash multiplier. N2,N3=halve buf, table. + if (v[0]=='a') { + if (v.size()<=1) v.push_back(24); + while (v.size()<4) v.push_back(0); + comp+=itos(ncomp)+" match "+itos(membits-v[3]-2)+" " + +itos(membits-v[2])+"\n"; + hcomp+="d= "+itos(ncomp)+" a=*d a*= "+itos(v[1]) + +" a+=*c a++ *d=a\n"; + sb=5+(membits-v[2])*3/4; + ++ncomp; + } + + // w1,65,26,223,20,0: ICM-ISSE chain of length N1 with word contexts, + // where a word is a sequence of c such that c&N4 is in N2..N2+N3-1. + // Word is hashed by: hash := hash*N5+c+1 + // Decrease memory by 2^-N6. + if (v[0]=='w') { + if (v.size()<=1) v.push_back(1); + if (v.size()<=2) v.push_back(65); + if (v.size()<=3) v.push_back(26); + if (v.size()<=4) v.push_back(223); + if (v.size()<=5) v.push_back(20); + if (v.size()<=6) v.push_back(0); + comp+=itos(ncomp)+" icm "+itos(membits-6-v[6])+"\n"; + for (int i=1; i0; --i) + hcomp+=" d= "+itos(ncomp+i-1)+" a=*d d++ *d=a\n"; + hcomp+=" d= "+itos(ncomp)+" *d=0\n" + "endif\n"; + ncomp+=v[1]-1; + sb=membits-v[6]; + ++ncomp; + } + } + return hdr+itos(ncomp)+"\n"+comp+hcomp+"halt\n"+pcomp; +} + +// Compress from in to out in 1 segment in 1 block using the algorithm +// descried in method. If method begins with a digit then choose +// a method depending on type. Save filename and comment +// in the segment header. If comment is 0 then the default is the input size +// as a decimal string, plus " jDC\x01" for a journaling method (method[0] +// is not 's'). Write the generated method to methodOut if not 0. +void compressBlock(StringBuffer* in, Writer* out, const char* method_, + const char* filename, const char* comment, bool dosha1) { + assert(in); + assert(out); + assert(method_); + assert(method_[0]); + std::string method=method_; + const unsigned n=in->size(); // input size + const int arg0=MAX(lg(n+4095)-20, 0); // block size + assert((1u<<(arg0+20))>=n+4096); + + // Get type from method "LB,R,t" where L is level 0..5, B is block + // size 0..11, R is redundancy 0..255, t = 0..3 = binary, text, exe, both. + unsigned type=0; + if (isdigit(method[0])) { + int commas=0, arg[4]={0}; + for (int i=1; ic_str(), n); + sha1ptr=sha1.result(); + } + + // Expand default methods + if (isdigit(method[0])) { + const int level=method[0]-'0'; + assert(level>=0 && level<=9); + + // build models + const int doe8=(type&2)*2; + method="x"+itos(arg0); + std::string htsz=","+itos(19+arg0+(arg0<=6)); // lz77 hash table size + std::string sasz=","+itos(21+arg0); // lz77 suffix array size + + // store uncompressed + if (level==0) + method="0"+itos(arg0)+",0"; + + // LZ77, no model. Store if hard to compress + else if (level==1) { + if (type<40) method+=",0"; + else { + method+=","+itos(1+doe8)+","; + if (type<80) method+="4,0,1,15"; + else if (type<128) method+="4,0,2,16"; + else if (type<256) method+="4,0,2"+htsz; + else if (type<960) method+="5,0,3"+htsz; + else method+="6,0,3"+htsz; + } + } + + // LZ77 with longer search + else if (level==2) { + if (type<32) method+=",0"; + else { + method+=","+itos(1+doe8)+","; + if (type<64) method+="4,0,3"+htsz; + else method+="4,0,7"+sasz+",1"; + } + } + + // LZ77 with CM depending on redundancy + else if (level==3) { + if (type<20) // store if not compressible + method+=",0"; + else if (type<48) // fast LZ77 if barely compressible + method+=","+itos(1+doe8)+",4,0,3"+htsz; + else if (type>=640 || (type&1)) // BWT if text or highly compressible + method+=","+itos(3+doe8)+"ci1"; + else // LZ77 with O0-1 compression of up to 12 literals + method+=","+itos(2+doe8)+",12,0,7"+sasz+",1c0,0,511i2"; + } + + // LZ77+CM, fast CM, or BWT depending on type + else if (level==4) { + if (type<12) + method+=",0"; + else if (type<24) + method+=","+itos(1+doe8)+",4,0,3"+htsz; + else if (type<48) + method+=","+itos(2+doe8)+",5,0,7"+sasz+"1c0,0,511"; + else if (type<900) { + method+=","+itos(doe8)+"ci1,1,1,1,2a"; + if (type&1) method+="w"; + method+="m"; + } + else + method+=","+itos(3+doe8)+"ci1"; + } + + // Slow CM with lots of models + else { // 5..9 + + // Model text files + method+=","+itos(doe8); + if (type&1) method+="w2c0,1010,255i1"; + else method+="w1i1"; + method+="c256ci1,1,1,1,1,1,2a"; + + // Analyze the data + const int NR=1<<12; + int pt[256]={0}; // position of last occurrence + int r[NR]={0}; // count repetition gaps of length r + const unsigned char* p=in->data(); + if (level>0) { + for (unsigned i=0; i0 && kscore) score=s, period=j; + t+=r[j]; + } + if (period>4 && score>0.1) { + method+="c0,0,"+itos(999+period)+",255i1"; + if (period<=255) + method+="c0,"+itos(period)+"i1"; + n1-=r[period]; + r[period]=0; + } + else + break; + } + method+="c0,2,0,255i1c0,3,0,0,255i1c0,4,0,0,0,255i1mm16ts19t0"; + } + } + + // Compress + std::string config; + int args[9]={0}; + config=makeConfig(method.c_str(), args); + assert(n<=(0x100000u<=1 && args[1]<=7 && args[1]!=4) { // LZ77 or BWT + LZBuffer lz(*in, args); + co.setInput(&lz); + co.compress(); + } + else { // compress with e8e9 or no preprocessing + if (args[1]>=4 && args[1]<=7) + e8e9(in->data(), in->size()); + co.setInput(in); + co.compress(); + } +#ifdef DEBUG // verify pre-post processing are inverses + int64_t outsize; + const char* sha1result=co.endSegmentChecksum(&outsize, dosha1); + assert(sha1result); + assert(sha1ptr); + if (memcmp(sha1result, sha1ptr, 20)!=0) + error("Pre/post-processor test failed"); +#else + co.endSegment(sha1ptr); +#endif + co.endBlock(); +} + } // end namespace libzpaq diff --git a/libzpaq/libzpaq.h b/libzpaq/libzpaq.h index cbe211d..ff8cad8 100644 --- a/libzpaq/libzpaq.h +++ b/libzpaq/libzpaq.h @@ -1,27 +1,834 @@ -/* libzpaq.h - LIBZPAQ Version 5.00. +/* libzpaq.h - LIBZPAQ Version 7.12 header - Apr. 19, 2016. - Copyright (C) 2011, Dell Inc. Written by Matt Mahoney. + This software is provided as-is, with no warranty. + I, Matt Mahoney, release this software into + the public domain. This applies worldwide. + In some countries this may not be legally possible; if so: + I grant anyone the right to use this software for any purpose, + without any conditions, unless such conditions are required by law. - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so without restriction. - This Software is provided "as is" without warranty. +LIBZPAQ is a C++ library providing data compression and decompression +services using the ZPAQ level 2 format as described in +http://mattmahoney.net/zpaq/ -LIBZPAQ is a C++ library for compression and decompression of data -conforming to the ZPAQ level 2 standard. See http://mattmahoney.net/zpaq/ +An application wishing to use these services should #include "libzpaq.h" +and link to libzpaq.cpp (and advapi32.lib in Windows/VC++). +libzpaq recognizes the following options: -By default, LIBZPAQ uses JIT (just in time) acceleration. This only -works on x86-32 and x86-64 processors that support the SSE2 instruction -set. To disable JIT, compile with -DNOJIT. To enable run time checks, -compile with -DDEBUG. Both options will decrease speed. + -DDEBUG Turn on assertion checks (slower). + -DNOJIT Don't assume x86-32 or x86-64 with SSE2 (slower). + -Dunix Without -DNOJIT, assume Unix (Linux, Mac) rather than Windows. -The decompression code, when compiled with -DDEBUG and -DNOJIT, -comprises the reference decoder for the ZPAQ level 2 standard. +The application must provide an error handling function and derived +implementations of two abstract classes, Reader and Writer, +specifying the input and output byte streams. For example, to compress +from stdin to stdout (assuming binary I/O as in Linux): + + #include "libzpaq.h" + #include + #include + + void libzpaq::error(const char* msg) { // print message and exit + fprintf(stderr, "Oops: %s\n", msg); + exit(1); + } + + class In: public libzpaq::Reader { + public: + int get() {return getchar();} // returns byte 0..255 or -1 at EOF + } in; + + class Out: public libzpaq::Writer { + public: + void put(int c) {putchar(c);} // writes 1 byte 0..255 + } out; + + int main() { + libzpaq::compress(&in, &out, "1"); // "0".."5" = faster..better + } + +Or to decompress: + + libzpaq::decompress(&in, &out); + +The function error() will be called with an English language message +in case of an unrecoverable error such as badly formatted compressed +input data or running out of memory. error() should not return. +In a multi-threaded application where ZPAQ blocks are being decompressed +in separate threads, error() should exit the thread, but other threads +may continue. Blocks are independent and libzpaq is thread safe. + +Reader and Writer provide default implementations of read() and write() +for block I/O. You may override these with your own versions, which +might be faster. The default is to call get() or put() the appropriate +number of times. For example: + + // Read n bytes into buf[0..n-1] or to EOF, whichever is first. + // Return the number of bytes actually read. + int In::read(char* buf, int n) {return fread(buf, 1, n, stdin);} + + // Write buf[0..n-1] + void Out::write(char* buf, int n) {fwrite(buf, 1, n, stdout);} + +By default, compress() divides the input into blocks with one segment +each. The segment filename field is empty. The comment field of each +block is the uncompressed size as a decimal string. The checksum +is saved. To override: + + compress(&in, &out, "1", "filename", "comment", false); + +If the filename is not NULL then it is saved in the first block only. +If the comment is not NULL then a space and the comment are appended +to the decimal size in the first block only. The comment would normally +be the date and attributes like "20141231235959 w32", or "jDC\x01" for +a journaling archive as described in the ZPAQ specification. + +The method string has the general form of a concatenation of single +character commands each possibly followed by a list of decimal +numeric arguments separated by commas or periods: + + {012345xciawmst}[N1[{.,}N2]...]... + +For example "1" or "14,128,0" or "x6.3ci1m". + +Only the first command can be a digit 0..5. If it is, then it selects +a compression level and the other commands are ignored. Otherwise, +if it is "x" then the arguments and remaining commands describe +the compression method. Any other letter as the first command is +interpreted the same as "x". + +Higher compression levels are slower but compress better. "1" is +good for most purposes. "0" does not compress. "2" compresses slower +but decompression is just as fast as 1. "3", "4", and "5" also +decompress slower. The numeric arguments are as follows: + + N1: 0..11 = block size of at most 2^N1 MiB - 4096 bytes (default 4). + N2: 0..255 = estimated ease of compression (default 128). + N3: 0..3 = data type. 1 = text, 2 = exe, 3 = both (default 0). + +For example, "14" or "54" divide the input in 16 MB blocks which +are compressed independently. N2 and N3 are hints to the compressor +based on analysis of the input data. N2 is 0 if the data is random +or 255 if the data is easily compressed (for example, all zero bytes). +Most compression methods will simply store random data with no +compression. The default is "14,128,0". + +If the first command is "x" then the string describes the exact +compression method. The arguments to "x" describe the pre/post +processing (LZ77, BWT, E8E9), and remaining commands describe the +context model, if any, of the transformed data. The arguments to "x" are: + + N1: 0..11 = block size as before. + N2: 0..7: 0=none, 1=packed LZ77, 2=LZ77, 3=BWT, 4..7 = 0..3 + E8E9. + N3: 4..63: LZ77 min match. + N4: LZ77 secondary match to try first or 0 to skip. + N5: LZ77 log search depth. + N6: LZ77 log hash table size, or N1+21 to use a suffix array. + N7: LZ77 lookahead. + +N2 selects the basic transform applied before context modeling. +N2 = 0 does not transform the input. N2 = 1 selects LZ77 encoding +of literals strings and matches using bit-packed codes. It is normally +not used with a context model. N2 = 2 selects byte aligned LZ77, which +compresses worse by itself but better than 1 when a context model is +used. It uses single bytes to encode either a literal of length 1..64 +or a match of length N3..N3+63 with a 2, 3, or 4 byte offset. + +N2 = 3 selects a Burrows-Wheeler transform, in which the input is +sorted by right-context. This does not compress by itself but makes +the data more compressible using a low order, adaptive context model. +BWT requires 4 times the block size in additional memory for both +compression and decompression. + +N2 = 4..7 are the same as 0..3 except that a E8E9 transform is first applied +to improve the compression of x86 code usually found .exe and .dll files. +It scans the input block backward for 5 byte strings of the form +{E8|E9 xx xx xx 00|FF} and adds the offset from the start of the +block to the middle 3 bytes interpreted as a little-endian (LSB first) +number (mod 2^24). E8 and E9 are the CALL and JMP instructions, followed +by a 32 bit relative offset. + +N3..N7 apply only to LZ77. For either type, it searches for matches +by hashing the next N4 bytes, and then the next N3 bytes, and looking +up each of the hashes at 2^N5 locations in a table with 2^N6 entries. +Of those, it picks the longest match, or closest in case of a tie. +If no match is at least N3, then a literal is encoded instead. If N5 +is 0 then only one hash is computed, which is faster but does not +compress as well. Typical good values for fast compression are +"x4.1.5.0.3.22" which means 16 MiB blocks, packed LZ77, mininum match +length 5, no secondary match, search depth 2^3 = 8, and 2^22 = 4M +hash table (using 16 MiB memory). + +The hash table requires 4 x 2^N6 bytes of memory. If N6 = N1+21, then +matches are found using a suffix array and inverse suffix array using +2.25 x 2^N6 bytes (4.5 x block size). This finds better matches but +takes longer to compute the suffix array (SA). The matches are found by +searching forward and backward in the SA 2^N5 in each direction up +to the first earlier match, and picking the longer of the two. +Good values are "x4.1.4.0.8.25". The secondary match N4 has no effect. + +N7 is the lookahead. It looks for matches of length at least N4+N7 +when using a hash table or N3+N7 for a SA, but allows the first N7 +bytes not to match and be coded as literals if this results in +a significantly longer match. Values higher than 1 are rarely effective. +The default is 0. + +All subsequent commands after "x" describe a context model. A model +consists of a set of components that output a bit prediction, taking +a context and possibly earlier predictions as input. The final prediction +is arithmetic coded. The component types are: + + c = CM or ICM (context model or indirect context model). + i = ISSE chain (indirect secondary symbol estimator). + a = MATCH. + w = word model (ICM-ISSE chain with whole word contexts). + m = MIX. + s = SSE (secondary symbol estimator). + t = MIX2 (2 input MIX). + +For example, "x4.3ci1" describes a BWT followed by an order 0 CM +and order 1 ISSE, which is used for level 3 text compression. The +parameters to "c" (default all 0) are as follows: + + N1: 0 = ICM, 1..256 CM with faster..slower adaptation, +1000 halves memory. + N2: 1..255 = offset mod N2, 1000..1255 = offset to last N2-1000 byte. + N3: 0..255 = order 0 context mask, 256..511 mixes LZ77 parse state. + N4...: 0..255 order 1... context masks. 1000... skips N4-1000 bytes. + +Most components use no more memory than the block size, depending on +the number of context bits, but it is possible to select less memory +and lose compression. + +A CM inputs a context hash and outputs a prediction from a table. +The table entry is then updated by adjusting in the direction of the +actual bit. The adjustment is 1/count, where the maximum count is 4 x N1. +Larger values are best for stationary data. Smaller values adapt faster +to changing data. + +If N1 is 0 then c selects an ICM. An ICM maps a context to a bit history +(8 bit state), and then to slow adapting prediction. It is generally +better than a CM on most nonstationary data. + +The context for a CM or ICM is a hash of all selected contexts: a +cyclic counter (N2 = 1..255), the distance from the last occurrence +of some byte value (N2 = 1000..1255), and the masked history of the +last 64K bytes ANDED with N3, N4... For example, "c0.0.255.255.255" is +an order 3 ICM. "C0.1010.255" is an order 1 context hashed together +with the column number in a text file (distance to the last linefeed, +ASCII 10). "c256.0.255.1511.255" is a stationary grayscale 512 byte +wide image model using the two previous neighboring pixels as context. +"c0.0.511.255" is an order 1 model for LZ77, which helps compress +literal strings. The LZ77 state context applies only to byte aligned +LZ77 (type 2 or 6). + +The parameters to "i" (ISSE chain) are the initial context length and +subsequent increments for a chain connected to an existing earlier component. +For example, "ci1.1.2" specifies an ICM (order 0) followed by a chain +of 3 ISSE with orders 1, 2, and 4. An ISSE maps a context to a bit +history like an ISSE, but uses the history to select a pair of weights +to mix the input prediction with a constant 1, thus performing the +mapping q' := w1 x q + w2 in the logistic domain (q = log p/(1-p)). +The mixer is then updated by adjusting the weights to improve the +prediction. High order ISSE chains (like "x4.0ci1.1.1.1.2") and BWT +followed by a low order chain (like "x4.3ci1") both provide +excellent general purpose compression. + +A MATCH ("a") keeps a rotating history buffer and a hash table to look +up the previous occurrence of the current context hash and predicts +whatever bit came next. The parameters are: + + N1 = hash multiplier, default 24. + N2 = halve buffer size, default 0 = same size as input block. + N3 = halve hash table size, default 0 = block size / 4. + +For example, "x4.0m24.1.1" selects a 16 MiB block size, 8 MiB match +buffer size, and 2M hash table size (using 8 MiB at 4 bytes per entry). +The hash is computed as hash := hash x N1 + next_byte + 1 (mod hash table +size). Thus, N1 = 12 selects a higher order context, and N1 = 48 selects a +lower order. + +A word model ('w") is an ICM-ISSE chain of length N1 (orders 0..N1-1) +in which the contexts are whole words. A word is defined as the set +of characters in the range N2..N2+N3-1 after ANDing with N4. The context +is hashed using multiplier N5. Memory is halved by N6. The default is +"w1.65.26.223.20.0" which is a chain of length 1 (ICM only), where words +are in range 65 ('A') to 65+26-1 ('Z') after ANDing with 223 (which +converts to upper case). The hash multiplier is 20, which has the +effect of shifting the high 2 bits out of the hash. The memory usage +of each component is the same as the block size. + +A MIX ("m") performs the weighted average of all previous component +predictions. The weights are then adjusted to improve the prediction +by favoring the most accurate components. N1 selects the number of +context bits (not hashed) to select a set of weights. N2 is the +learning rate (around 16..32 works well). The default is "m8.24" +which selects the previously modeled bits of the current byte as +context. When N1 is not a multiple of 8, it selects the most significant +bits of the oldest byte. + +A SSE ("s") adjusts the previous prediction like an ISSE, but uses +a direct lookup table of the quantized and interpolated input prediction +and a direct (not hashed) N1-bit context. The adjustment is 1/count where +the count is allowed to range from N2 to 4 x N3. The default +is "s8.32.255". + +A MIX2 ("t") is a MIX but mixing only the last 2 components. The +default is "t8.24" where the meaning is the same as "m". + +For example, a good model for text is "x6.0ci1.1.1.1.2aw2mm16tst" +which selects 2^6 = 64 MiB blocks, no preprocessing, an order 0 ICM, +an ISSE chain with orders 1, 2, 3, 4, 6, a MATCH, an order 0-1 word +ICM-ISSE chain, two mixers with 0 and 1 byte contexts, whose outputs are +mixed by a MIX2. The MIX2 output is adjusted by a SSE, and finally +the SSE input and outputs are mixed again for the final bit prediction. + + +COMPRESSBLOCK + +CompressBlock() takes the same arguments as compress() except that +the input is a StringBuffer instead of a Reader. The output is always +a single block, regardless of the N1 (block size) argument in the method. + + void compressBlock(StringBuffer* in, Writer* out, const char* method, + const char* filename=0, const char* comment=0, + bool compute_sha1=false); + +A StringBuffer is both a Reader and a Writer, but also allows random +memory access. It provides convenient and efficient storage when the +input size is unknown. + + class StringBuffer: public libzpaq::Reader, public libzpaq::Writer { + public: + StringBuffer(size_t n=0); // initial allocation after first use + ~StringBuffer(); + int get(); // read 1 byte or EOF from memory + int read(char* buf, int n); // read n bytes + void put(int c); // write 1 byte to memory + void write(const char* buf, int n); // write n bytes + const char* c_str() const; // read-only access to written data + unsigned char* data(); // read-write access + size_t size() const; // number of bytes written + size_t remaining() const; // number of bytes to read until EOF + void setLimit(size_t n); // set maximum write size + void reset(); // discard contents and free memory + void resize(size_t n); // truncate to n bytes + void swap(StringBuffer& s); // exchange contents efficiently + }; + +The constructor sets the inital allocation size after the first +write to n or 128, whichever is larger. Initially, no memory is allocated. +The allocated size is always n x (2^k - 1), for example +128 x (1, 3, 7, 15, 31...). + +put() and write() append 1 or n bytes, allocating memory as needed. +buf can be NULL and the StringBuffer will be enlarged by n. +get() and read() read 1 or up to n bytes. get() returns EOF if you +attempt to read past the end of written data. read() returns less +than n if it reaches EOF first, or 0 at EOF. + +size() is the number of bytes written, which does not change when +data is read. remaining() is the number of bytes left to read +before EOF. + +c_str() provides read-only access to the data. It is not NUL terminated. +data() provides read-write access. Either may return NULL if size() +is 0. write(), put(), reset(), swap(), and the destructor may +invalidate saved pointers. + +setLimit() sets a maximum size. It will call error() if you try to +write past it. The default is -1 or no limit. + +reset() sets the size to 0 and frees memory. resize() sets the size +to n by moving the write pointer, but does not allocate or free memory. +Moving the pointer forward does not overwrite the previous contents +in between. The write pointer can be moved past the end of allocated +memory, and the next put() or write() will allocate as needed. If the +write pointer is moved back before the read pointer, then remaining() +is set to 0. + +swap() swaps 2 StringBuffers efficiently, but does not change their +initial allocations. + + +DECOMPRESSER + +decompress() will decompress any valid ZPAQ stream, which may contain +multiple blocks with multiple segments each. It will ignore filenames, +comments, and checksums. You need the Decompresser class if you want to +do something other than decompress all of the data serially to a single +file. To decompress individual blocks and segments and retrieve the +filenames, comments, data, and hashes of each segment (in exactly this +order): + + libzpaq::Decompresser d; // to decompress + libzpaq::SHA1 sha1; // to verify output hashes + double memory; // bytes required to decompress + Out filename, comment; + char sha1out[21]; + d.setInput(&in); + while (d.findBlock(&memory)) { // default is NULL + while (d.findFilename(&filename)) { // default is NULL + d.readComment(&comment); // default is NULL + d.setOutput(&out); // if omitted or NULL, discard output + d.setSHA1(&sha1); // optional + while (d.decompress(1000)); // bytes to decode, default is all + d.readSegmentEnd(sha1out); // {0} or {1,hash[20]} + if (sha1out[0]==1 && memcmp(sha1.result(), sha1out+1, 20)) + error("checksum error"); + } + } + +findBlock() scans the input for the next ZPAQ block and returns true +if found. It optionally sets memory to the approximate number of bytes +that it will allocate at the first call to decompress(). + +findFilename() finds the next segment and returns false if there are +no more in the current block. It optionally writes the saved filename. + +readComment() optionally writes the comment. It must be called +after reading the filename and before decompressing. + +setSHA1() specifies an SHA1 object for computing a hash of the segment. +It may be omitted if you do not want to compute a hash. + +decompress() decodes the requested number of bytes, postprocesses them, +and writes them to out. For the 3 built in compression levels, this +is the same as the number of bytes output, but it may be different if +postprocessing was used. It returns true until there is no more data +to decompress in the current segment. The default (-1) is to decompress the +whole segment. + +readSegmentEnd() skips any remaining data not yet decompressed in the +segment and writes 21 bytes, either a 0 if no hash was saved, +or a 1 followed by the 20 byte saved hash. If any data is skipped, +then all data in the remaining segments in the current block must +also be skipped. + + +SHA1 + +The SHA1 object computes SHA-1 cryptographic hashes. It is safe to +assume that two inputs with the same hash are identical. For example: + + libzpaq::SHA1 sha1; + int ch; + while ((ch=getchar())!=EOF) + sha1.put(ch); + printf("Size is %1.0f or %1.0f bytes\n", sha1.size(), double(sha1.usize())); + +size() returns the number of bytes read as a double, and usize() as a +64 bit integer. result() returns a pointer to the 20 byte hash and +resets the size to 0. The hash (not just the pointer) should be copied +before the next call to result() if you want to save it. You can also +call sha1.write(buffer, n) to hash n bytes of char* buffer. + + +COMPRESSOR + +A Compressor object allows greater control over the compressed data. +In particular you can specify the compression algorithm in ZPAQL to +specify methods not possible using compress() or compressBlock(). You +can create blocks with multiple segments specifying different files, +or compress streams of unlimited size to a single block when the +input size is not known. + + libzpaq::Compressor c; + for (int i=0; i 128) or 0 (c < 128). + CM s t context model with 2^s contexts, learning rate 1/4t. + ICM s indirect context model with 2^(s+6) contexts. + MATCH s b match model with 2^s context hashes and 2^b history. + AVG j k wt average components j and k with weight wt/256 for j. + MIX2 s j k r x average j and k with 2^s contexts, rate r, mask x. + MIX s j m r x average j..j+m-1 with 2^s contexts, rate r, mask x. + ISSE s j adjust prediction j using 2^(s+6) indirect contexts. + SSE s j t1 t2 adjust j using 2^s direct contexts, rate 1/t1..1/4t2. + +A CONST predicts a 1 with probability 1/(1+exp((128-c)/16)), i.e +numbers near 0 or 255 are the most confident. + +A CM maps a context to a prediction and a count. It is updated by +adjusting the prediction to reduce the error by 1/count and incrementing +the count up to 4t. + +A ICM maps a s+10 bit context hash to a bit history (8 bit state) +representing a bounded count of zeros and ones previously seen in the +context and which bit was last. The bit history is mapped to a +prediction, which is updated by reducing the error by 1/1024. +The initial prediction is estimated from the counts represented by each +bit history. + +A MATCH looks up a context hash and predicts whatever bit came next +following the previous occurrence in the history buffer. The strength +of the prediction depends on the match length. + +AVG, MIX2, and MIX perform weighted averaging of predictions in the +logistic domain (log(p/(1-p))). AVG uses a fixed weight. MIX2 and MIX +adjust the weights (selected by context) to reduce prediction error +by a rate that increases with r. The mask is AND-ed with the current +partially coded byte to compute that context. Normally it is 255. +A MIX takes a contiguous range of m components as input. + +ISSE adjusts a prediction using a bit history (as with an ICM) to +select a pair of weights for a 2 input MIX. It mixes the input +prediction with a constant 1 in the logistic domain. + +SSE adjusts a logistic prediction by quantizing it to 32 levels and +selecting a new prediction from a table indexed by context, interpolating +between the nearest two steps. The nearest prediction error is +reduced by 1/count where count increments from t1 to 4*t2. + +Contexts are computed and stored in an array H of 32 bit unsigned +integers by the HCOMP program written in ZPAQL. The program is called +after encoding a whole byte. To form a complete context, these values +are combined with the previous 0 to 7 bits of the current parital byte. +The method depends on the component type as follows: + + CM: H[i] XOR hmap4(c). + ICM, ISSE: hash table lookup of (H[i]*16+c) on nibble boundaries. + MIX2, MIX: H[i] + (c AND x). + SSE: H[i] + c. + +where c is the previous bits with a leading 1 bit (1, 1x, 1xx, ..., +1xxxxxxx where x is a previously coded bit). hmap4(c) maps c +to a 9 bit value to reduce cache misses. The first nibble is +mapped as before and the second nibble with 1xxxx in the high +5 bits. For example, after 6 bits, where c = 1xxxxxx, +hmap4(c) = 1xxxx01xx with the bits in the same order. + +There are two ZPAQL virtual machines, HCOMP to compute contexts +and PCOMP to post-process the decoded output. Each has the +following state: + + PC: 16 bit program counter. + A, B, C, D, R0...R255: 32 bit unsigned registers. + F: 1 bit condition register. + H: array of 2^h 32 bit unsigned values (output for HCOMP). + M: array of 2^m 8 bit unsigned values. + +All values are initialized to 0 at the beginning of a block +and retain their values between calls. There are two machines. +HCOMP is called after coding each byte with the value of that +byte in A. PCOMP, if present, is called once for each decoded +byte with that byte in A, and once more at the end of each +segment with 2^32 - 1 in A. + +Normally, A is an accumulator. It is the destination of all +binary operations except assignment. The low m bits of B and +C index M. The low h bits of D indexes H. We write *B, *C, *D +to refer to the elements they point to. The instruction set +is as follows, where X is A, B, C, D, *B, *C, *D except as +indicated. X may also be a constant 0...255, written with +a leading space if it appears on the right side of an operator, +e.g. "*B= 255". Instructions taking a numeric argument are 2 bytes, +otherwise 1. Arithmetic is modulo 2^32. + + X<>A Swap X with A (X cannot be A). + X++ Add 1. + X-- Subtract 1. + X! Complement bits of X. + X=0 Clear X (1 byte instruction). + X=X Assignment to left hand side. + A+=X Add to A + A-=X Subtract from A + A*=X Multipy + A/=X Divide. If X is 0 then A=0. + A%=X Mod. If X is 0 then A=0. + A&=X Clear bits of A that are 0 in X. + A&~X Clear bits of A that are 1 in X. + A|=X Set bits of A that are 1 in X. + A^=X Complement bits of A that are set in X. + A<<=X Shift A left by (X mod 32) bits. + A>>=X Shift right (zero fill) A by (X mod 32) bits. + A==X Set F=1 if equal else F=0. + AX Set F=1 if greater else F=0. + X=R N Set A,B,C,D to RN (R0...R255). + R=A N Set R0...R255 to A. + JMP N Jump N=-128...127 bytes from next instruction. + JT N Jump N=-128...127 if F is 1. + JF N Jump N=-128...127 if F is 0. + LJ N Long jump to location 0...65535 (only 3 byte instruction). + OUT Output A (PCOMP only). + HASH A=(A+*B+512)*773. + HASHD *D=(*D+A+512)*773. + HALT Return at end of program. + ERROR Fail if executed. + +Rather than using jump instructions, the following constructs are +allowed and translated appropriately. + + IF ... ENDIF Execute if F is 1. + IFNOT ... ENDIF Execute if F is 0. + IF ... ELSE ... ENDIF Execute first part if F is 1 else second part. + IFNOT ... ELSE ... ENDIF Execute first part if F is 0 else second part. + DO ... WHILE Loop while F is 1. + DO ... UNTIL Loop while F is 0. + DO ... FOREVER Loop unconditionally. + +Forward jumps (IF, IFNOT, ELSE) will not compile if beyond 127 +instructions. In that case, use the long form (IFL, IFNOTL, ELSEL). +DO loops automatically use long jumps if needed. IF and DO loops +may intersect. For example, DO ... IF ... FOREVER ENDIF is equivalent +to a while-loop. + +A config argument without a postprocessor has the following syntax: + + COMP hh hm ph pm n + i COMP args... + HCOMP + zpaql... + END (or POST 0 END for backward compatibility) + +With a postprocessor: + + COMP hh hm ph pm n + i COMP args... + HCOMP + zpaql... + PCOMP command args... ; + zpaql... + END + +In HCOMP, H and M have sizes 2^hh and 2^hm respectively. In PCOMP, +H and M have sizes 2^ph and 2^pm respectively. There are n components, +which must be numbered i = 0 to n-1. If a postprocessor is used, then +"command args..." is written to the Writer* passed as the 4'th argument, +but otherwise ignored. A typical use in a development environment might +be to call an external program that will be passed two additional +arguments on the command line, the input and output file names +respectively. + +You can pass up to 9 signed numeric arguments in args[]. In any +place that a number "N" is allowed, you can write "$M" or "$M+N" +(like "$1" or $9+25") and value args[M-1]+N will be substituted. + +ZPAQL allows (nested) comments in parenthesis. It is not case sensitive. +If there are input errors, then error() will report the error. If the +string contains newlines, it will report the line number of the error. + +ZPAQL is compiled internally into a byte code, and then to native x86 +32 or 64 bit code (unless compiled with -DNOJIT, in which case the +byte code is interpreted). You can also specify the algorithm directly +in byte code, although this is less convenient because it requires two +steps: + + c.startBlock(hcomp); // COMP and HCOMP at start of block + c.postProcess(pcomp, 0); // PCOMP right before compress() in first segment + +This is necessary because the COMP and HCOMP sections are stored in +the block header, but the PCOMP section is compressed in the first +segment after the filename and comment but before any data. + +To retrive compiled byte code in suitable format after startBlock(): + + c.hcomp(&out); // writes COMP and HCOMP sections + c.pcomp(&out); // writes PCOMP section if any + +Or during decompression: + + d.hcomp(&out); // valid after findBlock() + d.pcomp(&out); // valid after decompress(0) in first segment + +Both versions of pcomp() write nothing and return false if there is no +PCOMP section. The output of hcomp() and pcomp() may be passed to the +input of startBlock() and postProcess(). These are strings in which the +first 2 bytes encode the length of the rest of the string, least +significant byte first. Alternatively, postProcess() allows the length to +be omitted and passed separately as the second argument. In the case +of decompression, the HCOMP and PCOMP strings are read from the archive. +The preprocessor command (from "PCOMP cmd ;") is not saved in the compressed +data. + + +ARRAY + +The libzpaq::Array template class is convenient for creating arrays aligned +on 64 byte addresses. It calls error("Out of memory") if needed. +It is used as follows: + + libzpaq::Array a(n); // array a[0]..a[n-1] of type T, zeroed + a.resize(n); // change size and zero contents + a[i] // i'th element + a(i) // a[i%n], valid only if n is a power of 2 + a.size() // n (as a size_t) + a.isize() // n (as a signed int) + +T should be a simple type without constructors or destructors. Arrays +cannot be copied or assigned. You can also specify the size: + + Array a(n, e); // n << e + a.resize(n, e); // n << e + +which is equivalent to n << e except that it calls error("Array too big") +rather than overflow if n << e would require more than 32 bits. If +compiled with -DDEBUG, then bounds are checked at run time. + + +ENCRYPTION + +There is a class libzpaq::SHA256 with put(), result(), size(), and usize() +as in SHA1. result() returns a 32 byte SHA-256 hash. It is used by scrypt. + +The libzpaq::AES_CTR class allows encryption in CTR mode with 128, 192, +or 256 bit keys. The public members are: + +class AES_CTR { +public: + AES_CTR(const char* key, int keylen, char* iv=0); + void encrypt(U32 s0, U32 s1, U32 s2, U32 s3, unsigned char* ct); + void encrypt(char* buf, int n, U64 offset); +}; + +The constructor initializes with a 16, 24, or 32 byte key. The length +is given by keylen. iv can be an 8 byte string or NULL. If not NULL +then iv0, iv1 are initialized with iv[0..7] in big-endian order, else 0. + +encrypt(s0, s1, s2, s3, ct) encrypts a plaintext block divided into +4 32-bit words MSB first. The first byte of plaintext is the high 8 +bits of s0. The output is to ct[16]. + +encrypt(buf, n, offset) encrypts or decrypts an n byte slice of a string +starting at offset. The i'th 16 byte block is encrypted by XOR with +the result (in ct) of encrypt(iv0, iv1, i>>32, i&0xffffffff, ct) starting +with i = 0. For example: + + AES_CTR a("a 128 bit key!!!", 16); + char buf[500]; // some data + a.encrypt(buf, 100, 0); // encrypt first 100 bytes + a.encrypt(buf, 400, 100); // encrypt next 400 bytes + a.encrypt(buf, 500, 0); // decrypt in one step + +libzpaq::stretchKey(char* out, const char* in, const char* salt); + +Generate a 32 byte key out[0..31] from key[0..31] and salt[0..31] +using scrypt(key, salt, N=16384, r=8, p=1). key[0..31] should be +the SHA-256 hash of the password. With these parameters, the function +uses 0.1 to 0.3 seconds and 16 MiB memory. +Scrypt is defined in http://www.tarsnap.com/scrypt/scrypt.pdf + +void random(char* buf, int n); + +Puts n cryptographic random bytes in buf[0..n-1], where the first +byte is never '7' or 'z' (start of a ZPAQ archive). For a pure +random string, discard the first byte. + +Other classes and functions defined here are for internal use. +Use at your own risk. */ +////////////////////////////////////////////////////////////// + #ifndef LIBZPAQ_H #define LIBZPAQ_H @@ -29,9 +836,10 @@ comprises the reference decoder for the ZPAQ level 2 standard. #define NDEBUG 1 #endif #include -#include #include +#include #include +#include namespace libzpaq { @@ -41,9 +849,10 @@ typedef uint16_t U16; typedef uint32_t U32; typedef uint64_t U64; -// Standard library prototypes redirected to libzpaq.cpp -void* calloc(size_t, size_t); -void free(void*); +// Tables for parsing ZPAQL source code +extern const char* compname[256]; // list of ZPAQL component types +extern const int compsize[256]; // number of bytes to encode a component +extern const char* opcodelist[272]; // list of ZPAQL instructions // Callback for error handling extern void error(const char* msg); @@ -56,6 +865,10 @@ class Reader { public: virtual int get() = 0; // should return 0..255, or -1 at EOF virtual int read(char* buf, int n); // read to buf[n], return no. read +/* TODO + * need to show progress + virtual void show_progress(int bytes_in); // lrzip addition to show progress +*/ virtual ~Reader() {} }; @@ -104,15 +917,16 @@ void Array::resize(size_t sz, int ex) { if (n>0) { assert(offset>0 && offset<=64); assert((char*)data-offset); - free((char*)data-offset); + ::free((char*)data-offset); } n=0; + offset=0; if (sz==0) return; n=sz; const size_t nb=128+n*sizeof(T); // test for overflow - if (nb<=128 || (nb-128)/sizeof(T)!=n) error("Array too big"); - data=(T*)calloc(nb, 1); - if (!data) error("Out of memory"); + if (nb<=128 || (nb-128)/sizeof(T)!=n) n=0, error("Array too big"); + data=(T*)::calloc(nb, 1); + if (!data) n=0, error("Out of memory"); offset=64-(((char*)data-(char*)0)&63); assert(offset>0 && offset<=64); data=(T*)((char*)data+offset); @@ -124,29 +938,92 @@ void Array::resize(size_t sz, int ex) { class SHA1 { public: void put(int c) { // hash 1 byte - U32& r=w[len0>>5&15]; + U32& r=w[U32(len)>>5&15]; + r=(r<<8)|(c&255); + len+=8; + if ((U32(len)&511)==0) process(); + } + void write(const char* buf, int64_t n); // hash buf[0..n-1] + double size() const {return len/8;} // size in bytes + uint64_t usize() const {return len/8;} // size in bytes + const char* result(); // get hash and reset + SHA1() {init();} +private: + void init(); // reset, but don't clear hbuf + U64 len; // length in bits + U32 h[5]; // hash state + U32 w[16]; // input buffer + char hbuf[20]; // result + void process(); // hash 1 block +}; + +//////////////////////////// SHA256 ////////////////////////// + +// For computing SHA-256 checksums +// http://en.wikipedia.org/wiki/SHA-2 +class SHA256 { +public: + void put(int c) { // hash 1 byte + unsigned& r=w[len0>>5&15]; r=(r<<8)|(c&255); if (!(len0+=8)) ++len1; if ((len0&511)==0) process(); } double size() const {return len0/8+len1*536870912.0;} // size in bytes - uint64_t usize() const {return len0/8+(U64(len1)<<29);} // size in bytes + uint64_t usize() const {return len0/8+(U64(len1)<<29);} //size in bytes const char* result(); // get hash and reset - SHA1() {init();} + SHA256() {init();} private: - void init(); // reset, but don't clear hbuf - U32 len0, len1; // length in bits (low, high) - U32 h[5]; // hash state - U32 w[80]; // input buffer - char hbuf[20]; // result - void process(); // hash 1 block + void init(); // reset, but don't clear hbuf + unsigned len0, len1; // length in bits (low, high) + unsigned s[8]; // hash state + unsigned w[16]; // input buffer + char hbuf[32]; // result + void process(); // hash 1 block }; +//////////////////////////// AES ///////////////////////////// + +// For encrypting with AES in CTR mode. +// The i'th 16 byte block is encrypted by XOR with AES(i) +// (i is big endian or MSB first, starting with 0). +class AES_CTR { + U32 Te0[256], Te1[256], Te2[256], Te3[256], Te4[256]; // encryption tables + U32 ek[60]; // round key + int Nr; // number of rounds (10, 12, 14 for AES 128, 192, 256) + U32 iv0, iv1; // first 8 bytes in CTR mode +public: + AES_CTR(const char* key, int keylen, const char* iv=0); + // Schedule: keylen is 16, 24, or 32, iv is 8 bytes or NULL + void encrypt(U32 s0, U32 s1, U32 s2, U32 s3, unsigned char* ct); + void encrypt(char* buf, int n, U64 offset); // encrypt n bytes of buf +}; + +//////////////////////////// stretchKey ////////////////////// + +// Strengthen password pw[0..pwlen-1] and salt[0..saltlen-1] +// to produce key buf[0..buflen-1]. Uses O(n*r*p) time and 128*r*n bytes +// of memory. n must be a power of 2 and r <= 8. +void scrypt(const char* pw, int pwlen, + const char* salt, int saltlen, + int n, int r, int p, char* buf, int buflen); + +// Generate a strong key out[0..31] key[0..31] and salt[0..31]. +// Calls scrypt(key, 32, salt, 32, 16384, 8, 1, out, 32); +void stretchKey(char* out, const char* key, const char* salt); + +//////////////////////////// random ////////////////////////// + +// Fill buf[0..n-1] with n cryptographic random bytes. The first +// byte is never '7' or 'z'. +void random(char* buf, int n); + //////////////////////////// ZPAQL /////////////////////////// // Symbolic constants, instruction size, and names typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE} CompType; extern const int compsize[256]; +class Decoder; // forward // A ZPAQL machine COMP+HCOMP or PCOMP. class ZPAQL { @@ -167,8 +1044,8 @@ public: U32 H(int i) {return h(i);} // get element of h void flush(); // write outbuf[0..bufptr-1] to output and sha1 - void outc(int c) { // output byte c (0..255) or -1 at EOS - if (c<0 || (outbuf[bufptr]=c, ++bufptr==outbuf.isize())) flush(); + void outc(int ch) { // output byte ch (0..255) or -1 at EOS + if (ch<0 || (outbuf[bufptr]=ch, ++bufptr==outbuf.isize())) flush(); } // ZPAQ1 block header @@ -192,8 +1069,8 @@ private: // Support code int assemble(); // put JIT code in rcode void init(int hbits, int mbits); // initialize H and M sizes - int execute(); // execute 1 instruction, return 0 after HALT, else 1 - void run0(U32 input); // default run() when select==0 + int execute(); // interpret 1 instruction, return 0 after HALT, else 1 + void run0(U32 input); // default run() if not JIT void div(U32 x) {if (x) a/=x; else a=0;} void mod(U32 x) {if (x) a%=x; else a=0;} void swap(U32& x) {a^=x; x^=a; a^=x;} @@ -221,12 +1098,8 @@ struct Component { ////////////////////////// StateTable //////////////////////// -// Next state table generator +// Next state table class StateTable { - enum {N=64}; // sizes of b, t - int num_states(int n0, int n1); // compute t[n0][n1][1] - void discount(int& n0); // set new value of n0 after 1 or n1 after 0 - void next_state(int& n0, int& n1, int y); // new (n0,n1) after bit y public: U8 ns[1024]; // state*4 -> next state if 0, if 1, n0, n1 int next(int state, int y) { // next state for bit y @@ -265,6 +1138,7 @@ private: U32 h[256]; // unrolled copy of z.h ZPAQL& z; // VM to compute context hashes, includes H, n Component comp[256]; // the model, includes P + bool initTables; // are tables initialized? // Modeling support functions int predict0(); // default @@ -288,12 +1162,14 @@ private: // x -> floor(32768/(1+exp(-x/64))) int squash(int x) { + assert(initTables); assert(x>=-2048 && x<=2047); return squasht[x+2048]; } // x -> round(64*log((x+0.5)/(32767.5-x))), approx inverse of squash int stretch(int x) { + assert(initTables); assert(x>=0 && x<=32767); return stretcht[x]; } @@ -322,7 +1198,7 @@ private: //////////////////////////// Decoder ///////////////////////// // Decoder decompresses using an arithmetic code -class Decoder { +class Decoder: public Reader { public: Reader* in; // destination Decoder(ZPAQL& z); @@ -330,16 +1206,23 @@ public: int skip(); // skip to the end of the segment, return next byte void init(); // initialize at start of block int stat(int x) {return pr.stat(x);} + int get() { // return 1 byte of buffered input or EOF + if (rpos==wpos) { + rpos=0; + wpos=in ? in->read(&buf[0], BUFSIZE) : 0; + assert(wpos<=BUFSIZE); + } + return rpos buf; // input buffer of size BUFSIZE bytes - // of unmodeled data. buf[low..high-1] is input with curr - // remaining in sub-block. int decode(int p); // return decoded bit (0..1) with prob. p (0..65535) - void loadbuf(); // read unmodeled data into buf to EOS }; /////////////////////////// PostProcessor //////////////////// @@ -375,6 +1258,7 @@ public: bool pcomp(Writer* out2) {return pp.z.write(out2, true);} void readSegmentEnd(char* sha1string = 0); int stat(int x) {return dec.stat(x);} + int buffered() {return dec.buffered();} private: ZPAQL z; Decoder dec; @@ -387,17 +1271,12 @@ private: void decompress(Reader* in, Writer* out); -////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////// - -// Code following this point is not a part of the ZPAQ level 2 standard. - //////////////////////////// Encoder ///////////////////////// // Encoder compresses using an arithmetic code class Encoder { public: - Encoder(ZPAQL& z): + Encoder(ZPAQL& z, int size=0): out(0), low(1), high(0xFFFFFFFF), pr(z) {} void init(); void compress(int c); // c is 0..255 or EOF @@ -410,37 +1289,231 @@ private: void encode(int y, int p); // encode bit y (0..1) with prob. p (0..65535) }; +//////////////////////////// Compiler //////////////////////// + +// Input ZPAQL source code with args and store the compiled code +// in hz and pz and write pcomp_cmd to out2. + +class Compiler { +public: + Compiler(const char* in, int* args, ZPAQL& hz, ZPAQL& pz, Writer* out2); +private: + const char* in; // ZPAQL source code + int* args; // Array of up to 9 args, default NULL = all 0 + ZPAQL& hz; // Output of COMP and HCOMP sections + ZPAQL& pz; // Output of PCOMP section + Writer* out2; // Output ... of "PCOMP ... ;" + int line; // Input line number for reporting errors + int state; // parse state: 0=space -1=word >0 (nest level) + + // Symbolic constants + typedef enum {NONE,CONS,CM,ICM,MATCH,AVG,MIX2,MIX,ISSE,SSE, + JT=39,JF=47,JMP=63,LJ=255, + POST=256,PCOMP,END,IF,IFNOT,ELSE,ENDIF,DO, + WHILE,UNTIL,FOREVER,IFL,IFNOTL,ELSEL,SEMICOLON} CompType; + + void syntaxError(const char* msg, const char* expected=0); // error() + void next(); // advance in to next token + bool matchToken(const char* tok);// in==token? + int rtoken(int low, int high); // return token which must be in range + int rtoken(const char* list[]); // return token by position in list + void rtoken(const char* s); // return token which must be s + int compile_comp(ZPAQL& z); // compile either HCOMP or PCOMP + + // Stack of n elements + class Stack { + libzpaq::Array s; + size_t top; + public: + Stack(int n): s(n), top(0) {} + void push(const U16& x) { + if (top>=s.size()) error("IF or DO nested too deep"); + s[top++]=x; + } + U16 pop() { + if (top<=0) error("unmatched IF or DO"); + return s[--top]; + } + }; + + Stack if_stack, do_stack; +}; + //////////////////////// Compressor ////////////////////////// class Compressor { public: - Compressor(): enc(z), in(0), state(INIT) {} + Compressor(): enc(z), in(0), state(INIT), verify(false) {} void setOutput(Writer* out) {enc.out=out;} void writeTag(); void startBlock(int level); // level=1,2,3 - void startBlock(const char* hcomp); + void startBlock(const char* hcomp); // ZPAQL byte code + void startBlock(const char* config, // ZPAQL source code + int* args, // NULL or int[9] arguments + Writer* pcomp_cmd = 0); // retrieve preprocessor command + void setVerify(bool v) {verify = v;} // check postprocessing? + void hcomp(Writer* out2) {z.write(out2, false);} + bool pcomp(Writer* out2) {return pz.write(out2, true);} void startSegment(const char* filename = 0, const char* comment = 0); void setInput(Reader* i) {in=i;} - void postProcess(const char* pcomp = 0, int len = 0); + void postProcess(const char* pcomp = 0, int len = 0); // byte code bool compress(int n = -1); // n bytes, -1=all, return true until done void endSegment(const char* sha1string = 0); + char* endSegmentChecksum(int64_t* size = 0, bool dosha1=true); + int64_t getSize() {return sha1.usize();} + const char* getChecksum() {return sha1.result();} void endBlock(); int stat(int x) {return enc.stat(x);} private: - ZPAQL z; - Encoder enc; - Reader* in; + ZPAQL z, pz; // model and test postprocessor + Encoder enc; // arithmetic encoder containing predictor + SHA1 sha1; // to test pz output + Reader* in; // input source + char sha1result[20]; // sha1 output enum {INIT, BLOCK1, SEG1, BLOCK2, SEG2} state; + bool verify; // if true then test by postprocessing +}; + +/////////////////////////// StringBuffer ///////////////////// + +// For (de)compressing to/from a string. Writing appends bytes +// which can be later read. +class StringBuffer: public libzpaq::Reader, public libzpaq::Writer { + unsigned char* p; // allocated memory, not NUL terminated, may be NULL + size_t al; // number of bytes allocated, 0 iff p is NULL + size_t wpos; // index of next byte to write, wpos <= al + size_t rpos; // index of next byte to read, rpos < wpos or return EOF. + size_t limit; // max size, default = -1 + const size_t init; // initial size on first use after reset + + // Increase capacity to a without changing size + void reserve(size_t a) { + assert(!al==!p); + if (a<=al) return; + unsigned char* q=0; + if (a>0) q=(unsigned char*)(p ? realloc(p, a) : malloc(a)); + if (a>0 && !q) error("Out of memory"); + p=q; + al=a; + } + + // Enlarge al to make room to write at least n bytes. + void lengthen(size_t n) { + assert(wpos<=al); + if (wpos+n>limit || wpos+n=a) a=a*2+init; + reserve(a); + } + + // No assignment or copy + void operator=(const StringBuffer&); + StringBuffer(const StringBuffer&); + +public: + + // Direct access to data + unsigned char* data() {assert(p || wpos==0); return p;} + + // Allocate no memory initially + StringBuffer(size_t n=0): + p(0), al(0), wpos(0), rpos(0), limit(size_t(-1)), init(n>128?n:128) {} + + // Set output limit + void setLimit(size_t n) {limit=n;} + + // Free memory + ~StringBuffer() {if (p) free(p);} + + // Return number of bytes written. + size_t size() const {return wpos;} + + // Return number of bytes left to read + size_t remaining() const {return wpos-rpos;} + + // Reset size to 0 and free memory. + void reset() { + if (p) free(p); + p=0; + al=rpos=wpos=0; + } + + // Write a single byte. + void put(int c) { // write 1 byte + lengthen(1); + assert(p); + assert(wposwpos) n=wpos-rpos; + if (n>0 && buf) memcpy(buf, p+rpos, n); + rpos+=n; + return n; + } + + // Return the entire string as a read-only array. + const char* c_str() const {return (const char*)p;} + + // Truncate the string to size i. + void resize(size_t i) { + wpos=i; + if (rpos>wpos) rpos=wpos; + } + + // Swap efficiently (init is not swapped) + void swap(StringBuffer& s) { + std::swap(p, s.p); + std::swap(al, s.al); + std::swap(wpos, s.wpos); + std::swap(rpos, s.rpos); + std::swap(limit, s.limit); + } }; /////////////////////////// compress() /////////////////////// -void compress(Reader* in, Writer* out, int level); +// Compress in to out in multiple blocks. Default method is "14,128,0" +// Default filename is "". Comment is appended to input size. +// dosha1 means save the SHA-1 checksum. +void compress(Reader* in, Writer* out, const char* method, + const char* filename=0, const char* comment=0, bool dosha1=true); + +// Same as compress() but output is 1 block, ignoring block size parameter. +void compressBlock(StringBuffer* in, Writer* out, const char* method, + const char* filename=0, const char* comment=0, bool dosha1=true); } // namespace libzpaq /////////////////////////// lrzip functions ////////////////// + #include #ifndef uchar #define uchar unsigned char @@ -463,8 +1536,8 @@ struct bufRead: public libzpaq::Reader { bufRead(uchar *buf_, i64 *n_, i64 total_len_, int *last_pct_, bool progress_, long thread_, FILE *msgout_): s_buf(buf_), s_len(n_), total_len(total_len_), last_pct(last_pct_), progress(progress_), thread(thread_), msgout(msgout_) {} - int get() { - if (progress && !(*s_len % 128)) { + void show_progress (int bytes_in) { + if (progress) { int pct = (total_len > 0) ? (total_len - *s_len) * 100 / total_len : 100; @@ -480,9 +1553,13 @@ struct bufRead: public libzpaq::Reader { *last_pct = pct; } } + } + int get() { if (likely(*s_len > 0)) { (*s_len)--; + if (!(*s_len % 128)) + show_progress(*s_len); return ((int)(uchar)*s_buf++); } return -1; @@ -520,11 +1597,14 @@ extern "C" void zpaq_compress(uchar *c_buf, i64 *c_len, uchar *s_buf, i64 s_len, { i64 total_len = s_len; int last_pct = 100; + char method[]="041280"; // defaults for buffer size, compressibility, data type bufRead bufR(s_buf, &s_len, total_len, &last_pct, progress, thread, msgout); bufWrite bufW(c_buf, c_len); - compress (&bufR, &bufW, level); + method[0]=(char)level+'0'; // set level 1-5 + + compress (&bufR, &bufW, (const char *) &method, NULL, NULL, true); } extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_len, @@ -539,4 +1619,9 @@ extern "C" void zpaq_decompress(uchar *s_buf, i64 *d_len, uchar *c_buf, i64 c_le decompress(&bufR, &bufW); } +void libzpaq::error(const char* msg) { // print message and exit + fprintf(stderr, "ZPAQ Error: %s\n", msg); + exit(1); +} + #endif // LIBZPAQ_H diff --git a/stream.c b/stream.c index 407c550..492381d 100644 --- a/stream.c +++ b/stream.c @@ -181,7 +181,8 @@ static int zpaq_compress_buf(rzip_control *control, struct compress_thread *cthr c_len = 0; - zpaq_compress(c_buf, &c_len, cthread->s_buf, cthread->s_len, control->compression_level / 4 + 1, + /* Compression level can be 1 to 5, zpaq version 7.15 */ + zpaq_compress(c_buf, &c_len, cthread->s_buf, cthread->s_len, control->compression_level / 2 + 1, control->msgout, SHOW_PROGRESS ? true: false, thread); if (unlikely(c_len >= cthread->c_len)) {