This commit is contained in:
Igor Pavlov 2021-12-27 00:00:00 +00:00
parent 98e06a519b
commit f19f813537
1244 changed files with 325856 additions and 2 deletions

100
Asm/arm/7zCrcOpt.asm Normal file
View file

@ -0,0 +1,100 @@
CODE32
EXPORT |CrcUpdateT4@16|
AREA |.text|, CODE, ARM
MACRO
CRC32_STEP_1
ldrb r4, [r1], #1
subs r2, r2, #1
eor r4, r4, r0
and r4, r4, #0xFF
ldr r4, [r3, +r4, lsl #2]
eor r0, r4, r0, lsr #8
MEND
MACRO
CRC32_STEP_4 $STREAM_WORD
eor r7, r7, r8
eor r7, r7, r9
eor r0, r0, r7
eor r0, r0, $STREAM_WORD
ldr $STREAM_WORD, [r1], #4
and r7, r0, #0xFF
and r8, r0, #0xFF00
and r9, r0, #0xFF0000
and r0, r0, #0xFF000000
ldr r7, [r6, +r7, lsl #2]
ldr r8, [r5, +r8, lsr #6]
ldr r9, [r4, +r9, lsr #14]
ldr r0, [r3, +r0, lsr #22]
MEND
|CrcUpdateT4@16| PROC
stmdb sp!, {r4-r11, lr}
cmp r2, #0
beq |$fin|
|$v1|
tst r1, #7
beq |$v2|
CRC32_STEP_1
bne |$v1|
|$v2|
cmp r2, #16
blo |$v3|
ldr r10, [r1], #4
ldr r11, [r1], #4
add r4, r3, #0x400
add r5, r3, #0x800
add r6, r3, #0xC00
mov r7, #0
mov r8, #0
mov r9, #0
sub r2, r2, #16
|$loop|
; pld [r1, #0x40]
CRC32_STEP_4 r10
CRC32_STEP_4 r11
subs r2, r2, #8
bhs |$loop|
sub r1, r1, #8
add r2, r2, #16
eor r7, r7, r8
eor r7, r7, r9
eor r0, r0, r7
|$v3|
cmp r2, #0
beq |$fin|
|$v4|
CRC32_STEP_1
bne |$v4|
|$fin|
ldmia sp!, {r4-r11, pc}
|CrcUpdateT4@16| ENDP
END

181
Asm/arm64/7zAsm.S Normal file
View file

@ -0,0 +1,181 @@
// 7zAsm.S -- ASM macros for arm64
// 2021-04-25 : Igor Pavlov : Public domain
#define r0 x0
#define r1 x1
#define r2 x2
#define r3 x3
#define r4 x4
#define r5 x5
#define r6 x6
#define r7 x7
#define r8 x8
#define r9 x9
#define r10 x10
#define r11 x11
#define r12 x12
#define r13 x13
#define r14 x14
#define r15 x15
#define r16 x16
#define r17 x17
#define r18 x18
#define r19 x19
#define r20 x20
#define r21 x21
#define r22 x22
#define r23 x23
#define r24 x24
#define r25 x25
#define r26 x26
#define r27 x27
#define r28 x28
#define r29 x29
#define r30 x30
#define REG_ABI_PARAM_0 r0
#define REG_ABI_PARAM_1 r1
#define REG_ABI_PARAM_2 r2
.macro p2_add reg:req, param:req
add \reg, \reg, \param
.endm
.macro p2_sub reg:req, param:req
sub \reg, \reg, \param
.endm
.macro p2_sub_s reg:req, param:req
subs \reg, \reg, \param
.endm
.macro p2_and reg:req, param:req
and \reg, \reg, \param
.endm
.macro xor reg:req, param:req
eor \reg, \reg, \param
.endm
.macro or reg:req, param:req
orr \reg, \reg, \param
.endm
.macro shl reg:req, param:req
lsl \reg, \reg, \param
.endm
.macro shr reg:req, param:req
lsr \reg, \reg, \param
.endm
.macro sar reg:req, param:req
asr \reg, \reg, \param
.endm
.macro p1_neg reg:req
neg \reg, \reg
.endm
.macro dec reg:req
sub \reg, \reg, 1
.endm
.macro dec_s reg:req
subs \reg, \reg, 1
.endm
.macro inc reg:req
add \reg, \reg, 1
.endm
.macro inc_s reg:req
adds \reg, \reg, 1
.endm
.macro imul reg:req, param:req
mul \reg, \reg, \param
.endm
/*
arm64 and arm use reverted c flag after subs/cmp instructions:
arm64-arm : x86
b.lo / b.cc : jb / jc
b.hs / b.cs : jae / jnc
*/
.macro jmp lab:req
b \lab
.endm
.macro je lab:req
b.eq \lab
.endm
.macro jz lab:req
b.eq \lab
.endm
.macro jnz lab:req
b.ne \lab
.endm
.macro jne lab:req
b.ne \lab
.endm
.macro jb lab:req
b.lo \lab
.endm
.macro jbe lab:req
b.ls \lab
.endm
.macro ja lab:req
b.hi \lab
.endm
.macro jae lab:req
b.hs \lab
.endm
.macro cmove dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, eq
.endm
.macro cmovne dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, ne
.endm
.macro cmovs dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, mi
.endm
.macro cmovns dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, pl
.endm
.macro cmovb dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, lo
.endm
.macro cmovae dest:req, srcTrue:req
csel \dest, \srcTrue, \dest, hs
.endm
.macro MY_ALIGN_16 macro
.p2align 4,, (1 << 4) - 1
.endm
.macro MY_ALIGN_32 macro
.p2align 5,, (1 << 5) - 1
.endm
.macro MY_ALIGN_64 macro
.p2align 6,, (1 << 6) - 1
.endm

1487
Asm/arm64/LzmaDecOpt.S Normal file

File diff suppressed because it is too large Load diff

284
Asm/x86/7zAsm.asm Normal file
View file

@ -0,0 +1,284 @@
; 7zAsm.asm -- ASM macros
; 2021-12-25 : Igor Pavlov : Public domain
ifdef @wordsize
; @wordsize is defined only in JWASM and ASMC and is not defined in MASM
; @wordsize eq 8 for 64-bit x64
; @wordsize eq 2 for 32-bit x86
if @wordsize eq 8
x64 equ 1
endif
else
ifdef RAX
x64 equ 1
endif
endif
ifdef x64
IS_X64 equ 1
else
IS_X64 equ 0
endif
ifdef ABI_LINUX
IS_LINUX equ 1
else
IS_LINUX equ 0
endif
ifndef x64
; Use ABI_CDECL for x86 (32-bit) only
; if ABI_CDECL is not defined, we use fastcall abi
ifdef ABI_CDECL
IS_CDECL equ 1
else
IS_CDECL equ 0
endif
endif
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
MY_ASM_START macro
ifdef x64
.code
else
.386
.model flat
_TEXT$00 SEGMENT PARA PUBLIC 'CODE'
endif
endm
MY_PROC macro name:req, numParams:req
align 16
proc_numParams = numParams
if (IS_X64 gt 0)
proc_name equ name
elseif (IS_LINUX gt 0)
proc_name equ name
elseif (IS_CDECL gt 0)
proc_name equ @CatStr(_,name)
else
proc_name equ @CatStr(@,name,@, %numParams * 4)
endif
proc_name PROC
endm
MY_ENDP macro
if (IS_X64 gt 0)
ret
elseif (IS_CDECL gt 0)
ret
elseif (proc_numParams LT 3)
ret
else
ret (proc_numParams - 2) * 4
endif
proc_name ENDP
endm
ifdef x64
REG_SIZE equ 8
REG_LOGAR_SIZE equ 3
else
REG_SIZE equ 4
REG_LOGAR_SIZE equ 2
endif
x0 equ EAX
x1 equ ECX
x2 equ EDX
x3 equ EBX
x4 equ ESP
x5 equ EBP
x6 equ ESI
x7 equ EDI
x0_W equ AX
x1_W equ CX
x2_W equ DX
x3_W equ BX
x5_W equ BP
x6_W equ SI
x7_W equ DI
x0_L equ AL
x1_L equ CL
x2_L equ DL
x3_L equ BL
x0_H equ AH
x1_H equ CH
x2_H equ DH
x3_H equ BH
ifdef x64
x5_L equ BPL
x6_L equ SIL
x7_L equ DIL
r0 equ RAX
r1 equ RCX
r2 equ RDX
r3 equ RBX
r4 equ RSP
r5 equ RBP
r6 equ RSI
r7 equ RDI
x8 equ r8d
x9 equ r9d
x10 equ r10d
x11 equ r11d
x12 equ r12d
x13 equ r13d
x14 equ r14d
x15 equ r15d
else
r0 equ x0
r1 equ x1
r2 equ x2
r3 equ x3
r4 equ x4
r5 equ x5
r6 equ x6
r7 equ x7
endif
ifdef x64
ifdef ABI_LINUX
MY_PUSH_2_REGS macro
push r3
push r5
endm
MY_POP_2_REGS macro
pop r5
pop r3
endm
endif
endif
MY_PUSH_4_REGS macro
push r3
push r5
push r6
push r7
endm
MY_POP_4_REGS macro
pop r7
pop r6
pop r5
pop r3
endm
; for fastcall and for WIN-x64
REG_PARAM_0_x equ x1
REG_PARAM_0 equ r1
REG_PARAM_1_x equ x2
REG_PARAM_1 equ r2
ifndef x64
; for x86-fastcall
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
REG_ABI_PARAM_1 equ REG_PARAM_1
else
; x64
if (IS_LINUX eq 0)
; for WIN-x64:
REG_PARAM_2_x equ x8
REG_PARAM_2 equ r8
REG_PARAM_3 equ r9
REG_ABI_PARAM_0_x equ REG_PARAM_0_x
REG_ABI_PARAM_0 equ REG_PARAM_0
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
REG_ABI_PARAM_1 equ REG_PARAM_1
REG_ABI_PARAM_2_x equ REG_PARAM_2_x
REG_ABI_PARAM_2 equ REG_PARAM_2
REG_ABI_PARAM_3 equ REG_PARAM_3
else
; for LINUX-x64:
REG_LINUX_PARAM_0_x equ x7
REG_LINUX_PARAM_0 equ r7
REG_LINUX_PARAM_1_x equ x6
REG_LINUX_PARAM_1 equ r6
REG_LINUX_PARAM_2 equ r2
REG_LINUX_PARAM_3 equ r1
REG_LINUX_PARAM_4_x equ x8
REG_LINUX_PARAM_4 equ r8
REG_LINUX_PARAM_5 equ r9
REG_ABI_PARAM_0_x equ REG_LINUX_PARAM_0_x
REG_ABI_PARAM_0 equ REG_LINUX_PARAM_0
REG_ABI_PARAM_1_x equ REG_LINUX_PARAM_1_x
REG_ABI_PARAM_1 equ REG_LINUX_PARAM_1
REG_ABI_PARAM_2 equ REG_LINUX_PARAM_2
REG_ABI_PARAM_3 equ REG_LINUX_PARAM_3
REG_ABI_PARAM_4_x equ REG_LINUX_PARAM_4_x
REG_ABI_PARAM_4 equ REG_LINUX_PARAM_4
REG_ABI_PARAM_5 equ REG_LINUX_PARAM_5
MY_ABI_LINUX_TO_WIN_2 macro
mov r2, r6
mov r1, r7
endm
MY_ABI_LINUX_TO_WIN_3 macro
mov r8, r2
mov r2, r6
mov r1, r7
endm
MY_ABI_LINUX_TO_WIN_4 macro
mov r9, r1
mov r8, r2
mov r2, r6
mov r1, r7
endm
endif ; IS_LINUX
MY_PUSH_PRESERVED_ABI_REGS macro
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
endif
push r12
push r13
push r14
push r15
endm
MY_POP_PRESERVED_ABI_REGS macro
pop r15
pop r14
pop r13
pop r12
if (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
endif ; x64

180
Asm/x86/7zCrcOpt.asm Normal file
View file

@ -0,0 +1,180 @@
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2021-02-07 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
rD equ r2
rN equ r7
rT equ r5
ifdef x64
num_VAR equ r8
table_VAR equ r9
else
if (IS_CDECL gt 0)
crc_OFFS equ (REG_SIZE * 5)
data_OFFS equ (REG_SIZE + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
else
size_OFFS equ (REG_SIZE * 5)
endif
table_OFFS equ (REG_SIZE + size_OFFS)
num_VAR equ [r4 + size_OFFS]
table_VAR equ [r4 + table_OFFS]
endif
SRCDAT equ rD + rN * 1 + 4 *
CRC macro op:req, dest:req, src:req, t:req
op dest, DWORD PTR [rT + src * 4 + 0400h * t]
endm
CRC_XOR macro dest:req, src:req, t:req
CRC xor, dest, src, t
endm
CRC_MOV macro dest:req, src:req, t:req
CRC mov, dest, src, t
endm
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shr x0, 8
CRC xor, x0, r6, 0
dec rN
endm
MY_PROLOG macro crc_end:req
ifdef x64
if (IS_LINUX gt 0)
MY_PUSH_2_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x7
mov rT, REG_ABI_PARAM_3 ; r5 = r1
mov rN, REG_ABI_PARAM_2 ; r7 = r2
mov rD, REG_ABI_PARAM_1 ; r2 = r6
else
MY_PUSH_4_REGS
mov x0, REG_ABI_PARAM_0_x ; x0 = x1
mov rT, REG_ABI_PARAM_3 ; r5 = r9
mov rN, REG_ABI_PARAM_2 ; r7 = r8
; mov rD, REG_ABI_PARAM_1 ; r2 = r2
endif
else
MY_PUSH_4_REGS
if (IS_CDECL gt 0)
mov x0, [r4 + crc_OFFS]
mov rD, [r4 + data_OFFS]
else
mov x0, REG_ABI_PARAM_0_x
endif
mov rN, num_VAR
mov rT, table_VAR
endif
test rN, rN
jz crc_end
@@:
test rD, 7
jz @F
CRC1b
jnz @B
@@:
cmp rN, 16
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 8
and rN, NOT 7
sub rD, rN
xor x0, [SRCDAT 0]
endm
MY_EPILOG macro crc_end:req
xor x0, [SRCDAT 0]
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
if (IS_X64 gt 0) and (IS_LINUX gt 0)
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
MY_PROC CrcUpdateT8, 4
MY_PROLOG crc_end_8
mov x1, [SRCDAT 1]
align 16
main_loop_8:
mov x6, [SRCDAT 2]
movzx x3, x1_L
CRC_XOR x6, r3, 3
movzx x3, x1_H
CRC_XOR x6, r3, 2
shr x1, 16
movzx x3, x1_L
movzx x1, x1_H
CRC_XOR x6, r3, 1
movzx x3, x0_L
CRC_XOR x6, r1, 0
mov x1, [SRCDAT 3]
CRC_XOR x6, r3, 7
movzx x3, x0_H
shr x0, 16
CRC_XOR x6, r3, 6
movzx x3, x0_L
CRC_XOR x6, r3, 5
movzx x3, x0_H
CRC_MOV x0, r3, 4
xor x0, x6
add rD, 8
jnz main_loop_8
MY_EPILOG crc_end_8
MY_ENDP
MY_PROC CrcUpdateT4, 4
MY_PROLOG crc_end_4
align 16
main_loop_4:
movzx x1, x0_L
movzx x3, x0_H
shr x0, 16
movzx x6, x0_H
and x0, 0FFh
CRC_MOV x1, r1, 3
xor x1, [SRCDAT 1]
CRC_XOR x1, r3, 2
CRC_XOR x1, r6, 0
CRC_XOR x1, r0, 1
movzx x0, x1_L
movzx x3, x1_H
shr x1, 16
movzx x6, x1_H
and x1, 0FFh
CRC_MOV x0, r0, 3
xor x0, [SRCDAT 2]
CRC_XOR x0, r3, 2
CRC_XOR x0, r6, 0
CRC_XOR x0, r1, 1
add rD, 8
jnz main_loop_4
MY_EPILOG crc_end_4
MY_ENDP
end

742
Asm/x86/AesOpt.asm Normal file
View file

@ -0,0 +1,742 @@
; AesOpt.asm -- AES optimized code for x86 AES hardware instructions
; 2021-12-25 : Igor Pavlov : Public domain
include 7zAsm.asm
ifdef __ASMC__
use_vaes_256 equ 1
else
ifdef ymm0
use_vaes_256 equ 1
endif
endif
ifdef use_vaes_256
ECHO "++ VAES 256"
else
ECHO "-- NO VAES 256"
endif
ifdef x64
ECHO "x86-64"
else
ECHO "x86"
if (IS_CDECL gt 0)
ECHO "ABI : CDECL"
else
ECHO "ABI : no CDECL : FASTCALL"
endif
endif
if (IS_LINUX gt 0)
ECHO "ABI : LINUX"
else
ECHO "ABI : WINDOWS"
endif
MY_ASM_START
ifndef x64
.686
.xmm
endif
; MY_ALIGN EQU ALIGN(64)
MY_ALIGN EQU
SEG_ALIGN EQU MY_ALIGN
MY_SEG_PROC macro name:req, numParams:req
; seg_name equ @CatStr(_TEXT$, name)
; seg_name SEGMENT SEG_ALIGN 'CODE'
MY_PROC name, numParams
endm
MY_SEG_ENDP macro
; seg_name ENDS
endm
NUM_AES_KEYS_MAX equ 15
; the number of push operators in function PROLOG
if (IS_LINUX eq 0) or (IS_X64 eq 0)
num_regs_push equ 2
stack_param_offset equ (REG_SIZE * (1 + num_regs_push))
endif
ifdef x64
num_param equ REG_ABI_PARAM_2
else
if (IS_CDECL gt 0)
; size_t size
; void * data
; UInt32 * aes
; ret-ip <- (r4)
aes_OFFS equ (stack_param_offset)
data_OFFS equ (REG_SIZE + aes_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
num_param equ [r4 + size_OFFS]
else
num_param equ [r4 + stack_param_offset]
endif
endif
keys equ REG_PARAM_0 ; r1
rD equ REG_PARAM_1 ; r2
rN equ r0
koffs_x equ x7
koffs_r equ r7
ksize_x equ x6
ksize_r equ r6
keys2 equ r3
state equ xmm0
key equ xmm0
key_ymm equ ymm0
key_ymm_n equ 0
ifdef x64
ways = 11
else
ways = 4
endif
ways_start_reg equ 1
iv equ @CatStr(xmm, %(ways_start_reg + ways))
iv_ymm equ @CatStr(ymm, %(ways_start_reg + ways))
WOP macro op, op2
i = 0
rept ways
op @CatStr(xmm, %(ways_start_reg + i)), op2
i = i + 1
endm
endm
ifndef ABI_LINUX
ifdef x64
; we use 32 bytes of home space in stack in WIN64-x64
NUM_HOME_MM_REGS equ (32 / 16)
; we preserve xmm registers starting from xmm6 in WIN64-x64
MM_START_SAVE_REG equ 6
SAVE_XMM macro num_used_mm_regs:req
num_save_mm_regs = num_used_mm_regs - MM_START_SAVE_REG
if num_save_mm_regs GT 0
num_save_mm_regs2 = num_save_mm_regs - NUM_HOME_MM_REGS
; RSP is (16*x + 8) after entering the function in WIN64-x64
stack_offset = 16 * num_save_mm_regs2 + (stack_param_offset mod 16)
i = 0
rept num_save_mm_regs
if i eq NUM_HOME_MM_REGS
sub r4, stack_offset
endif
if i lt NUM_HOME_MM_REGS
movdqa [r4 + stack_param_offset + i * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
else
movdqa [r4 + (i - NUM_HOME_MM_REGS) * 16], @CatStr(xmm, %(MM_START_SAVE_REG + i))
endif
i = i + 1
endm
endif
endm
RESTORE_XMM macro num_used_mm_regs:req
if num_save_mm_regs GT 0
i = 0
if num_save_mm_regs2 GT 0
rept num_save_mm_regs2
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + NUM_HOME_MM_REGS + i)), [r4 + i * 16]
i = i + 1
endm
add r4, stack_offset
endif
num_low_regs = num_save_mm_regs - i
i = 0
rept num_low_regs
movdqa @CatStr(xmm, %(MM_START_SAVE_REG + i)), [r4 + stack_param_offset + i * 16]
i = i + 1
endm
endif
endm
endif ; x64
endif ; ABI_LINUX
MY_PROLOG macro num_used_mm_regs:req
; num_regs_push: must be equal to the number of push operators
; push r3
; push r5
if (IS_LINUX eq 0) or (IS_X64 eq 0)
push r6
push r7
endif
mov rN, num_param ; don't move it; num_param can use stack pointer (r4)
if (IS_X64 eq 0)
if (IS_CDECL gt 0)
mov rD, [r4 + data_OFFS]
mov keys, [r4 + aes_OFFS]
endif
elseif (IS_LINUX gt 0)
MY_ABI_LINUX_TO_WIN_2
endif
ifndef ABI_LINUX
ifdef x64
SAVE_XMM num_used_mm_regs
endif
endif
mov ksize_x, [keys + 16]
shl ksize_x, 5
endm
MY_EPILOG macro
ifndef ABI_LINUX
ifdef x64
RESTORE_XMM num_save_mm_regs
endif
endif
if (IS_LINUX eq 0) or (IS_X64 eq 0)
pop r7
pop r6
endif
; pop r5
; pop r3
MY_ENDP
endm
OP_KEY macro op:req, offs:req
op state, [keys + offs]
endm
WOP_KEY macro op:req, offs:req
movdqa key, [keys + offs]
WOP op, key
endm
; ---------- AES-CBC Decode ----------
XOR_WITH_DATA macro reg, _ppp_
pxor reg, [rD + i * 16]
endm
WRITE_TO_DATA macro reg, _ppp_
movdqa [rD + i * 16], reg
endm
; state0 equ @CatStr(xmm, %(ways_start_reg))
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 1))
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
key_last equ @CatStr(xmm, %(ways_start_reg + ways + 2))
key_last_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
key_last_ymm_n equ (ways_start_reg + ways + 2)
NUM_CBC_REGS equ (ways_start_reg + ways + 3)
MY_SEG_PROC AesCbc_Decode_HW, 3
AesCbc_Decode_HW_start::
MY_PROLOG NUM_CBC_REGS
AesCbc_Decode_HW_start_2::
movdqa iv, [keys]
add keys, 32
movdqa key0, [keys + 1 * ksize_r]
movdqa key_last, [keys]
sub ksize_x, 16
jmp check2
align 16
nextBlocks2:
WOP movdqa, [rD + i * 16]
mov koffs_x, ksize_x
; WOP_KEY pxor, ksize_r + 16
WOP pxor, key0
; align 16
@@:
WOP_KEY aesdec, 1 * koffs_r
sub koffs_r, 16
jnz @B
; WOP_KEY aesdeclast, 0
WOP aesdeclast, key_last
pxor @CatStr(xmm, %(ways_start_reg)), iv
i = 1
rept ways - 1
pxor @CatStr(xmm, %(ways_start_reg + i)), [rD + i * 16 - 16]
i = i + 1
endm
movdqa iv, [rD + ways * 16 - 16]
WOP WRITE_TO_DATA
add rD, ways * 16
AesCbc_Decode_HW_start_3::
check2:
sub rN, ways
jnc nextBlocks2
add rN, ways
sub ksize_x, 16
jmp check
nextBlock:
movdqa state, [rD]
mov koffs_x, ksize_x
; OP_KEY pxor, 1 * ksize_r + 32
pxor state, key0
; movdqa state0, [rD]
; movdqa state, key0
; pxor state, state0
@@:
OP_KEY aesdec, 1 * koffs_r + 16
OP_KEY aesdec, 1 * koffs_r
sub koffs_r, 32
jnz @B
OP_KEY aesdec, 16
; OP_KEY aesdeclast, 0
aesdeclast state, key_last
pxor state, iv
movdqa iv, [rD]
; movdqa iv, state0
movdqa [rD], state
add rD, 16
check:
sub rN, 1
jnc nextBlock
movdqa [keys - 32], iv
MY_EPILOG
; ---------- AVX ----------
AVX__WOP_n macro op
i = 0
rept ways
op (ways_start_reg + i)
i = i + 1
endm
endm
AVX__WOP macro op
i = 0
rept ways
op @CatStr(ymm, %(ways_start_reg + i))
i = i + 1
endm
endm
AVX__WOP_KEY macro op:req, offs:req
vmovdqa key_ymm, ymmword ptr [keys2 + offs]
AVX__WOP_n op
endm
AVX__CBC_START macro reg
; vpxor reg, key_ymm, ymmword ptr [rD + 32 * i]
vpxor reg, key0_ymm, ymmword ptr [rD + 32 * i]
endm
AVX__CBC_END macro reg
if i eq 0
vpxor reg, reg, iv_ymm
else
vpxor reg, reg, ymmword ptr [rD + i * 32 - 16]
endif
endm
AVX__WRITE_TO_DATA macro reg
vmovdqu ymmword ptr [rD + 32 * i], reg
endm
AVX__XOR_WITH_DATA macro reg
vpxor reg, reg, ymmword ptr [rD + 32 * i]
endm
AVX__CTR_START macro reg
vpaddq iv_ymm, iv_ymm, one_ymm
; vpxor reg, iv_ymm, key_ymm
vpxor reg, iv_ymm, key0_ymm
endm
MY_VAES_INSTR_2 macro cmd, dest, a1, a2
db 0c4H
db 2 + 040H + 020h * (1 - (a2) / 8) + 080h * (1 - (dest) / 8)
db 5 + 8 * ((not (a1)) and 15)
db cmd
db 0c0H + 8 * ((dest) and 7) + ((a2) and 7)
endm
MY_VAES_INSTR macro cmd, dest, a
MY_VAES_INSTR_2 cmd, dest, dest, a
endm
MY_vaesenc macro dest, a
MY_VAES_INSTR 0dcH, dest, a
endm
MY_vaesenclast macro dest, a
MY_VAES_INSTR 0ddH, dest, a
endm
MY_vaesdec macro dest, a
MY_VAES_INSTR 0deH, dest, a
endm
MY_vaesdeclast macro dest, a
MY_VAES_INSTR 0dfH, dest, a
endm
AVX__VAES_DEC macro reg
MY_vaesdec reg, key_ymm_n
endm
AVX__VAES_DEC_LAST_key_last macro reg
; MY_vaesdeclast reg, key_ymm_n
MY_vaesdeclast reg, key_last_ymm_n
endm
AVX__VAES_ENC macro reg
MY_vaesenc reg, key_ymm_n
endm
AVX__VAES_ENC_LAST macro reg
MY_vaesenclast reg, key_ymm_n
endm
AVX__vinserti128_TO_HIGH macro dest, src
vinserti128 dest, dest, src, 1
endm
MY_PROC AesCbc_Decode_HW_256, 3
ifdef use_vaes_256
MY_PROLOG NUM_CBC_REGS
cmp rN, ways * 2
jb AesCbc_Decode_HW_start_2
vmovdqa iv, xmmword ptr [keys]
add keys, 32
vbroadcasti128 key0_ymm, xmmword ptr [keys + 1 * ksize_r]
vbroadcasti128 key_last_ymm, xmmword ptr [keys]
sub ksize_x, 16
mov koffs_x, ksize_x
add ksize_x, ksize_x
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 2) * 32)
push keys2
sub r4, AVX_STACK_SUB
; sub r4, 32
; sub r4, ksize_r
; lea keys2, [r4 + 32]
mov keys2, r4
and keys2, -32
broad:
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
sub koffs_r, 16
; jnc broad
jnz broad
sub rN, ways * 2
align 16
avx_cbcdec_nextBlock2:
mov koffs_x, ksize_x
; AVX__WOP_KEY AVX__CBC_START, 1 * koffs_r + 32
AVX__WOP AVX__CBC_START
@@:
AVX__WOP_KEY AVX__VAES_DEC, 1 * koffs_r
sub koffs_r, 32
jnz @B
; AVX__WOP_KEY AVX__VAES_DEC_LAST, 0
AVX__WOP_n AVX__VAES_DEC_LAST_key_last
AVX__vinserti128_TO_HIGH iv_ymm, xmmword ptr [rD]
AVX__WOP AVX__CBC_END
vmovdqa iv, xmmword ptr [rD + ways * 32 - 16]
AVX__WOP AVX__WRITE_TO_DATA
add rD, ways * 32
sub rN, ways * 2
jnc avx_cbcdec_nextBlock2
add rN, ways * 2
shr ksize_x, 1
; lea r4, [r4 + 1 * ksize_r + 32]
add r4, AVX_STACK_SUB
pop keys2
vzeroupper
jmp AesCbc_Decode_HW_start_3
else
jmp AesCbc_Decode_HW_start
endif
MY_ENDP
MY_SEG_ENDP
; ---------- AES-CBC Encode ----------
e0 equ xmm1
CENC_START_KEY equ 2
CENC_NUM_REG_KEYS equ (3 * 2)
; last_key equ @CatStr(xmm, %(CENC_START_KEY + CENC_NUM_REG_KEYS))
MY_SEG_PROC AesCbc_Encode_HW, 3
MY_PROLOG (CENC_START_KEY + CENC_NUM_REG_KEYS + 0)
movdqa state, [keys]
add keys, 32
i = 0
rept CENC_NUM_REG_KEYS
movdqa @CatStr(xmm, %(CENC_START_KEY + i)), [keys + i * 16]
i = i + 1
endm
add keys, ksize_r
neg ksize_r
add ksize_r, (16 * CENC_NUM_REG_KEYS)
; movdqa last_key, [keys]
jmp check_e
align 16
nextBlock_e:
movdqa e0, [rD]
mov koffs_r, ksize_r
pxor e0, @CatStr(xmm, %(CENC_START_KEY))
pxor state, e0
i = 1
rept (CENC_NUM_REG_KEYS - 1)
aesenc state, @CatStr(xmm, %(CENC_START_KEY + i))
i = i + 1
endm
@@:
OP_KEY aesenc, 1 * koffs_r
OP_KEY aesenc, 1 * koffs_r + 16
add koffs_r, 32
jnz @B
OP_KEY aesenclast, 0
; aesenclast state, last_key
movdqa [rD], state
add rD, 16
check_e:
sub rN, 1
jnc nextBlock_e
; movdqa [keys - 32], state
movdqa [keys + 1 * ksize_r - (16 * CENC_NUM_REG_KEYS) - 32], state
MY_EPILOG
MY_SEG_ENDP
; ---------- AES-CTR ----------
ifdef x64
; ways = 11
endif
one equ @CatStr(xmm, %(ways_start_reg + ways + 1))
one_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 1))
key0 equ @CatStr(xmm, %(ways_start_reg + ways + 2))
key0_ymm equ @CatStr(ymm, %(ways_start_reg + ways + 2))
NUM_CTR_REGS equ (ways_start_reg + ways + 3)
INIT_CTR macro reg, _ppp_
paddq iv, one
movdqa reg, iv
endm
MY_SEG_PROC AesCtr_Code_HW, 3
Ctr_start::
MY_PROLOG NUM_CTR_REGS
Ctr_start_2::
movdqa iv, [keys]
add keys, 32
movdqa key0, [keys]
add keys, ksize_r
neg ksize_r
add ksize_r, 16
Ctr_start_3::
mov koffs_x, 1
movd one, koffs_x
jmp check2_c
align 16
nextBlocks2_c:
WOP INIT_CTR, 0
mov koffs_r, ksize_r
; WOP_KEY pxor, 1 * koffs_r -16
WOP pxor, key0
@@:
WOP_KEY aesenc, 1 * koffs_r
add koffs_r, 16
jnz @B
WOP_KEY aesenclast, 0
WOP XOR_WITH_DATA
WOP WRITE_TO_DATA
add rD, ways * 16
check2_c:
sub rN, ways
jnc nextBlocks2_c
add rN, ways
sub keys, 16
add ksize_r, 16
jmp check_c
; align 16
nextBlock_c:
paddq iv, one
; movdqa state, [keys + 1 * koffs_r - 16]
movdqa state, key0
mov koffs_r, ksize_r
pxor state, iv
@@:
OP_KEY aesenc, 1 * koffs_r
OP_KEY aesenc, 1 * koffs_r + 16
add koffs_r, 32
jnz @B
OP_KEY aesenc, 0
OP_KEY aesenclast, 16
pxor state, [rD]
movdqa [rD], state
add rD, 16
check_c:
sub rN, 1
jnc nextBlock_c
; movdqa [keys - 32], iv
movdqa [keys + 1 * ksize_r - 16 - 32], iv
MY_EPILOG
MY_PROC AesCtr_Code_HW_256, 3
ifdef use_vaes_256
MY_PROLOG NUM_CTR_REGS
cmp rN, ways * 2
jb Ctr_start_2
vbroadcasti128 iv_ymm, xmmword ptr [keys]
add keys, 32
vbroadcasti128 key0_ymm, xmmword ptr [keys]
mov koffs_x, 1
vmovd one, koffs_x
vpsubq iv_ymm, iv_ymm, one_ymm
vpaddq one, one, one
AVX__vinserti128_TO_HIGH one_ymm, one
add keys, ksize_r
sub ksize_x, 16
neg ksize_r
mov koffs_r, ksize_r
add ksize_r, ksize_r
AVX_STACK_SUB = ((NUM_AES_KEYS_MAX + 1 - 1) * 32)
push keys2
lea keys2, [r4 - 32]
sub r4, AVX_STACK_SUB
and keys2, -32
vbroadcasti128 key_ymm, xmmword ptr [keys]
vmovdqa ymmword ptr [keys2], key_ymm
@@:
vbroadcasti128 key_ymm, xmmword ptr [keys + 1 * koffs_r]
vmovdqa ymmword ptr [keys2 + koffs_r * 2], key_ymm
add koffs_r, 16
jnz @B
sub rN, ways * 2
align 16
avx_ctr_nextBlock2:
mov koffs_r, ksize_r
AVX__WOP AVX__CTR_START
; AVX__WOP_KEY AVX__CTR_START, 1 * koffs_r - 32
@@:
AVX__WOP_KEY AVX__VAES_ENC, 1 * koffs_r
add koffs_r, 32
jnz @B
AVX__WOP_KEY AVX__VAES_ENC_LAST, 0
AVX__WOP AVX__XOR_WITH_DATA
AVX__WOP AVX__WRITE_TO_DATA
add rD, ways * 32
sub rN, ways * 2
jnc avx_ctr_nextBlock2
add rN, ways * 2
vextracti128 iv, iv_ymm, 1
sar ksize_r, 1
add r4, AVX_STACK_SUB
pop keys2
vzeroupper
jmp Ctr_start_3
else
jmp Ctr_start
endif
MY_ENDP
MY_SEG_ENDP
end

513
Asm/x86/LzFindOpt.asm Normal file
View file

@ -0,0 +1,513 @@
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
; 2021-07-21: Igor Pavlov : Public domain
;
ifndef x64
; x64=1
; .err <x64_IS_REQUIRED>
endif
include 7zAsm.asm
MY_ASM_START
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
endm
MY_ALIGN_32 macro
MY_ALIGN 32
endm
MY_ALIGN_64 macro
MY_ALIGN 64
endm
t0_L equ x0_L
t0_x equ x0
t0 equ r0
t1_x equ x3
t1 equ r3
cp_x equ t1_x
cp_r equ t1
m equ x5
m_r equ r5
len_x equ x6
len equ r6
diff_x equ x7
diff equ r7
len0 equ r10
len1_x equ x11
len1 equ r11
maxLen_x equ x12
maxLen equ r12
d equ r13
ptr0 equ r14
ptr1 equ r15
d_lim equ m_r
cycSize equ len_x
hash_lim equ len0
delta1_x equ len1_x
delta1_r equ len1
delta_x equ maxLen_x
delta_r equ maxLen
hash equ ptr0
src equ ptr1
if (IS_LINUX gt 0)
; r1 r2 r8 r9 : win32
; r7 r6 r2 r1 r8 r9 : linux
lenLimit equ r8
lenLimit_x equ x8
; pos_r equ r2
pos equ x2
cur equ r1
son equ r9
else
lenLimit equ REG_ABI_PARAM_2
lenLimit_x equ REG_ABI_PARAM_2_x
pos equ REG_ABI_PARAM_1_x
cur equ REG_ABI_PARAM_0
son equ REG_ABI_PARAM_3
endif
if (IS_LINUX gt 0)
maxLen_OFFS equ (REG_SIZE * (6 + 1))
else
cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))
d_OFFS equ (REG_SIZE + cutValue_OFFS)
maxLen_OFFS equ (REG_SIZE + d_OFFS)
endif
hash_OFFS equ (REG_SIZE + maxLen_OFFS)
limit_OFFS equ (REG_SIZE + hash_OFFS)
size_OFFS equ (REG_SIZE + limit_OFFS)
cycPos_OFFS equ (REG_SIZE + size_OFFS)
cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)
posRes_OFFS equ (REG_SIZE + cycSize_OFFS)
if (IS_LINUX gt 0)
else
cutValue_PAR equ [r0 + cutValue_OFFS]
d_PAR equ [r0 + d_OFFS]
endif
maxLen_PAR equ [r0 + maxLen_OFFS]
hash_PAR equ [r0 + hash_OFFS]
limit_PAR equ [r0 + limit_OFFS]
size_PAR equ [r0 + size_OFFS]
cycPos_PAR equ [r0 + cycPos_OFFS]
cycSize_PAR equ [r0 + cycSize_OFFS]
posRes_PAR equ [r0 + posRes_OFFS]
cutValue_VAR equ DWORD PTR [r4 + 8 * 0]
cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]
cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]
cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]
hash_VAR equ QWORD PTR [r4 + 8 * 2]
limit_VAR equ QWORD PTR [r4 + 8 * 3]
size_VAR equ QWORD PTR [r4 + 8 * 4]
distances equ QWORD PTR [r4 + 8 * 5]
maxLen_VAR equ QWORD PTR [r4 + 8 * 6]
Old_RSP equ QWORD PTR [r4 + 8 * 7]
LOCAL_SIZE equ 8 * 8
COPY_VAR_32 macro dest_var, src_var
mov x3, src_var
mov dest_var, x3
endm
COPY_VAR_64 macro dest_var, src_var
mov r3, src_var
mov dest_var, r3
endm
; MY_ALIGN_64
MY_PROC GetMatchesSpecN_2, 13
MY_PUSH_PRESERVED_ABI_REGS
mov r0, RSP
lea r3, [r0 - LOCAL_SIZE]
and r3, -64
mov RSP, r3
mov Old_RSP, r0
if (IS_LINUX gt 0)
mov d, REG_ABI_PARAM_5 ; r13 = r9
mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8
mov son, REG_ABI_PARAM_3 ; r9 = r1
mov r8, REG_ABI_PARAM_2 ; r8 = r2
mov pos, REG_ABI_PARAM_1_x ; r2 = x6
mov r1, REG_ABI_PARAM_0 ; r1 = r7
else
COPY_VAR_32 cutValue_VAR, cutValue_PAR
mov d, d_PAR
endif
COPY_VAR_64 limit_VAR, limit_PAR
mov hash_lim, size_PAR
mov size_VAR, hash_lim
mov cp_x, cycPos_PAR
mov hash, hash_PAR
mov cycSize, cycSize_PAR
mov cycSize_VAR, cycSize
; we want cur in (rcx). So we change the cur and lenLimit variables
sub lenLimit, cur
neg lenLimit_x
inc lenLimit_x
mov t0_x, maxLen_PAR
sub t0, lenLimit
mov maxLen_VAR, t0
jmp main_loop
MY_ALIGN_64
fill_empty:
; ptr0 = *ptr1 = kEmptyHashValue;
mov QWORD PTR [ptr1], 0
inc pos
inc cp_x
mov DWORD PTR [d - 4], 0
cmp d, limit_VAR
jae fin
cmp hash, hash_lim
je fin
; MY_ALIGN_64
main_loop:
; UInt32 delta = *hash++;
mov diff_x, [hash] ; delta
add hash, 4
; mov cycPos_VAR, cp_x
inc cur
add d, 4
mov m, pos
sub m, diff_x; ; matchPos
; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
lea ptr1, [son + 8 * cp_r]
; mov cycSize, cycSize_VAR
cmp pos, cycSize
jb directMode ; if (pos < cycSize_VAR)
; CYC MODE
cmp diff_x, cycSize
jae fill_empty ; if (delta >= cycSize_VAR)
xor t0_x, t0_x
mov cycPos_VAR, cp_x
sub cp_x, diff_x
; jae prepare_for_tree_loop
; add cp_x, cycSize
cmovb t0_x, cycSize
add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
jmp prepare_for_tree_loop
directMode:
cmp diff_x, pos
je fill_empty ; if (delta == pos)
jae fin_error ; if (delta >= pos)
mov cycPos_VAR, cp_x
mov cp_x, m
prepare_for_tree_loop:
mov len0, lenLimit
mov hash_VAR, hash
; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
lea ptr0, [ptr1 + 4]
; UInt32 *_distances = ++d;
mov distances, d
neg len0
mov len1, len0
mov t0_x, cutValue_VAR
mov maxLen, maxLen_VAR
mov cutValueCur_VAR, t0_x
MY_ALIGN_32
tree_loop:
neg diff
mov len, len0
cmp len1, len0
cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);
add diff, cur
mov t0_x, [son + cp_r * 8] ; prefetch
movzx t0_x, BYTE PTR [diff + 1 * len]
lea cp_r, [son + cp_r * 8]
cmp [cur + 1 * len], t0_L
je matched_1
jb left_0
mov [ptr1], m
mov m, [cp_r + 4]
lea ptr1, [cp_r + 4]
sub diff, cur ; FIX32
jmp next_node
MY_ALIGN_32
left_0:
mov [ptr0], m
mov m, [cp_r]
mov ptr0, cp_r
sub diff, cur ; FIX32
; jmp next_node
; ------------ NEXT NODE ------------
; MY_ALIGN_32
next_node:
mov cycSize, cycSize_VAR
dec cutValueCur_VAR
je finish_tree
add diff_x, pos ; prev_match = pos + diff
cmp m, diff_x
jae fin_error ; if (new_match >= prev_match)
mov diff_x, pos
sub diff_x, m ; delta = pos - new_match
cmp pos, cycSize
jae cyc_mode_2 ; if (pos >= cycSize)
mov cp_x, m
test m, m
jne tree_loop ; if (m != 0)
finish_tree:
; ptr0 = *ptr1 = kEmptyHashValue;
mov DWORD PTR [ptr0], 0
mov DWORD PTR [ptr1], 0
inc pos
; _distances[-1] = (UInt32)(d - _distances);
mov t0, distances
mov t1, d
sub t1, t0
shr t1_x, 2
mov [t0 - 4], t1_x
cmp d, limit_VAR
jae fin ; if (d >= limit)
mov cp_x, cycPos_VAR
mov hash, hash_VAR
mov hash_lim, size_VAR
inc cp_x
cmp hash, hash_lim
jne main_loop ; if (hash != size)
jmp fin
MY_ALIGN_32
cyc_mode_2:
cmp diff_x, cycSize
jae finish_tree ; if (delta >= cycSize)
mov cp_x, cycPos_VAR
xor t0_x, t0_x
sub cp_x, diff_x ; cp_x = cycPos - delta
cmovb t0_x, cycSize
add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
jmp tree_loop
MY_ALIGN_32
matched_1:
inc len
; cmp len_x, lenLimit_x
je short lenLimit_reach
movzx t0_x, BYTE PTR [diff + 1 * len]
cmp [cur + 1 * len], t0_L
jne mismatch
MY_ALIGN_32
match_loop:
; while (++len != lenLimit) (len[diff] != len[0]) ;
inc len
; cmp len_x, lenLimit_x
je short lenLimit_reach
movzx t0_x, BYTE PTR [diff + 1 * len]
cmp BYTE PTR [cur + 1 * len], t0_L
je match_loop
mismatch:
jb left_2
mov [ptr1], m
mov m, [cp_r + 4]
lea ptr1, [cp_r + 4]
mov len1, len
jmp max_update
MY_ALIGN_32
left_2:
mov [ptr0], m
mov m, [cp_r]
mov ptr0, cp_r
mov len0, len
max_update:
sub diff, cur ; restore diff
cmp maxLen, len
jae next_node
mov maxLen, len
add len, lenLimit
mov [d], len_x
mov t0_x, diff_x
not t0_x
mov [d + 4], t0_x
add d, 8
jmp next_node
MY_ALIGN_32
lenLimit_reach:
mov delta_r, cur
sub delta_r, diff
lea delta1_r, [delta_r - 1]
mov t0_x, [cp_r]
mov [ptr1], t0_x
mov t0_x, [cp_r + 4]
mov [ptr0], t0_x
mov [d], lenLimit_x
mov [d + 4], delta1_x
add d, 8
; _distances[-1] = (UInt32)(d - _distances);
mov t0, distances
mov t1, d
sub t1, t0
shr t1_x, 2
mov [t0 - 4], t1_x
mov hash, hash_VAR
mov hash_lim, size_VAR
inc pos
mov cp_x, cycPos_VAR
inc cp_x
mov d_lim, limit_VAR
mov cycSize, cycSize_VAR
; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
; break;
cmp hash, hash_lim
je fin
cmp d, d_lim
jae fin
cmp delta_x, [hash]
jne main_loop
movzx t0_x, BYTE PTR [diff]
cmp [cur], t0_L
jne main_loop
; jmp main_loop ; bypass for debug
mov cycPos_VAR, cp_x
shl len, 3 ; cycSize * 8
sub diff, cur ; restore diff
xor t0_x, t0_x
cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)
lea cp_r, [son + 8 * cp_r] ; dest
lea src, [cp_r + 8 * diff]
cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)
add src, t0
add len, son ; len = son + cycSize * 8
MY_ALIGN_32
long_loop:
add hash, 4
; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
mov t0, [src]
add src, 8
mov [cp_r], t0
add cp_r, 8
cmp src, len
cmove src, son ; if end of (son) buffer is reached, we wrap to begin
mov DWORD PTR [d], 2
mov [d + 4], lenLimit_x
mov [d + 8], delta1_x
add d, 12
inc cur
cmp hash, hash_lim
je long_footer
cmp delta_x, [hash]
jne long_footer
movzx t0_x, BYTE PTR [diff + 1 * cur]
cmp [cur], t0_L
jne long_footer
cmp d, d_lim
jb long_loop
long_footer:
sub cp_r, son
shr cp_r, 3
add pos, cp_x
sub pos, cycPos_VAR
mov cycSize, cycSize_VAR
cmp d, d_lim
jae fin
cmp hash, hash_lim
jne main_loop
jmp fin
fin_error:
xor d, d
fin:
mov RSP, Old_RSP
mov t0, [r4 + posRes_OFFS]
mov [t0], pos
mov r0, d
MY_POP_PRESERVED_ABI_REGS
MY_ENDP
_TEXT$LZFINDOPT ENDS
end

1303
Asm/x86/LzmaDecOpt.asm Normal file

File diff suppressed because it is too large Load diff

263
Asm/x86/Sha1Opt.asm Normal file
View file

@ -0,0 +1,263 @@
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
CONST SEGMENT
align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
CONST ENDS
; _TEXT$SHA1OPT SEGMENT 'CODE'
ifndef x64
.686
.xmm
endif
ifdef x64
rNum equ REG_ABI_PARAM_2
if (IS_LINUX eq 0)
LOCAL_SIZE equ (16 * 2)
endif
else
rNum equ r0
LOCAL_SIZE equ (16 * 1)
endif
rState equ REG_ABI_PARAM_0
rData equ REG_ABI_PARAM_1
MY_sha1rnds4 macro a1, a2, imm
db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm
MY_SHA_INSTR macro cmd, a1, a2
db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
cmd_sha1nexte equ 0c8H
cmd_sha1msg1 equ 0c9H
cmd_sha1msg2 equ 0caH
MY_sha1nexte macro a1, a2
MY_SHA_INSTR cmd_sha1nexte, a1, a2
endm
MY_sha1msg1 macro a1, a2
MY_SHA_INSTR cmd_sha1msg1, a1, a2
endm
MY_sha1msg2 macro a1, a2
MY_SHA_INSTR cmd_sha1msg2, a1, a2
endm
MY_PROLOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
sub r4, LOCAL_SIZE + 8
movdqa [r4 ], xmm8
movdqa [r4 + 16], xmm9
endif
else ; x86
if (IS_CDECL gt 0)
mov rState, [r4 + REG_SIZE * 1]
mov rData, [r4 + REG_SIZE * 2]
mov rNum, [r4 + REG_SIZE * 3]
else ; fastcall
mov rNum, [r4 + REG_SIZE * 1]
endif
push r5
mov r5, r4
and r4, -16
sub r4, LOCAL_SIZE
endif
endm
MY_EPILOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa xmm8, [r4]
movdqa xmm9, [r4 + 16]
add r4, LOCAL_SIZE + 8
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
else ; x86
mov r4, r5
pop r5
endif
MY_ENDP
endm
e0_N equ 0
e1_N equ 1
abcd_N equ 2
e0_save_N equ 3
w_regs equ 4
e0 equ @CatStr(xmm, %e0_N)
e1 equ @CatStr(xmm, %e1_N)
abcd equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)
ifdef x64
abcd_save equ xmm8
mask2 equ xmm9
else
abcd_save equ [r4]
mask2 equ e1
endif
LOAD_MASK macro
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
endm
LOAD_W macro k:req
movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
pshufb @CatStr(xmm, %(w_regs + k)), mask2
endm
; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)
NUM_ROUNDS4 equ 20
RND4 macro k
movdqa @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5
nextM = (w_regs + ((k + 1) mod 4))
if (k EQ NUM_ROUNDS4 - 1)
nextM = e0_save_N
endif
MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
endif
if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
endif
if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
endif
endm
REVERSE_STATE macro
; abcd ; dcba
; e0 ; 000e
pshufd abcd, abcd, 01bH ; abcd
pshufd e0, e0, 01bH ; e000
endm
MY_PROC Sha1_UpdateBlocks_HW, 3
MY_PROLOG
cmp rNum, 0
je end_c
movdqu abcd, [rState] ; dcba
movd e0, dword ptr [rState + 16] ; 000e
REVERSE_STATE
ifdef x64
LOAD_MASK
endif
align 16
nextBlock:
movdqa abcd_save, abcd
movdqa e0_save, e0
ifndef x64
LOAD_MASK
endif
LOAD_W 0
LOAD_W 1
LOAD_W 2
LOAD_W 3
paddd e0, @CatStr(xmm, %(w_regs))
k = 0
rept NUM_ROUNDS4
RND4 k
k = k + 1
endm
paddd abcd, abcd_save
add rData, 64
sub rNum, 1
jnz nextBlock
REVERSE_STATE
movdqu [rState], abcd
movd dword ptr [rState + 16], e0
end_c:
MY_EPILOG
; _TEXT$SHA1OPT ENDS
end

263
Asm/x86/Sha256Opt.asm Normal file
View file

@ -0,0 +1,263 @@
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
; .data
; public K
; we can use external SHA256_K_ARRAY defined in Sha256.c
; but we must guarantee that SHA256_K_ARRAY is aligned for 16-bytes
COMMENT @
ifdef x64
K_CONST equ SHA256_K_ARRAY
else
K_CONST equ _SHA256_K_ARRAY
endif
EXTRN K_CONST:xmmword
@
CONST SEGMENT
align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
; COMMENT @
align 16
K_CONST \
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
; @
CONST ENDS
; _TEXT$SHA256OPT SEGMENT 'CODE'
ifndef x64
.686
.xmm
endif
ifdef x64
rNum equ REG_ABI_PARAM_2
if (IS_LINUX eq 0)
LOCAL_SIZE equ (16 * 2)
endif
else
rNum equ r0
LOCAL_SIZE equ (16 * 1)
endif
rState equ REG_ABI_PARAM_0
rData equ REG_ABI_PARAM_1
MY_SHA_INSTR macro cmd, a1, a2
db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
cmd_sha256rnds2 equ 0cbH
cmd_sha256msg1 equ 0ccH
cmd_sha256msg2 equ 0cdH
MY_sha256rnds2 macro a1, a2
MY_SHA_INSTR cmd_sha256rnds2, a1, a2
endm
MY_sha256msg1 macro a1, a2
MY_SHA_INSTR cmd_sha256msg1, a1, a2
endm
MY_sha256msg2 macro a1, a2
MY_SHA_INSTR cmd_sha256msg2, a1, a2
endm
MY_PROLOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa [r4 + 8], xmm6
movdqa [r4 + 8 + 16], xmm7
sub r4, LOCAL_SIZE + 8
movdqa [r4 ], xmm8
movdqa [r4 + 16], xmm9
endif
else ; x86
if (IS_CDECL gt 0)
mov rState, [r4 + REG_SIZE * 1]
mov rData, [r4 + REG_SIZE * 2]
mov rNum, [r4 + REG_SIZE * 3]
else ; fastcall
mov rNum, [r4 + REG_SIZE * 1]
endif
push r5
mov r5, r4
and r4, -16
sub r4, LOCAL_SIZE
endif
endm
MY_EPILOG macro
ifdef x64
if (IS_LINUX eq 0)
movdqa xmm8, [r4]
movdqa xmm9, [r4 + 16]
add r4, LOCAL_SIZE + 8
movdqa xmm6, [r4 + 8]
movdqa xmm7, [r4 + 8 + 16]
endif
else ; x86
mov r4, r5
pop r5
endif
MY_ENDP
endm
msg equ xmm0
tmp equ xmm0
state0_N equ 2
state1_N equ 3
w_regs equ 4
state1_save equ xmm1
state0 equ @CatStr(xmm, %state0_N)
state1 equ @CatStr(xmm, %state1_N)
ifdef x64
state0_save equ xmm8
mask2 equ xmm9
else
state0_save equ [r4]
mask2 equ xmm0
endif
LOAD_MASK macro
movdqa mask2, XMMWORD PTR Reverse_Endian_Mask
endm
LOAD_W macro k:req
movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
pshufb @CatStr(xmm, %(w_regs + k)), mask2
endm
; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
pre1 equ 3
pre2 equ 2
RND4 macro k
movdqa msg, xmmword ptr [K_CONST + (k) * 16]
paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
MY_sha256rnds2 state0_N, state1_N
pshufd msg, msg, 0eH
if (k GE (4 - pre1)) AND (k LT (16 - pre1))
; w4[0] = msg1(w4[-4], w4[-3])
MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
endif
MY_sha256rnds2 state1_N, state0_N
if (k GE (4 - pre2)) AND (k LT (16 - pre2))
movdqa tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
paddd @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
; w4[0] = msg2(w4[0], w4[-1])
MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
endif
endm
REVERSE_STATE macro
; state0 ; dcba
; state1 ; hgfe
pshufd tmp, state0, 01bH ; abcd
pshufd state0, state1, 01bH ; efgh
movdqa state1, state0 ; efgh
punpcklqdq state0, tmp ; cdgh
punpckhqdq state1, tmp ; abef
endm
MY_PROC Sha256_UpdateBlocks_HW, 3
MY_PROLOG
cmp rNum, 0
je end_c
movdqu state0, [rState] ; dcba
movdqu state1, [rState + 16] ; hgfe
REVERSE_STATE
ifdef x64
LOAD_MASK
endif
align 16
nextBlock:
movdqa state0_save, state0
movdqa state1_save, state1
ifndef x64
LOAD_MASK
endif
LOAD_W 0
LOAD_W 1
LOAD_W 2
LOAD_W 3
k = 0
rept 16
RND4 k
k = k + 1
endm
paddd state0, state0_save
paddd state1, state1_save
add rData, 64
sub rNum, 1
jnz nextBlock
REVERSE_STATE
movdqu [rState], state0
movdqu [rState + 16], state1
end_c:
MY_EPILOG
; _TEXT$SHA256OPT ENDS
end

239
Asm/x86/XzCrc64Opt.asm Normal file
View file

@ -0,0 +1,239 @@
; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2021-02-06 : Igor Pavlov : Public domain
include 7zAsm.asm
MY_ASM_START
ifdef x64
rD equ r9
rN equ r10
rT equ r5
num_VAR equ r8
SRCDAT4 equ dword ptr [rD + rN * 1]
CRC_XOR macro dest:req, src:req, t:req
xor dest, QWORD PTR [rT + src * 8 + 0800h * t]
endm
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shr r0, 8
CRC_XOR r0, r6, 0
dec rN
endm
MY_PROLOG macro crc_end:req
ifdef ABI_LINUX
MY_PUSH_2_REGS
else
MY_PUSH_4_REGS
endif
mov r0, REG_ABI_PARAM_0
mov rN, REG_ABI_PARAM_2
mov rT, REG_ABI_PARAM_3
mov rD, REG_ABI_PARAM_1
test rN, rN
jz crc_end
@@:
test rD, 3
jz @F
CRC1b
jnz @B
@@:
cmp rN, 8
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 4
and rN, NOT 3
sub rD, rN
mov x1, SRCDAT4
xor r0, r1
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
mov x1, SRCDAT4
xor r0, r1
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
ifdef ABI_LINUX
MY_POP_2_REGS
else
MY_POP_4_REGS
endif
endm
MY_PROC XzCrc64UpdateT4, 4
MY_PROLOG crc_end_4
align 16
main_loop_4:
mov x1, SRCDAT4
movzx x2, x0_L
movzx x3, x0_H
shr r0, 16
movzx x6, x0_L
movzx x7, x0_H
shr r0, 16
CRC_XOR r1, r2, 3
CRC_XOR r0, r3, 2
CRC_XOR r1, r6, 1
CRC_XOR r0, r7, 0
xor r0, r1
add rD, 4
jnz main_loop_4
MY_EPILOG crc_end_4
MY_ENDP
else
; x86 (32-bit)
rD equ r1
rN equ r7
rT equ r5
crc_OFFS equ (REG_SIZE * 5)
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
; cdecl or (GNU fastcall) stack:
; (UInt32 *) table
; size_t size
; void * data
; (UInt64) crc
; ret-ip <-(r4)
data_OFFS equ (8 + crc_OFFS)
size_OFFS equ (REG_SIZE + data_OFFS)
table_OFFS equ (REG_SIZE + size_OFFS)
num_VAR equ [r4 + size_OFFS]
table_VAR equ [r4 + table_OFFS]
else
; Windows fastcall:
; r1 = data, r2 = size
; stack:
; (UInt32 *) table
; (UInt64) crc
; ret-ip <-(r4)
table_OFFS equ (8 + crc_OFFS)
table_VAR equ [r4 + table_OFFS]
num_VAR equ table_VAR
endif
SRCDAT4 equ dword ptr [rD + rN * 1]
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]
op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
endm
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
CRC xor, xor, dest0, dest1, src, t
endm
CRC1b macro
movzx x6, BYTE PTR [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shrd r0, r2, 8
shr r2, 8
CRC_XOR r0, r2, r6, 0
dec rN
endm
MY_PROLOG macro crc_end:req
MY_PUSH_4_REGS
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
mov rN, [r4 + size_OFFS]
mov rD, [r4 + data_OFFS]
else
mov rN, r2
endif
mov x0, [r4 + crc_OFFS]
mov x2, [r4 + crc_OFFS + 4]
mov rT, table_VAR
test rN, rN
jz crc_end
@@:
test rD, 3
jz @F
CRC1b
jnz @B
@@:
cmp rN, 8
jb crc_end
add rN, rD
mov num_VAR, rN
sub rN, 4
and rN, NOT 3
sub rD, rN
xor r0, SRCDAT4
add rN, 4
endm
MY_EPILOG macro crc_end:req
sub rN, 4
xor r0, SRCDAT4
mov rD, rN
mov rN, num_VAR
sub rN, rD
crc_end:
test rN, rN
jz @F
CRC1b
jmp crc_end
@@:
MY_POP_4_REGS
endm
MY_PROC XzCrc64UpdateT4, 5
MY_PROLOG crc_end_4
movzx x6, x0_L
align 16
main_loop_4:
mov r3, SRCDAT4
xor r3, r2
CRC xor, mov, r3, r2, r6, 3
movzx x6, x0_H
shr r0, 16
CRC_XOR r3, r2, r6, 2
movzx x6, x0_L
movzx x0, x0_H
CRC_XOR r3, r2, r6, 1
CRC_XOR r3, r2, r0, 0
movzx x6, x3_L
mov r0, r3
add rD, 4
jnz main_loop_4
MY_EPILOG crc_end_4
MY_ENDP
endif ; ! x64
end

204
C/7z.h Normal file
View file

@ -0,0 +1,204 @@
/* 7z.h -- 7z interface
2018-07-02 : Igor Pavlov : Public domain */
#ifndef __7Z_H
#define __7Z_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define k7zStartHeaderSize 0x20
#define k7zSignatureSize 6
extern const Byte k7zSignature[k7zSignatureSize];
typedef struct
{
const Byte *Data;
size_t Size;
} CSzData;
/* CSzCoderInfo & CSzFolder support only default methods */
typedef struct
{
size_t PropsOffset;
UInt32 MethodID;
Byte NumStreams;
Byte PropsSize;
} CSzCoderInfo;
typedef struct
{
UInt32 InIndex;
UInt32 OutIndex;
} CSzBond;
#define SZ_NUM_CODERS_IN_FOLDER_MAX 4
#define SZ_NUM_BONDS_IN_FOLDER_MAX 3
#define SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX 4
typedef struct
{
UInt32 NumCoders;
UInt32 NumBonds;
UInt32 NumPackStreams;
UInt32 UnpackStream;
UInt32 PackStreams[SZ_NUM_PACK_STREAMS_IN_FOLDER_MAX];
CSzBond Bonds[SZ_NUM_BONDS_IN_FOLDER_MAX];
CSzCoderInfo Coders[SZ_NUM_CODERS_IN_FOLDER_MAX];
} CSzFolder;
SRes SzGetNextFolderItem(CSzFolder *f, CSzData *sd);
typedef struct
{
UInt32 Low;
UInt32 High;
} CNtfsFileTime;
typedef struct
{
Byte *Defs; /* MSB 0 bit numbering */
UInt32 *Vals;
} CSzBitUi32s;
typedef struct
{
Byte *Defs; /* MSB 0 bit numbering */
// UInt64 *Vals;
CNtfsFileTime *Vals;
} CSzBitUi64s;
#define SzBitArray_Check(p, i) (((p)[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
#define SzBitWithVals_Check(p, i) ((p)->Defs && ((p)->Defs[(i) >> 3] & (0x80 >> ((i) & 7))) != 0)
typedef struct
{
UInt32 NumPackStreams;
UInt32 NumFolders;
UInt64 *PackPositions; // NumPackStreams + 1
CSzBitUi32s FolderCRCs; // NumFolders
size_t *FoCodersOffsets; // NumFolders + 1
UInt32 *FoStartPackStreamIndex; // NumFolders + 1
UInt32 *FoToCoderUnpackSizes; // NumFolders + 1
Byte *FoToMainUnpackSizeIndex; // NumFolders
UInt64 *CoderUnpackSizes; // for all coders in all folders
Byte *CodersData;
UInt64 RangeLimit;
} CSzAr;
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *stream, UInt64 startPos,
Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain);
typedef struct
{
CSzAr db;
UInt64 startPosAfterHeader;
UInt64 dataPos;
UInt32 NumFiles;
UInt64 *UnpackPositions; // NumFiles + 1
// Byte *IsEmptyFiles;
Byte *IsDirs;
CSzBitUi32s CRCs;
CSzBitUi32s Attribs;
// CSzBitUi32s Parents;
CSzBitUi64s MTime;
CSzBitUi64s CTime;
UInt32 *FolderToFile; // NumFolders + 1
UInt32 *FileToFolder; // NumFiles
size_t *FileNameOffsets; /* in 2-byte steps */
Byte *FileNames; /* UTF-16-LE */
} CSzArEx;
#define SzArEx_IsDir(p, i) (SzBitArray_Check((p)->IsDirs, i))
#define SzArEx_GetFileSize(p, i) ((p)->UnpackPositions[(i) + 1] - (p)->UnpackPositions[i])
void SzArEx_Init(CSzArEx *p);
void SzArEx_Free(CSzArEx *p, ISzAllocPtr alloc);
UInt64 SzArEx_GetFolderStreamPos(const CSzArEx *p, UInt32 folderIndex, UInt32 indexInFolder);
int SzArEx_GetFolderFullPackSize(const CSzArEx *p, UInt32 folderIndex, UInt64 *resSize);
/*
if dest == NULL, the return value specifies the required size of the buffer,
in 16-bit characters, including the null-terminating character.
if dest != NULL, the return value specifies the number of 16-bit characters that
are written to the dest, including the null-terminating character. */
size_t SzArEx_GetFileNameUtf16(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
/*
size_t SzArEx_GetFullNameLen(const CSzArEx *p, size_t fileIndex);
UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16 *dest);
*/
/*
SzArEx_Extract extracts file from archive
*outBuffer must be 0 before first call for each new archive.
Extracting cache:
If you need to decompress more than one file, you can send
these values from previous call:
*blockIndex,
*outBuffer,
*outBufferSize
You can consider "*outBuffer" as cache of solid block. If your archive is solid,
it will increase decompression speed.
If you use external function, you can declare these 3 cache variables
(blockIndex, outBuffer, outBufferSize) as static in that external function.
Free *outBuffer and set *outBuffer to 0, if you want to flush cache.
*/
SRes SzArEx_Extract(
const CSzArEx *db,
ILookInStream *inStream,
UInt32 fileIndex, /* index of file */
UInt32 *blockIndex, /* index of solid block */
Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */
size_t *outBufferSize, /* buffer size for output buffer */
size_t *offset, /* offset of stream for required file in *outBuffer */
size_t *outSizeProcessed, /* size of file in *outBuffer */
ISzAllocPtr allocMain,
ISzAllocPtr allocTemp);
/*
SzArEx_Open Errors:
SZ_ERROR_NO_ARCHIVE
SZ_ERROR_ARCHIVE
SZ_ERROR_UNSUPPORTED
SZ_ERROR_MEM
SZ_ERROR_CRC
SZ_ERROR_INPUT_EOF
SZ_ERROR_FAIL
*/
SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
EXTERN_C_END
#endif

80
C/7zAlloc.c Normal file
View file

@ -0,0 +1,80 @@
/* 7zAlloc.c -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdlib.h>
#include "7zAlloc.h"
/* #define _SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
#ifdef _WIN32
#include <windows.h>
#endif
#include <stdio.h>
int g_allocCount = 0;
int g_allocCountTemp = 0;
#endif
void *SzAlloc(ISzAllocPtr p, size_t size)
{
UNUSED_VAR(p);
if (size == 0)
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount);
g_allocCount++;
#endif
return malloc(size);
}
void SzFree(ISzAllocPtr p, void *address)
{
UNUSED_VAR(p);
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
{
g_allocCount--;
fprintf(stderr, "\nFree; count = %10d", g_allocCount);
}
#endif
free(address);
}
void *SzAllocTemp(ISzAllocPtr p, size_t size)
{
UNUSED_VAR(p);
if (size == 0)
return 0;
#ifdef _SZ_ALLOC_DEBUG
fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp);
g_allocCountTemp++;
#ifdef _WIN32
return HeapAlloc(GetProcessHeap(), 0, size);
#endif
#endif
return malloc(size);
}
void SzFreeTemp(ISzAllocPtr p, void *address)
{
UNUSED_VAR(p);
#ifdef _SZ_ALLOC_DEBUG
if (address != 0)
{
g_allocCountTemp--;
fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
}
#ifdef _WIN32
HeapFree(GetProcessHeap(), 0, address);
return;
#endif
#endif
free(address);
}

19
C/7zAlloc.h Normal file
View file

@ -0,0 +1,19 @@
/* 7zAlloc.h -- Allocation functions
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_ALLOC_H
#define __7Z_ALLOC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
void *SzAlloc(ISzAllocPtr p, size_t size);
void SzFree(ISzAllocPtr p, void *address);
void *SzAllocTemp(ISzAllocPtr p, size_t size);
void SzFreeTemp(ISzAllocPtr p, void *address);
EXTERN_C_END
#endif

1783
C/7zArcIn.c Normal file

File diff suppressed because it is too large Load diff

36
C/7zBuf.c Normal file
View file

@ -0,0 +1,36 @@
/* 7zBuf.c -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zBuf.h"
void Buf_Init(CBuf *p)
{
p->data = 0;
p->size = 0;
}
int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc)
{
p->size = 0;
if (size == 0)
{
p->data = 0;
return 1;
}
p->data = (Byte *)ISzAlloc_Alloc(alloc, size);
if (p->data)
{
p->size = size;
return 1;
}
return 0;
}
void Buf_Free(CBuf *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->data);
p->data = 0;
p->size = 0;
}

35
C/7zBuf.h Normal file
View file

@ -0,0 +1,35 @@
/* 7zBuf.h -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#ifndef __7Z_BUF_H
#define __7Z_BUF_H
#include "7zTypes.h"
EXTERN_C_BEGIN
typedef struct
{
Byte *data;
size_t size;
} CBuf;
void Buf_Init(CBuf *p);
int Buf_Create(CBuf *p, size_t size, ISzAllocPtr alloc);
void Buf_Free(CBuf *p, ISzAllocPtr alloc);
typedef struct
{
Byte *data;
size_t size;
size_t pos;
} CDynBuf;
void DynBuf_Construct(CDynBuf *p);
void DynBuf_SeekToBeg(CDynBuf *p);
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc);
void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc);
EXTERN_C_END
#endif

52
C/7zBuf2.c Normal file
View file

@ -0,0 +1,52 @@
/* 7zBuf2.c -- Byte Buffer
2017-04-03 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "7zBuf.h"
void DynBuf_Construct(CDynBuf *p)
{
p->data = 0;
p->size = 0;
p->pos = 0;
}
void DynBuf_SeekToBeg(CDynBuf *p)
{
p->pos = 0;
}
int DynBuf_Write(CDynBuf *p, const Byte *buf, size_t size, ISzAllocPtr alloc)
{
if (size > p->size - p->pos)
{
size_t newSize = p->pos + size;
Byte *data;
newSize += newSize / 4;
data = (Byte *)ISzAlloc_Alloc(alloc, newSize);
if (!data)
return 0;
p->size = newSize;
if (p->pos != 0)
memcpy(data, p->data, p->pos);
ISzAlloc_Free(alloc, p->data);
p->data = data;
}
if (size != 0)
{
memcpy(p->data + p->pos, buf, size);
p->pos += size;
}
return 1;
}
void DynBuf_Free(CDynBuf *p, ISzAllocPtr alloc)
{
ISzAlloc_Free(alloc, p->data);
p->data = 0;
p->size = 0;
p->pos = 0;
}

322
C/7zCrc.c Normal file
View file

@ -0,0 +1,322 @@
/* 7zCrc.c -- CRC32 init
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zCrc.h"
#include "CpuArch.h"
#define kCrcPoly 0xEDB88320
#ifdef MY_CPU_LE
#define CRC_NUM_TABLES 8
#else
#define CRC_NUM_TABLES 9
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
#ifndef MY_CPU_BE
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
#endif
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
extern
CRC_FUNC g_CrcUpdateT4;
CRC_FUNC g_CrcUpdateT4;
extern
CRC_FUNC g_CrcUpdateT8;
CRC_FUNC g_CrcUpdateT8;
extern
CRC_FUNC g_CrcUpdateT0_32;
CRC_FUNC g_CrcUpdateT0_32;
extern
CRC_FUNC g_CrcUpdateT0_64;
CRC_FUNC g_CrcUpdateT0_64;
extern
CRC_FUNC g_CrcUpdate;
CRC_FUNC g_CrcUpdate;
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
{
return g_CrcUpdate(v, data, size, g_CrcTable);
}
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
{
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
}
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
const Byte *pEnd = p + size;
for (; p != pEnd; p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
/* ---------- hardware CRC ---------- */
#ifdef MY_CPU_LE
#if defined(MY_CPU_ARM_OR_ARM64)
// #pragma message("ARM*")
#if defined(_MSC_VER)
#if defined(MY_CPU_ARM64)
#if (_MSC_VER >= 1910)
#define USE_ARM64_CRC
#endif
#endif
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|| (defined(__GNUC__) && (__GNUC__ > 4))
#if !defined(__ARM_FEATURE_CRC32)
#define __ARM_FEATURE_CRC32 1
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
#endif
#endif
#if defined(__ARM_FEATURE_CRC32)
#define USE_ARM64_CRC
#include <arm_acle.h>
#endif
#endif
#else
// no hardware CRC
// #define USE_CRC_EMU
#ifdef USE_CRC_EMU
#pragma message("ARM64 CRC emulation")
MY_FORCE_INLINE
UInt32 __crc32b(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
return v;
}
MY_FORCE_INLINE
UInt32 __crc32w(UInt32 v, UInt32 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
MY_FORCE_INLINE
UInt32 __crc32d(UInt32 v, UInt64 data)
{
const UInt32 *table = g_CrcTable;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
return v;
}
#endif // USE_CRC_EMU
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#define T0_32_UNROLL_BYTES (4 * 4)
#define T0_64_UNROLL_BYTES (4 * 8)
#ifndef ATTRIB_CRC
#define ATTRIB_CRC
#endif
// #pragma message("USE ARM HW CRC")
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_32_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_32_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
ATTRIB_CRC
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
UNUSED_VAR(table);
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
v = __crc32b(v, *p++);
if (size >= T0_64_UNROLL_BYTES)
{
const Byte *lim = p + size;
size &= (T0_64_UNROLL_BYTES - 1);
lim -= size;
do
{
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
}
while (p != lim);
}
for (; size != 0; size--)
v = __crc32b(v, *p++);
return v;
}
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
#endif // MY_CPU_LE
void MY_FAST_CALL CrcGenerateTable()
{
UInt32 i;
for (i = 0; i < 256; i++)
{
UInt32 r = i;
unsigned j;
for (j = 0; j < 8; j++)
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
g_CrcTable[i] = r;
}
for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
{
UInt32 r = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
}
#if CRC_NUM_TABLES < 4
g_CrcUpdate = CrcUpdateT1;
#else
#ifdef MY_CPU_LE
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
#ifdef MY_CPU_X86_OR_AMD64
if (!CPU_Is_InOrder())
#endif
g_CrcUpdate = CrcUpdateT8;
#endif
#else
{
#ifndef MY_CPU_BE
UInt32 k = 0x01020304;
const Byte *p = (const Byte *)&k;
if (p[0] == 4 && p[1] == 3)
{
g_CrcUpdateT4 = CrcUpdateT4;
g_CrcUpdate = CrcUpdateT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT8;
g_CrcUpdate = CrcUpdateT8;
#endif
}
else if (p[0] != 1 || p[1] != 2)
g_CrcUpdate = CrcUpdateT1;
else
#endif
{
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
{
UInt32 x = g_CrcTable[(size_t)i - 256];
g_CrcTable[i] = CRC_UINT32_SWAP(x);
}
g_CrcUpdateT4 = CrcUpdateT1_BeT4;
g_CrcUpdate = CrcUpdateT1_BeT4;
#if CRC_NUM_TABLES >= 8
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
g_CrcUpdate = CrcUpdateT1_BeT8;
#endif
}
}
#endif
#endif
#ifdef MY_CPU_LE
#ifdef USE_ARM64_CRC
if (CPU_IsSupported_CRC32())
{
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate =
#if defined(MY_CPU_ARM)
CrcUpdateT0_32;
#else
CrcUpdateT0_64;
#endif
}
#endif
#ifdef USE_CRC_EMU
g_CrcUpdateT0_32 = CrcUpdateT0_32;
g_CrcUpdateT0_64 = CrcUpdateT0_64;
g_CrcUpdate = CrcUpdateT0_64;
#endif
#endif
}

25
C/7zCrc.h Normal file
View file

@ -0,0 +1,25 @@
/* 7zCrc.h -- CRC32 calculation
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __7Z_CRC_H
#define __7Z_CRC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
extern UInt32 g_CrcTable[];
/* Call CrcGenerateTable one time before other CRC functions */
void MY_FAST_CALL CrcGenerateTable(void);
#define CRC_INIT_VAL 0xFFFFFFFF
#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
EXTERN_C_END
#endif

117
C/7zCrcOpt.c Normal file
View file

@ -0,0 +1,117 @@
/* 7zCrcOpt.c -- CRC32 calculation
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifndef MY_CPU_BE
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x300)[((v ) & 0xFF)]
^ (table + 0x200)[((v >> 8) & 0xFF)]
^ (table + 0x100)[((v >> 16) & 0xFF)]
^ (table + 0x000)[((v >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x700)[((v ) & 0xFF)]
^ (table + 0x600)[((v >> 8) & 0xFF)]
^ (table + 0x500)[((v >> 16) & 0xFF)]
^ (table + 0x400)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x300)[((d ) & 0xFF)]
^ (table + 0x200)[((d >> 8) & 0xFF)]
^ (table + 0x100)[((d >> 16) & 0xFF)]
^ (table + 0x000)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2(v, *p);
return v;
}
#endif
#ifndef MY_CPU_LE
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
table += 0x100;
v = CRC_UINT32_SWAP(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 4; size -= 4, p += 4)
{
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x000)[((v ) & 0xFF)]
^ (table + 0x100)[((v >> 8) & 0xFF)]
^ (table + 0x200)[((v >> 16) & 0xFF)]
^ (table + 0x300)[((v >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v);
}
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
{
const Byte *p = (const Byte *)data;
table += 0x100;
v = CRC_UINT32_SWAP(v);
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
for (; size >= 8; size -= 8, p += 8)
{
UInt32 d;
v ^= *(const UInt32 *)(const void *)p;
v =
(table + 0x400)[((v ) & 0xFF)]
^ (table + 0x500)[((v >> 8) & 0xFF)]
^ (table + 0x600)[((v >> 16) & 0xFF)]
^ (table + 0x700)[((v >> 24))];
d = *((const UInt32 *)(const void *)p + 1);
v ^=
(table + 0x000)[((d ) & 0xFF)]
^ (table + 0x100)[((d >> 8) & 0xFF)]
^ (table + 0x200)[((d >> 16) & 0xFF)]
^ (table + 0x300)[((d >> 24))];
}
for (; size > 0; size--, p++)
v = CRC_UPDATE_BYTE_2_BE(v, *p);
return CRC_UINT32_SWAP(v);
}
#endif

600
C/7zDec.c Normal file
View file

@ -0,0 +1,600 @@
/* 7zDec.c -- Decoding from 7z folder
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
/* #define _7ZIP_PPMD_SUPPPORT */
#include "7z.h"
#include "7zCrc.h"
#include "Bcj2.h"
#include "Bra.h"
#include "CpuArch.h"
#include "Delta.h"
#include "LzmaDec.h"
#include "Lzma2Dec.h"
#ifdef _7ZIP_PPMD_SUPPPORT
#include "Ppmd7.h"
#endif
#define k_Copy 0
#ifndef _7Z_NO_METHOD_LZMA2
#define k_LZMA2 0x21
#endif
#define k_LZMA 0x30101
#define k_BCJ2 0x303011B
#ifndef _7Z_NO_METHODS_FILTERS
#define k_Delta 3
#define k_BCJ 0x3030103
#define k_PPC 0x3030205
#define k_IA64 0x3030401
#define k_ARM 0x3030501
#define k_ARMT 0x3030701
#define k_SPARC 0x3030805
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
#define k_PPMD 0x30401
typedef struct
{
IByteIn vt;
const Byte *cur;
const Byte *end;
const Byte *begin;
UInt64 processed;
BoolInt extra;
SRes res;
const ILookInStream *inStream;
} CByteInToLook;
static Byte ReadByte(const IByteIn *pp)
{
CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
if (p->cur != p->end)
return *p->cur++;
if (p->res == SZ_OK)
{
size_t size = (size_t)(p->cur - p->begin);
p->processed += size;
p->res = ILookInStream_Skip(p->inStream, size);
size = (1 << 25);
p->res = ILookInStream_Look(p->inStream, (const void **)&p->begin, &size);
p->cur = p->begin;
p->end = p->begin + size;
if (size != 0)
return *p->cur++;;
}
p->extra = True;
return 0;
}
static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CPpmd7 ppmd;
CByteInToLook s;
SRes res = SZ_OK;
s.vt.Read = ReadByte;
s.inStream = inStream;
s.begin = s.end = s.cur = NULL;
s.extra = False;
s.res = SZ_OK;
s.processed = 0;
if (propsSize != 5)
return SZ_ERROR_UNSUPPORTED;
{
unsigned order = props[0];
UInt32 memSize = GetUi32(props + 1);
if (order < PPMD7_MIN_ORDER ||
order > PPMD7_MAX_ORDER ||
memSize < PPMD7_MIN_MEM_SIZE ||
memSize > PPMD7_MAX_MEM_SIZE)
return SZ_ERROR_UNSUPPORTED;
Ppmd7_Construct(&ppmd);
if (!Ppmd7_Alloc(&ppmd, memSize, allocMain))
return SZ_ERROR_MEM;
Ppmd7_Init(&ppmd, order);
}
{
ppmd.rc.dec.Stream = &s.vt;
if (!Ppmd7z_RangeDec_Init(&ppmd.rc.dec))
res = SZ_ERROR_DATA;
else if (!s.extra)
{
Byte *buf = outBuffer;
const Byte *lim = buf + outSize;
for (; buf != lim; buf++)
{
int sym = Ppmd7z_DecodeSymbol(&ppmd);
if (s.extra || sym < 0)
break;
*buf = (Byte)sym;
}
if (buf != lim)
res = SZ_ERROR_DATA;
else if (!Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec))
{
/* if (Ppmd7z_DecodeSymbol(&ppmd) != PPMD7_SYM_END || !Ppmd7z_RangeDec_IsFinishedOK(&ppmd.rc.dec)) */
res = SZ_ERROR_DATA;
}
}
if (s.extra)
res = (s.res != SZ_OK ? s.res : SZ_ERROR_DATA);
else if (s.processed + (size_t)(s.cur - s.begin) != inSize)
res = SZ_ERROR_DATA;
}
Ppmd7_Free(&ppmd, allocMain);
return res;
}
#endif
static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CLzmaDec state;
SRes res = SZ_OK;
LzmaDec_Construct(&state);
RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
state.dic = outBuffer;
state.dicBufSize = outSize;
LzmaDec_Init(&state);
for (;;)
{
const void *inBuf = NULL;
size_t lookahead = (1 << 18);
if (lookahead > inSize)
lookahead = (size_t)inSize;
res = ILookInStream_Look(inStream, &inBuf, &lookahead);
if (res != SZ_OK)
break;
{
SizeT inProcessed = (SizeT)lookahead, dicPos = state.dicPos;
ELzmaStatus status;
res = LzmaDec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
break;
if (status == LZMA_STATUS_FINISHED_WITH_MARK)
{
if (outSize != state.dicPos || inSize != 0)
res = SZ_ERROR_DATA;
break;
}
if (outSize == state.dicPos && inSize == 0 && status == LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK)
break;
if (inProcessed == 0 && dicPos == state.dicPos)
{
res = SZ_ERROR_DATA;
break;
}
res = ILookInStream_Skip(inStream, inProcessed);
if (res != SZ_OK)
break;
}
}
LzmaDec_FreeProbs(&state, allocMain);
return res;
}
#ifndef _7Z_NO_METHOD_LZMA2
static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
{
CLzma2Dec state;
SRes res = SZ_OK;
Lzma2Dec_Construct(&state);
if (propsSize != 1)
return SZ_ERROR_DATA;
RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
state.decoder.dic = outBuffer;
state.decoder.dicBufSize = outSize;
Lzma2Dec_Init(&state);
for (;;)
{
const void *inBuf = NULL;
size_t lookahead = (1 << 18);
if (lookahead > inSize)
lookahead = (size_t)inSize;
res = ILookInStream_Look(inStream, &inBuf, &lookahead);
if (res != SZ_OK)
break;
{
SizeT inProcessed = (SizeT)lookahead, dicPos = state.decoder.dicPos;
ELzmaStatus status;
res = Lzma2Dec_DecodeToDic(&state, outSize, (const Byte *)inBuf, &inProcessed, LZMA_FINISH_END, &status);
lookahead -= inProcessed;
inSize -= inProcessed;
if (res != SZ_OK)
break;
if (status == LZMA_STATUS_FINISHED_WITH_MARK)
{
if (outSize != state.decoder.dicPos || inSize != 0)
res = SZ_ERROR_DATA;
break;
}
if (inProcessed == 0 && dicPos == state.decoder.dicPos)
{
res = SZ_ERROR_DATA;
break;
}
res = ILookInStream_Skip(inStream, inProcessed);
if (res != SZ_OK)
break;
}
}
Lzma2Dec_FreeProbs(&state, allocMain);
return res;
}
#endif
static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
{
while (inSize > 0)
{
const void *inBuf;
size_t curSize = (1 << 18);
if (curSize > inSize)
curSize = (size_t)inSize;
RINOK(ILookInStream_Look(inStream, &inBuf, &curSize));
if (curSize == 0)
return SZ_ERROR_INPUT_EOF;
memcpy(outBuffer, inBuf, curSize);
outBuffer += curSize;
inSize -= curSize;
RINOK(ILookInStream_Skip(inStream, curSize));
}
return SZ_OK;
}
static BoolInt IS_MAIN_METHOD(UInt32 m)
{
switch (m)
{
case k_Copy:
case k_LZMA:
#ifndef _7Z_NO_METHOD_LZMA2
case k_LZMA2:
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
case k_PPMD:
#endif
return True;
}
return False;
}
static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
{
return
c->NumStreams == 1
/* && c->MethodID <= (UInt32)0xFFFFFFFF */
&& IS_MAIN_METHOD((UInt32)c->MethodID);
}
#define IS_BCJ2(c) ((c)->MethodID == k_BCJ2 && (c)->NumStreams == 4)
static SRes CheckSupportedFolder(const CSzFolder *f)
{
if (f->NumCoders < 1 || f->NumCoders > 4)
return SZ_ERROR_UNSUPPORTED;
if (!IS_SUPPORTED_CODER(&f->Coders[0]))
return SZ_ERROR_UNSUPPORTED;
if (f->NumCoders == 1)
{
if (f->NumPackStreams != 1 || f->PackStreams[0] != 0 || f->NumBonds != 0)
return SZ_ERROR_UNSUPPORTED;
return SZ_OK;
}
#ifndef _7Z_NO_METHODS_FILTERS
if (f->NumCoders == 2)
{
const CSzCoderInfo *c = &f->Coders[1];
if (
/* c->MethodID > (UInt32)0xFFFFFFFF || */
c->NumStreams != 1
|| f->NumPackStreams != 1
|| f->PackStreams[0] != 0
|| f->NumBonds != 1
|| f->Bonds[0].InIndex != 1
|| f->Bonds[0].OutIndex != 0)
return SZ_ERROR_UNSUPPORTED;
switch ((UInt32)c->MethodID)
{
case k_Delta:
case k_BCJ:
case k_PPC:
case k_IA64:
case k_SPARC:
case k_ARM:
case k_ARMT:
break;
default:
return SZ_ERROR_UNSUPPORTED;
}
return SZ_OK;
}
#endif
if (f->NumCoders == 4)
{
if (!IS_SUPPORTED_CODER(&f->Coders[1])
|| !IS_SUPPORTED_CODER(&f->Coders[2])
|| !IS_BCJ2(&f->Coders[3]))
return SZ_ERROR_UNSUPPORTED;
if (f->NumPackStreams != 4
|| f->PackStreams[0] != 2
|| f->PackStreams[1] != 6
|| f->PackStreams[2] != 1
|| f->PackStreams[3] != 0
|| f->NumBonds != 3
|| f->Bonds[0].InIndex != 5 || f->Bonds[0].OutIndex != 0
|| f->Bonds[1].InIndex != 4 || f->Bonds[1].OutIndex != 1
|| f->Bonds[2].InIndex != 3 || f->Bonds[2].OutIndex != 2)
return SZ_ERROR_UNSUPPORTED;
return SZ_OK;
}
return SZ_ERROR_UNSUPPORTED;
}
#ifndef _7Z_NO_METHODS_FILTERS
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
#endif
static SRes SzFolder_Decode2(const CSzFolder *folder,
const Byte *propsData,
const UInt64 *unpackSizes,
const UInt64 *packPositions,
ILookInStream *inStream, UInt64 startPos,
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
Byte *tempBuf[])
{
UInt32 ci;
SizeT tempSizes[3] = { 0, 0, 0};
SizeT tempSize3 = 0;
Byte *tempBuf3 = 0;
RINOK(CheckSupportedFolder(folder));
for (ci = 0; ci < folder->NumCoders; ci++)
{
const CSzCoderInfo *coder = &folder->Coders[ci];
if (IS_MAIN_METHOD((UInt32)coder->MethodID))
{
UInt32 si = 0;
UInt64 offset;
UInt64 inSize;
Byte *outBufCur = outBuffer;
SizeT outSizeCur = outSize;
if (folder->NumCoders == 4)
{
UInt32 indices[] = { 3, 2, 0 };
UInt64 unpackSize = unpackSizes[ci];
si = indices[ci];
if (ci < 2)
{
Byte *temp;
outSizeCur = (SizeT)unpackSize;
if (outSizeCur != unpackSize)
return SZ_ERROR_MEM;
temp = (Byte *)ISzAlloc_Alloc(allocMain, outSizeCur);
if (!temp && outSizeCur != 0)
return SZ_ERROR_MEM;
outBufCur = tempBuf[1 - ci] = temp;
tempSizes[1 - ci] = outSizeCur;
}
else if (ci == 2)
{
if (unpackSize > outSize) /* check it */
return SZ_ERROR_PARAM;
tempBuf3 = outBufCur = outBuffer + (outSize - (size_t)unpackSize);
tempSize3 = outSizeCur = (SizeT)unpackSize;
}
else
return SZ_ERROR_UNSUPPORTED;
}
offset = packPositions[si];
inSize = packPositions[(size_t)si + 1] - offset;
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
if (coder->MethodID == k_Copy)
{
if (inSize != outSizeCur) /* check it */
return SZ_ERROR_DATA;
RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
}
else if (coder->MethodID == k_LZMA)
{
RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#ifndef _7Z_NO_METHOD_LZMA2
else if (coder->MethodID == k_LZMA2)
{
RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#endif
#ifdef _7ZIP_PPMD_SUPPPORT
else if (coder->MethodID == k_PPMD)
{
RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
}
#endif
else
return SZ_ERROR_UNSUPPORTED;
}
else if (coder->MethodID == k_BCJ2)
{
UInt64 offset = packPositions[1];
UInt64 s3Size = packPositions[2] - offset;
if (ci != 3)
return SZ_ERROR_UNSUPPORTED;
tempSizes[2] = (SizeT)s3Size;
if (tempSizes[2] != s3Size)
return SZ_ERROR_MEM;
tempBuf[2] = (Byte *)ISzAlloc_Alloc(allocMain, tempSizes[2]);
if (!tempBuf[2] && tempSizes[2] != 0)
return SZ_ERROR_MEM;
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
if ((tempSizes[0] & 3) != 0 ||
(tempSizes[1] & 3) != 0 ||
tempSize3 + tempSizes[0] + tempSizes[1] != outSize)
return SZ_ERROR_DATA;
{
CBcj2Dec p;
p.bufs[0] = tempBuf3; p.lims[0] = tempBuf3 + tempSize3;
p.bufs[1] = tempBuf[0]; p.lims[1] = tempBuf[0] + tempSizes[0];
p.bufs[2] = tempBuf[1]; p.lims[2] = tempBuf[1] + tempSizes[1];
p.bufs[3] = tempBuf[2]; p.lims[3] = tempBuf[2] + tempSizes[2];
p.dest = outBuffer;
p.destLim = outBuffer + outSize;
Bcj2Dec_Init(&p);
RINOK(Bcj2Dec_Decode(&p));
{
unsigned i;
for (i = 0; i < 4; i++)
if (p.bufs[i] != p.lims[i])
return SZ_ERROR_DATA;
if (!Bcj2Dec_IsFinished(&p))
return SZ_ERROR_DATA;
if (p.dest != p.destLim
|| p.state != BCJ2_STREAM_MAIN)
return SZ_ERROR_DATA;
}
}
}
#ifndef _7Z_NO_METHODS_FILTERS
else if (ci == 1)
{
if (coder->MethodID == k_Delta)
{
if (coder->PropsSize != 1)
return SZ_ERROR_UNSUPPORTED;
{
Byte state[DELTA_STATE_SIZE];
Delta_Init(state);
Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
}
}
else
{
if (coder->PropsSize != 0)
return SZ_ERROR_UNSUPPORTED;
switch (coder->MethodID)
{
case k_BCJ:
{
UInt32 state;
x86_Convert_Init(state);
x86_Convert(outBuffer, outSize, 0, &state, 0);
break;
}
CASE_BRA_CONV(PPC)
CASE_BRA_CONV(IA64)
CASE_BRA_CONV(SPARC)
CASE_BRA_CONV(ARM)
CASE_BRA_CONV(ARMT)
default:
return SZ_ERROR_UNSUPPORTED;
}
}
}
#endif
else
return SZ_ERROR_UNSUPPORTED;
}
return SZ_OK;
}
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
ILookInStream *inStream, UInt64 startPos,
Byte *outBuffer, size_t outSize,
ISzAllocPtr allocMain)
{
SRes res;
CSzFolder folder;
CSzData sd;
const Byte *data = p->CodersData + p->FoCodersOffsets[folderIndex];
sd.Data = data;
sd.Size = p->FoCodersOffsets[(size_t)folderIndex + 1] - p->FoCodersOffsets[folderIndex];
res = SzGetNextFolderItem(&folder, &sd);
if (res != SZ_OK)
return res;
if (sd.Size != 0
|| folder.UnpackStream != p->FoToMainUnpackSizeIndex[folderIndex]
|| outSize != SzAr_GetFolderUnpackSize(p, folderIndex))
return SZ_ERROR_FAIL;
{
unsigned i;
Byte *tempBuf[3] = { 0, 0, 0};
res = SzFolder_Decode2(&folder, data,
&p->CoderUnpackSizes[p->FoToCoderUnpackSizes[folderIndex]],
p->PackPositions + p->FoStartPackStreamIndex[folderIndex],
inStream, startPos,
outBuffer, (SizeT)outSize, allocMain, tempBuf);
for (i = 0; i < 3; i++)
ISzAlloc_Free(allocMain, tempBuf[i]);
if (res == SZ_OK)
if (SzBitWithVals_Check(&p->FolderCRCs, folderIndex))
if (CrcCalc(outBuffer, outSize) != p->FolderCRCs.Vals[folderIndex])
res = SZ_ERROR_CRC;
return res;
}
}

442
C/7zFile.c Normal file
View file

@ -0,0 +1,442 @@
/* 7zFile.c -- File IO
2021-04-29 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "7zFile.h"
#ifndef USE_WINDOWS_FILE
#include <errno.h>
#ifndef USE_FOPEN
#include <stdio.h>
#include <fcntl.h>
#ifdef _WIN32
#include <io.h>
typedef int ssize_t;
typedef int off_t;
#else
#include <unistd.h>
#endif
#endif
#else
/*
ReadFile and WriteFile functions in Windows have BUG:
If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1)
from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES
(Insufficient system resources exist to complete the requested service).
Probably in some version of Windows there are problems with other sizes:
for 32 MB (maybe also for 16 MB).
And message can be "Network connection was lost"
*/
#endif
#define kChunkSizeMax (1 << 22)
void File_Construct(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
p->handle = INVALID_HANDLE_VALUE;
#elif defined(USE_FOPEN)
p->file = NULL;
#else
p->fd = -1;
#endif
}
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
static WRes File_Open(CSzFile *p, const char *name, int writeMode)
{
#ifdef USE_WINDOWS_FILE
p->handle = CreateFileA(name,
writeMode ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, NULL,
writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
#elif defined(USE_FOPEN)
p->file = fopen(name, writeMode ? "wb+" : "rb");
return (p->file != 0) ? 0 :
#ifdef UNDER_CE
2; /* ENOENT */
#else
errno;
#endif
#else
int flags = (writeMode ? (O_CREAT | O_EXCL | O_WRONLY) : O_RDONLY);
#ifdef O_BINARY
flags |= O_BINARY;
#endif
p->fd = open(name, flags, 0666);
return (p->fd != -1) ? 0 : errno;
#endif
}
WRes InFile_Open(CSzFile *p, const char *name) { return File_Open(p, name, 0); }
WRes OutFile_Open(CSzFile *p, const char *name)
{
#if defined(USE_WINDOWS_FILE) || defined(USE_FOPEN)
return File_Open(p, name, 1);
#else
p->fd = creat(name, 0666);
return (p->fd != -1) ? 0 : errno;
#endif
}
#endif
#ifdef USE_WINDOWS_FILE
static WRes File_OpenW(CSzFile *p, const WCHAR *name, int writeMode)
{
p->handle = CreateFileW(name,
writeMode ? GENERIC_WRITE : GENERIC_READ,
FILE_SHARE_READ, NULL,
writeMode ? CREATE_ALWAYS : OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL, NULL);
return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError();
}
WRes InFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 0); }
WRes OutFile_OpenW(CSzFile *p, const WCHAR *name) { return File_OpenW(p, name, 1); }
#endif
WRes File_Close(CSzFile *p)
{
#ifdef USE_WINDOWS_FILE
if (p->handle != INVALID_HANDLE_VALUE)
{
if (!CloseHandle(p->handle))
return GetLastError();
p->handle = INVALID_HANDLE_VALUE;
}
#elif defined(USE_FOPEN)
if (p->file != NULL)
{
int res = fclose(p->file);
if (res != 0)
{
if (res == EOF)
return errno;
return res;
}
p->file = NULL;
}
#else
if (p->fd != -1)
{
if (close(p->fd) != 0)
return errno;
p->fd = -1;
}
#endif
return 0;
}
WRes File_Read(CSzFile *p, void *data, size_t *size)
{
size_t originalSize = *size;
*size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
do
{
const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
const BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL);
data = (void *)((Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
return GetLastError();
// debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
#elif defined(USE_FOPEN)
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const size_t processed = fread(data, 1, curSize, p->file);
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= processed;
*size += processed;
if (processed != curSize)
return ferror(p->file);
// debug : we can break here for partial reading mode
if (processed == 0)
break;
}
while (originalSize > 0);
#else
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const ssize_t processed = read(p->fd, data, curSize);
if (processed == -1)
return errno;
if (processed == 0)
break;
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= (size_t)processed;
*size += (size_t)processed;
// debug : we can break here for partial reading mode
// break;
}
while (originalSize > 0);
#endif
return 0;
}
WRes File_Write(CSzFile *p, const void *data, size_t *size)
{
size_t originalSize = *size;
*size = 0;
if (originalSize == 0)
return 0;
#ifdef USE_WINDOWS_FILE
do
{
const DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize;
DWORD processed = 0;
const BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL);
data = (const void *)((const Byte *)data + processed);
originalSize -= processed;
*size += processed;
if (!res)
return GetLastError();
if (processed == 0)
break;
}
while (originalSize > 0);
#elif defined(USE_FOPEN)
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const size_t processed = fwrite(data, 1, curSize, p->file);
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= processed;
*size += processed;
if (processed != curSize)
return ferror(p->file);
if (processed == 0)
break;
}
while (originalSize > 0);
#else
do
{
const size_t curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : originalSize;
const ssize_t processed = write(p->fd, data, curSize);
if (processed == -1)
return errno;
if (processed == 0)
break;
data = (void *)((Byte *)data + (size_t)processed);
originalSize -= (size_t)processed;
*size += (size_t)processed;
}
while (originalSize > 0);
#endif
return 0;
}
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
{
#ifdef USE_WINDOWS_FILE
DWORD moveMethod;
UInt32 low = (UInt32)*pos;
LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
switch (origin)
{
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
case SZ_SEEK_END: moveMethod = FILE_END; break;
default: return ERROR_INVALID_PARAMETER;
}
low = SetFilePointer(p->handle, (LONG)low, &high, moveMethod);
if (low == (UInt32)0xFFFFFFFF)
{
WRes res = GetLastError();
if (res != NO_ERROR)
return res;
}
*pos = ((Int64)high << 32) | low;
return 0;
#else
int moveMethod; // = origin;
switch (origin)
{
case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
case SZ_SEEK_END: moveMethod = SEEK_END; break;
default: return EINVAL;
}
#if defined(USE_FOPEN)
{
int res = fseek(p->file, (long)*pos, moveMethod);
if (res == -1)
return errno;
*pos = ftell(p->file);
if (*pos == -1)
return errno;
return 0;
}
#else
{
off_t res = lseek(p->fd, (off_t)*pos, moveMethod);
if (res == -1)
return errno;
*pos = res;
return 0;
}
#endif // USE_FOPEN
#endif // USE_WINDOWS_FILE
}
WRes File_GetLength(CSzFile *p, UInt64 *length)
{
#ifdef USE_WINDOWS_FILE
DWORD sizeHigh;
DWORD sizeLow = GetFileSize(p->handle, &sizeHigh);
if (sizeLow == 0xFFFFFFFF)
{
DWORD res = GetLastError();
if (res != NO_ERROR)
return res;
}
*length = (((UInt64)sizeHigh) << 32) + sizeLow;
return 0;
#elif defined(USE_FOPEN)
long pos = ftell(p->file);
int res = fseek(p->file, 0, SEEK_END);
*length = ftell(p->file);
fseek(p->file, pos, SEEK_SET);
return res;
#else
off_t pos;
*length = 0;
pos = lseek(p->fd, 0, SEEK_CUR);
if (pos != -1)
{
const off_t len2 = lseek(p->fd, 0, SEEK_END);
const off_t res2 = lseek(p->fd, pos, SEEK_SET);
if (len2 != -1)
{
*length = (UInt64)len2;
if (res2 != -1)
return 0;
}
}
return errno;
#endif
}
/* ---------- FileSeqInStream ---------- */
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
WRes wres = File_Read(&p->file, buf, size);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
{
p->vt.Read = FileSeqInStream_Read;
}
/* ---------- FileInStream ---------- */
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
WRes wres = File_Read(&p->file, buf, size);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
{
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
WRes wres = File_Seek(&p->file, pos, origin);
p->wres = wres;
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
}
void FileInStream_CreateVTable(CFileInStream *p)
{
p->vt.Read = FileInStream_Read;
p->vt.Seek = FileInStream_Seek;
}
/* ---------- FileOutStream ---------- */
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
{
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
WRes wres = File_Write(&p->file, data, &size);
p->wres = wres;
return size;
}
void FileOutStream_CreateVTable(CFileOutStream *p)
{
p->vt.Write = FileOutStream_Write;
}

91
C/7zFile.h Normal file
View file

@ -0,0 +1,91 @@
/* 7zFile.h -- File IO
2021-02-15 : Igor Pavlov : Public domain */
#ifndef __7Z_FILE_H
#define __7Z_FILE_H
#ifdef _WIN32
#define USE_WINDOWS_FILE
// #include <windows.h>
#endif
#ifdef USE_WINDOWS_FILE
#include <windows.h>
#else
// note: USE_FOPEN mode is limited to 32-bit file size
// #define USE_FOPEN
// #include <stdio.h>
#endif
#include "7zTypes.h"
EXTERN_C_BEGIN
/* ---------- File ---------- */
typedef struct
{
#ifdef USE_WINDOWS_FILE
HANDLE handle;
#elif defined(USE_FOPEN)
FILE *file;
#else
int fd;
#endif
} CSzFile;
void File_Construct(CSzFile *p);
#if !defined(UNDER_CE) || !defined(USE_WINDOWS_FILE)
WRes InFile_Open(CSzFile *p, const char *name);
WRes OutFile_Open(CSzFile *p, const char *name);
#endif
#ifdef USE_WINDOWS_FILE
WRes InFile_OpenW(CSzFile *p, const WCHAR *name);
WRes OutFile_OpenW(CSzFile *p, const WCHAR *name);
#endif
WRes File_Close(CSzFile *p);
/* reads max(*size, remain file's size) bytes */
WRes File_Read(CSzFile *p, void *data, size_t *size);
/* writes *size bytes */
WRes File_Write(CSzFile *p, const void *data, size_t *size);
WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin);
WRes File_GetLength(CSzFile *p, UInt64 *length);
/* ---------- FileInStream ---------- */
typedef struct
{
ISeqInStream vt;
CSzFile file;
WRes wres;
} CFileSeqInStream;
void FileSeqInStream_CreateVTable(CFileSeqInStream *p);
typedef struct
{
ISeekInStream vt;
CSzFile file;
WRes wres;
} CFileInStream;
void FileInStream_CreateVTable(CFileInStream *p);
typedef struct
{
ISeqOutStream vt;
CSzFile file;
WRes wres;
} CFileOutStream;
void FileOutStream_CreateVTable(CFileOutStream *p);
EXTERN_C_END
#endif

176
C/7zStream.c Normal file
View file

@ -0,0 +1,176 @@
/* 7zStream.c -- 7z Stream functions
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "7zTypes.h"
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
{
while (size != 0)
{
size_t processed = size;
RINOK(ISeqInStream_Read(stream, buf, &processed));
if (processed == 0)
return errorType;
buf = (void *)((Byte *)buf + processed);
size -= processed;
}
return SZ_OK;
}
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
{
return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
}
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
{
size_t processed = 1;
RINOK(ISeqInStream_Read(stream, buf, &processed));
return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
}
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
{
Int64 t = (Int64)offset;
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
}
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
{
const void *lookBuf;
if (*size == 0)
return SZ_OK;
RINOK(ILookInStream_Look(stream, &lookBuf, size));
memcpy(buf, lookBuf, *size);
return ILookInStream_Skip(stream, *size);
}
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
{
while (size != 0)
{
size_t processed = size;
RINOK(ILookInStream_Read(stream, buf, &processed));
if (processed == 0)
return errorType;
buf = (void *)((Byte *)buf + processed);
size -= processed;
}
return SZ_OK;
}
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
{
return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
}
#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
{
SRes res = SZ_OK;
GET_LookToRead2
size_t size2 = p->size - p->pos;
if (size2 == 0 && *size != 0)
{
p->pos = 0;
p->size = 0;
size2 = p->bufSize;
res = ISeekInStream_Read(p->realStream, p->buf, &size2);
p->size = size2;
}
if (*size > size2)
*size = size2;
*buf = p->buf + p->pos;
return res;
}
static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
{
SRes res = SZ_OK;
GET_LookToRead2
size_t size2 = p->size - p->pos;
if (size2 == 0 && *size != 0)
{
p->pos = 0;
p->size = 0;
if (*size > p->bufSize)
*size = p->bufSize;
res = ISeekInStream_Read(p->realStream, p->buf, size);
size2 = p->size = *size;
}
if (*size > size2)
*size = size2;
*buf = p->buf + p->pos;
return res;
}
static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
{
GET_LookToRead2
p->pos += offset;
return SZ_OK;
}
static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
{
GET_LookToRead2
size_t rem = p->size - p->pos;
if (rem == 0)
return ISeekInStream_Read(p->realStream, buf, size);
if (rem > *size)
rem = *size;
memcpy(buf, p->buf + p->pos, rem);
p->pos += rem;
*size = rem;
return SZ_OK;
}
static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
{
GET_LookToRead2
p->pos = p->size = 0;
return ISeekInStream_Seek(p->realStream, pos, origin);
}
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
{
p->vt.Look = lookahead ?
LookToRead2_Look_Lookahead :
LookToRead2_Look_Exact;
p->vt.Skip = LookToRead2_Skip;
p->vt.Read = LookToRead2_Read;
p->vt.Seek = LookToRead2_Seek;
}
static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
return LookInStream_LookRead(p->realStream, buf, size);
}
void SecToLook_CreateVTable(CSecToLook *p)
{
p->vt.Read = SecToLook_Read;
}
static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
{
CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
return ILookInStream_Read(p->realStream, buf, size);
}
void SecToRead_CreateVTable(CSecToRead *p)
{
p->vt.Read = SecToRead_Read;
}

525
C/7zTypes.h Normal file
View file

@ -0,0 +1,525 @@
/* 7zTypes.h -- Basic types
2021-12-25 : Igor Pavlov : Public domain */
#ifndef __7Z_TYPES_H
#define __7Z_TYPES_H
#ifdef _WIN32
/* #include <windows.h> */
#else
#include <errno.h>
#endif
#include <stddef.h>
#ifndef EXTERN_C_BEGIN
#ifdef __cplusplus
#define EXTERN_C_BEGIN extern "C" {
#define EXTERN_C_END }
#else
#define EXTERN_C_BEGIN
#define EXTERN_C_END
#endif
#endif
EXTERN_C_BEGIN
#define SZ_OK 0
#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12
#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
typedef int SRes;
#ifdef _MSC_VER
#if _MSC_VER > 1200
#define MY_ALIGN(n) __declspec(align(n))
#else
#define MY_ALIGN(n)
#endif
#else
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
#endif
#ifdef _WIN32
/* typedef DWORD WRes; */
typedef unsigned WRes;
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
#else // _WIN32
// #define ENV_HAVE_LSTAT
typedef int WRes;
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
#define MY__FACILITY_ERRNO 0x800
#define MY__FACILITY_WIN32 7
#define MY__FACILITY__WRes MY__FACILITY_ERRNO
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
( (HRESULT)(x) & 0x0000FFFF) \
| (MY__FACILITY__WRes << 16) \
| (HRESULT)0x80000000 ))
#define MY_SRes_HRESULT_FROM_WRes(x) \
((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : MY_HRESULT_FROM_errno_CONST_ERROR(x))
// we call macro HRESULT_FROM_WIN32 for system errors (WRes) that are (errno)
#define HRESULT_FROM_WIN32(x) MY_SRes_HRESULT_FROM_WRes(x)
/*
#define ERROR_FILE_NOT_FOUND 2L
#define ERROR_ACCESS_DENIED 5L
#define ERROR_NO_MORE_FILES 18L
#define ERROR_LOCK_VIOLATION 33L
#define ERROR_FILE_EXISTS 80L
#define ERROR_DISK_FULL 112L
#define ERROR_NEGATIVE_SEEK 131L
#define ERROR_ALREADY_EXISTS 183L
#define ERROR_DIRECTORY 267L
#define ERROR_TOO_MANY_POSTS 298L
#define ERROR_INTERNAL_ERROR 1359L
#define ERROR_INVALID_REPARSE_DATA 4392L
#define ERROR_REPARSE_TAG_INVALID 4393L
#define ERROR_REPARSE_TAG_MISMATCH 4394L
*/
// we use errno equivalents for some WIN32 errors:
#define ERROR_INVALID_PARAMETER EINVAL
#define ERROR_INVALID_FUNCTION EINVAL
#define ERROR_ALREADY_EXISTS EEXIST
#define ERROR_FILE_EXISTS EEXIST
#define ERROR_PATH_NOT_FOUND ENOENT
#define ERROR_FILE_NOT_FOUND ENOENT
#define ERROR_DISK_FULL ENOSPC
// #define ERROR_INVALID_HANDLE EBADF
// we use FACILITY_WIN32 for errors that has no errno equivalent
// Too many posts were made to a semaphore.
#define ERROR_TOO_MANY_POSTS ((HRESULT)0x8007012AL)
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
// if (MY__FACILITY__WRes != FACILITY_WIN32),
// we use FACILITY_WIN32 for COM errors:
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
#define E_INVALIDARG ((HRESULT)0x80070057L)
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
/*
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
*/
// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
#define TEXT(quote) quote
#define FILE_ATTRIBUTE_READONLY 0x0001
#define FILE_ATTRIBUTE_HIDDEN 0x0002
#define FILE_ATTRIBUTE_SYSTEM 0x0004
#define FILE_ATTRIBUTE_DIRECTORY 0x0010
#define FILE_ATTRIBUTE_ARCHIVE 0x0020
#define FILE_ATTRIBUTE_DEVICE 0x0040
#define FILE_ATTRIBUTE_NORMAL 0x0080
#define FILE_ATTRIBUTE_TEMPORARY 0x0100
#define FILE_ATTRIBUTE_SPARSE_FILE 0x0200
#define FILE_ATTRIBUTE_REPARSE_POINT 0x0400
#define FILE_ATTRIBUTE_COMPRESSED 0x0800
#define FILE_ATTRIBUTE_OFFLINE 0x1000
#define FILE_ATTRIBUTE_NOT_CONTENT_INDEXED 0x2000
#define FILE_ATTRIBUTE_ENCRYPTED 0x4000
#define FILE_ATTRIBUTE_UNIX_EXTENSION 0x8000 /* trick for Unix */
#endif
#ifndef RINOK
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
#endif
#ifndef RINOK_WRes
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
#endif
typedef unsigned char Byte;
typedef short Int16;
typedef unsigned short UInt16;
#ifdef _LZMA_UINT32_IS_ULONG
typedef long Int32;
typedef unsigned long UInt32;
#else
typedef int Int32;
typedef unsigned int UInt32;
#endif
#ifndef _WIN32
typedef int INT;
typedef Int32 INT32;
typedef unsigned int UINT;
typedef UInt32 UINT32;
typedef INT32 LONG; // LONG, ULONG and DWORD must be 32-bit for _WIN32 compatibility
typedef UINT32 ULONG;
#undef DWORD
typedef UINT32 DWORD;
#define VOID void
#define HRESULT LONG
typedef void *LPVOID;
// typedef void VOID;
// typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
// gcc / clang on Unix : sizeof(long==sizeof(void*) in 32 or 64 bits)
typedef long INT_PTR;
typedef unsigned long UINT_PTR;
typedef long LONG_PTR;
typedef unsigned long DWORD_PTR;
typedef size_t SIZE_T;
#endif // _WIN32
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
#ifdef _SZ_NO_INT_64
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
NOTES: Some code will work incorrectly in that case! */
typedef long Int64;
typedef unsigned long UInt64;
#else
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef __int64 Int64;
typedef unsigned __int64 UInt64;
#define UINT64_CONST(n) n
#else
typedef long long int Int64;
typedef unsigned long long int UInt64;
#define UINT64_CONST(n) n ## ULL
#endif
#endif
#ifdef _LZMA_NO_SYSTEM_SIZE_T
typedef UInt32 SizeT;
#else
typedef size_t SizeT;
#endif
typedef int BoolInt;
/* typedef BoolInt Bool; */
#define True 1
#define False 0
#ifdef _WIN32
#define MY_STD_CALL __stdcall
#else
#define MY_STD_CALL
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_NO_INLINE __declspec(noinline)
#else
#define MY_NO_INLINE
#endif
#define MY_FORCE_INLINE __forceinline
#define MY_CDECL __cdecl
#define MY_FAST_CALL __fastcall
#else // _MSC_VER
#if (defined(__GNUC__) && (__GNUC__ >= 4)) \
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|| defined(__INTEL_COMPILER) \
|| defined(__xlC__)
#define MY_NO_INLINE __attribute__((noinline))
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
#else
#define MY_NO_INLINE
#endif
#define MY_FORCE_INLINE
#define MY_CDECL
#if defined(_M_IX86) \
|| defined(__i386__)
// #define MY_FAST_CALL __attribute__((fastcall))
// #define MY_FAST_CALL __attribute__((cdecl))
#define MY_FAST_CALL
#elif defined(MY_CPU_AMD64)
// #define MY_FAST_CALL __attribute__((ms_abi))
#define MY_FAST_CALL
#else
#define MY_FAST_CALL
#endif
#endif // _MSC_VER
/* The following interfaces use first parameter as pointer to structure */
typedef struct IByteIn IByteIn;
struct IByteIn
{
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
};
#define IByteIn_Read(p) (p)->Read(p)
typedef struct IByteOut IByteOut;
struct IByteOut
{
void (*Write)(const IByteOut *p, Byte b);
};
#define IByteOut_Write(p, b) (p)->Write(p, b)
typedef struct ISeqInStream ISeqInStream;
struct ISeqInStream
{
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) < input(*size)) is allowed */
};
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
/* it can return SZ_ERROR_INPUT_EOF */
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
typedef struct ISeqOutStream ISeqOutStream;
struct ISeqOutStream
{
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
/* Returns: result - the number of actually written bytes.
(result < size) means error */
};
#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
typedef enum
{
SZ_SEEK_SET = 0,
SZ_SEEK_CUR = 1,
SZ_SEEK_END = 2
} ESzSeek;
typedef struct ISeekInStream ISeekInStream;
struct ISeekInStream
{
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
};
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
typedef struct ILookInStream ILookInStream;
struct ILookInStream
{
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
(output(*size) > input(*size)) is not allowed
(output(*size) < input(*size)) is allowed */
SRes (*Skip)(const ILookInStream *p, size_t offset);
/* offset must be <= output(*size) of Look */
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
/* reads directly (without buffer). It's same as ISeqInStream::Read */
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
};
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
#define ILookInStream_Skip(p, offset) (p)->Skip(p, offset)
#define ILookInStream_Read(p, buf, size) (p)->Read(p, buf, size)
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
/* reads via ILookInStream::Read */
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
typedef struct
{
ILookInStream vt;
const ISeekInStream *realStream;
size_t pos;
size_t size; /* it's data size */
/* the following variables must be set outside */
Byte *buf;
size_t bufSize;
} CLookToRead2;
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
} CSecToLook;
void SecToLook_CreateVTable(CSecToLook *p);
typedef struct
{
ISeqInStream vt;
const ILookInStream *realStream;
} CSecToRead;
void SecToRead_CreateVTable(CSecToRead *p);
typedef struct ICompressProgress ICompressProgress;
struct ICompressProgress
{
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
/* Returns: result. (result != SZ_OK) means break.
Value (UInt64)(Int64)-1 for size means unknown value. */
};
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
typedef struct ISzAlloc ISzAlloc;
typedef const ISzAlloc * ISzAllocPtr;
struct ISzAlloc
{
void *(*Alloc)(ISzAllocPtr p, size_t size);
void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
};
#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
#define ISzAlloc_Free(p, a) (p)->Free(p, a)
/* deprecated */
#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
#ifndef MY_offsetof
#ifdef offsetof
#define MY_offsetof(type, m) offsetof(type, m)
/*
#define MY_offsetof(type, m) FIELD_OFFSET(type, m)
*/
#else
#define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
#endif
#endif
#ifndef MY_container_of
/*
#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
*/
/*
GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
GCC 3.4.4 : classes with constructor
GCC 4.8.1 : classes with non-public variable members"
*/
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
#endif
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
/*
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
*/
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
/*
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
*/
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
#ifdef _WIN32
#define CHAR_PATH_SEPARATOR '\\'
#define WCHAR_PATH_SEPARATOR L'\\'
#define STRING_PATH_SEPARATOR "\\"
#define WSTRING_PATH_SEPARATOR L"\\"
#else
#define CHAR_PATH_SEPARATOR '/'
#define WCHAR_PATH_SEPARATOR L'/'
#define STRING_PATH_SEPARATOR "/"
#define WSTRING_PATH_SEPARATOR L"/"
#endif
EXTERN_C_END
#endif

27
C/7zVersion.h Normal file
View file

@ -0,0 +1,27 @@
#define MY_VER_MAJOR 21
#define MY_VER_MINOR 07
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "21.07"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
#define MY_VERSION_CPU MY_VERSION " (" MY_CPU_NAME ")"
#else
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2021-12-26"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR
#else
#define MY_COPYRIGHT MY_COPYRIGHT_PD
#endif
#define MY_COPYRIGHT_DATE MY_COPYRIGHT " : " MY_DATE
#define MY_VERSION_COPYRIGHT_DATE MY_VERSION_CPU " : " MY_COPYRIGHT " : " MY_DATE

55
C/7zVersion.rc Normal file
View file

@ -0,0 +1,55 @@
#define MY_VS_FFI_FILEFLAGSMASK 0x0000003FL
#define MY_VOS_NT_WINDOWS32 0x00040004L
#define MY_VOS_CE_WINDOWS32 0x00050004L
#define MY_VFT_APP 0x00000001L
#define MY_VFT_DLL 0x00000002L
// #include <WinVer.h>
#ifndef MY_VERSION
#include "7zVersion.h"
#endif
#define MY_VER MY_VER_MAJOR,MY_VER_MINOR,MY_VER_BUILD,0
#ifdef DEBUG
#define DBG_FL VS_FF_DEBUG
#else
#define DBG_FL 0
#endif
#define MY_VERSION_INFO(fileType, descr, intName, origName) \
LANGUAGE 9, 1 \
1 VERSIONINFO \
FILEVERSION MY_VER \
PRODUCTVERSION MY_VER \
FILEFLAGSMASK MY_VS_FFI_FILEFLAGSMASK \
FILEFLAGS DBG_FL \
FILEOS MY_VOS_NT_WINDOWS32 \
FILETYPE fileType \
FILESUBTYPE 0x0L \
BEGIN \
BLOCK "StringFileInfo" \
BEGIN \
BLOCK "040904b0" \
BEGIN \
VALUE "CompanyName", "Igor Pavlov" \
VALUE "FileDescription", descr \
VALUE "FileVersion", MY_VERSION \
VALUE "InternalName", intName \
VALUE "LegalCopyright", MY_COPYRIGHT \
VALUE "OriginalFilename", origName \
VALUE "ProductName", "7-Zip" \
VALUE "ProductVersion", MY_VERSION \
END \
END \
BLOCK "VarFileInfo" \
BEGIN \
VALUE "Translation", 0x409, 1200 \
END \
END
#define MY_VERSION_INFO_APP(descr, intName) MY_VERSION_INFO(MY_VFT_APP, descr, intName, intName ".exe")
#define MY_VERSION_INFO_DLL(descr, intName) MY_VERSION_INFO(MY_VFT_DLL, descr, intName, intName ".dll")

313
C/7zip_gcc_c.mak Normal file
View file

@ -0,0 +1,313 @@
MY_ARCH_2 = $(MY_ARCH)
MY_ASM = jwasm
MY_ASM = asmc
PROGPATH = $(O)/$(PROG)
PROGPATH_STATIC = $(O)/$(PROG)s
# for object file
CFLAGS_BASE_LIST = -c
# for ASM file
# CFLAGS_BASE_LIST = -S
CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \
-DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
LDFLAGS_STATIC = -DNDEBUG
# -static
ifdef SystemDrive
IS_MINGW = 1
endif
ifdef DEF_FILE
ifdef IS_MINGW
SHARED_EXT=.dll
LDFLAGS = -shared -DEF $(DEF_FILE) $(LDFLAGS_STATIC)
else
SHARED_EXT=.so
LDFLAGS = -shared -fPIC $(LDFLAGS_STATIC)
CC_SHARED=-fPIC
endif
else
LDFLAGS = $(LDFLAGS_STATIC)
# -s is not required for clang, do we need it for GGC ???
# -s
#-static -static-libgcc -static-libstdc++
ifdef IS_MINGW
SHARED_EXT=.exe
else
SHARED_EXT=
endif
endif
PROGPATH = $(O)/$(PROG)$(SHARED_EXT)
PROGPATH_STATIC = $(O)/$(PROG)s$(SHARED_EXT)
ifndef O
O=_o
endif
ifdef IS_MINGW
RM = del
MY_MKDIR=mkdir
LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32
CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE
# -Wno-delete-non-virtual-dtor
DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll
else
RM = rm -f
MY_MKDIR=mkdir -p
# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST
# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
# LOCAL_LIBS=-lpthread
# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl
LIB2 = -lpthread -ldl
DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(PROGPATH_STATIC) $(OBJS)
endif
CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@
ifdef IS_X64
AFLAGS_ABI = -elf64 -DABI_LINUX
else
AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL
# -DABI_CDECL
# -DABI_LINUX
# -DABI_CDECL
endif
AFLAGS = $(AFLAGS_ABI) -Fo$(O)/
CXX_WARN_FLAGS =
#-Wno-invalid-offsetof
#-Wno-reorder
CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS)
STATIC_TARGET=
ifdef COMPL_STATIC
STATIC_TARGET=$(PROGPATH_STATIC)
endif
all: $(O) $(PROGPATH) $(STATIC_TARGET)
$(O):
$(MY_MKDIR) $(O)
LFLAGS_ALL = -s $(MY_ARCH_2) $(LDFLAGS) $(LD_arch) $(OBJS) $(MY_LIBS) $(LIB2)
$(PROGPATH): $(OBJS)
$(CXX) -o $(PROGPATH) $(LFLAGS_ALL)
$(PROGPATH_STATIC): $(OBJS)
$(CXX) -static -o $(PROGPATH_STATIC) $(LFLAGS_ALL)
ifndef NO_DEFAULT_RES
$O/resource.o: resource.rc
windres.exe $(RFLAGS) resource.rc $O/resource.o
endif
$O/7zAlloc.o: ../../../C/7zAlloc.c
$(CC) $(CFLAGS) $<
$O/7zArcIn.o: ../../../C/7zArcIn.c
$(CC) $(CFLAGS) $<
$O/7zBuf.o: ../../../C/7zBuf.c
$(CC) $(CFLAGS) $<
$O/7zBuf2.o: ../../../C/7zBuf2.c
$(CC) $(CFLAGS) $<
$O/7zCrc.o: ../../../C/7zCrc.c
$(CC) $(CFLAGS) $<
$O/7zDec.o: ../../../C/7zDec.c
$(CC) $(CFLAGS) $<
$O/7zFile.o: ../../../C/7zFile.c
$(CC) $(CFLAGS) $<
$O/7zStream.o: ../../../C/7zStream.c
$(CC) $(CFLAGS) $<
$O/Aes.o: ../../../C/Aes.c
$(CC) $(CFLAGS) $<
$O/Alloc.o: ../../../C/Alloc.c
$(CC) $(CFLAGS) $<
$O/Bcj2.o: ../../../C/Bcj2.c
$(CC) $(CFLAGS) $<
$O/Bcj2Enc.o: ../../../C/Bcj2Enc.c
$(CC) $(CFLAGS) $<
$O/Blake2s.o: ../../../C/Blake2s.c
$(CC) $(CFLAGS) $<
$O/Bra.o: ../../../C/Bra.c
$(CC) $(CFLAGS) $<
$O/Bra86.o: ../../../C/Bra86.c
$(CC) $(CFLAGS) $<
$O/BraIA64.o: ../../../C/BraIA64.c
$(CC) $(CFLAGS) $<
$O/BwtSort.o: ../../../C/BwtSort.c
$(CC) $(CFLAGS) $<
$O/CpuArch.o: ../../../C/CpuArch.c
$(CC) $(CFLAGS) $<
$O/Delta.o: ../../../C/Delta.c
$(CC) $(CFLAGS) $<
$O/DllSecur.o: ../../../C/DllSecur.c
$(CC) $(CFLAGS) $<
$O/HuffEnc.o: ../../../C/HuffEnc.c
$(CC) $(CFLAGS) $<
$O/LzFind.o: ../../../C/LzFind.c
$(CC) $(CFLAGS) $<
# ifdef MT_FILES
$O/LzFindMt.o: ../../../C/LzFindMt.c
$(CC) $(CFLAGS) $<
$O/LzFindOpt.o: ../../../C/LzFindOpt.c
$(CC) $(CFLAGS) $<
$O/Threads.o: ../../../C/Threads.c
$(CC) $(CFLAGS) $<
# endif
$O/LzmaEnc.o: ../../../C/LzmaEnc.c
$(CC) $(CFLAGS) $<
$O/Lzma86Dec.o: ../../../C/Lzma86Dec.c
$(CC) $(CFLAGS) $<
$O/Lzma86Enc.o: ../../../C/Lzma86Enc.c
$(CC) $(CFLAGS) $<
$O/Lzma2Dec.o: ../../../C/Lzma2Dec.c
$(CC) $(CFLAGS) $<
$O/Lzma2DecMt.o: ../../../C/Lzma2DecMt.c
$(CC) $(CFLAGS) $<
$O/Lzma2Enc.o: ../../../C/Lzma2Enc.c
$(CC) $(CFLAGS) $<
$O/LzmaLib.o: ../../../C/LzmaLib.c
$(CC) $(CFLAGS) $<
$O/MtCoder.o: ../../../C/MtCoder.c
$(CC) $(CFLAGS) $<
$O/MtDec.o: ../../../C/MtDec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7.o: ../../../C/Ppmd7.c
$(CC) $(CFLAGS) $<
$O/Ppmd7aDec.o: ../../../C/Ppmd7aDec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7Dec.o: ../../../C/Ppmd7Dec.c
$(CC) $(CFLAGS) $<
$O/Ppmd7Enc.o: ../../../C/Ppmd7Enc.c
$(CC) $(CFLAGS) $<
$O/Ppmd8.o: ../../../C/Ppmd8.c
$(CC) $(CFLAGS) $<
$O/Ppmd8Dec.o: ../../../C/Ppmd8Dec.c
$(CC) $(CFLAGS) $<
$O/Ppmd8Enc.o: ../../../C/Ppmd8Enc.c
$(CC) $(CFLAGS) $<
$O/Sha1.o: ../../../C/Sha1.c
$(CC) $(CFLAGS) $<
$O/Sha256.o: ../../../C/Sha256.c
$(CC) $(CFLAGS) $<
$O/Sort.o: ../../../C/Sort.c
$(CC) $(CFLAGS) $<
$O/Xz.o: ../../../C/Xz.c
$(CC) $(CFLAGS) $<
$O/XzCrc64.o: ../../../C/XzCrc64.c
$(CC) $(CFLAGS) $<
ifdef USE_ASM
ifdef IS_X64
USE_X86_ASM=1
else
ifdef IS_X86
USE_X86_ASM=1
endif
endif
endif
ifdef USE_X86_ASM
$O/7zCrcOpt.o: ../../../Asm/x86/7zCrcOpt.asm
$(MY_ASM) $(AFLAGS) $<
$O/XzCrc64Opt.o: ../../../Asm/x86/XzCrc64Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/AesOpt.o: ../../../Asm/x86/AesOpt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sha1Opt.o: ../../../Asm/x86/Sha1Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sha256Opt.o: ../../../Asm/x86/Sha256Opt.asm
$(MY_ASM) $(AFLAGS) $<
else
$O/7zCrcOpt.o: ../../7zCrcOpt.c
$(CC) $(CFLAGS) $<
$O/XzCrc64Opt.o: ../../XzCrc64Opt.c
$(CC) $(CFLAGS) $<
$O/Sha1Opt.o: ../../Sha1Opt.c
$(CC) $(CFLAGS) $<
$O/Sha256Opt.o: ../../Sha256Opt.c
$(CC) $(CFLAGS) $<
$O/AesOpt.o: ../../AesOpt.c
$(CC) $(CFLAGS) $<
endif
ifdef USE_LZMA_DEC_ASM
ifdef IS_X64
$O/LzmaDecOpt.o: ../../../Asm/x86/LzmaDecOpt.asm
$(MY_ASM) $(AFLAGS) $<
endif
ifdef IS_ARM64
$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S
$(CC) $(CFLAGS) $<
endif
$O/LzmaDec.o: ../../LzmaDec.c
$(CC) $(CFLAGS) -D_LZMA_DEC_OPT $<
else
$O/LzmaDec.o: ../../LzmaDec.c
$(CC) $(CFLAGS) $<
endif
$O/XzDec.o: ../../../C/XzDec.c
$(CC) $(CFLAGS) $<
$O/XzEnc.o: ../../../C/XzEnc.c
$(CC) $(CFLAGS) $<
$O/XzIn.o: ../../../C/XzIn.c
$(CC) $(CFLAGS) $<
$O/7zMain.o: ../../../C/Util/7z/7zMain.c
$(CC) $(CFLAGS) $<
$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c
$(CC) $(CFLAGS) $<
clean:
-$(DEL_OBJ_EXE)

375
C/Aes.c Normal file
View file

@ -0,0 +1,375 @@
/* Aes.c -- AES encryption / decryption
2021-05-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Aes.h"
AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX
AES_CODE_FUNC g_AesCbc_Encode;
AES_CODE_FUNC g_AesCtr_Code;
UInt32 g_Aes_SupportedFunctions_Flags;
#endif
static UInt32 T[256 * 4];
static const Byte Sbox[256] = {
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
static UInt32 D[256 * 4];
static Byte InvS[256];
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
#define Ui32(a0, a1, a2, a3) ((UInt32)(a0) | ((UInt32)(a1) << 8) | ((UInt32)(a2) << 16) | ((UInt32)(a3) << 24))
#define gb0(x) ( (x) & 0xFF)
#define gb1(x) (((x) >> ( 8)) & 0xFF)
#define gb2(x) (((x) >> (16)) & 0xFF)
#define gb3(x) (((x) >> (24)))
#define gb(n, x) gb ## n(x)
#define TT(x) (T + (x << 8))
#define DD(x) (D + (x << 8))
// #define _SHOW_AES_STATUS
#ifdef MY_CPU_X86_OR_AMD64
#define USE_HW_AES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif
#endif
#endif
#ifdef USE_HW_AES
#ifdef _SHOW_AES_STATUS
#include <stdio.h>
#define _PRF(x) x
#else
#define _PRF(x)
#endif
#endif
void AesGenTables(void)
{
unsigned i;
for (i = 0; i < 256; i++)
InvS[Sbox[i]] = (Byte)i;
for (i = 0; i < 256; i++)
{
{
UInt32 a1 = Sbox[i];
UInt32 a2 = xtime(a1);
UInt32 a3 = a2 ^ a1;
TT(0)[i] = Ui32(a2, a1, a1, a3);
TT(1)[i] = Ui32(a3, a2, a1, a1);
TT(2)[i] = Ui32(a1, a3, a2, a1);
TT(3)[i] = Ui32(a1, a1, a3, a2);
}
{
UInt32 a1 = InvS[i];
UInt32 a2 = xtime(a1);
UInt32 a4 = xtime(a2);
UInt32 a8 = xtime(a4);
UInt32 a9 = a8 ^ a1;
UInt32 aB = a8 ^ a2 ^ a1;
UInt32 aD = a8 ^ a4 ^ a1;
UInt32 aE = a8 ^ a4 ^ a2;
DD(0)[i] = Ui32(aE, a9, aD, aB);
DD(1)[i] = Ui32(aB, aE, a9, aD);
DD(2)[i] = Ui32(aD, aB, aE, a9);
DD(3)[i] = Ui32(a9, aD, aB, aE);
}
}
{
AES_CODE_FUNC d = AesCbc_Decode;
#ifndef _SFX
AES_CODE_FUNC e = AesCbc_Encode;
AES_CODE_FUNC c = AesCtr_Code;
UInt32 flags = 0;
#endif
#ifdef USE_HW_AES
if (CPU_IsSupported_AES())
{
// #pragma message ("AES HW")
_PRF(printf("\n===AES HW\n"));
d = AesCbc_Decode_HW;
#ifndef _SFX
e = AesCbc_Encode_HW;
c = AesCtr_Code_HW;
flags = k_Aes_SupportedFunctions_HW;
#endif
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_IsSupported_VAES_AVX2())
{
_PRF(printf("\n===vaes avx2\n"));
d = AesCbc_Decode_HW_256;
#ifndef _SFX
c = AesCtr_Code_HW_256;
flags |= k_Aes_SupportedFunctions_HW_256;
#endif
}
#endif
}
#endif
g_AesCbc_Decode = d;
#ifndef _SFX
g_AesCbc_Encode = e;
g_AesCtr_Code = c;
g_Aes_SupportedFunctions_Flags = flags;
#endif
}
}
#define HT(i, x, s) TT(x)[gb(x, s[(i + x) & 3])]
#define HT4(m, i, s, p) m[i] = \
HT(i, 0, s) ^ \
HT(i, 1, s) ^ \
HT(i, 2, s) ^ \
HT(i, 3, s) ^ w[p + i]
#define HT16(m, s, p) \
HT4(m, 0, s, p); \
HT4(m, 1, s, p); \
HT4(m, 2, s, p); \
HT4(m, 3, s, p); \
#define FT(i, x) Sbox[gb(x, m[(i + x) & 3])]
#define FT4(i) dest[i] = Ui32(FT(i, 0), FT(i, 1), FT(i, 2), FT(i, 3)) ^ w[i];
#define HD(i, x, s) DD(x)[gb(x, s[(i - x) & 3])]
#define HD4(m, i, s, p) m[i] = \
HD(i, 0, s) ^ \
HD(i, 1, s) ^ \
HD(i, 2, s) ^ \
HD(i, 3, s) ^ w[p + i];
#define HD16(m, s, p) \
HD4(m, 0, s, p); \
HD4(m, 1, s, p); \
HD4(m, 2, s, p); \
HD4(m, 3, s, p); \
#define FD(i, x) InvS[gb(x, m[(i - x) & 3])]
#define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i];
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
{
unsigned i, m;
const UInt32 *wLim;
UInt32 t;
UInt32 rcon = 1;
keySize /= 4;
w[0] = ((UInt32)keySize / 2) + 3;
w += 4;
for (i = 0; i < keySize; i++, key += 4)
w[i] = GetUi32(key);
t = w[(size_t)keySize - 1];
wLim = w + (size_t)keySize * 3 + 28;
m = 0;
do
{
if (m == 0)
{
t = Ui32(Sbox[gb1(t)] ^ rcon, Sbox[gb2(t)], Sbox[gb3(t)], Sbox[gb0(t)]);
rcon <<= 1;
if (rcon & 0x100)
rcon = 0x1b;
m = keySize;
}
else if (m == 4 && keySize > 6)
t = Ui32(Sbox[gb0(t)], Sbox[gb1(t)], Sbox[gb2(t)], Sbox[gb3(t)]);
m--;
t ^= w[0];
w[keySize] = t;
}
while (++w != wLim);
}
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
{
unsigned i, num;
Aes_SetKey_Enc(w, key, keySize);
num = keySize + 20;
w += 8;
for (i = 0; i < num; i++)
{
UInt32 r = w[i];
w[i] =
DD(0)[Sbox[gb0(r)]] ^
DD(1)[Sbox[gb1(r)]] ^
DD(2)[Sbox[gb2(r)]] ^
DD(3)[Sbox[gb3(r)]];
}
}
/* Aes_Encode and Aes_Decode functions work with little-endian words.
src and dest are pointers to 4 UInt32 words.
src and dest can point to same block */
// MY_FORCE_INLINE
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
UInt32 m[4];
UInt32 numRounds2 = w[0];
w += 4;
s[0] = src[0] ^ w[0];
s[1] = src[1] ^ w[1];
s[2] = src[2] ^ w[2];
s[3] = src[3] ^ w[3];
w += 4;
for (;;)
{
HT16(m, s, 0);
if (--numRounds2 == 0)
break;
HT16(s, m, 4);
w += 8;
}
w += 4;
FT4(0); FT4(1); FT4(2); FT4(3);
}
MY_FORCE_INLINE
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
{
UInt32 s[4];
UInt32 m[4];
UInt32 numRounds2 = w[0];
w += 4 + numRounds2 * 8;
s[0] = src[0] ^ w[0];
s[1] = src[1] ^ w[1];
s[2] = src[2] ^ w[2];
s[3] = src[3] ^ w[3];
for (;;)
{
w -= 8;
HD16(m, s, 4);
if (--numRounds2 == 0)
break;
HD16(s, m, 0);
}
FD4(0); FD4(1); FD4(2); FD4(3);
}
void AesCbc_Init(UInt32 *p, const Byte *iv)
{
unsigned i;
for (i = 0; i < 4; i++)
p[i] = GetUi32(iv + i * 4);
}
void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
{
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
{
p[0] ^= GetUi32(data);
p[1] ^= GetUi32(data + 4);
p[2] ^= GetUi32(data + 8);
p[3] ^= GetUi32(data + 12);
Aes_Encode(p + 4, p, p);
SetUi32(data, p[0]);
SetUi32(data + 4, p[1]);
SetUi32(data + 8, p[2]);
SetUi32(data + 12, p[3]);
}
}
void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
{
UInt32 in[4], out[4];
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
{
in[0] = GetUi32(data);
in[1] = GetUi32(data + 4);
in[2] = GetUi32(data + 8);
in[3] = GetUi32(data + 12);
Aes_Decode(p + 4, out, in);
SetUi32(data, p[0] ^ out[0]);
SetUi32(data + 4, p[1] ^ out[1]);
SetUi32(data + 8, p[2] ^ out[2]);
SetUi32(data + 12, p[3] ^ out[3]);
p[0] = in[0];
p[1] = in[1];
p[2] = in[2];
p[3] = in[3];
}
}
void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
{
for (; numBlocks != 0; numBlocks--)
{
UInt32 temp[4];
unsigned i;
if (++p[0] == 0)
p[1]++;
Aes_Encode(p + 4, temp, p);
for (i = 0; i < 4; i++, data += 4)
{
UInt32 t = temp[i];
#ifdef MY_CPU_LE_UNALIGN
*((UInt32 *)(void *)data) ^= t;
#else
data[0] = (Byte)(data[0] ^ (t & 0xFF));
data[1] = (Byte)(data[1] ^ ((t >> 8) & 0xFF));
data[2] = (Byte)(data[2] ^ ((t >> 16) & 0xFF));
data[3] = (Byte)(data[3] ^ ((t >> 24)));
#endif
}
}
}

60
C/Aes.h Normal file
View file

@ -0,0 +1,60 @@
/* Aes.h -- AES encryption / decryption
2018-04-28 : Igor Pavlov : Public domain */
#ifndef __AES_H
#define __AES_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define AES_BLOCK_SIZE 16
/* Call AesGenTables one time before other AES functions */
void AesGenTables(void);
/* UInt32 pointers must be 16-byte aligned */
/* 16-byte (4 * 32-bit words) blocks: 1 (IV) + 1 (keyMode) + 15 (AES-256 roundKeys) */
#define AES_NUM_IVMRK_WORDS ((1 + 1 + 15) * 4)
/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */
/* keySize = 16 or 24 or 32 (bytes) */
typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
/* data - 16-byte aligned pointer to data */
/* numBlocks - the number of 16-byte blocks in data array */
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
extern AES_CODE_FUNC g_AesCbc_Decode;
#ifndef _SFX
extern AES_CODE_FUNC g_AesCbc_Encode;
extern AES_CODE_FUNC g_AesCtr_Code;
#define k_Aes_SupportedFunctions_HW (1 << 2)
#define k_Aes_SupportedFunctions_HW_256 (1 << 3)
extern UInt32 g_Aes_SupportedFunctions_Flags;
#endif
#define DECLARE__AES_CODE_FUNC(funcName) \
void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
DECLARE__AES_CODE_FUNC (AesCbc_Encode)
DECLARE__AES_CODE_FUNC (AesCbc_Decode)
DECLARE__AES_CODE_FUNC (AesCtr_Code)
DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
EXTERN_C_END
#endif

776
C/AesOpt.c Normal file
View file

@ -0,0 +1,776 @@
/* AesOpt.c -- AES optimized code for x86 AES hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
#define USE_INTEL_AES
#define ATTRIB_AES __attribute__((__target__("aes")))
#if (__clang_major__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
#define USE_INTEL_AES
#ifndef __AES__
#define ATTRIB_AES __attribute__((__target__("aes")))
#endif
#if (__GNUC__ >= 8)
#define USE_INTEL_VAES
#define ATTRIB_VAES __attribute__((__target__("aes,vaes,avx2")))
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110)
#define USE_INTEL_AES
#if (__INTEL_COMPILER >= 1900)
#define USE_INTEL_VAES
#endif
#endif
#elif defined(_MSC_VER)
#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
#define USE_INTEL_AES
#if (_MSC_VER >= 1910)
#define USE_INTEL_VAES
#endif
#endif
#endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#ifndef ATTRIB_VAES
#define ATTRIB_VAES
#endif
#ifdef USE_INTEL_AES
#include <wmmintrin.h>
#ifndef USE_INTEL_VAES
#define AES_TYPE_keys __m128i
#define AES_TYPE_data __m128i
#endif
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(__m128i *p, __m128i *data, size_t numBlocks)
#define AES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_AES \
AES_FUNC_START (name)
#define MM_OP(op, dest, src) dest = op(dest, src);
#define MM_OP_m(op, src) MM_OP(op, m, src);
#define MM_XOR( dest, src) MM_OP(_mm_xor_si128, dest, src);
#define AVX_XOR(dest, src) MM_OP(_mm256_xor_si256, dest, src);
AES_FUNC_START2 (AesCbc_Encode_HW)
{
__m128i m = *p;
const __m128i k0 = p[2];
const __m128i k1 = p[3];
const UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
for (; numBlocks != 0; numBlocks--, data++)
{
UInt32 r = numRounds2;
const __m128i *w = p + 4;
__m128i temp = *data;
MM_XOR (temp, k0);
MM_XOR (m, temp);
MM_OP_m (_mm_aesenc_si128, k1);
do
{
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--r);
MM_OP_m (_mm_aesenclast_si128, w[0]);
*data = m;
}
*p = m;
}
#define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1);
#define WOP_3(op) WOP_2 (op) op (m2, 2);
#define WOP_4(op) WOP_3 (op) op (m3, 3);
#ifdef MY_CPU_AMD64
#define WOP_5(op) WOP_4 (op) op (m4, 4);
#define WOP_6(op) WOP_5 (op) op (m5, 5);
#define WOP_7(op) WOP_6 (op) op (m6, 6);
#define WOP_8(op) WOP_7 (op) op (m7, 7);
#endif
/*
#define WOP_9(op) WOP_8 (op) op (m8, 8);
#define WOP_10(op) WOP_9 (op) op (m9, 9);
#define WOP_11(op) WOP_10(op) op (m10, 10);
#define WOP_12(op) WOP_11(op) op (m11, 11);
#define WOP_13(op) WOP_12(op) op (m12, 12);
#define WOP_14(op) WOP_13(op) op (m13, 13);
*/
#ifdef MY_CPU_AMD64
#define NUM_WAYS 8
#define WOP_M1 WOP_8
#else
#define NUM_WAYS 4
#define WOP_M1 WOP_4
#endif
#define WOP(op) op (m0, 0); WOP_M1(op)
#define DECLARE_VAR(reg, ii) __m128i reg
#define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
#endif
#define AVX__DECLARE_VAR(reg, ii) __m256i reg
#define AVX__LOAD_data( reg, ii) reg = ((const __m256i *)(const void *)data)[ii];
#define AVX__STORE_data( reg, ii) ((__m256i *)(void *)data)[ii] = reg;
#define AVX__XOR_data_M1(reg, ii) AVX_XOR (reg, (((const __m256i *)(const void *)(data - 1))[ii]));
#define MM_OP_key(op, reg) MM_OP(op, reg, key);
#define AES_DEC( reg, ii) MM_OP_key (_mm_aesdec_si128, reg)
#define AES_DEC_LAST( reg, ii) MM_OP_key (_mm_aesdeclast_si128, reg)
#define AES_ENC( reg, ii) MM_OP_key (_mm_aesenc_si128, reg)
#define AES_ENC_LAST( reg, ii) MM_OP_key (_mm_aesenclast_si128, reg)
#define AES_XOR( reg, ii) MM_OP_key (_mm_xor_si128, reg)
#define AVX__AES_DEC( reg, ii) MM_OP_key (_mm256_aesdec_epi128, reg)
#define AVX__AES_DEC_LAST( reg, ii) MM_OP_key (_mm256_aesdeclast_epi128, reg)
#define AVX__AES_ENC( reg, ii) MM_OP_key (_mm256_aesenc_epi128, reg)
#define AVX__AES_ENC_LAST( reg, ii) MM_OP_key (_mm256_aesenclast_epi128, reg)
#define AVX__AES_XOR( reg, ii) MM_OP_key (_mm256_xor_si256, reg)
#define CTR_START(reg, ii) MM_OP (_mm_add_epi64, ctr, one); reg = ctr;
#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
#define AVX__CTR_START(reg, ii) MM_OP (_mm256_add_epi64, ctr2, two); reg = _mm256_xor_si256(ctr2, key);
#define AVX__CTR_END( reg, ii) AVX_XOR (((__m256i *)(void *)data)[ii], reg);
#define WOP_KEY(op, n) { \
const __m128i key = w[n]; \
WOP(op); }
#define AVX__WOP_KEY(op, n) { \
const __m256i key = w[n]; \
WOP(op); }
#define WIDE_LOOP_START \
dataEnd = data + numBlocks; \
if (numBlocks >= NUM_WAYS) \
{ dataEnd -= NUM_WAYS; do { \
#define WIDE_LOOP_END \
data += NUM_WAYS; \
} while (data <= dataEnd); \
dataEnd += NUM_WAYS; } \
#define SINGLE_LOOP \
for (; data < dataEnd; data++)
#define NUM_AES_KEYS_MAX 15
#define WIDE_LOOP_START_AVX(OP) \
dataEnd = data + numBlocks; \
if (numBlocks >= NUM_WAYS * 2) \
{ __m256i keys[NUM_AES_KEYS_MAX]; \
UInt32 ii; \
OP \
for (ii = 0; ii < numRounds; ii++) \
keys[ii] = _mm256_broadcastsi128_si256(p[ii]); \
dataEnd -= NUM_WAYS * 2; do { \
#define WIDE_LOOP_END_AVX(OP) \
data += NUM_WAYS * 2; \
} while (data <= dataEnd); \
dataEnd += NUM_WAYS * 2; \
OP \
_mm256_zeroupper(); \
} \
/* MSVC for x86: If we don't call _mm256_zeroupper(), and -arch:IA32 is not specified,
MSVC still can insert vzeroupper instruction. */
AES_FUNC_START2 (AesCbc_Decode_HW)
{
__m128i iv = *p;
const __m128i *wStart = p + *(const UInt32 *)(p + 1) * 2 + 2 - 1;
const __m128i *dataEnd;
p += 2;
WIDE_LOOP_START
{
const __m128i *w = wStart;
WOP (DECLARE_VAR)
WOP (LOAD_data);
WOP_KEY (AES_XOR, 1)
do
{
WOP_KEY (AES_DEC, 0)
w--;
}
while (w != p);
WOP_KEY (AES_DEC_LAST, 0)
MM_XOR (m0, iv);
WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1];
WOP (STORE_data);
}
WIDE_LOOP_END
SINGLE_LOOP
{
const __m128i *w = wStart - 1;
__m128i m = _mm_xor_si128 (w[2], *data);
do
{
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdeclast_si128, w[0]);
MM_XOR (m, iv);
iv = *data;
*data = m;
}
p[-2] = iv;
}
AES_FUNC_START2 (AesCtr_Code_HW)
{
__m128i ctr = *p;
UInt32 numRoundsMinus2 = *(const UInt32 *)(p + 1) * 2 - 1;
const __m128i *dataEnd;
__m128i one = _mm_cvtsi32_si128(1);
p += 2;
WIDE_LOOP_START
{
const __m128i *w = p;
UInt32 r = numRoundsMinus2;
WOP (DECLARE_VAR)
WOP (CTR_START);
WOP_KEY (AES_XOR, 0)
w += 1;
do
{
WOP_KEY (AES_ENC, 0)
w += 1;
}
while (--r);
WOP_KEY (AES_ENC_LAST, 0)
WOP (CTR_END);
}
WIDE_LOOP_END
SINGLE_LOOP
{
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
MM_OP (_mm_add_epi64, ctr, one);
m = _mm_xor_si128 (ctr, p[0]);
w += 1;
do
{
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenclast_si128, w[1]);
MM_XOR (*data, m);
}
p[-2] = ctr;
}
#ifdef USE_INTEL_VAES
#if defined(__clang__) && defined(_MSC_VER)
#define __SSE4_2__
#define __AES__
#define __AVX__
#define __AVX2__
#define __VAES__
#define __AVX512F__
#define __AVX512VL__
#endif
#include <immintrin.h>
#define VAES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_VAES \
AES_FUNC_START (name)
VAES_FUNC_START2 (AesCbc_Decode_HW_256)
{
__m128i iv = *p;
const __m128i *dataEnd;
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
p += 2;
WIDE_LOOP_START_AVX(;)
{
const __m256i *w = keys + numRounds - 2;
WOP (AVX__DECLARE_VAR)
WOP (AVX__LOAD_data);
AVX__WOP_KEY (AVX__AES_XOR, 1)
do
{
AVX__WOP_KEY (AVX__AES_DEC, 0)
w--;
}
while (w != keys);
AVX__WOP_KEY (AVX__AES_DEC_LAST, 0)
AVX_XOR (m0, _mm256_setr_m128i(iv, data[0]));
WOP_M1 (AVX__XOR_data_M1)
iv = data[NUM_WAYS * 2 - 1];
WOP (AVX__STORE_data);
}
WIDE_LOOP_END_AVX(;)
SINGLE_LOOP
{
const __m128i *w = p + *(const UInt32 *)(p + 1 - 2) * 2 + 1 - 3;
__m128i m = _mm_xor_si128 (w[2], *data);
do
{
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdec_si128, w[0]);
w -= 2;
}
while (w != p);
MM_OP_m (_mm_aesdec_si128, w[1]);
MM_OP_m (_mm_aesdeclast_si128, w[0]);
MM_XOR (m, iv);
iv = *data;
*data = m;
}
p[-2] = iv;
}
/*
SSE2: _mm_cvtsi32_si128 : movd
AVX: _mm256_setr_m128i : vinsertf128
AVX2: _mm256_add_epi64 : vpaddq ymm, ymm, ymm
_mm256_extracti128_si256 : vextracti128
_mm256_broadcastsi128_si256 : vbroadcasti128
*/
#define AVX__CTR_LOOP_START \
ctr2 = _mm256_setr_m128i(_mm_sub_epi64(ctr, one), ctr); \
two = _mm256_setr_m128i(one, one); \
two = _mm256_add_epi64(two, two); \
// two = _mm256_setr_epi64x(2, 0, 2, 0);
#define AVX__CTR_LOOP_ENC \
ctr = _mm256_extracti128_si256 (ctr2, 1); \
VAES_FUNC_START2 (AesCtr_Code_HW_256)
{
__m128i ctr = *p;
UInt32 numRounds = *(const UInt32 *)(p + 1) * 2 + 1;
const __m128i *dataEnd;
__m128i one = _mm_cvtsi32_si128(1);
__m256i ctr2, two;
p += 2;
WIDE_LOOP_START_AVX (AVX__CTR_LOOP_START)
{
const __m256i *w = keys;
UInt32 r = numRounds - 2;
WOP (AVX__DECLARE_VAR)
AVX__WOP_KEY (AVX__CTR_START, 0);
w += 1;
do
{
AVX__WOP_KEY (AVX__AES_ENC, 0)
w += 1;
}
while (--r);
AVX__WOP_KEY (AVX__AES_ENC_LAST, 0)
WOP (AVX__CTR_END);
}
WIDE_LOOP_END_AVX (AVX__CTR_LOOP_ENC)
SINGLE_LOOP
{
UInt32 numRounds2 = *(const UInt32 *)(p - 2 + 1) - 1;
const __m128i *w = p;
__m128i m;
MM_OP (_mm_add_epi64, ctr, one);
m = _mm_xor_si128 (ctr, p[0]);
w += 1;
do
{
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenc_si128, w[1]);
w += 2;
}
while (--numRounds2);
MM_OP_m (_mm_aesenc_si128, w[0]);
MM_OP_m (_mm_aesenclast_si128, w[1]);
MM_XOR (*data, m);
}
p[-2] = ctr;
}
#endif // USE_INTEL_VAES
#else // USE_INTEL_AES
/* no USE_INTEL_AES */
#pragma message("AES HW_SW stub was used")
#define AES_TYPE_keys UInt32
#define AES_TYPE_data Byte
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(UInt32 *p, Byte *data, size_t numBlocks) \
#define AES_COMPAT_STUB(name) \
AES_FUNC_START(name); \
AES_FUNC_START(name ## _HW) \
{ name(p, data, numBlocks); }
AES_COMPAT_STUB (AesCbc_Encode)
AES_COMPAT_STUB (AesCbc_Decode)
AES_COMPAT_STUB (AesCtr_Code)
#endif // USE_INTEL_AES
#ifndef USE_INTEL_VAES
#pragma message("VAES HW_SW stub was used")
#define VAES_COMPAT_STUB(name) \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks); \
void MY_FAST_CALL name ## _256(UInt32 *p, Byte *data, size_t numBlocks) \
{ name((AES_TYPE_keys *)(void *)p, (AES_TYPE_data *)(void *)data, numBlocks); }
VAES_COMPAT_STUB (AesCbc_Decode_HW)
VAES_COMPAT_STUB (AesCtr_Code_HW)
#endif // ! USE_INTEL_VAES
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_AES
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_AES
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_AES
#endif
#endif
#ifdef USE_HW_AES
// #pragma message("=== AES HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_AES __attribute__((__target__("+crypto")))
#else
#define ATTRIB_AES __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#ifndef ATTRIB_AES
#define ATTRIB_AES
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint8x16_t v128;
#define AES_FUNC_START(name) \
void MY_FAST_CALL name(v128 *p, v128 *data, size_t numBlocks)
#define AES_FUNC_START2(name) \
AES_FUNC_START (name); \
ATTRIB_AES \
AES_FUNC_START (name)
#define MM_OP(op, dest, src) dest = op(dest, src);
#define MM_OP_m(op, src) MM_OP(op, m, src);
#define MM_OP1_m(op) m = op(m);
#define MM_XOR( dest, src) MM_OP(veorq_u8, dest, src);
#define MM_XOR_m( src) MM_XOR(m, src);
#define AES_E_m(k) MM_OP_m (vaeseq_u8, k);
#define AES_E_MC_m(k) AES_E_m (k); MM_OP1_m(vaesmcq_u8);
AES_FUNC_START2 (AesCbc_Encode_HW)
{
v128 m = *p;
const v128 k0 = p[2];
const v128 k1 = p[3];
const v128 k2 = p[4];
const v128 k3 = p[5];
const v128 k4 = p[6];
const v128 k5 = p[7];
const v128 k6 = p[8];
const v128 k7 = p[9];
const v128 k8 = p[10];
const v128 k9 = p[11];
const UInt32 numRounds2 = *(const UInt32 *)(p + 1);
const v128 *w = p + ((size_t)numRounds2 * 2);
const v128 k_z1 = w[1];
const v128 k_z0 = w[2];
for (; numBlocks != 0; numBlocks--, data++)
{
MM_XOR_m (*data);
AES_E_MC_m (k0)
AES_E_MC_m (k1)
AES_E_MC_m (k2)
AES_E_MC_m (k3)
AES_E_MC_m (k4)
AES_E_MC_m (k5)
AES_E_MC_m (k6)
AES_E_MC_m (k7)
AES_E_MC_m (k8)
if (numRounds2 >= 6)
{
AES_E_MC_m (k9)
AES_E_MC_m (p[12])
if (numRounds2 != 6)
{
AES_E_MC_m (p[13])
AES_E_MC_m (p[14])
}
}
AES_E_m (k_z1);
MM_XOR_m (k_z0);
*data = m;
}
*p = m;
}
#define WOP_1(op)
#define WOP_2(op) WOP_1 (op) op (m1, 1);
#define WOP_3(op) WOP_2 (op) op (m2, 2);
#define WOP_4(op) WOP_3 (op) op (m3, 3);
#define WOP_5(op) WOP_4 (op) op (m4, 4);
#define WOP_6(op) WOP_5 (op) op (m5, 5);
#define WOP_7(op) WOP_6 (op) op (m6, 6);
#define WOP_8(op) WOP_7 (op) op (m7, 7);
#define NUM_WAYS 8
#define WOP_M1 WOP_8
#define WOP(op) op (m0, 0); WOP_M1(op)
#define DECLARE_VAR(reg, ii) v128 reg
#define LOAD_data( reg, ii) reg = data[ii];
#define STORE_data( reg, ii) data[ii] = reg;
#if (NUM_WAYS > 1)
#define XOR_data_M1(reg, ii) MM_XOR (reg, data[ii- 1]);
#endif
#define MM_OP_key(op, reg) MM_OP (op, reg, key);
#define AES_D_m(k) MM_OP_m (vaesdq_u8, k);
#define AES_D_IMC_m(k) AES_D_m (k); MM_OP1_m (vaesimcq_u8);
#define AES_XOR( reg, ii) MM_OP_key (veorq_u8, reg)
#define AES_D( reg, ii) MM_OP_key (vaesdq_u8, reg)
#define AES_E( reg, ii) MM_OP_key (vaeseq_u8, reg)
#define AES_D_IMC( reg, ii) AES_D (reg, ii); reg = vaesimcq_u8(reg)
#define AES_E_MC( reg, ii) AES_E (reg, ii); reg = vaesmcq_u8(reg)
#define CTR_START(reg, ii) MM_OP (vaddq_u64, ctr, one); reg = vreinterpretq_u8_u64(ctr);
#define CTR_END( reg, ii) MM_XOR (data[ii], reg);
#define WOP_KEY(op, n) { \
const v128 key = w[n]; \
WOP(op); }
#define WIDE_LOOP_START \
dataEnd = data + numBlocks; \
if (numBlocks >= NUM_WAYS) \
{ dataEnd -= NUM_WAYS; do { \
#define WIDE_LOOP_END \
data += NUM_WAYS; \
} while (data <= dataEnd); \
dataEnd += NUM_WAYS; } \
#define SINGLE_LOOP \
for (; data < dataEnd; data++)
AES_FUNC_START2 (AesCbc_Decode_HW)
{
v128 iv = *p;
const v128 *wStart = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd;
p += 2;
WIDE_LOOP_START
{
const v128 *w = wStart;
WOP (DECLARE_VAR)
WOP (LOAD_data);
WOP_KEY (AES_D_IMC, 2)
do
{
WOP_KEY (AES_D_IMC, 1)
WOP_KEY (AES_D_IMC, 0)
w -= 2;
}
while (w != p);
WOP_KEY (AES_D, 1)
WOP_KEY (AES_XOR, 0)
MM_XOR (m0, iv);
WOP_M1 (XOR_data_M1)
iv = data[NUM_WAYS - 1];
WOP (STORE_data);
}
WIDE_LOOP_END
SINGLE_LOOP
{
const v128 *w = wStart;
v128 m = *data;
AES_D_IMC_m (w[2])
do
{
AES_D_IMC_m (w[1]);
AES_D_IMC_m (w[0]);
w -= 2;
}
while (w != p);
AES_D_m (w[1]);
MM_XOR_m (w[0]);
MM_XOR_m (iv);
iv = *data;
*data = m;
}
p[-2] = iv;
}
AES_FUNC_START2 (AesCtr_Code_HW)
{
uint64x2_t ctr = vreinterpretq_u64_u8(*p);
const v128 *wEnd = p + ((size_t)*(const UInt32 *)(p + 1)) * 2;
const v128 *dataEnd;
uint64x2_t one = vdupq_n_u64(0);
one = vsetq_lane_u64(1, one, 0);
p += 2;
WIDE_LOOP_START
{
const v128 *w = p;
WOP (DECLARE_VAR)
WOP (CTR_START);
do
{
WOP_KEY (AES_E_MC, 0)
WOP_KEY (AES_E_MC, 1)
w += 2;
}
while (w != wEnd);
WOP_KEY (AES_E_MC, 0)
WOP_KEY (AES_E, 1)
WOP_KEY (AES_XOR, 2)
WOP (CTR_END);
}
WIDE_LOOP_END
SINGLE_LOOP
{
const v128 *w = p;
v128 m;
CTR_START (m, 0);
do
{
AES_E_MC_m (w[0]);
AES_E_MC_m (w[1]);
w += 2;
}
while (w != wEnd);
AES_E_MC_m (w[0]);
AES_E_m (w[1]);
MM_XOR_m (w[2]);
CTR_END (m, 0);
}
p[-2] = vreinterpretq_u8_u64(ctr);
}
#endif // USE_HW_AES
#endif // MY_CPU_ARM_OR_ARM64

463
C/Alloc.c Normal file
View file

@ -0,0 +1,463 @@
/* Alloc.c -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#endif
#include <stdlib.h>
#include "Alloc.h"
/* #define _SZ_ALLOC_DEBUG */
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
#ifdef _SZ_ALLOC_DEBUG
#include <stdio.h>
int g_allocCount = 0;
int g_allocCountMid = 0;
int g_allocCountBig = 0;
#define CONVERT_INT_TO_STR(charType, tempSize) \
unsigned char temp[tempSize]; unsigned i = 0; \
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
*s++ = (charType)('0' + (unsigned)val); \
while (i != 0) { i--; *s++ = temp[i]; } \
*s = 0;
static void ConvertUInt64ToString(UInt64 val, char *s)
{
CONVERT_INT_TO_STR(char, 24);
}
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
static void ConvertUInt64ToHex(UInt64 val, char *s)
{
UInt64 v = val;
unsigned i;
for (i = 1;; i++)
{
v >>= 4;
if (v == 0)
break;
}
s[i] = 0;
do
{
unsigned t = (unsigned)(val & 0xF);
val >>= 4;
s[--i] = GET_HEX_CHAR(t);
}
while (i);
}
#define DEBUG_OUT_STREAM stderr
static void Print(const char *s)
{
fputs(s, DEBUG_OUT_STREAM);
}
static void PrintAligned(const char *s, size_t align)
{
size_t len = strlen(s);
for(;;)
{
fputc(' ', DEBUG_OUT_STREAM);
if (len >= align)
break;
++len;
}
Print(s);
}
static void PrintLn()
{
Print("\n");
}
static void PrintHex(UInt64 v, size_t align)
{
char s[32];
ConvertUInt64ToHex(v, s);
PrintAligned(s, align);
}
static void PrintDec(UInt64 v, size_t align)
{
char s[32];
ConvertUInt64ToString(v, s);
PrintAligned(s, align);
}
static void PrintAddr(void *p)
{
PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
}
#define PRINT_ALLOC(name, cnt, size, ptr) \
Print(name " "); \
PrintDec(cnt++, 10); \
PrintHex(size, 10); \
PrintAddr(ptr); \
PrintLn();
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
Print(name " "); \
PrintDec(--cnt, 10); \
PrintAddr(ptr); \
PrintLn(); }
#else
#define PRINT_ALLOC(name, cnt, size, ptr)
#define PRINT_FREE(name, cnt, ptr)
#define Print(s)
#define PrintLn()
#define PrintHex(v, align)
#define PrintAddr(p)
#endif
void *MyAlloc(size_t size)
{
if (size == 0)
return NULL;
PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);
#ifdef _SZ_ALLOC_DEBUG
{
void *p = malloc(size);
// PRINT_ALLOC("Alloc ", g_allocCount, size, p);
return p;
}
#else
return malloc(size);
#endif
}
void MyFree(void *address)
{
PRINT_FREE("Free ", g_allocCount, address);
free(address);
}
#ifdef _WIN32
void *MidAlloc(size_t size)
{
if (size == 0)
return NULL;
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
}
void MidFree(void *address)
{
PRINT_FREE("Free-Mid", g_allocCountMid, address);
if (!address)
return;
VirtualFree(address, 0, MEM_RELEASE);
}
#ifdef _7ZIP_LARGE_PAGES
#ifdef MEM_LARGE_PAGES
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
#else
#define MY__MEM_LARGE_PAGES 0x20000000
#endif
extern
SIZE_T g_LargePageSize;
SIZE_T g_LargePageSize = 0;
typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
#endif // _7ZIP_LARGE_PAGES
void SetLargePageSize()
{
#ifdef _7ZIP_LARGE_PAGES
SIZE_T size;
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
if (!largePageMinimum)
return;
size = largePageMinimum();
if (size == 0 || (size & (size - 1)) != 0)
return;
g_LargePageSize = size;
#endif
}
void *BigAlloc(size_t size)
{
if (size == 0)
return NULL;
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
#ifdef _7ZIP_LARGE_PAGES
{
SIZE_T ps = g_LargePageSize;
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
{
size_t size2;
ps--;
size2 = (size + ps) & ~ps;
if (size2 >= size)
{
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
if (res)
return res;
}
}
}
#endif
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
}
void BigFree(void *address)
{
PRINT_FREE("Free-Big", g_allocCountBig, address);
if (!address)
return;
VirtualFree(address, 0, MEM_RELEASE);
}
#endif
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
#ifdef _WIN32
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
#endif
/*
uintptr_t : <stdint.h> C99 (optional)
: unsupported in VS6
*/
#ifdef _WIN32
typedef UINT_PTR UIntPtr;
#else
/*
typedef uintptr_t UIntPtr;
*/
typedef ptrdiff_t UIntPtr;
#endif
#define ADJUST_ALLOC_SIZE 0
/*
#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
*/
/*
Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
MyAlloc() can return address that is NOT multiple of sizeof(void *).
*/
/*
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
*/
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
#define USE_posix_memalign
#endif
#ifndef USE_posix_memalign
#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
#endif
/*
This posix_memalign() is for test purposes only.
We also need special Free() function instead of free(),
if this posix_memalign() is used.
*/
/*
static int posix_memalign(void **ptr, size_t align, size_t size)
{
size_t newSize = size + align;
void *p;
void *pAligned;
*ptr = NULL;
if (newSize < size)
return 12; // ENOMEM
p = MyAlloc(newSize);
if (!p)
return 12; // ENOMEM
pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
((void **)pAligned)[-1] = p;
*ptr = pAligned;
return 0;
}
*/
/*
ALLOC_ALIGN_SIZE >= sizeof(void *)
ALLOC_ALIGN_SIZE >= cache_line_size
*/
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
{
#ifndef USE_posix_memalign
void *p;
void *pAligned;
size_t newSize;
UNUSED_VAR(pp);
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */
newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
if (newSize < size)
return NULL;
p = MyAlloc(newSize);
if (!p)
return NULL;
pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
Print(" size="); PrintHex(size, 8);
Print(" a_size="); PrintHex(newSize, 8);
Print(" ptr="); PrintAddr(p);
Print(" a_ptr="); PrintAddr(pAligned);
PrintLn();
((void **)pAligned)[-1] = p;
return pAligned;
#else
void *p;
UNUSED_VAR(pp);
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
return NULL;
Print(" posix_memalign="); PrintAddr(p);
PrintLn();
return p;
#endif
}
static void SzAlignedFree(ISzAllocPtr pp, void *address)
{
UNUSED_VAR(pp);
#ifndef USE_posix_memalign
if (address)
MyFree(((void **)address)[-1]);
#else
free(address);
#endif
}
const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
/*
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
*/
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
{
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
void *adr;
void *pAligned;
size_t newSize;
size_t extra;
size_t alignSize = (size_t)1 << p->numAlignBits;
if (alignSize < sizeof(void *))
alignSize = sizeof(void *);
if (p->offset >= alignSize)
return NULL;
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
block to prevent cache line sharing with another allocated blocks */
extra = p->offset & (sizeof(void *) - 1);
newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
if (newSize < size)
return NULL;
adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
if (!adr)
return NULL;
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
PrintLn();
Print("- Aligned: ");
Print(" size="); PrintHex(size, 8);
Print(" a_size="); PrintHex(newSize, 8);
Print(" ptr="); PrintAddr(adr);
Print(" a_ptr="); PrintAddr(pAligned);
PrintLn();
REAL_BLOCK_PTR_VAR(pAligned) = adr;
return pAligned;
}
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
{
if (address)
{
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
PrintLn();
Print("- Aligned Free: ");
PrintLn();
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
}
}
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
{
p->vt.Alloc = AlignOffsetAlloc_Alloc;
p->vt.Free = AlignOffsetAlloc_Free;
}

58
C/Alloc.h Normal file
View file

@ -0,0 +1,58 @@
/* Alloc.h -- Memory allocation functions
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __COMMON_ALLOC_H
#define __COMMON_ALLOC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
void *MyAlloc(size_t size);
void MyFree(void *address);
#ifdef _WIN32
void SetLargePageSize(void);
void *MidAlloc(size_t size);
void MidFree(void *address);
void *BigAlloc(size_t size);
void BigFree(void *address);
#else
#define MidAlloc(size) MyAlloc(size)
#define MidFree(address) MyFree(address)
#define BigAlloc(size) MyAlloc(size)
#define BigFree(address) MyFree(address)
#endif
extern const ISzAlloc g_Alloc;
#ifdef _WIN32
extern const ISzAlloc g_BigAlloc;
extern const ISzAlloc g_MidAlloc;
#else
#define g_BigAlloc g_AlignedAlloc
#define g_MidAlloc g_AlignedAlloc
#endif
extern const ISzAlloc g_AlignedAlloc;
typedef struct
{
ISzAlloc vt;
ISzAllocPtr baseAlloc;
unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
size_t offset; /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
} CAlignOffsetAlloc;
void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
EXTERN_C_END
#endif

257
C/Bcj2.c Normal file
View file

@ -0,0 +1,257 @@
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bcj2.h"
#include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5
#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
void Bcj2Dec_Init(CBcj2Dec *p)
{
unsigned i;
p->state = BCJ2_DEC_STATE_OK;
p->ip = 0;
p->temp[3] = 0;
p->range = 0;
p->code = 0;
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
p->probs[i] = kBitModelTotal >> 1;
}
SRes Bcj2Dec_Decode(CBcj2Dec *p)
{
if (p->range <= 5)
{
p->state = BCJ2_DEC_STATE_OK;
for (; p->range != 5; p->range++)
{
if (p->range == 1 && p->code != 0)
return SZ_ERROR_DATA;
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
return SZ_OK;
}
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
if (p->code == 0xFFFFFFFF)
return SZ_ERROR_DATA;
p->range = 0xFFFFFFFF;
}
else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
{
while (p->state <= BCJ2_DEC_STATE_ORIG_3)
{
Byte *dest = p->dest;
if (dest == p->destLim)
return SZ_OK;
*dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
p->state++;
p->dest = dest + 1;
}
}
/*
if (BCJ2_IS_32BIT_STREAM(p->state))
{
const Byte *cur = p->bufs[p->state];
if (cur == p->lims[p->state])
return SZ_OK;
p->bufs[p->state] = cur + 4;
{
UInt32 val;
Byte *dest;
SizeT rem;
p->ip += 4;
val = GetBe32(cur) - p->ip;
dest = p->dest;
rem = p->destLim - dest;
if (rem < 4)
{
SizeT i;
SetUi32(p->temp, val);
for (i = 0; i < rem; i++)
dest[i] = p->temp[i];
p->dest = dest + rem;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
return SZ_OK;
}
SetUi32(dest, val);
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4;
p->state = BCJ2_DEC_STATE_OK;
}
}
*/
for (;;)
{
if (BCJ2_IS_32BIT_STREAM(p->state))
p->state = BCJ2_DEC_STATE_OK;
else
{
if (p->range < kTopValue)
{
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
return SZ_OK;
}
p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
{
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
const Byte *srcLim;
Byte *dest;
SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
return SZ_OK;
}
dest = p->dest;
if (num > (SizeT)(p->destLim - dest))
{
num = (SizeT)(p->destLim - dest);
if (num == 0)
{
p->state = BCJ2_DEC_STATE_ORIG;
return SZ_OK;
}
}
srcLim = src + num;
if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)
*dest = src[0];
else for (;;)
{
Byte b = *src;
*dest = b;
if (b != 0x0F)
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break;
}
num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
if (src == srcLim)
{
p->temp[3] = src[-1];
p->bufs[BCJ2_STREAM_MAIN] = src;
p->ip += (UInt32)num;
p->dest += num;
p->state =
p->bufs[BCJ2_STREAM_MAIN] ==
p->lims[BCJ2_STREAM_MAIN] ?
(unsigned)BCJ2_STREAM_MAIN :
(unsigned)BCJ2_DEC_STATE_ORIG;
return SZ_OK;
}
{
UInt32 bound, ttt;
CProb *prob;
Byte b = src[0];
Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
p->temp[3] = b;
p->bufs[BCJ2_STREAM_MAIN] = src + 1;
num++;
p->ip += (UInt32)num;
p->dest += num;
prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
_IF_BIT_0
{
_UPDATE_0
continue;
}
_UPDATE_1
}
}
}
{
UInt32 val;
unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
const Byte *cur = p->bufs[cj];
Byte *dest;
SizeT rem;
if (cur == p->lims[cj])
{
p->state = cj;
break;
}
val = GetBe32(cur);
p->bufs[cj] = cur + 4;
p->ip += 4;
val -= p->ip;
dest = p->dest;
rem = (SizeT)(p->destLim - dest);
if (rem < 4)
{
p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;
p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;
p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;
p->temp[3] = (Byte)val;
p->dest = dest + rem;
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
break;
}
SetUi32(dest, val);
p->temp[3] = (Byte)(val >> 24);
p->dest = dest + 4;
}
}
if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
{
p->range <<= 8;
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
}
return SZ_OK;
}

146
C/Bcj2.h Normal file
View file

@ -0,0 +1,146 @@
/* Bcj2.h -- BCJ2 Converter for x86 code
2014-11-10 : Igor Pavlov : Public domain */
#ifndef __BCJ2_H
#define __BCJ2_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define BCJ2_NUM_STREAMS 4
enum
{
BCJ2_STREAM_MAIN,
BCJ2_STREAM_CALL,
BCJ2_STREAM_JUMP,
BCJ2_STREAM_RC
};
enum
{
BCJ2_DEC_STATE_ORIG_0 = BCJ2_NUM_STREAMS,
BCJ2_DEC_STATE_ORIG_1,
BCJ2_DEC_STATE_ORIG_2,
BCJ2_DEC_STATE_ORIG_3,
BCJ2_DEC_STATE_ORIG,
BCJ2_DEC_STATE_OK
};
enum
{
BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
BCJ2_ENC_STATE_OK
};
#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
/*
CBcj2Dec / CBcj2Enc
bufs sizes:
BUF_SIZE(n) = lims[n] - bufs[n]
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4:
(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
*/
/*
CBcj2Dec:
dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
bufs[BCJ2_STREAM_MAIN] >= dest &&
bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
BUF_SIZE(BCJ2_STREAM_CALL) +
BUF_SIZE(BCJ2_STREAM_JUMP)
tempReserv = 0 : for first call of Bcj2Dec_Decode
tempReserv = 4 : for any other calls of Bcj2Dec_Decode
overlap with offset = 1 is not allowed
*/
typedef struct
{
const Byte *bufs[BCJ2_NUM_STREAMS];
const Byte *lims[BCJ2_NUM_STREAMS];
Byte *dest;
const Byte *destLim;
unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
UInt32 ip;
Byte temp[4];
UInt32 range;
UInt32 code;
UInt16 probs[2 + 256];
} CBcj2Dec;
void Bcj2Dec_Init(CBcj2Dec *p);
/* Returns: SZ_OK or SZ_ERROR_DATA */
SRes Bcj2Dec_Decode(CBcj2Dec *p);
#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
typedef enum
{
BCJ2_ENC_FINISH_MODE_CONTINUE,
BCJ2_ENC_FINISH_MODE_END_BLOCK,
BCJ2_ENC_FINISH_MODE_END_STREAM
} EBcj2Enc_FinishMode;
typedef struct
{
Byte *bufs[BCJ2_NUM_STREAMS];
const Byte *lims[BCJ2_NUM_STREAMS];
const Byte *src;
const Byte *srcLim;
unsigned state;
EBcj2Enc_FinishMode finishMode;
Byte prevByte;
Byte cache;
UInt32 range;
UInt64 low;
UInt64 cacheSize;
UInt32 ip;
/* 32-bit ralative offset in JUMP/CALL commands is
- (mod 4 GB) in 32-bit mode
- signed Int32 in 64-bit mode
We use (mod 4 GB) check for fileSize.
Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
UInt32 fileIp;
UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */
UInt32 tempTarget;
unsigned tempPos;
Byte temp[4 * 2];
unsigned flushPos;
UInt16 probs[2 + 256];
} CBcj2Enc;
void Bcj2Enc_Init(CBcj2Enc *p);
void Bcj2Enc_Encode(CBcj2Enc *p);
#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
#define BCJ2_RELAT_LIMIT_NUM_BITS 26
#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
/* limit for CBcj2Enc::fileSize variable */
#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
EXTERN_C_END
#endif

311
C/Bcj2Enc.c Normal file
View file

@ -0,0 +1,311 @@
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
/* #define SHOW_STAT */
#ifdef SHOW_STAT
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif
#include <string.h>
#include "Bcj2.h"
#include "CpuArch.h"
#define CProb UInt16
#define kTopValue ((UInt32)1 << 24)
#define kNumModelBits 11
#define kBitModelTotal (1 << kNumModelBits)
#define kNumMoveBits 5
void Bcj2Enc_Init(CBcj2Enc *p)
{
unsigned i;
p->state = BCJ2_ENC_STATE_OK;
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
p->prevByte = 0;
p->cache = 0;
p->range = 0xFFFFFFFF;
p->low = 0;
p->cacheSize = 1;
p->ip = 0;
p->fileIp = 0;
p->fileSize = 0;
p->relatLimit = BCJ2_RELAT_LIMIT;
p->tempPos = 0;
p->flushPos = 0;
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
p->probs[i] = kBitModelTotal >> 1;
}
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
{
if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
{
Byte *buf = p->bufs[BCJ2_STREAM_RC];
do
{
if (buf == p->lims[BCJ2_STREAM_RC])
{
p->state = BCJ2_STREAM_RC;
p->bufs[BCJ2_STREAM_RC] = buf;
return True;
}
*buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
p->cache = 0xFF;
}
while (--p->cacheSize);
p->bufs[BCJ2_STREAM_RC] = buf;
p->cache = (Byte)((UInt32)p->low >> 24);
}
p->cacheSize++;
p->low = (UInt32)p->low << 8;
return False;
}
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
{
if (BCJ2_IS_32BIT_STREAM(p->state))
{
Byte *cur = p->bufs[p->state];
if (cur == p->lims[p->state])
return;
SetBe32(cur, p->tempTarget);
p->bufs[p->state] = cur + 4;
}
p->state = BCJ2_ENC_STATE_ORIG;
for (;;)
{
if (p->range < kTopValue)
{
if (RangeEnc_ShiftLow(p))
return;
p->range <<= 8;
}
{
{
const Byte *src = p->src;
const Byte *srcLim;
Byte *dest;
SizeT num = (SizeT)(p->srcLim - src);
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
{
if (num <= 4)
return;
num -= 4;
}
else if (num == 0)
break;
dest = p->bufs[BCJ2_STREAM_MAIN];
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
{
num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
if (num == 0)
{
p->state = BCJ2_STREAM_MAIN;
return;
}
}
srcLim = src + num;
if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
*dest = src[0];
else for (;;)
{
Byte b = *src;
*dest = b;
if (b != 0x0F)
{
if ((b & 0xFE) == 0xE8)
break;
dest++;
if (++src != srcLim)
continue;
break;
}
dest++;
if (++src == srcLim)
break;
if ((*src & 0xF0) != 0x80)
continue;
*dest = *src;
break;
}
num = (SizeT)(src - p->src);
if (src == srcLim)
{
p->prevByte = src[-1];
p->bufs[BCJ2_STREAM_MAIN] = dest;
p->src = src;
p->ip += (UInt32)num;
continue;
}
{
Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
BoolInt needConvert;
p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
p->ip += (UInt32)num + 1;
src++;
needConvert = False;
if ((SizeT)(p->srcLim - src) >= 4)
{
UInt32 relatVal = GetUi32(src);
if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
&& ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
needConvert = True;
}
{
UInt32 bound;
unsigned ttt;
Byte b = src[-1];
CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
ttt = *prob;
bound = (p->range >> kNumModelBits) * ttt;
if (!needConvert)
{
p->range = bound;
*prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
p->src = src;
p->prevByte = b;
continue;
}
p->low += bound;
p->range -= bound;
*prob = (CProb)(ttt - (ttt >> kNumMoveBits));
{
UInt32 relatVal = GetUi32(src);
UInt32 absVal;
p->ip += 4;
absVal = p->ip + relatVal;
p->prevByte = src[3];
src += 4;
p->src = src;
{
unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
Byte *cur = p->bufs[cj];
if (cur == p->lims[cj])
{
p->state = cj;
p->tempTarget = absVal;
return;
}
SetBe32(cur, absVal);
p->bufs[cj] = cur + 4;
}
}
}
}
}
}
}
if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
return;
for (; p->flushPos < 5; p->flushPos++)
if (RangeEnc_ShiftLow(p))
return;
p->state = BCJ2_ENC_STATE_OK;
}
void Bcj2Enc_Encode(CBcj2Enc *p)
{
PRF(printf("\n"));
PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
if (p->tempPos != 0)
{
unsigned extra = 0;
for (;;)
{
const Byte *src = p->src;
const Byte *srcLim = p->srcLim;
EBcj2Enc_FinishMode finishMode = p->finishMode;
p->src = p->temp;
p->srcLim = p->temp + p->tempPos;
if (src != srcLim)
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
Bcj2Enc_Encode_2(p);
{
unsigned num = (unsigned)(p->src - p->temp);
unsigned tempPos = p->tempPos - num;
unsigned i;
p->tempPos = tempPos;
for (i = 0; i < tempPos; i++)
p->temp[i] = p->temp[(size_t)i + num];
p->src = src;
p->srcLim = srcLim;
p->finishMode = finishMode;
if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
return;
if (extra >= tempPos)
{
p->src = src - tempPos;
p->tempPos = 0;
break;
}
p->temp[tempPos] = src[0];
p->tempPos = tempPos + 1;
p->src = src + 1;
extra++;
}
}
}
PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
Bcj2Enc_Encode_2(p);
if (p->state == BCJ2_ENC_STATE_ORIG)
{
const Byte *src = p->src;
unsigned rem = (unsigned)(p->srcLim - src);
unsigned i;
for (i = 0; i < rem; i++)
p->temp[i] = src[i];
p->tempPos = rem;
p->src = src + rem;
}
}

48
C/Blake2.h Normal file
View file

@ -0,0 +1,48 @@
/* Blake2.h -- BLAKE2 Hash
2015-06-30 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#ifndef __BLAKE2_H
#define __BLAKE2_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define BLAKE2S_BLOCK_SIZE 64
#define BLAKE2S_DIGEST_SIZE 32
#define BLAKE2SP_PARALLEL_DEGREE 8
typedef struct
{
UInt32 h[8];
UInt32 t[2];
UInt32 f[2];
Byte buf[BLAKE2S_BLOCK_SIZE];
UInt32 bufPos;
UInt32 lastNode_f1;
UInt32 dummy[2]; /* for sizeof(CBlake2s) alignment */
} CBlake2s;
/* You need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */
/*
void Blake2s_Init0(CBlake2s *p);
void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size);
void Blake2s_Final(CBlake2s *p, Byte *digest);
*/
typedef struct
{
CBlake2s S[BLAKE2SP_PARALLEL_DEGREE];
unsigned bufPos;
} CBlake2sp;
void Blake2sp_Init(CBlake2sp *p);
void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size);
void Blake2sp_Final(CBlake2sp *p, Byte *digest);
EXTERN_C_END
#endif

244
C/Blake2s.c Normal file
View file

@ -0,0 +1,244 @@
/* Blake2s.c -- BLAKE2s and BLAKE2sp Hash
2021-02-09 : Igor Pavlov : Public domain
2015 : Samuel Neves : Public domain */
#include <string.h>
#include "Blake2.h"
#include "CpuArch.h"
#include "RotateDefs.h"
#define rotr32 rotrFixed
#define BLAKE2S_NUM_ROUNDS 10
#define BLAKE2S_FINAL_FLAG (~(UInt32)0)
static const UInt32 k_Blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const Byte k_Blake2s_Sigma[BLAKE2S_NUM_ROUNDS][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
static void Blake2s_Init0(CBlake2s *p)
{
unsigned i;
for (i = 0; i < 8; i++)
p->h[i] = k_Blake2s_IV[i];
p->t[0] = 0;
p->t[1] = 0;
p->f[0] = 0;
p->f[1] = 0;
p->bufPos = 0;
p->lastNode_f1 = 0;
}
static void Blake2s_Compress(CBlake2s *p)
{
UInt32 m[16];
UInt32 v[16];
{
unsigned i;
for (i = 0; i < 16; i++)
m[i] = GetUi32(p->buf + i * sizeof(m[i]));
for (i = 0; i < 8; i++)
v[i] = p->h[i];
}
v[ 8] = k_Blake2s_IV[0];
v[ 9] = k_Blake2s_IV[1];
v[10] = k_Blake2s_IV[2];
v[11] = k_Blake2s_IV[3];
v[12] = p->t[0] ^ k_Blake2s_IV[4];
v[13] = p->t[1] ^ k_Blake2s_IV[5];
v[14] = p->f[0] ^ k_Blake2s_IV[6];
v[15] = p->f[1] ^ k_Blake2s_IV[7];
#define G(r,i,a,b,c,d) \
a += b + m[sigma[2*i+0]]; d ^= a; d = rotr32(d, 16); c += d; b ^= c; b = rotr32(b, 12); \
a += b + m[sigma[2*i+1]]; d ^= a; d = rotr32(d, 8); c += d; b ^= c; b = rotr32(b, 7); \
#define R(r) \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
{
unsigned r;
for (r = 0; r < BLAKE2S_NUM_ROUNDS; r++)
{
const Byte *sigma = k_Blake2s_Sigma[r];
R(r);
}
/* R(0); R(1); R(2); R(3); R(4); R(5); R(6); R(7); R(8); R(9); */
}
#undef G
#undef R
{
unsigned i;
for (i = 0; i < 8; i++)
p->h[i] ^= v[i] ^ v[i + 8];
}
}
#define Blake2s_Increment_Counter(S, inc) \
{ p->t[0] += (inc); p->t[1] += (p->t[0] < (inc)); }
#define Blake2s_Set_LastBlock(p) \
{ p->f[0] = BLAKE2S_FINAL_FLAG; p->f[1] = p->lastNode_f1; }
static void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size)
{
while (size != 0)
{
unsigned pos = (unsigned)p->bufPos;
unsigned rem = BLAKE2S_BLOCK_SIZE - pos;
if (size <= rem)
{
memcpy(p->buf + pos, data, size);
p->bufPos += (UInt32)size;
return;
}
memcpy(p->buf + pos, data, rem);
Blake2s_Increment_Counter(S, BLAKE2S_BLOCK_SIZE);
Blake2s_Compress(p);
p->bufPos = 0;
data += rem;
size -= rem;
}
}
static void Blake2s_Final(CBlake2s *p, Byte *digest)
{
unsigned i;
Blake2s_Increment_Counter(S, (UInt32)p->bufPos);
Blake2s_Set_LastBlock(p);
memset(p->buf + p->bufPos, 0, BLAKE2S_BLOCK_SIZE - p->bufPos);
Blake2s_Compress(p);
for (i = 0; i < 8; i++)
SetUi32(digest + sizeof(p->h[i]) * i, p->h[i]);
}
/* ---------- BLAKE2s ---------- */
/* we need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */
/*
typedef struct
{
Byte digest_length;
Byte key_length;
Byte fanout;
Byte depth;
UInt32 leaf_length;
Byte node_offset[6];
Byte node_depth;
Byte inner_length;
Byte salt[BLAKE2S_SALTBYTES];
Byte personal[BLAKE2S_PERSONALBYTES];
} CBlake2sParam;
*/
static void Blake2sp_Init_Spec(CBlake2s *p, unsigned node_offset, unsigned node_depth)
{
Blake2s_Init0(p);
p->h[0] ^= (BLAKE2S_DIGEST_SIZE | ((UInt32)BLAKE2SP_PARALLEL_DEGREE << 16) | ((UInt32)2 << 24));
p->h[2] ^= ((UInt32)node_offset);
p->h[3] ^= ((UInt32)node_depth << 16) | ((UInt32)BLAKE2S_DIGEST_SIZE << 24);
/*
P->digest_length = BLAKE2S_DIGEST_SIZE;
P->key_length = 0;
P->fanout = BLAKE2SP_PARALLEL_DEGREE;
P->depth = 2;
P->leaf_length = 0;
store48(P->node_offset, node_offset);
P->node_depth = node_depth;
P->inner_length = BLAKE2S_DIGEST_SIZE;
*/
}
void Blake2sp_Init(CBlake2sp *p)
{
unsigned i;
p->bufPos = 0;
for (i = 0; i < BLAKE2SP_PARALLEL_DEGREE; i++)
Blake2sp_Init_Spec(&p->S[i], i, 0);
p->S[BLAKE2SP_PARALLEL_DEGREE - 1].lastNode_f1 = BLAKE2S_FINAL_FLAG;
}
void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size)
{
unsigned pos = p->bufPos;
while (size != 0)
{
unsigned index = pos / BLAKE2S_BLOCK_SIZE;
unsigned rem = BLAKE2S_BLOCK_SIZE - (pos & (BLAKE2S_BLOCK_SIZE - 1));
if (rem > size)
rem = (unsigned)size;
Blake2s_Update(&p->S[index], data, rem);
size -= rem;
data += rem;
pos += rem;
pos &= (BLAKE2S_BLOCK_SIZE * BLAKE2SP_PARALLEL_DEGREE - 1);
}
p->bufPos = pos;
}
void Blake2sp_Final(CBlake2sp *p, Byte *digest)
{
CBlake2s R;
unsigned i;
Blake2sp_Init_Spec(&R, 0, 1);
R.lastNode_f1 = BLAKE2S_FINAL_FLAG;
for (i = 0; i < BLAKE2SP_PARALLEL_DEGREE; i++)
{
Byte hash[BLAKE2S_DIGEST_SIZE];
Blake2s_Final(&p->S[i], hash);
Blake2s_Update(&R, hash, BLAKE2S_DIGEST_SIZE);
}
Blake2s_Final(&R, digest);
}

230
C/Bra.c Normal file
View file

@ -0,0 +1,230 @@
/* Bra.c -- Converters for RISC code
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h"
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
Byte *p;
const Byte *lim;
size &= ~(size_t)3;
ip += 4;
p = data;
lim = data + size;
if (encoding)
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
}
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v += ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
}
}
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
p += 4;
if (p[-1] == 0xEB)
break;
}
{
UInt32 v = GetUi32(p - 4);
v <<= 2;
v -= ip + (UInt32)(p - data);
v >>= 2;
v &= 0x00FFFFFF;
v |= 0xEB000000;
SetUi32(p - 4, v);
}
}
}
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
Byte *p;
const Byte *lim;
size &= ~(size_t)1;
p = data;
lim = data + size - 4;
if (encoding)
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v += cur;
}
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
}
}
for (;;)
{
UInt32 b1;
for (;;)
{
UInt32 b3;
if (p > lim)
return (SizeT)(p - data);
b1 = p[1];
b3 = p[3];
p += 2;
b1 ^= 8;
if ((b3 & b1) >= 0xF8)
break;
}
{
UInt32 v =
((UInt32)b1 << 19)
+ (((UInt32)p[1] & 0x7) << 8)
+ (((UInt32)p[-2] << 11))
+ (p[0]);
p += 2;
{
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
v -= cur;
}
/*
SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
SetUi16(p - 2, (UInt16)(v | 0xF800));
*/
p[-4] = (Byte)(v >> 11);
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
p[-2] = (Byte)v;
p[-1] = (Byte)(0xF8 | (v >> 8));
}
}
}
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
Byte *p;
const Byte *lim;
size &= ~(size_t)3;
ip -= 4;
p = data;
lim = data + size;
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
p += 4;
/* if ((v & 0xFC000003) == 0x48000001) */
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
break;
}
{
UInt32 v = GetBe32(p - 4);
if (encoding)
v += ip + (UInt32)(p - data);
else
v -= ip + (UInt32)(p - data);
v &= 0x03FFFFFF;
v |= 0x48000000;
SetBe32(p - 4, v);
}
}
}
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
Byte *p;
const Byte *lim;
size &= ~(size_t)3;
ip -= 4;
p = data;
lim = data + size;
for (;;)
{
for (;;)
{
if (p >= lim)
return (SizeT)(p - data);
/*
v = GetBe32(p);
p += 4;
m = v + ((UInt32)5 << 29);
m ^= (UInt32)7 << 29;
m += (UInt32)1 << 22;
if ((m & ((UInt32)0x1FF << 23)) == 0)
break;
*/
p += 4;
if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
(p[-4] == 0x7F && (p[-3] >= 0xC0)))
break;
}
{
UInt32 v = GetBe32(p - 4);
v <<= 2;
if (encoding)
v += ip + (UInt32)(p - data);
else
v -= ip + (UInt32)(p - data);
v &= 0x01FFFFFF;
v -= (UInt32)1 << 24;
v ^= 0xFF000000;
v >>= 2;
v |= 0x40000000;
SetBe32(p - 4, v);
}
}
}

64
C/Bra.h Normal file
View file

@ -0,0 +1,64 @@
/* Bra.h -- Branch converters for executables
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __BRA_H
#define __BRA_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
These functions convert relative addresses to absolute addresses
in CALL instructions to increase the compression ratio.
In:
data - data buffer
size - size of data
ip - current virtual Instruction Pinter (IP) value
state - state variable for x86 converter
encoding - 0 (for decoding), 1 (for encoding)
Out:
state - state variable for x86 converter
Returns:
The number of processed bytes. If you call these functions with multiple calls,
you must start next call with first byte after block of processed bytes.
Type Endian Alignment LookAhead
x86 little 1 4
ARMT little 2 2
ARM little 4 0
PPC big 4 0
SPARC big 4 0
IA64 little 16 0
size must be >= Alignment + LookAhead, if it's not last block.
If (size < Alignment + LookAhead), converter returns 0.
Example:
UInt32 ip = 0;
for ()
{
; size must be >= Alignment + LookAhead, if it's not last block
SizeT processed = Convert(data, size, ip, 1);
data += processed;
size -= processed;
ip += processed;
}
*/
#define x86_Convert_Init(state) { state = 0; }
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
EXTERN_C_END
#endif

82
C/Bra86.c Normal file
View file

@ -0,0 +1,82 @@
/* Bra86.c -- Converter for x86 code (BCJ)
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bra.h"
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
{
SizeT pos = 0;
UInt32 mask = *state & 7;
if (size < 5)
return 0;
size -= 4;
ip += 5;
for (;;)
{
Byte *p = data + pos;
const Byte *limit = data + size;
for (; p < limit; p++)
if ((*p & 0xFE) == 0xE8)
break;
{
SizeT d = (SizeT)(p - data) - pos;
pos = (SizeT)(p - data);
if (p >= limit)
{
*state = (d > 2 ? 0 : mask >> (unsigned)d);
return pos;
}
if (d > 2)
mask = 0;
else
{
mask >>= (unsigned)d;
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
{
mask = (mask >> 1) | 4;
pos++;
continue;
}
}
}
if (Test86MSByte(p[4]))
{
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
UInt32 cur = ip + (UInt32)pos;
pos += 5;
if (encoding)
v += cur;
else
v -= cur;
if (mask != 0)
{
unsigned sh = (mask & 6) << 2;
if (Test86MSByte((Byte)(v >> sh)))
{
v ^= (((UInt32)0x100 << sh) - 1);
if (encoding)
v += cur;
else
v -= cur;
}
mask = 0;
}
p[1] = (Byte)v;
p[2] = (Byte)(v >> 8);
p[3] = (Byte)(v >> 16);
p[4] = (Byte)(0 - ((v >> 24) & 1));
}
else
{
mask = (mask >> 1) | 4;
pos++;
}
}
}

53
C/BraIA64.c Normal file
View file

@ -0,0 +1,53 @@
/* BraIA64.c -- Converter for IA-64 code
2017-01-26 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "Bra.h"
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
{
SizeT i;
if (size < 16)
return 0;
size -= 16;
i = 0;
do
{
unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
if (m)
{
m++;
do
{
Byte *p = data + (i + (size_t)m * 5 - 8);
if (((p[3] >> m) & 15) == 5
&& (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
{
unsigned raw = GetUi32(p);
unsigned v = raw >> m;
v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
v <<= 4;
if (encoding)
v += ip + (UInt32)i;
else
v -= ip + (UInt32)i;
v >>= 4;
v &= 0x1FFFFF;
v += 0x700000;
v &= 0x8FFFFF;
raw &= ~((UInt32)0x8FFFFF << m);
raw |= (v << m);
SetUi32(p, raw);
}
}
while (++m <= 4);
}
i += 16;
}
while (i <= size);
return i;
}

515
C/BwtSort.c Normal file
View file

@ -0,0 +1,515 @@
/* BwtSort.c -- BWT block sorting
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "BwtSort.h"
#include "Sort.h"
/* #define BLOCK_SORT_USE_HEAP_SORT */
#define NO_INLINE MY_FAST_CALL
/* Don't change it !!! */
#define kNumHashBytes 2
#define kNumHashValues (1 << (kNumHashBytes * 8))
/* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */
#define kNumRefBitsMax 12
#define BS_TEMP_SIZE kNumHashValues
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
/* 32 Flags in UInt32 word */
#define kNumFlagsBits 5
#define kNumFlagsInWord (1 << kNumFlagsBits)
#define kFlagsMask (kNumFlagsInWord - 1)
#define kAllFlags 0xFFFFFFFF
#else
#define kNumBitsMax 20
#define kIndexMask ((1 << kNumBitsMax) - 1)
#define kNumExtraBits (32 - kNumBitsMax)
#define kNumExtra0Bits (kNumExtraBits - 2)
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
#define SetFinishedGroupSize(p, size) \
{ *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
if ((size) > (1 << kNumExtra0Bits)) { \
*(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
static void SetGroupSize(UInt32 *p, UInt32 size)
{
if (--size == 0)
return;
*p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
if (size >= (1 << kNumExtra0Bits))
{
*p |= 0x40000000;
p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
}
}
#endif
/*
SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks
"range" is not real range. It's only for optimization.
returns: 1 - if there are groups, 0 - no more groups
*/
static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
)
{
UInt32 *ind2 = Indices + groupOffset;
UInt32 *Groups;
if (groupSize <= 1)
{
/*
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetFinishedGroupSize(ind2, 1);
#endif
*/
return 0;
}
Groups = Indices + BlockSize + BS_TEMP_SIZE;
if (groupSize <= ((UInt32)1 << NumRefBits)
#ifndef BLOCK_SORT_USE_HEAP_SORT
&& groupSize <= range
#endif
)
{
UInt32 *temp = Indices + BlockSize;
UInt32 j;
UInt32 mask, thereAreGroups, group, cg;
{
UInt32 gPrev;
UInt32 gRes = 0;
{
UInt32 sp = ind2[0] + NumSortedBytes;
if (sp >= BlockSize) sp -= BlockSize;
gPrev = Groups[sp];
temp[0] = (gPrev << NumRefBits);
}
for (j = 1; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes;
UInt32 g;
if (sp >= BlockSize) sp -= BlockSize;
g = Groups[sp];
temp[j] = (g << NumRefBits) | j;
gRes |= (gPrev ^ g);
}
if (gRes == 0)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
}
HeapSort(temp, groupSize);
mask = (((UInt32)1 << NumRefBits) - 1);
thereAreGroups = 0;
group = groupOffset;
cg = (temp[0] >> NumRefBits);
temp[0] = ind2[temp[0] & mask];
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
for (j = 1; j < groupSize; j++)
{
UInt32 val = temp[j];
UInt32 cgCur = (val >> NumRefBits);
if (cgCur != cg)
{
cg = cgCur;
group = groupOffset + j;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = group - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#else
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
}
else
thereAreGroups = 1;
{
UInt32 ind = ind2[val & mask];
temp[j] = ind;
Groups[ind] = group;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
#endif
}
for (j = 0; j < groupSize; j++)
ind2[j] = temp[j];
return thereAreGroups;
}
/* Check that all strings are in one group (cannot sort) */
{
UInt32 group, j;
UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
group = Groups[sp];
for (j = 1; j < groupSize; j++)
{
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] != group)
break;
}
if (j == groupSize)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
}
#ifndef BLOCK_SORT_USE_HEAP_SORT
{
/* ---------- Range Sort ---------- */
UInt32 i;
UInt32 mid;
for (;;)
{
UInt32 j;
if (range <= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
return 1;
}
mid = left + ((range + 1) >> 1);
j = groupSize;
i = 0;
do
{
UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] >= mid)
{
for (j--; j > i; j--)
{
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] < mid)
{
UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp;
break;
}
}
if (i >= j)
break;
}
}
while (++i < j);
if (i == 0)
{
range = range - (mid - left);
left = mid;
}
else if (i == groupSize)
range = (mid - left);
else
break;
}
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = (groupOffset + i - 1);
UInt32 *Flags = Groups + BlockSize;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#endif
{
UInt32 j;
for (j = i; j < groupSize; j++)
Groups[ind2[j]] = groupOffset + i;
}
{
UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
}
}
#else
/* ---------- Heap Sort ---------- */
{
UInt32 j;
for (j = 0; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
ind2[j] = sp;
}
HeapSortRef(ind2, Groups, groupSize);
/* Write Flags */
{
UInt32 sp = ind2[0];
UInt32 group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
for (j = 1; j < groupSize; j++)
{
sp = ind2[j];
if (Groups[sp] != group)
{
group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = groupOffset + j - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#else
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
#endif
}
{
/* Write new Groups values and Check that there are groups */
UInt32 thereAreGroups = 0;
for (j = 0; j < groupSize; j++)
{
UInt32 group = groupOffset + j;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
if ((ind2[j] & 0x40000000) != 0)
subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
subGroupSize++;
for (;;)
{
UInt32 original = ind2[j];
UInt32 sp = original & kIndexMask;
if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp | (original & ~kIndexMask);
Groups[sp] = group;
if (--subGroupSize == 0)
break;
j++;
thereAreGroups = 1;
}
#else
UInt32 *Flags = Groups + BlockSize;
for (;;)
{
UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp;
Groups[sp] = group;
if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
break;
j++;
thereAreGroups = 1;
}
#endif
}
return thereAreGroups;
}
}
#endif
}
/* conditions: blockSize > 0 */
UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
{
UInt32 *counters = Indices + blockSize;
UInt32 i;
UInt32 *Groups;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags;
#endif
/* Radix-Sort for 2 bytes */
for (i = 0; i < kNumHashValues; i++)
counters[i] = 0;
for (i = 0; i < blockSize - 1; i++)
counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
counters[((UInt32)data[i] << 8) | data[0]]++;
Groups = counters + BS_TEMP_SIZE;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
Flags = Groups + blockSize;
{
UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
for (i = 0; i < numWords; i++)
Flags[i] = kAllFlags;
}
#endif
{
UInt32 sum = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 groupSize = counters[i];
if (groupSize > 0)
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 t = sum + groupSize - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
#endif
sum += groupSize;
}
counters[i] = sum - groupSize;
}
for (i = 0; i < blockSize - 1; i++)
Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
for (i = 0; i < blockSize - 1; i++)
Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 prev = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 prevGroupSize = counters[i] - prev;
if (prevGroupSize == 0)
continue;
SetGroupSize(Indices + prev, prevGroupSize);
prev = counters[i];
}
}
#endif
}
{
int NumRefBits;
UInt32 NumSortedBytes;
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
NumRefBits = 32 - NumRefBits;
if (NumRefBits > kNumRefBitsMax)
NumRefBits = kNumRefBitsMax;
for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 finishedGroupSize = 0;
#endif
UInt32 newLimit = 0;
for (i = 0; i < blockSize;)
{
UInt32 groupSize;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
{
i++;
continue;
}
for (groupSize = 1;
(Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
groupSize++);
groupSize++;
#else
groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
{
BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
if ((Indices[i] & 0x40000000) != 0)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
if (finishedGroup || groupSize == 1)
{
Indices[i - finishedGroupSize] &= kIndexMask;
if (finishedGroupSize > 1)
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
{
UInt32 newGroupSize = groupSize + finishedGroupSize;
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
finishedGroupSize = newGroupSize;
}
i += groupSize;
continue;
}
finishedGroupSize = 0;
}
#endif
if (NumSortedBytes >= blockSize)
{
UInt32 j;
for (j = 0; j < groupSize; j++)
{
UInt32 t = (i + j);
/* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
Groups[Indices[t]] = t;
}
}
else
if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, 0, blockSize
#endif
) != 0)
newLimit = i + groupSize;
i += groupSize;
}
if (newLimit == 0)
break;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
for (i = 0; i < blockSize;)
{
UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
if ((Indices[i] & 0x40000000) != 0)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
i += groupSize;
}
#endif
return Groups[0];
}

26
C/BwtSort.h Normal file
View file

@ -0,0 +1,26 @@
/* BwtSort.h -- BWT block sorting
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __BWT_SORT_H
#define __BWT_SORT_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */
/* #define BLOCK_SORT_EXTERNAL_FLAGS */
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5))
#else
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0
#endif
#define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16))
UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize);
EXTERN_C_END
#endif

43
C/Compiler.h Normal file
View file

@ -0,0 +1,43 @@
/* Compiler.h
2021-01-05 : Igor Pavlov : Public domain */
#ifndef __7Z_COMPILER_H
#define __7Z_COMPILER_H
#ifdef __clang__
#pragma clang diagnostic ignored "-Wunused-private-field"
#endif
#ifdef _MSC_VER
#ifdef UNDER_CE
#define RPC_NO_WINDOWS_H
/* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
#pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
#endif
#if _MSC_VER >= 1300
#pragma warning(disable : 4996) // This function or variable may be unsafe
#else
#pragma warning(disable : 4511) // copy constructor could not be generated
#pragma warning(disable : 4512) // assignment operator could not be generated
#pragma warning(disable : 4514) // unreferenced inline function has been removed
#pragma warning(disable : 4702) // unreachable code
#pragma warning(disable : 4710) // not inlined
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
#endif
#ifdef __clang__
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
#endif
#endif
#define UNUSED_VAR(x) (void)x;
/* #define UNUSED_VAR(x) x=x; */
#endif

478
C/CpuArch.c Normal file
View file

@ -0,0 +1,478 @@
/* CpuArch.c -- CPU specific code
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
#define USE_ASM
#endif
#if !defined(USE_ASM) && _MSC_VER >= 1500
#include <intrin.h>
#endif
#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
static UInt32 CheckFlag(UInt32 flag)
{
#ifdef _MSC_VER
__asm pushfd;
__asm pop EAX;
__asm mov EDX, EAX;
__asm xor EAX, flag;
__asm push EAX;
__asm popfd;
__asm pushfd;
__asm pop EAX;
__asm xor EAX, EDX;
__asm push EDX;
__asm popfd;
__asm and flag, EAX;
#else
__asm__ __volatile__ (
"pushf\n\t"
"pop %%EAX\n\t"
"movl %%EAX,%%EDX\n\t"
"xorl %0,%%EAX\n\t"
"push %%EAX\n\t"
"popf\n\t"
"pushf\n\t"
"pop %%EAX\n\t"
"xorl %%EDX,%%EAX\n\t"
"push %%EDX\n\t"
"popf\n\t"
"andl %%EAX, %0\n\t":
"=c" (flag) : "c" (flag) :
"%eax", "%edx");
#endif
return flag;
}
#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
#else
#define CHECK_CPUID_IS_SUPPORTED
#endif
#ifndef USE_ASM
#ifdef _MSC_VER
#if _MSC_VER >= 1600
#define MY__cpuidex __cpuidex
#else
/*
__cpuid (function == 4) requires subfunction number in ECX.
MSDN: The __cpuid intrinsic clears the ECX register before calling the cpuid instruction.
__cpuid() in new MSVC clears ECX.
__cpuid() in old MSVC (14.00) doesn't clear ECX
We still can use __cpuid for low (function) values that don't require ECX,
but __cpuid() in old MSVC will be incorrect for some function values: (function == 4).
So here we use the hack for old MSVC to send (subFunction) in ECX register to cpuid instruction,
where ECX value is first parameter for FAST_CALL / NO_INLINE function,
So the caller of MY__cpuidex_HACK() sets ECX as subFunction, and
old MSVC for __cpuid() doesn't change ECX and cpuid instruction gets (subFunction) value.
DON'T remove MY_NO_INLINE and MY_FAST_CALL for MY__cpuidex_HACK() !!!
*/
static
MY_NO_INLINE
void MY_FAST_CALL MY__cpuidex_HACK(UInt32 subFunction, int *CPUInfo, UInt32 function)
{
UNUSED_VAR(subFunction);
__cpuid(CPUInfo, function);
}
#define MY__cpuidex(info, func, func2) MY__cpuidex_HACK(func2, info, func)
#pragma message("======== MY__cpuidex_HACK WAS USED ========")
#endif
#else
#define MY__cpuidex(info, func, func2) __cpuid(info, func)
#pragma message("======== (INCORRECT ?) cpuid WAS USED ========")
#endif
#endif
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
{
#ifdef USE_ASM
#ifdef _MSC_VER
UInt32 a2, b2, c2, d2;
__asm xor EBX, EBX;
__asm xor ECX, ECX;
__asm xor EDX, EDX;
__asm mov EAX, function;
__asm cpuid;
__asm mov a2, EAX;
__asm mov b2, EBX;
__asm mov c2, ECX;
__asm mov d2, EDX;
*a = a2;
*b = b2;
*c = c2;
*d = d2;
#else
__asm__ __volatile__ (
#if defined(MY_CPU_AMD64) && defined(__PIC__)
"mov %%rbx, %%rdi;"
"cpuid;"
"xchg %%rbx, %%rdi;"
: "=a" (*a) ,
"=D" (*b) ,
#elif defined(MY_CPU_X86) && defined(__PIC__)
"mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*a) ,
"=D" (*b) ,
#else
"cpuid"
: "=a" (*a) ,
"=b" (*b) ,
#endif
"=c" (*c) ,
"=d" (*d)
: "0" (function), "c"(0) ) ;
#endif
#else
int CPUInfo[4];
MY__cpuidex(CPUInfo, (int)function, 0);
*a = (UInt32)CPUInfo[0];
*b = (UInt32)CPUInfo[1];
*c = (UInt32)CPUInfo[2];
*d = (UInt32)CPUInfo[3];
#endif
}
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
{
CHECK_CPUID_IS_SUPPORTED
MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
return True;
}
static const UInt32 kVendors[][3] =
{
{ 0x756E6547, 0x49656E69, 0x6C65746E},
{ 0x68747541, 0x69746E65, 0x444D4163},
{ 0x746E6543, 0x48727561, 0x736C7561}
};
int x86cpuid_GetFirm(const Cx86cpuid *p)
{
unsigned i;
for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
{
const UInt32 *v = kVendors[i];
if (v[0] == p->vendor[0] &&
v[1] == p->vendor[1] &&
v[2] == p->vendor[2])
return (int)i;
}
return -1;
}
BoolInt CPU_Is_InOrder()
{
Cx86cpuid p;
int firm;
UInt32 family, model;
if (!x86cpuid_CheckAndRead(&p))
return True;
family = x86cpuid_GetFamily(p.ver);
model = x86cpuid_GetModel(p.ver);
firm = x86cpuid_GetFirm(&p);
switch (firm)
{
case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
/* In-Order Atom CPU */
model == 0x1C /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
|| model == 0x26 /* 45 nm, Z6xx */
|| model == 0x27 /* 32 nm, Z2460 */
|| model == 0x35 /* 32 nm, Z2760 */
|| model == 0x36 /* 32 nm, N2xxx, D2xxx */
)));
case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
}
return True;
}
#if !defined(MY_CPU_AMD64) && defined(_WIN32)
#include <Windows.h>
static BoolInt CPU_Sys_Is_SSE_Supported()
{
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi))
return False;
return (vi.dwMajorVersion >= 5);
}
#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
#else
#define CHECK_SYS_SSE_SUPPORT
#endif
static UInt32 X86_CPUID_ECX_Get_Flags()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return 0;
return p.c;
}
BoolInt CPU_IsSupported_AES()
{
return (X86_CPUID_ECX_Get_Flags() >> 25) & 1;
}
BoolInt CPU_IsSupported_SSSE3()
{
return (X86_CPUID_ECX_Get_Flags() >> 9) & 1;
}
BoolInt CPU_IsSupported_SSE41()
{
return (X86_CPUID_ECX_Get_Flags() >> 19) & 1;
}
BoolInt CPU_IsSupported_SHA()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
return (d[1] >> 29) & 1;
}
}
// #include <stdio.h>
#ifdef _WIN32
#include <Windows.h>
#endif
BoolInt CPU_IsSupported_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False;
#endif
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5); // avx2
}
}
BoolInt CPU_IsSupported_VAES_AVX2()
{
Cx86cpuid p;
CHECK_SYS_SSE_SUPPORT
#ifdef _WIN32
#define MY__PF_XSAVE_ENABLED 17
if (!IsProcessorFeaturePresent(MY__PF_XSAVE_ENABLED))
return False;
#endif
if (!x86cpuid_CheckAndRead(&p))
return False;
if (p.maxFunc < 7)
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(7, &d[0], &d[1], &d[2], &d[3]);
// printf("\ncpuid(7): ebx=%8x ecx=%8x\n", d[1], d[2]);
return 1
& (d[1] >> 5) // avx2
// & (d[1] >> 31) // avx512vl
& (d[2] >> 9); // vaes // VEX-256/EVEX
}
}
BoolInt CPU_IsSupported_PageGB()
{
Cx86cpuid cpuid;
if (!x86cpuid_CheckAndRead(&cpuid))
return False;
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
if (d[0] < 0x80000001)
return False;
}
{
UInt32 d[4] = { 0 };
MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
return (d[3] >> 26) & 1;
}
}
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _WIN32
#include <Windows.h>
BoolInt CPU_IsSupported_CRC32() { return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_CRYPTO() { return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
BoolInt CPU_IsSupported_NEON() { return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) ? 1 : 0; }
#else
#if defined(__APPLE__)
/*
#include <stdio.h>
#include <string.h>
static void Print_sysctlbyname(const char *name)
{
size_t bufSize = 256;
char buf[256];
int res = sysctlbyname(name, &buf, &bufSize, NULL, 0);
{
int i;
printf("\nres = %d : %s : '%s' : bufSize = %d, numeric", res, name, buf, (unsigned)bufSize);
for (i = 0; i < 20; i++)
printf(" %2x", (unsigned)(Byte)buf[i]);
}
}
*/
static BoolInt My_sysctlbyname_Get_BoolInt(const char *name)
{
UInt32 val = 0;
if (My_sysctlbyname_Get_UInt32(name, &val) == 0 && val == 1)
return 1;
return 0;
}
/*
Print_sysctlbyname("hw.pagesize");
Print_sysctlbyname("machdep.cpu.brand_string");
*/
BoolInt CPU_IsSupported_CRC32(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.armv8_crc32");
}
BoolInt CPU_IsSupported_NEON(void)
{
return My_sysctlbyname_Get_BoolInt("hw.optional.neon");
}
#ifdef MY_CPU_ARM64
#define APPLE_CRYPTO_SUPPORT_VAL 1
#else
#define APPLE_CRYPTO_SUPPORT_VAL 0
#endif
BoolInt CPU_IsSupported_SHA1(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_SHA2(void) { return APPLE_CRYPTO_SUPPORT_VAL; }
BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; }
#else // __APPLE__
#include <sys/auxv.h>
#define USE_HWCAP
#ifdef USE_HWCAP
#include <asm/hwcap.h>
#define MY_HWCAP_CHECK_FUNC_2(name1, name2) \
BoolInt CPU_IsSupported_ ## name1() { return (getauxval(AT_HWCAP) & (HWCAP_ ## name2)) ? 1 : 0; }
#ifdef MY_CPU_ARM64
#define MY_HWCAP_CHECK_FUNC(name) \
MY_HWCAP_CHECK_FUNC_2(name, name)
MY_HWCAP_CHECK_FUNC_2(NEON, ASIMD)
// MY_HWCAP_CHECK_FUNC (ASIMD)
#elif defined(MY_CPU_ARM)
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return (getauxval(AT_HWCAP2) & (HWCAP2_ ## name)) ? 1 : 0; }
MY_HWCAP_CHECK_FUNC_2(NEON, NEON)
#endif
#else // USE_HWCAP
#define MY_HWCAP_CHECK_FUNC(name) \
BoolInt CPU_IsSupported_ ## name() { return 0; }
MY_HWCAP_CHECK_FUNC(NEON)
#endif // USE_HWCAP
MY_HWCAP_CHECK_FUNC (CRC32)
MY_HWCAP_CHECK_FUNC (SHA1)
MY_HWCAP_CHECK_FUNC (SHA2)
MY_HWCAP_CHECK_FUNC (AES)
#endif // __APPLE__
#endif // _WIN32
#endif // MY_CPU_ARM_OR_ARM64
#ifdef __APPLE__
#include <sys/sysctl.h>
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize)
{
return sysctlbyname(name, buf, bufSize, NULL, 0);
}
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val)
{
size_t bufSize = sizeof(*val);
int res = My_sysctlbyname_Get(name, val, &bufSize);
if (res == 0 && bufSize != sizeof(*val))
return EFAULT;
return res;
}
#endif

442
C/CpuArch.h Normal file
View file

@ -0,0 +1,442 @@
/* CpuArch.h -- CPU specific code
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __CPU_ARCH_H
#define __CPU_ARCH_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
MY_CPU_LE means that CPU is LITTLE ENDIAN.
MY_CPU_BE means that CPU is BIG ENDIAN.
If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
MY_CPU_64BIT means that processor can work with 64-bit registers.
MY_CPU_64BIT can be used to select fast code branch
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
*/
#if defined(_M_X64) \
|| defined(_M_AMD64) \
|| defined(__x86_64__) \
|| defined(__AMD64__) \
|| defined(__amd64__)
#define MY_CPU_AMD64
#ifdef __ILP32__
#define MY_CPU_NAME "x32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "x64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#endif
#if defined(_M_IX86) \
|| defined(__i386__)
#define MY_CPU_X86
#define MY_CPU_NAME "x86"
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
#if defined(_M_ARM64) \
|| defined(__AARCH64EL__) \
|| defined(__AARCH64EB__) \
|| defined(__aarch64__)
#define MY_CPU_ARM64
#define MY_CPU_NAME "arm64"
#define MY_CPU_64BIT
#endif
#if defined(_M_ARM) \
|| defined(_M_ARM_NT) \
|| defined(_M_ARMT) \
|| defined(__arm__) \
|| defined(__thumb__) \
|| defined(__ARMEL__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEL__) \
|| defined(__THUMBEB__)
#define MY_CPU_ARM
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
#define MY_CPU_NAME "armt"
#else
#define MY_CPU_NAME "arm"
#endif
/* #define MY_CPU_32BIT */
#define MY_CPU_SIZEOF_POINTER 4
#endif
#if defined(_M_IA64) \
|| defined(__ia64__)
#define MY_CPU_IA64
#define MY_CPU_NAME "ia64"
#define MY_CPU_64BIT
#endif
#if defined(__mips64) \
|| defined(__mips64__) \
|| (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
#define MY_CPU_NAME "mips64"
#define MY_CPU_64BIT
#elif defined(__mips__)
#define MY_CPU_NAME "mips"
/* #define MY_CPU_32BIT */
#endif
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(__ppc__) \
|| defined(__powerpc__) \
|| defined(__PPC__) \
|| defined(_POWER)
#if defined(__ppc64__) \
|| defined(__powerpc64__) \
|| defined(_LP64) \
|| defined(__64BIT__)
#ifdef __ILP32__
#define MY_CPU_NAME "ppc64-32"
#define MY_CPU_SIZEOF_POINTER 4
#else
#define MY_CPU_NAME "ppc64"
#define MY_CPU_SIZEOF_POINTER 8
#endif
#define MY_CPU_64BIT
#else
#define MY_CPU_NAME "ppc"
#define MY_CPU_SIZEOF_POINTER 4
/* #define MY_CPU_32BIT */
#endif
#endif
#if defined(__sparc64__)
#define MY_CPU_NAME "sparc64"
#define MY_CPU_64BIT
#elif defined(__sparc__)
#define MY_CPU_NAME "sparc"
/* #define MY_CPU_32BIT */
#endif
#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
#define MY_CPU_X86_OR_AMD64
#endif
#if defined(MY_CPU_ARM) || defined(MY_CPU_ARM64)
#define MY_CPU_ARM_OR_ARM64
#endif
#ifdef _WIN32
#ifdef MY_CPU_ARM
#define MY_CPU_ARM_LE
#endif
#ifdef MY_CPU_ARM64
#define MY_CPU_ARM64_LE
#endif
#ifdef _M_IA64
#define MY_CPU_IA64_LE
#endif
#endif
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM_LE) \
|| defined(MY_CPU_ARM64_LE) \
|| defined(MY_CPU_IA64_LE) \
|| defined(__LITTLE_ENDIAN__) \
|| defined(__ARMEL__) \
|| defined(__THUMBEL__) \
|| defined(__AARCH64EL__) \
|| defined(__MIPSEL__) \
|| defined(__MIPSEL) \
|| defined(_MIPSEL) \
|| defined(__BFIN__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
#define MY_CPU_LE
#endif
#if defined(__BIG_ENDIAN__) \
|| defined(__ARMEB__) \
|| defined(__THUMBEB__) \
|| defined(__AARCH64EB__) \
|| defined(__MIPSEB__) \
|| defined(__MIPSEB) \
|| defined(_MIPSEB) \
|| defined(__m68k__) \
|| defined(__s390__) \
|| defined(__s390x__) \
|| defined(__zarch__) \
|| (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
#define MY_CPU_BE
#endif
#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
#error Stop_Compiling_Bad_Endian
#endif
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
#error Stop_Compiling_Bad_32_64_BIT
#endif
#ifdef __SIZEOF_POINTER__
#ifdef MY_CPU_SIZEOF_POINTER
#if MY_CPU_SIZEOF_POINTER != __SIZEOF_POINTER__
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#else
#define MY_CPU_SIZEOF_POINTER __SIZEOF_POINTER__
#endif
#endif
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
#if defined (_LP64)
#error Stop_Compiling_Bad_MY_CPU_PTR_SIZE
#endif
#endif
#ifdef _MSC_VER
#if _MSC_VER >= 1300
#define MY_CPU_pragma_pack_push_1 __pragma(pack(push, 1))
#define MY_CPU_pragma_pop __pragma(pack(pop))
#else
#define MY_CPU_pragma_pack_push_1
#define MY_CPU_pragma_pop
#endif
#else
#ifdef __xlC__
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(1)")
#define MY_CPU_pragma_pop _Pragma("pack()")
#else
#define MY_CPU_pragma_pack_push_1 _Pragma("pack(push, 1)")
#define MY_CPU_pragma_pop _Pragma("pack(pop)")
#endif
#endif
#ifndef MY_CPU_NAME
#ifdef MY_CPU_LE
#define MY_CPU_NAME "LE"
#elif defined(MY_CPU_BE)
#define MY_CPU_NAME "BE"
#else
/*
#define MY_CPU_NAME ""
*/
#endif
#endif
#ifdef MY_CPU_LE
#if defined(MY_CPU_X86_OR_AMD64) \
|| defined(MY_CPU_ARM64)
#define MY_CPU_LE_UNALIGN
#define MY_CPU_LE_UNALIGN_64
#elif defined(__ARM_FEATURE_UNALIGNED)
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
So we can't use unaligned 64-bit operations. */
#define MY_CPU_LE_UNALIGN
#endif
#endif
#ifdef MY_CPU_LE_UNALIGN
#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
#ifdef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
#endif
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
#ifdef MY_CPU_LE_UNALIGN_64
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
#endif
#else
#define GetUi16(p) ( (UInt16) ( \
((const Byte *)(p))[0] | \
((UInt16)((const Byte *)(p))[1] << 8) ))
#define GetUi32(p) ( \
((const Byte *)(p))[0] | \
((UInt32)((const Byte *)(p))[1] << 8) | \
((UInt32)((const Byte *)(p))[2] << 16) | \
((UInt32)((const Byte *)(p))[3] << 24))
#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); }
#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)_vvv_; \
_ppp_[1] = (Byte)(_vvv_ >> 8); \
_ppp_[2] = (Byte)(_vvv_ >> 16); \
_ppp_[3] = (Byte)(_vvv_ >> 24); }
#endif
#ifndef MY_CPU_LE_UNALIGN_64
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
#endif
#ifdef __has_builtin
#define MY__has_builtin(x) __has_builtin(x)
#else
#define MY__has_builtin(x) 0
#endif
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ushort)
#pragma intrinsic(_byteswap_ulong)
#pragma intrinsic(_byteswap_uint64)
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
#elif defined(MY_CPU_LE_UNALIGN) && ( \
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
#else
#define GetBe32(p) ( \
((UInt32)((const Byte *)(p))[0] << 24) | \
((UInt32)((const Byte *)(p))[1] << 16) | \
((UInt32)((const Byte *)(p))[2] << 8) | \
((const Byte *)(p))[3] )
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
_ppp_[0] = (Byte)(_vvv_ >> 24); \
_ppp_[1] = (Byte)(_vvv_ >> 16); \
_ppp_[2] = (Byte)(_vvv_ >> 8); \
_ppp_[3] = (Byte)_vvv_; }
#endif
#ifndef GetBe16
#define GetBe16(p) ( (UInt16) ( \
((UInt16)((const Byte *)(p))[0] << 8) | \
((const Byte *)(p))[1] ))
#endif
#ifdef MY_CPU_X86_OR_AMD64
typedef struct
{
UInt32 maxFunc;
UInt32 vendor[3];
UInt32 ver;
UInt32 b;
UInt32 c;
UInt32 d;
} Cx86cpuid;
enum
{
CPU_FIRM_INTEL,
CPU_FIRM_AMD,
CPU_FIRM_VIA
};
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
int x86cpuid_GetFirm(const Cx86cpuid *p);
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
#define x86cpuid_GetStepping(ver) (ver & 0xF)
BoolInt CPU_Is_InOrder(void);
BoolInt CPU_IsSupported_AES(void);
BoolInt CPU_IsSupported_AVX2(void);
BoolInt CPU_IsSupported_VAES_AVX2(void);
BoolInt CPU_IsSupported_SSSE3(void);
BoolInt CPU_IsSupported_SSE41(void);
BoolInt CPU_IsSupported_SHA(void);
BoolInt CPU_IsSupported_PageGB(void);
#elif defined(MY_CPU_ARM_OR_ARM64)
BoolInt CPU_IsSupported_CRC32(void);
BoolInt CPU_IsSupported_NEON(void);
#if defined(_WIN32)
BoolInt CPU_IsSupported_CRYPTO(void);
#define CPU_IsSupported_SHA1 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_SHA2 CPU_IsSupported_CRYPTO
#define CPU_IsSupported_AES CPU_IsSupported_CRYPTO
#else
BoolInt CPU_IsSupported_SHA1(void);
BoolInt CPU_IsSupported_SHA2(void);
BoolInt CPU_IsSupported_AES(void);
#endif
#endif
#if defined(__APPLE__)
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
#endif
EXTERN_C_END
#endif

169
C/Delta.c Normal file
View file

@ -0,0 +1,169 @@
/* Delta.c -- Delta converter
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Delta.h"
void Delta_Init(Byte *state)
{
unsigned i;
for (i = 0; i < DELTA_STATE_SIZE; i++)
state[i] = 0;
}
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
Byte temp[DELTA_STATE_SIZE];
if (size == 0)
return;
{
unsigned i = 0;
do
temp[i] = state[i];
while (++i != delta);
}
if (size <= delta)
{
unsigned i = 0, k;
do
{
Byte b = *data;
*data++ = (Byte)(b - temp[i]);
temp[i] = b;
}
while (++i != size);
k = 0;
do
{
if (i == delta)
i = 0;
state[k] = temp[i++];
}
while (++k != delta);
return;
}
{
Byte *p = data + size - delta;
{
unsigned i = 0;
do
state[i] = *p++;
while (++i != delta);
}
{
const Byte *lim = data + delta;
ptrdiff_t dif = -(ptrdiff_t)delta;
if (((ptrdiff_t)size + dif) & 1)
{
--p; *p = (Byte)(*p - p[dif]);
}
while (p != lim)
{
--p; *p = (Byte)(*p - p[dif]);
--p; *p = (Byte)(*p - p[dif]);
}
dif = -dif;
do
{
--p; *p = (Byte)(*p - temp[--dif]);
}
while (dif != 0);
}
}
}
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
{
unsigned i;
const Byte *lim;
if (size == 0)
return;
i = 0;
lim = data + size;
if (size <= delta)
{
do
*data = (Byte)(*data + state[i++]);
while (++data != lim);
for (; delta != i; state++, delta--)
*state = state[i];
data -= i;
}
else
{
/*
#define B(n) b ## n
#define I(n) Byte B(n) = state[n];
#define U(n) { B(n) = (Byte)((B(n)) + *data++); data[-1] = (B(n)); }
#define F(n) if (data != lim) { U(n) }
if (delta == 1)
{
I(0)
if ((lim - data) & 1) { U(0) }
while (data != lim) { U(0) U(0) }
data -= 1;
}
else if (delta == 2)
{
I(0) I(1)
lim -= 1; while (data < lim) { U(0) U(1) }
lim += 1; F(0)
data -= 2;
}
else if (delta == 3)
{
I(0) I(1) I(2)
lim -= 2; while (data < lim) { U(0) U(1) U(2) }
lim += 2; F(0) F(1)
data -= 3;
}
else if (delta == 4)
{
I(0) I(1) I(2) I(3)
lim -= 3; while (data < lim) { U(0) U(1) U(2) U(3) }
lim += 3; F(0) F(1) F(2)
data -= 4;
}
else
*/
{
do
{
*data = (Byte)(*data + state[i++]);
data++;
}
while (i != delta);
{
ptrdiff_t dif = -(ptrdiff_t)delta;
do
*data = (Byte)(*data + data[dif]);
while (++data != lim);
data += dif;
}
}
}
do
*state++ = *data;
while (++data != lim);
}

19
C/Delta.h Normal file
View file

@ -0,0 +1,19 @@
/* Delta.h -- Delta converter
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __DELTA_H
#define __DELTA_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define DELTA_STATE_SIZE 256
void Delta_Init(Byte *state);
void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);
void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);
EXTERN_C_END
#endif

110
C/DllSecur.c Normal file
View file

@ -0,0 +1,110 @@
/* DllSecur.c -- DLL loading security
2021-12-25 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#include <Windows.h>
#include "DllSecur.h"
#ifndef UNDER_CE
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
#define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800
static const char * const g_Dlls =
#ifndef _CONSOLE
"UXTHEME\0"
#endif
"USERENV\0"
"SETUPAPI\0"
"APPHELP\0"
"PROPSYS\0"
"DWMAPI\0"
"CRYPTBASE\0"
"OLEACC\0"
"CLBCATQ\0"
"VERSION\0"
;
#endif
// #define MY_CAST_FUNC (void(*)())
#define MY_CAST_FUNC
void My_SetDefaultDllDirectories()
{
#ifndef UNDER_CE
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
#endif
}
void LoadSecurityDlls()
{
#ifndef UNDER_CE
wchar_t buf[MAX_PATH + 100];
{
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
OSVERSIONINFO vi;
vi.dwOSVersionInfoSize = sizeof(vi);
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
{
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
if (setDllDirs)
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
return;
}
}
{
unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2);
if (len == 0 || len > MAX_PATH)
return;
}
{
const char *dll;
unsigned pos = (unsigned)lstrlenW(buf);
if (buf[pos - 1] != '\\')
buf[pos++] = '\\';
for (dll = g_Dlls; dll[0] != 0;)
{
unsigned k = 0;
for (;;)
{
char c = *dll++;
buf[pos + k] = (Byte)c;
k++;
if (c == 0)
break;
}
lstrcatW(buf, L".dll");
LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
}
}
#endif
}
#endif

20
C/DllSecur.h Normal file
View file

@ -0,0 +1,20 @@
/* DllSecur.h -- DLL loading for security
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __DLL_SECUR_H
#define __DLL_SECUR_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#ifdef _WIN32
void My_SetDefaultDllDirectories(void);
void LoadSecurityDlls(void);
#endif
EXTERN_C_END
#endif

148
C/HuffEnc.c Normal file
View file

@ -0,0 +1,148 @@
/* HuffEnc.c -- functions for Huffman encoding
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "HuffEnc.h"
#include "Sort.h"
#define kMaxLen 16
#define NUM_BITS 10
#define MASK (((unsigned)1 << NUM_BITS) - 1)
#define NUM_COUNTERS 64
#define HUFFMAN_SPEED_OPT
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen)
{
UInt32 num = 0;
/* if (maxLen > 10) maxLen = 10; */
{
UInt32 i;
#ifdef HUFFMAN_SPEED_OPT
UInt32 counters[NUM_COUNTERS];
for (i = 0; i < NUM_COUNTERS; i++)
counters[i] = 0;
for (i = 0; i < numSymbols; i++)
{
UInt32 freq = freqs[i];
counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++;
}
for (i = 1; i < NUM_COUNTERS; i++)
{
UInt32 temp = counters[i];
counters[i] = num;
num += temp;
}
for (i = 0; i < numSymbols; i++)
{
UInt32 freq = freqs[i];
if (freq == 0)
lens[i] = 0;
else
p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS);
}
counters[0] = 0;
HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]);
#else
for (i = 0; i < numSymbols; i++)
{
UInt32 freq = freqs[i];
if (freq == 0)
lens[i] = 0;
else
p[num++] = i | (freq << NUM_BITS);
}
HeapSort(p, num);
#endif
}
if (num < 2)
{
unsigned minCode = 0;
unsigned maxCode = 1;
if (num == 1)
{
maxCode = (unsigned)p[0] & MASK;
if (maxCode == 0)
maxCode++;
}
p[minCode] = 0;
p[maxCode] = 1;
lens[minCode] = lens[maxCode] = 1;
return;
}
{
UInt32 b, e, i;
i = b = e = 0;
do
{
UInt32 n, m, freq;
n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
freq = (p[n] & ~MASK);
p[n] = (p[n] & MASK) | (e << NUM_BITS);
m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
freq += (p[m] & ~MASK);
p[m] = (p[m] & MASK) | (e << NUM_BITS);
p[e] = (p[e] & MASK) | freq;
e++;
}
while (num - e > 1);
{
UInt32 lenCounters[kMaxLen + 1];
for (i = 0; i <= kMaxLen; i++)
lenCounters[i] = 0;
p[--e] &= MASK;
lenCounters[1] = 2;
while (e > 0)
{
UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
p[e] = (p[e] & MASK) | (len << NUM_BITS);
if (len >= maxLen)
for (len = maxLen - 1; lenCounters[len] == 0; len--);
lenCounters[len]--;
lenCounters[(size_t)len + 1] += 2;
}
{
UInt32 len;
i = 0;
for (len = maxLen; len != 0; len--)
{
UInt32 k;
for (k = lenCounters[len]; k != 0; k--)
lens[p[i++] & MASK] = (Byte)len;
}
}
{
UInt32 nextCodes[kMaxLen + 1];
{
UInt32 code = 0;
UInt32 len;
for (len = 1; len <= kMaxLen; len++)
nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1;
}
/* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
{
UInt32 k;
for (k = 0; k < numSymbols; k++)
p[k] = nextCodes[lens[k]]++;
}
}
}
}
}

23
C/HuffEnc.h Normal file
View file

@ -0,0 +1,23 @@
/* HuffEnc.h -- Huffman encoding
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __HUFF_ENC_H
#define __HUFF_ENC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/*
Conditions:
num <= 1024 = 2 ^ NUM_BITS
Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS)
maxLen <= 16 = kMaxLen
Num_Items(p) >= HUFFMAN_TEMP_SIZE(num)
*/
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen);
EXTERN_C_END
#endif

1628
C/LzFind.c Normal file

File diff suppressed because it is too large Load diff

136
C/LzFind.h Normal file
View file

@ -0,0 +1,136 @@
/* LzFind.h -- Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_H
#define __LZ_FIND_H
#include "7zTypes.h"
EXTERN_C_BEGIN
typedef UInt32 CLzRef;
typedef struct _CMatchFinder
{
Byte *buffer;
UInt32 pos;
UInt32 posLimit;
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
UInt32 lenLimit;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
Byte streamEndWasReached;
Byte btMode;
Byte bigHash;
Byte directInput;
UInt32 matchMaxLen;
CLzRef *hash;
CLzRef *son;
UInt32 hashMask;
UInt32 cutValue;
Byte *bufferBase;
ISeqInStream *stream;
UInt32 blockSize;
UInt32 keepSizeBefore;
UInt32 keepSizeAfter;
UInt32 numHashBytes;
size_t directInputRem;
UInt32 historySize;
UInt32 fixedHashSize;
UInt32 hashSizeSum;
SRes result;
UInt32 crc[256];
size_t numRefs;
UInt64 expectedDataSize;
} CMatchFinder;
#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((const Byte *)(p)->buffer)
#define Inline_MatchFinder_GetNumAvailableBytes(p) ((UInt32)((p)->streamPos - (p)->pos))
/*
#define Inline_MatchFinder_IsFinishedOK(p) \
((p)->streamEndWasReached \
&& (p)->streamPos == (p)->pos \
&& (!(p)->directInput || (p)->directInputRem == 0))
*/
int MatchFinder_NeedMove(CMatchFinder *p);
/* Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p); */
void MatchFinder_MoveBlock(CMatchFinder *p);
void MatchFinder_ReadIfRequired(CMatchFinder *p);
void MatchFinder_Construct(CMatchFinder *p);
/* Conditions:
historySize <= 3 GB
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
*/
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
ISzAllocPtr alloc);
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
/*
#define Inline_MatchFinder_InitPos(p, val) \
(p)->pos = (val); \
(p)->streamPos = (val);
*/
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
(p)->pos -= (subValue); \
(p)->streamPos -= (subValue);
UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
UInt32 *distances, UInt32 maxLen);
/*
Conditions:
Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
*/
typedef void (*Mf_Init_Func)(void *object);
typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
typedef void (*Mf_Skip_Func)(void *object, UInt32);
typedef struct _IMatchFinder
{
Mf_Init_Func Init;
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
Mf_GetMatches_Func GetMatches;
Mf_Skip_Func Skip;
} IMatchFinder2;
void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
void MatchFinder_Init_LowHash(CMatchFinder *p);
void MatchFinder_Init_HighHash(CMatchFinder *p);
void MatchFinder_Init_4(CMatchFinder *p);
void MatchFinder_Init(CMatchFinder *p);
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
void LzFindPrepare(void);
EXTERN_C_END
#endif

1400
C/LzFindMt.c Normal file

File diff suppressed because it is too large Load diff

109
C/LzFindMt.h Normal file
View file

@ -0,0 +1,109 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2021-07-12 : Igor Pavlov : Public domain */
#ifndef __LZ_FIND_MT_H
#define __LZ_FIND_MT_H
#include "LzFind.h"
#include "Threads.h"
EXTERN_C_BEGIN
typedef struct _CMtSync
{
UInt32 numProcessedBlocks;
CThread thread;
UInt64 affinity;
BoolInt wasCreated;
BoolInt needStart;
BoolInt csWasInitialized;
BoolInt csWasEntered;
BoolInt exit;
BoolInt stopWriting;
CAutoResetEvent canStart;
CAutoResetEvent wasStopped;
CSemaphore freeSemaphore;
CSemaphore filledSemaphore;
CCriticalSection cs;
// UInt32 numBlocks_Sent;
} CMtSync;
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
#define kMtCacheLineDummy 128
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
typedef struct _CMatchFinderMt
{
/* LZ */
const Byte *pointerToCurPos;
UInt32 *btBuf;
const UInt32 *btBufPos;
const UInt32 *btBufPosLimit;
UInt32 lzPos;
UInt32 btNumAvailBytes;
UInt32 *hash;
UInt32 fixedHashSize;
// UInt32 hash4Mask;
UInt32 historySize;
const UInt32 *crc;
Mf_Mix_Matches MixMatchesFunc;
UInt32 failure_LZ_BT; // failure in BT transfered to LZ
// UInt32 failure_LZ_LZ; // failure in LZ tables
UInt32 failureBuf[1];
// UInt32 crc[256];
/* LZ + BT */
CMtSync btSync;
Byte btDummy[kMtCacheLineDummy];
/* BT */
UInt32 *hashBuf;
UInt32 hashBufPos;
UInt32 hashBufPosLimit;
UInt32 hashNumAvail;
UInt32 failure_BT;
CLzRef *son;
UInt32 matchMaxLen;
UInt32 numHashBytes;
UInt32 pos;
const Byte *buffer;
UInt32 cyclicBufferPos;
UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
UInt32 cutValue;
/* BT + Hash */
CMtSync hashSync;
/* Byte hashDummy[kMtCacheLineDummy]; */
/* Hash */
Mf_GetHeads GetHeadsFunc;
CMatchFinder *MatchFinder;
// CMatchFinder MatchFinder;
} CMatchFinderMt;
// only for Mt part
void MatchFinderMt_Construct(CMatchFinderMt *p);
void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc);
SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc);
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable);
/* call MatchFinderMt_InitMt() before IMatchFinder::Init() */
SRes MatchFinderMt_InitMt(CMatchFinderMt *p);
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p);
EXTERN_C_END
#endif

578
C/LzFindOpt.c Normal file
View file

@ -0,0 +1,578 @@
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
2021-07-13 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "CpuArch.h"
#include "LzFind.h"
// #include "LzFindMt.h"
// #define LOG_ITERS
// #define LOG_THREAD
#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
// #define PRF(x)
#endif
#ifdef LOG_ITERS
#include <stdio.h>
UInt64 g_NumIters_Tree;
UInt64 g_NumIters_Loop;
UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif
// ---------- BT THREAD ----------
#define USE_SON_PREFETCH
#define USE_LONG_MATCH_OPT
#define kEmptyHashValue 0
// #define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug
/*
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
{
do
{
UInt32 delta;
if (hash == size)
break;
delta = *hash++;
if (delta == 0 || delta > (UInt32)pos)
return NULL;
lenLimit++;
if (delta == (UInt32)pos)
{
CLzRef *ptr1 = son + ((size_t)pos << 1) - CYC_TO_POS_OFFSET * 2;
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
const Byte *len0 = cur, *len1 = cur;
UInt32 cutValue = _cutValue;
const Byte *maxLen = cur + _maxLen;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)(((ptrdiff_t)pos - CYC_TO_POS_OFFSET) + diff) << 1);
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
hash++;
pos++;
cur++;
lenLimit++;
{
CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
#else
const UInt32 p0 = ptr[0 + (diff * 2)];
const UInt32 p1 = ptr[1 + (diff * 2)];
ptr[0] = p0;
ptr[1] = p1;
// ptr[0] = ptr[0 + (diff * 2)];
// ptr[1] = ptr[1 + (diff * 2)];
#endif
}
// PrintSon(son + 2, pos - 1);
// printf("\npos = %x delta = %x\n", pos, delta);
len++;
*d++ = 2;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
if (delta >= curMatch)
return NULL;
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
if (delta >= curMatch)
return NULL;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= pos)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/
/* define cbs if you use 2 functions.
GetMatchesSpecN_1() : (pos < _cyclicBufferSize)
GetMatchesSpecN_2() : (pos >= _cyclicBufferSize)
do not define cbs if you use 1 function:
GetMatchesSpecN_2()
*/
// #define cbs _cyclicBufferSize
/*
we use size_t for (pos) and (_cyclicBufferPos_ instead of UInt32
to eliminate "movsx" BUG in old MSVC x64 compiler.
*/
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes);
MY_NO_INLINE
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
lenLimit++;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
UInt32 *_distances = ++d;
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 cutValue = _cutValue;
const Byte *len0 = cur, *len1 = cur;
const Byte *maxLen = cur + _maxLen;
// if (cutValue == 0) { *ptr0 = *ptr1 = kEmptyHashValue; } else
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// SPEC code
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - (ptrdiff_t)delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
const Byte *len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (len[diff] == len[0])
{
if (++len != lenLimit && len[diff] == len[0])
while (++len != lenLimit)
{
LOG_ITER(g_NumIters_Bytes++);
if (len[diff] != len[0])
break;
}
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)(len - cur);
*d++ = delta - 1;
if (len == lenLimit)
{
const UInt32 pair1 = pair[1];
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
*ptr0 = pair1;
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
{
for (;;)
{
*d++ = 2;
*d++ = (UInt32)(lenLimit - cur);
*d++ = delta - 1;
cur++;
lenLimit++;
// SPEC
_cyclicBufferPos++;
{
// SPEC code
CLzRef *dest = son + ((size_t)(_cyclicBufferPos) << 1);
const CLzRef *src = dest + ((diff
+ (ptrdiff_t)(UInt32)((_cyclicBufferPos < delta) ? cbs : 0)) << 1);
// CLzRef *ptr = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
pos++;
hash++;
if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
break;
} // for() end for long matches
}
#endif
break; // break from TREE iterations
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta; // (UInt32)(pos + diff);
if (len[diff] < len[0])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
if (delta >= curMatch)
return NULL;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
if (delta >= curMatch)
return NULL;
}
delta = (UInt32)pos - delta;
if (--cutValue == 0 || delta >= cbs)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
/*
typedef UInt32 uint32plus; // size_t
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
UInt32 *posRes)
{
do // while (hash != size)
{
UInt32 delta;
#ifndef cbs
UInt32 cbs;
#endif
if (hash == size)
break;
delta = *hash++;
if (delta == 0)
return NULL;
#ifndef cbs
cbs = _cyclicBufferSize;
if ((UInt32)pos < cbs)
{
if (delta > (UInt32)pos)
return NULL;
cbs = (UInt32)pos;
}
#endif
if (delta >= cbs)
{
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
*d++ = 0;
ptr1[0] = kEmptyHashValue;
ptr1[1] = kEmptyHashValue;
}
else
{
CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
UInt32 *_distances = ++d;
uint32plus len0 = 0, len1 = 0;
UInt32 cutValue = _cutValue;
uint32plus maxLen = _maxLen;
// lenLimit++; // const Byte *lenLimit = cur + _lenLimit;
for (LOG_ITER(g_NumIters_Tree++);;)
{
LOG_ITER(g_NumIters_Loop++);
{
// const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
CLzRef *pair = son + ((size_t)((ptrdiff_t)_cyclicBufferPos - delta
+ (ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)
) << 1);
const Byte *pb = cur - delta;
uint32plus len = (len0 < len1 ? len0 : len1);
#ifdef USE_SON_PREFETCH
const UInt32 pair0 = *pair;
#endif
if (pb[len] == cur[len])
{
if (++len != lenLimit && pb[len] == cur[len])
while (++len != lenLimit)
if (pb[len] != cur[len])
break;
if (maxLen < len)
{
maxLen = len;
*d++ = (UInt32)len;
*d++ = delta - 1;
if (len == lenLimit)
{
{
const UInt32 pair1 = pair[1];
*ptr0 = pair1;
*ptr1 =
#ifdef USE_SON_PREFETCH
pair0;
#else
pair[0];
#endif
}
_distances[-1] = (UInt32)(d - _distances);
#ifdef USE_LONG_MATCH_OPT
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
{
const ptrdiff_t diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
for (;;)
{
*d++ = 2;
*d++ = (UInt32)lenLimit;
*d++ = delta - 1;
_cyclicBufferPos++;
{
CLzRef *dest = son + ((size_t)_cyclicBufferPos << 1);
const CLzRef *src = dest + ((diff +
(ptrdiff_t)(UInt32)(_cyclicBufferPos < delta ? cbs : 0)) << 1);
#if 0
*(UInt64 *)(void *)dest = *((const UInt64 *)(const void *)src);
#else
const UInt32 p0 = src[0];
const UInt32 p1 = src[1];
dest[0] = p0;
dest[1] = p1;
#endif
}
hash++;
pos++;
cur++;
pb++;
if (hash == size || *hash != delta || pb[lenLimit] != cur[lenLimit] || d >= limit)
break;
}
}
#endif
break;
}
}
}
{
const UInt32 curMatch = (UInt32)pos - delta;
if (pb[len] < cur[len])
{
delta = pair[1];
*ptr1 = curMatch;
ptr1 = pair + 1;
len1 = len;
}
else
{
delta = *pair;
*ptr0 = curMatch;
ptr0 = pair;
len0 = len;
}
{
if (delta >= curMatch)
return NULL;
delta = (UInt32)pos - delta;
if (delta >= cbs
// delta >= _cyclicBufferSize || delta >= pos
|| --cutValue == 0)
{
*ptr0 = *ptr1 = kEmptyHashValue;
_distances[-1] = (UInt32)(d - _distances);
break;
}
}
}
}
} // for (tree iterations)
}
pos++;
_cyclicBufferPos++;
cur++;
}
while (d < limit);
*posRes = (UInt32)pos;
return d;
}
*/

34
C/LzHash.h Normal file
View file

@ -0,0 +1,34 @@
/* LzHash.h -- HASH functions for LZ algorithms
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZ_HASH_H
#define __LZ_HASH_H
/*
(kHash2Size >= (1 << 8)) : Required
(kHash3Size >= (1 << 16)) : Required
*/
#define kHash2Size (1 << 10)
#define kHash3Size (1 << 16)
// #define kHash4Size (1 << 20)
#define kFix3HashSize (kHash2Size)
#define kFix4HashSize (kHash2Size + kHash3Size)
// #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
/*
We use up to 3 crc values for hash:
crc0
crc1 << Shift_1
crc2 << Shift_2
(Shift_1 = 5) and (Shift_2 = 10) is good tradeoff.
Small values for Shift are not good for collision rate.
Big value for Shift_2 increases the minimum size
of hash table, that will be slow for small files.
*/
#define kLzHash_CrcShift_1 5
#define kLzHash_CrcShift_2 10
#endif

489
C/Lzma2Dec.c Normal file
View file

@ -0,0 +1,489 @@
/* Lzma2Dec.c -- LZMA2 Decoder
2021-02-09 : Igor Pavlov : Public domain */
/* #define SHOW_DEBUG_INFO */
#include "Precomp.h"
#ifdef SHOW_DEBUG_INFO
#include <stdio.h>
#endif
#include <string.h>
#include "Lzma2Dec.h"
/*
00000000 - End of data
00000001 U U - Uncompressed, reset dic, need reset state and set new prop
00000010 U U - Uncompressed, no reset
100uuuuu U U P P - LZMA, no reset
101uuuuu U U P P - LZMA, reset state
110uuuuu U U P P S - LZMA, reset state + set new prop
111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic
u, U - Unpack Size
P - Pack Size
S - Props
*/
#define LZMA2_CONTROL_COPY_RESET_DIC 1
#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & (1 << 7)) == 0)
#define LZMA2_LCLP_MAX 4
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
#ifdef SHOW_DEBUG_INFO
#define PRF(x) x
#else
#define PRF(x)
#endif
typedef enum
{
LZMA2_STATE_CONTROL,
LZMA2_STATE_UNPACK0,
LZMA2_STATE_UNPACK1,
LZMA2_STATE_PACK0,
LZMA2_STATE_PACK1,
LZMA2_STATE_PROP,
LZMA2_STATE_DATA,
LZMA2_STATE_DATA_CONT,
LZMA2_STATE_FINISHED,
LZMA2_STATE_ERROR
} ELzma2State;
static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
{
UInt32 dicSize;
if (prop > 40)
return SZ_ERROR_UNSUPPORTED;
dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop);
props[0] = (Byte)LZMA2_LCLP_MAX;
props[1] = (Byte)(dicSize);
props[2] = (Byte)(dicSize >> 8);
props[3] = (Byte)(dicSize >> 16);
props[4] = (Byte)(dicSize >> 24);
return SZ_OK;
}
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{
Byte props[LZMA_PROPS_SIZE];
RINOK(Lzma2Dec_GetOldProps(prop, props));
return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
}
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
{
Byte props[LZMA_PROPS_SIZE];
RINOK(Lzma2Dec_GetOldProps(prop, props));
return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
}
void Lzma2Dec_Init(CLzma2Dec *p)
{
p->state = LZMA2_STATE_CONTROL;
p->needInitLevel = 0xE0;
p->isExtraMode = False;
p->unpackSize = 0;
// p->decoder.dicPos = 0; // we can use it instead of full init
LzmaDec_Init(&p->decoder);
}
// ELzma2State
static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
{
switch (p->state)
{
case LZMA2_STATE_CONTROL:
p->isExtraMode = False;
p->control = b;
PRF(printf("\n %8X", (unsigned)p->decoder.dicPos));
PRF(printf(" %02X", (unsigned)b));
if (b == 0)
return LZMA2_STATE_FINISHED;
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
if (b == LZMA2_CONTROL_COPY_RESET_DIC)
p->needInitLevel = 0xC0;
else if (b > 2 || p->needInitLevel == 0xE0)
return LZMA2_STATE_ERROR;
}
else
{
if (b < p->needInitLevel)
return LZMA2_STATE_ERROR;
p->needInitLevel = 0;
p->unpackSize = (UInt32)(b & 0x1F) << 16;
}
return LZMA2_STATE_UNPACK0;
case LZMA2_STATE_UNPACK0:
p->unpackSize |= (UInt32)b << 8;
return LZMA2_STATE_UNPACK1;
case LZMA2_STATE_UNPACK1:
p->unpackSize |= (UInt32)b;
p->unpackSize++;
PRF(printf(" %7u", (unsigned)p->unpackSize));
return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0;
case LZMA2_STATE_PACK0:
p->packSize = (UInt32)b << 8;
return LZMA2_STATE_PACK1;
case LZMA2_STATE_PACK1:
p->packSize |= (UInt32)b;
p->packSize++;
// if (p->packSize < 5) return LZMA2_STATE_ERROR;
PRF(printf(" %5u", (unsigned)p->packSize));
return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA;
case LZMA2_STATE_PROP:
{
unsigned lc, lp;
if (b >= (9 * 5 * 5))
return LZMA2_STATE_ERROR;
lc = b % 9;
b /= 9;
p->decoder.prop.pb = (Byte)(b / 5);
lp = b % 5;
if (lc + lp > LZMA2_LCLP_MAX)
return LZMA2_STATE_ERROR;
p->decoder.prop.lc = (Byte)lc;
p->decoder.prop.lp = (Byte)lp;
return LZMA2_STATE_DATA;
}
}
return LZMA2_STATE_ERROR;
}
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
{
memcpy(p->dic + p->dicPos, src, size);
p->dicPos += size;
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size)
p->checkDicSize = p->prop.dicSize;
p->processedPos += (UInt32)size;
}
void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState);
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT inSize = *srcLen;
*srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
while (p->state != LZMA2_STATE_ERROR)
{
SizeT dicPos;
if (p->state == LZMA2_STATE_FINISHED)
{
*status = LZMA_STATUS_FINISHED_WITH_MARK;
return SZ_OK;
}
dicPos = p->decoder.dicPos;
if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY)
{
*status = LZMA_STATUS_NOT_FINISHED;
return SZ_OK;
}
if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
{
if (*srcLen == inSize)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
(*srcLen)++;
p->state = Lzma2Dec_UpdateState(p, *src++);
if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED)
break;
continue;
}
{
SizeT inCur = inSize - *srcLen;
SizeT outCur = dicLimit - dicPos;
ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY;
if (outCur >= p->unpackSize)
{
outCur = (SizeT)p->unpackSize;
curFinishMode = LZMA_FINISH_END;
}
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
if (inCur == 0)
{
*status = LZMA_STATUS_NEEDS_MORE_INPUT;
return SZ_OK;
}
if (p->state == LZMA2_STATE_DATA)
{
BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC);
LzmaDec_InitDicAndState(&p->decoder, initDic, False);
}
if (inCur > outCur)
inCur = outCur;
if (inCur == 0)
break;
LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur);
src += inCur;
*srcLen += inCur;
p->unpackSize -= (UInt32)inCur;
p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
}
else
{
SRes res;
if (p->state == LZMA2_STATE_DATA)
{
BoolInt initDic = (p->control >= 0xE0);
BoolInt initState = (p->control >= 0xA0);
LzmaDec_InitDicAndState(&p->decoder, initDic, initState);
p->state = LZMA2_STATE_DATA_CONT;
}
if (inCur > p->packSize)
inCur = (SizeT)p->packSize;
res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status);
src += inCur;
*srcLen += inCur;
p->packSize -= (UInt32)inCur;
outCur = p->decoder.dicPos - dicPos;
p->unpackSize -= (UInt32)outCur;
if (res != 0)
break;
if (*status == LZMA_STATUS_NEEDS_MORE_INPUT)
{
if (p->packSize == 0)
break;
return SZ_OK;
}
if (inCur == 0 && outCur == 0)
{
if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
|| p->unpackSize != 0
|| p->packSize != 0)
break;
p->state = LZMA2_STATE_CONTROL;
}
*status = LZMA_STATUS_NOT_SPECIFIED;
}
}
}
*status = LZMA_STATUS_NOT_SPECIFIED;
p->state = LZMA2_STATE_ERROR;
return SZ_ERROR_DATA;
}
ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
SizeT outSize,
const Byte *src, SizeT *srcLen,
int checkFinishBlock)
{
SizeT inSize = *srcLen;
*srcLen = 0;
while (p->state != LZMA2_STATE_ERROR)
{
if (p->state == LZMA2_STATE_FINISHED)
return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK;
if (outSize == 0 && !checkFinishBlock)
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT)
{
if (*srcLen == inSize)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
(*srcLen)++;
p->state = Lzma2Dec_UpdateState(p, *src++);
if (p->state == LZMA2_STATE_UNPACK0)
{
// if (p->decoder.dicPos != 0)
if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0)
return LZMA2_PARSE_STATUS_NEW_BLOCK;
// if (outSize == 0) return LZMA_STATUS_NOT_FINISHED;
}
// The following code can be commented.
// It's not big problem, if we read additional input bytes.
// It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state.
if (outSize == 0 && p->state != LZMA2_STATE_FINISHED)
{
// checkFinishBlock is true. So we expect that block must be finished,
// We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here
// break;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
}
if (p->state == LZMA2_STATE_DATA)
return LZMA2_PARSE_STATUS_NEW_CHUNK;
continue;
}
if (outSize == 0)
return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED;
{
SizeT inCur = inSize - *srcLen;
if (LZMA2_IS_UNCOMPRESSED_STATE(p))
{
if (inCur == 0)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
if (inCur > p->unpackSize)
inCur = p->unpackSize;
if (inCur > outSize)
inCur = outSize;
p->decoder.dicPos += inCur;
src += inCur;
*srcLen += inCur;
outSize -= inCur;
p->unpackSize -= (UInt32)inCur;
p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT;
}
else
{
p->isExtraMode = True;
if (inCur == 0)
{
if (p->packSize != 0)
return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT;
}
else if (p->state == LZMA2_STATE_DATA)
{
p->state = LZMA2_STATE_DATA_CONT;
if (*src != 0)
{
// first byte of lzma chunk must be Zero
*srcLen += 1;
p->packSize--;
break;
}
}
if (inCur > p->packSize)
inCur = (SizeT)p->packSize;
src += inCur;
*srcLen += inCur;
p->packSize -= (UInt32)inCur;
if (p->packSize == 0)
{
SizeT rem = outSize;
if (rem > p->unpackSize)
rem = p->unpackSize;
p->decoder.dicPos += rem;
p->unpackSize -= (UInt32)rem;
outSize -= rem;
if (p->unpackSize == 0)
p->state = LZMA2_STATE_CONTROL;
}
}
}
}
p->state = LZMA2_STATE_ERROR;
return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED;
}
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
SizeT outSize = *destLen, inSize = *srcLen;
*srcLen = *destLen = 0;
for (;;)
{
SizeT inCur = inSize, outCur, dicPos;
ELzmaFinishMode curFinishMode;
SRes res;
if (p->decoder.dicPos == p->decoder.dicBufSize)
p->decoder.dicPos = 0;
dicPos = p->decoder.dicPos;
curFinishMode = LZMA_FINISH_ANY;
outCur = p->decoder.dicBufSize - dicPos;
if (outCur >= outSize)
{
outCur = outSize;
curFinishMode = finishMode;
}
res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status);
src += inCur;
inSize -= inCur;
*srcLen += inCur;
outCur = p->decoder.dicPos - dicPos;
memcpy(dest, p->decoder.dic + dicPos, outCur);
dest += outCur;
outSize -= outCur;
*destLen += outCur;
if (res != 0)
return res;
if (outCur == 0 || outSize == 0)
return SZ_OK;
}
}
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc)
{
CLzma2Dec p;
SRes res;
SizeT outSize = *destLen, inSize = *srcLen;
*destLen = *srcLen = 0;
*status = LZMA_STATUS_NOT_SPECIFIED;
Lzma2Dec_Construct(&p);
RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc));
p.decoder.dic = dest;
p.decoder.dicBufSize = outSize;
Lzma2Dec_Init(&p);
*srcLen = inSize;
res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
*destLen = p.decoder.dicPos;
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
res = SZ_ERROR_INPUT_EOF;
Lzma2Dec_FreeProbs(&p, alloc);
return res;
}

120
C/Lzma2Dec.h Normal file
View file

@ -0,0 +1,120 @@
/* Lzma2Dec.h -- LZMA2 Decoder
2018-02-19 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_H
#define __LZMA2_DEC_H
#include "LzmaDec.h"
EXTERN_C_BEGIN
/* ---------- State Interface ---------- */
typedef struct
{
unsigned state;
Byte control;
Byte needInitLevel;
Byte isExtraMode;
Byte _pad_;
UInt32 packSize;
UInt32 unpackSize;
CLzmaDec decoder;
} CLzma2Dec;
#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
void Lzma2Dec_Init(CLzma2Dec *p);
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen or dicLimit).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
SZ_ERROR_DATA - Data error
*/
SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- LZMA2 block and chunk parsing ---------- */
/*
Lzma2Dec_Parse() parses compressed data stream up to next independent block or next chunk data.
It can return LZMA_STATUS_* code or LZMA2_PARSE_STATUS_* code:
- LZMA2_PARSE_STATUS_NEW_BLOCK - there is new block, and 1 additional byte (control byte of next block header) was read from input.
- LZMA2_PARSE_STATUS_NEW_CHUNK - there is new chunk, and only lzma2 header of new chunk was read.
CLzma2Dec::unpackSize contains unpack size of that chunk
*/
typedef enum
{
/*
LZMA_STATUS_NOT_SPECIFIED // data error
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED //
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK // unused
*/
LZMA2_PARSE_STATUS_NEW_BLOCK = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK + 1,
LZMA2_PARSE_STATUS_NEW_CHUNK
} ELzma2ParseStatus;
ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p,
SizeT outSize, // output size
const Byte *src, SizeT *srcLen,
int checkFinishBlock // set (checkFinishBlock = 1), if it must read full input data, if decoder.dicPos reaches blockMax position.
);
/*
LZMA2 parser doesn't decode LZMA chunks, so we must read
full input LZMA chunk to decode some part of LZMA chunk.
Lzma2Dec_GetUnpackExtra() returns the value that shows
max possible number of output bytes that can be output by decoder
at current input positon.
*/
#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0);
/* ---------- One Call Interface ---------- */
/*
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - use smallest number of input bytes
LZMA_FINISH_END - read EndOfStream marker after decoding
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
*/
SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc);
EXTERN_C_END
#endif

1090
C/Lzma2DecMt.c Normal file

File diff suppressed because it is too large Load diff

79
C/Lzma2DecMt.h Normal file
View file

@ -0,0 +1,79 @@
/* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread
2018-02-17 : Igor Pavlov : Public domain */
#ifndef __LZMA2_DEC_MT_H
#define __LZMA2_DEC_MT_H
#include "7zTypes.h"
EXTERN_C_BEGIN
typedef struct
{
size_t inBufSize_ST;
size_t outStep_ST;
#ifndef _7ZIP_ST
unsigned numThreads;
size_t inBufSize_MT;
size_t outBlockMax;
size_t inBlockMax;
#endif
} CLzma2DecMtProps;
/* init to single-thread mode */
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p);
/* ---------- CLzma2DecMtHandle Interface ---------- */
/* Lzma2DecMt_ * functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect paramater in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
// SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzma2DecMtHandle;
CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
Byte prop,
const CLzma2DecMtProps *props,
ISeqOutStream *outStream,
const UInt64 *outDataSize, // NULL means undefined
int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished
// Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
// const Byte *inData, size_t inDataSize,
// out variables:
UInt64 *inProcessed,
int *isMT, /* out: (*isMT == 0), if single thread decoding was used */
// UInt64 *outProcessed,
ICompressProgress *progress);
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */
SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
Byte prop,
const CLzma2DecMtProps *props,
const UInt64 *outDataSize, int finishMode,
ISeqInStream *inStream);
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
Byte *data, size_t *outSize,
UInt64 *inStreamProcessed);
EXTERN_C_END
#endif

803
C/Lzma2Enc.c Normal file
View file

@ -0,0 +1,803 @@
/* Lzma2Enc.c -- LZMA2 Encoder
2021-02-09 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
/* #define _7ZIP_ST */
#include "Lzma2Enc.h"
#ifndef _7ZIP_ST
#include "MtCoder.h"
#else
#define MTCODER__THREADS_MAX 1
#endif
#define LZMA2_CONTROL_LZMA (1 << 7)
#define LZMA2_CONTROL_COPY_NO_RESET 2
#define LZMA2_CONTROL_COPY_RESET_DIC 1
#define LZMA2_CONTROL_EOF 0
#define LZMA2_LCLP_MAX 4
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))
#define LZMA2_PACK_SIZE_MAX (1 << 16)
#define LZMA2_COPY_CHUNK_SIZE LZMA2_PACK_SIZE_MAX
#define LZMA2_UNPACK_SIZE_MAX (1 << 21)
#define LZMA2_KEEP_WINDOW_SIZE LZMA2_UNPACK_SIZE_MAX
#define LZMA2_CHUNK_SIZE_COMPRESSED_MAX ((1 << 16) + 16)
#define PRF(x) /* x */
/* ---------- CLimitedSeqInStream ---------- */
typedef struct
{
ISeqInStream vt;
ISeqInStream *realStream;
UInt64 limit;
UInt64 processed;
int finished;
} CLimitedSeqInStream;
static void LimitedSeqInStream_Init(CLimitedSeqInStream *p)
{
p->limit = (UInt64)(Int64)-1;
p->processed = 0;
p->finished = 0;
}
static SRes LimitedSeqInStream_Read(const ISeqInStream *pp, void *data, size_t *size)
{
CLimitedSeqInStream *p = CONTAINER_FROM_VTBL(pp, CLimitedSeqInStream, vt);
size_t size2 = *size;
SRes res = SZ_OK;
if (p->limit != (UInt64)(Int64)-1)
{
UInt64 rem = p->limit - p->processed;
if (size2 > rem)
size2 = (size_t)rem;
}
if (size2 != 0)
{
res = ISeqInStream_Read(p->realStream, data, &size2);
p->finished = (size2 == 0 ? 1 : 0);
p->processed += size2;
}
*size = size2;
return res;
}
/* ---------- CLzma2EncInt ---------- */
typedef struct
{
CLzmaEncHandle enc;
Byte propsAreSet;
Byte propsByte;
Byte needInitState;
Byte needInitProp;
UInt64 srcPos;
} CLzma2EncInt;
static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props)
{
if (!p->propsAreSet)
{
SizeT propsSize = LZMA_PROPS_SIZE;
Byte propsEncoded[LZMA_PROPS_SIZE];
RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps));
RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize));
p->propsByte = propsEncoded[0];
p->propsAreSet = True;
}
return SZ_OK;
}
static void Lzma2EncInt_InitBlock(CLzma2EncInt *p)
{
p->srcPos = 0;
p->needInitState = True;
p->needInitProp = True;
}
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
void LzmaEnc_Finish(CLzmaEncHandle pp);
void LzmaEnc_SaveState(CLzmaEncHandle pp);
void LzmaEnc_RestoreState(CLzmaEncHandle pp);
/*
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp);
*/
static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
size_t *packSizeRes, ISeqOutStream *outStream)
{
size_t packSizeLimit = *packSizeRes;
size_t packSize = packSizeLimit;
UInt32 unpackSize = LZMA2_UNPACK_SIZE_MAX;
unsigned lzHeaderSize = 5 + (p->needInitProp ? 1 : 0);
BoolInt useCopyBlock;
SRes res;
*packSizeRes = 0;
if (packSize < lzHeaderSize)
return SZ_ERROR_OUTPUT_EOF;
packSize -= lzHeaderSize;
LzmaEnc_SaveState(p->enc);
res = LzmaEnc_CodeOneMemBlock(p->enc, p->needInitState,
outBuf + lzHeaderSize, &packSize, LZMA2_PACK_SIZE_MAX, &unpackSize);
PRF(printf("\npackSize = %7d unpackSize = %7d ", packSize, unpackSize));
if (unpackSize == 0)
return res;
if (res == SZ_OK)
useCopyBlock = (packSize + 2 >= unpackSize || packSize > (1 << 16));
else
{
if (res != SZ_ERROR_OUTPUT_EOF)
return res;
res = SZ_OK;
useCopyBlock = True;
}
if (useCopyBlock)
{
size_t destPos = 0;
PRF(printf("################# COPY "));
while (unpackSize > 0)
{
UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE;
if (packSizeLimit - destPos < u + 3)
return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET);
outBuf[destPos++] = (Byte)((u - 1) >> 8);
outBuf[destPos++] = (Byte)(u - 1);
memcpy(outBuf + destPos, LzmaEnc_GetCurBuf(p->enc) - unpackSize, u);
unpackSize -= u;
destPos += u;
p->srcPos += u;
if (outStream)
{
*packSizeRes += destPos;
if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos)
return SZ_ERROR_WRITE;
destPos = 0;
}
else
*packSizeRes = destPos;
/* needInitState = True; */
}
LzmaEnc_RestoreState(p->enc);
return SZ_OK;
}
{
size_t destPos = 0;
UInt32 u = unpackSize - 1;
UInt32 pm = (UInt32)(packSize - 1);
unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0);
PRF(printf(" "));
outBuf[destPos++] = (Byte)(LZMA2_CONTROL_LZMA | (mode << 5) | ((u >> 16) & 0x1F));
outBuf[destPos++] = (Byte)(u >> 8);
outBuf[destPos++] = (Byte)u;
outBuf[destPos++] = (Byte)(pm >> 8);
outBuf[destPos++] = (Byte)pm;
if (p->needInitProp)
outBuf[destPos++] = p->propsByte;
p->needInitProp = False;
p->needInitState = False;
destPos += packSize;
p->srcPos += unpackSize;
if (outStream)
if (ISeqOutStream_Write(outStream, outBuf, destPos) != destPos)
return SZ_ERROR_WRITE;
*packSizeRes = destPos;
return SZ_OK;
}
}
/* ---------- Lzma2 Props ---------- */
void Lzma2EncProps_Init(CLzma2EncProps *p)
{
LzmaEncProps_Init(&p->lzmaProps);
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO;
p->numBlockThreads_Reduced = -1;
p->numBlockThreads_Max = -1;
p->numTotalThreads = -1;
}
void Lzma2EncProps_Normalize(CLzma2EncProps *p)
{
UInt64 fileSize;
int t1, t1n, t2, t2r, t3;
{
CLzmaEncProps lzmaProps = p->lzmaProps;
LzmaEncProps_Normalize(&lzmaProps);
t1n = lzmaProps.numThreads;
}
t1 = p->lzmaProps.numThreads;
t2 = p->numBlockThreads_Max;
t3 = p->numTotalThreads;
if (t2 > MTCODER__THREADS_MAX)
t2 = MTCODER__THREADS_MAX;
if (t3 <= 0)
{
if (t2 <= 0)
t2 = 1;
t3 = t1n * t2;
}
else if (t2 <= 0)
{
t2 = t3 / t1n;
if (t2 == 0)
{
t1 = 1;
t2 = t3;
}
if (t2 > MTCODER__THREADS_MAX)
t2 = MTCODER__THREADS_MAX;
}
else if (t1 <= 0)
{
t1 = t3 / t2;
if (t1 == 0)
t1 = 1;
}
else
t3 = t1n * t2;
p->lzmaProps.numThreads = t1;
t2r = t2;
fileSize = p->lzmaProps.reduceSize;
if ( p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO
&& (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1))
p->lzmaProps.reduceSize = p->blockSize;
LzmaEncProps_Normalize(&p->lzmaProps);
p->lzmaProps.reduceSize = fileSize;
t1 = p->lzmaProps.numThreads;
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID)
{
t2r = t2 = 1;
t3 = t1;
}
else if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && t2 <= 1)
{
/* if there is no block multi-threading, we use SOLID block */
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID;
}
else
{
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO)
{
const UInt32 kMinSize = (UInt32)1 << 20;
const UInt32 kMaxSize = (UInt32)1 << 28;
const UInt32 dictSize = p->lzmaProps.dictSize;
UInt64 blockSize = (UInt64)dictSize << 2;
if (blockSize < kMinSize) blockSize = kMinSize;
if (blockSize > kMaxSize) blockSize = kMaxSize;
if (blockSize < dictSize) blockSize = dictSize;
blockSize += (kMinSize - 1);
blockSize &= ~(UInt64)(kMinSize - 1);
p->blockSize = blockSize;
}
if (t2 > 1 && fileSize != (UInt64)(Int64)-1)
{
UInt64 numBlocks = fileSize / p->blockSize;
if (numBlocks * p->blockSize != fileSize)
numBlocks++;
if (numBlocks < (unsigned)t2)
{
t2r = (int)numBlocks;
if (t2r == 0)
t2r = 1;
t3 = t1 * t2r;
}
}
}
p->numBlockThreads_Max = t2;
p->numBlockThreads_Reduced = t2r;
p->numTotalThreads = t3;
}
static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize)
{
return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK;
}
/* ---------- Lzma2 ---------- */
typedef struct
{
Byte propEncoded;
CLzma2EncProps props;
UInt64 expectedDataSize;
Byte *tempBufLzma;
ISzAllocPtr alloc;
ISzAllocPtr allocBig;
CLzma2EncInt coders[MTCODER__THREADS_MAX];
#ifndef _7ZIP_ST
ISeqOutStream *outStream;
Byte *outBuf;
size_t outBuf_Rem; /* remainder in outBuf */
size_t outBufSize; /* size of allocated outBufs[i] */
size_t outBufsDataSizes[MTCODER__BLOCKS_MAX];
BoolInt mtCoder_WasConstructed;
CMtCoder mtCoder;
Byte *outBufs[MTCODER__BLOCKS_MAX];
#endif
} CLzma2Enc;
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig)
{
CLzma2Enc *p = (CLzma2Enc *)ISzAlloc_Alloc(alloc, sizeof(CLzma2Enc));
if (!p)
return NULL;
Lzma2EncProps_Init(&p->props);
Lzma2EncProps_Normalize(&p->props);
p->expectedDataSize = (UInt64)(Int64)-1;
p->tempBufLzma = NULL;
p->alloc = alloc;
p->allocBig = allocBig;
{
unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++)
p->coders[i].enc = NULL;
}
#ifndef _7ZIP_ST
p->mtCoder_WasConstructed = False;
{
unsigned i;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
p->outBufs[i] = NULL;
p->outBufSize = 0;
}
#endif
return p;
}
#ifndef _7ZIP_ST
static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
{
unsigned i;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
if (p->outBufs[i])
{
ISzAlloc_Free(p->alloc, p->outBufs[i]);
p->outBufs[i] = NULL;
}
p->outBufSize = 0;
}
#endif
void Lzma2Enc_Destroy(CLzma2EncHandle pp)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++)
{
CLzma2EncInt *t = &p->coders[i];
if (t->enc)
{
LzmaEnc_Destroy(t->enc, p->alloc, p->allocBig);
t->enc = NULL;
}
}
#ifndef _7ZIP_ST
if (p->mtCoder_WasConstructed)
{
MtCoder_Destruct(&p->mtCoder);
p->mtCoder_WasConstructed = False;
}
Lzma2Enc_FreeOutBufs(p);
#endif
ISzAlloc_Free(p->alloc, p->tempBufLzma);
p->tempBufLzma = NULL;
ISzAlloc_Free(p->alloc, pp);
}
SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
CLzmaEncProps lzmaProps = props->lzmaProps;
LzmaEncProps_Normalize(&lzmaProps);
if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX)
return SZ_ERROR_PARAM;
p->props = *props;
Lzma2EncProps_Normalize(&p->props);
return SZ_OK;
}
void Lzma2Enc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
p->expectedDataSize = expectedDataSiize;
}
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
unsigned i;
UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps);
for (i = 0; i < 40; i++)
if (dicSize <= LZMA2_DIC_SIZE_FROM_PROP(i))
break;
return (Byte)i;
}
static SRes Lzma2Enc_EncodeMt1(
CLzma2Enc *me,
CLzma2EncInt *p,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
int finished,
ICompressProgress *progress)
{
UInt64 unpackTotal = 0;
UInt64 packTotal = 0;
size_t outLim = 0;
CLimitedSeqInStream limitedInStream;
if (outBuf)
{
outLim = *outBufSize;
*outBufSize = 0;
}
if (!p->enc)
{
p->propsAreSet = False;
p->enc = LzmaEnc_Create(me->alloc);
if (!p->enc)
return SZ_ERROR_MEM;
}
limitedInStream.realStream = inStream;
if (inStream)
{
limitedInStream.vt.Read = LimitedSeqInStream_Read;
}
if (!outBuf)
{
// outStream version works only in one thread. So we use CLzma2Enc::tempBufLzma
if (!me->tempBufLzma)
{
me->tempBufLzma = (Byte *)ISzAlloc_Alloc(me->alloc, LZMA2_CHUNK_SIZE_COMPRESSED_MAX);
if (!me->tempBufLzma)
return SZ_ERROR_MEM;
}
}
RINOK(Lzma2EncInt_InitStream(p, &me->props));
for (;;)
{
SRes res = SZ_OK;
size_t inSizeCur = 0;
Lzma2EncInt_InitBlock(p);
LimitedSeqInStream_Init(&limitedInStream);
limitedInStream.limit = me->props.blockSize;
if (inStream)
{
UInt64 expected = (UInt64)(Int64)-1;
// inStream version works only in one thread. So we use CLzma2Enc::expectedDataSize
if (me->expectedDataSize != (UInt64)(Int64)-1
&& me->expectedDataSize >= unpackTotal)
expected = me->expectedDataSize - unpackTotal;
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& expected > me->props.blockSize)
expected = (size_t)me->props.blockSize;
LzmaEnc_SetDataSize(p->enc, expected);
RINOK(LzmaEnc_PrepareForLzma2(p->enc,
&limitedInStream.vt,
LZMA2_KEEP_WINDOW_SIZE,
me->alloc,
me->allocBig));
}
else
{
inSizeCur = inDataSize - (size_t)unpackTotal;
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
&& inSizeCur > me->props.blockSize)
inSizeCur = (size_t)me->props.blockSize;
// LzmaEnc_SetDataSize(p->enc, inSizeCur);
RINOK(LzmaEnc_MemPrepare(p->enc,
inData + (size_t)unpackTotal, inSizeCur,
LZMA2_KEEP_WINDOW_SIZE,
me->alloc,
me->allocBig));
}
for (;;)
{
size_t packSize = LZMA2_CHUNK_SIZE_COMPRESSED_MAX;
if (outBuf)
packSize = outLim - (size_t)packTotal;
res = Lzma2EncInt_EncodeSubblock(p,
outBuf ? outBuf + (size_t)packTotal : me->tempBufLzma, &packSize,
outBuf ? NULL : outStream);
if (res != SZ_OK)
break;
packTotal += packSize;
if (outBuf)
*outBufSize = (size_t)packTotal;
res = Progress(progress, unpackTotal + p->srcPos, packTotal);
if (res != SZ_OK)
break;
/*
if (LzmaEnc_GetNumAvailableBytes(p->enc) == 0)
break;
*/
if (packSize == 0)
break;
}
LzmaEnc_Finish(p->enc);
unpackTotal += p->srcPos;
RINOK(res);
if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur))
return SZ_ERROR_FAIL;
if (inStream ? limitedInStream.finished : (unpackTotal == inDataSize))
{
if (finished)
{
if (outBuf)
{
const size_t destPos = *outBufSize;
if (destPos >= outLim)
return SZ_ERROR_OUTPUT_EOF;
outBuf[destPos] = LZMA2_CONTROL_EOF; // 0
*outBufSize = destPos + 1;
}
else
{
const Byte b = LZMA2_CONTROL_EOF; // 0;
if (ISeqOutStream_Write(outStream, &b, 1) != 1)
return SZ_ERROR_WRITE;
}
}
return SZ_OK;
}
}
}
#ifndef _7ZIP_ST
static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex,
const Byte *src, size_t srcSize, int finished)
{
CLzma2Enc *me = (CLzma2Enc *)pp;
size_t destSize = me->outBufSize;
SRes res;
CMtProgressThunk progressThunk;
Byte *dest = me->outBufs[outBufIndex];
me->outBufsDataSizes[outBufIndex] = 0;
if (!dest)
{
dest = (Byte *)ISzAlloc_Alloc(me->alloc, me->outBufSize);
if (!dest)
return SZ_ERROR_MEM;
me->outBufs[outBufIndex] = dest;
}
MtProgressThunk_CreateVTable(&progressThunk);
progressThunk.mtProgress = &me->mtCoder.mtProgress;
progressThunk.inSize = 0;
progressThunk.outSize = 0;
res = Lzma2Enc_EncodeMt1(me,
&me->coders[coderIndex],
NULL, dest, &destSize,
NULL, src, srcSize,
finished,
&progressThunk.vt);
me->outBufsDataSizes[outBufIndex] = destSize;
return res;
}
static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex)
{
CLzma2Enc *me = (CLzma2Enc *)pp;
size_t size = me->outBufsDataSizes[outBufIndex];
const Byte *data = me->outBufs[outBufIndex];
if (me->outStream)
return ISeqOutStream_Write(me->outStream, data, size) == size ? SZ_OK : SZ_ERROR_WRITE;
if (size > me->outBuf_Rem)
return SZ_ERROR_OUTPUT_EOF;
memcpy(me->outBuf, data, size);
me->outBuf_Rem -= size;
me->outBuf += size;
return SZ_OK;
}
#endif
SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
ICompressProgress *progress)
{
CLzma2Enc *p = (CLzma2Enc *)pp;
if (inStream && inData)
return SZ_ERROR_PARAM;
if (outStream && outBuf)
return SZ_ERROR_PARAM;
{
unsigned i;
for (i = 0; i < MTCODER__THREADS_MAX; i++)
p->coders[i].propsAreSet = False;
}
#ifndef _7ZIP_ST
if (p->props.numBlockThreads_Reduced > 1)
{
IMtCoderCallback2 vt;
if (!p->mtCoder_WasConstructed)
{
p->mtCoder_WasConstructed = True;
MtCoder_Construct(&p->mtCoder);
}
vt.Code = Lzma2Enc_MtCallback_Code;
vt.Write = Lzma2Enc_MtCallback_Write;
p->outStream = outStream;
p->outBuf = NULL;
p->outBuf_Rem = 0;
if (!outStream)
{
p->outBuf = outBuf;
p->outBuf_Rem = *outBufSize;
*outBufSize = 0;
}
p->mtCoder.allocBig = p->allocBig;
p->mtCoder.progress = progress;
p->mtCoder.inStream = inStream;
p->mtCoder.inData = inData;
p->mtCoder.inDataSize = inDataSize;
p->mtCoder.mtCallback = &vt;
p->mtCoder.mtCallbackObject = p;
p->mtCoder.blockSize = (size_t)p->props.blockSize;
if (p->mtCoder.blockSize != p->props.blockSize)
return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */
{
size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16;
if (destBlockSize < p->mtCoder.blockSize)
return SZ_ERROR_PARAM;
if (p->outBufSize != destBlockSize)
Lzma2Enc_FreeOutBufs(p);
p->outBufSize = destBlockSize;
}
p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{
SRes res = MtCoder_Code(&p->mtCoder);
if (!outStream)
*outBufSize = (size_t)(p->outBuf - outBuf);
return res;
}
}
#endif
return Lzma2Enc_EncodeMt1(p,
&p->coders[0],
outStream, outBuf, outBufSize,
inStream, inData, inDataSize,
True, /* finished */
progress);
}

55
C/Lzma2Enc.h Normal file
View file

@ -0,0 +1,55 @@
/* Lzma2Enc.h -- LZMA2 Encoder
2017-07-27 : Igor Pavlov : Public domain */
#ifndef __LZMA2_ENC_H
#define __LZMA2_ENC_H
#include "LzmaEnc.h"
EXTERN_C_BEGIN
#define LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO 0
#define LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID ((UInt64)(Int64)-1)
typedef struct
{
CLzmaEncProps lzmaProps;
UInt64 blockSize;
int numBlockThreads_Reduced;
int numBlockThreads_Max;
int numTotalThreads;
} CLzma2EncProps;
void Lzma2EncProps_Init(CLzma2EncProps *p);
void Lzma2EncProps_Normalize(CLzma2EncProps *p);
/* ---------- CLzmaEnc2Handle Interface ---------- */
/* Lzma2Enc_* functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect paramater in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzma2EncHandle;
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig);
void Lzma2Enc_Destroy(CLzma2EncHandle p);
SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props);
void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize);
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p);
SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
ISeqOutStream *outStream,
Byte *outBuf, size_t *outBufSize,
ISeqInStream *inStream,
const Byte *inData, size_t inDataSize,
ICompressProgress *progress);
EXTERN_C_END
#endif

111
C/Lzma86.h Normal file
View file

@ -0,0 +1,111 @@
/* Lzma86.h -- LZMA + x86 (BCJ) Filter
2013-01-18 : Igor Pavlov : Public domain */
#ifndef __LZMA86_H
#define __LZMA86_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define LZMA86_SIZE_OFFSET (1 + 5)
#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8)
/*
It's an example for LZMA + x86 Filter use.
You can use .lzma86 extension, if you write that stream to file.
.lzma86 header adds one additional byte to standard .lzma header.
.lzma86 header (14 bytes):
Offset Size Description
0 1 = 0 - no filter, pure LZMA
= 1 - x86 filter + LZMA
1 1 lc, lp and pb in encoded form
2 4 dictSize (little endian)
6 8 uncompressed size (little endian)
Lzma86_Encode
-------------
level - compression level: 0 <= level <= 9, the default value for "level" is 5.
dictSize - The dictionary size in bytes. The maximum value is
128 MB = (1 << 27) bytes for 32-bit version
1 GB = (1 << 30) bytes for 64-bit version
The default value is 16 MB = (1 << 24) bytes, for level = 5.
It's recommended to use the dictionary that is larger than 4 KB and
that can be calculated as (1 << N) or (3 << N) sizes.
For better compression ratio dictSize must be >= inSize.
filterMode:
SZ_FILTER_NO - no Filter
SZ_FILTER_YES - x86 Filter
SZ_FILTER_AUTO - it tries both alternatives to select best.
Encoder will use 2 or 3 passes:
2 passes when FILTER_NO provides better compression.
3 passes when FILTER_YES provides better compression.
Lzma86Encode allocates Data with MyAlloc functions.
RAM Requirements for compressing:
RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize
filterMode FilterBlockSize
SZ_FILTER_NO 0
SZ_FILTER_YES inSize
SZ_FILTER_AUTO inSize
Return code:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect paramater
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
enum ESzFilterMode
{
SZ_FILTER_NO,
SZ_FILTER_YES,
SZ_FILTER_AUTO
};
SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode);
/*
Lzma86_GetUnpackSize:
In:
src - input data
srcLen - input data size
Out:
unpackSize - size of uncompressed stream
Return code:
SZ_OK - OK
SZ_ERROR_INPUT_EOF - Error in headers
*/
SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize);
/*
Lzma86_Decode:
In:
dest - output data
destLen - output data size
src - input data
srcLen - input data size
Out:
destLen - processed output size
srcLen - processed input size
Return code:
SZ_OK - OK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - unsupported file
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer
*/
SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen);
EXTERN_C_END
#endif

54
C/Lzma86Dec.c Normal file
View file

@ -0,0 +1,54 @@
/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
2016-05-16 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Lzma86.h"
#include "Alloc.h"
#include "Bra.h"
#include "LzmaDec.h"
SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize)
{
unsigned i;
if (srcLen < LZMA86_HEADER_SIZE)
return SZ_ERROR_INPUT_EOF;
*unpackSize = 0;
for (i = 0; i < sizeof(UInt64); i++)
*unpackSize += ((UInt64)src[LZMA86_SIZE_OFFSET + i]) << (8 * i);
return SZ_OK;
}
SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)
{
SRes res;
int useFilter;
SizeT inSizePure;
ELzmaStatus status;
if (*srcLen < LZMA86_HEADER_SIZE)
return SZ_ERROR_INPUT_EOF;
useFilter = src[0];
if (useFilter > 1)
{
*destLen = 0;
return SZ_ERROR_UNSUPPORTED;
}
inSizePure = *srcLen - LZMA86_HEADER_SIZE;
res = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &inSizePure,
src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc);
*srcLen = inSizePure + LZMA86_HEADER_SIZE;
if (res != SZ_OK)
return res;
if (useFilter == 1)
{
UInt32 x86State;
x86_Convert_Init(x86State);
x86_Convert(dest, *destLen, 0, &x86State, 0);
}
return SZ_OK;
}

104
C/Lzma86Enc.c Normal file
View file

@ -0,0 +1,104 @@
/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder
2018-07-04 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "Lzma86.h"
#include "Alloc.h"
#include "Bra.h"
#include "LzmaEnc.h"
int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
int level, UInt32 dictSize, int filterMode)
{
size_t outSize2 = *destLen;
Byte *filteredStream;
BoolInt useFilter;
int mainResult = SZ_ERROR_OUTPUT_EOF;
CLzmaEncProps props;
LzmaEncProps_Init(&props);
props.level = level;
props.dictSize = dictSize;
*destLen = 0;
if (outSize2 < LZMA86_HEADER_SIZE)
return SZ_ERROR_OUTPUT_EOF;
{
int i;
UInt64 t = srcLen;
for (i = 0; i < 8; i++, t >>= 8)
dest[LZMA86_SIZE_OFFSET + i] = (Byte)t;
}
filteredStream = 0;
useFilter = (filterMode != SZ_FILTER_NO);
if (useFilter)
{
if (srcLen != 0)
{
filteredStream = (Byte *)MyAlloc(srcLen);
if (filteredStream == 0)
return SZ_ERROR_MEM;
memcpy(filteredStream, src, srcLen);
}
{
UInt32 x86State;
x86_Convert_Init(x86State);
x86_Convert(filteredStream, srcLen, 0, &x86State, 1);
}
}
{
size_t minSize = 0;
BoolInt bestIsFiltered = False;
/* passes for SZ_FILTER_AUTO:
0 - BCJ + LZMA
1 - LZMA
2 - BCJ + LZMA agaian, if pass 0 (BCJ + LZMA) is better.
*/
int numPasses = (filterMode == SZ_FILTER_AUTO) ? 3 : 1;
int i;
for (i = 0; i < numPasses; i++)
{
size_t outSizeProcessed = outSize2 - LZMA86_HEADER_SIZE;
size_t outPropsSize = 5;
SRes curRes;
BoolInt curModeIsFiltered = (numPasses > 1 && i == numPasses - 1);
if (curModeIsFiltered && !bestIsFiltered)
break;
if (useFilter && i == 0)
curModeIsFiltered = True;
curRes = LzmaEncode(dest + LZMA86_HEADER_SIZE, &outSizeProcessed,
curModeIsFiltered ? filteredStream : src, srcLen,
&props, dest + 1, &outPropsSize, 0,
NULL, &g_Alloc, &g_Alloc);
if (curRes != SZ_ERROR_OUTPUT_EOF)
{
if (curRes != SZ_OK)
{
mainResult = curRes;
break;
}
if (outSizeProcessed <= minSize || mainResult != SZ_OK)
{
minSize = outSizeProcessed;
bestIsFiltered = curModeIsFiltered;
mainResult = SZ_OK;
}
}
}
dest[0] = (Byte)(bestIsFiltered ? 1 : 0);
*destLen = LZMA86_HEADER_SIZE + minSize;
}
if (useFilter)
MyFree(filteredStream);
return mainResult;
}

1363
C/LzmaDec.c Normal file

File diff suppressed because it is too large Load diff

236
C/LzmaDec.h Normal file
View file

@ -0,0 +1,236 @@
/* LzmaDec.h -- LZMA Decoder
2020-03-19 : Igor Pavlov : Public domain */
#ifndef __LZMA_DEC_H
#define __LZMA_DEC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
but memory usage for CLzmaDec::probs will be doubled in that case */
typedef
#ifdef _LZMA_PROB32
UInt32
#else
UInt16
#endif
CLzmaProb;
/* ---------- LZMA Properties ---------- */
#define LZMA_PROPS_SIZE 5
typedef struct _CLzmaProps
{
Byte lc;
Byte lp;
Byte pb;
Byte _pad_;
UInt32 dicSize;
} CLzmaProps;
/* LzmaProps_Decode - decodes properties
Returns:
SZ_OK
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
/* ---------- LZMA Decoder state ---------- */
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
#define LZMA_REQUIRED_INPUT_MAX 20
typedef struct
{
/* Don't change this structure. ASM code can use it. */
CLzmaProps prop;
CLzmaProb *probs;
CLzmaProb *probs_1664;
Byte *dic;
SizeT dicBufSize;
SizeT dicPos;
const Byte *buf;
UInt32 range;
UInt32 code;
UInt32 processedPos;
UInt32 checkDicSize;
UInt32 reps[4];
UInt32 state;
UInt32 remainLen;
UInt32 numProbs;
unsigned tempBufSize;
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
void LzmaDec_Init(CLzmaDec *p);
/* There are two types of LZMA streams:
- Stream with end mark. That end mark adds about 6 bytes to compressed size.
- Stream without end mark. You must know exact uncompressed size to decompress such stream. */
typedef enum
{
LZMA_FINISH_ANY, /* finish at any point */
LZMA_FINISH_END /* block must be finished at the end */
} ELzmaFinishMode;
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
You must use LZMA_FINISH_END, when you know that current output buffer
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
and output value of destLen will be less than output buffer size limit.
You can check status result also.
You can use multiple checks to test data integrity after full decompression:
1) Check Result and "status" variable.
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
You must use correct finish mode in that case. */
typedef enum
{
LZMA_STATUS_NOT_SPECIFIED, /* use main error code instead */
LZMA_STATUS_FINISHED_WITH_MARK, /* stream was finished with end mark. */
LZMA_STATUS_NOT_FINISHED, /* stream was not finished */
LZMA_STATUS_NEEDS_MORE_INPUT, /* you must provide more input bytes */
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
} ELzmaStatus;
/* ELzmaStatus is used only as output value for function call */
/* ---------- Interfaces ---------- */
/* There are 3 levels of interfaces:
1) Dictionary Interface
2) Buffer Interface
3) One Call Interface
You can select any of these interfaces, but don't mix functions from different
groups for same object. */
/* There are two variants to allocate state for Dictionary Interface:
1) LzmaDec_Allocate / LzmaDec_Free
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
You can use variant 2, if you set dictionary buffer manually.
For Buffer Interface you must always use variant 1.
LzmaDec_Allocate* can return:
SZ_OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
*/
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
/* ---------- Dictionary Interface ---------- */
/* You can use it, if you want to eliminate the overhead for data copying from
dictionary to some other external buffer.
You must work with CLzmaDec variables directly in this interface.
STEPS:
LzmaDec_Construct()
LzmaDec_Allocate()
for (each new stream)
{
LzmaDec_Init()
while (it needs more decompression)
{
LzmaDec_DecodeToDic()
use data from CLzmaDec::dic and update CLzmaDec::dicPos
}
}
LzmaDec_Free()
*/
/* LzmaDec_DecodeToDic
The decoding to internal dictionary buffer (CLzmaDec::dic).
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
finishMode:
It has meaning only if the decoding reaches output limit (dicLimit).
LZMA_FINISH_ANY - Decode just dicLimit bytes.
LZMA_FINISH_END - Stream must be finished after dicLimit.
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_NEEDS_MORE_INPUT
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- Buffer Interface ---------- */
/* It's zlib-like interface.
See LzmaDec_DecodeToDic description for information about STEPS and return results,
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
to work with CLzmaDec variables manually.
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
*/
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
/* ---------- One Call Interface ---------- */
/* LzmaDecode
finishMode:
It has meaning only if the decoding reaches output limit (*destLen).
LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen).
Returns:
SZ_OK
status:
LZMA_STATUS_FINISHED_WITH_MARK
LZMA_STATUS_NOT_FINISHED
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
SZ_ERROR_FAIL - Some unexpected error: internal error of code, memory corruption or hardware failure
*/
SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
ELzmaStatus *status, ISzAllocPtr alloc);
EXTERN_C_END
#endif

3165
C/LzmaEnc.c Normal file

File diff suppressed because it is too large Load diff

78
C/LzmaEnc.h Normal file
View file

@ -0,0 +1,78 @@
/* LzmaEnc.h -- LZMA Encoder
2019-10-30 : Igor Pavlov : Public domain */
#ifndef __LZMA_ENC_H
#define __LZMA_ENC_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define LZMA_PROPS_SIZE 5
typedef struct _CLzmaEncProps
{
int level; /* 0 <= level <= 9 */
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
(1 << 12) <= dictSize <= (3 << 29) for 64-bit version
default = (1 << 24) */
int lc; /* 0 <= lc <= 8, default = 3 */
int lp; /* 0 <= lp <= 4, default = 0 */
int pb; /* 0 <= pb <= 4, default = 2 */
int algo; /* 0 - fast, 1 - normal, default = 1 */
int fb; /* 5 <= fb <= 273, default = 32 */
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
int numHashBytes; /* 2, 3 or 4, default = 4 */
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
int numThreads; /* 1 or 2, default = 2 */
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
UInt64 affinity;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);
void LzmaEncProps_Normalize(CLzmaEncProps *p);
UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
/* ---------- CLzmaEncHandle Interface ---------- */
/* LzmaEnc* functions can return the following exit codes:
SRes:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect paramater in props
SZ_ERROR_WRITE - ISeqOutStream write callback error
SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
SZ_ERROR_PROGRESS - some break from progress callback
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
*/
typedef void * CLzmaEncHandle;
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
/* ---------- One Call Interface ---------- */
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
EXTERN_C_END
#endif

40
C/LzmaLib.c Normal file
View file

@ -0,0 +1,40 @@
/* LzmaLib.c -- LZMA library wrapper
2015-06-13 : Igor Pavlov : Public domain */
#include "Alloc.h"
#include "LzmaDec.h"
#include "LzmaEnc.h"
#include "LzmaLib.h"
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize,
int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
int lc, /* 0 <= lc <= 8, default = 3 */
int lp, /* 0 <= lp <= 4, default = 0 */
int pb, /* 0 <= pb <= 4, default = 2 */
int fb, /* 5 <= fb <= 273, default = 32 */
int numThreads /* 1 or 2, default = 2 */
)
{
CLzmaEncProps props;
LzmaEncProps_Init(&props);
props.level = level;
props.dictSize = dictSize;
props.lc = lc;
props.lp = lp;
props.pb = pb;
props.fb = fb;
props.numThreads = numThreads;
return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0,
NULL, &g_Alloc, &g_Alloc);
}
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
const unsigned char *props, size_t propsSize)
{
ELzmaStatus status;
return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc);
}

138
C/LzmaLib.h Normal file
View file

@ -0,0 +1,138 @@
/* LzmaLib.h -- LZMA library interface
2021-04-03 : Igor Pavlov : Public domain */
#ifndef __LZMA_LIB_H
#define __LZMA_LIB_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define MY_STDAPI int MY_STD_CALL
#define LZMA_PROPS_SIZE 5
/*
RAM requirements for LZMA:
for compression: (dictSize * 11.5 + 6 MB) + state_size
for decompression: dictSize + state_size
state_size = (4 + (1.5 << (lc + lp))) KB
by default (lc=3, lp=0), state_size = 16 KB.
LZMA properties (5 bytes) format
Offset Size Description
0 1 lc, lp and pb in encoded form.
1 4 dictSize (little endian).
*/
/*
LzmaCompress
------------
outPropsSize -
In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
LZMA Encoder will use defult values for any parameter, if it is
-1 for any from: level, loc, lp, pb, fb, numThreads
0 for dictSize
level - compression level: 0 <= level <= 9;
level dictSize algo fb
0: 64 KB 0 32
1: 256 KB 0 32
2: 1 MB 0 32
3: 4 MB 0 32
4: 16 MB 0 32
5: 16 MB 1 32
6: 32 MB 1 32
7: 32 MB 1 64
8: 64 MB 1 64
9: 64 MB 1 64
The default value for "level" is 5.
algo = 0 means fast method
algo = 1 means normal method
dictSize - The dictionary size in bytes. The maximum value is
128 MB = (1 << 27) bytes for 32-bit version
1 GB = (1 << 30) bytes for 64-bit version
The default value is 16 MB = (1 << 24) bytes.
It's recommended to use the dictionary that is larger than 4 KB and
that can be calculated as (1 << N) or (3 << N) sizes.
lc - The number of literal context bits (high bits of previous literal).
It can be in the range from 0 to 8. The default value is 3.
Sometimes lc=4 gives the gain for big files.
lp - The number of literal pos bits (low bits of current position for literals).
It can be in the range from 0 to 4. The default value is 0.
The lp switch is intended for periodical data when the period is equal to 2^lp.
For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
better to set lc=0, if you change lp switch.
pb - The number of pos bits (low bits of current position).
It can be in the range from 0 to 4. The default value is 2.
The pb switch is intended for periodical data when the period is equal 2^pb.
fb - Word size (the number of fast bytes).
It can be in the range from 5 to 273. The default value is 32.
Usually, a big number gives a little bit better compression ratio and
slower compression process.
numThreads - The number of thereads. 1 or 2. The default value is 2.
Fast mode (algo = 0) can use only 1 thread.
In:
dest - output data buffer
destLen - output data buffer size
src - input data
srcLen - input data size
Out:
destLen - processed output size
Returns:
SZ_OK - OK
SZ_ERROR_MEM - Memory allocation error
SZ_ERROR_PARAM - Incorrect paramater
SZ_ERROR_OUTPUT_EOF - output buffer overflow
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
*/
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
int level, /* 0 <= level <= 9, default = 5 */
unsigned dictSize, /* default = (1 << 24) */
int lc, /* 0 <= lc <= 8, default = 3 */
int lp, /* 0 <= lp <= 4, default = 0 */
int pb, /* 0 <= pb <= 4, default = 2 */
int fb, /* 5 <= fb <= 273, default = 32 */
int numThreads /* 1 or 2, default = 2 */
);
/*
LzmaUncompress
--------------
In:
dest - output data buffer
destLen - output data buffer size
src - input data
srcLen - input data size
Out:
destLen - processed output size
srcLen - processed input size
Returns:
SZ_OK - OK
SZ_ERROR_DATA - Data error
SZ_ERROR_MEM - Memory allocation arror
SZ_ERROR_UNSUPPORTED - Unsupported properties
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
*/
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
const unsigned char *props, size_t propsSize);
EXTERN_C_END
#endif

595
C/MtCoder.c Normal file
View file

@ -0,0 +1,595 @@
/* MtCoder.c -- Multi-thread Coder
2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "MtCoder.h"
#ifndef _7ZIP_ST
static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
{
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
UInt64 inSize2 = 0;
UInt64 outSize2 = 0;
if (inSize != (UInt64)(Int64)-1)
{
inSize2 = inSize - thunk->inSize;
thunk->inSize = inSize;
}
if (outSize != (UInt64)(Int64)-1)
{
outSize2 = outSize - thunk->outSize;
thunk->outSize = outSize;
}
return MtProgress_ProgressAdd(thunk->mtProgress, inSize2, outSize2);
}
void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
{
p->vt.Progress = MtProgressThunk_Progress;
}
#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
{
if (Event_IsCreated(p))
return Event_Reset(p);
return AutoResetEvent_CreateNotSignaled(p);
}
static THREAD_FUNC_DECL ThreadFunc(void *pp);
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
{
WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent);
if (wres == 0)
{
t->stop = False;
if (!Thread_WasCreated(&t->thread))
wres = Thread_Create(&t->thread, ThreadFunc, t);
if (wres == 0)
wres = Event_Set(&t->startEvent);
}
if (wres == 0)
return SZ_OK;
return MY_SRes_HRESULT_FROM_WRes(wres);
}
static void MtCoderThread_Destruct(CMtCoderThread *t)
{
if (Thread_WasCreated(&t->thread))
{
t->stop = 1;
Event_Set(&t->startEvent);
Thread_Wait_Close(&t->thread);
}
Event_Close(&t->startEvent);
if (t->inBuf)
{
ISzAlloc_Free(t->mtCoder->allocBig, t->inBuf);
t->inBuf = NULL;
}
}
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
{
size_t size = *processedSize;
*processedSize = 0;
while (size != 0)
{
size_t cur = size;
SRes res = ISeqInStream_Read(stream, data, &cur);
*processedSize += cur;
data += cur;
size -= cur;
RINOK(res);
if (cur == 0)
return SZ_OK;
}
return SZ_OK;
}
/*
ThreadFunc2() returns:
SZ_OK - in all normal cases (even for stream error or memory allocation error)
SZ_ERROR_THREAD - in case of failure in system synch function
*/
static SRes ThreadFunc2(CMtCoderThread *t)
{
CMtCoder *mtc = t->mtCoder;
for (;;)
{
unsigned bi;
SRes res;
SRes res2;
BoolInt finished;
unsigned bufIndex;
size_t size;
const Byte *inData;
UInt64 readProcessed = 0;
RINOK_THREAD(Event_Wait(&mtc->readEvent))
/* after Event_Wait(&mtc->readEvent) we must call Event_Set(&mtc->readEvent) in any case to unlock another threads */
if (mtc->stopReading)
{
return Event_Set(&mtc->readEvent) == 0 ? SZ_OK : SZ_ERROR_THREAD;
}
res = MtProgress_GetError(&mtc->mtProgress);
size = 0;
inData = NULL;
finished = True;
if (res == SZ_OK)
{
size = mtc->blockSize;
if (mtc->inStream)
{
if (!t->inBuf)
{
t->inBuf = (Byte *)ISzAlloc_Alloc(mtc->allocBig, mtc->blockSize);
if (!t->inBuf)
res = SZ_ERROR_MEM;
}
if (res == SZ_OK)
{
res = FullRead(mtc->inStream, t->inBuf, &size);
readProcessed = mtc->readProcessed + size;
mtc->readProcessed = readProcessed;
}
if (res != SZ_OK)
{
mtc->readRes = res;
/* after reading error - we can stop encoding of previous blocks */
MtProgress_SetError(&mtc->mtProgress, res);
}
else
finished = (size != mtc->blockSize);
}
else
{
size_t rem;
readProcessed = mtc->readProcessed;
rem = mtc->inDataSize - (size_t)readProcessed;
if (size > rem)
size = rem;
inData = mtc->inData + (size_t)readProcessed;
readProcessed += size;
mtc->readProcessed = readProcessed;
finished = (mtc->inDataSize == (size_t)readProcessed);
}
}
/* we must get some block from blocksSemaphore before Event_Set(&mtc->readEvent) */
res2 = SZ_OK;
if (Semaphore_Wait(&mtc->blocksSemaphore) != 0)
{
res2 = SZ_ERROR_THREAD;
if (res == SZ_OK)
{
res = res2;
// MtProgress_SetError(&mtc->mtProgress, res);
}
}
bi = mtc->blockIndex;
if (++mtc->blockIndex >= mtc->numBlocksMax)
mtc->blockIndex = 0;
bufIndex = (unsigned)(int)-1;
if (res == SZ_OK)
res = MtProgress_GetError(&mtc->mtProgress);
if (res != SZ_OK)
finished = True;
if (!finished)
{
if (mtc->numStartedThreads < mtc->numStartedThreadsLimit
&& mtc->expectedDataSize != readProcessed)
{
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);
if (res == SZ_OK)
mtc->numStartedThreads++;
else
{
MtProgress_SetError(&mtc->mtProgress, res);
finished = True;
}
}
}
if (finished)
mtc->stopReading = True;
RINOK_THREAD(Event_Set(&mtc->readEvent))
if (res2 != SZ_OK)
return res2;
if (res == SZ_OK)
{
CriticalSection_Enter(&mtc->cs);
bufIndex = mtc->freeBlockHead;
mtc->freeBlockHead = mtc->freeBlockList[bufIndex];
CriticalSection_Leave(&mtc->cs);
res = mtc->mtCallback->Code(mtc->mtCallbackObject, t->index, bufIndex,
mtc->inStream ? t->inBuf : inData, size, finished);
// MtProgress_Reinit(&mtc->mtProgress, t->index);
if (res != SZ_OK)
MtProgress_SetError(&mtc->mtProgress, res);
}
{
CMtCoderBlock *block = &mtc->blocks[bi];
block->res = res;
block->bufIndex = bufIndex;
block->finished = finished;
}
#ifdef MTCODER__USE_WRITE_THREAD
RINOK_THREAD(Event_Set(&mtc->writeEvents[bi]))
#else
{
unsigned wi;
{
CriticalSection_Enter(&mtc->cs);
wi = mtc->writeIndex;
if (wi == bi)
mtc->writeIndex = (unsigned)(int)-1;
else
mtc->ReadyBlocks[bi] = True;
CriticalSection_Leave(&mtc->cs);
}
if (wi != bi)
{
if (res != SZ_OK || finished)
return 0;
continue;
}
if (mtc->writeRes != SZ_OK)
res = mtc->writeRes;
for (;;)
{
if (res == SZ_OK && bufIndex != (unsigned)(int)-1)
{
res = mtc->mtCallback->Write(mtc->mtCallbackObject, bufIndex);
if (res != SZ_OK)
{
mtc->writeRes = res;
MtProgress_SetError(&mtc->mtProgress, res);
}
}
if (++wi >= mtc->numBlocksMax)
wi = 0;
{
BoolInt isReady;
CriticalSection_Enter(&mtc->cs);
if (bufIndex != (unsigned)(int)-1)
{
mtc->freeBlockList[bufIndex] = mtc->freeBlockHead;
mtc->freeBlockHead = bufIndex;
}
isReady = mtc->ReadyBlocks[wi];
if (isReady)
mtc->ReadyBlocks[wi] = False;
else
mtc->writeIndex = wi;
CriticalSection_Leave(&mtc->cs);
RINOK_THREAD(Semaphore_Release1(&mtc->blocksSemaphore))
if (!isReady)
break;
}
{
CMtCoderBlock *block = &mtc->blocks[wi];
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;
bufIndex = block->bufIndex;
finished = block->finished;
}
}
}
#endif
if (finished || res != SZ_OK)
return 0;
}
}
static THREAD_FUNC_DECL ThreadFunc(void *pp)
{
CMtCoderThread *t = (CMtCoderThread *)pp;
for (;;)
{
if (Event_Wait(&t->startEvent) != 0)
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
if (t->stop)
return 0;
{
SRes res = ThreadFunc2(t);
CMtCoder *mtc = t->mtCoder;
if (res != SZ_OK)
{
MtProgress_SetError(&mtc->mtProgress, res);
}
#ifndef MTCODER__USE_WRITE_THREAD
{
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads)
if (Event_Set(&mtc->finishedEvent) != 0)
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
}
#endif
}
}
}
void MtCoder_Construct(CMtCoder *p)
{
unsigned i;
p->blockSize = 0;
p->numThreadsMax = 0;
p->expectedDataSize = (UInt64)(Int64)-1;
p->inStream = NULL;
p->inData = NULL;
p->inDataSize = 0;
p->progress = NULL;
p->allocBig = NULL;
p->mtCallback = NULL;
p->mtCallbackObject = NULL;
p->allocatedBufsSize = 0;
Event_Construct(&p->readEvent);
Semaphore_Construct(&p->blocksSemaphore);
for (i = 0; i < MTCODER__THREADS_MAX; i++)
{
CMtCoderThread *t = &p->threads[i];
t->mtCoder = p;
t->index = i;
t->inBuf = NULL;
t->stop = False;
Event_Construct(&t->startEvent);
Thread_Construct(&t->thread);
}
#ifdef MTCODER__USE_WRITE_THREAD
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
Event_Construct(&p->writeEvents[i]);
#else
Event_Construct(&p->finishedEvent);
#endif
CriticalSection_Init(&p->cs);
CriticalSection_Init(&p->mtProgress.cs);
}
static void MtCoder_Free(CMtCoder *p)
{
unsigned i;
/*
p->stopReading = True;
if (Event_IsCreated(&p->readEvent))
Event_Set(&p->readEvent);
*/
for (i = 0; i < MTCODER__THREADS_MAX; i++)
MtCoderThread_Destruct(&p->threads[i]);
Event_Close(&p->readEvent);
Semaphore_Close(&p->blocksSemaphore);
#ifdef MTCODER__USE_WRITE_THREAD
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
Event_Close(&p->writeEvents[i]);
#else
Event_Close(&p->finishedEvent);
#endif
}
void MtCoder_Destruct(CMtCoder *p)
{
MtCoder_Free(p);
CriticalSection_Delete(&p->cs);
CriticalSection_Delete(&p->mtProgress.cs);
}
SRes MtCoder_Code(CMtCoder *p)
{
unsigned numThreads = p->numThreadsMax;
unsigned numBlocksMax;
unsigned i;
SRes res = SZ_OK;
if (numThreads > MTCODER__THREADS_MAX)
numThreads = MTCODER__THREADS_MAX;
numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads);
if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++;
if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;
if (numBlocksMax > MTCODER__BLOCKS_MAX)
numBlocksMax = MTCODER__BLOCKS_MAX;
if (p->blockSize != p->allocatedBufsSize)
{
for (i = 0; i < MTCODER__THREADS_MAX; i++)
{
CMtCoderThread *t = &p->threads[i];
if (t->inBuf)
{
ISzAlloc_Free(p->allocBig, t->inBuf);
t->inBuf = NULL;
}
}
p->allocatedBufsSize = p->blockSize;
}
p->readRes = SZ_OK;
MtProgress_Init(&p->mtProgress, p->progress);
#ifdef MTCODER__USE_WRITE_THREAD
for (i = 0; i < numBlocksMax; i++)
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i]));
}
#else
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));
#endif
{
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
}
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
p->freeBlockList[i] = i + 1;
p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1;
p->freeBlockHead = 0;
p->readProcessed = 0;
p->blockIndex = 0;
p->numBlocksMax = numBlocksMax;
p->stopReading = False;
#ifndef MTCODER__USE_WRITE_THREAD
p->writeIndex = 0;
p->writeRes = SZ_OK;
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
p->ReadyBlocks[i] = False;
p->numFinishedThreads = 0;
#endif
p->numStartedThreadsLimit = numThreads;
p->numStartedThreads = 0;
// for (i = 0; i < numThreads; i++)
{
CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
RINOK(MtCoderThread_CreateAndStart(nextThread));
}
RINOK_THREAD(Event_Set(&p->readEvent))
#ifdef MTCODER__USE_WRITE_THREAD
{
unsigned bi = 0;
for (;; bi++)
{
if (bi >= numBlocksMax)
bi = 0;
RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))
{
const CMtCoderBlock *block = &p->blocks[bi];
unsigned bufIndex = block->bufIndex;
BoolInt finished = block->finished;
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;
if (bufIndex != (unsigned)(int)-1)
{
if (res == SZ_OK)
{
res = p->mtCallback->Write(p->mtCallbackObject, bufIndex);
if (res != SZ_OK)
MtProgress_SetError(&p->mtProgress, res);
}
CriticalSection_Enter(&p->cs);
{
p->freeBlockList[bufIndex] = p->freeBlockHead;
p->freeBlockHead = bufIndex;
}
CriticalSection_Leave(&p->cs);
}
RINOK_THREAD(Semaphore_Release1(&p->blocksSemaphore))
if (finished)
break;
}
}
}
#else
{
WRes wres = Event_Wait(&p->finishedEvent);
res = MY_SRes_HRESULT_FROM_WRes(wres);
}
#endif
if (res == SZ_OK)
res = p->readRes;
if (res == SZ_OK)
res = p->mtProgress.res;
#ifndef MTCODER__USE_WRITE_THREAD
if (res == SZ_OK)
res = p->writeRes;
#endif
if (res != SZ_OK)
MtCoder_Free(p);
return res;
}
#endif

141
C/MtCoder.h Normal file
View file

@ -0,0 +1,141 @@
/* MtCoder.h -- Multi-thread Coder
2018-07-04 : Igor Pavlov : Public domain */
#ifndef __MT_CODER_H
#define __MT_CODER_H
#include "MtDec.h"
EXTERN_C_BEGIN
/*
if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream
if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream
*/
/* #define MTCODER__USE_WRITE_THREAD */
#ifndef _7ZIP_ST
#define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
#define MTCODER__THREADS_MAX 64
#define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3)
#else
#define MTCODER__THREADS_MAX 1
#define MTCODER__BLOCKS_MAX 1
#endif
#ifndef _7ZIP_ST
typedef struct
{
ICompressProgress vt;
CMtProgress *mtProgress;
UInt64 inSize;
UInt64 outSize;
} CMtProgressThunk;
void MtProgressThunk_CreateVTable(CMtProgressThunk *p);
#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; }
struct _CMtCoder;
typedef struct
{
struct _CMtCoder *mtCoder;
unsigned index;
int stop;
Byte *inBuf;
CAutoResetEvent startEvent;
CThread thread;
} CMtCoderThread;
typedef struct
{
SRes (*Code)(void *p, unsigned coderIndex, unsigned outBufIndex,
const Byte *src, size_t srcSize, int finished);
SRes (*Write)(void *p, unsigned outBufIndex);
} IMtCoderCallback2;
typedef struct
{
SRes res;
unsigned bufIndex;
BoolInt finished;
} CMtCoderBlock;
typedef struct _CMtCoder
{
/* input variables */
size_t blockSize; /* size of input block */
unsigned numThreadsMax;
UInt64 expectedDataSize;
ISeqInStream *inStream;
const Byte *inData;
size_t inDataSize;
ICompressProgress *progress;
ISzAllocPtr allocBig;
IMtCoderCallback2 *mtCallback;
void *mtCallbackObject;
/* internal variables */
size_t allocatedBufsSize;
CAutoResetEvent readEvent;
CSemaphore blocksSemaphore;
BoolInt stopReading;
SRes readRes;
#ifdef MTCODER__USE_WRITE_THREAD
CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX];
#else
CAutoResetEvent finishedEvent;
SRes writeRes;
unsigned writeIndex;
Byte ReadyBlocks[MTCODER__BLOCKS_MAX];
LONG numFinishedThreads;
#endif
unsigned numStartedThreadsLimit;
unsigned numStartedThreads;
unsigned numBlocksMax;
unsigned blockIndex;
UInt64 readProcessed;
CCriticalSection cs;
unsigned freeBlockHead;
unsigned freeBlockList[MTCODER__BLOCKS_MAX];
CMtProgress mtProgress;
CMtCoderBlock blocks[MTCODER__BLOCKS_MAX];
CMtCoderThread threads[MTCODER__THREADS_MAX];
} CMtCoder;
void MtCoder_Construct(CMtCoder *p);
void MtCoder_Destruct(CMtCoder *p);
SRes MtCoder_Code(CMtCoder *p);
#endif
EXTERN_C_END
#endif

1139
C/MtDec.c Normal file

File diff suppressed because it is too large Load diff

202
C/MtDec.h Normal file
View file

@ -0,0 +1,202 @@
/* MtDec.h -- Multi-thread Decoder
2020-03-05 : Igor Pavlov : Public domain */
#ifndef __MT_DEC_H
#define __MT_DEC_H
#include "7zTypes.h"
#ifndef _7ZIP_ST
#include "Threads.h"
#endif
EXTERN_C_BEGIN
#ifndef _7ZIP_ST
#ifndef _7ZIP_ST
#define MTDEC__THREADS_MAX 32
#else
#define MTDEC__THREADS_MAX 1
#endif
typedef struct
{
ICompressProgress *progress;
SRes res;
UInt64 totalInSize;
UInt64 totalOutSize;
CCriticalSection cs;
} CMtProgress;
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress);
SRes MtProgress_Progress_ST(CMtProgress *p);
SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize);
SRes MtProgress_GetError(CMtProgress *p);
void MtProgress_SetError(CMtProgress *p, SRes res);
struct _CMtDec;
typedef struct
{
struct _CMtDec *mtDec;
unsigned index;
void *inBuf;
size_t inDataSize_Start; // size of input data in start block
UInt64 inDataSize; // total size of input data in all blocks
CThread thread;
CAutoResetEvent canRead;
CAutoResetEvent canWrite;
void *allocaPtr;
} CMtDecThread;
void MtDecThread_FreeInBufs(CMtDecThread *t);
typedef enum
{
MTDEC_PARSE_CONTINUE, // continue this block with more input data
MTDEC_PARSE_OVERFLOW, // MT buffers overflow, need switch to single-thread
MTDEC_PARSE_NEW, // new block
MTDEC_PARSE_END // end of block threading. But we still can return to threading after Write(&needContinue)
} EMtDecParseState;
typedef struct
{
// in
int startCall;
const Byte *src;
size_t srcSize;
// in : (srcSize == 0) is allowed
// out : it's allowed to return less that actually was used ?
int srcFinished;
// out
EMtDecParseState state;
BoolInt canCreateNewThread;
UInt64 outPos; // check it (size_t)
} CMtDecCallbackInfo;
typedef struct
{
void (*Parse)(void *p, unsigned coderIndex, CMtDecCallbackInfo *ci);
// PreCode() and Code():
// (SRes_return_result != SZ_OK) means stop decoding, no need another blocks
SRes (*PreCode)(void *p, unsigned coderIndex);
SRes (*Code)(void *p, unsigned coderIndex,
const Byte *src, size_t srcSize, int srcFinished,
UInt64 *inCodePos, UInt64 *outCodePos, int *stop);
// stop - means stop another Code calls
/* Write() must be called, if Parse() was called
set (needWrite) if
{
&& (was not interrupted by progress)
&& (was not interrupted in previous block)
}
out:
if (*needContinue), decoder still need to continue decoding with new iteration,
even after MTDEC_PARSE_END
if (*canRecode), we didn't flush current block data, so we still can decode current block later.
*/
SRes (*Write)(void *p, unsigned coderIndex,
BoolInt needWriteToStream,
const Byte *src, size_t srcSize, BoolInt isCross,
// int srcFinished,
BoolInt *needContinue,
BoolInt *canRecode);
} IMtDecCallback2;
typedef struct _CMtDec
{
/* input variables */
size_t inBufSize; /* size of input block */
unsigned numThreadsMax;
// size_t inBlockMax;
unsigned numThreadsMax_2;
ISeqInStream *inStream;
// const Byte *inData;
// size_t inDataSize;
ICompressProgress *progress;
ISzAllocPtr alloc;
IMtDecCallback2 *mtCallback;
void *mtCallbackObject;
/* internal variables */
size_t allocatedBufsSize;
BoolInt exitThread;
WRes exitThreadWRes;
UInt64 blockIndex;
BoolInt isAllocError;
BoolInt overflow;
SRes threadingErrorSRes;
BoolInt needContinue;
// CAutoResetEvent finishedEvent;
SRes readRes;
SRes codeRes;
BoolInt wasInterrupted;
unsigned numStartedThreads_Limit;
unsigned numStartedThreads;
Byte *crossBlock;
size_t crossStart;
size_t crossEnd;
UInt64 readProcessed;
BoolInt readWasFinished;
UInt64 inProcessed;
unsigned filledThreadStart;
unsigned numFilledThreads;
#ifndef _7ZIP_ST
BoolInt needInterrupt;
UInt64 interruptIndex;
CMtProgress mtProgress;
CMtDecThread threads[MTDEC__THREADS_MAX];
#endif
} CMtDec;
void MtDec_Construct(CMtDec *p);
void MtDec_Destruct(CMtDec *p);
/*
MtDec_Code() returns:
SZ_OK - in most cases
MY_SRes_HRESULT_FROM_WRes(WRes_error) - in case of unexpected error in threading function
*/
SRes MtDec_Code(CMtDec *p);
Byte *MtDec_GetCrossBuff(CMtDec *p);
int MtDec_PrepareRead(CMtDec *p);
const Byte *MtDec_Read(CMtDec *p, size_t *inLim);
#endif
EXTERN_C_END
#endif

167
C/Ppmd.h Normal file
View file

@ -0,0 +1,167 @@
/* Ppmd.h -- PPMD codec common code
2021-04-13 : Igor Pavlov : Public domain
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD_H
#define __PPMD_H
#include "CpuArch.h"
EXTERN_C_BEGIN
#if defined(MY_CPU_SIZEOF_POINTER) && (MY_CPU_SIZEOF_POINTER == 4)
/*
PPMD code always uses 32-bit internal fields in PPMD structures to store internal references in main block.
if (PPMD_32BIT is defined), the PPMD code stores internal pointers to 32-bit reference fields.
if (PPMD_32BIT is NOT defined), the PPMD code stores internal UInt32 offsets to reference fields.
if (pointer size is 64-bit), then (PPMD_32BIT) mode is not allowed,
if (pointer size is 32-bit), then (PPMD_32BIT) mode is optional,
and it's allowed to disable PPMD_32BIT mode even if pointer is 32-bit.
PPMD code works slightly faster in (PPMD_32BIT) mode.
*/
#define PPMD_32BIT
#endif
#define PPMD_INT_BITS 7
#define PPMD_PERIOD_BITS 7
#define PPMD_BIN_SCALE (1 << (PPMD_INT_BITS + PPMD_PERIOD_BITS))
#define PPMD_GET_MEAN_SPEC(summ, shift, round) (((summ) + (1 << ((shift) - (round)))) >> (shift))
#define PPMD_GET_MEAN(summ) PPMD_GET_MEAN_SPEC((summ), PPMD_PERIOD_BITS, 2)
#define PPMD_UPDATE_PROB_0(prob) ((prob) + (1 << PPMD_INT_BITS) - PPMD_GET_MEAN(prob))
#define PPMD_UPDATE_PROB_1(prob) ((prob) - PPMD_GET_MEAN(prob))
#define PPMD_N1 4
#define PPMD_N2 4
#define PPMD_N3 4
#define PPMD_N4 ((128 + 3 - 1 * PPMD_N1 - 2 * PPMD_N2 - 3 * PPMD_N3) / 4)
#define PPMD_NUM_INDEXES (PPMD_N1 + PPMD_N2 + PPMD_N3 + PPMD_N4)
MY_CPU_pragma_pack_push_1
/* Most compilers works OK here even without #pragma pack(push, 1), but some GCC compilers need it. */
/* SEE-contexts for PPM-contexts with masked symbols */
typedef struct
{
UInt16 Summ; /* Freq */
Byte Shift; /* Speed of Freq change; low Shift is for fast change */
Byte Count; /* Count to next change of Shift */
} CPpmd_See;
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
{ (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
typedef struct
{
Byte Symbol;
Byte Freq;
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State;
typedef struct CPpmd_State2_
{
Byte Symbol;
Byte Freq;
} CPpmd_State2;
typedef struct CPpmd_State4_
{
UInt16 Successor_0;
UInt16 Successor_1;
} CPpmd_State4;
MY_CPU_pragma_pop
/*
PPMD code can write full CPpmd_State structure data to CPpmd*_Context
at (byte offset = 2) instead of some fields of original CPpmd*_Context structure.
If we use pointers to different types, but that point to shared
memory space, we can have aliasing problem (strict aliasing).
XLC compiler in -O2 mode can change the order of memory write instructions
in relation to read instructions, if we have use pointers to different types.
To solve that aliasing problem we use combined CPpmd*_Context structure
with unions that contain the fields from both structures:
the original CPpmd*_Context and CPpmd_State.
So we can access the fields from both structures via one pointer,
and the compiler doesn't change the order of write instructions
in relation to read instructions.
If we don't use memory write instructions to shared memory in
some local code, and we use only reading instructions (read only),
then probably it's safe to use pointers to different types for reading.
*/
#ifdef PPMD_32BIT
#define Ppmd_Ref_Type(type) type *
#define Ppmd_GetRef(p, ptr) (ptr)
#define Ppmd_GetPtr(p, ptr) (ptr)
#define Ppmd_GetPtr_Type(p, ptr, note_type) (ptr)
#else
#define Ppmd_Ref_Type(type) UInt32
#define Ppmd_GetRef(p, ptr) ((UInt32)((Byte *)(ptr) - (p)->Base))
#define Ppmd_GetPtr(p, offs) ((void *)((p)->Base + (offs)))
#define Ppmd_GetPtr_Type(p, offs, type) ((type *)Ppmd_GetPtr(p, offs))
#endif // PPMD_32BIT
typedef Ppmd_Ref_Type(CPpmd_State) CPpmd_State_Ref;
typedef Ppmd_Ref_Type(void) CPpmd_Void_Ref;
typedef Ppmd_Ref_Type(Byte) CPpmd_Byte_Ref;
/*
#ifdef MY_CPU_LE_UNALIGN
// the unaligned 32-bit access latency can be too large, if the data is not in L1 cache.
#define Ppmd_GET_SUCCESSOR(p) ((CPpmd_Void_Ref)*(const UInt32 *)(const void *)&(p)->Successor_0)
#define Ppmd_SET_SUCCESSOR(p, v) *(UInt32 *)(void *)(void *)&(p)->Successor_0 = (UInt32)(v)
#else
*/
/*
We can write 16-bit halves to 32-bit (Successor) field in any selected order.
But the native order is more consistent way.
So we use the native order, if LE/BE order can be detected here at compile time.
*/
#ifdef MY_CPU_BE
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) (((UInt32)(p)->Successor_0 << 16) | (p)->Successor_1) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)((UInt32)(v) /* & 0xFFFF */); }
#else
#define Ppmd_GET_SUCCESSOR(p) \
( (CPpmd_Void_Ref) ((p)->Successor_0 | ((UInt32)(p)->Successor_1 << 16)) )
#define Ppmd_SET_SUCCESSOR(p, v) { \
(p)->Successor_0 = (UInt16)((UInt32)(v) /* & 0xFFFF */); \
(p)->Successor_1 = (UInt16)(((UInt32)(v) >> 16) /* & 0xFFFF */); }
#endif
// #endif
#define PPMD_SetAllBitsIn256Bytes(p) \
{ size_t z; for (z = 0; z < 256 / sizeof(p[0]); z += 8) { \
p[z+7] = p[z+6] = p[z+5] = p[z+4] = p[z+3] = p[z+2] = p[z+1] = p[z+0] = ~(size_t)0; }}
EXTERN_C_END
#endif

1104
C/Ppmd7.c Normal file

File diff suppressed because it is too large Load diff

181
C/Ppmd7.h Normal file
View file

@ -0,0 +1,181 @@
/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#ifndef __PPMD7_H
#define __PPMD7_H
#include "Ppmd.h"
EXTERN_C_BEGIN
#define PPMD7_MIN_ORDER 2
#define PPMD7_MAX_ORDER 64
#define PPMD7_MIN_MEM_SIZE (1 << 11)
#define PPMD7_MAX_MEM_SIZE (0xFFFFFFFF - 12 * 3)
struct CPpmd7_Context_;
typedef Ppmd_Ref_Type(struct CPpmd7_Context_) CPpmd7_Context_Ref;
// MY_CPU_pragma_pack_push_1
typedef struct CPpmd7_Context_
{
UInt16 NumStats;
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
CPpmd7_Context_Ref Suffix;
} CPpmd7_Context;
// MY_CPU_pragma_pop
#define Ppmd7Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
typedef struct
{
UInt32 Range;
UInt32 Code;
UInt32 Low;
IByteIn *Stream;
} CPpmd7_RangeDec;
typedef struct
{
UInt32 Range;
Byte Cache;
// Byte _dummy_[3];
UInt64 Low;
UInt64 CacheSize;
IByteOut *Stream;
} CPpmd7z_RangeEnc;
typedef struct
{
CPpmd7_Context *MinContext, *MaxContext;
CPpmd_State *FoundState;
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, HiBitsFlag;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
UInt32 AlignOffset;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
union
{
CPpmd7_RangeDec dec;
CPpmd7z_RangeEnc enc;
} rc;
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
Byte NS2BSIndx[256], NS2Indx[256];
Byte ExpEscape[16];
CPpmd_See DummySee, See[25][16];
UInt16 BinSumm[128][64];
// int LastSymbol;
} CPpmd7;
void Ppmd7_Construct(CPpmd7 *p);
BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd7_Free(CPpmd7 *p, ISzAllocPtr alloc);
void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder);
#define Ppmd7_WasAllocated(p) ((p)->Base != NULL)
/* ---------- Internal Functions ---------- */
#define Ppmd7_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd7_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd7_Context)
#define Ppmd7_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd7_Update1(CPpmd7 *p);
void Ppmd7_Update1_0(CPpmd7 *p);
void Ppmd7_Update2(CPpmd7 *p);
#define PPMD7_HiBitsFlag_3(sym) ((((unsigned)sym + 0xC0) >> (8 - 3)) & (1 << 3))
#define PPMD7_HiBitsFlag_4(sym) ((((unsigned)sym + 0xC0) >> (8 - 4)) & (1 << 4))
// #define PPMD7_HiBitsFlag_3(sym) ((sym) < 0x40 ? 0 : (1 << 3))
// #define PPMD7_HiBitsFlag_4(sym) ((sym) < 0x40 ? 0 : (1 << 4))
#define Ppmd7_GetBinSumm(p) \
&p->BinSumm[(size_t)(unsigned)Ppmd7Context_OneState(p->MinContext)->Freq - 1] \
[ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ p->NS2BSIndx[(size_t)Ppmd7_GetContext(p, p->MinContext->Suffix)->NumStats - 1] \
+ PPMD7_HiBitsFlag_4(Ppmd7Context_OneState(p->MinContext)->Symbol) \
+ (p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol)) ]
CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *scale);
/*
We support two versions of Ppmd7 (PPMdH) methods that use same CPpmd7 structure:
1) Ppmd7a_*: original PPMdH
2) Ppmd7z_*: modified PPMdH with 7z Range Coder
Ppmd7_*: the structures and functions that are common for both versions of PPMd7 (PPMdH)
*/
/* ---------- Decode ---------- */
#define PPMD7_SYM_END (-1)
#define PPMD7_SYM_ERROR (-2)
/*
You must set (CPpmd7::rc.dec.Stream) before Ppmd7*_RangeDec_Init()
Ppmd7*_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD7_SYM_END : End of payload marker
-2 : PPMD7_SYM_ERROR : Data error
*/
/* Ppmd7a_* : original PPMdH */
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7a_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7a_DecodeSymbol(CPpmd7 *p);
/* Ppmd7z_* : modified PPMdH with 7z Range Coder */
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p);
#define Ppmd7z_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd7z_DecodeSymbol(CPpmd7 *p);
// Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim);
/* ---------- Encode ---------- */
void Ppmd7z_Init_RangeEnc(CPpmd7 *p);
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p);
// void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol);
void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim);
EXTERN_C_END
#endif

297
C/Ppmd7Dec.c Normal file
View file

@ -0,0 +1,297 @@
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
#define kTopValue (1 << 24)
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
if (READ_BYTE(p) != 0)
return False;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
#define RC_NORM_BASE(p) if ((p)->Range < kTopValue) \
{ (p)->Code = ((p)->Code << 8) | READ_BYTE(p); (p)->Range <<= 8;
#define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R (&p->rc.dec)
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
R->Code -= start * R->Range;
R->Range *= size;
RC_NORM_LOCAL(R)
}
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE
// static
int Ppmd7z_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1(p);
return sym;
}
}
while (--i);
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM_1(R)
/* we can use single byte normalization here because of
(min(BinSumm[][]) = 95) > (1 << (14 - 8)) */
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return sym;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Code -= size0;
R->Range -= size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
CPpmd7_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
};
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update2(p);
return sym;
}
if (count >= freqSum)
return PPMD7_SYM_ERROR;
RC_Decode(hiCnt, freqSum - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}
/*
Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
{
int sym = 0;
if (buf != lim)
do
{
sym = Ppmd7z_DecodeSymbol(p);
if (sym < 0)
break;
*buf = (Byte)sym;
}
while (++buf < lim);
p->LastSymbol = sym;
return buf;
}
*/

323
C/Ppmd7Enc.c Normal file
View file

@ -0,0 +1,323 @@
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
#define kTopValue (1 << 24)
#define R (&p->rc.enc)
void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
{
R->Low = 0;
R->Range = 0xFFFFFFFF;
R->Cache = 0;
R->CacheSize = 1;
}
MY_NO_INLINE
static void RangeEnc_ShiftLow(CPpmd7 *p)
{
if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
{
Byte temp = R->Cache;
do
{
IByteOut_Write(R->Stream, (Byte)(temp + (Byte)(R->Low >> 32)));
temp = 0xFF;
}
while (--R->CacheSize != 0);
R->Cache = (Byte)((UInt32)R->Low >> 24);
}
R->CacheSize++;
R->Low = (UInt32)((UInt32)R->Low << 8);
}
#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
#define RC_NORM_1(p) RC_NORM_BASE(p) }
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
/*
#define RangeEnc_Encode(p, start, _size_) \
{ UInt32 size = _size_; \
R->Low += start * R->Range; \
R->Range *= size; \
RC_NORM_LOCAL(p); }
*/
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
{
R->Low += start * R->Range;
R->Range *= size;
RC_NORM_LOCAL(p);
}
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
{
unsigned i;
for (i = 0; i < 5; i++)
RangeEnc_ShiftLow(p);
}
#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
#define SUFFIX(ctx) CTX((ctx)->Suffix)
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
MY_FORCE_INLINE
static
void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
R->Range /= p->MinContext->Union2.SummFreq;
if (s->Symbol == symbol)
{
// R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(0, s->Freq);
p->FoundState = s;
Ppmd7_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((++s)->Symbol == symbol)
{
// R->Range /= p->MinContext->Union2.SummFreq;
RC_EncodeFinal(sum, s->Freq);
p->FoundState = s;
Ppmd7_Update1(p);
return;
}
sum += s->Freq;
}
while (--i);
// R->Range /= p->MinContext->Union2.SummFreq;
RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
UInt16 *prob = Ppmd7_GetBinSumm(p);
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt32 pr = *prob;
UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound);
R->Range = bound;
RC_NORM_1(p);
// p->FoundState = s;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeEnc_EncodeBit_1(p, bound);
R->Low += bound;
R->Range -= bound;
RC_NORM_LOCAL(p)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum, escFreq;
CPpmd7_Context *mc;
unsigned i, numMasked;
RC_NORM_REMOTE(p)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
mc = Ppmd7_GetContext(p, mc->Suffix);
i = mc->NumStats;
}
while (i == numMasked);
p->MinContext = mc;
// see = Ppmd7_MakeEscFreq(p, numMasked, &escFreq);
{
if (i != 256)
{
unsigned nonMasked = i - numMasked;
see = p->See[(unsigned)p->NS2Indx[(size_t)nonMasked - 1]]
+ p->HiBitsFlag
+ (nonMasked < (unsigned)SUFFIX(mc)->NumStats - i)
+ 2 * (unsigned)(mc->Union2.SummFreq < 11 * i)
+ 4 * (unsigned)(numMasked > nonMasked);
{
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
unsigned summ = (UInt16)see->Summ; // & 0xFFFF
unsigned r = (summ >> see->Shift);
see->Summ = (UInt16)(summ - r);
escFreq = r + (r == 0);
}
}
else
{
see = &p->DummySee;
escFreq = 1;
}
}
s = Ppmd7_GetStats(p, mc);
sum = 0;
// i = mc->NumStats;
do
{
unsigned cur = s->Symbol;
if ((int)cur == symbol)
{
UInt32 low = sum;
UInt32 freq = s->Freq;
unsigned num2;
Ppmd_See_Update(see);
p->FoundState = s;
sum += escFreq;
num2 = i / 2;
i &= 1;
sum += freq & (0 - (UInt32)i);
if (num2 != 0)
{
s += i;
for (;;)
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
}
R->Range /= sum;
RC_EncodeFinal(low, freq);
Ppmd7_Update2(p);
return;
}
sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
{
UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total);
R->Range /= total;
RC_Encode(sum, escFreq);
}
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
s--;
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
}
void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
{
for (; buf < lim; buf++)
{
Ppmd7z_EncodeSymbol(p, *buf);
}
}

279
C/Ppmd7aDec.c Normal file
View file

@ -0,0 +1,279 @@
/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.H (2001): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd7.h"
#define kTop (1 << 24)
#define kBot (1 << 15)
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
p->Low = 0;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R (&p->rc.dec)
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
{
start *= R->Range;
R->Low += start;
R->Code -= start;
R->Range *= size;
RC_NORM_LOCAL(R)
}
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
typedef CPpmd7_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd7_UpdateModel(CPpmd7 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
int Ppmd7a_DecodeSymbol(CPpmd7 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 1)
{
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
if (summFreq > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
i = (unsigned)p->MinContext->NumStats - 1;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update1(p);
return sym;
}
}
while (--i);
if (hiCnt >= summFreq)
return PPMD7_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
UInt16 *prob = Ppmd7_GetBinSumm(p);
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd7Context_OneState(p->MinContext))->Symbol;
// Ppmd7_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 128));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
p->MaxContext = p->MinContext = c;
else
Ppmd7_UpdateModel(p);
}
return sym;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(size0);
R->Low += size0;
R->Code -= size0;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
CPpmd_See *see;
CPpmd7_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD7_SYM_END;
mc = Ppmd7_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd7_GetStats(p, mc);
{
unsigned num = mc->NumStats;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
see = Ppmd7_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
if (freqSum > R->Range)
return PPMD7_SYM_ERROR;
count = RC_GetThreshold(freqSum);
if (count < hiCnt)
{
Byte sym;
s = Ppmd7_GetStats(p, p->MinContext);
hiCnt = count;
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
// if ((Int32)count >= 0)
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
};
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
sym = s->Symbol;
Ppmd7_Update2(p);
return sym;
}
if (count >= freqSum)
return PPMD7_SYM_ERROR;
RC_Decode(hiCnt, freqSum - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
s = Ppmd7_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}

1537
C/Ppmd8.c Normal file

File diff suppressed because it is too large Load diff

181
C/Ppmd8.h Normal file
View file

@ -0,0 +1,181 @@
/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#ifndef __PPMD8_H
#define __PPMD8_H
#include "Ppmd.h"
EXTERN_C_BEGIN
#define PPMD8_MIN_ORDER 2
#define PPMD8_MAX_ORDER 16
struct CPpmd8_Context_;
typedef Ppmd_Ref_Type(struct CPpmd8_Context_) CPpmd8_Context_Ref;
// MY_CPU_pragma_pack_push_1
typedef struct CPpmd8_Context_
{
Byte NumStats;
Byte Flags;
union
{
UInt16 SummFreq;
CPpmd_State2 State2;
} Union2;
union
{
CPpmd_State_Ref Stats;
CPpmd_State4 State4;
} Union4;
CPpmd8_Context_Ref Suffix;
} CPpmd8_Context;
// MY_CPU_pragma_pop
#define Ppmd8Context_OneState(p) ((CPpmd_State *)&(p)->Union2)
/* PPMdI code rev.2 contains the fix over PPMdI code rev.1.
But the code PPMdI.2 is not compatible with PPMdI.1 for some files compressed
in FREEZE mode. So we disable FREEZE mode support. */
// #define PPMD8_FREEZE_SUPPORT
enum
{
PPMD8_RESTORE_METHOD_RESTART,
PPMD8_RESTORE_METHOD_CUT_OFF
#ifdef PPMD8_FREEZE_SUPPORT
, PPMD8_RESTORE_METHOD_FREEZE
#endif
, PPMD8_RESTORE_METHOD_UNSUPPPORTED
};
typedef struct
{
CPpmd8_Context *MinContext, *MaxContext;
CPpmd_State *FoundState;
unsigned OrderFall, InitEsc, PrevSuccess, MaxOrder, RestoreMethod;
Int32 RunLength, InitRL; /* must be 32-bit at least */
UInt32 Size;
UInt32 GlueCount;
UInt32 AlignOffset;
Byte *Base, *LoUnit, *HiUnit, *Text, *UnitsStart;
UInt32 Range;
UInt32 Code;
UInt32 Low;
union
{
IByteIn *In;
IByteOut *Out;
} Stream;
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
Byte Units2Indx[128];
CPpmd_Void_Ref FreeList[PPMD_NUM_INDEXES];
UInt32 Stamps[PPMD_NUM_INDEXES];
Byte NS2BSIndx[256], NS2Indx[260];
Byte ExpEscape[16];
CPpmd_See DummySee, See[24][32];
UInt16 BinSumm[25][64];
} CPpmd8;
void Ppmd8_Construct(CPpmd8 *p);
BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc);
void Ppmd8_Free(CPpmd8 *p, ISzAllocPtr alloc);
void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod);
#define Ppmd8_WasAllocated(p) ((p)->Base != NULL)
/* ---------- Internal Functions ---------- */
#define Ppmd8_GetPtr(p, ptr) Ppmd_GetPtr(p, ptr)
#define Ppmd8_GetContext(p, ptr) Ppmd_GetPtr_Type(p, ptr, CPpmd8_Context)
#define Ppmd8_GetStats(p, ctx) Ppmd_GetPtr_Type(p, (ctx)->Union4.Stats, CPpmd_State)
void Ppmd8_Update1(CPpmd8 *p);
void Ppmd8_Update1_0(CPpmd8 *p);
void Ppmd8_Update2(CPpmd8 *p);
#define Ppmd8_GetBinSumm(p) \
&p->BinSumm[p->NS2Indx[(size_t)Ppmd8Context_OneState(p->MinContext)->Freq - 1]] \
[ p->PrevSuccess + ((p->RunLength >> 26) & 0x20) \
+ p->NS2BSIndx[Ppmd8_GetContext(p, p->MinContext->Suffix)->NumStats] + \
+ p->MinContext->Flags ]
CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked, UInt32 *scale);
/* 20.01: the original PPMdI encoder and decoder probably could work incorrectly in some rare cases,
where the original PPMdI code can give "Divide by Zero" operation.
We use the following fix to allow correct working of encoder and decoder in any cases.
We correct (Escape_Freq) and (_sum_), if (_sum_) is larger than p->Range) */
#define PPMD8_CORRECT_SUM_RANGE(p, _sum_) if (_sum_ > p->Range /* /1 */) _sum_ = p->Range;
/* ---------- Decode ---------- */
#define PPMD8_SYM_END (-1)
#define PPMD8_SYM_ERROR (-2)
/*
You must set (CPpmd8::Stream.In) before Ppmd8_RangeDec_Init()
Ppmd8_DecodeSymbol()
out:
>= 0 : decoded byte
-1 : PPMD8_SYM_END : End of payload marker
-2 : PPMD8_SYM_ERROR : Data error
*/
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p);
#define Ppmd8_RangeDec_IsFinishedOK(p) ((p)->Code == 0)
int Ppmd8_DecodeSymbol(CPpmd8 *p);
/* ---------- Encode ---------- */
#define Ppmd8_Init_RangeEnc(p) { (p)->Low = 0; (p)->Range = 0xFFFFFFFF; }
void Ppmd8_Flush_RangeEnc(CPpmd8 *p);
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol);
EXTERN_C_END
#endif

279
C/Ppmd8Dec.c Normal file
View file

@ -0,0 +1,279 @@
/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd8.h"
#define kTop (1 << 24)
#define kBot (1 << 15)
#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
{
unsigned i;
p->Code = 0;
p->Range = 0xFFFFFFFF;
p->Low = 0;
for (i = 0; i < 4; i++)
p->Code = (p->Code << 8) | READ_BYTE(p);
return (p->Code < 0xFFFFFFFF);
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) { \
p->Code = (p->Code << 8) | READ_BYTE(p); \
p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
#define R p
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
{
start *= R->Range;
R->Low += start;
R->Code -= start;
R->Range *= size;
RC_NORM_LOCAL(R)
}
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd8_UpdateModel(CPpmd8 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
int Ppmd8_DecodeSymbol(CPpmd8 *p)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
unsigned i;
UInt32 count, hiCnt;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
count = RC_GetThreshold(summFreq);
hiCnt = count;
if ((Int32)(count -= s->Freq) < 0)
{
Byte sym;
RC_DecodeFinal(0, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update1_0(p);
return sym;
}
p->PrevSuccess = 0;
i = p->MinContext->NumStats;
do
{
if ((Int32)(count -= (++s)->Freq) < 0)
{
Byte sym;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update1(p);
return sym;
}
}
while (--i);
if (hiCnt >= summFreq)
return PPMD8_SYM_ERROR;
hiCnt -= count;
RC_Decode(hiCnt, summFreq - hiCnt);
PPMD_SetAllBitsIn256Bytes(charMask);
// i = p->MinContext->NumStats - 1;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt16 *prob = Ppmd8_GetBinSumm(p);
UInt32 pr = *prob;
UInt32 size0 = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (R->Code < size0)
{
Byte sym;
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeDec_DecodeBit0(size0);
R->Range = size0;
RC_NORM(R)
// sym = (p->FoundState = Ppmd8Context_OneState(p->MinContext))->Symbol;
// Ppmd8_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
sym = s->Symbol;
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 196));
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
return sym;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeDec_DecodeBit1(rc2, size0);
R->Low += size0;
R->Code -= size0;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_State *s, *s2;
UInt32 freqSum, count, hiCnt;
UInt32 freqSum2;
CPpmd_See *see;
CPpmd8_Context *mc;
unsigned numMasked;
RC_NORM_REMOTE(R)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!mc->Suffix)
return PPMD8_SYM_END;
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
s = Ppmd8_GetStats(p, mc);
{
unsigned num = (unsigned)mc->NumStats + 1;
unsigned num2 = num / 2;
num &= 1;
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
s += num;
p->MinContext = mc;
do
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
}
while (--num2);
}
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
freqSum += hiCnt;
freqSum2 = freqSum;
PPMD8_CORRECT_SUM_RANGE(R, freqSum2);
count = RC_GetThreshold(freqSum2);
if (count < hiCnt)
{
Byte sym;
// Ppmd_See_Update(see); // new (see->Summ) value can overflow over 16-bits in some rare cases
s = Ppmd8_GetStats(p, p->MinContext);
hiCnt = count;
{
for (;;)
{
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
}
}
s--;
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
// new (see->Summ) value can overflow over 16-bits in some rare cases
Ppmd_See_Update(see);
p->FoundState = s;
sym = s->Symbol;
Ppmd8_Update2(p);
return sym;
}
if (count >= freqSum2)
return PPMD8_SYM_ERROR;
RC_Decode(hiCnt, freqSum2 - hiCnt);
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
// new (see->Summ) value can overflow over 16-bits in some rare cases
see->Summ = (UInt16)(see->Summ + freqSum);
s = Ppmd8_GetStats(p, p->MinContext);
s2 = s + p->MinContext->NumStats + 1;
do
{
MASK(s->Symbol) = 0;
s++;
}
while (s != s2);
}
}

314
C/Ppmd8Enc.c Normal file
View file

@ -0,0 +1,314 @@
/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
2021-04-13 : Igor Pavlov : Public domain
This code is based on:
PPMd var.I (2002): Dmitry Shkarin : Public domain
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
#include "Precomp.h"
#include "Ppmd8.h"
#define kTop (1 << 24)
#define kBot (1 << 15)
#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
{
unsigned i;
for (i = 0; i < 4; i++, p->Low <<= 8 )
WRITE_BYTE(p);
}
#define RC_NORM(p) \
while ((p->Low ^ (p->Low + p->Range)) < kTop \
|| (p->Range < kBot && ((p->Range = (0 - p->Low) & (kBot - 1)), 1))) \
{ WRITE_BYTE(p); p->Range <<= 8; p->Low <<= 8; }
// we must use only one type of Normalization from two: LOCAL or REMOTE
#define RC_NORM_LOCAL(p) // RC_NORM(p)
#define RC_NORM_REMOTE(p) RC_NORM(p)
// #define RC_PRE(total) p->Range /= total;
// #define RC_PRE(total)
#define R p
MY_FORCE_INLINE
// MY_NO_INLINE
static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
{
R->Low += start * (R->Range /= total);
R->Range *= size;
RC_NORM_LOCAL(R);
}
#define RC_Encode(start, size, total) RangeEnc_Encode(p, start, size, total);
#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total); RC_NORM_REMOTE(p);
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
typedef CPpmd8_Context * CTX_PTR;
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
void Ppmd8_UpdateModel(CPpmd8 *p);
#define MASK(sym) ((unsigned char *)charMask)[sym]
// MY_FORCE_INLINE
// static
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
{
size_t charMask[256 / sizeof(size_t)];
if (p->MinContext->NumStats != 0)
{
CPpmd_State *s = Ppmd8_GetStats(p, p->MinContext);
UInt32 sum;
unsigned i;
UInt32 summFreq = p->MinContext->Union2.SummFreq;
PPMD8_CORRECT_SUM_RANGE(p, summFreq)
// RC_PRE(summFreq);
if (s->Symbol == symbol)
{
RC_EncodeFinal(0, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1_0(p);
return;
}
p->PrevSuccess = 0;
sum = s->Freq;
i = p->MinContext->NumStats;
do
{
if ((++s)->Symbol == symbol)
{
RC_EncodeFinal(sum, s->Freq, summFreq);
p->FoundState = s;
Ppmd8_Update1(p);
return;
}
sum += s->Freq;
}
while (--i);
RC_Encode(sum, summFreq - sum, summFreq);
PPMD_SetAllBitsIn256Bytes(charMask);
// MASK(s->Symbol) = 0;
// i = p->MinContext->NumStats;
// do { MASK((--s)->Symbol) = 0; } while (--i);
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
else
{
UInt16 *prob = Ppmd8_GetBinSumm(p);
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
UInt32 pr = *prob;
UInt32 bound = (R->Range >> 14) * pr;
pr = PPMD_UPDATE_PROB_1(pr);
if (s->Symbol == symbol)
{
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
// RangeEnc_EncodeBit_0(p, bound);
R->Range = bound;
RC_NORM(R);
// p->FoundState = s;
// Ppmd8_UpdateBin(p);
{
unsigned freq = s->Freq;
CTX_PTR c = CTX(SUCCESSOR(s));
p->FoundState = s;
p->PrevSuccess = 1;
p->RunLength++;
s->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
// NextContext(p);
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
p->MaxContext = p->MinContext = c;
else
Ppmd8_UpdateModel(p);
}
return;
}
*prob = (UInt16)pr;
p->InitEsc = p->ExpEscape[pr >> 10];
// RangeEnc_EncodeBit_1(p, bound);
R->Low += bound;
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
RC_NORM_LOCAL(R)
PPMD_SetAllBitsIn256Bytes(charMask);
MASK(s->Symbol) = 0;
p->PrevSuccess = 0;
}
for (;;)
{
CPpmd_See *see;
CPpmd_State *s;
UInt32 sum, escFreq;
CPpmd8_Context *mc;
unsigned i, numMasked;
RC_NORM_REMOTE(p)
mc = p->MinContext;
numMasked = mc->NumStats;
do
{
p->OrderFall++;
if (!mc->Suffix)
return; /* EndMarker (symbol = -1) */
mc = Ppmd8_GetContext(p, mc->Suffix);
}
while (mc->NumStats == numMasked);
p->MinContext = mc;
see = Ppmd8_MakeEscFreq(p, numMasked, &escFreq);
s = Ppmd8_GetStats(p, p->MinContext);
sum = 0;
i = (unsigned)p->MinContext->NumStats + 1;
do
{
unsigned cur = s->Symbol;
if ((int)cur == symbol)
{
UInt32 low = sum;
UInt32 freq = s->Freq;
unsigned num2;
Ppmd_See_Update(see);
p->FoundState = s;
sum += escFreq;
num2 = i / 2;
i &= 1;
sum += freq & (0 - (UInt32)i);
if (num2 != 0)
{
s += i;
for (;;)
{
unsigned sym0 = s[0].Symbol;
unsigned sym1 = s[1].Symbol;
s += 2;
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
if (--num2 == 0)
break;
}
}
PPMD8_CORRECT_SUM_RANGE(p, sum);
RC_EncodeFinal(low, freq, sum);
Ppmd8_Update2(p);
return;
}
sum += (s->Freq & (unsigned)(MASK(cur)));
s++;
}
while (--i);
{
UInt32 total = sum + escFreq;
see->Summ = (UInt16)(see->Summ + total);
PPMD8_CORRECT_SUM_RANGE(p, total);
RC_Encode(sum, total - sum, total);
}
{
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
s--;
MASK(s->Symbol) = 0;
do
{
unsigned sym0 = s2[0].Symbol;
unsigned sym1 = s2[1].Symbol;
s2 += 2;
MASK(sym0) = 0;
MASK(sym1) = 0;
}
while (s2 < s);
}
}
}

10
C/Precomp.h Normal file
View file

@ -0,0 +1,10 @@
/* Precomp.h -- StdAfx
2013-11-12 : Igor Pavlov : Public domain */
#ifndef __7Z_PRECOMP_H
#define __7Z_PRECOMP_H
#include "Compiler.h"
/* #include "7zTypes.h" */
#endif

30
C/RotateDefs.h Normal file
View file

@ -0,0 +1,30 @@
/* RotateDefs.h -- Rotate functions
2015-03-25 : Igor Pavlov : Public domain */
#ifndef __ROTATE_DEFS_H
#define __ROTATE_DEFS_H
#ifdef _MSC_VER
#include <stdlib.h>
/* don't use _rotl with MINGW. It can insert slow call to function. */
/* #if (_MSC_VER >= 1200) */
#pragma intrinsic(_rotl)
#pragma intrinsic(_rotr)
/* #endif */
#define rotlFixed(x, n) _rotl((x), (n))
#define rotrFixed(x, n) _rotr((x), (n))
#else
/* new compilers can translate these macros to fast commands. */
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#endif
#endif

473
C/Sha1.c Normal file
View file

@ -0,0 +1,473 @@
/* Sha1.c -- SHA-1 Hash
2021-07-13 : Igor Pavlov : Public domain
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
#include "Precomp.h"
#include <string.h>
#include "CpuArch.h"
#include "RotateDefs.h"
#include "Sha1.h"
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#endif
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef _SHA_SUPPORTED
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define UPDATE_BLOCKS(p) Sha1_UpdateBlocks
#endif
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
{
SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks;
#ifdef _SHA_SUPPORTED
if (algo != SHA1_ALGO_SW)
{
if (algo == SHA1_ALGO_DEFAULT)
func = g_FUNC_UPDATE_BLOCKS;
else
{
if (algo != SHA1_ALGO_HW)
return False;
func = g_FUNC_UPDATE_BLOCKS_HW;
if (!func)
return False;
}
}
#else
if (algo > 1)
return False;
#endif
p->func_UpdateBlocks = func;
return True;
}
/* define it for speed optimization */
// #define _SHA1_UNROLL
// allowed unroll steps: (1, 2, 4, 5, 20)
#ifdef _SHA1_UNROLL
#define STEP_PRE 20
#define STEP_MAIN 20
#else
#define _SHA1_BIG_W
#define STEP_PRE 5
#define STEP_MAIN 5
#endif
#ifdef _SHA1_BIG_W
#define kNumW 80
#define w(i) W[i]
#else
#define kNumW 16
#define w(i) W[(i)&15]
#endif
#define w0(i) (W[i] = GetBe32(data + (size_t)(i) * 4))
#define w1(i) (w(i) = rotlFixed(w((size_t)(i)-3) ^ w((size_t)(i)-8) ^ w((size_t)(i)-14) ^ w((size_t)(i)-16), 1))
#define f0(x,y,z) ( 0x5a827999 + (z^(x&(y^z))) )
#define f1(x,y,z) ( 0x6ed9eba1 + (x^y^z) )
#define f2(x,y,z) ( 0x8f1bbcdc + ((x&y)|(z&(x|y))) )
#define f3(x,y,z) ( 0xca62c1d6 + (x^y^z) )
/*
#define T1(fx, ww) \
tmp = e + fx(b,c,d) + ww + rotlFixed(a, 5); \
e = d; \
d = c; \
c = rotlFixed(b, 30); \
b = a; \
a = tmp; \
*/
#define T5(a,b,c,d,e, fx, ww) \
e += fx(b,c,d) + ww + rotlFixed(a, 5); \
b = rotlFixed(b, 30); \
/*
#define R1(i, fx, wx) \
T1 ( fx, wx(i)); \
#define R2(i, fx, wx) \
R1 ( (i) , fx, wx); \
R1 ( (i) + 1, fx, wx); \
#define R4(i, fx, wx) \
R2 ( (i) , fx, wx); \
R2 ( (i) + 2, fx, wx); \
*/
#define M5(i, fx, wx0, wx1) \
T5 ( a,b,c,d,e, fx, wx0((i) ) ); \
T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \
#define R5(i, fx, wx) \
M5 ( i, fx, wx, wx) \
#if STEP_PRE > 5
#define R20_START \
R5 ( 0, f0, w0); \
R5 ( 5, f0, w0); \
R5 ( 10, f0, w0); \
M5 ( 15, f0, w0, w1); \
#elif STEP_PRE == 5
#define R20_START \
{ size_t i; for (i = 0; i < 15; i += STEP_PRE) \
{ R5(i, f0, w0); } } \
M5 ( 15, f0, w0, w1); \
#else
#if STEP_PRE == 1
#define R_PRE R1
#elif STEP_PRE == 2
#define R_PRE R2
#elif STEP_PRE == 4
#define R_PRE R4
#endif
#define R20_START \
{ size_t i; for (i = 0; i < 16; i += STEP_PRE) \
{ R_PRE(i, f0, w0); } } \
R4 ( 16, f0, w1); \
#endif
#if STEP_MAIN > 5
#define R20(ii, fx) \
R5 ( (ii) , fx, w1); \
R5 ( (ii) + 5 , fx, w1); \
R5 ( (ii) + 10, fx, w1); \
R5 ( (ii) + 15, fx, w1); \
#else
#if STEP_MAIN == 1
#define R_MAIN R1
#elif STEP_MAIN == 2
#define R_MAIN R2
#elif STEP_MAIN == 4
#define R_MAIN R4
#elif STEP_MAIN == 5
#define R_MAIN R5
#endif
#define R20(ii, fx) \
{ size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
{ R_MAIN(i, fx, w1); } } \
#endif
void Sha1_InitState(CSha1 *p)
{
p->count = 0;
p->state[0] = 0x67452301;
p->state[1] = 0xEFCDAB89;
p->state[2] = 0x98BADCFE;
p->state[3] = 0x10325476;
p->state[4] = 0xC3D2E1F0;
}
void Sha1_Init(CSha1 *p)
{
p->func_UpdateBlocks =
#ifdef _SHA_SUPPORTED
g_FUNC_UPDATE_BLOCKS;
#else
NULL;
#endif
Sha1_InitState(p);
}
MY_NO_INLINE
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
{
UInt32 a, b, c, d, e;
UInt32 W[kNumW];
// if (numBlocks != 0x1264378347) return;
if (numBlocks == 0)
return;
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
do
{
#if STEP_PRE < 5 || STEP_MAIN < 5
UInt32 tmp;
#endif
R20_START
R20(20, f1);
R20(40, f2);
R20(60, f3);
a += state[0];
b += state[1];
c += state[2];
d += state[3];
e += state[4];
state[0] = a;
state[1] = b;
state[2] = c;
state[3] = d;
state[4] = e;
data += 64;
}
while (--numBlocks);
}
#define Sha1_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
{
if (size == 0)
return;
{
unsigned pos = (unsigned)p->count & 0x3F;
unsigned num;
p->count += size;
num = 64 - pos;
if (num > size)
{
memcpy(p->buffer + pos, data, size);
return;
}
if (pos != 0)
{
size -= num;
memcpy(p->buffer + pos, data, num);
data += num;
Sha1_UpdateBlock(p);
}
}
{
size_t numBlocks = size >> 6;
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
size &= 0x3F;
if (size == 0)
return;
data += (numBlocks << 6);
memcpy(p->buffer, data, size);
}
}
void Sha1_Final(CSha1 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
p->buffer[pos++] = 0x80;
if (pos > (64 - 8))
{
while (pos != 64) { p->buffer[pos++] = 0; }
// memset(&p->buf.buffer[pos], 0, 64 - pos);
Sha1_UpdateBlock(p);
pos = 0;
}
/*
if (pos & 3)
{
p->buffer[pos] = 0;
p->buffer[pos + 1] = 0;
p->buffer[pos + 2] = 0;
pos += 3;
pos &= ~3;
}
{
for (; pos < 64 - 8; pos += 4)
*(UInt32 *)(&p->buffer[pos]) = 0;
}
*/
memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
Sha1_UpdateBlock(p);
SetBe32(digest, p->state[0]);
SetBe32(digest + 4, p->state[1]);
SetBe32(digest + 8, p->state[2]);
SetBe32(digest + 12, p->state[3]);
SetBe32(digest + 16, p->state[4]);
Sha1_InitState(p);
}
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
{
const UInt64 numBits = (p->count + size) << 3;
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));
// SetBe32((UInt32 *)(block + size), 0x80000000);
SetUi32((UInt32 *)(void *)(block + size), 0x80);
size += 4;
while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4)
{
*((UInt32 *)(void *)(block + size)) = 0;
size += 4;
}
}
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
{
MY_ALIGN (16)
UInt32 st[SHA1_NUM_DIGEST_WORDS];
st[0] = p->state[0];
st[1] = p->state[1];
st[2] = p->state[2];
st[3] = p->state[3];
st[4] = p->state[4];
UPDATE_BLOCKS(p)(st, data, 1);
SetBe32(destDigest + 0 , st[0]);
SetBe32(destDigest + 1 * 4, st[1]);
SetBe32(destDigest + 2 * 4, st[2]);
SetBe32(destDigest + 3 * 4, st[3]);
SetBe32(destDigest + 4 * 4, st[4]);
}
void Sha1Prepare()
{
#ifdef _SHA_SUPPORTED
SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
f = Sha1_UpdateBlocks;
f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
#ifndef USE_MY_MM
if (CPU_IsSupported_SHA()
&& CPU_IsSupported_SSSE3()
// && CPU_IsSupported_SSE41()
)
#endif
#else
if (CPU_IsSupported_SHA1())
#endif
{
// printf("\n========== HW SHA1 ======== \n");
#if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
/* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
It generated incorrect SHA-1 code.
21.03 : we test sha1-hardware code at runtime initialization */
#pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
Byte data[64];
unsigned i;
for (i = 0; i < sizeof(data); i += 2)
{
data[i ] = (Byte)(i);
data[i + 1] = (Byte)(i + 1);
}
Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
if ( state[0] != 0x9acd7297
|| state[1] != 0x4624d898
|| state[2] != 0x0bf079f0
|| state[3] != 0x031e61b3
|| state[4] != 0x8323fe20)
{
// printf("\n========== SHA-1 hardware version failure ======== \n");
}
else
#endif
{
f = f_hw = Sha1_UpdateBlocks_HW;
}
}
g_FUNC_UPDATE_BLOCKS = f;
g_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}

76
C/Sha1.h Normal file
View file

@ -0,0 +1,76 @@
/* Sha1.h -- SHA-1 Hash
2021-02-08 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA1_H
#define __7Z_SHA1_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define SHA1_NUM_BLOCK_WORDS 16
#define SHA1_NUM_DIGEST_WORDS 5
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
if (the system supports different SHA1 code implementations)
{
(CSha1::func_UpdateBlocks) will be used
(CSha1::func_UpdateBlocks) can be set by
Sha1_Init() - to default (fastest)
Sha1_SetFunction() - to any algo
}
else
{
(CSha1::func_UpdateBlocks) is ignored.
}
*/
typedef struct
{
SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
UInt64 __pad_2[2];
UInt32 state[SHA1_NUM_DIGEST_WORDS];
UInt32 __pad_3[3];
Byte buffer[SHA1_BLOCK_SIZE];
} CSha1;
#define SHA1_ALGO_DEFAULT 0
#define SHA1_ALGO_SW 1
#define SHA1_ALGO_HW 2
/*
Sha1_SetFunction()
return:
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
1 - func_UpdateBlocks was set according (algo) value.
*/
BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo);
void Sha1_InitState(CSha1 *p);
void Sha1_Init(CSha1 *p);
void Sha1_Update(CSha1 *p, const Byte *data, size_t size);
void Sha1_Final(CSha1 *p, Byte *digest);
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
// void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
/*
call Sha1Prepare() once at program start.
It prepares all supported implementations, and detects the fastest implementation.
*/
void Sha1Prepare(void);
EXTERN_C_END
#endif

373
C/Sha1Opt.c Normal file
View file

@ -0,0 +1,373 @@
/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#pragma clang diagnostic ignored "-Wvector-conversion"
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1910
#endif
#if _MSC_VER >= USE_VER_MIN
#define USE_HW_SHA
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifdef USE_HW_SHA
// #pragma message("Sha1 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/*
SHA1 uses:
SSE2:
_mm_loadu_si128
_mm_storeu_si128
_mm_set_epi32
_mm_add_epi32
_mm_shuffle_epi32 / pshufd
_mm_xor_si128
_mm_cvtsi128_si32
_mm_cvtsi32_si128
SSSE3:
_mm_shuffle_epi8 / pshufb
SHA:
_mm_sha1*
*/
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
SHUFFLE_EPI8(m, mask); \
#define SM1(m0, m1, m2, m3) \
SHA1_MSG1(m0, m1); \
#define SM2(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
SHA1_MSG2(m3, m2); \
#define SM3(m0, m1, m2, m3) \
XOR_SI128(m3, m1); \
SM1(m0, m1, m2, m3) \
SHA1_MSG2(m3, m2); \
#define NNN(m0, m1, m2, m3)
#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
e1 = abcd; \
SHA1_RND4(abcd, e0, (k) / 5); \
SHA1_NEXTE(e1, m1); \
OP(m0, m1, m2, m3); \
#define R16(k, mx, OP0, OP1, OP2, OP3) \
R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
#define PREPARE_STATE \
SHUFFLE_EPI32 (abcd, 0x1B); \
SHUFFLE_EPI32 (e0, 0x1B); \
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
__m128i abcd, e0;
if (numBlocks == 0)
return;
abcd = _mm_loadu_si128((const __m128i *) (const void *) &state[0]); // dbca
e0 = _mm_cvtsi32_si128((int)state[4]); // 000e
PREPARE_STATE
do
{
__m128i abcd_save, e2;
__m128i m0, m1, m2, m3;
__m128i e1;
abcd_save = abcd;
e2 = e0;
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
ADD_EPI32(e0, m0);
R16 ( 0, m0, SM1, SM3, SM3, SM3 );
R16 ( 1, m0, SM3, SM3, SM3, SM3 );
R16 ( 2, m0, SM3, SM3, SM3, SM3 );
R16 ( 3, m0, SM3, SM3, SM3, SM3 );
R16 ( 4, e2, SM2, NNN, NNN, NNN );
ADD_EPI32(abcd, abcd_save);
data += 64;
}
while (--numBlocks);
PREPARE_STATE
_mm_storeu_si128((__m128i *) (void *) state, abcd);
*(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
}
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
// #pragma message("=== Sha1 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
#ifdef MY_CPU_BE
#define MY_rev32_for_LE(x)
#else
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
#endif
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
#define LOAD_SHUFFLE(m, k) \
m = LOAD_128((data + (k) * 16)); \
MY_rev32_for_LE(m); \
#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
#define SU1(dest, src) dest = vsha1su1q_u32(dest, src);
#define C(e) abcd = vsha1cq_u32(abcd, e, t);
#define P(e) abcd = vsha1pq_u32(abcd, e, t);
#define M(e) abcd = vsha1mq_u32(abcd, e, t);
#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
#define T(m, c) t = vaddq_u32(m, c)
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
v128 abcd;
v128 c0, c1, c2, c3;
uint32_t e0;
if (numBlocks == 0)
return;
c0 = vdupq_n_u32(0x5a827999);
c1 = vdupq_n_u32(0x6ed9eba1);
c2 = vdupq_n_u32(0x8f1bbcdc);
c3 = vdupq_n_u32(0xca62c1d6);
abcd = LOAD_128(&state[0]);
e0 = state[4];
do
{
v128 abcd_save;
v128 m0, m1, m2, m3;
v128 t;
uint32_t e0_save, e1;
abcd_save = abcd;
e0_save = e0;
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
T(m0, c0); H(e1); C(e0);
T(m1, c0); SU0(m0, m1, m2); H(e0); C(e1);
T(m2, c0); SU0(m1, m2, m3); SU1(m0, m3); H(e1); C(e0);
T(m3, c0); SU0(m2, m3, m0); SU1(m1, m0); H(e0); C(e1);
T(m0, c0); SU0(m3, m0, m1); SU1(m2, m1); H(e1); C(e0);
T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
T(m2, c1); SU0(m1, m2, m3); SU1(m0, m3); H(e1); P(e0);
T(m3, c1); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
T(m0, c1); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
T(m3, c2); SU0(m2, m3, m0); SU1(m1, m0); H(e0); M(e1);
T(m0, c2); SU0(m3, m0, m1); SU1(m2, m1); H(e1); M(e0);
T(m1, c2); SU0(m0, m1, m2); SU1(m3, m2); H(e0); M(e1);
T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
T(m3, c3); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
T(m0, c3); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
T(m1, c3); SU1(m3, m2); H(e0); P(e1);
T(m2, c3); H(e1); P(e0);
T(m3, c3); H(e0); P(e1);
abcd = vaddq_u32(abcd, abcd_save);
e0 += e0_save;
data += 64;
}
while (--numBlocks);
STORE_128(&state[0], abcd);
state[4] = e0;
}
#endif // USE_HW_SHA
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// #include "Sha1.h"
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
#pragma message("Sha1 HW-SW stub was used")
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
{
Sha1_UpdateBlocks(state, data, numBlocks);
/*
UNUSED_VAR(state);
UNUSED_VAR(data);
UNUSED_VAR(numBlocks);
exit(1);
return;
*/
}
#endif

486
C/Sha256.c Normal file
View file

@ -0,0 +1,486 @@
/* Sha256.c -- SHA-256 Hash
2021-04-01 : Igor Pavlov : Public domain
This code is based on public domain code from Wei Dai's Crypto++ library. */
#include "Precomp.h"
#include <string.h>
#include "CpuArch.h"
#include "RotateDefs.h"
#include "Sha256.h"
#if defined(_MSC_VER) && (_MSC_VER < 1900)
// #define USE_MY_MM
#endif
#ifdef MY_CPU_X86_OR_AMD64
#ifdef _MSC_VER
#if _MSC_VER >= 1200
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#elif defined(MY_CPU_ARM_OR_ARM64)
#ifdef _MSC_VER
#if _MSC_VER >= 1910
#define _SHA_SUPPORTED
#endif
#elif defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define _SHA_SUPPORTED
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define _SHA_SUPPORTED
#endif
#endif
#endif
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef _SHA_SUPPORTED
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
#else
#define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
#endif
BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
{
SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
#ifdef _SHA_SUPPORTED
if (algo != SHA256_ALGO_SW)
{
if (algo == SHA256_ALGO_DEFAULT)
func = g_FUNC_UPDATE_BLOCKS;
else
{
if (algo != SHA256_ALGO_HW)
return False;
func = g_FUNC_UPDATE_BLOCKS_HW;
if (!func)
return False;
}
}
#else
if (algo > 1)
return False;
#endif
p->func_UpdateBlocks = func;
return True;
}
/* define it for speed optimization */
#ifdef _SFX
#define STEP_PRE 1
#define STEP_MAIN 1
#else
#define STEP_PRE 2
#define STEP_MAIN 4
// #define _SHA256_UNROLL
#endif
#if STEP_MAIN != 16
#define _SHA256_BIG_W
#endif
void Sha256_InitState(CSha256 *p)
{
p->count = 0;
p->state[0] = 0x6a09e667;
p->state[1] = 0xbb67ae85;
p->state[2] = 0x3c6ef372;
p->state[3] = 0xa54ff53a;
p->state[4] = 0x510e527f;
p->state[5] = 0x9b05688c;
p->state[6] = 0x1f83d9ab;
p->state[7] = 0x5be0cd19;
}
void Sha256_Init(CSha256 *p)
{
p->func_UpdateBlocks =
#ifdef _SHA_SUPPORTED
g_FUNC_UPDATE_BLOCKS;
#else
NULL;
#endif
Sha256_InitState(p);
}
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
#define Ch(x,y,z) (z^(x&(y^z)))
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe32(data + ((size_t)(j) + i) * 4))
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
#ifdef _SHA256_BIG_W
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
#define w(j, i) W[(size_t)(j) + i]
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
#else
#if STEP_MAIN == 16
#define w(j, i) W[(i) & 15]
#else
#define w(j, i) W[((size_t)(j) + (i)) & 15]
#endif
#define blk2(j, i) (w(j, i) += blk2_main(j, i))
#endif
#define W_MAIN(i) blk2(j, i)
#define T1(wx, i) \
tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
h = g; \
g = f; \
f = e; \
e = d + tmp; \
tmp += S0(a) + Maj(a, b, c); \
d = c; \
c = b; \
b = a; \
a = tmp; \
#define R1_PRE(i) T1( W_PRE, i)
#define R1_MAIN(i) T1( W_MAIN, i)
#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
#define R2_MAIN(i) \
R1_MAIN(i) \
R1_MAIN(i + 1) \
#endif
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
#define T4( a,b,c,d,e,f,g,h, wx, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
tmp = h; \
h += d; \
d = tmp + S0(a) + Maj(a, b, c); \
#define R4( wx, i) \
T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
#define R4_PRE(i) R4( W_PRE, i)
#define R4_MAIN(i) R4( W_MAIN, i)
#define T8( a,b,c,d,e,f,g,h, wx, i) \
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
d += h; \
h += S0(a) + Maj(a, b, c); \
#define R8( wx, i) \
T8 ( a,b,c,d,e,f,g,h, wx, i ); \
T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
#define R8_PRE(i) R8( W_PRE, i)
#define R8_MAIN(i) R8( W_MAIN, i)
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
// static
extern MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
#define K SHA256_K_ARRAY
MY_NO_INLINE
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
{
UInt32 W
#ifdef _SHA256_BIG_W
[64];
#else
[16];
#endif
unsigned j;
UInt32 a,b,c,d,e,f,g,h;
#if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
UInt32 tmp;
#endif
a = state[0];
b = state[1];
c = state[2];
d = state[3];
e = state[4];
f = state[5];
g = state[6];
h = state[7];
while (numBlocks)
{
for (j = 0; j < 16; j += STEP_PRE)
{
#if STEP_PRE > 4
#if STEP_PRE < 8
R4_PRE(0);
#else
R8_PRE(0);
#if STEP_PRE == 16
R8_PRE(8);
#endif
#endif
#else
R1_PRE(0);
#if STEP_PRE >= 2
R1_PRE(1);
#if STEP_PRE >= 4
R1_PRE(2);
R1_PRE(3);
#endif
#endif
#endif
}
for (j = 16; j < 64; j += STEP_MAIN)
{
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
#if STEP_MAIN < 8
R4_MAIN(0);
#else
R8_MAIN(0);
#if STEP_MAIN == 16
R8_MAIN(8);
#endif
#endif
#else
R1_MAIN(0);
#if STEP_MAIN >= 2
R1_MAIN(1);
#if STEP_MAIN >= 4
R2_MAIN(2);
#if STEP_MAIN >= 8
R2_MAIN(4);
R2_MAIN(6);
#if STEP_MAIN >= 16
R2_MAIN(8);
R2_MAIN(10);
R2_MAIN(12);
R2_MAIN(14);
#endif
#endif
#endif
#endif
#endif
}
a += state[0]; state[0] = a;
b += state[1]; state[1] = b;
c += state[2]; state[2] = c;
d += state[3]; state[3] = d;
e += state[4]; state[4] = e;
f += state[5]; state[5] = f;
g += state[6]; state[6] = g;
h += state[7]; state[7] = h;
data += 64;
numBlocks--;
}
/* Wipe variables */
/* memset(W, 0, sizeof(W)); */
}
#undef S0
#undef S1
#undef s0
#undef s1
#undef K
#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
{
if (size == 0)
return;
{
unsigned pos = (unsigned)p->count & 0x3F;
unsigned num;
p->count += size;
num = 64 - pos;
if (num > size)
{
memcpy(p->buffer + pos, data, size);
return;
}
if (pos != 0)
{
size -= num;
memcpy(p->buffer + pos, data, num);
data += num;
Sha256_UpdateBlock(p);
}
}
{
size_t numBlocks = size >> 6;
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
size &= 0x3F;
if (size == 0)
return;
data += (numBlocks << 6);
memcpy(p->buffer, data, size);
}
}
void Sha256_Final(CSha256 *p, Byte *digest)
{
unsigned pos = (unsigned)p->count & 0x3F;
unsigned i;
p->buffer[pos++] = 0x80;
if (pos > (64 - 8))
{
while (pos != 64) { p->buffer[pos++] = 0; }
// memset(&p->buf.buffer[pos], 0, 64 - pos);
Sha256_UpdateBlock(p);
pos = 0;
}
/*
if (pos & 3)
{
p->buffer[pos] = 0;
p->buffer[pos + 1] = 0;
p->buffer[pos + 2] = 0;
pos += 3;
pos &= ~3;
}
{
for (; pos < 64 - 8; pos += 4)
*(UInt32 *)(&p->buffer[pos]) = 0;
}
*/
memset(&p->buffer[pos], 0, (64 - 8) - pos);
{
UInt64 numBits = (p->count << 3);
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
}
Sha256_UpdateBlock(p);
for (i = 0; i < 8; i += 2)
{
UInt32 v0 = p->state[i];
UInt32 v1 = p->state[(size_t)i + 1];
SetBe32(digest , v0);
SetBe32(digest + 4, v1);
digest += 8;
}
Sha256_InitState(p);
}
void Sha256Prepare()
{
#ifdef _SHA_SUPPORTED
SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
f = Sha256_UpdateBlocks;
f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
#ifndef USE_MY_MM
if (CPU_IsSupported_SHA()
&& CPU_IsSupported_SSSE3()
// && CPU_IsSupported_SSE41()
)
#endif
#else
if (CPU_IsSupported_SHA2())
#endif
{
// printf("\n========== HW SHA256 ======== \n");
f = f_hw = Sha256_UpdateBlocks_HW;
}
g_FUNC_UPDATE_BLOCKS = f;
g_FUNC_UPDATE_BLOCKS_HW = f_hw;
#endif
}

76
C/Sha256.h Normal file
View file

@ -0,0 +1,76 @@
/* Sha256.h -- SHA-256 Hash
2021-01-01 : Igor Pavlov : Public domain */
#ifndef __7Z_SHA256_H
#define __7Z_SHA256_H
#include "7zTypes.h"
EXTERN_C_BEGIN
#define SHA256_NUM_BLOCK_WORDS 16
#define SHA256_NUM_DIGEST_WORDS 8
#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
if (the system supports different SHA256 code implementations)
{
(CSha256::func_UpdateBlocks) will be used
(CSha256::func_UpdateBlocks) can be set by
Sha256_Init() - to default (fastest)
Sha256_SetFunction() - to any algo
}
else
{
(CSha256::func_UpdateBlocks) is ignored.
}
*/
typedef struct
{
SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
UInt64 count;
UInt64 __pad_2[2];
UInt32 state[SHA256_NUM_DIGEST_WORDS];
Byte buffer[SHA256_BLOCK_SIZE];
} CSha256;
#define SHA256_ALGO_DEFAULT 0
#define SHA256_ALGO_SW 1
#define SHA256_ALGO_HW 2
/*
Sha256_SetFunction()
return:
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
1 - func_UpdateBlocks was set according (algo) value.
*/
BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo);
void Sha256_InitState(CSha256 *p);
void Sha256_Init(CSha256 *p);
void Sha256_Update(CSha256 *p, const Byte *data, size_t size);
void Sha256_Final(CSha256 *p, Byte *digest);
// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
/*
call Sha256Prepare() once at program start.
It prepares all supported implementations, and detects the fastest implementation.
*/
void Sha256Prepare(void);
EXTERN_C_END
#endif

373
C/Sha256Opt.c Normal file
View file

@ -0,0 +1,373 @@
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
2021-04-01 : Igor Pavlov : Public domain */
#include "Precomp.h"
#if defined(_MSC_VER)
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
// #define USE_MY_MM
#endif
#endif
#include "CpuArch.h"
#ifdef MY_CPU_X86_OR_AMD64
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
#if defined(_MSC_VER)
// SSSE3: for clang-cl:
#include <tmmintrin.h>
#define __SHA__
#endif
#endif
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 8) // fix that check
#define USE_HW_SHA
#ifndef __SHA__
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
// #pragma GCC target("sha,ssse3")
#endif
#endif
#elif defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1800) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#ifdef USE_MY_MM
#define USE_VER_MIN 1300
#else
#define USE_VER_MIN 1910
#endif
#if _MSC_VER >= USE_VER_MIN
#define USE_HW_SHA
#endif
#endif
// #endif // MY_CPU_X86_OR_AMD64
#ifdef USE_HW_SHA
// #pragma message("Sha256 HW")
// #include <wmmintrin.h>
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
#include <immintrin.h>
#else
#include <emmintrin.h>
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
// #include <intrin.h>
#endif
#ifdef USE_MY_MM
#include "My_mm.h"
#endif
#endif
/*
SHA256 uses:
SSE2:
_mm_loadu_si128
_mm_storeu_si128
_mm_set_epi32
_mm_add_epi32
_mm_shuffle_epi32 / pshufd
SSSE3:
_mm_shuffle_epi8 / pshufb
_mm_alignr_epi8
SHA:
_mm_sha256*
*/
// K array must be aligned for 16-bytes at least.
// The compiler can look align attribute and selects
// movdqu - for code without align attribute
// movdqa - for code with align attribute
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
#define K SHA256_K_ARRAY
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
#define LOAD_SHUFFLE(m, k) \
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
m = _mm_shuffle_epi8(m, mask); \
#define SM1(g0, g1, g2, g3) \
SHA256_MSG1(g3, g0); \
#define SM2(g0, g1, g2, g3) \
tmp = _mm_alignr_epi8(g1, g0, 4); \
ADD_EPI32(g2, tmp); \
SHA25G_MSG2(g2, g1); \
// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
#define NNN(g0, g1, g2, g3)
#define RND2(t0, t1) \
t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
#define RND2_0(m, k) \
msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
RND2(state0, state1); \
msg = _mm_shuffle_epi32(msg, 0x0E); \
#define RND2_1 \
RND2(state1, state0); \
// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
RND2_0(g0, k); \
OP0(g0, g1, g2, g3); \
RND2_1; \
OP1(g0, g1, g2, g3); \
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
#define PREPARE_STATE \
tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
state0 = _mm_shuffle_epi32(state1, 0x1B); /* efgh */ \
state1 = state0; \
state0 = _mm_unpacklo_epi64(state0, tmp); /* cdgh */ \
state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
__m128i tmp;
__m128i state0, state1;
if (numBlocks == 0)
return;
state0 = _mm_loadu_si128((const __m128i *) (const void *) &state[0]);
state1 = _mm_loadu_si128((const __m128i *) (const void *) &state[4]);
PREPARE_STATE
do
{
__m128i state0_save, state1_save;
__m128i m0, m1, m2, m3;
__m128i msg;
// #define msg tmp
state0_save = state0;
state1_save = state1;
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
ADD_EPI32(state0, state0_save);
ADD_EPI32(state1, state1_save);
data += 64;
}
while (--numBlocks);
PREPARE_STATE
_mm_storeu_si128((__m128i *) (void *) &state[0], state0);
_mm_storeu_si128((__m128i *) (void *) &state[4], state1);
}
#endif // USE_HW_SHA
#elif defined(MY_CPU_ARM_OR_ARM64)
#if defined(__clang__)
#if (__clang_major__ >= 8) // fix that check
#define USE_HW_SHA
#endif
#elif defined(__GNUC__)
#if (__GNUC__ >= 6) // fix that check
#define USE_HW_SHA
#endif
#elif defined(_MSC_VER)
#if _MSC_VER >= 1910
#define USE_HW_SHA
#endif
#endif
#ifdef USE_HW_SHA
// #pragma message("=== Sha256 HW === ")
#if defined(__clang__) || defined(__GNUC__)
#ifdef MY_CPU_ARM64
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
#else
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
#endif
#else
// _MSC_VER
// for arm32
#define _ARM_USE_NEW_NEON_INTRINSICS
#endif
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
typedef uint32x4_t v128;
// typedef __n128 v128; // MSVC
#ifdef MY_CPU_BE
#define MY_rev32_for_LE(x)
#else
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
#endif
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
#define LOAD_SHUFFLE(m, k) \
m = LOAD_128((data + (k) * 16)); \
MY_rev32_for_LE(m); \
// K array must be aligned for 16-bytes at least.
extern
MY_ALIGN(64)
const UInt32 SHA256_K_ARRAY[64];
#define K SHA256_K_ARRAY
#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src);
#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0)
#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1)
#define NNN(g0, g1, g2, g3)
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
tmp = state0; \
state0 = vsha256hq_u32( state0, state1, msg ); \
state1 = vsha256h2q_u32( state1, tmp, msg ); \
OP0(g0, g1, g2, g3); \
OP1(g0, g1, g2, g3); \
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA
ATTRIB_SHA
#endif
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
v128 state0, state1;
if (numBlocks == 0)
return;
state0 = LOAD_128(&state[0]);
state1 = LOAD_128(&state[4]);
do
{
v128 state0_save, state1_save;
v128 m0, m1, m2, m3;
v128 msg, tmp;
state0_save = state0;
state1_save = state1;
LOAD_SHUFFLE (m0, 0)
LOAD_SHUFFLE (m1, 1)
LOAD_SHUFFLE (m2, 2)
LOAD_SHUFFLE (m3, 3)
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
state0 = vaddq_u32(state0, state0_save);
state1 = vaddq_u32(state1, state1_save);
data += 64;
}
while (--numBlocks);
STORE_128(&state[0], state0);
STORE_128(&state[4], state1);
}
#endif // USE_HW_SHA
#endif // MY_CPU_ARM_OR_ARM64
#ifndef USE_HW_SHA
// #error Stop_Compiling_UNSUPPORTED_SHA
// #include <stdlib.h>
// #include "Sha256.h"
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
#pragma message("Sha256 HW-SW stub was used")
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
{
Sha256_UpdateBlocks(state, data, numBlocks);
/*
UNUSED_VAR(state);
UNUSED_VAR(data);
UNUSED_VAR(numBlocks);
exit(1);
return;
*/
}
#endif

141
C/Sort.c Normal file
View file

@ -0,0 +1,141 @@
/* Sort.c -- Sort functions
2014-04-05 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Sort.h"
#define HeapSortDown(p, k, size, temp) \
{ for (;;) { \
size_t s = (k << 1); \
if (s > size) break; \
if (s < size && p[s + 1] > p[s]) s++; \
if (temp >= p[s]) break; \
p[k] = p[s]; k = s; \
} p[k] = temp; }
void HeapSort(UInt32 *p, size_t size)
{
if (size <= 1)
return;
p--;
{
size_t i = size / 2;
do
{
UInt32 temp = p[i];
size_t k = i;
HeapSortDown(p, k, size, temp)
}
while (--i != 0);
}
/*
do
{
size_t k = 1;
UInt32 temp = p[size];
p[size--] = p[1];
HeapSortDown(p, k, size, temp)
}
while (size > 1);
*/
while (size > 3)
{
UInt32 temp = p[size];
size_t k = (p[3] > p[2]) ? 3 : 2;
p[size--] = p[1];
p[1] = p[k];
HeapSortDown(p, k, size, temp)
}
{
UInt32 temp = p[size];
p[size] = p[1];
if (size > 2 && p[2] < temp)
{
p[1] = p[2];
p[2] = temp;
}
else
p[1] = temp;
}
}
void HeapSort64(UInt64 *p, size_t size)
{
if (size <= 1)
return;
p--;
{
size_t i = size / 2;
do
{
UInt64 temp = p[i];
size_t k = i;
HeapSortDown(p, k, size, temp)
}
while (--i != 0);
}
/*
do
{
size_t k = 1;
UInt64 temp = p[size];
p[size--] = p[1];
HeapSortDown(p, k, size, temp)
}
while (size > 1);
*/
while (size > 3)
{
UInt64 temp = p[size];
size_t k = (p[3] > p[2]) ? 3 : 2;
p[size--] = p[1];
p[1] = p[k];
HeapSortDown(p, k, size, temp)
}
{
UInt64 temp = p[size];
p[size] = p[1];
if (size > 2 && p[2] < temp)
{
p[1] = p[2];
p[2] = temp;
}
else
p[1] = temp;
}
}
/*
#define HeapSortRefDown(p, vals, n, size, temp) \
{ size_t k = n; UInt32 val = vals[temp]; for (;;) { \
size_t s = (k << 1); \
if (s > size) break; \
if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
if (val >= vals[p[s]]) break; \
p[k] = p[s]; k = s; \
} p[k] = temp; }
void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
{
if (size <= 1)
return;
p--;
{
size_t i = size / 2;
do
{
UInt32 temp = p[i];
HeapSortRefDown(p, vals, i, size, temp);
}
while (--i != 0);
}
do
{
UInt32 temp = p[size];
p[size--] = p[1];
HeapSortRefDown(p, vals, 1, size, temp);
}
while (size > 1);
}
*/

18
C/Sort.h Normal file
View file

@ -0,0 +1,18 @@
/* Sort.h -- Sort functions
2014-04-05 : Igor Pavlov : Public domain */
#ifndef __7Z_SORT_H
#define __7Z_SORT_H
#include "7zTypes.h"
EXTERN_C_BEGIN
void HeapSort(UInt32 *p, size_t size);
void HeapSort64(UInt64 *p, size_t size);
/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */
EXTERN_C_END
#endif

540
C/Threads.c Normal file
View file

@ -0,0 +1,540 @@
/* Threads.c -- multithreading library
2021-12-21 : Igor Pavlov : Public domain */
#include "Precomp.h"
#ifdef _WIN32
#ifndef USE_THREADS_CreateThread
#include <process.h>
#endif
#include "Threads.h"
static WRes GetError()
{
DWORD res = GetLastError();
return res ? (WRes)res : 1;
}
static WRes HandleToWRes(HANDLE h) { return (h != NULL) ? 0 : GetError(); }
static WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); }
WRes HandlePtr_Close(HANDLE *p)
{
if (*p != NULL)
{
if (!CloseHandle(*p))
return GetError();
*p = NULL;
}
return 0;
}
WRes Handle_WaitObject(HANDLE h)
{
DWORD dw = WaitForSingleObject(h, INFINITE);
/*
(dw) result:
WAIT_OBJECT_0 // 0
WAIT_ABANDONED // 0x00000080 : is not compatible with Win32 Error space
WAIT_TIMEOUT // 0x00000102 : is compatible with Win32 Error space
WAIT_FAILED // 0xFFFFFFFF
*/
if (dw == WAIT_FAILED)
{
dw = GetLastError();
if (dw == 0)
return WAIT_FAILED;
}
return (WRes)dw;
}
#define Thread_Wait(p) Handle_WaitObject(*(p))
WRes Thread_Wait_Close(CThread *p)
{
WRes res = Thread_Wait(p);
WRes res2 = Thread_Close(p);
return (res != 0 ? res : res2);
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
#ifdef USE_THREADS_CreateThread
DWORD threadId;
*p = CreateThread(NULL, 0, func, param, 0, &threadId);
#else
unsigned threadId;
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return HandleToWRes(*p);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
#ifdef USE_THREADS_CreateThread
UNUSED_VAR(affinity)
return Thread_Create(p, func, param);
#else
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
HANDLE h;
WRes wres;
unsigned threadId;
h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
*p = h;
wres = HandleToWRes(h);
if (h)
{
{
// DWORD_PTR prevMask =
SetThreadAffinityMask(h, (DWORD_PTR)affinity);
/*
if (prevMask == 0)
{
// affinity change is non-critical error, so we can ignore it
// wres = GetError();
}
*/
}
{
DWORD prevSuspendCount = ResumeThread(h);
/* ResumeThread() returns:
0 : was_not_suspended
1 : was_resumed
-1 : error
*/
if (prevSuspendCount == (DWORD)-1)
wres = GetError();
}
}
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return wres;
#endif
}
static WRes Event_Create(CEvent *p, BOOL manualReset, int signaled)
{
*p = CreateEvent(NULL, manualReset, (signaled ? TRUE : FALSE), NULL);
return HandleToWRes(*p);
}
WRes Event_Set(CEvent *p) { return BOOLToWRes(SetEvent(*p)); }
WRes Event_Reset(CEvent *p) { return BOOLToWRes(ResetEvent(*p)); }
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled) { return Event_Create(p, TRUE, signaled); }
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled) { return Event_Create(p, FALSE, signaled); }
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p) { return ManualResetEvent_Create(p, 0); }
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p) { return AutoResetEvent_Create(p, 0); }
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// negative ((LONG)maxCount) is not supported in WIN32::CreateSemaphore()
*p = CreateSemaphore(NULL, (LONG)initCount, (LONG)maxCount, NULL);
return HandleToWRes(*p);
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
// if (Semaphore_IsCreated(p))
{
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
}
return Semaphore_Create(p, initCount, maxCount);
}
static WRes Semaphore_Release(CSemaphore *p, LONG releaseCount, LONG *previousCount)
{ return BOOLToWRes(ReleaseSemaphore(*p, releaseCount, previousCount)); }
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 num)
{ return Semaphore_Release(p, (LONG)num, NULL); }
WRes Semaphore_Release1(CSemaphore *p) { return Semaphore_ReleaseN(p, 1); }
WRes CriticalSection_Init(CCriticalSection *p)
{
/* InitializeCriticalSection() can raise exception:
Windows XP, 2003 : can raise a STATUS_NO_MEMORY exception
Windows Vista+ : no exceptions */
#ifdef _MSC_VER
__try
#endif
{
InitializeCriticalSection(p);
/* InitializeCriticalSectionAndSpinCount(p, 0); */
}
#ifdef _MSC_VER
__except (EXCEPTION_EXECUTE_HANDLER) { return ERROR_NOT_ENOUGH_MEMORY; }
#endif
return 0;
}
#else // _WIN32
// ---------- POSIX ----------
#ifndef __APPLE__
#ifndef _7ZIP_AFFINITY_DISABLE
// _GNU_SOURCE can be required for pthread_setaffinity_np() / CPU_ZERO / CPU_SET
#define _GNU_SOURCE
#endif
#endif
#include "Threads.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifdef _7ZIP_AFFINITY_SUPPORTED
// #include <sched.h>
#endif
// #include <stdio.h>
// #define PRF(p) p
#define PRF(p)
#define Print(s) PRF(printf("\n%s\n", s))
// #include <stdio.h>
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet)
{
// new thread in Posix probably inherits affinity from parrent thread
Print("Thread_Create_With_CpuSet");
pthread_attr_t attr;
int ret;
// int ret2;
p->_created = 0;
RINOK(pthread_attr_init(&attr));
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
if (!ret)
{
if (cpuSet)
{
#ifdef _7ZIP_AFFINITY_SUPPORTED
/*
printf("\n affinity :");
unsigned i;
for (i = 0; i < sizeof(*cpuSet) && i < 8; i++)
{
Byte b = *((const Byte *)cpuSet + i);
char temp[32];
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
temp[0] = GET_HEX_CHAR((b & 0xF));
temp[1] = GET_HEX_CHAR((b >> 4));
// temp[0] = GET_HEX_CHAR((b >> 4)); // big-endian
// temp[1] = GET_HEX_CHAR((b & 0xF)); // big-endian
temp[2] = 0;
printf("%s", temp);
}
printf("\n");
*/
// ret2 =
pthread_attr_setaffinity_np(&attr, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
#endif
}
ret = pthread_create(&p->_tid, &attr, func, param);
if (!ret)
{
p->_created = 1;
/*
if (cpuSet)
{
// ret2 =
pthread_setaffinity_np(p->_tid, sizeof(*cpuSet), cpuSet);
// if (ret2) ret = ret2;
}
*/
}
}
// ret2 =
pthread_attr_destroy(&attr);
// if (ret2 != 0) ret = ret2;
return ret;
}
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
return Thread_Create_With_CpuSet(p, func, param, NULL);
}
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
Print("Thread_Create_WithAffinity");
CCpuSet cs;
unsigned i;
CpuSet_Zero(&cs);
for (i = 0; i < sizeof(affinity) * 8; i++)
{
if (affinity == 0)
break;
if (affinity & 1)
{
CpuSet_Set(&cs, i);
}
affinity >>= 1;
}
return Thread_Create_With_CpuSet(p, func, param, &cs);
}
WRes Thread_Close(CThread *p)
{
// Print("Thread_Close");
int ret;
if (!p->_created)
return 0;
ret = pthread_detach(p->_tid);
p->_tid = 0;
p->_created = 0;
return ret;
}
WRes Thread_Wait_Close(CThread *p)
{
// Print("Thread_Wait_Close");
void *thread_return;
int ret;
if (!p->_created)
return EINVAL;
ret = pthread_join(p->_tid, &thread_return);
// probably we can't use that (_tid) after pthread_join(), so we close thread here
p->_created = 0;
p->_tid = 0;
return ret;
}
static WRes Event_Create(CEvent *p, int manualReset, int signaled)
{
RINOK(pthread_mutex_init(&p->_mutex, NULL));
RINOK(pthread_cond_init(&p->_cond, NULL));
p->_manual_reset = manualReset;
p->_state = (signaled ? True : False);
p->_created = 1;
return 0;
}
WRes ManualResetEvent_Create(CManualResetEvent *p, int signaled)
{ return Event_Create(p, True, signaled); }
WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent *p)
{ return ManualResetEvent_Create(p, 0); }
WRes AutoResetEvent_Create(CAutoResetEvent *p, int signaled)
{ return Event_Create(p, False, signaled); }
WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent *p)
{ return AutoResetEvent_Create(p, 0); }
WRes Event_Set(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
p->_state = True;
int res1 = pthread_cond_broadcast(&p->_cond);
int res2 = pthread_mutex_unlock(&p->_mutex);
return (res2 ? res2 : res1);
}
WRes Event_Reset(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
p->_state = False;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Wait(CEvent *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
while (p->_state == False)
{
// ETIMEDOUT
// ret =
pthread_cond_wait(&p->_cond, &p->_mutex);
// if (ret != 0) break;
}
if (p->_manual_reset == False)
{
p->_state = False;
}
return pthread_mutex_unlock(&p->_mutex);
}
WRes Event_Close(CEvent *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
WRes Semaphore_Create(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (initCount > maxCount || maxCount < 1)
return EINVAL;
RINOK(pthread_mutex_init(&p->_mutex, NULL));
RINOK(pthread_cond_init(&p->_cond, NULL));
p->_count = initCount;
p->_maxCount = maxCount;
p->_created = 1;
return 0;
}
WRes Semaphore_OptCreateInit(CSemaphore *p, UInt32 initCount, UInt32 maxCount)
{
if (Semaphore_IsCreated(p))
{
/*
WRes wres = Semaphore_Close(p);
if (wres != 0)
return wres;
*/
if (initCount > maxCount || maxCount < 1)
return EINVAL;
// return EINVAL; // for debug
p->_count = initCount;
p->_maxCount = maxCount;
return 0;
}
return Semaphore_Create(p, initCount, maxCount);
}
WRes Semaphore_ReleaseN(CSemaphore *p, UInt32 releaseCount)
{
UInt32 newCount;
int ret;
if (releaseCount < 1)
return EINVAL;
RINOK(pthread_mutex_lock(&p->_mutex));
newCount = p->_count + releaseCount;
if (newCount > p->_maxCount)
ret = ERROR_TOO_MANY_POSTS; // EINVAL;
else
{
p->_count = newCount;
ret = pthread_cond_broadcast(&p->_cond);
}
RINOK(pthread_mutex_unlock(&p->_mutex));
return ret;
}
WRes Semaphore_Wait(CSemaphore *p)
{
RINOK(pthread_mutex_lock(&p->_mutex));
while (p->_count < 1)
{
pthread_cond_wait(&p->_cond, &p->_mutex);
}
p->_count--;
return pthread_mutex_unlock(&p->_mutex);
}
WRes Semaphore_Close(CSemaphore *p)
{
if (!p->_created)
return 0;
p->_created = 0;
{
int res1 = pthread_mutex_destroy(&p->_mutex);
int res2 = pthread_cond_destroy(&p->_cond);
return (res1 ? res1 : res2);
}
}
WRes CriticalSection_Init(CCriticalSection *p)
{
// Print("CriticalSection_Init");
if (!p)
return EINTR;
return pthread_mutex_init(&p->_mutex, NULL);
}
void CriticalSection_Enter(CCriticalSection *p)
{
// Print("CriticalSection_Enter");
if (p)
{
// int ret =
pthread_mutex_lock(&p->_mutex);
}
}
void CriticalSection_Leave(CCriticalSection *p)
{
// Print("CriticalSection_Leave");
if (p)
{
// int ret =
pthread_mutex_unlock(&p->_mutex);
}
}
void CriticalSection_Delete(CCriticalSection *p)
{
// Print("CriticalSection_Delete");
if (p)
{
// int ret =
pthread_mutex_destroy(&p->_mutex);
}
}
LONG InterlockedIncrement(LONG volatile *addend)
{
// Print("InterlockedIncrement");
#ifdef USE_HACK_UNSAFE_ATOMIC
LONG val = *addend + 1;
*addend = val;
return val;
#else
return __sync_add_and_fetch(addend, 1);
#endif
}
#endif // _WIN32

Some files were not shown because too many files have changed in this diff Show more