mirror of
https://github.com/ip7z/7zip.git
synced 2026-01-21 23:40:16 +01:00
Compare commits
10 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5e96a82794 | ||
|
|
395149956d | ||
|
|
e5431fa6f5 | ||
|
|
e008ce3976 | ||
|
|
a7a1d4a241 | ||
|
|
89a73b9012 | ||
|
|
fc662341e6 | ||
|
|
5b39dc76f1 | ||
|
|
93be7d4abf | ||
|
|
a3e1d22737 |
|
|
@ -1,7 +1,12 @@
|
|||
; 7zAsm.asm -- ASM macros
|
||||
; 2021-12-25 : Igor Pavlov : Public domain
|
||||
; 2023-12-08 : Igor Pavlov : Public domain
|
||||
|
||||
|
||||
; UASM can require these changes
|
||||
; OPTION FRAMEPRESERVEFLAGS:ON
|
||||
; OPTION PROLOGUE:NONE
|
||||
; OPTION EPILOGUE:NONE
|
||||
|
||||
ifdef @wordsize
|
||||
; @wordsize is defined only in JWASM and ASMC and is not defined in MASM
|
||||
; @wordsize eq 8 for 64-bit x64
|
||||
|
|
@ -38,7 +43,7 @@ else
|
|||
endif
|
||||
endif
|
||||
|
||||
OPTION PROLOGUE:NONE
|
||||
OPTION PROLOGUE:NONE
|
||||
OPTION EPILOGUE:NONE
|
||||
|
||||
MY_ASM_START macro
|
||||
|
|
@ -116,10 +121,29 @@ endif
|
|||
x2_H equ DH
|
||||
x3_H equ BH
|
||||
|
||||
; r0_L equ AL
|
||||
; r1_L equ CL
|
||||
; r2_L equ DL
|
||||
; r3_L equ BL
|
||||
|
||||
; r0_H equ AH
|
||||
; r1_H equ CH
|
||||
; r2_H equ DH
|
||||
; r3_H equ BH
|
||||
|
||||
|
||||
ifdef x64
|
||||
x5_L equ BPL
|
||||
x6_L equ SIL
|
||||
x7_L equ DIL
|
||||
x8_L equ r8b
|
||||
x9_L equ r9b
|
||||
x10_L equ r10b
|
||||
x11_L equ r11b
|
||||
x12_L equ r12b
|
||||
x13_L equ r13b
|
||||
x14_L equ r14b
|
||||
x15_L equ r15b
|
||||
|
||||
r0 equ RAX
|
||||
r1 equ RCX
|
||||
|
|
@ -148,6 +172,22 @@ else
|
|||
r7 equ x7
|
||||
endif
|
||||
|
||||
x0_R equ r0
|
||||
x1_R equ r1
|
||||
x2_R equ r2
|
||||
x3_R equ r3
|
||||
x4_R equ r4
|
||||
x5_R equ r5
|
||||
x6_R equ r6
|
||||
x7_R equ r7
|
||||
x8_R equ r8
|
||||
x9_R equ r9
|
||||
x10_R equ r10
|
||||
x11_R equ r11
|
||||
x12_R equ r12
|
||||
x13_R equ r13
|
||||
x14_R equ r14
|
||||
x15_R equ r15
|
||||
|
||||
ifdef x64
|
||||
ifdef ABI_LINUX
|
||||
|
|
@ -195,6 +235,14 @@ REG_ABI_PARAM_0 equ REG_PARAM_0
|
|||
REG_ABI_PARAM_1_x equ REG_PARAM_1_x
|
||||
REG_ABI_PARAM_1 equ REG_PARAM_1
|
||||
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
|
||||
MY_PUSH_4_REGS
|
||||
endm
|
||||
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
|
||||
MY_POP_4_REGS
|
||||
endm
|
||||
|
||||
else
|
||||
; x64
|
||||
|
||||
|
|
@ -256,12 +304,25 @@ endm
|
|||
endif ; IS_LINUX
|
||||
|
||||
|
||||
MY_PUSH_PRESERVED_ABI_REGS macro
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
|
||||
if (IS_LINUX gt 0)
|
||||
MY_PUSH_2_REGS
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
endif
|
||||
endm
|
||||
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 macro
|
||||
if (IS_LINUX gt 0)
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
MY_PUSH_PRESERVED_ABI_REGS macro
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
|
|
@ -274,11 +335,7 @@ MY_POP_PRESERVED_ABI_REGS macro
|
|||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
if (IS_LINUX gt 0)
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
endm
|
||||
|
||||
endif ; x64
|
||||
|
|
|
|||
|
|
@ -1,180 +1,258 @@
|
|||
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
|
||||
; 2021-02-07 : Igor Pavlov : Public domain
|
||||
; 2023-12-08 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
rD equ r2
|
||||
rN equ r7
|
||||
rT equ r5
|
||||
NUM_WORDS equ 3
|
||||
UNROLL_CNT equ 2
|
||||
|
||||
ifdef x64
|
||||
num_VAR equ r8
|
||||
table_VAR equ r9
|
||||
else
|
||||
if (IS_CDECL gt 0)
|
||||
crc_OFFS equ (REG_SIZE * 5)
|
||||
data_OFFS equ (REG_SIZE + crc_OFFS)
|
||||
size_OFFS equ (REG_SIZE + data_OFFS)
|
||||
else
|
||||
size_OFFS equ (REG_SIZE * 5)
|
||||
endif
|
||||
table_OFFS equ (REG_SIZE + size_OFFS)
|
||||
num_VAR equ [r4 + size_OFFS]
|
||||
table_VAR equ [r4 + table_OFFS]
|
||||
if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
|
||||
.err <NUM_WORDS_IS_INCORRECT>
|
||||
endif
|
||||
if (UNROLL_CNT lt 1)
|
||||
.err <UNROLL_CNT_IS_INCORRECT>
|
||||
endif
|
||||
|
||||
SRCDAT equ rD + rN * 1 + 4 *
|
||||
rD equ r2
|
||||
rD_x equ x2
|
||||
rN equ r7
|
||||
rT equ r5
|
||||
|
||||
ifndef x64
|
||||
if (IS_CDECL gt 0)
|
||||
crc_OFFS equ (REG_SIZE * 5)
|
||||
data_OFFS equ (REG_SIZE + crc_OFFS)
|
||||
size_OFFS equ (REG_SIZE + data_OFFS)
|
||||
else
|
||||
size_OFFS equ (REG_SIZE * 5)
|
||||
endif
|
||||
table_OFFS equ (REG_SIZE + size_OFFS)
|
||||
endif
|
||||
|
||||
; rN + rD is same speed as rD, but we reduce one instruction in loop
|
||||
SRCDAT_1 equ rN + rD * 1 + 1 *
|
||||
SRCDAT_4 equ rN + rD * 1 + 4 *
|
||||
|
||||
CRC macro op:req, dest:req, src:req, t:req
|
||||
op dest, DWORD PTR [rT + src * 4 + 0400h * t]
|
||||
op dest, dword ptr [rT + @CatStr(src, _R) * 4 + 0400h * (t)]
|
||||
endm
|
||||
|
||||
CRC_XOR macro dest:req, src:req, t:req
|
||||
CRC xor, dest, src, t
|
||||
CRC xor, dest, src, t
|
||||
endm
|
||||
|
||||
CRC_MOV macro dest:req, src:req, t:req
|
||||
CRC mov, dest, src, t
|
||||
CRC mov, dest, src, t
|
||||
endm
|
||||
|
||||
MOVZXLO macro dest:req, src:req
|
||||
movzx dest, @CatStr(src, _L)
|
||||
endm
|
||||
|
||||
MOVZXHI macro dest:req, src:req
|
||||
movzx dest, @CatStr(src, _H)
|
||||
endm
|
||||
|
||||
; movzx x0, x0_L - is slow in some cpus (ivb), if same register for src and dest
|
||||
; movzx x3, x0_L sometimes is 0 cycles latency (not always)
|
||||
; movzx x3, x0_L sometimes is 0.5 cycles latency
|
||||
; movzx x3, x0_H is 2 cycles latency in some cpus
|
||||
|
||||
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
shr x0, 8
|
||||
CRC xor, x0, r6, 0
|
||||
dec rN
|
||||
movzx x6, byte ptr [rD]
|
||||
MOVZXLO x3, x0
|
||||
inc rD
|
||||
shr x0, 8
|
||||
xor x6, x3
|
||||
CRC_XOR x0, x6, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
MY_PROLOG macro crc_end:req
|
||||
LOAD_1 macro dest:req, t:req, iter:req, index:req
|
||||
movzx dest, byte ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
|
||||
endm
|
||||
|
||||
LOAD_2 macro dest:req, t:req, iter:req, index:req
|
||||
movzx dest, word ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + index)]
|
||||
endm
|
||||
|
||||
CRC_QUAD macro nn, t:req, iter:req
|
||||
ifdef x64
|
||||
; paired memory loads give 1-3% speed gain, but it uses more registers
|
||||
LOAD_2 x3, t, iter, 0
|
||||
LOAD_2 x9, t, iter, 2
|
||||
MOVZXLO x6, x3
|
||||
shr x3, 8
|
||||
CRC_XOR nn, x6, t * 4 + 3
|
||||
MOVZXLO x6, x9
|
||||
shr x9, 8
|
||||
CRC_XOR nn, x3, t * 4 + 2
|
||||
CRC_XOR nn, x6, t * 4 + 1
|
||||
CRC_XOR nn, x9, t * 4 + 0
|
||||
elseif 0
|
||||
LOAD_2 x3, t, iter, 0
|
||||
MOVZXLO x6, x3
|
||||
shr x3, 8
|
||||
CRC_XOR nn, x6, t * 4 + 3
|
||||
CRC_XOR nn, x3, t * 4 + 2
|
||||
LOAD_2 x3, t, iter, 2
|
||||
MOVZXLO x6, x3
|
||||
shr x3, 8
|
||||
CRC_XOR nn, x6, t * 4 + 1
|
||||
CRC_XOR nn, x3, t * 4 + 0
|
||||
elseif 0
|
||||
LOAD_1 x3, t, iter, 0
|
||||
LOAD_1 x6, t, iter, 1
|
||||
CRC_XOR nn, x3, t * 4 + 3
|
||||
CRC_XOR nn, x6, t * 4 + 2
|
||||
LOAD_1 x3, t, iter, 2
|
||||
LOAD_1 x6, t, iter, 3
|
||||
CRC_XOR nn, x3, t * 4 + 1
|
||||
CRC_XOR nn, x6, t * 4 + 0
|
||||
else
|
||||
; 32-bit load is better if there is only one read port (core2)
|
||||
; but that code can be slower if there are 2 read ports (snb)
|
||||
mov x3, dword ptr [SRCDAT_1 (4 * (NUM_WORDS - 1 - t + iter * NUM_WORDS) + 0)]
|
||||
MOVZXLO x6, x3
|
||||
CRC_XOR nn, x6, t * 4 + 3
|
||||
MOVZXHI x6, x3
|
||||
shr x3, 16
|
||||
CRC_XOR nn, x6, t * 4 + 2
|
||||
MOVZXLO x6, x3
|
||||
shr x3, 8
|
||||
CRC_XOR nn, x6, t * 4 + 1
|
||||
CRC_XOR nn, x3, t * 4 + 0
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
LAST equ (4 * (NUM_WORDS - 1))
|
||||
|
||||
CRC_ITER macro qq, nn, iter
|
||||
mov nn, [SRCDAT_4 (NUM_WORDS * (1 + iter))]
|
||||
|
||||
i = 0
|
||||
rept NUM_WORDS - 1
|
||||
CRC_QUAD nn, i, iter
|
||||
i = i + 1
|
||||
endm
|
||||
|
||||
MOVZXLO x6, qq
|
||||
mov x3, qq
|
||||
shr x3, 24
|
||||
CRC_XOR nn, x6, LAST + 3
|
||||
CRC_XOR nn, x3, LAST + 0
|
||||
ror qq, 16
|
||||
MOVZXLO x6, qq
|
||||
shr qq, 24
|
||||
CRC_XOR nn, x6, LAST + 1
|
||||
if ((UNROLL_CNT and 1) eq 1) and (iter eq (UNROLL_CNT - 1))
|
||||
CRC_MOV qq, qq, LAST + 2
|
||||
xor qq, nn
|
||||
else
|
||||
CRC_XOR nn, qq, LAST + 2
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
; + 4 for prefetching next 4-bytes after current iteration
|
||||
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 * UNROLL_CNT + 4)
|
||||
ALIGN_MASK equ 3
|
||||
|
||||
|
||||
; MY_PROC @CatStr(CrcUpdateT, 12), 4
|
||||
MY_PROC @CatStr(CrcUpdateT, %(NUM_WORDS * 4)), 4
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
ifdef x64
|
||||
mov x0, REG_ABI_PARAM_0_x ; x0 = x1(win) / x7(linux)
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 = r9(win) / x1(linux)
|
||||
mov rN, REG_ABI_PARAM_2 ; r7 = r8(win) / r2(linux)
|
||||
; mov rD, REG_ABI_PARAM_1 ; r2 = r2(win)
|
||||
if (IS_LINUX gt 0)
|
||||
MY_PUSH_2_REGS
|
||||
mov x0, REG_ABI_PARAM_0_x ; x0 = x7
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 = r1
|
||||
mov rN, REG_ABI_PARAM_2 ; r7 = r2
|
||||
mov rD, REG_ABI_PARAM_1 ; r2 = r6
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
mov x0, REG_ABI_PARAM_0_x ; x0 = x1
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 = r9
|
||||
mov rN, REG_ABI_PARAM_2 ; r7 = r8
|
||||
; mov rD, REG_ABI_PARAM_1 ; r2 = r2
|
||||
endif
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
if (IS_CDECL gt 0)
|
||||
mov x0, [r4 + crc_OFFS]
|
||||
mov rD, [r4 + data_OFFS]
|
||||
else
|
||||
mov x0, REG_ABI_PARAM_0_x
|
||||
endif
|
||||
mov rN, num_VAR
|
||||
mov rT, table_VAR
|
||||
mov rN, [r4 + size_OFFS]
|
||||
mov rT, [r4 + table_OFFS]
|
||||
endif
|
||||
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
@@:
|
||||
test rD, 7
|
||||
jz @F
|
||||
CRC1b
|
||||
jnz @B
|
||||
@@:
|
||||
cmp rN, 16
|
||||
jb crc_end
|
||||
add rN, rD
|
||||
mov num_VAR, rN
|
||||
sub rN, 8
|
||||
and rN, NOT 7
|
||||
sub rD, rN
|
||||
xor x0, [SRCDAT 0]
|
||||
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
|
||||
jb crc_end
|
||||
@@:
|
||||
test rD_x, ALIGN_MASK ; test rD, ALIGN_MASK
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp @B
|
||||
@@:
|
||||
xor x0, dword ptr [rD]
|
||||
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
|
||||
sub rD, rN
|
||||
|
||||
align 16
|
||||
@@:
|
||||
unr_index = 0
|
||||
while unr_index lt UNROLL_CNT
|
||||
if (unr_index and 1) eq 0
|
||||
CRC_ITER x0, x1, unr_index
|
||||
else
|
||||
CRC_ITER x1, x0, unr_index
|
||||
endif
|
||||
unr_index = unr_index + 1
|
||||
endm
|
||||
|
||||
MY_EPILOG macro crc_end:req
|
||||
xor x0, [SRCDAT 0]
|
||||
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
if (IS_X64 gt 0) and (IS_LINUX gt 0)
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
endm
|
||||
add rD, NUM_WORDS * 4 * UNROLL_CNT
|
||||
jnc @B
|
||||
|
||||
MY_PROC CrcUpdateT8, 4
|
||||
MY_PROLOG crc_end_8
|
||||
mov x1, [SRCDAT 1]
|
||||
align 16
|
||||
main_loop_8:
|
||||
mov x6, [SRCDAT 2]
|
||||
movzx x3, x1_L
|
||||
CRC_XOR x6, r3, 3
|
||||
movzx x3, x1_H
|
||||
CRC_XOR x6, r3, 2
|
||||
shr x1, 16
|
||||
movzx x3, x1_L
|
||||
movzx x1, x1_H
|
||||
CRC_XOR x6, r3, 1
|
||||
movzx x3, x0_L
|
||||
CRC_XOR x6, r1, 0
|
||||
if 0
|
||||
; byte version
|
||||
add rD, rN
|
||||
xor x0, dword ptr [rD]
|
||||
add rN, NUM_BYTES_LIMIT - 1
|
||||
else
|
||||
; 4-byte version
|
||||
add rN, 4 * NUM_WORDS * UNROLL_CNT
|
||||
sub rD, 4 * NUM_WORDS * UNROLL_CNT
|
||||
@@:
|
||||
MOVZXLO x3, x0
|
||||
MOVZXHI x1, x0
|
||||
shr x0, 16
|
||||
MOVZXLO x6, x0
|
||||
shr x0, 8
|
||||
CRC_MOV x0, x0, 0
|
||||
CRC_XOR x0, x3, 3
|
||||
CRC_XOR x0, x1, 2
|
||||
CRC_XOR x0, x6, 1
|
||||
|
||||
mov x1, [SRCDAT 3]
|
||||
CRC_XOR x6, r3, 7
|
||||
movzx x3, x0_H
|
||||
shr x0, 16
|
||||
CRC_XOR x6, r3, 6
|
||||
movzx x3, x0_L
|
||||
CRC_XOR x6, r3, 5
|
||||
movzx x3, x0_H
|
||||
CRC_MOV x0, r3, 4
|
||||
xor x0, x6
|
||||
add rD, 8
|
||||
jnz main_loop_8
|
||||
add rD, 4
|
||||
if (NUM_WORDS * UNROLL_CNT) ne 1
|
||||
jc @F
|
||||
xor x0, [SRCDAT_4 0]
|
||||
jmp @B
|
||||
@@:
|
||||
endif
|
||||
add rD, rN
|
||||
add rN, 4 - 1
|
||||
|
||||
endif
|
||||
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz func_end
|
||||
@@:
|
||||
CRC1b
|
||||
jnz @B
|
||||
|
||||
MY_EPILOG crc_end_8
|
||||
MY_ENDP
|
||||
|
||||
MY_PROC CrcUpdateT4, 4
|
||||
MY_PROLOG crc_end_4
|
||||
align 16
|
||||
main_loop_4:
|
||||
movzx x1, x0_L
|
||||
movzx x3, x0_H
|
||||
shr x0, 16
|
||||
movzx x6, x0_H
|
||||
and x0, 0FFh
|
||||
CRC_MOV x1, r1, 3
|
||||
xor x1, [SRCDAT 1]
|
||||
CRC_XOR x1, r3, 2
|
||||
CRC_XOR x1, r6, 0
|
||||
CRC_XOR x1, r0, 1
|
||||
|
||||
movzx x0, x1_L
|
||||
movzx x3, x1_H
|
||||
shr x1, 16
|
||||
movzx x6, x1_H
|
||||
and x1, 0FFh
|
||||
CRC_MOV x0, r0, 3
|
||||
xor x0, [SRCDAT 2]
|
||||
CRC_XOR x0, r3, 2
|
||||
CRC_XOR x0, r6, 0
|
||||
CRC_XOR x0, r1, 1
|
||||
add rD, 8
|
||||
jnz main_loop_4
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
func_end:
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
MY_ENDP
|
||||
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
|
||||
; 2021-07-21: Igor Pavlov : Public domain
|
||||
; 2024-06-18: Igor Pavlov : Public domain
|
||||
;
|
||||
|
||||
ifndef x64
|
||||
|
|
@ -11,10 +11,31 @@ include 7zAsm.asm
|
|||
|
||||
MY_ASM_START
|
||||
|
||||
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
|
||||
ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
|
||||
if (IS_LINUX gt 0)
|
||||
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
|
||||
else
|
||||
Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
|
||||
_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
|
||||
MY_ALIGN macro num:req
|
||||
align num
|
||||
; align 16
|
||||
endm
|
||||
else
|
||||
MY_ALIGN macro num:req
|
||||
; We expect that ".text" is aligned for 16-bytes.
|
||||
; So we don't need large alignment inside our function.
|
||||
align 16
|
||||
endm
|
||||
endif
|
||||
|
||||
|
||||
MY_ALIGN_16 macro
|
||||
MY_ALIGN 16
|
||||
endm
|
||||
|
||||
MY_ALIGN_32 macro
|
||||
|
|
@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
|
|||
endm
|
||||
|
||||
|
||||
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
|
||||
; MY_ALIGN_64
|
||||
else
|
||||
MY_ALIGN_16
|
||||
endif
|
||||
MY_PROC GetMatchesSpecN_2, 13
|
||||
MY_PUSH_PRESERVED_ABI_REGS
|
||||
mov r0, RSP
|
||||
|
|
@ -508,6 +533,8 @@ fin:
|
|||
MY_POP_PRESERVED_ABI_REGS
|
||||
MY_ENDP
|
||||
|
||||
ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
|
||||
_TEXT$LZFINDOPT ENDS
|
||||
endif
|
||||
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
|
||||
; 2021-02-23: Igor Pavlov : Public domain
|
||||
; 2024-06-18: Igor Pavlov : Public domain
|
||||
;
|
||||
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
|
||||
; function for check at link time.
|
||||
|
|
@ -17,11 +17,41 @@ include 7zAsm.asm
|
|||
|
||||
MY_ASM_START
|
||||
|
||||
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
|
||||
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment.
|
||||
; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
|
||||
; The performance is almost identical in our tests.
|
||||
; But the performance can depend on the position of lzmadec code inside instruction cache
|
||||
; or micro-op cache line (depending on low address bits in 32-byte/64-byte cache lines).
|
||||
; And 64-byte alignment provides a more consistent speed regardless
|
||||
; of the code's position in the executable.
|
||||
; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
|
||||
; slightly faster than 64-byte aligned code in some cases, if offset of lzmadec
|
||||
; code in 64-byte block after compilation provides better speed for some reason.
|
||||
; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
|
||||
; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
|
||||
|
||||
ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
|
||||
if (IS_LINUX gt 0)
|
||||
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
|
||||
else
|
||||
Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
|
||||
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
|
||||
MY_ALIGN macro num:req
|
||||
align num
|
||||
; align 16
|
||||
endm
|
||||
else
|
||||
MY_ALIGN macro num:req
|
||||
; We expect that ".text" is aligned for 16-bytes.
|
||||
; So we don't need large alignment inside our function.
|
||||
align 16
|
||||
endm
|
||||
endif
|
||||
|
||||
|
||||
MY_ALIGN_16 macro
|
||||
MY_ALIGN 16
|
||||
|
|
@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
|
|||
PARAM_limit equ REG_ABI_PARAM_1
|
||||
PARAM_bufLimit equ REG_ABI_PARAM_2
|
||||
|
||||
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
|
||||
; MY_ALIGN_64
|
||||
else
|
||||
MY_ALIGN_16
|
||||
endif
|
||||
MY_PROC LzmaDec_DecodeReal_3, 3
|
||||
MY_PUSH_PRESERVED_ABI_REGS
|
||||
|
||||
|
|
@ -1298,6 +1332,8 @@ fin:
|
|||
MY_POP_PRESERVED_ABI_REGS
|
||||
MY_ENDP
|
||||
|
||||
ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
|
||||
_TEXT$LZMADECOPT ENDS
|
||||
endif
|
||||
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
|
||||
; 2021-03-10 : Igor Pavlov : Public domain
|
||||
; 2024-06-16 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
|
|
@ -20,7 +20,7 @@ MY_ASM_START
|
|||
|
||||
|
||||
|
||||
CONST SEGMENT
|
||||
CONST SEGMENT READONLY
|
||||
|
||||
align 16
|
||||
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
|
||||
; 2021-03-10 : Igor Pavlov : Public domain
|
||||
; 2024-06-16 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
|
|
@ -20,7 +20,7 @@ endif
|
|||
EXTRN K_CONST:xmmword
|
||||
@
|
||||
|
||||
CONST SEGMENT
|
||||
CONST SEGMENT READONLY
|
||||
|
||||
align 16
|
||||
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
|
||||
|
|
@ -54,14 +54,20 @@ ifndef x64
|
|||
.686
|
||||
.xmm
|
||||
endif
|
||||
|
||||
|
||||
; jwasm-based assemblers for linux and linker from new versions of binutils
|
||||
; can generate incorrect code for load [ARRAY + offset] instructions.
|
||||
; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem
|
||||
rTable equ r0
|
||||
; rTable equ K_CONST
|
||||
|
||||
ifdef x64
|
||||
rNum equ REG_ABI_PARAM_2
|
||||
if (IS_LINUX eq 0)
|
||||
LOCAL_SIZE equ (16 * 2)
|
||||
endif
|
||||
else
|
||||
rNum equ r0
|
||||
rNum equ r3
|
||||
LOCAL_SIZE equ (16 * 1)
|
||||
endif
|
||||
|
||||
|
|
@ -103,15 +109,18 @@ MY_PROLOG macro
|
|||
movdqa [r4 + 16], xmm9
|
||||
endif
|
||||
else ; x86
|
||||
if (IS_CDECL gt 0)
|
||||
mov rState, [r4 + REG_SIZE * 1]
|
||||
mov rData, [r4 + REG_SIZE * 2]
|
||||
mov rNum, [r4 + REG_SIZE * 3]
|
||||
else ; fastcall
|
||||
mov rNum, [r4 + REG_SIZE * 1]
|
||||
endif
|
||||
push r3
|
||||
push r5
|
||||
mov r5, r4
|
||||
NUM_PUSH_REGS equ 2
|
||||
PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS))
|
||||
if (IS_CDECL gt 0)
|
||||
mov rState, [r4 + PARAM_OFFSET]
|
||||
mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1]
|
||||
mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2]
|
||||
else ; fastcall
|
||||
mov rNum, [r4 + PARAM_OFFSET]
|
||||
endif
|
||||
and r4, -16
|
||||
sub r4, LOCAL_SIZE
|
||||
endif
|
||||
|
|
@ -129,6 +138,7 @@ MY_EPILOG macro
|
|||
else ; x86
|
||||
mov r4, r5
|
||||
pop r5
|
||||
pop r3
|
||||
endif
|
||||
MY_ENDP
|
||||
endm
|
||||
|
|
@ -171,7 +181,7 @@ pre2 equ 2
|
|||
|
||||
|
||||
RND4 macro k
|
||||
movdqa msg, xmmword ptr [K_CONST + (k) * 16]
|
||||
movdqa msg, xmmword ptr [rTable + (k) * 16]
|
||||
paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
|
||||
MY_sha256rnds2 state0_N, state1_N
|
||||
pshufd msg, msg, 0eH
|
||||
|
|
@ -210,6 +220,8 @@ endm
|
|||
MY_PROC Sha256_UpdateBlocks_HW, 3
|
||||
MY_PROLOG
|
||||
|
||||
lea rTable, [K_CONST]
|
||||
|
||||
cmp rNum, 0
|
||||
je end_c
|
||||
|
||||
|
|
|
|||
860
Asm/x86/Sort.asm
Normal file
860
Asm/x86/Sort.asm
Normal file
|
|
@ -0,0 +1,860 @@
|
|||
; Sort.asm -- ASM version of HeapSort() function
|
||||
; Igor Pavlov : Public domain
|
||||
|
||||
include ../../../../Asm/x86/7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
ifndef Z7_SORT_ASM_USE_SEGMENT
|
||||
if (IS_LINUX gt 0)
|
||||
; Z7_SORT_ASM_USE_SEGMENT equ 1
|
||||
else
|
||||
; Z7_SORT_ASM_USE_SEGMENT equ 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef Z7_SORT_ASM_USE_SEGMENT
|
||||
_TEXT$Z7_SORT SEGMENT ALIGN(64) 'CODE'
|
||||
MY_ALIGN macro num:req
|
||||
align num
|
||||
endm
|
||||
else
|
||||
MY_ALIGN macro num:req
|
||||
; We expect that ".text" is aligned for 16-bytes.
|
||||
; So we don't need large alignment inside our function.
|
||||
align 16
|
||||
endm
|
||||
endif
|
||||
|
||||
|
||||
MY_ALIGN_16 macro
|
||||
MY_ALIGN 16
|
||||
endm
|
||||
|
||||
MY_ALIGN_32 macro
|
||||
MY_ALIGN 32
|
||||
endm
|
||||
|
||||
MY_ALIGN_64 macro
|
||||
MY_ALIGN 64
|
||||
endm
|
||||
|
||||
ifdef x64
|
||||
|
||||
NUM_PREFETCH_LEVELS equ 3 ; to prefetch 1x 64-bytes line (is good for most cases)
|
||||
; NUM_PREFETCH_LEVELS equ 4 ; to prefetch 2x 64-bytes lines (better for big arrays)
|
||||
|
||||
acc equ x0
|
||||
k equ r0
|
||||
k_x equ x0
|
||||
|
||||
p equ r1
|
||||
|
||||
s equ r2
|
||||
s_x equ x2
|
||||
|
||||
a0 equ x3
|
||||
t0 equ a0
|
||||
|
||||
a3 equ x5
|
||||
qq equ a3
|
||||
|
||||
a1 equ x6
|
||||
t1 equ a1
|
||||
t1_r equ r6
|
||||
|
||||
a2 equ x7
|
||||
t2 equ a2
|
||||
|
||||
i equ r8
|
||||
e0 equ x8
|
||||
|
||||
e1 equ x9
|
||||
|
||||
num_last equ r10
|
||||
num_last_x equ x10
|
||||
|
||||
next4_lim equ r11
|
||||
pref_lim equ r12
|
||||
|
||||
|
||||
|
||||
SORT_2_WITH_TEMP_REG macro b0, b1, temp_reg
|
||||
mov temp_reg, b0
|
||||
cmp b0, b1
|
||||
cmovae b0, b1 ; min
|
||||
cmovae b1, temp_reg ; max
|
||||
endm
|
||||
|
||||
SORT macro b0, b1
|
||||
SORT_2_WITH_TEMP_REG b0, b1, acc
|
||||
endm
|
||||
|
||||
LOAD macro dest:req, index:req
|
||||
mov dest, [p + 4 * index]
|
||||
endm
|
||||
|
||||
STORE macro reg:req, index:req
|
||||
mov [p + 4 * index], reg
|
||||
endm
|
||||
|
||||
|
||||
if (NUM_PREFETCH_LEVELS gt 3)
|
||||
num_prefetches equ (1 SHL (NUM_PREFETCH_LEVELS - 3))
|
||||
else
|
||||
num_prefetches equ 1
|
||||
endif
|
||||
|
||||
PREFETCH_OP macro offs
|
||||
cur_offset = 7 * 4 ; it's average offset in 64-bytes cache line.
|
||||
; cur_offset = 0 ; we can use zero offset, if we are sure that array is aligned for 64-bytes.
|
||||
rept num_prefetches
|
||||
if 1
|
||||
prefetcht0 byte ptr [p + offs + cur_offset]
|
||||
else
|
||||
mov pref_x, dword ptr [p + offs + cur_offset]
|
||||
endif
|
||||
cur_offset = cur_offset + 64
|
||||
endm
|
||||
endm
|
||||
|
||||
PREFETCH_MY macro
|
||||
if 1
|
||||
if 1
|
||||
shl k, NUM_PREFETCH_LEVELS + 3
|
||||
else
|
||||
; we delay prefetch instruction to improve main loads
|
||||
shl k, NUM_PREFETCH_LEVELS
|
||||
shl k, 3
|
||||
; shl k, 0
|
||||
endif
|
||||
PREFETCH_OP k
|
||||
elseif 1
|
||||
shl k, 3
|
||||
PREFETCH_OP k * (1 SHL NUM_PREFETCH_LEVELS) ; change it
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
STEP_1 macro exit_label, prefetch_macro
|
||||
use_cmov_1 equ 1 ; set 1 for cmov, but it's slower in some cases
|
||||
; set 0 for LOAD after adc s, 0
|
||||
cmp t0, t1
|
||||
if use_cmov_1
|
||||
cmovb t0, t1
|
||||
; STORE t0, k
|
||||
endif
|
||||
adc s, 0
|
||||
if use_cmov_1 eq 0
|
||||
LOAD t0, s
|
||||
endif
|
||||
cmp qq, t0
|
||||
jae exit_label
|
||||
if 1 ; use_cmov_1 eq 0
|
||||
STORE t0, k
|
||||
endif
|
||||
prefetch_macro
|
||||
mov t0, [p + s * 8]
|
||||
mov t1, [p + s * 8 + 4]
|
||||
mov k, s
|
||||
add s, s ; slower for some cpus
|
||||
; lea s, dword ptr [s + s] ; slower for some cpus
|
||||
; shl s, 1 ; faster for some cpus
|
||||
; lea s, dword ptr [s * 2] ; faster for some cpus
|
||||
rept 0 ; 1000 for debug : 0 for normal
|
||||
; number of calls in generate_stage : ~0.6 of number of items
|
||||
shl k, 0
|
||||
endm
|
||||
endm
|
||||
|
||||
|
||||
STEP_2 macro exit_label, prefetch_macro
|
||||
use_cmov_2 equ 0 ; set 1 for cmov, but it's slower in some cases
|
||||
; set 0 for LOAD after adc s, 0
|
||||
cmp t0, t1
|
||||
if use_cmov_2
|
||||
mov t2, t0
|
||||
cmovb t2, t1
|
||||
; STORE t2, k
|
||||
endif
|
||||
mov t0, [p + s * 8]
|
||||
mov t1, [p + s * 8 + 4]
|
||||
cmovb t0, [p + s * 8 + 8]
|
||||
cmovb t1, [p + s * 8 + 12]
|
||||
adc s, 0
|
||||
if use_cmov_2 eq 0
|
||||
LOAD t2, s
|
||||
endif
|
||||
cmp qq, t2
|
||||
jae exit_label
|
||||
if 1 ; use_cmov_2 eq 0
|
||||
STORE t2, k
|
||||
endif
|
||||
prefetch_macro
|
||||
mov k, s
|
||||
; add s, s
|
||||
; lea s, [s + s]
|
||||
shl s, 1
|
||||
; lea s, [s * 2]
|
||||
endm
|
||||
|
||||
|
||||
MOVE_SMALLEST_UP macro STEP, use_prefetch, num_unrolls
|
||||
LOCAL exit_1, exit_2, leaves, opt_loop, last_nodes
|
||||
|
||||
; s == k * 2
|
||||
; t0 == (p)[s]
|
||||
; t1 == (p)[s + 1]
|
||||
cmp k, next4_lim
|
||||
jae leaves
|
||||
|
||||
rept num_unrolls
|
||||
STEP exit_2
|
||||
cmp k, next4_lim
|
||||
jae leaves
|
||||
endm
|
||||
|
||||
if use_prefetch
|
||||
prefetch_macro equ PREFETCH_MY
|
||||
pref_lim_2 equ pref_lim
|
||||
; lea pref_lim, dword ptr [num_last + 1]
|
||||
; shr pref_lim, NUM_PREFETCH_LEVELS + 1
|
||||
cmp k, pref_lim_2
|
||||
jae last_nodes
|
||||
else
|
||||
prefetch_macro equ
|
||||
pref_lim_2 equ next4_lim
|
||||
endif
|
||||
|
||||
MY_ALIGN_16
|
||||
opt_loop:
|
||||
STEP exit_2, prefetch_macro
|
||||
cmp k, pref_lim_2
|
||||
jb opt_loop
|
||||
|
||||
last_nodes:
|
||||
; k >= pref_lim_2
|
||||
; 2 cases are possible:
|
||||
; case-1: num_after_prefetch_levels == 0 && next4_lim = pref_lim_2
|
||||
; case-2: num_after_prefetch_levels == NUM_PREFETCH_LEVELS - 1 &&
|
||||
; next4_lim = pref_lim_2 / (NUM_PREFETCH_LEVELS - 1)
|
||||
if use_prefetch
|
||||
yyy = NUM_PREFETCH_LEVELS - 1
|
||||
while yyy
|
||||
yyy = yyy - 1
|
||||
STEP exit_2
|
||||
if yyy
|
||||
cmp k, next4_lim
|
||||
jae leaves
|
||||
endif
|
||||
endm
|
||||
endif
|
||||
|
||||
leaves:
|
||||
; k >= next4_lim == (num_last + 1) / 4 must be provided by previous code.
|
||||
; we have 2 nodes in (s) level : always
|
||||
; we can have some nodes in (s * 2) level : low probability case
|
||||
; we have no nodes in (s * 4) level
|
||||
; s == k * 2
|
||||
; t0 == (p)[s]
|
||||
; t1 == (p)[s + 1]
|
||||
cmp t0, t1
|
||||
cmovb t0, t1
|
||||
adc s, 0
|
||||
STORE t0, k
|
||||
|
||||
; t0 == (p)[s]
|
||||
; s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1]
|
||||
; we have 3 possible cases here:
|
||||
; s * 2 > num_last : (s) node has no children
|
||||
; s * 2 == num_last : (s) node has 1 leaf child that is last item of array
|
||||
; s * 2 < num_last : (s) node has 2 leaf children. We provide (s * 4 > num_last)
|
||||
; we check for (s * 2 > num_last) before "cmp qq, t0" check, because
|
||||
; we will replace conditional jump with cmov instruction later.
|
||||
lea t1_r, dword ptr [s + s]
|
||||
cmp t1_r, num_last
|
||||
ja exit_1 ; if (s * 2 > num_last), we have no childs : it's high probability branch
|
||||
|
||||
; it's low probability branch
|
||||
; s * 2 <= num_last
|
||||
cmp qq, t0
|
||||
jae exit_2
|
||||
|
||||
; qq < t0, so we go to next level
|
||||
; we check 1 or 2 children in next level
|
||||
mov t0, [p + s * 8]
|
||||
mov k, s
|
||||
mov s, t1_r
|
||||
cmp t1_r, num_last
|
||||
je @F ; (s == num_last) means that we have single child in tree
|
||||
|
||||
; (s < num_last) : so we must read both children and select max of them.
|
||||
mov t1, [p + k * 8 + 4]
|
||||
cmp t0, t1
|
||||
cmovb t0, t1
|
||||
adc s, 0
|
||||
@@:
|
||||
STORE t0, k
|
||||
exit_1:
|
||||
; t0 == (p)[s], s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1]
|
||||
cmp qq, t0
|
||||
cmovb k, s
|
||||
exit_2:
|
||||
STORE qq, k
|
||||
endm
|
||||
|
||||
|
||||
|
||||
|
||||
ifdef Z7_SORT_ASM_USE_SEGMENT
|
||||
; MY_ALIGN_64
|
||||
else
|
||||
MY_ALIGN_16
|
||||
endif
|
||||
|
||||
MY_PROC HeapSort, 2
|
||||
|
||||
if (IS_LINUX gt 0)
|
||||
mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux
|
||||
endif
|
||||
mov num_last, REG_ABI_PARAM_1 ; r10 <- r6 : linux
|
||||
; r10 <- r2 : win64
|
||||
cmp num_last, 2
|
||||
jb end_1
|
||||
|
||||
; MY_PUSH_PRESERVED_ABI_REGS
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
push r12
|
||||
|
||||
cmp num_last, 4
|
||||
ja sort_5
|
||||
|
||||
LOAD a0, 0
|
||||
LOAD a1, 1
|
||||
SORT a0, a1
|
||||
cmp num_last, 3
|
||||
jb end_2
|
||||
|
||||
LOAD a2, 2
|
||||
je sort_3
|
||||
|
||||
LOAD a3, 3
|
||||
SORT a2, a3
|
||||
SORT a1, a3
|
||||
STORE a3, 3
|
||||
sort_3:
|
||||
SORT a0, a2
|
||||
SORT a1, a2
|
||||
STORE a2, 2
|
||||
jmp end_2
|
||||
|
||||
sort_5:
|
||||
; (num_last > 4) is required here
|
||||
; if (num_last >= 6) : we will use optimized loop for leaf nodes loop_down_1
|
||||
mov next4_lim, num_last
|
||||
shr next4_lim, 2
|
||||
|
||||
dec num_last
|
||||
mov k, num_last
|
||||
shr k, 1
|
||||
mov i, num_last
|
||||
shr i, 2
|
||||
test num_last, 1
|
||||
jnz size_even
|
||||
|
||||
; ODD number of items. So we compare parent with single child
|
||||
LOAD t1, num_last
|
||||
LOAD t0, k
|
||||
SORT_2_WITH_TEMP_REG t1, t0, t2
|
||||
STORE t1, num_last
|
||||
STORE t0, k
|
||||
dec k
|
||||
|
||||
size_even:
|
||||
cmp k, i
|
||||
jbe loop_down ; jump for num_last == 4 case
|
||||
|
||||
if 0 ; 1 for debug
|
||||
mov r15, k
|
||||
mov r14d, 1 ; 100
|
||||
loop_benchmark:
|
||||
endif
|
||||
; optimized loop for leaf nodes:
|
||||
mov t0, [p + k * 8]
|
||||
mov t1, [p + k * 8 + 4]
|
||||
|
||||
MY_ALIGN_16
|
||||
loop_down_1:
|
||||
; we compare parent with max of children:
|
||||
; lea s, dword ptr [2 * k]
|
||||
mov s, k
|
||||
cmp t0, t1
|
||||
cmovb t0, t1
|
||||
adc s, s
|
||||
LOAD t2, k
|
||||
STORE t0, k
|
||||
cmp t2, t0
|
||||
cmovae s, k
|
||||
dec k
|
||||
; we preload next items before STORE operation for calculated address
|
||||
mov t0, [p + k * 8]
|
||||
mov t1, [p + k * 8 + 4]
|
||||
STORE t2, s
|
||||
cmp k, i
|
||||
jne loop_down_1
|
||||
|
||||
if 0 ; 1 for debug
|
||||
mov k, r15
|
||||
dec r14d
|
||||
jnz loop_benchmark
|
||||
; jmp end_debug
|
||||
endif
|
||||
|
||||
MY_ALIGN_16
|
||||
loop_down:
|
||||
mov t0, [p + i * 8]
|
||||
mov t1, [p + i * 8 + 4]
|
||||
LOAD qq, i
|
||||
mov k, i
|
||||
lea s, dword ptr [i + i]
|
||||
; jmp end_debug
|
||||
DOWN_use_prefetch equ 0
|
||||
DOWN_num_unrolls equ 0
|
||||
MOVE_SMALLEST_UP STEP_1, DOWN_use_prefetch, DOWN_num_unrolls
|
||||
sub i, 1
|
||||
jnb loop_down
|
||||
|
||||
; jmp end_debug
|
||||
LOAD e0, 0
|
||||
LOAD e1, 1
|
||||
|
||||
LEVEL_3_LIMIT equ 8 ; 8 is default, but 7 also can work
|
||||
|
||||
cmp num_last, LEVEL_3_LIMIT + 1
|
||||
jb main_loop_sort_5
|
||||
|
||||
MY_ALIGN_16
|
||||
main_loop_sort:
|
||||
; num_last > LEVEL_3_LIMIT
|
||||
; p[size--] = p[0];
|
||||
LOAD qq, num_last
|
||||
STORE e0, num_last
|
||||
mov e0, e1
|
||||
|
||||
mov next4_lim, num_last
|
||||
shr next4_lim, 2
|
||||
mov pref_lim, num_last
|
||||
shr pref_lim, NUM_PREFETCH_LEVELS + 1
|
||||
|
||||
dec num_last
|
||||
if 0 ; 1 for debug
|
||||
; that optional optimization can improve the performance, if there are identical items in array
|
||||
; 3 times improvement : if all items in array are identical
|
||||
; 20% improvement : if items are different for 1 bit only
|
||||
; 1-10% improvement : if items are different for (2+) bits
|
||||
; no gain : if items are different
|
||||
cmp qq, e1
|
||||
jae next_iter_main
|
||||
endif
|
||||
LOAD e1, 2
|
||||
LOAD t0, 3
|
||||
mov k_x, 2
|
||||
cmp e1, t0
|
||||
cmovb e1, t0
|
||||
mov t0, [p + 4 * (4 + 0)]
|
||||
mov t1, [p + 4 * (4 + 1)]
|
||||
cmovb t0, [p + 4 * (4 + 2)]
|
||||
cmovb t1, [p + 4 * (4 + 3)]
|
||||
adc k_x, 0
|
||||
; (qq <= e1), because the tree is correctly sorted
|
||||
; also here we could check (qq >= e1) or (qq == e1) for faster exit
|
||||
lea s, dword ptr [k + k]
|
||||
MAIN_use_prefetch equ 1
|
||||
MAIN_num_unrolls equ 0
|
||||
MOVE_SMALLEST_UP STEP_2, MAIN_use_prefetch, MAIN_num_unrolls
|
||||
|
||||
next_iter_main:
|
||||
cmp num_last, LEVEL_3_LIMIT
|
||||
jne main_loop_sort
|
||||
|
||||
; num_last == LEVEL_3_LIMIT
|
||||
main_loop_sort_5:
|
||||
; 4 <= num_last <= LEVEL_3_LIMIT
|
||||
; p[size--] = p[0];
|
||||
LOAD qq, num_last
|
||||
STORE e0, num_last
|
||||
mov e0, e1
|
||||
dec num_last_x
|
||||
|
||||
LOAD e1, 2
|
||||
LOAD t0, 3
|
||||
mov k_x, 2
|
||||
cmp e1, t0
|
||||
cmovb e1, t0
|
||||
adc k_x, 0
|
||||
|
||||
lea s_x, dword ptr [k * 2]
|
||||
cmp s_x, num_last_x
|
||||
ja exit_2
|
||||
|
||||
mov t0, [p + k * 8]
|
||||
je exit_1
|
||||
|
||||
; s < num_last
|
||||
mov t1, [p + k * 8 + 4]
|
||||
cmp t0, t1
|
||||
cmovb t0, t1
|
||||
adc s_x, 0
|
||||
exit_1:
|
||||
STORE t0, k
|
||||
cmp qq, t0
|
||||
cmovb k_x, s_x
|
||||
exit_2:
|
||||
STORE qq, k
|
||||
cmp num_last_x, 3
|
||||
jne main_loop_sort_5
|
||||
|
||||
; num_last == 3 (real_size == 4)
|
||||
LOAD a0, 2
|
||||
LOAD a1, 3
|
||||
STORE e1, 2
|
||||
STORE e0, 3
|
||||
SORT a0, a1
|
||||
end_2:
|
||||
STORE a0, 0
|
||||
STORE a1, 1
|
||||
; end_debug:
|
||||
; MY_POP_PRESERVED_ABI_REGS
|
||||
pop r12
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
end_1:
|
||||
MY_ENDP
|
||||
|
||||
|
||||
|
||||
else
|
||||
; ------------ x86 32-bit ------------
|
||||
|
||||
ifdef x64
|
||||
IS_CDECL = 0
|
||||
endif
|
||||
|
||||
acc equ x0
|
||||
k equ r0
|
||||
k_x equ acc
|
||||
|
||||
p equ r1
|
||||
|
||||
num_last equ r2
|
||||
num_last_x equ x2
|
||||
|
||||
a0 equ x3
|
||||
t0 equ a0
|
||||
|
||||
a3 equ x5
|
||||
i equ r5
|
||||
e0 equ a3
|
||||
|
||||
a1 equ x6
|
||||
qq equ a1
|
||||
|
||||
a2 equ x7
|
||||
s equ r7
|
||||
s_x equ a2
|
||||
|
||||
|
||||
; SORT b0, b1
; Compare-exchange of two registers using an unsigned compare:
; after the macro b0 <= b1 (b0 = min, b1 = max).
; The values are swapped only when b1 < b0.
; The active variant (if 1) swaps with xchg; the disabled variant swaps through acc.
; Modifies flags (cmp). Uses an anonymous @@ label, so it must not be placed
; between another @B / @F jump and its target.
SORT macro b0, b1
|
||||
cmp b1, b0
|
||||
jae @F
|
||||
if 1
|
||||
xchg b0, b1
|
||||
else
|
||||
mov acc, b0
|
||||
mov b0, b1 ; min
|
||||
mov b1, acc ; max
|
||||
endif
|
||||
@@:
|
||||
endm
|
||||
|
||||
; LOAD dest, index
; dest = element at [p + 4 * index] (array elements are 4 bytes apart).
; index may be a constant or an index register. Flags are not modified (mov).
LOAD macro dest:req, index:req
|
||||
mov dest, [p + 4 * index]
|
||||
endm
|
||||
|
||||
; STORE reg, index
; Writes reg to the element at [p + 4 * index] (array elements are 4 bytes apart).
; index may be a constant or an index register. Flags are not modified (mov).
STORE macro reg:req, index:req
|
||||
mov [p + 4 * index], reg
|
||||
endm
|
||||
|
||||
|
||||
; STEP_1 exit_label
; One branch-light sift-down step. Expects s == k * 2 on entry.
;   [p + k * 8]     is the child at index s      (k * 8 bytes == s * 4 bytes)
;   [p + k * 8 + 4] is the child at index s + 1
; The cmp sets CF when child[s] < child[s + 1] (unsigned), and "adc s, 0" then
; advances s to the larger child. That child is copied to p[k] before the test
; against qq; if (qq >= child) control leaves through exit_label, otherwise the
; step descends: k = s, s = s * 2.
; When used from MOVE_SMALLEST_UP the exit label stores qq to p[k], which
; overwrites the early store made here.
; Modifies t0, s, k and flags.
STEP_1 macro exit_label
|
||||
mov t0, [p + k * 8]
|
||||
cmp t0, [p + k * 8 + 4]
|
||||
adc s, 0
|
||||
LOAD t0, s
|
||||
STORE t0, k ; we look ahead: storing early for the most expected branch
|
||||
cmp qq, t0
|
||||
jae exit_label
|
||||
; STORE t0, k ; use if
|
||||
mov k, s
|
||||
add s, s
|
||||
; lea s, dword ptr [s + s]
|
||||
; shl s, 1
|
||||
; lea s, dword ptr [s * 2]
|
||||
endm
|
||||
|
||||
; STEP_BRANCH exit_label
; Branching variant of the sift-down step. Expects s == k * 2 on entry.
; t0 = child at index s; when it is below the child at s + 1 (unsigned),
; s is incremented and t0 is reloaded with that larger child.
; If (qq >= t0) control leaves through exit_label without storing anything;
; otherwise the larger child is stored to p[k] and the step descends:
; k = s, s = s * 2.
; Modifies t0, s, k and flags. Uses an anonymous @@ label.
STEP_BRANCH macro exit_label
|
||||
mov t0, [p + k * 8]
|
||||
cmp t0, [p + k * 8 + 4]
|
||||
jae @F
|
||||
inc s
|
||||
mov t0, [p + k * 8 + 4]
|
||||
@@:
|
||||
cmp qq, t0
|
||||
jae exit_label
|
||||
STORE t0, k
|
||||
mov k, s
|
||||
add s, s
|
||||
endm
|
||||
|
||||
|
||||
|
||||
; MOVE_SMALLEST_UP STEP, num_unrolls, exit_2
; Sifts the value in qq down from position k and finally stores it at p[k].
; Note: despite the macro name, each step moves the LARGER child up
; (see the unsigned compares in STEP_1 / STEP_BRANCH).
;   STEP        : step macro used inside the loop (STEP_1 or STEP_BRANCH)
;   num_unrolls : number of STEP_1 steps emitted inline before the loop;
;                 these unrolled steps always use STEP_1, regardless of STEP
;   exit_2      : label name supplied by the caller. It is defined by this macro
;                 (it is not LOCAL), so every expansion needs a distinct name,
;                 and code outside the macro may jump to it.
; Entry condition: s == k * 2.
; While (s < num_last) both children exist and STEP is applied.
; When (s == num_last) only one child exists and it is handled at "single".
; When (s > num_last) there are no children and qq is stored directly.
MOVE_SMALLEST_UP macro STEP, num_unrolls, exit_2
|
||||
LOCAL leaves, opt_loop, single
|
||||
|
||||
; s == k * 2
|
||||
rept num_unrolls
|
||||
cmp s, num_last
|
||||
jae leaves
|
||||
STEP_1 exit_2
|
||||
endm
|
||||
cmp s, num_last
|
||||
jb opt_loop
|
||||
|
||||
leaves:
|
||||
; (s >= num_last)
; flags still come from the preceding "cmp s, num_last"
|
||||
jne exit_2
|
||||
single:
|
||||
; (s == num_last)
|
||||
mov t0, [p + k * 8]
|
||||
cmp qq, t0
|
||||
jae exit_2
|
||||
STORE t0, k
|
||||
mov k, s
|
||||
jmp exit_2
|
||||
|
||||
MY_ALIGN_16
|
||||
opt_loop:
|
||||
STEP exit_2
|
||||
cmp s, num_last
|
||||
jb opt_loop
|
||||
je single
|
||||
exit_2:
|
||||
STORE qq, k
|
||||
endm
|
||||
|
||||
|
||||
|
||||
|
||||
ifdef Z7_SORT_ASM_USE_SEGMENT
|
||||
; MY_ALIGN_64
|
||||
else
|
||||
MY_ALIGN_16
|
||||
endif
|
||||
|
||||
; HeapSort : sorts the array at p in place (x86-style register assignment).
; MY_PROC declares 2 parameters. They are moved into p / num_last from the ABI
; registers on x64 Linux, or from the stack for cdecl; in the remaining
; configurations the code uses r1 / r2 as they arrive
; (presumably the native register-passing convention -- TODO confirm).
; Elements are 4 bytes apart (LOAD / STORE) and are ordered with unsigned
; compares; the small-size paths leave the minimum at index 0.
; Flow:
;   num_last < 2  : return immediately (end_1), nothing is pushed.
;   num_last <= 4 : fixed compare-exchange sequence built from SORT.
;   num_last > 4  : loop_down arranges the heap, walking i downward by 2;
;                   main_loop_sort then repeatedly writes e0 to p[num_last],
;                   sifts the displaced element down starting at index 1,
;                   and decrements num_last until it reaches 3;
;                   the last four elements are finished inline before end_2.
; Several instructions between a cmp and its consumer (adc / jcc) are chosen
; because they do not modify flags (mov / LOAD / STORE); keep that order.
MY_PROC HeapSort, 2
|
||||
ifdef x64
|
||||
if (IS_LINUX gt 0)
|
||||
mov num_last, REG_ABI_PARAM_1 ; r2 <- r6 : linux
|
||||
mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux
|
||||
endif
|
||||
elseif (IS_CDECL gt 0)
|
||||
mov num_last, [r4 + REG_SIZE * 2]
|
||||
mov p, [r4 + REG_SIZE * 1]
|
||||
endif
|
||||
cmp num_last, 2
|
||||
jb end_1
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
|
||||
cmp num_last, 4
|
||||
ja sort_5
|
||||
|
||||
LOAD a0, 0
|
||||
LOAD a1, 1
|
||||
SORT a0, a1
|
||||
cmp num_last, 3
|
||||
jb end_2
|
||||
|
||||
LOAD a2, 2
|
||||
je sort_3
|
||||
|
||||
LOAD a3, 3
|
||||
SORT a2, a3
|
||||
SORT a1, a3
|
||||
STORE a3, 3
|
||||
sort_3:
|
||||
SORT a0, a2
|
||||
SORT a1, a2
|
||||
STORE a2, 2
|
||||
jmp end_2
|
||||
|
||||
sort_5:
|
||||
; num_last > 4
|
||||
lea i, dword ptr [num_last - 2]
|
||||
dec num_last
|
||||
test i, 1
|
||||
jz loop_down
|
||||
|
||||
; single child
|
||||
mov t0, [p + num_last * 4]
|
||||
mov qq, [p + num_last * 2]
|
||||
dec i
|
||||
cmp qq, t0
|
||||
jae loop_down
|
||||
|
||||
mov [p + num_last * 2], t0
|
||||
mov [p + num_last * 4], qq
|
||||
|
||||
MY_ALIGN_16
|
||||
loop_down:
|
||||
mov t0, [p + i * 4]
|
||||
cmp t0, [p + i * 4 + 4]
|
||||
mov k, i
|
||||
mov qq, [p + i * 2]
|
||||
adc k, 0
|
||||
LOAD t0, k
|
||||
cmp qq, t0
|
||||
jae down_next
|
||||
mov [p + i * 2], t0
|
||||
lea s, dword ptr [k + k]
|
||||
|
||||
DOWN_num_unrolls equ 0
|
||||
MOVE_SMALLEST_UP STEP_1, DOWN_num_unrolls, down_exit_label
|
||||
down_next:
|
||||
sub i, 2
|
||||
jnb loop_down
|
||||
; jmp end_debug
|
||||
|
||||
LOAD e0, 0
|
||||
|
||||
MY_ALIGN_16
|
||||
main_loop_sort:
|
||||
; num_last > 3
|
||||
mov t0, [p + 2 * 4]
|
||||
cmp t0, [p + 3 * 4]
|
||||
LOAD qq, num_last
|
||||
STORE e0, num_last
|
||||
LOAD e0, 1
|
||||
mov s_x, 2
|
||||
mov k_x, 1
|
||||
adc s, 0
|
||||
LOAD t0, s
|
||||
dec num_last
|
||||
cmp qq, t0
|
||||
jae main_exit_label
|
||||
STORE t0, 1
|
||||
mov k, s
|
||||
add s, s
|
||||
if 1
|
||||
; for branch data prefetch mode :
|
||||
; it's faster for large arrays : larger than (1 << 13) items.
|
||||
MAIN_num_unrolls equ 10
|
||||
STEP_LOOP equ STEP_BRANCH
|
||||
else
|
||||
MAIN_num_unrolls equ 0
|
||||
STEP_LOOP equ STEP_1
|
||||
endif
|
||||
|
||||
MOVE_SMALLEST_UP STEP_LOOP, MAIN_num_unrolls, main_exit_label
|
||||
|
||||
; jmp end_debug
|
||||
cmp num_last, 3
|
||||
jne main_loop_sort
|
||||
|
||||
; num_last == 3 (real_size == 4)
|
||||
LOAD a0, 2
|
||||
LOAD a1, 3
|
||||
LOAD a2, 1
|
||||
STORE e0, 3 ; e0 is alias for a3
|
||||
STORE a2, 2
|
||||
SORT a0, a1
|
||||
end_2:
|
||||
STORE a0, 0
|
||||
STORE a1, 1
|
||||
; end_debug:
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
end_1:
|
||||
MY_ENDP
|
||||
|
||||
endif
|
||||
|
||||
ifdef Z7_SORT_ASM_USE_SEGMENT
|
||||
_TEXT$Z7_SORT ENDS
|
||||
endif
|
||||
|
||||
if 0
|
||||
LEA_IS_D8 (R64) [R2 * 4 + 16]
|
||||
Lat : TP
|
||||
2 : 1 : adl-e
|
||||
2 : 3 p056 adl-p
|
||||
1 : 2 : p15 hsw-rocket
|
||||
1 : 2 : p01 snb-ivb
|
||||
1 : 1 : p1 conroe-wsm
|
||||
1 : 4 : zen3,zen4
|
||||
2 : 4 : zen1,zen2
|
||||
|
||||
LEA_B_IS (R64) [R2 + R3 * 4]
|
||||
Lat : TP
|
||||
1 : 1 : adl-e
|
||||
2 : 3 p056 adl-p
|
||||
1 : 2 : p15 hsw-rocket
|
||||
1 : 2 : p01 snb-ivb
|
||||
1 : 1 : p1 nhm-wsm
|
||||
1 : 1 : p0 conroe-wsm
|
||||
1 : 4 : zen3,zen4
|
||||
2 :2,4 : zen1,zen2
|
||||
|
||||
LEA_B_IS_D8 (R64) [R2 + R3 * 4 + 16]
|
||||
Lat : TP
|
||||
2 : 1 : adl-e
|
||||
2 : 3 p056 adl-p
|
||||
1 : 2 : p15 ice-rocket
|
||||
3 : 1 : p1/p15 hsw-rocket
|
||||
3 : 1 : p01 snb-ivb
|
||||
1 : 1 : p1 nhm-wsm
|
||||
1 : 1 : p0 conroe-wsm
|
||||
2,1 : 2 : zen3,zen4
|
||||
2 : 2 : zen1,zen2
|
||||
|
||||
CMOVB (R64, R64)
|
||||
Lat : TP
|
||||
1,2 : 2 : adl-e
|
||||
1 : 2 p06 adl-p
|
||||
1 : 2 : p06 bwd-rocket
|
||||
1,2 : 2 : p0156+p06 hsw
|
||||
1,2 :1.5 : p015+p05 snb-ivb
|
||||
1,2 : 1 : p015+p05 nhm
|
||||
1 : 1 : 2*p015 conroe
|
||||
1 : 2 : zen3,zen4
|
||||
1 : 4 : zen1,zen2
|
||||
|
||||
ADC (R64, 0)
|
||||
Lat : TP
|
||||
1,2 : 2 : adl-e
|
||||
1 : 2 p06 adl-p
|
||||
1 : 2 : p06 bwd-rocket
|
||||
1 :1.5 : p0156+p06 hsw
|
||||
1 :1.5 : p015+p05 snb-ivb
|
||||
2 : 1 : 2*p015 conroe-wsm
|
||||
1 : 2 : zen1,zen2,zen3,zen4
|
||||
|
||||
PREFETCHNTA : fetch data into non-temporal cache close to the processor, minimizing cache pollution.
|
||||
L1 : Pentium3
|
||||
L2 : NetBurst
|
||||
L1, not L2: Core duo, Core 2, Atom processors
|
||||
L1, not L2, may fetch into L3 with fast replacement: Nehalem, Westmere, Sandy Bridge, ...
|
||||
NEHALEM: Fills L1/L3, L1 LRU is not updated
|
||||
L3 with fast replacement: Xeon Processors based on Nehalem, Westmere, Sandy Bridge, ...
|
||||
PREFETCHT0 : fetch data into all cache levels.
|
||||
PREFETCHT1 : fetch data into L2 and L3
|
||||
endif
|
||||
|
||||
end
|
||||
|
|
@ -1,113 +1,231 @@
|
|||
; XzCrc64Opt.asm -- CRC64 calculation : optimized version
|
||||
; 2021-02-06 : Igor Pavlov : Public domain
|
||||
; 2023-12-08 : Igor Pavlov : Public domain
|
||||
|
||||
include 7zAsm.asm
|
||||
|
||||
MY_ASM_START
|
||||
|
||||
NUM_WORDS equ 3
|
||||
|
||||
if (NUM_WORDS lt 1) or (NUM_WORDS gt 64)
|
||||
.err <num_words_IS_INCORRECT>
|
||||
endif
|
||||
|
||||
NUM_SKIP_BYTES equ ((NUM_WORDS - 2) * 4)
|
||||
|
||||
|
||||
; MOVZXLO dest, src
; dest = zero-extended byte register named <src>_L.
; The name is built with @CatStr, so src must be a register name for which
; an <src>_L alias is defined (for example x0 -> x0_L).
MOVZXLO macro dest:req, src:req
|
||||
movzx dest, @CatStr(src, _L)
|
||||
endm
|
||||
|
||||
; MOVZXHI dest, src
; dest = zero-extended byte register named <src>_H.
; The name is built with @CatStr, so src must be a register name for which
; an <src>_H alias is defined (for example x0 -> x0_H).
MOVZXHI macro dest:req, src:req
|
||||
movzx dest, @CatStr(src, _H)
|
||||
endm
|
||||
|
||||
|
||||
ifdef x64
|
||||
|
||||
rD equ r9
|
||||
rD equ r11
|
||||
rN equ r10
|
||||
rT equ r5
|
||||
num_VAR equ r8
|
||||
|
||||
SRCDAT4 equ dword ptr [rD + rN * 1]
|
||||
rT equ r9
|
||||
|
||||
CRC_OP macro op:req, dest:req, src:req, t:req
|
||||
op dest, QWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t)]
|
||||
endm
|
||||
|
||||
CRC_XOR macro dest:req, src:req, t:req
|
||||
xor dest, QWORD PTR [rT + src * 8 + 0800h * t]
|
||||
CRC_OP xor, dest, src, t
|
||||
endm
|
||||
|
||||
CRC_MOV macro dest:req, src:req, t:req
|
||||
CRC_OP mov, dest, src, t
|
||||
endm
|
||||
|
||||
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
shr r0, 8
|
||||
CRC_XOR r0, r6, 0
|
||||
dec rN
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
MOVZXLO x3, x0
|
||||
xor x6, x3
|
||||
shr r0, 8
|
||||
CRC_XOR r0, x6, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
MY_PROLOG macro crc_end:req
|
||||
ifdef ABI_LINUX
|
||||
MY_PUSH_2_REGS
|
||||
else
|
||||
MY_PUSH_4_REGS
|
||||
endif
|
||||
mov r0, REG_ABI_PARAM_0
|
||||
mov rN, REG_ABI_PARAM_2
|
||||
mov rT, REG_ABI_PARAM_3
|
||||
mov rD, REG_ABI_PARAM_1
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
@@:
|
||||
test rD, 3
|
||||
jz @F
|
||||
CRC1b
|
||||
jnz @B
|
||||
@@:
|
||||
cmp rN, 8
|
||||
jb crc_end
|
||||
add rN, rD
|
||||
mov num_VAR, rN
|
||||
sub rN, 4
|
||||
and rN, NOT 3
|
||||
sub rD, rN
|
||||
mov x1, SRCDAT4
|
||||
xor r0, r1
|
||||
add rN, 4
|
||||
|
||||
; ALIGN_MASK is 3 or 7 bytes alignment:
|
||||
ALIGN_MASK equ (7 - (NUM_WORDS and 1) * 4)
|
||||
|
||||
if NUM_WORDS eq 1
|
||||
|
||||
src_rN_offset equ 4
|
||||
; + 4 for prefetching next 4-bytes after current iteration
|
||||
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 4)
|
||||
SRCDAT4 equ DWORD PTR [rN + rD * 1]
|
||||
|
||||
XOR_NEXT macro
|
||||
mov x1, [rD]
|
||||
xor r0, r1
|
||||
endm
|
||||
|
||||
MY_EPILOG macro crc_end:req
|
||||
sub rN, 4
|
||||
mov x1, SRCDAT4
|
||||
xor r0, r1
|
||||
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
ifdef ABI_LINUX
|
||||
MY_POP_2_REGS
|
||||
else
|
||||
MY_POP_4_REGS
|
||||
endif
|
||||
else ; NUM_WORDS > 1
|
||||
|
||||
src_rN_offset equ 8
|
||||
; + 8 for prefetching next 8-bytes after current iteration
|
||||
NUM_BYTES_LIMIT equ (NUM_WORDS * 4 + 8)
|
||||
|
||||
XOR_NEXT macro
|
||||
xor r0, QWORD PTR [rD] ; 64-bit read, can be unaligned
|
||||
endm
|
||||
|
||||
MY_PROC XzCrc64UpdateT4, 4
|
||||
MY_PROLOG crc_end_4
|
||||
align 16
|
||||
main_loop_4:
|
||||
mov x1, SRCDAT4
|
||||
movzx x2, x0_L
|
||||
movzx x3, x0_H
|
||||
shr r0, 16
|
||||
movzx x6, x0_L
|
||||
movzx x7, x0_H
|
||||
shr r0, 16
|
||||
CRC_XOR r1, r2, 3
|
||||
CRC_XOR r0, r3, 2
|
||||
CRC_XOR r1, r6, 1
|
||||
CRC_XOR r0, r7, 0
|
||||
xor r0, r1
|
||||
; 32-bit or 64-bit
|
||||
LOAD_SRC_MULT4 macro dest:req, word_index:req
|
||||
mov dest, [rN + rD * 1 + 4 * (word_index) - src_rN_offset];
|
||||
endm
|
||||
|
||||
add rD, 4
|
||||
jnz main_loop_4
|
||||
endif
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
|
||||
|
||||
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 4
|
||||
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
|
||||
mov r0, REG_ABI_PARAM_0 ; r0 <- r1 / r7
|
||||
mov rD, REG_ABI_PARAM_1 ; r11 <- r2 / r6
|
||||
mov rN, REG_ABI_PARAM_2 ; r10 <- r8 / r2
|
||||
if (IS_LINUX gt 0)
|
||||
mov rT, REG_ABI_PARAM_3 ; r9 <- r9 / r1
|
||||
endif
|
||||
|
||||
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
|
||||
jb crc_end
|
||||
@@:
|
||||
test rD, ALIGN_MASK
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp @B
|
||||
@@:
|
||||
XOR_NEXT
|
||||
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
|
||||
sub rD, rN
|
||||
add rN, src_rN_offset
|
||||
|
||||
align 16
|
||||
@@:
|
||||
|
||||
if NUM_WORDS eq 1
|
||||
|
||||
mov x1, x0
|
||||
shr x1, 8
|
||||
MOVZXLO x3, x1
|
||||
MOVZXLO x2, x0
|
||||
shr x1, 8
|
||||
shr r0, 32
|
||||
xor x0, SRCDAT4
|
||||
CRC_XOR r0, x2, 3
|
||||
CRC_XOR r0, x3, 2
|
||||
MOVZXLO x2, x1
|
||||
shr x1, 8
|
||||
CRC_XOR r0, x2, 1
|
||||
CRC_XOR r0, x1, 0
|
||||
|
||||
else ; NUM_WORDS > 1
|
||||
|
||||
if NUM_WORDS ne 2
|
||||
k = 2
|
||||
while k lt NUM_WORDS
|
||||
|
||||
LOAD_SRC_MULT4 x1, k
|
||||
crc_op1 textequ <xor>
|
||||
|
||||
if k eq 2
|
||||
if (NUM_WORDS and 1)
|
||||
LOAD_SRC_MULT4 x7, NUM_WORDS ; aligned 32-bit
|
||||
LOAD_SRC_MULT4 x6, NUM_WORDS + 1 ; aligned 32-bit
|
||||
shl r6, 32
|
||||
else
|
||||
LOAD_SRC_MULT4 r6, NUM_WORDS ; aligned 64-bit
|
||||
crc_op1 textequ <mov>
|
||||
endif
|
||||
endif
|
||||
table = 4 * (NUM_WORDS - 1 - k)
|
||||
MOVZXLO x3, x1
|
||||
CRC_OP crc_op1, r7, x3, 3 + table
|
||||
MOVZXHI x3, x1
|
||||
shr x1, 16
|
||||
CRC_XOR r6, x3, 2 + table
|
||||
MOVZXLO x3, x1
|
||||
shr x1, 8
|
||||
CRC_XOR r7, x3, 1 + table
|
||||
CRC_XOR r6, x1, 0 + table
|
||||
k = k + 1
|
||||
endm
|
||||
crc_op2 textequ <xor>
|
||||
|
||||
else ; NUM_WORDS == 2
|
||||
LOAD_SRC_MULT4 r6, NUM_WORDS ; aligned 64-bit
|
||||
crc_op2 textequ <mov>
|
||||
endif ; NUM_WORDS == 2
|
||||
|
||||
MOVZXHI x3, x0
|
||||
MOVZXLO x2, x0
|
||||
mov r1, r0
|
||||
shr r1, 32
|
||||
shr x0, 16
|
||||
CRC_XOR r6, x2, NUM_SKIP_BYTES + 7
|
||||
CRC_OP crc_op2, r7, x3, NUM_SKIP_BYTES + 6
|
||||
MOVZXLO x2, x0
|
||||
MOVZXHI x5, x1
|
||||
MOVZXLO x3, x1
|
||||
shr x0, 8
|
||||
shr x1, 16
|
||||
CRC_XOR r7, x2, NUM_SKIP_BYTES + 5
|
||||
CRC_XOR r6, x3, NUM_SKIP_BYTES + 3
|
||||
CRC_XOR r7, x0, NUM_SKIP_BYTES + 4
|
||||
CRC_XOR r6, x5, NUM_SKIP_BYTES + 2
|
||||
MOVZXLO x2, x1
|
||||
shr x1, 8
|
||||
CRC_XOR r7, x2, NUM_SKIP_BYTES + 1
|
||||
CRC_MOV r0, x1, NUM_SKIP_BYTES + 0
|
||||
xor r0, r6
|
||||
xor r0, r7
|
||||
|
||||
endif ; NUM_WORDS > 1
|
||||
add rD, NUM_WORDS * 4
|
||||
jnc @B
|
||||
|
||||
sub rN, src_rN_offset
|
||||
add rD, rN
|
||||
XOR_NEXT
|
||||
add rN, NUM_BYTES_LIMIT - 1
|
||||
sub rN, rD
|
||||
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz func_end
|
||||
@@:
|
||||
CRC1b
|
||||
jnz @B
|
||||
func_end:
|
||||
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
|
||||
MY_ENDP
|
||||
|
||||
|
||||
|
||||
else
|
||||
; ==================================================================
|
||||
; x86 (32-bit)
|
||||
|
||||
rD equ r1
|
||||
rN equ r7
|
||||
rD equ r7
|
||||
rN equ r1
|
||||
rT equ r5
|
||||
|
||||
xA equ x6
|
||||
xA_R equ r6
|
||||
|
||||
ifdef x64
|
||||
num_VAR equ r8
|
||||
else
|
||||
|
||||
crc_OFFS equ (REG_SIZE * 5)
|
||||
|
||||
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
|
||||
|
|
@ -133,107 +251,273 @@ else
|
|||
table_VAR equ [r4 + table_OFFS]
|
||||
num_VAR equ table_VAR
|
||||
endif
|
||||
endif ; x64
|
||||
|
||||
SRCDAT4 equ dword ptr [rD + rN * 1]
|
||||
SRCDAT4 equ DWORD PTR [rN + rD * 1]
|
||||
|
||||
CRC_1 macro op:req, dest:req, src:req, t:req, word_index:req
|
||||
op dest, DWORD PTR [rT + @CatStr(src, _R) * 8 + 0800h * (t) + (word_index) * 4]
|
||||
endm
|
||||
|
||||
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
|
||||
op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t]
|
||||
op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
|
||||
CRC_1 op0, dest0, src, t, 0
|
||||
CRC_1 op1, dest1, src, t, 1
|
||||
endm
|
||||
|
||||
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
|
||||
CRC xor, xor, dest0, dest1, src, t
|
||||
CRC xor, xor, dest0, dest1, src, t
|
||||
endm
|
||||
|
||||
|
||||
CRC1b macro
|
||||
movzx x6, BYTE PTR [rD]
|
||||
inc rD
|
||||
movzx x3, x0_L
|
||||
xor x6, x3
|
||||
shrd r0, r2, 8
|
||||
shr r2, 8
|
||||
CRC_XOR r0, r2, r6, 0
|
||||
dec rN
|
||||
movzx xA, BYTE PTR [rD]
|
||||
inc rD
|
||||
MOVZXLO x3, x0
|
||||
xor xA, x3
|
||||
shrd x0, x2, 8
|
||||
shr x2, 8
|
||||
CRC_XOR x0, x2, xA, 0
|
||||
dec rN
|
||||
endm
|
||||
|
||||
MY_PROLOG macro crc_end:req
|
||||
MY_PUSH_4_REGS
|
||||
|
||||
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
|
||||
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
|
||||
mov rN, [r4 + size_OFFS]
|
||||
mov rD, [r4 + data_OFFS]
|
||||
else
|
||||
mov rN, r2
|
||||
endif
|
||||
|
||||
mov x0, [r4 + crc_OFFS]
|
||||
mov x2, [r4 + crc_OFFS + 4]
|
||||
mov rT, table_VAR
|
||||
test rN, rN
|
||||
jz crc_end
|
||||
@@:
|
||||
test rD, 3
|
||||
jz @F
|
||||
CRC1b
|
||||
jnz @B
|
||||
@@:
|
||||
cmp rN, 8
|
||||
jb crc_end
|
||||
add rN, rD
|
||||
|
||||
mov num_VAR, rN
|
||||
|
||||
sub rN, 4
|
||||
and rN, NOT 3
|
||||
sub rD, rN
|
||||
xor r0, SRCDAT4
|
||||
add rN, 4
|
||||
MY_PROLOG_BASE macro
|
||||
MY_PUSH_4_REGS
|
||||
ifdef x64
|
||||
mov r0, REG_ABI_PARAM_0 ; r0 <- r1 / r7
|
||||
mov rT, REG_ABI_PARAM_3 ; r5 <- r9 / r1
|
||||
mov rN, REG_ABI_PARAM_2 ; r1 <- r8 / r2
|
||||
mov rD, REG_ABI_PARAM_1 ; r7 <- r2 / r6
|
||||
mov r2, r0
|
||||
shr r2, 32
|
||||
mov x0, x0
|
||||
else
|
||||
if (IS_CDECL gt 0) or (IS_LINUX gt 0)
|
||||
proc_numParams = proc_numParams + 2 ; for ABI_LINUX
|
||||
mov rN, [r4 + size_OFFS]
|
||||
mov rD, [r4 + data_OFFS]
|
||||
else
|
||||
mov rD, REG_ABI_PARAM_0 ; r7 <- r1 : (data)
|
||||
mov rN, REG_ABI_PARAM_1 ; r1 <- r2 : (size)
|
||||
endif
|
||||
mov x0, [r4 + crc_OFFS]
|
||||
mov x2, [r4 + crc_OFFS + 4]
|
||||
mov rT, table_VAR
|
||||
endif
|
||||
endm
|
||||
|
||||
MY_EPILOG macro crc_end:req
|
||||
sub rN, 4
|
||||
xor r0, SRCDAT4
|
||||
|
||||
mov rD, rN
|
||||
mov rN, num_VAR
|
||||
sub rN, rD
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp crc_end
|
||||
@@:
|
||||
MY_POP_4_REGS
|
||||
MY_EPILOG_BASE macro crc_end:req, func_end:req
|
||||
crc_end:
|
||||
test rN, rN
|
||||
jz func_end
|
||||
@@:
|
||||
CRC1b
|
||||
jnz @B
|
||||
func_end:
|
||||
ifdef x64
|
||||
shl r2, 32
|
||||
xor r0, r2
|
||||
endif
|
||||
MY_POP_4_REGS
|
||||
endm
|
||||
|
||||
MY_PROC XzCrc64UpdateT4, 5
|
||||
MY_PROLOG crc_end_4
|
||||
movzx x6, x0_L
|
||||
align 16
|
||||
main_loop_4:
|
||||
mov r3, SRCDAT4
|
||||
xor r3, r2
|
||||
|
||||
CRC xor, mov, r3, r2, r6, 3
|
||||
movzx x6, x0_H
|
||||
shr r0, 16
|
||||
CRC_XOR r3, r2, r6, 2
|
||||
; ALIGN_MASK is 3 or 7 bytes alignment:
|
||||
ALIGN_MASK equ (7 - (NUM_WORDS and 1) * 4)
|
||||
|
||||
movzx x6, x0_L
|
||||
movzx x0, x0_H
|
||||
CRC_XOR r3, r2, r6, 1
|
||||
CRC_XOR r3, r2, r0, 0
|
||||
movzx x6, x3_L
|
||||
mov r0, r3
|
||||
if (NUM_WORDS eq 1)
|
||||
|
||||
add rD, 4
|
||||
jnz main_loop_4
|
||||
NUM_BYTES_LIMIT_T4 equ (NUM_WORDS * 4 + 4)
|
||||
|
||||
MY_EPILOG crc_end_4
|
||||
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
|
||||
MY_PROLOG_BASE
|
||||
|
||||
cmp rN, NUM_BYTES_LIMIT_T4 + ALIGN_MASK
|
||||
jb crc_end_4
|
||||
@@:
|
||||
test rD, ALIGN_MASK
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp @B
|
||||
@@:
|
||||
xor x0, [rD]
|
||||
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT_T4 - 1)]
|
||||
sub rD, rN
|
||||
add rN, 4
|
||||
|
||||
MOVZXLO xA, x0
|
||||
align 16
|
||||
@@:
|
||||
mov x3, SRCDAT4
|
||||
xor x3, x2
|
||||
shr x0, 8
|
||||
CRC xor, mov, x3, x2, xA, 3
|
||||
MOVZXLO xA, x0
|
||||
shr x0, 8
|
||||
; MOVZXHI xA, x0
|
||||
; shr x0, 16
|
||||
CRC_XOR x3, x2, xA, 2
|
||||
|
||||
MOVZXLO xA, x0
|
||||
shr x0, 8
|
||||
CRC_XOR x3, x2, xA, 1
|
||||
CRC_XOR x3, x2, x0, 0
|
||||
MOVZXLO xA, x3
|
||||
mov x0, x3
|
||||
|
||||
add rD, 4
|
||||
jnc @B
|
||||
|
||||
sub rN, 4
|
||||
add rD, rN
|
||||
xor x0, [rD]
|
||||
add rN, NUM_BYTES_LIMIT_T4 - 1
|
||||
sub rN, rD
|
||||
MY_EPILOG_BASE crc_end_4, func_end_4
|
||||
MY_ENDP
|
||||
|
||||
endif ; ! x64
|
||||
else ; NUM_WORDS > 1
|
||||
|
||||
SHR_X macro x, imm
|
||||
shr x, imm
|
||||
endm
|
||||
|
||||
|
||||
ITER_1 macro v0, v1, a, off
|
||||
MOVZXLO xA, a
|
||||
SHR_X a, 8
|
||||
CRC_XOR v0, v1, xA, off
|
||||
endm
|
||||
|
||||
|
||||
ITER_4 macro v0, v1, a, off
|
||||
if 0 eq 0
|
||||
ITER_1 v0, v1, a, off + 3
|
||||
ITER_1 v0, v1, a, off + 2
|
||||
ITER_1 v0, v1, a, off + 1
|
||||
CRC_XOR v0, v1, a, off
|
||||
elseif 0 eq 0
|
||||
MOVZXLO xA, a
|
||||
CRC_XOR v0, v1, xA, off + 3
|
||||
mov xA, a
|
||||
ror a, 16 ; 32-bit ror
|
||||
shr xA, 24
|
||||
CRC_XOR v0, v1, xA, off
|
||||
MOVZXLO xA, a
|
||||
SHR_X a, 24
|
||||
CRC_XOR v0, v1, xA, off + 1
|
||||
CRC_XOR v0, v1, a, off + 2
|
||||
else
|
||||
; MOVZXHI provides smaller code, but MOVZX_HI_BYTE is not fast instruction
|
||||
MOVZXLO xA, a
|
||||
CRC_XOR v0, v1, xA, off + 3
|
||||
MOVZXHI xA, a
|
||||
SHR_X a, 16
|
||||
CRC_XOR v0, v1, xA, off + 2
|
||||
MOVZXLO xA, a
|
||||
SHR_X a, 8
|
||||
CRC_XOR v0, v1, xA, off + 1
|
||||
CRC_XOR v0, v1, a, off
|
||||
endif
|
||||
endm
|
||||
|
||||
|
||||
|
||||
ITER_1_PAIR macro v0, v1, a0, a1, off
|
||||
ITER_1 v0, v1, a0, off + 4
|
||||
ITER_1 v0, v1, a1, off
|
||||
endm
|
||||
|
||||
src_rD_offset equ 8
|
||||
STEP_SIZE equ (NUM_WORDS * 4)
|
||||
|
||||
ITER_12_NEXT macro op, index, v0, v1
|
||||
op v0, DWORD PTR [rD + (index + 1) * STEP_SIZE - src_rD_offset]
|
||||
op v1, DWORD PTR [rD + (index + 1) * STEP_SIZE + 4 - src_rD_offset]
|
||||
endm
|
||||
|
||||
ITER_12 macro index, a0, a1, v0, v1
|
||||
|
||||
if NUM_SKIP_BYTES eq 0
|
||||
ITER_12_NEXT mov, index, v0, v1
|
||||
else
|
||||
k = 0
|
||||
while k lt NUM_SKIP_BYTES
|
||||
movzx xA, BYTE PTR [rD + (index) * STEP_SIZE + k + 8 - src_rD_offset]
|
||||
if k eq 0
|
||||
CRC mov, mov, v0, v1, xA, NUM_SKIP_BYTES - 1 - k
|
||||
else
|
||||
CRC_XOR v0, v1, xA, NUM_SKIP_BYTES - 1 - k
|
||||
endif
|
||||
k = k + 1
|
||||
endm
|
||||
ITER_12_NEXT xor, index, v0, v1
|
||||
endif
|
||||
|
||||
if 0 eq 0
|
||||
ITER_4 v0, v1, a0, NUM_SKIP_BYTES + 4
|
||||
ITER_4 v0, v1, a1, NUM_SKIP_BYTES
|
||||
else ; interleave version is faster/slower for different processors
|
||||
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 3
|
||||
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 2
|
||||
ITER_1_PAIR v0, v1, a0, a1, NUM_SKIP_BYTES + 1
|
||||
CRC_XOR v0, v1, a0, NUM_SKIP_BYTES + 4
|
||||
CRC_XOR v0, v1, a1, NUM_SKIP_BYTES
|
||||
endif
|
||||
endm
|
||||
|
||||
; we use (UNROLL_CNT > 1) to reduce read ports pressure (num_VAR reads)
|
||||
UNROLL_CNT equ (2 * 1)
|
||||
NUM_BYTES_LIMIT equ (STEP_SIZE * UNROLL_CNT + 8)
|
||||
|
||||
MY_PROC @CatStr(XzCrc64UpdateT, %(NUM_WORDS * 4)), 5
|
||||
MY_PROLOG_BASE
|
||||
|
||||
cmp rN, NUM_BYTES_LIMIT + ALIGN_MASK
|
||||
jb crc_end_12
|
||||
@@:
|
||||
test rD, ALIGN_MASK
|
||||
jz @F
|
||||
CRC1b
|
||||
jmp @B
|
||||
@@:
|
||||
xor x0, [rD]
|
||||
xor x2, [rD + 4]
|
||||
add rD, src_rD_offset
|
||||
lea rN, [rD + rN * 1 - (NUM_BYTES_LIMIT - 1)]
|
||||
mov num_VAR, rN
|
||||
|
||||
align 16
|
||||
@@:
|
||||
i = 0
|
||||
rept UNROLL_CNT
|
||||
if (i and 1) eq 0
|
||||
ITER_12 i, x0, x2, x1, x3
|
||||
else
|
||||
ITER_12 i, x1, x3, x0, x2
|
||||
endif
|
||||
i = i + 1
|
||||
endm
|
||||
|
||||
if (UNROLL_CNT and 1)
|
||||
mov x0, x1
|
||||
mov x2, x3
|
||||
endif
|
||||
add rD, STEP_SIZE * UNROLL_CNT
|
||||
cmp rD, num_VAR
|
||||
jb @B
|
||||
|
||||
mov rN, num_VAR
|
||||
add rN, NUM_BYTES_LIMIT - 1
|
||||
sub rN, rD
|
||||
sub rD, src_rD_offset
|
||||
xor x0, [rD]
|
||||
xor x2, [rD + 4]
|
||||
|
||||
MY_EPILOG_BASE crc_end_12, func_end_12
|
||||
MY_ENDP
|
||||
|
||||
endif ; (NUM_WORDS > 1)
|
||||
endif ; ! x64
|
||||
end
|
||||
|
|
|
|||
12
C/7z.h
12
C/7z.h
|
|
@ -1,8 +1,8 @@
|
|||
/* 7z.h -- 7z interface
|
||||
2018-07-02 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_H
|
||||
#define __7Z_H
|
||||
#ifndef ZIP7_INC_7Z_H
|
||||
#define ZIP7_INC_7Z_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ typedef struct
|
|||
UInt64 SzAr_GetFolderUnpackSize(const CSzAr *p, UInt32 folderIndex);
|
||||
|
||||
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
|
||||
ILookInStream *stream, UInt64 startPos,
|
||||
ILookInStreamPtr stream, UInt64 startPos,
|
||||
Byte *outBuffer, size_t outSize,
|
||||
ISzAllocPtr allocMain);
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ UInt16 *SzArEx_GetFullNameUtf16_Back(const CSzArEx *p, size_t fileIndex, UInt16
|
|||
|
||||
SRes SzArEx_Extract(
|
||||
const CSzArEx *db,
|
||||
ILookInStream *inStream,
|
||||
ILookInStreamPtr inStream,
|
||||
UInt32 fileIndex, /* index of file */
|
||||
UInt32 *blockIndex, /* index of solid block */
|
||||
Byte **outBuffer, /* pointer to pointer to output buffer (allocated with allocMain) */
|
||||
|
|
@ -196,7 +196,7 @@ SZ_ERROR_INPUT_EOF
|
|||
SZ_ERROR_FAIL
|
||||
*/
|
||||
|
||||
SRes SzArEx_Open(CSzArEx *p, ILookInStream *inStream,
|
||||
SRes SzArEx_Open(CSzArEx *p, ILookInStreamPtr inStream,
|
||||
ISzAllocPtr allocMain, ISzAllocPtr allocTemp);
|
||||
|
||||
EXTERN_C_END
|
||||
|
|
|
|||
69
C/7zAlloc.c
69
C/7zAlloc.c
|
|
@ -1,5 +1,5 @@
|
|||
/* 7zAlloc.c -- Allocation functions
|
||||
2017-04-03 : Igor Pavlov : Public domain */
|
||||
/* 7zAlloc.c -- Allocation functions for 7z processing
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -7,74 +7,83 @@
|
|||
|
||||
#include "7zAlloc.h"
|
||||
|
||||
/* #define _SZ_ALLOC_DEBUG */
|
||||
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
/* #define SZ_ALLOC_DEBUG */
|
||||
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
|
||||
/*
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include "7zWindows.h"
|
||||
#endif
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
int g_allocCount = 0;
|
||||
int g_allocCountTemp = 0;
|
||||
static int g_allocCount = 0;
|
||||
static int g_allocCountTemp = 0;
|
||||
|
||||
/* Debug helper: writes one allocation record to stderr and then increments
   *counter.
     s       : label printed at the start of the record
     size    : requested size; it is cast to unsigned for printing, so larger
               values are shown truncated
     counter : allocation counter; the value printed is the count BEFORE
               this allocation is added */
static void Print_Alloc(const char *s, size_t size, int *counter)
|
||||
{
|
||||
const unsigned size2 = (unsigned)size;
|
||||
fprintf(stderr, "\n%s count = %10d : %10u bytes; ", s, *counter, size2);
|
||||
(*counter)++;
|
||||
}
|
||||
/* Debug helper: decrements *counter and then writes one free record to stderr.
     s       : label printed at the start of the record
     counter : allocation counter; the value printed is the count AFTER
               this free is subtracted */
static void Print_Free(const char *s, int *counter)
|
||||
{
|
||||
(*counter)--;
|
||||
fprintf(stderr, "\n%s count = %10d", s, *counter);
|
||||
}
|
||||
#endif
|
||||
|
||||
void *SzAlloc(ISzAllocPtr p, size_t size)
|
||||
{
|
||||
UNUSED_VAR(p);
|
||||
UNUSED_VAR(p)
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc %10u bytes; count = %10d", (unsigned)size, g_allocCount);
|
||||
g_allocCount++;
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
Print_Alloc("Alloc", size, &g_allocCount);
|
||||
#endif
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
void SzFree(ISzAllocPtr p, void *address)
|
||||
{
|
||||
UNUSED_VAR(p);
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
{
|
||||
g_allocCount--;
|
||||
fprintf(stderr, "\nFree; count = %10d", g_allocCount);
|
||||
}
|
||||
UNUSED_VAR(p)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
if (address)
|
||||
Print_Free("Free ", &g_allocCount);
|
||||
#endif
|
||||
free(address);
|
||||
}
|
||||
|
||||
void *SzAllocTemp(ISzAllocPtr p, size_t size)
|
||||
{
|
||||
UNUSED_VAR(p);
|
||||
UNUSED_VAR(p)
|
||||
if (size == 0)
|
||||
return 0;
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
fprintf(stderr, "\nAlloc_temp %10u bytes; count = %10d", (unsigned)size, g_allocCountTemp);
|
||||
g_allocCountTemp++;
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
Print_Alloc("Alloc_temp", size, &g_allocCountTemp);
|
||||
/*
|
||||
#ifdef _WIN32
|
||||
return HeapAlloc(GetProcessHeap(), 0, size);
|
||||
#endif
|
||||
*/
|
||||
#endif
|
||||
return malloc(size);
|
||||
}
|
||||
|
||||
void SzFreeTemp(ISzAllocPtr p, void *address)
|
||||
{
|
||||
UNUSED_VAR(p);
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
if (address != 0)
|
||||
{
|
||||
g_allocCountTemp--;
|
||||
fprintf(stderr, "\nFree_temp; count = %10d", g_allocCountTemp);
|
||||
}
|
||||
UNUSED_VAR(p)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
if (address)
|
||||
Print_Free("Free_temp ", &g_allocCountTemp);
|
||||
/*
|
||||
#ifdef _WIN32
|
||||
HeapFree(GetProcessHeap(), 0, address);
|
||||
return;
|
||||
#endif
|
||||
*/
|
||||
#endif
|
||||
free(address);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* 7zAlloc.h -- Allocation functions
|
||||
2017-04-03 : Igor Pavlov : Public domain */
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_ALLOC_H
|
||||
#define __7Z_ALLOC_H
|
||||
#ifndef ZIP7_INC_7Z_ALLOC_H
|
||||
#define ZIP7_INC_7Z_ALLOC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
|
|||
409
C/7zArcIn.c
409
C/7zArcIn.c
File diff suppressed because it is too large
Load diff
|
|
@ -1,8 +1,8 @@
|
|||
/* 7zBuf.h -- Byte Buffer
|
||||
2017-04-03 : Igor Pavlov : Public domain */
|
||||
2023-03-04 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_BUF_H
|
||||
#define __7Z_BUF_H
|
||||
#ifndef ZIP7_INC_7Z_BUF_H
|
||||
#define ZIP7_INC_7Z_BUF_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
|
|||
554
C/7zCrc.c
554
C/7zCrc.c
|
|
@ -1,182 +1,218 @@
|
|||
/* 7zCrc.c -- CRC32 init
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
/* 7zCrc.c -- CRC32 calculation and init
|
||||
2024-03-01 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "7zCrc.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define kCrcPoly 0xEDB88320
|
||||
// for debug:
|
||||
// #define __ARM_FEATURE_CRC32 1
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
#define CRC_NUM_TABLES 8
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
// #pragma message("__ARM_FEATURE_CRC32")
|
||||
#define Z7_CRC_HW_FORCE
|
||||
#endif
|
||||
|
||||
// #define Z7_CRC_DEBUG_BE
|
||||
#ifdef Z7_CRC_DEBUG_BE
|
||||
#undef MY_CPU_LE
|
||||
#define MY_CPU_BE
|
||||
#endif
|
||||
|
||||
#ifdef Z7_CRC_HW_FORCE
|
||||
#define Z7_CRC_NUM_TABLES_USE 1
|
||||
#else
|
||||
#define CRC_NUM_TABLES 9
|
||||
|
||||
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
#ifdef Z7_CRC_NUM_TABLES
|
||||
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
|
||||
#else
|
||||
#define Z7_CRC_NUM_TABLES_USE 12
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MY_CPU_BE
|
||||
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
#if Z7_CRC_NUM_TABLES_USE < 1
|
||||
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
|
||||
#endif
|
||||
|
||||
typedef UInt32 (MY_FAST_CALL *CRC_FUNC)(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
#if defined(MY_CPU_LE) || (Z7_CRC_NUM_TABLES_USE == 1)
|
||||
#define Z7_CRC_NUM_TABLES_TOTAL Z7_CRC_NUM_TABLES_USE
|
||||
#else
|
||||
#define Z7_CRC_NUM_TABLES_TOTAL (Z7_CRC_NUM_TABLES_USE + 1)
|
||||
#endif
|
||||
|
||||
extern
|
||||
CRC_FUNC g_CrcUpdateT4;
|
||||
CRC_FUNC g_CrcUpdateT4;
|
||||
extern
|
||||
CRC_FUNC g_CrcUpdateT8;
|
||||
CRC_FUNC g_CrcUpdateT8;
|
||||
extern
|
||||
CRC_FUNC g_CrcUpdateT0_32;
|
||||
CRC_FUNC g_CrcUpdateT0_32;
|
||||
extern
|
||||
CRC_FUNC g_CrcUpdateT0_64;
|
||||
CRC_FUNC g_CrcUpdateT0_64;
|
||||
extern
|
||||
CRC_FUNC g_CrcUpdate;
|
||||
CRC_FUNC g_CrcUpdate;
|
||||
#ifndef Z7_CRC_HW_FORCE
|
||||
|
||||
UInt32 g_CrcTable[256 * CRC_NUM_TABLES];
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void *data, size_t size)
|
||||
{
|
||||
return g_CrcUpdate(v, data, size, g_CrcTable);
|
||||
}
|
||||
|
||||
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size)
|
||||
{
|
||||
return g_CrcUpdate(CRC_INIT_VAL, data, size, g_CrcTable) ^ CRC_INIT_VAL;
|
||||
}
|
||||
|
||||
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
UInt32 MY_FAST_CALL CrcUpdateT1(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
#if Z7_CRC_NUM_TABLES_USE == 1 \
|
||||
|| (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
|
||||
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
#define Z7_CRC_UPDATE_T1_FUNC_NAME CrcUpdateGT1
|
||||
static UInt32 Z7_FASTCALL Z7_CRC_UPDATE_T1_FUNC_NAME(UInt32 v, const void *data, size_t size)
|
||||
{
|
||||
const UInt32 *table = g_CrcTable;
|
||||
const Byte *p = (const Byte *)data;
|
||||
const Byte *pEnd = p + size;
|
||||
for (; p != pEnd; p++)
|
||||
const Byte *lim = p + size;
|
||||
for (; p != lim; p++)
|
||||
v = CRC_UPDATE_BYTE_2(v, *p);
|
||||
return v;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#if Z7_CRC_NUM_TABLES_USE != 1
|
||||
#ifndef MY_CPU_BE
|
||||
#define FUNC_NAME_LE_2(s) CrcUpdateT ## s
|
||||
#define FUNC_NAME_LE_1(s) FUNC_NAME_LE_2(s)
|
||||
#define FUNC_NAME_LE FUNC_NAME_LE_1(Z7_CRC_NUM_TABLES_USE)
|
||||
UInt32 Z7_FASTCALL FUNC_NAME_LE (UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
#endif
|
||||
#ifndef MY_CPU_LE
|
||||
#define FUNC_NAME_BE_2(s) CrcUpdateT1_BeT ## s
|
||||
#define FUNC_NAME_BE_1(s) FUNC_NAME_BE_2(s)
|
||||
#define FUNC_NAME_BE FUNC_NAME_BE_1(Z7_CRC_NUM_TABLES_USE)
|
||||
UInt32 Z7_FASTCALL FUNC_NAME_BE (UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif // Z7_CRC_HW_FORCE
|
||||
|
||||
/* ---------- hardware CRC ---------- */
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
|
||||
#if defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
||||
// #pragma message("ARM*")
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(MY_CPU_ARM64)
|
||||
#if (_MSC_VER >= 1910)
|
||||
#define USE_ARM64_CRC
|
||||
#endif
|
||||
#endif
|
||||
#elif (defined(__clang__) && (__clang_major__ >= 3)) \
|
||||
|| (defined(__GNUC__) && (__GNUC__ > 4))
|
||||
#if (defined(__clang__) && (__clang_major__ >= 3)) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) && defined(MY_CPU_ARM64) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 8)
|
||||
#if !defined(__ARM_FEATURE_CRC32)
|
||||
// #pragma message("!defined(__ARM_FEATURE_CRC32)")
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define __ARM_FEATURE_CRC32 1
|
||||
#if (!defined(__clang__) || (__clang_major__ > 3)) // fix these numbers
|
||||
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_ARM_FEATURE_CRC32_WAS_SET
|
||||
#if defined(__clang__)
|
||||
#if defined(MY_CPU_ARM64)
|
||||
#define ATTRIB_CRC __attribute__((__target__("crc")))
|
||||
#else
|
||||
#define ATTRIB_CRC __attribute__((__target__("armv8-a,crc")))
|
||||
#endif
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64)
|
||||
#if !defined(Z7_GCC_VERSION) || (Z7_GCC_VERSION >= 60000)
|
||||
#define ATTRIB_CRC __attribute__((__target__("+crc")))
|
||||
#endif
|
||||
#else
|
||||
#if !defined(Z7_GCC_VERSION) || (__GNUC__ >= 8)
|
||||
#if defined(__ARM_FP) && __GNUC__ >= 8
|
||||
// for -mfloat-abi=hard: similar to <arm_acle.h>
|
||||
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc+simd")))
|
||||
#else
|
||||
#define ATTRIB_CRC __attribute__((__target__("arch=armv8-a+crc")))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#if defined(__ARM_FEATURE_CRC32)
|
||||
#define USE_ARM64_CRC
|
||||
// #pragma message("<arm_acle.h>")
|
||||
/*
|
||||
arm_acle.h (GCC):
|
||||
before Nov 17, 2017:
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
|
||||
Nov 17, 2017: gcc10.0 (gcc 9.2.0) checked:
|
||||
#if __ARM_ARCH >= 8
|
||||
#pragma GCC target ("arch=armv8-a+crc")
|
||||
|
||||
Aug 22, 2019: GCC 8.4?, 9.2.1, 10.1:
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
#ifdef __ARM_FP
|
||||
#pragma GCC target ("arch=armv8-a+crc+simd")
|
||||
#else
|
||||
#pragma GCC target ("arch=armv8-a+crc")
|
||||
#endif
|
||||
*/
|
||||
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
|
||||
#if defined(Z7_GCC_VERSION) && (__GNUC__ == 8) && (Z7_GCC_VERSION < 80400) \
|
||||
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 9) && (Z7_GCC_VERSION < 90201) \
|
||||
|| defined(Z7_GCC_VERSION) && (__GNUC__ == 10) && (Z7_GCC_VERSION < 100100)
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("#define __ARM_ARCH 8")
|
||||
#undef __ARM_ARCH
|
||||
#define __ARM_ARCH 8
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
#endif
|
||||
#define Z7_CRC_HW_USE
|
||||
#include <arm_acle.h>
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if defined(MY_CPU_ARM64)
|
||||
#if (_MSC_VER >= 1910)
|
||||
#ifdef __clang__
|
||||
// #define Z7_CRC_HW_USE
|
||||
// #include <arm_acle.h>
|
||||
#else
|
||||
#define Z7_CRC_HW_USE
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#else // non-ARM*
|
||||
|
||||
// no hardware CRC
|
||||
|
||||
// #define USE_CRC_EMU
|
||||
|
||||
#ifdef USE_CRC_EMU
|
||||
|
||||
#pragma message("ARM64 CRC emulation")
|
||||
|
||||
/*
  __crc32b (table-based emulation, compiled only under USE_CRC_EMU):
  advances CRC state (v) by one byte using g_CrcTable.
  Only the low 8 bits of (data) are used (the value is cast to Byte).
*/
MY_FORCE_INLINE
|
||||
UInt32 __crc32b(UInt32 v, UInt32 data)
|
||||
{
|
||||
// CRC_UPDATE_BYTE_2 expects a local named (table)
const UInt32 *table = g_CrcTable;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data);
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
  __crc32w (table-based emulation, compiled only under USE_CRC_EMU):
  advances CRC state (v) by the four bytes of the 32-bit value (data).
  Bytes are consumed from least significant to most significant:
  each step uses (Byte)data and then shifts (data) right by 8.
*/
MY_FORCE_INLINE
|
||||
UInt32 __crc32w(UInt32 v, UInt32 data)
|
||||
{
|
||||
// CRC_UPDATE_BYTE_2 expects a local named (table)
const UInt32 *table = g_CrcTable;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
  __crc32d (table-based emulation, compiled only under USE_CRC_EMU):
  advances CRC state (v) by the eight bytes of the 64-bit value (data).
  Bytes are consumed from least significant to most significant:
  each step uses (Byte)data and then shifts (data) right by 8.
*/
MY_FORCE_INLINE
|
||||
UInt32 __crc32d(UInt32 v, UInt64 data)
|
||||
{
|
||||
// CRC_UPDATE_BYTE_2 expects a local named (table)
const UInt32 *table = g_CrcTable;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
v = CRC_UPDATE_BYTE_2(v, (Byte)data); data >>= 8;
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // USE_CRC_EMU
|
||||
|
||||
#endif // defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
|
||||
|
||||
|
||||
|
||||
#if defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
|
||||
|
||||
#define T0_32_UNROLL_BYTES (4 * 4)
|
||||
#define T0_64_UNROLL_BYTES (4 * 8)
|
||||
|
||||
#ifndef ATTRIB_CRC
|
||||
#define ATTRIB_CRC
|
||||
// #define Z7_CRC_HW_USE // for debug : we can test HW-branch of code
|
||||
#ifdef Z7_CRC_HW_USE
|
||||
#include "7zCrcEmu.h"
|
||||
#endif
|
||||
|
||||
#endif // non-ARM*
|
||||
|
||||
|
||||
|
||||
#if defined(Z7_CRC_HW_USE)
|
||||
|
||||
// #pragma message("USE ARM HW CRC")
|
||||
|
||||
ATTRIB_CRC
|
||||
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
ATTRIB_CRC
|
||||
UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
#ifdef MY_CPU_64BIT
|
||||
#define CRC_HW_WORD_TYPE UInt64
|
||||
#define CRC_HW_WORD_FUNC __crc32d
|
||||
#else
|
||||
#define CRC_HW_WORD_TYPE UInt32
|
||||
#define CRC_HW_WORD_FUNC __crc32w
|
||||
#endif
|
||||
|
||||
#define CRC_HW_UNROLL_BYTES (sizeof(CRC_HW_WORD_TYPE) * 4)
|
||||
|
||||
#ifdef ATTRIB_CRC
|
||||
ATTRIB_CRC
|
||||
#endif
|
||||
Z7_NO_INLINE
|
||||
#ifdef Z7_CRC_HW_FORCE
|
||||
UInt32 Z7_FASTCALL CrcUpdate
|
||||
#else
|
||||
static UInt32 Z7_FASTCALL CrcUpdate_HW
|
||||
#endif
|
||||
(UInt32 v, const void *data, size_t size)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
UNUSED_VAR(table);
|
||||
|
||||
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_32_UNROLL_BYTES - 1)) != 0; size--)
|
||||
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (CRC_HW_UNROLL_BYTES - 1)) != 0; size--)
|
||||
v = __crc32b(v, *p++);
|
||||
|
||||
if (size >= T0_32_UNROLL_BYTES)
|
||||
if (size >= CRC_HW_UNROLL_BYTES)
|
||||
{
|
||||
const Byte *lim = p + size;
|
||||
size &= (T0_32_UNROLL_BYTES - 1);
|
||||
size &= CRC_HW_UNROLL_BYTES - 1;
|
||||
lim -= size;
|
||||
do
|
||||
{
|
||||
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
|
||||
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
|
||||
v = __crc32w(v, *(const UInt32 *)(const void *)(p));
|
||||
v = __crc32w(v, *(const UInt32 *)(const void *)(p + 4)); p += 2 * 4;
|
||||
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
|
||||
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
|
||||
p += 2 * sizeof(CRC_HW_WORD_TYPE);
|
||||
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p));
|
||||
v = CRC_HW_WORD_FUNC(v, *(const CRC_HW_WORD_TYPE *)(const void *)(p + sizeof(CRC_HW_WORD_TYPE)));
|
||||
p += 2 * sizeof(CRC_HW_WORD_TYPE);
|
||||
}
|
||||
while (p != lim);
|
||||
}
|
||||
|
|
@ -187,136 +223,198 @@ UInt32 MY_FAST_CALL CrcUpdateT0_32(UInt32 v, const void *data, size_t size, cons
|
|||
return v;
|
||||
}
|
||||
|
||||
/*
  CrcUpdateT0_64: updates CRC state (v) over (size) bytes at (data) using the
  __crc32b / __crc32d primitives, and returns the new state.
  (table) is not used; it is only part of the common function signature.
  Three phases:
    1) single bytes until (p) is a multiple of T0_64_UNROLL_BYTES or input ends,
    2) blocks of T0_64_UNROLL_BYTES, four 64-bit words per iteration,
    3) remaining tail bytes.
  NOTE(review): presumably belongs to the pre-change revision shown in this
  diff (MY_FAST_CALL, table parameter) - confirm.
*/
ATTRIB_CRC
|
||||
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
ATTRIB_CRC
|
||||
UInt32 MY_FAST_CALL CrcUpdateT0_64(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
UNUSED_VAR(table);
|
||||
|
||||
// phase 1: byte steps until the low address bits (mask T0_64_UNROLL_BYTES - 1) are zero
for (; size != 0 && ((unsigned)(ptrdiff_t)p & (T0_64_UNROLL_BYTES - 1)) != 0; size--)
|
||||
v = __crc32b(v, *p++);
|
||||
|
||||
if (size >= T0_64_UNROLL_BYTES)
|
||||
{
|
||||
// (lim) = end of the last whole block; (size) keeps only the tail byte count
const Byte *lim = p + size;
|
||||
size &= (T0_64_UNROLL_BYTES - 1);
|
||||
lim -= size;
|
||||
// phase 2: at least one whole block exists here, so do-while is safe
do
|
||||
{
|
||||
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
|
||||
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
|
||||
v = __crc32d(v, *(const UInt64 *)(const void *)(p));
|
||||
v = __crc32d(v, *(const UInt64 *)(const void *)(p + 8)); p += 2 * 8;
|
||||
}
|
||||
while (p != lim);
|
||||
}
|
||||
|
||||
// phase 3: tail bytes (fewer than T0_64_UNROLL_BYTES)
for (; size != 0; size--)
|
||||
v = __crc32b(v, *p++);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // defined(USE_ARM64_CRC) || defined(USE_CRC_EMU)
|
||||
#ifdef Z7_ARM_FEATURE_CRC32_WAS_SET
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#undef __ARM_FEATURE_CRC32
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#undef Z7_ARM_FEATURE_CRC32_WAS_SET
|
||||
#endif
|
||||
|
||||
#endif // defined(Z7_CRC_HW_USE)
|
||||
#endif // MY_CPU_LE
|
||||
|
||||
|
||||
|
||||
#ifndef Z7_CRC_HW_FORCE
|
||||
|
||||
void MY_FAST_CALL CrcGenerateTable()
|
||||
#if defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
|
||||
/*
|
||||
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_WITH_TABLE_FUNC)
|
||||
(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
Z7_CRC_UPDATE_WITH_TABLE_FUNC g_CrcUpdate;
|
||||
*/
|
||||
static unsigned g_Crc_Algo;
|
||||
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
|
||||
static unsigned g_Crc_Be;
|
||||
#endif
|
||||
#endif // defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
|
||||
|
||||
|
||||
|
||||
/*
  Table-based CRC update: continues CRC state (crc) over (size) bytes at (data)
  and returns the new state.
  Name / linkage depends on the build:
    - Z7_CRC_HW_USE defined : static CrcUpdate_Base
    - otherwise             : the public CrcUpdate
  Routine selection:
    - Z7_CRC_NUM_TABLES_USE == 1 : always Z7_CRC_UPDATE_T1_FUNC_NAME.
    - otherwise : Z7_CRC_UPDATE_T1_FUNC_NAME when it exists and g_Crc_Algo == 1;
      else FUNC_NAME_LE or FUNC_NAME_BE with g_CrcTable, chosen by
      MY_CPU_LE / MY_CPU_BE at compile time, or by g_Crc_Be at run time
      when neither macro is defined.
*/
Z7_NO_INLINE
|
||||
#ifdef Z7_CRC_HW_USE
|
||||
static UInt32 Z7_FASTCALL CrcUpdate_Base
|
||||
#else
|
||||
UInt32 Z7_FASTCALL CrcUpdate
|
||||
#endif
|
||||
(UInt32 crc, const void *data, size_t size)
|
||||
{
|
||||
#if Z7_CRC_NUM_TABLES_USE == 1
|
||||
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
|
||||
#else // Z7_CRC_NUM_TABLES_USE != 1
|
||||
#ifdef Z7_CRC_UPDATE_T1_FUNC_NAME
|
||||
// g_Crc_Algo == 1 selects the single-table byte-wise routine
if (g_Crc_Algo == 1)
|
||||
return Z7_CRC_UPDATE_T1_FUNC_NAME(crc, data, size);
|
||||
#endif
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
|
||||
#elif defined(MY_CPU_BE)
|
||||
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
|
||||
#else
|
||||
// endianness is not known at compile time: use the run-time flag
if (g_Crc_Be)
|
||||
return FUNC_NAME_BE(crc, data, size, g_CrcTable);
|
||||
else
|
||||
return FUNC_NAME_LE(crc, data, size, g_CrcTable);
|
||||
#endif
|
||||
#endif // Z7_CRC_NUM_TABLES_USE != 1
|
||||
}
|
||||
|
||||
|
||||
#ifdef Z7_CRC_HW_USE
|
||||
/*
  CrcUpdate (variant compiled when Z7_CRC_HW_USE is defined):
  continues CRC state (crc) over (size) bytes at (data), returns the new state.
  Dispatches at run time on g_Crc_Algo:
    0     : CrcUpdate_HW   (hardware CRC instructions)
    other : CrcUpdate_Base (table-based code)
*/
Z7_NO_INLINE
|
||||
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size)
|
||||
{
|
||||
if (g_Crc_Algo == 0)
|
||||
return CrcUpdate_HW(crc, data, size);
|
||||
return CrcUpdate_Base(crc, data, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // !defined(Z7_CRC_HW_FORCE)
|
||||
|
||||
|
||||
|
||||
/*
  CrcCalc: one-shot CRC of (size) bytes at (data).
  Runs CrcUpdate starting from CRC_INIT_VAL and XORs the resulting state
  with CRC_INIT_VAL to produce the returned value.
*/
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size)
|
||||
{
|
||||
return CrcUpdate(CRC_INIT_VAL, data, size) ^ CRC_INIT_VAL;
|
||||
}
|
||||
|
||||
|
||||
MY_ALIGN(64)
|
||||
UInt32 g_CrcTable[256 * Z7_CRC_NUM_TABLES_TOTAL];
|
||||
|
||||
|
||||
void Z7_FASTCALL CrcGenerateTable(void)
|
||||
{
|
||||
UInt32 i;
|
||||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
#if defined(Z7_CRC_HW_FORCE)
|
||||
g_CrcTable[i] = __crc32b(i, 0);
|
||||
#else
|
||||
#define kCrcPoly 0xEDB88320
|
||||
UInt32 r = i;
|
||||
unsigned j;
|
||||
for (j = 0; j < 8; j++)
|
||||
r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
|
||||
g_CrcTable[i] = r;
|
||||
#endif
|
||||
}
|
||||
for (i = 256; i < 256 * CRC_NUM_TABLES; i++)
|
||||
for (i = 256; i < 256 * Z7_CRC_NUM_TABLES_USE; i++)
|
||||
{
|
||||
UInt32 r = g_CrcTable[(size_t)i - 256];
|
||||
const UInt32 r = g_CrcTable[(size_t)i - 256];
|
||||
g_CrcTable[i] = g_CrcTable[r & 0xFF] ^ (r >> 8);
|
||||
}
|
||||
|
||||
#if CRC_NUM_TABLES < 4
|
||||
|
||||
g_CrcUpdate = CrcUpdateT1;
|
||||
|
||||
#else
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
#if !defined(Z7_CRC_HW_FORCE) && \
|
||||
(defined(Z7_CRC_HW_USE) || defined(Z7_CRC_UPDATE_T1_FUNC_NAME) || defined(MY_CPU_BE))
|
||||
|
||||
g_CrcUpdateT4 = CrcUpdateT4;
|
||||
g_CrcUpdate = CrcUpdateT4;
|
||||
#if Z7_CRC_NUM_TABLES_USE <= 1
|
||||
g_Crc_Algo = 1;
|
||||
#else // Z7_CRC_NUM_TABLES_USE <= 1
|
||||
|
||||
#if CRC_NUM_TABLES >= 8
|
||||
g_CrcUpdateT8 = CrcUpdateT8;
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
if (!CPU_Is_InOrder())
|
||||
#endif
|
||||
g_CrcUpdate = CrcUpdateT8;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#if defined(MY_CPU_LE)
|
||||
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
|
||||
#else // !defined(MY_CPU_LE)
|
||||
{
|
||||
#ifndef MY_CPU_BE
|
||||
#ifndef MY_CPU_BE
|
||||
UInt32 k = 0x01020304;
|
||||
const Byte *p = (const Byte *)&k;
|
||||
if (p[0] == 4 && p[1] == 3)
|
||||
{
|
||||
g_CrcUpdateT4 = CrcUpdateT4;
|
||||
g_CrcUpdate = CrcUpdateT4;
|
||||
#if CRC_NUM_TABLES >= 8
|
||||
g_CrcUpdateT8 = CrcUpdateT8;
|
||||
g_CrcUpdate = CrcUpdateT8;
|
||||
#endif
|
||||
}
|
||||
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
|
||||
else if (p[0] != 1 || p[1] != 2)
|
||||
g_CrcUpdate = CrcUpdateT1;
|
||||
g_Crc_Algo = 1;
|
||||
else
|
||||
#endif
|
||||
#endif // MY_CPU_BE
|
||||
{
|
||||
for (i = 256 * CRC_NUM_TABLES - 1; i >= 256; i--)
|
||||
for (i = 256 * Z7_CRC_NUM_TABLES_TOTAL - 1; i >= 256; i--)
|
||||
{
|
||||
UInt32 x = g_CrcTable[(size_t)i - 256];
|
||||
g_CrcTable[i] = CRC_UINT32_SWAP(x);
|
||||
const UInt32 x = g_CrcTable[(size_t)i - 256];
|
||||
g_CrcTable[i] = Z7_BSWAP32(x);
|
||||
}
|
||||
g_CrcUpdateT4 = CrcUpdateT1_BeT4;
|
||||
g_CrcUpdate = CrcUpdateT1_BeT4;
|
||||
#if CRC_NUM_TABLES >= 8
|
||||
g_CrcUpdateT8 = CrcUpdateT1_BeT8;
|
||||
g_CrcUpdate = CrcUpdateT1_BeT8;
|
||||
#endif
|
||||
#if defined(Z7_CRC_UPDATE_T1_FUNC_NAME)
|
||||
g_Crc_Algo = Z7_CRC_NUM_TABLES_USE;
|
||||
#endif
|
||||
#if (!defined(MY_CPU_LE) && !defined(MY_CPU_BE))
|
||||
g_Crc_Be = 1;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#endif // !defined(MY_CPU_LE)
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
#ifdef USE_ARM64_CRC
|
||||
if (CPU_IsSupported_CRC32())
|
||||
{
|
||||
g_CrcUpdateT0_32 = CrcUpdateT0_32;
|
||||
g_CrcUpdateT0_64 = CrcUpdateT0_64;
|
||||
g_CrcUpdate =
|
||||
#if defined(MY_CPU_ARM)
|
||||
CrcUpdateT0_32;
|
||||
#else
|
||||
CrcUpdateT0_64;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef USE_CRC_EMU
|
||||
g_CrcUpdateT0_32 = CrcUpdateT0_32;
|
||||
g_CrcUpdateT0_64 = CrcUpdateT0_64;
|
||||
g_CrcUpdate = CrcUpdateT0_64;
|
||||
#endif
|
||||
#endif
|
||||
#ifdef MY_CPU_LE
|
||||
#ifdef Z7_CRC_HW_USE
|
||||
if (CPU_IsSupported_CRC32())
|
||||
g_Crc_Algo = 0;
|
||||
#endif // Z7_CRC_HW_USE
|
||||
#endif // MY_CPU_LE
|
||||
|
||||
#endif // Z7_CRC_NUM_TABLES_USE <= 1
|
||||
#endif // g_Crc_Algo was declared
|
||||
}
|
||||
|
||||
/*
  z7_GetFunc_CrcUpdate: returns a pointer to the CRC update routine
  identified by (algo), or NULL if that routine is not available.
    algo == 0 :
        the default CrcUpdate.
    algo == sizeof(CRC_HW_WORD_TYPE) * 8  (only if Z7_CRC_HW_USE is defined) :
        the hardware routine: CrcUpdate when Z7_CRC_HW_FORCE is defined,
        else CrcUpdate_HW, but only when g_Crc_Algo == 0.
    algo == Z7_CRC_NUM_TABLES_USE  (only if Z7_CRC_HW_FORCE is not defined) :
        the table-based routine: CrcUpdate_Base when Z7_CRC_HW_USE is defined,
        else CrcUpdate.
    any other value : NULL.
*/
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo)
|
||||
{
|
||||
if (algo == 0)
|
||||
return &CrcUpdate;
|
||||
|
||||
#if defined(Z7_CRC_HW_USE)
|
||||
if (algo == sizeof(CRC_HW_WORD_TYPE) * 8)
|
||||
{
|
||||
#ifdef Z7_CRC_HW_FORCE
|
||||
return &CrcUpdate;
|
||||
#else
|
||||
// if g_Crc_Algo != 0, execution falls through to the checks below
if (g_Crc_Algo == 0)
|
||||
return &CrcUpdate_HW;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef Z7_CRC_HW_FORCE
|
||||
if (algo == Z7_CRC_NUM_TABLES_USE)
|
||||
return
|
||||
#ifdef Z7_CRC_HW_USE
|
||||
&CrcUpdate_Base;
|
||||
#else
|
||||
&CrcUpdate;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#undef kCrcPoly
|
||||
#undef Z7_CRC_NUM_TABLES_USE
|
||||
#undef Z7_CRC_NUM_TABLES_TOTAL
|
||||
#undef CRC_UPDATE_BYTE_2
|
||||
#undef FUNC_NAME_LE_2
|
||||
#undef FUNC_NAME_LE_1
|
||||
#undef FUNC_NAME_LE
|
||||
#undef FUNC_NAME_BE_2
|
||||
#undef FUNC_NAME_BE_1
|
||||
#undef FUNC_NAME_BE
|
||||
|
||||
#undef CRC_HW_UNROLL_BYTES
|
||||
#undef CRC_HW_WORD_FUNC
|
||||
#undef CRC_HW_WORD_TYPE
|
||||
|
|
|
|||
15
C/7zCrc.h
15
C/7zCrc.h
|
|
@ -1,8 +1,8 @@
|
|||
/* 7zCrc.h -- CRC32 calculation
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
2024-01-22 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_CRC_H
|
||||
#define __7Z_CRC_H
|
||||
#ifndef ZIP7_INC_7Z_CRC_H
|
||||
#define ZIP7_INC_7Z_CRC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -11,14 +11,17 @@ EXTERN_C_BEGIN
|
|||
extern UInt32 g_CrcTable[];
|
||||
|
||||
/* Call CrcGenerateTable one time before other CRC functions */
|
||||
void MY_FAST_CALL CrcGenerateTable(void);
|
||||
void Z7_FASTCALL CrcGenerateTable(void);
|
||||
|
||||
#define CRC_INIT_VAL 0xFFFFFFFF
|
||||
#define CRC_GET_DIGEST(crc) ((crc) ^ CRC_INIT_VAL)
|
||||
#define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void *data, size_t size);
|
||||
UInt32 MY_FAST_CALL CrcCalc(const void *data, size_t size);
|
||||
UInt32 Z7_FASTCALL CrcUpdate(UInt32 crc, const void *data, size_t size);
|
||||
UInt32 Z7_FASTCALL CrcCalc(const void *data, size_t size);
|
||||
|
||||
typedef UInt32 (Z7_FASTCALL *Z7_CRC_UPDATE_FUNC)(UInt32 v, const void *data, size_t size);
|
||||
Z7_CRC_UPDATE_FUNC z7_GetFunc_CrcUpdate(unsigned algo);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
244
C/7zCrcOpt.c
244
C/7zCrcOpt.c
|
|
@ -1,117 +1,199 @@
|
|||
/* 7zCrcOpt.c -- CRC32 calculation
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
/* 7zCrcOpt.c -- CRC32 calculation (optimized functions)
|
||||
2023-12-07 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "CpuArch.h"
|
||||
|
||||
#if !defined(Z7_CRC_NUM_TABLES) || Z7_CRC_NUM_TABLES > 1
|
||||
|
||||
// for debug only : define Z7_CRC_DEBUG_BE to test the big-endian code on a little-endian CPU
|
||||
// #define Z7_CRC_DEBUG_BE
|
||||
#ifdef Z7_CRC_DEBUG_BE
|
||||
#undef MY_CPU_LE
|
||||
#define MY_CPU_BE
|
||||
#endif
|
||||
|
||||
// the value Z7_CRC_NUM_TABLES_USE must be defined to the same value as in 7zCrc.c
|
||||
#ifdef Z7_CRC_NUM_TABLES
|
||||
#define Z7_CRC_NUM_TABLES_USE Z7_CRC_NUM_TABLES
|
||||
#else
|
||||
#define Z7_CRC_NUM_TABLES_USE 12
|
||||
#endif
|
||||
|
||||
#if Z7_CRC_NUM_TABLES_USE % 4 || \
|
||||
Z7_CRC_NUM_TABLES_USE < 4 * 1 || \
|
||||
Z7_CRC_NUM_TABLES_USE > 4 * 6
|
||||
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef MY_CPU_BE
|
||||
|
||||
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
#define CRC_UPDATE_BYTE_2(crc, b) (table[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8))
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
UInt32 MY_FAST_CALL CrcUpdateT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
#define Q(n, d) \
|
||||
( (table + ((n) * 4 + 3) * 0x100)[(Byte)(d)] \
|
||||
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 1 * 8) & 0xFF] \
|
||||
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 2 * 8) & 0xFF] \
|
||||
^ (table + ((n) * 4 + 0) * 0x100)[((d) >> 3 * 8)] )
|
||||
|
||||
#define R(a) *((const UInt32 *)(const void *)p + (a))
|
||||
|
||||
#define CRC_FUNC_PRE_LE2(step) \
|
||||
UInt32 Z7_FASTCALL CrcUpdateT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
|
||||
#define CRC_FUNC_PRE_LE(step) \
|
||||
CRC_FUNC_PRE_LE2(step); \
|
||||
CRC_FUNC_PRE_LE2(step)
|
||||
|
||||
CRC_FUNC_PRE_LE(Z7_CRC_NUM_TABLES_USE)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
|
||||
const Byte *lim;
|
||||
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2(v, *p);
|
||||
for (; size >= 4; size -= 4, p += 4)
|
||||
lim = p + size;
|
||||
if (size >= Z7_CRC_NUM_TABLES_USE)
|
||||
{
|
||||
v ^= *(const UInt32 *)(const void *)p;
|
||||
v =
|
||||
(table + 0x300)[((v ) & 0xFF)]
|
||||
^ (table + 0x200)[((v >> 8) & 0xFF)]
|
||||
^ (table + 0x100)[((v >> 16) & 0xFF)]
|
||||
^ (table + 0x000)[((v >> 24))];
|
||||
lim -= Z7_CRC_NUM_TABLES_USE;
|
||||
do
|
||||
{
|
||||
v ^= R(0);
|
||||
{
|
||||
#if Z7_CRC_NUM_TABLES_USE == 1 * 4
|
||||
v = Q(0, v);
|
||||
#else
|
||||
#define U2(r, op) \
|
||||
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
|
||||
UInt32 d, x;
|
||||
U2(1, =)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
|
||||
#define U(r) U2(r, ^=)
|
||||
U(2)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
|
||||
U(3)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
|
||||
U(4)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
|
||||
U(5)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
|
||||
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#undef U
|
||||
#undef U2
|
||||
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
|
||||
#endif
|
||||
}
|
||||
p += Z7_CRC_NUM_TABLES_USE;
|
||||
}
|
||||
while (p <= lim);
|
||||
lim += Z7_CRC_NUM_TABLES_USE;
|
||||
}
|
||||
for (; size > 0; size--, p++)
|
||||
for (; p < lim; p++)
|
||||
v = CRC_UPDATE_BYTE_2(v, *p);
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
  CrcUpdateT8: updates CRC state (v) over (size) bytes at (data) and returns
  the new state, consuming 8 bytes per main-loop iteration.
  (table) is indexed in eight 0x100-entry sub-tables (offsets 0x000..0x700).
  Phases:
    1) byte steps (CRC_UPDATE_BYTE_2) until (p) is 8-byte aligned or input ends,
    2) 8 bytes per iteration, read as two 32-bit words,
    3) byte steps for the remaining tail.
  NOTE(review): presumably the pre-change revision in this diff
  (MY_FAST_CALL, fixed T8 name) - confirm.
*/
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table);
|
||||
UInt32 MY_FAST_CALL CrcUpdateT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2(v, *p);
|
||||
for (; size >= 8; size -= 8, p += 8)
|
||||
{
|
||||
UInt32 d;
|
||||
// first word is mixed into the state and looked up in sub-tables 0x700..0x400
v ^= *(const UInt32 *)(const void *)p;
|
||||
v =
|
||||
(table + 0x700)[((v ) & 0xFF)]
|
||||
^ (table + 0x600)[((v >> 8) & 0xFF)]
|
||||
^ (table + 0x500)[((v >> 16) & 0xFF)]
|
||||
^ (table + 0x400)[((v >> 24))];
|
||||
// second word is looked up in sub-tables 0x300..0x000 and XORed in
d = *((const UInt32 *)(const void *)p + 1);
|
||||
v ^=
|
||||
(table + 0x300)[((d ) & 0xFF)]
|
||||
^ (table + 0x200)[((d >> 8) & 0xFF)]
|
||||
^ (table + 0x100)[((d >> 16) & 0xFF)]
|
||||
^ (table + 0x000)[((d >> 24))];
|
||||
}
|
||||
for (; size > 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2(v, *p);
|
||||
return v;
|
||||
}
|
||||
#undef CRC_UPDATE_BYTE_2
|
||||
#undef R
|
||||
#undef Q
|
||||
#undef CRC_FUNC_PRE_LE
|
||||
#undef CRC_FUNC_PRE_LE2
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef MY_CPU_LE
|
||||
|
||||
#define CRC_UINT32_SWAP(v) ((v >> 24) | ((v >> 8) & 0xFF00) | ((v << 8) & 0xFF0000) | (v << 24))
|
||||
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[((crc) >> 24) ^ (b)] ^ ((crc) << 8))
|
||||
|
||||
#define CRC_UPDATE_BYTE_2_BE(crc, b) (table[(((crc) >> 24) ^ (b))] ^ ((crc) << 8))
|
||||
#define Q(n, d) \
|
||||
( (table + ((n) * 4 + 0) * 0x100)[((d)) & 0xFF] \
|
||||
^ (table + ((n) * 4 + 1) * 0x100)[((d) >> 1 * 8) & 0xFF] \
|
||||
^ (table + ((n) * 4 + 2) * 0x100)[((d) >> 2 * 8) & 0xFF] \
|
||||
^ (table + ((n) * 4 + 3) * 0x100)[((d) >> 3 * 8)] )
|
||||
|
||||
UInt32 MY_FAST_CALL CrcUpdateT1_BeT4(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
#ifdef Z7_CRC_DEBUG_BE
|
||||
#define R(a) GetBe32a((const UInt32 *)(const void *)p + (a))
|
||||
#else
|
||||
#define R(a) *((const UInt32 *)(const void *)p + (a))
|
||||
#endif
|
||||
|
||||
|
||||
#define CRC_FUNC_PRE_BE2(step) \
|
||||
UInt32 Z7_FASTCALL CrcUpdateT1_BeT ## step (UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
|
||||
#define CRC_FUNC_PRE_BE(step) \
|
||||
CRC_FUNC_PRE_BE2(step); \
|
||||
CRC_FUNC_PRE_BE2(step)
|
||||
|
||||
CRC_FUNC_PRE_BE(Z7_CRC_NUM_TABLES_USE)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
const Byte *lim;
|
||||
table += 0x100;
|
||||
v = CRC_UINT32_SWAP(v);
|
||||
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 3) != 0; size--, p++)
|
||||
v = Z7_BSWAP32(v);
|
||||
for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC_NUM_TABLES_USE & 4))) != 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2_BE(v, *p);
|
||||
for (; size >= 4; size -= 4, p += 4)
|
||||
lim = p + size;
|
||||
if (size >= Z7_CRC_NUM_TABLES_USE)
|
||||
{
|
||||
v ^= *(const UInt32 *)(const void *)p;
|
||||
v =
|
||||
(table + 0x000)[((v ) & 0xFF)]
|
||||
^ (table + 0x100)[((v >> 8) & 0xFF)]
|
||||
^ (table + 0x200)[((v >> 16) & 0xFF)]
|
||||
^ (table + 0x300)[((v >> 24))];
|
||||
lim -= Z7_CRC_NUM_TABLES_USE;
|
||||
do
|
||||
{
|
||||
v ^= R(0);
|
||||
{
|
||||
#if Z7_CRC_NUM_TABLES_USE == 1 * 4
|
||||
v = Q(0, v);
|
||||
#else
|
||||
#define U2(r, op) \
|
||||
{ d = R(r); x op Q(Z7_CRC_NUM_TABLES_USE / 4 - 1 - (r), d); }
|
||||
UInt32 d, x;
|
||||
U2(1, =)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 3 * 4
|
||||
#define U(r) U2(r, ^=)
|
||||
U(2)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 4 * 4
|
||||
U(3)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 5 * 4
|
||||
U(4)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 6 * 4
|
||||
U(5)
|
||||
#if Z7_CRC_NUM_TABLES_USE >= 7 * 4
|
||||
#error Stop_Compiling_Bad_Z7_CRC_NUM_TABLES
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#undef U
|
||||
#undef U2
|
||||
v = x ^ Q(Z7_CRC_NUM_TABLES_USE / 4 - 1, v);
|
||||
#endif
|
||||
}
|
||||
p += Z7_CRC_NUM_TABLES_USE;
|
||||
}
|
||||
while (p <= lim);
|
||||
lim += Z7_CRC_NUM_TABLES_USE;
|
||||
}
|
||||
for (; size > 0; size--, p++)
|
||||
for (; p < lim; p++)
|
||||
v = CRC_UPDATE_BYTE_2_BE(v, *p);
|
||||
return CRC_UINT32_SWAP(v);
|
||||
return Z7_BSWAP32(v);
|
||||
}
|
||||
|
||||
/*
  CrcUpdateT1_BeT8: big-endian counterpart of the 8-bytes-per-iteration
  CRC update. Updates state (v) over (size) bytes at (data), returns new state.
  The state is byte-swapped (CRC_UINT32_SWAP) on entry and swapped back on
  return; in between, CRC_UPDATE_BYTE_2_BE works on the swapped form.
  (table) is advanced by 0x100 entries first, so all sub-table offsets below
  are relative to the second 0x100-entry group of the caller's table.
  Phases:
    1) byte steps until (p) is 8-byte aligned or input ends,
    2) 8 bytes per iteration, read as two 32-bit words,
    3) byte steps for the remaining tail.
  NOTE(review): presumably the pre-change revision in this diff
  (MY_FAST_CALL, CRC_UINT32_SWAP) - confirm.
*/
UInt32 MY_FAST_CALL CrcUpdateT1_BeT8(UInt32 v, const void *data, size_t size, const UInt32 *table)
|
||||
{
|
||||
const Byte *p = (const Byte *)data;
|
||||
table += 0x100;
|
||||
v = CRC_UINT32_SWAP(v);
|
||||
for (; size > 0 && ((unsigned)(ptrdiff_t)p & 7) != 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2_BE(v, *p);
|
||||
for (; size >= 8; size -= 8, p += 8)
|
||||
{
|
||||
UInt32 d;
|
||||
// first word is mixed into the state and looked up in sub-tables 0x400..0x700
v ^= *(const UInt32 *)(const void *)p;
|
||||
v =
|
||||
(table + 0x400)[((v ) & 0xFF)]
|
||||
^ (table + 0x500)[((v >> 8) & 0xFF)]
|
||||
^ (table + 0x600)[((v >> 16) & 0xFF)]
|
||||
^ (table + 0x700)[((v >> 24))];
|
||||
// second word is looked up in sub-tables 0x000..0x300 and XORed in
d = *((const UInt32 *)(const void *)p + 1);
|
||||
v ^=
|
||||
(table + 0x000)[((d ) & 0xFF)]
|
||||
^ (table + 0x100)[((d >> 8) & 0xFF)]
|
||||
^ (table + 0x200)[((d >> 16) & 0xFF)]
|
||||
^ (table + 0x300)[((d >> 24))];
|
||||
}
|
||||
for (; size > 0; size--, p++)
|
||||
v = CRC_UPDATE_BYTE_2_BE(v, *p);
|
||||
return CRC_UINT32_SWAP(v);
|
||||
}
|
||||
#undef CRC_UPDATE_BYTE_2_BE
|
||||
#undef R
|
||||
#undef Q
|
||||
#undef CRC_FUNC_PRE_BE
|
||||
#undef CRC_FUNC_PRE_BE2
|
||||
|
||||
#endif
|
||||
#undef Z7_CRC_NUM_TABLES_USE
|
||||
#endif
|
||||
|
|
|
|||
201
C/7zDec.c
201
C/7zDec.c
|
|
@ -1,11 +1,11 @@
|
|||
/* 7zDec.c -- Decoding from 7z folder
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* #define _7ZIP_PPMD_SUPPPORT */
|
||||
/* #define Z7_PPMD_SUPPORT */
|
||||
|
||||
#include "7z.h"
|
||||
#include "7zCrc.h"
|
||||
|
|
@ -16,27 +16,50 @@
|
|||
#include "Delta.h"
|
||||
#include "LzmaDec.h"
|
||||
#include "Lzma2Dec.h"
|
||||
#ifdef _7ZIP_PPMD_SUPPPORT
|
||||
#ifdef Z7_PPMD_SUPPORT
|
||||
#include "Ppmd7.h"
|
||||
#endif
|
||||
|
||||
#define k_Copy 0
|
||||
#ifndef _7Z_NO_METHOD_LZMA2
|
||||
#ifndef Z7_NO_METHOD_LZMA2
|
||||
#define k_LZMA2 0x21
|
||||
#endif
|
||||
#define k_LZMA 0x30101
|
||||
#define k_BCJ2 0x303011B
|
||||
#ifndef _7Z_NO_METHODS_FILTERS
|
||||
|
||||
#if !defined(Z7_NO_METHODS_FILTERS)
|
||||
#define Z7_USE_BRANCH_FILTER
|
||||
#endif
|
||||
|
||||
#if !defined(Z7_NO_METHODS_FILTERS) || \
|
||||
defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARM64)
|
||||
#define Z7_USE_FILTER_ARM64
|
||||
#ifndef Z7_USE_BRANCH_FILTER
|
||||
#define Z7_USE_BRANCH_FILTER
|
||||
#endif
|
||||
#define k_ARM64 0xa
|
||||
#endif
|
||||
|
||||
#if !defined(Z7_NO_METHODS_FILTERS) || \
|
||||
defined(Z7_USE_NATIVE_BRANCH_FILTER) && defined(MY_CPU_ARMT)
|
||||
#define Z7_USE_FILTER_ARMT
|
||||
#ifndef Z7_USE_BRANCH_FILTER
|
||||
#define Z7_USE_BRANCH_FILTER
|
||||
#endif
|
||||
#define k_ARMT 0x3030701
|
||||
#endif
|
||||
|
||||
#ifndef Z7_NO_METHODS_FILTERS
|
||||
#define k_Delta 3
|
||||
#define k_RISCV 0xb
|
||||
#define k_BCJ 0x3030103
|
||||
#define k_PPC 0x3030205
|
||||
#define k_IA64 0x3030401
|
||||
#define k_ARM 0x3030501
|
||||
#define k_ARMT 0x3030701
|
||||
#define k_SPARC 0x3030805
|
||||
#endif
|
||||
|
||||
#ifdef _7ZIP_PPMD_SUPPPORT
|
||||
#ifdef Z7_PPMD_SUPPORT
|
||||
|
||||
#define k_PPMD 0x30401
|
||||
|
||||
|
|
@ -49,12 +72,12 @@ typedef struct
|
|||
UInt64 processed;
|
||||
BoolInt extra;
|
||||
SRes res;
|
||||
const ILookInStream *inStream;
|
||||
ILookInStreamPtr inStream;
|
||||
} CByteInToLook;
|
||||
|
||||
static Byte ReadByte(const IByteIn *pp)
|
||||
static Byte ReadByte(IByteInPtr pp)
|
||||
{
|
||||
CByteInToLook *p = CONTAINER_FROM_VTBL(pp, CByteInToLook, vt);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CByteInToLook)
|
||||
if (p->cur != p->end)
|
||||
return *p->cur++;
|
||||
if (p->res == SZ_OK)
|
||||
|
|
@ -67,13 +90,13 @@ static Byte ReadByte(const IByteIn *pp)
|
|||
p->cur = p->begin;
|
||||
p->end = p->begin + size;
|
||||
if (size != 0)
|
||||
return *p->cur++;;
|
||||
return *p->cur++;
|
||||
}
|
||||
p->extra = True;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, const ILookInStream *inStream,
|
||||
static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
|
||||
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
|
||||
{
|
||||
CPpmd7 ppmd;
|
||||
|
|
@ -138,14 +161,14 @@ static SRes SzDecodePpmd(const Byte *props, unsigned propsSize, UInt64 inSize, c
|
|||
#endif
|
||||
|
||||
|
||||
static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
|
||||
static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
|
||||
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
|
||||
{
|
||||
CLzmaDec state;
|
||||
SRes res = SZ_OK;
|
||||
|
||||
LzmaDec_Construct(&state);
|
||||
RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain));
|
||||
LzmaDec_CONSTRUCT(&state)
|
||||
RINOK(LzmaDec_AllocateProbs(&state, props, propsSize, allocMain))
|
||||
state.dic = outBuffer;
|
||||
state.dicBufSize = outSize;
|
||||
LzmaDec_Init(&state);
|
||||
|
|
@ -196,18 +219,18 @@ static SRes SzDecodeLzma(const Byte *props, unsigned propsSize, UInt64 inSize, I
|
|||
}
|
||||
|
||||
|
||||
#ifndef _7Z_NO_METHOD_LZMA2
|
||||
#ifndef Z7_NO_METHOD_LZMA2
|
||||
|
||||
static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStream *inStream,
|
||||
static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize, ILookInStreamPtr inStream,
|
||||
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain)
|
||||
{
|
||||
CLzma2Dec state;
|
||||
SRes res = SZ_OK;
|
||||
|
||||
Lzma2Dec_Construct(&state);
|
||||
Lzma2Dec_CONSTRUCT(&state)
|
||||
if (propsSize != 1)
|
||||
return SZ_ERROR_DATA;
|
||||
RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain));
|
||||
RINOK(Lzma2Dec_AllocateProbs(&state, props[0], allocMain))
|
||||
state.decoder.dic = outBuffer;
|
||||
state.decoder.dicBufSize = outSize;
|
||||
Lzma2Dec_Init(&state);
|
||||
|
|
@ -257,7 +280,7 @@ static SRes SzDecodeLzma2(const Byte *props, unsigned propsSize, UInt64 inSize,
|
|||
#endif
|
||||
|
||||
|
||||
static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer)
|
||||
static SRes SzDecodeCopy(UInt64 inSize, ILookInStreamPtr inStream, Byte *outBuffer)
|
||||
{
|
||||
while (inSize > 0)
|
||||
{
|
||||
|
|
@ -265,13 +288,13 @@ static SRes SzDecodeCopy(UInt64 inSize, ILookInStream *inStream, Byte *outBuffer
|
|||
size_t curSize = (1 << 18);
|
||||
if (curSize > inSize)
|
||||
curSize = (size_t)inSize;
|
||||
RINOK(ILookInStream_Look(inStream, &inBuf, &curSize));
|
||||
RINOK(ILookInStream_Look(inStream, &inBuf, &curSize))
|
||||
if (curSize == 0)
|
||||
return SZ_ERROR_INPUT_EOF;
|
||||
memcpy(outBuffer, inBuf, curSize);
|
||||
outBuffer += curSize;
|
||||
inSize -= curSize;
|
||||
RINOK(ILookInStream_Skip(inStream, curSize));
|
||||
RINOK(ILookInStream_Skip(inStream, curSize))
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
|
@ -282,15 +305,16 @@ static BoolInt IS_MAIN_METHOD(UInt32 m)
|
|||
{
|
||||
case k_Copy:
|
||||
case k_LZMA:
|
||||
#ifndef _7Z_NO_METHOD_LZMA2
|
||||
#ifndef Z7_NO_METHOD_LZMA2
|
||||
case k_LZMA2:
|
||||
#endif
|
||||
#ifdef _7ZIP_PPMD_SUPPPORT
|
||||
#endif
|
||||
#ifdef Z7_PPMD_SUPPORT
|
||||
case k_PPMD:
|
||||
#endif
|
||||
#endif
|
||||
return True;
|
||||
default:
|
||||
return False;
|
||||
}
|
||||
return False;
|
||||
}
|
||||
|
||||
static BoolInt IS_SUPPORTED_CODER(const CSzCoderInfo *c)
|
||||
|
|
@ -317,7 +341,7 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
|
|||
}
|
||||
|
||||
|
||||
#ifndef _7Z_NO_METHODS_FILTERS
|
||||
#if defined(Z7_USE_BRANCH_FILTER)
|
||||
|
||||
if (f->NumCoders == 2)
|
||||
{
|
||||
|
|
@ -333,13 +357,21 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
|
|||
return SZ_ERROR_UNSUPPORTED;
|
||||
switch ((UInt32)c->MethodID)
|
||||
{
|
||||
#if !defined(Z7_NO_METHODS_FILTERS)
|
||||
case k_Delta:
|
||||
case k_BCJ:
|
||||
case k_PPC:
|
||||
case k_IA64:
|
||||
case k_SPARC:
|
||||
case k_ARM:
|
||||
case k_RISCV:
|
||||
#endif
|
||||
#ifdef Z7_USE_FILTER_ARM64
|
||||
case k_ARM64:
|
||||
#endif
|
||||
#ifdef Z7_USE_FILTER_ARMT
|
||||
case k_ARMT:
|
||||
#endif
|
||||
break;
|
||||
default:
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
|
|
@ -372,15 +404,16 @@ static SRes CheckSupportedFolder(const CSzFolder *f)
|
|||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
|
||||
#ifndef _7Z_NO_METHODS_FILTERS
|
||||
#define CASE_BRA_CONV(isa) case k_ ## isa: isa ## _Convert(outBuffer, outSize, 0, 0); break;
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static SRes SzFolder_Decode2(const CSzFolder *folder,
|
||||
const Byte *propsData,
|
||||
const UInt64 *unpackSizes,
|
||||
const UInt64 *packPositions,
|
||||
ILookInStream *inStream, UInt64 startPos,
|
||||
ILookInStreamPtr inStream, UInt64 startPos,
|
||||
Byte *outBuffer, SizeT outSize, ISzAllocPtr allocMain,
|
||||
Byte *tempBuf[])
|
||||
{
|
||||
|
|
@ -389,7 +422,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
SizeT tempSize3 = 0;
|
||||
Byte *tempBuf3 = 0;
|
||||
|
||||
RINOK(CheckSupportedFolder(folder));
|
||||
RINOK(CheckSupportedFolder(folder))
|
||||
|
||||
for (ci = 0; ci < folder->NumCoders; ci++)
|
||||
{
|
||||
|
|
@ -404,8 +437,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
SizeT outSizeCur = outSize;
|
||||
if (folder->NumCoders == 4)
|
||||
{
|
||||
UInt32 indices[] = { 3, 2, 0 };
|
||||
UInt64 unpackSize = unpackSizes[ci];
|
||||
const UInt32 indices[] = { 3, 2, 0 };
|
||||
const UInt64 unpackSize = unpackSizes[ci];
|
||||
si = indices[ci];
|
||||
if (ci < 2)
|
||||
{
|
||||
|
|
@ -431,37 +464,37 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
}
|
||||
offset = packPositions[si];
|
||||
inSize = packPositions[(size_t)si + 1] - offset;
|
||||
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
|
||||
RINOK(LookInStream_SeekTo(inStream, startPos + offset))
|
||||
|
||||
if (coder->MethodID == k_Copy)
|
||||
{
|
||||
if (inSize != outSizeCur) /* check it */
|
||||
return SZ_ERROR_DATA;
|
||||
RINOK(SzDecodeCopy(inSize, inStream, outBufCur));
|
||||
RINOK(SzDecodeCopy(inSize, inStream, outBufCur))
|
||||
}
|
||||
else if (coder->MethodID == k_LZMA)
|
||||
{
|
||||
RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
|
||||
RINOK(SzDecodeLzma(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
|
||||
}
|
||||
#ifndef _7Z_NO_METHOD_LZMA2
|
||||
#ifndef Z7_NO_METHOD_LZMA2
|
||||
else if (coder->MethodID == k_LZMA2)
|
||||
{
|
||||
RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
|
||||
RINOK(SzDecodeLzma2(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
|
||||
}
|
||||
#endif
|
||||
#ifdef _7ZIP_PPMD_SUPPPORT
|
||||
#endif
|
||||
#ifdef Z7_PPMD_SUPPORT
|
||||
else if (coder->MethodID == k_PPMD)
|
||||
{
|
||||
RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain));
|
||||
RINOK(SzDecodePpmd(propsData + coder->PropsOffset, coder->PropsSize, inSize, inStream, outBufCur, outSizeCur, allocMain))
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
else
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
else if (coder->MethodID == k_BCJ2)
|
||||
{
|
||||
UInt64 offset = packPositions[1];
|
||||
UInt64 s3Size = packPositions[2] - offset;
|
||||
const UInt64 offset = packPositions[1];
|
||||
const UInt64 s3Size = packPositions[2] - offset;
|
||||
|
||||
if (ci != 3)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
|
|
@ -473,8 +506,8 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
if (!tempBuf[2] && tempSizes[2] != 0)
|
||||
return SZ_ERROR_MEM;
|
||||
|
||||
RINOK(LookInStream_SeekTo(inStream, startPos + offset));
|
||||
RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]));
|
||||
RINOK(LookInStream_SeekTo(inStream, startPos + offset))
|
||||
RINOK(SzDecodeCopy(s3Size, inStream, tempBuf[2]))
|
||||
|
||||
if ((tempSizes[0] & 3) != 0 ||
|
||||
(tempSizes[1] & 3) != 0 ||
|
||||
|
|
@ -493,26 +526,22 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
p.destLim = outBuffer + outSize;
|
||||
|
||||
Bcj2Dec_Init(&p);
|
||||
RINOK(Bcj2Dec_Decode(&p));
|
||||
RINOK(Bcj2Dec_Decode(&p))
|
||||
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 4; i++)
|
||||
if (p.bufs[i] != p.lims[i])
|
||||
return SZ_ERROR_DATA;
|
||||
|
||||
if (!Bcj2Dec_IsFinished(&p))
|
||||
return SZ_ERROR_DATA;
|
||||
|
||||
if (p.dest != p.destLim
|
||||
|| p.state != BCJ2_STREAM_MAIN)
|
||||
if (p.dest != p.destLim || !Bcj2Dec_IsMaybeFinished(&p))
|
||||
return SZ_ERROR_DATA;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifndef _7Z_NO_METHODS_FILTERS
|
||||
#if defined(Z7_USE_BRANCH_FILTER)
|
||||
else if (ci == 1)
|
||||
{
|
||||
#if !defined(Z7_NO_METHODS_FILTERS)
|
||||
if (coder->MethodID == k_Delta)
|
||||
{
|
||||
if (coder->PropsSize != 1)
|
||||
|
|
@ -522,31 +551,75 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
Delta_Init(state);
|
||||
Delta_Decode(state, (unsigned)(propsData[coder->PropsOffset]) + 1, outBuffer, outSize);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#ifdef Z7_USE_FILTER_ARM64
|
||||
if (coder->MethodID == k_ARM64)
|
||||
{
|
||||
UInt32 pc = 0;
|
||||
if (coder->PropsSize == 4)
|
||||
{
|
||||
pc = GetUi32(propsData + coder->PropsOffset);
|
||||
if (pc & 3)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
else if (coder->PropsSize != 0)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
z7_BranchConv_ARM64_Dec(outBuffer, outSize, pc);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(Z7_NO_METHODS_FILTERS)
|
||||
if (coder->MethodID == k_RISCV)
|
||||
{
|
||||
UInt32 pc = 0;
|
||||
if (coder->PropsSize == 4)
|
||||
{
|
||||
pc = GetUi32(propsData + coder->PropsOffset);
|
||||
if (pc & 1)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
else if (coder->PropsSize != 0)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
z7_BranchConv_RISCV_Dec(outBuffer, outSize, pc);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
|
||||
{
|
||||
if (coder->PropsSize != 0)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
#define CASE_BRA_CONV(isa) case k_ ## isa: Z7_BRANCH_CONV_DEC(isa)(outBuffer, outSize, 0); break; // pc = 0;
|
||||
switch (coder->MethodID)
|
||||
{
|
||||
#if !defined(Z7_NO_METHODS_FILTERS)
|
||||
case k_BCJ:
|
||||
{
|
||||
UInt32 state;
|
||||
x86_Convert_Init(state);
|
||||
x86_Convert(outBuffer, outSize, 0, &state, 0);
|
||||
UInt32 state = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
|
||||
z7_BranchConvSt_X86_Dec(outBuffer, outSize, 0, &state); // pc = 0
|
||||
break;
|
||||
}
|
||||
CASE_BRA_CONV(PPC)
|
||||
case k_PPC: Z7_BRANCH_CONV_DEC_2(BranchConv_PPC)(outBuffer, outSize, 0); break; // pc = 0;
|
||||
// CASE_BRA_CONV(PPC)
|
||||
CASE_BRA_CONV(IA64)
|
||||
CASE_BRA_CONV(SPARC)
|
||||
CASE_BRA_CONV(ARM)
|
||||
#endif
|
||||
#if !defined(Z7_NO_METHODS_FILTERS) || defined(Z7_USE_FILTER_ARMT)
|
||||
CASE_BRA_CONV(ARMT)
|
||||
#endif
|
||||
default:
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
} // (c == 1)
|
||||
#endif // Z7_USE_BRANCH_FILTER
|
||||
else
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
}
|
||||
|
|
@ -556,7 +629,7 @@ static SRes SzFolder_Decode2(const CSzFolder *folder,
|
|||
|
||||
|
||||
SRes SzAr_DecodeFolder(const CSzAr *p, UInt32 folderIndex,
|
||||
ILookInStream *inStream, UInt64 startPos,
|
||||
ILookInStreamPtr inStream, UInt64 startPos,
|
||||
Byte *outBuffer, size_t outSize,
|
||||
ISzAllocPtr allocMain)
|
||||
{
|
||||
|
|
|
|||
33
C/7zFile.c
33
C/7zFile.c
|
|
@ -1,5 +1,5 @@
|
|||
/* 7zFile.c -- File IO
|
||||
2021-04-29 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -268,7 +268,7 @@ WRes File_Write(CSzFile *p, const void *data, size_t *size)
|
|||
return errno;
|
||||
if (processed == 0)
|
||||
break;
|
||||
data = (void *)((Byte *)data + (size_t)processed);
|
||||
data = (const void *)((const Byte *)data + (size_t)processed);
|
||||
originalSize -= (size_t)processed;
|
||||
*size += (size_t)processed;
|
||||
}
|
||||
|
|
@ -287,7 +287,8 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
|
|||
DWORD moveMethod;
|
||||
UInt32 low = (UInt32)*pos;
|
||||
LONG high = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */
|
||||
switch (origin)
|
||||
// (int) to eliminate clang warning
|
||||
switch ((int)origin)
|
||||
{
|
||||
case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break;
|
||||
case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break;
|
||||
|
|
@ -308,7 +309,7 @@ WRes File_Seek(CSzFile *p, Int64 *pos, ESzSeek origin)
|
|||
|
||||
int moveMethod; // = origin;
|
||||
|
||||
switch (origin)
|
||||
switch ((int)origin)
|
||||
{
|
||||
case SZ_SEEK_SET: moveMethod = SEEK_SET; break;
|
||||
case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break;
|
||||
|
|
@ -387,10 +388,10 @@ WRes File_GetLength(CSzFile *p, UInt64 *length)
|
|||
|
||||
/* ---------- FileSeqInStream ---------- */
|
||||
|
||||
static SRes FileSeqInStream_Read(const ISeqInStream *pp, void *buf, size_t *size)
|
||||
static SRes FileSeqInStream_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
|
||||
{
|
||||
CFileSeqInStream *p = CONTAINER_FROM_VTBL(pp, CFileSeqInStream, vt);
|
||||
WRes wres = File_Read(&p->file, buf, size);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileSeqInStream)
|
||||
const WRes wres = File_Read(&p->file, buf, size);
|
||||
p->wres = wres;
|
||||
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
|
||||
}
|
||||
|
|
@ -403,18 +404,18 @@ void FileSeqInStream_CreateVTable(CFileSeqInStream *p)
|
|||
|
||||
/* ---------- FileInStream ---------- */
|
||||
|
||||
static SRes FileInStream_Read(const ISeekInStream *pp, void *buf, size_t *size)
|
||||
static SRes FileInStream_Read(ISeekInStreamPtr pp, void *buf, size_t *size)
|
||||
{
|
||||
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
|
||||
WRes wres = File_Read(&p->file, buf, size);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
|
||||
const WRes wres = File_Read(&p->file, buf, size);
|
||||
p->wres = wres;
|
||||
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
|
||||
}
|
||||
|
||||
static SRes FileInStream_Seek(const ISeekInStream *pp, Int64 *pos, ESzSeek origin)
|
||||
static SRes FileInStream_Seek(ISeekInStreamPtr pp, Int64 *pos, ESzSeek origin)
|
||||
{
|
||||
CFileInStream *p = CONTAINER_FROM_VTBL(pp, CFileInStream, vt);
|
||||
WRes wres = File_Seek(&p->file, pos, origin);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileInStream)
|
||||
const WRes wres = File_Seek(&p->file, pos, origin);
|
||||
p->wres = wres;
|
||||
return (wres == 0) ? SZ_OK : SZ_ERROR_READ;
|
||||
}
|
||||
|
|
@ -428,10 +429,10 @@ void FileInStream_CreateVTable(CFileInStream *p)
|
|||
|
||||
/* ---------- FileOutStream ---------- */
|
||||
|
||||
static size_t FileOutStream_Write(const ISeqOutStream *pp, const void *data, size_t size)
|
||||
static size_t FileOutStream_Write(ISeqOutStreamPtr pp, const void *data, size_t size)
|
||||
{
|
||||
CFileOutStream *p = CONTAINER_FROM_VTBL(pp, CFileOutStream, vt);
|
||||
WRes wres = File_Write(&p->file, data, &size);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CFileOutStream)
|
||||
const WRes wres = File_Write(&p->file, data, &size);
|
||||
p->wres = wres;
|
||||
return size;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* 7zFile.h -- File IO
|
||||
2021-02-15 : Igor Pavlov : Public domain */
|
||||
2023-03-05 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_FILE_H
|
||||
#define __7Z_FILE_H
|
||||
#ifndef ZIP7_INC_FILE_H
|
||||
#define ZIP7_INC_FILE_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#define USE_WINDOWS_FILE
|
||||
|
|
@ -10,7 +10,8 @@
|
|||
#endif
|
||||
|
||||
#ifdef USE_WINDOWS_FILE
|
||||
#include <windows.h>
|
||||
#include "7zWindows.h"
|
||||
|
||||
#else
|
||||
// note: USE_FOPEN mode is limited to 32-bit file size
|
||||
// #define USE_FOPEN
|
||||
|
|
|
|||
67
C/7zStream.c
67
C/7zStream.c
|
|
@ -1,5 +1,5 @@
|
|||
/* 7zStream.c -- 7z Stream functions
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -7,12 +7,33 @@
|
|||
|
||||
#include "7zTypes.h"
|
||||
|
||||
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType)
|
||||
|
||||
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize)
|
||||
{
|
||||
size_t size = *processedSize;
|
||||
*processedSize = 0;
|
||||
while (size != 0)
|
||||
{
|
||||
size_t cur = size;
|
||||
const SRes res = ISeqInStream_Read(stream, buf, &cur);
|
||||
*processedSize += cur;
|
||||
buf = (void *)((Byte *)buf + cur);
|
||||
size -= cur;
|
||||
if (res != SZ_OK)
|
||||
return res;
|
||||
if (cur == 0)
|
||||
return SZ_OK;
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType)
|
||||
{
|
||||
while (size != 0)
|
||||
{
|
||||
size_t processed = size;
|
||||
RINOK(ISeqInStream_Read(stream, buf, &processed));
|
||||
RINOK(ISeqInStream_Read(stream, buf, &processed))
|
||||
if (processed == 0)
|
||||
return errorType;
|
||||
buf = (void *)((Byte *)buf + processed);
|
||||
|
|
@ -21,42 +42,44 @@ SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes
|
|||
return SZ_OK;
|
||||
}
|
||||
|
||||
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size)
|
||||
SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size)
|
||||
{
|
||||
return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
|
||||
}
|
||||
*/
|
||||
|
||||
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf)
|
||||
|
||||
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf)
|
||||
{
|
||||
size_t processed = 1;
|
||||
RINOK(ISeqInStream_Read(stream, buf, &processed));
|
||||
RINOK(ISeqInStream_Read(stream, buf, &processed))
|
||||
return (processed == 1) ? SZ_OK : SZ_ERROR_INPUT_EOF;
|
||||
}
|
||||
|
||||
|
||||
|
||||
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset)
|
||||
SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset)
|
||||
{
|
||||
Int64 t = (Int64)offset;
|
||||
return ILookInStream_Seek(stream, &t, SZ_SEEK_SET);
|
||||
}
|
||||
|
||||
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size)
|
||||
SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size)
|
||||
{
|
||||
const void *lookBuf;
|
||||
if (*size == 0)
|
||||
return SZ_OK;
|
||||
RINOK(ILookInStream_Look(stream, &lookBuf, size));
|
||||
RINOK(ILookInStream_Look(stream, &lookBuf, size))
|
||||
memcpy(buf, lookBuf, *size);
|
||||
return ILookInStream_Skip(stream, *size);
|
||||
}
|
||||
|
||||
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType)
|
||||
SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType)
|
||||
{
|
||||
while (size != 0)
|
||||
{
|
||||
size_t processed = size;
|
||||
RINOK(ILookInStream_Read(stream, buf, &processed));
|
||||
RINOK(ILookInStream_Read(stream, buf, &processed))
|
||||
if (processed == 0)
|
||||
return errorType;
|
||||
buf = (void *)((Byte *)buf + processed);
|
||||
|
|
@ -65,16 +88,16 @@ SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRe
|
|||
return SZ_OK;
|
||||
}
|
||||
|
||||
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size)
|
||||
SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size)
|
||||
{
|
||||
return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define GET_LookToRead2 CLookToRead2 *p = CONTAINER_FROM_VTBL(pp, CLookToRead2, vt);
|
||||
#define GET_LookToRead2 Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLookToRead2)
|
||||
|
||||
static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf, size_t *size)
|
||||
static SRes LookToRead2_Look_Lookahead(ILookInStreamPtr pp, const void **buf, size_t *size)
|
||||
{
|
||||
SRes res = SZ_OK;
|
||||
GET_LookToRead2
|
||||
|
|
@ -93,7 +116,7 @@ static SRes LookToRead2_Look_Lookahead(const ILookInStream *pp, const void **buf
|
|||
return res;
|
||||
}
|
||||
|
||||
static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, size_t *size)
|
||||
static SRes LookToRead2_Look_Exact(ILookInStreamPtr pp, const void **buf, size_t *size)
|
||||
{
|
||||
SRes res = SZ_OK;
|
||||
GET_LookToRead2
|
||||
|
|
@ -113,14 +136,14 @@ static SRes LookToRead2_Look_Exact(const ILookInStream *pp, const void **buf, si
|
|||
return res;
|
||||
}
|
||||
|
||||
static SRes LookToRead2_Skip(const ILookInStream *pp, size_t offset)
|
||||
static SRes LookToRead2_Skip(ILookInStreamPtr pp, size_t offset)
|
||||
{
|
||||
GET_LookToRead2
|
||||
p->pos += offset;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
|
||||
static SRes LookToRead2_Read(ILookInStreamPtr pp, void *buf, size_t *size)
|
||||
{
|
||||
GET_LookToRead2
|
||||
size_t rem = p->size - p->pos;
|
||||
|
|
@ -134,7 +157,7 @@ static SRes LookToRead2_Read(const ILookInStream *pp, void *buf, size_t *size)
|
|||
return SZ_OK;
|
||||
}
|
||||
|
||||
static SRes LookToRead2_Seek(const ILookInStream *pp, Int64 *pos, ESzSeek origin)
|
||||
static SRes LookToRead2_Seek(ILookInStreamPtr pp, Int64 *pos, ESzSeek origin)
|
||||
{
|
||||
GET_LookToRead2
|
||||
p->pos = p->size = 0;
|
||||
|
|
@ -153,9 +176,9 @@ void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead)
|
|||
|
||||
|
||||
|
||||
static SRes SecToLook_Read(const ISeqInStream *pp, void *buf, size_t *size)
|
||||
static SRes SecToLook_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
|
||||
{
|
||||
CSecToLook *p = CONTAINER_FROM_VTBL(pp, CSecToLook, vt);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToLook)
|
||||
return LookInStream_LookRead(p->realStream, buf, size);
|
||||
}
|
||||
|
||||
|
|
@ -164,9 +187,9 @@ void SecToLook_CreateVTable(CSecToLook *p)
|
|||
p->vt.Read = SecToLook_Read;
|
||||
}
|
||||
|
||||
static SRes SecToRead_Read(const ISeqInStream *pp, void *buf, size_t *size)
|
||||
static SRes SecToRead_Read(ISeqInStreamPtr pp, void *buf, size_t *size)
|
||||
{
|
||||
CSecToRead *p = CONTAINER_FROM_VTBL(pp, CSecToRead, vt);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CSecToRead)
|
||||
return ILookInStream_Read(p->realStream, buf, size);
|
||||
}
|
||||
|
||||
|
|
|
|||
274
C/7zTypes.h
274
C/7zTypes.h
|
|
@ -1,8 +1,8 @@
|
|||
/* 7zTypes.h -- Basic types
|
||||
2021-12-25 : Igor Pavlov : Public domain */
|
||||
2024-01-24 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_TYPES_H
|
||||
#define __7Z_TYPES_H
|
||||
#ifndef ZIP7_7Z_TYPES_H
|
||||
#define ZIP7_7Z_TYPES_H
|
||||
|
||||
#ifdef _WIN32
|
||||
/* #include <windows.h> */
|
||||
|
|
@ -52,6 +52,11 @@ typedef int SRes;
|
|||
#define MY_ALIGN(n)
|
||||
#endif
|
||||
#else
|
||||
/*
|
||||
// C11/C++11:
|
||||
#include <stdalign.h>
|
||||
#define MY_ALIGN(n) alignas(n)
|
||||
*/
|
||||
#define MY_ALIGN(n) __attribute__ ((aligned(n)))
|
||||
#endif
|
||||
|
||||
|
|
@ -62,7 +67,7 @@ typedef int SRes;
|
|||
typedef unsigned WRes;
|
||||
#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
|
||||
|
||||
// #define MY_HRES_ERROR__INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
|
||||
// #define MY_HRES_ERROR_INTERNAL_ERROR MY_SRes_HRESULT_FROM_WRes(ERROR_INTERNAL_ERROR)
|
||||
|
||||
#else // _WIN32
|
||||
|
||||
|
|
@ -70,13 +75,13 @@ typedef unsigned WRes;
|
|||
typedef int WRes;
|
||||
|
||||
// (FACILITY_ERRNO = 0x800) is 7zip's FACILITY constant to represent (errno) errors in HRESULT
|
||||
#define MY__FACILITY_ERRNO 0x800
|
||||
#define MY__FACILITY_WIN32 7
|
||||
#define MY__FACILITY__WRes MY__FACILITY_ERRNO
|
||||
#define MY_FACILITY_ERRNO 0x800
|
||||
#define MY_FACILITY_WIN32 7
|
||||
#define MY_FACILITY_WRes MY_FACILITY_ERRNO
|
||||
|
||||
#define MY_HRESULT_FROM_errno_CONST_ERROR(x) ((HRESULT)( \
|
||||
( (HRESULT)(x) & 0x0000FFFF) \
|
||||
| (MY__FACILITY__WRes << 16) \
|
||||
| (MY_FACILITY_WRes << 16) \
|
||||
| (HRESULT)0x80000000 ))
|
||||
|
||||
#define MY_SRes_HRESULT_FROM_WRes(x) \
|
||||
|
|
@ -120,23 +125,19 @@ typedef int WRes;
|
|||
#define ERROR_INVALID_REPARSE_DATA ((HRESULT)0x80071128L)
|
||||
#define ERROR_REPARSE_TAG_INVALID ((HRESULT)0x80071129L)
|
||||
|
||||
// if (MY__FACILITY__WRes != FACILITY_WIN32),
|
||||
// if (MY_FACILITY_WRes != FACILITY_WIN32),
|
||||
// we use FACILITY_WIN32 for COM errors:
|
||||
#define E_OUTOFMEMORY ((HRESULT)0x8007000EL)
|
||||
#define E_INVALIDARG ((HRESULT)0x80070057L)
|
||||
#define MY__E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
|
||||
#define MY_E_ERROR_NEGATIVE_SEEK ((HRESULT)0x80070083L)
|
||||
|
||||
/*
|
||||
// we can use FACILITY_ERRNO for some COM errors, that have errno equivalents:
|
||||
#define E_OUTOFMEMORY MY_HRESULT_FROM_errno_CONST_ERROR(ENOMEM)
|
||||
#define E_INVALIDARG MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
|
||||
#define MY__E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
|
||||
#define MY_E_ERROR_NEGATIVE_SEEK MY_HRESULT_FROM_errno_CONST_ERROR(EINVAL)
|
||||
*/
|
||||
|
||||
// gcc / clang : (sizeof(long) == sizeof(void*)) in 32/64 bits
|
||||
typedef long INT_PTR;
|
||||
typedef unsigned long UINT_PTR;
|
||||
|
||||
#define TEXT(quote) quote
|
||||
|
||||
#define FILE_ATTRIBUTE_READONLY 0x0001
|
||||
|
|
@ -160,18 +161,18 @@ typedef unsigned long UINT_PTR;
|
|||
|
||||
|
||||
#ifndef RINOK
|
||||
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
|
||||
#define RINOK(x) { const int _result_ = (x); if (_result_ != 0) return _result_; }
|
||||
#endif
|
||||
|
||||
#ifndef RINOK_WRes
|
||||
#define RINOK_WRes(x) { WRes __result__ = (x); if (__result__ != 0) return __result__; }
|
||||
#define RINOK_WRes(x) { const WRes _result_ = (x); if (_result_ != 0) return _result_; }
|
||||
#endif
|
||||
|
||||
typedef unsigned char Byte;
|
||||
typedef short Int16;
|
||||
typedef unsigned short UInt16;
|
||||
|
||||
#ifdef _LZMA_UINT32_IS_ULONG
|
||||
#ifdef Z7_DECL_Int32_AS_long
|
||||
typedef long Int32;
|
||||
typedef unsigned long UInt32;
|
||||
#else
|
||||
|
|
@ -210,37 +211,51 @@ typedef size_t SIZE_T;
|
|||
#endif // _WIN32
|
||||
|
||||
|
||||
#define MY_HRES_ERROR__INTERNAL_ERROR ((HRESULT)0x8007054FL)
|
||||
#define MY_HRES_ERROR_INTERNAL_ERROR ((HRESULT)0x8007054FL)
|
||||
|
||||
|
||||
#ifdef _SZ_NO_INT_64
|
||||
|
||||
/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
|
||||
NOTES: Some code will work incorrectly in that case! */
|
||||
#ifdef Z7_DECL_Int64_AS_long
|
||||
|
||||
typedef long Int64;
|
||||
typedef unsigned long UInt64;
|
||||
|
||||
#else
|
||||
|
||||
#if defined(_MSC_VER) || defined(__BORLANDC__)
|
||||
#if (defined(_MSC_VER) || defined(__BORLANDC__)) && !defined(__clang__)
|
||||
typedef __int64 Int64;
|
||||
typedef unsigned __int64 UInt64;
|
||||
#define UINT64_CONST(n) n
|
||||
#else
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#include <stdint.h>
|
||||
typedef int64_t Int64;
|
||||
typedef uint64_t UInt64;
|
||||
#else
|
||||
typedef long long int Int64;
|
||||
typedef unsigned long long int UInt64;
|
||||
#define UINT64_CONST(n) n ## ULL
|
||||
// #define UINT64_CONST(n) n ## ULL
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _LZMA_NO_SYSTEM_SIZE_T
|
||||
typedef UInt32 SizeT;
|
||||
#define UINT64_CONST(n) n
|
||||
|
||||
|
||||
#ifdef Z7_DECL_SizeT_AS_unsigned_int
|
||||
typedef unsigned int SizeT;
|
||||
#else
|
||||
typedef size_t SizeT;
|
||||
#endif
|
||||
|
||||
/*
|
||||
#if (defined(_MSC_VER) && _MSC_VER <= 1200)
|
||||
typedef size_t MY_uintptr_t;
|
||||
#else
|
||||
#include <stdint.h>
|
||||
typedef uintptr_t MY_uintptr_t;
|
||||
#endif
|
||||
*/
|
||||
|
||||
typedef int BoolInt;
|
||||
/* typedef BoolInt Bool; */
|
||||
#define True 1
|
||||
|
|
@ -248,23 +263,23 @@ typedef int BoolInt;
|
|||
|
||||
|
||||
#ifdef _WIN32
|
||||
#define MY_STD_CALL __stdcall
|
||||
#define Z7_STDCALL __stdcall
|
||||
#else
|
||||
#define MY_STD_CALL
|
||||
#define Z7_STDCALL
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
#define MY_NO_INLINE __declspec(noinline)
|
||||
#define Z7_NO_INLINE __declspec(noinline)
|
||||
#else
|
||||
#define MY_NO_INLINE
|
||||
#define Z7_NO_INLINE
|
||||
#endif
|
||||
|
||||
#define MY_FORCE_INLINE __forceinline
|
||||
#define Z7_FORCE_INLINE __forceinline
|
||||
|
||||
#define MY_CDECL __cdecl
|
||||
#define MY_FAST_CALL __fastcall
|
||||
#define Z7_CDECL __cdecl
|
||||
#define Z7_FASTCALL __fastcall
|
||||
|
||||
#else // _MSC_VER
|
||||
|
||||
|
|
@ -272,27 +287,25 @@ typedef int BoolInt;
|
|||
|| (defined(__clang__) && (__clang_major__ >= 4)) \
|
||||
|| defined(__INTEL_COMPILER) \
|
||||
|| defined(__xlC__)
|
||||
#define MY_NO_INLINE __attribute__((noinline))
|
||||
// #define MY_FORCE_INLINE __attribute__((always_inline)) inline
|
||||
#define Z7_NO_INLINE __attribute__((noinline))
|
||||
#define Z7_FORCE_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define MY_NO_INLINE
|
||||
#define Z7_NO_INLINE
|
||||
#define Z7_FORCE_INLINE
|
||||
#endif
|
||||
|
||||
#define MY_FORCE_INLINE
|
||||
|
||||
|
||||
#define MY_CDECL
|
||||
#define Z7_CDECL
|
||||
|
||||
#if defined(_M_IX86) \
|
||||
|| defined(__i386__)
|
||||
// #define MY_FAST_CALL __attribute__((fastcall))
|
||||
// #define MY_FAST_CALL __attribute__((cdecl))
|
||||
#define MY_FAST_CALL
|
||||
// #define Z7_FASTCALL __attribute__((fastcall))
|
||||
// #define Z7_FASTCALL __attribute__((cdecl))
|
||||
#define Z7_FASTCALL
|
||||
#elif defined(MY_CPU_AMD64)
|
||||
// #define MY_FAST_CALL __attribute__((ms_abi))
|
||||
#define MY_FAST_CALL
|
||||
// #define Z7_FASTCALL __attribute__((ms_abi))
|
||||
#define Z7_FASTCALL
|
||||
#else
|
||||
#define MY_FAST_CALL
|
||||
#define Z7_FASTCALL
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
|
@ -300,41 +313,49 @@ typedef int BoolInt;
|
|||
|
||||
/* The following interfaces use first parameter as pointer to structure */
|
||||
|
||||
typedef struct IByteIn IByteIn;
|
||||
struct IByteIn
|
||||
// #define Z7_C_IFACE_CONST_QUAL
|
||||
#define Z7_C_IFACE_CONST_QUAL const
|
||||
|
||||
#define Z7_C_IFACE_DECL(a) \
|
||||
struct a ## _; \
|
||||
typedef Z7_C_IFACE_CONST_QUAL struct a ## _ * a ## Ptr; \
|
||||
typedef struct a ## _ a; \
|
||||
struct a ## _
|
||||
|
||||
|
||||
Z7_C_IFACE_DECL (IByteIn)
|
||||
{
|
||||
Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
|
||||
Byte (*Read)(IByteInPtr p); /* reads one byte, returns 0 in case of EOF or error */
|
||||
};
|
||||
#define IByteIn_Read(p) (p)->Read(p)
|
||||
|
||||
|
||||
typedef struct IByteOut IByteOut;
|
||||
struct IByteOut
|
||||
Z7_C_IFACE_DECL (IByteOut)
|
||||
{
|
||||
void (*Write)(const IByteOut *p, Byte b);
|
||||
void (*Write)(IByteOutPtr p, Byte b);
|
||||
};
|
||||
#define IByteOut_Write(p, b) (p)->Write(p, b)
|
||||
|
||||
|
||||
typedef struct ISeqInStream ISeqInStream;
|
||||
struct ISeqInStream
|
||||
Z7_C_IFACE_DECL (ISeqInStream)
|
||||
{
|
||||
SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
|
||||
SRes (*Read)(ISeqInStreamPtr p, void *buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
};
|
||||
#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
|
||||
|
||||
/* try to read as much as avail in stream and limited by (*processedSize) */
|
||||
SRes SeqInStream_ReadMax(ISeqInStreamPtr stream, void *buf, size_t *processedSize);
|
||||
/* it can return SZ_ERROR_INPUT_EOF */
|
||||
SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
|
||||
SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
|
||||
// SRes SeqInStream_Read(ISeqInStreamPtr stream, void *buf, size_t size);
|
||||
// SRes SeqInStream_Read2(ISeqInStreamPtr stream, void *buf, size_t size, SRes errorType);
|
||||
SRes SeqInStream_ReadByte(ISeqInStreamPtr stream, Byte *buf);
|
||||
|
||||
|
||||
typedef struct ISeqOutStream ISeqOutStream;
|
||||
struct ISeqOutStream
|
||||
Z7_C_IFACE_DECL (ISeqOutStream)
|
||||
{
|
||||
size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
|
||||
size_t (*Write)(ISeqOutStreamPtr p, const void *buf, size_t size);
|
||||
/* Returns: result - the number of actually written bytes.
|
||||
(result < size) means error */
|
||||
};
|
||||
|
|
@ -348,29 +369,26 @@ typedef enum
|
|||
} ESzSeek;
|
||||
|
||||
|
||||
typedef struct ISeekInStream ISeekInStream;
|
||||
struct ISeekInStream
|
||||
Z7_C_IFACE_DECL (ISeekInStream)
|
||||
{
|
||||
SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size); /* same as ISeqInStream::Read */
|
||||
SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
|
||||
SRes (*Read)(ISeekInStreamPtr p, void *buf, size_t *size); /* same as ISeqInStream::Read */
|
||||
SRes (*Seek)(ISeekInStreamPtr p, Int64 *pos, ESzSeek origin);
|
||||
};
|
||||
#define ISeekInStream_Read(p, buf, size) (p)->Read(p, buf, size)
|
||||
#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
|
||||
|
||||
|
||||
typedef struct ILookInStream ILookInStream;
|
||||
struct ILookInStream
|
||||
Z7_C_IFACE_DECL (ILookInStream)
|
||||
{
|
||||
SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
|
||||
SRes (*Look)(ILookInStreamPtr p, const void **buf, size_t *size);
|
||||
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
|
||||
(output(*size) > input(*size)) is not allowed
|
||||
(output(*size) < input(*size)) is allowed */
|
||||
SRes (*Skip)(const ILookInStream *p, size_t offset);
|
||||
SRes (*Skip)(ILookInStreamPtr p, size_t offset);
|
||||
/* offset must be <= output(*size) of Look */
|
||||
|
||||
SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
|
||||
SRes (*Read)(ILookInStreamPtr p, void *buf, size_t *size);
|
||||
/* reads directly (without buffer). It's same as ISeqInStream::Read */
|
||||
SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
|
||||
SRes (*Seek)(ILookInStreamPtr p, Int64 *pos, ESzSeek origin);
|
||||
};
|
||||
|
||||
#define ILookInStream_Look(p, buf, size) (p)->Look(p, buf, size)
|
||||
|
|
@ -379,19 +397,18 @@ struct ILookInStream
|
|||
#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
|
||||
|
||||
|
||||
SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
|
||||
SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
|
||||
SRes LookInStream_LookRead(ILookInStreamPtr stream, void *buf, size_t *size);
|
||||
SRes LookInStream_SeekTo(ILookInStreamPtr stream, UInt64 offset);
|
||||
|
||||
/* reads via ILookInStream::Read */
|
||||
SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
|
||||
SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
|
||||
|
||||
SRes LookInStream_Read2(ILookInStreamPtr stream, void *buf, size_t size, SRes errorType);
|
||||
SRes LookInStream_Read(ILookInStreamPtr stream, void *buf, size_t size);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ILookInStream vt;
|
||||
const ISeekInStream *realStream;
|
||||
ISeekInStreamPtr realStream;
|
||||
|
||||
size_t pos;
|
||||
size_t size; /* it's data size */
|
||||
|
|
@ -403,13 +420,13 @@ typedef struct
|
|||
|
||||
void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
|
||||
|
||||
#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
|
||||
#define LookToRead2_INIT(p) { (p)->pos = (p)->size = 0; }
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ISeqInStream vt;
|
||||
const ILookInStream *realStream;
|
||||
ILookInStreamPtr realStream;
|
||||
} CSecToLook;
|
||||
|
||||
void SecToLook_CreateVTable(CSecToLook *p);
|
||||
|
|
@ -419,20 +436,19 @@ void SecToLook_CreateVTable(CSecToLook *p);
|
|||
typedef struct
|
||||
{
|
||||
ISeqInStream vt;
|
||||
const ILookInStream *realStream;
|
||||
ILookInStreamPtr realStream;
|
||||
} CSecToRead;
|
||||
|
||||
void SecToRead_CreateVTable(CSecToRead *p);
|
||||
|
||||
|
||||
typedef struct ICompressProgress ICompressProgress;
|
||||
|
||||
struct ICompressProgress
|
||||
Z7_C_IFACE_DECL (ICompressProgress)
|
||||
{
|
||||
SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
|
||||
SRes (*Progress)(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize);
|
||||
/* Returns: result. (result != SZ_OK) means break.
|
||||
Value (UInt64)(Int64)-1 for size means unknown value. */
|
||||
};
|
||||
|
||||
#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
|
||||
|
||||
|
||||
|
|
@ -470,13 +486,13 @@ struct ISzAlloc
|
|||
|
||||
|
||||
|
||||
#ifndef MY_container_of
|
||||
#ifndef Z7_container_of
|
||||
|
||||
/*
|
||||
#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
|
||||
#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
|
||||
#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
|
||||
#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
|
||||
#define Z7_container_of(ptr, type, m) container_of(ptr, type, m)
|
||||
#define Z7_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
|
||||
#define Z7_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
|
||||
#define Z7_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
|
||||
*/
|
||||
|
||||
/*
|
||||
|
|
@ -485,24 +501,64 @@ struct ISzAlloc
|
|||
GCC 4.8.1 : classes with non-public variable members"
|
||||
*/
|
||||
|
||||
#define MY_container_of(ptr, type, m) ((type *)(void *)((char *)(void *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
|
||||
#define Z7_container_of(ptr, type, m) \
|
||||
((type *)(void *)((char *)(void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
|
||||
#define Z7_container_of_CONST(ptr, type, m) \
|
||||
((const type *)(const void *)((const char *)(const void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
|
||||
/*
|
||||
#define Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m) \
|
||||
((type *)(void *)(const void *)((const char *)(const void *) \
|
||||
(1 ? (ptr) : &((type *)NULL)->m) - MY_offsetof(type, m)))
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
|
||||
#define Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(void *)(ptr))
|
||||
|
||||
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
#define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of(ptr, type, m)
|
||||
// #define Z7_CONTAINER_FROM_VTBL(ptr, type, m) Z7_container_of_NON_CONST_FROM_CONST(ptr, type, m)
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_CONST(ptr, type, m) Z7_container_of_CONST(ptr, type, m)
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
/*
|
||||
#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
#define Z7_CONTAINER_FROM_VTBL_CLS(ptr, type, m) Z7_CONTAINER_FROM_VTBL(ptr, type, m)
|
||||
*/
|
||||
#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
|
||||
#if defined (__clang__) || defined(__GNUC__)
|
||||
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wcast-qual\"")
|
||||
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL
|
||||
#define Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
|
||||
#endif
|
||||
|
||||
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
|
||||
/*
|
||||
#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
|
||||
*/
|
||||
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(ptr, type, m, p) \
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_CAST_QUAL \
|
||||
type *p = Z7_CONTAINER_FROM_VTBL(ptr, type, m); \
|
||||
Z7_DIAGNOSTIC_IGNORE_END_CAST_QUAL
|
||||
|
||||
#define Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(type) \
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR(pp, type, vt, p)
|
||||
|
||||
|
||||
#define MY_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
|
||||
// #define ZIP7_DECLARE_HANDLE(name) typedef void *name;
|
||||
#define Z7_DECLARE_HANDLE(name) struct name##_dummy{int unused;}; typedef struct name##_dummy *name;
|
||||
|
||||
|
||||
#define Z7_memset_0_ARRAY(a) memset((a), 0, sizeof(a))
|
||||
|
||||
#ifndef Z7_ARRAY_SIZE
|
||||
#define Z7_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
|
|
@ -520,6 +576,22 @@ struct ISzAlloc
|
|||
|
||||
#endif
|
||||
|
||||
#define k_PropVar_TimePrec_0 0
|
||||
#define k_PropVar_TimePrec_Unix 1
|
||||
#define k_PropVar_TimePrec_DOS 2
|
||||
#define k_PropVar_TimePrec_HighPrec 3
|
||||
#define k_PropVar_TimePrec_Base 16
|
||||
#define k_PropVar_TimePrec_100ns (k_PropVar_TimePrec_Base + 7)
|
||||
#define k_PropVar_TimePrec_1ns (k_PropVar_TimePrec_Base + 9)
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifndef Z7_ST
|
||||
#ifdef _7ZIP_ST
|
||||
#define Z7_ST
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#define MY_VER_MAJOR 21
|
||||
#define MY_VER_MINOR 07
|
||||
#define MY_VER_MAJOR 25
|
||||
#define MY_VER_MINOR 1
|
||||
#define MY_VER_BUILD 0
|
||||
#define MY_VERSION_NUMBERS "21.07"
|
||||
#define MY_VERSION_NUMBERS "25.01"
|
||||
#define MY_VERSION MY_VERSION_NUMBERS
|
||||
|
||||
#ifdef MY_CPU_NAME
|
||||
|
|
@ -10,12 +10,12 @@
|
|||
#define MY_VERSION_CPU MY_VERSION
|
||||
#endif
|
||||
|
||||
#define MY_DATE "2021-12-26"
|
||||
#define MY_DATE "2025-08-03"
|
||||
#undef MY_COPYRIGHT
|
||||
#undef MY_VERSION_COPYRIGHT_DATE
|
||||
#define MY_AUTHOR_NAME "Igor Pavlov"
|
||||
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
|
||||
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2021 Igor Pavlov"
|
||||
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov"
|
||||
|
||||
#ifdef USE_COPYRIGHT_CR
|
||||
#define MY_COPYRIGHT MY_COPYRIGHT_CR
|
||||
|
|
|
|||
101
C/7zWindows.h
Normal file
101
C/7zWindows.h
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
/* 7zWindows.h -- StdAfx
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_7Z_WINDOWS_H
|
||||
#define ZIP7_INC_7Z_WINDOWS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic push
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
#pragma warning(push)
|
||||
#pragma warning(disable : 4668) // '_WIN32_WINNT' is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
|
||||
|
||||
#if _MSC_VER == 1900
|
||||
// for old kit10 versions
|
||||
// #pragma warning(disable : 4255) // winuser.h(13979): warning C4255: 'GetThreadDpiAwarenessContext':
|
||||
#endif
|
||||
// win10 Windows Kit:
|
||||
#endif // _MSC_VER
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
|
||||
// for msvc6 without sdk2003
|
||||
#define RPC_NO_WINDOWS_H
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
// #if defined(__GNUC__) && !defined(__clang__)
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
// #include <basetsd.h>
|
||||
// #include <wtypes.h>
|
||||
|
||||
// but if precompiled with clang-cl then we need
|
||||
// #include <windows.h>
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(pop)
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER <= 1200 && !defined(_WIN64)
|
||||
#ifndef _W64
|
||||
|
||||
typedef long LONG_PTR, *PLONG_PTR;
|
||||
typedef unsigned long ULONG_PTR, *PULONG_PTR;
|
||||
typedef ULONG_PTR DWORD_PTR, *PDWORD_PTR;
|
||||
|
||||
#define Z7_OLD_WIN_SDK
|
||||
#endif // _W64
|
||||
#endif // _MSC_VER == 1200
|
||||
|
||||
#ifdef Z7_OLD_WIN_SDK
|
||||
|
||||
#ifndef INVALID_FILE_ATTRIBUTES
|
||||
#define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
|
||||
#endif
|
||||
#ifndef INVALID_SET_FILE_POINTER
|
||||
#define INVALID_SET_FILE_POINTER ((DWORD)-1)
|
||||
#endif
|
||||
#ifndef FILE_SPECIAL_ACCESS
|
||||
#define FILE_SPECIAL_ACCESS (FILE_ANY_ACCESS)
|
||||
#endif
|
||||
|
||||
// ShlObj.h:
|
||||
// #define BIF_NEWDIALOGSTYLE 0x0040
|
||||
|
||||
#pragma warning(disable : 4201)
|
||||
// #pragma warning(disable : 4115)
|
||||
|
||||
#undef VARIANT_TRUE
|
||||
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
|
||||
#endif
|
||||
|
||||
#endif // Z7_OLD_WIN_SDK
|
||||
|
||||
#ifdef UNDER_CE
|
||||
#undef VARIANT_TRUE
|
||||
#define VARIANT_TRUE ((VARIANT_BOOL)-1)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if _MSC_VER >= 1400 && _MSC_VER <= 1600
|
||||
// BaseTsd.h(148) : 'HandleToULong' : unreferenced inline function has been removed
|
||||
// string.h
|
||||
// #pragma warning(disable : 4514)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* #include "7zTypes.h" */
|
||||
|
||||
#endif
|
||||
113
C/7zip_gcc_c.mak
113
C/7zip_gcc_c.mak
|
|
@ -4,24 +4,57 @@ MY_ARCH_2 = $(MY_ARCH)
|
|||
MY_ASM = jwasm
|
||||
MY_ASM = asmc
|
||||
|
||||
ifndef RC
|
||||
#RC=windres.exe --target=pe-x86-64
|
||||
#RC=windres.exe -F pe-i386
|
||||
RC=windres.exe
|
||||
endif
|
||||
|
||||
PROGPATH = $(O)/$(PROG)
|
||||
PROGPATH_STATIC = $(O)/$(PROG)s
|
||||
|
||||
ifneq ($(CC), xlc)
|
||||
CFLAGS_WARN_WALL = -Wall -Werror -Wextra
|
||||
endif
|
||||
|
||||
# for object file
|
||||
CFLAGS_BASE_LIST = -c
|
||||
# for ASM file
|
||||
# CFLAGS_BASE_LIST = -S
|
||||
CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) -Wall -Werror -Wextra $(CFLAGS_WARN) \
|
||||
|
||||
FLAGS_FLTO = -flto
|
||||
FLAGS_FLTO =
|
||||
|
||||
CFLAGS_BASE = $(MY_ARCH_2) -O2 $(CFLAGS_BASE_LIST) $(CFLAGS_WARN_WALL) $(CFLAGS_WARN) \
|
||||
-DNDEBUG -D_REENTRANT -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
|
||||
|
||||
|
||||
LDFLAGS_STATIC = -DNDEBUG
|
||||
# -static
|
||||
|
||||
ifdef SystemDrive
|
||||
IS_MINGW = 1
|
||||
else
|
||||
ifdef SYSTEMDRIVE
|
||||
# ifdef OS
|
||||
IS_MINGW = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef IS_MINGW
|
||||
LDFLAGS_STATIC_2 = -static
|
||||
else
|
||||
ifndef DEF_FILE
|
||||
ifndef IS_NOT_STANDALONE
|
||||
ifndef MY_DYNAMIC_LINK
|
||||
ifneq ($(CC), clang)
|
||||
LDFLAGS_STATIC_2 =
|
||||
# -static
|
||||
# -static-libstdc++ -static-libgcc
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
LDFLAGS_STATIC = -DNDEBUG $(LDFLAGS_STATIC_2)
|
||||
|
||||
ifdef DEF_FILE
|
||||
|
||||
|
|
@ -62,22 +95,29 @@ endif
|
|||
|
||||
ifdef IS_MINGW
|
||||
|
||||
ifdef MSYSTEM
|
||||
RM = rm -f
|
||||
MY_MKDIR=mkdir -p
|
||||
DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(PROGPATH_STATIC) $(OBJS)
|
||||
else
|
||||
RM = del
|
||||
MY_MKDIR=mkdir
|
||||
LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32
|
||||
DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll
|
||||
endif
|
||||
|
||||
|
||||
CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE
|
||||
LIB2 = -lOle32 -loleaut32 -luuid -ladvapi32 -lUser32 -lShell32
|
||||
|
||||
CFLAGS_EXTRA = -DUNICODE -D_UNICODE
|
||||
# -Wno-delete-non-virtual-dtor
|
||||
|
||||
DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll
|
||||
|
||||
else
|
||||
|
||||
RM = rm -f
|
||||
MY_MKDIR=mkdir -p
|
||||
# CFLAGS_BASE := $(CFLAGS_BASE) -D_7ZIP_ST
|
||||
# CXXFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
|
||||
# CFLAGS_BASE := $(CFLAGS_BASE) -DZ7_ST
|
||||
# CFLAGS_EXTRA = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE
|
||||
|
||||
# LOCAL_LIBS=-lpthread
|
||||
# LOCAL_LIBS_DLL=$(LOCAL_LIBS) -ldl
|
||||
|
|
@ -88,10 +128,6 @@ DEL_OBJ_EXE = -$(RM) $(PROGPATH) $(PROGPATH_STATIC) $(OBJS)
|
|||
endif
|
||||
|
||||
|
||||
|
||||
CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CC_SHARED) -o $@
|
||||
|
||||
|
||||
ifdef IS_X64
|
||||
AFLAGS_ABI = -elf64 -DABI_LINUX
|
||||
else
|
||||
|
|
@ -102,12 +138,9 @@ AFLAGS_ABI = -elf -DABI_LINUX -DABI_CDECL
|
|||
endif
|
||||
AFLAGS = $(AFLAGS_ABI) -Fo$(O)/
|
||||
|
||||
C_WARN_FLAGS =
|
||||
|
||||
CXX_WARN_FLAGS =
|
||||
#-Wno-invalid-offsetof
|
||||
#-Wno-reorder
|
||||
|
||||
CXXFLAGS = $(LOCAL_FLAGS) $(CXXFLAGS_BASE2) $(CFLAGS_BASE) $(CXXFLAGS_EXTRA) $(CC_SHARED) -o $@ $(CXX_WARN_FLAGS)
|
||||
CFLAGS = $(LOCAL_FLAGS) $(CFLAGS_BASE2) $(CFLAGS_BASE) $(CFLAGS_EXTRA) $(C_WARN_FLAGS) $(FLAGS_FLTO) $(CC_SHARED) -o $@
|
||||
|
||||
STATIC_TARGET=
|
||||
ifdef COMPL_STATIC
|
||||
|
|
@ -120,18 +153,27 @@ all: $(O) $(PROGPATH) $(STATIC_TARGET)
|
|||
$(O):
|
||||
$(MY_MKDIR) $(O)
|
||||
|
||||
LFLAGS_ALL = -s $(MY_ARCH_2) $(LDFLAGS) $(LD_arch) $(OBJS) $(MY_LIBS) $(LIB2)
|
||||
ifneq ($(CC), $(CROSS_COMPILE)clang)
|
||||
LFLAGS_STRIP = -s
|
||||
endif
|
||||
|
||||
LFLAGS_ALL = $(LFLAGS_STRIP) $(MY_ARCH_2) $(LDFLAGS) $(FLAGS_FLTO) $(LD_arch) $(OBJS) $(MY_LIBS) $(LIB2)
|
||||
$(PROGPATH): $(OBJS)
|
||||
$(CXX) -o $(PROGPATH) $(LFLAGS_ALL)
|
||||
$(CC) -o $(PROGPATH) $(LFLAGS_ALL)
|
||||
|
||||
$(PROGPATH_STATIC): $(OBJS)
|
||||
$(CXX) -static -o $(PROGPATH_STATIC) $(LFLAGS_ALL)
|
||||
$(CC) -static -o $(PROGPATH_STATIC) $(LFLAGS_ALL)
|
||||
|
||||
|
||||
ifndef NO_DEFAULT_RES
|
||||
# old mingw without -FO
|
||||
# windres.exe $(RFLAGS) resource.rc $O/resource.o
|
||||
$O/resource.o: resource.rc
|
||||
windres.exe $(RFLAGS) resource.rc $O/resource.o
|
||||
$(RC) $(RFLAGS) resource.rc $(O)/resource.o
|
||||
endif
|
||||
# windres.exe $(RFLAGS) resource.rc $(O)\resource.o
|
||||
# windres.exe $(RFLAGS) resource.rc -FO $(O)/resource.o
|
||||
# $(RC) $(RFLAGS) resource.rc -FO $(O)/resource.o
|
||||
|
||||
|
||||
|
||||
|
|
@ -229,10 +271,18 @@ $O/Sha256.o: ../../../C/Sha256.c
|
|||
$(CC) $(CFLAGS) $<
|
||||
$O/Sort.o: ../../../C/Sort.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/SwapBytes.o: ../../../C/SwapBytes.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/Xz.o: ../../../C/Xz.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzCrc64.o: ../../../C/XzCrc64.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzDec.o: ../../../C/XzDec.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzEnc.o: ../../../C/XzEnc.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzIn.o: ../../../C/XzIn.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
|
||||
|
||||
ifdef USE_ASM
|
||||
|
|
@ -279,11 +329,11 @@ endif
|
|||
|
||||
ifdef IS_ARM64
|
||||
$O/LzmaDecOpt.o: ../../../Asm/arm64/LzmaDecOpt.S ../../../Asm/arm64/7zAsm.S
|
||||
$(CC) $(CFLAGS) $<
|
||||
$(CC) $(CFLAGS) $(ASM_FLAGS) $<
|
||||
endif
|
||||
|
||||
$O/LzmaDec.o: ../../LzmaDec.c
|
||||
$(CC) $(CFLAGS) -D_LZMA_DEC_OPT $<
|
||||
$(CC) $(CFLAGS) -DZ7_LZMA_DEC_OPT $<
|
||||
|
||||
else
|
||||
|
||||
|
|
@ -294,19 +344,16 @@ endif
|
|||
|
||||
|
||||
|
||||
$O/XzDec.o: ../../../C/XzDec.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzEnc.o: ../../../C/XzEnc.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/XzIn.o: ../../../C/XzIn.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
|
||||
|
||||
$O/7zMain.o: ../../../C/Util/7z/7zMain.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/7zipInstall.o: ../../../C/Util/7zipInstall/7zipInstall.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/7zipUninstall.o: ../../../C/Util/7zipUninstall/7zipUninstall.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
$O/LzmaUtil.o: ../../../C/Util/Lzma/LzmaUtil.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
|
||||
$O/XzUtil.o: ../../../C/Util/Xz/XzUtil.c
|
||||
$(CC) $(CFLAGS) $<
|
||||
|
||||
|
||||
clean:
|
||||
|
|
|
|||
162
C/Aes.c
162
C/Aes.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Aes.c -- AES encryption / decryption
|
||||
2021-05-13 : Igor Pavlov : Public domain */
|
||||
2024-03-01 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -7,13 +7,15 @@
|
|||
#include "Aes.h"
|
||||
|
||||
AES_CODE_FUNC g_AesCbc_Decode;
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
AES_CODE_FUNC g_AesCbc_Encode;
|
||||
AES_CODE_FUNC g_AesCtr_Code;
|
||||
UInt32 g_Aes_SupportedFunctions_Flags;
|
||||
#endif
|
||||
|
||||
MY_ALIGN(64)
|
||||
static UInt32 T[256 * 4];
|
||||
MY_ALIGN(64)
|
||||
static const Byte Sbox[256] = {
|
||||
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
|
||||
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
|
||||
|
|
@ -33,7 +35,9 @@ static const Byte Sbox[256] = {
|
|||
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};
|
||||
|
||||
|
||||
MY_ALIGN(64)
|
||||
static UInt32 D[256 * 4];
|
||||
MY_ALIGN(64)
|
||||
static Byte InvS[256];
|
||||
|
||||
#define xtime(x) ((((x) << 1) ^ (((x) & 0x80) != 0 ? 0x1B : 0)) & 0xFF)
|
||||
|
|
@ -51,32 +55,62 @@ static Byte InvS[256];
|
|||
#define DD(x) (D + (x << 8))
|
||||
|
||||
|
||||
// #define _SHOW_AES_STATUS
|
||||
// #define Z7_SHOW_AES_STATUS
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#define USE_HW_AES
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#define USE_HW_AES
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 6) // fix that check
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1110)
|
||||
#define USE_HW_AES
|
||||
#if (__INTEL_COMPILER >= 1900)
|
||||
#define USE_HW_VAES
|
||||
#endif
|
||||
#endif
|
||||
#elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
|
||||
#define USE_HW_AES
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 8)
|
||||
#define USE_HW_VAES
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if _MSC_VER >= 1910
|
||||
#define USE_HW_AES
|
||||
#define USE_HW_VAES
|
||||
#endif
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
|
||||
|
||||
#if defined(__ARM_FEATURE_AES) \
|
||||
|| defined(__ARM_FEATURE_CRYPTO)
|
||||
#define USE_HW_AES
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if defined(__ARM_FP) && \
|
||||
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| !defined(Z7_CLANG_VERSION) \
|
||||
|| defined(__ARM_NEON) && \
|
||||
(Z7_CLANG_VERSION < 170000 || \
|
||||
Z7_CLANG_VERSION > 170001)
|
||||
#define USE_HW_AES
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_AES
|
||||
#ifdef _SHOW_AES_STATUS
|
||||
// #pragma message("=== Aes.c USE_HW_AES === ")
|
||||
#ifdef Z7_SHOW_AES_STATUS
|
||||
#include <stdio.h>
|
||||
#define _PRF(x) x
|
||||
#define PRF(x) x
|
||||
#else
|
||||
#define _PRF(x)
|
||||
#define PRF(x)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -90,23 +124,23 @@ void AesGenTables(void)
|
|||
for (i = 0; i < 256; i++)
|
||||
{
|
||||
{
|
||||
UInt32 a1 = Sbox[i];
|
||||
UInt32 a2 = xtime(a1);
|
||||
UInt32 a3 = a2 ^ a1;
|
||||
const UInt32 a1 = Sbox[i];
|
||||
const UInt32 a2 = xtime(a1);
|
||||
const UInt32 a3 = a2 ^ a1;
|
||||
TT(0)[i] = Ui32(a2, a1, a1, a3);
|
||||
TT(1)[i] = Ui32(a3, a2, a1, a1);
|
||||
TT(2)[i] = Ui32(a1, a3, a2, a1);
|
||||
TT(3)[i] = Ui32(a1, a1, a3, a2);
|
||||
}
|
||||
{
|
||||
UInt32 a1 = InvS[i];
|
||||
UInt32 a2 = xtime(a1);
|
||||
UInt32 a4 = xtime(a2);
|
||||
UInt32 a8 = xtime(a4);
|
||||
UInt32 a9 = a8 ^ a1;
|
||||
UInt32 aB = a8 ^ a2 ^ a1;
|
||||
UInt32 aD = a8 ^ a4 ^ a1;
|
||||
UInt32 aE = a8 ^ a4 ^ a2;
|
||||
const UInt32 a1 = InvS[i];
|
||||
const UInt32 a2 = xtime(a1);
|
||||
const UInt32 a4 = xtime(a2);
|
||||
const UInt32 a8 = xtime(a4);
|
||||
const UInt32 a9 = a8 ^ a1;
|
||||
const UInt32 aB = a8 ^ a2 ^ a1;
|
||||
const UInt32 aD = a8 ^ a4 ^ a1;
|
||||
const UInt32 aE = a8 ^ a4 ^ a2;
|
||||
DD(0)[i] = Ui32(aE, a9, aD, aB);
|
||||
DD(1)[i] = Ui32(aB, aE, a9, aD);
|
||||
DD(2)[i] = Ui32(aD, aB, aE, a9);
|
||||
|
|
@ -116,7 +150,7 @@ void AesGenTables(void)
|
|||
|
||||
{
|
||||
AES_CODE_FUNC d = AesCbc_Decode;
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
AES_CODE_FUNC e = AesCbc_Encode;
|
||||
AES_CODE_FUNC c = AesCtr_Code;
|
||||
UInt32 flags = 0;
|
||||
|
|
@ -126,31 +160,33 @@ void AesGenTables(void)
|
|||
if (CPU_IsSupported_AES())
|
||||
{
|
||||
// #pragma message ("AES HW")
|
||||
_PRF(printf("\n===AES HW\n"));
|
||||
PRF(printf("\n===AES HW\n"));
|
||||
d = AesCbc_Decode_HW;
|
||||
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
e = AesCbc_Encode_HW;
|
||||
c = AesCtr_Code_HW;
|
||||
flags = k_Aes_SupportedFunctions_HW;
|
||||
#endif
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#ifdef USE_HW_VAES
|
||||
if (CPU_IsSupported_VAES_AVX2())
|
||||
{
|
||||
_PRF(printf("\n===vaes avx2\n"));
|
||||
PRF(printf("\n===vaes avx2\n"));
|
||||
d = AesCbc_Decode_HW_256;
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
c = AesCtr_Code_HW_256;
|
||||
flags |= k_Aes_SupportedFunctions_HW_256;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
g_AesCbc_Decode = d;
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
g_AesCbc_Encode = e;
|
||||
g_AesCtr_Code = c;
|
||||
g_Aes_SupportedFunctions_Flags = flags;
|
||||
|
|
@ -194,7 +230,7 @@ void AesGenTables(void)
|
|||
#define FD(i, x) InvS[gb(x, m[(i - x) & 3])]
|
||||
#define FD4(i) dest[i] = Ui32(FD(i, 0), FD(i, 1), FD(i, 2), FD(i, 3)) ^ w[i];
|
||||
|
||||
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
|
||||
void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
|
||||
{
|
||||
unsigned i, m;
|
||||
const UInt32 *wLim;
|
||||
|
|
@ -230,7 +266,7 @@ void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *w, const Byte *key, unsigned keySize)
|
|||
while (++w != wLim);
|
||||
}
|
||||
|
||||
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
|
||||
void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
|
||||
{
|
||||
unsigned i, num;
|
||||
Aes_SetKey_Enc(w, key, keySize);
|
||||
|
|
@ -251,7 +287,7 @@ void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *w, const Byte *key, unsigned keySize)
|
|||
src and dest are pointers to 4 UInt32 words.
|
||||
src and dest can point to same block */
|
||||
|
||||
// MY_FORCE_INLINE
|
||||
// Z7_FORCE_INLINE
|
||||
static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
|
||||
{
|
||||
UInt32 s[4];
|
||||
|
|
@ -265,17 +301,20 @@ static void Aes_Encode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
|
|||
w += 4;
|
||||
for (;;)
|
||||
{
|
||||
HT16(m, s, 0);
|
||||
HT16(m, s, 0)
|
||||
if (--numRounds2 == 0)
|
||||
break;
|
||||
HT16(s, m, 4);
|
||||
HT16(s, m, 4)
|
||||
w += 8;
|
||||
}
|
||||
w += 4;
|
||||
FT4(0); FT4(1); FT4(2); FT4(3);
|
||||
FT4(0)
|
||||
FT4(1)
|
||||
FT4(2)
|
||||
FT4(3)
|
||||
}
|
||||
|
||||
MY_FORCE_INLINE
|
||||
Z7_FORCE_INLINE
|
||||
static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
|
||||
{
|
||||
UInt32 s[4];
|
||||
|
|
@ -289,12 +328,15 @@ static void Aes_Decode(const UInt32 *w, UInt32 *dest, const UInt32 *src)
|
|||
for (;;)
|
||||
{
|
||||
w -= 8;
|
||||
HD16(m, s, 4);
|
||||
HD16(m, s, 4)
|
||||
if (--numRounds2 == 0)
|
||||
break;
|
||||
HD16(s, m, 0);
|
||||
HD16(s, m, 0)
|
||||
}
|
||||
FD4(0); FD4(1); FD4(2); FD4(3);
|
||||
FD4(0)
|
||||
FD4(1)
|
||||
FD4(2)
|
||||
FD4(3)
|
||||
}
|
||||
|
||||
void AesCbc_Init(UInt32 *p, const Byte *iv)
|
||||
|
|
@ -304,7 +346,7 @@ void AesCbc_Init(UInt32 *p, const Byte *iv)
|
|||
p[i] = GetUi32(iv + i * 4);
|
||||
}
|
||||
|
||||
void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
{
|
||||
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
|
||||
{
|
||||
|
|
@ -315,14 +357,14 @@ void MY_FAST_CALL AesCbc_Encode(UInt32 *p, Byte *data, size_t numBlocks)
|
|||
|
||||
Aes_Encode(p + 4, p, p);
|
||||
|
||||
SetUi32(data, p[0]);
|
||||
SetUi32(data + 4, p[1]);
|
||||
SetUi32(data + 8, p[2]);
|
||||
SetUi32(data + 12, p[3]);
|
||||
SetUi32(data, p[0])
|
||||
SetUi32(data + 4, p[1])
|
||||
SetUi32(data + 8, p[2])
|
||||
SetUi32(data + 12, p[3])
|
||||
}
|
||||
}
|
||||
|
||||
void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
{
|
||||
UInt32 in[4], out[4];
|
||||
for (; numBlocks != 0; numBlocks--, data += AES_BLOCK_SIZE)
|
||||
|
|
@ -334,10 +376,10 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
|
|||
|
||||
Aes_Decode(p + 4, out, in);
|
||||
|
||||
SetUi32(data, p[0] ^ out[0]);
|
||||
SetUi32(data + 4, p[1] ^ out[1]);
|
||||
SetUi32(data + 8, p[2] ^ out[2]);
|
||||
SetUi32(data + 12, p[3] ^ out[3]);
|
||||
SetUi32(data, p[0] ^ out[0])
|
||||
SetUi32(data + 4, p[1] ^ out[1])
|
||||
SetUi32(data + 8, p[2] ^ out[2])
|
||||
SetUi32(data + 12, p[3] ^ out[3])
|
||||
|
||||
p[0] = in[0];
|
||||
p[1] = in[1];
|
||||
|
|
@ -346,7 +388,7 @@ void MY_FAST_CALL AesCbc_Decode(UInt32 *p, Byte *data, size_t numBlocks)
|
|||
}
|
||||
}
|
||||
|
||||
void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
|
||||
{
|
||||
for (; numBlocks != 0; numBlocks--)
|
||||
{
|
||||
|
|
@ -360,7 +402,7 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
|
|||
|
||||
for (i = 0; i < 4; i++, data += 4)
|
||||
{
|
||||
UInt32 t = temp[i];
|
||||
const UInt32 t = temp[i];
|
||||
|
||||
#ifdef MY_CPU_LE_UNALIGN
|
||||
*((UInt32 *)(void *)data) ^= t;
|
||||
|
|
@ -373,3 +415,15 @@ void MY_FAST_CALL AesCtr_Code(UInt32 *p, Byte *data, size_t numBlocks)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef xtime
|
||||
#undef Ui32
|
||||
#undef gb0
|
||||
#undef gb1
|
||||
#undef gb2
|
||||
#undef gb3
|
||||
#undef gb
|
||||
#undef TT
|
||||
#undef DD
|
||||
#undef USE_HW_AES
|
||||
#undef PRF
|
||||
|
|
|
|||
36
C/Aes.h
36
C/Aes.h
|
|
@ -1,8 +1,8 @@
|
|||
/* Aes.h -- AES encryption / decryption
|
||||
2018-04-28 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __AES_H
|
||||
#define __AES_H
|
||||
#ifndef ZIP7_INC_AES_H
|
||||
#define ZIP7_INC_AES_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -20,19 +20,19 @@ void AesGenTables(void);
|
|||
|
||||
/* aes - 16-byte aligned pointer to keyMode+roundKeys sequence */
|
||||
/* keySize = 16 or 24 or 32 (bytes) */
|
||||
typedef void (MY_FAST_CALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
void MY_FAST_CALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
void MY_FAST_CALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
typedef void (Z7_FASTCALL *AES_SET_KEY_FUNC)(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
void Z7_FASTCALL Aes_SetKey_Enc(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
void Z7_FASTCALL Aes_SetKey_Dec(UInt32 *aes, const Byte *key, unsigned keySize);
|
||||
|
||||
/* ivAes - 16-byte aligned pointer to iv+keyMode+roundKeys sequence: UInt32[AES_NUM_IVMRK_WORDS] */
|
||||
void AesCbc_Init(UInt32 *ivAes, const Byte *iv); /* iv size is AES_BLOCK_SIZE */
|
||||
|
||||
/* data - 16-byte aligned pointer to data */
|
||||
/* numBlocks - the number of 16-byte blocks in data array */
|
||||
typedef void (MY_FAST_CALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
|
||||
typedef void (Z7_FASTCALL *AES_CODE_FUNC)(UInt32 *ivAes, Byte *data, size_t numBlocks);
|
||||
|
||||
extern AES_CODE_FUNC g_AesCbc_Decode;
|
||||
#ifndef _SFX
|
||||
#ifndef Z7_SFX
|
||||
extern AES_CODE_FUNC g_AesCbc_Encode;
|
||||
extern AES_CODE_FUNC g_AesCtr_Code;
|
||||
#define k_Aes_SupportedFunctions_HW (1 << 2)
|
||||
|
|
@ -41,19 +41,19 @@ extern UInt32 g_Aes_SupportedFunctions_Flags;
|
|||
#endif
|
||||
|
||||
|
||||
#define DECLARE__AES_CODE_FUNC(funcName) \
|
||||
void MY_FAST_CALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
|
||||
#define Z7_DECLARE_AES_CODE_FUNC(funcName) \
|
||||
void Z7_FASTCALL funcName(UInt32 *ivAes, Byte *data, size_t numBlocks);
|
||||
|
||||
DECLARE__AES_CODE_FUNC (AesCbc_Encode)
|
||||
DECLARE__AES_CODE_FUNC (AesCbc_Decode)
|
||||
DECLARE__AES_CODE_FUNC (AesCtr_Code)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code)
|
||||
|
||||
DECLARE__AES_CODE_FUNC (AesCbc_Encode_HW)
|
||||
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW)
|
||||
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCbc_Encode_HW)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW)
|
||||
|
||||
DECLARE__AES_CODE_FUNC (AesCbc_Decode_HW_256)
|
||||
DECLARE__AES_CODE_FUNC (AesCtr_Code_HW_256)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCbc_Decode_HW_256)
|
||||
Z7_DECLARE_AES_CODE_FUNC (AesCtr_Code_HW_256)
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
706
C/AesOpt.c
706
C/AesOpt.c
File diff suppressed because it is too large
Load diff
330
C/Alloc.c
330
C/Alloc.c
|
|
@ -1,38 +1,53 @@
|
|||
/* Alloc.c -- Memory allocation functions
|
||||
2021-07-13 : Igor Pavlov : Public domain */
|
||||
2024-02-18 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#include "7zWindows.h"
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "Alloc.h"
|
||||
|
||||
/* #define _SZ_ALLOC_DEBUG */
|
||||
#if defined(Z7_LARGE_PAGES) && defined(_WIN32) && \
|
||||
(!defined(Z7_WIN32_WINNT_MIN) || Z7_WIN32_WINNT_MIN < 0x0502) // < Win2003 (xp-64)
|
||||
#define Z7_USE_DYN_GetLargePageMinimum
|
||||
#endif
|
||||
|
||||
/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
// for debug:
|
||||
#if 0
|
||||
#if defined(__CHERI__) && defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
|
||||
// #pragma message("=== Z7_ALLOC_NO_OFFSET_ALLOCATOR === ")
|
||||
#define Z7_ALLOC_NO_OFFSET_ALLOCATOR
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// #define SZ_ALLOC_DEBUG
|
||||
/* #define SZ_ALLOC_DEBUG */
|
||||
|
||||
/* use SZ_ALLOC_DEBUG to debug alloc/free operations */
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
int g_allocCount = 0;
|
||||
int g_allocCountMid = 0;
|
||||
int g_allocCountBig = 0;
|
||||
static int g_allocCount = 0;
|
||||
#ifdef _WIN32
|
||||
static int g_allocCountMid = 0;
|
||||
static int g_allocCountBig = 0;
|
||||
#endif
|
||||
|
||||
|
||||
#define CONVERT_INT_TO_STR(charType, tempSize) \
|
||||
unsigned char temp[tempSize]; unsigned i = 0; \
|
||||
while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
|
||||
char temp[tempSize]; unsigned i = 0; \
|
||||
while (val >= 10) { temp[i++] = (char)('0' + (unsigned)(val % 10)); val /= 10; } \
|
||||
*s++ = (charType)('0' + (unsigned)val); \
|
||||
while (i != 0) { i--; *s++ = temp[i]; } \
|
||||
*s = 0;
|
||||
|
||||
static void ConvertUInt64ToString(UInt64 val, char *s)
|
||||
{
|
||||
CONVERT_INT_TO_STR(char, 24);
|
||||
CONVERT_INT_TO_STR(char, 24)
|
||||
}
|
||||
|
||||
#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
|
||||
|
|
@ -77,7 +92,7 @@ static void PrintAligned(const char *s, size_t align)
|
|||
Print(s);
|
||||
}
|
||||
|
||||
static void PrintLn()
|
||||
static void PrintLn(void)
|
||||
{
|
||||
Print("\n");
|
||||
}
|
||||
|
|
@ -89,10 +104,10 @@ static void PrintHex(UInt64 v, size_t align)
|
|||
PrintAligned(s, align);
|
||||
}
|
||||
|
||||
static void PrintDec(UInt64 v, size_t align)
|
||||
static void PrintDec(int v, size_t align)
|
||||
{
|
||||
char s[32];
|
||||
ConvertUInt64ToString(v, s);
|
||||
ConvertUInt64ToString((unsigned)v, s);
|
||||
PrintAligned(s, align);
|
||||
}
|
||||
|
||||
|
|
@ -102,12 +117,19 @@ static void PrintAddr(void *p)
|
|||
}
|
||||
|
||||
|
||||
#define PRINT_ALLOC(name, cnt, size, ptr) \
|
||||
#define PRINT_REALLOC(name, cnt, size, ptr) { \
|
||||
Print(name " "); \
|
||||
if (!ptr) PrintDec(cnt++, 10); \
|
||||
PrintHex(size, 10); \
|
||||
PrintAddr(ptr); \
|
||||
PrintLn(); }
|
||||
|
||||
#define PRINT_ALLOC(name, cnt, size, ptr) { \
|
||||
Print(name " "); \
|
||||
PrintDec(cnt++, 10); \
|
||||
PrintHex(size, 10); \
|
||||
PrintAddr(ptr); \
|
||||
PrintLn();
|
||||
PrintLn(); }
|
||||
|
||||
#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
|
||||
Print(name " "); \
|
||||
|
|
@ -117,26 +139,45 @@ static void PrintAddr(void *p)
|
|||
|
||||
#else
|
||||
|
||||
#ifdef _WIN32
|
||||
#define PRINT_ALLOC(name, cnt, size, ptr)
|
||||
#endif
|
||||
#define PRINT_FREE(name, cnt, ptr)
|
||||
#define Print(s)
|
||||
#define PrintLn()
|
||||
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
|
||||
#define PrintHex(v, align)
|
||||
#endif
|
||||
#define PrintAddr(p)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
by specification:
|
||||
malloc(non_NULL, 0) : returns NULL or a unique pointer value that can later be successfully passed to free()
|
||||
realloc(NULL, size) : the call is equivalent to malloc(size)
|
||||
realloc(non_NULL, 0) : the call is equivalent to free(ptr)
|
||||
|
||||
in main compilers:
|
||||
malloc(0) : returns non_NULL
|
||||
realloc(NULL, 0) : returns non_NULL
|
||||
realloc(non_NULL, 0) : returns NULL
|
||||
*/
|
||||
|
||||
|
||||
void *MyAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
PRINT_ALLOC("Alloc ", g_allocCount, size, NULL);
|
||||
#ifdef _SZ_ALLOC_DEBUG
|
||||
// PRINT_ALLOC("Alloc ", g_allocCount, size, NULL)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = malloc(size);
|
||||
// PRINT_ALLOC("Alloc ", g_allocCount, size, p);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc ", g_allocCount, size, p)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
|
|
@ -146,71 +187,107 @@ void *MyAlloc(size_t size)
|
|||
|
||||
void MyFree(void *address)
|
||||
{
|
||||
PRINT_FREE("Free ", g_allocCount, address);
|
||||
PRINT_FREE("Free ", g_allocCount, address)
|
||||
|
||||
free(address);
|
||||
}
|
||||
|
||||
void *MyRealloc(void *address, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
{
|
||||
MyFree(address);
|
||||
return NULL;
|
||||
}
|
||||
// PRINT_REALLOC("Realloc ", g_allocCount, size, address)
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = realloc(address, size);
|
||||
if (p)
|
||||
{
|
||||
PRINT_REALLOC("Realloc ", g_allocCount, size, address)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
return realloc(address, size);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
void *MidAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
|
||||
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
|
||||
|
||||
#ifdef SZ_ALLOC_DEBUG
|
||||
{
|
||||
void *p = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, p)
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#else
|
||||
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
#endif
|
||||
}
|
||||
|
||||
void MidFree(void *address)
|
||||
{
|
||||
PRINT_FREE("Free-Mid", g_allocCountMid, address);
|
||||
PRINT_FREE("Free-Mid", g_allocCountMid, address)
|
||||
|
||||
if (!address)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
|
||||
#ifdef MEM_LARGE_PAGES
|
||||
#define MY__MEM_LARGE_PAGES MEM_LARGE_PAGES
|
||||
#define MY_MEM_LARGE_PAGES MEM_LARGE_PAGES
|
||||
#else
|
||||
#define MY__MEM_LARGE_PAGES 0x20000000
|
||||
#define MY_MEM_LARGE_PAGES 0x20000000
|
||||
#endif
|
||||
|
||||
extern
|
||||
SIZE_T g_LargePageSize;
|
||||
SIZE_T g_LargePageSize = 0;
|
||||
typedef SIZE_T (WINAPI *GetLargePageMinimumP)(VOID);
|
||||
typedef SIZE_T (WINAPI *Func_GetLargePageMinimum)(VOID);
|
||||
|
||||
#endif // _7ZIP_LARGE_PAGES
|
||||
|
||||
void SetLargePageSize()
|
||||
void SetLargePageSize(void)
|
||||
{
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
SIZE_T size;
|
||||
GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
|
||||
GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
|
||||
if (!largePageMinimum)
|
||||
#ifdef Z7_USE_DYN_GetLargePageMinimum
|
||||
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
|
||||
|
||||
const
|
||||
Func_GetLargePageMinimum fn =
|
||||
(Func_GetLargePageMinimum) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
|
||||
"GetLargePageMinimum");
|
||||
if (!fn)
|
||||
return;
|
||||
size = largePageMinimum();
|
||||
size = fn();
|
||||
#else
|
||||
size = GetLargePageMinimum();
|
||||
#endif
|
||||
if (size == 0 || (size & (size - 1)) != 0)
|
||||
return;
|
||||
g_LargePageSize = size;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // Z7_LARGE_PAGES
|
||||
|
||||
void *BigAlloc(size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return NULL;
|
||||
|
||||
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
|
||||
|
||||
#ifdef _7ZIP_LARGE_PAGES
|
||||
PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL)
|
||||
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
{
|
||||
SIZE_T ps = g_LargePageSize;
|
||||
if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
|
||||
|
|
@ -220,56 +297,43 @@ void *BigAlloc(size_t size)
|
|||
size2 = (size + ps) & ~ps;
|
||||
if (size2 >= size)
|
||||
{
|
||||
void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MY__MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (res)
|
||||
return res;
|
||||
void *p = VirtualAlloc(NULL, size2, MEM_COMMIT | MY_MEM_LARGE_PAGES, PAGE_READWRITE);
|
||||
if (p)
|
||||
{
|
||||
PRINT_ALLOC("Alloc-BM ", g_allocCountMid, size2, p)
|
||||
return p;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
|
||||
return MidAlloc(size);
|
||||
}
|
||||
|
||||
void BigFree(void *address)
|
||||
{
|
||||
PRINT_FREE("Free-Big", g_allocCountBig, address);
|
||||
|
||||
if (!address)
|
||||
return;
|
||||
VirtualFree(address, 0, MEM_RELEASE);
|
||||
PRINT_FREE("Free-Big", g_allocCountBig, address)
|
||||
MidFree(address);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
|
||||
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
|
||||
static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MyAlloc(size); }
|
||||
static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MyFree(address); }
|
||||
const ISzAlloc g_Alloc = { SzAlloc, SzFree };
|
||||
|
||||
#ifdef _WIN32
|
||||
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
|
||||
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
|
||||
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
|
||||
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
|
||||
static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return MidAlloc(size); }
|
||||
static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) MidFree(address); }
|
||||
static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p) return BigAlloc(size); }
|
||||
static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p) BigFree(address); }
|
||||
const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
|
||||
const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
|
||||
#endif
|
||||
|
||||
/*
|
||||
uintptr_t : <stdint.h> C99 (optional)
|
||||
: unsupported in VS6
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
typedef UINT_PTR UIntPtr;
|
||||
#else
|
||||
/*
|
||||
typedef uintptr_t UIntPtr;
|
||||
*/
|
||||
typedef ptrdiff_t UIntPtr;
|
||||
#endif
|
||||
|
||||
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
|
||||
|
||||
#define ADJUST_ALLOC_SIZE 0
|
||||
/*
|
||||
|
|
@ -280,14 +344,36 @@ const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
|
|||
MyAlloc() can return address that is NOT multiple of sizeof(void *).
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
|
||||
uintptr_t : <stdint.h> C99 (optional)
|
||||
: unsupported in VS6
|
||||
*/
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
|
||||
typedef
|
||||
#ifdef _WIN32
|
||||
UINT_PTR
|
||||
#elif 1
|
||||
uintptr_t
|
||||
#else
|
||||
ptrdiff_t
|
||||
#endif
|
||||
MY_uintptr_t;
|
||||
|
||||
#if 0 \
|
||||
|| (defined(__CHERI__) \
|
||||
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ > 8))
|
||||
// for 128-bit pointers (cheri):
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) \
|
||||
((void *)((char *)(p) - ((size_t)(MY_uintptr_t)(p) & ((align) - 1))))
|
||||
#else
|
||||
#define MY_ALIGN_PTR_DOWN(p, align) \
|
||||
((void *)((((MY_uintptr_t)(p)) & ~((MY_uintptr_t)(align) - 1))))
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) && defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L)
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32) \
|
||||
&& (defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR) \
|
||||
|| defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L))
|
||||
#define USE_posix_memalign
|
||||
#endif
|
||||
|
||||
|
|
@ -327,14 +413,13 @@ static int posix_memalign(void **ptr, size_t align, size_t size)
|
|||
|
||||
#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
|
||||
|
||||
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
|
||||
void *z7_AlignedAlloc(size_t size)
|
||||
{
|
||||
#ifndef USE_posix_memalign
|
||||
#ifndef USE_posix_memalign
|
||||
|
||||
void *p;
|
||||
void *pAligned;
|
||||
size_t newSize;
|
||||
UNUSED_VAR(pp);
|
||||
|
||||
/* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
|
||||
block to prevent cache line sharing with another allocated blocks */
|
||||
|
|
@ -359,10 +444,9 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
|
|||
|
||||
return pAligned;
|
||||
|
||||
#else
|
||||
#else
|
||||
|
||||
void *p;
|
||||
UNUSED_VAR(pp);
|
||||
if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
|
||||
return NULL;
|
||||
|
||||
|
|
@ -371,19 +455,37 @@ static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
|
|||
|
||||
return p;
|
||||
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void z7_AlignedFree(void *address)
|
||||
{
|
||||
#ifndef USE_posix_memalign
|
||||
if (address)
|
||||
MyFree(((void **)address)[-1]);
|
||||
#else
|
||||
free(address);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
|
||||
{
|
||||
UNUSED_VAR(pp)
|
||||
return z7_AlignedAlloc(size);
|
||||
}
|
||||
|
||||
|
||||
static void SzAlignedFree(ISzAllocPtr pp, void *address)
|
||||
{
|
||||
UNUSED_VAR(pp);
|
||||
#ifndef USE_posix_memalign
|
||||
UNUSED_VAR(pp)
|
||||
#ifndef USE_posix_memalign
|
||||
if (address)
|
||||
MyFree(((void **)address)[-1]);
|
||||
#else
|
||||
#else
|
||||
free(address);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -391,17 +493,45 @@ const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
|
|||
|
||||
|
||||
|
||||
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
|
||||
|
||||
/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
|
||||
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
|
||||
/*
|
||||
#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
|
||||
*/
|
||||
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
|
||||
#if 1
|
||||
#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
|
||||
#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
|
||||
#else
|
||||
// we can use this simplified code,
|
||||
// if (CAlignOffsetAlloc::offset == (k * sizeof(void *))
|
||||
#define REAL_BLOCK_PTR_VAR(p) (((void **)(p))[-1])
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if 0
|
||||
#ifndef Z7_ALLOC_NO_OFFSET_ALLOCATOR
|
||||
#include <stdio.h>
|
||||
static void PrintPtr(const char *s, const void *p)
|
||||
{
|
||||
const Byte *p2 = (const Byte *)&p;
|
||||
unsigned i;
|
||||
printf("%s %p ", s, p);
|
||||
for (i = sizeof(p); i != 0;)
|
||||
{
|
||||
i--;
|
||||
printf("%02x", p2[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
|
||||
{
|
||||
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
|
||||
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
|
||||
UNUSED_VAR(pp)
|
||||
return z7_AlignedAlloc(size);
|
||||
#else
|
||||
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
|
||||
void *adr;
|
||||
void *pAligned;
|
||||
size_t newSize;
|
||||
|
|
@ -429,6 +559,12 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
|
|||
pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
|
||||
alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
|
||||
|
||||
#if 0
|
||||
printf("\nalignSize = %6x, offset=%6x, size=%8x \n", (unsigned)alignSize, (unsigned)p->offset, (unsigned)size);
|
||||
PrintPtr("base", adr);
|
||||
PrintPtr("alig", pAligned);
|
||||
#endif
|
||||
|
||||
PrintLn();
|
||||
Print("- Aligned: ");
|
||||
Print(" size="); PrintHex(size, 8);
|
||||
|
|
@ -440,19 +576,25 @@ static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
|
|||
REAL_BLOCK_PTR_VAR(pAligned) = adr;
|
||||
|
||||
return pAligned;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
|
||||
{
|
||||
#if defined(Z7_ALLOC_NO_OFFSET_ALLOCATOR)
|
||||
UNUSED_VAR(pp)
|
||||
z7_AlignedFree(address);
|
||||
#else
|
||||
if (address)
|
||||
{
|
||||
CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
|
||||
const CAlignOffsetAlloc *p = Z7_CONTAINER_FROM_VTBL_CONST(pp, CAlignOffsetAlloc, vt);
|
||||
PrintLn();
|
||||
Print("- Aligned Free: ");
|
||||
PrintLn();
|
||||
ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
32
C/Alloc.h
32
C/Alloc.h
|
|
@ -1,31 +1,49 @@
|
|||
/* Alloc.h -- Memory allocation functions
|
||||
2021-07-13 : Igor Pavlov : Public domain */
|
||||
2024-01-22 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __COMMON_ALLOC_H
|
||||
#define __COMMON_ALLOC_H
|
||||
#ifndef ZIP7_INC_ALLOC_H
|
||||
#define ZIP7_INC_ALLOC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/*
|
||||
MyFree(NULL) : is allowed, as free(NULL)
|
||||
MyAlloc(0) : returns NULL : but malloc(0) is allowed to return NULL or non_NULL
|
||||
MyRealloc(NULL, 0) : returns NULL : but realloc(NULL, 0) is allowed to return NULL or non_NULL
|
||||
MyRealloc() is similar to realloc() for the following cases:
|
||||
MyRealloc(non_NULL, 0) : returns NULL and always calls MyFree(ptr)
|
||||
MyRealloc(NULL, non_ZERO) : returns NULL, if allocation failed
|
||||
MyRealloc(non_NULL, non_ZERO) : returns NULL, if reallocation failed
|
||||
*/
|
||||
|
||||
void *MyAlloc(size_t size);
|
||||
void MyFree(void *address);
|
||||
void *MyRealloc(void *address, size_t size);
|
||||
|
||||
void *z7_AlignedAlloc(size_t size);
|
||||
void z7_AlignedFree(void *p);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#ifdef Z7_LARGE_PAGES
|
||||
void SetLargePageSize(void);
|
||||
#endif
|
||||
|
||||
void *MidAlloc(size_t size);
|
||||
void MidFree(void *address);
|
||||
void *BigAlloc(size_t size);
|
||||
void BigFree(void *address);
|
||||
|
||||
/* #define Z7_BIG_ALLOC_IS_ZERO_FILLED */
|
||||
|
||||
#else
|
||||
|
||||
#define MidAlloc(size) MyAlloc(size)
|
||||
#define MidFree(address) MyFree(address)
|
||||
#define BigAlloc(size) MyAlloc(size)
|
||||
#define BigFree(address) MyFree(address)
|
||||
#define MidAlloc(size) z7_AlignedAlloc(size)
|
||||
#define MidFree(address) z7_AlignedFree(address)
|
||||
#define BigAlloc(size) z7_AlignedAlloc(size)
|
||||
#define BigFree(address) z7_AlignedFree(address)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
12
C/Asm_c.mak
Normal file
12
C/Asm_c.mak
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
!IFDEF ASM_OBJS
|
||||
!IF "$(PLATFORM)" == "arm64"
|
||||
$(ASM_OBJS): ../../../Asm/arm64/$(*B).S
|
||||
$(COMPL_ASM_CLANG)
|
||||
!ELSEIF "$(PLATFORM)" == "arm"
|
||||
$(ASM_OBJS): ../../../Asm/arm/$(*B).asm
|
||||
$(COMPL_ASM)
|
||||
!ELSEIF "$(PLATFORM)" != "ia64" && "$(PLATFORM)" != "mips"
|
||||
$(ASM_OBJS): ../../../Asm/x86/$(*B).asm
|
||||
$(COMPL_ASM)
|
||||
!ENDIF
|
||||
!ENDIF
|
||||
325
C/Bcj2.c
325
C/Bcj2.c
|
|
@ -1,29 +1,24 @@
|
|||
/* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
2023-03-01 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "Bcj2.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define CProb UInt16
|
||||
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
#define kNumModelBits 11
|
||||
#define kBitModelTotal (1 << kNumModelBits)
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
#define kNumMoveBits 5
|
||||
|
||||
#define _IF_BIT_0 ttt = *prob; bound = (p->range >> kNumModelBits) * ttt; if (p->code < bound)
|
||||
#define _UPDATE_0 p->range = bound; *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
#define _UPDATE_1 p->range -= bound; p->code -= bound; *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
|
||||
// UInt32 bcj2_stats[256 + 2][2];
|
||||
|
||||
void Bcj2Dec_Init(CBcj2Dec *p)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
p->state = BCJ2_DEC_STATE_OK;
|
||||
p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
|
||||
p->ip = 0;
|
||||
p->temp[3] = 0;
|
||||
p->temp = 0;
|
||||
p->range = 0;
|
||||
p->code = 0;
|
||||
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
|
||||
|
|
@ -32,217 +27,248 @@ void Bcj2Dec_Init(CBcj2Dec *p)
|
|||
|
||||
SRes Bcj2Dec_Decode(CBcj2Dec *p)
|
||||
{
|
||||
UInt32 v = p->temp;
|
||||
// const Byte *src;
|
||||
if (p->range <= 5)
|
||||
{
|
||||
p->state = BCJ2_DEC_STATE_OK;
|
||||
UInt32 code = p->code;
|
||||
p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
|
||||
for (; p->range != 5; p->range++)
|
||||
{
|
||||
if (p->range == 1 && p->code != 0)
|
||||
if (p->range == 1 && code != 0)
|
||||
return SZ_ERROR_DATA;
|
||||
|
||||
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
|
||||
{
|
||||
p->state = BCJ2_STREAM_RC;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
|
||||
code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
|
||||
p->code = code;
|
||||
}
|
||||
|
||||
if (p->code == 0xFFFFFFFF)
|
||||
if (code == 0xffffffff)
|
||||
return SZ_ERROR_DATA;
|
||||
|
||||
p->range = 0xFFFFFFFF;
|
||||
p->range = 0xffffffff;
|
||||
}
|
||||
else if (p->state >= BCJ2_DEC_STATE_ORIG_0)
|
||||
// else
|
||||
{
|
||||
while (p->state <= BCJ2_DEC_STATE_ORIG_3)
|
||||
unsigned state = p->state;
|
||||
// we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
|
||||
if (BCJ2_IS_32BIT_STREAM(state))
|
||||
{
|
||||
const Byte *cur = p->bufs[state];
|
||||
if (cur == p->lims[state])
|
||||
return SZ_OK;
|
||||
p->bufs[state] = cur + 4;
|
||||
{
|
||||
const UInt32 ip = p->ip + 4;
|
||||
v = GetBe32a(cur) - ip;
|
||||
p->ip = ip;
|
||||
}
|
||||
state = BCJ2_DEC_STATE_ORIG_0;
|
||||
}
|
||||
if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
|
||||
{
|
||||
Byte *dest = p->dest;
|
||||
if (dest == p->destLim)
|
||||
return SZ_OK;
|
||||
*dest = p->temp[(size_t)p->state - BCJ2_DEC_STATE_ORIG_0];
|
||||
p->state++;
|
||||
p->dest = dest + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (BCJ2_IS_32BIT_STREAM(p->state))
|
||||
{
|
||||
const Byte *cur = p->bufs[p->state];
|
||||
if (cur == p->lims[p->state])
|
||||
return SZ_OK;
|
||||
p->bufs[p->state] = cur + 4;
|
||||
|
||||
{
|
||||
UInt32 val;
|
||||
Byte *dest;
|
||||
SizeT rem;
|
||||
|
||||
p->ip += 4;
|
||||
val = GetBe32(cur) - p->ip;
|
||||
dest = p->dest;
|
||||
rem = p->destLim - dest;
|
||||
if (rem < 4)
|
||||
for (;;)
|
||||
{
|
||||
SizeT i;
|
||||
SetUi32(p->temp, val);
|
||||
for (i = 0; i < rem; i++)
|
||||
dest[i] = p->temp[i];
|
||||
p->dest = dest + rem;
|
||||
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
|
||||
return SZ_OK;
|
||||
if (dest == p->destLim)
|
||||
{
|
||||
p->state = state;
|
||||
p->temp = v;
|
||||
return SZ_OK;
|
||||
}
|
||||
*dest++ = (Byte)v;
|
||||
p->dest = dest;
|
||||
if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
|
||||
break;
|
||||
v >>= 8;
|
||||
}
|
||||
SetUi32(dest, val);
|
||||
p->temp[3] = (Byte)(val >> 24);
|
||||
p->dest = dest + 4;
|
||||
p->state = BCJ2_DEC_STATE_OK;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// src = p->bufs[BCJ2_STREAM_MAIN];
|
||||
for (;;)
|
||||
{
|
||||
/*
|
||||
if (BCJ2_IS_32BIT_STREAM(p->state))
|
||||
p->state = BCJ2_DEC_STATE_OK;
|
||||
else
|
||||
*/
|
||||
{
|
||||
if (p->range < kTopValue)
|
||||
{
|
||||
if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
|
||||
{
|
||||
p->state = BCJ2_STREAM_RC;
|
||||
p->temp = v;
|
||||
return SZ_OK;
|
||||
}
|
||||
p->range <<= 8;
|
||||
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
|
||||
}
|
||||
|
||||
{
|
||||
const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
|
||||
const Byte *srcLim;
|
||||
Byte *dest;
|
||||
SizeT num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
|
||||
|
||||
if (num == 0)
|
||||
Byte *dest = p->dest;
|
||||
{
|
||||
p->state = BCJ2_STREAM_MAIN;
|
||||
return SZ_OK;
|
||||
const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
|
||||
SizeT num = (SizeT)(p->destLim - dest);
|
||||
if (num >= rem)
|
||||
num = rem;
|
||||
#define NUM_ITERS 4
|
||||
#if (NUM_ITERS & (NUM_ITERS - 1)) == 0
|
||||
num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
|
||||
#else
|
||||
num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
|
||||
#endif
|
||||
srcLim = src + num;
|
||||
}
|
||||
|
||||
dest = p->dest;
|
||||
if (num > (SizeT)(p->destLim - dest))
|
||||
|
||||
#define NUM_SHIFT_BITS 24
|
||||
#define ONE_ITER(indx) { \
|
||||
const unsigned b = src[indx]; \
|
||||
*dest++ = (Byte)b; \
|
||||
v = (v << NUM_SHIFT_BITS) | b; \
|
||||
if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
|
||||
if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
|
||||
((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
|
||||
/* ++dest */; /* v = b; */ }
|
||||
|
||||
if (src != srcLim)
|
||||
for (;;)
|
||||
{
|
||||
num = (SizeT)(p->destLim - dest);
|
||||
if (num == 0)
|
||||
/* The dependency chain of 2-cycle for (v) calculation is not big problem here.
|
||||
But we can remove dependency chain with v = b in the end of loop. */
|
||||
ONE_ITER(0)
|
||||
#if (NUM_ITERS > 1)
|
||||
ONE_ITER(1)
|
||||
#if (NUM_ITERS > 2)
|
||||
ONE_ITER(2)
|
||||
#if (NUM_ITERS > 3)
|
||||
ONE_ITER(3)
|
||||
#if (NUM_ITERS > 4)
|
||||
ONE_ITER(4)
|
||||
#if (NUM_ITERS > 5)
|
||||
ONE_ITER(5)
|
||||
#if (NUM_ITERS > 6)
|
||||
ONE_ITER(6)
|
||||
#if (NUM_ITERS > 7)
|
||||
ONE_ITER(7)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
src += NUM_ITERS;
|
||||
if (src == srcLim)
|
||||
break;
|
||||
}
|
||||
|
||||
if (src == srcLim)
|
||||
#if (NUM_ITERS > 1)
|
||||
for (;;)
|
||||
#endif
|
||||
{
|
||||
#if (NUM_ITERS > 1)
|
||||
if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
|
||||
#endif
|
||||
{
|
||||
p->state = BCJ2_DEC_STATE_ORIG;
|
||||
const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
|
||||
p->bufs[BCJ2_STREAM_MAIN] = src;
|
||||
p->dest = dest;
|
||||
p->ip += (UInt32)num;
|
||||
/* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
|
||||
p->state =
|
||||
src == p->lims[BCJ2_STREAM_MAIN] ?
|
||||
(unsigned)BCJ2_STREAM_MAIN :
|
||||
(unsigned)BCJ2_DEC_STATE_ORIG;
|
||||
p->temp = v;
|
||||
return SZ_OK;
|
||||
}
|
||||
#if (NUM_ITERS > 1)
|
||||
ONE_ITER(0)
|
||||
src++;
|
||||
#endif
|
||||
}
|
||||
|
||||
srcLim = src + num;
|
||||
|
||||
if (p->temp[3] == 0x0F && (src[0] & 0xF0) == 0x80)
|
||||
*dest = src[0];
|
||||
else for (;;)
|
||||
{
|
||||
Byte b = *src;
|
||||
*dest = b;
|
||||
if (b != 0x0F)
|
||||
{
|
||||
if ((b & 0xFE) == 0xE8)
|
||||
break;
|
||||
dest++;
|
||||
if (++src != srcLim)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
dest++;
|
||||
if (++src == srcLim)
|
||||
break;
|
||||
if ((*src & 0xF0) != 0x80)
|
||||
continue;
|
||||
*dest = *src;
|
||||
break;
|
||||
}
|
||||
|
||||
num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
|
||||
|
||||
if (src == srcLim)
|
||||
{
|
||||
p->temp[3] = src[-1];
|
||||
p->bufs[BCJ2_STREAM_MAIN] = src;
|
||||
const SizeT num = (SizeT)(dest - p->dest);
|
||||
p->dest = dest; // p->dest += num;
|
||||
p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
|
||||
p->ip += (UInt32)num;
|
||||
p->dest += num;
|
||||
p->state =
|
||||
p->bufs[BCJ2_STREAM_MAIN] ==
|
||||
p->lims[BCJ2_STREAM_MAIN] ?
|
||||
(unsigned)BCJ2_STREAM_MAIN :
|
||||
(unsigned)BCJ2_DEC_STATE_ORIG;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 bound, ttt;
|
||||
CProb *prob;
|
||||
Byte b = src[0];
|
||||
Byte prev = (Byte)(num == 0 ? p->temp[3] : src[-1]);
|
||||
|
||||
p->temp[3] = b;
|
||||
p->bufs[BCJ2_STREAM_MAIN] = src + 1;
|
||||
num++;
|
||||
p->ip += (UInt32)num;
|
||||
p->dest += num;
|
||||
|
||||
prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)prev : (b == 0xE9 ? 1 : 0));
|
||||
|
||||
_IF_BIT_0
|
||||
CBcj2Prob *prob; // unsigned index;
|
||||
/*
|
||||
prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
|
||||
2 + (Byte)(v >> 8) :
|
||||
((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
|
||||
*/
|
||||
{
|
||||
_UPDATE_0
|
||||
const unsigned c = ((v + 0x17) >> 6) & 1;
|
||||
prob = p->probs + (unsigned)
|
||||
(((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
|
||||
// (Byte)
|
||||
// 8x->0 : e9->1 : xxe8->xx+2
|
||||
// 8x->0x100 : e9->0x101 : xxe8->xx
|
||||
// (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
|
||||
// (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
|
||||
}
|
||||
ttt = *prob;
|
||||
bound = (p->range >> kNumBitModelTotalBits) * ttt;
|
||||
if (p->code < bound)
|
||||
{
|
||||
// bcj2_stats[prob - p->probs][0]++;
|
||||
p->range = bound;
|
||||
*prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
continue;
|
||||
}
|
||||
_UPDATE_1
|
||||
|
||||
{
|
||||
// bcj2_stats[prob - p->probs][1]++;
|
||||
p->range -= bound;
|
||||
p->code -= bound;
|
||||
*prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 val;
|
||||
unsigned cj = (p->temp[3] == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
|
||||
/* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
|
||||
// const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
|
||||
const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
|
||||
const Byte *cur = p->bufs[cj];
|
||||
Byte *dest;
|
||||
SizeT rem;
|
||||
|
||||
if (cur == p->lims[cj])
|
||||
{
|
||||
p->state = cj;
|
||||
break;
|
||||
}
|
||||
|
||||
val = GetBe32(cur);
|
||||
v = GetBe32a(cur);
|
||||
p->bufs[cj] = cur + 4;
|
||||
|
||||
p->ip += 4;
|
||||
val -= p->ip;
|
||||
{
|
||||
const UInt32 ip = p->ip + 4;
|
||||
v -= ip;
|
||||
p->ip = ip;
|
||||
}
|
||||
dest = p->dest;
|
||||
rem = (SizeT)(p->destLim - dest);
|
||||
|
||||
if (rem < 4)
|
||||
{
|
||||
p->temp[0] = (Byte)val; if (rem > 0) dest[0] = (Byte)val; val >>= 8;
|
||||
p->temp[1] = (Byte)val; if (rem > 1) dest[1] = (Byte)val; val >>= 8;
|
||||
p->temp[2] = (Byte)val; if (rem > 2) dest[2] = (Byte)val; val >>= 8;
|
||||
p->temp[3] = (Byte)val;
|
||||
if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
|
||||
if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
|
||||
if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
|
||||
p->temp = v;
|
||||
p->dest = dest + rem;
|
||||
p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
|
||||
break;
|
||||
}
|
||||
|
||||
SetUi32(dest, val);
|
||||
p->temp[3] = (Byte)(val >> 24);
|
||||
SetUi32(dest, v)
|
||||
v >>= 24;
|
||||
p->dest = dest + 4;
|
||||
}
|
||||
}
|
||||
|
|
@ -252,6 +278,13 @@ SRes Bcj2Dec_Decode(CBcj2Dec *p)
|
|||
p->range <<= 8;
|
||||
p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
|
||||
}
|
||||
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
#undef NUM_ITERS
|
||||
#undef ONE_ITER
|
||||
#undef NUM_SHIFT_BITS
|
||||
#undef kTopValue
|
||||
#undef kNumBitModelTotalBits
|
||||
#undef kBitModelTotal
|
||||
#undef kNumMoveBits
|
||||
|
|
|
|||
270
C/Bcj2.h
270
C/Bcj2.h
|
|
@ -1,8 +1,8 @@
|
|||
/* Bcj2.h -- BCJ2 Converter for x86 code
|
||||
2014-11-10 : Igor Pavlov : Public domain */
|
||||
/* Bcj2.h -- BCJ2 converter for x86 code (Branch CALL/JUMP variant2)
|
||||
2023-03-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BCJ2_H
|
||||
#define __BCJ2_H
|
||||
#ifndef ZIP7_INC_BCJ2_H
|
||||
#define ZIP7_INC_BCJ2_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -26,37 +26,68 @@ enum
|
|||
BCJ2_DEC_STATE_ORIG_3,
|
||||
|
||||
BCJ2_DEC_STATE_ORIG,
|
||||
BCJ2_DEC_STATE_OK
|
||||
BCJ2_DEC_STATE_ERROR /* after detected data error */
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
BCJ2_ENC_STATE_ORIG = BCJ2_NUM_STREAMS,
|
||||
BCJ2_ENC_STATE_OK
|
||||
BCJ2_ENC_STATE_FINISHED /* it's state after fully encoded stream */
|
||||
};
|
||||
|
||||
|
||||
#define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP)
|
||||
/* #define BCJ2_IS_32BIT_STREAM(s) ((s) == BCJ2_STREAM_CALL || (s) == BCJ2_STREAM_JUMP) */
|
||||
#define BCJ2_IS_32BIT_STREAM(s) ((unsigned)((unsigned)(s) - (unsigned)BCJ2_STREAM_CALL) < 2)
|
||||
|
||||
/*
|
||||
CBcj2Dec / CBcj2Enc
|
||||
bufs sizes:
|
||||
BUF_SIZE(n) = lims[n] - bufs[n]
|
||||
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be mutliply of 4:
|
||||
bufs sizes for BCJ2_STREAM_CALL and BCJ2_STREAM_JUMP must be a multiple of 4:
|
||||
(BUF_SIZE(BCJ2_STREAM_CALL) & 3) == 0
|
||||
(BUF_SIZE(BCJ2_STREAM_JUMP) & 3) == 0
|
||||
*/
|
||||
|
||||
// typedef UInt32 CBcj2Prob;
|
||||
typedef UInt16 CBcj2Prob;
|
||||
|
||||
/*
|
||||
BCJ2 encoder / decoder internal requirements:
|
||||
- If last bytes of stream contain marker (e8/e9/0f8x), then
|
||||
there is also encoded symbol (0 : no conversion) in RC stream.
|
||||
- One case of overlapped instructions is supported,
|
||||
if last byte of converted instruction is (0f) and next byte is (8x):
|
||||
marker [xx xx xx 0f] 8x
|
||||
then the pair (0f 8x) is treated as marker.
|
||||
*/
|
||||
|
||||
/* ---------- BCJ2 Decoder ---------- */
|
||||
|
||||
/*
|
||||
CBcj2Dec:
|
||||
dest is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
|
||||
(dest) is allowed to overlap with bufs[BCJ2_STREAM_MAIN], with the following conditions:
|
||||
bufs[BCJ2_STREAM_MAIN] >= dest &&
|
||||
bufs[BCJ2_STREAM_MAIN] - dest >= tempReserv +
|
||||
bufs[BCJ2_STREAM_MAIN] - dest >=
|
||||
BUF_SIZE(BCJ2_STREAM_CALL) +
|
||||
BUF_SIZE(BCJ2_STREAM_JUMP)
|
||||
tempReserv = 0 : for first call of Bcj2Dec_Decode
|
||||
tempReserv = 4 : for any other calls of Bcj2Dec_Decode
|
||||
overlap with offset = 1 is not allowed
|
||||
reserve = bufs[BCJ2_STREAM_MAIN] - dest -
|
||||
( BUF_SIZE(BCJ2_STREAM_CALL) +
|
||||
BUF_SIZE(BCJ2_STREAM_JUMP) )
|
||||
and additional conditions:
|
||||
if (it's first call of Bcj2Dec_Decode() after Bcj2Dec_Init())
|
||||
{
|
||||
(reserve != 1) : if (ver < v23.00)
|
||||
}
|
||||
else // if there is more than one call of Bcj2Dec_Decode() after Bcj2Dec_Init()
|
||||
{
|
||||
(reserve >= 6) : if (ver < v23.00)
|
||||
(reserve >= 4) : if (ver >= v23.00)
|
||||
We need that (reserve) because after first call of Bcj2Dec_Decode(),
|
||||
CBcj2Dec::temp can contain up to 4 bytes for writing to (dest).
|
||||
}
|
||||
(reserve == 0) is allowed, if we decode full stream via single call of Bcj2Dec_Decode().
|
||||
(reserve == 0) also is allowed in case of multi-call, if we use fixed buffers,
|
||||
and (reserve) is calculated from full (final) sizes of all streams before first call.
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
|
|
@ -68,22 +99,66 @@ typedef struct
|
|||
|
||||
unsigned state; /* BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
|
||||
|
||||
UInt32 ip;
|
||||
Byte temp[4];
|
||||
UInt32 ip; /* property of starting base for decoding */
|
||||
UInt32 temp; /* Byte temp[4]; */
|
||||
UInt32 range;
|
||||
UInt32 code;
|
||||
UInt16 probs[2 + 256];
|
||||
CBcj2Prob probs[2 + 256];
|
||||
} CBcj2Dec;
|
||||
|
||||
|
||||
/* Note:
|
||||
Bcj2Dec_Init() sets (CBcj2Dec::ip = 0)
|
||||
if (ip != 0) property is required, the caller must set CBcj2Dec::ip after Bcj2Dec_Init()
|
||||
*/
|
||||
void Bcj2Dec_Init(CBcj2Dec *p);
|
||||
|
||||
/* Returns: SZ_OK or SZ_ERROR_DATA */
|
||||
|
||||
/* Bcj2Dec_Decode():
|
||||
returns:
|
||||
SZ_OK
|
||||
SZ_ERROR_DATA : if data in 5 starting bytes of BCJ2_STREAM_RC stream are not correct
|
||||
*/
|
||||
SRes Bcj2Dec_Decode(CBcj2Dec *p);
|
||||
|
||||
#define Bcj2Dec_IsFinished(_p_) ((_p_)->code == 0)
|
||||
/* To check that decoding was finished you can compare
|
||||
sizes of processed streams with sizes known from another sources.
|
||||
You must do at least one mandatory check from the two following options:
|
||||
- the check for size of processed output (ORIG) stream.
|
||||
- the check for size of processed input (MAIN) stream.
|
||||
additional optional checks:
|
||||
- the checks for processed sizes of all input streams (MAIN, CALL, JUMP, RC)
|
||||
- the checks Bcj2Dec_IsMaybeFinished*()
|
||||
also before actual decoding you can check that the
|
||||
following condition is met for stream sizes:
|
||||
( size(ORIG) == size(MAIN) + size(CALL) + size(JUMP) )
|
||||
*/
|
||||
|
||||
/* (state == BCJ2_STREAM_MAIN) means that decoder is ready for
|
||||
additional input data in BCJ2_STREAM_MAIN stream.
|
||||
Note that (state == BCJ2_STREAM_MAIN) is allowed for non-finished decoding.
|
||||
*/
|
||||
#define Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) ((_p_)->state == BCJ2_STREAM_MAIN)
|
||||
|
||||
/* if the stream decoding was finished correctly, then range decoder
|
||||
part of CBcj2Dec also was finished, and then (CBcj2Dec::code == 0).
|
||||
Note that (CBcj2Dec::code == 0) is allowed for non-finished decoding.
|
||||
*/
|
||||
#define Bcj2Dec_IsMaybeFinished_code(_p_) ((_p_)->code == 0)
|
||||
|
||||
/* use Bcj2Dec_IsMaybeFinished() only as additional check
|
||||
after at least one mandatory check from the two following options:
|
||||
- the check for size of processed output (ORIG) stream.
|
||||
- the check for size of processed input (MAIN) stream.
|
||||
*/
|
||||
#define Bcj2Dec_IsMaybeFinished(_p_) ( \
|
||||
Bcj2Dec_IsMaybeFinished_state_MAIN(_p_) && \
|
||||
Bcj2Dec_IsMaybeFinished_code(_p_))
|
||||
|
||||
|
||||
|
||||
/* ---------- BCJ2 Encoder ---------- */
|
||||
|
||||
typedef enum
|
||||
{
|
||||
BCJ2_ENC_FINISH_MODE_CONTINUE,
|
||||
|
|
@ -91,6 +166,91 @@ typedef enum
|
|||
BCJ2_ENC_FINISH_MODE_END_STREAM
|
||||
} EBcj2Enc_FinishMode;
|
||||
|
||||
/*
|
||||
BCJ2_ENC_FINISH_MODE_CONTINUE:
|
||||
process non finished encoding.
|
||||
It notifies the encoder that additional further calls
|
||||
can provide more input data (src) than provided by current call.
|
||||
In that case the CBcj2Enc encoder still can move (src) pointer
|
||||
up to (srcLim), but CBcj2Enc encoder can store some of the last
|
||||
processed bytes (up to 4 bytes) from src to internal CBcj2Enc::temp[] buffer.
|
||||
at return:
|
||||
(CBcj2Enc::src will point to position that includes
|
||||
processed data and data copied to (temp[]) buffer)
|
||||
That data from (temp[]) buffer will be used in further calls.
|
||||
|
||||
BCJ2_ENC_FINISH_MODE_END_BLOCK:
|
||||
finish encoding of current block (ended at srcLim) without RC flushing.
|
||||
at return: if ((CBcj2Enc::state == BCJ2_ENC_STATE_ORIG) &&
|
||||
CBcj2Enc::src == CBcj2Enc::srcLim)
|
||||
: it shows that block encoding was finished. And the encoder is
|
||||
ready for new (src) data or for stream finish operation.
|
||||
finished block means
|
||||
{
|
||||
CBcj2Enc has completed block encoding up to (srcLim).
|
||||
(1 + 4 bytes) or (2 + 4 bytes) CALL/JUMP sequences will
|
||||
not cross block boundary at (srcLim).
|
||||
temporary CBcj2Enc buffer for (ORIG) src data is empty.
|
||||
3 output uncompressed streams (MAIN, CALL, JUMP) were flushed.
|
||||
RC stream was not flushed. And RC stream will cross block boundary.
|
||||
}
|
||||
Note: some possible implementation of BCJ2 encoder could
|
||||
write branch marker (e8/e9/0f8x) in one call of Bcj2Enc_Encode(),
|
||||
and it could calculate symbol for RC in another call of Bcj2Enc_Encode().
|
||||
BCJ2 encoder uses ip/fileIp/fileSize/relatLimit values to calculate RC symbol.
|
||||
And these CBcj2Enc variables can have different values in different Bcj2Enc_Encode() calls.
|
||||
So caller must finish each block with BCJ2_ENC_FINISH_MODE_END_BLOCK
|
||||
to ensure that RC symbol is calculated and written in proper block.
|
||||
|
||||
BCJ2_ENC_FINISH_MODE_END_STREAM
|
||||
finish encoding of stream (ended at srcLim) fully including RC flushing.
|
||||
at return: if (CBcj2Enc::state == BCJ2_ENC_STATE_FINISHED)
|
||||
: it shows that stream encoding was finished fully,
|
||||
and all output streams were flushed fully.
|
||||
also Bcj2Enc_IsFinished() can be called.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
32-bit relative offset in JUMP/CALL commands is
|
||||
- (mod 4 GiB) for 32-bit x86 code
|
||||
- signed Int32 for 64-bit x86-64 code
|
||||
BCJ2 encoder also does internal relative to absolute address conversions.
|
||||
And there are 2 possible ways to do it:
|
||||
before v23: we used 32-bit variables and (mod 4 GiB) conversion
|
||||
since v23: we use 64-bit variables and (signed Int32 offset) conversion.
|
||||
The absolute address condition for conversion in v23:
|
||||
((UInt64)((Int64)ip64 - (Int64)fileIp64 + 5 + (Int32)offset) < (UInt64)fileSize64)
|
||||
note that if (fileSize64 > 2 GiB), there is a difference between
|
||||
old (mod 4 GiB) way (v22) and new (signed Int32 offset) way (v23).
|
||||
And new (v23) way is more suitable to encode 64-bit x86-64 code for (fileSize64 > 2 GiB) cases.
|
||||
*/
|
||||
|
||||
/*
|
||||
// for old (v22) way for conversion:
|
||||
typedef UInt32 CBcj2Enc_ip_unsigned;
|
||||
typedef Int32 CBcj2Enc_ip_signed;
|
||||
#define BCJ2_ENC_FileSize_MAX ((UInt32)1 << 31)
|
||||
*/
|
||||
typedef UInt64 CBcj2Enc_ip_unsigned;
|
||||
typedef Int64 CBcj2Enc_ip_signed;
|
||||
|
||||
/* maximum size of file that can be used for conversion condition */
|
||||
#define BCJ2_ENC_FileSize_MAX ((CBcj2Enc_ip_unsigned)0 - 2)
|
||||
|
||||
/* default value of fileSize64_minus1 variable that means
|
||||
that absolute address limitation will not be used */
|
||||
#define BCJ2_ENC_FileSizeField_UNLIMITED ((CBcj2Enc_ip_unsigned)0 - 1)
|
||||
|
||||
/* calculate value that later can be set to CBcj2Enc::fileSize64_minus1 */
|
||||
#define BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize) \
|
||||
((CBcj2Enc_ip_unsigned)(fileSize) - 1)
|
||||
|
||||
/* set CBcj2Enc::fileSize64_minus1 variable from size of file */
|
||||
#define Bcj2Enc_SET_FileSize(p, fileSize) \
|
||||
(p)->fileSize64_minus1 = BCJ2_ENC_GET_FileSizeField_VAL_FROM_FileSize(fileSize);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Byte *bufs[BCJ2_NUM_STREAMS];
|
||||
|
|
@ -101,45 +261,71 @@ typedef struct
|
|||
unsigned state;
|
||||
EBcj2Enc_FinishMode finishMode;
|
||||
|
||||
Byte prevByte;
|
||||
Byte context;
|
||||
Byte flushRem;
|
||||
Byte isFlushState;
|
||||
|
||||
Byte cache;
|
||||
UInt32 range;
|
||||
UInt64 low;
|
||||
UInt64 cacheSize;
|
||||
|
||||
// UInt32 context; // for marker version, it can include marker flag.
|
||||
|
||||
UInt32 ip;
|
||||
|
||||
/* 32-bit ralative offset in JUMP/CALL commands is
|
||||
- (mod 4 GB) in 32-bit mode
|
||||
- signed Int32 in 64-bit mode
|
||||
We use (mod 4 GB) check for fileSize.
|
||||
Use fileSize up to 2 GB, if you want to support 32-bit and 64-bit code conversion. */
|
||||
UInt32 fileIp;
|
||||
UInt32 fileSize; /* (fileSize <= ((UInt32)1 << 31)), 0 means no_limit */
|
||||
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)), 0 means desable_conversion */
|
||||
/* (ip64) and (fileIp64) correspond to virtual source stream position
|
||||
that doesn't include data in temp[] */
|
||||
CBcj2Enc_ip_unsigned ip64; /* current (ip) position */
|
||||
CBcj2Enc_ip_unsigned fileIp64; /* start (ip) position of current file */
|
||||
CBcj2Enc_ip_unsigned fileSize64_minus1; /* size of current file (for conversion limitation) */
|
||||
UInt32 relatLimit; /* (relatLimit <= ((UInt32)1 << 31)) : 0 means disable_conversion */
|
||||
// UInt32 relatExcludeBits;
|
||||
|
||||
UInt32 tempTarget;
|
||||
unsigned tempPos;
|
||||
Byte temp[4 * 2];
|
||||
|
||||
unsigned flushPos;
|
||||
|
||||
UInt16 probs[2 + 256];
|
||||
unsigned tempPos; /* the number of bytes that were copied to temp[] buffer
|
||||
(tempPos <= 4) outside of Bcj2Enc_Encode() */
|
||||
// Byte temp[4]; // for marker version
|
||||
Byte temp[8];
|
||||
CBcj2Prob probs[2 + 256];
|
||||
} CBcj2Enc;
|
||||
|
||||
void Bcj2Enc_Init(CBcj2Enc *p);
|
||||
|
||||
|
||||
/*
|
||||
Bcj2Enc_Encode(): at exit:
|
||||
p->state < BCJ2_NUM_STREAMS : we need more buffer space for output stream
|
||||
(bufs[p->state] == lims[p->state])
|
||||
p->state == BCJ2_ENC_STATE_ORIG : we need more data in input src stream
|
||||
(src == srcLim)
|
||||
p->state == BCJ2_ENC_STATE_FINISHED : after fully encoded stream
|
||||
*/
|
||||
void Bcj2Enc_Encode(CBcj2Enc *p);
|
||||
|
||||
#define Bcj2Enc_Get_InputData_Size(p) ((SizeT)((p)->srcLim - (p)->src) + (p)->tempPos)
|
||||
#define Bcj2Enc_IsFinished(p) ((p)->flushPos == 5)
|
||||
/* Bcj2Enc encoder can look ahead for up to 4 bytes of source stream.
|
||||
CBcj2Enc::tempPos : is the number of bytes that were copied from input stream to temp[] buffer.
|
||||
(CBcj2Enc::src) after Bcj2Enc_Encode() is starting position after
|
||||
fully processed data and after data copied to temp buffer.
|
||||
So if the caller needs to get real number of fully processed input
|
||||
bytes (without look ahead data in temp buffer),
|
||||
the caller must subtract (CBcj2Enc::tempPos) value from processed size
|
||||
value that is calculated based on current (CBcj2Enc::src):
|
||||
cur_processed_pos = Calc_Big_Processed_Pos(enc.src) -
|
||||
Bcj2Enc_Get_AvailInputSize_in_Temp(&enc);
|
||||
*/
|
||||
/* get the size of input data that was stored in temp[] buffer: */
|
||||
#define Bcj2Enc_Get_AvailInputSize_in_Temp(p) ((p)->tempPos)
|
||||
|
||||
#define Bcj2Enc_IsFinished(p) ((p)->flushRem == 0)
|
||||
|
||||
#define BCJ2_RELAT_LIMIT_NUM_BITS 26
|
||||
#define BCJ2_RELAT_LIMIT ((UInt32)1 << BCJ2_RELAT_LIMIT_NUM_BITS)
|
||||
|
||||
/* limit for CBcj2Enc::fileSize variable */
|
||||
#define BCJ2_FileSize_MAX ((UInt32)1 << 31)
|
||||
/* Note : the decoder supports overlapping of marker (0f 80).
|
||||
But we can eliminate such overlapping cases by setting
|
||||
the limit for relative offset conversion as
|
||||
CBcj2Enc::relatLimit <= (0x0f << 24) == (240 MiB)
|
||||
*/
|
||||
/* default value for CBcj2Enc::relatLimit */
|
||||
#define BCJ2_ENC_RELAT_LIMIT_DEFAULT ((UInt32)0x0f << 24)
|
||||
#define BCJ2_ENC_RELAT_LIMIT_MAX ((UInt32)1 << 31)
|
||||
// #define BCJ2_RELAT_EXCLUDE_NUM_BITS 5
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
571
C/Bcj2Enc.c
571
C/Bcj2Enc.c
|
|
@ -1,60 +1,62 @@
|
|||
/* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
/* Bcj2Enc.c -- BCJ2 Encoder converter for x86 code (Branch CALL/JUMP variant2)
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
/* #define SHOW_STAT */
|
||||
|
||||
#ifdef SHOW_STAT
|
||||
#include <stdio.h>
|
||||
#define PRF(x) x
|
||||
#define PRF2(s) printf("%s ip=%8x tempPos=%d src= %8x\n", s, (unsigned)p->ip64, p->tempPos, (unsigned)(p->srcLim - p->src));
|
||||
#else
|
||||
#define PRF(x)
|
||||
#define PRF2(s)
|
||||
#endif
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "Bcj2.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define CProb UInt16
|
||||
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
#define kNumModelBits 11
|
||||
#define kBitModelTotal (1 << kNumModelBits)
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
#define kNumMoveBits 5
|
||||
|
||||
void Bcj2Enc_Init(CBcj2Enc *p)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
p->state = BCJ2_ENC_STATE_OK;
|
||||
p->state = BCJ2_ENC_STATE_ORIG;
|
||||
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
|
||||
|
||||
p->prevByte = 0;
|
||||
|
||||
p->context = 0;
|
||||
p->flushRem = 5;
|
||||
p->isFlushState = 0;
|
||||
p->cache = 0;
|
||||
p->range = 0xFFFFFFFF;
|
||||
p->range = 0xffffffff;
|
||||
p->low = 0;
|
||||
p->cacheSize = 1;
|
||||
|
||||
p->ip = 0;
|
||||
|
||||
p->fileIp = 0;
|
||||
p->fileSize = 0;
|
||||
p->relatLimit = BCJ2_RELAT_LIMIT;
|
||||
|
||||
p->ip64 = 0;
|
||||
p->fileIp64 = 0;
|
||||
p->fileSize64_minus1 = BCJ2_ENC_FileSizeField_UNLIMITED;
|
||||
p->relatLimit = BCJ2_ENC_RELAT_LIMIT_DEFAULT;
|
||||
// p->relatExcludeBits = 0;
|
||||
p->tempPos = 0;
|
||||
|
||||
p->flushPos = 0;
|
||||
|
||||
for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
|
||||
p->probs[i] = kBitModelTotal >> 1;
|
||||
}
|
||||
|
||||
static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
|
||||
// Z7_NO_INLINE
|
||||
Z7_FORCE_INLINE
|
||||
static BoolInt Bcj2_RangeEnc_ShiftLow(CBcj2Enc *p)
|
||||
{
|
||||
if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
|
||||
const UInt32 low = (UInt32)p->low;
|
||||
const unsigned high = (unsigned)
|
||||
#if defined(Z7_MSC_VER_ORIGINAL) \
|
||||
&& defined(MY_CPU_X86) \
|
||||
&& defined(MY_CPU_LE) \
|
||||
&& !defined(MY_CPU_64BIT)
|
||||
// we try to rid of __aullshr() call in MSVS-x86
|
||||
(((const UInt32 *)&p->low)[1]); // [1] : for little-endian only
|
||||
#else
|
||||
(p->low >> 32);
|
||||
#endif
|
||||
if (low < (UInt32)0xff000000 || high != 0)
|
||||
{
|
||||
Byte *buf = p->bufs[BCJ2_STREAM_RC];
|
||||
do
|
||||
|
|
@ -65,247 +67,440 @@ static BoolInt MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
|
|||
p->bufs[BCJ2_STREAM_RC] = buf;
|
||||
return True;
|
||||
}
|
||||
*buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
|
||||
p->cache = 0xFF;
|
||||
*buf++ = (Byte)(p->cache + high);
|
||||
p->cache = 0xff;
|
||||
}
|
||||
while (--p->cacheSize);
|
||||
p->bufs[BCJ2_STREAM_RC] = buf;
|
||||
p->cache = (Byte)((UInt32)p->low >> 24);
|
||||
p->cache = (Byte)(low >> 24);
|
||||
}
|
||||
p->cacheSize++;
|
||||
p->low = (UInt32)p->low << 8;
|
||||
p->low = low << 8;
|
||||
return False;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
We can use 2 alternative versions of code:
|
||||
1) non-marker version:
|
||||
Byte CBcj2Enc::context
|
||||
Byte temp[8];
|
||||
Last byte of marker (e8/e9/[0f]8x) can be written to temp[] buffer.
|
||||
Encoder writes last byte of marker (e8/e9/[0f]8x) to dest, only in conjunction
|
||||
with writing branch symbol to range coder in same Bcj2Enc_Encode_2() call.
|
||||
|
||||
2) marker version:
|
||||
UInt32 CBcj2Enc::context
|
||||
Byte CBcj2Enc::temp[4];
|
||||
MARKER_FLAG in CBcj2Enc::context shows that CBcj2Enc::context contains found marker.
|
||||
it's allowed that
|
||||
one call of Bcj2Enc_Encode_2() writes last byte of marker (e8/e9/[0f]8x) to dest,
|
||||
and another call of Bcj2Enc_Encode_2() does offset conversion.
|
||||
So different values of (fileIp) and (fileSize) are possible
|
||||
in these different Bcj2Enc_Encode_2() calls.
|
||||
|
||||
Also marker version requires additional if((v & MARKER_FLAG) == 0) check in main loop.
|
||||
So we use non-marker version.
|
||||
*/
|
||||
|
||||
/*
|
||||
Corner cases with overlap in multi-block.
|
||||
before v23: there was one corner case, where converted instruction
|
||||
could start in one sub-stream and finish in next sub-stream.
|
||||
If multi-block (solid) encoding is used,
|
||||
and BCJ2_ENC_FINISH_MODE_END_BLOCK is used for each sub-stream.
|
||||
and (0f) is last byte of previous sub-stream
|
||||
and (8x) is first byte of current sub-stream
|
||||
then (0f 8x) pair is treated as marker by BCJ2 encoder and decoder.
|
||||
BCJ2 encoder can convert 32-bit offset for that (0f 8x) sequence,
|
||||
if that offset meets limit requirements.
|
||||
If encoder allows 32-bit offset conversion for such overlap case,
|
||||
then the data in 3 uncompressed BCJ2 streams for some sub-stream
|
||||
can depend on data of previous sub-stream.
|
||||
That corner case is not big problem, and it's rare case.
|
||||
Since v23.00 we do additional check to prevent conversions in such overlap cases.
|
||||
*/
|
||||
|
||||
/*
|
||||
Bcj2Enc_Encode_2() output variables at exit:
|
||||
{
|
||||
if (Bcj2Enc_Encode_2() exits with (p->state == BCJ2_ENC_STATE_ORIG))
|
||||
{
|
||||
it means that encoder needs more input data.
|
||||
if (p->srcLim == p->src) at exit, then
|
||||
{
|
||||
(p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
|
||||
all input data were read and processed, and we are ready for
|
||||
new input data.
|
||||
}
|
||||
else
|
||||
{
|
||||
(p->srcLim != p->src)
|
||||
(p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
|
||||
The encoder has found e8/e9/0f_8x marker,
|
||||
and p->src points to last byte of that marker,
|
||||
Bcj2Enc_Encode_2() needs more input data to get totally
|
||||
5 bytes (last byte of marker and 32-bit branch offset)
|
||||
as continuous array starting from p->src.
|
||||
(p->srcLim - p->src < 5) requirement is met after exit.
|
||||
So non-processed residue from p->src to p->srcLim is always less than 5 bytes.
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
Z7_NO_INLINE
|
||||
static void Bcj2Enc_Encode_2(CBcj2Enc *p)
|
||||
{
|
||||
if (BCJ2_IS_32BIT_STREAM(p->state))
|
||||
if (!p->isFlushState)
|
||||
{
|
||||
Byte *cur = p->bufs[p->state];
|
||||
if (cur == p->lims[p->state])
|
||||
return;
|
||||
SetBe32(cur, p->tempTarget);
|
||||
p->bufs[p->state] = cur + 4;
|
||||
}
|
||||
|
||||
p->state = BCJ2_ENC_STATE_ORIG;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (p->range < kTopValue)
|
||||
{
|
||||
if (RangeEnc_ShiftLow(p))
|
||||
return;
|
||||
p->range <<= 8;
|
||||
}
|
||||
|
||||
const Byte *src;
|
||||
UInt32 v;
|
||||
{
|
||||
const unsigned state = p->state;
|
||||
if (BCJ2_IS_32BIT_STREAM(state))
|
||||
{
|
||||
Byte *cur = p->bufs[state];
|
||||
if (cur == p->lims[state])
|
||||
return;
|
||||
SetBe32a(cur, p->tempTarget)
|
||||
p->bufs[state] = cur + 4;
|
||||
}
|
||||
}
|
||||
p->state = BCJ2_ENC_STATE_ORIG; // for main reason of exit
|
||||
src = p->src;
|
||||
v = p->context;
|
||||
|
||||
// #define WRITE_CONTEXT p->context = v; // for marker version
|
||||
#define WRITE_CONTEXT p->context = (Byte)v;
|
||||
#define WRITE_CONTEXT_AND_SRC p->src = src; WRITE_CONTEXT
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// const Byte *src;
|
||||
// UInt32 v;
|
||||
CBcj2Enc_ip_unsigned ip;
|
||||
if (p->range < kTopValue)
|
||||
{
|
||||
// to reduce register pressure and code size: we save and restore local variables.
|
||||
WRITE_CONTEXT_AND_SRC
|
||||
if (Bcj2_RangeEnc_ShiftLow(p))
|
||||
return;
|
||||
p->range <<= 8;
|
||||
src = p->src;
|
||||
v = p->context;
|
||||
}
|
||||
// src = p->src;
|
||||
// #define MARKER_FLAG ((UInt32)1 << 17)
|
||||
// if ((v & MARKER_FLAG) == 0) // for marker version
|
||||
{
|
||||
const Byte *src = p->src;
|
||||
const Byte *srcLim;
|
||||
Byte *dest;
|
||||
SizeT num = (SizeT)(p->srcLim - src);
|
||||
|
||||
if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
|
||||
Byte *dest = p->bufs[BCJ2_STREAM_MAIN];
|
||||
{
|
||||
if (num <= 4)
|
||||
return;
|
||||
num -= 4;
|
||||
const SizeT remSrc = (SizeT)(p->srcLim - src);
|
||||
SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
|
||||
if (rem >= remSrc)
|
||||
rem = remSrc;
|
||||
srcLim = src + rem;
|
||||
}
|
||||
else if (num == 0)
|
||||
break;
|
||||
/* p->context contains context of previous byte:
|
||||
bits [0 : 7] : src[-1], if (src) was changed in this call
|
||||
bits [8 : 31] : are undefined for non-marker version
|
||||
*/
|
||||
// v = p->context;
|
||||
#define NUM_SHIFT_BITS 24
|
||||
#define CONV_FLAG ((UInt32)1 << 16)
|
||||
#define ONE_ITER { \
|
||||
b = src[0]; \
|
||||
*dest++ = (Byte)b; \
|
||||
v = (v << NUM_SHIFT_BITS) | b; \
|
||||
if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
|
||||
if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
|
||||
((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
|
||||
src++; if (src == srcLim) { break; } }
|
||||
|
||||
dest = p->bufs[BCJ2_STREAM_MAIN];
|
||||
if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
|
||||
if (src != srcLim)
|
||||
for (;;)
|
||||
{
|
||||
num = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest);
|
||||
if (num == 0)
|
||||
/* clang can generate inefficient code with setne instead of two jcc instructions.
|
||||
we can use 2 iterations and external (unsigned b) to avoid that inefficient code generation. */
|
||||
unsigned b;
|
||||
ONE_ITER
|
||||
ONE_ITER
|
||||
}
|
||||
|
||||
ip = p->ip64 + (CBcj2Enc_ip_unsigned)(SizeT)(dest - p->bufs[BCJ2_STREAM_MAIN]);
|
||||
p->bufs[BCJ2_STREAM_MAIN] = dest;
|
||||
p->ip64 = ip;
|
||||
|
||||
if (src == srcLim)
|
||||
{
|
||||
WRITE_CONTEXT_AND_SRC
|
||||
if (src != p->srcLim)
|
||||
{
|
||||
p->state = BCJ2_STREAM_MAIN;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
srcLim = src + num;
|
||||
|
||||
if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
|
||||
*dest = src[0];
|
||||
else for (;;)
|
||||
{
|
||||
Byte b = *src;
|
||||
*dest = b;
|
||||
if (b != 0x0F)
|
||||
{
|
||||
if ((b & 0xFE) == 0xE8)
|
||||
break;
|
||||
dest++;
|
||||
if (++src != srcLim)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
dest++;
|
||||
if (++src == srcLim)
|
||||
break;
|
||||
if ((*src & 0xF0) != 0x80)
|
||||
continue;
|
||||
*dest = *src;
|
||||
/* (p->src == p->srcLim)
|
||||
(p->state == BCJ2_ENC_STATE_ORIG) */
|
||||
if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
|
||||
return;
|
||||
/* (p->finishMode == BCJ2_ENC_FINISH_MODE_END_STREAM) */
|
||||
// (p->flushRem == 5);
|
||||
p->isFlushState = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
num = (SizeT)(src - p->src);
|
||||
|
||||
if (src == srcLim)
|
||||
src++;
|
||||
// p->src = src;
|
||||
}
|
||||
// ip = p->ip; // for marker version
|
||||
/* marker was found */
|
||||
/* (v) contains marker that was found:
|
||||
bits [NUM_SHIFT_BITS : NUM_SHIFT_BITS + 7]
|
||||
: value of src[-2] : xx/xx/0f
|
||||
bits [0 : 7] : value of src[-1] : e8/e9/8x
|
||||
*/
|
||||
{
|
||||
{
|
||||
p->prevByte = src[-1];
|
||||
p->bufs[BCJ2_STREAM_MAIN] = dest;
|
||||
p->src = src;
|
||||
p->ip += (UInt32)num;
|
||||
continue;
|
||||
}
|
||||
|
||||
{
|
||||
Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
|
||||
BoolInt needConvert;
|
||||
|
||||
p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
|
||||
p->ip += (UInt32)num + 1;
|
||||
src++;
|
||||
|
||||
needConvert = False;
|
||||
|
||||
#if NUM_SHIFT_BITS != 24
|
||||
v &= ~(UInt32)CONV_FLAG;
|
||||
#endif
|
||||
// UInt32 relat = 0;
|
||||
if ((SizeT)(p->srcLim - src) >= 4)
|
||||
{
|
||||
UInt32 relatVal = GetUi32(src);
|
||||
if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
|
||||
&& ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
|
||||
needConvert = True;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 bound;
|
||||
unsigned ttt;
|
||||
Byte b = src[-1];
|
||||
CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
|
||||
|
||||
ttt = *prob;
|
||||
bound = (p->range >> kNumModelBits) * ttt;
|
||||
|
||||
if (!needConvert)
|
||||
/*
|
||||
if (relat != 0 || (Byte)v != 0xe8)
|
||||
BoolInt isBigOffset = True;
|
||||
*/
|
||||
const UInt32 relat = GetUi32(src);
|
||||
/*
|
||||
#define EXCLUDE_FLAG ((UInt32)1 << 4)
|
||||
#define NEED_CONVERT(rel) ((((rel) + EXCLUDE_FLAG) & (0 - EXCLUDE_FLAG * 2)) != 0)
|
||||
if (p->relatExcludeBits != 0)
|
||||
{
|
||||
const UInt32 flag = (UInt32)1 << (p->relatExcludeBits - 1);
|
||||
isBigOffset = (((relat + flag) & (0 - flag * 2)) != 0);
|
||||
}
|
||||
// isBigOffset = False; // for debug
|
||||
*/
|
||||
ip -= p->fileIp64;
|
||||
// Use the following if check, if (ip) is 64-bit:
|
||||
if (ip > (((v + 0x20) >> 5) & 1)) // 23.00 : we eliminate milti-block overlap for (Of 80) and (e8/e9)
|
||||
if ((CBcj2Enc_ip_unsigned)((CBcj2Enc_ip_signed)ip + 4 + (Int32)relat) <= p->fileSize64_minus1)
|
||||
if (((UInt32)(relat + p->relatLimit) >> 1) < p->relatLimit)
|
||||
v |= CONV_FLAG;
|
||||
}
|
||||
else if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
|
||||
{
|
||||
// (p->srcLim - src < 4)
|
||||
// /*
|
||||
// for non-marker version
|
||||
p->ip64--; // p->ip = ip - 1;
|
||||
p->bufs[BCJ2_STREAM_MAIN]--;
|
||||
src--;
|
||||
v >>= NUM_SHIFT_BITS;
|
||||
// (0 < p->srcLim - p->src <= 4)
|
||||
// */
|
||||
// v |= MARKER_FLAG; // for marker version
|
||||
/* (p->state == BCJ2_ENC_STATE_ORIG) */
|
||||
WRITE_CONTEXT_AND_SRC
|
||||
return;
|
||||
}
|
||||
{
|
||||
const unsigned c = ((v + 0x17) >> 6) & 1;
|
||||
CBcj2Prob *prob = p->probs + (unsigned)
|
||||
(((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
|
||||
/*
|
||||
((Byte)v == 0xe8 ? 2 + ((Byte)(v >> 8)) :
|
||||
((Byte)v < 0xe8 ? 0 : 1)); // ((v >> 5) & 1));
|
||||
*/
|
||||
const unsigned ttt = *prob;
|
||||
const UInt32 bound = (p->range >> kNumBitModelTotalBits) * ttt;
|
||||
if ((v & CONV_FLAG) == 0)
|
||||
{
|
||||
// static int yyy = 0; yyy++; printf("\n!needConvert = %d\n", yyy);
|
||||
// v = (Byte)v; // for marker version
|
||||
p->range = bound;
|
||||
*prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
p->src = src;
|
||||
p->prevByte = b;
|
||||
*prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
// WRITE_CONTEXT_AND_SRC
|
||||
continue;
|
||||
}
|
||||
|
||||
p->low += bound;
|
||||
p->range -= bound;
|
||||
*prob = (CProb)(ttt - (ttt >> kNumMoveBits));
|
||||
|
||||
*prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
|
||||
}
|
||||
// p->context = src[3];
|
||||
{
|
||||
// const unsigned cj = ((Byte)v == 0xe8 ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP);
|
||||
const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
|
||||
ip = p->ip64;
|
||||
v = GetUi32(src); // relat
|
||||
ip += 4;
|
||||
p->ip64 = ip;
|
||||
src += 4;
|
||||
// p->src = src;
|
||||
{
|
||||
UInt32 relatVal = GetUi32(src);
|
||||
UInt32 absVal;
|
||||
p->ip += 4;
|
||||
absVal = p->ip + relatVal;
|
||||
p->prevByte = src[3];
|
||||
src += 4;
|
||||
p->src = src;
|
||||
const UInt32 absol = (UInt32)ip + v;
|
||||
Byte *cur = p->bufs[cj];
|
||||
v >>= 24;
|
||||
// WRITE_CONTEXT
|
||||
if (cur == p->lims[cj])
|
||||
{
|
||||
unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
|
||||
Byte *cur = p->bufs[cj];
|
||||
if (cur == p->lims[cj])
|
||||
{
|
||||
p->state = cj;
|
||||
p->tempTarget = absVal;
|
||||
return;
|
||||
}
|
||||
SetBe32(cur, absVal);
|
||||
p->bufs[cj] = cur + 4;
|
||||
p->state = cj;
|
||||
p->tempTarget = absol;
|
||||
WRITE_CONTEXT_AND_SRC
|
||||
return;
|
||||
}
|
||||
SetBe32a(cur, absol)
|
||||
p->bufs[cj] = cur + 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end of loop
|
||||
}
|
||||
|
||||
if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
|
||||
return;
|
||||
|
||||
for (; p->flushPos < 5; p->flushPos++)
|
||||
if (RangeEnc_ShiftLow(p))
|
||||
for (; p->flushRem != 0; p->flushRem--)
|
||||
if (Bcj2_RangeEnc_ShiftLow(p))
|
||||
return;
|
||||
p->state = BCJ2_ENC_STATE_OK;
|
||||
p->state = BCJ2_ENC_STATE_FINISHED;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
BCJ2 encoder needs look ahead for up to 4 bytes in (src) buffer.
|
||||
So base function Bcj2Enc_Encode_2()
|
||||
in BCJ2_ENC_FINISH_MODE_CONTINUE mode can return with
|
||||
(p->state == BCJ2_ENC_STATE_ORIG && p->src < p->srcLim)
|
||||
Bcj2Enc_Encode() solves that look ahead problem by using p->temp[] buffer.
|
||||
so if (p->state == BCJ2_ENC_STATE_ORIG) after Bcj2Enc_Encode(),
|
||||
then (p->src == p->srcLim).
|
||||
And the caller's code is simpler with Bcj2Enc_Encode().
|
||||
*/
|
||||
|
||||
Z7_NO_INLINE
|
||||
void Bcj2Enc_Encode(CBcj2Enc *p)
|
||||
{
|
||||
PRF(printf("\n"));
|
||||
PRF(printf("---- ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
|
||||
|
||||
PRF2("\n----")
|
||||
if (p->tempPos != 0)
|
||||
{
|
||||
/* extra: number of bytes that were copied from (src) to (temp) buffer in this call */
|
||||
unsigned extra = 0;
|
||||
|
||||
/* We will touch only minimal required number of bytes in input (src) stream.
|
||||
So we will add input bytes from (src) stream to temp[] with step of 1 byte.
|
||||
We don't add new bytes to temp[] before Bcj2Enc_Encode_2() call
|
||||
in first loop iteration because
|
||||
- previous call of Bcj2Enc_Encode() could use another (finishMode),
|
||||
- previous call could finish with (p->state != BCJ2_ENC_STATE_ORIG).
|
||||
the case with full temp[] buffer (p->tempPos == 4) is possible here.
|
||||
*/
|
||||
for (;;)
|
||||
{
|
||||
// (0 < p->tempPos <= 5) // in non-marker version
|
||||
/* p->src : the current src data position including extra bytes
|
||||
that were copied to temp[] buffer in this call */
|
||||
const Byte *src = p->src;
|
||||
const Byte *srcLim = p->srcLim;
|
||||
EBcj2Enc_FinishMode finishMode = p->finishMode;
|
||||
|
||||
const EBcj2Enc_FinishMode finishMode = p->finishMode;
|
||||
if (src != srcLim)
|
||||
{
|
||||
/* if there are some src data after the data copied to temp[],
|
||||
then we use MODE_CONTINUE for temp data */
|
||||
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
|
||||
}
|
||||
p->src = p->temp;
|
||||
p->srcLim = p->temp + p->tempPos;
|
||||
if (src != srcLim)
|
||||
p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
|
||||
|
||||
PRF(printf(" ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
|
||||
|
||||
PRF2(" ")
|
||||
Bcj2Enc_Encode_2(p);
|
||||
|
||||
{
|
||||
unsigned num = (unsigned)(p->src - p->temp);
|
||||
unsigned tempPos = p->tempPos - num;
|
||||
const unsigned num = (unsigned)(p->src - p->temp);
|
||||
const unsigned tempPos = p->tempPos - num;
|
||||
unsigned i;
|
||||
p->tempPos = tempPos;
|
||||
for (i = 0; i < tempPos; i++)
|
||||
p->temp[i] = p->temp[(size_t)i + num];
|
||||
|
||||
p->temp[i] = p->temp[(SizeT)i + num];
|
||||
// tempPos : number of bytes in temp buffer
|
||||
p->src = src;
|
||||
p->srcLim = srcLim;
|
||||
p->finishMode = finishMode;
|
||||
|
||||
if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
|
||||
if (p->state != BCJ2_ENC_STATE_ORIG)
|
||||
{
|
||||
// (p->tempPos <= 4) // in non-marker version
|
||||
/* if (the reason of exit from Bcj2Enc_Encode_2()
|
||||
is not BCJ2_ENC_STATE_ORIG),
|
||||
then we exit from Bcj2Enc_Encode() with same reason */
|
||||
// optional code begin : we rollback (src) and tempPos, if it's possible:
|
||||
if (extra >= tempPos)
|
||||
extra = tempPos;
|
||||
p->src = src - extra;
|
||||
p->tempPos = tempPos - extra;
|
||||
// optional code end : rollback of (src) and tempPos
|
||||
return;
|
||||
|
||||
}
|
||||
/* (p->tempPos <= 4)
|
||||
(p->state == BCJ2_ENC_STATE_ORIG)
|
||||
so encoder needs more data than in temp[] */
|
||||
if (src == srcLim)
|
||||
return; // src buffer has no more input data.
|
||||
/* (src != srcLim)
|
||||
so we can provide more input data from src for Bcj2Enc_Encode_2() */
|
||||
if (extra >= tempPos)
|
||||
{
|
||||
p->src = src - tempPos;
|
||||
/* (extra >= tempPos) means that temp buffer contains
|
||||
only data from src buffer of this call.
|
||||
So now we can encode without temp buffer */
|
||||
p->src = src - tempPos; // rollback (src)
|
||||
p->tempPos = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
p->temp[tempPos] = src[0];
|
||||
// we append one additional extra byte from (src) to temp[] buffer:
|
||||
p->temp[tempPos] = *src;
|
||||
p->tempPos = tempPos + 1;
|
||||
// (0 < p->tempPos <= 5) // in non-marker version
|
||||
p->src = src + 1;
|
||||
extra++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PRF(printf("++++ ip = %8d tempPos = %8d src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
|
||||
|
||||
PRF2("++++")
|
||||
// (p->tempPos == 0)
|
||||
Bcj2Enc_Encode_2(p);
|
||||
PRF2("====")
|
||||
|
||||
if (p->state == BCJ2_ENC_STATE_ORIG)
|
||||
{
|
||||
const Byte *src = p->src;
|
||||
unsigned rem = (unsigned)(p->srcLim - src);
|
||||
unsigned i;
|
||||
for (i = 0; i < rem; i++)
|
||||
p->temp[i] = src[i];
|
||||
p->tempPos = rem;
|
||||
p->src = src + rem;
|
||||
const Byte *srcLim = p->srcLim;
|
||||
const unsigned rem = (unsigned)(srcLim - src);
|
||||
/* (rem <= 4) here.
|
||||
if (p->src != p->srcLim), then
|
||||
- we copy non-processed bytes from (p->src) to temp[] buffer,
|
||||
- we set p->src equal to p->srcLim.
|
||||
*/
|
||||
if (rem)
|
||||
{
|
||||
unsigned i = 0;
|
||||
p->src = srcLim;
|
||||
p->tempPos = rem;
|
||||
// (0 < p->tempPos <= 4)
|
||||
do
|
||||
p->temp[i] = src[i];
|
||||
while (++i != rem);
|
||||
}
|
||||
// (p->tempPos <= 4)
|
||||
// (p->src == p->srcLim)
|
||||
}
|
||||
}
|
||||
|
||||
#undef PRF2
|
||||
#undef CONV_FLAG
|
||||
#undef MARKER_FLAG
|
||||
#undef WRITE_CONTEXT
|
||||
#undef WRITE_CONTEXT_AND_SRC
|
||||
#undef ONE_ITER
|
||||
#undef NUM_SHIFT_BITS
|
||||
#undef kTopValue
|
||||
#undef kNumBitModelTotalBits
|
||||
#undef kBitModelTotal
|
||||
#undef kNumMoveBits
|
||||
|
|
|
|||
115
C/Blake2.h
115
C/Blake2.h
|
|
@ -1,47 +1,104 @@
|
|||
/* Blake2.h -- BLAKE2 Hash
|
||||
2015-06-30 : Igor Pavlov : Public domain
|
||||
2015 : Samuel Neves : Public domain */
|
||||
/* Blake2.h -- BLAKE2sp Hash
|
||||
2024-01-17 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BLAKE2_H
|
||||
#define __BLAKE2_H
|
||||
#ifndef ZIP7_INC_BLAKE2_H
|
||||
#define ZIP7_INC_BLAKE2_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
#if 0
|
||||
#include "Compiler.h"
|
||||
#include "CpuArch.h"
|
||||
#if defined(MY_CPU_X86_OR_AMD64)
|
||||
#if defined(__SSE2__) \
|
||||
|| defined(_MSC_VER) && _MSC_VER > 1200 \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 30300) \
|
||||
|| defined(__clang__) \
|
||||
|| defined(__INTEL_COMPILER)
|
||||
#include <emmintrin.h> // SSE2
|
||||
#endif
|
||||
|
||||
#if defined(__AVX2__) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|
||||
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
|
||||
#include <immintrin.h>
|
||||
#if defined(__clang__)
|
||||
#include <avxintrin.h>
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
#endif // avx2
|
||||
#endif // MY_CPU_X86_OR_AMD64
|
||||
#endif // 0
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define BLAKE2S_BLOCK_SIZE 64
|
||||
#define BLAKE2S_DIGEST_SIZE 32
|
||||
#define BLAKE2SP_PARALLEL_DEGREE 8
|
||||
#define Z7_BLAKE2S_BLOCK_SIZE 64
|
||||
#define Z7_BLAKE2S_DIGEST_SIZE 32
|
||||
#define Z7_BLAKE2SP_PARALLEL_DEGREE 8
|
||||
#define Z7_BLAKE2SP_NUM_STRUCT_WORDS 16
|
||||
|
||||
#if 1 || defined(Z7_BLAKE2SP_USE_FUNCTIONS)
|
||||
typedef void (Z7_FASTCALL *Z7_BLAKE2SP_FUNC_COMPRESS)(UInt32 *states, const Byte *data, const Byte *end);
|
||||
typedef void (Z7_FASTCALL *Z7_BLAKE2SP_FUNC_INIT)(UInt32 *states);
|
||||
#endif
|
||||
|
||||
// it's required that CBlake2sp is aligned for 32-bytes,
|
||||
// because the code can use unaligned access with sse and avx256.
|
||||
// but 64-bytes alignment can be better.
|
||||
MY_ALIGN(64)
|
||||
typedef struct
|
||||
{
|
||||
UInt32 h[8];
|
||||
UInt32 t[2];
|
||||
UInt32 f[2];
|
||||
Byte buf[BLAKE2S_BLOCK_SIZE];
|
||||
UInt32 bufPos;
|
||||
UInt32 lastNode_f1;
|
||||
UInt32 dummy[2]; /* for sizeof(CBlake2s) alignment */
|
||||
} CBlake2s;
|
||||
union
|
||||
{
|
||||
#if 0
|
||||
#if defined(MY_CPU_X86_OR_AMD64)
|
||||
#if defined(__SSE2__) \
|
||||
|| defined(_MSC_VER) && _MSC_VER > 1200 \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 30300) \
|
||||
|| defined(__clang__) \
|
||||
|| defined(__INTEL_COMPILER)
|
||||
__m128i _pad_align_128bit[4];
|
||||
#endif // sse2
|
||||
#if defined(__AVX2__) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|
||||
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
|
||||
__m256i _pad_align_256bit[2];
|
||||
#endif // avx2
|
||||
#endif // x86
|
||||
#endif // 0
|
||||
|
||||
/* You need to xor CBlake2s::h[i] with input parameter block after Blake2s_Init0() */
|
||||
/*
|
||||
void Blake2s_Init0(CBlake2s *p);
|
||||
void Blake2s_Update(CBlake2s *p, const Byte *data, size_t size);
|
||||
void Blake2s_Final(CBlake2s *p, Byte *digest);
|
||||
*/
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
CBlake2s S[BLAKE2SP_PARALLEL_DEGREE];
|
||||
unsigned bufPos;
|
||||
void * _pad_align_ptr[8];
|
||||
UInt32 _pad_align_32bit[16];
|
||||
struct
|
||||
{
|
||||
unsigned cycPos;
|
||||
unsigned _pad_unused;
|
||||
#if 1 || defined(Z7_BLAKE2SP_USE_FUNCTIONS)
|
||||
Z7_BLAKE2SP_FUNC_COMPRESS func_Compress_Fast;
|
||||
Z7_BLAKE2SP_FUNC_COMPRESS func_Compress_Single;
|
||||
Z7_BLAKE2SP_FUNC_INIT func_Init;
|
||||
Z7_BLAKE2SP_FUNC_INIT func_Final;
|
||||
#endif
|
||||
} header;
|
||||
} u;
|
||||
// MY_ALIGN(64)
|
||||
UInt32 states[Z7_BLAKE2SP_PARALLEL_DEGREE * Z7_BLAKE2SP_NUM_STRUCT_WORDS];
|
||||
// MY_ALIGN(64)
|
||||
UInt32 buf32[Z7_BLAKE2SP_PARALLEL_DEGREE * Z7_BLAKE2SP_NUM_STRUCT_WORDS * 2];
|
||||
} CBlake2sp;
|
||||
|
||||
|
||||
BoolInt Blake2sp_SetFunction(CBlake2sp *p, unsigned algo);
|
||||
void Blake2sp_Init(CBlake2sp *p);
|
||||
void Blake2sp_InitState(CBlake2sp *p);
|
||||
void Blake2sp_Update(CBlake2sp *p, const Byte *data, size_t size);
|
||||
void Blake2sp_Final(CBlake2sp *p, Byte *digest);
|
||||
void z7_Black2sp_Prepare(void);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
2754
C/Blake2s.c
2754
C/Blake2s.c
File diff suppressed because it is too large
Load diff
817
C/Bra.c
817
C/Bra.c
|
|
@ -1,230 +1,709 @@
|
|||
/* Bra.c -- Converters for RISC code
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
/* Bra.c -- Branch converters for RISC code
|
||||
2024-01-20 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "Bra.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
|
||||
#if defined(MY_CPU_SIZEOF_POINTER) \
|
||||
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|
||||
|| MY_CPU_SIZEOF_POINTER == 8)
|
||||
#define BR_CONV_USE_OPT_PC_PTR
|
||||
#endif
|
||||
|
||||
#ifdef BR_CONV_USE_OPT_PC_PTR
|
||||
#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
|
||||
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
|
||||
#else
|
||||
#define BR_PC_INIT pc += (UInt32)size;
|
||||
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
|
||||
// #define BR_PC_INIT
|
||||
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
|
||||
#endif
|
||||
|
||||
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
|
||||
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
|
||||
|
||||
#define Z7_BRANCH_CONV(name) z7_ ## name
|
||||
|
||||
#define Z7_BRANCH_FUNC_MAIN(name) \
|
||||
static \
|
||||
Z7_FORCE_INLINE \
|
||||
Z7_ATTRIB_NO_VECTOR \
|
||||
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
|
||||
|
||||
#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
|
||||
Z7_NO_INLINE \
|
||||
Z7_ATTRIB_NO_VECTOR \
|
||||
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
|
||||
{ return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
|
||||
|
||||
#ifdef Z7_EXTRACT_ONLY
|
||||
#define Z7_BRANCH_FUNCS_IMP(name) \
|
||||
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
|
||||
#else
|
||||
#define Z7_BRANCH_FUNCS_IMP(name) \
|
||||
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
|
||||
Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define BR_EXTERNAL_FOR
|
||||
#define BR_NEXT_ITERATION continue;
|
||||
#else
|
||||
#define BR_EXTERNAL_FOR for (;;)
|
||||
#define BR_NEXT_ITERATION break;
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
|
||||
// GCC is not good for __builtin_expect() here
|
||||
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
|
||||
// #define Z7_unlikely [[unlikely]]
|
||||
// #define Z7_LIKELY(x) (__builtin_expect((x), 1))
|
||||
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
|
||||
// #define Z7_likely [[likely]]
|
||||
#else
|
||||
// #define Z7_LIKELY(x) (x)
|
||||
#define Z7_UNLIKELY(x) (x)
|
||||
// #define Z7_likely
|
||||
#endif
|
||||
|
||||
|
||||
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
|
||||
{
|
||||
Byte *p;
|
||||
// Byte *p = data;
|
||||
const Byte *lim;
|
||||
size &= ~(size_t)3;
|
||||
ip += 4;
|
||||
p = data;
|
||||
lim = data + size;
|
||||
|
||||
if (encoding)
|
||||
|
||||
for (;;)
|
||||
const UInt32 flag = (UInt32)1 << (24 - 4);
|
||||
const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
|
||||
size &= ~(SizeT)3;
|
||||
// if (size == 0) return p;
|
||||
lim = p + size;
|
||||
BR_PC_INIT
|
||||
pc -= 4; // because (p) will point to next instruction
|
||||
|
||||
BR_EXTERNAL_FOR
|
||||
{
|
||||
// Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
for (;;)
|
||||
{
|
||||
if (p >= lim)
|
||||
return (SizeT)(p - data);
|
||||
UInt32 v;
|
||||
if Z7_UNLIKELY(p == lim)
|
||||
return p;
|
||||
v = GetUi32a(p);
|
||||
p += 4;
|
||||
if (p[-1] == 0xEB)
|
||||
break;
|
||||
}
|
||||
{
|
||||
UInt32 v = GetUi32(p - 4);
|
||||
v <<= 2;
|
||||
v += ip + (UInt32)(p - data);
|
||||
v >>= 2;
|
||||
v &= 0x00FFFFFF;
|
||||
v |= 0xEB000000;
|
||||
SetUi32(p - 4, v);
|
||||
}
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (p >= lim)
|
||||
return (SizeT)(p - data);
|
||||
p += 4;
|
||||
if (p[-1] == 0xEB)
|
||||
break;
|
||||
}
|
||||
{
|
||||
UInt32 v = GetUi32(p - 4);
|
||||
v <<= 2;
|
||||
v -= ip + (UInt32)(p - data);
|
||||
v >>= 2;
|
||||
v &= 0x00FFFFFF;
|
||||
v |= 0xEB000000;
|
||||
SetUi32(p - 4, v);
|
||||
if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
|
||||
{
|
||||
UInt32 c = BR_PC_GET >> 2;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
v &= 0x03ffffff;
|
||||
v |= 0x94000000;
|
||||
SetUi32a(p - 4, v)
|
||||
BR_NEXT_ITERATION
|
||||
}
|
||||
// v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
|
||||
v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
|
||||
{
|
||||
UInt32 z, c;
|
||||
// v = rotrFixed(v, 8);
|
||||
v += flag; if Z7_UNLIKELY(v & mask) continue;
|
||||
z = (v & 0xffffffe0) | (v >> 26);
|
||||
c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
|
||||
BR_CONVERT_VAL(z, c)
|
||||
v &= 0x1f;
|
||||
v |= 0x90000000;
|
||||
v |= z << 26;
|
||||
v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
|
||||
SetUi32a(p - 4, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
|
||||
|
||||
|
||||
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
|
||||
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
|
||||
{
|
||||
Byte *p;
|
||||
// Byte *p = data;
|
||||
const Byte *lim;
|
||||
size &= ~(size_t)1;
|
||||
p = data;
|
||||
lim = data + size - 4;
|
||||
|
||||
if (encoding)
|
||||
size &= ~(SizeT)3;
|
||||
lim = p + size;
|
||||
BR_PC_INIT
|
||||
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
|
||||
(p) will point to next instruction */
|
||||
pc += 8 - 4;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 b1;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 b3;
|
||||
if (p > lim)
|
||||
return (SizeT)(p - data);
|
||||
b1 = p[1];
|
||||
b3 = p[3];
|
||||
p += 2;
|
||||
b1 ^= 8;
|
||||
if ((b3 & b1) >= 0xF8)
|
||||
break;
|
||||
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
|
||||
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
|
||||
}
|
||||
{
|
||||
UInt32 v =
|
||||
((UInt32)b1 << 19)
|
||||
+ (((UInt32)p[1] & 0x7) << 8)
|
||||
+ (((UInt32)p[-2] << 11))
|
||||
+ (p[0]);
|
||||
|
||||
p += 2;
|
||||
{
|
||||
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
|
||||
v += cur;
|
||||
}
|
||||
|
||||
p[-4] = (Byte)(v >> 11);
|
||||
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
|
||||
p[-2] = (Byte)v;
|
||||
p[-1] = (Byte)(0xF8 | (v >> 8));
|
||||
}
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 b1;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 b3;
|
||||
if (p > lim)
|
||||
return (SizeT)(p - data);
|
||||
b1 = p[1];
|
||||
b3 = p[3];
|
||||
p += 2;
|
||||
b1 ^= 8;
|
||||
if ((b3 & b1) >= 0xF8)
|
||||
break;
|
||||
}
|
||||
{
|
||||
UInt32 v =
|
||||
((UInt32)b1 << 19)
|
||||
+ (((UInt32)p[1] & 0x7) << 8)
|
||||
+ (((UInt32)p[-2] << 11))
|
||||
+ (p[0]);
|
||||
|
||||
p += 2;
|
||||
{
|
||||
UInt32 cur = (ip + (UInt32)(p - data)) >> 1;
|
||||
v -= cur;
|
||||
}
|
||||
|
||||
/*
|
||||
SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
|
||||
SetUi16(p - 2, (UInt16)(v | 0xF800));
|
||||
*/
|
||||
|
||||
p[-4] = (Byte)(v >> 11);
|
||||
p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7));
|
||||
p[-2] = (Byte)v;
|
||||
p[-1] = (Byte)(0xF8 | (v >> 8));
|
||||
UInt32 v = GetUi32a(p - 4);
|
||||
UInt32 c = BR_PC_GET >> 2;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
v &= 0x00ffffff;
|
||||
v |= 0xeb000000;
|
||||
SetUi32a(p - 4, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
|
||||
|
||||
|
||||
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
|
||||
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
|
||||
{
|
||||
Byte *p;
|
||||
// Byte *p = data;
|
||||
const Byte *lim;
|
||||
size &= ~(size_t)3;
|
||||
ip -= 4;
|
||||
p = data;
|
||||
lim = data + size;
|
||||
|
||||
size &= ~(SizeT)3;
|
||||
lim = p + size;
|
||||
BR_PC_INIT
|
||||
pc -= 4; // because (p) will point to next instruction
|
||||
|
||||
for (;;)
|
||||
{
|
||||
UInt32 v;
|
||||
for (;;)
|
||||
{
|
||||
if (p >= lim)
|
||||
return (SizeT)(p - data);
|
||||
if Z7_UNLIKELY(p == lim)
|
||||
return p;
|
||||
// v = GetBe32a(p);
|
||||
v = *(UInt32 *)(void *)p;
|
||||
p += 4;
|
||||
/* if ((v & 0xFC000003) == 0x48000001) */
|
||||
if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
|
||||
break;
|
||||
// if ((v & 0xfc000003) == 0x48000001) break;
|
||||
// if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
|
||||
if Z7_UNLIKELY(
|
||||
((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
|
||||
& Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
|
||||
}
|
||||
{
|
||||
UInt32 v = GetBe32(p - 4);
|
||||
if (encoding)
|
||||
v += ip + (UInt32)(p - data);
|
||||
else
|
||||
v -= ip + (UInt32)(p - data);
|
||||
v &= 0x03FFFFFF;
|
||||
v = Z7_CONV_NATIVE_TO_BE_32(v);
|
||||
{
|
||||
UInt32 c = BR_PC_GET;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
}
|
||||
v &= 0x03ffffff;
|
||||
v |= 0x48000000;
|
||||
SetBe32(p - 4, v);
|
||||
SetBe32a(p - 4, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
|
||||
|
||||
|
||||
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
|
||||
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
|
||||
#define BR_SPARC_USE_ROTATE
|
||||
#endif
|
||||
|
||||
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
|
||||
{
|
||||
Byte *p;
|
||||
// Byte *p = data;
|
||||
const Byte *lim;
|
||||
size &= ~(size_t)3;
|
||||
ip -= 4;
|
||||
p = data;
|
||||
lim = data + size;
|
||||
|
||||
const UInt32 flag = (UInt32)1 << 22;
|
||||
size &= ~(SizeT)3;
|
||||
lim = p + size;
|
||||
BR_PC_INIT
|
||||
pc -= 4; // because (p) will point to next instruction
|
||||
for (;;)
|
||||
{
|
||||
UInt32 v;
|
||||
for (;;)
|
||||
{
|
||||
if (p >= lim)
|
||||
return (SizeT)(p - data);
|
||||
/*
|
||||
v = GetBe32(p);
|
||||
p += 4;
|
||||
m = v + ((UInt32)5 << 29);
|
||||
m ^= (UInt32)7 << 29;
|
||||
m += (UInt32)1 << 22;
|
||||
if ((m & ((UInt32)0x1FF << 23)) == 0)
|
||||
break;
|
||||
if Z7_UNLIKELY(p == lim)
|
||||
return p;
|
||||
/* // the code without GetBe32a():
|
||||
{ const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
|
||||
*/
|
||||
v = GetBe32a(p);
|
||||
p += 4;
|
||||
if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
|
||||
(p[-4] == 0x7F && (p[-3] >= 0xC0)))
|
||||
#ifdef BR_SPARC_USE_ROTATE
|
||||
v = rotlFixed(v, 2);
|
||||
v += (flag << 2) - 1;
|
||||
if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
|
||||
#else
|
||||
v += (UInt32)5 << 29;
|
||||
v ^= (UInt32)7 << 29;
|
||||
v += flag;
|
||||
if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
{
|
||||
UInt32 v = GetBe32(p - 4);
|
||||
// UInt32 v = GetBe32a(p - 4);
|
||||
#ifndef BR_SPARC_USE_ROTATE
|
||||
v <<= 2;
|
||||
if (encoding)
|
||||
v += ip + (UInt32)(p - data);
|
||||
else
|
||||
v -= ip + (UInt32)(p - data);
|
||||
|
||||
v &= 0x01FFFFFF;
|
||||
v -= (UInt32)1 << 24;
|
||||
v ^= 0xFF000000;
|
||||
#endif
|
||||
{
|
||||
UInt32 c = BR_PC_GET;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
}
|
||||
v &= (flag << 3) - 1;
|
||||
#ifdef BR_SPARC_USE_ROTATE
|
||||
v -= (flag << 2) - 1;
|
||||
v = rotrFixed(v, 2);
|
||||
#else
|
||||
v -= (flag << 2);
|
||||
v >>= 2;
|
||||
v |= 0x40000000;
|
||||
SetBe32(p - 4, v);
|
||||
v |= (UInt32)1 << 30;
|
||||
#endif
|
||||
SetBe32a(p - 4, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
|
||||
|
||||
|
||||
/* BranchConv_ARMT : branch-address filter for 16-bit (Thumb style) code.
   Parameters come from Z7_BRANCH_FUNC_MAIN: (Byte *p, SizeT size, UInt32 pc, int encoding).
   The function scans 16-bit halfwords for a pair whose high bytes are
   0xf0..0xf7 (first halfword) and 0xf8..0xff (second halfword).
   For each such pair it takes the 22-bit value formed by the low 11 bits of
   both halfwords and adds (encoding != 0) or subtracts (encoding == 0)
   the current position in halfword units (BR_PC_GET >> 1).
   Returns the pointer at which scanning stopped. */
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
|
||||
{
|
||||
// Byte *p = data;
|
||||
Byte *lim;
|
||||
// only whole 16-bit halfwords are processed:
size &= ~(SizeT)1;
|
||||
// if (size == 0) return p;
|
||||
// a pair needs two halfwords, so (lim) is set one halfword before the (even) end:
if (size <= 2) return p;
|
||||
size -= 2;
|
||||
lim = p + size;
|
||||
BR_PC_INIT
|
||||
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
|
||||
(p) will point to the +2 instructions from current instruction */
|
||||
// pc += 4 - 4;
|
||||
// if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
|
||||
// #define ARMT_TAIL_PROC { goto armt_tail; }
|
||||
#define ARMT_TAIL_PROC { return p; }
|
||||
|
||||
do
|
||||
{
|
||||
/* in MSVC 32-bit x86 compilers:
|
||||
UInt32 version : it loads value from memory with movzx
|
||||
Byte version : it loads value to 8-bit register (AL/CL)
|
||||
movzx version is slightly faster in some cpus
|
||||
*/
|
||||
unsigned b1;
|
||||
// Byte / unsigned
|
||||
// b1 : high byte of the halfword at (p)
b1 = p[1];
|
||||
// optimized version to reduce one (p >= lim) check:
|
||||
// unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
|
||||
for (;;)
|
||||
{
|
||||
unsigned b3; // Byte / UInt32
|
||||
/* (Byte)(b3) normalization can use low byte computations in MSVC.
|
||||
It gives smaller code, and no loss of speed in some compilers/cpus.
|
||||
But new MSVC 32-bit x86 compilers use more slow load
|
||||
from memory to low byte register in that case.
|
||||
So we try to use full 32-bit computations for faster code.
|
||||
*/
|
||||
// if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
|
||||
// Two unrolled steps: (b1) and (b3) alternate the roles of
// (high byte of previous halfword) and (high byte of next halfword).
// The test passes when (previous ^ 8) and (next) both have their 5 top bits set,
// that is: previous is 0xf0..0xf7 and next is 0xf8..0xff.
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
|
||||
if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
|
||||
}
|
||||
{
|
||||
/* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
|
||||
But gcc/clang for arm64 can use bfi instruction for full code here */
|
||||
UInt32 v =
|
||||
((UInt32)GetUi16a(p - 2) << 11) |
|
||||
((UInt32)GetUi16a(p) & 0x7FF);
|
||||
/*
|
||||
UInt32 v =
|
||||
((UInt32)p[1 - 2] << 19)
|
||||
+ (((UInt32)p[1] & 0x7) << 8)
|
||||
+ (((UInt32)p[-2] << 11))
|
||||
+ (p[0]);
|
||||
*/
|
||||
// here (p - 2) is the first halfword of the pair and (p) is the second one;
// after this step (p) points past the pair:
p += 2;
|
||||
{
|
||||
UInt32 c = BR_PC_GET >> 1;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
}
|
||||
// write back: high 11 bits of (v) under the 0xf000 prefix,
// low 11 bits of (v) under the 0xf800 prefix:
SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
|
||||
SetUi16a(p - 2, (UInt16)(v | 0xf800))
|
||||
/*
|
||||
p[-4] = (Byte)(v >> 11);
|
||||
p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
|
||||
p[-2] = (Byte)v;
|
||||
p[-1] = (Byte)(0xf8 | (v >> 8));
|
||||
*/
|
||||
}
|
||||
}
|
||||
while (p < lim);
|
||||
return p;
|
||||
// armt_tail:
|
||||
// if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
|
||||
// return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
|
||||
// return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
|
||||
// return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
|
||||
|
||||
|
||||
// #define BR_IA64_NO_INLINE
|
||||
|
||||
/* BranchConv_IA64 : branch-address filter that works on 16-byte units.
   Parameters come from Z7_BRANCH_FUNC_MAIN: (Byte *p, SizeT size, UInt32 pc, int encoding).
   For each 16-byte unit the low bits of its first byte select (via a packed table)
   whether the unit is skipped or from which 5-byte step the unit is examined.
   In each examined step a 32-bit word is tested against two bit patterns and,
   if both match, a bit field of that word is adjusted by (pc):
   added if (encoding != 0), subtracted otherwise.
   Returns (p) when it reaches the end of the last whole 16-byte unit. */
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
|
||||
{
|
||||
// Byte *p = data;
|
||||
const Byte *lim;
|
||||
// only whole 16-byte units are processed:
size &= ~(SizeT)15;
|
||||
lim = p + size;
|
||||
// (pc) is kept in units of 8 bytes (pc >> 3). It is moved back by one 16-byte unit here,
// because the scan loop increments it before the unit is processed.
pc -= 1 << 4;
|
||||
pc >>= 4 - 1;
|
||||
// pc -= 1 << 1;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
unsigned m;
|
||||
for (;;)
|
||||
{
|
||||
if Z7_UNLIKELY(p == lim)
|
||||
return p;
|
||||
// 0x334b0000 is a packed table of 2-bit entries indexed by bits [1 : 4] of the first byte.
// After (m &= 3): 0 means "skip this unit", otherwise (m) is the index (1..3)
// from which the 5-byte steps below start.
m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
|
||||
p += 16;
|
||||
pc += 1 << 1;
|
||||
if (m &= 3)
|
||||
break;
|
||||
}
|
||||
{
|
||||
// (p) is at the end of the unit here: step back to (unit_start + m * 5 - 4).
// The loop below advances (p) by 5 per step and stops when (m) reaches 4,
// so (p) is at the end of the unit again after the loop.
p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
|
||||
do
|
||||
{
|
||||
const UInt32 t =
|
||||
#if defined(MY_CPU_X86_OR_AMD64)
|
||||
// we use 32-bit load here to reduce code size on x86:
|
||||
GetUi32(p);
|
||||
#else
|
||||
GetUi16(p);
|
||||
#endif
|
||||
UInt32 z = GetUi32(p + 1) >> m;
|
||||
p += 5;
|
||||
// both bit-pattern tests must pass for the word to be converted.
// The first test also requires that the low (m) bits of the word loaded into (z)
// are zero, so the (>> m) above and the (<< m) below lose no information.
if (((t >> m) & (0x70 << 1)) == 0
|
||||
&& ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
|
||||
{
|
||||
// v : the bits of (z) that hold the value to convert;
// they are cleared in (z) by the xor and merged back after conversion.
UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
|
||||
z ^= v;
|
||||
#ifdef BR_IA64_NO_INLINE
|
||||
v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
|
||||
{
|
||||
UInt32 c = pc;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
}
|
||||
v &= (0x1fffff << 1) | 1;
|
||||
#else
|
||||
{
|
||||
if (encoding)
|
||||
{
|
||||
// pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
|
||||
pc &= (0x1fffff << 1) | 1;
|
||||
v += pc;
|
||||
}
|
||||
else
|
||||
{
|
||||
// pc |= 0xc00000 << 1; // we need to set at least 2 bits
|
||||
pc |= ~(UInt32)((0x1fffff << 1) | 1);
|
||||
v -= pc;
|
||||
}
|
||||
}
|
||||
v &= ~(UInt32)(0x600000 << 1);
|
||||
#endif
|
||||
v += (0x700000 << 1);
|
||||
v &= (0x8fffff << 1) | 1;
|
||||
z |= v;
|
||||
// undo the (>> m) alignment and store the word back
// at the address it was loaded from (p was advanced by 5 since the load):
z <<= m;
|
||||
SetUi32(p + 1 - 5, z)
|
||||
}
|
||||
m++;
|
||||
}
|
||||
while (m &= 3); // while (m < 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
|
||||
|
||||
|
||||
// Encoder adds and decoder subtracts the current position (BR_PC_GET):
#define BR_CONVERT_VAL_ENC(v) v += BR_PC_GET;
|
||||
#define BR_CONVERT_VAL_DEC(v) v -= BR_PC_GET;
|
||||
|
||||
// 32-bit accessors RISCV_GET_UI32 / RISCV_SET_UI32:
//   with RISCV_USE_UNALIGNED_LOAD : one GetUi32 / SetUi32 access,
//   otherwise : two 16-bit accesses (low half at (p), high half at (p) + 2).
#if 1 && defined(MY_CPU_LE_UNALIGN)
|
||||
#define RISCV_USE_UNALIGNED_LOAD
|
||||
#endif
|
||||
|
||||
#ifdef RISCV_USE_UNALIGNED_LOAD
|
||||
#define RISCV_GET_UI32(p) GetUi32(p)
|
||||
#define RISCV_SET_UI32(p, v) { SetUi32(p, v) }
|
||||
#else
|
||||
#define RISCV_GET_UI32(p) \
|
||||
((UInt32)GetUi16a(p) + \
|
||||
((UInt32)GetUi16a((p) + 2) << 16))
|
||||
#define RISCV_SET_UI32(p, v) { \
|
||||
SetUi16a(p, (UInt16)(v)) \
|
||||
SetUi16a((p) + 2, (UInt16)(v >> 16)) }
|
||||
#endif
|
||||
|
||||
// RISCV_LOAD_VAL : the value tested by the scan loop:
//   with RISCV_USE_16BIT_LOAD : 16 bits (GetUi16a), otherwise : one byte.
#if 1 && defined(MY_CPU_LE)
|
||||
#define RISCV_USE_16BIT_LOAD
|
||||
#endif
|
||||
|
||||
#ifdef RISCV_USE_16BIT_LOAD
|
||||
#define RISCV_LOAD_VAL(p) GetUi16a(p)
|
||||
#else
|
||||
#define RISCV_LOAD_VAL(p) (*(p))
|
||||
#endif
|
||||
|
||||
// RISCV_INSTR_SIZE : scan granularity in bytes.
// RISCV_STEP_1 / RISCV_STEP_2 : number of bytes skipped by the converters
//   when RISCV_CHECK_1 / RISCV_CHECK_2 fails.
// RISCV_REG_VAL : the value (2 << 7) that the converters combine with 0x17
//   when they rewrite the first word of a converted AUIPC pair.
#define RISCV_INSTR_SIZE 2
|
||||
#define RISCV_STEP_1 (4 + RISCV_INSTR_SIZE)
|
||||
#define RISCV_STEP_2 4
|
||||
#define RISCV_REG_VAL (2 << 7)
|
||||
#define RISCV_CMD_VAL 3
|
||||
#if 1
|
||||
// for code size optimization:
|
||||
#define RISCV_DELTA_7F 0x7f
|
||||
#else
|
||||
#define RISCV_DELTA_7F 0
|
||||
#endif
|
||||
|
||||
// RISCV_CHECK_1(v, b) is true when both conditions hold:
//   - the 2 low bits of (b) are equal to RISCV_CMD_VAL,
//   - bits [15 : 19] of (b) are equal to bits [7 : 11] of (v).
#define RISCV_CHECK_1(v, b) \
|
||||
(((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)
|
||||
|
||||
#if 1
|
||||
#define RISCV_CHECK_2(v, r) \
|
||||
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
|
||||
<< 18) \
|
||||
< ((r) & 0x1d))
|
||||
#else
|
||||
// this branch gives larger code, because
|
||||
// compilers generate larger code for big constants.
|
||||
#define RISCV_CHECK_2(v, r) \
|
||||
((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
|
||||
& ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
|
||||
< ((r) & 0x1d))
|
||||
#endif
|
||||
|
||||
|
||||
/* RISCV_SCAN_LOOP : common start of the RISCV encoder and decoder functions.
   - It rounds (size) down to a multiple of RISCV_INSTR_SIZE, returns (p) if (size <= 6),
     and sets (lim) 6 bytes before the rounded end of data.
   - It opens the outer for (;;) loop and declares (a) and (v).
     The outer loop is not closed by the macro: the function that uses the macro closes it.
   - The inner loop tests two positions per iteration (RISCV_INSTR_SIZE apart).
     It exits with (p) at the matched position and
     a = (RISCV_LOAD_VAL(p) ^ 0x10) + 1, where ((a & 0x77) == 0).
     The comments after the macro list the low-byte values that give such (a). */
#define RISCV_SCAN_LOOP \
|
||||
Byte *lim; \
|
||||
size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
|
||||
if (size <= 6) return p; \
|
||||
size -= 6; \
|
||||
lim = p + size; \
|
||||
BR_PC_INIT \
|
||||
for (;;) \
|
||||
{ \
|
||||
UInt32 a, v; \
|
||||
/* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
|
||||
for (;;) \
|
||||
{ \
|
||||
if Z7_UNLIKELY(p >= lim) { return p; } \
|
||||
a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
|
||||
if ((a & 0x77) == 0) break; \
|
||||
a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
|
||||
p += RISCV_INSTR_SIZE * 2; \
|
||||
if ((a & 0x77) == 0) \
|
||||
{ \
|
||||
p -= RISCV_INSTR_SIZE; \
|
||||
if Z7_UNLIKELY(p >= lim) { return p; } \
|
||||
break; \
|
||||
} \
|
||||
}
|
||||
// (xx6f ^ 10) + 1 = xx7f + 1 = xx80 : JAL
|
||||
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
|
||||
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08 : AUIPC
|
||||
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88 : AUIPC
|
||||
|
||||
/* RISCV encoder.
   RISCV_SCAN_LOOP finds the next candidate position (p) and opens the main loop.
   - JAL candidate : the offset bits are gathered from the instruction word,
     the current position is added (BR_CONVERT_VAL_ENC), and the result is stored
     in bytes p[1]..p[3]; the low 12 bits of the instruction are kept.
   - AUIPC candidate : depending on bits selected by mask 0xe80, either the pair of
     32-bit words at (p) and (p + 4) is converted (address = current position added,
     stored big-endian at (p + 4)), or the two words are re-packed without
     address conversion, or the candidate is skipped.
   Returns the pointer at which scanning stopped (return is inside RISCV_SCAN_LOOP). */
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
|
||||
{
|
||||
RISCV_SCAN_LOOP
|
||||
// v : the transformed value from the scan loop (extended to 16 bits below, if it was 8-bit),
// a : the 32-bit word at (p).
v = a;
|
||||
a = RISCV_GET_UI32(p);
|
||||
#ifndef RISCV_USE_16BIT_LOAD
|
||||
v += (UInt32)p[1] << 8;
|
||||
#endif
|
||||
|
||||
if ((v & 8) == 0) // JAL
|
||||
{
|
||||
// candidate is rejected, if any bit selected by mask 0xd80 is set after the adjustment:
if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
|
||||
{
|
||||
p += RISCV_INSTR_SIZE;
|
||||
continue;
|
||||
}
|
||||
{
|
||||
// gather offset bits of (a) to their natural positions in (v):
//   a[31] -> v[20], a[21 : 30] -> v[1 : 10], a[20] -> v[11], a[12 : 19] -> v[12 : 19]
v = ((a & 1u << 31) >> 11)
|
||||
| ((a & 0x3ff << 21) >> 20)
|
||||
| ((a & 1 << 20) >> 9)
|
||||
| (a & 0xff << 12);
|
||||
BR_CONVERT_VAL_ENC(v)
|
||||
// ((v & 1) == 0)
|
||||
// v: bits [1 : 20] contain offset bits
|
||||
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
|
||||
a &= 0xfff;
|
||||
a |= ((UInt32)(v << 23))
|
||||
| ((UInt32)(v << 7) & ((UInt32)0xff << 16))
|
||||
| ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
|
||||
RISCV_SET_UI32(p, a)
|
||||
#else // aligned
|
||||
#if 0
|
||||
SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
|
||||
#else
|
||||
// high nibble of p[1] : v[17 : 20]; low nibble of p[1] is kept from (a):
p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
|
||||
#endif
|
||||
|
||||
// p[2], p[3] : v[1 : 16] in big-endian byte order:
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
|
||||
v <<= 15;
|
||||
v = Z7_BSWAP32(v);
|
||||
SetUi16a(p + 2, (UInt16)v)
|
||||
#else
|
||||
p[2] = (Byte)(v >> 9);
|
||||
p[3] = (Byte)(v >> 1);
|
||||
#endif
|
||||
#endif // aligned
|
||||
}
|
||||
p += 4;
|
||||
continue;
|
||||
} // JAL
|
||||
|
||||
{
|
||||
// AUIPC
|
||||
if (v & 0xe80) // (not x0) and (not x2)
|
||||
{
|
||||
// b : the 32-bit word that follows the AUIPC word
const UInt32 b = RISCV_GET_UI32(p + 4);
|
||||
if (RISCV_CHECK_1(v, b))
|
||||
{
|
||||
{
|
||||
// first output word : bits of (b) moved up by 12, with (0x17 + RISCV_REG_VAL) in low bits
const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
|
||||
RISCV_SET_UI32(p, temp)
|
||||
}
|
||||
// a = (high 20 bits of a) + (sign-extended high 12 bits of b):
a &= 0xfffff000;
|
||||
{
|
||||
#if 1
|
||||
// (t != -1) means that (>>) of negative value is not arithmetic shift in this compiler
const int t = -1 >> 1;
|
||||
if (t != -1)
|
||||
a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
|
||||
else
|
||||
#endif
|
||||
a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
|
||||
}
|
||||
BR_CONVERT_VAL_ENC(a)
|
||||
// second output word : converted address in big-endian byte order
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
|
||||
a = Z7_BSWAP32(a);
|
||||
RISCV_SET_UI32(p + 4, a)
|
||||
#else
|
||||
SetBe32(p + 4, a)
|
||||
#endif
|
||||
p += 8;
|
||||
}
|
||||
else
|
||||
p += RISCV_STEP_1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// x0 / x2 case : if RISCV_CHECK_2 passes, the fields of the two words
// are re-packed without address conversion
UInt32 r = a >> 27;
|
||||
if (RISCV_CHECK_2(v, r))
|
||||
{
|
||||
v = RISCV_GET_UI32(p + 4);
|
||||
r = (r << 7) + 0x17 + (v & 0xfffff000);
|
||||
a = (a >> 12) | (v << 20);
|
||||
RISCV_SET_UI32(p, r)
|
||||
RISCV_SET_UI32(p + 4, a)
|
||||
p += 8;
|
||||
}
|
||||
else
|
||||
p += RISCV_STEP_2;
|
||||
}
|
||||
}
|
||||
} // for
|
||||
}
|
||||
|
||||
|
||||
/* RISCV decoder.
   RISCV_SCAN_LOOP finds the next candidate position (p) and opens the main loop.
   - JAL candidate : the value is read from bytes p[1]..p[3], the current position
     is subtracted (BR_CONVERT_VAL_DEC), and the offset bits are scattered back
     into the 32-bit instruction word.
   - AUIPC candidate : in the x0/x2 case, if RISCV_CHECK_2 passes, the big-endian word
     at (p + 4) is converted (current position subtracted) and split back into two words;
     otherwise, if RISCV_CHECK_1 passes, the two words are re-packed
     without address conversion.
   Returns the pointer at which scanning stopped (return is inside RISCV_SCAN_LOOP). */
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
|
||||
{
|
||||
RISCV_SCAN_LOOP
|
||||
// in both variants below (a) holds the transformed low 16 bits of the word at (p)
#ifdef RISCV_USE_16BIT_LOAD
|
||||
if ((a & 8) == 0)
|
||||
{
|
||||
#else
|
||||
v = a;
|
||||
a += (UInt32)p[1] << 8;
|
||||
if ((v & 8) == 0)
|
||||
{
|
||||
#endif
|
||||
// JAL
|
||||
a -= 0x100 - RISCV_DELTA_7F;
|
||||
// candidate is rejected, if any bit selected by mask 0xd80 is set after the adjustment:
if (a & 0xd80)
|
||||
{
|
||||
p += RISCV_INSTR_SIZE;
|
||||
continue;
|
||||
}
|
||||
{
|
||||
// a_old : the low 12 bits of the original word at (p)
// (the adjustments that were applied to (a) cancel out here)
const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
|
||||
#if 0 // unaligned
|
||||
a = GetUi32(p);
|
||||
v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
|
||||
| (UInt32)(a >> 7) & ((UInt32)0xff << 9)
|
||||
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
|
||||
v = GetUi16a(p + 2);
|
||||
v = Z7_BSWAP32(v) >> 15
|
||||
#else
|
||||
v = (UInt32)p[3] << 1
|
||||
| (UInt32)p[2] << 9
|
||||
#endif
|
||||
| (UInt32)((a & 0xf000) << 5);
|
||||
BR_CONVERT_VAL_DEC(v)
|
||||
// scatter bits of (v) back to their positions in the instruction word:
//   v[20] -> a[31], v[1 : 10] -> a[21 : 30], v[11] -> a[20], v[12 : 19] -> a[12 : 19]
a = a_old
|
||||
| (v << 11 & 1u << 31)
|
||||
| (v << 20 & 0x3ff << 21)
|
||||
| (v << 9 & 1 << 20)
|
||||
| (v & 0xff << 12);
|
||||
RISCV_SET_UI32(p, a)
|
||||
}
|
||||
p += 4;
|
||||
continue;
|
||||
} // JAL
|
||||
|
||||
{
|
||||
// AUIPC
|
||||
// v : transformed low 16 bits, a : extended to 32 bits of the word at (p)
v = a;
|
||||
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
|
||||
a = GetUi32(p);
|
||||
#else
|
||||
a |= (UInt32)GetUi16a(p + 2) << 16;
|
||||
#endif
|
||||
if ((v & 0xe80) == 0) // x0/x2
|
||||
{
|
||||
const UInt32 r = a >> 27;
|
||||
if (RISCV_CHECK_2(v, r))
|
||||
{
|
||||
// b : the word at (p + 4) read as big-endian value
UInt32 b;
|
||||
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
|
||||
b = RISCV_GET_UI32(p + 4);
|
||||
b = Z7_BSWAP32(b);
|
||||
#else
|
||||
b = GetBe32(p + 4);
|
||||
#endif
|
||||
v = a >> 12;
|
||||
BR_CONVERT_VAL_DEC(b)
|
||||
a = (r << 7) + 0x17;
|
||||
// (b + 0x800) : rounding for high 20 bits, because the low 12 bits of (b)
// are stored in high 12 bits of the second word:
a += (b + 0x800) & 0xfffff000;
|
||||
v |= b << 20;
|
||||
RISCV_SET_UI32(p, a)
|
||||
RISCV_SET_UI32(p + 4, v)
|
||||
p += 8;
|
||||
}
|
||||
else
|
||||
p += RISCV_STEP_2;
|
||||
}
|
||||
else
|
||||
{
|
||||
const UInt32 b = RISCV_GET_UI32(p + 4);
|
||||
if (!RISCV_CHECK_1(v, b))
|
||||
p += RISCV_STEP_1;
|
||||
else
|
||||
{
|
||||
// re-pack the fields of the two words without address conversion:
v = (a & 0xfffff000) | (b >> 20);
|
||||
a = (b << 12) | (0x17 + RISCV_REG_VAL);
|
||||
RISCV_SET_UI32(p, a)
|
||||
RISCV_SET_UI32(p + 4, v)
|
||||
p += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // for
|
||||
}
|
||||
|
|
|
|||
123
C/Bra.h
123
C/Bra.h
|
|
@ -1,64 +1,105 @@
|
|||
/* Bra.h -- Branch converters for executables
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
2024-01-20 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BRA_H
|
||||
#define __BRA_H
|
||||
#ifndef ZIP7_INC_BRA_H
|
||||
#define ZIP7_INC_BRA_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/*
|
||||
These functions convert relative addresses to absolute addresses
|
||||
in CALL instructions to increase the compression ratio.
|
||||
|
||||
In:
|
||||
data - data buffer
|
||||
size - size of data
|
||||
ip - current virtual Instruction Pointer (IP) value
|
||||
state - state variable for x86 converter
|
||||
encoding - 0 (for decoding), 1 (for encoding)
|
||||
|
||||
Out:
|
||||
state - state variable for x86 converter
|
||||
/* #define PPC BAD_PPC_11 // for debug */
|
||||
|
||||
Returns:
|
||||
The number of processed bytes. If you call these functions with multiple calls,
|
||||
you must start next call with first byte after block of processed bytes.
|
||||
#define Z7_BRANCH_CONV_DEC_2(name) z7_ ## name ## _Dec
|
||||
#define Z7_BRANCH_CONV_ENC_2(name) z7_ ## name ## _Enc
|
||||
#define Z7_BRANCH_CONV_DEC(name) Z7_BRANCH_CONV_DEC_2(BranchConv_ ## name)
|
||||
#define Z7_BRANCH_CONV_ENC(name) Z7_BRANCH_CONV_ENC_2(BranchConv_ ## name)
|
||||
#define Z7_BRANCH_CONV_ST_DEC(name) z7_BranchConvSt_ ## name ## _Dec
|
||||
#define Z7_BRANCH_CONV_ST_ENC(name) z7_BranchConvSt_ ## name ## _Enc
|
||||
|
||||
#define Z7_BRANCH_CONV_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc)
|
||||
#define Z7_BRANCH_CONV_ST_DECL(name) Byte * name(Byte *data, SizeT size, UInt32 pc, UInt32 *state)
|
||||
|
||||
typedef Z7_BRANCH_CONV_DECL( (*z7_Func_BranchConv));
|
||||
typedef Z7_BRANCH_CONV_ST_DECL((*z7_Func_BranchConvSt));
|
||||
|
||||
#define Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL 0
|
||||
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_DEC(X86));
|
||||
Z7_BRANCH_CONV_ST_DECL (Z7_BRANCH_CONV_ST_ENC(X86));
|
||||
|
||||
#define Z7_BRANCH_FUNCS_DECL(name) \
|
||||
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_DEC_2(name)); \
|
||||
Z7_BRANCH_CONV_DECL (Z7_BRANCH_CONV_ENC_2(name));
|
||||
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM64)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_ARM)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_ARMT)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_PPC)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_SPARC)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_IA64)
|
||||
Z7_BRANCH_FUNCS_DECL (BranchConv_RISCV)
|
||||
|
||||
/*
|
||||
These functions convert data that contain CPU instructions.
|
||||
Each such function converts relative addresses to absolute addresses in some
|
||||
branch instructions: CALL (in all converters) and JUMP (X86 converter only).
|
||||
Such conversion can increase the compression ratio when that data is compressed.
|
||||
|
||||
There are 2 types of converters:
|
||||
Byte * Conv_RISC (Byte *data, SizeT size, UInt32 pc);
|
||||
Byte * ConvSt_X86(Byte *data, SizeT size, UInt32 pc, UInt32 *state);
|
||||
Each Converter supports 2 versions: one for encoding
|
||||
and one for decoding (_Enc/_Dec postfixes in function name).
|
||||
|
||||
In params:
|
||||
data : data buffer
|
||||
size : size of data
|
||||
pc : current virtual Program Counter (Instruction Pointer) value
|
||||
In/Out param:
|
||||
state : pointer to state variable (for X86 converter only)
|
||||
|
||||
Return:
|
||||
The pointer to position in (data) buffer after last byte that was processed.
|
||||
If the caller calls converter again, it must call it starting with that position.
|
||||
But the caller is allowed to move data in buffer. So pointer to
|
||||
current processed position also will be changed for next call.
|
||||
Also the caller must increase internal (pc) value for next call.
|
||||
|
||||
Each converter has some characteristics: Endian, Alignment, LookAhead.
|
||||
Type Endian Alignment LookAhead
|
||||
|
||||
x86 little 1 4
|
||||
X86 little 1 4
|
||||
ARMT little 2 2
|
||||
RISCV little 2 6
|
||||
ARM little 4 0
|
||||
ARM64 little 4 0
|
||||
PPC big 4 0
|
||||
SPARC big 4 0
|
||||
IA64 little 16 0
|
||||
|
||||
size must be >= Alignment + LookAhead, if it's not last block.
|
||||
If (size < Alignment + LookAhead), converter returns 0.
|
||||
(data) must be aligned for (Alignment).
|
||||
processed size can be calculated as:
|
||||
SizeT processed = Conv(data, size, pc) - data;
|
||||
if (processed == 0)
|
||||
it means that converter needs more data for processing.
|
||||
If (size < Alignment + LookAhead)
|
||||
then (processed == 0) is allowed.
|
||||
|
||||
Example:
|
||||
|
||||
UInt32 ip = 0;
|
||||
for ()
|
||||
{
|
||||
; size must be >= Alignment + LookAhead, if it's not last block
|
||||
SizeT processed = Convert(data, size, ip, 1);
|
||||
data += processed;
|
||||
size -= processed;
|
||||
ip += processed;
|
||||
}
|
||||
Example code for conversion in loop:
|
||||
UInt32 pc = 0;
|
||||
size = 0;
|
||||
for (;;)
|
||||
{
|
||||
size += Load_more_input_data(data + size);
|
||||
SizeT processed = Conv(data, size, pc) - data;
|
||||
if (processed == 0 && no_more_input_data_after_size)
|
||||
break; // we stop convert loop
|
||||
data += processed;
|
||||
size -= processed;
|
||||
pc += processed;
|
||||
}
|
||||
*/
|
||||
|
||||
#define x86_Convert_Init(state) { state = 0; }
|
||||
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
|
||||
SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
|
|
|
|||
221
C/Bra86.c
221
C/Bra86.c
|
|
@ -1,82 +1,187 @@
|
|||
/* Bra86.c -- Converter for x86 code (BCJ)
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
/* Bra86.c -- Branch converter for X86 code (BCJ)
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "Bra.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
|
||||
|
||||
SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
|
||||
#if defined(MY_CPU_SIZEOF_POINTER) \
|
||||
&& ( MY_CPU_SIZEOF_POINTER == 4 \
|
||||
|| MY_CPU_SIZEOF_POINTER == 8)
|
||||
#define BR_CONV_USE_OPT_PC_PTR
|
||||
#endif
|
||||
|
||||
#ifdef BR_CONV_USE_OPT_PC_PTR
|
||||
#define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
|
||||
#define BR_PC_GET (pc + (UInt32)(SizeT)p)
|
||||
#else
|
||||
#define BR_PC_INIT pc += (UInt32)size;
|
||||
#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
|
||||
// #define BR_PC_INIT
|
||||
// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
|
||||
#endif
|
||||
|
||||
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
|
||||
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
|
||||
|
||||
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name
|
||||
|
||||
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)
|
||||
|
||||
#ifdef MY_CPU_LE_UNALIGN
|
||||
#define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
|
||||
#define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0)
|
||||
#else
|
||||
#define BR86_PREPARE_BCJ_SCAN
|
||||
// bad for MSVC X86 (partial write to byte reg):
|
||||
#define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8)
|
||||
// bad for old MSVC (partial write to byte reg):
|
||||
// #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0)
|
||||
#endif
|
||||
|
||||
static
|
||||
Z7_FORCE_INLINE
|
||||
Z7_ATTRIB_NO_VECTOR
|
||||
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
|
||||
{
|
||||
SizeT pos = 0;
|
||||
UInt32 mask = *state & 7;
|
||||
if (size < 5)
|
||||
return 0;
|
||||
size -= 4;
|
||||
ip += 5;
|
||||
return p;
|
||||
{
|
||||
// Byte *p = data;
|
||||
const Byte *lim = p + size - 4;
|
||||
unsigned mask = (unsigned)*state; // & 7;
|
||||
#ifdef BR_CONV_USE_OPT_PC_PTR
|
||||
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
|
||||
because call/jump offset is relative to the next instruction.
|
||||
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
|
||||
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
|
||||
*/
|
||||
pc += 4;
|
||||
#endif
|
||||
BR_PC_INIT
|
||||
goto start;
|
||||
|
||||
for (;;)
|
||||
for (;; mask |= 4)
|
||||
{
|
||||
Byte *p = data + pos;
|
||||
const Byte *limit = data + size;
|
||||
for (; p < limit; p++)
|
||||
if ((*p & 0xFE) == 0xE8)
|
||||
break;
|
||||
|
||||
// cont: mask |= 4;
|
||||
start:
|
||||
if (p >= lim)
|
||||
goto fin;
|
||||
{
|
||||
SizeT d = (SizeT)(p - data) - pos;
|
||||
pos = (SizeT)(p - data);
|
||||
if (p >= limit)
|
||||
{
|
||||
*state = (d > 2 ? 0 : mask >> (unsigned)d);
|
||||
return pos;
|
||||
}
|
||||
if (d > 2)
|
||||
mask = 0;
|
||||
else
|
||||
{
|
||||
mask >>= (unsigned)d;
|
||||
if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
|
||||
{
|
||||
mask = (mask >> 1) | 4;
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
BR86_PREPARE_BCJ_SCAN
|
||||
p += 4;
|
||||
if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
|
||||
if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
|
||||
if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
|
||||
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
|
||||
}
|
||||
goto main_loop;
|
||||
|
||||
if (Test86MSByte(p[4]))
|
||||
m0: p--;
|
||||
m1: p--;
|
||||
m2: p--;
|
||||
if (mask == 0)
|
||||
goto a3;
|
||||
if (p > lim)
|
||||
goto fin_p;
|
||||
|
||||
// if (((0x17u >> mask) & 1) == 0)
|
||||
if (mask > 4 || mask == 3)
|
||||
{
|
||||
UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
|
||||
UInt32 cur = ip + (UInt32)pos;
|
||||
pos += 5;
|
||||
if (encoding)
|
||||
v += cur;
|
||||
else
|
||||
v -= cur;
|
||||
if (mask != 0)
|
||||
mask >>= 1;
|
||||
continue; // goto cont;
|
||||
}
|
||||
mask >>= 1;
|
||||
if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
|
||||
continue; // goto cont;
|
||||
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
|
||||
{
|
||||
UInt32 v = GetUi32(p);
|
||||
UInt32 c;
|
||||
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
|
||||
c = BR_PC_GET;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
{
|
||||
unsigned sh = (mask & 6) << 2;
|
||||
if (Test86MSByte((Byte)(v >> sh)))
|
||||
mask <<= 3;
|
||||
if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
|
||||
{
|
||||
v ^= (((UInt32)0x100 << sh) - 1);
|
||||
if (encoding)
|
||||
v += cur;
|
||||
else
|
||||
v -= cur;
|
||||
v ^= (((UInt32)0x100 << mask) - 1);
|
||||
#ifdef MY_CPU_X86
|
||||
// for X86 : we can recalculate (c) to reduce register pressure
|
||||
c = BR_PC_GET;
|
||||
#endif
|
||||
BR_CONVERT_VAL(v, c)
|
||||
}
|
||||
mask = 0;
|
||||
}
|
||||
p[1] = (Byte)v;
|
||||
p[2] = (Byte)(v >> 8);
|
||||
p[3] = (Byte)(v >> 16);
|
||||
p[4] = (Byte)(0 - ((v >> 24) & 1));
|
||||
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
|
||||
v &= (1 << 25) - 1; v -= (1 << 24);
|
||||
SetUi32(p, v)
|
||||
p += 4;
|
||||
goto main_loop;
|
||||
}
|
||||
else
|
||||
|
||||
main_loop:
|
||||
if (p >= lim)
|
||||
goto fin;
|
||||
for (;;)
|
||||
{
|
||||
mask = (mask >> 1) | 4;
|
||||
pos++;
|
||||
BR86_PREPARE_BCJ_SCAN
|
||||
p += 4;
|
||||
if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
|
||||
if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
|
||||
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
|
||||
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
|
||||
if (p >= lim)
|
||||
goto fin;
|
||||
}
|
||||
|
||||
a0: p--;
|
||||
a1: p--;
|
||||
a2: p--;
|
||||
a3:
|
||||
if (p > lim)
|
||||
goto fin_p;
|
||||
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
|
||||
{
|
||||
UInt32 v = GetUi32(p);
|
||||
UInt32 c;
|
||||
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
|
||||
c = BR_PC_GET;
|
||||
BR_CONVERT_VAL(v, c)
|
||||
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
|
||||
v &= (1 << 25) - 1; v -= (1 << 24);
|
||||
SetUi32(p, v)
|
||||
p += 4;
|
||||
goto main_loop;
|
||||
}
|
||||
}
|
||||
|
||||
fin_p:
|
||||
p--;
|
||||
fin:
|
||||
// the following processing for tail is optional and can be commented
|
||||
/*
|
||||
lim += 4;
|
||||
for (; p < lim; p++, mask >>= 1)
|
||||
if ((*p & 0xfe) == 0xe8)
|
||||
break;
|
||||
*/
|
||||
*state = (UInt32)mask;
|
||||
return p;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
  Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) defines a non-inlined function
  whose name is produced by m(name). The function forwards its arguments
  (data, size, pc, state) to Z7_BRANCH_CONV_ST(name) and passes (encoding) as
  the last argument.
*/
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
|
||||
Z7_NO_INLINE \
|
||||
Z7_ATTRIB_NO_VECTOR \
|
||||
Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
|
||||
{ return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
|
||||
|
||||
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
|
||||
#ifndef Z7_EXTRACT_ONLY
|
||||
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
|
||||
#endif
|
||||
|
|
|
|||
57
C/BraIA64.c
57
C/BraIA64.c
|
|
@ -1,53 +1,14 @@
|
|||
/* BraIA64.c -- Converter for IA-64 code
|
||||
2017-01-26 : Igor Pavlov : Public domain */
|
||||
2023-02-20 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "Bra.h"
|
||||
// the code was moved to Bra.c
|
||||
|
||||
/*
  IA64_Convert: branch converter for IA-64 code.
  The buffer is processed in 16-byte units. In each unit, up to three bit
  fields (at byte offsets 2, 7 and 12, shifted by 2, 3 and 4 bits) are checked,
  and a 21-bit value stored in a matching field is adjusted by (ip + i).

  data     : data buffer (modified in place)
  size     : size of data in bytes
  ip       : virtual address that corresponds to data[0]
  encoding : nonzero - (ip + i) is added; zero - (ip + i) is subtracted

  Returns the number of processed bytes (a multiple of 16),
  or 0 if (size < 16).
*/
SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
|
||||
{
|
||||
SizeT i;
|
||||
if (size < 16)
|
||||
return 0;
|
||||
size -= 16; // (size) is now the last offset at which a full 16-byte unit can start
|
||||
i = 0;
|
||||
do
|
||||
{
|
||||
unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3; // 2-bit table lookup indexed by bits 1..4 of the first byte; presumably the IA-64 bundle template -- TODO confirm
|
||||
if (m) // m == 0 : nothing to convert in this unit
|
||||
{
|
||||
m++; // fields m..4 are examined below
|
||||
do
|
||||
{
|
||||
Byte *p = data + (i + (size_t)m * 5 - 8); // m = 2, 3, 4 -> byte offsets 2, 7, 12 inside the unit
|
||||
if (((p[3] >> m) & 15) == 5 // 4-bit field must be equal to 5
|
||||
&& (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0) // 3-bit field must be zero
|
||||
{
|
||||
unsigned raw = GetUi32(p);
|
||||
unsigned v = raw >> m;
|
||||
v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3); // 21-bit value: bits 0..19 plus bit 23 moved to bit 20
|
||||
|
|
||||
|
|
||||
v <<= 4; // value is stored in 16-byte units: convert to a byte address
|
||||
if (encoding)
|
||||
v += ip + (UInt32)i;
|
||||
else
|
||||
v -= ip + (UInt32)i;
|
||||
v >>= 4; // back to 16-byte units
|
||||
|
|
||||
|
|
||||
v &= 0x1FFFFF;
|
||||
v += 0x700000; // carry moves bit 20 (if set) up to bit 23
|
||||
v &= 0x8FFFFF; // keep bits 0..19 and bit 23
|
||||
raw &= ~((UInt32)0x8FFFFF << m); // clear the old field bits
|
||||
raw |= (v << m);
|
||||
SetUi32(p, raw);
|
||||
}
|
||||
}
|
||||
while (++m <= 4);
|
||||
}
|
||||
i += 16;
|
||||
}
|
||||
while (i <= size);
|
||||
return i;
|
||||
}
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#pragma GCC diagnostic ignored "-Wempty-translation-unit"
|
||||
#endif
|
||||
|
|
|
|||
473
C/BwtSort.c
473
C/BwtSort.c
|
|
@ -1,5 +1,5 @@
|
|||
/* BwtSort.c -- BWT block sorting
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -7,8 +7,44 @@
|
|||
#include "Sort.h"
|
||||
|
||||
/* #define BLOCK_SORT_USE_HEAP_SORT */
|
||||
// #define BLOCK_SORT_USE_HEAP_SORT
|
||||
|
||||
#ifdef BLOCK_SORT_USE_HEAP_SORT
|
||||
|
||||
/*
  HeapSortRefDown(p, vals, n, size, temp):
  sift-down step for a 1-based binary heap of references stored in p[1..size].
  Heap order is defined by the keys vals[p[x]] (largest key at the root).
  (temp) is the reference to be placed; the walk starts at position (n),
  moves the larger child up while its key is greater than vals[temp],
  and finally stores (temp) at the position that was reached.
*/
#define HeapSortRefDown(p, vals, n, size, temp) \
|
||||
{ size_t k = n; UInt32 val = vals[temp]; for (;;) { \
|
||||
size_t s = k << 1; \
|
||||
if (s > size) break; \
|
||||
if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
|
||||
if (val >= vals[p[s]]) break; \
|
||||
p[k] = p[s]; k = s; \
|
||||
} p[k] = temp; }
|
||||
|
||||
/*
  HeapSortRef: in-place heap sort of the reference array (p).
  Items of (p) are indexes into (vals); after the call the items are ordered
  so that the keys vals[p[i]] are in non-decreasing order.

  p    : array of (size) references, reordered in place
  vals : key array indexed by the references; only read here
  size : number of items in (p); nothing is done if (size <= 1)
*/
void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
|
||||
{
|
||||
if (size <= 1)
|
||||
return;
|
||||
p--; // switch to 1-based indexing: p[1..size]
|
||||
{
|
||||
size_t i = size / 2; // last position that has a child
|
||||
do // build the heap
|
||||
{
|
||||
UInt32 temp = p[i];
|
||||
HeapSortRefDown(p, vals, i, size, temp);
|
||||
}
|
||||
while (--i != 0);
|
||||
}
|
||||
do // move the current root (largest key) to the end and shrink the heap
|
||||
{
|
||||
UInt32 temp = p[size];
|
||||
p[size--] = p[1];
|
||||
HeapSortRefDown(p, vals, 1, size, temp);
|
||||
}
|
||||
while (size > 1);
|
||||
}
|
||||
|
||||
#endif // BLOCK_SORT_USE_HEAP_SORT
|
||||
|
||||
#define NO_INLINE MY_FAST_CALL
|
||||
|
||||
/* Don't change it !!! */
|
||||
#define kNumHashBytes 2
|
||||
|
|
@ -29,26 +65,27 @@
|
|||
|
||||
#else
|
||||
|
||||
#define kNumBitsMax 20
|
||||
#define kIndexMask ((1 << kNumBitsMax) - 1)
|
||||
#define kNumExtraBits (32 - kNumBitsMax)
|
||||
#define kNumExtra0Bits (kNumExtraBits - 2)
|
||||
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
|
||||
#define kNumBitsMax 20
|
||||
#define kIndexMask (((UInt32)1 << kNumBitsMax) - 1)
|
||||
#define kNumExtraBits (32 - kNumBitsMax)
|
||||
#define kNumExtra0Bits (kNumExtraBits - 2)
|
||||
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
|
||||
|
||||
#define SetFinishedGroupSize(p, size) \
|
||||
{ *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
|
||||
{ *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
|
||||
if ((size) > (1 << kNumExtra0Bits)) { \
|
||||
*(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
|
||||
*(p) |= 0x40000000; \
|
||||
*((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \
|
||||
|
||||
static void SetGroupSize(UInt32 *p, UInt32 size)
|
||||
static void SetGroupSize(UInt32 *p, size_t size)
|
||||
{
|
||||
if (--size == 0)
|
||||
return;
|
||||
*p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
|
||||
*p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax);
|
||||
if (size >= (1 << kNumExtra0Bits))
|
||||
{
|
||||
*p |= 0x40000000;
|
||||
p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
|
||||
p[1] |= (((UInt32)size >> kNumExtra0Bits) << kNumBitsMax);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -60,10 +97,15 @@ SortGroup - is recursive Range-Sort function with HeapSort optimization for smal
|
|||
returns: 1 - if there are groups, 0 - no more groups
|
||||
*/
|
||||
|
||||
static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
, UInt32 left, UInt32 range
|
||||
#endif
|
||||
static
|
||||
unsigned
|
||||
Z7_FASTCALL
|
||||
SortGroup(size_t BlockSize, size_t NumSortedBytes,
|
||||
size_t groupOffset, size_t groupSize,
|
||||
unsigned NumRefBits, UInt32 *Indices
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
, size_t left, size_t range
|
||||
#endif
|
||||
)
|
||||
{
|
||||
UInt32 *ind2 = Indices + groupOffset;
|
||||
|
|
@ -72,96 +114,99 @@ static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt3
|
|||
{
|
||||
/*
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetFinishedGroupSize(ind2, 1);
|
||||
SetFinishedGroupSize(ind2, 1)
|
||||
#endif
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
Groups = Indices + BlockSize + BS_TEMP_SIZE;
|
||||
if (groupSize <= ((UInt32)1 << NumRefBits)
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
if (groupSize <= ((size_t)1 << NumRefBits)
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
&& groupSize <= range
|
||||
#endif
|
||||
#endif
|
||||
)
|
||||
{
|
||||
UInt32 *temp = Indices + BlockSize;
|
||||
UInt32 j;
|
||||
UInt32 mask, thereAreGroups, group, cg;
|
||||
size_t j, group;
|
||||
UInt32 mask, cg;
|
||||
unsigned thereAreGroups;
|
||||
{
|
||||
UInt32 gPrev;
|
||||
UInt32 gRes = 0;
|
||||
{
|
||||
UInt32 sp = ind2[0] + NumSortedBytes;
|
||||
if (sp >= BlockSize) sp -= BlockSize;
|
||||
size_t sp = ind2[0] + NumSortedBytes;
|
||||
if (sp >= BlockSize)
|
||||
sp -= BlockSize;
|
||||
gPrev = Groups[sp];
|
||||
temp[0] = (gPrev << NumRefBits);
|
||||
temp[0] = gPrev << NumRefBits;
|
||||
}
|
||||
|
||||
for (j = 1; j < groupSize; j++)
|
||||
{
|
||||
UInt32 sp = ind2[j] + NumSortedBytes;
|
||||
size_t sp = ind2[j] + NumSortedBytes;
|
||||
UInt32 g;
|
||||
if (sp >= BlockSize) sp -= BlockSize;
|
||||
if (sp >= BlockSize)
|
||||
sp -= BlockSize;
|
||||
g = Groups[sp];
|
||||
temp[j] = (g << NumRefBits) | j;
|
||||
temp[j] = (g << NumRefBits) | (UInt32)j;
|
||||
gRes |= (gPrev ^ g);
|
||||
}
|
||||
if (gRes == 0)
|
||||
{
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetGroupSize(ind2, groupSize);
|
||||
#endif
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
HeapSort(temp, groupSize);
|
||||
mask = (((UInt32)1 << NumRefBits) - 1);
|
||||
mask = ((UInt32)1 << NumRefBits) - 1;
|
||||
thereAreGroups = 0;
|
||||
|
||||
group = groupOffset;
|
||||
cg = (temp[0] >> NumRefBits);
|
||||
cg = temp[0] >> NumRefBits;
|
||||
temp[0] = ind2[temp[0] & mask];
|
||||
|
||||
{
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 *Flags = Groups + BlockSize;
|
||||
#else
|
||||
UInt32 prevGroupStart = 0;
|
||||
#endif
|
||||
#else
|
||||
size_t prevGroupStart = 0;
|
||||
#endif
|
||||
|
||||
for (j = 1; j < groupSize; j++)
|
||||
{
|
||||
UInt32 val = temp[j];
|
||||
UInt32 cgCur = (val >> NumRefBits);
|
||||
const UInt32 val = temp[j];
|
||||
const UInt32 cgCur = val >> NumRefBits;
|
||||
|
||||
if (cgCur != cg)
|
||||
{
|
||||
cg = cgCur;
|
||||
group = groupOffset + j;
|
||||
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
{
|
||||
UInt32 t = group - 1;
|
||||
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
|
||||
const size_t t = group - 1;
|
||||
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
|
||||
}
|
||||
#else
|
||||
#else
|
||||
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
|
||||
prevGroupStart = j;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else
|
||||
thereAreGroups = 1;
|
||||
{
|
||||
UInt32 ind = ind2[val & mask];
|
||||
temp[j] = ind;
|
||||
Groups[ind] = group;
|
||||
const UInt32 ind = ind2[val & mask];
|
||||
temp[j] = ind;
|
||||
Groups[ind] = (UInt32)group;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
for (j = 0; j < groupSize; j++)
|
||||
|
|
@ -171,37 +216,42 @@ static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt3
|
|||
|
||||
/* Check that all strings are in one group (cannot sort) */
|
||||
{
|
||||
UInt32 group, j;
|
||||
UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
|
||||
UInt32 group;
|
||||
size_t j;
|
||||
size_t sp = ind2[0] + NumSortedBytes;
|
||||
if (sp >= BlockSize)
|
||||
sp -= BlockSize;
|
||||
group = Groups[sp];
|
||||
for (j = 1; j < groupSize; j++)
|
||||
{
|
||||
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
|
||||
sp = ind2[j] + NumSortedBytes;
|
||||
if (sp >= BlockSize)
|
||||
sp -= BlockSize;
|
||||
if (Groups[sp] != group)
|
||||
break;
|
||||
}
|
||||
if (j == groupSize)
|
||||
{
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetGroupSize(ind2, groupSize);
|
||||
#endif
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
{
|
||||
/* ---------- Range Sort ---------- */
|
||||
UInt32 i;
|
||||
UInt32 mid;
|
||||
size_t i;
|
||||
size_t mid;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 j;
|
||||
size_t j;
|
||||
if (range <= 1)
|
||||
{
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetGroupSize(ind2, groupSize);
|
||||
#endif
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
mid = left + ((range + 1) >> 1);
|
||||
|
|
@ -209,7 +259,7 @@ static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt3
|
|||
i = 0;
|
||||
do
|
||||
{
|
||||
UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
|
||||
size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
|
||||
if (Groups[sp] >= mid)
|
||||
{
|
||||
for (j--; j > i; j--)
|
||||
|
|
@ -237,51 +287,53 @@ static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt3
|
|||
break;
|
||||
}
|
||||
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
{
|
||||
UInt32 t = (groupOffset + i - 1);
|
||||
const size_t t = groupOffset + i - 1;
|
||||
UInt32 *Flags = Groups + BlockSize;
|
||||
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
|
||||
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
{
|
||||
UInt32 j;
|
||||
size_t j;
|
||||
for (j = i; j < groupSize; j++)
|
||||
Groups[ind2[j]] = groupOffset + i;
|
||||
Groups[ind2[j]] = (UInt32)(groupOffset + i);
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
|
||||
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
|
||||
unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
|
||||
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
#else // BLOCK_SORT_USE_HEAP_SORT
|
||||
|
||||
/* ---------- Heap Sort ---------- */
|
||||
|
||||
{
|
||||
UInt32 j;
|
||||
size_t j;
|
||||
for (j = 0; j < groupSize; j++)
|
||||
{
|
||||
UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
|
||||
ind2[j] = sp;
|
||||
size_t sp = ind2[j] + NumSortedBytes;
|
||||
if (sp >= BlockSize)
|
||||
sp -= BlockSize;
|
||||
ind2[j] = (UInt32)sp;
|
||||
}
|
||||
|
||||
HeapSortRef(ind2, Groups, groupSize);
|
||||
|
||||
/* Write Flags */
|
||||
{
|
||||
UInt32 sp = ind2[0];
|
||||
size_t sp = ind2[0];
|
||||
UInt32 group = Groups[sp];
|
||||
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 *Flags = Groups + BlockSize;
|
||||
#else
|
||||
UInt32 prevGroupStart = 0;
|
||||
#endif
|
||||
#else
|
||||
size_t prevGroupStart = 0;
|
||||
#endif
|
||||
|
||||
for (j = 1; j < groupSize; j++)
|
||||
{
|
||||
|
|
@ -289,149 +341,210 @@ static UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt3
|
|||
if (Groups[sp] != group)
|
||||
{
|
||||
group = Groups[sp];
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
{
|
||||
UInt32 t = groupOffset + j - 1;
|
||||
const size_t t = groupOffset + j - 1;
|
||||
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
|
||||
}
|
||||
#else
|
||||
#else
|
||||
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
|
||||
prevGroupStart = j;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
{
|
||||
/* Write new Groups values and Check that there are groups */
|
||||
UInt32 thereAreGroups = 0;
|
||||
unsigned thereAreGroups = 0;
|
||||
for (j = 0; j < groupSize; j++)
|
||||
{
|
||||
UInt32 group = groupOffset + j;
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
size_t group = groupOffset + j;
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
|
||||
if ((ind2[j] & 0x40000000) != 0)
|
||||
if (ind2[j] & 0x40000000)
|
||||
subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
|
||||
subGroupSize++;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 original = ind2[j];
|
||||
UInt32 sp = original & kIndexMask;
|
||||
if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
|
||||
ind2[j] = sp | (original & ~kIndexMask);
|
||||
Groups[sp] = group;
|
||||
const UInt32 original = ind2[j];
|
||||
size_t sp = original & kIndexMask;
|
||||
if (sp < NumSortedBytes)
|
||||
sp += BlockSize;
|
||||
sp -= NumSortedBytes;
|
||||
ind2[j] = (UInt32)sp | (original & ~kIndexMask);
|
||||
Groups[sp] = (UInt32)group;
|
||||
if (--subGroupSize == 0)
|
||||
break;
|
||||
j++;
|
||||
thereAreGroups = 1;
|
||||
}
|
||||
#else
|
||||
#else
|
||||
UInt32 *Flags = Groups + BlockSize;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
|
||||
ind2[j] = sp;
|
||||
Groups[sp] = group;
|
||||
size_t sp = ind2[j];
|
||||
if (sp < NumSortedBytes)
|
||||
sp += BlockSize;
|
||||
sp -= NumSortedBytes;
|
||||
ind2[j] = (UInt32)sp;
|
||||
Groups[sp] = (UInt32)group;
|
||||
if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
|
||||
break;
|
||||
j++;
|
||||
thereAreGroups = 1;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
return thereAreGroups;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif // BLOCK_SORT_USE_HEAP_SORT
|
||||
}
|
||||
|
||||
|
||||
/* conditions: blockSize > 0 */
|
||||
UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
|
||||
UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize)
|
||||
{
|
||||
UInt32 *counters = Indices + blockSize;
|
||||
UInt32 i;
|
||||
size_t i;
|
||||
UInt32 *Groups;
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 *Flags;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Radix-Sort for 2 bytes */
|
||||
/* Radix-Sort for 2 bytes */
|
||||
// { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) {
|
||||
for (i = 0; i < kNumHashValues; i++)
|
||||
counters[i] = 0;
|
||||
for (i = 0; i < blockSize - 1; i++)
|
||||
counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
|
||||
counters[((UInt32)data[i] << 8) | data[0]]++;
|
||||
{
|
||||
const Byte *data2 = data;
|
||||
size_t a = data[(size_t)blockSize - 1];
|
||||
const Byte *data_lim = data + blockSize;
|
||||
if (blockSize >= 4)
|
||||
{
|
||||
data_lim -= 3;
|
||||
do
|
||||
{
|
||||
size_t b;
|
||||
b = data2[0]; counters[(a << 8) | b]++;
|
||||
a = data2[1]; counters[(b << 8) | a]++;
|
||||
b = data2[2]; counters[(a << 8) | b]++;
|
||||
a = data2[3]; counters[(b << 8) | a]++;
|
||||
data2 += 4;
|
||||
}
|
||||
while (data2 < data_lim);
|
||||
data_lim += 3;
|
||||
}
|
||||
while (data2 != data_lim)
|
||||
{
|
||||
size_t b = *data2++;
|
||||
counters[(a << 8) | b]++;
|
||||
a = b;
|
||||
}
|
||||
}
|
||||
// }}
|
||||
|
||||
Groups = counters + BS_TEMP_SIZE;
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
Flags = Groups + blockSize;
|
||||
{
|
||||
UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
|
||||
for (i = 0; i < numWords; i++)
|
||||
Flags[i] = kAllFlags;
|
||||
}
|
||||
#endif
|
||||
{
|
||||
const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
|
||||
for (i = 0; i < numWords; i++)
|
||||
Flags[i] = kAllFlags;
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
UInt32 sum = 0;
|
||||
for (i = 0; i < kNumHashValues; i++)
|
||||
{
|
||||
UInt32 groupSize = counters[i];
|
||||
if (groupSize > 0)
|
||||
const UInt32 groupSize = counters[i];
|
||||
counters[i] = sum;
|
||||
sum += groupSize;
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
if (groupSize)
|
||||
{
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 t = sum + groupSize - 1;
|
||||
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
|
||||
#endif
|
||||
sum += groupSize;
|
||||
const UInt32 t = sum - 1;
|
||||
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
|
||||
}
|
||||
counters[i] = sum - groupSize;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < blockSize - 1; i++)
|
||||
Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]];
|
||||
Groups[i] = counters[((unsigned)data[i] << 8) | data[0]];
|
||||
|
||||
{
|
||||
#define SET_Indices(a, b, i) \
|
||||
{ UInt32 c; \
|
||||
a = (a << 8) | (b); \
|
||||
c = counters[a]; \
|
||||
Indices[c] = (UInt32)i++; \
|
||||
counters[a] = c + 1; \
|
||||
}
|
||||
|
||||
for (i = 0; i < blockSize - 1; i++)
|
||||
Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
|
||||
Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
|
||||
|
||||
for (i = 0; i < blockSize - 1; i++)
|
||||
Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
|
||||
Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
|
||||
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
size_t a = data[0];
|
||||
const Byte *data_ptr = data + 1;
|
||||
i = 0;
|
||||
if (blockSize >= 3)
|
||||
{
|
||||
blockSize -= 2;
|
||||
do
|
||||
{
|
||||
size_t b;
|
||||
b = data_ptr[0]; SET_Indices(a, b, i)
|
||||
a = data_ptr[1]; SET_Indices(b, a, i)
|
||||
data_ptr += 2;
|
||||
}
|
||||
while (i < blockSize);
|
||||
blockSize += 2;
|
||||
}
|
||||
if (i < blockSize - 1)
|
||||
{
|
||||
SET_Indices(a, data[(size_t)i + 1], i)
|
||||
a = (Byte)a;
|
||||
}
|
||||
SET_Indices(a, data[0], i)
|
||||
}
|
||||
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
{
|
||||
UInt32 prev = 0;
|
||||
for (i = 0; i < kNumHashValues; i++)
|
||||
{
|
||||
UInt32 prevGroupSize = counters[i] - prev;
|
||||
const UInt32 prevGroupSize = counters[i] - prev;
|
||||
if (prevGroupSize == 0)
|
||||
continue;
|
||||
SetGroupSize(Indices + prev, prevGroupSize);
|
||||
prev = counters[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
int NumRefBits;
|
||||
UInt32 NumSortedBytes;
|
||||
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
|
||||
unsigned NumRefBits;
|
||||
size_t NumSortedBytes;
|
||||
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++)
|
||||
{}
|
||||
NumRefBits = 32 - NumRefBits;
|
||||
if (NumRefBits > kNumRefBitsMax)
|
||||
NumRefBits = kNumRefBitsMax;
|
||||
NumRefBits = kNumRefBitsMax;
|
||||
|
||||
for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
|
||||
{
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
UInt32 finishedGroupSize = 0;
|
||||
#endif
|
||||
UInt32 newLimit = 0;
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
size_t finishedGroupSize = 0;
|
||||
#endif
|
||||
size_t newLimit = 0;
|
||||
for (i = 0; i < blockSize;)
|
||||
{
|
||||
UInt32 groupSize;
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
size_t groupSize;
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
|
||||
if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
|
||||
{
|
||||
|
|
@ -440,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
|
|||
}
|
||||
for (groupSize = 1;
|
||||
(Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
|
||||
groupSize++);
|
||||
|
||||
groupSize++)
|
||||
{}
|
||||
groupSize++;
|
||||
|
||||
#else
|
||||
#else
|
||||
|
||||
groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
|
||||
groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
|
||||
{
|
||||
BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
|
||||
if ((Indices[i] & 0x40000000) != 0)
|
||||
{
|
||||
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
|
||||
Indices[(size_t)i + 1] &= kIndexMask;
|
||||
}
|
||||
Indices[i] &= kIndexMask;
|
||||
groupSize++;
|
||||
if (finishedGroup || groupSize == 1)
|
||||
{
|
||||
Indices[i - finishedGroupSize] &= kIndexMask;
|
||||
if (finishedGroupSize > 1)
|
||||
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
|
||||
const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
|
||||
if (Indices[i] & 0x40000000)
|
||||
{
|
||||
UInt32 newGroupSize = groupSize + finishedGroupSize;
|
||||
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize);
|
||||
finishedGroupSize = newGroupSize;
|
||||
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
|
||||
Indices[(size_t)i + 1] &= kIndexMask;
|
||||
}
|
||||
i += groupSize;
|
||||
continue;
|
||||
}
|
||||
finishedGroupSize = 0;
|
||||
Indices[i] &= kIndexMask;
|
||||
groupSize++;
|
||||
if (finishedGroup || groupSize == 1)
|
||||
{
|
||||
Indices[i - finishedGroupSize] &= kIndexMask;
|
||||
if (finishedGroupSize > 1)
|
||||
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
|
||||
{
|
||||
const size_t newGroupSize = groupSize + finishedGroupSize;
|
||||
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
|
||||
finishedGroupSize = newGroupSize;
|
||||
}
|
||||
i += groupSize;
|
||||
continue;
|
||||
}
|
||||
finishedGroupSize = 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (NumSortedBytes >= blockSize)
|
||||
{
|
||||
UInt32 j;
|
||||
size_t j;
|
||||
for (j = 0; j < groupSize; j++)
|
||||
{
|
||||
UInt32 t = (i + j);
|
||||
size_t t = i + j;
|
||||
/* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
|
||||
Groups[Indices[t]] = t;
|
||||
Groups[Indices[t]] = (UInt32)t;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
, 0, blockSize
|
||||
#endif
|
||||
) != 0)
|
||||
#ifndef BLOCK_SORT_USE_HEAP_SORT
|
||||
, 0, blockSize
|
||||
#endif
|
||||
))
|
||||
newLimit = i + groupSize;
|
||||
i += groupSize;
|
||||
}
|
||||
|
|
@ -497,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
|
|||
break;
|
||||
}
|
||||
}
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
for (i = 0; i < blockSize;)
|
||||
{
|
||||
UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
|
||||
if ((Indices[i] & 0x40000000) != 0)
|
||||
size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
|
||||
if (Indices[i] & 0x40000000)
|
||||
{
|
||||
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
|
||||
groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits;
|
||||
Indices[(size_t)i + 1] &= kIndexMask;
|
||||
}
|
||||
Indices[i] &= kIndexMask;
|
||||
groupSize++;
|
||||
i += groupSize;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
return Groups[0];
|
||||
}
|
||||
|
|
|
|||
11
C/BwtSort.h
11
C/BwtSort.h
|
|
@ -1,8 +1,8 @@
|
|||
/* BwtSort.h -- BWT block sorting
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __BWT_SORT_H
|
||||
#define __BWT_SORT_H
|
||||
#ifndef ZIP7_INC_BWT_SORT_H
|
||||
#define ZIP7_INC_BWT_SORT_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -10,16 +10,17 @@ EXTERN_C_BEGIN
|
|||
|
||||
/* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */
|
||||
/* #define BLOCK_SORT_EXTERNAL_FLAGS */
|
||||
// #define BLOCK_SORT_EXTERNAL_FLAGS
|
||||
|
||||
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
|
||||
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5))
|
||||
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) (((blockSize) + 31) >> 5)
|
||||
#else
|
||||
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0
|
||||
#endif
|
||||
|
||||
#define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16))
|
||||
|
||||
UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize);
|
||||
UInt32 BlockSort(UInt32 *indices, const Byte *data, size_t blockSize);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
247
C/Compiler.h
247
C/Compiler.h
|
|
@ -1,12 +1,105 @@
|
|||
/* Compiler.h
|
||||
2021-01-05 : Igor Pavlov : Public domain */
|
||||
/* Compiler.h : Compiler specific defines and pragmas
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_COMPILER_H
|
||||
#define __7Z_COMPILER_H
|
||||
#ifndef ZIP7_INC_COMPILER_H
|
||||
#define ZIP7_INC_COMPILER_H
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wunused-private-field"
|
||||
#if defined(__clang__)
|
||||
# define Z7_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
|
||||
#endif
|
||||
#if defined(__clang__) && defined(__apple_build_version__)
|
||||
# define Z7_APPLE_CLANG_VERSION Z7_CLANG_VERSION
|
||||
#elif defined(__clang__)
|
||||
# define Z7_LLVM_CLANG_VERSION Z7_CLANG_VERSION
|
||||
#elif defined(__GNUC__)
|
||||
# define Z7_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#if !defined(__clang__) && !defined(__GNUC__)
|
||||
#define Z7_MSC_VER_ORIGINAL _MSC_VER
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
#define Z7_MINGW
|
||||
#endif
|
||||
|
||||
#if defined(__LCC__) && (defined(__MCST__) || defined(__e2k__))
|
||||
#define Z7_MCST_LCC
|
||||
#define Z7_MCST_LCC_VERSION (__LCC__ * 100 + __LCC_MINOR__)
|
||||
#endif
|
||||
|
||||
/*
|
||||
#if defined(__AVX2__) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 40600) \
|
||||
|| defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30100) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1800) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1400)
|
||||
#define Z7_COMPILER_AVX2_SUPPORTED
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
// #pragma GCC diagnostic ignored "-Wunknown-pragmas"
|
||||
|
||||
#ifdef __clang__
|
||||
// padding size of '' with 4 bytes to alignment boundary
|
||||
#pragma GCC diagnostic ignored "-Wpadded"
|
||||
|
||||
#if defined(Z7_LLVM_CLANG_VERSION) && (__clang_major__ == 13) \
|
||||
&& defined(__FreeBSD__)
|
||||
// freebsd:
|
||||
#pragma GCC diagnostic ignored "-Wexcess-padding"
|
||||
#endif
|
||||
|
||||
#if __clang_major__ >= 16
|
||||
#pragma GCC diagnostic ignored "-Wunsafe-buffer-usage"
|
||||
#endif
|
||||
|
||||
#if __clang_major__ == 13
|
||||
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
|
||||
// cheri
|
||||
#pragma GCC diagnostic ignored "-Wcapability-to-integer-cast"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if __clang_major__ == 13
|
||||
// for <arm_neon.h>
|
||||
#pragma GCC diagnostic ignored "-Wreserved-identifier"
|
||||
#endif
|
||||
|
||||
#endif // __clang__
|
||||
|
||||
#if defined(_WIN32) && defined(__clang__) && __clang_major__ >= 16
|
||||
// #pragma GCC diagnostic ignored "-Wcast-function-type-strict"
|
||||
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION \
|
||||
_Pragma("GCC diagnostic ignored \"-Wcast-function-type-strict\"")
|
||||
#else
|
||||
#define Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
|
||||
#endif
|
||||
|
||||
typedef void (*Z7_void_Function)(void);
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Z7_CAST_FUNC_C (Z7_void_Function)
|
||||
#elif defined(_MSC_VER) && _MSC_VER > 1920
|
||||
#define Z7_CAST_FUNC_C (void *)
|
||||
// #pragma warning(disable : 4191) // 'type cast': unsafe conversion from 'FARPROC' to 'void (__cdecl *)()'
|
||||
#else
|
||||
#define Z7_CAST_FUNC_C
|
||||
#endif
|
||||
/*
|
||||
#if (defined(__GNUC__) && (__GNUC__ >= 8)) || defined(__clang__)
|
||||
// #pragma GCC diagnostic ignored "-Wcast-function-type"
|
||||
#endif
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40000) && (Z7_GCC_VERSION < 70000)
|
||||
#pragma GCC diagnostic ignored "-Wstrict-aliasing"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
|
|
@ -17,24 +110,134 @@
|
|||
#pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
|
||||
#endif
|
||||
|
||||
#if _MSC_VER >= 1300
|
||||
#pragma warning(disable : 4996) // This function or variable may be unsafe
|
||||
#else
|
||||
#pragma warning(disable : 4511) // copy constructor could not be generated
|
||||
#pragma warning(disable : 4512) // assignment operator could not be generated
|
||||
#pragma warning(disable : 4514) // unreferenced inline function has been removed
|
||||
#pragma warning(disable : 4702) // unreachable code
|
||||
#pragma warning(disable : 4710) // not inlined
|
||||
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
|
||||
#pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
|
||||
#endif
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1800
|
||||
#pragma warning(disable : 4464) // relative include path contains '..'
|
||||
#endif
|
||||
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
|
||||
#pragma clang diagnostic ignored "-Wmicrosoft-exception-spec"
|
||||
// #pragma clang diagnostic ignored "-Wreserved-id-macro"
|
||||
#endif
|
||||
// == 1200 : -O1 : for __forceinline
|
||||
// >= 1900 : -O1 : for printf
|
||||
#pragma warning(disable : 4710) // function not inlined
|
||||
|
||||
#if _MSC_VER < 1900
|
||||
// winnt.h: 'Int64ShllMod32'
|
||||
#pragma warning(disable : 4514) // unreferenced inline function has been removed
|
||||
#endif
|
||||
|
||||
#if _MSC_VER < 1300
|
||||
// #pragma warning(disable : 4702) // unreachable code
|
||||
// Bra.c : -O1:
|
||||
#pragma warning(disable : 4714) // function marked as __forceinline not inlined
|
||||
#endif
|
||||
|
||||
/*
|
||||
#if _MSC_VER > 1400 && _MSC_VER <= 1900
|
||||
// strcat: This function or variable may be unsafe
|
||||
// sysinfoapi.h: kit10: GetVersion was declared deprecated
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
*/
|
||||
|
||||
#if _MSC_VER > 1200
|
||||
// -Wall warnings
|
||||
|
||||
#pragma warning(disable : 4711) // function selected for automatic inline expansion
|
||||
#pragma warning(disable : 4820) // '2' bytes padding added after data member
|
||||
|
||||
#if _MSC_VER >= 1400 && _MSC_VER < 1920
|
||||
// 1400: string.h: _DBG_MEMCPY_INLINE_
|
||||
// 1600 - 191x : smmintrin.h __cplusplus'
|
||||
// is not defined as a preprocessor macro, replacing with '0' for '#if/#elif'
|
||||
#pragma warning(disable : 4668)
|
||||
|
||||
// 1400 - 1600 : WinDef.h : 'FARPROC' :
|
||||
// 1900 - 191x : immintrin.h: _readfsbase_u32
|
||||
// no function prototype given : converting '()' to '(void)'
|
||||
#pragma warning(disable : 4255)
|
||||
#endif
|
||||
|
||||
#if _MSC_VER >= 1914
|
||||
// Compiler will insert Spectre mitigation for memory load if /Qspectre switch specified
|
||||
#pragma warning(disable : 5045)
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER > 1200
|
||||
#endif // _MSC_VER
|
||||
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 4)
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
|
||||
_Pragma("clang loop unroll(disable)") \
|
||||
_Pragma("clang loop vectorize(disable)")
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 5) \
|
||||
&& (!defined(Z7_MCST_LCC_VERSION) || (Z7_MCST_LCC_VERSION >= 12610))
|
||||
#define Z7_ATTRIB_NO_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
|
||||
// __attribute__((optimize("no-unroll-loops")));
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1920)
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \
|
||||
_Pragma("loop( no_vector )")
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#else
|
||||
#define Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
|
||||
#define Z7_ATTRIB_NO_VECTORIZE
|
||||
#endif
|
||||
|
||||
#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920)
|
||||
#define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )")
|
||||
#define Z7_PRAGMA_OPTIMIZE_DEFAULT _Pragma("optimize ( \"\", on )")
|
||||
#else
|
||||
#define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE
|
||||
#define Z7_PRAGMA_OPTIMIZE_DEFAULT
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(MY_CPU_X86_OR_AMD64) && ( \
|
||||
defined(__clang__) && (__clang_major__ >= 4) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 5))
|
||||
#define Z7_ATTRIB_NO_SSE __attribute__((__target__("no-sse")))
|
||||
#else
|
||||
#define Z7_ATTRIB_NO_SSE
|
||||
#endif
|
||||
|
||||
#define Z7_ATTRIB_NO_VECTOR \
|
||||
Z7_ATTRIB_NO_VECTORIZE \
|
||||
Z7_ATTRIB_NO_SSE
|
||||
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 8) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 1000) \
|
||||
/* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
|
||||
// GCC is not good for __builtin_expect()
|
||||
#define Z7_LIKELY(x) (__builtin_expect((x), 1))
|
||||
#define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
|
||||
// #define Z7_unlikely [[unlikely]]
|
||||
// #define Z7_likely [[likely]]
|
||||
#else
|
||||
#define Z7_LIKELY(x) (x)
|
||||
#define Z7_UNLIKELY(x) (x)
|
||||
// #define Z7_likely
|
||||
#endif
|
||||
|
||||
|
||||
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30600))
|
||||
|
||||
#if (Z7_CLANG_VERSION < 130000)
|
||||
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wreserved-id-macro\"")
|
||||
#else
|
||||
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER \
|
||||
_Pragma("GCC diagnostic push") \
|
||||
_Pragma("GCC diagnostic ignored \"-Wreserved-macro-identifier\"")
|
||||
#endif
|
||||
|
||||
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER \
|
||||
_Pragma("GCC diagnostic pop")
|
||||
#else
|
||||
#define Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
|
||||
#define UNUSED_VAR(x) (void)x;
|
||||
|
|
|
|||
1032
C/CpuArch.c
1032
C/CpuArch.c
File diff suppressed because it is too large
Load diff
408
C/CpuArch.h
408
C/CpuArch.h
|
|
@ -1,8 +1,8 @@
|
|||
/* CpuArch.h -- CPU specific code
|
||||
2021-07-13 : Igor Pavlov : Public domain */
|
||||
Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __CPU_ARCH_H
|
||||
#define __CPU_ARCH_H
|
||||
#ifndef ZIP7_INC_CPU_ARCH_H
|
||||
#define ZIP7_INC_CPU_ARCH_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -20,6 +20,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
MY_CPU_64BIT doesn't mean that (sizeof(void *) == 8)
|
||||
*/
|
||||
|
||||
#if !defined(_M_ARM64EC)
|
||||
#if defined(_M_X64) \
|
||||
|| defined(_M_AMD64) \
|
||||
|| defined(__x86_64__) \
|
||||
|
|
@ -35,6 +36,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_IX86) \
|
||||
|
|
@ -45,13 +47,34 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) \
|
||||
|| defined(MY_CPU_AMD64) \
|
||||
|| defined(_M_IX86_FP) && (_M_IX86_FP >= 2)
|
||||
#define MY_CPU_SSE2
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(_M_ARM64) \
|
||||
|| defined(_M_ARM64EC) \
|
||||
|| defined(__AARCH64EL__) \
|
||||
|| defined(__AARCH64EB__) \
|
||||
|| defined(__aarch64__)
|
||||
#define MY_CPU_ARM64
|
||||
#define MY_CPU_NAME "arm64"
|
||||
#if defined(__ILP32__) \
|
||||
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
|
||||
#define MY_CPU_NAME "arm64-32"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#elif defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 16)
|
||||
#define MY_CPU_NAME "arm64-128"
|
||||
#define MY_CPU_SIZEOF_POINTER 16
|
||||
#else
|
||||
#if defined(_M_ARM64EC)
|
||||
#define MY_CPU_NAME "arm64ec"
|
||||
#else
|
||||
#define MY_CPU_NAME "arm64"
|
||||
#endif
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
|
||||
|
|
@ -68,8 +91,10 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#define MY_CPU_ARM
|
||||
|
||||
#if defined(__thumb__) || defined(__THUMBEL__) || defined(_M_ARMT)
|
||||
#define MY_CPU_ARMT
|
||||
#define MY_CPU_NAME "armt"
|
||||
#else
|
||||
#define MY_CPU_ARM32
|
||||
#define MY_CPU_NAME "arm"
|
||||
#endif
|
||||
/* #define MY_CPU_32BIT */
|
||||
|
|
@ -103,6 +128,8 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
|| defined(__PPC__) \
|
||||
|| defined(_POWER)
|
||||
|
||||
#define MY_CPU_PPC_OR_PPC64
|
||||
|
||||
#if defined(__ppc64__) \
|
||||
|| defined(__powerpc64__) \
|
||||
|| defined(_LP64) \
|
||||
|
|
@ -123,12 +150,76 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#endif
|
||||
|
||||
|
||||
#if defined(__sparc64__)
|
||||
#define MY_CPU_NAME "sparc64"
|
||||
#if defined(__sparc__) \
|
||||
|| defined(__sparc)
|
||||
#define MY_CPU_SPARC
|
||||
#if defined(__LP64__) \
|
||||
|| defined(_LP64) \
|
||||
|| defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
|
||||
#define MY_CPU_NAME "sparcv9"
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#define MY_CPU_64BIT
|
||||
#elif defined(__sparc_v9__) \
|
||||
|| defined(__sparcv9)
|
||||
#define MY_CPU_64BIT
|
||||
#if defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
|
||||
#define MY_CPU_NAME "sparcv9-32"
|
||||
#else
|
||||
#define MY_CPU_NAME "sparcv9m"
|
||||
#endif
|
||||
#elif defined(__sparc_v8__) \
|
||||
|| defined(__sparcv8)
|
||||
#define MY_CPU_NAME "sparcv8"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#else
|
||||
#define MY_CPU_NAME "sparc"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__riscv) \
|
||||
|| defined(__riscv__)
|
||||
#define MY_CPU_RISCV
|
||||
#if __riscv_xlen == 32
|
||||
#define MY_CPU_NAME "riscv32"
|
||||
#elif __riscv_xlen == 64
|
||||
#define MY_CPU_NAME "riscv64"
|
||||
#else
|
||||
#define MY_CPU_NAME "riscv"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__loongarch__)
|
||||
#define MY_CPU_LOONGARCH
|
||||
#if defined(__loongarch64) || defined(__loongarch_grlen) && (__loongarch_grlen == 64)
|
||||
#define MY_CPU_64BIT
|
||||
#endif
|
||||
#if defined(__loongarch64)
|
||||
#define MY_CPU_NAME "loongarch64"
|
||||
#define MY_CPU_LOONGARCH64
|
||||
#else
|
||||
#define MY_CPU_NAME "loongarch"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// #undef MY_CPU_NAME
|
||||
// #undef MY_CPU_SIZEOF_POINTER
|
||||
// #define __e2k__
|
||||
// #define __SIZEOF_POINTER__ 4
|
||||
#if defined(__e2k__)
|
||||
#define MY_CPU_E2K
|
||||
#if defined(__ILP32__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 4)
|
||||
#define MY_CPU_NAME "e2k-32"
|
||||
#define MY_CPU_SIZEOF_POINTER 4
|
||||
#else
|
||||
#define MY_CPU_NAME "e2k"
|
||||
#if defined(__LP64__) || defined(__SIZEOF_POINTER__) && (__SIZEOF_POINTER__ == 8)
|
||||
#define MY_CPU_SIZEOF_POINTER 8
|
||||
#endif
|
||||
#endif
|
||||
#define MY_CPU_64BIT
|
||||
#elif defined(__sparc__)
|
||||
#define MY_CPU_NAME "sparc"
|
||||
/* #define MY_CPU_32BIT */
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -162,6 +253,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
|| defined(MY_CPU_ARM_LE) \
|
||||
|| defined(MY_CPU_ARM64_LE) \
|
||||
|| defined(MY_CPU_IA64_LE) \
|
||||
|| defined(_LITTLE_ENDIAN) \
|
||||
|| defined(__LITTLE_ENDIAN__) \
|
||||
|| defined(__ARMEL__) \
|
||||
|| defined(__THUMBEL__) \
|
||||
|
|
@ -194,6 +286,9 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#error Stop_Compiling_Bad_Endian
|
||||
#endif
|
||||
|
||||
#if !defined(MY_CPU_LE) && !defined(MY_CPU_BE)
|
||||
#error Stop_Compiling_CPU_ENDIAN_must_be_detected_at_compile_time
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
|
||||
#error Stop_Compiling_Bad_32_64_BIT
|
||||
|
|
@ -235,6 +330,7 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
|
||||
|
||||
#ifndef MY_CPU_NAME
|
||||
// #define MY_CPU_IS_UNKNOWN
|
||||
#ifdef MY_CPU_LE
|
||||
#define MY_CPU_NAME "LE"
|
||||
#elif defined(MY_CPU_BE)
|
||||
|
|
@ -250,15 +346,121 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
|
||||
|
||||
|
||||
#ifdef __has_builtin
|
||||
#define Z7_has_builtin(x) __has_builtin(x)
|
||||
#else
|
||||
#define Z7_has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
|
||||
#define Z7_BSWAP32_CONST(v) \
|
||||
( (((UInt32)(v) << 24) ) \
|
||||
| (((UInt32)(v) << 8) & (UInt32)0xff0000) \
|
||||
| (((UInt32)(v) >> 8) & (UInt32)0xff00 ) \
|
||||
| (((UInt32)(v) >> 24) ))
|
||||
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Note: these macros will use bswap instruction (486), that is unsupported in 386 cpu */
|
||||
|
||||
#pragma intrinsic(_byteswap_ushort)
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#pragma intrinsic(_byteswap_uint64)
|
||||
|
||||
#define Z7_BSWAP16(v) _byteswap_ushort(v)
|
||||
#define Z7_BSWAP32(v) _byteswap_ulong (v)
|
||||
#define Z7_BSWAP64(v) _byteswap_uint64(v)
|
||||
#define Z7_CPU_FAST_BSWAP_SUPPORTED
|
||||
|
||||
/* GCC can generate slow code that calls function for __builtin_bswap32() for:
|
||||
- GCC for RISCV, if Zbb/XTHeadBb extension is not used.
|
||||
- GCC for SPARC.
|
||||
The code from CLANG for SPARC also is not fastest.
|
||||
So we don't define Z7_CPU_FAST_BSWAP_SUPPORTED in some cases.
|
||||
*/
|
||||
#elif (!defined(MY_CPU_RISCV) || defined (__riscv_zbb) || defined(__riscv_xtheadbb)) \
|
||||
&& !defined(MY_CPU_SPARC) \
|
||||
&& ( \
|
||||
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|
||||
|| (defined(__clang__) && Z7_has_builtin(__builtin_bswap16)) \
|
||||
)
|
||||
|
||||
#define Z7_BSWAP16(v) __builtin_bswap16(v)
|
||||
#define Z7_BSWAP32(v) __builtin_bswap32(v)
|
||||
#define Z7_BSWAP64(v) __builtin_bswap64(v)
|
||||
#define Z7_CPU_FAST_BSWAP_SUPPORTED
|
||||
|
||||
#else
|
||||
|
||||
#define Z7_BSWAP16(v) ((UInt16) \
|
||||
( ((UInt32)(v) << 8) \
|
||||
| ((UInt32)(v) >> 8) \
|
||||
))
|
||||
|
||||
#define Z7_BSWAP32(v) Z7_BSWAP32_CONST(v)
|
||||
|
||||
#define Z7_BSWAP64(v) \
|
||||
( ( ( (UInt64)(v) ) << 8 * 7 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 1) ) << 8 * 5 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 2) ) << 8 * 3 ) \
|
||||
| ( ( (UInt64)(v) & ((UInt32)0xff << 8 * 3) ) << 8 * 1 ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 1 ) & ((UInt32)0xff << 8 * 3) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 3 ) & ((UInt32)0xff << 8 * 2) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 5 ) & ((UInt32)0xff << 8 * 1) ) \
|
||||
| ( ( (UInt64)(v) >> 8 * 7 ) ) \
|
||||
)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifdef MY_CPU_LE
|
||||
#if defined(MY_CPU_X86_OR_AMD64) \
|
||||
|| defined(MY_CPU_ARM64)
|
||||
|| defined(MY_CPU_ARM64) \
|
||||
|| defined(MY_CPU_RISCV) && defined(__riscv_misaligned_fast) \
|
||||
|| defined(MY_CPU_E2K) && defined(__iset__) && (__iset__ >= 6)
|
||||
#define MY_CPU_LE_UNALIGN
|
||||
#define MY_CPU_LE_UNALIGN_64
|
||||
#elif defined(__ARM_FEATURE_UNALIGNED)
|
||||
/* gcc9 for 32-bit arm can use LDRD instruction that requires 32-bit alignment.
|
||||
So we can't use unaligned 64-bit operations. */
|
||||
#define MY_CPU_LE_UNALIGN
|
||||
/* === ALIGNMENT on 32-bit arm and LDRD/STRD/LDM/STM instructions.
|
||||
Description of problems:
|
||||
problem-1 : 32-bit ARM architecture:
|
||||
multi-access (pair of 32-bit accesses) instructions (LDRD/STRD/LDM/STM)
|
||||
require 32-bit (WORD) alignment (by 32-bit ARM architecture).
|
||||
So there is "Alignment fault exception", if data is not aligned for 32-bit.
|
||||
|
||||
problem-2 : 32-bit kernels and arm64 kernels:
|
||||
32-bit linux kernels provide fixup for these "paired" instruction "Alignment fault exception".
|
||||
So unaligned paired-access instructions work via exception handler in kernel in 32-bit linux.
|
||||
|
||||
But some arm64 kernels do not handle these faults in 32-bit programs.
|
||||
So we have unhandled exception for such instructions.
|
||||
Probably some new arm64 kernels have fixed it, and unaligned
|
||||
paired-access instructions work in new kernels?
|
||||
|
||||
problem-3 : compiler for 32-bit arm:
|
||||
Compilers use LDRD/STRD/LDM/STM for UInt64 accesses
|
||||
and for other cases where two 32-bit accesses are fused
|
||||
to one multi-access instruction.
|
||||
So UInt64 variables must be aligned for 32-bit, and each
|
||||
32-bit access must be aligned for 32-bit, if we want to
|
||||
avoid "Alignment fault" exception (handled or unhandled).
|
||||
|
||||
problem-4 : performance:
|
||||
Even if unaligned access is handled by kernel, it will be slow.
|
||||
So if we allow unaligned access, we can get fast unaligned
|
||||
single-access, and slow unaligned paired-access.
|
||||
|
||||
We don't allow unaligned access on 32-bit arm, because compiler
|
||||
generates paired-access instructions that require 32-bit alignment,
|
||||
and some arm64 kernels have no handler for these instructions.
|
||||
Also unaligned paired-access instructions will be slow, if kernel handles them.
|
||||
*/
|
||||
// it must be disabled:
|
||||
// #define MY_CPU_LE_UNALIGN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -269,13 +471,11 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
|
||||
#ifdef MY_CPU_LE_UNALIGN_64
|
||||
#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
|
||||
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
|
||||
#endif
|
||||
|
||||
#define SetUi16(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
#define SetUi32(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
#ifdef MY_CPU_LE_UNALIGN_64
|
||||
#define SetUi64(p, v) { *(UInt64 *)(void *)(p) = (v); }
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
|
|
@ -302,50 +502,33 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
#endif
|
||||
|
||||
|
||||
#ifndef MY_CPU_LE_UNALIGN_64
|
||||
|
||||
#ifndef GetUi64
|
||||
#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
|
||||
#endif
|
||||
|
||||
#ifndef SetUi64
|
||||
#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
|
||||
SetUi32(_ppp2_ , (UInt32)_vvv2_); \
|
||||
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
|
||||
|
||||
SetUi32(_ppp2_ , (UInt32)_vvv2_) \
|
||||
SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)) }
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_LE_UNALIGN) && defined(Z7_CPU_FAST_BSWAP_SUPPORTED)
|
||||
|
||||
|
||||
#ifdef __has_builtin
|
||||
#define MY__has_builtin(x) __has_builtin(x)
|
||||
#if 0
|
||||
// Z7_BSWAP16 can be slow for x86-msvc
|
||||
#define GetBe16_to32(p) (Z7_BSWAP16 (*(const UInt16 *)(const void *)(p)))
|
||||
#else
|
||||
#define MY__has_builtin(x) 0
|
||||
#define GetBe16_to32(p) (Z7_BSWAP32 (*(const UInt16 *)(const void *)(p)) >> 16)
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
#define GetBe32(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p))
|
||||
#define SetBe32(p, v) { (*(UInt32 *)(void *)(p)) = Z7_BSWAP32(v); }
|
||||
|
||||
/* Note: we use bswap instruction, that is unsupported in 386 cpu */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#pragma intrinsic(_byteswap_ushort)
|
||||
#pragma intrinsic(_byteswap_ulong)
|
||||
#pragma intrinsic(_byteswap_uint64)
|
||||
|
||||
/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
|
||||
#define GetBe32(p) _byteswap_ulong (*(const UInt32 *)(const void *)(p))
|
||||
#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const void *)(p))
|
||||
|
||||
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
|
||||
|
||||
#elif defined(MY_CPU_LE_UNALIGN) && ( \
|
||||
(defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
|
||||
|| (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
|
||||
|
||||
/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const void *)(p)) */
|
||||
#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const void *)(p))
|
||||
#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const void *)(p))
|
||||
|
||||
#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
|
||||
#if defined(MY_CPU_LE_UNALIGN_64)
|
||||
#define GetBe64(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p))
|
||||
#define SetBe64(p, v) { (*(UInt64 *)(void *)(p)) = Z7_BSWAP64(v); }
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
|
|
@ -355,8 +538,6 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
((UInt32)((const Byte *)(p))[2] << 8) | \
|
||||
((const Byte *)(p))[3] )
|
||||
|
||||
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
|
||||
|
||||
#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
|
||||
_ppp_[0] = (Byte)(_vvv_ >> 24); \
|
||||
_ppp_[1] = (Byte)(_vvv_ >> 16); \
|
||||
|
|
@ -365,53 +546,115 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
|
|||
|
||||
#endif
|
||||
|
||||
#ifndef GetBe64
|
||||
#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
|
||||
#endif
|
||||
|
||||
#ifndef SetBe64
|
||||
#define SetBe64(p, v) { Byte *_ppp_ = (Byte *)(p); UInt64 _vvv_ = (v); \
|
||||
_ppp_[0] = (Byte)(_vvv_ >> 56); \
|
||||
_ppp_[1] = (Byte)(_vvv_ >> 48); \
|
||||
_ppp_[2] = (Byte)(_vvv_ >> 40); \
|
||||
_ppp_[3] = (Byte)(_vvv_ >> 32); \
|
||||
_ppp_[4] = (Byte)(_vvv_ >> 24); \
|
||||
_ppp_[5] = (Byte)(_vvv_ >> 16); \
|
||||
_ppp_[6] = (Byte)(_vvv_ >> 8); \
|
||||
_ppp_[7] = (Byte)_vvv_; }
|
||||
#endif
|
||||
|
||||
#ifndef GetBe16
|
||||
|
||||
#ifdef GetBe16_to32
|
||||
#define GetBe16(p) ( (UInt16) GetBe16_to32(p))
|
||||
#else
|
||||
#define GetBe16(p) ( (UInt16) ( \
|
||||
((UInt16)((const Byte *)(p))[0] << 8) | \
|
||||
((const Byte *)(p))[1] ))
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_BE)
|
||||
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
|
||||
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
|
||||
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
|
||||
// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b1) | ((b0) << 8))
|
||||
#elif defined(MY_CPU_LE)
|
||||
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
|
||||
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
|
||||
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
|
||||
// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b0) | ((b1) << 8))
|
||||
#else
|
||||
#error Stop_Compiling_Unknown_Endian_CONV
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_BE)
|
||||
|
||||
#define GetBe64a(p) (*(const UInt64 *)(const void *)(p))
|
||||
#define GetBe32a(p) (*(const UInt32 *)(const void *)(p))
|
||||
#define GetBe16a(p) (*(const UInt16 *)(const void *)(p))
|
||||
#define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
#define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
|
||||
#define GetUi64a(p) GetUi64(p)
|
||||
#define GetUi32a(p) GetUi32(p)
|
||||
#define GetUi16a(p) GetUi16(p)
|
||||
#define SetUi32a(p, v) SetUi32(p, v)
|
||||
#define SetUi16a(p, v) SetUi16(p, v)
|
||||
|
||||
#elif defined(MY_CPU_LE)
|
||||
|
||||
#define GetUi64a(p) (*(const UInt64 *)(const void *)(p))
|
||||
#define GetUi32a(p) (*(const UInt32 *)(const void *)(p))
|
||||
#define GetUi16a(p) (*(const UInt16 *)(const void *)(p))
|
||||
#define SetUi32a(p, v) { *(UInt32 *)(void *)(p) = (v); }
|
||||
#define SetUi16a(p, v) { *(UInt16 *)(void *)(p) = (v); }
|
||||
|
||||
#define GetBe64a(p) GetBe64(p)
|
||||
#define GetBe32a(p) GetBe32(p)
|
||||
#define GetBe16a(p) GetBe16(p)
|
||||
#define SetBe32a(p, v) SetBe32(p, v)
|
||||
#define SetBe16a(p, v) SetBe16(p, v)
|
||||
|
||||
#else
|
||||
#error Stop_Compiling_Unknown_Endian_CPU_a
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef GetBe16_to32
|
||||
#define GetBe16_to32(p) GetBe16(p)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MY_CPU_X86_OR_AMD64) \
|
||||
|| defined(MY_CPU_ARM_OR_ARM64) \
|
||||
|| defined(MY_CPU_PPC_OR_PPC64)
|
||||
#define Z7_CPU_FAST_ROTATE_SUPPORTED
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UInt32 maxFunc;
|
||||
UInt32 vendor[3];
|
||||
UInt32 ver;
|
||||
UInt32 b;
|
||||
UInt32 c;
|
||||
UInt32 d;
|
||||
} Cx86cpuid;
|
||||
|
||||
enum
|
||||
{
|
||||
CPU_FIRM_INTEL,
|
||||
CPU_FIRM_AMD,
|
||||
CPU_FIRM_VIA
|
||||
};
|
||||
|
||||
void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
|
||||
|
||||
BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
|
||||
int x86cpuid_GetFirm(const Cx86cpuid *p);
|
||||
|
||||
#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
|
||||
#define x86cpuid_GetModel(ver) (((ver >> 12) & 0xF0) | ((ver >> 4) & 0xF))
|
||||
#define x86cpuid_GetStepping(ver) (ver & 0xF)
|
||||
|
||||
BoolInt CPU_Is_InOrder(void);
|
||||
void Z7_FASTCALL z7_x86_cpuid(UInt32 a[4], UInt32 function);
|
||||
UInt32 Z7_FASTCALL z7_x86_cpuid_GetMaxFunc(void);
|
||||
#if defined(MY_CPU_AMD64)
|
||||
#define Z7_IF_X86_CPUID_SUPPORTED
|
||||
#else
|
||||
#define Z7_IF_X86_CPUID_SUPPORTED if (z7_x86_cpuid_GetMaxFunc())
|
||||
#endif
|
||||
|
||||
BoolInt CPU_IsSupported_AES(void);
|
||||
BoolInt CPU_IsSupported_AVX(void);
|
||||
BoolInt CPU_IsSupported_AVX2(void);
|
||||
BoolInt CPU_IsSupported_AVX512F_AVX512VL(void);
|
||||
BoolInt CPU_IsSupported_VAES_AVX2(void);
|
||||
BoolInt CPU_IsSupported_CMOV(void);
|
||||
BoolInt CPU_IsSupported_SSE(void);
|
||||
BoolInt CPU_IsSupported_SSE2(void);
|
||||
BoolInt CPU_IsSupported_SSSE3(void);
|
||||
BoolInt CPU_IsSupported_SSE41(void);
|
||||
BoolInt CPU_IsSupported_SHA(void);
|
||||
BoolInt CPU_IsSupported_SHA512(void);
|
||||
BoolInt CPU_IsSupported_PageGB(void);
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
|
@ -429,12 +672,13 @@ BoolInt CPU_IsSupported_SHA1(void);
|
|||
BoolInt CPU_IsSupported_SHA2(void);
|
||||
BoolInt CPU_IsSupported_AES(void);
|
||||
#endif
|
||||
BoolInt CPU_IsSupported_SHA512(void);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__)
|
||||
int My_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
|
||||
int My_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
|
||||
int z7_sysctlbyname_Get(const char *name, void *buf, size_t *bufSize);
|
||||
int z7_sysctlbyname_Get_UInt32(const char *name, UInt32 *val);
|
||||
#endif
|
||||
|
||||
EXTERN_C_END
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* Delta.h -- Delta converter
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __DELTA_H
|
||||
#define __DELTA_H
|
||||
#ifndef ZIP7_INC_DELTA_H
|
||||
#define ZIP7_INC_DELTA_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
|
|||
115
C/DllSecur.c
115
C/DllSecur.c
|
|
@ -1,110 +1,99 @@
|
|||
/* DllSecur.c -- DLL loading security
|
||||
2021-12-25 : Igor Pavlov : Public domain */
|
||||
2023-12-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <Windows.h>
|
||||
#include "7zWindows.h"
|
||||
|
||||
#include "DllSecur.h"
|
||||
|
||||
#ifndef UNDER_CE
|
||||
|
||||
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
|
||||
|
||||
typedef BOOL (WINAPI *Func_SetDefaultDllDirectories)(DWORD DirectoryFlags);
|
||||
|
||||
#define MY_LOAD_LIBRARY_SEARCH_USER_DIRS 0x400
|
||||
#define MY_LOAD_LIBRARY_SEARCH_SYSTEM32 0x800
|
||||
|
||||
#define DELIM "\0"
|
||||
|
||||
static const char * const g_Dlls =
|
||||
"userenv"
|
||||
DELIM "setupapi"
|
||||
DELIM "apphelp"
|
||||
DELIM "propsys"
|
||||
DELIM "dwmapi"
|
||||
DELIM "cryptbase"
|
||||
DELIM "oleacc"
|
||||
DELIM "clbcatq"
|
||||
DELIM "version"
|
||||
#ifndef _CONSOLE
|
||||
"UXTHEME\0"
|
||||
DELIM "uxtheme"
|
||||
#endif
|
||||
"USERENV\0"
|
||||
"SETUPAPI\0"
|
||||
"APPHELP\0"
|
||||
"PROPSYS\0"
|
||||
"DWMAPI\0"
|
||||
"CRYPTBASE\0"
|
||||
"OLEACC\0"
|
||||
"CLBCATQ\0"
|
||||
"VERSION\0"
|
||||
;
|
||||
DELIM;
|
||||
|
||||
#endif
|
||||
|
||||
// #define MY_CAST_FUNC (void(*)())
|
||||
#define MY_CAST_FUNC
|
||||
#ifdef __clang__
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#endif
|
||||
#if defined (_MSC_VER) && _MSC_VER >= 1900
|
||||
// sysinfoapi.h: kit10: GetVersion was declared deprecated
|
||||
#pragma warning(disable : 4996)
|
||||
#endif
|
||||
|
||||
void My_SetDefaultDllDirectories()
|
||||
#define IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN \
|
||||
if ((UInt16)GetVersion() != 6) { \
|
||||
const \
|
||||
Func_SetDefaultDllDirectories setDllDirs = \
|
||||
(Func_SetDefaultDllDirectories) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), \
|
||||
"SetDefaultDllDirectories"); \
|
||||
if (setDllDirs) if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS)) return; }
|
||||
|
||||
void My_SetDefaultDllDirectories(void)
|
||||
{
|
||||
#ifndef UNDER_CE
|
||||
|
||||
OSVERSIONINFO vi;
|
||||
vi.dwOSVersionInfoSize = sizeof(vi);
|
||||
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
|
||||
{
|
||||
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
|
||||
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
|
||||
if (setDllDirs)
|
||||
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
|
||||
return;
|
||||
}
|
||||
|
||||
IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void LoadSecurityDlls()
|
||||
void LoadSecurityDlls(void)
|
||||
{
|
||||
#ifndef UNDER_CE
|
||||
|
||||
wchar_t buf[MAX_PATH + 100];
|
||||
|
||||
{
|
||||
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
|
||||
OSVERSIONINFO vi;
|
||||
vi.dwOSVersionInfoSize = sizeof(vi);
|
||||
if (!GetVersionEx(&vi) || vi.dwMajorVersion != 6 || vi.dwMinorVersion != 0)
|
||||
{
|
||||
Func_SetDefaultDllDirectories setDllDirs = (Func_SetDefaultDllDirectories)
|
||||
MY_CAST_FUNC GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "SetDefaultDllDirectories");
|
||||
if (setDllDirs)
|
||||
if (setDllDirs(MY_LOAD_LIBRARY_SEARCH_SYSTEM32 | MY_LOAD_LIBRARY_SEARCH_USER_DIRS))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
unsigned len = GetSystemDirectoryW(buf, MAX_PATH + 2);
|
||||
if (len == 0 || len > MAX_PATH)
|
||||
return;
|
||||
}
|
||||
// at Vista (ver 6.0) : CoCreateInstance(CLSID_ShellLink, ...) doesn't work after SetDefaultDllDirectories() : Check it ???
|
||||
IF_NON_VISTA_SET_DLL_DIRS_AND_RETURN
|
||||
{
|
||||
wchar_t buf[MAX_PATH + 100];
|
||||
const char *dll;
|
||||
unsigned pos = (unsigned)lstrlenW(buf);
|
||||
|
||||
unsigned pos = GetSystemDirectoryW(buf, MAX_PATH + 2);
|
||||
if (pos == 0 || pos > MAX_PATH)
|
||||
return;
|
||||
if (buf[pos - 1] != '\\')
|
||||
buf[pos++] = '\\';
|
||||
|
||||
for (dll = g_Dlls; dll[0] != 0;)
|
||||
for (dll = g_Dlls; *dll != 0;)
|
||||
{
|
||||
unsigned k = 0;
|
||||
wchar_t *dest = &buf[pos];
|
||||
for (;;)
|
||||
{
|
||||
char c = *dll++;
|
||||
buf[pos + k] = (Byte)c;
|
||||
k++;
|
||||
const char c = *dll++;
|
||||
if (c == 0)
|
||||
break;
|
||||
*dest++ = (Byte)c;
|
||||
}
|
||||
|
||||
lstrcatW(buf, L".dll");
|
||||
dest[0] = '.';
|
||||
dest[1] = 'd';
|
||||
dest[2] = 'l';
|
||||
dest[3] = 'l';
|
||||
dest[4] = 0;
|
||||
// lstrcatW(buf, L".dll");
|
||||
LoadLibraryExW(buf, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif // _WIN32
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* DllSecur.h -- DLL loading for security
|
||||
2018-02-19 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __DLL_SECUR_H
|
||||
#define __DLL_SECUR_H
|
||||
#ifndef ZIP7_INC_DLL_SECUR_H
|
||||
#define ZIP7_INC_DLL_SECUR_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
|
|||
400
C/HuffEnc.c
400
C/HuffEnc.c
|
|
@ -1,60 +1,125 @@
|
|||
/* HuffEnc.c -- functions for Huffman encoding
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "HuffEnc.h"
|
||||
#include "Sort.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define kMaxLen 16
|
||||
#define NUM_BITS 10
|
||||
#define MASK (((unsigned)1 << NUM_BITS) - 1)
|
||||
#define kMaxLen Z7_HUFFMAN_LEN_MAX
|
||||
#define NUM_BITS 10
|
||||
#define MASK ((1u << NUM_BITS) - 1)
|
||||
#define FREQ_MASK (~(UInt32)MASK)
|
||||
#define NUM_COUNTERS (48 * 2)
|
||||
|
||||
#define NUM_COUNTERS 64
|
||||
#if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE))
|
||||
#if defined(MY_CPU_LE)
|
||||
#define HI_HALF_OFFSET 1
|
||||
#else
|
||||
#define HI_HALF_OFFSET 0
|
||||
#endif
|
||||
#define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET))
|
||||
#define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val);
|
||||
#define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi)
|
||||
#define UPDATE_E(eHi) eHi++;
|
||||
#else
|
||||
#define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS))
|
||||
#define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent field
|
||||
#define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS))
|
||||
#define UPDATE_E(eHi) eHi += 1 << NUM_BITS;
|
||||
#endif
|
||||
|
||||
#define HUFFMAN_SPEED_OPT
|
||||
|
||||
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen)
|
||||
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen)
|
||||
{
|
||||
UInt32 num = 0;
|
||||
/* if (maxLen > 10) maxLen = 10; */
|
||||
#if NUM_COUNTERS > 2
|
||||
unsigned counters[NUM_COUNTERS];
|
||||
#endif
|
||||
#if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2
|
||||
#define lenCounters (counters)
|
||||
#define codes (counters + kMaxLen + 4)
|
||||
#else
|
||||
unsigned lenCounters[kMaxLen + 1];
|
||||
UInt32 codes[kMaxLen + 1];
|
||||
#endif
|
||||
|
||||
unsigned num;
|
||||
{
|
||||
UInt32 i;
|
||||
unsigned i;
|
||||
// UInt32 sum = 0;
|
||||
|
||||
#if NUM_COUNTERS > 2
|
||||
|
||||
#ifdef HUFFMAN_SPEED_OPT
|
||||
|
||||
UInt32 counters[NUM_COUNTERS];
|
||||
#define CTR_ITEM_FOR_FREQ(freq) \
|
||||
counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)]
|
||||
|
||||
for (i = 0; i < NUM_COUNTERS; i++)
|
||||
counters[i] = 0;
|
||||
for (i = 0; i < numSymbols; i++)
|
||||
memset(lens, 0, numSymbols);
|
||||
{
|
||||
UInt32 freq = freqs[i];
|
||||
counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++;
|
||||
const UInt32 *fp = freqs + numSymbols;
|
||||
#define NUM_UNROLLS 1
|
||||
#if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possible
|
||||
if (numSymbols & 1)
|
||||
{
|
||||
UInt32 f;
|
||||
f = *--fp; CTR_ITEM_FOR_FREQ(f)++;
|
||||
// sum += f;
|
||||
}
|
||||
#endif
|
||||
do
|
||||
{
|
||||
UInt32 f;
|
||||
fp -= NUM_UNROLLS;
|
||||
f = fp[0]; CTR_ITEM_FOR_FREQ(f)++;
|
||||
// sum += f;
|
||||
#if NUM_UNROLLS > 1
|
||||
f = fp[1]; CTR_ITEM_FOR_FREQ(f)++;
|
||||
// sum += f;
|
||||
#endif
|
||||
}
|
||||
while (fp != freqs);
|
||||
}
|
||||
|
||||
for (i = 1; i < NUM_COUNTERS; i++)
|
||||
#if 0
|
||||
printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols);
|
||||
{
|
||||
UInt32 temp = counters[i];
|
||||
counters[i] = num;
|
||||
num += temp;
|
||||
unsigned k = 0;
|
||||
for (k = 0; k < NUM_COUNTERS; k++)
|
||||
printf(" %u", counters[k]);
|
||||
}
|
||||
|
||||
for (i = 0; i < numSymbols; i++)
|
||||
#endif
|
||||
|
||||
num = counters[1];
|
||||
counters[1] = 0;
|
||||
for (i = 2; i != NUM_COUNTERS; i += 2)
|
||||
{
|
||||
UInt32 freq = freqs[i];
|
||||
if (freq == 0)
|
||||
lens[i] = 0;
|
||||
else
|
||||
p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS);
|
||||
unsigned c;
|
||||
c = (counters )[i]; (counters )[i] = num; num += c;
|
||||
c = (counters + 1)[i]; (counters + 1)[i] = num; num += c;
|
||||
}
|
||||
counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array
|
||||
{
|
||||
i = 0;
|
||||
do
|
||||
{
|
||||
const UInt32 f = freqs[i];
|
||||
#if 0
|
||||
if (f == 0) lens[i] = 0; else
|
||||
#endif
|
||||
p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS);
|
||||
}
|
||||
while (++i != numSymbols);
|
||||
}
|
||||
counters[0] = 0;
|
||||
HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]);
|
||||
|
||||
#else
|
||||
|
||||
#else // NUM_COUNTERS <= 2
|
||||
|
||||
num = 0;
|
||||
for (i = 0; i < numSymbols; i++)
|
||||
{
|
||||
UInt32 freq = freqs[i];
|
||||
const UInt32 freq = freqs[i];
|
||||
if (freq == 0)
|
||||
lens[i] = 0;
|
||||
else
|
||||
|
|
@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
|
|||
}
|
||||
HeapSort(p, num);
|
||||
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
if (num < 2)
|
||||
if (num <= 2)
|
||||
{
|
||||
unsigned minCode = 0;
|
||||
unsigned maxCode = 1;
|
||||
if (num == 1)
|
||||
if (num)
|
||||
{
|
||||
maxCode = (unsigned)p[0] & MASK;
|
||||
if (maxCode == 0)
|
||||
maxCode = (unsigned)p[(size_t)num - 1] & MASK;
|
||||
if (num == 2)
|
||||
{
|
||||
minCode = (unsigned)p[0] & MASK;
|
||||
if (minCode > maxCode)
|
||||
{
|
||||
const unsigned temp = minCode;
|
||||
minCode = maxCode;
|
||||
maxCode = temp;
|
||||
}
|
||||
}
|
||||
else if (maxCode == 0)
|
||||
maxCode++;
|
||||
}
|
||||
p[minCode] = 0;
|
||||
|
|
@ -80,69 +155,206 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
|
|||
lens[minCode] = lens[maxCode] = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 b, e, i;
|
||||
|
||||
i = b = e = 0;
|
||||
do
|
||||
unsigned i;
|
||||
for (i = 0; i <= kMaxLen; i++)
|
||||
lenCounters[i] = 0;
|
||||
lenCounters[1] = 2; // by default root node has 2 child leaves at level 1.
|
||||
}
|
||||
// if (num != 2)
|
||||
{
|
||||
// num > 2
|
||||
// the binary tree will contain (num - 1) internal nodes.
|
||||
// p[num - 2] will be root node of binary tree.
|
||||
UInt32 *b;
|
||||
UInt32 *n;
|
||||
// first node will have two leaf childs: p[0] and p[1]:
|
||||
// p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs
|
||||
// if (pi == n) exit(0);
|
||||
// if (pi != n)
|
||||
{
|
||||
UInt32 n, m, freq;
|
||||
n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
|
||||
freq = (p[n] & ~MASK);
|
||||
p[n] = (p[n] & MASK) | (e << NUM_BITS);
|
||||
m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
|
||||
freq += (p[m] & ~MASK);
|
||||
p[m] = (p[m] & MASK) | (e << NUM_BITS);
|
||||
p[e] = (p[e] & MASK) | freq;
|
||||
e++;
|
||||
}
|
||||
while (num - e > 1);
|
||||
|
||||
{
|
||||
UInt32 lenCounters[kMaxLen + 1];
|
||||
for (i = 0; i <= kMaxLen; i++)
|
||||
lenCounters[i] = 0;
|
||||
|
||||
p[--e] &= MASK;
|
||||
lenCounters[1] = 2;
|
||||
while (e > 0)
|
||||
UInt32 fb = (p[1] & FREQ_MASK) + p[0];
|
||||
UInt32 f = p[2] & FREQ_MASK;
|
||||
const UInt32 *pi = p + 2;
|
||||
UInt32 *e = p;
|
||||
UInt32 eHi = 0;
|
||||
n = p + num;
|
||||
b = p;
|
||||
// p[0] = fb;
|
||||
for (;;)
|
||||
{
|
||||
UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
|
||||
p[e] = (p[e] & MASK) | (len << NUM_BITS);
|
||||
if (len >= maxLen)
|
||||
for (len = maxLen - 1; lenCounters[len] == 0; len--);
|
||||
lenCounters[len]--;
|
||||
lenCounters[(size_t)len + 1] += 2;
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 len;
|
||||
i = 0;
|
||||
for (len = maxLen; len != 0; len--)
|
||||
{
|
||||
UInt32 k;
|
||||
for (k = lenCounters[len]; k != 0; k--)
|
||||
lens[p[i++] & MASK] = (Byte)len;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
UInt32 nextCodes[kMaxLen + 1];
|
||||
{
|
||||
UInt32 code = 0;
|
||||
UInt32 len;
|
||||
for (len = 1; len <= kMaxLen; len++)
|
||||
nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1;
|
||||
}
|
||||
/* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
|
||||
// (b <= e)
|
||||
UInt32 sum;
|
||||
e++;
|
||||
UPDATE_E(eHi)
|
||||
|
||||
// (b < e)
|
||||
|
||||
// p range : high bits
|
||||
// [0, b) : parent : processed nodes that have parent and childs
|
||||
// [b, e) : FREQ : non-processed nodes that have no parent but have childs
|
||||
// [e, pi) : FREQ : processed leaves for which parent node was created
|
||||
// [pi, n) : FREQ : non-processed leaves for which parent node was not created
|
||||
|
||||
// first child
|
||||
// note : (*b < f) is same result as ((*b & FREQ_MASK) < f)
|
||||
if (fb < f)
|
||||
{
|
||||
UInt32 k;
|
||||
for (k = 0; k < numSymbols; k++)
|
||||
p[k] = nextCodes[lens[k]]++;
|
||||
// node freq is smaller
|
||||
sum = fb & FREQ_MASK;
|
||||
STORE_PARENT_DIRECT (b, fb, eHi)
|
||||
b++;
|
||||
fb = *b;
|
||||
if (b == e)
|
||||
{
|
||||
if (++pi == n)
|
||||
break;
|
||||
sum += f;
|
||||
fb &= MASK;
|
||||
fb |= sum;
|
||||
*e = fb;
|
||||
f = *pi & FREQ_MASK;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (++pi == n)
|
||||
{
|
||||
STORE_PARENT_DIRECT (b, fb, eHi)
|
||||
b++;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
sum = f;
|
||||
f = *pi & FREQ_MASK;
|
||||
}
|
||||
|
||||
// (b < e)
|
||||
|
||||
// second child
|
||||
if (fb < f)
|
||||
{
|
||||
sum += fb;
|
||||
sum &= FREQ_MASK;
|
||||
STORE_PARENT_DIRECT (b, fb, eHi)
|
||||
b++;
|
||||
*e = (*e & MASK) | sum; // set frequency sum
|
||||
// (b <= e) is possible here
|
||||
fb = *b;
|
||||
}
|
||||
else if (++pi == n)
|
||||
break;
|
||||
else
|
||||
{
|
||||
sum += f;
|
||||
f = *pi & FREQ_MASK;
|
||||
*e = (*e & MASK) | sum; // set frequency sum
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p);
|
||||
{
|
||||
n -= 2;
|
||||
*n &= MASK; // root node : we clear high bits (zero bits mean level == 0)
|
||||
if (n != b)
|
||||
{
|
||||
// We go here, if we have some number of non-created nodes up to root.
|
||||
// We process them in simplified code:
|
||||
// position of parent for each pair of nodes is known.
|
||||
// n[-2], n[-1] : current pair of child nodes
|
||||
// (p1) : parent node for current pair.
|
||||
UInt32 *p1 = n;
|
||||
do
|
||||
{
|
||||
const unsigned len = LOAD_PARENT(p1) + 1;
|
||||
p1--;
|
||||
(lenCounters )[len] -= 2; // we remove 2 leaves from level (len)
|
||||
(lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1)
|
||||
n -= 2;
|
||||
STORE_PARENT (n , n[0], len)
|
||||
STORE_PARENT (n + 1, n[1], len)
|
||||
}
|
||||
while (n != b);
|
||||
}
|
||||
}
|
||||
|
||||
if (b != p)
|
||||
{
|
||||
// we detect level of each node (relative to root),
|
||||
// and update lenCounters[].
|
||||
// We process only intermediate nodes and we don't process leaves.
|
||||
do
|
||||
{
|
||||
// if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii])
|
||||
// if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree
|
||||
unsigned len;
|
||||
b--;
|
||||
len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1;
|
||||
STORE_PARENT (b, *b, len)
|
||||
if (len >= maxLen)
|
||||
{
|
||||
// We are not allowed to create node at level (maxLen) and higher,
|
||||
// because all leaves must be placed to level (maxLen) or lower.
|
||||
// We find nearest allowed leaf and place current node to level of that leaf:
|
||||
for (len = maxLen - 1; lenCounters[len] == 0; len--) {}
|
||||
}
|
||||
lenCounters[len]--; // we remove 1 leaf from level (len)
|
||||
(lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1)
|
||||
}
|
||||
while (b != p);
|
||||
}
|
||||
}
|
||||
{
|
||||
{
|
||||
unsigned len = maxLen;
|
||||
const UInt32 *p2 = p;
|
||||
do
|
||||
{
|
||||
unsigned k = lenCounters[len];
|
||||
if (k)
|
||||
do
|
||||
lens[(unsigned)*p2++ & MASK] = (Byte)len;
|
||||
while (--k);
|
||||
}
|
||||
while (--len);
|
||||
}
|
||||
codes[0] = 0; // we don't want garbage values to be written to p[] array.
|
||||
// codes[1] = 0;
|
||||
{
|
||||
UInt32 code = 0;
|
||||
unsigned len;
|
||||
for (len = 0; len < kMaxLen; len++)
|
||||
(codes + 1)[len] = code = (code + lenCounters[len]) << 1;
|
||||
}
|
||||
/* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
|
||||
{
|
||||
const Byte * const limit = lens + numSymbols;
|
||||
do
|
||||
{
|
||||
unsigned len;
|
||||
UInt32 c;
|
||||
len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1;
|
||||
// len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1;
|
||||
p += 1;
|
||||
lens += 1;
|
||||
}
|
||||
while (lens != limit);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef kMaxLen
|
||||
#undef NUM_BITS
|
||||
#undef MASK
|
||||
#undef FREQ_MASK
|
||||
#undef NUM_COUNTERS
|
||||
#undef CTR_ITEM_FOR_FREQ
|
||||
#undef LOAD_PARENT
|
||||
#undef STORE_PARENT
|
||||
#undef STORE_PARENT_DIRECT
|
||||
#undef UPDATE_E
|
||||
#undef HI_HALF_OFFSET
|
||||
#undef NUM_UNROLLS
|
||||
#undef lenCounters
|
||||
#undef codes
|
||||
|
|
|
|||
12
C/HuffEnc.h
12
C/HuffEnc.h
|
|
@ -1,21 +1,21 @@
|
|||
/* HuffEnc.h -- Huffman encoding
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __HUFF_ENC_H
|
||||
#define __HUFF_ENC_H
|
||||
#ifndef ZIP7_INC_HUFF_ENC_H
|
||||
#define ZIP7_INC_HUFF_ENC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define Z7_HUFFMAN_LEN_MAX 16
|
||||
/*
|
||||
Conditions:
|
||||
num <= 1024 = 2 ^ NUM_BITS
|
||||
2 <= num <= 1024 = 2 ^ NUM_BITS
|
||||
Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS)
|
||||
maxLen <= 16 = kMaxLen
|
||||
1 <= maxLen <= 16 = Z7_HUFFMAN_LEN_MAX
|
||||
Num_Items(p) >= HUFFMAN_TEMP_SIZE(num)
|
||||
*/
|
||||
|
||||
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen);
|
||||
|
||||
EXTERN_C_END
|
||||
|
|
|
|||
650
C/LzFind.c
650
C/LzFind.c
File diff suppressed because it is too large
Load diff
56
C/LzFind.h
56
C/LzFind.h
|
|
@ -1,8 +1,8 @@
|
|||
/* LzFind.h -- Match finder for LZ algorithms
|
||||
2021-07-13 : Igor Pavlov : Public domain */
|
||||
2024-01-22 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZ_FIND_H
|
||||
#define __LZ_FIND_H
|
||||
#ifndef ZIP7_INC_LZ_FIND_H
|
||||
#define ZIP7_INC_LZ_FIND_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -10,9 +10,9 @@ EXTERN_C_BEGIN
|
|||
|
||||
typedef UInt32 CLzRef;
|
||||
|
||||
typedef struct _CMatchFinder
|
||||
typedef struct
|
||||
{
|
||||
Byte *buffer;
|
||||
const Byte *buffer;
|
||||
UInt32 pos;
|
||||
UInt32 posLimit;
|
||||
UInt32 streamPos; /* wrap over Zero is allowed (streamPos < pos). Use (UInt32)(streamPos - pos) */
|
||||
|
|
@ -32,8 +32,8 @@ typedef struct _CMatchFinder
|
|||
UInt32 hashMask;
|
||||
UInt32 cutValue;
|
||||
|
||||
Byte *bufferBase;
|
||||
ISeqInStream *stream;
|
||||
Byte *bufBase;
|
||||
ISeqInStreamPtr stream;
|
||||
|
||||
UInt32 blockSize;
|
||||
UInt32 keepSizeBefore;
|
||||
|
|
@ -43,7 +43,9 @@ typedef struct _CMatchFinder
|
|||
size_t directInputRem;
|
||||
UInt32 historySize;
|
||||
UInt32 fixedHashSize;
|
||||
UInt32 hashSizeSum;
|
||||
Byte numHashBytes_Min;
|
||||
Byte numHashOutBits;
|
||||
Byte _pad2_[2];
|
||||
SRes result;
|
||||
UInt32 crc[256];
|
||||
size_t numRefs;
|
||||
|
|
@ -69,24 +71,45 @@ void MatchFinder_ReadIfRequired(CMatchFinder *p);
|
|||
|
||||
void MatchFinder_Construct(CMatchFinder *p);
|
||||
|
||||
/* Conditions:
|
||||
historySize <= 3 GB
|
||||
keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
|
||||
/* (directInput = 0) is default value.
|
||||
It's required to provide correct (directInput) value
|
||||
before calling MatchFinder_Create().
|
||||
You can set (directInput) by any of the following calls:
|
||||
- MatchFinder_SET_DIRECT_INPUT_BUF()
|
||||
- MatchFinder_SET_STREAM()
|
||||
- MatchFinder_SET_STREAM_MODE()
|
||||
*/
|
||||
|
||||
#define MatchFinder_SET_DIRECT_INPUT_BUF(p, _src_, _srcLen_) { \
|
||||
(p)->stream = NULL; \
|
||||
(p)->directInput = 1; \
|
||||
(p)->buffer = (_src_); \
|
||||
(p)->directInputRem = (_srcLen_); }
|
||||
|
||||
/*
|
||||
#define MatchFinder_SET_STREAM_MODE(p) { \
|
||||
(p)->directInput = 0; }
|
||||
*/
|
||||
|
||||
#define MatchFinder_SET_STREAM(p, _stream_) { \
|
||||
(p)->stream = _stream_; \
|
||||
(p)->directInput = 0; }
|
||||
|
||||
|
||||
int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
|
||||
UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
|
||||
ISzAllocPtr alloc);
|
||||
void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
|
||||
void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
|
||||
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
|
||||
|
||||
/*
|
||||
#define Inline_MatchFinder_InitPos(p, val) \
|
||||
#define MatchFinder_INIT_POS(p, val) \
|
||||
(p)->pos = (val); \
|
||||
(p)->streamPos = (val);
|
||||
*/
|
||||
|
||||
#define Inline_MatchFinder_ReduceOffsets(p, subValue) \
|
||||
// void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
|
||||
#define MatchFinder_REDUCE_OFFSETS(p, subValue) \
|
||||
(p)->pos -= (subValue); \
|
||||
(p)->streamPos -= (subValue);
|
||||
|
||||
|
|
@ -107,7 +130,7 @@ typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
|
|||
typedef UInt32 * (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
|
||||
typedef void (*Mf_Skip_Func)(void *object, UInt32);
|
||||
|
||||
typedef struct _IMatchFinder
|
||||
typedef struct
|
||||
{
|
||||
Mf_Init_Func Init;
|
||||
Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
|
||||
|
|
@ -121,7 +144,8 @@ void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder2 *vTable);
|
|||
void MatchFinder_Init_LowHash(CMatchFinder *p);
|
||||
void MatchFinder_Init_HighHash(CMatchFinder *p);
|
||||
void MatchFinder_Init_4(CMatchFinder *p);
|
||||
void MatchFinder_Init(CMatchFinder *p);
|
||||
// void MatchFinder_Init(CMatchFinder *p);
|
||||
void MatchFinder_Init(void *p);
|
||||
|
||||
UInt32* Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
UInt32* Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
|
||||
|
|
|
|||
130
C/LzFindMt.c
130
C/LzFindMt.c
|
|
@ -1,5 +1,5 @@
|
|||
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
|
||||
2021-12-21 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ extern UInt64 g_NumIters_Bytes;
|
|||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
|
||||
|
||||
#define __MT_HASH4_CALC { \
|
||||
#define MT_HASH4_CALC { \
|
||||
UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
|
||||
h2 = temp & (kHash2Size - 1); \
|
||||
temp ^= ((UInt32)cur[2] << 8); \
|
||||
|
|
@ -79,14 +79,16 @@ extern UInt64 g_NumIters_Bytes;
|
|||
*/
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_Construct(CMtSync *p)
|
||||
{
|
||||
p->affinityGroup = -1;
|
||||
p->affinityInGroup = 0;
|
||||
p->affinity = 0;
|
||||
p->wasCreated = False;
|
||||
p->csWasInitialized = False;
|
||||
p->csWasEntered = False;
|
||||
Thread_Construct(&p->thread);
|
||||
Thread_CONSTRUCT(&p->thread)
|
||||
Event_Construct(&p->canStart);
|
||||
Event_Construct(&p->wasStopped);
|
||||
Semaphore_Construct(&p->freeSemaphore);
|
||||
|
|
@ -94,7 +96,7 @@ static void MtSync_Construct(CMtSync *p)
|
|||
}
|
||||
|
||||
|
||||
#define DEBUG_BUFFER_LOCK // define it to debug lock state
|
||||
// #define DEBUG_BUFFER_LOCK // define it to debug lock state
|
||||
|
||||
#ifdef DEBUG_BUFFER_LOCK
|
||||
#include <stdlib.h>
|
||||
|
|
@ -116,7 +118,7 @@ static void MtSync_Construct(CMtSync *p)
|
|||
(p)->csWasEntered = False; }
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static UInt32 MtSync_GetNextBlock(CMtSync *p)
|
||||
{
|
||||
UInt32 numBlocks = 0;
|
||||
|
|
@ -140,14 +142,14 @@ static UInt32 MtSync_GetNextBlock(CMtSync *p)
|
|||
|
||||
// buffer is UNLOCKED here
|
||||
Semaphore_Wait(&p->filledSemaphore);
|
||||
LOCK_BUFFER(p);
|
||||
LOCK_BUFFER(p)
|
||||
return numBlocks;
|
||||
}
|
||||
|
||||
|
||||
/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_StopWriting(CMtSync *p)
|
||||
{
|
||||
if (!Thread_WasCreated(&p->thread) || p->needStart)
|
||||
|
|
@ -185,7 +187,7 @@ static void MtSync_StopWriting(CMtSync *p)
|
|||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void MtSync_Destruct(CMtSync *p)
|
||||
{
|
||||
PRF(printf("\nMtSync_Destruct %p\n", p));
|
||||
|
|
@ -220,11 +222,11 @@ static void MtSync_Destruct(CMtSync *p)
|
|||
|
||||
// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
|
||||
// we want to get real system error codes here instead of SZ_ERROR_THREAD
|
||||
#define RINOK_THREAD(x) RINOK(x)
|
||||
#define RINOK_THREAD(x) RINOK_WRes(x)
|
||||
|
||||
|
||||
// call it before each new file (when new starting is required):
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
|
||||
{
|
||||
WRes wres;
|
||||
|
|
@ -245,12 +247,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
|
|||
if (p->wasCreated)
|
||||
return SZ_OK;
|
||||
|
||||
RINOK_THREAD(CriticalSection_Init(&p->cs));
|
||||
RINOK_THREAD(CriticalSection_Init(&p->cs))
|
||||
p->csWasInitialized = True;
|
||||
p->csWasEntered = False;
|
||||
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart));
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped));
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
|
||||
RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
|
||||
|
||||
p->needStart = True;
|
||||
p->exit = True; /* p->exit is unused before (canStart) Event.
|
||||
|
|
@ -259,18 +261,24 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
|
|||
// return ERROR_TOO_MANY_POSTS; // for debug
|
||||
// return EINVAL; // for debug
|
||||
|
||||
#ifdef _WIN32
|
||||
if (p->affinityGroup >= 0)
|
||||
wres = Thread_Create_With_Group(&p->thread, startAddress, obj,
|
||||
(unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup);
|
||||
else
|
||||
#endif
|
||||
if (p->affinity != 0)
|
||||
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
|
||||
else
|
||||
wres = Thread_Create(&p->thread, startAddress, obj);
|
||||
|
||||
RINOK_THREAD(wres);
|
||||
RINOK_THREAD(wres)
|
||||
p->wasCreated = True;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
|
||||
{
|
||||
const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
|
||||
|
|
@ -519,7 +527,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
|
|||
if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
|
||||
{
|
||||
const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
|
||||
Inline_MatchFinder_ReduceOffsets(mf, subValue);
|
||||
MatchFinder_REDUCE_OFFSETS(mf, subValue)
|
||||
MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
|
||||
}
|
||||
|
||||
|
|
@ -560,7 +568,7 @@ static void HashThreadFunc(CMatchFinderMt *mt)
|
|||
*/
|
||||
|
||||
|
||||
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes);
|
||||
|
|
@ -749,7 +757,7 @@ static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
|
|||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static void BtThreadFunc(CMatchFinderMt *mt)
|
||||
{
|
||||
CMtSync *p = &mt->btSync;
|
||||
|
|
@ -864,21 +872,22 @@ SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddB
|
|||
if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
|
||||
return SZ_ERROR_MEM;
|
||||
|
||||
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p));
|
||||
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p));
|
||||
RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
|
||||
RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
|
||||
{
|
||||
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks));
|
||||
RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
|
||||
return MtSync_Init(&p->btSync, kMtBtNumBlocks);
|
||||
}
|
||||
|
||||
|
||||
static void MatchFinderMt_Init(CMatchFinderMt *p)
|
||||
static void MatchFinderMt_Init(void *_p)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
CMatchFinder *mf = MF(p);
|
||||
|
||||
p->btBufPos =
|
||||
|
|
@ -941,7 +950,7 @@ void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
|
|||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
|
||||
{
|
||||
if (p->failure_LZ_BT)
|
||||
|
|
@ -981,8 +990,9 @@ static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
|
|||
|
||||
|
||||
|
||||
static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
|
||||
static const Byte * MatchFinderMt_GetPointerToCurrentPos(void *_p)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
return p->pointerToCurPos;
|
||||
}
|
||||
|
||||
|
|
@ -990,8 +1000,9 @@ static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
|
|||
#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
|
||||
|
||||
|
||||
static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
|
||||
static UInt32 MatchFinderMt_GetNumAvailableBytes(void *_p)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
if (p->btBufPos != p->btBufPosLimit)
|
||||
return p->btNumAvailBytes;
|
||||
return MatchFinderMt_GetNextBlock_Bt(p);
|
||||
|
|
@ -1163,7 +1174,7 @@ UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
|
|||
*/
|
||||
|
||||
|
||||
static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
||||
static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
||||
{
|
||||
UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
|
||||
UInt32 *hash = p->hash;
|
||||
|
|
@ -1179,9 +1190,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
|||
(hash + kFix3HashSize)[h3] = m;
|
||||
// (hash + kFix4HashSize)[h4] = m;
|
||||
|
||||
#define _USE_H2
|
||||
|
||||
#ifdef _USE_H2
|
||||
// #define BT5_USE_H2
|
||||
// #ifdef BT5_USE_H2
|
||||
if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
d[1] = m - c2 - 1;
|
||||
|
|
@ -1197,8 +1207,8 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
|||
}
|
||||
d[0] = 3;
|
||||
d += 2;
|
||||
|
||||
#ifdef _USE_H4
|
||||
|
||||
#ifdef BT5_USE_H4
|
||||
if (c4 >= matchMinPos)
|
||||
if (
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
|
||||
|
|
@ -1214,7 +1224,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
|||
d[0] = 2;
|
||||
d += 2;
|
||||
}
|
||||
#endif
|
||||
// #endif
|
||||
|
||||
if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
|
||||
{
|
||||
|
|
@ -1228,7 +1238,7 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
|||
d += 2;
|
||||
}
|
||||
|
||||
#ifdef _USE_H4
|
||||
#ifdef BT5_USE_H4
|
||||
if (c4 >= matchMinPos)
|
||||
if (
|
||||
cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] &&
|
||||
|
|
@ -1244,8 +1254,9 @@ static UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
|
|||
}
|
||||
|
||||
|
||||
static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
||||
static UInt32 * MatchFinderMt2_GetMatches(void *_p, UInt32 *d)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
const UInt32 *bt = p->btBufPos;
|
||||
const UInt32 len = *bt++;
|
||||
const UInt32 *btLim = bt + len;
|
||||
|
|
@ -1268,8 +1279,9 @@ static UInt32* MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
|||
|
||||
|
||||
|
||||
static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
||||
static UInt32 * MatchFinderMt_GetMatches(void *_p, UInt32 *d)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
const UInt32 *bt = p->btBufPos;
|
||||
UInt32 len = *bt++;
|
||||
const UInt32 avail = p->btNumAvailBytes - 1;
|
||||
|
|
@ -1316,14 +1328,16 @@ static UInt32* MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
|
|||
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
|
||||
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
|
||||
|
||||
static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
static void MatchFinderMt0_Skip(void *_p, UInt32 num)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
SKIP_HEADER2_MT { p->btNumAvailBytes--;
|
||||
SKIP_FOOTER_MT
|
||||
}
|
||||
|
||||
static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
static void MatchFinderMt2_Skip(void *_p, UInt32 num)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
SKIP_HEADER_MT(2)
|
||||
UInt32 h2;
|
||||
MT_HASH2_CALC
|
||||
|
|
@ -1331,8 +1345,9 @@ static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
|
|||
SKIP_FOOTER_MT
|
||||
}
|
||||
|
||||
static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
|
||||
static void MatchFinderMt3_Skip(void *_p, UInt32 num)
|
||||
{
|
||||
CMatchFinderMt *p = (CMatchFinderMt *)_p;
|
||||
SKIP_HEADER_MT(3)
|
||||
UInt32 h2, h3;
|
||||
MT_HASH3_CALC
|
||||
|
|
@ -1362,39 +1377,46 @@ static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
|
|||
|
||||
void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
|
||||
{
|
||||
vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
|
||||
vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
|
||||
vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
|
||||
vTable->Init = MatchFinderMt_Init;
|
||||
vTable->GetNumAvailableBytes = MatchFinderMt_GetNumAvailableBytes;
|
||||
vTable->GetPointerToCurrentPos = MatchFinderMt_GetPointerToCurrentPos;
|
||||
vTable->GetMatches = MatchFinderMt_GetMatches;
|
||||
|
||||
switch (MF(p)->numHashBytes)
|
||||
{
|
||||
case 2:
|
||||
p->GetHeadsFunc = GetHeads2;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
|
||||
vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
|
||||
p->MixMatchesFunc = NULL;
|
||||
vTable->Skip = MatchFinderMt0_Skip;
|
||||
vTable->GetMatches = MatchFinderMt2_GetMatches;
|
||||
break;
|
||||
case 3:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
|
||||
p->MixMatchesFunc = MixMatches2;
|
||||
vTable->Skip = MatchFinderMt2_Skip;
|
||||
break;
|
||||
case 4:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
|
||||
|
||||
// it's fast inline version of GetMatches()
|
||||
// vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
|
||||
// vTable->GetMatches = MatchFinderMt_GetMatches_Bt4;
|
||||
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
|
||||
vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
|
||||
p->MixMatchesFunc = MixMatches3;
|
||||
vTable->Skip = MatchFinderMt3_Skip;
|
||||
break;
|
||||
default:
|
||||
p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
|
||||
p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
|
||||
p->MixMatchesFunc = MixMatches4;
|
||||
vTable->Skip =
|
||||
(Mf_Skip_Func)MatchFinderMt3_Skip;
|
||||
// (Mf_Skip_Func)MatchFinderMt4_Skip;
|
||||
MatchFinderMt3_Skip;
|
||||
// MatchFinderMt4_Skip;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef RINOK_THREAD
|
||||
#undef PRF
|
||||
#undef MF
|
||||
#undef GetUi24hi_from32
|
||||
#undef LOCK_BUFFER
|
||||
#undef UNLOCK_BUFFER
|
||||
|
|
|
|||
19
C/LzFindMt.h
19
C/LzFindMt.h
|
|
@ -1,19 +1,21 @@
|
|||
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
|
||||
2021-07-12 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZ_FIND_MT_H
|
||||
#define __LZ_FIND_MT_H
|
||||
#ifndef ZIP7_INC_LZ_FIND_MT_H
|
||||
#define ZIP7_INC_LZ_FIND_MT_H
|
||||
|
||||
#include "LzFind.h"
|
||||
#include "Threads.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
typedef struct _CMtSync
|
||||
typedef struct
|
||||
{
|
||||
UInt32 numProcessedBlocks;
|
||||
CThread thread;
|
||||
Int32 affinityGroup;
|
||||
UInt64 affinityInGroup;
|
||||
UInt64 affinity;
|
||||
CThread thread;
|
||||
|
||||
BoolInt wasCreated;
|
||||
BoolInt needStart;
|
||||
|
|
@ -31,7 +33,10 @@ typedef struct _CMtSync
|
|||
// UInt32 numBlocks_Sent;
|
||||
} CMtSync;
|
||||
|
||||
typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distances);
|
||||
|
||||
struct CMatchFinderMt_;
|
||||
|
||||
typedef UInt32 * (*Mf_Mix_Matches)(struct CMatchFinderMt_ *p, UInt32 matchMinPos, UInt32 *distances);
|
||||
|
||||
/* kMtCacheLineDummy must be >= size_of_CPU_cache_line */
|
||||
#define kMtCacheLineDummy 128
|
||||
|
|
@ -39,7 +44,7 @@ typedef UInt32 * (*Mf_Mix_Matches)(void *p, UInt32 matchMinPos, UInt32 *distance
|
|||
typedef void (*Mf_GetHeads)(const Byte *buffer, UInt32 pos,
|
||||
UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc);
|
||||
|
||||
typedef struct _CMatchFinderMt
|
||||
typedef struct CMatchFinderMt_
|
||||
{
|
||||
/* LZ */
|
||||
const Byte *pointerToCurPos;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* LzFindOpt.c -- multithreaded Match finder for LZ algorithms
|
||||
2021-07-13 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -41,8 +41,8 @@ UInt64 g_NumIters_Bytes;
|
|||
// #define CYC_TO_POS_OFFSET 1 // for debug
|
||||
|
||||
/*
|
||||
MY_NO_INLINE
|
||||
UInt32 * MY_FAST_CALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
Z7_NO_INLINE
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_1(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, UInt32 *posRes)
|
||||
{
|
||||
do
|
||||
|
|
@ -214,13 +214,13 @@ else
|
|||
to eliminate "movsx" BUG in old MSVC x64 compiler.
|
||||
*/
|
||||
|
||||
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes);
|
||||
|
||||
MY_NO_INLINE
|
||||
UInt32 * MY_FAST_CALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
Z7_NO_INLINE
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes)
|
||||
|
|
@ -404,7 +404,7 @@ else
|
|||
/*
|
||||
typedef UInt32 uint32plus; // size_t
|
||||
|
||||
UInt32 * MY_FAST_CALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 * Z7_FASTCALL GetMatchesSpecN_3(uint32plus lenLimit, size_t pos, const Byte *cur, CLzRef *son,
|
||||
UInt32 _cutValue, UInt32 *d, uint32plus _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
|
||||
size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
|
||||
UInt32 *posRes)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* LzHash.h -- HASH functions for LZ algorithms
|
||||
2019-10-30 : Igor Pavlov : Public domain */
|
||||
/* LzHash.h -- HASH constants for LZ algorithms
|
||||
2023-03-05 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZ_HASH_H
|
||||
#define __LZ_HASH_H
|
||||
#ifndef ZIP7_INC_LZ_HASH_H
|
||||
#define ZIP7_INC_LZ_HASH_H
|
||||
|
||||
/*
|
||||
(kHash2Size >= (1 << 8)) : Required
|
||||
|
|
|
|||
16
C/Lzma2Dec.c
16
C/Lzma2Dec.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Lzma2Dec.c -- LZMA2 Decoder
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
2024-03-01 : Igor Pavlov : Public domain */
|
||||
|
||||
/* #define SHOW_DEBUG_INFO */
|
||||
|
||||
|
|
@ -71,14 +71,14 @@ static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props)
|
|||
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
|
||||
{
|
||||
Byte props[LZMA_PROPS_SIZE];
|
||||
RINOK(Lzma2Dec_GetOldProps(prop, props));
|
||||
RINOK(Lzma2Dec_GetOldProps(prop, props))
|
||||
return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
|
||||
}
|
||||
|
||||
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc)
|
||||
{
|
||||
Byte props[LZMA_PROPS_SIZE];
|
||||
RINOK(Lzma2Dec_GetOldProps(prop, props));
|
||||
RINOK(Lzma2Dec_GetOldProps(prop, props))
|
||||
return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc);
|
||||
}
|
||||
|
||||
|
|
@ -157,8 +157,10 @@ static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b)
|
|||
p->decoder.prop.lp = (Byte)lp;
|
||||
return LZMA2_STATE_DATA;
|
||||
}
|
||||
|
||||
default:
|
||||
return LZMA2_STATE_ERROR;
|
||||
}
|
||||
return LZMA2_STATE_ERROR;
|
||||
}
|
||||
|
||||
static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size)
|
||||
|
|
@ -474,8 +476,8 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
|||
SizeT outSize = *destLen, inSize = *srcLen;
|
||||
*destLen = *srcLen = 0;
|
||||
*status = LZMA_STATUS_NOT_SPECIFIED;
|
||||
Lzma2Dec_Construct(&p);
|
||||
RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc));
|
||||
Lzma2Dec_CONSTRUCT(&p)
|
||||
RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc))
|
||||
p.decoder.dic = dest;
|
||||
p.decoder.dicBufSize = outSize;
|
||||
Lzma2Dec_Init(&p);
|
||||
|
|
@ -487,3 +489,5 @@ SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
|||
Lzma2Dec_FreeProbs(&p, alloc);
|
||||
return res;
|
||||
}
|
||||
|
||||
#undef PRF
|
||||
|
|
|
|||
15
C/Lzma2Dec.h
15
C/Lzma2Dec.h
|
|
@ -1,8 +1,8 @@
|
|||
/* Lzma2Dec.h -- LZMA2 Decoder
|
||||
2018-02-19 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA2_DEC_H
|
||||
#define __LZMA2_DEC_H
|
||||
#ifndef ZIP7_INC_LZMA2_DEC_H
|
||||
#define ZIP7_INC_LZMA2_DEC_H
|
||||
|
||||
#include "LzmaDec.h"
|
||||
|
||||
|
|
@ -22,9 +22,10 @@ typedef struct
|
|||
CLzmaDec decoder;
|
||||
} CLzma2Dec;
|
||||
|
||||
#define Lzma2Dec_Construct(p) LzmaDec_Construct(&(p)->decoder)
|
||||
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
|
||||
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
|
||||
#define Lzma2Dec_CONSTRUCT(p) LzmaDec_CONSTRUCT(&(p)->decoder)
|
||||
#define Lzma2Dec_Construct(p) Lzma2Dec_CONSTRUCT(p)
|
||||
#define Lzma2Dec_FreeProbs(p, alloc) LzmaDec_FreeProbs(&(p)->decoder, alloc)
|
||||
#define Lzma2Dec_Free(p, alloc) LzmaDec_Free(&(p)->decoder, alloc)
|
||||
|
||||
SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
|
||||
SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc);
|
||||
|
|
@ -90,7 +91,7 @@ Lzma2Dec_GetUnpackExtra() returns the value that shows
|
|||
at current input positon.
|
||||
*/
|
||||
|
||||
#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0);
|
||||
#define Lzma2Dec_GetUnpackExtra(p) ((p)->isExtraMode ? (p)->unpackSize : 0)
|
||||
|
||||
|
||||
/* ---------- One Call Interface ---------- */
|
||||
|
|
|
|||
155
C/Lzma2DecMt.c
155
C/Lzma2DecMt.c
|
|
@ -1,44 +1,44 @@
|
|||
/* Lzma2DecMt.c -- LZMA2 Decoder Multi-thread
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
2023-04-13 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
// #define SHOW_DEBUG_INFO
|
||||
|
||||
// #define _7ZIP_ST
|
||||
// #define Z7_ST
|
||||
|
||||
#ifdef SHOW_DEBUG_INFO
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifdef SHOW_DEBUG_INFO
|
||||
#define PRF(x) x
|
||||
#else
|
||||
#define PRF(x)
|
||||
#endif
|
||||
#define PRF_STR(s) PRF(printf("\n" s "\n"))
|
||||
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2))
|
||||
#endif
|
||||
|
||||
#include "Alloc.h"
|
||||
|
||||
#include "Lzma2Dec.h"
|
||||
#include "Lzma2DecMt.h"
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
#include "MtDec.h"
|
||||
|
||||
#define LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT (1 << 28)
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef Z7_ST
|
||||
#ifdef SHOW_DEBUG_INFO
|
||||
#define PRF(x) x
|
||||
#else
|
||||
#define PRF(x)
|
||||
#endif
|
||||
#define PRF_STR(s) PRF(printf("\n" s "\n");)
|
||||
#define PRF_STR_INT_2(s, d1, d2) PRF(printf("\n" s " %d %d\n", (unsigned)d1, (unsigned)d2);)
|
||||
#endif
|
||||
|
||||
|
||||
void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
|
||||
{
|
||||
p->inBufSize_ST = 1 << 20;
|
||||
p->outStep_ST = 1 << 20;
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
p->numThreads = 1;
|
||||
p->inBufSize_MT = 1 << 18;
|
||||
p->outBlockMax = LZMA2DECMT_OUT_BLOCK_MAX_DEFAULT;
|
||||
|
|
@ -48,7 +48,7 @@ void Lzma2DecMtProps_Init(CLzma2DecMtProps *p)
|
|||
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
/* ---------- CLzma2DecMtThread ---------- */
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ typedef struct
|
|||
|
||||
/* ---------- CLzma2DecMt ---------- */
|
||||
|
||||
typedef struct
|
||||
struct CLzma2DecMt
|
||||
{
|
||||
// ISzAllocPtr alloc;
|
||||
ISzAllocPtr allocMid;
|
||||
|
|
@ -90,9 +90,9 @@ typedef struct
|
|||
CLzma2DecMtProps props;
|
||||
Byte prop;
|
||||
|
||||
ISeqInStream *inStream;
|
||||
ISeqOutStream *outStream;
|
||||
ICompressProgress *progress;
|
||||
ISeqInStreamPtr inStream;
|
||||
ISeqOutStreamPtr outStream;
|
||||
ICompressProgressPtr progress;
|
||||
|
||||
BoolInt finishMode;
|
||||
BoolInt outSize_Defined;
|
||||
|
|
@ -111,14 +111,13 @@ typedef struct
|
|||
size_t inPos;
|
||||
size_t inLim;
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
UInt64 outProcessed_Parse;
|
||||
BoolInt mtc_WasConstructed;
|
||||
CMtDec mtc;
|
||||
CLzma2DecMtThread coders[MTDEC__THREADS_MAX];
|
||||
CLzma2DecMtThread coders[MTDEC_THREADS_MAX];
|
||||
#endif
|
||||
|
||||
} CLzma2DecMt;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
|
@ -142,11 +141,11 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
|
|||
|
||||
// Lzma2DecMtProps_Init(&p->props);
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
p->mtc_WasConstructed = False;
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
{
|
||||
CLzma2DecMtThread *t = &p->coders[i];
|
||||
t->dec_created = False;
|
||||
|
|
@ -156,16 +155,16 @@ CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid)
|
|||
}
|
||||
#endif
|
||||
|
||||
return p;
|
||||
return (CLzma2DecMtHandle)(void *)p;
|
||||
}
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
static void Lzma2DecMt_FreeOutBufs(CLzma2DecMt *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
{
|
||||
CLzma2DecMtThread *t = &p->coders[i];
|
||||
if (t->outBuf)
|
||||
|
|
@ -196,13 +195,15 @@ static void Lzma2DecMt_FreeSt(CLzma2DecMt *p)
|
|||
}
|
||||
|
||||
|
||||
void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp)
|
||||
// #define GET_CLzma2DecMt_p CLzma2DecMt *p = (CLzma2DecMt *)(void *)pp;
|
||||
|
||||
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p)
|
||||
{
|
||||
CLzma2DecMt *p = (CLzma2DecMt *)pp;
|
||||
// GET_CLzma2DecMt_p
|
||||
|
||||
Lzma2DecMt_FreeSt(p);
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
if (p->mtc_WasConstructed)
|
||||
{
|
||||
|
|
@ -211,7 +212,7 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp)
|
|||
}
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
{
|
||||
CLzma2DecMtThread *t = &p->coders[i];
|
||||
if (t->dec_created)
|
||||
|
|
@ -226,19 +227,19 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle pp)
|
|||
|
||||
#endif
|
||||
|
||||
ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, pp);
|
||||
ISzAlloc_Free(p->alignOffsetAlloc.baseAlloc, p);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCallbackInfo *cc)
|
||||
{
|
||||
CLzma2DecMt *me = (CLzma2DecMt *)obj;
|
||||
CLzma2DecMtThread *t = &me->coders[coderIndex];
|
||||
|
||||
PRF_STR_INT_2("Parse", coderIndex, cc->srcSize);
|
||||
PRF_STR_INT_2("Parse", coderIndex, cc->srcSize)
|
||||
|
||||
cc->state = MTDEC_PARSE_CONTINUE;
|
||||
|
||||
|
|
@ -246,7 +247,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
|
|||
{
|
||||
if (!t->dec_created)
|
||||
{
|
||||
Lzma2Dec_Construct(&t->dec);
|
||||
Lzma2Dec_CONSTRUCT(&t->dec)
|
||||
t->dec_created = True;
|
||||
AlignOffsetAlloc_CreateVTable(&t->alloc);
|
||||
{
|
||||
|
|
@ -297,7 +298,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
|
|||
// that must be finished at position <= outBlockMax.
|
||||
|
||||
{
|
||||
const SizeT srcOrig = cc->srcSize;
|
||||
const size_t srcOrig = cc->srcSize;
|
||||
SizeT srcSize_Point = 0;
|
||||
SizeT dicPos_Point = 0;
|
||||
|
||||
|
|
@ -306,10 +307,10 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
|
|||
|
||||
for (;;)
|
||||
{
|
||||
SizeT srcCur = srcOrig - cc->srcSize;
|
||||
SizeT srcCur = (SizeT)(srcOrig - cc->srcSize);
|
||||
|
||||
status = Lzma2Dec_Parse(&t->dec,
|
||||
limit - t->dec.decoder.dicPos,
|
||||
(SizeT)limit - t->dec.decoder.dicPos,
|
||||
cc->src + cc->srcSize, &srcCur,
|
||||
checkFinishBlock);
|
||||
|
||||
|
|
@ -333,7 +334,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
|
|||
if (t->dec.decoder.dicPos >= (1 << 14))
|
||||
break;
|
||||
dicPos_Point = t->dec.decoder.dicPos;
|
||||
srcSize_Point = cc->srcSize;
|
||||
srcSize_Point = (SizeT)cc->srcSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -391,7 +392,7 @@ static void Lzma2DecMt_MtCallback_Parse(void *obj, unsigned coderIndex, CMtDecCa
|
|||
if (unpackRem != 0)
|
||||
{
|
||||
/* we also reserve space for max possible number of output bytes of current LZMA chunk */
|
||||
SizeT rem = limit - dicPos;
|
||||
size_t rem = limit - dicPos;
|
||||
if (rem > unpackRem)
|
||||
rem = unpackRem;
|
||||
dicPos += rem;
|
||||
|
|
@ -444,7 +445,7 @@ static SRes Lzma2DecMt_MtCallback_PreCode(void *pp, unsigned coderIndex)
|
|||
}
|
||||
|
||||
t->dec.decoder.dic = dest;
|
||||
t->dec.decoder.dicBufSize = t->outPreSize;
|
||||
t->dec.decoder.dicBufSize = (SizeT)t->outPreSize;
|
||||
|
||||
t->needInit = True;
|
||||
|
||||
|
|
@ -462,7 +463,7 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
|
|||
|
||||
UNUSED_VAR(srcFinished)
|
||||
|
||||
PRF_STR_INT_2("Code", coderIndex, srcSize);
|
||||
PRF_STR_INT_2("Code", coderIndex, srcSize)
|
||||
|
||||
*inCodePos = t->inCodeSize;
|
||||
*outCodePos = 0;
|
||||
|
|
@ -476,13 +477,13 @@ static SRes Lzma2DecMt_MtCallback_Code(void *pp, unsigned coderIndex,
|
|||
|
||||
{
|
||||
ELzmaStatus status;
|
||||
size_t srcProcessed = srcSize;
|
||||
SizeT srcProcessed = (SizeT)srcSize;
|
||||
BoolInt blockWasFinished =
|
||||
((int)t->parseStatus == LZMA_STATUS_FINISHED_WITH_MARK
|
||||
|| t->parseStatus == LZMA2_PARSE_STATUS_NEW_BLOCK);
|
||||
|
||||
SRes res = Lzma2Dec_DecodeToDic(&t->dec,
|
||||
t->outPreSize,
|
||||
(SizeT)t->outPreSize,
|
||||
src, &srcProcessed,
|
||||
blockWasFinished ? LZMA_FINISH_END : LZMA_FINISH_ANY,
|
||||
&status);
|
||||
|
|
@ -540,7 +541,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
|
|||
UNUSED_VAR(srcSize)
|
||||
UNUSED_VAR(isCross)
|
||||
|
||||
PRF_STR_INT_2("Write", coderIndex, srcSize);
|
||||
PRF_STR_INT_2("Write", coderIndex, srcSize)
|
||||
|
||||
*needContinue = False;
|
||||
*canRecode = True;
|
||||
|
|
@ -588,7 +589,7 @@ static SRes Lzma2DecMt_MtCallback_Write(void *pp, unsigned coderIndex,
|
|||
*needContinue = needContinue2;
|
||||
return SZ_OK;
|
||||
}
|
||||
RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0));
|
||||
RINOK(MtProgress_ProgressAdd(&me->mtc.mtProgress, 0, 0))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -611,11 +612,11 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p)
|
|||
{
|
||||
if (!p->dec_created)
|
||||
{
|
||||
Lzma2Dec_Construct(&p->dec);
|
||||
Lzma2Dec_CONSTRUCT(&p->dec)
|
||||
p->dec_created = True;
|
||||
}
|
||||
|
||||
RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt));
|
||||
RINOK(Lzma2Dec_Allocate(&p->dec, p->prop, &p->alignOffsetAlloc.vt))
|
||||
|
||||
if (!p->inBuf || p->inBufSize != p->props.inBufSize_ST)
|
||||
{
|
||||
|
|
@ -634,7 +635,7 @@ static SRes Lzma2Dec_Prepare_ST(CLzma2DecMt *p)
|
|||
|
||||
|
||||
static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
, BoolInt tMode
|
||||
#endif
|
||||
)
|
||||
|
|
@ -646,7 +647,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
|
||||
CLzma2Dec *dec;
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
if (tMode)
|
||||
{
|
||||
Lzma2DecMt_FreeOutBufs(p);
|
||||
|
|
@ -654,7 +655,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
}
|
||||
#endif
|
||||
|
||||
RINOK(Lzma2Dec_Prepare_ST(p));
|
||||
RINOK(Lzma2Dec_Prepare_ST(p))
|
||||
|
||||
dec = &p->dec;
|
||||
|
||||
|
|
@ -681,7 +682,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
|
||||
if (inPos == inLim)
|
||||
{
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
if (tMode)
|
||||
{
|
||||
inData = MtDec_Read(&p->mtc, &inLim);
|
||||
|
|
@ -710,7 +711,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
{
|
||||
SizeT next = dec->decoder.dicBufSize;
|
||||
if (next - wrPos > p->props.outStep_ST)
|
||||
next = wrPos + p->props.outStep_ST;
|
||||
next = wrPos + (SizeT)p->props.outStep_ST;
|
||||
size = next - dicPos;
|
||||
}
|
||||
|
||||
|
|
@ -726,7 +727,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
}
|
||||
}
|
||||
|
||||
inProcessed = inLim - inPos;
|
||||
inProcessed = (SizeT)(inLim - inPos);
|
||||
|
||||
res = Lzma2Dec_DecodeToDic(dec, dicPos + size, inData + inPos, &inProcessed, finishMode, &status);
|
||||
|
||||
|
|
@ -755,7 +756,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
dec->decoder.dicPos = 0;
|
||||
wrPos = dec->decoder.dicPos;
|
||||
|
||||
RINOK(res2);
|
||||
RINOK(res2)
|
||||
|
||||
if (needStop)
|
||||
{
|
||||
|
|
@ -788,7 +789,7 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
UInt64 outDelta = p->outProcessed - outPrev;
|
||||
if (inDelta >= (1 << 22) || outDelta >= (1 << 22))
|
||||
{
|
||||
RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed));
|
||||
RINOK(ICompressProgress_Progress(p->progress, p->inProcessed, p->outProcessed))
|
||||
inPrev = p->inProcessed;
|
||||
outPrev = p->outProcessed;
|
||||
}
|
||||
|
|
@ -798,20 +799,20 @@ static SRes Lzma2Dec_Decode_ST(CLzma2DecMt *p
|
|||
|
||||
|
||||
|
||||
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
||||
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
|
||||
Byte prop,
|
||||
const CLzma2DecMtProps *props,
|
||||
ISeqOutStream *outStream, const UInt64 *outDataSize, int finishMode,
|
||||
ISeqOutStreamPtr outStream, const UInt64 *outDataSize, int finishMode,
|
||||
// Byte *outBuf, size_t *outBufSize,
|
||||
ISeqInStream *inStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
// const Byte *inData, size_t inDataSize,
|
||||
UInt64 *inProcessed,
|
||||
// UInt64 *outProcessed,
|
||||
int *isMT,
|
||||
ICompressProgress *progress)
|
||||
ICompressProgressPtr progress)
|
||||
{
|
||||
CLzma2DecMt *p = (CLzma2DecMt *)pp;
|
||||
#ifndef _7ZIP_ST
|
||||
// GET_CLzma2DecMt_p
|
||||
#ifndef Z7_ST
|
||||
BoolInt tMode;
|
||||
#endif
|
||||
|
||||
|
|
@ -845,7 +846,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
|||
*isMT = False;
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
tMode = False;
|
||||
|
||||
|
|
@ -939,7 +940,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
|||
p->readWasFinished = p->mtc.readWasFinished;
|
||||
p->inProcessed = p->mtc.inProcessed;
|
||||
|
||||
PRF_STR("----- decoding ST -----");
|
||||
PRF_STR("----- decoding ST -----")
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -950,7 +951,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
|||
|
||||
{
|
||||
SRes res = Lzma2Dec_Decode_ST(p
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
, tMode
|
||||
#endif
|
||||
);
|
||||
|
|
@ -967,7 +968,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
|||
res = p->readRes;
|
||||
|
||||
/*
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
if (res == SZ_OK && tMode && p->mtc.parseRes != SZ_OK)
|
||||
res = p->mtc.parseRes;
|
||||
#endif
|
||||
|
|
@ -980,13 +981,13 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle pp,
|
|||
|
||||
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */
|
||||
|
||||
SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
|
||||
SRes Lzma2DecMt_Init(CLzma2DecMtHandle p,
|
||||
Byte prop,
|
||||
const CLzma2DecMtProps *props,
|
||||
const UInt64 *outDataSize, int finishMode,
|
||||
ISeqInStream *inStream)
|
||||
ISeqInStreamPtr inStream)
|
||||
{
|
||||
CLzma2DecMt *p = (CLzma2DecMt *)pp;
|
||||
// GET_CLzma2DecMt_p
|
||||
|
||||
if (prop > 40)
|
||||
return SZ_ERROR_UNSUPPORTED;
|
||||
|
|
@ -1015,11 +1016,11 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
|
|||
}
|
||||
|
||||
|
||||
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
|
||||
SRes Lzma2DecMt_Read(CLzma2DecMtHandle p,
|
||||
Byte *data, size_t *outSize,
|
||||
UInt64 *inStreamProcessed)
|
||||
{
|
||||
CLzma2DecMt *p = (CLzma2DecMt *)pp;
|
||||
// GET_CLzma2DecMt_p
|
||||
ELzmaFinishMode finishMode;
|
||||
SRes readRes;
|
||||
size_t size = *outSize;
|
||||
|
|
@ -1055,8 +1056,8 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
|
|||
readRes = ISeqInStream_Read(p->inStream, p->inBuf, &p->inLim);
|
||||
}
|
||||
|
||||
inCur = p->inLim - p->inPos;
|
||||
outCur = size;
|
||||
inCur = (SizeT)(p->inLim - p->inPos);
|
||||
outCur = (SizeT)size;
|
||||
|
||||
res = Lzma2Dec_DecodeToBuf(&p->dec, data, &outCur,
|
||||
p->inBuf + p->inPos, &inCur, finishMode, &status);
|
||||
|
|
@ -1088,3 +1089,7 @@ SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
|
|||
return readRes;
|
||||
}
|
||||
}
|
||||
|
||||
#undef PRF
|
||||
#undef PRF_STR
|
||||
#undef PRF_STR_INT_2
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* Lzma2DecMt.h -- LZMA2 Decoder Multi-thread
|
||||
2018-02-17 : Igor Pavlov : Public domain */
|
||||
2023-04-13 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA2_DEC_MT_H
|
||||
#define __LZMA2_DEC_MT_H
|
||||
#ifndef ZIP7_INC_LZMA2_DEC_MT_H
|
||||
#define ZIP7_INC_LZMA2_DEC_MT_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -13,7 +13,7 @@ typedef struct
|
|||
size_t inBufSize_ST;
|
||||
size_t outStep_ST;
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
unsigned numThreads;
|
||||
size_t inBufSize_MT;
|
||||
size_t outBlockMax;
|
||||
|
|
@ -38,7 +38,9 @@ SRes:
|
|||
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
typedef void * CLzma2DecMtHandle;
|
||||
typedef struct CLzma2DecMt CLzma2DecMt;
|
||||
typedef CLzma2DecMt * CLzma2DecMtHandle;
|
||||
// Z7_DECLARE_HANDLE(CLzma2DecMtHandle)
|
||||
|
||||
CLzma2DecMtHandle Lzma2DecMt_Create(ISzAllocPtr alloc, ISzAllocPtr allocMid);
|
||||
void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
|
||||
|
|
@ -46,11 +48,11 @@ void Lzma2DecMt_Destroy(CLzma2DecMtHandle p);
|
|||
SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
|
||||
Byte prop,
|
||||
const CLzma2DecMtProps *props,
|
||||
ISeqOutStream *outStream,
|
||||
ISeqOutStreamPtr outStream,
|
||||
const UInt64 *outDataSize, // NULL means undefined
|
||||
int finishMode, // 0 - partial unpacking is allowed, 1 - if lzma2 stream must be finished
|
||||
// Byte *outBuf, size_t *outBufSize,
|
||||
ISeqInStream *inStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
// const Byte *inData, size_t inDataSize,
|
||||
|
||||
// out variables:
|
||||
|
|
@ -58,7 +60,7 @@ SRes Lzma2DecMt_Decode(CLzma2DecMtHandle p,
|
|||
int *isMT, /* out: (*isMT == 0), if single thread decoding was used */
|
||||
|
||||
// UInt64 *outProcessed,
|
||||
ICompressProgress *progress);
|
||||
ICompressProgressPtr progress);
|
||||
|
||||
|
||||
/* ---------- Read from CLzma2DecMtHandle Interface ---------- */
|
||||
|
|
@ -67,7 +69,7 @@ SRes Lzma2DecMt_Init(CLzma2DecMtHandle pp,
|
|||
Byte prop,
|
||||
const CLzma2DecMtProps *props,
|
||||
const UInt64 *outDataSize, int finishMode,
|
||||
ISeqInStream *inStream);
|
||||
ISeqInStreamPtr inStream);
|
||||
|
||||
SRes Lzma2DecMt_Read(CLzma2DecMtHandle pp,
|
||||
Byte *data, size_t *outSize,
|
||||
|
|
|
|||
176
C/Lzma2Enc.c
176
C/Lzma2Enc.c
|
|
@ -1,18 +1,18 @@
|
|||
/* Lzma2Enc.c -- LZMA2 Encoder
|
||||
2021-02-09 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* #define _7ZIP_ST */
|
||||
/* #define Z7_ST */
|
||||
|
||||
#include "Lzma2Enc.h"
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
#include "MtCoder.h"
|
||||
#else
|
||||
#define MTCODER__THREADS_MAX 1
|
||||
#define MTCODER_THREADS_MAX 1
|
||||
#endif
|
||||
|
||||
#define LZMA2_CONTROL_LZMA (1 << 7)
|
||||
|
|
@ -40,7 +40,7 @@
|
|||
typedef struct
|
||||
{
|
||||
ISeqInStream vt;
|
||||
ISeqInStream *realStream;
|
||||
ISeqInStreamPtr realStream;
|
||||
UInt64 limit;
|
||||
UInt64 processed;
|
||||
int finished;
|
||||
|
|
@ -53,15 +53,15 @@ static void LimitedSeqInStream_Init(CLimitedSeqInStream *p)
|
|||
p->finished = 0;
|
||||
}
|
||||
|
||||
static SRes LimitedSeqInStream_Read(const ISeqInStream *pp, void *data, size_t *size)
|
||||
static SRes LimitedSeqInStream_Read(ISeqInStreamPtr pp, void *data, size_t *size)
|
||||
{
|
||||
CLimitedSeqInStream *p = CONTAINER_FROM_VTBL(pp, CLimitedSeqInStream, vt);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CLimitedSeqInStream)
|
||||
size_t size2 = *size;
|
||||
SRes res = SZ_OK;
|
||||
|
||||
if (p->limit != (UInt64)(Int64)-1)
|
||||
{
|
||||
UInt64 rem = p->limit - p->processed;
|
||||
const UInt64 rem = p->limit - p->processed;
|
||||
if (size2 > rem)
|
||||
size2 = (size_t)rem;
|
||||
}
|
||||
|
|
@ -95,8 +95,8 @@ static SRes Lzma2EncInt_InitStream(CLzma2EncInt *p, const CLzma2EncProps *props)
|
|||
{
|
||||
SizeT propsSize = LZMA_PROPS_SIZE;
|
||||
Byte propsEncoded[LZMA_PROPS_SIZE];
|
||||
RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps));
|
||||
RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize));
|
||||
RINOK(LzmaEnc_SetProps(p->enc, &props->lzmaProps))
|
||||
RINOK(LzmaEnc_WriteProperties(p->enc, propsEncoded, &propsSize))
|
||||
p->propsByte = propsEncoded[0];
|
||||
p->propsAreSet = True;
|
||||
}
|
||||
|
|
@ -111,23 +111,23 @@ static void Lzma2EncInt_InitBlock(CLzma2EncInt *p)
|
|||
}
|
||||
|
||||
|
||||
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream *inStream, UInt32 keepWindowSize,
|
||||
SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle p, ISeqInStreamPtr inStream, UInt32 keepWindowSize,
|
||||
ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte *src, SizeT srcLen,
|
||||
SRes LzmaEnc_MemPrepare(CLzmaEncHandle p, const Byte *src, SizeT srcLen,
|
||||
UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, BoolInt reInit,
|
||||
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle p, BoolInt reInit,
|
||||
Byte *dest, size_t *destLen, UInt32 desiredPackSize, UInt32 *unpackSize);
|
||||
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp);
|
||||
void LzmaEnc_Finish(CLzmaEncHandle pp);
|
||||
void LzmaEnc_SaveState(CLzmaEncHandle pp);
|
||||
void LzmaEnc_RestoreState(CLzmaEncHandle pp);
|
||||
const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle p);
|
||||
void LzmaEnc_Finish(CLzmaEncHandle p);
|
||||
void LzmaEnc_SaveState(CLzmaEncHandle p);
|
||||
void LzmaEnc_RestoreState(CLzmaEncHandle p);
|
||||
|
||||
/*
|
||||
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp);
|
||||
UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle p);
|
||||
*/
|
||||
|
||||
static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
|
||||
size_t *packSizeRes, ISeqOutStream *outStream)
|
||||
size_t *packSizeRes, ISeqOutStreamPtr outStream)
|
||||
{
|
||||
size_t packSizeLimit = *packSizeRes;
|
||||
size_t packSize = packSizeLimit;
|
||||
|
|
@ -167,7 +167,7 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
|
|||
|
||||
while (unpackSize > 0)
|
||||
{
|
||||
UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE;
|
||||
const UInt32 u = (unpackSize < LZMA2_COPY_CHUNK_SIZE) ? unpackSize : LZMA2_COPY_CHUNK_SIZE;
|
||||
if (packSizeLimit - destPos < u + 3)
|
||||
return SZ_ERROR_OUTPUT_EOF;
|
||||
outBuf[destPos++] = (Byte)(p->srcPos == 0 ? LZMA2_CONTROL_COPY_RESET_DIC : LZMA2_CONTROL_COPY_NO_RESET);
|
||||
|
|
@ -196,9 +196,9 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
|
|||
|
||||
{
|
||||
size_t destPos = 0;
|
||||
UInt32 u = unpackSize - 1;
|
||||
UInt32 pm = (UInt32)(packSize - 1);
|
||||
unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0);
|
||||
const UInt32 u = unpackSize - 1;
|
||||
const UInt32 pm = (UInt32)(packSize - 1);
|
||||
const unsigned mode = (p->srcPos == 0) ? 3 : (p->needInitState ? (p->needInitProp ? 2 : 1) : 0);
|
||||
|
||||
PRF(printf(" "));
|
||||
|
||||
|
|
@ -231,10 +231,11 @@ static SRes Lzma2EncInt_EncodeSubblock(CLzma2EncInt *p, Byte *outBuf,
|
|||
void Lzma2EncProps_Init(CLzma2EncProps *p)
|
||||
{
|
||||
LzmaEncProps_Init(&p->lzmaProps);
|
||||
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO;
|
||||
p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO;
|
||||
p->numBlockThreads_Reduced = -1;
|
||||
p->numBlockThreads_Max = -1;
|
||||
p->numTotalThreads = -1;
|
||||
p->numThreadGroups = 0;
|
||||
}
|
||||
|
||||
void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
||||
|
|
@ -251,8 +252,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
|||
t2 = p->numBlockThreads_Max;
|
||||
t3 = p->numTotalThreads;
|
||||
|
||||
if (t2 > MTCODER__THREADS_MAX)
|
||||
t2 = MTCODER__THREADS_MAX;
|
||||
if (t2 > MTCODER_THREADS_MAX)
|
||||
t2 = MTCODER_THREADS_MAX;
|
||||
|
||||
if (t3 <= 0)
|
||||
{
|
||||
|
|
@ -268,8 +269,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
|||
t1 = 1;
|
||||
t2 = t3;
|
||||
}
|
||||
if (t2 > MTCODER__THREADS_MAX)
|
||||
t2 = MTCODER__THREADS_MAX;
|
||||
if (t2 > MTCODER_THREADS_MAX)
|
||||
t2 = MTCODER_THREADS_MAX;
|
||||
}
|
||||
else if (t1 <= 0)
|
||||
{
|
||||
|
|
@ -286,8 +287,8 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
|||
|
||||
fileSize = p->lzmaProps.reduceSize;
|
||||
|
||||
if ( p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
|
||||
&& p->blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO
|
||||
if ( p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
|
||||
&& p->blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO
|
||||
&& (p->blockSize < fileSize || fileSize == (UInt64)(Int64)-1))
|
||||
p->lzmaProps.reduceSize = p->blockSize;
|
||||
|
||||
|
|
@ -297,19 +298,19 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
|||
|
||||
t1 = p->lzmaProps.numThreads;
|
||||
|
||||
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID)
|
||||
if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID)
|
||||
{
|
||||
t2r = t2 = 1;
|
||||
t3 = t1;
|
||||
}
|
||||
else if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO && t2 <= 1)
|
||||
else if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO && t2 <= 1)
|
||||
{
|
||||
/* if there is no block multi-threading, we use SOLID block */
|
||||
p->blockSize = LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID;
|
||||
p->blockSize = LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (p->blockSize == LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO)
|
||||
if (p->blockSize == LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO)
|
||||
{
|
||||
const UInt32 kMinSize = (UInt32)1 << 20;
|
||||
const UInt32 kMaxSize = (UInt32)1 << 28;
|
||||
|
|
@ -344,7 +345,7 @@ void Lzma2EncProps_Normalize(CLzma2EncProps *p)
|
|||
}
|
||||
|
||||
|
||||
static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize)
|
||||
static SRes Progress(ICompressProgressPtr p, UInt64 inSize, UInt64 outSize)
|
||||
{
|
||||
return (p && ICompressProgress_Progress(p, inSize, outSize) != SZ_OK) ? SZ_ERROR_PROGRESS : SZ_OK;
|
||||
}
|
||||
|
|
@ -352,7 +353,7 @@ static SRes Progress(ICompressProgress *p, UInt64 inSize, UInt64 outSize)
|
|||
|
||||
/* ---------- Lzma2 ---------- */
|
||||
|
||||
typedef struct
|
||||
struct CLzma2Enc
|
||||
{
|
||||
Byte propEncoded;
|
||||
CLzma2EncProps props;
|
||||
|
|
@ -363,23 +364,22 @@ typedef struct
|
|||
ISzAllocPtr alloc;
|
||||
ISzAllocPtr allocBig;
|
||||
|
||||
CLzma2EncInt coders[MTCODER__THREADS_MAX];
|
||||
CLzma2EncInt coders[MTCODER_THREADS_MAX];
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
ISeqOutStream *outStream;
|
||||
ISeqOutStreamPtr outStream;
|
||||
Byte *outBuf;
|
||||
size_t outBuf_Rem; /* remainder in outBuf */
|
||||
|
||||
size_t outBufSize; /* size of allocated outBufs[i] */
|
||||
size_t outBufsDataSizes[MTCODER__BLOCKS_MAX];
|
||||
size_t outBufsDataSizes[MTCODER_BLOCKS_MAX];
|
||||
BoolInt mtCoder_WasConstructed;
|
||||
CMtCoder mtCoder;
|
||||
Byte *outBufs[MTCODER__BLOCKS_MAX];
|
||||
Byte *outBufs[MTCODER_BLOCKS_MAX];
|
||||
|
||||
#endif
|
||||
|
||||
} CLzma2Enc;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
|
@ -396,30 +396,30 @@ CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig)
|
|||
p->allocBig = allocBig;
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
p->coders[i].enc = NULL;
|
||||
}
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
p->mtCoder_WasConstructed = False;
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
|
||||
p->outBufs[i] = NULL;
|
||||
p->outBufSize = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return p;
|
||||
return (CLzma2EncHandle)p;
|
||||
}
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
|
||||
if (p->outBufs[i])
|
||||
{
|
||||
ISzAlloc_Free(p->alloc, p->outBufs[i]);
|
||||
|
|
@ -430,12 +430,13 @@ static void Lzma2Enc_FreeOutBufs(CLzma2Enc *p)
|
|||
|
||||
#endif
|
||||
|
||||
// #define GET_CLzma2Enc_p CLzma2Enc *p = (CLzma2Enc *)(void *)p;
|
||||
|
||||
void Lzma2Enc_Destroy(CLzma2EncHandle pp)
|
||||
void Lzma2Enc_Destroy(CLzma2EncHandle p)
|
||||
{
|
||||
CLzma2Enc *p = (CLzma2Enc *)pp;
|
||||
// GET_CLzma2Enc_p
|
||||
unsigned i;
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
{
|
||||
CLzma2EncInt *t = &p->coders[i];
|
||||
if (t->enc)
|
||||
|
|
@ -446,7 +447,7 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp)
|
|||
}
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
if (p->mtCoder_WasConstructed)
|
||||
{
|
||||
MtCoder_Destruct(&p->mtCoder);
|
||||
|
|
@ -458,13 +459,13 @@ void Lzma2Enc_Destroy(CLzma2EncHandle pp)
|
|||
ISzAlloc_Free(p->alloc, p->tempBufLzma);
|
||||
p->tempBufLzma = NULL;
|
||||
|
||||
ISzAlloc_Free(p->alloc, pp);
|
||||
ISzAlloc_Free(p->alloc, p);
|
||||
}
|
||||
|
||||
|
||||
SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props)
|
||||
SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props)
|
||||
{
|
||||
CLzma2Enc *p = (CLzma2Enc *)pp;
|
||||
// GET_CLzma2Enc_p
|
||||
CLzmaEncProps lzmaProps = props->lzmaProps;
|
||||
LzmaEncProps_Normalize(&lzmaProps);
|
||||
if (lzmaProps.lc + lzmaProps.lp > LZMA2_LCLP_MAX)
|
||||
|
|
@ -475,16 +476,16 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle pp, const CLzma2EncProps *props)
|
|||
}
|
||||
|
||||
|
||||
void Lzma2Enc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
|
||||
void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize)
|
||||
{
|
||||
CLzma2Enc *p = (CLzma2Enc *)pp;
|
||||
// GET_CLzma2Enc_p
|
||||
p->expectedDataSize = expectedDataSiize;
|
||||
}
|
||||
|
||||
|
||||
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp)
|
||||
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p)
|
||||
{
|
||||
CLzma2Enc *p = (CLzma2Enc *)pp;
|
||||
// GET_CLzma2Enc_p
|
||||
unsigned i;
|
||||
UInt32 dicSize = LzmaEncProps_GetDictSize(&p->props.lzmaProps);
|
||||
for (i = 0; i < 40; i++)
|
||||
|
|
@ -497,12 +498,12 @@ Byte Lzma2Enc_WriteProperties(CLzma2EncHandle pp)
|
|||
static SRes Lzma2Enc_EncodeMt1(
|
||||
CLzma2Enc *me,
|
||||
CLzma2EncInt *p,
|
||||
ISeqOutStream *outStream,
|
||||
ISeqOutStreamPtr outStream,
|
||||
Byte *outBuf, size_t *outBufSize,
|
||||
ISeqInStream *inStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
const Byte *inData, size_t inDataSize,
|
||||
int finished,
|
||||
ICompressProgress *progress)
|
||||
ICompressProgressPtr progress)
|
||||
{
|
||||
UInt64 unpackTotal = 0;
|
||||
UInt64 packTotal = 0;
|
||||
|
|
@ -540,12 +541,12 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
}
|
||||
}
|
||||
|
||||
RINOK(Lzma2EncInt_InitStream(p, &me->props));
|
||||
RINOK(Lzma2EncInt_InitStream(p, &me->props))
|
||||
|
||||
for (;;)
|
||||
{
|
||||
SRes res = SZ_OK;
|
||||
size_t inSizeCur = 0;
|
||||
SizeT inSizeCur = 0;
|
||||
|
||||
Lzma2EncInt_InitBlock(p);
|
||||
|
||||
|
|
@ -559,7 +560,7 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
if (me->expectedDataSize != (UInt64)(Int64)-1
|
||||
&& me->expectedDataSize >= unpackTotal)
|
||||
expected = me->expectedDataSize - unpackTotal;
|
||||
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
|
||||
if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
|
||||
&& expected > me->props.blockSize)
|
||||
expected = (size_t)me->props.blockSize;
|
||||
|
||||
|
|
@ -569,14 +570,14 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
&limitedInStream.vt,
|
||||
LZMA2_KEEP_WINDOW_SIZE,
|
||||
me->alloc,
|
||||
me->allocBig));
|
||||
me->allocBig))
|
||||
}
|
||||
else
|
||||
{
|
||||
inSizeCur = inDataSize - (size_t)unpackTotal;
|
||||
if (me->props.blockSize != LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID
|
||||
inSizeCur = (SizeT)(inDataSize - (size_t)unpackTotal);
|
||||
if (me->props.blockSize != LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID
|
||||
&& inSizeCur > me->props.blockSize)
|
||||
inSizeCur = (size_t)me->props.blockSize;
|
||||
inSizeCur = (SizeT)(size_t)me->props.blockSize;
|
||||
|
||||
// LzmaEnc_SetDataSize(p->enc, inSizeCur);
|
||||
|
||||
|
|
@ -584,7 +585,7 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
inData + (size_t)unpackTotal, inSizeCur,
|
||||
LZMA2_KEEP_WINDOW_SIZE,
|
||||
me->alloc,
|
||||
me->allocBig));
|
||||
me->allocBig))
|
||||
}
|
||||
|
||||
for (;;)
|
||||
|
|
@ -621,7 +622,7 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
|
||||
unpackTotal += p->srcPos;
|
||||
|
||||
RINOK(res);
|
||||
RINOK(res)
|
||||
|
||||
if (p->srcPos != (inStream ? limitedInStream.processed : inSizeCur))
|
||||
return SZ_ERROR_FAIL;
|
||||
|
|
@ -652,12 +653,12 @@ static SRes Lzma2Enc_EncodeMt1(
|
|||
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned outBufIndex,
|
||||
static SRes Lzma2Enc_MtCallback_Code(void *p, unsigned coderIndex, unsigned outBufIndex,
|
||||
const Byte *src, size_t srcSize, int finished)
|
||||
{
|
||||
CLzma2Enc *me = (CLzma2Enc *)pp;
|
||||
CLzma2Enc *me = (CLzma2Enc *)p;
|
||||
size_t destSize = me->outBufSize;
|
||||
SRes res;
|
||||
CMtProgressThunk progressThunk;
|
||||
|
|
@ -692,9 +693,9 @@ static SRes Lzma2Enc_MtCallback_Code(void *pp, unsigned coderIndex, unsigned out
|
|||
}
|
||||
|
||||
|
||||
static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex)
|
||||
static SRes Lzma2Enc_MtCallback_Write(void *p, unsigned outBufIndex)
|
||||
{
|
||||
CLzma2Enc *me = (CLzma2Enc *)pp;
|
||||
CLzma2Enc *me = (CLzma2Enc *)p;
|
||||
size_t size = me->outBufsDataSizes[outBufIndex];
|
||||
const Byte *data = me->outBufs[outBufIndex];
|
||||
|
||||
|
|
@ -713,14 +714,14 @@ static SRes Lzma2Enc_MtCallback_Write(void *pp, unsigned outBufIndex)
|
|||
|
||||
|
||||
|
||||
SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
|
||||
ISeqOutStream *outStream,
|
||||
SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
|
||||
ISeqOutStreamPtr outStream,
|
||||
Byte *outBuf, size_t *outBufSize,
|
||||
ISeqInStream *inStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
const Byte *inData, size_t inDataSize,
|
||||
ICompressProgress *progress)
|
||||
ICompressProgressPtr progress)
|
||||
{
|
||||
CLzma2Enc *p = (CLzma2Enc *)pp;
|
||||
// GET_CLzma2Enc_p
|
||||
|
||||
if (inStream && inData)
|
||||
return SZ_ERROR_PARAM;
|
||||
|
|
@ -730,11 +731,11 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
|
|||
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
p->coders[i].propsAreSet = False;
|
||||
}
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
if (p->props.numBlockThreads_Reduced > 1)
|
||||
{
|
||||
|
|
@ -772,7 +773,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
|
|||
return SZ_ERROR_PARAM; /* SZ_ERROR_MEM */
|
||||
|
||||
{
|
||||
size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16;
|
||||
const size_t destBlockSize = p->mtCoder.blockSize + (p->mtCoder.blockSize >> 10) + 16;
|
||||
if (destBlockSize < p->mtCoder.blockSize)
|
||||
return SZ_ERROR_PARAM;
|
||||
if (p->outBufSize != destBlockSize)
|
||||
|
|
@ -781,10 +782,11 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
|
|||
}
|
||||
|
||||
p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
|
||||
p->mtCoder.numThreadGroups = p->props.numThreadGroups;
|
||||
p->mtCoder.expectedDataSize = p->expectedDataSize;
|
||||
|
||||
{
|
||||
SRes res = MtCoder_Code(&p->mtCoder);
|
||||
const SRes res = MtCoder_Code(&p->mtCoder);
|
||||
if (!outStream)
|
||||
*outBufSize = (size_t)(p->outBuf - outBuf);
|
||||
return res;
|
||||
|
|
@ -801,3 +803,5 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle pp,
|
|||
True, /* finished */
|
||||
progress);
|
||||
}
|
||||
|
||||
#undef PRF
|
||||
|
|
|
|||
21
C/Lzma2Enc.h
21
C/Lzma2Enc.h
|
|
@ -1,15 +1,15 @@
|
|||
/* Lzma2Enc.h -- LZMA2 Encoder
|
||||
2017-07-27 : Igor Pavlov : Public domain */
|
||||
2023-04-13 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA2_ENC_H
|
||||
#define __LZMA2_ENC_H
|
||||
#ifndef ZIP7_INC_LZMA2_ENC_H
|
||||
#define ZIP7_INC_LZMA2_ENC_H
|
||||
|
||||
#include "LzmaEnc.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define LZMA2_ENC_PROPS__BLOCK_SIZE__AUTO 0
|
||||
#define LZMA2_ENC_PROPS__BLOCK_SIZE__SOLID ((UInt64)(Int64)-1)
|
||||
#define LZMA2_ENC_PROPS_BLOCK_SIZE_AUTO 0
|
||||
#define LZMA2_ENC_PROPS_BLOCK_SIZE_SOLID ((UInt64)(Int64)-1)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
|
@ -18,6 +18,7 @@ typedef struct
|
|||
int numBlockThreads_Reduced;
|
||||
int numBlockThreads_Max;
|
||||
int numTotalThreads;
|
||||
unsigned numThreadGroups; // 0 : no groups
|
||||
} CLzma2EncProps;
|
||||
|
||||
void Lzma2EncProps_Init(CLzma2EncProps *p);
|
||||
|
|
@ -36,7 +37,9 @@ SRes:
|
|||
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
typedef void * CLzma2EncHandle;
|
||||
typedef struct CLzma2Enc CLzma2Enc;
|
||||
typedef CLzma2Enc * CLzma2EncHandle;
|
||||
// Z7_DECLARE_HANDLE(CLzma2EncHandle)
|
||||
|
||||
CLzma2EncHandle Lzma2Enc_Create(ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
void Lzma2Enc_Destroy(CLzma2EncHandle p);
|
||||
|
|
@ -44,11 +47,11 @@ SRes Lzma2Enc_SetProps(CLzma2EncHandle p, const CLzma2EncProps *props);
|
|||
void Lzma2Enc_SetDataSize(CLzma2EncHandle p, UInt64 expectedDataSiize);
|
||||
Byte Lzma2Enc_WriteProperties(CLzma2EncHandle p);
|
||||
SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
|
||||
ISeqOutStream *outStream,
|
||||
ISeqOutStreamPtr outStream,
|
||||
Byte *outBuf, size_t *outBufSize,
|
||||
ISeqInStream *inStream,
|
||||
ISeqInStreamPtr inStream,
|
||||
const Byte *inData, size_t inDataSize,
|
||||
ICompressProgress *progress);
|
||||
ICompressProgressPtr progress);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
/* Lzma86.h -- LZMA + x86 (BCJ) Filter
|
||||
2013-01-18 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA86_H
|
||||
#define __LZMA86_H
|
||||
#ifndef ZIP7_INC_LZMA86_H
|
||||
#define ZIP7_INC_LZMA86_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
|
||||
2016-05-16 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -46,9 +46,8 @@ SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)
|
|||
return res;
|
||||
if (useFilter == 1)
|
||||
{
|
||||
UInt32 x86State;
|
||||
x86_Convert_Init(x86State);
|
||||
x86_Convert(dest, *destLen, 0, &x86State, 0);
|
||||
UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
|
||||
z7_BranchConvSt_X86_Dec(dest, *destLen, 0, &x86State);
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* Lzma86Enc.c -- LZMA + x86 (BCJ) Filter Encoder
|
||||
2018-07-04 : Igor Pavlov : Public domain */
|
||||
2023-03-03 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -46,9 +46,8 @@ int Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
|
|||
memcpy(filteredStream, src, srcLen);
|
||||
}
|
||||
{
|
||||
UInt32 x86State;
|
||||
x86_Convert_Init(x86State);
|
||||
x86_Convert(filteredStream, srcLen, 0, &x86State, 1);
|
||||
UInt32 x86State = Z7_BRANCH_CONV_ST_X86_STATE_INIT_VAL;
|
||||
z7_BranchConvSt_X86_Enc(filteredStream, srcLen, 0, &x86State);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
190
C/LzmaDec.c
190
C/LzmaDec.c
|
|
@ -1,5 +1,5 @@
|
|||
/* LzmaDec.c -- LZMA Decoder
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
2023-04-07 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -8,15 +8,15 @@
|
|||
/* #include "CpuArch.h" */
|
||||
#include "LzmaDec.h"
|
||||
|
||||
#define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << kNumTopBits)
|
||||
// #define kNumTopBits 24
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
|
||||
#define kNumBitModelTotalBits 11
|
||||
#define kBitModelTotal (1 << kNumBitModelTotalBits)
|
||||
|
||||
#define RC_INIT_SIZE 5
|
||||
|
||||
#ifndef _LZMA_DEC_OPT
|
||||
#ifndef Z7_LZMA_DEC_OPT
|
||||
|
||||
#define kNumMoveBits 5
|
||||
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
|
||||
|
|
@ -25,14 +25,14 @@
|
|||
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
|
||||
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
|
||||
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
|
||||
{ UPDATE_0(p); i = (i + i); A0; } else \
|
||||
{ UPDATE_1(p); i = (i + i) + 1; A1; }
|
||||
{ UPDATE_0(p) i = (i + i); A0; } else \
|
||||
{ UPDATE_1(p) i = (i + i) + 1; A1; }
|
||||
|
||||
#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
|
||||
|
||||
#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
|
||||
{ UPDATE_0(p + i); A0; } else \
|
||||
{ UPDATE_1(p + i); A1; }
|
||||
{ UPDATE_0(p + i) A0; } else \
|
||||
{ UPDATE_1(p + i) A1; }
|
||||
#define REV_BIT_VAR( p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
|
||||
#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m; , i += m * 2; )
|
||||
#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m , ; )
|
||||
|
|
@ -40,19 +40,19 @@
|
|||
#define TREE_DECODE(probs, limit, i) \
|
||||
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
|
||||
|
||||
/* #define _LZMA_SIZE_OPT */
|
||||
/* #define Z7_LZMA_SIZE_OPT */
|
||||
|
||||
#ifdef _LZMA_SIZE_OPT
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
|
||||
#else
|
||||
#define TREE_6_DECODE(probs, i) \
|
||||
{ i = 1; \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i); \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
TREE_GET_BIT(probs, i) \
|
||||
i -= 0x40; }
|
||||
#endif
|
||||
|
||||
|
|
@ -64,25 +64,25 @@
|
|||
probLit = prob + (offs + bit + symbol); \
|
||||
GET_BIT2(probLit, symbol, offs ^= bit; , ;)
|
||||
|
||||
#endif // _LZMA_DEC_OPT
|
||||
#endif // Z7_LZMA_DEC_OPT
|
||||
|
||||
|
||||
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_INPUT_EOF; range <<= 8; code = (code << 8) | (*buf++); }
|
||||
|
||||
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
|
||||
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
|
||||
#define UPDATE_0_CHECK range = bound;
|
||||
#define UPDATE_1_CHECK range -= bound; code -= bound;
|
||||
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
|
||||
{ UPDATE_0_CHECK; i = (i + i); A0; } else \
|
||||
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; }
|
||||
{ UPDATE_0_CHECK i = (i + i); A0; } else \
|
||||
{ UPDATE_1_CHECK i = (i + i) + 1; A1; }
|
||||
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
|
||||
#define TREE_DECODE_CHECK(probs, limit, i) \
|
||||
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
|
||||
|
||||
|
||||
#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
|
||||
{ UPDATE_0_CHECK; i += m; m += m; } else \
|
||||
{ UPDATE_1_CHECK; m += m; i += m; }
|
||||
{ UPDATE_0_CHECK i += m; m += m; } else \
|
||||
{ UPDATE_1_CHECK m += m; i += m; }
|
||||
|
||||
|
||||
#define kNumPosBitsMax 4
|
||||
|
|
@ -224,14 +224,14 @@ Out:
|
|||
*/
|
||||
|
||||
|
||||
#ifdef _LZMA_DEC_OPT
|
||||
#ifdef Z7_LZMA_DEC_OPT
|
||||
|
||||
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
|
||||
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
|
||||
|
||||
#else
|
||||
|
||||
static
|
||||
int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
int Z7_FASTCALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
CLzmaProb *probs = GET_PROBS;
|
||||
unsigned state = (unsigned)p->state;
|
||||
|
|
@ -263,7 +263,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
IF_BIT_0(prob)
|
||||
{
|
||||
unsigned symbol;
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
prob = probs + Literal;
|
||||
if (processedPos != 0 || checkDicSize != 0)
|
||||
prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
|
||||
|
|
@ -273,7 +273,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
{
|
||||
state -= (state < 4) ? state : 3;
|
||||
symbol = 1;
|
||||
#ifdef _LZMA_SIZE_OPT
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
do { NORMAL_LITER_DEC } while (symbol < 0x100);
|
||||
#else
|
||||
NORMAL_LITER_DEC
|
||||
|
|
@ -292,7 +292,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
unsigned offs = 0x100;
|
||||
state -= (state < 10) ? 3 : 6;
|
||||
symbol = 1;
|
||||
#ifdef _LZMA_SIZE_OPT
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
do
|
||||
{
|
||||
unsigned bit;
|
||||
|
|
@ -321,25 +321,25 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
}
|
||||
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRep + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
state += kNumStates;
|
||||
prob = probs + LenCoder;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG0 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
prob = probs + IsRep0Long + COMBINED_PS_STATE;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
|
||||
// that case was checked before with kBadRepCode
|
||||
// if (checkDicSize == 0 && processedPos == 0) { len = kMatchSpecLen_Error_Data + 1; break; }
|
||||
|
|
@ -353,30 +353,30 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
state = state < kNumLitStates ? 9 : 11;
|
||||
continue;
|
||||
}
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 distance;
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG1 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
distance = rep1;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
prob = probs + IsRepG2 + state;
|
||||
IF_BIT_0(prob)
|
||||
{
|
||||
UPDATE_0(prob);
|
||||
UPDATE_0(prob)
|
||||
distance = rep2;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(prob);
|
||||
UPDATE_1(prob)
|
||||
distance = rep3;
|
||||
rep3 = rep2;
|
||||
}
|
||||
|
|
@ -389,37 +389,37 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
prob = probs + RepLenCoder;
|
||||
}
|
||||
|
||||
#ifdef _LZMA_SIZE_OPT
|
||||
#ifdef Z7_LZMA_SIZE_OPT
|
||||
{
|
||||
unsigned lim, offset;
|
||||
CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
offset = 0;
|
||||
lim = (1 << kLenNumLowBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
offset = kLenNumLowSymbols;
|
||||
lim = (1 << kLenNumLowBits);
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenHigh;
|
||||
offset = kLenNumLowSymbols * 2;
|
||||
lim = (1 << kLenNumHighBits);
|
||||
}
|
||||
}
|
||||
TREE_DECODE(probLen, lim, len);
|
||||
TREE_DECODE(probLen, lim, len)
|
||||
len += offset;
|
||||
}
|
||||
#else
|
||||
|
|
@ -427,32 +427,32 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
len = 1;
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
len -= 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0(probLen)
|
||||
{
|
||||
UPDATE_0(probLen);
|
||||
UPDATE_0(probLen)
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
len = 1;
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len);
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
TREE_GET_BIT(probLen, len)
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1(probLen);
|
||||
UPDATE_1(probLen)
|
||||
probLen = prob + LenHigh;
|
||||
TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
|
||||
TREE_DECODE(probLen, (1 << kLenNumHighBits), len)
|
||||
len += kLenNumLowSymbols * 2;
|
||||
}
|
||||
}
|
||||
|
|
@ -464,7 +464,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
UInt32 distance;
|
||||
prob = probs + PosSlot +
|
||||
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
|
||||
TREE_6_DECODE(prob, distance);
|
||||
TREE_6_DECODE(prob, distance)
|
||||
if (distance >= kStartPosModelIndex)
|
||||
{
|
||||
unsigned posSlot = (unsigned)distance;
|
||||
|
|
@ -479,7 +479,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
distance++;
|
||||
do
|
||||
{
|
||||
REV_BIT_VAR(prob, distance, m);
|
||||
REV_BIT_VAR(prob, distance, m)
|
||||
}
|
||||
while (--numDirectBits);
|
||||
distance -= m;
|
||||
|
|
@ -514,10 +514,10 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
distance <<= kNumAlignBits;
|
||||
{
|
||||
unsigned i = 1;
|
||||
REV_BIT_CONST(prob, i, 1);
|
||||
REV_BIT_CONST(prob, i, 2);
|
||||
REV_BIT_CONST(prob, i, 4);
|
||||
REV_BIT_LAST (prob, i, 8);
|
||||
REV_BIT_CONST(prob, i, 1)
|
||||
REV_BIT_CONST(prob, i, 2)
|
||||
REV_BIT_CONST(prob, i, 4)
|
||||
REV_BIT_LAST (prob, i, 8)
|
||||
distance |= i;
|
||||
}
|
||||
if (distance == (UInt32)0xFFFFFFFF)
|
||||
|
|
@ -592,7 +592,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
}
|
||||
while (dicPos < limit && buf < bufLimit);
|
||||
|
||||
NORMALIZE;
|
||||
NORMALIZE
|
||||
|
||||
p->buf = buf;
|
||||
p->range = range;
|
||||
|
|
@ -613,7 +613,7 @@ int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit
|
|||
|
||||
|
||||
|
||||
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
|
||||
static void Z7_FASTCALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
|
||||
{
|
||||
unsigned len = (unsigned)p->remainLen;
|
||||
if (len == 0 /* || len >= kMatchSpecLenStart */)
|
||||
|
|
@ -683,7 +683,7 @@ and we support the following state of (p->checkDicSize):
|
|||
(p->checkDicSize == p->prop.dicSize)
|
||||
*/
|
||||
|
||||
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
static int Z7_FASTCALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
|
||||
{
|
||||
if (p->checkDicSize == 0)
|
||||
{
|
||||
|
|
@ -767,54 +767,54 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
|
|||
else
|
||||
{
|
||||
unsigned len;
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
|
||||
prob = probs + IsRep + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
state = 0;
|
||||
prob = probs + LenCoder;
|
||||
res = DUMMY_MATCH;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
res = DUMMY_REP;
|
||||
prob = probs + IsRepG0 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
prob = probs + IsRep0Long + COMBINED_PS_STATE;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
prob = probs + IsRepG1 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
prob = probs + IsRepG2 + state;
|
||||
IF_BIT_0_CHECK(prob)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -826,31 +826,31 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
|
|||
const CLzmaProb *probLen = prob + LenChoice;
|
||||
IF_BIT_0_CHECK(probLen)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
probLen = prob + LenLow + GET_LEN_STATE;
|
||||
offset = 0;
|
||||
limit = 1 << kLenNumLowBits;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
probLen = prob + LenChoice2;
|
||||
IF_BIT_0_CHECK(probLen)
|
||||
{
|
||||
UPDATE_0_CHECK;
|
||||
UPDATE_0_CHECK
|
||||
probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
|
||||
offset = kLenNumLowSymbols;
|
||||
limit = 1 << kLenNumLowBits;
|
||||
}
|
||||
else
|
||||
{
|
||||
UPDATE_1_CHECK;
|
||||
UPDATE_1_CHECK
|
||||
probLen = prob + LenHigh;
|
||||
offset = kLenNumLowSymbols * 2;
|
||||
limit = 1 << kLenNumHighBits;
|
||||
}
|
||||
}
|
||||
TREE_DECODE_CHECK(probLen, limit, len);
|
||||
TREE_DECODE_CHECK(probLen, limit, len)
|
||||
len += offset;
|
||||
}
|
||||
|
||||
|
|
@ -860,7 +860,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
|
|||
prob = probs + PosSlot +
|
||||
((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
|
||||
kNumPosSlotBits);
|
||||
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
|
||||
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot)
|
||||
if (posSlot >= kStartPosModelIndex)
|
||||
{
|
||||
unsigned numDirectBits = ((posSlot >> 1) - 1);
|
||||
|
|
@ -888,7 +888,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
|
|||
unsigned m = 1;
|
||||
do
|
||||
{
|
||||
REV_BIT_CHECK(prob, i, m);
|
||||
REV_BIT_CHECK(prob, i, m)
|
||||
}
|
||||
while (--numDirectBits);
|
||||
}
|
||||
|
|
@ -897,7 +897,7 @@ static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, const Byt
|
|||
}
|
||||
break;
|
||||
}
|
||||
NORMALIZE_CHECK;
|
||||
NORMALIZE_CHECK
|
||||
|
||||
*bufOut = buf;
|
||||
return res;
|
||||
|
|
@ -943,7 +943,7 @@ When the decoder lookahead, and the lookahead symbol is not end_marker, we have
|
|||
*/
|
||||
|
||||
|
||||
#define RETURN__NOT_FINISHED__FOR_FINISH \
|
||||
#define RETURN_NOT_FINISHED_FOR_FINISH \
|
||||
*status = LZMA_STATUS_NOT_FINISHED; \
|
||||
return SZ_ERROR_DATA; // for strict mode
|
||||
// return SZ_OK; // for relaxed mode
|
||||
|
|
@ -1029,7 +1029,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
|
|||
}
|
||||
if (p->remainLen != 0)
|
||||
{
|
||||
RETURN__NOT_FINISHED__FOR_FINISH;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
checkEndMarkNow = 1;
|
||||
}
|
||||
|
|
@ -1072,7 +1072,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
|
|||
for (i = 0; i < (unsigned)dummyProcessed; i++)
|
||||
p->tempBuf[i] = src[i];
|
||||
// p->remainLen = kMatchSpecLen_Error_Data;
|
||||
RETURN__NOT_FINISHED__FOR_FINISH;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
|
||||
bufLimit = src;
|
||||
|
|
@ -1150,7 +1150,7 @@ SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *sr
|
|||
(*srcLen) += (unsigned)dummyProcessed - p->tempBufSize;
|
||||
p->tempBufSize = (unsigned)dummyProcessed;
|
||||
// p->remainLen = kMatchSpecLen_Error_Data;
|
||||
RETURN__NOT_FINISHED__FOR_FINISH;
|
||||
RETURN_NOT_FINISHED_FOR_FINISH
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1299,8 +1299,8 @@ static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAl
|
|||
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
|
||||
{
|
||||
CLzmaProps propNew;
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
|
||||
p->prop = propNew;
|
||||
return SZ_OK;
|
||||
}
|
||||
|
|
@ -1309,14 +1309,14 @@ SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAll
|
|||
{
|
||||
CLzmaProps propNew;
|
||||
SizeT dicBufSize;
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize));
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
|
||||
RINOK(LzmaProps_Decode(&propNew, props, propsSize))
|
||||
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc))
|
||||
|
||||
{
|
||||
UInt32 dictSize = propNew.dicSize;
|
||||
SizeT mask = ((UInt32)1 << 12) - 1;
|
||||
if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
|
||||
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
|
||||
else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;
|
||||
dicBufSize = ((SizeT)dictSize + mask) & ~mask;
|
||||
if (dicBufSize < dictSize)
|
||||
dicBufSize = dictSize;
|
||||
|
|
@ -1348,8 +1348,8 @@ SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
|
|||
*status = LZMA_STATUS_NOT_SPECIFIED;
|
||||
if (inSize < RC_INIT_SIZE)
|
||||
return SZ_ERROR_INPUT_EOF;
|
||||
LzmaDec_Construct(&p);
|
||||
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
|
||||
LzmaDec_CONSTRUCT(&p)
|
||||
RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc))
|
||||
p.dic = dest;
|
||||
p.dicBufSize = outSize;
|
||||
LzmaDec_Init(&p);
|
||||
|
|
|
|||
17
C/LzmaDec.h
17
C/LzmaDec.h
|
|
@ -1,19 +1,19 @@
|
|||
/* LzmaDec.h -- LZMA Decoder
|
||||
2020-03-19 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA_DEC_H
|
||||
#define __LZMA_DEC_H
|
||||
#ifndef ZIP7_INC_LZMA_DEC_H
|
||||
#define ZIP7_INC_LZMA_DEC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/* #define _LZMA_PROB32 */
|
||||
/* _LZMA_PROB32 can increase the speed on some CPUs,
|
||||
/* #define Z7_LZMA_PROB32 */
|
||||
/* Z7_LZMA_PROB32 can increase the speed on some CPUs,
|
||||
but memory usage for CLzmaDec::probs will be doubled in that case */
|
||||
|
||||
typedef
|
||||
#ifdef _LZMA_PROB32
|
||||
#ifdef Z7_LZMA_PROB32
|
||||
UInt32
|
||||
#else
|
||||
UInt16
|
||||
|
|
@ -25,7 +25,7 @@ typedef
|
|||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaProps
|
||||
typedef struct
|
||||
{
|
||||
Byte lc;
|
||||
Byte lp;
|
||||
|
|
@ -73,7 +73,8 @@ typedef struct
|
|||
Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
|
||||
} CLzmaDec;
|
||||
|
||||
#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
|
||||
#define LzmaDec_CONSTRUCT(p) { (p)->dic = NULL; (p)->probs = NULL; }
|
||||
#define LzmaDec_Construct(p) LzmaDec_CONSTRUCT(p)
|
||||
|
||||
void LzmaDec_Init(CLzmaDec *p);
|
||||
|
||||
|
|
|
|||
431
C/LzmaEnc.c
431
C/LzmaEnc.c
File diff suppressed because it is too large
Load diff
25
C/LzmaEnc.h
25
C/LzmaEnc.h
|
|
@ -1,8 +1,8 @@
|
|||
/* LzmaEnc.h -- LZMA Encoder
|
||||
2019-10-30 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA_ENC_H
|
||||
#define __LZMA_ENC_H
|
||||
#ifndef ZIP7_INC_LZMA_ENC_H
|
||||
#define ZIP7_INC_LZMA_ENC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -10,7 +10,7 @@ EXTERN_C_BEGIN
|
|||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
typedef struct _CLzmaEncProps
|
||||
typedef struct
|
||||
{
|
||||
int level; /* 0 <= level <= 9 */
|
||||
UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
|
||||
|
|
@ -23,14 +23,19 @@ typedef struct _CLzmaEncProps
|
|||
int fb; /* 5 <= fb <= 273, default = 32 */
|
||||
int btMode; /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
|
||||
int numHashBytes; /* 2, 3 or 4, default = 4 */
|
||||
unsigned numHashOutBits; /* default = ? */
|
||||
UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */
|
||||
unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
|
||||
int numThreads; /* 1 or 2, default = 2 */
|
||||
|
||||
// int _pad;
|
||||
Int32 affinityGroup;
|
||||
|
||||
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
|
||||
Encoder uses this value to reduce dictionary size */
|
||||
|
||||
UInt64 affinity;
|
||||
UInt64 affinityInGroup;
|
||||
} CLzmaEncProps;
|
||||
|
||||
void LzmaEncProps_Init(CLzmaEncProps *p);
|
||||
|
|
@ -51,7 +56,9 @@ SRes:
|
|||
SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
typedef void * CLzmaEncHandle;
|
||||
typedef struct CLzmaEnc CLzmaEnc;
|
||||
typedef CLzmaEnc * CLzmaEncHandle;
|
||||
// Z7_DECLARE_HANDLE(CLzmaEncHandle)
|
||||
|
||||
CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
|
||||
void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
|
@ -61,17 +68,17 @@ void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
|
|||
SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
|
||||
unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
|
||||
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
|
||||
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr inStream,
|
||||
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
int writeEndMark, ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
||||
|
||||
/* ---------- One Call Interface ---------- */
|
||||
|
||||
SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
|
||||
const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
|
||||
ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
ICompressProgressPtr progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,14 @@
|
|||
/* LzmaLib.c -- LZMA library wrapper
|
||||
2015-06-13 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "Alloc.h"
|
||||
#include "LzmaDec.h"
|
||||
#include "LzmaEnc.h"
|
||||
#include "LzmaLib.h"
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize,
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */
|
||||
|
|
@ -32,7 +34,7 @@ MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char
|
|||
}
|
||||
|
||||
|
||||
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
|
||||
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t *srcLen,
|
||||
const unsigned char *props, size_t propsSize)
|
||||
{
|
||||
ELzmaStatus status;
|
||||
|
|
|
|||
12
C/LzmaLib.h
12
C/LzmaLib.h
|
|
@ -1,14 +1,14 @@
|
|||
/* LzmaLib.h -- LZMA library interface
|
||||
2021-04-03 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __LZMA_LIB_H
|
||||
#define __LZMA_LIB_H
|
||||
#ifndef ZIP7_INC_LZMA_LIB_H
|
||||
#define ZIP7_INC_LZMA_LIB_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define MY_STDAPI int MY_STD_CALL
|
||||
#define Z7_STDAPI int Z7_STDCALL
|
||||
|
||||
#define LZMA_PROPS_SIZE 5
|
||||
|
||||
|
|
@ -100,7 +100,7 @@ Returns:
|
|||
SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version)
|
||||
*/
|
||||
|
||||
MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
Z7_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
|
||||
unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
|
||||
int level, /* 0 <= level <= 9, default = 5 */
|
||||
unsigned dictSize, /* default = (1 << 24) */
|
||||
|
|
@ -130,7 +130,7 @@ Returns:
|
|||
SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src)
|
||||
*/
|
||||
|
||||
MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
|
||||
Z7_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
|
||||
const unsigned char *props, size_t propsSize);
|
||||
|
||||
EXTERN_C_END
|
||||
|
|
|
|||
206
C/Md5.c
Normal file
206
C/Md5.c
Normal file
|
|
@ -0,0 +1,206 @@
|
|||
/* Md5.c -- MD5 Hash
|
||||
: Igor Pavlov : Public domain
|
||||
This code is based on Colin Plumb's public domain md5.c code */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "Md5.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define MD5_UPDATE_BLOCKS(p) Md5_UpdateBlocks
|
||||
|
||||
Z7_NO_INLINE
|
||||
void Md5_Init(CMd5 *p)
|
||||
{
|
||||
p->count = 0;
|
||||
p->state[0] = 0x67452301;
|
||||
p->state[1] = 0xefcdab89;
|
||||
p->state[2] = 0x98badcfe;
|
||||
p->state[3] = 0x10325476;
|
||||
}
|
||||
|
||||
#if 0 && !defined(MY_CPU_LE_UNALIGN)
|
||||
// optional optimization for Big-endian processors or processors without unaligned access:
|
||||
// it is intended to reduce the number of complex LE32 memory reading from 64 to 16.
|
||||
// But some compilers (sparc, armt) are better without this optimization.
|
||||
#define Z7_MD5_USE_DATA32_ARRAY
|
||||
#endif
|
||||
|
||||
#define LOAD_DATA(i) GetUi32((const UInt32 *)(const void *)data + (i))
|
||||
|
||||
#ifdef Z7_MD5_USE_DATA32_ARRAY
|
||||
#define D(i) data32[i]
|
||||
#else
|
||||
#define D(i) LOAD_DATA(i)
|
||||
#endif
|
||||
|
||||
#define F1(x, y, z) (z ^ (x & (y ^ z)))
|
||||
#define F2(x, y, z) F1(z, x, y)
|
||||
#define F3(x, y, z) (x ^ y ^ z)
|
||||
#define F4(x, y, z) (y ^ (x | ~z))
|
||||
|
||||
#define R1(i, f, start, step, w, x, y, z, s, k) \
|
||||
w += D((start + step * (i)) % 16) + k; \
|
||||
w += f(x, y, z); \
|
||||
w = rotlFixed(w, s) + x; \
|
||||
|
||||
#define R4(i4, f, start, step, s0,s1,s2,s3, k0,k1,k2,k3) \
|
||||
R1 (i4*4+0, f, start, step, a,b,c,d, s0, k0) \
|
||||
R1 (i4*4+1, f, start, step, d,a,b,c, s1, k1) \
|
||||
R1 (i4*4+2, f, start, step, c,d,a,b, s2, k2) \
|
||||
R1 (i4*4+3, f, start, step, b,c,d,a, s3, k3) \
|
||||
|
||||
#define R16(f, start, step, s0,s1,s2,s3, k00,k01,k02,k03, k10,k11,k12,k13, k20,k21,k22,k23, k30,k31,k32,k33) \
|
||||
R4 (0, f, start, step, s0,s1,s2,s3, k00,k01,k02,k03) \
|
||||
R4 (1, f, start, step, s0,s1,s2,s3, k10,k11,k12,k13) \
|
||||
R4 (2, f, start, step, s0,s1,s2,s3, k20,k21,k22,k23) \
|
||||
R4 (3, f, start, step, s0,s1,s2,s3, k30,k31,k32,k33) \
|
||||
|
||||
static
|
||||
Z7_NO_INLINE
|
||||
void Z7_FASTCALL Md5_UpdateBlocks(UInt32 state[4], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
UInt32 a, b, c, d;
|
||||
// if (numBlocks == 0) return;
|
||||
a = state[0];
|
||||
b = state[1];
|
||||
c = state[2];
|
||||
d = state[3];
|
||||
do
|
||||
{
|
||||
#ifdef Z7_MD5_USE_DATA32_ARRAY
|
||||
UInt32 data32[MD5_NUM_BLOCK_WORDS];
|
||||
{
|
||||
#define LOAD_data32_x4(i) { \
|
||||
data32[i ] = LOAD_DATA(i ); \
|
||||
data32[i + 1] = LOAD_DATA(i + 1); \
|
||||
data32[i + 2] = LOAD_DATA(i + 2); \
|
||||
data32[i + 3] = LOAD_DATA(i + 3); }
|
||||
#if 1
|
||||
LOAD_data32_x4 (0 * 4)
|
||||
LOAD_data32_x4 (1 * 4)
|
||||
LOAD_data32_x4 (2 * 4)
|
||||
LOAD_data32_x4 (3 * 4)
|
||||
#else
|
||||
unsigned i;
|
||||
for (i = 0; i < MD5_NUM_BLOCK_WORDS; i += 4)
|
||||
{
|
||||
LOAD_data32_x4(i)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
R16 (F1, 0, 1, 7,12,17,22, 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
|
||||
0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
|
||||
0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
|
||||
0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821)
|
||||
R16 (F2, 1, 5, 5, 9,14,20, 0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
|
||||
0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
|
||||
0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
|
||||
0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a)
|
||||
R16 (F3, 5, 3, 4,11,16,23, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
|
||||
0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
|
||||
0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
|
||||
0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665)
|
||||
R16 (F4, 0, 7, 6,10,15,21, 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
|
||||
0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
|
||||
0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
|
||||
0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391)
|
||||
|
||||
a += state[0];
|
||||
b += state[1];
|
||||
c += state[2];
|
||||
d += state[3];
|
||||
|
||||
state[0] = a;
|
||||
state[1] = b;
|
||||
state[2] = c;
|
||||
state[3] = d;
|
||||
|
||||
data += MD5_BLOCK_SIZE;
|
||||
}
|
||||
while (--numBlocks);
|
||||
}
|
||||
|
||||
|
||||
#define Md5_UpdateBlock(p) MD5_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
|
||||
void Md5_Update(CMd5 *p, const Byte *data, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
{
|
||||
const unsigned pos = (unsigned)p->count & (MD5_BLOCK_SIZE - 1);
|
||||
const unsigned num = MD5_BLOCK_SIZE - pos;
|
||||
p->count += size;
|
||||
if (num > size)
|
||||
{
|
||||
memcpy(p->buffer + pos, data, size);
|
||||
return;
|
||||
}
|
||||
if (pos != 0)
|
||||
{
|
||||
size -= num;
|
||||
memcpy(p->buffer + pos, data, num);
|
||||
data += num;
|
||||
Md5_UpdateBlock(p);
|
||||
}
|
||||
}
|
||||
{
|
||||
const size_t numBlocks = size >> 6;
|
||||
if (numBlocks)
|
||||
MD5_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
|
||||
size &= MD5_BLOCK_SIZE - 1;
|
||||
if (size == 0)
|
||||
return;
|
||||
data += (numBlocks << 6);
|
||||
memcpy(p->buffer, data, size);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void Md5_Final(CMd5 *p, Byte *digest)
|
||||
{
|
||||
unsigned pos = (unsigned)p->count & (MD5_BLOCK_SIZE - 1);
|
||||
p->buffer[pos++] = 0x80;
|
||||
if (pos > (MD5_BLOCK_SIZE - 4 * 2))
|
||||
{
|
||||
while (pos != MD5_BLOCK_SIZE) { p->buffer[pos++] = 0; }
|
||||
// memset(&p->buf.buffer[pos], 0, MD5_BLOCK_SIZE - pos);
|
||||
Md5_UpdateBlock(p);
|
||||
pos = 0;
|
||||
}
|
||||
memset(&p->buffer[pos], 0, (MD5_BLOCK_SIZE - 4 * 2) - pos);
|
||||
{
|
||||
const UInt64 numBits = p->count << 3;
|
||||
#if defined(MY_CPU_LE_UNALIGN)
|
||||
SetUi64 (p->buffer + MD5_BLOCK_SIZE - 4 * 2, numBits)
|
||||
#else
|
||||
SetUi32a(p->buffer + MD5_BLOCK_SIZE - 4 * 2, (UInt32)(numBits))
|
||||
SetUi32a(p->buffer + MD5_BLOCK_SIZE - 4 * 1, (UInt32)(numBits >> 32))
|
||||
#endif
|
||||
}
|
||||
Md5_UpdateBlock(p);
|
||||
|
||||
SetUi32(digest, p->state[0])
|
||||
SetUi32(digest + 4, p->state[1])
|
||||
SetUi32(digest + 8, p->state[2])
|
||||
SetUi32(digest + 12, p->state[3])
|
||||
|
||||
Md5_Init(p);
|
||||
}
|
||||
|
||||
#undef R1
|
||||
#undef R4
|
||||
#undef R16
|
||||
#undef D
|
||||
#undef LOAD_DATA
|
||||
#undef LOAD_data32_x4
|
||||
#undef F1
|
||||
#undef F2
|
||||
#undef F3
|
||||
#undef F4
|
||||
34
C/Md5.h
Normal file
34
C/Md5.h
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/* Md5.h -- MD5 Hash
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_MD5_H
|
||||
#define ZIP7_INC_MD5_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define MD5_NUM_BLOCK_WORDS 16
|
||||
#define MD5_NUM_DIGEST_WORDS 4
|
||||
|
||||
#define MD5_BLOCK_SIZE (MD5_NUM_BLOCK_WORDS * 4)
|
||||
#define MD5_DIGEST_SIZE (MD5_NUM_DIGEST_WORDS * 4)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UInt64 count;
|
||||
UInt64 _pad_1;
|
||||
// we want 16-bytes alignment here
|
||||
UInt32 state[MD5_NUM_DIGEST_WORDS];
|
||||
UInt64 _pad_2[4];
|
||||
// we want 64-bytes alignment here
|
||||
Byte buffer[MD5_BLOCK_SIZE];
|
||||
} CMd5;
|
||||
|
||||
void Md5_Init(CMd5 *p);
|
||||
void Md5_Update(CMd5 *p, const Byte *data, size_t size);
|
||||
void Md5_Final(CMd5 *p, Byte *digest);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
161
C/MtCoder.c
161
C/MtCoder.c
|
|
@ -1,28 +1,28 @@
|
|||
/* MtCoder.c -- Multi-thread Coder
|
||||
2021-12-21 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "MtCoder.h"
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
static SRes MtProgressThunk_Progress(const ICompressProgress *pp, UInt64 inSize, UInt64 outSize)
|
||||
static SRes MtProgressThunk_Progress(ICompressProgressPtr pp, UInt64 inSize, UInt64 outSize)
|
||||
{
|
||||
CMtProgressThunk *thunk = CONTAINER_FROM_VTBL(pp, CMtProgressThunk, vt);
|
||||
Z7_CONTAINER_FROM_VTBL_TO_DECL_VAR_pp_vt_p(CMtProgressThunk)
|
||||
UInt64 inSize2 = 0;
|
||||
UInt64 outSize2 = 0;
|
||||
if (inSize != (UInt64)(Int64)-1)
|
||||
{
|
||||
inSize2 = inSize - thunk->inSize;
|
||||
thunk->inSize = inSize;
|
||||
inSize2 = inSize - p->inSize;
|
||||
p->inSize = inSize;
|
||||
}
|
||||
if (outSize != (UInt64)(Int64)-1)
|
||||
{
|
||||
outSize2 = outSize - thunk->outSize;
|
||||
thunk->outSize = outSize;
|
||||
outSize2 = outSize - p->outSize;
|
||||
p->outSize = outSize;
|
||||
}
|
||||
return MtProgress_ProgressAdd(thunk->mtProgress, inSize2, outSize2);
|
||||
return MtProgress_ProgressAdd(p->mtProgress, inSize2, outSize2);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -36,25 +36,31 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
|
|||
#define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
|
||||
|
||||
|
||||
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
|
||||
{
|
||||
if (Event_IsCreated(p))
|
||||
return Event_Reset(p);
|
||||
return AutoResetEvent_CreateNotSignaled(p);
|
||||
}
|
||||
|
||||
|
||||
static THREAD_FUNC_DECL ThreadFunc(void *pp);
|
||||
|
||||
|
||||
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
|
||||
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t
|
||||
#ifdef _WIN32
|
||||
, CMtCoder * const mtc
|
||||
#endif
|
||||
)
|
||||
{
|
||||
WRes wres = ArEvent_OptCreate_And_Reset(&t->startEvent);
|
||||
WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent);
|
||||
// printf("\n====== MtCoderThread_CreateAndStart : \n");
|
||||
if (wres == 0)
|
||||
{
|
||||
t->stop = False;
|
||||
if (!Thread_WasCreated(&t->thread))
|
||||
wres = Thread_Create(&t->thread, ThreadFunc, t);
|
||||
{
|
||||
#ifdef _WIN32
|
||||
if (mtc->numThreadGroups)
|
||||
wres = Thread_Create_With_Group(&t->thread, ThreadFunc, t,
|
||||
ThreadNextGroup_GetNext(&mtc->nextGroup), // group
|
||||
0); // affinityMask
|
||||
else
|
||||
#endif
|
||||
wres = Thread_Create(&t->thread, ThreadFunc, t);
|
||||
}
|
||||
if (wres == 0)
|
||||
wres = Event_Set(&t->startEvent);
|
||||
}
|
||||
|
|
@ -64,6 +70,7 @@ static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
|
|||
}
|
||||
|
||||
|
||||
Z7_FORCE_INLINE
|
||||
static void MtCoderThread_Destruct(CMtCoderThread *t)
|
||||
{
|
||||
if (Thread_WasCreated(&t->thread))
|
||||
|
|
@ -84,24 +91,6 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
|
|||
|
||||
|
||||
|
||||
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
|
||||
{
|
||||
size_t size = *processedSize;
|
||||
*processedSize = 0;
|
||||
while (size != 0)
|
||||
{
|
||||
size_t cur = size;
|
||||
SRes res = ISeqInStream_Read(stream, data, &cur);
|
||||
*processedSize += cur;
|
||||
data += cur;
|
||||
size -= cur;
|
||||
RINOK(res);
|
||||
if (cur == 0)
|
||||
return SZ_OK;
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
ThreadFunc2() returns:
|
||||
|
|
@ -111,7 +100,7 @@ static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
|
|||
|
||||
static SRes ThreadFunc2(CMtCoderThread *t)
|
||||
{
|
||||
CMtCoder *mtc = t->mtCoder;
|
||||
CMtCoder * const mtc = t->mtCoder;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
|
@ -152,7 +141,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
|
|||
}
|
||||
if (res == SZ_OK)
|
||||
{
|
||||
res = FullRead(mtc->inStream, t->inBuf, &size);
|
||||
res = SeqInStream_ReadMax(mtc->inStream, t->inBuf, &size);
|
||||
readProcessed = mtc->readProcessed + size;
|
||||
mtc->readProcessed = readProcessed;
|
||||
}
|
||||
|
|
@ -211,7 +200,11 @@ static SRes ThreadFunc2(CMtCoderThread *t)
|
|||
if (mtc->numStartedThreads < mtc->numStartedThreadsLimit
|
||||
&& mtc->expectedDataSize != readProcessed)
|
||||
{
|
||||
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);
|
||||
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]
|
||||
#ifdef _WIN32
|
||||
, mtc
|
||||
#endif
|
||||
);
|
||||
if (res == SZ_OK)
|
||||
mtc->numStartedThreads++;
|
||||
else
|
||||
|
|
@ -247,13 +240,13 @@ static SRes ThreadFunc2(CMtCoderThread *t)
|
|||
}
|
||||
|
||||
{
|
||||
CMtCoderBlock *block = &mtc->blocks[bi];
|
||||
CMtCoderBlock * const block = &mtc->blocks[bi];
|
||||
block->res = res;
|
||||
block->bufIndex = bufIndex;
|
||||
block->finished = finished;
|
||||
}
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
RINOK_THREAD(Event_Set(&mtc->writeEvents[bi]))
|
||||
#else
|
||||
{
|
||||
|
|
@ -337,7 +330,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
|
|||
|
||||
static THREAD_FUNC_DECL ThreadFunc(void *pp)
|
||||
{
|
||||
CMtCoderThread *t = (CMtCoderThread *)pp;
|
||||
CMtCoderThread * const t = (CMtCoderThread *)pp;
|
||||
for (;;)
|
||||
{
|
||||
if (Event_Wait(&t->startEvent) != 0)
|
||||
|
|
@ -345,16 +338,16 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
|
|||
if (t->stop)
|
||||
return 0;
|
||||
{
|
||||
SRes res = ThreadFunc2(t);
|
||||
const SRes res = ThreadFunc2(t);
|
||||
CMtCoder *mtc = t->mtCoder;
|
||||
if (res != SZ_OK)
|
||||
{
|
||||
MtProgress_SetError(&mtc->mtProgress, res);
|
||||
}
|
||||
|
||||
#ifndef MTCODER__USE_WRITE_THREAD
|
||||
#ifndef MTCODER_USE_WRITE_THREAD
|
||||
{
|
||||
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
|
||||
const unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
|
||||
if (numFinished == mtc->numStartedThreads)
|
||||
if (Event_Set(&mtc->finishedEvent) != 0)
|
||||
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
|
||||
|
|
@ -372,6 +365,7 @@ void MtCoder_Construct(CMtCoder *p)
|
|||
|
||||
p->blockSize = 0;
|
||||
p->numThreadsMax = 0;
|
||||
p->numThreadGroups = 0;
|
||||
p->expectedDataSize = (UInt64)(Int64)-1;
|
||||
|
||||
p->inStream = NULL;
|
||||
|
|
@ -389,7 +383,7 @@ void MtCoder_Construct(CMtCoder *p)
|
|||
Event_Construct(&p->readEvent);
|
||||
Semaphore_Construct(&p->blocksSemaphore);
|
||||
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
{
|
||||
CMtCoderThread *t = &p->threads[i];
|
||||
t->mtCoder = p;
|
||||
|
|
@ -397,11 +391,11 @@ void MtCoder_Construct(CMtCoder *p)
|
|||
t->inBuf = NULL;
|
||||
t->stop = False;
|
||||
Event_Construct(&t->startEvent);
|
||||
Thread_Construct(&t->thread);
|
||||
Thread_CONSTRUCT(&t->thread)
|
||||
}
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
|
||||
Event_Construct(&p->writeEvents[i]);
|
||||
#else
|
||||
Event_Construct(&p->finishedEvent);
|
||||
|
|
@ -424,14 +418,14 @@ static void MtCoder_Free(CMtCoder *p)
|
|||
Event_Set(&p->readEvent);
|
||||
*/
|
||||
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
MtCoderThread_Destruct(&p->threads[i]);
|
||||
|
||||
Event_Close(&p->readEvent);
|
||||
Semaphore_Close(&p->blocksSemaphore);
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
|
||||
Event_Close(&p->writeEvents[i]);
|
||||
#else
|
||||
Event_Close(&p->finishedEvent);
|
||||
|
|
@ -455,20 +449,22 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
unsigned i;
|
||||
SRes res = SZ_OK;
|
||||
|
||||
if (numThreads > MTCODER__THREADS_MAX)
|
||||
numThreads = MTCODER__THREADS_MAX;
|
||||
numBlocksMax = MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads);
|
||||
// printf("\n====== MtCoder_Code : \n");
|
||||
|
||||
if (numThreads > MTCODER_THREADS_MAX)
|
||||
numThreads = MTCODER_THREADS_MAX;
|
||||
numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads);
|
||||
|
||||
if (p->blockSize < ((UInt32)1 << 26)) numBlocksMax++;
|
||||
if (p->blockSize < ((UInt32)1 << 24)) numBlocksMax++;
|
||||
if (p->blockSize < ((UInt32)1 << 22)) numBlocksMax++;
|
||||
|
||||
if (numBlocksMax > MTCODER__BLOCKS_MAX)
|
||||
numBlocksMax = MTCODER__BLOCKS_MAX;
|
||||
if (numBlocksMax > MTCODER_BLOCKS_MAX)
|
||||
numBlocksMax = MTCODER_BLOCKS_MAX;
|
||||
|
||||
if (p->blockSize != p->allocatedBufsSize)
|
||||
{
|
||||
for (i = 0; i < MTCODER__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_THREADS_MAX; i++)
|
||||
{
|
||||
CMtCoderThread *t = &p->threads[i];
|
||||
if (t->inBuf)
|
||||
|
|
@ -484,23 +480,23 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
|
||||
MtProgress_Init(&p->mtProgress, p->progress);
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
for (i = 0; i < numBlocksMax; i++)
|
||||
{
|
||||
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->writeEvents[i]));
|
||||
RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->writeEvents[i]))
|
||||
}
|
||||
#else
|
||||
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));
|
||||
RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent))
|
||||
#endif
|
||||
|
||||
{
|
||||
RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->readEvent));
|
||||
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, numBlocksMax, numBlocksMax));
|
||||
RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->readEvent))
|
||||
RINOK_THREAD(Semaphore_OptCreateInit(&p->blocksSemaphore, (UInt32)numBlocksMax, (UInt32)numBlocksMax))
|
||||
}
|
||||
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX - 1; i++)
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX - 1; i++)
|
||||
p->freeBlockList[i] = i + 1;
|
||||
p->freeBlockList[MTCODER__BLOCKS_MAX - 1] = (unsigned)(int)-1;
|
||||
p->freeBlockList[MTCODER_BLOCKS_MAX - 1] = (unsigned)(int)-1;
|
||||
p->freeBlockHead = 0;
|
||||
|
||||
p->readProcessed = 0;
|
||||
|
|
@ -508,26 +504,37 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
p->numBlocksMax = numBlocksMax;
|
||||
p->stopReading = False;
|
||||
|
||||
#ifndef MTCODER__USE_WRITE_THREAD
|
||||
#ifndef MTCODER_USE_WRITE_THREAD
|
||||
p->writeIndex = 0;
|
||||
p->writeRes = SZ_OK;
|
||||
for (i = 0; i < MTCODER__BLOCKS_MAX; i++)
|
||||
for (i = 0; i < MTCODER_BLOCKS_MAX; i++)
|
||||
p->ReadyBlocks[i] = False;
|
||||
p->numFinishedThreads = 0;
|
||||
#endif
|
||||
|
||||
p->numStartedThreadsLimit = numThreads;
|
||||
p->numStartedThreads = 0;
|
||||
ThreadNextGroup_Init(&p->nextGroup, p->numThreadGroups, 0); // startGroup
|
||||
|
||||
// for (i = 0; i < numThreads; i++)
|
||||
{
|
||||
// here we create new thread for first block.
|
||||
// And each new thread will create another new thread after block reading
|
||||
// until numStartedThreadsLimit is reached.
|
||||
CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
|
||||
RINOK(MtCoderThread_CreateAndStart(nextThread));
|
||||
{
|
||||
const SRes res2 = MtCoderThread_CreateAndStart(nextThread
|
||||
#ifdef _WIN32
|
||||
, p
|
||||
#endif
|
||||
);
|
||||
RINOK(res2)
|
||||
}
|
||||
}
|
||||
|
||||
RINOK_THREAD(Event_Set(&p->readEvent))
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
{
|
||||
unsigned bi = 0;
|
||||
|
||||
|
|
@ -539,9 +546,9 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))
|
||||
|
||||
{
|
||||
const CMtCoderBlock *block = &p->blocks[bi];
|
||||
unsigned bufIndex = block->bufIndex;
|
||||
BoolInt finished = block->finished;
|
||||
const CMtCoderBlock * const block = &p->blocks[bi];
|
||||
const unsigned bufIndex = block->bufIndex;
|
||||
const BoolInt finished = block->finished;
|
||||
if (res == SZ_OK && block->res != SZ_OK)
|
||||
res = block->res;
|
||||
|
||||
|
|
@ -571,7 +578,7 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
}
|
||||
#else
|
||||
{
|
||||
WRes wres = Event_Wait(&p->finishedEvent);
|
||||
const WRes wres = Event_Wait(&p->finishedEvent);
|
||||
res = MY_SRes_HRESULT_FROM_WRes(wres);
|
||||
}
|
||||
#endif
|
||||
|
|
@ -582,7 +589,7 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
if (res == SZ_OK)
|
||||
res = p->mtProgress.res;
|
||||
|
||||
#ifndef MTCODER__USE_WRITE_THREAD
|
||||
#ifndef MTCODER_USE_WRITE_THREAD
|
||||
if (res == SZ_OK)
|
||||
res = p->writeRes;
|
||||
#endif
|
||||
|
|
@ -593,3 +600,5 @@ SRes MtCoder_Code(CMtCoder *p)
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#undef RINOK_THREAD
|
||||
|
|
|
|||
53
C/MtCoder.h
53
C/MtCoder.h
|
|
@ -1,30 +1,30 @@
|
|||
/* MtCoder.h -- Multi-thread Coder
|
||||
2018-07-04 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __MT_CODER_H
|
||||
#define __MT_CODER_H
|
||||
#ifndef ZIP7_INC_MT_CODER_H
|
||||
#define ZIP7_INC_MT_CODER_H
|
||||
|
||||
#include "MtDec.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
/*
|
||||
if ( defined MTCODER__USE_WRITE_THREAD) : main thread writes all data blocks to output stream
|
||||
if (not defined MTCODER__USE_WRITE_THREAD) : any coder thread can write data blocks to output stream
|
||||
if ( defined MTCODER_USE_WRITE_THREAD) : main thread writes all data blocks to output stream
|
||||
if (not defined MTCODER_USE_WRITE_THREAD) : any coder thread can write data blocks to output stream
|
||||
*/
|
||||
/* #define MTCODER__USE_WRITE_THREAD */
|
||||
/* #define MTCODER_USE_WRITE_THREAD */
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#define MTCODER__GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
|
||||
#define MTCODER__THREADS_MAX 64
|
||||
#define MTCODER__BLOCKS_MAX (MTCODER__GET_NUM_BLOCKS_FROM_THREADS(MTCODER__THREADS_MAX) + 3)
|
||||
#ifndef Z7_ST
|
||||
#define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
|
||||
#define MTCODER_THREADS_MAX 256
|
||||
#define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3)
|
||||
#else
|
||||
#define MTCODER__THREADS_MAX 1
|
||||
#define MTCODER__BLOCKS_MAX 1
|
||||
#define MTCODER_THREADS_MAX 1
|
||||
#define MTCODER_BLOCKS_MAX 1
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
|
||||
typedef struct
|
||||
|
|
@ -37,15 +37,15 @@ typedef struct
|
|||
|
||||
void MtProgressThunk_CreateVTable(CMtProgressThunk *p);
|
||||
|
||||
#define MtProgressThunk_Init(p) { (p)->inSize = 0; (p)->outSize = 0; }
|
||||
#define MtProgressThunk_INIT(p) { (p)->inSize = 0; (p)->outSize = 0; }
|
||||
|
||||
|
||||
struct _CMtCoder;
|
||||
struct CMtCoder_;
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct _CMtCoder *mtCoder;
|
||||
struct CMtCoder_ *mtCoder;
|
||||
unsigned index;
|
||||
int stop;
|
||||
Byte *inBuf;
|
||||
|
|
@ -71,19 +71,20 @@ typedef struct
|
|||
} CMtCoderBlock;
|
||||
|
||||
|
||||
typedef struct _CMtCoder
|
||||
typedef struct CMtCoder_
|
||||
{
|
||||
/* input variables */
|
||||
|
||||
size_t blockSize; /* size of input block */
|
||||
unsigned numThreadsMax;
|
||||
unsigned numThreadGroups;
|
||||
UInt64 expectedDataSize;
|
||||
|
||||
ISeqInStream *inStream;
|
||||
ISeqInStreamPtr inStream;
|
||||
const Byte *inData;
|
||||
size_t inDataSize;
|
||||
|
||||
ICompressProgress *progress;
|
||||
ICompressProgressPtr progress;
|
||||
ISzAllocPtr allocBig;
|
||||
|
||||
IMtCoderCallback2 *mtCallback;
|
||||
|
|
@ -100,13 +101,13 @@ typedef struct _CMtCoder
|
|||
BoolInt stopReading;
|
||||
SRes readRes;
|
||||
|
||||
#ifdef MTCODER__USE_WRITE_THREAD
|
||||
CAutoResetEvent writeEvents[MTCODER__BLOCKS_MAX];
|
||||
#ifdef MTCODER_USE_WRITE_THREAD
|
||||
CAutoResetEvent writeEvents[MTCODER_BLOCKS_MAX];
|
||||
#else
|
||||
CAutoResetEvent finishedEvent;
|
||||
SRes writeRes;
|
||||
unsigned writeIndex;
|
||||
Byte ReadyBlocks[MTCODER__BLOCKS_MAX];
|
||||
Byte ReadyBlocks[MTCODER_BLOCKS_MAX];
|
||||
LONG numFinishedThreads;
|
||||
#endif
|
||||
|
||||
|
|
@ -120,11 +121,13 @@ typedef struct _CMtCoder
|
|||
CCriticalSection cs;
|
||||
|
||||
unsigned freeBlockHead;
|
||||
unsigned freeBlockList[MTCODER__BLOCKS_MAX];
|
||||
unsigned freeBlockList[MTCODER_BLOCKS_MAX];
|
||||
|
||||
CMtProgress mtProgress;
|
||||
CMtCoderBlock blocks[MTCODER__BLOCKS_MAX];
|
||||
CMtCoderThread threads[MTCODER__THREADS_MAX];
|
||||
CMtCoderBlock blocks[MTCODER_BLOCKS_MAX];
|
||||
CMtCoderThread threads[MTCODER_THREADS_MAX];
|
||||
|
||||
CThreadNextGroup nextGroup;
|
||||
} CMtCoder;
|
||||
|
||||
|
||||
|
|
|
|||
117
C/MtDec.c
117
C/MtDec.c
|
|
@ -1,5 +1,5 @@
|
|||
/* MtDec.c -- Multi-thread Decoder
|
||||
2021-12-21 : Igor Pavlov : Public domain */
|
||||
2024-02-20 : Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
#include "MtDec.h"
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
#ifdef SHOW_DEBUG_INFO
|
||||
#define PRF(x) x
|
||||
|
|
@ -24,7 +24,7 @@
|
|||
|
||||
#define PRF_STR_INT(s, d) PRF(printf("\n" s " %d\n", (unsigned)d))
|
||||
|
||||
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress)
|
||||
void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress)
|
||||
{
|
||||
p->progress = progress;
|
||||
p->res = SZ_OK;
|
||||
|
|
@ -81,36 +81,28 @@ void MtProgress_SetError(CMtProgress *p, SRes res)
|
|||
#define RINOK_THREAD(x) RINOK_WRes(x)
|
||||
|
||||
|
||||
static WRes ArEvent_OptCreate_And_Reset(CEvent *p)
|
||||
struct CMtDecBufLink_
|
||||
{
|
||||
if (Event_IsCreated(p))
|
||||
return Event_Reset(p);
|
||||
return AutoResetEvent_CreateNotSignaled(p);
|
||||
}
|
||||
|
||||
|
||||
struct __CMtDecBufLink
|
||||
{
|
||||
struct __CMtDecBufLink *next;
|
||||
struct CMtDecBufLink_ *next;
|
||||
void *pad[3];
|
||||
};
|
||||
|
||||
typedef struct __CMtDecBufLink CMtDecBufLink;
|
||||
typedef struct CMtDecBufLink_ CMtDecBufLink;
|
||||
|
||||
#define MTDEC__LINK_DATA_OFFSET sizeof(CMtDecBufLink)
|
||||
#define MTDEC__DATA_PTR_FROM_LINK(link) ((Byte *)(link) + MTDEC__LINK_DATA_OFFSET)
|
||||
|
||||
|
||||
|
||||
static THREAD_FUNC_DECL ThreadFunc(void *pp);
|
||||
static THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp);
|
||||
|
||||
|
||||
static WRes MtDecThread_CreateEvents(CMtDecThread *t)
|
||||
{
|
||||
WRes wres = ArEvent_OptCreate_And_Reset(&t->canWrite);
|
||||
WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->canWrite);
|
||||
if (wres == 0)
|
||||
{
|
||||
wres = ArEvent_OptCreate_And_Reset(&t->canRead);
|
||||
wres = AutoResetEvent_OptCreate_And_Reset(&t->canRead);
|
||||
if (wres == 0)
|
||||
return SZ_OK;
|
||||
}
|
||||
|
|
@ -126,7 +118,7 @@ static SRes MtDecThread_CreateAndStart(CMtDecThread *t)
|
|||
{
|
||||
if (Thread_WasCreated(&t->thread))
|
||||
return SZ_OK;
|
||||
wres = Thread_Create(&t->thread, ThreadFunc, t);
|
||||
wres = Thread_Create(&t->thread, MtDec_ThreadFunc, t);
|
||||
if (wres == 0)
|
||||
return SZ_OK;
|
||||
}
|
||||
|
|
@ -167,7 +159,7 @@ static void MtDecThread_CloseThread(CMtDecThread *t)
|
|||
static void MtDec_CloseThreads(CMtDec *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
MtDecThread_CloseThread(&p->threads[i]);
|
||||
}
|
||||
|
||||
|
|
@ -179,25 +171,6 @@ static void MtDecThread_Destruct(CMtDecThread *t)
|
|||
|
||||
|
||||
|
||||
static SRes FullRead(ISeqInStream *stream, Byte *data, size_t *processedSize)
|
||||
{
|
||||
size_t size = *processedSize;
|
||||
*processedSize = 0;
|
||||
while (size != 0)
|
||||
{
|
||||
size_t cur = size;
|
||||
SRes res = ISeqInStream_Read(stream, data, &cur);
|
||||
*processedSize += cur;
|
||||
data += cur;
|
||||
size -= cur;
|
||||
RINOK(res);
|
||||
if (cur == 0)
|
||||
return SZ_OK;
|
||||
}
|
||||
return SZ_OK;
|
||||
}
|
||||
|
||||
|
||||
static SRes MtDec_GetError_Spec(CMtDec *p, UInt64 interruptIndex, BoolInt *wasInterrupted)
|
||||
{
|
||||
SRes res;
|
||||
|
|
@ -253,7 +226,7 @@ Byte *MtDec_GetCrossBuff(CMtDec *p)
|
|||
|
||||
|
||||
/*
|
||||
ThreadFunc2() returns:
|
||||
MtDec_ThreadFunc2() returns:
|
||||
0 - in all normal cases (even for stream error or memory allocation error)
|
||||
(!= 0) - WRes error return by system threading function
|
||||
*/
|
||||
|
|
@ -261,11 +234,11 @@ Byte *MtDec_GetCrossBuff(CMtDec *p)
|
|||
// #define MTDEC_ProgessStep (1 << 22)
|
||||
#define MTDEC_ProgessStep (1 << 0)
|
||||
|
||||
static WRes ThreadFunc2(CMtDecThread *t)
|
||||
static WRes MtDec_ThreadFunc2(CMtDecThread *t)
|
||||
{
|
||||
CMtDec *p = t->mtDec;
|
||||
|
||||
PRF_STR_INT("ThreadFunc2", t->index);
|
||||
PRF_STR_INT("MtDec_ThreadFunc2", t->index)
|
||||
|
||||
// SetThreadAffinityMask(GetCurrentThread(), 1 << t->index);
|
||||
|
||||
|
|
@ -295,13 +268,13 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
// CMtDecCallbackInfo parse;
|
||||
CMtDecThread *nextThread;
|
||||
|
||||
PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index);
|
||||
PRF_STR_INT("=============== Event_Wait(&t->canRead)", t->index)
|
||||
|
||||
RINOK_THREAD(Event_Wait(&t->canRead));
|
||||
RINOK_THREAD(Event_Wait(&t->canRead))
|
||||
if (p->exitThread)
|
||||
return 0;
|
||||
|
||||
PRF_STR_INT("after Event_Wait(&t->canRead)", t->index);
|
||||
PRF_STR_INT("after Event_Wait(&t->canRead)", t->index)
|
||||
|
||||
// if (t->index == 3) return 19; // for test
|
||||
|
||||
|
|
@ -373,7 +346,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
{
|
||||
size = p->inBufSize;
|
||||
|
||||
res = FullRead(p->inStream, data, &size);
|
||||
res = SeqInStream_ReadMax(p->inStream, data, &size);
|
||||
|
||||
// size = 10; // test
|
||||
|
||||
|
|
@ -615,7 +588,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
// if ( !finish ) we must call Event_Set(&nextThread->canWrite) in any case
|
||||
// if ( finish ) we switch to single-thread mode and there are 2 ways at the end of current iteration (current block):
|
||||
// - if (needContinue) after Write(&needContinue), we restore decoding with new iteration
|
||||
// - otherwise we stop decoding and exit from ThreadFunc2()
|
||||
// - otherwise we stop decoding and exit from MtDec_ThreadFunc2()
|
||||
|
||||
// Don't change (finish) variable in the further code
|
||||
|
||||
|
|
@ -688,7 +661,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
|
||||
// ---------- WRITE ----------
|
||||
|
||||
RINOK_THREAD(Event_Wait(&t->canWrite));
|
||||
RINOK_THREAD(Event_Wait(&t->canWrite))
|
||||
|
||||
{
|
||||
BoolInt isErrorMode = False;
|
||||
|
|
@ -801,14 +774,14 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
|
||||
if (!finish)
|
||||
{
|
||||
RINOK_THREAD(Event_Set(&nextThread->canWrite));
|
||||
RINOK_THREAD(Event_Set(&nextThread->canWrite))
|
||||
}
|
||||
else
|
||||
{
|
||||
if (needContinue)
|
||||
{
|
||||
// we restore decoding with new iteration
|
||||
RINOK_THREAD(Event_Set(&p->threads[0].canWrite));
|
||||
RINOK_THREAD(Event_Set(&p->threads[0].canWrite))
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -817,7 +790,7 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
return SZ_OK;
|
||||
p->exitThread = True;
|
||||
}
|
||||
RINOK_THREAD(Event_Set(&p->threads[0].canRead));
|
||||
RINOK_THREAD(Event_Set(&p->threads[0].canRead))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -836,7 +809,17 @@ static WRes ThreadFunc2(CMtDecThread *t)
|
|||
#endif
|
||||
|
||||
|
||||
static THREAD_FUNC_DECL ThreadFunc1(void *pp)
|
||||
typedef
|
||||
#ifdef _WIN32
|
||||
UINT_PTR
|
||||
#elif 1
|
||||
uintptr_t
|
||||
#else
|
||||
ptrdiff_t
|
||||
#endif
|
||||
MY_uintptr_t;
|
||||
|
||||
static THREAD_FUNC_DECL MtDec_ThreadFunc1(void *pp)
|
||||
{
|
||||
WRes res;
|
||||
|
||||
|
|
@ -845,10 +828,10 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp)
|
|||
|
||||
// fprintf(stdout, "\n%d = %p\n", t->index, &t);
|
||||
|
||||
res = ThreadFunc2(t);
|
||||
res = MtDec_ThreadFunc2(t);
|
||||
p = t->mtDec;
|
||||
if (res == 0)
|
||||
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)p->exitThreadWRes;
|
||||
return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)p->exitThreadWRes;
|
||||
{
|
||||
// it's unexpected situation for some threading function error
|
||||
if (p->exitThreadWRes == 0)
|
||||
|
|
@ -859,17 +842,17 @@ static THREAD_FUNC_DECL ThreadFunc1(void *pp)
|
|||
Event_Set(&p->threads[0].canWrite);
|
||||
MtProgress_SetError(&p->mtProgress, MY_SRes_HRESULT_FROM_WRes(res));
|
||||
}
|
||||
return (THREAD_FUNC_RET_TYPE)(UINT_PTR)res;
|
||||
return (THREAD_FUNC_RET_TYPE)(MY_uintptr_t)res;
|
||||
}
|
||||
|
||||
static MY_NO_INLINE THREAD_FUNC_DECL ThreadFunc(void *pp)
|
||||
static Z7_NO_INLINE THREAD_FUNC_DECL MtDec_ThreadFunc(void *pp)
|
||||
{
|
||||
#ifdef USE_ALLOCA
|
||||
CMtDecThread *t = (CMtDecThread *)pp;
|
||||
// fprintf(stderr, "\n%d = %p - before", t->index, &t);
|
||||
t->allocaPtr = alloca(t->index * 128);
|
||||
#endif
|
||||
return ThreadFunc1(pp);
|
||||
return MtDec_ThreadFunc1(pp);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -883,7 +866,7 @@ int MtDec_PrepareRead(CMtDec *p)
|
|||
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
if (i > p->numStartedThreads
|
||||
|| p->numFilledThreads <=
|
||||
(i >= p->filledThreadStart ?
|
||||
|
|
@ -987,7 +970,7 @@ void MtDec_Construct(CMtDec *p)
|
|||
|
||||
p->allocatedBufsSize = 0;
|
||||
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
{
|
||||
CMtDecThread *t = &p->threads[i];
|
||||
t->mtDec = p;
|
||||
|
|
@ -995,7 +978,7 @@ void MtDec_Construct(CMtDec *p)
|
|||
t->inBuf = NULL;
|
||||
Event_Construct(&t->canRead);
|
||||
Event_Construct(&t->canWrite);
|
||||
Thread_Construct(&t->thread);
|
||||
Thread_CONSTRUCT(&t->thread)
|
||||
}
|
||||
|
||||
// Event_Construct(&p->finishedEvent);
|
||||
|
|
@ -1010,7 +993,7 @@ static void MtDec_Free(CMtDec *p)
|
|||
|
||||
p->exitThread = True;
|
||||
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
MtDecThread_Destruct(&p->threads[i]);
|
||||
|
||||
// Event_Close(&p->finishedEvent);
|
||||
|
|
@ -1061,15 +1044,15 @@ SRes MtDec_Code(CMtDec *p)
|
|||
|
||||
{
|
||||
unsigned numThreads = p->numThreadsMax;
|
||||
if (numThreads > MTDEC__THREADS_MAX)
|
||||
numThreads = MTDEC__THREADS_MAX;
|
||||
if (numThreads > MTDEC_THREADS_MAX)
|
||||
numThreads = MTDEC_THREADS_MAX;
|
||||
p->numStartedThreads_Limit = numThreads;
|
||||
p->numStartedThreads = 0;
|
||||
}
|
||||
|
||||
if (p->inBufSize != p->allocatedBufsSize)
|
||||
{
|
||||
for (i = 0; i < MTDEC__THREADS_MAX; i++)
|
||||
for (i = 0; i < MTDEC_THREADS_MAX; i++)
|
||||
{
|
||||
CMtDecThread *t = &p->threads[i];
|
||||
if (t->inBuf)
|
||||
|
|
@ -1086,7 +1069,7 @@ SRes MtDec_Code(CMtDec *p)
|
|||
|
||||
MtProgress_Init(&p->mtProgress, p->progress);
|
||||
|
||||
// RINOK_THREAD(ArEvent_OptCreate_And_Reset(&p->finishedEvent));
|
||||
// RINOK_THREAD(AutoResetEvent_OptCreate_And_Reset(&p->finishedEvent))
|
||||
p->exitThread = False;
|
||||
p->exitThreadWRes = 0;
|
||||
|
||||
|
|
@ -1098,8 +1081,8 @@ SRes MtDec_Code(CMtDec *p)
|
|||
wres = MtDecThread_CreateEvents(nextThread);
|
||||
if (wres == 0) { wres = Event_Set(&nextThread->canWrite);
|
||||
if (wres == 0) { wres = Event_Set(&nextThread->canRead);
|
||||
if (wres == 0) { THREAD_FUNC_RET_TYPE res = ThreadFunc(nextThread);
|
||||
wres = (WRes)(UINT_PTR)res;
|
||||
if (wres == 0) { THREAD_FUNC_RET_TYPE res = MtDec_ThreadFunc(nextThread);
|
||||
wres = (WRes)(MY_uintptr_t)res;
|
||||
if (wres != 0)
|
||||
{
|
||||
p->needContinue = False;
|
||||
|
|
@ -1137,3 +1120,5 @@ SRes MtDec_Code(CMtDec *p)
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#undef PRF
|
||||
|
|
|
|||
34
C/MtDec.h
34
C/MtDec.h
|
|
@ -1,46 +1,46 @@
|
|||
/* MtDec.h -- Multi-thread Decoder
|
||||
2020-03-05 : Igor Pavlov : Public domain */
|
||||
2023-04-02 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __MT_DEC_H
|
||||
#define __MT_DEC_H
|
||||
#ifndef ZIP7_INC_MT_DEC_H
|
||||
#define ZIP7_INC_MT_DEC_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
#include "Threads.h"
|
||||
#endif
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#define MTDEC__THREADS_MAX 32
|
||||
#ifndef Z7_ST
|
||||
#define MTDEC_THREADS_MAX 32
|
||||
#else
|
||||
#define MTDEC__THREADS_MAX 1
|
||||
#define MTDEC_THREADS_MAX 1
|
||||
#endif
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
ICompressProgress *progress;
|
||||
ICompressProgressPtr progress;
|
||||
SRes res;
|
||||
UInt64 totalInSize;
|
||||
UInt64 totalOutSize;
|
||||
CCriticalSection cs;
|
||||
} CMtProgress;
|
||||
|
||||
void MtProgress_Init(CMtProgress *p, ICompressProgress *progress);
|
||||
void MtProgress_Init(CMtProgress *p, ICompressProgressPtr progress);
|
||||
SRes MtProgress_Progress_ST(CMtProgress *p);
|
||||
SRes MtProgress_ProgressAdd(CMtProgress *p, UInt64 inSize, UInt64 outSize);
|
||||
SRes MtProgress_GetError(CMtProgress *p);
|
||||
void MtProgress_SetError(CMtProgress *p, SRes res);
|
||||
|
||||
struct _CMtDec;
|
||||
struct CMtDec;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct _CMtDec *mtDec;
|
||||
struct CMtDec_ *mtDec;
|
||||
unsigned index;
|
||||
void *inBuf;
|
||||
|
||||
|
|
@ -117,7 +117,7 @@ typedef struct
|
|||
|
||||
|
||||
|
||||
typedef struct _CMtDec
|
||||
typedef struct CMtDec_
|
||||
{
|
||||
/* input variables */
|
||||
|
||||
|
|
@ -126,11 +126,11 @@ typedef struct _CMtDec
|
|||
// size_t inBlockMax;
|
||||
unsigned numThreadsMax_2;
|
||||
|
||||
ISeqInStream *inStream;
|
||||
ISeqInStreamPtr inStream;
|
||||
// const Byte *inData;
|
||||
// size_t inDataSize;
|
||||
|
||||
ICompressProgress *progress;
|
||||
ICompressProgressPtr progress;
|
||||
ISzAllocPtr alloc;
|
||||
|
||||
IMtDecCallback2 *mtCallback;
|
||||
|
|
@ -171,11 +171,11 @@ typedef struct _CMtDec
|
|||
unsigned filledThreadStart;
|
||||
unsigned numFilledThreads;
|
||||
|
||||
#ifndef _7ZIP_ST
|
||||
#ifndef Z7_ST
|
||||
BoolInt needInterrupt;
|
||||
UInt64 interruptIndex;
|
||||
CMtProgress mtProgress;
|
||||
CMtDecThread threads[MTDEC__THREADS_MAX];
|
||||
CMtDecThread threads[MTDEC_THREADS_MAX];
|
||||
#endif
|
||||
} CMtDec;
|
||||
|
||||
|
|
|
|||
12
C/Ppmd.h
12
C/Ppmd.h
|
|
@ -1,9 +1,9 @@
|
|||
/* Ppmd.h -- PPMD codec common code
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-03-05 : Igor Pavlov : Public domain
|
||||
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
||||
|
||||
#ifndef __PPMD_H
|
||||
#define __PPMD_H
|
||||
#ifndef ZIP7_INC_PPMD_H
|
||||
#define ZIP7_INC_PPMD_H
|
||||
|
||||
#include "CpuArch.h"
|
||||
|
||||
|
|
@ -48,8 +48,10 @@ typedef struct
|
|||
Byte Count; /* Count to next change of Shift */
|
||||
} CPpmd_See;
|
||||
|
||||
#define Ppmd_See_Update(p) if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
|
||||
{ (p)->Summ = (UInt16)((p)->Summ << 1); (p)->Count = (Byte)(3 << (p)->Shift++); }
|
||||
#define Ppmd_See_UPDATE(p) \
|
||||
{ if ((p)->Shift < PPMD_PERIOD_BITS && --(p)->Count == 0) \
|
||||
{ (p)->Summ = (UInt16)((p)->Summ << 1); \
|
||||
(p)->Count = (Byte)(3 << (p)->Shift++); }}
|
||||
|
||||
|
||||
typedef struct
|
||||
|
|
|
|||
217
C/Ppmd7.c
217
C/Ppmd7.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd7.c -- PPMdH codec
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
|
@ -14,7 +14,7 @@ This code is based on PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
|||
MY_ALIGN(16)
|
||||
static const Byte PPMD7_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
|
||||
MY_ALIGN(16)
|
||||
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
|
||||
static const UInt16 PPMD7_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
|
||||
|
||||
#define MAX_FREQ 124
|
||||
#define UNIT_SIZE 12
|
||||
|
|
@ -33,7 +33,7 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
|
|||
#define ONE_STATE(ctx) Ppmd7Context_OneState(ctx)
|
||||
#define SUFFIX(ctx) CTX((ctx)->Suffix)
|
||||
|
||||
typedef CPpmd7_Context * CTX_PTR;
|
||||
typedef CPpmd7_Context * PPMD7_CTX_PTR;
|
||||
|
||||
struct CPpmd7_Node_;
|
||||
|
||||
|
|
@ -107,14 +107,14 @@ BoolInt Ppmd7_Alloc(CPpmd7 *p, UInt32 size, ISzAllocPtr alloc)
|
|||
// ---------- Internal Memory Allocator ----------
|
||||
|
||||
/* We can use CPpmd7_Node in list of free units (as in Ppmd8)
|
||||
But we still need one additional list walk pass in GlueFreeBlocks().
|
||||
So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in InsertNode() / RemoveNode()
|
||||
But we still need one additional list walk pass in Ppmd7_GlueFreeBlocks().
|
||||
So we use simple CPpmd_Void_Ref instead of CPpmd7_Node in Ppmd7_InsertNode() / Ppmd7_RemoveNode()
|
||||
*/
|
||||
|
||||
#define EMPTY_NODE 0
|
||||
|
||||
|
||||
static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
|
||||
static void Ppmd7_InsertNode(CPpmd7 *p, void *node, unsigned indx)
|
||||
{
|
||||
*((CPpmd_Void_Ref *)node) = p->FreeList[indx];
|
||||
// ((CPpmd7_Node *)node)->Next = (CPpmd7_Node_Ref)p->FreeList[indx];
|
||||
|
|
@ -124,7 +124,7 @@ static void InsertNode(CPpmd7 *p, void *node, unsigned indx)
|
|||
}
|
||||
|
||||
|
||||
static void *RemoveNode(CPpmd7 *p, unsigned indx)
|
||||
static void *Ppmd7_RemoveNode(CPpmd7 *p, unsigned indx)
|
||||
{
|
||||
CPpmd_Void_Ref *node = (CPpmd_Void_Ref *)Ppmd7_GetPtr(p, p->FreeList[indx]);
|
||||
p->FreeList[indx] = *node;
|
||||
|
|
@ -134,32 +134,32 @@ static void *RemoveNode(CPpmd7 *p, unsigned indx)
|
|||
}
|
||||
|
||||
|
||||
static void SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
|
||||
static void Ppmd7_SplitBlock(CPpmd7 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
|
||||
{
|
||||
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
|
||||
ptr = (Byte *)ptr + U2B(I2U(newIndx));
|
||||
if (I2U(i = U2I(nu)) != nu)
|
||||
{
|
||||
unsigned k = I2U(--i);
|
||||
InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
|
||||
Ppmd7_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
|
||||
}
|
||||
InsertNode(p, ptr, i);
|
||||
Ppmd7_InsertNode(p, ptr, i);
|
||||
}
|
||||
|
||||
|
||||
/* we use CPpmd7_Node_Union union to solve XLC -O2 strict pointer aliasing problem */
|
||||
|
||||
typedef union _CPpmd7_Node_Union
|
||||
typedef union
|
||||
{
|
||||
CPpmd7_Node Node;
|
||||
CPpmd7_Node_Ref NextRef;
|
||||
} CPpmd7_Node_Union;
|
||||
|
||||
/* Original PPmdH (Ppmd7) code uses doubly linked list in GlueFreeBlocks()
|
||||
/* Original PPmdH (Ppmd7) code uses doubly linked list in Ppmd7_GlueFreeBlocks()
|
||||
we use single linked list similar to Ppmd8 code */
|
||||
|
||||
|
||||
static void GlueFreeBlocks(CPpmd7 *p)
|
||||
static void Ppmd7_GlueFreeBlocks(CPpmd7 *p)
|
||||
{
|
||||
/*
|
||||
we use first UInt16 field of 12-bytes UNITs as record type stamp
|
||||
|
|
@ -239,27 +239,27 @@ static void GlueFreeBlocks(CPpmd7 *p)
|
|||
if (nu == 0)
|
||||
continue;
|
||||
for (; nu > 128; nu -= 128, node += 128)
|
||||
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
|
||||
Ppmd7_InsertNode(p, node, PPMD_NUM_INDEXES - 1);
|
||||
if (I2U(i = U2I(nu)) != nu)
|
||||
{
|
||||
unsigned k = I2U(--i);
|
||||
InsertNode(p, node + k, (unsigned)nu - k - 1);
|
||||
Ppmd7_InsertNode(p, node + k, (unsigned)nu - k - 1);
|
||||
}
|
||||
InsertNode(p, node, i);
|
||||
Ppmd7_InsertNode(p, node, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
|
||||
Z7_NO_INLINE
|
||||
static void *Ppmd7_AllocUnitsRare(CPpmd7 *p, unsigned indx)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (p->GlueCount == 0)
|
||||
{
|
||||
GlueFreeBlocks(p);
|
||||
Ppmd7_GlueFreeBlocks(p);
|
||||
if (p->FreeList[indx] != 0)
|
||||
return RemoveNode(p, indx);
|
||||
return Ppmd7_RemoveNode(p, indx);
|
||||
}
|
||||
|
||||
i = indx;
|
||||
|
|
@ -277,17 +277,17 @@ static void *AllocUnitsRare(CPpmd7 *p, unsigned indx)
|
|||
while (p->FreeList[i] == 0);
|
||||
|
||||
{
|
||||
void *block = RemoveNode(p, i);
|
||||
SplitBlock(p, block, i, indx);
|
||||
void *block = Ppmd7_RemoveNode(p, i);
|
||||
Ppmd7_SplitBlock(p, block, i, indx);
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void *AllocUnits(CPpmd7 *p, unsigned indx)
|
||||
static void *Ppmd7_AllocUnits(CPpmd7 *p, unsigned indx)
|
||||
{
|
||||
if (p->FreeList[indx] != 0)
|
||||
return RemoveNode(p, indx);
|
||||
return Ppmd7_RemoveNode(p, indx);
|
||||
{
|
||||
UInt32 numBytes = U2B(I2U(indx));
|
||||
Byte *lo = p->LoUnit;
|
||||
|
|
@ -297,13 +297,22 @@ static void *AllocUnits(CPpmd7 *p, unsigned indx)
|
|||
return lo;
|
||||
}
|
||||
}
|
||||
return AllocUnitsRare(p, indx);
|
||||
return Ppmd7_AllocUnitsRare(p, indx);
|
||||
}
|
||||
|
||||
|
||||
#define MyMem12Cpy(dest, src, num) \
|
||||
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
|
||||
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
|
||||
#define MEM_12_CPY(dest, src, num) \
|
||||
{ UInt32 *d = (UInt32 *)(dest); \
|
||||
const UInt32 *z = (const UInt32 *)(src); \
|
||||
unsigned n = (num); \
|
||||
do { \
|
||||
d[0] = z[0]; \
|
||||
d[1] = z[1]; \
|
||||
d[2] = z[2]; \
|
||||
z += 3; \
|
||||
d += 3; \
|
||||
} while (--n); \
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
|
|
@ -315,12 +324,12 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
|
|||
return oldPtr;
|
||||
if (p->FreeList[i1] != 0)
|
||||
{
|
||||
void *ptr = RemoveNode(p, i1);
|
||||
MyMem12Cpy(ptr, oldPtr, newNU);
|
||||
InsertNode(p, oldPtr, i0);
|
||||
void *ptr = Ppmd7_RemoveNode(p, i1);
|
||||
MEM_12_CPY(ptr, oldPtr, newNU)
|
||||
Ppmd7_InsertNode(p, oldPtr, i0);
|
||||
return ptr;
|
||||
}
|
||||
SplitBlock(p, oldPtr, i0, i1);
|
||||
Ppmd7_SplitBlock(p, oldPtr, i0, i1);
|
||||
return oldPtr;
|
||||
}
|
||||
*/
|
||||
|
|
@ -329,14 +338,14 @@ static void *ShrinkUnits(CPpmd7 *p, void *oldPtr, unsigned oldNU, unsigned newNU
|
|||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
|
||||
{
|
||||
Ppmd_SET_SUCCESSOR(p, v);
|
||||
Ppmd_SET_SUCCESSOR(p, v)
|
||||
}
|
||||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
void RestartModel(CPpmd7 *p)
|
||||
void Ppmd7_RestartModel(CPpmd7 *p)
|
||||
{
|
||||
unsigned i, k;
|
||||
|
||||
|
|
@ -352,8 +361,8 @@ void RestartModel(CPpmd7 *p)
|
|||
p->PrevSuccess = 0;
|
||||
|
||||
{
|
||||
CPpmd7_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
|
||||
CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
|
||||
CPpmd7_Context *mc = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
|
||||
CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd7_AllocUnits(p, PPMD_NUM_INDEXES - 1); */
|
||||
|
||||
p->LoUnit += U2B(256 / 2);
|
||||
p->MaxContext = p->MinContext = mc;
|
||||
|
|
@ -391,7 +400,7 @@ void RestartModel(CPpmd7 *p)
|
|||
{
|
||||
unsigned m;
|
||||
UInt16 *dest = p->BinSumm[i] + k;
|
||||
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 2));
|
||||
const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD7_kInitBinEsc[k] / (i + 2));
|
||||
for (m = 0; m < 64; m += 8)
|
||||
dest[m] = val;
|
||||
}
|
||||
|
|
@ -423,13 +432,13 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
|
|||
{
|
||||
p->MaxOrder = maxOrder;
|
||||
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
CreateSuccessors()
|
||||
Ppmd7_CreateSuccessors()
|
||||
It's called when (FoundState->Successor) is RAW-Successor,
|
||||
that is the link to position in Raw text.
|
||||
So we create Context records and write the links to
|
||||
|
|
@ -445,10 +454,10 @@ void Ppmd7_Init(CPpmd7 *p, unsigned maxOrder)
|
|||
also it can return pointer to real context of same order,
|
||||
*/
|
||||
|
||||
MY_NO_INLINE
|
||||
static CTX_PTR CreateSuccessors(CPpmd7 *p)
|
||||
Z7_NO_INLINE
|
||||
static PPMD7_CTX_PTR Ppmd7_CreateSuccessors(CPpmd7 *p)
|
||||
{
|
||||
CTX_PTR c = p->MinContext;
|
||||
PPMD7_CTX_PTR c = p->MinContext;
|
||||
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
|
||||
Byte newSym, newFreq;
|
||||
unsigned numPs = 0;
|
||||
|
|
@ -522,15 +531,15 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p)
|
|||
|
||||
do
|
||||
{
|
||||
CTX_PTR c1;
|
||||
PPMD7_CTX_PTR c1;
|
||||
/* = AllocContext(p); */
|
||||
if (p->HiUnit != p->LoUnit)
|
||||
c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
|
||||
c1 = (PPMD7_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
|
||||
else if (p->FreeList[0] != 0)
|
||||
c1 = (CTX_PTR)RemoveNode(p, 0);
|
||||
c1 = (PPMD7_CTX_PTR)Ppmd7_RemoveNode(p, 0);
|
||||
else
|
||||
{
|
||||
c1 = (CTX_PTR)AllocUnitsRare(p, 0);
|
||||
c1 = (PPMD7_CTX_PTR)Ppmd7_AllocUnitsRare(p, 0);
|
||||
if (!c1)
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -550,16 +559,16 @@ static CTX_PTR CreateSuccessors(CPpmd7 *p)
|
|||
|
||||
|
||||
|
||||
#define SwapStates(s) \
|
||||
#define SWAP_STATES(s) \
|
||||
{ CPpmd_State tmp = s[0]; s[0] = s[-1]; s[-1] = tmp; }
|
||||
|
||||
|
||||
void Ppmd7_UpdateModel(CPpmd7 *p);
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
void Ppmd7_UpdateModel(CPpmd7 *p)
|
||||
{
|
||||
CPpmd_Void_Ref maxSuccessor, minSuccessor;
|
||||
CTX_PTR c, mc;
|
||||
PPMD7_CTX_PTR c, mc;
|
||||
unsigned s0, ns;
|
||||
|
||||
|
||||
|
|
@ -592,7 +601,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
|
||||
if (s[0].Freq >= s[-1].Freq)
|
||||
{
|
||||
SwapStates(s);
|
||||
SWAP_STATES(s)
|
||||
s--;
|
||||
}
|
||||
}
|
||||
|
|
@ -610,10 +619,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
{
|
||||
/* MAX ORDER context */
|
||||
/* (FoundState->Successor) is RAW-Successor. */
|
||||
p->MaxContext = p->MinContext = CreateSuccessors(p);
|
||||
p->MaxContext = p->MinContext = Ppmd7_CreateSuccessors(p);
|
||||
if (!p->MinContext)
|
||||
{
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
return;
|
||||
}
|
||||
SetSuccessor(p->FoundState, REF(p->MinContext));
|
||||
|
|
@ -629,7 +638,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
p->Text = text;
|
||||
if (text >= p->UnitsStart)
|
||||
{
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
return;
|
||||
}
|
||||
maxSuccessor = REF(text);
|
||||
|
|
@ -645,10 +654,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
if (minSuccessor <= maxSuccessor)
|
||||
{
|
||||
// minSuccessor is RAW-Successor. So we will create real contexts records:
|
||||
CTX_PTR cs = CreateSuccessors(p);
|
||||
PPMD7_CTX_PTR cs = Ppmd7_CreateSuccessors(p);
|
||||
if (!cs)
|
||||
{
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
return;
|
||||
}
|
||||
minSuccessor = REF(cs);
|
||||
|
|
@ -711,27 +720,27 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
if ((ns1 & 1) == 0)
|
||||
{
|
||||
/* Expand for one UNIT */
|
||||
unsigned oldNU = ns1 >> 1;
|
||||
unsigned i = U2I(oldNU);
|
||||
const unsigned oldNU = ns1 >> 1;
|
||||
const unsigned i = U2I(oldNU);
|
||||
if (i != U2I((size_t)oldNU + 1))
|
||||
{
|
||||
void *ptr = AllocUnits(p, i + 1);
|
||||
void *ptr = Ppmd7_AllocUnits(p, i + 1);
|
||||
void *oldPtr;
|
||||
if (!ptr)
|
||||
{
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
return;
|
||||
}
|
||||
oldPtr = STATS(c);
|
||||
MyMem12Cpy(ptr, oldPtr, oldNU);
|
||||
InsertNode(p, oldPtr, i);
|
||||
MEM_12_CPY(ptr, oldPtr, oldNU)
|
||||
Ppmd7_InsertNode(p, oldPtr, i);
|
||||
c->Union4.Stats = STATS_REF(ptr);
|
||||
}
|
||||
}
|
||||
sum = c->Union2.SummFreq;
|
||||
/* max increase of Escape_Freq is 3 here.
|
||||
total increase of Union2.SummFreq for all symbols is less than 256 here */
|
||||
sum += (UInt32)(2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1));
|
||||
sum += (UInt32)(unsigned)((2 * ns1 < ns) + 2 * ((unsigned)(4 * ns1 <= ns) & (sum <= 8 * ns1)));
|
||||
/* original PPMdH uses 16-bit variable for (sum) here.
|
||||
But (sum < 0x9000). So we don't truncate (sum) to 16-bit */
|
||||
// sum = (UInt16)sum;
|
||||
|
|
@ -739,10 +748,10 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
else
|
||||
{
|
||||
// instead of One-symbol context we create 2-symbol context
|
||||
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
|
||||
CPpmd_State *s = (CPpmd_State*)Ppmd7_AllocUnits(p, 0);
|
||||
if (!s)
|
||||
{
|
||||
RestartModel(p);
|
||||
Ppmd7_RestartModel(p);
|
||||
return;
|
||||
}
|
||||
{
|
||||
|
|
@ -761,7 +770,7 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
// (max(s->freq) == 120), when we convert from 1-symbol into 2-symbol context
|
||||
s->Freq = (Byte)freq;
|
||||
// max(InitEsc = PPMD7_kExpEscape[*]) is 25. So the max(escapeFreq) is 26 here
|
||||
sum = freq + p->InitEsc + (ns > 3);
|
||||
sum = (UInt32)(freq + p->InitEsc + (ns > 3));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -795,8 +804,8 @@ void Ppmd7_UpdateModel(CPpmd7 *p)
|
|||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
static void Rescale(CPpmd7 *p)
|
||||
Z7_NO_INLINE
|
||||
static void Ppmd7_Rescale(CPpmd7 *p)
|
||||
{
|
||||
unsigned i, adder, sumFreq, escFreq;
|
||||
CPpmd_State *stats = STATS(p->MinContext);
|
||||
|
|
@ -885,7 +894,7 @@ static void Rescale(CPpmd7 *p)
|
|||
*s = *stats;
|
||||
s->Freq = (Byte)freq; // (freq <= 260 / 4)
|
||||
p->FoundState = s;
|
||||
InsertNode(p, stats, U2I(n0));
|
||||
Ppmd7_InsertNode(p, stats, U2I(n0));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -899,13 +908,13 @@ static void Rescale(CPpmd7 *p)
|
|||
{
|
||||
if (p->FreeList[i1] != 0)
|
||||
{
|
||||
void *ptr = RemoveNode(p, i1);
|
||||
void *ptr = Ppmd7_RemoveNode(p, i1);
|
||||
p->MinContext->Union4.Stats = STATS_REF(ptr);
|
||||
MyMem12Cpy(ptr, (const void *)stats, n1);
|
||||
InsertNode(p, stats, i0);
|
||||
MEM_12_CPY(ptr, (const void *)stats, n1)
|
||||
Ppmd7_InsertNode(p, stats, i0);
|
||||
}
|
||||
else
|
||||
SplitBlock(p, stats, i0, i1);
|
||||
Ppmd7_SplitBlock(p, stats, i0, i1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -933,10 +942,10 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
|
|||
p->HiBitsFlag;
|
||||
{
|
||||
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
|
||||
unsigned summ = (UInt16)see->Summ; // & 0xFFFF
|
||||
unsigned r = (summ >> see->Shift);
|
||||
const unsigned summ = (UInt16)see->Summ; // & 0xFFFF
|
||||
const unsigned r = (summ >> see->Shift);
|
||||
see->Summ = (UInt16)(summ - r);
|
||||
*escFreq = r + (r == 0);
|
||||
*escFreq = (UInt32)(r + (r == 0));
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -948,9 +957,9 @@ CPpmd_See *Ppmd7_MakeEscFreq(CPpmd7 *p, unsigned numMasked, UInt32 *escFreq)
|
|||
}
|
||||
|
||||
|
||||
static void NextContext(CPpmd7 *p)
|
||||
static void Ppmd7_NextContext(CPpmd7 *p)
|
||||
{
|
||||
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
|
||||
PPMD7_CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
|
||||
if (p->OrderFall == 0 && (const Byte *)c > p->Text)
|
||||
p->MaxContext = p->MinContext = c;
|
||||
else
|
||||
|
|
@ -967,12 +976,12 @@ void Ppmd7_Update1(CPpmd7 *p)
|
|||
s->Freq = (Byte)freq;
|
||||
if (freq > s[-1].Freq)
|
||||
{
|
||||
SwapStates(s);
|
||||
SWAP_STATES(s)
|
||||
p->FoundState = --s;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
Ppmd7_Rescale(p);
|
||||
}
|
||||
NextContext(p);
|
||||
Ppmd7_NextContext(p);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -981,15 +990,15 @@ void Ppmd7_Update1_0(CPpmd7 *p)
|
|||
CPpmd_State *s = p->FoundState;
|
||||
CPpmd7_Context *mc = p->MinContext;
|
||||
unsigned freq = s->Freq;
|
||||
unsigned summFreq = mc->Union2.SummFreq;
|
||||
const unsigned summFreq = mc->Union2.SummFreq;
|
||||
p->PrevSuccess = (2 * freq > summFreq);
|
||||
p->RunLength += (int)p->PrevSuccess;
|
||||
p->RunLength += (Int32)p->PrevSuccess;
|
||||
mc->Union2.SummFreq = (UInt16)(summFreq + 4);
|
||||
freq += 4;
|
||||
s->Freq = (Byte)freq;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
NextContext(p);
|
||||
Ppmd7_Rescale(p);
|
||||
Ppmd7_NextContext(p);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1000,7 +1009,7 @@ void Ppmd7_UpdateBin(CPpmd7 *p)
|
|||
p->FoundState->Freq = (Byte)(freq + (freq < 128));
|
||||
p->PrevSuccess = 1;
|
||||
p->RunLength++;
|
||||
NextContext(p);
|
||||
Ppmd7_NextContext(p);
|
||||
}
|
||||
*/
|
||||
|
||||
|
|
@ -1013,7 +1022,7 @@ void Ppmd7_Update2(CPpmd7 *p)
|
|||
p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
|
||||
s->Freq = (Byte)freq;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
Ppmd7_Rescale(p);
|
||||
Ppmd7_UpdateModel(p);
|
||||
}
|
||||
|
||||
|
|
@ -1042,8 +1051,8 @@ Last UNIT of array at offset (Size - 12) is root order-0 CPpmd7_Context record.
|
|||
The code can free UNITs memory blocks that were allocated to store CPpmd_State vectors.
|
||||
The code doesn't free UNITs allocated for CPpmd7_Context records.
|
||||
|
||||
The code calls RestartModel(), when there is no free memory for allocation.
|
||||
And RestartModel() changes the state to orignal start state, with full free block.
|
||||
The code calls Ppmd7_RestartModel(), when there is no free memory for allocation.
|
||||
And Ppmd7_RestartModel() changes the state to orignal start state, with full free block.
|
||||
|
||||
|
||||
The code allocates UNITs with the following order:
|
||||
|
|
@ -1051,14 +1060,14 @@ The code allocates UNITs with the following order:
|
|||
Allocation of 1 UNIT for Context record
|
||||
- from free space (HiUnit) down to (LoUnit)
|
||||
- from FreeList[0]
|
||||
- AllocUnitsRare()
|
||||
- Ppmd7_AllocUnitsRare()
|
||||
|
||||
AllocUnits() for CPpmd_State vectors:
|
||||
Ppmd7_AllocUnits() for CPpmd_State vectors:
|
||||
- from FreeList[i]
|
||||
- from free space (LoUnit) up to (HiUnit)
|
||||
- AllocUnitsRare()
|
||||
- Ppmd7_AllocUnitsRare()
|
||||
|
||||
AllocUnitsRare()
|
||||
Ppmd7_AllocUnitsRare()
|
||||
- if (GlueCount == 0)
|
||||
{ Glue lists, GlueCount = 255, allocate from FreeList[i]] }
|
||||
- loop for all higher sized FreeList[...] lists
|
||||
|
|
@ -1093,8 +1102,8 @@ The PPMd code tries to fulfill the condition:
|
|||
We have (Sum(Stats[].Freq) <= 256 * 124), because of (MAX_FREQ = 124)
|
||||
So (4 = 128 - 124) is average reserve for Escape_Freq for each symbol.
|
||||
If (CPpmd_State::Freq) is not aligned for 4, the reserve can be 5, 6 or 7.
|
||||
SummFreq and Escape_Freq can be changed in Rescale() and *Update*() functions.
|
||||
Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Rescale() for
|
||||
SummFreq and Escape_Freq can be changed in Ppmd7_Rescale() and *Update*() functions.
|
||||
Ppmd7_Rescale() can remove symbols only from max-order contexts. So Escape_Freq can increase after multiple calls of Ppmd7_Rescale() for
|
||||
max-order context.
|
||||
|
||||
When the PPMd code still break (Total <= RC::Range) condition in range coder,
|
||||
|
|
@ -1102,3 +1111,21 @@ we have two ways to resolve that problem:
|
|||
1) we can report error, if we want to keep compatibility with original PPMd code that has no fix for such cases.
|
||||
2) we can reduce (Total) value to (RC::Range) by reducing (Escape_Freq) part of (Total) value.
|
||||
*/
|
||||
|
||||
#undef MAX_FREQ
|
||||
#undef UNIT_SIZE
|
||||
#undef U2B
|
||||
#undef U2I
|
||||
#undef I2U
|
||||
#undef I2U_UInt16
|
||||
#undef REF
|
||||
#undef STATS_REF
|
||||
#undef CTX
|
||||
#undef STATS
|
||||
#undef ONE_STATE
|
||||
#undef SUFFIX
|
||||
#undef NODE
|
||||
#undef EMPTY_NODE
|
||||
#undef MEM_12_CPY
|
||||
#undef SUCCESSOR
|
||||
#undef SWAP_STATES
|
||||
|
|
|
|||
10
C/Ppmd7.h
10
C/Ppmd7.h
|
|
@ -1,11 +1,11 @@
|
|||
/* Ppmd7.h -- Ppmd7 (PPMdH) compression codec
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-04-02 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
||||
|
||||
|
||||
#ifndef __PPMD7_H
|
||||
#define __PPMD7_H
|
||||
#ifndef ZIP7_INC_PPMD7_H
|
||||
#define ZIP7_INC_PPMD7_H
|
||||
|
||||
#include "Ppmd.h"
|
||||
|
||||
|
|
@ -55,7 +55,7 @@ typedef struct
|
|||
UInt32 Range;
|
||||
UInt32 Code;
|
||||
UInt32 Low;
|
||||
IByteIn *Stream;
|
||||
IByteInPtr Stream;
|
||||
} CPpmd7_RangeDec;
|
||||
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ typedef struct
|
|||
// Byte _dummy_[3];
|
||||
UInt64 Low;
|
||||
UInt64 CacheSize;
|
||||
IByteOut *Stream;
|
||||
IByteOutPtr Stream;
|
||||
} CPpmd7z_RangeEnc;
|
||||
|
||||
|
||||
|
|
|
|||
79
C/Ppmd7Dec.c
79
C/Ppmd7Dec.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd7Dec.c -- Ppmd7z (PPMdH with 7z Range Coder) Decoder
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
||||
|
||||
|
|
@ -8,7 +8,7 @@ This code is based on:
|
|||
|
||||
#include "Ppmd7.h"
|
||||
|
||||
#define kTopValue (1 << 24)
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
|
||||
|
||||
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
|
||||
|
|
@ -37,9 +37,9 @@ BoolInt Ppmd7z_RangeDec_Init(CPpmd7_RangeDec *p)
|
|||
|
||||
#define R (&p->rc.dec)
|
||||
|
||||
MY_FORCE_INLINE
|
||||
// MY_NO_INLINE
|
||||
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
Z7_FORCE_INLINE
|
||||
// Z7_NO_INLINE
|
||||
static void Ppmd7z_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
{
|
||||
|
||||
|
||||
|
|
@ -48,18 +48,18 @@ static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
|||
RC_NORM_LOCAL(R)
|
||||
}
|
||||
|
||||
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
#define RC_Decode(start, size) Ppmd7z_RD_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
|
||||
|
||||
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
|
||||
typedef CPpmd7_Context * CTX_PTR;
|
||||
// typedef CPpmd7_Context * CTX_PTR;
|
||||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
void Ppmd7_UpdateModel(CPpmd7 *p);
|
||||
|
||||
#define MASK(sym) ((unsigned char *)charMask)[sym]
|
||||
// MY_FORCE_INLINE
|
||||
#define MASK(sym) ((Byte *)charMask)[sym]
|
||||
// Z7_FORCE_INLINE
|
||||
// static
|
||||
int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
||||
{
|
||||
|
|
@ -70,7 +70,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
|
||||
unsigned i;
|
||||
UInt32 count, hiCnt;
|
||||
UInt32 summFreq = p->MinContext->Union2.SummFreq;
|
||||
const UInt32 summFreq = p->MinContext->Union2.SummFreq;
|
||||
|
||||
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
if ((Int32)(count -= s->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal(0, s->Freq);
|
||||
RC_DecodeFinal(0, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update1_0(p);
|
||||
|
|
@ -96,7 +96,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
if ((Int32)(count -= (++s)->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update1(p);
|
||||
|
|
@ -109,10 +109,10 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
return PPMD7_SYM_ERROR;
|
||||
|
||||
hiCnt -= count;
|
||||
RC_Decode(hiCnt, summFreq - hiCnt);
|
||||
RC_Decode(hiCnt, summFreq - hiCnt)
|
||||
|
||||
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
// i = p->MinContext->NumStats - 1;
|
||||
// do { MASK((--s)->Symbol) = 0; } while (--i);
|
||||
{
|
||||
|
|
@ -120,8 +120,8 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -152,7 +152,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
// Ppmd7_UpdateBin(p);
|
||||
{
|
||||
unsigned freq = s->Freq;
|
||||
CTX_PTR c = CTX(SUCCESSOR(s));
|
||||
CPpmd7_Context *c = CTX(SUCCESSOR(s));
|
||||
sym = s->Symbol;
|
||||
p->FoundState = s;
|
||||
p->PrevSuccess = 1;
|
||||
|
|
@ -176,7 +176,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
R->Range -= size0;
|
||||
RC_NORM_LOCAL(R)
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
|
||||
p->PrevSuccess = 0;
|
||||
}
|
||||
|
|
@ -209,17 +209,17 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
unsigned num2 = num / 2;
|
||||
|
||||
num &= 1;
|
||||
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
s += num;
|
||||
p->MinContext = mc;
|
||||
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s[0].Symbol;
|
||||
unsigned sym1 = s[1].Symbol;
|
||||
const unsigned sym0 = s[0].Symbol;
|
||||
const unsigned sym1 = s[1].Symbol;
|
||||
s += 2;
|
||||
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
|
||||
hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1)));
|
||||
}
|
||||
while (--num2);
|
||||
}
|
||||
|
|
@ -238,20 +238,20 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
|
||||
s = Ppmd7_GetStats(p, p->MinContext);
|
||||
hiCnt = count;
|
||||
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
|
||||
// count -= s->Freq & (UInt32)(MASK(s->Symbol));
|
||||
// if ((Int32)count >= 0)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
};
|
||||
count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
}
|
||||
}
|
||||
s--;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
Ppmd_See_Update(see);
|
||||
Ppmd_See_UPDATE(see)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update2(p);
|
||||
|
|
@ -261,7 +261,7 @@ int Ppmd7z_DecodeSymbol(CPpmd7 *p)
|
|||
if (count >= freqSum)
|
||||
return PPMD7_SYM_ERROR;
|
||||
|
||||
RC_Decode(hiCnt, freqSum - hiCnt);
|
||||
RC_Decode(hiCnt, freqSum - hiCnt)
|
||||
|
||||
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
|
|
@ -295,3 +295,18 @@ Byte *Ppmd7z_DecodeSymbols(CPpmd7 *p, Byte *buf, const Byte *lim)
|
|||
return buf;
|
||||
}
|
||||
*/
|
||||
|
||||
#undef kTopValue
|
||||
#undef READ_BYTE
|
||||
#undef RC_NORM_BASE
|
||||
#undef RC_NORM_1
|
||||
#undef RC_NORM
|
||||
#undef RC_NORM_LOCAL
|
||||
#undef RC_NORM_REMOTE
|
||||
#undef R
|
||||
#undef RC_Decode
|
||||
#undef RC_DecodeFinal
|
||||
#undef RC_GetThreshold
|
||||
#undef CTX
|
||||
#undef SUCCESSOR
|
||||
#undef MASK
|
||||
|
|
|
|||
102
C/Ppmd7Enc.c
102
C/Ppmd7Enc.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd7Enc.c -- Ppmd7z (PPMdH with 7z Range Coder) Encoder
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.H (2001): Dmitry Shkarin : Public domain */
|
||||
|
||||
|
|
@ -8,7 +8,7 @@ This code is based on:
|
|||
|
||||
#include "Ppmd7.h"
|
||||
|
||||
#define kTopValue (1 << 24)
|
||||
#define kTopValue ((UInt32)1 << 24)
|
||||
|
||||
#define R (&p->rc.enc)
|
||||
|
||||
|
|
@ -20,8 +20,8 @@ void Ppmd7z_Init_RangeEnc(CPpmd7 *p)
|
|||
R->CacheSize = 1;
|
||||
}
|
||||
|
||||
MY_NO_INLINE
|
||||
static void RangeEnc_ShiftLow(CPpmd7 *p)
|
||||
Z7_NO_INLINE
|
||||
static void Ppmd7z_RangeEnc_ShiftLow(CPpmd7 *p)
|
||||
{
|
||||
if ((UInt32)R->Low < (UInt32)0xFF000000 || (unsigned)(R->Low >> 32) != 0)
|
||||
{
|
||||
|
|
@ -38,53 +38,53 @@ static void RangeEnc_ShiftLow(CPpmd7 *p)
|
|||
R->Low = (UInt32)((UInt32)R->Low << 8);
|
||||
}
|
||||
|
||||
#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; RangeEnc_ShiftLow(p);
|
||||
#define RC_NORM_1(p) RC_NORM_BASE(p) }
|
||||
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
|
||||
#define RC_NORM_BASE(p) if (R->Range < kTopValue) { R->Range <<= 8; Ppmd7z_RangeEnc_ShiftLow(p);
|
||||
#define RC_NORM_1(p) RC_NORM_BASE(p) }
|
||||
#define RC_NORM(p) RC_NORM_BASE(p) RC_NORM_BASE(p) }}
|
||||
|
||||
// we must use only one type of Normalization from two: LOCAL or REMOTE
|
||||
#define RC_NORM_LOCAL(p) // RC_NORM(p)
|
||||
#define RC_NORM_REMOTE(p) RC_NORM(p)
|
||||
|
||||
/*
|
||||
#define RangeEnc_Encode(p, start, _size_) \
|
||||
#define Ppmd7z_RangeEnc_Encode(p, start, _size_) \
|
||||
{ UInt32 size = _size_; \
|
||||
R->Low += start * R->Range; \
|
||||
R->Range *= size; \
|
||||
RC_NORM_LOCAL(p); }
|
||||
*/
|
||||
|
||||
MY_FORCE_INLINE
|
||||
// MY_NO_INLINE
|
||||
static void RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
Z7_FORCE_INLINE
|
||||
// Z7_NO_INLINE
|
||||
static void Ppmd7z_RangeEnc_Encode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
{
|
||||
R->Low += start * R->Range;
|
||||
R->Range *= size;
|
||||
RC_NORM_LOCAL(p);
|
||||
RC_NORM_LOCAL(p)
|
||||
}
|
||||
|
||||
void Ppmd7z_Flush_RangeEnc(CPpmd7 *p)
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < 5; i++)
|
||||
RangeEnc_ShiftLow(p);
|
||||
Ppmd7z_RangeEnc_ShiftLow(p);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define RC_Encode(start, size) RangeEnc_Encode(p, start, size);
|
||||
#define RC_EncodeFinal(start, size) RC_Encode(start, size); RC_NORM_REMOTE(p);
|
||||
#define RC_Encode(start, size) Ppmd7z_RangeEnc_Encode(p, start, size);
|
||||
#define RC_EncodeFinal(start, size) RC_Encode(start, size) RC_NORM_REMOTE(p)
|
||||
|
||||
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
|
||||
#define SUFFIX(ctx) CTX((ctx)->Suffix)
|
||||
typedef CPpmd7_Context * CTX_PTR;
|
||||
// typedef CPpmd7_Context * CTX_PTR;
|
||||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
|
||||
void Ppmd7_UpdateModel(CPpmd7 *p);
|
||||
|
||||
#define MASK(sym) ((unsigned char *)charMask)[sym]
|
||||
#define MASK(sym) ((Byte *)charMask)[sym]
|
||||
|
||||
MY_FORCE_INLINE
|
||||
Z7_FORCE_INLINE
|
||||
static
|
||||
void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
||||
{
|
||||
|
|
@ -104,7 +104,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
if (s->Symbol == symbol)
|
||||
{
|
||||
// R->Range /= p->MinContext->Union2.SummFreq;
|
||||
RC_EncodeFinal(0, s->Freq);
|
||||
RC_EncodeFinal(0, s->Freq)
|
||||
p->FoundState = s;
|
||||
Ppmd7_Update1_0(p);
|
||||
return;
|
||||
|
|
@ -117,7 +117,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
if ((++s)->Symbol == symbol)
|
||||
{
|
||||
// R->Range /= p->MinContext->Union2.SummFreq;
|
||||
RC_EncodeFinal(sum, s->Freq);
|
||||
RC_EncodeFinal(sum, s->Freq)
|
||||
p->FoundState = s;
|
||||
Ppmd7_Update1(p);
|
||||
return;
|
||||
|
|
@ -127,10 +127,10 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
while (--i);
|
||||
|
||||
// R->Range /= p->MinContext->Union2.SummFreq;
|
||||
RC_Encode(sum, p->MinContext->Union2.SummFreq - sum);
|
||||
RC_Encode(sum, p->MinContext->Union2.SummFreq - sum)
|
||||
|
||||
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
// MASK(s->Symbol) = 0;
|
||||
// i = p->MinContext->NumStats - 1;
|
||||
// do { MASK((--s)->Symbol) = 0; } while (--i);
|
||||
|
|
@ -139,8 +139,8 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -153,20 +153,20 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
UInt16 *prob = Ppmd7_GetBinSumm(p);
|
||||
CPpmd_State *s = Ppmd7Context_OneState(p->MinContext);
|
||||
UInt32 pr = *prob;
|
||||
UInt32 bound = (R->Range >> 14) * pr;
|
||||
const UInt32 bound = (R->Range >> 14) * pr;
|
||||
pr = PPMD_UPDATE_PROB_1(pr);
|
||||
if (s->Symbol == symbol)
|
||||
{
|
||||
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
|
||||
// RangeEnc_EncodeBit_0(p, bound);
|
||||
R->Range = bound;
|
||||
RC_NORM_1(p);
|
||||
RC_NORM_1(p)
|
||||
|
||||
// p->FoundState = s;
|
||||
// Ppmd7_UpdateBin(p);
|
||||
{
|
||||
unsigned freq = s->Freq;
|
||||
CTX_PTR c = CTX(SUCCESSOR(s));
|
||||
const unsigned freq = s->Freq;
|
||||
CPpmd7_Context *c = CTX(SUCCESSOR(s));
|
||||
p->FoundState = s;
|
||||
p->PrevSuccess = 1;
|
||||
p->RunLength++;
|
||||
|
|
@ -187,7 +187,7 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
R->Range -= bound;
|
||||
RC_NORM_LOCAL(p)
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
MASK(s->Symbol) = 0;
|
||||
p->PrevSuccess = 0;
|
||||
}
|
||||
|
|
@ -248,14 +248,14 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
|
||||
do
|
||||
{
|
||||
unsigned cur = s->Symbol;
|
||||
const unsigned cur = s->Symbol;
|
||||
if ((int)cur == symbol)
|
||||
{
|
||||
UInt32 low = sum;
|
||||
UInt32 freq = s->Freq;
|
||||
const UInt32 low = sum;
|
||||
const UInt32 freq = s->Freq;
|
||||
unsigned num2;
|
||||
|
||||
Ppmd_See_Update(see);
|
||||
Ppmd_See_UPDATE(see)
|
||||
p->FoundState = s;
|
||||
sum += escFreq;
|
||||
|
||||
|
|
@ -265,21 +265,20 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
if (num2 != 0)
|
||||
{
|
||||
s += i;
|
||||
for (;;)
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s[0].Symbol;
|
||||
unsigned sym1 = s[1].Symbol;
|
||||
const unsigned sym0 = s[0].Symbol;
|
||||
const unsigned sym1 = s[1].Symbol;
|
||||
s += 2;
|
||||
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
|
||||
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
|
||||
if (--num2 == 0)
|
||||
break;
|
||||
}
|
||||
while (--num2);
|
||||
}
|
||||
|
||||
|
||||
R->Range /= sum;
|
||||
RC_EncodeFinal(low, freq);
|
||||
RC_EncodeFinal(low, freq)
|
||||
Ppmd7_Update2(p);
|
||||
return;
|
||||
}
|
||||
|
|
@ -289,21 +288,21 @@ void Ppmd7z_EncodeSymbol(CPpmd7 *p, int symbol)
|
|||
while (--i);
|
||||
|
||||
{
|
||||
UInt32 total = sum + escFreq;
|
||||
const UInt32 total = sum + escFreq;
|
||||
see->Summ = (UInt16)(see->Summ + total);
|
||||
|
||||
R->Range /= total;
|
||||
RC_Encode(sum, escFreq);
|
||||
RC_Encode(sum, escFreq)
|
||||
}
|
||||
|
||||
{
|
||||
CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
|
||||
const CPpmd_State *s2 = Ppmd7_GetStats(p, p->MinContext);
|
||||
s--;
|
||||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -321,3 +320,18 @@ void Ppmd7z_EncodeSymbols(CPpmd7 *p, const Byte *buf, const Byte *lim)
|
|||
Ppmd7z_EncodeSymbol(p, *buf);
|
||||
}
|
||||
}
|
||||
|
||||
#undef kTopValue
|
||||
#undef WRITE_BYTE
|
||||
#undef RC_NORM_BASE
|
||||
#undef RC_NORM_1
|
||||
#undef RC_NORM
|
||||
#undef RC_NORM_LOCAL
|
||||
#undef RC_NORM_REMOTE
|
||||
#undef R
|
||||
#undef RC_Encode
|
||||
#undef RC_EncodeFinal
|
||||
#undef SUFFIX
|
||||
#undef CTX
|
||||
#undef SUCCESSOR
|
||||
#undef MASK
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd7aDec.c -- PPMd7a (PPMdH) Decoder
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.H (2001): Dmitry Shkarin : Public domain
|
||||
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
|
||||
|
|
@ -8,8 +8,8 @@ This code is based on:
|
|||
|
||||
#include "Ppmd7.h"
|
||||
|
||||
#define kTop (1 << 24)
|
||||
#define kBot (1 << 15)
|
||||
#define kTop ((UInt32)1 << 24)
|
||||
#define kBot ((UInt32)1 << 15)
|
||||
|
||||
#define READ_BYTE(p) IByteIn_Read((p)->Stream)
|
||||
|
||||
|
|
@ -37,9 +37,9 @@ BoolInt Ppmd7a_RangeDec_Init(CPpmd7_RangeDec *p)
|
|||
|
||||
#define R (&p->rc.dec)
|
||||
|
||||
MY_FORCE_INLINE
|
||||
// MY_NO_INLINE
|
||||
static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
Z7_FORCE_INLINE
|
||||
// Z7_NO_INLINE
|
||||
static void Ppmd7a_RD_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
||||
{
|
||||
start *= R->Range;
|
||||
R->Low += start;
|
||||
|
|
@ -48,9 +48,9 @@ static void RangeDec_Decode(CPpmd7 *p, UInt32 start, UInt32 size)
|
|||
RC_NORM_LOCAL(R)
|
||||
}
|
||||
|
||||
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
#define RC_Decode(start, size) Ppmd7a_RD_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
|
||||
|
||||
#define CTX(ref) ((CPpmd7_Context *)Ppmd7_GetContext(p, ref))
|
||||
|
|
@ -58,7 +58,7 @@ typedef CPpmd7_Context * CTX_PTR;
|
|||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
void Ppmd7_UpdateModel(CPpmd7 *p);
|
||||
|
||||
#define MASK(sym) ((unsigned char *)charMask)[sym]
|
||||
#define MASK(sym) ((Byte *)charMask)[sym]
|
||||
|
||||
|
||||
int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
||||
|
|
@ -70,7 +70,7 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
CPpmd_State *s = Ppmd7_GetStats(p, p->MinContext);
|
||||
unsigned i;
|
||||
UInt32 count, hiCnt;
|
||||
UInt32 summFreq = p->MinContext->Union2.SummFreq;
|
||||
const UInt32 summFreq = p->MinContext->Union2.SummFreq;
|
||||
|
||||
if (summFreq > R->Range)
|
||||
return PPMD7_SYM_ERROR;
|
||||
|
|
@ -81,7 +81,7 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
if ((Int32)(count -= s->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal(0, s->Freq);
|
||||
RC_DecodeFinal(0, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update1_0(p);
|
||||
|
|
@ -96,7 +96,7 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
if ((Int32)(count -= (++s)->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update1(p);
|
||||
|
|
@ -109,10 +109,10 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
return PPMD7_SYM_ERROR;
|
||||
|
||||
hiCnt -= count;
|
||||
RC_Decode(hiCnt, summFreq - hiCnt);
|
||||
RC_Decode(hiCnt, summFreq - hiCnt)
|
||||
|
||||
p->HiBitsFlag = PPMD7_HiBitsFlag_3(p->FoundState->Symbol);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
// i = p->MinContext->NumStats - 1;
|
||||
// do { MASK((--s)->Symbol) = 0; } while (--i);
|
||||
{
|
||||
|
|
@ -120,8 +120,8 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -176,7 +176,7 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
|
||||
RC_NORM_LOCAL(R)
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
MASK(Ppmd7Context_OneState(p->MinContext)->Symbol) = 0;
|
||||
p->PrevSuccess = 0;
|
||||
}
|
||||
|
|
@ -209,17 +209,17 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
unsigned num2 = num / 2;
|
||||
|
||||
num &= 1;
|
||||
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
s += num;
|
||||
p->MinContext = mc;
|
||||
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s[0].Symbol;
|
||||
unsigned sym1 = s[1].Symbol;
|
||||
const unsigned sym0 = s[0].Symbol;
|
||||
const unsigned sym1 = s[1].Symbol;
|
||||
s += 2;
|
||||
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
|
||||
hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1)));
|
||||
}
|
||||
while (--num2);
|
||||
}
|
||||
|
|
@ -238,20 +238,20 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
|
||||
s = Ppmd7_GetStats(p, p->MinContext);
|
||||
hiCnt = count;
|
||||
// count -= s->Freq & (unsigned)(MASK(s->Symbol));
|
||||
// count -= s->Freq & (UInt32)(MASK(s->Symbol));
|
||||
// if ((Int32)count >= 0)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
};
|
||||
count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
}
|
||||
}
|
||||
s--;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
Ppmd_See_Update(see);
|
||||
Ppmd_See_UPDATE(see)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd7_Update2(p);
|
||||
|
|
@ -261,7 +261,7 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
if (count >= freqSum)
|
||||
return PPMD7_SYM_ERROR;
|
||||
|
||||
RC_Decode(hiCnt, freqSum - hiCnt);
|
||||
RC_Decode(hiCnt, freqSum - hiCnt)
|
||||
|
||||
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
|
|
@ -277,3 +277,19 @@ int Ppmd7a_DecodeSymbol(CPpmd7 *p)
|
|||
while (s != s2);
|
||||
}
|
||||
}
|
||||
|
||||
#undef kTop
|
||||
#undef kBot
|
||||
#undef READ_BYTE
|
||||
#undef RC_NORM_BASE
|
||||
#undef RC_NORM_1
|
||||
#undef RC_NORM
|
||||
#undef RC_NORM_LOCAL
|
||||
#undef RC_NORM_REMOTE
|
||||
#undef R
|
||||
#undef RC_Decode
|
||||
#undef RC_DecodeFinal
|
||||
#undef RC_GetThreshold
|
||||
#undef CTX
|
||||
#undef SUCCESSOR
|
||||
#undef MASK
|
||||
|
|
|
|||
279
C/Ppmd8.c
279
C/Ppmd8.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd8.c -- PPMdI codec
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
|
@ -14,7 +14,7 @@ This code is based on PPMd var.I (2002): Dmitry Shkarin : Public domain */
|
|||
MY_ALIGN(16)
|
||||
static const Byte PPMD8_kExpEscape[16] = { 25, 14, 9, 7, 5, 5, 4, 4, 4, 3, 3, 3, 2, 2, 2, 2 };
|
||||
MY_ALIGN(16)
|
||||
static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
|
||||
static const UInt16 PPMD8_kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x5ABC, 0x6632, 0x6051};
|
||||
|
||||
#define MAX_FREQ 124
|
||||
#define UNIT_SIZE 12
|
||||
|
|
@ -33,7 +33,7 @@ static const UInt16 kInitBinEsc[] = { 0x3CDD, 0x1F3F, 0x59BF, 0x48F3, 0x64A1, 0x
|
|||
#define ONE_STATE(ctx) Ppmd8Context_OneState(ctx)
|
||||
#define SUFFIX(ctx) CTX((ctx)->Suffix)
|
||||
|
||||
typedef CPpmd8_Context * CTX_PTR;
|
||||
typedef CPpmd8_Context * PPMD8_CTX_PTR;
|
||||
|
||||
struct CPpmd8_Node_;
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ BoolInt Ppmd8_Alloc(CPpmd8 *p, UInt32 size, ISzAllocPtr alloc)
|
|||
#define EMPTY_NODE 0xFFFFFFFF
|
||||
|
||||
|
||||
static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
|
||||
static void Ppmd8_InsertNode(CPpmd8 *p, void *node, unsigned indx)
|
||||
{
|
||||
((CPpmd8_Node *)node)->Stamp = EMPTY_NODE;
|
||||
((CPpmd8_Node *)node)->Next = (CPpmd8_Node_Ref)p->FreeList[indx];
|
||||
|
|
@ -124,7 +124,7 @@ static void InsertNode(CPpmd8 *p, void *node, unsigned indx)
|
|||
}
|
||||
|
||||
|
||||
static void *RemoveNode(CPpmd8 *p, unsigned indx)
|
||||
static void *Ppmd8_RemoveNode(CPpmd8 *p, unsigned indx)
|
||||
{
|
||||
CPpmd8_Node *node = NODE((CPpmd8_Node_Ref)p->FreeList[indx]);
|
||||
p->FreeList[indx] = node->Next;
|
||||
|
|
@ -134,16 +134,16 @@ static void *RemoveNode(CPpmd8 *p, unsigned indx)
|
|||
}
|
||||
|
||||
|
||||
static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
|
||||
static void Ppmd8_SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
|
||||
{
|
||||
unsigned i, nu = I2U(oldIndx) - I2U(newIndx);
|
||||
ptr = (Byte *)ptr + U2B(I2U(newIndx));
|
||||
if (I2U(i = U2I(nu)) != nu)
|
||||
{
|
||||
unsigned k = I2U(--i);
|
||||
InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
|
||||
Ppmd8_InsertNode(p, ((Byte *)ptr) + U2B(k), nu - k - 1);
|
||||
}
|
||||
InsertNode(p, ptr, i);
|
||||
Ppmd8_InsertNode(p, ptr, i);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -159,7 +159,7 @@ static void SplitBlock(CPpmd8 *p, void *ptr, unsigned oldIndx, unsigned newIndx)
|
|||
|
||||
|
||||
|
||||
static void GlueFreeBlocks(CPpmd8 *p)
|
||||
static void Ppmd8_GlueFreeBlocks(CPpmd8 *p)
|
||||
{
|
||||
/*
|
||||
we use first UInt32 field of 12-bytes UNITs as record type stamp
|
||||
|
|
@ -239,27 +239,27 @@ static void GlueFreeBlocks(CPpmd8 *p)
|
|||
if (nu == 0)
|
||||
continue;
|
||||
for (; nu > 128; nu -= 128, node += 128)
|
||||
InsertNode(p, node, PPMD_NUM_INDEXES - 1);
|
||||
Ppmd8_InsertNode(p, node, PPMD_NUM_INDEXES - 1);
|
||||
if (I2U(i = U2I(nu)) != nu)
|
||||
{
|
||||
unsigned k = I2U(--i);
|
||||
InsertNode(p, node + k, (unsigned)nu - k - 1);
|
||||
Ppmd8_InsertNode(p, node + k, (unsigned)nu - k - 1);
|
||||
}
|
||||
InsertNode(p, node, i);
|
||||
Ppmd8_InsertNode(p, node, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
static void *AllocUnitsRare(CPpmd8 *p, unsigned indx)
|
||||
Z7_NO_INLINE
|
||||
static void *Ppmd8_AllocUnitsRare(CPpmd8 *p, unsigned indx)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (p->GlueCount == 0)
|
||||
{
|
||||
GlueFreeBlocks(p);
|
||||
Ppmd8_GlueFreeBlocks(p);
|
||||
if (p->FreeList[indx] != 0)
|
||||
return RemoveNode(p, indx);
|
||||
return Ppmd8_RemoveNode(p, indx);
|
||||
}
|
||||
|
||||
i = indx;
|
||||
|
|
@ -277,17 +277,17 @@ static void *AllocUnitsRare(CPpmd8 *p, unsigned indx)
|
|||
while (p->FreeList[i] == 0);
|
||||
|
||||
{
|
||||
void *block = RemoveNode(p, i);
|
||||
SplitBlock(p, block, i, indx);
|
||||
void *block = Ppmd8_RemoveNode(p, i);
|
||||
Ppmd8_SplitBlock(p, block, i, indx);
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void *AllocUnits(CPpmd8 *p, unsigned indx)
|
||||
static void *Ppmd8_AllocUnits(CPpmd8 *p, unsigned indx)
|
||||
{
|
||||
if (p->FreeList[indx] != 0)
|
||||
return RemoveNode(p, indx);
|
||||
return Ppmd8_RemoveNode(p, indx);
|
||||
{
|
||||
UInt32 numBytes = U2B(I2U(indx));
|
||||
Byte *lo = p->LoUnit;
|
||||
|
|
@ -297,13 +297,22 @@ static void *AllocUnits(CPpmd8 *p, unsigned indx)
|
|||
return lo;
|
||||
}
|
||||
}
|
||||
return AllocUnitsRare(p, indx);
|
||||
return Ppmd8_AllocUnitsRare(p, indx);
|
||||
}
|
||||
|
||||
|
||||
#define MyMem12Cpy(dest, src, num) \
|
||||
{ UInt32 *d = (UInt32 *)dest; const UInt32 *z = (const UInt32 *)src; UInt32 n = num; \
|
||||
do { d[0] = z[0]; d[1] = z[1]; d[2] = z[2]; z += 3; d += 3; } while (--n); }
|
||||
#define MEM_12_CPY(dest, src, num) \
|
||||
{ UInt32 *d = (UInt32 *)(dest); \
|
||||
const UInt32 *z = (const UInt32 *)(src); \
|
||||
unsigned n = (num); \
|
||||
do { \
|
||||
d[0] = z[0]; \
|
||||
d[1] = z[1]; \
|
||||
d[2] = z[2]; \
|
||||
z += 3; \
|
||||
d += 3; \
|
||||
} while (--n); \
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -315,26 +324,26 @@ static void *ShrinkUnits(CPpmd8 *p, void *oldPtr, unsigned oldNU, unsigned newNU
|
|||
return oldPtr;
|
||||
if (p->FreeList[i1] != 0)
|
||||
{
|
||||
void *ptr = RemoveNode(p, i1);
|
||||
MyMem12Cpy(ptr, oldPtr, newNU);
|
||||
InsertNode(p, oldPtr, i0);
|
||||
void *ptr = Ppmd8_RemoveNode(p, i1);
|
||||
MEM_12_CPY(ptr, oldPtr, newNU)
|
||||
Ppmd8_InsertNode(p, oldPtr, i0);
|
||||
return ptr;
|
||||
}
|
||||
SplitBlock(p, oldPtr, i0, i1);
|
||||
Ppmd8_SplitBlock(p, oldPtr, i0, i1);
|
||||
return oldPtr;
|
||||
}
|
||||
|
||||
|
||||
static void FreeUnits(CPpmd8 *p, void *ptr, unsigned nu)
|
||||
{
|
||||
InsertNode(p, ptr, U2I(nu));
|
||||
Ppmd8_InsertNode(p, ptr, U2I(nu));
|
||||
}
|
||||
|
||||
|
||||
static void SpecialFreeUnit(CPpmd8 *p, void *ptr)
|
||||
{
|
||||
if ((Byte *)ptr != p->UnitsStart)
|
||||
InsertNode(p, ptr, 0);
|
||||
Ppmd8_InsertNode(p, ptr, 0);
|
||||
else
|
||||
{
|
||||
#ifdef PPMD8_FREEZE_SUPPORT
|
||||
|
|
@ -352,10 +361,10 @@ static void *MoveUnitsUp(CPpmd8 *p, void *oldPtr, unsigned nu)
|
|||
void *ptr;
|
||||
if ((Byte *)oldPtr > p->UnitsStart + (1 << 14) || REF(oldPtr) > p->FreeList[indx])
|
||||
return oldPtr;
|
||||
ptr = RemoveNode(p, indx);
|
||||
MyMem12Cpy(ptr, oldPtr, nu);
|
||||
ptr = Ppmd8_RemoveNode(p, indx);
|
||||
MEM_12_CPY(ptr, oldPtr, nu)
|
||||
if ((Byte *)oldPtr != p->UnitsStart)
|
||||
InsertNode(p, oldPtr, indx);
|
||||
Ppmd8_InsertNode(p, oldPtr, indx);
|
||||
else
|
||||
p->UnitsStart += U2B(I2U(indx));
|
||||
return ptr;
|
||||
|
|
@ -411,22 +420,22 @@ static void ExpandTextArea(CPpmd8 *p)
|
|||
|
||||
|
||||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
static void SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
|
||||
static void Ppmd8State_SetSuccessor(CPpmd_State *p, CPpmd_Void_Ref v)
|
||||
{
|
||||
Ppmd_SET_SUCCESSOR(p, v);
|
||||
Ppmd_SET_SUCCESSOR(p, v)
|
||||
}
|
||||
|
||||
#define RESET_TEXT(offs) { p->Text = p->Base + p->AlignOffset + (offs); }
|
||||
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
static
|
||||
void RestartModel(CPpmd8 *p)
|
||||
void Ppmd8_RestartModel(CPpmd8 *p)
|
||||
{
|
||||
unsigned i, k, m;
|
||||
|
||||
memset(p->FreeList, 0, sizeof(p->FreeList));
|
||||
memset(p->Stamps, 0, sizeof(p->Stamps));
|
||||
RESET_TEXT(0);
|
||||
RESET_TEXT(0)
|
||||
p->HiUnit = p->Text + p->Size;
|
||||
p->LoUnit = p->UnitsStart = p->HiUnit - p->Size / 8 / UNIT_SIZE * 7 * UNIT_SIZE;
|
||||
p->GlueCount = 0;
|
||||
|
|
@ -436,8 +445,8 @@ void RestartModel(CPpmd8 *p)
|
|||
p->PrevSuccess = 0;
|
||||
|
||||
{
|
||||
CPpmd8_Context *mc = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
|
||||
CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* AllocUnits(p, PPMD_NUM_INDEXES - 1); */
|
||||
CPpmd8_Context *mc = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE); /* AllocContext(p); */
|
||||
CPpmd_State *s = (CPpmd_State *)p->LoUnit; /* Ppmd8_AllocUnits(p, PPMD_NUM_INDEXES - 1); */
|
||||
|
||||
p->LoUnit += U2B(256 / 2);
|
||||
p->MaxContext = p->MinContext = mc;
|
||||
|
|
@ -452,7 +461,7 @@ void RestartModel(CPpmd8 *p)
|
|||
{
|
||||
s->Symbol = (Byte)i;
|
||||
s->Freq = 1;
|
||||
SetSuccessor(s, 0);
|
||||
Ppmd8State_SetSuccessor(s, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -475,7 +484,7 @@ void RestartModel(CPpmd8 *p)
|
|||
{
|
||||
unsigned r;
|
||||
UInt16 *dest = p->BinSumm[m] + k;
|
||||
UInt16 val = (UInt16)(PPMD_BIN_SCALE - kInitBinEsc[k] / (i + 1));
|
||||
const UInt16 val = (UInt16)(PPMD_BIN_SCALE - PPMD8_kInitBinEsc[k] / (i + 1));
|
||||
for (r = 0; r < 64; r += 8)
|
||||
dest[r] = val;
|
||||
}
|
||||
|
|
@ -507,7 +516,7 @@ void Ppmd8_Init(CPpmd8 *p, unsigned maxOrder, unsigned restoreMethod)
|
|||
{
|
||||
p->MaxOrder = maxOrder;
|
||||
p->RestoreMethod = restoreMethod;
|
||||
RestartModel(p);
|
||||
Ppmd8_RestartModel(p);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -531,7 +540,7 @@ Refresh() is called when we remove some symbols (successors) in context.
|
|||
It increases Escape_Freq for sum of all removed symbols.
|
||||
*/
|
||||
|
||||
static void Refresh(CPpmd8 *p, CTX_PTR ctx, unsigned oldNU, unsigned scale)
|
||||
static void Refresh(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned oldNU, unsigned scale)
|
||||
{
|
||||
unsigned i = ctx->NumStats, escFreq, sumFreq, flags;
|
||||
CPpmd_State *s = (CPpmd_State *)ShrinkUnits(p, STATS(ctx), oldNU, (i + 2) >> 1);
|
||||
|
|
@ -581,7 +590,7 @@ static void Refresh(CPpmd8 *p, CTX_PTR ctx, unsigned oldNU, unsigned scale)
|
|||
}
|
||||
|
||||
|
||||
static void SwapStates(CPpmd_State *t1, CPpmd_State *t2)
|
||||
static void SWAP_STATES(CPpmd_State *t1, CPpmd_State *t2)
|
||||
{
|
||||
CPpmd_State tmp = *t1;
|
||||
*t1 = *t2;
|
||||
|
|
@ -597,7 +606,7 @@ CutOff() reduces contexts:
|
|||
if the (Union4.Stats) is close to (UnitsStart), it moves it up.
|
||||
*/
|
||||
|
||||
static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
||||
static CPpmd_Void_Ref CutOff(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order)
|
||||
{
|
||||
int ns = ctx->NumStats;
|
||||
unsigned nu;
|
||||
|
|
@ -613,7 +622,7 @@ static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
|||
successor = CutOff(p, CTX(successor), order + 1);
|
||||
else
|
||||
successor = 0;
|
||||
SetSuccessor(s, successor);
|
||||
Ppmd8State_SetSuccessor(s, successor);
|
||||
if (successor || order <= 9) /* O_BOUND */
|
||||
return REF(ctx);
|
||||
}
|
||||
|
|
@ -630,11 +639,11 @@ static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
|||
if ((UInt32)((Byte *)stats - p->UnitsStart) <= (1 << 14)
|
||||
&& (CPpmd_Void_Ref)ctx->Union4.Stats <= p->FreeList[indx])
|
||||
{
|
||||
void *ptr = RemoveNode(p, indx);
|
||||
void *ptr = Ppmd8_RemoveNode(p, indx);
|
||||
ctx->Union4.Stats = STATS_REF(ptr);
|
||||
MyMem12Cpy(ptr, (const void *)stats, nu);
|
||||
MEM_12_CPY(ptr, (const void *)stats, nu)
|
||||
if ((Byte *)stats != p->UnitsStart)
|
||||
InsertNode(p, stats, indx);
|
||||
Ppmd8_InsertNode(p, stats, indx);
|
||||
else
|
||||
p->UnitsStart += U2B(I2U(indx));
|
||||
stats = ptr;
|
||||
|
|
@ -656,16 +665,16 @@ static CPpmd_Void_Ref CutOff(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
|||
}
|
||||
else
|
||||
{
|
||||
SwapStates(s, s2);
|
||||
SetSuccessor(s2, 0);
|
||||
SWAP_STATES(s, s2);
|
||||
Ppmd8State_SetSuccessor(s2, 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (order < p->MaxOrder)
|
||||
SetSuccessor(s, CutOff(p, CTX(successor), order + 1));
|
||||
Ppmd8State_SetSuccessor(s, CutOff(p, CTX(successor), order + 1));
|
||||
else
|
||||
SetSuccessor(s, 0);
|
||||
Ppmd8State_SetSuccessor(s, 0);
|
||||
}
|
||||
}
|
||||
while (--s >= stats);
|
||||
|
|
@ -711,7 +720,7 @@ RemoveBinContexts()
|
|||
removes Bin Context without Successor, if suffix of that context is also binary.
|
||||
*/
|
||||
|
||||
static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
||||
static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, PPMD8_CTX_PTR ctx, unsigned order)
|
||||
{
|
||||
if (!ctx->NumStats)
|
||||
{
|
||||
|
|
@ -721,7 +730,7 @@ static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
|||
successor = RemoveBinContexts(p, CTX(successor), order + 1);
|
||||
else
|
||||
successor = 0;
|
||||
SetSuccessor(s, successor);
|
||||
Ppmd8State_SetSuccessor(s, successor);
|
||||
/* Suffix context can be removed already, since different (high-order)
|
||||
Successors may refer to same context. So we check Flags == 0xFF (Stamp == EMPTY_NODE) */
|
||||
if (!successor && (!SUFFIX(ctx)->NumStats || SUFFIX(ctx)->Flags == 0xFF))
|
||||
|
|
@ -737,9 +746,9 @@ static CPpmd_Void_Ref RemoveBinContexts(CPpmd8 *p, CTX_PTR ctx, unsigned order)
|
|||
{
|
||||
CPpmd_Void_Ref successor = SUCCESSOR(s);
|
||||
if ((Byte *)Ppmd8_GetPtr(p, successor) >= p->UnitsStart && order < p->MaxOrder)
|
||||
SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1));
|
||||
Ppmd8State_SetSuccessor(s, RemoveBinContexts(p, CTX(successor), order + 1));
|
||||
else
|
||||
SetSuccessor(s, 0);
|
||||
Ppmd8State_SetSuccessor(s, 0);
|
||||
}
|
||||
while (--s >= STATS(ctx));
|
||||
}
|
||||
|
|
@ -767,15 +776,15 @@ static UInt32 GetUsedMemory(const CPpmd8 *p)
|
|||
#endif
|
||||
|
||||
|
||||
static void RestoreModel(CPpmd8 *p, CTX_PTR ctxError
|
||||
static void RestoreModel(CPpmd8 *p, PPMD8_CTX_PTR ctxError
|
||||
#ifdef PPMD8_FREEZE_SUPPORT
|
||||
, CTX_PTR fSuccessor
|
||||
, PPMD8_CTX_PTR fSuccessor
|
||||
#endif
|
||||
)
|
||||
{
|
||||
CTX_PTR c;
|
||||
PPMD8_CTX_PTR c;
|
||||
CPpmd_State *s;
|
||||
RESET_TEXT(0);
|
||||
RESET_TEXT(0)
|
||||
|
||||
// we go here in cases of error of allocation for context (c1)
|
||||
// Order(MinContext) < Order(ctxError) <= Order(MaxContext)
|
||||
|
|
@ -831,7 +840,7 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR ctxError
|
|||
else
|
||||
#endif
|
||||
if (p->RestoreMethod == PPMD8_RESTORE_METHOD_RESTART || GetUsedMemory(p) < (p->Size >> 1))
|
||||
RestartModel(p);
|
||||
Ppmd8_RestartModel(p);
|
||||
else
|
||||
{
|
||||
while (p->MaxContext->Suffix)
|
||||
|
|
@ -850,8 +859,8 @@ static void RestoreModel(CPpmd8 *p, CTX_PTR ctxError
|
|||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PTR c)
|
||||
Z7_NO_INLINE
|
||||
static PPMD8_CTX_PTR Ppmd8_CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, PPMD8_CTX_PTR c)
|
||||
{
|
||||
|
||||
CPpmd_Byte_Ref upBranch = (CPpmd_Byte_Ref)SUCCESSOR(p->FoundState);
|
||||
|
|
@ -927,15 +936,15 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
|
|||
|
||||
do
|
||||
{
|
||||
CTX_PTR c1;
|
||||
PPMD8_CTX_PTR c1;
|
||||
/* = AllocContext(p); */
|
||||
if (p->HiUnit != p->LoUnit)
|
||||
c1 = (CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
|
||||
c1 = (PPMD8_CTX_PTR)(void *)(p->HiUnit -= UNIT_SIZE);
|
||||
else if (p->FreeList[0] != 0)
|
||||
c1 = (CTX_PTR)RemoveNode(p, 0);
|
||||
c1 = (PPMD8_CTX_PTR)Ppmd8_RemoveNode(p, 0);
|
||||
else
|
||||
{
|
||||
c1 = (CTX_PTR)AllocUnitsRare(p, 0);
|
||||
c1 = (PPMD8_CTX_PTR)Ppmd8_AllocUnitsRare(p, 0);
|
||||
if (!c1)
|
||||
return NULL;
|
||||
}
|
||||
|
|
@ -943,9 +952,9 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
|
|||
c1->NumStats = 0;
|
||||
c1->Union2.State2.Symbol = newSym;
|
||||
c1->Union2.State2.Freq = newFreq;
|
||||
SetSuccessor(ONE_STATE(c1), upBranch);
|
||||
Ppmd8State_SetSuccessor(ONE_STATE(c1), upBranch);
|
||||
c1->Suffix = REF(c);
|
||||
SetSuccessor(ps[--numPs], REF(c1));
|
||||
Ppmd8State_SetSuccessor(ps[--numPs], REF(c1));
|
||||
c = c1;
|
||||
}
|
||||
while (numPs != 0);
|
||||
|
|
@ -954,10 +963,10 @@ static CTX_PTR CreateSuccessors(CPpmd8 *p, BoolInt skip, CPpmd_State *s1, CTX_PT
|
|||
}
|
||||
|
||||
|
||||
static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
||||
static PPMD8_CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, PPMD8_CTX_PTR c)
|
||||
{
|
||||
CPpmd_State *s = NULL;
|
||||
CTX_PTR c1 = c;
|
||||
PPMD8_CTX_PTR c1 = c;
|
||||
CPpmd_Void_Ref upBranch = REF(p->Text);
|
||||
|
||||
#ifdef PPMD8_FREEZE_SUPPORT
|
||||
|
|
@ -967,7 +976,7 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
ps[numPs++] = p->FoundState;
|
||||
#endif
|
||||
|
||||
SetSuccessor(p->FoundState, upBranch);
|
||||
Ppmd8State_SetSuccessor(p->FoundState, upBranch);
|
||||
p->OrderFall++;
|
||||
|
||||
for (;;)
|
||||
|
|
@ -985,8 +994,8 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
#ifdef PPMD8_FREEZE_SUPPORT
|
||||
if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
|
||||
{
|
||||
do { SetSuccessor(ps[--numPs], REF(c)); } while (numPs);
|
||||
RESET_TEXT(1);
|
||||
do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs);
|
||||
RESET_TEXT(1)
|
||||
p->OrderFall = 1;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1014,7 +1023,7 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
#ifdef PPMD8_FREEZE_SUPPORT
|
||||
ps[numPs++] = s;
|
||||
#endif
|
||||
SetSuccessor(s, upBranch);
|
||||
Ppmd8State_SetSuccessor(s, upBranch);
|
||||
p->OrderFall++;
|
||||
}
|
||||
|
||||
|
|
@ -1022,8 +1031,8 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
|
||||
{
|
||||
c = CTX(SUCCESSOR(s));
|
||||
do { SetSuccessor(ps[--numPs], REF(c)); } while (numPs);
|
||||
RESET_TEXT(1);
|
||||
do { Ppmd8State_SetSuccessor(ps[--numPs], REF(c)); } while (numPs);
|
||||
RESET_TEXT(1)
|
||||
p->OrderFall = 1;
|
||||
return c;
|
||||
}
|
||||
|
|
@ -1031,15 +1040,15 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
#endif
|
||||
if (SUCCESSOR(s) <= upBranch)
|
||||
{
|
||||
CTX_PTR successor;
|
||||
PPMD8_CTX_PTR successor;
|
||||
CPpmd_State *s2 = p->FoundState;
|
||||
p->FoundState = s;
|
||||
|
||||
successor = CreateSuccessors(p, False, NULL, c);
|
||||
successor = Ppmd8_CreateSuccessors(p, False, NULL, c);
|
||||
if (!successor)
|
||||
SetSuccessor(s, 0);
|
||||
Ppmd8State_SetSuccessor(s, 0);
|
||||
else
|
||||
SetSuccessor(s, REF(successor));
|
||||
Ppmd8State_SetSuccessor(s, REF(successor));
|
||||
p->FoundState = s2;
|
||||
}
|
||||
|
||||
|
|
@ -1047,7 +1056,7 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
CPpmd_Void_Ref successor = SUCCESSOR(s);
|
||||
if (p->OrderFall == 1 && c1 == p->MaxContext)
|
||||
{
|
||||
SetSuccessor(p->FoundState, successor);
|
||||
Ppmd8State_SetSuccessor(p->FoundState, successor);
|
||||
p->Text--;
|
||||
}
|
||||
if (successor == 0)
|
||||
|
|
@ -1059,11 +1068,11 @@ static CTX_PTR ReduceOrder(CPpmd8 *p, CPpmd_State *s1, CTX_PTR c)
|
|||
|
||||
|
||||
void Ppmd8_UpdateModel(CPpmd8 *p);
|
||||
MY_NO_INLINE
|
||||
Z7_NO_INLINE
|
||||
void Ppmd8_UpdateModel(CPpmd8 *p)
|
||||
{
|
||||
CPpmd_Void_Ref maxSuccessor, minSuccessor = SUCCESSOR(p->FoundState);
|
||||
CTX_PTR c;
|
||||
PPMD8_CTX_PTR c;
|
||||
unsigned s0, ns, fFreq = p->FoundState->Freq;
|
||||
Byte flag, fSymbol = p->FoundState->Symbol;
|
||||
{
|
||||
|
|
@ -1096,7 +1105,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
|
||||
if (s[0].Freq >= s[-1].Freq)
|
||||
{
|
||||
SwapStates(&s[0], &s[-1]);
|
||||
SWAP_STATES(&s[0], &s[-1]);
|
||||
s--;
|
||||
}
|
||||
}
|
||||
|
|
@ -1112,14 +1121,14 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
c = p->MaxContext;
|
||||
if (p->OrderFall == 0 && minSuccessor)
|
||||
{
|
||||
CTX_PTR cs = CreateSuccessors(p, True, s, p->MinContext);
|
||||
PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, True, s, p->MinContext);
|
||||
if (!cs)
|
||||
{
|
||||
SetSuccessor(p->FoundState, 0);
|
||||
Ppmd8State_SetSuccessor(p->FoundState, 0);
|
||||
RESTORE_MODEL(c, CTX(minSuccessor));
|
||||
return;
|
||||
}
|
||||
SetSuccessor(p->FoundState, REF(cs));
|
||||
Ppmd8State_SetSuccessor(p->FoundState, REF(cs));
|
||||
p->MinContext = p->MaxContext = cs;
|
||||
return;
|
||||
}
|
||||
|
|
@ -1141,7 +1150,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
|
||||
if (!minSuccessor)
|
||||
{
|
||||
CTX_PTR cs = ReduceOrder(p, s, p->MinContext);
|
||||
PPMD8_CTX_PTR cs = ReduceOrder(p, s, p->MinContext);
|
||||
if (!cs)
|
||||
{
|
||||
RESTORE_MODEL(c, NULL);
|
||||
|
|
@ -1151,7 +1160,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
}
|
||||
else if ((Byte *)Ppmd8_GetPtr(p, minSuccessor) < p->UnitsStart)
|
||||
{
|
||||
CTX_PTR cs = CreateSuccessors(p, False, s, p->MinContext);
|
||||
PPMD8_CTX_PTR cs = Ppmd8_CreateSuccessors(p, False, s, p->MinContext);
|
||||
if (!cs)
|
||||
{
|
||||
RESTORE_MODEL(c, NULL);
|
||||
|
|
@ -1169,7 +1178,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
else if (p->RestoreMethod > PPMD8_RESTORE_METHOD_FREEZE)
|
||||
{
|
||||
maxSuccessor = minSuccessor;
|
||||
RESET_TEXT(0);
|
||||
RESET_TEXT(0)
|
||||
p->OrderFall = 0;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -1215,11 +1224,11 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
if ((ns1 & 1) != 0)
|
||||
{
|
||||
/* Expand for one UNIT */
|
||||
unsigned oldNU = (ns1 + 1) >> 1;
|
||||
unsigned i = U2I(oldNU);
|
||||
const unsigned oldNU = (ns1 + 1) >> 1;
|
||||
const unsigned i = U2I(oldNU);
|
||||
if (i != U2I((size_t)oldNU + 1))
|
||||
{
|
||||
void *ptr = AllocUnits(p, i + 1);
|
||||
void *ptr = Ppmd8_AllocUnits(p, i + 1);
|
||||
void *oldPtr;
|
||||
if (!ptr)
|
||||
{
|
||||
|
|
@ -1227,15 +1236,15 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
return;
|
||||
}
|
||||
oldPtr = STATS(c);
|
||||
MyMem12Cpy(ptr, oldPtr, oldNU);
|
||||
InsertNode(p, oldPtr, i);
|
||||
MEM_12_CPY(ptr, oldPtr, oldNU)
|
||||
Ppmd8_InsertNode(p, oldPtr, i);
|
||||
c->Union4.Stats = STATS_REF(ptr);
|
||||
}
|
||||
}
|
||||
sum = c->Union2.SummFreq;
|
||||
/* max increase of Escape_Freq is 1 here.
|
||||
an average increase is 1/3 per symbol */
|
||||
sum += (3 * ns1 + 1 < ns);
|
||||
sum += (UInt32)(unsigned)(3 * ns1 + 1 < ns);
|
||||
/* original PPMdH uses 16-bit variable for (sum) here.
|
||||
But (sum < ???). Do we need to truncate (sum) to 16-bit */
|
||||
// sum = (UInt16)sum;
|
||||
|
|
@ -1243,7 +1252,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
else
|
||||
{
|
||||
|
||||
CPpmd_State *s = (CPpmd_State*)AllocUnits(p, 0);
|
||||
CPpmd_State *s = (CPpmd_State*)Ppmd8_AllocUnits(p, 0);
|
||||
if (!s)
|
||||
{
|
||||
RESTORE_MODEL(c, CTX(minSuccessor));
|
||||
|
|
@ -1255,7 +1264,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
s->Symbol = c->Union2.State2.Symbol;
|
||||
s->Successor_0 = c->Union4.State4.Successor_0;
|
||||
s->Successor_1 = c->Union4.State4.Successor_1;
|
||||
// SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
|
||||
// Ppmd8State_SetSuccessor(s, c->Union4.Stats); // call it only for debug purposes to check the order of
|
||||
// (Successor_0 and Successor_1) in LE/BE.
|
||||
c->Union4.Stats = REF(s);
|
||||
if (freq < MAX_FREQ / 4 - 1)
|
||||
|
|
@ -1265,7 +1274,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
|
||||
s->Freq = (Byte)freq;
|
||||
|
||||
sum = freq + p->InitEsc + (ns > 2); // Ppmd8 (> 2)
|
||||
sum = (UInt32)(freq + p->InitEsc + (ns > 2)); // Ppmd8 (> 2)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1275,7 +1284,7 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
UInt32 sf = (UInt32)s0 + sum;
|
||||
s->Symbol = fSymbol;
|
||||
c->NumStats = (Byte)(ns1 + 1);
|
||||
SetSuccessor(s, maxSuccessor);
|
||||
Ppmd8State_SetSuccessor(s, maxSuccessor);
|
||||
c->Flags |= flag;
|
||||
if (cf < 6 * sf)
|
||||
{
|
||||
|
|
@ -1299,8 +1308,8 @@ void Ppmd8_UpdateModel(CPpmd8 *p)
|
|||
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
static void Rescale(CPpmd8 *p)
|
||||
Z7_NO_INLINE
|
||||
static void Ppmd8_Rescale(CPpmd8 *p)
|
||||
{
|
||||
unsigned i, adder, sumFreq, escFreq;
|
||||
CPpmd_State *stats = STATS(p->MinContext);
|
||||
|
|
@ -1389,7 +1398,7 @@ static void Rescale(CPpmd8 *p)
|
|||
*s = *stats;
|
||||
s->Freq = (Byte)freq;
|
||||
p->FoundState = s;
|
||||
InsertNode(p, stats, U2I(n0));
|
||||
Ppmd8_InsertNode(p, stats, U2I(n0));
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1437,10 +1446,10 @@ CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
|
|||
|
||||
{
|
||||
// if (see->Summ) field is larger than 16-bit, we need only low 16 bits of Summ
|
||||
unsigned summ = (UInt16)see->Summ; // & 0xFFFF
|
||||
unsigned r = (summ >> see->Shift);
|
||||
const unsigned summ = (UInt16)see->Summ; // & 0xFFFF
|
||||
const unsigned r = (summ >> see->Shift);
|
||||
see->Summ = (UInt16)(summ - r);
|
||||
*escFreq = r + (r == 0);
|
||||
*escFreq = (UInt32)(r + (r == 0));
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
@ -1452,9 +1461,9 @@ CPpmd_See *Ppmd8_MakeEscFreq(CPpmd8 *p, unsigned numMasked1, UInt32 *escFreq)
|
|||
}
|
||||
|
||||
|
||||
static void NextContext(CPpmd8 *p)
|
||||
static void Ppmd8_NextContext(CPpmd8 *p)
|
||||
{
|
||||
CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
|
||||
PPMD8_CTX_PTR c = CTX(SUCCESSOR(p->FoundState));
|
||||
if (p->OrderFall == 0 && (const Byte *)c >= p->UnitsStart)
|
||||
p->MaxContext = p->MinContext = c;
|
||||
else
|
||||
|
|
@ -1471,12 +1480,12 @@ void Ppmd8_Update1(CPpmd8 *p)
|
|||
s->Freq = (Byte)freq;
|
||||
if (freq > s[-1].Freq)
|
||||
{
|
||||
SwapStates(s, &s[-1]);
|
||||
SWAP_STATES(s, &s[-1]);
|
||||
p->FoundState = --s;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
Ppmd8_Rescale(p);
|
||||
}
|
||||
NextContext(p);
|
||||
Ppmd8_NextContext(p);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1485,15 +1494,15 @@ void Ppmd8_Update1_0(CPpmd8 *p)
|
|||
CPpmd_State *s = p->FoundState;
|
||||
CPpmd8_Context *mc = p->MinContext;
|
||||
unsigned freq = s->Freq;
|
||||
unsigned summFreq = mc->Union2.SummFreq;
|
||||
const unsigned summFreq = mc->Union2.SummFreq;
|
||||
p->PrevSuccess = (2 * freq >= summFreq); // Ppmd8 (>=)
|
||||
p->RunLength += (int)p->PrevSuccess;
|
||||
p->RunLength += (Int32)p->PrevSuccess;
|
||||
mc->Union2.SummFreq = (UInt16)(summFreq + 4);
|
||||
freq += 4;
|
||||
s->Freq = (Byte)freq;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
NextContext(p);
|
||||
Ppmd8_Rescale(p);
|
||||
Ppmd8_NextContext(p);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -1504,7 +1513,7 @@ void Ppmd8_UpdateBin(CPpmd8 *p)
|
|||
p->FoundState->Freq = (Byte)(freq + (freq < 196)); // Ppmd8 (196)
|
||||
p->PrevSuccess = 1;
|
||||
p->RunLength++;
|
||||
NextContext(p);
|
||||
Ppmd8_NextContext(p);
|
||||
}
|
||||
*/
|
||||
|
||||
|
|
@ -1517,7 +1526,7 @@ void Ppmd8_Update2(CPpmd8 *p)
|
|||
p->MinContext->Union2.SummFreq = (UInt16)(p->MinContext->Union2.SummFreq + 4);
|
||||
s->Freq = (Byte)freq;
|
||||
if (freq > MAX_FREQ)
|
||||
Rescale(p);
|
||||
Ppmd8_Rescale(p);
|
||||
Ppmd8_UpdateModel(p);
|
||||
}
|
||||
|
||||
|
|
@ -1526,7 +1535,7 @@ void Ppmd8_Update2(CPpmd8 *p)
|
|||
GlueCount, and Glue method
|
||||
BinSum
|
||||
See / EscFreq
|
||||
CreateSuccessors updates more suffix contexts
|
||||
Ppmd8_CreateSuccessors updates more suffix contexts
|
||||
Ppmd8_UpdateModel consts.
|
||||
PrevSuccess Update
|
||||
|
||||
|
|
@ -1535,3 +1544,31 @@ Flags:
|
|||
(1 << 3) - there is symbol in Stats with (sym >= 0x40) in
|
||||
(1 << 4) - main symbol of context is (sym >= 0x40)
|
||||
*/
|
||||
|
||||
#undef RESET_TEXT
|
||||
#undef FLAG_RESCALED
|
||||
#undef FLAG_PREV_HIGH
|
||||
#undef HiBits_Prepare
|
||||
#undef HiBits_Convert_3
|
||||
#undef HiBits_Convert_4
|
||||
#undef PPMD8_HiBitsFlag_3
|
||||
#undef PPMD8_HiBitsFlag_4
|
||||
#undef RESTORE_MODEL
|
||||
|
||||
#undef MAX_FREQ
|
||||
#undef UNIT_SIZE
|
||||
#undef U2B
|
||||
#undef U2I
|
||||
#undef I2U
|
||||
|
||||
#undef REF
|
||||
#undef STATS_REF
|
||||
#undef CTX
|
||||
#undef STATS
|
||||
#undef ONE_STATE
|
||||
#undef SUFFIX
|
||||
#undef NODE
|
||||
#undef EMPTY_NODE
|
||||
#undef MEM_12_CPY
|
||||
#undef SUCCESSOR
|
||||
#undef SWAP_STATES
|
||||
|
|
|
|||
10
C/Ppmd8.h
10
C/Ppmd8.h
|
|
@ -1,11 +1,11 @@
|
|||
/* Ppmd8.h -- Ppmd8 (PPMdI) compression codec
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-04-02 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.I (2002): Dmitry Shkarin : Public domain
|
||||
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
|
||||
|
||||
#ifndef __PPMD8_H
|
||||
#define __PPMD8_H
|
||||
#ifndef ZIP7_INC_PPMD8_H
|
||||
#define ZIP7_INC_PPMD8_H
|
||||
|
||||
#include "Ppmd.h"
|
||||
|
||||
|
|
@ -87,8 +87,8 @@ typedef struct
|
|||
UInt32 Low;
|
||||
union
|
||||
{
|
||||
IByteIn *In;
|
||||
IByteOut *Out;
|
||||
IByteInPtr In;
|
||||
IByteOutPtr Out;
|
||||
} Stream;
|
||||
|
||||
Byte Indx2Units[PPMD_NUM_INDEXES + 2]; // +2 for alignment
|
||||
|
|
|
|||
78
C/Ppmd8Dec.c
78
C/Ppmd8Dec.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd8Dec.c -- Ppmd8 (PPMdI) Decoder
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.I (2002): Dmitry Shkarin : Public domain
|
||||
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
|
||||
|
|
@ -8,8 +8,8 @@ This code is based on:
|
|||
|
||||
#include "Ppmd8.h"
|
||||
|
||||
#define kTop (1 << 24)
|
||||
#define kBot (1 << 15)
|
||||
#define kTop ((UInt32)1 << 24)
|
||||
#define kBot ((UInt32)1 << 15)
|
||||
|
||||
#define READ_BYTE(p) IByteIn_Read((p)->Stream.In)
|
||||
|
||||
|
|
@ -37,9 +37,9 @@ BoolInt Ppmd8_Init_RangeDec(CPpmd8 *p)
|
|||
|
||||
#define R p
|
||||
|
||||
MY_FORCE_INLINE
|
||||
// MY_NO_INLINE
|
||||
static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
|
||||
Z7_FORCE_INLINE
|
||||
// Z7_NO_INLINE
|
||||
static void Ppmd8_RD_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
|
||||
{
|
||||
start *= R->Range;
|
||||
R->Low += start;
|
||||
|
|
@ -48,17 +48,17 @@ static void RangeDec_Decode(CPpmd8 *p, UInt32 start, UInt32 size)
|
|||
RC_NORM_LOCAL(R)
|
||||
}
|
||||
|
||||
#define RC_Decode(start, size) RangeDec_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
#define RC_Decode(start, size) Ppmd8_RD_Decode(p, start, size);
|
||||
#define RC_DecodeFinal(start, size) RC_Decode(start, size) RC_NORM_REMOTE(R)
|
||||
#define RC_GetThreshold(total) (R->Code / (R->Range /= (total)))
|
||||
|
||||
|
||||
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
|
||||
typedef CPpmd8_Context * CTX_PTR;
|
||||
// typedef CPpmd8_Context * CTX_PTR;
|
||||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
void Ppmd8_UpdateModel(CPpmd8 *p);
|
||||
|
||||
#define MASK(sym) ((unsigned char *)charMask)[sym]
|
||||
#define MASK(sym) ((Byte *)charMask)[sym]
|
||||
|
||||
|
||||
int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
||||
|
|
@ -81,7 +81,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
if ((Int32)(count -= s->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal(0, s->Freq);
|
||||
RC_DecodeFinal(0, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd8_Update1_0(p);
|
||||
|
|
@ -96,7 +96,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
if ((Int32)(count -= (++s)->Freq) < 0)
|
||||
{
|
||||
Byte sym;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd8_Update1(p);
|
||||
|
|
@ -109,10 +109,10 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
return PPMD8_SYM_ERROR;
|
||||
|
||||
hiCnt -= count;
|
||||
RC_Decode(hiCnt, summFreq - hiCnt);
|
||||
RC_Decode(hiCnt, summFreq - hiCnt)
|
||||
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
// i = p->MinContext->NumStats - 1;
|
||||
// do { MASK((--s)->Symbol) = 0; } while (--i);
|
||||
{
|
||||
|
|
@ -120,8 +120,8 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -152,7 +152,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
// Ppmd8_UpdateBin(p);
|
||||
{
|
||||
unsigned freq = s->Freq;
|
||||
CTX_PTR c = CTX(SUCCESSOR(s));
|
||||
CPpmd8_Context *c = CTX(SUCCESSOR(s));
|
||||
sym = s->Symbol;
|
||||
p->FoundState = s;
|
||||
p->PrevSuccess = 1;
|
||||
|
|
@ -176,7 +176,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - size0;
|
||||
RC_NORM_LOCAL(R)
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
MASK(Ppmd8Context_OneState(p->MinContext)->Symbol) = 0;
|
||||
p->PrevSuccess = 0;
|
||||
}
|
||||
|
|
@ -209,17 +209,17 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
unsigned num2 = num / 2;
|
||||
|
||||
num &= 1;
|
||||
hiCnt = (s->Freq & (unsigned)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
hiCnt = (s->Freq & (UInt32)(MASK(s->Symbol))) & (0 - (UInt32)num);
|
||||
s += num;
|
||||
p->MinContext = mc;
|
||||
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s[0].Symbol;
|
||||
unsigned sym1 = s[1].Symbol;
|
||||
const unsigned sym0 = s[0].Symbol;
|
||||
const unsigned sym1 = s[1].Symbol;
|
||||
s += 2;
|
||||
hiCnt += (s[-2].Freq & (unsigned)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (unsigned)(MASK(sym1)));
|
||||
hiCnt += (s[-2].Freq & (UInt32)(MASK(sym0)));
|
||||
hiCnt += (s[-1].Freq & (UInt32)(MASK(sym1)));
|
||||
}
|
||||
while (--num2);
|
||||
}
|
||||
|
|
@ -227,7 +227,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
see = Ppmd8_MakeEscFreq(p, numMasked, &freqSum);
|
||||
freqSum += hiCnt;
|
||||
freqSum2 = freqSum;
|
||||
PPMD8_CORRECT_SUM_RANGE(R, freqSum2);
|
||||
PPMD8_CORRECT_SUM_RANGE(R, freqSum2)
|
||||
|
||||
|
||||
count = RC_GetThreshold(freqSum2);
|
||||
|
|
@ -235,7 +235,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
if (count < hiCnt)
|
||||
{
|
||||
Byte sym;
|
||||
// Ppmd_See_Update(see); // new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
// Ppmd_See_UPDATE(see) // new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
s = Ppmd8_GetStats(p, p->MinContext);
|
||||
hiCnt = count;
|
||||
|
||||
|
|
@ -243,15 +243,15 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
{
|
||||
for (;;)
|
||||
{
|
||||
count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (unsigned)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
// count -= s->Freq & (UInt32)(MASK((s)->Symbol)); s++; if ((Int32)count < 0) break;
|
||||
}
|
||||
}
|
||||
s--;
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq);
|
||||
RC_DecodeFinal((hiCnt - count) - s->Freq, s->Freq)
|
||||
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
Ppmd_See_Update(see);
|
||||
Ppmd_See_UPDATE(see)
|
||||
p->FoundState = s;
|
||||
sym = s->Symbol;
|
||||
Ppmd8_Update2(p);
|
||||
|
|
@ -261,7 +261,7 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
if (count >= freqSum2)
|
||||
return PPMD8_SYM_ERROR;
|
||||
|
||||
RC_Decode(hiCnt, freqSum2 - hiCnt);
|
||||
RC_Decode(hiCnt, freqSum2 - hiCnt)
|
||||
|
||||
// We increase (see->Summ) for sum of Freqs of all non_Masked symbols.
|
||||
// new (see->Summ) value can overflow over 16-bits in some rare cases
|
||||
|
|
@ -277,3 +277,19 @@ int Ppmd8_DecodeSymbol(CPpmd8 *p)
|
|||
while (s != s2);
|
||||
}
|
||||
}
|
||||
|
||||
#undef kTop
|
||||
#undef kBot
|
||||
#undef READ_BYTE
|
||||
#undef RC_NORM_BASE
|
||||
#undef RC_NORM_1
|
||||
#undef RC_NORM
|
||||
#undef RC_NORM_LOCAL
|
||||
#undef RC_NORM_REMOTE
|
||||
#undef R
|
||||
#undef RC_Decode
|
||||
#undef RC_DecodeFinal
|
||||
#undef RC_GetThreshold
|
||||
#undef CTX
|
||||
#undef SUCCESSOR
|
||||
#undef MASK
|
||||
|
|
|
|||
101
C/Ppmd8Enc.c
101
C/Ppmd8Enc.c
|
|
@ -1,5 +1,5 @@
|
|||
/* Ppmd8Enc.c -- Ppmd8 (PPMdI) Encoder
|
||||
2021-04-13 : Igor Pavlov : Public domain
|
||||
2023-09-07 : Igor Pavlov : Public domain
|
||||
This code is based on:
|
||||
PPMd var.I (2002): Dmitry Shkarin : Public domain
|
||||
Carryless rangecoder (1999): Dmitry Subbotin : Public domain */
|
||||
|
|
@ -8,8 +8,8 @@ This code is based on:
|
|||
|
||||
#include "Ppmd8.h"
|
||||
|
||||
#define kTop (1 << 24)
|
||||
#define kBot (1 << 15)
|
||||
#define kTop ((UInt32)1 << 24)
|
||||
#define kBot ((UInt32)1 << 15)
|
||||
|
||||
#define WRITE_BYTE(p) IByteOut_Write(p->Stream.Out, (Byte)(p->Low >> 24))
|
||||
|
||||
|
|
@ -54,13 +54,13 @@ void Ppmd8_Flush_RangeEnc(CPpmd8 *p)
|
|||
|
||||
|
||||
|
||||
MY_FORCE_INLINE
|
||||
// MY_NO_INLINE
|
||||
static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
|
||||
Z7_FORCE_INLINE
|
||||
// Z7_NO_INLINE
|
||||
static void Ppmd8_RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
|
||||
{
|
||||
R->Low += start * (R->Range /= total);
|
||||
R->Range *= size;
|
||||
RC_NORM_LOCAL(R);
|
||||
RC_NORM_LOCAL(R)
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -72,19 +72,19 @@ static void RangeEnc_Encode(CPpmd8 *p, UInt32 start, UInt32 size, UInt32 total)
|
|||
|
||||
|
||||
|
||||
#define RC_Encode(start, size, total) RangeEnc_Encode(p, start, size, total);
|
||||
#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total); RC_NORM_REMOTE(p);
|
||||
#define RC_Encode(start, size, total) Ppmd8_RangeEnc_Encode(p, start, size, total);
|
||||
#define RC_EncodeFinal(start, size, total) RC_Encode(start, size, total) RC_NORM_REMOTE(p)
|
||||
|
||||
#define CTX(ref) ((CPpmd8_Context *)Ppmd8_GetContext(p, ref))
|
||||
|
||||
typedef CPpmd8_Context * CTX_PTR;
|
||||
// typedef CPpmd8_Context * CTX_PTR;
|
||||
#define SUCCESSOR(p) Ppmd_GET_SUCCESSOR(p)
|
||||
|
||||
void Ppmd8_UpdateModel(CPpmd8 *p);
|
||||
|
||||
#define MASK(sym) ((unsigned char *)charMask)[sym]
|
||||
#define MASK(sym) ((Byte *)charMask)[sym]
|
||||
|
||||
// MY_FORCE_INLINE
|
||||
// Z7_FORCE_INLINE
|
||||
// static
|
||||
void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
||||
{
|
||||
|
|
@ -104,7 +104,7 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
if (s->Symbol == symbol)
|
||||
{
|
||||
|
||||
RC_EncodeFinal(0, s->Freq, summFreq);
|
||||
RC_EncodeFinal(0, s->Freq, summFreq)
|
||||
p->FoundState = s;
|
||||
Ppmd8_Update1_0(p);
|
||||
return;
|
||||
|
|
@ -117,7 +117,7 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
if ((++s)->Symbol == symbol)
|
||||
{
|
||||
|
||||
RC_EncodeFinal(sum, s->Freq, summFreq);
|
||||
RC_EncodeFinal(sum, s->Freq, summFreq)
|
||||
p->FoundState = s;
|
||||
Ppmd8_Update1(p);
|
||||
return;
|
||||
|
|
@ -127,10 +127,10 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
while (--i);
|
||||
|
||||
|
||||
RC_Encode(sum, summFreq - sum, summFreq);
|
||||
RC_Encode(sum, summFreq - sum, summFreq)
|
||||
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
// MASK(s->Symbol) = 0;
|
||||
// i = p->MinContext->NumStats;
|
||||
// do { MASK((--s)->Symbol) = 0; } while (--i);
|
||||
|
|
@ -139,8 +139,8 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -153,20 +153,20 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
UInt16 *prob = Ppmd8_GetBinSumm(p);
|
||||
CPpmd_State *s = Ppmd8Context_OneState(p->MinContext);
|
||||
UInt32 pr = *prob;
|
||||
UInt32 bound = (R->Range >> 14) * pr;
|
||||
const UInt32 bound = (R->Range >> 14) * pr;
|
||||
pr = PPMD_UPDATE_PROB_1(pr);
|
||||
if (s->Symbol == symbol)
|
||||
{
|
||||
*prob = (UInt16)(pr + (1 << PPMD_INT_BITS));
|
||||
// RangeEnc_EncodeBit_0(p, bound);
|
||||
R->Range = bound;
|
||||
RC_NORM(R);
|
||||
RC_NORM(R)
|
||||
|
||||
// p->FoundState = s;
|
||||
// Ppmd8_UpdateBin(p);
|
||||
{
|
||||
unsigned freq = s->Freq;
|
||||
CTX_PTR c = CTX(SUCCESSOR(s));
|
||||
const unsigned freq = s->Freq;
|
||||
CPpmd8_Context *c = CTX(SUCCESSOR(s));
|
||||
p->FoundState = s;
|
||||
p->PrevSuccess = 1;
|
||||
p->RunLength++;
|
||||
|
|
@ -187,7 +187,7 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
R->Range = (R->Range & ~((UInt32)PPMD_BIN_SCALE - 1)) - bound;
|
||||
RC_NORM_LOCAL(R)
|
||||
|
||||
PPMD_SetAllBitsIn256Bytes(charMask);
|
||||
PPMD_SetAllBitsIn256Bytes(charMask)
|
||||
MASK(s->Symbol) = 0;
|
||||
p->PrevSuccess = 0;
|
||||
}
|
||||
|
|
@ -248,14 +248,14 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
|
||||
do
|
||||
{
|
||||
unsigned cur = s->Symbol;
|
||||
const unsigned cur = s->Symbol;
|
||||
if ((int)cur == symbol)
|
||||
{
|
||||
UInt32 low = sum;
|
||||
UInt32 freq = s->Freq;
|
||||
const UInt32 low = sum;
|
||||
const UInt32 freq = s->Freq;
|
||||
unsigned num2;
|
||||
|
||||
Ppmd_See_Update(see);
|
||||
Ppmd_See_UPDATE(see)
|
||||
p->FoundState = s;
|
||||
sum += escFreq;
|
||||
|
||||
|
|
@ -265,21 +265,20 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
if (num2 != 0)
|
||||
{
|
||||
s += i;
|
||||
for (;;)
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s[0].Symbol;
|
||||
unsigned sym1 = s[1].Symbol;
|
||||
const unsigned sym0 = s[0].Symbol;
|
||||
const unsigned sym1 = s[1].Symbol;
|
||||
s += 2;
|
||||
sum += (s[-2].Freq & (unsigned)(MASK(sym0)));
|
||||
sum += (s[-1].Freq & (unsigned)(MASK(sym1)));
|
||||
if (--num2 == 0)
|
||||
break;
|
||||
}
|
||||
while (--num2);
|
||||
}
|
||||
|
||||
PPMD8_CORRECT_SUM_RANGE(p, sum);
|
||||
PPMD8_CORRECT_SUM_RANGE(p, sum)
|
||||
|
||||
RC_EncodeFinal(low, freq, sum);
|
||||
RC_EncodeFinal(low, freq, sum)
|
||||
Ppmd8_Update2(p);
|
||||
return;
|
||||
}
|
||||
|
|
@ -291,19 +290,19 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
{
|
||||
UInt32 total = sum + escFreq;
|
||||
see->Summ = (UInt16)(see->Summ + total);
|
||||
PPMD8_CORRECT_SUM_RANGE(p, total);
|
||||
PPMD8_CORRECT_SUM_RANGE(p, total)
|
||||
|
||||
RC_Encode(sum, total - sum, total);
|
||||
RC_Encode(sum, total - sum, total)
|
||||
}
|
||||
|
||||
{
|
||||
CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
|
||||
const CPpmd_State *s2 = Ppmd8_GetStats(p, p->MinContext);
|
||||
s--;
|
||||
MASK(s->Symbol) = 0;
|
||||
do
|
||||
{
|
||||
unsigned sym0 = s2[0].Symbol;
|
||||
unsigned sym1 = s2[1].Symbol;
|
||||
const unsigned sym0 = s2[0].Symbol;
|
||||
const unsigned sym1 = s2[1].Symbol;
|
||||
s2 += 2;
|
||||
MASK(sym0) = 0;
|
||||
MASK(sym1) = 0;
|
||||
|
|
@ -312,3 +311,27 @@ void Ppmd8_EncodeSymbol(CPpmd8 *p, int symbol)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#undef kTop
|
||||
#undef kBot
|
||||
#undef WRITE_BYTE
|
||||
#undef RC_NORM_BASE
|
||||
#undef RC_NORM_1
|
||||
#undef RC_NORM
|
||||
#undef RC_NORM_LOCAL
|
||||
#undef RC_NORM_REMOTE
|
||||
#undef R
|
||||
#undef RC_Encode
|
||||
#undef RC_EncodeFinal
|
||||
|
||||
#undef CTX
|
||||
#undef SUCCESSOR
|
||||
#undef MASK
|
||||
|
|
|
|||
127
C/Precomp.h
127
C/Precomp.h
|
|
@ -1,10 +1,127 @@
|
|||
/* Precomp.h -- StdAfx
|
||||
2013-11-12 : Igor Pavlov : Public domain */
|
||||
/* Precomp.h -- precompilation file
|
||||
2024-01-25 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_PRECOMP_H
|
||||
#define __7Z_PRECOMP_H
|
||||
#ifndef ZIP7_INC_PRECOMP_H
|
||||
#define ZIP7_INC_PRECOMP_H
|
||||
|
||||
/*
|
||||
this file must be included before another *.h files and before <windows.h>.
|
||||
this file is included from the following files:
|
||||
C\*.c
|
||||
C\Util\*\Precomp.h <- C\Util\*\*.c
|
||||
CPP\Common\Common.h <- *\StdAfx.h <- *\*.cpp
|
||||
|
||||
this file can set the following macros:
|
||||
Z7_LARGE_PAGES 1
|
||||
Z7_LONG_PATH 1
|
||||
Z7_WIN32_WINNT_MIN 0x0500 (or higher) : we require at least win2000+ for 7-Zip
|
||||
_WIN32_WINNT 0x0500 (or higher)
|
||||
WINVER _WIN32_WINNT
|
||||
UNICODE 1
|
||||
_UNICODE 1
|
||||
*/
|
||||
|
||||
#include "Compiler.h"
|
||||
/* #include "7zTypes.h" */
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// #pragma warning(disable : 4206) // nonstandard extension used : translation unit is empty
|
||||
#if _MSC_VER >= 1912
|
||||
// #pragma warning(disable : 5039) // pointer or reference to potentially throwing function passed to 'extern "C"' function under - EHc.Undefined behavior may occur if this function throws an exception.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
// for debug:
|
||||
#define UNICODE 1
|
||||
#define _UNICODE 1
|
||||
#define _WIN32_WINNT 0x0500 // win2000
|
||||
#ifndef WINVER
|
||||
#define WINVER _WIN32_WINNT
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef _WIN32
|
||||
/*
|
||||
this "Precomp.h" file must be included before <windows.h>,
|
||||
if we want to define _WIN32_WINNT before <windows.h>.
|
||||
*/
|
||||
|
||||
#ifndef Z7_LARGE_PAGES
|
||||
#ifndef Z7_NO_LARGE_PAGES
|
||||
#define Z7_LARGE_PAGES 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef Z7_LONG_PATH
|
||||
#ifndef Z7_NO_LONG_PATH
|
||||
#define Z7_LONG_PATH 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef Z7_DEVICE_FILE
|
||||
#ifndef Z7_NO_DEVICE_FILE
|
||||
// #define Z7_DEVICE_FILE 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// we don't change macros if included after <windows.h>
|
||||
#ifndef _WINDOWS_
|
||||
|
||||
#ifndef Z7_WIN32_WINNT_MIN
|
||||
#if defined(_M_ARM64) || defined(__aarch64__)
|
||||
// #define Z7_WIN32_WINNT_MIN 0x0a00 // win10
|
||||
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
|
||||
#elif defined(_M_ARM) && defined(_M_ARMT) && defined(_M_ARM_NT)
|
||||
// #define Z7_WIN32_WINNT_MIN 0x0602 // win8
|
||||
#define Z7_WIN32_WINNT_MIN 0x0600 // vista
|
||||
#elif defined(_M_X64) || defined(_M_AMD64) || defined(__x86_64__) || defined(_M_IA64)
|
||||
#define Z7_WIN32_WINNT_MIN 0x0503 // win2003
|
||||
// #elif defined(_M_IX86) || defined(__i386__)
|
||||
// #define Z7_WIN32_WINNT_MIN 0x0500 // win2000
|
||||
#else // x86 and another(old) systems
|
||||
#define Z7_WIN32_WINNT_MIN 0x0500 // win2000
|
||||
// #define Z7_WIN32_WINNT_MIN 0x0502 // win2003 // for debug
|
||||
#endif
|
||||
#endif // Z7_WIN32_WINNT_MIN
|
||||
|
||||
|
||||
#ifndef Z7_DO_NOT_DEFINE_WIN32_WINNT
|
||||
#ifdef _WIN32_WINNT
|
||||
// #error Stop_Compiling_Bad_WIN32_WINNT
|
||||
#else
|
||||
#ifndef Z7_NO_DEFINE_WIN32_WINNT
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define _WIN32_WINNT Z7_WIN32_WINNT_MIN
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
#endif // _WIN32_WINNT
|
||||
|
||||
#ifndef WINVER
|
||||
#define WINVER _WIN32_WINNT
|
||||
#endif
|
||||
#endif // Z7_DO_NOT_DEFINE_WIN32_WINNT
|
||||
|
||||
|
||||
#ifndef _MBCS
|
||||
#ifndef Z7_NO_UNICODE
|
||||
// UNICODE and _UNICODE are used by <windows.h> and by 7-zip code.
|
||||
|
||||
#ifndef UNICODE
|
||||
#define UNICODE 1
|
||||
#endif
|
||||
|
||||
#ifndef _UNICODE
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define _UNICODE 1
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
|
||||
#endif // Z7_NO_UNICODE
|
||||
#endif // _MBCS
|
||||
#endif // _WINDOWS_
|
||||
|
||||
// #include "7zWindows.h"
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
/* RotateDefs.h -- Rotate functions
|
||||
2015-03-25 : Igor Pavlov : Public domain */
|
||||
2023-06-18 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __ROTATE_DEFS_H
|
||||
#define __ROTATE_DEFS_H
|
||||
#ifndef ZIP7_INC_ROTATE_DEFS_H
|
||||
#define ZIP7_INC_ROTATE_DEFS_H
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* don't use _rotl with MINGW. It can insert slow call to function. */
|
||||
/* don't use _rotl with old MINGW. It can insert slow call to function. */
|
||||
|
||||
/* #if (_MSC_VER >= 1200) */
|
||||
#pragma intrinsic(_rotl)
|
||||
|
|
@ -18,12 +18,32 @@
|
|||
#define rotlFixed(x, n) _rotl((x), (n))
|
||||
#define rotrFixed(x, n) _rotr((x), (n))
|
||||
|
||||
#if (_MSC_VER >= 1300)
|
||||
#define Z7_ROTL64(x, n) _rotl64((x), (n))
|
||||
#define Z7_ROTR64(x, n) _rotr64((x), (n))
|
||||
#else
|
||||
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
|
||||
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
/* new compilers can translate these macros to fast commands. */
|
||||
|
||||
#if defined(__clang__) && (__clang_major__ >= 4) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 5)
|
||||
/* GCC 4.9.0 and clang 3.5 can recognize more correct version: */
|
||||
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (-(n) & 31)))
|
||||
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (-(n) & 31)))
|
||||
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (-(n) & 63)))
|
||||
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (-(n) & 63)))
|
||||
#else
|
||||
/* for old GCC / clang: */
|
||||
#define rotlFixed(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
|
||||
#define rotrFixed(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
|
||||
#define Z7_ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
|
||||
#define Z7_ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
|||
316
C/Sha1.c
316
C/Sha1.c
|
|
@ -1,64 +1,60 @@
|
|||
/* Sha1.c -- SHA-1 Hash
|
||||
2021-07-13 : Igor Pavlov : Public domain
|
||||
: Igor Pavlov : Public domain
|
||||
This code is based on public domain code of Steve Reid from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "Sha1.h"
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// #define USE_MY_MM
|
||||
#endif
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER >= 1200
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1800) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
|
||||
|| defined(_MSC_VER) && (_MSC_VER >= 1200)
|
||||
#define Z7_COMPILER_SHA1_SUPPORTED
|
||||
#endif
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER >= 1910 && _MSC_VER >= 1929 && _MSC_FULL_VER >= 192930037
|
||||
#define _SHA_SUPPORTED
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) \
|
||||
&& (!defined(Z7_MSC_VER_ORIGINAL) || (_MSC_VER >= 1929) && (_MSC_FULL_VER >= 192930037))
|
||||
#if defined(__ARM_FEATURE_SHA2) \
|
||||
|| defined(__ARM_FEATURE_CRYPTO)
|
||||
#define Z7_COMPILER_SHA1_SUPPORTED
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if defined(__ARM_FP) && \
|
||||
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| !defined(Z7_CLANG_VERSION) \
|
||||
|| defined(__ARM_NEON) && \
|
||||
(Z7_CLANG_VERSION < 170000 || \
|
||||
Z7_CLANG_VERSION > 170001)
|
||||
#define Z7_COMPILER_SHA1_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 6) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
#ifdef _SHA_SUPPORTED
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
#ifdef Z7_COMPILER_SHA1_SUPPORTED
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
|
||||
static SHA1_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
|
||||
static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS = Sha1_UpdateBlocks;
|
||||
static SHA1_FUNC_UPDATE_BLOCKS g_SHA1_FUNC_UPDATE_BLOCKS_HW;
|
||||
|
||||
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
|
||||
#define SHA1_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
|
||||
#else
|
||||
#define UPDATE_BLOCKS(p) Sha1_UpdateBlocks
|
||||
#define SHA1_UPDATE_BLOCKS(p) Sha1_UpdateBlocks
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -66,16 +62,16 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
{
|
||||
SHA1_FUNC_UPDATE_BLOCKS func = Sha1_UpdateBlocks;
|
||||
|
||||
#ifdef _SHA_SUPPORTED
|
||||
#ifdef Z7_COMPILER_SHA1_SUPPORTED
|
||||
if (algo != SHA1_ALGO_SW)
|
||||
{
|
||||
if (algo == SHA1_ALGO_DEFAULT)
|
||||
func = g_FUNC_UPDATE_BLOCKS;
|
||||
func = g_SHA1_FUNC_UPDATE_BLOCKS;
|
||||
else
|
||||
{
|
||||
if (algo != SHA1_ALGO_HW)
|
||||
return False;
|
||||
func = g_FUNC_UPDATE_BLOCKS_HW;
|
||||
func = g_SHA1_FUNC_UPDATE_BLOCKS_HW;
|
||||
if (!func)
|
||||
return False;
|
||||
}
|
||||
|
|
@ -85,27 +81,28 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
return False;
|
||||
#endif
|
||||
|
||||
p->func_UpdateBlocks = func;
|
||||
p->v.vars.func_UpdateBlocks = func;
|
||||
return True;
|
||||
}
|
||||
|
||||
|
||||
/* define it for speed optimization */
|
||||
// #define _SHA1_UNROLL
|
||||
// #define Z7_SHA1_UNROLL
|
||||
|
||||
// allowed unroll steps: (1, 2, 4, 5, 20)
|
||||
|
||||
#ifdef _SHA1_UNROLL
|
||||
#undef Z7_SHA1_BIG_W
|
||||
#ifdef Z7_SHA1_UNROLL
|
||||
#define STEP_PRE 20
|
||||
#define STEP_MAIN 20
|
||||
#else
|
||||
#define _SHA1_BIG_W
|
||||
#define Z7_SHA1_BIG_W
|
||||
#define STEP_PRE 5
|
||||
#define STEP_MAIN 5
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef _SHA1_BIG_W
|
||||
#ifdef Z7_SHA1_BIG_W
|
||||
#define kNumW 80
|
||||
#define w(i) W[i]
|
||||
#else
|
||||
|
|
@ -150,11 +147,11 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
*/
|
||||
|
||||
#define M5(i, fx, wx0, wx1) \
|
||||
T5 ( a,b,c,d,e, fx, wx0((i) ) ); \
|
||||
T5 ( e,a,b,c,d, fx, wx1((i)+1) ); \
|
||||
T5 ( d,e,a,b,c, fx, wx1((i)+2) ); \
|
||||
T5 ( c,d,e,a,b, fx, wx1((i)+3) ); \
|
||||
T5 ( b,c,d,e,a, fx, wx1((i)+4) ); \
|
||||
T5 ( a,b,c,d,e, fx, wx0((i) ) ) \
|
||||
T5 ( e,a,b,c,d, fx, wx1((i)+1) ) \
|
||||
T5 ( d,e,a,b,c, fx, wx1((i)+2) ) \
|
||||
T5 ( c,d,e,a,b, fx, wx1((i)+3) ) \
|
||||
T5 ( b,c,d,e,a, fx, wx1((i)+4) ) \
|
||||
|
||||
#define R5(i, fx, wx) \
|
||||
M5 ( i, fx, wx, wx) \
|
||||
|
|
@ -163,17 +160,17 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
#if STEP_PRE > 5
|
||||
|
||||
#define R20_START \
|
||||
R5 ( 0, f0, w0); \
|
||||
R5 ( 5, f0, w0); \
|
||||
R5 ( 10, f0, w0); \
|
||||
M5 ( 15, f0, w0, w1); \
|
||||
R5 ( 0, f0, w0) \
|
||||
R5 ( 5, f0, w0) \
|
||||
R5 ( 10, f0, w0) \
|
||||
M5 ( 15, f0, w0, w1) \
|
||||
|
||||
#elif STEP_PRE == 5
|
||||
|
||||
#define R20_START \
|
||||
{ size_t i; for (i = 0; i < 15; i += STEP_PRE) \
|
||||
{ R5(i, f0, w0); } } \
|
||||
M5 ( 15, f0, w0, w1); \
|
||||
{ R5(i, f0, w0) } } \
|
||||
M5 ( 15, f0, w0, w1) \
|
||||
|
||||
#else
|
||||
|
||||
|
|
@ -187,8 +184,8 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
|
||||
#define R20_START \
|
||||
{ size_t i; for (i = 0; i < 16; i += STEP_PRE) \
|
||||
{ R_PRE(i, f0, w0); } } \
|
||||
R4 ( 16, f0, w1); \
|
||||
{ R_PRE(i, f0, w0) } } \
|
||||
R4 ( 16, f0, w1) \
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -197,10 +194,10 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
#if STEP_MAIN > 5
|
||||
|
||||
#define R20(ii, fx) \
|
||||
R5 ( (ii) , fx, w1); \
|
||||
R5 ( (ii) + 5 , fx, w1); \
|
||||
R5 ( (ii) + 10, fx, w1); \
|
||||
R5 ( (ii) + 15, fx, w1); \
|
||||
R5 ( (ii) , fx, w1) \
|
||||
R5 ( (ii) + 5 , fx, w1) \
|
||||
R5 ( (ii) + 10, fx, w1) \
|
||||
R5 ( (ii) + 15, fx, w1) \
|
||||
|
||||
#else
|
||||
|
||||
|
|
@ -216,7 +213,7 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
|
||||
#define R20(ii, fx) \
|
||||
{ size_t i; for (i = (ii); i < (ii) + 20; i += STEP_MAIN) \
|
||||
{ R_MAIN(i, fx, w1); } } \
|
||||
{ R_MAIN(i, fx, w1) } } \
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -224,7 +221,7 @@ BoolInt Sha1_SetFunction(CSha1 *p, unsigned algo)
|
|||
|
||||
void Sha1_InitState(CSha1 *p)
|
||||
{
|
||||
p->count = 0;
|
||||
p->v.vars.count = 0;
|
||||
p->state[0] = 0x67452301;
|
||||
p->state[1] = 0xEFCDAB89;
|
||||
p->state[2] = 0x98BADCFE;
|
||||
|
|
@ -234,9 +231,9 @@ void Sha1_InitState(CSha1 *p)
|
|||
|
||||
void Sha1_Init(CSha1 *p)
|
||||
{
|
||||
p->func_UpdateBlocks =
|
||||
#ifdef _SHA_SUPPORTED
|
||||
g_FUNC_UPDATE_BLOCKS;
|
||||
p->v.vars.func_UpdateBlocks =
|
||||
#ifdef Z7_COMPILER_SHA1_SUPPORTED
|
||||
g_SHA1_FUNC_UPDATE_BLOCKS;
|
||||
#else
|
||||
NULL;
|
||||
#endif
|
||||
|
|
@ -244,12 +241,12 @@ void Sha1_Init(CSha1 *p)
|
|||
}
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
Z7_NO_INLINE
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
UInt32 a, b, c, d, e;
|
||||
UInt32 W[kNumW];
|
||||
// if (numBlocks != 0x1264378347) return;
|
||||
|
||||
if (numBlocks == 0)
|
||||
return;
|
||||
|
||||
|
|
@ -266,9 +263,9 @@ void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t nu
|
|||
#endif
|
||||
|
||||
R20_START
|
||||
R20(20, f1);
|
||||
R20(40, f2);
|
||||
R20(60, f3);
|
||||
R20(20, f1)
|
||||
R20(40, f2)
|
||||
R20(60, f3)
|
||||
|
||||
a += state[0];
|
||||
b += state[1];
|
||||
|
|
@ -282,32 +279,27 @@ void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t nu
|
|||
state[3] = d;
|
||||
state[4] = e;
|
||||
|
||||
data += 64;
|
||||
data += SHA1_BLOCK_SIZE;
|
||||
}
|
||||
while (--numBlocks);
|
||||
}
|
||||
|
||||
|
||||
#define Sha1_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
#define Sha1_UpdateBlock(p) SHA1_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
|
||||
void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
{
|
||||
unsigned pos = (unsigned)p->count & 0x3F;
|
||||
unsigned num;
|
||||
|
||||
p->count += size;
|
||||
|
||||
num = 64 - pos;
|
||||
const unsigned pos = (unsigned)p->v.vars.count & (SHA1_BLOCK_SIZE - 1);
|
||||
const unsigned num = SHA1_BLOCK_SIZE - pos;
|
||||
p->v.vars.count += size;
|
||||
if (num > size)
|
||||
{
|
||||
memcpy(p->buffer + pos, data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pos != 0)
|
||||
{
|
||||
size -= num;
|
||||
|
|
@ -317,9 +309,10 @@ void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
|
|||
}
|
||||
}
|
||||
{
|
||||
size_t numBlocks = size >> 6;
|
||||
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
|
||||
size &= 0x3F;
|
||||
const size_t numBlocks = size >> 6;
|
||||
// if (numBlocks)
|
||||
SHA1_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
|
||||
size &= SHA1_BLOCK_SIZE - 1;
|
||||
if (size == 0)
|
||||
return;
|
||||
data += (numBlocks << 6);
|
||||
|
|
@ -330,64 +323,40 @@ void Sha1_Update(CSha1 *p, const Byte *data, size_t size)
|
|||
|
||||
void Sha1_Final(CSha1 *p, Byte *digest)
|
||||
{
|
||||
unsigned pos = (unsigned)p->count & 0x3F;
|
||||
|
||||
|
||||
unsigned pos = (unsigned)p->v.vars.count & (SHA1_BLOCK_SIZE - 1);
|
||||
p->buffer[pos++] = 0x80;
|
||||
|
||||
if (pos > (64 - 8))
|
||||
if (pos > (SHA1_BLOCK_SIZE - 4 * 2))
|
||||
{
|
||||
while (pos != 64) { p->buffer[pos++] = 0; }
|
||||
// memset(&p->buf.buffer[pos], 0, 64 - pos);
|
||||
while (pos != SHA1_BLOCK_SIZE) { p->buffer[pos++] = 0; }
|
||||
// memset(&p->buf.buffer[pos], 0, SHA1_BLOCK_SIZE - pos);
|
||||
Sha1_UpdateBlock(p);
|
||||
pos = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
if (pos & 3)
|
||||
memset(&p->buffer[pos], 0, (SHA1_BLOCK_SIZE - 4 * 2) - pos);
|
||||
{
|
||||
p->buffer[pos] = 0;
|
||||
p->buffer[pos + 1] = 0;
|
||||
p->buffer[pos + 2] = 0;
|
||||
pos += 3;
|
||||
pos &= ~3;
|
||||
const UInt64 numBits = p->v.vars.count << 3;
|
||||
SetBe32(p->buffer + SHA1_BLOCK_SIZE - 4 * 2, (UInt32)(numBits >> 32))
|
||||
SetBe32(p->buffer + SHA1_BLOCK_SIZE - 4 * 1, (UInt32)(numBits))
|
||||
}
|
||||
{
|
||||
for (; pos < 64 - 8; pos += 4)
|
||||
*(UInt32 *)(&p->buffer[pos]) = 0;
|
||||
}
|
||||
*/
|
||||
|
||||
memset(&p->buffer[pos], 0, (64 - 8) - pos);
|
||||
|
||||
{
|
||||
UInt64 numBits = (p->count << 3);
|
||||
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
|
||||
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
|
||||
}
|
||||
|
||||
Sha1_UpdateBlock(p);
|
||||
|
||||
SetBe32(digest, p->state[0]);
|
||||
SetBe32(digest + 4, p->state[1]);
|
||||
SetBe32(digest + 8, p->state[2]);
|
||||
SetBe32(digest + 12, p->state[3]);
|
||||
SetBe32(digest + 16, p->state[4]);
|
||||
SetBe32(digest, p->state[0])
|
||||
SetBe32(digest + 4, p->state[1])
|
||||
SetBe32(digest + 8, p->state[2])
|
||||
SetBe32(digest + 12, p->state[3])
|
||||
SetBe32(digest + 16, p->state[4])
|
||||
|
||||
|
||||
|
||||
|
||||
Sha1_InitState(p);
|
||||
}
|
||||
|
||||
|
||||
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size)
|
||||
{
|
||||
const UInt64 numBits = (p->count + size) << 3;
|
||||
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32));
|
||||
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits));
|
||||
const UInt64 numBits = (p->v.vars.count + size) << 3;
|
||||
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 2], (UInt32)(numBits >> 32))
|
||||
SetBe32(&((UInt32 *)(void *)block)[SHA1_NUM_BLOCK_WORDS - 1], (UInt32)(numBits))
|
||||
// SetBe32((UInt32 *)(block + size), 0x80000000);
|
||||
SetUi32((UInt32 *)(void *)(block + size), 0x80);
|
||||
SetUi32((UInt32 *)(void *)(block + size), 0x80)
|
||||
size += 4;
|
||||
while (size != (SHA1_NUM_BLOCK_WORDS - 2) * 4)
|
||||
{
|
||||
|
|
@ -407,67 +376,66 @@ void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest)
|
|||
st[3] = p->state[3];
|
||||
st[4] = p->state[4];
|
||||
|
||||
UPDATE_BLOCKS(p)(st, data, 1);
|
||||
SHA1_UPDATE_BLOCKS(p)(st, data, 1);
|
||||
|
||||
SetBe32(destDigest + 0 , st[0]);
|
||||
SetBe32(destDigest + 1 * 4, st[1]);
|
||||
SetBe32(destDigest + 2 * 4, st[2]);
|
||||
SetBe32(destDigest + 3 * 4, st[3]);
|
||||
SetBe32(destDigest + 4 * 4, st[4]);
|
||||
SetBe32(destDigest + 0 , st[0])
|
||||
SetBe32(destDigest + 1 * 4, st[1])
|
||||
SetBe32(destDigest + 2 * 4, st[2])
|
||||
SetBe32(destDigest + 3 * 4, st[3])
|
||||
SetBe32(destDigest + 4 * 4, st[4])
|
||||
}
|
||||
|
||||
|
||||
void Sha1Prepare()
|
||||
void Sha1Prepare(void)
|
||||
{
|
||||
#ifdef _SHA_SUPPORTED
|
||||
#ifdef Z7_COMPILER_SHA1_SUPPORTED
|
||||
SHA1_FUNC_UPDATE_BLOCKS f, f_hw;
|
||||
f = Sha1_UpdateBlocks;
|
||||
f_hw = NULL;
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#ifndef USE_MY_MM
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
if (CPU_IsSupported_SHA()
|
||||
&& CPU_IsSupported_SSSE3()
|
||||
// && CPU_IsSupported_SSE41()
|
||||
)
|
||||
#endif
|
||||
#else
|
||||
#else
|
||||
if (CPU_IsSupported_SHA1())
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
// printf("\n========== HW SHA1 ======== \n");
|
||||
#if defined(MY_CPU_ARM_OR_ARM64) && defined(_MSC_VER)
|
||||
#if 1 && defined(MY_CPU_ARM_OR_ARM64) && defined(Z7_MSC_VER_ORIGINAL) && (_MSC_FULL_VER < 192930037)
|
||||
/* there was bug in MSVC compiler for ARM64 -O2 before version VS2019 16.10 (19.29.30037).
|
||||
It generated incorrect SHA-1 code.
|
||||
21.03 : we test sha1-hardware code at runtime initialization */
|
||||
|
||||
#pragma message("== SHA1 code: MSC compiler : failure-check code was inserted")
|
||||
|
||||
UInt32 state[5] = { 0, 1, 2, 3, 4 } ;
|
||||
Byte data[64];
|
||||
unsigned i;
|
||||
for (i = 0; i < sizeof(data); i += 2)
|
||||
{
|
||||
data[i ] = (Byte)(i);
|
||||
data[i + 1] = (Byte)(i + 1);
|
||||
}
|
||||
|
||||
Sha1_UpdateBlocks_HW(state, data, sizeof(data) / 64);
|
||||
|
||||
if ( state[0] != 0x9acd7297
|
||||
|| state[1] != 0x4624d898
|
||||
|| state[2] != 0x0bf079f0
|
||||
|| state[3] != 0x031e61b3
|
||||
|| state[4] != 0x8323fe20)
|
||||
{
|
||||
// printf("\n========== SHA-1 hardware version failure ======== \n");
|
||||
}
|
||||
else
|
||||
#endif
|
||||
It generated incorrect SHA-1 code. */
|
||||
#pragma message("== SHA1 code can work incorrectly with this compiler")
|
||||
#error Stop_Compiling_MSC_Compiler_BUG_SHA1
|
||||
#endif
|
||||
{
|
||||
f = f_hw = Sha1_UpdateBlocks_HW;
|
||||
}
|
||||
}
|
||||
g_FUNC_UPDATE_BLOCKS = f;
|
||||
g_FUNC_UPDATE_BLOCKS_HW = f_hw;
|
||||
#endif
|
||||
g_SHA1_FUNC_UPDATE_BLOCKS = f;
|
||||
g_SHA1_FUNC_UPDATE_BLOCKS_HW = f_hw;
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef kNumW
|
||||
#undef w
|
||||
#undef w0
|
||||
#undef w1
|
||||
#undef f0
|
||||
#undef f1
|
||||
#undef f2
|
||||
#undef f3
|
||||
#undef T1
|
||||
#undef T5
|
||||
#undef M5
|
||||
#undef R1
|
||||
#undef R2
|
||||
#undef R4
|
||||
#undef R5
|
||||
#undef R20_START
|
||||
#undef R_PRE
|
||||
#undef R_MAIN
|
||||
#undef STEP_PRE
|
||||
#undef STEP_MAIN
|
||||
#undef Z7_SHA1_BIG_W
|
||||
#undef Z7_SHA1_UNROLL
|
||||
#undef Z7_COMPILER_SHA1_SUPPORTED
|
||||
|
|
|
|||
28
C/Sha1.h
28
C/Sha1.h
|
|
@ -1,8 +1,8 @@
|
|||
/* Sha1.h -- SHA-1 Hash
|
||||
2021-02-08 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_SHA1_H
|
||||
#define __7Z_SHA1_H
|
||||
#ifndef ZIP7_INC_SHA1_H
|
||||
#define ZIP7_INC_SHA1_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -14,7 +14,10 @@ EXTERN_C_BEGIN
|
|||
#define SHA1_BLOCK_SIZE (SHA1_NUM_BLOCK_WORDS * 4)
|
||||
#define SHA1_DIGEST_SIZE (SHA1_NUM_DIGEST_WORDS * 4)
|
||||
|
||||
typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
|
||||
|
||||
typedef void (Z7_FASTCALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
if (the system supports different SHA1 code implementations)
|
||||
|
|
@ -32,11 +35,18 @@ typedef void (MY_FAST_CALL *SHA1_FUNC_UPDATE_BLOCKS)(UInt32 state[5], const Byte
|
|||
|
||||
typedef struct
|
||||
{
|
||||
SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
|
||||
UInt64 count;
|
||||
UInt64 __pad_2[2];
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
SHA1_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
|
||||
UInt64 count;
|
||||
} vars;
|
||||
UInt64 _pad_64bit[4];
|
||||
void *_pad_align_ptr[2];
|
||||
} v;
|
||||
UInt32 state[SHA1_NUM_DIGEST_WORDS];
|
||||
UInt32 __pad_3[3];
|
||||
UInt32 _pad_3[3];
|
||||
Byte buffer[SHA1_BLOCK_SIZE];
|
||||
} CSha1;
|
||||
|
||||
|
|
@ -62,7 +72,7 @@ void Sha1_Final(CSha1 *p, Byte *digest);
|
|||
void Sha1_PrepareBlock(const CSha1 *p, Byte *block, unsigned size);
|
||||
void Sha1_GetBlockDigest(const CSha1 *p, const Byte *data, Byte *destDigest);
|
||||
|
||||
// void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
// void Z7_FASTCALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
call Sha1Prepare() once at program start.
|
||||
|
|
|
|||
381
C/Sha1Opt.c
381
C/Sha1Opt.c
|
|
@ -1,71 +1,53 @@
|
|||
/* Sha1Opt.c -- SHA-1 optimized code for SHA-1 hardware instructions
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
|
||||
// #define USE_MY_MM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Compiler.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
|
||||
#define USE_HW_SHA
|
||||
#if !defined(__INTEL_COMPILER)
|
||||
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
|
||||
#if !defined(__SHA__) || !defined(__SSSE3__)
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
#if defined(_MSC_VER)
|
||||
// SSSE3: for clang-cl:
|
||||
#include <tmmintrin.h>
|
||||
#define __SHA__
|
||||
#endif
|
||||
#endif
|
||||
#pragma clang diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 8) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
// #pragma GCC target("sha,ssse3")
|
||||
#endif
|
||||
#endif
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1800) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#ifdef USE_MY_MM
|
||||
#define USE_VER_MIN 1300
|
||||
#else
|
||||
#define USE_VER_MIN 1910
|
||||
#endif
|
||||
#if _MSC_VER >= USE_VER_MIN
|
||||
#if (_MSC_VER >= 1900)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
#define Z7_USE_HW_SHA_STUB
|
||||
#endif
|
||||
#endif
|
||||
// #endif // MY_CPU_X86_OR_AMD64
|
||||
#ifndef USE_HW_SHA
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("Sha1 HW")
|
||||
// #include <wmmintrin.h>
|
||||
|
||||
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
|
||||
|
||||
|
||||
|
||||
// sse/sse2/ssse3:
|
||||
#include <tmmintrin.h>
|
||||
// sha*:
|
||||
#include <immintrin.h>
|
||||
|
||||
#if defined (__clang__) && defined(_MSC_VER)
|
||||
#if !defined(__SHA__)
|
||||
#include <shaintrin.h>
|
||||
#endif
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
|
||||
// #include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef USE_MY_MM
|
||||
#include "My_mm.h"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -87,86 +69,71 @@ SHA:
|
|||
_mm_sha1*
|
||||
*/
|
||||
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define XOR_SI128(dest, src) dest = _mm_xor_si128(dest, src);
|
||||
#define SHUFFLE_EPI8(dest, mask) dest = _mm_shuffle_epi8(dest, mask);
|
||||
#define SHUFFLE_EPI32(dest, mask) dest = _mm_shuffle_epi32(dest, mask);
|
||||
|
||||
#define SHA1_RND4(abcd, e0, f) abcd = _mm_sha1rnds4_epu32(abcd, e0, f);
|
||||
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
|
||||
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
|
||||
|
||||
#ifdef __clang__
|
||||
#define SHA1_RNDS4_RET_TYPE_CAST (__m128i)
|
||||
#else
|
||||
#define SHA1_RNDS4_RET_TYPE_CAST
|
||||
#endif
|
||||
#define SHA1_RND4(abcd, e0, f) abcd = SHA1_RNDS4_RET_TYPE_CAST _mm_sha1rnds4_epu32(abcd, e0, f);
|
||||
#define SHA1_NEXTE(e, m) e = _mm_sha1nexte_epu32(e, m);
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define SHA1_MSG1(dest, src) dest = _mm_sha1msg1_epu32(dest, src);
|
||||
#define SHA1_MSG2(dest, src) dest = _mm_sha1msg2_epu32(dest, src);
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
|
||||
SHUFFLE_EPI8(m, mask); \
|
||||
|
||||
#define SM1(m0, m1, m2, m3) \
|
||||
SHA1_MSG1(m0, m1); \
|
||||
|
||||
#define SM2(m0, m1, m2, m3) \
|
||||
XOR_SI128(m3, m1); \
|
||||
SHA1_MSG2(m3, m2); \
|
||||
|
||||
#define SM3(m0, m1, m2, m3) \
|
||||
XOR_SI128(m3, m1); \
|
||||
SM1(m0, m1, m2, m3) \
|
||||
SHA1_MSG2(m3, m2); \
|
||||
SHUFFLE_EPI8(m, mask) \
|
||||
|
||||
#define NNN(m0, m1, m2, m3)
|
||||
|
||||
#define SM1(m0, m1, m2, m3) \
|
||||
SHA1_MSG1(m0, m1) \
|
||||
|
||||
#define SM2(m0, m1, m2, m3) \
|
||||
XOR_SI128(m3, m1) \
|
||||
SHA1_MSG2(m3, m2) \
|
||||
|
||||
#define SM3(m0, m1, m2, m3) \
|
||||
XOR_SI128(m3, m1) \
|
||||
SM1(m0, m1, m2, m3) \
|
||||
SHA1_MSG2(m3, m2) \
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define R4(k, e0, e1, m0, m1, m2, m3, OP) \
|
||||
#define R4(k, m0, m1, m2, m3, e0, e1, OP) \
|
||||
e1 = abcd; \
|
||||
SHA1_RND4(abcd, e0, (k) / 5); \
|
||||
SHA1_NEXTE(e1, m1); \
|
||||
OP(m0, m1, m2, m3); \
|
||||
SHA1_RND4(abcd, e0, (k) / 5) \
|
||||
SHA1_NEXTE(e1, m1) \
|
||||
OP(m0, m1, m2, m3) \
|
||||
|
||||
|
||||
|
||||
#define R16(k, mx, OP0, OP1, OP2, OP3) \
|
||||
R4 ( (k)*4+0, e0,e1, m0,m1,m2,m3, OP0 ) \
|
||||
R4 ( (k)*4+1, e1,e0, m1,m2,m3,m0, OP1 ) \
|
||||
R4 ( (k)*4+2, e0,e1, m2,m3,m0,m1, OP2 ) \
|
||||
R4 ( (k)*4+3, e1,e0, m3,mx,m1,m2, OP3 ) \
|
||||
R4 ( (k)*4+0, m0,m1,m2,m3, e0,e1, OP0 ) \
|
||||
R4 ( (k)*4+1, m1,m2,m3,m0, e1,e0, OP1 ) \
|
||||
R4 ( (k)*4+2, m2,m3,m0,m1, e0,e1, OP2 ) \
|
||||
R4 ( (k)*4+3, m3,mx,m1,m2, e1,e0, OP3 ) \
|
||||
|
||||
#define PREPARE_STATE \
|
||||
SHUFFLE_EPI32 (abcd, 0x1B); \
|
||||
SHUFFLE_EPI32 (e0, 0x1B); \
|
||||
SHUFFLE_EPI32 (abcd, 0x1B) \
|
||||
SHUFFLE_EPI32 (e0, 0x1B) \
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
const __m128i mask = _mm_set_epi32(0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f);
|
||||
|
||||
__m128i abcd, e0;
|
||||
|
||||
__m128i abcd, e0;
|
||||
|
||||
if (numBlocks == 0)
|
||||
return;
|
||||
|
||||
|
|
@ -190,15 +157,15 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
|
|||
LOAD_SHUFFLE (m2, 2)
|
||||
LOAD_SHUFFLE (m3, 3)
|
||||
|
||||
ADD_EPI32(e0, m0);
|
||||
ADD_EPI32(e0, m0)
|
||||
|
||||
R16 ( 0, m0, SM1, SM3, SM3, SM3 );
|
||||
R16 ( 1, m0, SM3, SM3, SM3, SM3 );
|
||||
R16 ( 2, m0, SM3, SM3, SM3, SM3 );
|
||||
R16 ( 3, m0, SM3, SM3, SM3, SM3 );
|
||||
R16 ( 4, e2, SM2, NNN, NNN, NNN );
|
||||
R16 ( 0, m0, SM1, SM3, SM3, SM3 )
|
||||
R16 ( 1, m0, SM3, SM3, SM3, SM3 )
|
||||
R16 ( 2, m0, SM3, SM3, SM3, SM3 )
|
||||
R16 ( 3, m0, SM3, SM3, SM3, SM3 )
|
||||
R16 ( 4, e2, SM2, NNN, NNN, NNN )
|
||||
|
||||
ADD_EPI32(abcd, abcd_save);
|
||||
ADD_EPI32(abcd, abcd_save)
|
||||
|
||||
data += 64;
|
||||
}
|
||||
|
|
@ -207,78 +174,155 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
|
|||
PREPARE_STATE
|
||||
|
||||
_mm_storeu_si128((__m128i *) (void *) state, abcd);
|
||||
*(state+4) = (UInt32)_mm_cvtsi128_si32(e0);
|
||||
*(state + 4) = (UInt32)_mm_cvtsi128_si32(e0);
|
||||
}
|
||||
|
||||
#endif // USE_HW_SHA
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) \
|
||||
&& (!defined(Z7_MSC_VER_ORIGINAL) || (_MSC_VER >= 1929) && (_MSC_FULL_VER >= 192930037))
|
||||
#if defined(__ARM_FEATURE_SHA2) \
|
||||
|| defined(__ARM_FEATURE_CRYPTO)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if defined(__ARM_FP) && \
|
||||
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| !defined(Z7_CLANG_VERSION) \
|
||||
|| defined(__ARM_NEON) && \
|
||||
(Z7_CLANG_VERSION < 170000 || \
|
||||
Z7_CLANG_VERSION > 170001)
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 6) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if _MSC_VER >= 1910
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("=== Sha1 HW === ")
|
||||
// __ARM_FEATURE_CRYPTO macro is deprecated in favor of the finer grained feature macro __ARM_FEATURE_SHA2
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if !defined(__ARM_FEATURE_SHA2) && \
|
||||
!defined(__ARM_FEATURE_CRYPTO)
|
||||
#ifdef MY_CPU_ARM64
|
||||
#if defined(__clang__)
|
||||
#define ATTRIB_SHA __attribute__((__target__("crypto")))
|
||||
#else
|
||||
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
|
||||
#endif
|
||||
#else
|
||||
#if defined(__clang__) && (__clang_major__ >= 1)
|
||||
#define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
|
||||
#else
|
||||
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
// _MSC_VER
|
||||
// for arm32
|
||||
#define _ARM_USE_NEW_NEON_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
|
||||
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(__clang__) && __clang_major__ < 16
|
||||
#if !defined(__ARM_FEATURE_SHA2) && \
|
||||
!defined(__ARM_FEATURE_CRYPTO)
|
||||
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
|
||||
// #if defined(__clang__) && __clang_major__ < 13
|
||||
#define __ARM_FEATURE_CRYPTO 1
|
||||
// #else
|
||||
#define __ARM_FEATURE_SHA2 1
|
||||
// #endif
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
#endif // clang
|
||||
|
||||
#if defined(__clang__)
|
||||
|
||||
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("#define __ARM_ARCH 8")
|
||||
#undef __ARM_ARCH
|
||||
#define __ARM_ARCH 8
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
|
||||
#endif // clang
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
|
||||
defined(__ARM_FEATURE_CRYPTO) && \
|
||||
defined(__ARM_FEATURE_SHA2)
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#undef __ARM_FEATURE_CRYPTO
|
||||
#undef __ARM_FEATURE_SHA2
|
||||
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
|
||||
#endif
|
||||
|
||||
#endif // Z7_MSC_VER_ORIGINAL
|
||||
|
||||
typedef uint32x4_t v128;
|
||||
// typedef __n128 v128; // MSVC
|
||||
|
||||
#ifdef MY_CPU_BE
|
||||
#define MY_rev32_for_LE(x)
|
||||
#else
|
||||
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
|
||||
// the bug in clang 3.8.1:
|
||||
// __builtin_neon_vgetq_lane_i32((int8x16_t)__s0, __p1);
|
||||
#if defined(__clang__) && (__clang_major__ <= 9)
|
||||
#pragma GCC diagnostic ignored "-Wvector-conversion"
|
||||
#endif
|
||||
|
||||
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
|
||||
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
|
||||
#ifdef MY_CPU_BE
|
||||
#define MY_rev32_for_LE(x) x
|
||||
#else
|
||||
#define MY_rev32_for_LE(x) vrev32q_u8(x)
|
||||
#endif
|
||||
|
||||
#define LOAD_128_32(_p) vld1q_u32(_p)
|
||||
#define LOAD_128_8(_p) vld1q_u8 (_p)
|
||||
#define STORE_128_32(_p, _v) vst1q_u32(_p, _v)
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = LOAD_128((data + (k) * 16)); \
|
||||
MY_rev32_for_LE(m); \
|
||||
m = vreinterpretq_u32_u8( \
|
||||
MY_rev32_for_LE( \
|
||||
LOAD_128_8(data + (k) * 16))); \
|
||||
|
||||
#define SU0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
|
||||
#define SU1(dest, src) dest = vsha1su1q_u32(dest, src);
|
||||
#define C(e) abcd = vsha1cq_u32(abcd, e, t);
|
||||
#define P(e) abcd = vsha1pq_u32(abcd, e, t);
|
||||
#define M(e) abcd = vsha1mq_u32(abcd, e, t);
|
||||
#define N0(dest, src2, src3)
|
||||
#define N1(dest, src)
|
||||
#define U0(dest, src2, src3) dest = vsha1su0q_u32(dest, src2, src3);
|
||||
#define U1(dest, src) dest = vsha1su1q_u32(dest, src);
|
||||
#define C(e) abcd = vsha1cq_u32(abcd, e, t)
|
||||
#define P(e) abcd = vsha1pq_u32(abcd, e, t)
|
||||
#define M(e) abcd = vsha1mq_u32(abcd, e, t)
|
||||
#define H(e) e = vsha1h_u32(vgetq_lane_u32(abcd, 0))
|
||||
#define T(m, c) t = vaddq_u32(m, c)
|
||||
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#define R16(d0,d1,d2,d3, f0,z0, f1,z1, f2,z2, f3,z3, w0,w1,w2,w3) \
|
||||
T(m0, d0); f0(m3, m0, m1) z0(m2, m1) H(e1); w0(e0); \
|
||||
T(m1, d1); f1(m0, m1, m2) z1(m3, m2) H(e0); w1(e1); \
|
||||
T(m2, d2); f2(m1, m2, m3) z2(m0, m3) H(e1); w2(e0); \
|
||||
T(m3, d3); f3(m2, m3, m0) z3(m1, m0) H(e0); w3(e1); \
|
||||
|
||||
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
v128 abcd;
|
||||
v128 c0, c1, c2, c3;
|
||||
|
|
@ -292,7 +336,7 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
|
|||
c2 = vdupq_n_u32(0x8f1bbcdc);
|
||||
c3 = vdupq_n_u32(0xca62c1d6);
|
||||
|
||||
abcd = LOAD_128(&state[0]);
|
||||
abcd = LOAD_128_32(&state[0]);
|
||||
e0 = state[4];
|
||||
|
||||
do
|
||||
|
|
@ -310,26 +354,11 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
|
|||
LOAD_SHUFFLE (m2, 2)
|
||||
LOAD_SHUFFLE (m3, 3)
|
||||
|
||||
T(m0, c0); H(e1); C(e0);
|
||||
T(m1, c0); SU0(m0, m1, m2); H(e0); C(e1);
|
||||
T(m2, c0); SU0(m1, m2, m3); SU1(m0, m3); H(e1); C(e0);
|
||||
T(m3, c0); SU0(m2, m3, m0); SU1(m1, m0); H(e0); C(e1);
|
||||
T(m0, c0); SU0(m3, m0, m1); SU1(m2, m1); H(e1); C(e0);
|
||||
T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
|
||||
T(m2, c1); SU0(m1, m2, m3); SU1(m0, m3); H(e1); P(e0);
|
||||
T(m3, c1); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
|
||||
T(m0, c1); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
|
||||
T(m1, c1); SU0(m0, m1, m2); SU1(m3, m2); H(e0); P(e1);
|
||||
T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
|
||||
T(m3, c2); SU0(m2, m3, m0); SU1(m1, m0); H(e0); M(e1);
|
||||
T(m0, c2); SU0(m3, m0, m1); SU1(m2, m1); H(e1); M(e0);
|
||||
T(m1, c2); SU0(m0, m1, m2); SU1(m3, m2); H(e0); M(e1);
|
||||
T(m2, c2); SU0(m1, m2, m3); SU1(m0, m3); H(e1); M(e0);
|
||||
T(m3, c3); SU0(m2, m3, m0); SU1(m1, m0); H(e0); P(e1);
|
||||
T(m0, c3); SU0(m3, m0, m1); SU1(m2, m1); H(e1); P(e0);
|
||||
T(m1, c3); SU1(m3, m2); H(e0); P(e1);
|
||||
T(m2, c3); H(e1); P(e0);
|
||||
T(m3, c3); H(e0); P(e1);
|
||||
R16 ( c0,c0,c0,c0, N0,N1, U0,N1, U0,U1, U0,U1, C,C,C,C )
|
||||
R16 ( c0,c1,c1,c1, U0,U1, U0,U1, U0,U1, U0,U1, C,P,P,P )
|
||||
R16 ( c1,c1,c2,c2, U0,U1, U0,U1, U0,U1, U0,U1, P,P,M,M )
|
||||
R16 ( c2,c2,c2,c3, U0,U1, U0,U1, U0,U1, U0,U1, M,M,M,P )
|
||||
R16 ( c3,c3,c3,c3, U0,U1, N0,U1, N0,N1, N0,N1, P,P,P,P )
|
||||
|
||||
abcd = vaddq_u32(abcd, abcd_save);
|
||||
e0 += e0_save;
|
||||
|
|
@ -338,7 +367,7 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
|
|||
}
|
||||
while (--numBlocks);
|
||||
|
||||
STORE_128(&state[0], abcd);
|
||||
STORE_128_32(&state[0], abcd);
|
||||
state[4] = e0;
|
||||
}
|
||||
|
||||
|
|
@ -346,19 +375,16 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t
|
|||
|
||||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
|
||||
#ifndef USE_HW_SHA
|
||||
|
||||
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
|
||||
// #error Stop_Compiling_UNSUPPORTED_SHA
|
||||
// #include <stdlib.h>
|
||||
|
||||
// #include "Sha1.h"
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
|
||||
// #if defined(_MSC_VER)
|
||||
#pragma message("Sha1 HW-SW stub was used")
|
||||
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
// #endif
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks (UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
Sha1_UpdateBlocks(state, data, numBlocks);
|
||||
/*
|
||||
|
|
@ -369,5 +395,30 @@ void MY_FAST_CALL Sha1_UpdateBlocks_HW(UInt32 state[5], const Byte *data, size_t
|
|||
return;
|
||||
*/
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#undef U0
|
||||
#undef U1
|
||||
#undef N0
|
||||
#undef N1
|
||||
#undef C
|
||||
#undef P
|
||||
#undef M
|
||||
#undef H
|
||||
#undef T
|
||||
#undef MY_rev32_for_LE
|
||||
#undef NNN
|
||||
#undef LOAD_128
|
||||
#undef STORE_128
|
||||
#undef LOAD_SHUFFLE
|
||||
#undef SM1
|
||||
#undef SM2
|
||||
#undef SM3
|
||||
#undef NNN
|
||||
#undef R4
|
||||
#undef R16
|
||||
#undef PREPARE_STATE
|
||||
#undef USE_HW_SHA
|
||||
#undef ATTRIB_SHA
|
||||
#undef USE_VER_MIN
|
||||
#undef Z7_USE_HW_SHA_STUB
|
||||
|
|
|
|||
332
C/Sha256.c
332
C/Sha256.c
|
|
@ -1,64 +1,60 @@
|
|||
/* Sha256.c -- SHA-256 Hash
|
||||
2021-04-01 : Igor Pavlov : Public domain
|
||||
: Igor Pavlov : Public domain
|
||||
This code is based on public domain code from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "CpuArch.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "Sha256.h"
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||
// #define USE_MY_MM
|
||||
#endif
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER >= 1200
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1800) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) \
|
||||
|| defined(_MSC_VER) && (_MSC_VER >= 1200)
|
||||
#define Z7_COMPILER_SHA256_SUPPORTED
|
||||
#endif
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
#ifdef _MSC_VER
|
||||
#if _MSC_VER >= 1910
|
||||
#define _SHA_SUPPORTED
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
|
||||
|
||||
#if defined(__ARM_FEATURE_SHA2) \
|
||||
|| defined(__ARM_FEATURE_CRYPTO)
|
||||
#define Z7_COMPILER_SHA256_SUPPORTED
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if defined(__ARM_FP) && \
|
||||
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| !defined(Z7_CLANG_VERSION) \
|
||||
|| defined(__ARM_NEON) && \
|
||||
(Z7_CLANG_VERSION < 170000 || \
|
||||
Z7_CLANG_VERSION > 170001)
|
||||
#define Z7_COMPILER_SHA256_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 6) // fix that check
|
||||
#define _SHA_SUPPORTED
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
#ifdef _SHA_SUPPORTED
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef Z7_COMPILER_SHA256_SUPPORTED
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
|
||||
static SHA256_FUNC_UPDATE_BLOCKS g_FUNC_UPDATE_BLOCKS_HW;
|
||||
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS = Sha256_UpdateBlocks;
|
||||
static SHA256_FUNC_UPDATE_BLOCKS g_SHA256_FUNC_UPDATE_BLOCKS_HW;
|
||||
|
||||
#define UPDATE_BLOCKS(p) p->func_UpdateBlocks
|
||||
#define SHA256_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
|
||||
#else
|
||||
#define UPDATE_BLOCKS(p) Sha256_UpdateBlocks
|
||||
#define SHA256_UPDATE_BLOCKS(p) Sha256_UpdateBlocks
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -66,16 +62,16 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
|
|||
{
|
||||
SHA256_FUNC_UPDATE_BLOCKS func = Sha256_UpdateBlocks;
|
||||
|
||||
#ifdef _SHA_SUPPORTED
|
||||
#ifdef Z7_COMPILER_SHA256_SUPPORTED
|
||||
if (algo != SHA256_ALGO_SW)
|
||||
{
|
||||
if (algo == SHA256_ALGO_DEFAULT)
|
||||
func = g_FUNC_UPDATE_BLOCKS;
|
||||
func = g_SHA256_FUNC_UPDATE_BLOCKS;
|
||||
else
|
||||
{
|
||||
if (algo != SHA256_ALGO_HW)
|
||||
return False;
|
||||
func = g_FUNC_UPDATE_BLOCKS_HW;
|
||||
func = g_SHA256_FUNC_UPDATE_BLOCKS_HW;
|
||||
if (!func)
|
||||
return False;
|
||||
}
|
||||
|
|
@ -85,24 +81,25 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
|
|||
return False;
|
||||
#endif
|
||||
|
||||
p->func_UpdateBlocks = func;
|
||||
p->v.vars.func_UpdateBlocks = func;
|
||||
return True;
|
||||
}
|
||||
|
||||
|
||||
/* define it for speed optimization */
|
||||
|
||||
#ifdef _SFX
|
||||
#ifdef Z7_SFX
|
||||
#define STEP_PRE 1
|
||||
#define STEP_MAIN 1
|
||||
#else
|
||||
#define STEP_PRE 2
|
||||
#define STEP_MAIN 4
|
||||
// #define _SHA256_UNROLL
|
||||
// #define Z7_SHA256_UNROLL
|
||||
#endif
|
||||
|
||||
#undef Z7_SHA256_BIG_W
|
||||
#if STEP_MAIN != 16
|
||||
#define _SHA256_BIG_W
|
||||
#define Z7_SHA256_BIG_W
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -110,7 +107,7 @@ BoolInt Sha256_SetFunction(CSha256 *p, unsigned algo)
|
|||
|
||||
void Sha256_InitState(CSha256 *p)
|
||||
{
|
||||
p->count = 0;
|
||||
p->v.vars.count = 0;
|
||||
p->state[0] = 0x6a09e667;
|
||||
p->state[1] = 0xbb67ae85;
|
||||
p->state[2] = 0x3c6ef372;
|
||||
|
|
@ -121,21 +118,28 @@ void Sha256_InitState(CSha256 *p)
|
|||
p->state[7] = 0x5be0cd19;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void Sha256_Init(CSha256 *p)
|
||||
{
|
||||
p->func_UpdateBlocks =
|
||||
#ifdef _SHA_SUPPORTED
|
||||
g_FUNC_UPDATE_BLOCKS;
|
||||
p->v.vars.func_UpdateBlocks =
|
||||
#ifdef Z7_COMPILER_SHA256_SUPPORTED
|
||||
g_SHA256_FUNC_UPDATE_BLOCKS;
|
||||
#else
|
||||
NULL;
|
||||
#endif
|
||||
Sha256_InitState(p);
|
||||
}
|
||||
|
||||
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x, 22))
|
||||
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x, 25))
|
||||
#define S0(x) (rotrFixed(x, 2) ^ rotrFixed(x,13) ^ rotrFixed(x,22))
|
||||
#define S1(x) (rotrFixed(x, 6) ^ rotrFixed(x,11) ^ rotrFixed(x,25))
|
||||
#define s0(x) (rotrFixed(x, 7) ^ rotrFixed(x,18) ^ (x >> 3))
|
||||
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >> 10))
|
||||
#define s1(x) (rotrFixed(x,17) ^ rotrFixed(x,19) ^ (x >>10))
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
|
||||
|
|
@ -145,7 +149,7 @@ void Sha256_Init(CSha256 *p)
|
|||
|
||||
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
|
||||
|
||||
#ifdef _SHA256_BIG_W
|
||||
#ifdef Z7_SHA256_BIG_W
|
||||
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
|
||||
#define w(j, i) W[(size_t)(j) + i]
|
||||
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
|
||||
|
|
@ -176,7 +180,7 @@ void Sha256_Init(CSha256 *p)
|
|||
#define R1_PRE(i) T1( W_PRE, i)
|
||||
#define R1_MAIN(i) T1( W_MAIN, i)
|
||||
|
||||
#if (!defined(_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
|
||||
#if (!defined(Z7_SHA256_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
|
||||
#define R2_MAIN(i) \
|
||||
R1_MAIN(i) \
|
||||
R1_MAIN(i + 1) \
|
||||
|
|
@ -185,7 +189,7 @@ void Sha256_Init(CSha256 *p)
|
|||
|
||||
|
||||
|
||||
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
|
||||
#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
|
||||
|
||||
#define T4( a,b,c,d,e,f,g,h, wx, i) \
|
||||
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
|
||||
|
|
@ -223,14 +227,10 @@ void Sha256_Init(CSha256 *p)
|
|||
|
||||
#endif
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
// static
|
||||
extern MY_ALIGN(64)
|
||||
const UInt32 SHA256_K_ARRAY[64];
|
||||
|
||||
MY_ALIGN(64)
|
||||
const UInt32 SHA256_K_ARRAY[64] = {
|
||||
extern
|
||||
MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64];
|
||||
MY_ALIGN(64) const UInt32 SHA256_K_ARRAY[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
|
|
@ -249,27 +249,29 @@ const UInt32 SHA256_K_ARRAY[64] = {
|
|||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define K SHA256_K_ARRAY
|
||||
|
||||
|
||||
MY_NO_INLINE
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
Z7_NO_INLINE
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
UInt32 W
|
||||
#ifdef _SHA256_BIG_W
|
||||
#ifdef Z7_SHA256_BIG_W
|
||||
[64];
|
||||
#else
|
||||
#else
|
||||
[16];
|
||||
#endif
|
||||
|
||||
#endif
|
||||
unsigned j;
|
||||
|
||||
UInt32 a,b,c,d,e,f,g,h;
|
||||
|
||||
#if !defined(_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
|
||||
#if !defined(Z7_SHA256_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
|
||||
UInt32 tmp;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (numBlocks == 0) return;
|
||||
|
||||
a = state[0];
|
||||
b = state[1];
|
||||
c = state[2];
|
||||
|
|
@ -279,7 +281,7 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
|
|||
g = state[6];
|
||||
h = state[7];
|
||||
|
||||
while (numBlocks)
|
||||
do
|
||||
{
|
||||
|
||||
for (j = 0; j < 16; j += STEP_PRE)
|
||||
|
|
@ -297,12 +299,12 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
|
|||
|
||||
#else
|
||||
|
||||
R1_PRE(0);
|
||||
R1_PRE(0)
|
||||
#if STEP_PRE >= 2
|
||||
R1_PRE(1);
|
||||
R1_PRE(1)
|
||||
#if STEP_PRE >= 4
|
||||
R1_PRE(2);
|
||||
R1_PRE(3);
|
||||
R1_PRE(2)
|
||||
R1_PRE(3)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -311,32 +313,32 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
|
|||
|
||||
for (j = 16; j < 64; j += STEP_MAIN)
|
||||
{
|
||||
#if defined(_SHA256_UNROLL) && STEP_MAIN >= 8
|
||||
#if defined(Z7_SHA256_UNROLL) && STEP_MAIN >= 8
|
||||
|
||||
#if STEP_MAIN < 8
|
||||
R4_MAIN(0);
|
||||
R4_MAIN(0)
|
||||
#else
|
||||
R8_MAIN(0);
|
||||
R8_MAIN(0)
|
||||
#if STEP_MAIN == 16
|
||||
R8_MAIN(8);
|
||||
R8_MAIN(8)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
R1_MAIN(0);
|
||||
R1_MAIN(0)
|
||||
#if STEP_MAIN >= 2
|
||||
R1_MAIN(1);
|
||||
R1_MAIN(1)
|
||||
#if STEP_MAIN >= 4
|
||||
R2_MAIN(2);
|
||||
R2_MAIN(2)
|
||||
#if STEP_MAIN >= 8
|
||||
R2_MAIN(4);
|
||||
R2_MAIN(6);
|
||||
R2_MAIN(4)
|
||||
R2_MAIN(6)
|
||||
#if STEP_MAIN >= 16
|
||||
R2_MAIN(8);
|
||||
R2_MAIN(10);
|
||||
R2_MAIN(12);
|
||||
R2_MAIN(14);
|
||||
R2_MAIN(8)
|
||||
R2_MAIN(10)
|
||||
R2_MAIN(12)
|
||||
R2_MAIN(14)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -353,40 +355,27 @@ void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t
|
|||
g += state[6]; state[6] = g;
|
||||
h += state[7]; state[7] = h;
|
||||
|
||||
data += 64;
|
||||
numBlocks--;
|
||||
data += SHA256_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
/* Wipe variables */
|
||||
/* memset(W, 0, sizeof(W)); */
|
||||
while (--numBlocks);
|
||||
}
|
||||
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
#undef K
|
||||
|
||||
#define Sha256_UpdateBlock(p) UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
#define Sha256_UpdateBlock(p) SHA256_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
|
||||
void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
{
|
||||
unsigned pos = (unsigned)p->count & 0x3F;
|
||||
unsigned num;
|
||||
|
||||
p->count += size;
|
||||
|
||||
num = 64 - pos;
|
||||
const unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
|
||||
const unsigned num = SHA256_BLOCK_SIZE - pos;
|
||||
p->v.vars.count += size;
|
||||
if (num > size)
|
||||
{
|
||||
memcpy(p->buffer + pos, data, size);
|
||||
return;
|
||||
}
|
||||
|
||||
if (pos != 0)
|
||||
{
|
||||
size -= num;
|
||||
|
|
@ -396,9 +385,10 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
|
|||
}
|
||||
}
|
||||
{
|
||||
size_t numBlocks = size >> 6;
|
||||
UPDATE_BLOCKS(p)(p->state, data, numBlocks);
|
||||
size &= 0x3F;
|
||||
const size_t numBlocks = size >> 6;
|
||||
// if (numBlocks)
|
||||
SHA256_UPDATE_BLOCKS(p)(p->state, data, numBlocks);
|
||||
size &= SHA256_BLOCK_SIZE - 1;
|
||||
if (size == 0)
|
||||
return;
|
||||
data += (numBlocks << 6);
|
||||
|
|
@ -409,78 +399,94 @@ void Sha256_Update(CSha256 *p, const Byte *data, size_t size)
|
|||
|
||||
void Sha256_Final(CSha256 *p, Byte *digest)
|
||||
{
|
||||
unsigned pos = (unsigned)p->count & 0x3F;
|
||||
unsigned i;
|
||||
|
||||
unsigned pos = (unsigned)p->v.vars.count & (SHA256_BLOCK_SIZE - 1);
|
||||
p->buffer[pos++] = 0x80;
|
||||
|
||||
if (pos > (64 - 8))
|
||||
if (pos > (SHA256_BLOCK_SIZE - 4 * 2))
|
||||
{
|
||||
while (pos != 64) { p->buffer[pos++] = 0; }
|
||||
// memset(&p->buf.buffer[pos], 0, 64 - pos);
|
||||
while (pos != SHA256_BLOCK_SIZE) { p->buffer[pos++] = 0; }
|
||||
// memset(&p->buf.buffer[pos], 0, SHA256_BLOCK_SIZE - pos);
|
||||
Sha256_UpdateBlock(p);
|
||||
pos = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
if (pos & 3)
|
||||
memset(&p->buffer[pos], 0, (SHA256_BLOCK_SIZE - 4 * 2) - pos);
|
||||
{
|
||||
p->buffer[pos] = 0;
|
||||
p->buffer[pos + 1] = 0;
|
||||
p->buffer[pos + 2] = 0;
|
||||
pos += 3;
|
||||
pos &= ~3;
|
||||
const UInt64 numBits = p->v.vars.count << 3;
|
||||
SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 2, (UInt32)(numBits >> 32))
|
||||
SetBe32(p->buffer + SHA256_BLOCK_SIZE - 4 * 1, (UInt32)(numBits))
|
||||
}
|
||||
{
|
||||
for (; pos < 64 - 8; pos += 4)
|
||||
*(UInt32 *)(&p->buffer[pos]) = 0;
|
||||
}
|
||||
*/
|
||||
|
||||
memset(&p->buffer[pos], 0, (64 - 8) - pos);
|
||||
|
||||
{
|
||||
UInt64 numBits = (p->count << 3);
|
||||
SetBe32(p->buffer + 64 - 8, (UInt32)(numBits >> 32));
|
||||
SetBe32(p->buffer + 64 - 4, (UInt32)(numBits));
|
||||
}
|
||||
|
||||
Sha256_UpdateBlock(p);
|
||||
|
||||
for (i = 0; i < 8; i += 2)
|
||||
#if 1 && defined(MY_CPU_BE)
|
||||
memcpy(digest, p->state, SHA256_DIGEST_SIZE);
|
||||
#else
|
||||
{
|
||||
UInt32 v0 = p->state[i];
|
||||
UInt32 v1 = p->state[(size_t)i + 1];
|
||||
SetBe32(digest , v0);
|
||||
SetBe32(digest + 4, v1);
|
||||
digest += 8;
|
||||
unsigned i;
|
||||
for (i = 0; i < 8; i += 2)
|
||||
{
|
||||
const UInt32 v0 = p->state[i];
|
||||
const UInt32 v1 = p->state[(size_t)i + 1];
|
||||
SetBe32(digest , v0)
|
||||
SetBe32(digest + 4, v1)
|
||||
digest += 4 * 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
Sha256_InitState(p);
|
||||
}
|
||||
|
||||
|
||||
void Sha256Prepare()
|
||||
void Sha256Prepare(void)
|
||||
{
|
||||
#ifdef _SHA_SUPPORTED
|
||||
#ifdef Z7_COMPILER_SHA256_SUPPORTED
|
||||
SHA256_FUNC_UPDATE_BLOCKS f, f_hw;
|
||||
f = Sha256_UpdateBlocks;
|
||||
f_hw = NULL;
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#ifndef USE_MY_MM
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
if (CPU_IsSupported_SHA()
|
||||
&& CPU_IsSupported_SSSE3()
|
||||
// && CPU_IsSupported_SSE41()
|
||||
)
|
||||
#endif
|
||||
#else
|
||||
#else
|
||||
if (CPU_IsSupported_SHA2())
|
||||
#endif
|
||||
#endif
|
||||
{
|
||||
// printf("\n========== HW SHA256 ======== \n");
|
||||
f = f_hw = Sha256_UpdateBlocks_HW;
|
||||
}
|
||||
g_FUNC_UPDATE_BLOCKS = f;
|
||||
g_FUNC_UPDATE_BLOCKS_HW = f_hw;
|
||||
#endif
|
||||
g_SHA256_FUNC_UPDATE_BLOCKS = f;
|
||||
g_SHA256_FUNC_UPDATE_BLOCKS_HW = f_hw;
|
||||
#endif
|
||||
}
|
||||
|
||||
#undef U64C
|
||||
#undef K
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
#undef Ch
|
||||
#undef Maj
|
||||
#undef W_MAIN
|
||||
#undef W_PRE
|
||||
#undef w
|
||||
#undef blk2_main
|
||||
#undef blk2
|
||||
#undef T1
|
||||
#undef T4
|
||||
#undef T8
|
||||
#undef R1_PRE
|
||||
#undef R1_MAIN
|
||||
#undef R2_MAIN
|
||||
#undef R4
|
||||
#undef R4_PRE
|
||||
#undef R4_MAIN
|
||||
#undef R8
|
||||
#undef R8_PRE
|
||||
#undef R8_MAIN
|
||||
#undef STEP_PRE
|
||||
#undef STEP_MAIN
|
||||
#undef Z7_SHA256_BIG_W
|
||||
#undef Z7_SHA256_UNROLL
|
||||
#undef Z7_COMPILER_SHA256_SUPPORTED
|
||||
|
|
|
|||
26
C/Sha256.h
26
C/Sha256.h
|
|
@ -1,8 +1,8 @@
|
|||
/* Sha256.h -- SHA-256 Hash
|
||||
2021-01-01 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __7Z_SHA256_H
|
||||
#define __7Z_SHA256_H
|
||||
#ifndef ZIP7_INC_SHA256_H
|
||||
#define ZIP7_INC_SHA256_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
|
|
@ -14,7 +14,10 @@ EXTERN_C_BEGIN
|
|||
#define SHA256_BLOCK_SIZE (SHA256_NUM_BLOCK_WORDS * 4)
|
||||
#define SHA256_DIGEST_SIZE (SHA256_NUM_DIGEST_WORDS * 4)
|
||||
|
||||
typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
|
||||
|
||||
typedef void (Z7_FASTCALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
if (the system supports different SHA256 code implementations)
|
||||
|
|
@ -32,9 +35,16 @@ typedef void (MY_FAST_CALL *SHA256_FUNC_UPDATE_BLOCKS)(UInt32 state[8], const By
|
|||
|
||||
typedef struct
|
||||
{
|
||||
SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
|
||||
UInt64 count;
|
||||
UInt64 __pad_2[2];
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
SHA256_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
|
||||
UInt64 count;
|
||||
} vars;
|
||||
UInt64 _pad_64bit[4];
|
||||
void *_pad_align_ptr[2];
|
||||
} v;
|
||||
UInt32 state[SHA256_NUM_DIGEST_WORDS];
|
||||
|
||||
Byte buffer[SHA256_BLOCK_SIZE];
|
||||
|
|
@ -62,7 +72,7 @@ void Sha256_Final(CSha256 *p, Byte *digest);
|
|||
|
||||
|
||||
|
||||
// void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
// void Z7_FASTCALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
call Sha256Prepare() once at program start.
|
||||
|
|
|
|||
356
C/Sha256Opt.c
356
C/Sha256Opt.c
|
|
@ -1,71 +1,53 @@
|
|||
/* Sha256Opt.c -- SHA-256 optimized code for SHA-256 hardware instructions
|
||||
2021-04-01 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if (_MSC_VER < 1900) && (_MSC_VER >= 1200)
|
||||
// #define USE_MY_MM
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Compiler.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1600) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 30800) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 50100) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900)
|
||||
#define USE_HW_SHA
|
||||
#if !defined(__INTEL_COMPILER)
|
||||
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
|
||||
#if !defined(__SHA__) || !defined(__SSSE3__)
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
#if defined(_MSC_VER)
|
||||
// SSSE3: for clang-cl:
|
||||
#include <tmmintrin.h>
|
||||
#define __SHA__
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 8) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#ifndef __SHA__
|
||||
#define ATTRIB_SHA __attribute__((__target__("sha,ssse3")))
|
||||
// #pragma GCC target("sha,ssse3")
|
||||
#endif
|
||||
#endif
|
||||
#elif defined(__INTEL_COMPILER)
|
||||
#if (__INTEL_COMPILER >= 1800) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#ifdef USE_MY_MM
|
||||
#define USE_VER_MIN 1300
|
||||
#else
|
||||
#define USE_VER_MIN 1910
|
||||
#endif
|
||||
#if _MSC_VER >= USE_VER_MIN
|
||||
#if (_MSC_VER >= 1900)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
#define Z7_USE_HW_SHA_STUB
|
||||
#endif
|
||||
#endif
|
||||
// #endif // MY_CPU_X86_OR_AMD64
|
||||
#ifndef USE_HW_SHA
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("Sha256 HW")
|
||||
// #include <wmmintrin.h>
|
||||
|
||||
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
|
||||
|
||||
|
||||
|
||||
// sse/sse2/ssse3:
|
||||
#include <tmmintrin.h>
|
||||
// sha*:
|
||||
#include <immintrin.h>
|
||||
|
||||
#if defined (__clang__) && defined(_MSC_VER)
|
||||
#if !defined(__SHA__)
|
||||
#include <shaintrin.h>
|
||||
#endif
|
||||
#else
|
||||
#include <emmintrin.h>
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1600)
|
||||
// #include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef USE_MY_MM
|
||||
#include "My_mm.h"
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -94,60 +76,44 @@ SHA:
|
|||
extern
|
||||
MY_ALIGN(64)
|
||||
const UInt32 SHA256_K_ARRAY[64];
|
||||
|
||||
#define K SHA256_K_ARRAY
|
||||
|
||||
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
|
||||
#define SHA25G_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
|
||||
|
||||
#define ADD_EPI32(dest, src) dest = _mm_add_epi32(dest, src);
|
||||
#define SHA256_MSG1(dest, src) dest = _mm_sha256msg1_epu32(dest, src);
|
||||
#define SHA256_MSG2(dest, src) dest = _mm_sha256msg2_epu32(dest, src);
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = _mm_loadu_si128((const __m128i *)(const void *)(data + (k) * 16)); \
|
||||
m = _mm_shuffle_epi8(m, mask); \
|
||||
|
||||
#define SM1(g0, g1, g2, g3) \
|
||||
SHA256_MSG1(g3, g0); \
|
||||
#define NNN(m0, m1, m2, m3)
|
||||
|
||||
#define SM2(g0, g1, g2, g3) \
|
||||
tmp = _mm_alignr_epi8(g1, g0, 4); \
|
||||
ADD_EPI32(g2, tmp); \
|
||||
SHA25G_MSG2(g2, g1); \
|
||||
|
||||
// #define LS0(k, g0, g1, g2, g3) LOAD_SHUFFLE(g0, k)
|
||||
// #define LS1(k, g0, g1, g2, g3) LOAD_SHUFFLE(g1, k+1)
|
||||
|
||||
|
||||
#define NNN(g0, g1, g2, g3)
|
||||
#define SM1(m1, m2, m3, m0) \
|
||||
SHA256_MSG1(m0, m1); \
|
||||
|
||||
#define SM2(m2, m3, m0, m1) \
|
||||
ADD_EPI32(m0, _mm_alignr_epi8(m3, m2, 4)) \
|
||||
SHA256_MSG2(m0, m3); \
|
||||
|
||||
#define RND2(t0, t1) \
|
||||
t0 = _mm_sha256rnds2_epu32(t0, t1, msg);
|
||||
|
||||
#define RND2_0(m, k) \
|
||||
msg = _mm_add_epi32(m, *(const __m128i *) (const void *) &K[(k) * 4]); \
|
||||
|
||||
|
||||
#define R4(k, m0, m1, m2, m3, OP0, OP1) \
|
||||
msg = _mm_add_epi32(m0, *(const __m128i *) (const void *) &K[(k) * 4]); \
|
||||
RND2(state0, state1); \
|
||||
msg = _mm_shuffle_epi32(msg, 0x0E); \
|
||||
|
||||
|
||||
#define RND2_1 \
|
||||
OP0(m0, m1, m2, m3) \
|
||||
RND2(state1, state0); \
|
||||
|
||||
|
||||
// We use scheme with 3 rounds ahead for SHA256_MSG1 / 2 rounds ahead for SHA256_MSG2
|
||||
|
||||
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
|
||||
RND2_0(g0, k); \
|
||||
OP0(g0, g1, g2, g3); \
|
||||
RND2_1; \
|
||||
OP1(g0, g1, g2, g3); \
|
||||
OP1(m0, m1, m2, m3) \
|
||||
|
||||
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
|
||||
R4 ( (k)*4+0, m0, m1, m2, m3, OP0, OP1 ) \
|
||||
R4 ( (k)*4+1, m1, m2, m3, m0, OP2, OP3 ) \
|
||||
R4 ( (k)*4+2, m2, m3, m0, m1, OP4, OP5 ) \
|
||||
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
|
||||
R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
|
||||
R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
|
||||
R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
|
||||
R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
|
||||
|
||||
#define PREPARE_STATE \
|
||||
tmp = _mm_shuffle_epi32(state0, 0x1B); /* abcd */ \
|
||||
|
|
@ -157,15 +123,16 @@ const UInt32 SHA256_K_ARRAY[64];
|
|||
state1 = _mm_unpackhi_epi64(state1, tmp); /* abef */ \
|
||||
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
const __m128i mask = _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
|
||||
__m128i tmp;
|
||||
__m128i state0, state1;
|
||||
|
||||
|
||||
__m128i tmp, state0, state1;
|
||||
|
||||
if (numBlocks == 0)
|
||||
return;
|
||||
|
|
@ -192,13 +159,13 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
|
||||
|
||||
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
|
||||
|
||||
ADD_EPI32(state0, state0_save);
|
||||
ADD_EPI32(state1, state1_save);
|
||||
ADD_EPI32(state0, state0_save)
|
||||
ADD_EPI32(state1, state1_save)
|
||||
|
||||
data += 64;
|
||||
}
|
||||
|
|
@ -212,19 +179,28 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
|
||||
#endif // USE_HW_SHA
|
||||
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64)
|
||||
|
||||
#if defined(__clang__)
|
||||
#if (__clang_major__ >= 8) // fix that check
|
||||
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
|
||||
|
||||
#if defined(__ARM_FEATURE_SHA2) \
|
||||
|| defined(__ARM_FEATURE_CRYPTO)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| defined(__ARM_ARCH) && (__ARM_ARCH >= 4) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if defined(__ARM_FP) && \
|
||||
( defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 6) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1910)
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
|| !defined(Z7_CLANG_VERSION) \
|
||||
|| defined(__ARM_NEON) && \
|
||||
(Z7_CLANG_VERSION < 170000 || \
|
||||
Z7_CLANG_VERSION > 170001)
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#if (__GNUC__ >= 6) // fix that check
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#elif defined(_MSC_VER)
|
||||
#if _MSC_VER >= 1910
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
@ -232,63 +208,144 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
|
||||
// #pragma message("=== Sha256 HW === ")
|
||||
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if !defined(__ARM_FEATURE_SHA2) && \
|
||||
!defined(__ARM_FEATURE_CRYPTO)
|
||||
#ifdef MY_CPU_ARM64
|
||||
#if defined(__clang__)
|
||||
#define ATTRIB_SHA __attribute__((__target__("crypto")))
|
||||
#else
|
||||
#define ATTRIB_SHA __attribute__((__target__("+crypto")))
|
||||
#endif
|
||||
#else
|
||||
#if defined(__clang__) && (__clang_major__ >= 1)
|
||||
#define ATTRIB_SHA __attribute__((__target__("armv8-a,sha2")))
|
||||
#else
|
||||
#define ATTRIB_SHA __attribute__((__target__("fpu=crypto-neon-fp-armv8")))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#else
|
||||
// _MSC_VER
|
||||
// for arm32
|
||||
#define _ARM_USE_NEW_NEON_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && defined(MY_CPU_ARM64)
|
||||
#if defined(Z7_MSC_VER_ORIGINAL) && defined(MY_CPU_ARM64)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(__clang__) && __clang_major__ < 16
|
||||
#if !defined(__ARM_FEATURE_SHA2) && \
|
||||
!defined(__ARM_FEATURE_CRYPTO)
|
||||
// #pragma message("=== we set __ARM_FEATURE_CRYPTO 1 === ")
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_ARM_FEATURE_CRYPTO_WAS_SET 1
|
||||
// #if defined(__clang__) && __clang_major__ < 13
|
||||
#define __ARM_FEATURE_CRYPTO 1
|
||||
// #else
|
||||
#define __ARM_FEATURE_SHA2 1
|
||||
// #endif
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
#endif // clang
|
||||
|
||||
#if defined(__clang__)
|
||||
|
||||
#if defined(__ARM_ARCH) && __ARM_ARCH < 8
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("#define __ARM_ARCH 8")
|
||||
#undef __ARM_ARCH
|
||||
#define __ARM_ARCH 8
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
|
||||
#endif // clang
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(Z7_ARM_FEATURE_CRYPTO_WAS_SET) && \
|
||||
defined(__ARM_FEATURE_CRYPTO) && \
|
||||
defined(__ARM_FEATURE_SHA2)
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#undef __ARM_FEATURE_CRYPTO
|
||||
#undef __ARM_FEATURE_SHA2
|
||||
#undef Z7_ARM_FEATURE_CRYPTO_WAS_SET
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("=== we undefine __ARM_FEATURE_CRYPTO === ")
|
||||
#endif
|
||||
|
||||
#endif // Z7_MSC_VER_ORIGINAL
|
||||
|
||||
typedef uint32x4_t v128;
|
||||
// typedef __n128 v128; // MSVC
|
||||
|
||||
#ifdef MY_CPU_BE
|
||||
#define MY_rev32_for_LE(x)
|
||||
#define MY_rev32_for_LE(x) x
|
||||
#else
|
||||
#define MY_rev32_for_LE(x) x = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(x)))
|
||||
#define MY_rev32_for_LE(x) vrev32q_u8(x)
|
||||
#endif
|
||||
|
||||
#define LOAD_128(_p) (*(const v128 *)(const void *)(_p))
|
||||
#define STORE_128(_p, _v) *(v128 *)(void *)(_p) = (_v)
|
||||
#if 1 // 0 for debug
|
||||
// for arm32: it works slower by some reason than direct code
|
||||
/*
|
||||
for arm32 it generates:
|
||||
MSVC-2022, GCC-9:
|
||||
vld1.32 {d18,d19}, [r10]
|
||||
vst1.32 {d4,d5}, [r3]
|
||||
vld1.8 {d20-d21}, [r4]
|
||||
there is no align hint (like [r10:128]). So instruction allows unaligned access
|
||||
*/
|
||||
#define LOAD_128_32(_p) vld1q_u32(_p)
|
||||
#define LOAD_128_8(_p) vld1q_u8 (_p)
|
||||
#define STORE_128_32(_p, _v) vst1q_u32(_p, _v)
|
||||
#else
|
||||
/*
|
||||
for arm32:
|
||||
MSVC-2022:
|
||||
vldm r10,{d18,d19}
|
||||
vstm r3,{d4,d5}
|
||||
does it require strict alignment?
|
||||
GCC-9:
|
||||
vld1.64 {d30-d31}, [r0:64]
|
||||
vldr d28, [r0, #16]
|
||||
vldr d29, [r0, #24]
|
||||
vst1.64 {d30-d31}, [r0:64]
|
||||
vstr d28, [r0, #16]
|
||||
vstr d29, [r0, #24]
|
||||
there is hint [r0:64], so does it requires 64-bit alignment.
|
||||
*/
|
||||
#define LOAD_128_32(_p) (*(const v128 *)(const void *)(_p))
|
||||
#define LOAD_128_8(_p) vreinterpretq_u8_u32(*(const v128 *)(const void *)(_p))
|
||||
#define STORE_128_32(_p, _v) *(v128 *)(void *)(_p) = (_v)
|
||||
#endif
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = LOAD_128((data + (k) * 16)); \
|
||||
MY_rev32_for_LE(m); \
|
||||
m = vreinterpretq_u32_u8( \
|
||||
MY_rev32_for_LE( \
|
||||
LOAD_128_8(data + (k) * 16))); \
|
||||
|
||||
// K array must be aligned for 16-bytes at least.
|
||||
extern
|
||||
MY_ALIGN(64)
|
||||
const UInt32 SHA256_K_ARRAY[64];
|
||||
|
||||
#define K SHA256_K_ARRAY
|
||||
|
||||
|
||||
#define SHA256_SU0(dest, src) dest = vsha256su0q_u32(dest, src);
|
||||
#define SHA25G_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
|
||||
#define SHA256_SU1(dest, src2, src3) dest = vsha256su1q_u32(dest, src2, src3);
|
||||
|
||||
#define SM1(g0, g1, g2, g3) SHA256_SU0(g3, g0)
|
||||
#define SM2(g0, g1, g2, g3) SHA25G_SU1(g2, g0, g1)
|
||||
#define NNN(g0, g1, g2, g3)
|
||||
#define SM1(m0, m1, m2, m3) SHA256_SU0(m3, m0)
|
||||
#define SM2(m0, m1, m2, m3) SHA256_SU1(m2, m0, m1)
|
||||
#define NNN(m0, m1, m2, m3)
|
||||
|
||||
|
||||
#define R4(k, g0, g1, g2, g3, OP0, OP1) \
|
||||
msg = vaddq_u32(g0, *(const v128 *) (const void *) &K[(k) * 4]); \
|
||||
#define R4(k, m0, m1, m2, m3, OP0, OP1) \
|
||||
msg = vaddq_u32(m0, *(const v128 *) (const void *) &K[(k) * 4]); \
|
||||
tmp = state0; \
|
||||
state0 = vsha256hq_u32( state0, state1, msg ); \
|
||||
state1 = vsha256h2q_u32( state1, tmp, msg ); \
|
||||
OP0(g0, g1, g2, g3); \
|
||||
OP1(g0, g1, g2, g3); \
|
||||
OP0(m0, m1, m2, m3); \
|
||||
OP1(m0, m1, m2, m3); \
|
||||
|
||||
|
||||
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
|
||||
|
|
@ -298,19 +355,19 @@ const UInt32 SHA256_K_ARRAY[64];
|
|||
R4 ( (k)*4+3, m3, m0, m1, m2, OP6, OP7 ) \
|
||||
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
#ifdef ATTRIB_SHA
|
||||
ATTRIB_SHA
|
||||
#endif
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
v128 state0, state1;
|
||||
|
||||
if (numBlocks == 0)
|
||||
return;
|
||||
|
||||
state0 = LOAD_128(&state[0]);
|
||||
state1 = LOAD_128(&state[4]);
|
||||
state0 = LOAD_128_32(&state[0]);
|
||||
state1 = LOAD_128_32(&state[4]);
|
||||
|
||||
do
|
||||
{
|
||||
|
|
@ -326,10 +383,10 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
LOAD_SHUFFLE (m2, 2)
|
||||
LOAD_SHUFFLE (m3, 3)
|
||||
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 );
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN );
|
||||
R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
|
||||
R16 ( 3, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )
|
||||
|
||||
state0 = vaddq_u32(state0, state0_save);
|
||||
state1 = vaddq_u32(state1, state1_save);
|
||||
|
|
@ -338,8 +395,8 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
}
|
||||
while (--numBlocks);
|
||||
|
||||
STORE_128(&state[0], state0);
|
||||
STORE_128(&state[4], state1);
|
||||
STORE_128_32(&state[0], state0);
|
||||
STORE_128_32(&state[4], state1);
|
||||
}
|
||||
|
||||
#endif // USE_HW_SHA
|
||||
|
|
@ -347,18 +404,19 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
#endif // MY_CPU_ARM_OR_ARM64
|
||||
|
||||
|
||||
#ifndef USE_HW_SHA
|
||||
|
||||
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
|
||||
// #error Stop_Compiling_UNSUPPORTED_SHA
|
||||
// #include <stdlib.h>
|
||||
|
||||
// We can compile this file with another C compiler,
|
||||
// or we can compile asm version.
|
||||
// So we can generate real code instead of this stub function.
|
||||
// #include "Sha256.h"
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
// #if defined(_MSC_VER)
|
||||
#pragma message("Sha256 HW-SW stub was used")
|
||||
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
// #endif
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks (UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
Sha256_UpdateBlocks(state, data, numBlocks);
|
||||
/*
|
||||
|
|
@ -369,5 +427,25 @@ void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8], const Byte *data, size
|
|||
return;
|
||||
*/
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#undef K
|
||||
#undef RND2
|
||||
#undef MY_rev32_for_LE
|
||||
|
||||
#undef NNN
|
||||
#undef LOAD_128
|
||||
#undef STORE_128
|
||||
#undef LOAD_SHUFFLE
|
||||
#undef SM1
|
||||
#undef SM2
|
||||
|
||||
|
||||
#undef R4
|
||||
#undef R16
|
||||
#undef PREPARE_STATE
|
||||
#undef USE_HW_SHA
|
||||
#undef ATTRIB_SHA
|
||||
#undef USE_VER_MIN
|
||||
#undef Z7_USE_HW_SHA_STUB
|
||||
|
|
|
|||
359
C/Sha3.c
Normal file
359
C/Sha3.c
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
/* Sha3.c -- SHA-3 Hash
|
||||
: Igor Pavlov : Public domain
|
||||
This code is based on public domain code from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "Sha3.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define U64C(x) UINT64_CONST(x)
|
||||
|
||||
static
|
||||
MY_ALIGN(64)
|
||||
const UInt64 SHA3_K_ARRAY[24] =
|
||||
{
|
||||
U64C(0x0000000000000001), U64C(0x0000000000008082),
|
||||
U64C(0x800000000000808a), U64C(0x8000000080008000),
|
||||
U64C(0x000000000000808b), U64C(0x0000000080000001),
|
||||
U64C(0x8000000080008081), U64C(0x8000000000008009),
|
||||
U64C(0x000000000000008a), U64C(0x0000000000000088),
|
||||
U64C(0x0000000080008009), U64C(0x000000008000000a),
|
||||
U64C(0x000000008000808b), U64C(0x800000000000008b),
|
||||
U64C(0x8000000000008089), U64C(0x8000000000008003),
|
||||
U64C(0x8000000000008002), U64C(0x8000000000000080),
|
||||
U64C(0x000000000000800a), U64C(0x800000008000000a),
|
||||
U64C(0x8000000080008081), U64C(0x8000000000008080),
|
||||
U64C(0x0000000080000001), U64C(0x8000000080008008)
|
||||
};
|
||||
|
||||
/* Resets the hashing context: clears every state word and marks the
   staging buffer as empty. p->blockSize is left untouched. */
void Sha3_Init(CSha3 *p)
{
  memset(p->state, 0, sizeof(p->state));
  p->count = 0;
}
|
||||
|
||||
#define GET_state(i, a) UInt64 a = state[i];
|
||||
#define SET_state(i, a) state[i] = a;
|
||||
|
||||
#define LS_5(M, i, a0,a1,a2,a3,a4) \
|
||||
M ((i) * 5 , a0) \
|
||||
M ((i) * 5 + 1, a1) \
|
||||
M ((i) * 5 + 2, a2) \
|
||||
M ((i) * 5 + 3, a3) \
|
||||
M ((i) * 5 + 4, a4) \
|
||||
|
||||
#define LS_25(M) \
|
||||
LS_5 (M, 0, a50, a51, a52, a53, a54) \
|
||||
LS_5 (M, 1, a60, a61, a62, a63, a64) \
|
||||
LS_5 (M, 2, a70, a71, a72, a73, a74) \
|
||||
LS_5 (M, 3, a80, a81, a82, a83, a84) \
|
||||
LS_5 (M, 4, a90, a91, a92, a93, a94) \
|
||||
|
||||
|
||||
#define XOR_1(i, a0) \
|
||||
a0 ^= GetUi64(data + (i) * 8); \
|
||||
|
||||
#define XOR_4(i, a0,a1,a2,a3) \
|
||||
XOR_1 ((i) , a0); \
|
||||
XOR_1 ((i) + 1, a1); \
|
||||
XOR_1 ((i) + 2, a2); \
|
||||
XOR_1 ((i) + 3, a3); \
|
||||
|
||||
#define D(d,b1,b2) \
|
||||
d = b1 ^ Z7_ROTL64(b2, 1);
|
||||
|
||||
#define D5 \
|
||||
D (d0, c4, c1) \
|
||||
D (d1, c0, c2) \
|
||||
D (d2, c1, c3) \
|
||||
D (d3, c2, c4) \
|
||||
D (d4, c3, c0) \
|
||||
|
||||
#define C0(c,a,d) \
|
||||
c = a ^ d; \
|
||||
|
||||
#define C(c,a,d,k) \
|
||||
c = a ^ d; \
|
||||
c = Z7_ROTL64(c, k); \
|
||||
|
||||
#define E4(e1,e2,e3,e4) \
|
||||
e1 = c1 ^ (~c2 & c3); \
|
||||
e2 = c2 ^ (~c3 & c4); \
|
||||
e3 = c3 ^ (~c4 & c0); \
|
||||
e4 = c4 ^ (~c0 & c1); \
|
||||
|
||||
#define CK( v0,w0, \
|
||||
v1,w1,k1, \
|
||||
v2,w2,k2, \
|
||||
v3,w3,k3, \
|
||||
v4,w4,k4, e0,e1,e2,e3,e4, keccak_c) \
|
||||
C0(c0,v0,w0) \
|
||||
C (c1,v1,w1,k1) \
|
||||
C (c2,v2,w2,k2) \
|
||||
C (c3,v3,w3,k3) \
|
||||
C (c4,v4,w4,k4) \
|
||||
e0 = c0 ^ (~c1 & c2) ^ keccak_c; \
|
||||
E4(e1,e2,e3,e4) \
|
||||
|
||||
#define CE( v0,w0,k0, \
|
||||
v1,w1,k1, \
|
||||
v2,w2,k2, \
|
||||
v3,w3,k3, \
|
||||
v4,w4,k4, e0,e1,e2,e3,e4) \
|
||||
C (c0,v0,w0,k0) \
|
||||
C (c1,v1,w1,k1) \
|
||||
C (c2,v2,w2,k2) \
|
||||
C (c3,v3,w3,k3) \
|
||||
C (c4,v4,w4,k4) \
|
||||
e0 = c0 ^ (~c1 & c2); \
|
||||
E4(e1,e2,e3,e4) \
|
||||
|
||||
// numBlocks != 0
|
||||
// Absorbs (numBlocks) consecutive blocks of (blockSize) bytes from (data) into
// the 25-word state and runs 24 rounds of the permutation after each block.
// The state is copied into locals a50..a94 on entry (LS_25 (GET_state)) and
// written back on exit (LS_25 (SET_state)).
// numBlocks must be nonzero: the do/while body runs before the counter is tested.
static
|
||||
Z7_NO_INLINE
|
||||
void Z7_FASTCALL Sha3_UpdateBlocks(UInt64 state[SHA3_NUM_STATE_WORDS],
|
||||
const Byte *data, size_t numBlocks, size_t blockSize)
|
||||
{
|
||||
LS_25 (GET_state)
|
||||
|
||||
do
|
||||
{
|
||||
unsigned round;
|
||||
// XOR the input block into the state, one 64-bit word (read with GetUi64) at a time.
// Words 0..8 (72 bytes) are always absorbed; the nested tests below add
// words 9..12 if blockSize > 72, words 13..16 if blockSize > 104,
// word 17 if blockSize > 136, and words 18..20 if blockSize > 144.
XOR_4 ( 0, a50, a51, a52, a53)
|
||||
XOR_4 ( 4, a54, a60, a61, a62)
|
||||
XOR_1 ( 8, a63)
|
||||
if (blockSize > 8 * 9) { XOR_4 ( 9, a64, a70, a71, a72) // sha3-384
|
||||
if (blockSize > 8 * 13) { XOR_4 (13, a73, a74, a80, a81) // sha3-256
|
||||
if (blockSize > 8 * 17) { XOR_1 (17, a82) // sha3-224
|
||||
if (blockSize > 8 * 18) { XOR_1 (18, a83) // shake128
|
||||
XOR_1 (19, a84)
|
||||
XOR_1 (20, a90) }}}}
|
||||
data += blockSize;
|
||||
|
||||
// Two rounds per loop iteration: the first half computes e50..e94 from a50..a94
// (round constant SHA3_K_ARRAY[round]), the second half computes a50..a94 back
// from e50..e94 (round constant SHA3_K_ARRAY[round + 1]).
for (round = 0; round < 24; round += 2)
|
||||
{
|
||||
UInt64 c0, c1, c2, c3, c4;
|
||||
UInt64 d0, d1, d2, d3, d4;
|
||||
UInt64 e50, e51, e52, e53, e54;
|
||||
UInt64 e60, e61, e62, e63, e64;
|
||||
UInt64 e70, e71, e72, e73, e74;
|
||||
UInt64 e80, e81, e82, e83, e84;
|
||||
UInt64 e90, e91, e92, e93, e94;
|
||||
|
||||
// c0..c4 : XOR of the five words in each column; D5 derives d0..d4 from them.
c0 = a50^a60^a70^a80^a90;
|
||||
c1 = a51^a61^a71^a81^a91;
|
||||
c2 = a52^a62^a72^a82^a92;
|
||||
c3 = a53^a63^a73^a83^a93;
|
||||
c4 = a54^a64^a74^a84^a94;
|
||||
D5
|
||||
// Each CK / CE call XORs five state words with their d value, rotates them
// (the first word of CK is not rotated) and produces one output row of five words.
// Only CK also XORs the round constant into its first output word.
CK( a50, d0,
|
||||
a61, d1, 44,
|
||||
a72, d2, 43,
|
||||
a83, d3, 21,
|
||||
a94, d4, 14, e50, e51, e52, e53, e54, SHA3_K_ARRAY[round])
|
||||
CE( a53, d3, 28,
|
||||
a64, d4, 20,
|
||||
a70, d0, 3,
|
||||
a81, d1, 45,
|
||||
a92, d2, 61, e60, e61, e62, e63, e64)
|
||||
CE( a51, d1, 1,
|
||||
a62, d2, 6,
|
||||
a73, d3, 25,
|
||||
a84, d4, 8,
|
||||
a90, d0, 18, e70, e71, e72, e73, e74)
|
||||
CE( a54, d4, 27,
|
||||
a60, d0, 36,
|
||||
a71, d1, 10,
|
||||
a82, d2, 15,
|
||||
a93, d3, 56, e80, e81, e82, e83, e84)
|
||||
CE( a52, d2, 62,
|
||||
a63, d3, 55,
|
||||
a74, d4, 39,
|
||||
a80, d0, 41,
|
||||
a91, d1, 2, e90, e91, e92, e93, e94)
|
||||
|
||||
// ---------- ROUND + 1 ----------
|
||||
|
||||
// Same steps as above with the roles of a* and e* swapped.
c0 = e50^e60^e70^e80^e90;
|
||||
c1 = e51^e61^e71^e81^e91;
|
||||
c2 = e52^e62^e72^e82^e92;
|
||||
c3 = e53^e63^e73^e83^e93;
|
||||
c4 = e54^e64^e74^e84^e94;
|
||||
D5
|
||||
CK( e50, d0,
|
||||
e61, d1, 44,
|
||||
e72, d2, 43,
|
||||
e83, d3, 21,
|
||||
e94, d4, 14, a50, a51, a52, a53, a54, SHA3_K_ARRAY[(size_t)round + 1])
|
||||
CE( e53, d3, 28,
|
||||
e64, d4, 20,
|
||||
e70, d0, 3,
|
||||
e81, d1, 45,
|
||||
e92, d2, 61, a60, a61, a62, a63, a64)
|
||||
CE( e51, d1, 1,
|
||||
e62, d2, 6,
|
||||
e73, d3, 25,
|
||||
e84, d4, 8,
|
||||
e90, d0, 18, a70, a71, a72, a73, a74)
|
||||
CE (e54, d4, 27,
|
||||
e60, d0, 36,
|
||||
e71, d1, 10,
|
||||
e82, d2, 15,
|
||||
e93, d3, 56, a80, a81, a82, a83, a84)
|
||||
CE (e52, d2, 62,
|
||||
e63, d3, 55,
|
||||
e74, d4, 39,
|
||||
e80, d0, 41,
|
||||
e91, d1, 2, a90, a91, a92, a93, a94)
|
||||
}
|
||||
}
|
||||
while (--numBlocks);
|
||||
|
||||
// write the working copy back to the caller's state array
LS_25 (SET_state)
|
||||
}
|
||||
|
||||
|
||||
#define Sha3_UpdateBlock(p) \
|
||||
Sha3_UpdateBlocks(p->state, p->buffer, 1, p->blockSize)
|
||||
|
||||
/* Feeds (size) bytes from (data) into the hash.
   Input is staged in p->buffer until a full block of p->blockSize bytes is
   available; full blocks are absorbed with Sha3_UpdateBlocks().
   p->count holds the number of staged bytes and stays below p->blockSize.
   (A disabled upstream variant XORed the input directly into p->state
   instead of staging it in p->buffer; this function uses the buffer.) */
void Sha3_Update(CSha3 *p, const Byte *data, size_t size)
{
  unsigned blockSize;
  unsigned filled;
  unsigned room;

  if (size == 0)
    return;

  blockSize = p->blockSize;
  filled = p->count;
  room = blockSize - filled;

  /* Not enough input to complete the staged block: just append it. */
  if (size < room)
  {
    memcpy(p->buffer + filled, data, size);
    p->count = filled + (unsigned)size;
    return;
  }

  /* Complete the partially filled block and absorb it. */
  if (filled != 0)
  {
    memcpy(p->buffer + filled, data, room);
    data += room;
    size -= room;
    Sha3_UpdateBlocks(p->state, p->buffer, 1, blockSize);
  }

  /* Absorb all remaining whole blocks directly from the caller's data. */
  if (size >= blockSize)
  {
    const size_t numBlocks = size / blockSize;
    const size_t numBytes = numBlocks * blockSize;
    Sha3_UpdateBlocks(p->state, data, numBlocks, blockSize);
    data += numBytes;
    size -= numBytes;
  }

  /* Stage the tail (possibly empty) for the next call. */
  p->count = (unsigned)size;
  if (size != 0)
    memcpy(p->buffer, data, size);
}
|
||||
|
||||
|
||||
// we support only (digestSize % 4 == 0) cases
|
||||
/* Pads the staged data, absorbs the final block, writes (digestSize) bytes
   of the state to (digest) and re-initializes the context with Sha3_Init().
   (shake) != 0 selects the SHAKE padding byte, otherwise the SHA-3 one. */
void Sha3_Final(CSha3 *p, Byte *digest, unsigned digestSize, unsigned shake)
{
  const unsigned pos = p->count;
  const unsigned blockSize = p->blockSize;

  /* Clear the unused tail of the block before placing the padding bits. */
  memset(p->buffer + pos, 0, blockSize - pos);

  /* Marker bits are written from low to high in the current byte:
       - sha-3 : 2 bits : 0,1
       - shake : 4 bits : 1111
     followed by one more 1 bit in the same byte. */
  p->buffer[pos] = (Byte)(shake ? 0x1f : 0x06);

  /* The closing 1 bit goes to the highest bit of the last byte of the block.
     XOR is required (not assignment): when (pos == blockSize - 1) this is
     the same byte that already holds 0x1f / 0x06. */
  p->buffer[blockSize - 1] ^= 0x80;

  Sha3_UpdateBlocks(p->state, p->buffer, 1, blockSize);

 #if 1 && defined(MY_CPU_LE)
  memcpy(digest, p->state, digestSize);
 #else
  {
    /* Emit whole 64-bit words with SetUi64, then an optional trailing
       32-bit word (digestSize & 4, the SHA3-224 case). */
    const UInt64 *src = p->state;
    unsigned n;
    for (n = digestSize >> 3; n != 0; n--)
    {
      const UInt64 v = *src++;
      SetUi64(digest, v)
      digest += 8;
    }
    if (digestSize & 4)
    {
      const UInt32 v = (UInt32)*src;
      SetUi32(digest, v)
    }
  }
 #endif

  Sha3_Init(p);
}
|
||||
|
||||
#undef GET_state
|
||||
#undef SET_state
|
||||
#undef LS_5
|
||||
#undef LS_25
|
||||
#undef XOR_1
|
||||
#undef XOR_4
|
||||
#undef D
|
||||
#undef D5
|
||||
#undef C0
|
||||
#undef C
|
||||
#undef E4
|
||||
#undef CK
|
||||
#undef CE
|
||||
36
C/Sha3.h
Normal file
36
C/Sha3.h
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
/* Sha3.h -- SHA-3 Hash
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
/* The include guard must be unique to this header. The previous guard name
   (ZIP7_INC_MD5_H) belongs to the MD5 header by its name; sharing it would
   make whichever of the two headers is included second expand to nothing. */
#ifndef ZIP7_INC_SHA3_H
#define ZIP7_INC_SHA3_H

#include "7zTypes.h"

EXTERN_C_BEGIN

/* Number of 64-bit words in the state (25 * 8 = 200 bytes). */
#define SHA3_NUM_STATE_WORDS  25

/* Block size in bytes for a given digest size in bytes:
   state size minus twice the digest size. */
#define SHA3_BLOCK_SIZE_FROM_DIGEST_SIZE(digestSize) \
    (SHA3_NUM_STATE_WORDS * 8 - (digestSize) * 2)

typedef struct
{
  UInt32 count;     // < blockSize
  UInt32 blockSize; // <= SHA3_NUM_STATE_WORDS * 8
  UInt64 _pad1[3];
  // we want 32-bytes alignment here
  UInt64 state[SHA3_NUM_STATE_WORDS];
  UInt64 _pad2[3];
  // we want 64-bytes alignment here
  Byte buffer[SHA3_NUM_STATE_WORDS * 8]; // last bytes will be unused with predefined blockSize values
} CSha3;

/* Sets the block size of context (p).
   The macro parameter must NOT be named like the struct member: with a
   parameter called "blockSize" the preprocessor also replaced the member
   name in "(p)->blockSize", so the macro only compiled when the argument
   was literally spelled "blockSize". Existing callers that pass a variable
   named blockSize expand to exactly the same code as before.
   The expansion is a braced block, as before. */
#define Sha3_SET_blockSize(p, size) { (p)->blockSize = (size); }

/* Clears the state and the staged byte count; blockSize is not changed. */
void Sha3_Init(CSha3 *p);
/* Feeds (size) bytes of (data) into the hash. */
void Sha3_Update(CSha3 *p, const Byte *data, size_t size);
/* Pads, writes (digestSize) bytes to (digest), and re-initializes (p).
   (shake) != 0 selects SHAKE padding. Only (digestSize % 4 == 0) is supported. */
void Sha3_Final(CSha3 *p, Byte *digest, unsigned digestSize, unsigned shake);

EXTERN_C_END

#endif
|
||||
711
C/Sha512.c
Normal file
711
C/Sha512.c
Normal file
|
|
@ -0,0 +1,711 @@
|
|||
/* Sha512.c -- SHA-512 Hash
|
||||
: Igor Pavlov : Public domain
|
||||
This code is based on public domain code from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "Sha512.h"
|
||||
#include "RotateDefs.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 170001) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 140000) \
|
||||
|| defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900) \
|
||||
|| defined(_MSC_VER) && (_MSC_VER >= 1940)
|
||||
#define Z7_COMPILER_SHA512_SUPPORTED
|
||||
#endif
|
||||
#elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
|
||||
#if defined(__ARM_FEATURE_SHA512)
|
||||
#define Z7_COMPILER_SHA512_SUPPORTED
|
||||
#else
|
||||
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940) // fix it
|
||||
#define Z7_COMPILER_SHA512_SUPPORTED
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
#ifdef Z7_COMPILER_SHA512_SUPPORTED
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS = Sha512_UpdateBlocks;
|
||||
static SHA512_FUNC_UPDATE_BLOCKS g_SHA512_FUNC_UPDATE_BLOCKS_HW;
|
||||
|
||||
#define SHA512_UPDATE_BLOCKS(p) p->v.vars.func_UpdateBlocks
|
||||
#else
|
||||
#define SHA512_UPDATE_BLOCKS(p) Sha512_UpdateBlocks
|
||||
#endif
|
||||
|
||||
|
||||
/* Selects the block-update routine stored in (p) according to (algo).
   Returns True on success, False when (algo) is unknown or the requested
   hardware routine is not available. (p) is not modified on failure. */
BoolInt Sha512_SetFunction(CSha512 *p, unsigned algo)
{
  SHA512_FUNC_UPDATE_BLOCKS update;

 #ifdef Z7_COMPILER_SHA512_SUPPORTED
  if (algo == SHA512_ALGO_SW)
    update = Sha512_UpdateBlocks;
  else if (algo == SHA512_ALGO_DEFAULT)
    update = g_SHA512_FUNC_UPDATE_BLOCKS;
  else if (algo == SHA512_ALGO_HW && g_SHA512_FUNC_UPDATE_BLOCKS_HW)
    update = g_SHA512_FUNC_UPDATE_BLOCKS_HW;
  else
    return False;
 #else
  /* Without compiler support only algo values 0 and 1 are accepted,
     and both map to the software routine. */
  if (algo > 1)
    return False;
  update = Sha512_UpdateBlocks;
 #endif

  p->v.vars.func_UpdateBlocks = update;
  return True;
}
|
||||
|
||||
|
||||
/* define it for speed optimization */
|
||||
|
||||
#if 0 // 1 for size optimization
|
||||
#define STEP_PRE 1
|
||||
#define STEP_MAIN 1
|
||||
#else
|
||||
#define STEP_PRE 2
|
||||
#define STEP_MAIN 4
|
||||
// #define Z7_SHA512_UNROLL
|
||||
#endif
|
||||
|
||||
#undef Z7_SHA512_BIG_W
|
||||
#if STEP_MAIN != 16
|
||||
#define Z7_SHA512_BIG_W
|
||||
#endif
|
||||
|
||||
|
||||
#define U64C(x) UINT64_CONST(x)
|
||||
|
||||
static MY_ALIGN(64) const UInt64 SHA512_INIT_ARRAYS[4][8] = {
|
||||
{ U64C(0x8c3d37c819544da2), U64C(0x73e1996689dcd4d6), U64C(0x1dfab7ae32ff9c82), U64C(0x679dd514582f9fcf),
|
||||
U64C(0x0f6d2b697bd44da8), U64C(0x77e36f7304c48942), U64C(0x3f9d85a86a1d36c8), U64C(0x1112e6ad91d692a1)
|
||||
},
|
||||
{ U64C(0x22312194fc2bf72c), U64C(0x9f555fa3c84c64c2), U64C(0x2393b86b6f53b151), U64C(0x963877195940eabd),
|
||||
U64C(0x96283ee2a88effe3), U64C(0xbe5e1e2553863992), U64C(0x2b0199fc2c85b8aa), U64C(0x0eb72ddc81c52ca2)
|
||||
},
|
||||
{ U64C(0xcbbb9d5dc1059ed8), U64C(0x629a292a367cd507), U64C(0x9159015a3070dd17), U64C(0x152fecd8f70e5939),
|
||||
U64C(0x67332667ffc00b31), U64C(0x8eb44a8768581511), U64C(0xdb0c2e0d64f98fa7), U64C(0x47b5481dbefa4fa4)
|
||||
},
|
||||
{ U64C(0x6a09e667f3bcc908), U64C(0xbb67ae8584caa73b), U64C(0x3c6ef372fe94f82b), U64C(0xa54ff53a5f1d36f1),
|
||||
U64C(0x510e527fade682d1), U64C(0x9b05688c2b3e6c1f), U64C(0x1f83d9abfb41bd6b), U64C(0x5be0cd19137e2179)
|
||||
}};
|
||||
|
||||
/* Loads the initial hash values for the given digest size and clears the
   byte counter. Row (digestSize / 16 - 1) of SHA512_INIT_ARRAYS is used,
   so (digestSize >> 4) must be in the range 1..4. */
void Sha512_InitState(CSha512 *p, unsigned digestSize)
{
  const size_t row = (size_t)(digestSize >> 4) - 1;
  memcpy(p->state, SHA512_INIT_ARRAYS[row], sizeof(p->state));
  p->v.vars.count = 0;
}
|
||||
|
||||
/* Full initialization: picks the default block-update routine
   (NULL when no hardware-capable build is available, in which case
   SHA512_UPDATE_BLOCKS ignores the stored pointer) and then resets
   the hash state for (digestSize). */
void Sha512_Init(CSha512 *p, unsigned digestSize)
{
 #ifdef Z7_COMPILER_SHA512_SUPPORTED
  p->v.vars.func_UpdateBlocks = g_SHA512_FUNC_UPDATE_BLOCKS;
 #else
  p->v.vars.func_UpdateBlocks = NULL;
 #endif
  Sha512_InitState(p, digestSize);
}
|
||||
|
||||
#define S0(x) (Z7_ROTR64(x,28) ^ Z7_ROTR64(x,34) ^ Z7_ROTR64(x,39))
|
||||
#define S1(x) (Z7_ROTR64(x,14) ^ Z7_ROTR64(x,18) ^ Z7_ROTR64(x,41))
|
||||
#define s0(x) (Z7_ROTR64(x, 1) ^ Z7_ROTR64(x, 8) ^ (x >> 7))
|
||||
#define s1(x) (Z7_ROTR64(x,19) ^ Z7_ROTR64(x,61) ^ (x >> 6))
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
|
||||
|
||||
|
||||
#define W_PRE(i) (W[(i) + (size_t)(j)] = GetBe64(data + ((size_t)(j) + i) * 8))
|
||||
|
||||
#define blk2_main(j, i) s1(w(j, (i)-2)) + w(j, (i)-7) + s0(w(j, (i)-15))
|
||||
|
||||
#ifdef Z7_SHA512_BIG_W
|
||||
// we use +i instead of +(i) to change the order to solve CLANG compiler warning for signed/unsigned.
|
||||
#define w(j, i) W[(size_t)(j) + i]
|
||||
#define blk2(j, i) (w(j, i) = w(j, (i)-16) + blk2_main(j, i))
|
||||
#else
|
||||
#if STEP_MAIN == 16
|
||||
#define w(j, i) W[(i) & 15]
|
||||
#else
|
||||
#define w(j, i) W[((size_t)(j) + (i)) & 15]
|
||||
#endif
|
||||
#define blk2(j, i) (w(j, i) += blk2_main(j, i))
|
||||
#endif
|
||||
|
||||
#define W_MAIN(i) blk2(j, i)
|
||||
|
||||
|
||||
#define T1(wx, i) \
|
||||
tmp = h + S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
|
||||
h = g; \
|
||||
g = f; \
|
||||
f = e; \
|
||||
e = d + tmp; \
|
||||
tmp += S0(a) + Maj(a, b, c); \
|
||||
d = c; \
|
||||
c = b; \
|
||||
b = a; \
|
||||
a = tmp; \
|
||||
|
||||
#define R1_PRE(i) T1( W_PRE, i)
|
||||
#define R1_MAIN(i) T1( W_MAIN, i)
|
||||
|
||||
#if (!defined(Z7_SHA512_UNROLL) || STEP_MAIN < 8) && (STEP_MAIN >= 4)
|
||||
#define R2_MAIN(i) \
|
||||
R1_MAIN(i) \
|
||||
R1_MAIN(i + 1) \
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8
|
||||
|
||||
#define T4( a,b,c,d,e,f,g,h, wx, i) \
|
||||
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
|
||||
tmp = h; \
|
||||
h += d; \
|
||||
d = tmp + S0(a) + Maj(a, b, c); \
|
||||
|
||||
#define R4( wx, i) \
|
||||
T4 ( a,b,c,d,e,f,g,h, wx, (i )); \
|
||||
T4 ( d,a,b,c,h,e,f,g, wx, (i+1)); \
|
||||
T4 ( c,d,a,b,g,h,e,f, wx, (i+2)); \
|
||||
T4 ( b,c,d,a,f,g,h,e, wx, (i+3)); \
|
||||
|
||||
#define R4_PRE(i) R4( W_PRE, i)
|
||||
#define R4_MAIN(i) R4( W_MAIN, i)
|
||||
|
||||
|
||||
#define T8( a,b,c,d,e,f,g,h, wx, i) \
|
||||
h += S1(e) + Ch(e,f,g) + K[(i)+(size_t)(j)] + wx(i); \
|
||||
d += h; \
|
||||
h += S0(a) + Maj(a, b, c); \
|
||||
|
||||
#define R8( wx, i) \
|
||||
T8 ( a,b,c,d,e,f,g,h, wx, i ); \
|
||||
T8 ( h,a,b,c,d,e,f,g, wx, i+1); \
|
||||
T8 ( g,h,a,b,c,d,e,f, wx, i+2); \
|
||||
T8 ( f,g,h,a,b,c,d,e, wx, i+3); \
|
||||
T8 ( e,f,g,h,a,b,c,d, wx, i+4); \
|
||||
T8 ( d,e,f,g,h,a,b,c, wx, i+5); \
|
||||
T8 ( c,d,e,f,g,h,a,b, wx, i+6); \
|
||||
T8 ( b,c,d,e,f,g,h,a, wx, i+7); \
|
||||
|
||||
#define R8_PRE(i) R8( W_PRE, i)
|
||||
#define R8_MAIN(i) R8( W_MAIN, i)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
extern
|
||||
MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80];
|
||||
MY_ALIGN(64) const UInt64 SHA512_K_ARRAY[80] = {
|
||||
U64C(0x428a2f98d728ae22), U64C(0x7137449123ef65cd), U64C(0xb5c0fbcfec4d3b2f), U64C(0xe9b5dba58189dbbc),
|
||||
U64C(0x3956c25bf348b538), U64C(0x59f111f1b605d019), U64C(0x923f82a4af194f9b), U64C(0xab1c5ed5da6d8118),
|
||||
U64C(0xd807aa98a3030242), U64C(0x12835b0145706fbe), U64C(0x243185be4ee4b28c), U64C(0x550c7dc3d5ffb4e2),
|
||||
U64C(0x72be5d74f27b896f), U64C(0x80deb1fe3b1696b1), U64C(0x9bdc06a725c71235), U64C(0xc19bf174cf692694),
|
||||
U64C(0xe49b69c19ef14ad2), U64C(0xefbe4786384f25e3), U64C(0x0fc19dc68b8cd5b5), U64C(0x240ca1cc77ac9c65),
|
||||
U64C(0x2de92c6f592b0275), U64C(0x4a7484aa6ea6e483), U64C(0x5cb0a9dcbd41fbd4), U64C(0x76f988da831153b5),
|
||||
U64C(0x983e5152ee66dfab), U64C(0xa831c66d2db43210), U64C(0xb00327c898fb213f), U64C(0xbf597fc7beef0ee4),
|
||||
U64C(0xc6e00bf33da88fc2), U64C(0xd5a79147930aa725), U64C(0x06ca6351e003826f), U64C(0x142929670a0e6e70),
|
||||
U64C(0x27b70a8546d22ffc), U64C(0x2e1b21385c26c926), U64C(0x4d2c6dfc5ac42aed), U64C(0x53380d139d95b3df),
|
||||
U64C(0x650a73548baf63de), U64C(0x766a0abb3c77b2a8), U64C(0x81c2c92e47edaee6), U64C(0x92722c851482353b),
|
||||
U64C(0xa2bfe8a14cf10364), U64C(0xa81a664bbc423001), U64C(0xc24b8b70d0f89791), U64C(0xc76c51a30654be30),
|
||||
U64C(0xd192e819d6ef5218), U64C(0xd69906245565a910), U64C(0xf40e35855771202a), U64C(0x106aa07032bbd1b8),
|
||||
U64C(0x19a4c116b8d2d0c8), U64C(0x1e376c085141ab53), U64C(0x2748774cdf8eeb99), U64C(0x34b0bcb5e19b48a8),
|
||||
U64C(0x391c0cb3c5c95a63), U64C(0x4ed8aa4ae3418acb), U64C(0x5b9cca4f7763e373), U64C(0x682e6ff3d6b2b8a3),
|
||||
U64C(0x748f82ee5defb2fc), U64C(0x78a5636f43172f60), U64C(0x84c87814a1f0ab72), U64C(0x8cc702081a6439ec),
|
||||
U64C(0x90befffa23631e28), U64C(0xa4506cebde82bde9), U64C(0xbef9a3f7b2c67915), U64C(0xc67178f2e372532b),
|
||||
U64C(0xca273eceea26619c), U64C(0xd186b8c721c0c207), U64C(0xeada7dd6cde0eb1e), U64C(0xf57d4f7fee6ed178),
|
||||
U64C(0x06f067aa72176fba), U64C(0x0a637dc5a2c898a6), U64C(0x113f9804bef90dae), U64C(0x1b710b35131c471b),
|
||||
U64C(0x28db77f523047d84), U64C(0x32caab7b40c72493), U64C(0x3c9ebe0a15c9bebc), U64C(0x431d67c49c100d4c),
|
||||
U64C(0x4cc5d4becb3e42b6), U64C(0x597f299cfc657e2a), U64C(0x5fcb6fab3ad6faec), U64C(0x6c44198c4a475817)
|
||||
};
|
||||
|
||||
#define K SHA512_K_ARRAY
|
||||
|
||||
// Software SHA-512 compression function.
// Processes (numBlocks) consecutive blocks of SHA512_BLOCK_SIZE bytes from (data)
// and updates state[0..7] in place after every block.
// Returns immediately when (numBlocks == 0).
Z7_NO_INLINE
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
// W is the message schedule: 80 words when Z7_SHA512_BIG_W is defined,
// otherwise 16 words that the w() macro indexes modulo 16.
UInt64 W
|
||||
#ifdef Z7_SHA512_BIG_W
|
||||
[80];
|
||||
#else
|
||||
[16];
|
||||
#endif
|
||||
unsigned j;
|
||||
UInt64 a,b,c,d,e,f,g,h;
|
||||
// tmp is used by the T1 / T4 round macros
#if !defined(Z7_SHA512_UNROLL) || (STEP_MAIN <= 4) || (STEP_PRE <= 4)
|
||||
UInt64 tmp;
|
||||
#endif
|
||||
|
||||
// the do/while loop below must not run with a zero counter
if (numBlocks == 0) return;
|
||||
|
||||
a = state[0];
|
||||
b = state[1];
|
||||
c = state[2];
|
||||
d = state[3];
|
||||
e = state[4];
|
||||
f = state[5];
|
||||
g = state[6];
|
||||
h = state[7];
|
||||
|
||||
do
|
||||
{
|
||||
|
||||
// Rounds 0..15: the W_PRE macro reads the schedule word from (data) with GetBe64
// and stores it in W. STEP_PRE rounds are done per loop iteration.
// NOTE(review): R4_PRE / R8_PRE are only defined when Z7_SHA512_UNROLL is defined
// and STEP_MAIN >= 8, so the (STEP_PRE > 4) branch relies on that configuration.
for (j = 0; j < 16; j += STEP_PRE)
|
||||
{
|
||||
#if STEP_PRE > 4
|
||||
|
||||
#if STEP_PRE < 8
|
||||
R4_PRE(0);
|
||||
#else
|
||||
R8_PRE(0);
|
||||
#if STEP_PRE == 16
|
||||
R8_PRE(8);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
R1_PRE(0)
|
||||
#if STEP_PRE >= 2
|
||||
R1_PRE(1)
|
||||
#if STEP_PRE >= 4
|
||||
R1_PRE(2)
|
||||
R1_PRE(3)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
// Rounds 16..79: the W_MAIN macro computes each schedule word from earlier
// words (blk2). STEP_MAIN rounds are done per loop iteration.
for (j = 16; j < 80; j += STEP_MAIN)
|
||||
{
|
||||
#if defined(Z7_SHA512_UNROLL) && STEP_MAIN >= 8
|
||||
|
||||
#if STEP_MAIN < 8
|
||||
R4_MAIN(0)
|
||||
#else
|
||||
R8_MAIN(0)
|
||||
#if STEP_MAIN == 16
|
||||
R8_MAIN(8)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
R1_MAIN(0)
|
||||
#if STEP_MAIN >= 2
|
||||
R1_MAIN(1)
|
||||
#if STEP_MAIN >= 4
|
||||
R2_MAIN(2)
|
||||
#if STEP_MAIN >= 8
|
||||
R2_MAIN(4)
|
||||
R2_MAIN(6)
|
||||
#if STEP_MAIN >= 16
|
||||
R2_MAIN(8)
|
||||
R2_MAIN(10)
|
||||
R2_MAIN(12)
|
||||
R2_MAIN(14)
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// Add the working variables to the previous state; a..h keep the new
// state values and are the starting point for the next block.
a += state[0]; state[0] = a;
|
||||
b += state[1]; state[1] = b;
|
||||
c += state[2]; state[2] = c;
|
||||
d += state[3]; state[3] = d;
|
||||
e += state[4]; state[4] = e;
|
||||
f += state[5]; state[5] = f;
|
||||
g += state[6]; state[6] = g;
|
||||
h += state[7]; state[7] = h;
|
||||
|
||||
data += SHA512_BLOCK_SIZE;
|
||||
}
|
||||
while (--numBlocks);
|
||||
}
|
||||
|
||||
|
||||
#define Sha512_UpdateBlock(p) SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1)
|
||||
|
||||
/* Feeds (size) bytes from (data) into the hash.
   The low bits of p->v.vars.count give the number of bytes currently staged
   in p->buffer; whole blocks are passed to the selected update routine. */
void Sha512_Update(CSha512 *p, const Byte *data, size_t size)
{
  unsigned filled;
  unsigned room;
  size_t numBlocks;

  if (size == 0)
    return;

  filled = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1);
  room = SHA512_BLOCK_SIZE - filled;
  p->v.vars.count += size;

  /* Input does not complete the staged block: append and leave. */
  if (size < room)
  {
    memcpy(p->buffer + filled, data, size);
    return;
  }

  /* Complete the partially filled block and process it. */
  if (filled != 0)
  {
    memcpy(p->buffer + filled, data, room);
    data += room;
    size -= room;
    SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1);
  }

  /* Process whole blocks straight from the caller's data.
     The routine is called even when numBlocks is 0. */
  numBlocks = size >> 7;
  SHA512_UPDATE_BLOCKS(p)(p->state, data, numBlocks);

  /* Stage the remaining tail, if any. */
  size &= SHA512_BLOCK_SIZE - 1;
  if (size != 0)
    memcpy(p->buffer, data + (numBlocks << 7), size);
}
|
||||
|
||||
|
||||
/* Appends the padding and the message length, processes the final block(s),
   writes (digestSize) bytes of the result to (digest) and resets the state
   with Sha512_InitState() for the same digest size. */
void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize)
{
  unsigned pos = (unsigned)p->v.vars.count & (SHA512_BLOCK_SIZE - 1);

  /* Mandatory 0x80 marker byte right after the data. */
  p->buffer[pos] = 0x80;
  pos++;

  /* No room left for the 16-byte length field: zero-fill this block,
     process it, and continue in a fresh block. */
  if (pos > (SHA512_BLOCK_SIZE - 8 * 2))
  {
    memset(&p->buffer[pos], 0, SHA512_BLOCK_SIZE - pos);
    SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1);
    pos = 0;
  }

  memset(&p->buffer[pos], 0, (SHA512_BLOCK_SIZE - 8 * 2) - pos);
  {
    /* Length in bits; the high 64 bits of the 128-bit field are written as 0
       (they would be (p->v.vars.count >> (64 - 3))). */
    const UInt64 numBits = p->v.vars.count << 3;
    SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 2, 0)
    SetBe64(p->buffer + SHA512_BLOCK_SIZE - 8 * 1, numBits)
  }
  SHA512_UPDATE_BLOCKS(p)(p->state, p->buffer, 1);

 #if 1 && defined(MY_CPU_BE)
  memcpy(digest, p->state, digestSize);
 #else
  {
    /* Emit whole 64-bit words with SetBe64, then an optional trailing
       32-bit word taken from the high half of the next state word
       (digestSize & 4, i.e. digestSize == SHA512_224_DIGEST_SIZE). */
    const UInt64 *src = p->state;
    unsigned n;
    for (n = digestSize >> 3; n != 0; n--)
    {
      const UInt64 v = *src++;
      SetBe64(digest, v)
      digest += 8;
    }
    if (digestSize & 4)
    {
      const UInt32 v = (UInt32)((*src) >> 32);
      SetBe32(digest, v)
    }
  }
 #endif

  Sha512_InitState(p, digestSize);
}
|
||||
|
||||
|
||||
|
||||
// #define Z7_SHA512_PROBE_DEBUG // for debug
|
||||
|
||||
#if defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
|
||||
|
||||
#if defined(Z7_SHA512_PROBE_DEBUG) \
|
||||
|| defined(_WIN32) && defined(MY_CPU_ARM64)
|
||||
#ifndef Z7_SHA512_USE_PROBE
|
||||
#define Z7_SHA512_USE_PROBE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef Z7_SHA512_USE_PROBE
|
||||
|
||||
#ifdef Z7_SHA512_PROBE_DEBUG
|
||||
#include <stdio.h>
|
||||
#define PRF(x) x
|
||||
#else
|
||||
#define PRF(x)
|
||||
#endif
|
||||
|
||||
#if 0 || !defined(_MSC_VER) // 1 || : for debug LONGJMP mode
|
||||
// MINGW doesn't support __try. So we use signal() / longjmp().
|
||||
// Note: signal() / longjmp() probably is not thread-safe.
|
||||
// So we must call Sha512Prepare() from main thread at program start.
|
||||
#ifndef Z7_SHA512_USE_LONGJMP
|
||||
#define Z7_SHA512_USE_LONGJMP
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef Z7_SHA512_USE_LONGJMP
|
||||
#include <signal.h>
|
||||
#include <setjmp.h>
|
||||
static jmp_buf g_Sha512_jmp_buf;
|
||||
// static int g_Sha512_Unsupported;
|
||||
|
||||
// Signal handler for the longjmp-based probe mode (Z7_SHA512_USE_LONGJMP).
// It never returns normally: it jumps to the context saved in g_Sha512_jmp_buf
// with value 1. The (v) argument is only used for debug printing (PRF).
// NOTE(review): presumably installed with signal() by the probe code; the
// installation and the matching setjmp() are not visible in this part of the file.
#if defined(__GNUC__) && (__GNUC__ >= 8) \
|
||||
|| defined(__clang__) && (__clang_major__ >= 3)
|
||||
__attribute__((noreturn))
|
||||
#endif
|
||||
static void Z7_CDECL Sha512_signal_Handler(int v)
|
||||
{
|
||||
PRF(printf("======== Sha512_signal_Handler = %x\n", (unsigned)v);)
|
||||
// g_Sha512_Unsupported = 1;
|
||||
longjmp(g_Sha512_jmp_buf, 1);
|
||||
}
|
||||
#endif // Z7_SHA512_USE_LONGJMP
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "7zWindows.h"
|
||||
#endif
|
||||
|
||||
#if defined(MY_CPU_ARM64)
|
||||
// #define Z7_SHA512_USE_SIMPLIFIED_PROBE // for debug
|
||||
#endif
|
||||
|
||||
#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
|
||||
#include <arm_neon.h>
|
||||
#if defined(__clang__)
|
||||
__attribute__((__target__("sha3")))
|
||||
#elif !defined(_MSC_VER)
|
||||
__attribute__((__target__("arch=armv8.2-a+sha3")))
|
||||
#endif
|
||||
#endif
|
||||
static BoolInt CPU_IsSupported_SHA512_Probe(void)
|
||||
{
|
||||
PRF(printf("\n== CPU_IsSupported_SHA512_Probe\n");)
|
||||
#if defined(_WIN32) && defined(MY_CPU_ARM64)
|
||||
// we have no SHA512 flag for IsProcessorFeaturePresent() still.
|
||||
if (!CPU_IsSupported_CRYPTO())
|
||||
return False;
|
||||
PRF(printf("==== Registry check\n");)
|
||||
{
|
||||
// we can't read ID_AA64ISAR0_EL1 register from application.
|
||||
// but ID_AA64ISAR0_EL1 register is mapped to "CP 4030" registry value.
|
||||
HKEY key = NULL;
|
||||
LONG res = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
|
||||
TEXT("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0"),
|
||||
0, KEY_READ, &key);
|
||||
if (res != ERROR_SUCCESS)
|
||||
return False;
|
||||
{
|
||||
DWORD type = 0;
|
||||
DWORD count = sizeof(UInt64);
|
||||
UInt64 val = 0;
|
||||
res = RegQueryValueEx(key, TEXT("CP 4030"), NULL,
|
||||
&type, (LPBYTE)&val, &count);
|
||||
RegCloseKey(key);
|
||||
if (res != ERROR_SUCCESS
|
||||
|| type != REG_QWORD
|
||||
|| count != sizeof(UInt64)
|
||||
|| ((unsigned)(val >> 12) & 0xf) != 2)
|
||||
return False;
|
||||
// we parse SHA2 field of ID_AA64ISAR0_EL1 register:
|
||||
// 0 : No SHA2 instructions implemented
|
||||
// 1 : SHA256 implemented
|
||||
// 2 : SHA256 and SHA512 implemented
|
||||
}
|
||||
}
|
||||
#endif // defined(_WIN32) && defined(MY_CPU_ARM64)
|
||||
|
||||
|
||||
#if 1 // 0 for debug to disable SHA512 PROBE code
|
||||
|
||||
/*
|
||||
----- SHA512 PROBE -----
|
||||
|
||||
We suppose that "CP 4030" registry reading is enough.
|
||||
But we use additional SHA512 PROBE code, because
|
||||
we can catch exception here, and we don't catch exceptions,
|
||||
if we call Sha512 functions from main code.
|
||||
|
||||
NOTE: arm64 PROBE code doesn't work, if we call it via Wine in linux-arm64.
|
||||
The program just stops.
|
||||
Also x64 version of PROBE code doesn't work, if we run it via Intel SDE emulator
|
||||
without SHA512 support (-skl switch),
|
||||
The program stops, and we have message from SDE:
|
||||
TID 0 SDE-ERROR: Executed instruction not valid for specified chip (SKYLAKE): vsha512msg1
|
||||
But we still want to catch that exception instead of process stopping.
|
||||
Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instructions)?
|
||||
Are there any ways to fix the problems with arm64-wine and x64-SDE cases?
|
||||
*/
|
||||
|
||||
PRF(printf("==== CPU_IsSupported_SHA512 PROBE\n");)
|
||||
{
|
||||
BoolInt isSupported = False;
|
||||
#ifdef Z7_SHA512_USE_LONGJMP
|
||||
void (Z7_CDECL *signal_prev)(int);
|
||||
/*
|
||||
if (g_Sha512_Unsupported)
|
||||
{
|
||||
PRF(printf("==== g_Sha512_Unsupported\n");)
|
||||
return False;
|
||||
}
|
||||
*/
|
||||
printf("====== signal(SIGILL)\n");
|
||||
signal_prev = signal(SIGILL, Sha512_signal_Handler);
|
||||
if (signal_prev == SIG_ERR)
|
||||
{
|
||||
PRF(printf("====== signal fail\n");)
|
||||
return False;
|
||||
}
|
||||
// PRF(printf("==== signal_prev = %p\n", (void *)signal_prev);)
|
||||
// docs: Before the specified function is executed,
|
||||
// the value of func is set to SIG_DFL.
|
||||
// So we can exit if (setjmp(g_Sha512_jmp_buf) != 0).
|
||||
PRF(printf("====== setjmp\n");)
|
||||
if (!setjmp(g_Sha512_jmp_buf))
|
||||
#else // Z7_SHA512_USE_LONGJMP
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#ifdef __clang_major__
|
||||
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
|
||||
#endif
|
||||
__try
|
||||
#endif
|
||||
#endif // Z7_SHA512_USE_LONGJMP
|
||||
|
||||
{
|
||||
#if defined(Z7_COMPILER_SHA512_SUPPORTED)
|
||||
#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
|
||||
// simplified sha512 check for arm64:
|
||||
const uint64x2_t a = vdupq_n_u64(1);
|
||||
const uint64x2_t b = vsha512hq_u64(a, a, a);
|
||||
PRF(printf("======== vsha512hq_u64 probe\n");)
|
||||
if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002)
|
||||
#else
|
||||
MY_ALIGN(16)
|
||||
UInt64 temp[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS];
|
||||
memset(temp, 0x5a, sizeof(temp));
|
||||
PRF(printf("======== Sha512_UpdateBlocks_HW\n");)
|
||||
Sha512_UpdateBlocks_HW(temp,
|
||||
(const Byte *)(const void *)(temp + SHA512_NUM_DIGEST_WORDS), 1);
|
||||
// PRF(printf("======== t = %x\n", (UInt32)temp[0]);)
|
||||
if ((UInt32)temp[0] == 0xa33cfdf7)
|
||||
#endif
|
||||
{
|
||||
PRF(printf("======== PROBE SHA512: SHA512 is supported\n");)
|
||||
isSupported = True;
|
||||
}
|
||||
#else // Z7_COMPILER_SHA512_SUPPORTED
|
||||
// for debug : we generate bad instrction or raise exception.
|
||||
// __except() doesn't catch raise() calls.
|
||||
#ifdef Z7_SHA512_USE_LONGJMP
|
||||
PRF(printf("====== raise(SIGILL)\n");)
|
||||
raise(SIGILL);
|
||||
#else
|
||||
#if defined(_MSC_VER) && defined(MY_CPU_X86)
|
||||
__asm ud2
|
||||
#endif
|
||||
#endif // Z7_SHA512_USE_LONGJMP
|
||||
#endif // Z7_COMPILER_SHA512_SUPPORTED
|
||||
}
|
||||
|
||||
#ifdef Z7_SHA512_USE_LONGJMP
|
||||
PRF(printf("====== restore signal SIGILL\n");)
|
||||
signal(SIGILL, signal_prev);
|
||||
#elif _MSC_VER
|
||||
__except (EXCEPTION_EXECUTE_HANDLER)
|
||||
{
|
||||
PRF(printf("==== CPU_IsSupported_SHA512 __except(EXCEPTION_EXECUTE_HANDLER)\n");)
|
||||
}
|
||||
#endif
|
||||
PRF(printf("== return (sha512 supported) = %d\n", isSupported);)
|
||||
return isSupported;
|
||||
}
|
||||
#else
|
||||
// without SHA512 PROBE code
|
||||
return True;
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // Z7_SHA512_USE_PROBE
|
||||
#endif // defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
|
||||
|
||||
|
||||
/*
  Sha512Prepare()
  It selects SHA-512 block functions and stores them to global variables:
    g_SHA512_FUNC_UPDATE_BLOCKS    : Sha512_UpdateBlocks_HW, if hardware code
                                     is detected as supported,
                                     or Sha512_UpdateBlocks otherwise.
    g_SHA512_FUNC_UPDATE_BLOCKS_HW : Sha512_UpdateBlocks_HW or NULL.
  If the compiler doesn't support SHA-512 hardware code, the function does
  nothing, except of optional debug call of probe code.
*/
void Sha512Prepare(void)
{
#ifdef Z7_COMPILER_SHA512_SUPPORTED
  SHA512_FUNC_UPDATE_BLOCKS func_hw = NULL;
  BoolInt hwIsSupported;

  // the detection method depends on build configuration:
  #ifdef Z7_SHA512_USE_PROBE
  hwIsSupported = CPU_IsSupported_SHA512_Probe();
  #elif defined(MY_CPU_X86_OR_AMD64)
  hwIsSupported = (CPU_IsSupported_SHA512() && CPU_IsSupported_AVX2());
  #else
  hwIsSupported = CPU_IsSupported_SHA512();
  #endif

  if (hwIsSupported)
  {
    // printf("\n========== HW SHA512 ======== \n");
    func_hw = Sha512_UpdateBlocks_HW;
  }

  if (func_hw)
    g_SHA512_FUNC_UPDATE_BLOCKS = func_hw;
  else
    g_SHA512_FUNC_UPDATE_BLOCKS = Sha512_UpdateBlocks;
  g_SHA512_FUNC_UPDATE_BLOCKS_HW = func_hw;
#elif defined(Z7_SHA512_PROBE_DEBUG)
  CPU_IsSupported_SHA512_Probe(); // for debug
#endif
}
|
||||
|
||||
|
||||
#undef K
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
#undef Ch
|
||||
#undef Maj
|
||||
#undef W_MAIN
|
||||
#undef W_PRE
|
||||
#undef w
|
||||
#undef blk2_main
|
||||
#undef blk2
|
||||
#undef T1
|
||||
#undef T4
|
||||
#undef T8
|
||||
#undef R1_PRE
|
||||
#undef R1_MAIN
|
||||
#undef R2_MAIN
|
||||
#undef R4
|
||||
#undef R4_PRE
|
||||
#undef R4_MAIN
|
||||
#undef R8
|
||||
#undef R8_PRE
|
||||
#undef R8_MAIN
|
||||
#undef STEP_PRE
|
||||
#undef STEP_MAIN
|
||||
#undef Z7_SHA512_BIG_W
|
||||
#undef Z7_SHA512_UNROLL
|
||||
#undef Z7_COMPILER_SHA512_SUPPORTED
|
||||
86
C/Sha512.h
Normal file
86
C/Sha512.h
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
/* Sha512.h -- SHA-512 Hash
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef ZIP7_INC_SHA512_H
|
||||
#define ZIP7_INC_SHA512_H
|
||||
|
||||
#include "7zTypes.h"
|
||||
|
||||
EXTERN_C_BEGIN
|
||||
|
||||
#define SHA512_NUM_BLOCK_WORDS 16
|
||||
#define SHA512_NUM_DIGEST_WORDS 8
|
||||
|
||||
#define SHA512_BLOCK_SIZE (SHA512_NUM_BLOCK_WORDS * 8)
|
||||
#define SHA512_DIGEST_SIZE (SHA512_NUM_DIGEST_WORDS * 8)
|
||||
#define SHA512_224_DIGEST_SIZE (224 / 8)
|
||||
#define SHA512_256_DIGEST_SIZE (256 / 8)
|
||||
#define SHA512_384_DIGEST_SIZE (384 / 8)
|
||||
|
||||
typedef void (Z7_FASTCALL *SHA512_FUNC_UPDATE_BLOCKS)(UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
if (the system supports different SHA512 code implementations)
|
||||
{
|
||||
(CSha512::func_UpdateBlocks) will be used
|
||||
(CSha512::func_UpdateBlocks) can be set by
|
||||
Sha512_Init() - to default (fastest)
|
||||
Sha512_SetFunction() - to any algo
|
||||
}
|
||||
else
|
||||
{
|
||||
(CSha512::func_UpdateBlocks) is ignored.
|
||||
}
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
SHA512_FUNC_UPDATE_BLOCKS func_UpdateBlocks;
|
||||
UInt64 count;
|
||||
} vars;
|
||||
UInt64 _pad_64bit[8];
|
||||
void *_pad_align_ptr[2];
|
||||
} v;
|
||||
UInt64 state[SHA512_NUM_DIGEST_WORDS];
|
||||
|
||||
Byte buffer[SHA512_BLOCK_SIZE];
|
||||
} CSha512;
|
||||
|
||||
|
||||
#define SHA512_ALGO_DEFAULT 0
|
||||
#define SHA512_ALGO_SW 1
|
||||
#define SHA512_ALGO_HW 2
|
||||
|
||||
/*
|
||||
Sha512_SetFunction()
|
||||
return:
|
||||
0 - (algo) value is not supported, and func_UpdateBlocks was not changed
|
||||
1 - func_UpdateBlocks was set according to (algo) value.
|
||||
*/
|
||||
|
||||
BoolInt Sha512_SetFunction(CSha512 *p, unsigned algo);
|
||||
// we support only these (digestSize) values: 224/8, 256/8, 384/8, 512/8
|
||||
void Sha512_InitState(CSha512 *p, unsigned digestSize);
|
||||
void Sha512_Init(CSha512 *p, unsigned digestSize);
|
||||
void Sha512_Update(CSha512 *p, const Byte *data, size_t size);
|
||||
void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize);
|
||||
|
||||
|
||||
|
||||
|
||||
// void Z7_FASTCALL Sha512_UpdateBlocks(UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
|
||||
/*
|
||||
call Sha512Prepare() once at program start.
|
||||
It prepares all supported implementations, and detects the fastest implementation.
|
||||
*/
|
||||
|
||||
void Sha512Prepare(void);
|
||||
|
||||
EXTERN_C_END
|
||||
|
||||
#endif
|
||||
395
C/Sha512Opt.c
Normal file
395
C/Sha512Opt.c
Normal file
|
|
@ -0,0 +1,395 @@
|
|||
/* Sha512Opt.c -- SHA-512 optimized code for SHA-512 hardware instructions
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
#include "Compiler.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#ifdef MY_CPU_X86_OR_AMD64
|
||||
#if defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 2400) && (__INTEL_COMPILER <= 9900) // fix it
|
||||
#define USE_HW_SHA
|
||||
#elif defined(Z7_LLVM_CLANG_VERSION) && (Z7_LLVM_CLANG_VERSION >= 170001) \
|
||||
|| defined(Z7_APPLE_CLANG_VERSION) && (Z7_APPLE_CLANG_VERSION >= 170001) \
|
||||
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 140000)
|
||||
#define USE_HW_SHA
|
||||
#if !defined(__INTEL_COMPILER)
|
||||
// icc defines __GNUC__, but icc doesn't support __attribute__(__target__)
|
||||
#if !defined(__SHA512__) || !defined(__AVX2__)
|
||||
#define ATTRIB_SHA512 __attribute__((__target__("sha512,avx2")))
|
||||
#endif
|
||||
#endif
|
||||
#elif defined(Z7_MSC_VER_ORIGINAL)
|
||||
#if (_MSC_VER >= 1940)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
// #define Z7_USE_HW_SHA_STUB
|
||||
#endif
|
||||
#endif
|
||||
// #endif // MY_CPU_X86_OR_AMD64
|
||||
#ifndef USE_HW_SHA
|
||||
// #define Z7_USE_HW_SHA_STUB // for debug
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("Sha512 HW")
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#if defined (__clang__) && defined(_MSC_VER)
|
||||
#if !defined(__AVX__)
|
||||
#include <avxintrin.h>
|
||||
#endif
|
||||
#if !defined(__AVX2__)
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
#if !defined(__SHA512__)
|
||||
#include <sha512intrin.h>
|
||||
#endif
|
||||
#else
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
SHA512 uses:
|
||||
AVX:
|
||||
_mm256_loadu_si256 (vmovdqu)
|
||||
_mm256_storeu_si256
|
||||
_mm256_set_epi32 (unused)
|
||||
AVX2:
|
||||
_mm256_add_epi64 : vpaddq
|
||||
_mm256_shuffle_epi8 : vpshufb
|
||||
_mm256_shuffle_epi32 : pshufd
|
||||
_mm256_blend_epi32 : vpblendd
|
||||
_mm256_permute4x64_epi64 : vpermq : 3c
|
||||
_mm256_permute2x128_si256: vperm2i128 : 3c
|
||||
_mm256_extracti128_si256 : vextracti128 : 3c
|
||||
SHA512:
|
||||
_mm256_sha512*
|
||||
*/
|
||||
|
||||
// K array must be aligned for 32-bytes at least.
|
||||
// The compiler can look at the align attribute and select
|
||||
// vmovdqu - for code without align attribute
|
||||
// vmovdqa - for code with align attribute
|
||||
extern
|
||||
MY_ALIGN(64)
|
||||
const UInt64 SHA512_K_ARRAY[80];
|
||||
#define K SHA512_K_ARRAY
|
||||
|
||||
|
||||
#define ADD_EPI64(dest, src) dest = _mm256_add_epi64(dest, src);
|
||||
#define SHA512_MSG1(dest, src) dest = _mm256_sha512msg1_epi64(dest, _mm256_extracti128_si256(src, 0));
|
||||
#define SHA512_MSG2(dest, src) dest = _mm256_sha512msg2_epi64(dest, src);
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = _mm256_loadu_si256((const __m256i *)(const void *)(data + (k) * 32)); \
|
||||
m = _mm256_shuffle_epi8(m, mask); \
|
||||
|
||||
#define NNN(m0, m1, m2, m3)
|
||||
|
||||
#define SM1(m1, m2, m3, m0) \
|
||||
SHA512_MSG1(m0, m1); \
|
||||
|
||||
#define SM2(m2, m3, m0, m1) \
|
||||
ADD_EPI64(m0, _mm256_permute4x64_epi64(_mm256_blend_epi32(m2, m3, 3), 0x39)); \
|
||||
SHA512_MSG2(m0, m3); \
|
||||
|
||||
#define RND2(t0, t1, lane) \
|
||||
t0 = _mm256_sha512rnds2_epi64(t0, t1, _mm256_extracti128_si256(msg, lane));
|
||||
|
||||
|
||||
|
||||
#define R4(k, m0, m1, m2, m3, OP0, OP1) \
|
||||
msg = _mm256_add_epi64(m0, *(const __m256i *) (const void *) &K[(k) * 4]); \
|
||||
RND2(state0, state1, 0); OP0(m0, m1, m2, m3) \
|
||||
RND2(state1, state0, 1); OP1(m0, m1, m2, m3) \
|
||||
|
||||
|
||||
|
||||
|
||||
#define R16(k, OP0, OP1, OP2, OP3, OP4, OP5, OP6, OP7) \
|
||||
R4 ( (k)*4+0, m0,m1,m2,m3, OP0, OP1 ) \
|
||||
R4 ( (k)*4+1, m1,m2,m3,m0, OP2, OP3 ) \
|
||||
R4 ( (k)*4+2, m2,m3,m0,m1, OP4, OP5 ) \
|
||||
R4 ( (k)*4+3, m3,m0,m1,m2, OP6, OP7 ) \
|
||||
|
||||
#define PREPARE_STATE \
|
||||
state0 = _mm256_shuffle_epi32(state0, 0x4e); /* cdab */ \
|
||||
state1 = _mm256_shuffle_epi32(state1, 0x4e); /* ghef */ \
|
||||
tmp = state0; \
|
||||
state0 = _mm256_permute2x128_si256(state0, state1, 0x13); /* cdgh */ \
|
||||
state1 = _mm256_permute2x128_si256(tmp, state1, 2); /* abef */ \
|
||||
|
||||
|
||||
/*
  Sha512_UpdateBlocks_HW() : x86 / x64 version (AVX2 + SHA512 instructions)
    state     : 8 words of hash state; it's read at start and written at the end.
    data      : input data: (numBlocks) blocks of 128 bytes.
    numBlocks : number of blocks; the function does nothing, if it is 0.
  Each 64-bit word of input data is byte-reversed with (mask) in LOAD_SHUFFLE.
  PREPARE_STATE changes the order of state words: (abcd, efgh) <-> (cdgh, abef).
  The same macro is used after the loop to restore original order.
  The names (mask, data, tmp, state0, state1, msg, m0-m3) are used by macros.
*/
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA512
ATTRIB_SHA512
#endif
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
{
  const __m256i mask = _mm256_set_epi32(
      0x08090a0b,0x0c0d0e0f, 0x00010203,0x04050607,
      0x08090a0b,0x0c0d0e0f, 0x00010203,0x04050607);
  __m256i tmp, state0, state1;

  if (numBlocks == 0)
    return;

  state0 = _mm256_loadu_si256((const __m256i *) (const void *) &state[0]);
  state1 = _mm256_loadu_si256((const __m256i *) (const void *) &state[4]);

  PREPARE_STATE

  do
  {
    // state values before the rounds; they are added back after the rounds
    const __m256i state0_save = state0;
    const __m256i state1_save = state1;
    __m256i m0, m1, m2, m3;
    __m256i msg;
    // #define msg tmp

    LOAD_SHUFFLE (m0, 0)
    LOAD_SHUFFLE (m1, 1)
    LOAD_SHUFFLE (m2, 2)
    LOAD_SHUFFLE (m3, 3)

    // 5 * R16 : 80 rounds
    R16 ( 0, NNN, NNN, SM1, NNN, SM1, SM2, SM1, SM2 )
    R16 ( 1, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
    R16 ( 2, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
    R16 ( 3, SM1, SM2, SM1, SM2, SM1, SM2, SM1, SM2 )
    R16 ( 4, SM1, SM2, NNN, SM2, NNN, NNN, NNN, NNN )

    ADD_EPI64(state0, state0_save)
    ADD_EPI64(state1, state1_save)

    data += 128;
  }
  while (--numBlocks != 0);

  PREPARE_STATE

  _mm256_storeu_si256((__m256i *) (void *) &state[0], state0);
  _mm256_storeu_si256((__m256i *) (void *) &state[4], state1);
}
|
||||
|
||||
#endif // USE_HW_SHA
|
||||
|
||||
// gcc 8.5 also supports sha512, but we need also support in assembler that is called by gcc
|
||||
#elif defined(MY_CPU_ARM64) && defined(MY_CPU_LE)
|
||||
|
||||
#if defined(__ARM_FEATURE_SHA512)
|
||||
#define USE_HW_SHA
|
||||
#else
|
||||
#if (defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 130000) \
|
||||
|| defined(__GNUC__) && (__GNUC__ >= 9) \
|
||||
) \
|
||||
|| defined(Z7_MSC_VER_ORIGINAL) && (_MSC_VER >= 1940) // fix it
|
||||
#define USE_HW_SHA
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef USE_HW_SHA
|
||||
|
||||
// #pragma message("=== Sha512 HW === ")
|
||||
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#if !defined(__ARM_FEATURE_SHA512)
|
||||
// #pragma message("=== we define SHA3 ATTRIB_SHA512 === ")
|
||||
#if defined(__clang__)
|
||||
#define ATTRIB_SHA512 __attribute__((__target__("sha3"))) // "armv8.2-a,sha3"
|
||||
#else
|
||||
#define ATTRIB_SHA512 __attribute__((__target__("arch=armv8.2-a+sha3")))
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(Z7_MSC_VER_ORIGINAL)
|
||||
#include <arm64_neon.h>
|
||||
#else
|
||||
|
||||
#if defined(__clang__) && __clang_major__ < 16
|
||||
#if !defined(__ARM_FEATURE_SHA512)
|
||||
// #pragma message("=== we set __ARM_FEATURE_SHA512 1 === ")
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#define Z7_ARM_FEATURE_SHA512_WAS_SET 1
|
||||
#define __ARM_FEATURE_SHA512 1
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
#endif
|
||||
#endif // clang
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#if defined(Z7_ARM_FEATURE_SHA512_WAS_SET) && \
|
||||
defined(__ARM_FEATURE_SHA512)
|
||||
Z7_DIAGNOSTIC_IGNORE_BEGIN_RESERVED_MACRO_IDENTIFIER
|
||||
#undef __ARM_FEATURE_SHA512
|
||||
#undef Z7_ARM_FEATURE_SHA512_WAS_SET
|
||||
Z7_DIAGNOSTIC_IGNORE_END_RESERVED_MACRO_IDENTIFIER
|
||||
// #pragma message("=== we undefine __ARM_FEATURE_SHA512 === ")
|
||||
#endif
|
||||
|
||||
#endif // Z7_MSC_VER_ORIGINAL
|
||||
|
||||
typedef uint64x2_t v128_64;
|
||||
// typedef __n128 v128_64; // MSVC
|
||||
|
||||
#ifdef MY_CPU_BE
|
||||
#define MY_rev64_for_LE(x) x
|
||||
#else
|
||||
#define MY_rev64_for_LE(x) vrev64q_u8(x)
|
||||
#endif
|
||||
|
||||
#define LOAD_128_64(_p) vld1q_u64(_p)
|
||||
#define LOAD_128_8(_p) vld1q_u8 (_p)
|
||||
#define STORE_128_64(_p, _v) vst1q_u64(_p, _v)
|
||||
|
||||
#define LOAD_SHUFFLE(m, k) \
|
||||
m = vreinterpretq_u64_u8( \
|
||||
MY_rev64_for_LE( \
|
||||
LOAD_128_8(data + (k) * 16))); \
|
||||
|
||||
// K array must be aligned for 16-bytes at least.
|
||||
extern
|
||||
MY_ALIGN(64)
|
||||
const UInt64 SHA512_K_ARRAY[80];
|
||||
#define K SHA512_K_ARRAY
|
||||
|
||||
#define NN(m0, m1, m4, m5, m7)
|
||||
#define SM(m0, m1, m4, m5, m7) \
|
||||
m0 = vsha512su1q_u64(vsha512su0q_u64(m0, m1), m7, vextq_u64(m4, m5, 1));
|
||||
|
||||
#define R2(k, m0,m1,m2,m3,m4,m5,m6,m7, a0,a1,a2,a3, OP) \
|
||||
OP(m0, m1, m4, m5, m7) \
|
||||
t = vaddq_u64(m0, vld1q_u64(k)); \
|
||||
t = vaddq_u64(vextq_u64(t, t, 1), a3); \
|
||||
t = vsha512hq_u64(t, vextq_u64(a2, a3, 1), vextq_u64(a1, a2, 1)); \
|
||||
a3 = vsha512h2q_u64(t, a1, a0); \
|
||||
a1 = vaddq_u64(a1, t); \
|
||||
|
||||
#define R8(k, m0,m1,m2,m3,m4,m5,m6,m7, OP) \
|
||||
R2 ( (k)+0*2, m0,m1,m2,m3,m4,m5,m6,m7, a0,a1,a2,a3, OP ) \
|
||||
R2 ( (k)+1*2, m1,m2,m3,m4,m5,m6,m7,m0, a3,a0,a1,a2, OP ) \
|
||||
R2 ( (k)+2*2, m2,m3,m4,m5,m6,m7,m0,m1, a2,a3,a0,a1, OP ) \
|
||||
R2 ( (k)+3*2, m3,m4,m5,m6,m7,m0,m1,m2, a1,a2,a3,a0, OP ) \
|
||||
|
||||
#define R16(k, OP) \
|
||||
R8 ( (k)+0*2, m0,m1,m2,m3,m4,m5,m6,m7, OP ) \
|
||||
R8 ( (k)+4*2, m4,m5,m6,m7,m0,m1,m2,m3, OP ) \
|
||||
|
||||
|
||||
/*
  Sha512_UpdateBlocks_HW() : arm64 version (SHA512 instructions)
    state     : 8 words of hash state; it's read at start and written at the end.
    data      : input data: (numBlocks) blocks of 128 bytes.
    numBlocks : number of blocks; the function does nothing, if it is 0.
  LOAD_SHUFFLE loads 16 bytes of data to 128-bit vector, and it reverses
  the bytes in each 64-bit word, if MY_CPU_BE is not defined.
  The names (data, t, a0-a3, m0-m7) are used by macros.
*/
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
#ifdef ATTRIB_SHA512
ATTRIB_SHA512
#endif
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
{
  v128_64 a0, a1, a2, a3;

  if (numBlocks == 0)
    return;

  a0 = LOAD_128_64(&state[0]);
  a1 = LOAD_128_64(&state[2]);
  a2 = LOAD_128_64(&state[4]);
  a3 = LOAD_128_64(&state[6]);

  do
  {
    // state values before the rounds; they are added back after the rounds
    const v128_64 a0_save = a0;
    const v128_64 a1_save = a1;
    const v128_64 a2_save = a2;
    const v128_64 a3_save = a3;
    v128_64 m0, m1, m2, m3, m4, m5, m6, m7;
    v128_64 t;
    const UInt64 *k_ptr;
    unsigned numIters;

    LOAD_SHUFFLE (m0, 0)
    LOAD_SHUFFLE (m1, 1)
    LOAD_SHUFFLE (m2, 2)
    LOAD_SHUFFLE (m3, 3)
    LOAD_SHUFFLE (m4, 4)
    LOAD_SHUFFLE (m5, 5)
    LOAD_SHUFFLE (m6, 6)
    LOAD_SHUFFLE (m7, 7)

    // first 16 rounds : (NN) : the words of data block are used as is
    R16 ( K, NN )

    // 4 * 16 rounds : (SM) : with message words update
    k_ptr = K + 16;
    numIters = 4;
    do
    {
      R16 ( k_ptr, SM )
      k_ptr += 16;
    }
    while (--numIters != 0);

    a0 = vaddq_u64(a0, a0_save);
    a1 = vaddq_u64(a1, a1_save);
    a2 = vaddq_u64(a2, a2_save);
    a3 = vaddq_u64(a3, a3_save);

    data += 128;
  }
  while (--numBlocks != 0);

  STORE_128_64(&state[0], a0);
  STORE_128_64(&state[2], a1);
  STORE_128_64(&state[4], a2);
  STORE_128_64(&state[6], a3);
}
|
||||
|
||||
#endif // USE_HW_SHA
|
||||
|
||||
#endif // MY_CPU_X86_OR_AMD64 / MY_CPU_ARM64
|
||||
|
||||
|
||||
#if !defined(USE_HW_SHA) && defined(Z7_USE_HW_SHA_STUB)
|
||||
// #error Stop_Compiling_UNSUPPORTED_SHA
|
||||
// #include <stdlib.h>
|
||||
// We can compile this file with another C compiler,
|
||||
// or we can compile asm version.
|
||||
// So we can generate real code instead of this stub function.
|
||||
// #include "Sha512.h"
|
||||
// #if defined(_MSC_VER)
|
||||
#pragma message("Sha512 HW-SW stub was used")
|
||||
// #endif
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks (UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks);
|
||||
void Z7_FASTCALL Sha512_UpdateBlocks_HW(UInt64 state[8], const Byte *data, size_t numBlocks)
|
||||
{
|
||||
Sha512_UpdateBlocks(state, data, numBlocks);
|
||||
/*
|
||||
UNUSED_VAR(state);
|
||||
UNUSED_VAR(data);
|
||||
UNUSED_VAR(numBlocks);
|
||||
exit(1);
|
||||
return;
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#undef K
|
||||
#undef RND2
|
||||
#undef MY_rev64_for_LE
|
||||
#undef NN
|
||||
#undef NNN
|
||||
#undef LOAD_128
|
||||
#undef STORE_128
|
||||
#undef LOAD_SHUFFLE
|
||||
#undef SM1
|
||||
#undef SM2
|
||||
#undef SM
|
||||
#undef R2
|
||||
#undef R4
|
||||
#undef R16
|
||||
#undef PREPARE_STATE
|
||||
#undef USE_HW_SHA
|
||||
#undef ATTRIB_SHA512
|
||||
#undef USE_VER_MIN
|
||||
#undef Z7_USE_HW_SHA_STUB
|
||||
367
C/Sort.c
367
C/Sort.c
|
|
@ -1,141 +1,268 @@
|
|||
/* Sort.c -- Sort functions
|
||||
2014-04-05 : Igor Pavlov : Public domain */
|
||||
: Igor Pavlov : Public domain */
|
||||
|
||||
#include "Precomp.h"
|
||||
|
||||
#include "Sort.h"
|
||||
#include "CpuArch.h"
|
||||
|
||||
#define HeapSortDown(p, k, size, temp) \
|
||||
{ for (;;) { \
|
||||
size_t s = (k << 1); \
|
||||
if (s > size) break; \
|
||||
if (s < size && p[s + 1] > p[s]) s++; \
|
||||
if (temp >= p[s]) break; \
|
||||
p[k] = p[s]; k = s; \
|
||||
} p[k] = temp; }
|
||||
#if ( (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
|
||||
|| (defined(__clang__) && Z7_has_builtin(__builtin_prefetch)) \
|
||||
)
|
||||
// the code with prefetch is slow for small arrays on x86.
|
||||
// So we disable prefetch for x86.
|
||||
#ifndef MY_CPU_X86
|
||||
// #pragma message("Z7_PREFETCH : __builtin_prefetch")
|
||||
#define Z7_PREFETCH(a) __builtin_prefetch((a))
|
||||
#endif
|
||||
|
||||
void HeapSort(UInt32 *p, size_t size)
|
||||
{
|
||||
if (size <= 1)
|
||||
return;
|
||||
p--;
|
||||
{
|
||||
size_t i = size / 2;
|
||||
do
|
||||
{
|
||||
UInt32 temp = p[i];
|
||||
size_t k = i;
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
while (--i != 0);
|
||||
}
|
||||
/*
|
||||
do
|
||||
{
|
||||
size_t k = 1;
|
||||
UInt32 temp = p[size];
|
||||
p[size--] = p[1];
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
while (size > 1);
|
||||
*/
|
||||
while (size > 3)
|
||||
{
|
||||
UInt32 temp = p[size];
|
||||
size_t k = (p[3] > p[2]) ? 3 : 2;
|
||||
p[size--] = p[1];
|
||||
p[1] = p[k];
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
{
|
||||
UInt32 temp = p[size];
|
||||
p[size] = p[1];
|
||||
if (size > 2 && p[2] < temp)
|
||||
{
|
||||
p[1] = p[2];
|
||||
p[2] = temp;
|
||||
}
|
||||
else
|
||||
p[1] = temp;
|
||||
}
|
||||
}
|
||||
#elif defined(_WIN32) // || defined(_MSC_VER) && (_MSC_VER >= 1200)
|
||||
|
||||
void HeapSort64(UInt64 *p, size_t size)
|
||||
{
|
||||
if (size <= 1)
|
||||
return;
|
||||
p--;
|
||||
{
|
||||
size_t i = size / 2;
|
||||
do
|
||||
{
|
||||
UInt64 temp = p[i];
|
||||
size_t k = i;
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
while (--i != 0);
|
||||
}
|
||||
/*
|
||||
do
|
||||
{
|
||||
size_t k = 1;
|
||||
UInt64 temp = p[size];
|
||||
p[size--] = p[1];
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
while (size > 1);
|
||||
*/
|
||||
while (size > 3)
|
||||
{
|
||||
UInt64 temp = p[size];
|
||||
size_t k = (p[3] > p[2]) ? 3 : 2;
|
||||
p[size--] = p[1];
|
||||
p[1] = p[k];
|
||||
HeapSortDown(p, k, size, temp)
|
||||
}
|
||||
{
|
||||
UInt64 temp = p[size];
|
||||
p[size] = p[1];
|
||||
if (size > 2 && p[2] < temp)
|
||||
{
|
||||
p[1] = p[2];
|
||||
p[2] = temp;
|
||||
}
|
||||
else
|
||||
p[1] = temp;
|
||||
}
|
||||
}
|
||||
#include "7zWindows.h"
|
||||
|
||||
// NOTE: CLANG/GCC/MSVC can define different values for _MM_HINT_T0 / PF_TEMPORAL_LEVEL_1.
|
||||
// For example, clang-cl can generate "prefetcht2" instruction for
|
||||
// PreFetchCacheLine(PF_TEMPORAL_LEVEL_1) call.
|
||||
// But we want to generate "prefetcht0" instruction.
|
||||
// So for CLANG/GCC we must use __builtin_prefetch() in code branch above
|
||||
// instead of PreFetchCacheLine() / _mm_prefetch().
|
||||
|
||||
// New msvc-x86 compiler generates "prefetcht0" instruction for PreFetchCacheLine() call.
|
||||
// But old x86 cpus don't support "prefetcht0".
|
||||
// So we will use PreFetchCacheLine(), only if we are sure that
|
||||
// generated instruction is supported by all cpus of that isa.
|
||||
#if defined(MY_CPU_AMD64) \
|
||||
|| defined(MY_CPU_ARM64) \
|
||||
|| defined(MY_CPU_IA64)
|
||||
// we need to use additional braces for (a) in PreFetchCacheLine call, because
|
||||
// PreFetchCacheLine macro doesn't use braces:
|
||||
// #define PreFetchCacheLine(l, a) _mm_prefetch((CHAR CONST *) a, l)
|
||||
// #pragma message("Z7_PREFETCH : PreFetchCacheLine")
|
||||
#define Z7_PREFETCH(a) PreFetchCacheLine(PF_TEMPORAL_LEVEL_1, (a))
|
||||
#endif
|
||||
|
||||
#endif // _WIN32
|
||||
|
||||
|
||||
#define PREFETCH_NO(p,k,s,size)
|
||||
|
||||
#ifndef Z7_PREFETCH
|
||||
#define SORT_PREFETCH(p,k,s,size)
|
||||
#else
|
||||
|
||||
// #define PREFETCH_LEVEL 2 // use it if cache line is 32-bytes
|
||||
#define PREFETCH_LEVEL 3 // it is fast for most cases (64-bytes cache line prefetch)
|
||||
// #define PREFETCH_LEVEL 4 // it can be faster for big array (128-bytes prefetch)
|
||||
|
||||
#if PREFETCH_LEVEL == 0
|
||||
|
||||
#define SORT_PREFETCH(p,k,s,size)
|
||||
|
||||
#else // PREFETCH_LEVEL != 0
|
||||
|
||||
/*
|
||||
#define HeapSortRefDown(p, vals, n, size, temp) \
|
||||
{ size_t k = n; UInt32 val = vals[temp]; for (;;) { \
|
||||
size_t s = (k << 1); \
|
||||
if (s > size) break; \
|
||||
if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
|
||||
if (val >= vals[p[s]]) break; \
|
||||
p[k] = p[s]; k = s; \
|
||||
} p[k] = temp; }
|
||||
if defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
|
||||
we prefetch one value per cache line.
|
||||
Use it if array is aligned for cache line size (64 bytes)
|
||||
or if array is small (less than L1 cache size).
|
||||
|
||||
void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
|
||||
if !defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
|
||||
we prefetch all cache lines that can be required.
|
||||
it can be faster for big unaligned arrays.
|
||||
*/
|
||||
#define USE_PREFETCH_FOR_ALIGNED_ARRAY
|
||||
|
||||
// s == k * 2
|
||||
#if 0 && PREFETCH_LEVEL <= 3 && defined(MY_CPU_X86_OR_AMD64)
|
||||
// x86 supports (lea r1*8+offset)
|
||||
#define PREFETCH_OFFSET(k,s) ((s) << PREFETCH_LEVEL)
|
||||
#else
|
||||
#define PREFETCH_OFFSET(k,s) ((k) << (PREFETCH_LEVEL + 1))
|
||||
#endif
|
||||
|
||||
#if 1 && PREFETCH_LEVEL <= 3 && defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
|
||||
#define PREFETCH_ADD_OFFSET 0
|
||||
#else
|
||||
// last offset that can be required in PREFETCH_LEVEL step:
|
||||
#define PREFETCH_RANGE ((2 << PREFETCH_LEVEL) - 1)
|
||||
#define PREFETCH_ADD_OFFSET PREFETCH_RANGE / 2
|
||||
#endif
|
||||
|
||||
#if PREFETCH_LEVEL <= 3
|
||||
|
||||
#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY
|
||||
#define SORT_PREFETCH(p,k,s,size) \
|
||||
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_ADD_OFFSET; \
|
||||
if (s2 <= size) { \
|
||||
Z7_PREFETCH((p + s2)); \
|
||||
}}
|
||||
#else /* for unaligned array */
|
||||
#define SORT_PREFETCH(p,k,s,size) \
|
||||
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \
|
||||
if (s2 <= size) { \
|
||||
Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \
|
||||
Z7_PREFETCH((p + s2)); \
|
||||
}}
|
||||
#endif
|
||||
|
||||
#else // PREFETCH_LEVEL > 3
|
||||
|
||||
#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY
|
||||
#define SORT_PREFETCH(p,k,s,size) \
|
||||
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE - 16 / 2; \
|
||||
if (s2 <= size) { \
|
||||
Z7_PREFETCH((p + s2 - 16)); \
|
||||
Z7_PREFETCH((p + s2)); \
|
||||
}}
|
||||
#else /* for unaligned array */
|
||||
#define SORT_PREFETCH(p,k,s,size) \
|
||||
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \
|
||||
if (s2 <= size) { \
|
||||
Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \
|
||||
Z7_PREFETCH((p + s2 - PREFETCH_RANGE / 2)); \
|
||||
Z7_PREFETCH((p + s2)); \
|
||||
}}
|
||||
#endif
|
||||
|
||||
#endif // PREFETCH_LEVEL > 3
|
||||
#endif // PREFETCH_LEVEL != 0
|
||||
#endif // Z7_PREFETCH
|
||||
|
||||
|
||||
#if defined(MY_CPU_ARM64) \
|
||||
/* || defined(MY_CPU_AMD64) */ \
|
||||
/* || defined(MY_CPU_ARM) && !defined(_MSC_VER) */
|
||||
// we want to use cmov, if cmov is very fast:
|
||||
// - this cmov version is slower for clang-x64.
|
||||
// - this cmov version is faster for gcc-arm64 for some fast arm64 cpus.
|
||||
#define Z7_FAST_CMOV_SUPPORTED
|
||||
#endif
|
||||
|
||||
#ifdef Z7_FAST_CMOV_SUPPORTED
|
||||
// we want to use cmov here, if cmov is fast: new arm64 cpus.
|
||||
// we want the compiler to use conditional move for this branch
|
||||
#define GET_MAX_VAL(n0, n1, max_val_slow) if (n0 < n1) n0 = n1;
|
||||
#else
|
||||
// use this branch, if cpu doesn't support fast conditional move.
|
||||
// it uses slow array access reading:
|
||||
#define GET_MAX_VAL(n0, n1, max_val_slow) n0 = max_val_slow;
|
||||
#endif
|
||||
|
||||
#define HeapSortDown(p, k, size, temp, macro_prefetch) \
|
||||
{ \
|
||||
for (;;) { \
|
||||
UInt32 n0, n1; \
|
||||
size_t s = k * 2; \
|
||||
if (s >= size) { \
|
||||
if (s == size) { \
|
||||
n0 = p[s]; \
|
||||
p[k] = n0; \
|
||||
if (temp < n0) k = s; \
|
||||
} \
|
||||
break; \
|
||||
} \
|
||||
n0 = p[k * 2]; \
|
||||
n1 = p[k * 2 + 1]; \
|
||||
s += n0 < n1; \
|
||||
GET_MAX_VAL(n0, n1, p[s]) \
|
||||
if (temp >= n0) break; \
|
||||
macro_prefetch(p, k, s, size) \
|
||||
p[k] = n0; \
|
||||
k = s; \
|
||||
} \
|
||||
p[k] = temp; \
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
stage-1 : O(n) :
|
||||
we generate intermediate partially sorted binary tree:
|
||||
p[0] : it's additional item for better alignment of tree structure in memory.
|
||||
p[1]
|
||||
p[2] p[3]
|
||||
p[4] p[5] p[6] p[7]
|
||||
...
|
||||
p[x] >= p[x * 2]
|
||||
p[x] >= p[x * 2 + 1]
|
||||
|
||||
stage-2 : O(n)*log2(N):
|
||||
we move largest item p[0] from head of tree to the end of array
|
||||
and insert last item to sorted binary tree.
|
||||
*/
|
||||
|
||||
// (p) must be aligned for cache line size (64-bytes) for best performance
|
||||
|
||||
void Z7_FASTCALL HeapSort(UInt32 *p, size_t size)
|
||||
{
|
||||
if (size <= 1)
|
||||
if (size < 2)
|
||||
return;
|
||||
p--;
|
||||
if (size == 2)
|
||||
{
|
||||
size_t i = size / 2;
|
||||
const UInt32 a0 = p[0];
|
||||
const UInt32 a1 = p[1];
|
||||
const unsigned k = a1 < a0;
|
||||
p[k] = a0;
|
||||
p[k ^ 1] = a1;
|
||||
return;
|
||||
}
|
||||
{
|
||||
// stage-1 : O(n)
|
||||
// we transform array to partially sorted binary tree.
|
||||
size_t i = --size / 2;
|
||||
// (size) now is the index of the last item in tree,
|
||||
// if (i)
|
||||
{
|
||||
do
|
||||
{
|
||||
const UInt32 temp = p[i];
|
||||
size_t k = i;
|
||||
HeapSortDown(p, k, size, temp, PREFETCH_NO)
|
||||
}
|
||||
while (--i);
|
||||
}
|
||||
{
|
||||
const UInt32 temp = p[0];
|
||||
const UInt32 a1 = p[1];
|
||||
if (temp < a1)
|
||||
{
|
||||
size_t k = 1;
|
||||
p[0] = a1;
|
||||
HeapSortDown(p, k, size, temp, PREFETCH_NO)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (size < 3)
|
||||
{
|
||||
// size == 2
|
||||
const UInt32 a0 = p[0];
|
||||
p[0] = p[2];
|
||||
p[2] = a0;
|
||||
return;
|
||||
}
|
||||
if (size != 3)
|
||||
{
|
||||
// stage-2 : O(size) * log2(size):
|
||||
// we move largest item p[0] from head to the end of array,
|
||||
// and insert last item to sorted binary tree.
|
||||
do
|
||||
{
|
||||
UInt32 temp = p[i];
|
||||
HeapSortRefDown(p, vals, i, size, temp);
|
||||
const UInt32 temp = p[size];
|
||||
size_t k = p[2] < p[3] ? 3 : 2;
|
||||
p[size--] = p[0];
|
||||
p[0] = p[1];
|
||||
p[1] = p[k];
|
||||
HeapSortDown(p, k, size, temp, SORT_PREFETCH) // PREFETCH_NO
|
||||
}
|
||||
while (--i != 0);
|
||||
while (size != 3);
|
||||
}
|
||||
do
|
||||
{
|
||||
UInt32 temp = p[size];
|
||||
p[size--] = p[1];
|
||||
HeapSortRefDown(p, vals, 1, size, temp);
|
||||
const UInt32 a2 = p[2];
|
||||
const UInt32 a3 = p[3];
|
||||
const size_t k = a2 < a3;
|
||||
p[2] = p[1];
|
||||
p[3] = p[0];
|
||||
p[k] = a3;
|
||||
p[k ^ 1] = a2;
|
||||
}
|
||||
while (size > 1);
|
||||
}
|
||||
*/
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue