This commit is contained in:
Igor Pavlov 2025-07-05 00:00:00 +00:00
parent e5431fa6f5
commit 395149956d
130 changed files with 5532 additions and 2317 deletions

860
Asm/x86/Sort.asm Normal file
View file

@ -0,0 +1,860 @@
; Sort.asm -- ASM version of HeapSort() function
; Igor Pavlov : Public domain
; Shared prelude: MY_ASM_START / MY_PROC / ABI helpers and the register
; aliases (r0/x0, r1/x1, ...) used below are defined in 7zAsm.asm.
include ../../../../Asm/x86/7zAsm.asm
MY_ASM_START
; Optionally place this code in a dedicated 64-byte-aligned segment so that
; MY_ALIGN may use alignments larger than 16 bytes.  Disabled by default:
; the equ line in both branches below is commented out.
ifndef Z7_SORT_ASM_USE_SEGMENT
if (IS_LINUX gt 0)
; Z7_SORT_ASM_USE_SEGMENT equ 1
else
; Z7_SORT_ASM_USE_SEGMENT equ 1
endif
endif
ifdef Z7_SORT_ASM_USE_SEGMENT
_TEXT$Z7_SORT SEGMENT ALIGN(64) 'CODE'
MY_ALIGN macro num:req
align num
endm
else
MY_ALIGN macro num:req
; We expect that ".text" is aligned for 16-bytes.
; So we don't need large alignment inside our function.
align 16
endm
endif
; Fixed-alignment helpers used before hot loop entry points.
MY_ALIGN_16 macro
MY_ALIGN 16
endm
MY_ALIGN_32 macro
MY_ALIGN 32
endm
MY_ALIGN_64 macro
MY_ALIGN 64
endm
ifdef x64
; ------------ x64 (64-bit) implementation ------------
NUM_PREFETCH_LEVELS equ 3 ; to prefetch 1x 64-bytes line (is good for most cases)
; NUM_PREFETCH_LEVELS equ 4 ; to prefetch 2x 64-bytes lines (better for big arrays)
; Register role aliases (rN = 64-bit view, xN = 32-bit view; see 7zAsm.asm):
acc equ x0
k equ r0 ; current parent node index in the heap
k_x equ x0
p equ r1 ; base pointer of the 32-bit item array
s equ r2 ; child index; invariant on STEP entry: s == k * 2
s_x equ x2
a0 equ x3
t0 equ a0 ; value of child (p)[s]
a3 equ x5
qq equ a3 ; value being sifted down the heap
a1 equ x6
t1 equ a1 ; value of child (p)[s + 1]
t1_r equ r6
a2 equ x7
t2 equ a2 ; scratch (STEP_2 child-max value)
i equ r8 ; node counter in the build-heap phase
e0 equ x8 ; cached (p)[0]
e1 equ x9 ; cached (p)[1]
num_last equ r10 ; index of the last item (size - 1 after setup)
num_last_x equ x10
next4_lim equ r11 ; limit: first node whose grand-children are out of range
pref_lim equ r12 ; limit node index for software prefetching
; SORT_2_WITH_TEMP_REG: branchless unsigned 2-item sort: b0 <- min, b1 <- max.
SORT_2_WITH_TEMP_REG macro b0, b1, temp_reg
mov temp_reg, b0
cmp b0, b1
cmovae b0, b1 ; min
cmovae b1, temp_reg ; max
endm
SORT macro b0, b1
SORT_2_WITH_TEMP_REG b0, b1, acc
endm
; LOAD/STORE: read/write 32-bit array element (p)[index].
LOAD macro dest:req, index:req
mov dest, [p + 4 * index]
endm
STORE macro reg:req, index:req
mov [p + 4 * index], reg
endm
; number of 64-byte cache lines touched by one PREFETCH_OP expansion
if (NUM_PREFETCH_LEVELS gt 3)
num_prefetches equ (1 SHL (NUM_PREFETCH_LEVELS - 3))
else
num_prefetches equ 1
endif
PREFETCH_OP macro offs
cur_offset = 7 * 4 ; it's average offset in 64-bytes cache line.
; cur_offset = 0 ; we can use zero offset, if we are sure that array is aligned for 64-bytes.
rept num_prefetches
if 1
prefetcht0 byte ptr [p + offs + cur_offset]
else
mov pref_x, dword ptr [p + offs + cur_offset]
endif
cur_offset = cur_offset + 64
endm
endm
; PREFETCH_MY: prefetch heap nodes several levels (NUM_PREFETCH_LEVELS)
; below the current position.  NOTE: it destroys (k) by shifting it in
; place; callers rely on the following "mov k, s" in STEP_* to restore it.
PREFETCH_MY macro
if 1
if 1
shl k, NUM_PREFETCH_LEVELS + 3
else
; we delay prefetch instruction to improve main loads
shl k, NUM_PREFETCH_LEVELS
shl k, 3
; shl k, 0
endif
PREFETCH_OP k
elseif 1
shl k, 3
PREFETCH_OP k * (1 SHL NUM_PREFETCH_LEVELS) ; change it
endif
endm
; STEP_1: one level of sift-down.
; Entry: k = parent, s == k * 2, t0 == (p)[s], t1 == (p)[s + 1], qq = sifted value.
; Picks the larger child (CF from "cmp t0, t1" bumps s to the winner's index),
; moves it up into the parent slot, and descends; jumps to exit_label when
; qq >= max child, leaving k as the slot where qq must be stored.
STEP_1 macro exit_label, prefetch_macro
use_cmov_1 equ 1 ; set 1 for cmov, but it's slower in some cases
; set 0 for LOAD after adc s, 0
cmp t0, t1 ; CF = (t0 < t1) : right child is bigger
if use_cmov_1
cmovb t0, t1 ; t0 = max(t0, t1)
; STORE t0, k
endif
adc s, 0 ; s += CF : s = index of the bigger child
if use_cmov_1 eq 0
LOAD t0, s
endif
cmp qq, t0
jae exit_label ; sifted value >= both childs : final slot found
if 1 ; use_cmov_1 eq 0
STORE t0, k ; move winning child up into the parent slot
endif
prefetch_macro
mov t0, [p + s * 8] ; preload next-level childs (p)[s * 2] ...
mov t1, [p + s * 8 + 4] ; ... and (p)[s * 2 + 1]
mov k, s
add s, s ; slower for some cpus
; lea s, dword ptr [s + s] ; slower for some cpus
; shl s, 1 ; faster for some cpus
; lea s, dword ptr [s * 2] ; faster for some cpus
rept 0 ; 1000 for debug : 0 for normal
; number of calls in generate_stage : ~0.6 of number of items
shl k, 0
endm
endm
; STEP_2: same contract as STEP_1, but it preloads BOTH candidate child
; pairs of the next level and selects between them with memory-operand
; cmovb, so the values for the following iteration are available earlier.
; t2 receives the value of the bigger child of the current level.
STEP_2 macro exit_label, prefetch_macro
use_cmov_2 equ 0 ; set 1 for cmov, but it's slower in some cases
; set 0 for LOAD after adc s, 0
cmp t0, t1 ; CF = (t0 < t1)
if use_cmov_2
mov t2, t0
cmovb t2, t1
; STORE t2, k
endif
mov t0, [p + s * 8] ; childs of (s) : kept if left child wins
mov t1, [p + s * 8 + 4]
cmovb t0, [p + s * 8 + 8] ; childs of (s + 1) : taken if right child wins
cmovb t1, [p + s * 8 + 12]
adc s, 0 ; s = index of the bigger child
if use_cmov_2 eq 0
LOAD t2, s ; t2 = value of the bigger child
endif
cmp qq, t2
jae exit_label ; sifted value fits here : stop descending
if 1 ; use_cmov_2 eq 0
STORE t2, k ; move winning child up
endif
prefetch_macro
mov k, s
; add s, s
; lea s, [s + s]
shl s, 1
; lea s, [s * 2]
endm
; MOVE_SMALLEST_UP: full sift-down of value (qq) starting at node (k).
;   STEP         : STEP_1 or STEP_2 (one-level descent macro)
;   use_prefetch : non-zero enables software prefetch of deeper levels
;   num_unrolls  : number of unrolled STEP iterations before the main loop
; Entry: s == k * 2, t0 == (p)[s], t1 == (p)[s + 1], qq = value to place.
; Exit : qq stored at its final slot; k, s, t0, t1 clobbered.
MOVE_SMALLEST_UP macro STEP, use_prefetch, num_unrolls
LOCAL exit_1, exit_2, leaves, opt_loop, last_nodes
; s == k * 2
; t0 == (p)[s]
; t1 == (p)[s + 1]
cmp k, next4_lim
jae leaves
rept num_unrolls
STEP exit_2
cmp k, next4_lim
jae leaves
endm
if use_prefetch
prefetch_macro equ PREFETCH_MY
pref_lim_2 equ pref_lim
; lea pref_lim, dword ptr [num_last + 1]
; shr pref_lim, NUM_PREFETCH_LEVELS + 1
cmp k, pref_lim_2
jae last_nodes
else
prefetch_macro equ
pref_lim_2 equ next4_lim
endif
MY_ALIGN_16
opt_loop:
; main descent loop : runs while prefetching (if enabled) is still useful
STEP exit_2, prefetch_macro
cmp k, pref_lim_2
jb opt_loop
last_nodes:
; k >= pref_lim_2
; 2 cases are possible:
; case-1: num_after_prefetch_levels == 0 && next4_lim = pref_lim_2
; case-2: num_after_prefetch_levels == NUM_PREFETCH_LEVELS - 1 &&
; next4_lim = pref_lim_2 / (NUM_PREFETCH_LEVELS - 1)
if use_prefetch
; finish the last (NUM_PREFETCH_LEVELS - 1) levels without prefetch
yyy = NUM_PREFETCH_LEVELS - 1
while yyy
yyy = yyy - 1
STEP exit_2
if yyy
cmp k, next4_lim
jae leaves
endif
endm
endif
leaves:
; k >= next4_lim == (num_last + 1) / 4 must be provided by previous code.
; we have 2 nodes in (s) level : always
; we can have some nodes in (s * 2) level : low probability case
; we have no nodes in (s * 4) level
; s == k * 2
; t0 == (p)[s]
; t1 == (p)[s + 1]
cmp t0, t1
cmovb t0, t1
adc s, 0
STORE t0, k
; t0 == (p)[s]
; s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1]
; we have 3 possible cases here:
; s * 2 > num_last : (s) node has no childs
; s * 2 == num_last : (s) node has 1 leaf child that is last item of array
; s * 2 < num_last : (s) node has 2 leaf childs. We provide (s * 4 > num_last)
; we check for (s * 2 > num_last) before "cmp qq, t0" check, because
; we will replace conditional jump with cmov instruction later.
lea t1_r, dword ptr [s + s]
cmp t1_r, num_last
ja exit_1 ; if (s * 2 > num_last), we have no childs : it's high probability branch
; it's low probability branch
; s * 2 <= num_last
cmp qq, t0
jae exit_2
; qq < t0, so we go to next level
; we check 1 or 2 childs in next level
mov t0, [p + s * 8]
mov k, s
mov s, t1_r
cmp t1_r, num_last
je @F ; (s == num_last) means that we have single child in tree
; (s < num_last) : so we must read both childs and select max of them.
mov t1, [p + k * 8 + 4]
cmp t0, t1
cmovb t0, t1
adc s, 0
@@:
STORE t0, k
exit_1:
; t0 == (p)[s], s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1]
cmp qq, t0
cmovb k, s ; descend one last level if qq is still smaller
exit_2:
STORE qq, k ; final placement of the sifted value
endm
ifdef Z7_SORT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
;------------------------------------------------------------------------
; HeapSort(p, size) -- x64 version
; Sorts (size) 32-bit items at (p) with heapsort.
; Sizes 2..4 use branchless sorting networks; size < 2 is a no-op.
; Larger sizes: build max-heap (loop_down_1 / loop_down), then repeatedly
; extract the maximum (main_loop_sort / main_loop_sort_5).
; Clobbers: preserved ABI regs are saved/restored via
; MY_PUSH/POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 plus r12.
;------------------------------------------------------------------------
MY_PROC HeapSort, 2
if (IS_LINUX gt 0)
mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux
endif
mov num_last, REG_ABI_PARAM_1 ; r10 <- r6 : linux
; r10 <- r2 : win64
cmp num_last, 2
jb end_1 ; size < 2 : nothing to sort
; MY_PUSH_PRESERVED_ABI_REGS
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
push r12
cmp num_last, 4
ja sort_5
; ---- small sizes 2..4 : branchless sorting network ----
LOAD a0, 0
LOAD a1, 1
SORT a0, a1
cmp num_last, 3
jb end_2 ; size == 2
LOAD a2, 2
je sort_3 ; size == 3
LOAD a3, 3 ; size == 4
SORT a2, a3
SORT a1, a3
STORE a3, 3
sort_3:
SORT a0, a2
SORT a1, a2
STORE a2, 2
jmp end_2
sort_5:
; ---- general case : build the heap ----
; (num_last > 4) is required here
; if (num_last >= 6) : we will use optimized loop for leaf nodes loop_down_1
mov next4_lim, num_last
shr next4_lim, 2
dec num_last ; num_last = index of last item
mov k, num_last
shr k, 1 ; k = last parent node
mov i, num_last
shr i, 2 ; i = last parent whose childs are not leaves
test num_last, 1
jnz size_even
; ODD number of items. So we compare parent with single child
LOAD t1, num_last
LOAD t0, k
SORT_2_WITH_TEMP_REG t1, t0, t2
STORE t1, num_last
STORE t0, k
dec k
size_even:
cmp k, i
jbe loop_down ; jump for num_last == 4 case
if 0 ; 1 for debug
mov r15, k
mov r14d, 1 ; 100
loop_benchmark:
endif
; optimized loop for leaf nodes:
mov t0, [p + k * 8]
mov t1, [p + k * 8 + 4]
MY_ALIGN_16
loop_down_1:
; we compare parent with max of childs:
; lea s, dword ptr [2 * k]
mov s, k
cmp t0, t1
cmovb t0, t1 ; t0 = max of the two childs
adc s, s ; s = index of the bigger child
LOAD t2, k ; t2 = parent value
STORE t0, k ; parent slot <- max child (speculative)
cmp t2, t0
cmovae s, k ; parent was already >= max child : keep it in place
dec k
; we preload next items before STORE operation for calculated address
mov t0, [p + k * 8]
mov t1, [p + k * 8 + 4]
STORE t2, s ; old parent value goes to the selected slot
cmp k, i
jne loop_down_1
if 0 ; 1 for debug
mov k, r15
dec r14d
jnz loop_benchmark
; jmp end_debug
endif
MY_ALIGN_16
loop_down:
; heapify remaining nodes with a full sift-down each
mov t0, [p + i * 8]
mov t1, [p + i * 8 + 4]
LOAD qq, i
mov k, i
lea s, dword ptr [i + i]
; jmp end_debug
DOWN_use_prefetch equ 0
DOWN_num_unrolls equ 0
MOVE_SMALLEST_UP STEP_1, DOWN_use_prefetch, DOWN_num_unrolls
sub i, 1
jnb loop_down
; jmp end_debug
; ---- extraction phase ----
LOAD e0, 0
LOAD e1, 1
LEVEL_3_LIMIT equ 8 ; 8 is default, but 7 also can work
cmp num_last, LEVEL_3_LIMIT + 1
jb main_loop_sort_5
MY_ALIGN_16
main_loop_sort:
; num_last > LEVEL_3_LIMIT
; p[size--] = p[0];
LOAD qq, num_last
STORE e0, num_last ; current max goes to the end of the array
mov e0, e1
mov next4_lim, num_last
shr next4_lim, 2
mov pref_lim, num_last
shr pref_lim, NUM_PREFETCH_LEVELS + 1
dec num_last
if 0 ; 1 for debug
; that optional optimization can improve the performance, if there are identical items in array
; 3 times improvement : if all items in array are identical
; 20% improvement : if items are different for 1 bit only
; 1-10% improvement : if items are different for (2+) bits
; no gain : if items are different
cmp qq, e1
jae next_iter_main
endif
; select bigger of (p)[2], (p)[3] and preload level-3 candidates
LOAD e1, 2
LOAD t0, 3
mov k_x, 2
cmp e1, t0
cmovb e1, t0
mov t0, [p + 4 * (4 + 0)]
mov t1, [p + 4 * (4 + 1)]
cmovb t0, [p + 4 * (4 + 2)]
cmovb t1, [p + 4 * (4 + 3)]
adc k_x, 0
; (qq <= e1), because the tree is correctly sorted
; also here we could check (qq >= e1) or (qq == e1) for faster exit
lea s, dword ptr [k + k]
MAIN_use_prefetch equ 1
MAIN_num_unrolls equ 0
MOVE_SMALLEST_UP STEP_2, MAIN_use_prefetch, MAIN_num_unrolls
next_iter_main:
cmp num_last, LEVEL_3_LIMIT
jne main_loop_sort
; num_last == LEVEL_3_LIMIT
main_loop_sort_5:
; ---- extraction for small remaining sizes (max 2 levels below root) ----
; 4 <= num_last <= LEVEL_3_LIMIT
; p[size--] = p[0];
LOAD qq, num_last
STORE e0, num_last
mov e0, e1
dec num_last_x
LOAD e1, 2
LOAD t0, 3
mov k_x, 2
cmp e1, t0
cmovb e1, t0
adc k_x, 0
lea s_x, dword ptr [k * 2]
cmp s_x, num_last_x
ja exit_2 ; no childs below (k)
mov t0, [p + k * 8]
je exit_1 ; single child case
; s < num_last
mov t1, [p + k * 8 + 4]
cmp t0, t1
cmovb t0, t1
adc s_x, 0
exit_1:
STORE t0, k
cmp qq, t0
cmovb k_x, s_x
exit_2:
STORE qq, k
cmp num_last_x, 3
jne main_loop_sort_5
; num_last == 3 (real_size == 4)
LOAD a0, 2
LOAD a1, 3
STORE e1, 2
STORE e0, 3
SORT a0, a1
end_2:
STORE a0, 0
STORE a1, 1
; end_debug:
; MY_POP_PRESERVED_ABI_REGS
pop r12
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
end_1:
MY_ENDP
else
; ------------ x86 32-bit ------------
ifdef x64
IS_CDECL = 0 ; NOTE(review): only reachable if this generic path is built for x64 - confirm intent
endif
; Register role aliases (same meanings as in the x64 version above):
acc equ x0
k equ r0 ; current parent node index
k_x equ acc
p equ r1 ; base pointer of the 32-bit item array
num_last equ r2 ; index of the last item
num_last_x equ x2
a0 equ x3
t0 equ a0 ; child value
a3 equ x5
i equ r5 ; build-heap loop counter
e0 equ a3 ; cached (p)[0]
a1 equ x6
qq equ a1 ; value being sifted down
a2 equ x7
s equ r7 ; child index; invariant on STEP entry: s == k * 2
s_x equ a2
; SORT: 2-item unsigned sort with a forward (likely not-taken) branch.
SORT macro b0, b1
cmp b1, b0
jae @F
if 1
xchg b0, b1
else
mov acc, b0
mov b0, b1 ; min
mov b1, acc ; max
endif
@@:
endm
LOAD macro dest:req, index:req
mov dest, [p + 4 * index]
endm
STORE macro reg:req, index:req
mov [p + 4 * index], reg
endm
; STEP_1: one sift-down level, branchless child selection via CF + adc.
; Entry: k = parent, s == k * 2, qq = sifted value.
STEP_1 macro exit_label
mov t0, [p + k * 8]
cmp t0, [p + k * 8 + 4] ; CF = (left child < right child)
adc s, 0 ; s = index of the bigger child
LOAD t0, s
STORE t0, k ; lookahead store for the most expected branch (harmless if we exit: exit code overwrites (p)[k] with qq)
cmp qq, t0
jae exit_label
; STORE t0, k ; use if
mov k, s
add s, s
; lea s, dword ptr [s + s]
; shl s, 1
; lea s, dword ptr [s * 2]
endm
; STEP_BRANCH: same as STEP_1 but with a conditional branch instead of adc;
; selected for large arrays (see STEP_LOOP choice in HeapSort below).
STEP_BRANCH macro exit_label
mov t0, [p + k * 8]
cmp t0, [p + k * 8 + 4]
jae @F
inc s ; right child is bigger
mov t0, [p + k * 8 + 4]
@@:
cmp qq, t0
jae exit_label
STORE t0, k
mov k, s
add s, s
endm
; MOVE_SMALLEST_UP (32-bit): full sift-down of (qq) from node (k).
;   STEP        : STEP_1 or STEP_BRANCH (loop body)
;   num_unrolls : unrolled head iterations (NOTE(review): the unrolled
;                 iterations use STEP_1 regardless of STEP - confirm intent)
;   exit_2      : caller-supplied name for the final-store label, so the
;                 caller can also jump to it directly.
; Entry: s == k * 2.  Exit: qq stored at its final slot.
MOVE_SMALLEST_UP macro STEP, num_unrolls, exit_2
LOCAL leaves, opt_loop, single
; s == k * 2
rept num_unrolls
cmp s, num_last
jae leaves
STEP_1 exit_2
endm
cmp s, num_last
jb opt_loop
leaves:
; (s >= num_last) : flags are still set by the "cmp s, num_last" above
jne exit_2 ; s > num_last : no childs at all
single:
; (s == num_last) : single child
mov t0, [p + k * 8]
cmp qq, t0
jae exit_2
STORE t0, k
mov k, s
jmp exit_2
MY_ALIGN_16
opt_loop:
STEP exit_2
cmp s, num_last
jb opt_loop
je single ; fall through to exit_2 when s > num_last
exit_2:
STORE qq, k ; final placement of the sifted value
endm
ifdef Z7_SORT_ASM_USE_SEGMENT
; MY_ALIGN_64
else
MY_ALIGN_16
endif
;------------------------------------------------------------------------
; HeapSort(p, size) -- generic (32-bit x86) version
; Same algorithm as the x64 version: sorting networks for sizes 2..4,
; heapsort (build + extract) otherwise.  Arguments come from the stack
; in cdecl mode, or from ABI registers when built for x64.
;------------------------------------------------------------------------
MY_PROC HeapSort, 2
ifdef x64
if (IS_LINUX gt 0)
mov num_last, REG_ABI_PARAM_1 ; r2 <- r6 : linux
mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux
endif
elseif (IS_CDECL gt 0)
mov num_last, [r4 + REG_SIZE * 2]
mov p, [r4 + REG_SIZE * 1]
endif
cmp num_last, 2
jb end_1 ; size < 2 : nothing to sort
MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
cmp num_last, 4
ja sort_5
; ---- small sizes 2..4 : sorting network ----
LOAD a0, 0
LOAD a1, 1
SORT a0, a1
cmp num_last, 3
jb end_2 ; size == 2
LOAD a2, 2
je sort_3 ; size == 3
LOAD a3, 3 ; size == 4
SORT a2, a3
SORT a1, a3
STORE a3, 3
sort_3:
SORT a0, a2
SORT a1, a2
STORE a2, 2
jmp end_2
sort_5:
; ---- general case : build the heap ----
; num_last > 4
lea i, dword ptr [num_last - 2]
dec num_last ; num_last = index of last item
test i, 1
jz loop_down
; single child
mov t0, [p + num_last * 4] ; last item
mov qq, [p + num_last * 2] ; its parent
dec i
cmp qq, t0
jae loop_down
mov [p + num_last * 2], t0 ; swap parent and single child
mov [p + num_last * 4], qq
MY_ALIGN_16
loop_down:
; (i) is 2 * parent index : childs are items (i) and (i + 1)
mov t0, [p + i * 4]
cmp t0, [p + i * 4 + 4] ; CF = (left child < right child)
mov k, i
mov qq, [p + i * 2] ; parent value
adc k, 0 ; k = index of the bigger child
LOAD t0, k
cmp qq, t0
jae down_next ; parent already >= max child
mov [p + i * 2], t0 ; max child moves up; sift parent value down
lea s, dword ptr [k + k]
DOWN_num_unrolls equ 0
MOVE_SMALLEST_UP STEP_1, DOWN_num_unrolls, down_exit_label
down_next:
sub i, 2
jnb loop_down
; jmp end_debug
; ---- extraction phase ----
LOAD e0, 0
MY_ALIGN_16
main_loop_sort:
; num_last > 3
; inlined first sift-down level for the root:
mov t0, [p + 2 * 4]
cmp t0, [p + 3 * 4] ; CF = ((p)[2] < (p)[3])
LOAD qq, num_last
STORE e0, num_last ; current max goes to the end of the array
LOAD e0, 1
mov s_x, 2
mov k_x, 1
adc s, 0 ; s = index of bigger child of node 1
LOAD t0, s
dec num_last
cmp qq, t0
jae main_exit_label
STORE t0, 1
mov k, s
add s, s
if 1
; for branch data prefetch mode :
; it's faster for large arrays : larger than (1 << 13) items.
MAIN_num_unrolls equ 10
STEP_LOOP equ STEP_BRANCH
else
MAIN_num_unrolls equ 0
STEP_LOOP equ STEP_1
endif
MOVE_SMALLEST_UP STEP_LOOP, MAIN_num_unrolls, main_exit_label
; jmp end_debug
cmp num_last, 3
jne main_loop_sort
; num_last == 3 (real_size == 4)
LOAD a0, 2
LOAD a1, 3
LOAD a2, 1
STORE e0, 3 ; e0 is alias for a3
STORE a2, 2
SORT a0, a1
end_2:
STORE a0, 0
STORE a1, 1
; end_debug:
MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11
end_1:
MY_ENDP
endif
ifdef Z7_SORT_ASM_USE_SEGMENT
_TEXT$Z7_SORT ENDS
endif
; Reference notes (never assembled, guarded by "if 0"):
; instruction latency (Lat) / throughput (TP) tables per microarchitecture
; for the LEA / CMOVB / ADC forms used above, and PREFETCH hint behavior.
if 0
LEA_IS_D8 (R64) [R2 * 4 + 16]
Lat : TP
2 : 1 : adl-e
2 : 3 p056 adl-p
1 : 2 : p15 hsw-rocket
1 : 2 : p01 snb-ivb
1 : 1 : p1 conroe-wsm
1 : 4 : zen3,zen4
2 : 4 : zen1,zen2
LEA_B_IS (R64) [R2 + R3 * 4]
Lat : TP
1 : 1 : adl-e
2 : 3 p056 adl-p
1 : 2 : p15 hsw-rocket
1 : 2 : p01 snb-ivb
1 : 1 : p1 nhm-wsm
1 : 1 : p0 conroe-wsm
1 : 4 : zen3,zen4
2 :2,4 : zen1,zen2
LEA_B_IS_D8 (R64) [R2 + R3 * 4 + 16]
Lat : TP
2 : 1 : adl-e
2 : 3 p056 adl-p
1 : 2 : p15 ice-rocket
3 : 1 : p1/p15 hsw-rocket
3 : 1 : p01 snb-ivb
1 : 1 : p1 nhm-wsm
1 : 1 : p0 conroe-wsm
2,1 : 2 : zen3,zen4
2 : 2 : zen1,zen2
CMOVB (R64, R64)
Lat : TP
1,2 : 2 : adl-e
1 : 2 p06 adl-p
1 : 2 : p06 bwd-rocket
1,2 : 2 : p0156+p06 hsw
1,2 :1.5 : p015+p05 snb-ivb
1,2 : 1 : p015+p05 nhm
1 : 1 : 2*p015 conroe
1 : 2 : zen3,zen4
1 : 4 : zen1,zen2
ADC (R64, 0)
Lat : TP
1,2 : 2 : adl-e
1 : 2 p06 adl-p
1 : 2 : p06 bwd-rocket
1 :1.5 : p0156+p06 hsw
1 :1.5 : p015+p05 snb-ivb
2 : 1 : 2*p015 conroe-wstm
1 : 2 : zen1,zen2,zen3,zen4
PREFETCHNTA : fetch data into non-temporal cache close to the processor, minimizing cache pollution.
L1 : Pentium3
L2 : NetBurst
L1, not L2: Core duo, Core 2, Atom processors
L1, not L2, may fetch into L3 with fast replacement: Nehalem, Westmere, Sandy Bridge, ...
NEHALEM: Fills L1/L3, L1 LRU is not updated
L3 with fast replacement: Xeon Processors based on Nehalem, Westmere, Sandy Bridge, ...
PREFETCHT0 : fetch data into all cache levels.
PREFETCHT1 : fetch data into L2 and L3
endif
end

View file

@ -1,7 +1,7 @@
#define MY_VER_MAJOR 24
#define MY_VER_MINOR 9
#define MY_VER_MAJOR 25
#define MY_VER_MINOR 0
#define MY_VER_BUILD 0
#define MY_VERSION_NUMBERS "24.09"
#define MY_VERSION_NUMBERS "25.00"
#define MY_VERSION MY_VERSION_NUMBERS
#ifdef MY_CPU_NAME
@ -10,12 +10,12 @@
#define MY_VERSION_CPU MY_VERSION
#endif
#define MY_DATE "2024-11-29"
#define MY_DATE "2025-07-05"
#undef MY_COPYRIGHT
#undef MY_VERSION_COPYRIGHT_DATE
#define MY_AUTHOR_NAME "Igor Pavlov"
#define MY_COPYRIGHT_PD "Igor Pavlov : Public domain"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov"
#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov"
#ifdef USE_COPYRIGHT_CR
#define MY_COPYRIGHT MY_COPYRIGHT_CR

View file

@ -1,5 +1,5 @@
/* BwtSort.c -- BWT block sorting
2023-04-02 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -7,6 +7,44 @@
#include "Sort.h"
/* #define BLOCK_SORT_USE_HEAP_SORT */
// #define BLOCK_SORT_USE_HEAP_SORT
#ifdef BLOCK_SORT_USE_HEAP_SORT
/* HeapSortRefDown(): sift-down for an indirect heap.
   p[] holds indices into vals[]; nodes are ordered by vals[p[i]].
   Moves index (temp) down from node (n) of a 1-based heap of (size)
   nodes until vals[temp] is >= the keys of both children. */
#define HeapSortRefDown(p, vals, n, size, temp) \
{ size_t k = n; \
  const UInt32 val = vals[temp]; \
  while (1) \
  { \
    size_t s = k * 2; \
    if (s > size) \
      break; \
    if (s < size && vals[p[s]] < vals[p[s + 1]]) \
      s++; \
    if (vals[p[s]] <= val) \
      break; \
    p[k] = p[s]; \
    k = s; \
  } \
  p[k] = temp; }

/* HeapSortRef(): sorts the index array p[0..size-1] in ascending order of
   the referenced keys vals[p[i]] (classic heapsort over a 1-based view
   of p, obtained via the p-- adjustment). */
void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
{
  size_t i;
  if (size <= 1)
    return;
  p--;  /* 1-based heap indexing */
  /* build phase: heapify all internal nodes, bottom-up */
  for (i = size / 2; i != 0; i--)
  {
    const UInt32 temp = p[i];
    HeapSortRefDown(p, vals, i, size, temp);
  }
  /* extraction phase: move current max key's index to the end */
  for (;;)
  {
    const UInt32 temp = p[size];
    p[size--] = p[1];
    HeapSortRefDown(p, vals, 1, size, temp);
    if (size <= 1)
      break;
  }
}
#endif // BLOCK_SORT_USE_HEAP_SORT
/* Don't change it !!! */
#define kNumHashBytes 2
@ -27,26 +65,27 @@
#else
#define kNumBitsMax 20
#define kIndexMask ((1 << kNumBitsMax) - 1)
#define kNumExtraBits (32 - kNumBitsMax)
#define kNumExtra0Bits (kNumExtraBits - 2)
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
#define kNumBitsMax 20
#define kIndexMask (((UInt32)1 << kNumBitsMax) - 1)
#define kNumExtraBits (32 - kNumBitsMax)
#define kNumExtra0Bits (kNumExtraBits - 2)
#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1)
#define SetFinishedGroupSize(p, size) \
{ *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
{ *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \
if ((size) > (1 << kNumExtra0Bits)) { \
*(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \
*(p) |= 0x40000000; \
*((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \
static void SetGroupSize(UInt32 *p, UInt32 size)
static void SetGroupSize(UInt32 *p, size_t size)
{
if (--size == 0)
return;
*p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax);
*p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax);
if (size >= (1 << kNumExtra0Bits))
{
*p |= 0x40000000;
p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax);
p[1] |= (((UInt32)size >> kNumExtra0Bits) << kNumBitsMax);
}
}
@ -59,12 +98,14 @@ returns: 1 - if there are groups, 0 - no more groups
*/
static
UInt32
unsigned
Z7_FASTCALL
SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, UInt32 left, UInt32 range
#endif
SortGroup(size_t BlockSize, size_t NumSortedBytes,
size_t groupOffset, size_t groupSize,
unsigned NumRefBits, UInt32 *Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, size_t left, size_t range
#endif
)
{
UInt32 *ind2 = Indices + groupOffset;
@ -79,90 +120,93 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
return 0;
}
Groups = Indices + BlockSize + BS_TEMP_SIZE;
if (groupSize <= ((UInt32)1 << NumRefBits)
#ifndef BLOCK_SORT_USE_HEAP_SORT
if (groupSize <= ((size_t)1 << NumRefBits)
#ifndef BLOCK_SORT_USE_HEAP_SORT
&& groupSize <= range
#endif
#endif
)
{
UInt32 *temp = Indices + BlockSize;
UInt32 j;
UInt32 mask, thereAreGroups, group, cg;
size_t j, group;
UInt32 mask, cg;
unsigned thereAreGroups;
{
UInt32 gPrev;
UInt32 gRes = 0;
{
UInt32 sp = ind2[0] + NumSortedBytes;
if (sp >= BlockSize) sp -= BlockSize;
size_t sp = ind2[0] + NumSortedBytes;
if (sp >= BlockSize)
sp -= BlockSize;
gPrev = Groups[sp];
temp[0] = (gPrev << NumRefBits);
temp[0] = gPrev << NumRefBits;
}
for (j = 1; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes;
size_t sp = ind2[j] + NumSortedBytes;
UInt32 g;
if (sp >= BlockSize) sp -= BlockSize;
if (sp >= BlockSize)
sp -= BlockSize;
g = Groups[sp];
temp[j] = (g << NumRefBits) | j;
temp[j] = (g << NumRefBits) | (UInt32)j;
gRes |= (gPrev ^ g);
}
if (gRes == 0)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
#endif
return 1;
}
}
HeapSort(temp, groupSize);
mask = (((UInt32)1 << NumRefBits) - 1);
mask = ((UInt32)1 << NumRefBits) - 1;
thereAreGroups = 0;
group = groupOffset;
cg = (temp[0] >> NumRefBits);
cg = temp[0] >> NumRefBits;
temp[0] = ind2[temp[0] & mask];
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
#else
size_t prevGroupStart = 0;
#endif
for (j = 1; j < groupSize; j++)
{
UInt32 val = temp[j];
UInt32 cgCur = (val >> NumRefBits);
const UInt32 val = temp[j];
const UInt32 cgCur = val >> NumRefBits;
if (cgCur != cg)
{
cg = cgCur;
group = groupOffset + j;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = group - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
const size_t t = group - 1;
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
}
#else
#else
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
#endif
}
else
thereAreGroups = 1;
{
UInt32 ind = ind2[val & mask];
temp[j] = ind;
Groups[ind] = group;
const UInt32 ind = ind2[val & mask];
temp[j] = ind;
Groups[ind] = (UInt32)group;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(temp + prevGroupStart, j - prevGroupStart);
#endif
#endif
}
for (j = 0; j < groupSize; j++)
@ -172,37 +216,42 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
/* Check that all strings are in one group (cannot sort) */
{
UInt32 group, j;
UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
UInt32 group;
size_t j;
size_t sp = ind2[0] + NumSortedBytes;
if (sp >= BlockSize)
sp -= BlockSize;
group = Groups[sp];
for (j = 1; j < groupSize; j++)
{
sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
sp = ind2[j] + NumSortedBytes;
if (sp >= BlockSize)
sp -= BlockSize;
if (Groups[sp] != group)
break;
}
if (j == groupSize)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
#endif
return 1;
}
}
#ifndef BLOCK_SORT_USE_HEAP_SORT
#ifndef BLOCK_SORT_USE_HEAP_SORT
{
/* ---------- Range Sort ---------- */
UInt32 i;
UInt32 mid;
size_t i;
size_t mid;
for (;;)
{
UInt32 j;
size_t j;
if (range <= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2, groupSize);
#endif
#endif
return 1;
}
mid = left + ((range + 1) >> 1);
@ -210,7 +259,7 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
i = 0;
do
{
UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
if (Groups[sp] >= mid)
{
for (j--; j > i; j--)
@ -238,51 +287,53 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
break;
}
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = (groupOffset + i - 1);
const size_t t = groupOffset + i - 1;
UInt32 *Flags = Groups + BlockSize;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
}
#endif
#endif
{
UInt32 j;
size_t j;
for (j = i; j < groupSize; j++)
Groups[ind2[j]] = groupOffset + i;
Groups[ind2[j]] = (UInt32)(groupOffset + i);
}
{
UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left);
return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left));
}
}
#else
#else // BLOCK_SORT_USE_HEAP_SORT
/* ---------- Heap Sort ---------- */
{
UInt32 j;
size_t j;
for (j = 0; j < groupSize; j++)
{
UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize;
ind2[j] = sp;
size_t sp = ind2[j] + NumSortedBytes;
if (sp >= BlockSize)
sp -= BlockSize;
ind2[j] = (UInt32)sp;
}
HeapSortRef(ind2, Groups, groupSize);
/* Write Flags */
{
UInt32 sp = ind2[0];
size_t sp = ind2[0];
UInt32 group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags = Groups + BlockSize;
#else
UInt32 prevGroupStart = 0;
#endif
#else
size_t prevGroupStart = 0;
#endif
for (j = 1; j < groupSize; j++)
{
@ -290,149 +341,210 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr
if (Groups[sp] != group)
{
group = Groups[sp];
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 t = groupOffset + j - 1;
const size_t t = groupOffset + j - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
}
#else
#else
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
prevGroupStart = j;
#endif
#endif
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart);
#endif
#endif
}
{
/* Write new Groups values and Check that there are groups */
UInt32 thereAreGroups = 0;
unsigned thereAreGroups = 0;
for (j = 0; j < groupSize; j++)
{
UInt32 group = groupOffset + j;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
size_t group = groupOffset + j;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax);
if ((ind2[j] & 0x40000000) != 0)
if (ind2[j] & 0x40000000)
subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits);
subGroupSize++;
for (;;)
{
UInt32 original = ind2[j];
UInt32 sp = original & kIndexMask;
if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp | (original & ~kIndexMask);
Groups[sp] = group;
const UInt32 original = ind2[j];
size_t sp = original & kIndexMask;
if (sp < NumSortedBytes)
sp += BlockSize;
sp -= NumSortedBytes;
ind2[j] = (UInt32)sp | (original & ~kIndexMask);
Groups[sp] = (UInt32)group;
if (--subGroupSize == 0)
break;
j++;
thereAreGroups = 1;
}
#else
#else
UInt32 *Flags = Groups + BlockSize;
for (;;)
{
UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes;
ind2[j] = sp;
Groups[sp] = group;
size_t sp = ind2[j];
if (sp < NumSortedBytes)
sp += BlockSize;
sp -= NumSortedBytes;
ind2[j] = (UInt32)sp;
Groups[sp] = (UInt32)group;
if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0)
break;
j++;
thereAreGroups = 1;
}
#endif
#endif
}
return thereAreGroups;
}
}
#endif
#endif // BLOCK_SORT_USE_HEAP_SORT
}
/* conditions: blockSize > 0 */
UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize)
{
UInt32 *counters = Indices + blockSize;
UInt32 i;
size_t i;
UInt32 *Groups;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 *Flags;
#endif
#endif
/* Radix-Sort for 2 bytes */
/* Radix-Sort for 2 bytes */
// { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) {
for (i = 0; i < kNumHashValues; i++)
counters[i] = 0;
for (i = 0; i < blockSize - 1; i++)
counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++;
counters[((UInt32)data[i] << 8) | data[0]]++;
{
const Byte *data2 = data;
size_t a = data[(size_t)blockSize - 1];
const Byte *data_lim = data + blockSize;
if (blockSize >= 4)
{
data_lim -= 3;
do
{
size_t b;
b = data2[0]; counters[(a << 8) | b]++;
a = data2[1]; counters[(b << 8) | a]++;
b = data2[2]; counters[(a << 8) | b]++;
a = data2[3]; counters[(b << 8) | a]++;
data2 += 4;
}
while (data2 < data_lim);
data_lim += 3;
}
while (data2 != data_lim)
{
size_t b = *data2++;
counters[(a << 8) | b]++;
a = b;
}
}
// }}
Groups = counters + BS_TEMP_SIZE;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
Flags = Groups + blockSize;
{
UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
for (i = 0; i < numWords; i++)
Flags[i] = kAllFlags;
}
#endif
{
const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits;
for (i = 0; i < numWords; i++)
Flags[i] = kAllFlags;
}
#endif
{
UInt32 sum = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 groupSize = counters[i];
if (groupSize > 0)
const UInt32 groupSize = counters[i];
counters[i] = sum;
sum += groupSize;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
if (groupSize)
{
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 t = sum + groupSize - 1;
Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask));
#endif
sum += groupSize;
const UInt32 t = sum - 1;
Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask));
}
counters[i] = sum - groupSize;
#endif
}
}
for (i = 0; i < blockSize - 1; i++)
Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]];
Groups[i] = counters[((unsigned)data[i] << 8) | data[0]];
{
#define SET_Indices(a, b, i) \
{ UInt32 c; \
a = (a << 8) | (b); \
c = counters[a]; \
Indices[c] = (UInt32)i++; \
counters[a] = c + 1; \
}
for (i = 0; i < blockSize - 1; i++)
Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]];
Groups[i] = counters[((UInt32)data[i] << 8) | data[0]];
for (i = 0; i < blockSize - 1; i++)
Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i;
Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
size_t a = data[0];
const Byte *data_ptr = data + 1;
i = 0;
if (blockSize >= 3)
{
blockSize -= 2;
do
{
size_t b;
b = data_ptr[0]; SET_Indices(a, b, i)
a = data_ptr[1]; SET_Indices(b, a, i)
data_ptr += 2;
}
while (i < blockSize);
blockSize += 2;
}
if (i < blockSize - 1)
{
SET_Indices(a, data[(size_t)i + 1], i)
a = (Byte)a;
}
SET_Indices(a, data[0], i)
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
{
UInt32 prev = 0;
for (i = 0; i < kNumHashValues; i++)
{
UInt32 prevGroupSize = counters[i] - prev;
const UInt32 prevGroupSize = counters[i] - prev;
if (prevGroupSize == 0)
continue;
SetGroupSize(Indices + prev, prevGroupSize);
prev = counters[i];
}
}
#endif
}
#endif
{
int NumRefBits;
UInt32 NumSortedBytes;
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++);
unsigned NumRefBits;
size_t NumSortedBytes;
for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++)
{}
NumRefBits = 32 - NumRefBits;
if (NumRefBits > kNumRefBitsMax)
NumRefBits = kNumRefBitsMax;
NumRefBits = kNumRefBitsMax;
for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1)
{
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
UInt32 finishedGroupSize = 0;
#endif
UInt32 newLimit = 0;
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
size_t finishedGroupSize = 0;
#endif
size_t newLimit = 0;
for (i = 0; i < blockSize;)
{
UInt32 groupSize;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
size_t groupSize;
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0)
{
@ -441,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
}
for (groupSize = 1;
(Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0;
groupSize++);
groupSize++)
{}
groupSize++;
#else
#else
groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
{
BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
if ((Indices[i] & 0x40000000) != 0)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
if (finishedGroup || groupSize == 1)
{
Indices[i - finishedGroupSize] &= kIndexMask;
if (finishedGroupSize > 1)
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0);
if (Indices[i] & 0x40000000)
{
UInt32 newGroupSize = groupSize + finishedGroupSize;
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
finishedGroupSize = newGroupSize;
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
Indices[(size_t)i + 1] &= kIndexMask;
}
i += groupSize;
continue;
}
finishedGroupSize = 0;
Indices[i] &= kIndexMask;
groupSize++;
if (finishedGroup || groupSize == 1)
{
Indices[i - finishedGroupSize] &= kIndexMask;
if (finishedGroupSize > 1)
Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask;
{
const size_t newGroupSize = groupSize + finishedGroupSize;
SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize)
finishedGroupSize = newGroupSize;
}
i += groupSize;
continue;
}
finishedGroupSize = 0;
}
#endif
#endif
if (NumSortedBytes >= blockSize)
{
UInt32 j;
size_t j;
for (j = 0; j < groupSize; j++)
{
UInt32 t = (i + j);
size_t t = i + j;
/* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */
Groups[Indices[t]] = t;
Groups[Indices[t]] = (UInt32)t;
}
}
else
if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices
#ifndef BLOCK_SORT_USE_HEAP_SORT
, 0, blockSize
#endif
) != 0)
#ifndef BLOCK_SORT_USE_HEAP_SORT
, 0, blockSize
#endif
))
newLimit = i + groupSize;
i += groupSize;
}
@ -498,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize)
break;
}
}
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
#ifndef BLOCK_SORT_EXTERNAL_FLAGS
for (i = 0; i < blockSize;)
{
UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax);
if ((Indices[i] & 0x40000000) != 0)
size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax;
if (Indices[i] & 0x40000000)
{
groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits);
groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits;
Indices[(size_t)i + 1] &= kIndexMask;
}
Indices[i] &= kIndexMask;
groupSize++;
i += groupSize;
}
#endif
#endif
return Groups[0];
}

View file

@ -1,5 +1,5 @@
/* BwtSort.h -- BWT block sorting
2023-03-03 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_BWT_SORT_H
#define ZIP7_INC_BWT_SORT_H
@ -10,16 +10,17 @@ EXTERN_C_BEGIN
/* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */
/* #define BLOCK_SORT_EXTERNAL_FLAGS */
// #define BLOCK_SORT_EXTERNAL_FLAGS
#ifdef BLOCK_SORT_EXTERNAL_FLAGS
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5))
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) (((blockSize) + 31) >> 5)
#else
#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0
#endif
#define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16))
UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize);
UInt32 BlockSort(UInt32 *indices, const Byte *data, size_t blockSize);
EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* Compiler.h : Compiler specific defines and pragmas
2024-01-22 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_COMPILER_H
#define ZIP7_INC_COMPILER_H
@ -183,6 +183,16 @@ typedef void (*Z7_void_Function)(void);
#define Z7_ATTRIB_NO_VECTORIZE
#endif
#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920)
#define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )")
#define Z7_PRAGMA_OPTIMIZE_DEFAULT _Pragma("optimize ( \"\", on )")
#else
#define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE
#define Z7_PRAGMA_OPTIMIZE_DEFAULT
#endif
#if defined(MY_CPU_X86_OR_AMD64) && ( \
defined(__clang__) && (__clang_major__ >= 4) \
|| defined(__GNUC__) && (__GNUC__ >= 5))

View file

@ -47,6 +47,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers.
#define MY_CPU_SIZEOF_POINTER 4
#endif
#if defined(__SSE2__) \
|| defined(MY_CPU_AMD64) \
|| defined(_M_IX86_FP) && (_M_IX86_FP >= 2)
#define MY_CPU_SSE2
#endif
#if defined(_M_ARM64) \
|| defined(_M_ARM64EC) \
@ -571,10 +577,12 @@ problem-4 : performace:
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_NATIVE_TO_BE_32(v) (v)
// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b1) | ((b0) << 8))
#elif defined(MY_CPU_LE)
#define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v)
#define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v)
#define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v)
// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b0) | ((b1) << 8))
#else
#error Stop_Compiling_Unknown_Endian_CONV
#endif

View file

@ -1,60 +1,125 @@
/* HuffEnc.c -- functions for Huffman encoding
2023-09-07 : Igor Pavlov : Public domain */
Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "HuffEnc.h"
#include "Sort.h"
#include "CpuArch.h"
#define kMaxLen 16
#define NUM_BITS 10
#define MASK ((1u << NUM_BITS) - 1)
#define kMaxLen Z7_HUFFMAN_LEN_MAX
#define NUM_BITS 10
#define MASK ((1u << NUM_BITS) - 1)
#define FREQ_MASK (~(UInt32)MASK)
#define NUM_COUNTERS (48 * 2)
#define NUM_COUNTERS 64
#if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE))
#if defined(MY_CPU_LE)
#define HI_HALF_OFFSET 1
#else
#define HI_HALF_OFFSET 0
#endif
#define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET))
#define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val);
#define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi)
#define UPDATE_E(eHi) eHi++;
#else
#define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS))
#define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent field
#define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS))
#define UPDATE_E(eHi) eHi += 1 << NUM_BITS;
#endif
#define HUFFMAN_SPEED_OPT
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen)
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen)
{
UInt32 num = 0;
/* if (maxLen > 10) maxLen = 10; */
#if NUM_COUNTERS > 2
unsigned counters[NUM_COUNTERS];
#endif
#if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2
#define lenCounters (counters)
#define codes (counters + kMaxLen + 4)
#else
unsigned lenCounters[kMaxLen + 1];
UInt32 codes[kMaxLen + 1];
#endif
unsigned num;
{
UInt32 i;
unsigned i;
// UInt32 sum = 0;
#if NUM_COUNTERS > 2
#ifdef HUFFMAN_SPEED_OPT
UInt32 counters[NUM_COUNTERS];
#define CTR_ITEM_FOR_FREQ(freq) \
counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)]
for (i = 0; i < NUM_COUNTERS; i++)
counters[i] = 0;
for (i = 0; i < numSymbols; i++)
memset(lens, 0, numSymbols);
{
UInt32 freq = freqs[i];
counters[(freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1]++;
const UInt32 *fp = freqs + numSymbols;
#define NUM_UNROLLS 1
#if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possisble
if (numSymbols & 1)
{
UInt32 f;
f = *--fp; CTR_ITEM_FOR_FREQ(f)++;
// sum += f;
}
#endif
do
{
UInt32 f;
fp -= NUM_UNROLLS;
f = fp[0]; CTR_ITEM_FOR_FREQ(f)++;
// sum += f;
#if NUM_UNROLLS > 1
f = fp[1]; CTR_ITEM_FOR_FREQ(f)++;
// sum += f;
#endif
}
while (fp != freqs);
}
for (i = 1; i < NUM_COUNTERS; i++)
#if 0
printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols);
{
UInt32 temp = counters[i];
counters[i] = num;
num += temp;
unsigned k = 0;
for (k = 0; k < NUM_COUNTERS; k++)
printf(" %u", counters[k]);
}
for (i = 0; i < numSymbols; i++)
#endif
num = counters[1];
counters[1] = 0;
for (i = 2; i != NUM_COUNTERS; i += 2)
{
UInt32 freq = freqs[i];
if (freq == 0)
lens[i] = 0;
else
p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS);
unsigned c;
c = (counters )[i]; (counters )[i] = num; num += c;
c = (counters + 1)[i]; (counters + 1)[i] = num; num += c;
}
counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array
{
i = 0;
do
{
const UInt32 f = freqs[i];
#if 0
if (f == 0) lens[i] = 0; else
#endif
p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS);
}
while (++i != numSymbols);
}
counters[0] = 0;
HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]);
#else
#else // NUM_COUNTERS <= 2
num = 0;
for (i = 0; i < numSymbols; i++)
{
UInt32 freq = freqs[i];
const UInt32 freq = freqs[i];
if (freq == 0)
lens[i] = 0;
else
@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
}
HeapSort(p, num);
#endif
#endif
}
if (num < 2)
if (num <= 2)
{
unsigned minCode = 0;
unsigned maxCode = 1;
if (num == 1)
if (num)
{
maxCode = (unsigned)p[0] & MASK;
if (maxCode == 0)
maxCode = (unsigned)p[(size_t)num - 1] & MASK;
if (num == 2)
{
minCode = (unsigned)p[0] & MASK;
if (minCode > maxCode)
{
const unsigned temp = minCode;
minCode = maxCode;
maxCode = temp;
}
}
else if (maxCode == 0)
maxCode++;
}
p[minCode] = 0;
@ -80,75 +155,206 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb
lens[minCode] = lens[maxCode] = 1;
return;
}
{
UInt32 b, e, i;
i = b = e = 0;
do
unsigned i;
for (i = 0; i <= kMaxLen; i++)
lenCounters[i] = 0;
lenCounters[1] = 2; // by default root node has 2 child leaves at level 1.
}
// if (num != 2)
{
// num > 2
// the binary tree will contain (num - 1) internal nodes.
// p[num - 2] will be root node of binary tree.
UInt32 *b;
UInt32 *n;
// first node will have two leaf childs: p[0] and p[1]:
// p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs
// if (pi == n) exit(0);
// if (pi != n)
{
UInt32 n, m, freq;
n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
freq = (p[n] & ~MASK);
p[n] = (p[n] & MASK) | (e << NUM_BITS);
m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++;
freq += (p[m] & ~MASK);
p[m] = (p[m] & MASK) | (e << NUM_BITS);
p[e] = (p[e] & MASK) | freq;
e++;
}
while (num - e > 1);
{
UInt32 lenCounters[kMaxLen + 1];
for (i = 0; i <= kMaxLen; i++)
lenCounters[i] = 0;
p[--e] &= MASK;
lenCounters[1] = 2;
while (e != 0)
UInt32 fb = (p[1] & FREQ_MASK) + p[0];
UInt32 f = p[2] & FREQ_MASK;
const UInt32 *pi = p + 2;
UInt32 *e = p;
UInt32 eHi = 0;
n = p + num;
b = p;
// p[0] = fb;
for (;;)
{
UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1;
p[e] = (p[e] & MASK) | (len << NUM_BITS);
if (len >= maxLen)
for (len = maxLen - 1; lenCounters[len] == 0; len--);
lenCounters[len]--;
lenCounters[(size_t)len + 1] += 2;
}
{
UInt32 len;
i = 0;
for (len = maxLen; len != 0; len--)
{
UInt32 k;
for (k = lenCounters[len]; k != 0; k--)
lens[p[i++] & MASK] = (Byte)len;
}
}
{
UInt32 nextCodes[kMaxLen + 1];
{
UInt32 code = 0;
UInt32 len;
for (len = 1; len <= kMaxLen; len++)
nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1;
}
/* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
// (b <= e)
UInt32 sum;
e++;
UPDATE_E(eHi)
// (b < e)
// p range : high bits
// [0, b) : parent : processed nodes that have parent and childs
// [b, e) : FREQ : non-processed nodes that have no parent but have childs
// [e, pi) : FREQ : processed leaves for which parent node was created
// [pi, n) : FREQ : non-processed leaves for which parent node was not created
// first child
// note : (*b < f) is same result as ((*b & FREQ_MASK) < f)
if (fb < f)
{
UInt32 k;
for (k = 0; k < numSymbols; k++)
p[k] = nextCodes[lens[k]]++;
// node freq is smaller
sum = fb & FREQ_MASK;
STORE_PARENT_DIRECT (b, fb, eHi)
b++;
fb = *b;
if (b == e)
{
if (++pi == n)
break;
sum += f;
fb &= MASK;
fb |= sum;
*e = fb;
f = *pi & FREQ_MASK;
continue;
}
}
else if (++pi == n)
{
STORE_PARENT_DIRECT (b, fb, eHi)
b++;
break;
}
else
{
sum = f;
f = *pi & FREQ_MASK;
}
// (b < e)
// second child
if (fb < f)
{
sum += fb;
sum &= FREQ_MASK;
STORE_PARENT_DIRECT (b, fb, eHi)
b++;
*e = (*e & MASK) | sum; // set frequency sum
// (b <= e) is possible here
fb = *b;
}
else if (++pi == n)
break;
else
{
sum += f;
f = *pi & FREQ_MASK;
*e = (*e & MASK) | sum; // set frequency sum
}
}
}
// printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p);
{
n -= 2;
*n &= MASK; // root node : we clear high bits (zero bits mean level == 0)
if (n != b)
{
// We go here, if we have some number of non-created nodes up to root.
// We process them in simplified code:
// position of parent for each pair of nodes is known.
// n[-2], n[-1] : current pair of child nodes
// (p1) : parent node for current pair.
UInt32 *p1 = n;
do
{
const unsigned len = LOAD_PARENT(p1) + 1;
p1--;
(lenCounters )[len] -= 2; // we remove 2 leaves from level (len)
(lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1)
n -= 2;
STORE_PARENT (n , n[0], len)
STORE_PARENT (n + 1, n[1], len)
}
while (n != b);
}
}
if (b != p)
{
// we detect level of each node (realtive to root),
// and update lenCounters[].
// We process only intermediate nodes and we don't process leaves.
do
{
// if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii])
// if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree
unsigned len;
b--;
len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1;
STORE_PARENT (b, *b, len)
if (len >= maxLen)
{
// We are not allowed to create node at level (maxLen) and higher,
// because all leaves must be placed to level (maxLen) or lower.
// We find nearest allowed leaf and place current node to level of that leaf:
for (len = maxLen - 1; lenCounters[len] == 0; len--) {}
}
lenCounters[len]--; // we remove 1 leaf from level (len)
(lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1)
}
while (b != p);
}
}
{
{
unsigned len = maxLen;
const UInt32 *p2 = p;
do
{
unsigned k = lenCounters[len];
if (k)
do
lens[(unsigned)*p2++ & MASK] = (Byte)len;
while (--k);
}
while (--len);
}
codes[0] = 0; // we don't want garbage values to be written to p[] array.
// codes[1] = 0;
{
UInt32 code = 0;
unsigned len;
for (len = 0; len < kMaxLen; len++)
(codes + 1)[len] = code = (code + lenCounters[len]) << 1;
}
/* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */
{
const Byte * const limit = lens + numSymbols;
do
{
unsigned len;
UInt32 c;
len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1;
// len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1;
p += 1;
lens += 1;
}
while (lens != limit);
}
}
}
#undef kMaxLen
#undef NUM_BITS
#undef MASK
#undef FREQ_MASK
#undef NUM_COUNTERS
#undef HUFFMAN_SPEED_OPT
#undef CTR_ITEM_FOR_FREQ
#undef LOAD_PARENT
#undef STORE_PARENT
#undef STORE_PARENT_DIRECT
#undef UPDATE_E
#undef HI_HALF_OFFSET
#undef NUM_UNROLLS
#undef lenCounters
#undef codes

View file

@ -1,5 +1,5 @@
/* HuffEnc.h -- Huffman encoding
2023-03-05 : Igor Pavlov : Public domain */
Igor Pavlov : Public domain */
#ifndef ZIP7_INC_HUFF_ENC_H
#define ZIP7_INC_HUFF_ENC_H
@ -8,14 +8,14 @@
EXTERN_C_BEGIN
#define Z7_HUFFMAN_LEN_MAX 16
/*
Conditions:
num <= 1024 = 2 ^ NUM_BITS
2 <= num <= 1024 = 2 ^ NUM_BITS
Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS)
maxLen <= 16 = kMaxLen
1 <= maxLen <= 16 = Z7_HUFFMAN_LEN_MAX
Num_Items(p) >= HUFFMAN_TEMP_SIZE(num)
*/
void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen);
EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* LzFind.c -- Match finder for LZ algorithms
2024-03-01 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -404,7 +404,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
const unsigned nbMax =
(p->numHashBytes == 2 ? 16 :
(p->numHashBytes == 3 ? 24 : 32));
if (numBits > nbMax)
if (numBits >= nbMax)
numBits = nbMax;
if (numBits >= 32)
hs = (UInt32)0 - 1;
@ -416,14 +416,14 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
hs |= (256 << kLzHash_CrcShift_2) - 1;
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize);
if (hs > hs2)
if (hs >= hs2)
hs = hs2;
}
hsCur = hs;
if (p->expectedDataSize < historySize)
{
const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize);
if (hsCur > hs2)
if (hsCur >= hs2)
hsCur = hs2;
}
}
@ -434,7 +434,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
if (p->expectedDataSize < historySize)
{
hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize);
if (hsCur > hs) // is it possible?
if (hsCur >= hs) // is it possible?
hsCur = hs;
}
}
@ -890,7 +890,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
return d;
{
const Byte *pb = cur - delta;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
if (pb[maxLen] == cur[maxLen] && *pb == *cur)
{
UInt32 len = 0;
@ -925,7 +925,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos,
break;
{
ptrdiff_t diff;
curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)];
diff = (ptrdiff_t)0 - (ptrdiff_t)delta;
if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff])
{
@ -972,7 +972,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
// if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; }
cmCheck = (UInt32)(pos - _cyclicBufferSize);
if ((UInt32)pos <= _cyclicBufferSize)
if ((UInt32)pos < _cyclicBufferSize)
cmCheck = 0;
if (cmCheck < curMatch)
@ -980,7 +980,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt
{
const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
const UInt32 pair0 = pair[0];
@ -1039,7 +1039,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
UInt32 cmCheck;
cmCheck = (UInt32)(pos - _cyclicBufferSize);
if ((UInt32)pos <= _cyclicBufferSize)
if ((UInt32)pos < _cyclicBufferSize)
cmCheck = 0;
if (// curMatch >= pos || // failure
@ -1048,7 +1048,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const
{
const UInt32 delta = pos - curMatch;
{
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1);
const Byte *pb = cur - delta;
unsigned len = (len0 < len1 ? len0 : len1);
if (pb[len] == cur[len])
@ -1595,7 +1595,7 @@ static void Bt5_MatchFinder_Skip(void *_p, UInt32 num)
UInt32 pos = p->pos; \
UInt32 num2 = num; \
/* (p->pos == p->posLimit) is not allowed here !!! */ \
{ const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \
{ const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \
num -= num2; \
{ const UInt32 cycPos = p->cyclicBufferPos; \
son = p->son + cycPos; \

View file

@ -1,5 +1,5 @@
/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2024-01-22 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -82,6 +82,8 @@ extern UInt64 g_NumIters_Bytes;
Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
p->affinityGroup = -1;
p->affinityInGroup = 0;
p->affinity = 0;
p->wasCreated = False;
p->csWasInitialized = False;
@ -259,6 +261,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *
// return ERROR_TOO_MANY_POSTS; // for debug
// return EINVAL; // for debug
#ifdef _WIN32
if (p->affinityGroup >= 0)
wres = Thread_Create_With_Group(&p->thread, startAddress, obj,
(unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup);
else
#endif
if (p->affinity != 0)
wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
else

View file

@ -1,5 +1,5 @@
/* LzFindMt.h -- multithreaded Match finder for LZ algorithms
2024-01-22 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZ_FIND_MT_H
#define ZIP7_INC_LZ_FIND_MT_H
@ -12,8 +12,10 @@ EXTERN_C_BEGIN
typedef struct
{
UInt32 numProcessedBlocks;
CThread thread;
Int32 affinityGroup;
UInt64 affinityInGroup;
UInt64 affinity;
CThread thread;
BoolInt wasCreated;
BoolInt needStart;

View file

@ -1,5 +1,5 @@
/* Lzma2Enc.c -- LZMA2 Encoder
2023-04-13 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -235,6 +235,7 @@ void Lzma2EncProps_Init(CLzma2EncProps *p)
p->numBlockThreads_Reduced = -1;
p->numBlockThreads_Max = -1;
p->numTotalThreads = -1;
p->numThreadGroups = 0;
}
void Lzma2EncProps_Normalize(CLzma2EncProps *p)
@ -781,6 +782,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle p,
}
p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max;
p->mtCoder.numThreadGroups = p->props.numThreadGroups;
p->mtCoder.expectedDataSize = p->expectedDataSize;
{

View file

@ -18,6 +18,7 @@ typedef struct
int numBlockThreads_Reduced;
int numBlockThreads_Max;
int numTotalThreads;
unsigned numThreadGroups; // 0 : no groups
} CLzma2EncProps;
void Lzma2EncProps_Init(CLzma2EncProps *p);

View file

@ -62,7 +62,9 @@ void LzmaEncProps_Init(CLzmaEncProps *p)
p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
p->numHashOutBits = 0;
p->writeEndMark = 0;
p->affinityGroup = -1;
p->affinity = 0;
p->affinityInGroup = 0;
}
void LzmaEncProps_Normalize(CLzmaEncProps *p)
@ -598,6 +600,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2)
p->multiThread = (props.numThreads > 1);
p->matchFinderMt.btSync.affinity =
p->matchFinderMt.hashSync.affinity = props.affinity;
p->matchFinderMt.btSync.affinityGroup =
p->matchFinderMt.hashSync.affinityGroup = props.affinityGroup;
p->matchFinderMt.btSync.affinityInGroup =
p->matchFinderMt.hashSync.affinityInGroup = props.affinityInGroup;
#endif
return SZ_OK;

View file

@ -1,5 +1,5 @@
/* LzmaEnc.h -- LZMA Encoder
2023-04-13 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_LZMA_ENC_H
#define ZIP7_INC_LZMA_ENC_H
@ -29,11 +29,13 @@ typedef struct
int numThreads; /* 1 or 2, default = 2 */
// int _pad;
Int32 affinityGroup;
UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
Encoder uses this value to reduce dictionary size */
UInt64 affinity;
UInt64 affinityInGroup;
} CLzmaEncProps;
void LzmaEncProps_Init(CLzmaEncProps *p);

View file

@ -1,5 +1,5 @@
/* MtCoder.c -- Multi-thread Coder
2023-09-07 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -39,14 +39,28 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p)
static THREAD_FUNC_DECL ThreadFunc(void *pp);
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t
#ifdef _WIN32
, CMtCoder * const mtc
#endif
)
{
WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent);
// printf("\n====== MtCoderThread_CreateAndStart : \n");
if (wres == 0)
{
t->stop = False;
if (!Thread_WasCreated(&t->thread))
wres = Thread_Create(&t->thread, ThreadFunc, t);
{
#ifdef _WIN32
if (mtc->numThreadGroups)
wres = Thread_Create_With_Group(&t->thread, ThreadFunc, t,
ThreadNextGroup_GetNext(&mtc->nextGroup), // group
0); // affinityMask
else
#endif
wres = Thread_Create(&t->thread, ThreadFunc, t);
}
if (wres == 0)
wres = Event_Set(&t->startEvent);
}
@ -56,6 +70,7 @@ static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t)
}
Z7_FORCE_INLINE
static void MtCoderThread_Destruct(CMtCoderThread *t)
{
if (Thread_WasCreated(&t->thread))
@ -85,7 +100,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t)
static SRes ThreadFunc2(CMtCoderThread *t)
{
CMtCoder *mtc = t->mtCoder;
CMtCoder * const mtc = t->mtCoder;
for (;;)
{
@ -185,7 +200,11 @@ static SRes ThreadFunc2(CMtCoderThread *t)
if (mtc->numStartedThreads < mtc->numStartedThreadsLimit
&& mtc->expectedDataSize != readProcessed)
{
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]);
res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]
#ifdef _WIN32
, mtc
#endif
);
if (res == SZ_OK)
mtc->numStartedThreads++;
else
@ -221,7 +240,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
}
{
CMtCoderBlock *block = &mtc->blocks[bi];
CMtCoderBlock * const block = &mtc->blocks[bi];
block->res = res;
block->bufIndex = bufIndex;
block->finished = finished;
@ -311,7 +330,7 @@ static SRes ThreadFunc2(CMtCoderThread *t)
static THREAD_FUNC_DECL ThreadFunc(void *pp)
{
CMtCoderThread *t = (CMtCoderThread *)pp;
CMtCoderThread * const t = (CMtCoderThread *)pp;
for (;;)
{
if (Event_Wait(&t->startEvent) != 0)
@ -319,7 +338,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
if (t->stop)
return 0;
{
SRes res = ThreadFunc2(t);
const SRes res = ThreadFunc2(t);
CMtCoder *mtc = t->mtCoder;
if (res != SZ_OK)
{
@ -328,7 +347,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp)
#ifndef MTCODER_USE_WRITE_THREAD
{
unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
const unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads);
if (numFinished == mtc->numStartedThreads)
if (Event_Set(&mtc->finishedEvent) != 0)
return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD;
@ -346,6 +365,7 @@ void MtCoder_Construct(CMtCoder *p)
p->blockSize = 0;
p->numThreadsMax = 0;
p->numThreadGroups = 0;
p->expectedDataSize = (UInt64)(Int64)-1;
p->inStream = NULL;
@ -429,6 +449,8 @@ SRes MtCoder_Code(CMtCoder *p)
unsigned i;
SRes res = SZ_OK;
// printf("\n====== MtCoder_Code : \n");
if (numThreads > MTCODER_THREADS_MAX)
numThreads = MTCODER_THREADS_MAX;
numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads);
@ -492,11 +514,22 @@ SRes MtCoder_Code(CMtCoder *p)
p->numStartedThreadsLimit = numThreads;
p->numStartedThreads = 0;
ThreadNextGroup_Init(&p->nextGroup, p->numThreadGroups, 0); // startGroup
// for (i = 0; i < numThreads; i++)
{
// here we create new thread for first block.
// And each new thread will create another new thread after block reading
// until numStartedThreadsLimit is reached.
CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++];
RINOK(MtCoderThread_CreateAndStart(nextThread))
{
const SRes res2 = MtCoderThread_CreateAndStart(nextThread
#ifdef _WIN32
, p
#endif
);
RINOK(res2)
}
}
RINOK_THREAD(Event_Set(&p->readEvent))
@ -513,9 +546,9 @@ SRes MtCoder_Code(CMtCoder *p)
RINOK_THREAD(Event_Wait(&p->writeEvents[bi]))
{
const CMtCoderBlock *block = &p->blocks[bi];
unsigned bufIndex = block->bufIndex;
BoolInt finished = block->finished;
const CMtCoderBlock * const block = &p->blocks[bi];
const unsigned bufIndex = block->bufIndex;
const BoolInt finished = block->finished;
if (res == SZ_OK && block->res != SZ_OK)
res = block->res;
@ -545,7 +578,7 @@ SRes MtCoder_Code(CMtCoder *p)
}
#else
{
WRes wres = Event_Wait(&p->finishedEvent);
const WRes wres = Event_Wait(&p->finishedEvent);
res = MY_SRes_HRESULT_FROM_WRes(wres);
}
#endif

View file

@ -1,5 +1,5 @@
/* MtCoder.h -- Multi-thread Coder
2023-04-13 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_MT_CODER_H
#define ZIP7_INC_MT_CODER_H
@ -16,7 +16,7 @@ EXTERN_C_BEGIN
#ifndef Z7_ST
#define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1)
#define MTCODER_THREADS_MAX 64
#define MTCODER_THREADS_MAX 256
#define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3)
#else
#define MTCODER_THREADS_MAX 1
@ -77,6 +77,7 @@ typedef struct CMtCoder_
size_t blockSize; /* size of input block */
unsigned numThreadsMax;
unsigned numThreadGroups;
UInt64 expectedDataSize;
ISeqInStreamPtr inStream;
@ -125,6 +126,8 @@ typedef struct CMtCoder_
CMtProgress mtProgress;
CMtCoderBlock blocks[MTCODER_BLOCKS_MAX];
CMtCoderThread threads[MTCODER_THREADS_MAX];
CThreadNextGroup nextGroup;
} CMtCoder;

View file

@ -439,26 +439,78 @@ void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize)
// #define Z7_SHA512_PROBE_DEBUG // for debug
#if defined(_WIN32) && defined(Z7_COMPILER_SHA512_SUPPORTED) \
&& defined(MY_CPU_ARM64) // we can disable this check to debug in x64
#if defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
#if 1 // 0 for debug
#include "7zWindows.h"
// #include <stdio.h>
#if 0 && defined(MY_CPU_X86_OR_AMD64)
#include <intrin.h> // for debug : for __ud2()
#if defined(Z7_SHA512_PROBE_DEBUG) \
|| defined(_WIN32) && defined(MY_CPU_ARM64)
#ifndef Z7_SHA512_USE_PROBE
#define Z7_SHA512_USE_PROBE
#endif
#endif
BoolInt CPU_IsSupported_SHA512(void)
#ifdef Z7_SHA512_USE_PROBE
#ifdef Z7_SHA512_PROBE_DEBUG
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif
#if 0 || !defined(_MSC_VER) // 1 || : for debug LONGJMP mode
// MINGW doesn't support __try. So we use signal() / longjmp().
// Note: signal() / longjmp() probably is not thread-safe.
// So we must call Sha512Prepare() from main thread at program start.
#ifndef Z7_SHA512_USE_LONGJMP
#define Z7_SHA512_USE_LONGJMP
#endif
#endif
#ifdef Z7_SHA512_USE_LONGJMP
#include <signal.h>
#include <setjmp.h>
static jmp_buf g_Sha512_jmp_buf;
// static int g_Sha512_Unsupported;
#if defined(__GNUC__) && (__GNUC__ >= 8) \
|| defined(__clang__) && (__clang_major__ >= 3)
__attribute__((noreturn))
#endif
static void Z7_CDECL Sha512_signal_Handler(int v)
{
PRF(printf("======== Sha512_signal_Handler = %x\n", (unsigned)v);)
// g_Sha512_Unsupported = 1;
longjmp(g_Sha512_jmp_buf, 1);
}
#endif // Z7_SHA512_USE_LONGJMP
#if defined(_WIN32)
#include "7zWindows.h"
#endif
#if defined(MY_CPU_ARM64)
// #define Z7_SHA512_USE_SIMPLIFIED_PROBE // for debug
#endif
#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
#include <arm_neon.h>
#if defined(__clang__)
__attribute__((__target__("sha3")))
#elif !defined(_MSC_VER)
__attribute__((__target__("arch=armv8.2-a+sha3")))
#endif
#endif
static BoolInt CPU_IsSupported_SHA512_Probe(void)
{
PRF(printf("\n== CPU_IsSupported_SHA512_Probe\n");)
#if defined(_WIN32) && defined(MY_CPU_ARM64)
// we have no SHA512 flag for IsProcessorFeaturePresent() still.
if (!CPU_IsSupported_CRYPTO())
return False;
#endif
// printf("\nCPU_IsSupported_SHA512\n");
PRF(printf("==== Registry check\n");)
{
// we can't read ID_AA64ISAR0_EL1 register from application.
// but ID_AA64ISAR0_EL1 register is mapped to "CP 4030" registry value.
@ -486,6 +538,7 @@ BoolInt CPU_IsSupported_SHA512(void)
// 2 : SHA256 and SHA512 implemented
}
}
#endif // defined(_WIN32) && defined(MY_CPU_ARM64)
#if 1 // 0 for debug to disable SHA512 PROBE code
@ -509,59 +562,97 @@ Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instru
Are there any ways to fix the problems with arm64-wine and x64-SDE cases?
*/
// printf("\n========== CPU_IsSupported_SHA512 PROBE ========\n");
PRF(printf("==== CPU_IsSupported_SHA512 PROBE\n");)
{
BoolInt isSupported = False;
#ifdef Z7_SHA512_USE_LONGJMP
void (Z7_CDECL *signal_prev)(int);
/*
if (g_Sha512_Unsupported)
{
PRF(printf("==== g_Sha512_Unsupported\n");)
return False;
}
*/
printf("====== signal(SIGILL)\n");
signal_prev = signal(SIGILL, Sha512_signal_Handler);
if (signal_prev == SIG_ERR)
{
PRF(printf("====== signal fail\n");)
return False;
}
// PRF(printf("==== signal_prev = %p\n", (void *)signal_prev);)
// docs: Before the specified function is executed,
// the value of func is set to SIG_DFL.
// So we can exit if (setjmp(g_Sha512_jmp_buf) != 0).
PRF(printf("====== setjmp\n");)
if (!setjmp(g_Sha512_jmp_buf))
#else // Z7_SHA512_USE_LONGJMP
#ifdef _MSC_VER
#ifdef __clang_major__
#pragma GCC diagnostic ignored "-Wlanguage-extension-token"
#endif
__try
#endif
#endif // Z7_SHA512_USE_LONGJMP
{
#if 0 // 1 : for debug (reduced version to detect sha512)
#if defined(Z7_COMPILER_SHA512_SUPPORTED)
#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE
// simplified sha512 check for arm64:
const uint64x2_t a = vdupq_n_u64(1);
const uint64x2_t b = vsha512hq_u64(a, a, a);
PRF(printf("======== vsha512hq_u64 probe\n");)
if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002)
return True;
#else
MY_ALIGN(16)
UInt64 temp[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS];
memset(temp, 0x5a, sizeof(temp));
#if 0 && defined(MY_CPU_X86_OR_AMD64)
__ud2(); // for debug : that exception is not problem for SDE
#endif
#if 1
PRF(printf("======== Sha512_UpdateBlocks_HW\n");)
Sha512_UpdateBlocks_HW(temp,
(const Byte *)(const void *)(temp + SHA512_NUM_DIGEST_WORDS), 1);
// printf("\n==== t = %x\n", (UInt32)temp[0]);
// PRF(printf("======== t = %x\n", (UInt32)temp[0]);)
if ((UInt32)temp[0] == 0xa33cfdf7)
#endif
{
// printf("\n=== PROBE SHA512: SHA512 supported\n");
return True;
PRF(printf("======== PROBE SHA512: SHA512 is supported\n");)
isSupported = True;
}
#else // Z7_COMPILER_SHA512_SUPPORTED
// for debug : we generate bad instrction or raise exception.
// __except() doesn't catch raise() calls.
#ifdef Z7_SHA512_USE_LONGJMP
PRF(printf("====== raise(SIGILL)\n");)
raise(SIGILL);
#else
#if defined(_MSC_VER) && defined(MY_CPU_X86)
__asm ud2
#endif
#endif
#endif // Z7_SHA512_USE_LONGJMP
#endif // Z7_COMPILER_SHA512_SUPPORTED
}
#ifdef Z7_SHA512_USE_LONGJMP
PRF(printf("====== restore signal SIGILL\n");)
signal(SIGILL, signal_prev);
#elif _MSC_VER
__except (EXCEPTION_EXECUTE_HANDLER)
{
// printf("\n==== CPU_IsSupported_SHA512 EXCEPTION_EXECUTE_HANDLER\n");
PRF(printf("==== CPU_IsSupported_SHA512 __except(EXCEPTION_EXECUTE_HANDLER)\n");)
}
#endif
PRF(printf("== return (sha512 supported) = %d\n", isSupported);)
return isSupported;
}
return False;
#else
// without SHA512 PROBE code
return True;
#endif
}
#else
BoolInt CPU_IsSupported_SHA512(void)
{
return False;
}
#endif
#endif // WIN32 arm64
#endif // Z7_SHA512_USE_PROBE
#endif // defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED)
void Sha512Prepare(void)
@ -570,10 +661,10 @@ void Sha512Prepare(void)
SHA512_FUNC_UPDATE_BLOCKS f, f_hw;
f = Sha512_UpdateBlocks;
f_hw = NULL;
#ifdef MY_CPU_X86_OR_AMD64
if (CPU_IsSupported_SHA512()
&& CPU_IsSupported_AVX2()
)
#ifdef Z7_SHA512_USE_PROBE
if (CPU_IsSupported_SHA512_Probe())
#elif defined(MY_CPU_X86_OR_AMD64)
if (CPU_IsSupported_SHA512() && CPU_IsSupported_AVX2())
#else
if (CPU_IsSupported_SHA512())
#endif
@ -583,6 +674,8 @@ void Sha512Prepare(void)
}
g_SHA512_FUNC_UPDATE_BLOCKS = f;
g_SHA512_FUNC_UPDATE_BLOCKS_HW = f_hw;
#elif defined(Z7_SHA512_PROBE_DEBUG)
CPU_IsSupported_SHA512_Probe(); // for debug
#endif
}

367
C/Sort.c
View file

@ -1,141 +1,268 @@
/* Sort.c -- Sort functions
2014-04-05 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Sort.h"
#include "CpuArch.h"
#define HeapSortDown(p, k, size, temp) \
{ for (;;) { \
size_t s = (k << 1); \
if (s > size) break; \
if (s < size && p[s + 1] > p[s]) s++; \
if (temp >= p[s]) break; \
p[k] = p[s]; k = s; \
} p[k] = temp; }
#if ( (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
|| (defined(__clang__) && Z7_has_builtin(__builtin_prefetch)) \
)
// the code with prefetch is slow for small arrays on x86.
// So we disable prefetch for x86.
#ifndef MY_CPU_X86
// #pragma message("Z7_PREFETCH : __builtin_prefetch")
#define Z7_PREFETCH(a) __builtin_prefetch((a))
#endif
void HeapSort(UInt32 *p, size_t size)
{
if (size <= 1)
return;
p--;
{
size_t i = size / 2;
do
{
UInt32 temp = p[i];
size_t k = i;
HeapSortDown(p, k, size, temp)
}
while (--i != 0);
}
/*
do
{
size_t k = 1;
UInt32 temp = p[size];
p[size--] = p[1];
HeapSortDown(p, k, size, temp)
}
while (size > 1);
*/
while (size > 3)
{
UInt32 temp = p[size];
size_t k = (p[3] > p[2]) ? 3 : 2;
p[size--] = p[1];
p[1] = p[k];
HeapSortDown(p, k, size, temp)
}
{
UInt32 temp = p[size];
p[size] = p[1];
if (size > 2 && p[2] < temp)
{
p[1] = p[2];
p[2] = temp;
}
else
p[1] = temp;
}
}
#elif defined(_WIN32) // || defined(_MSC_VER) && (_MSC_VER >= 1200)
void HeapSort64(UInt64 *p, size_t size)
{
if (size <= 1)
return;
p--;
{
size_t i = size / 2;
do
{
UInt64 temp = p[i];
size_t k = i;
HeapSortDown(p, k, size, temp)
}
while (--i != 0);
}
/*
do
{
size_t k = 1;
UInt64 temp = p[size];
p[size--] = p[1];
HeapSortDown(p, k, size, temp)
}
while (size > 1);
*/
while (size > 3)
{
UInt64 temp = p[size];
size_t k = (p[3] > p[2]) ? 3 : 2;
p[size--] = p[1];
p[1] = p[k];
HeapSortDown(p, k, size, temp)
}
{
UInt64 temp = p[size];
p[size] = p[1];
if (size > 2 && p[2] < temp)
{
p[1] = p[2];
p[2] = temp;
}
else
p[1] = temp;
}
}
#include "7zWindows.h"
// NOTE: CLANG/GCC/MSVC can define different values for _MM_HINT_T0 / PF_TEMPORAL_LEVEL_1.
// For example, clang-cl can generate "prefetcht2" instruction for
// PreFetchCacheLine(PF_TEMPORAL_LEVEL_1) call.
// But we want to generate "prefetcht0" instruction.
// So for CLANG/GCC we must use __builtin_prefetch() in code branch above
// instead of PreFetchCacheLine() / _mm_prefetch().
// New msvc-x86 compiler generates "prefetcht0" instruction for PreFetchCacheLine() call.
// But old x86 cpus don't support "prefetcht0".
// So we will use PreFetchCacheLine(), only if we are sure that
// generated instruction is supported by all cpus of that isa.
#if defined(MY_CPU_AMD64) \
|| defined(MY_CPU_ARM64) \
|| defined(MY_CPU_IA64)
// we need to use additional braces for (a) in PreFetchCacheLine call, because
// PreFetchCacheLine macro doesn't use braces:
// #define PreFetchCacheLine(l, a) _mm_prefetch((CHAR CONST *) a, l)
// #pragma message("Z7_PREFETCH : PreFetchCacheLine")
#define Z7_PREFETCH(a) PreFetchCacheLine(PF_TEMPORAL_LEVEL_1, (a))
#endif
#endif // _WIN32
#define PREFETCH_NO(p,k,s,size)
#ifndef Z7_PREFETCH
#define SORT_PREFETCH(p,k,s,size)
#else
// #define PREFETCH_LEVEL 2 // use it if cache line is 32-bytes
#define PREFETCH_LEVEL 3 // it is fast for most cases (64-bytes cache line prefetch)
// #define PREFETCH_LEVEL 4 // it can be faster for big array (128-bytes prefetch)
#if PREFETCH_LEVEL == 0
#define SORT_PREFETCH(p,k,s,size)
#else // PREFETCH_LEVEL != 0
/*
#define HeapSortRefDown(p, vals, n, size, temp) \
{ size_t k = n; UInt32 val = vals[temp]; for (;;) { \
size_t s = (k << 1); \
if (s > size) break; \
if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
if (val >= vals[p[s]]) break; \
p[k] = p[s]; k = s; \
} p[k] = temp; }
if defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
we prefetch one value per cache line.
Use it if array is aligned for cache line size (64 bytes)
or if array is small (less than L1 cache size).
void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
if !defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
we perfetch all cache lines that can be required.
it can be faster for big unaligned arrays.
*/
#define USE_PREFETCH_FOR_ALIGNED_ARRAY
// s == k * 2
#if 0 && PREFETCH_LEVEL <= 3 && defined(MY_CPU_X86_OR_AMD64)
// x86 supports (lea r1*8+offset)
#define PREFETCH_OFFSET(k,s) ((s) << PREFETCH_LEVEL)
#else
#define PREFETCH_OFFSET(k,s) ((k) << (PREFETCH_LEVEL + 1))
#endif
#if 1 && PREFETCH_LEVEL <= 3 && defined(USE_PREFETCH_FOR_ALIGNED_ARRAY)
#define PREFETCH_ADD_OFFSET 0
#else
// last offset that can be reqiured in PREFETCH_LEVEL step:
#define PREFETCH_RANGE ((2 << PREFETCH_LEVEL) - 1)
#define PREFETCH_ADD_OFFSET PREFETCH_RANGE / 2
#endif
#if PREFETCH_LEVEL <= 3
#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY
#define SORT_PREFETCH(p,k,s,size) \
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_ADD_OFFSET; \
if (s2 <= size) { \
Z7_PREFETCH((p + s2)); \
}}
#else /* for unaligned array */
#define SORT_PREFETCH(p,k,s,size) \
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \
if (s2 <= size) { \
Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \
Z7_PREFETCH((p + s2)); \
}}
#endif
#else // PREFETCH_LEVEL > 3
#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY
#define SORT_PREFETCH(p,k,s,size) \
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE - 16 / 2; \
if (s2 <= size) { \
Z7_PREFETCH((p + s2 - 16)); \
Z7_PREFETCH((p + s2)); \
}}
#else /* for unaligned array */
#define SORT_PREFETCH(p,k,s,size) \
{ const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \
if (s2 <= size) { \
Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \
Z7_PREFETCH((p + s2 - PREFETCH_RANGE / 2)); \
Z7_PREFETCH((p + s2)); \
}}
#endif
#endif // PREFETCH_LEVEL > 3
#endif // PREFETCH_LEVEL != 0
#endif // Z7_PREFETCH
#if defined(MY_CPU_ARM64) \
/* || defined(MY_CPU_AMD64) */ \
/* || defined(MY_CPU_ARM) && !defined(_MSC_VER) */
// we want to use cmov, if cmov is very fast:
// - this cmov version is slower for clang-x64.
// - this cmov version is faster for gcc-arm64 for some fast arm64 cpus.
#define Z7_FAST_CMOV_SUPPORTED
#endif
#ifdef Z7_FAST_CMOV_SUPPORTED
// we want to use cmov here, if cmov is fast: new arm64 cpus.
// we want the compiler to use conditional move for this branch
#define GET_MAX_VAL(n0, n1, max_val_slow) if (n0 < n1) n0 = n1;
#else
// use this branch, if cpu doesn't support fast conditional move.
// it uses slow array access reading:
#define GET_MAX_VAL(n0, n1, max_val_slow) n0 = max_val_slow;
#endif
#define HeapSortDown(p, k, size, temp, macro_prefetch) \
{ \
for (;;) { \
UInt32 n0, n1; \
size_t s = k * 2; \
if (s >= size) { \
if (s == size) { \
n0 = p[s]; \
p[k] = n0; \
if (temp < n0) k = s; \
} \
break; \
} \
n0 = p[k * 2]; \
n1 = p[k * 2 + 1]; \
s += n0 < n1; \
GET_MAX_VAL(n0, n1, p[s]) \
if (temp >= n0) break; \
macro_prefetch(p, k, s, size) \
p[k] = n0; \
k = s; \
} \
p[k] = temp; \
}
/*
stage-1 : O(n) :
we generate intermediate partially sorted binary tree:
p[0] : it's additional item for better alignment of tree structure in memory.
p[1]
p[2] p[3]
p[4] p[5] p[6] p[7]
...
p[x] >= p[x * 2]
p[x] >= p[x * 2 + 1]
stage-2 : O(n)*log2(N):
we move largest item p[0] from head of tree to the end of array
and insert last item to sorted binary tree.
*/
// (p) must be aligned for cache line size (64-bytes) for best performance
void Z7_FASTCALL HeapSort(UInt32 *p, size_t size)
{
if (size <= 1)
if (size < 2)
return;
p--;
if (size == 2)
{
size_t i = size / 2;
const UInt32 a0 = p[0];
const UInt32 a1 = p[1];
const unsigned k = a1 < a0;
p[k] = a0;
p[k ^ 1] = a1;
return;
}
{
// stage-1 : O(n)
// we transform array to partially sorted binary tree.
size_t i = --size / 2;
// (size) now is the index of the last item in tree,
// if (i)
{
do
{
const UInt32 temp = p[i];
size_t k = i;
HeapSortDown(p, k, size, temp, PREFETCH_NO)
}
while (--i);
}
{
const UInt32 temp = p[0];
const UInt32 a1 = p[1];
if (temp < a1)
{
size_t k = 1;
p[0] = a1;
HeapSortDown(p, k, size, temp, PREFETCH_NO)
}
}
}
if (size < 3)
{
// size == 2
const UInt32 a0 = p[0];
p[0] = p[2];
p[2] = a0;
return;
}
if (size != 3)
{
// stage-2 : O(size) * log2(size):
// we move largest item p[0] from head to the end of array,
// and insert last item to sorted binary tree.
do
{
UInt32 temp = p[i];
HeapSortRefDown(p, vals, i, size, temp);
const UInt32 temp = p[size];
size_t k = p[2] < p[3] ? 3 : 2;
p[size--] = p[0];
p[0] = p[1];
p[1] = p[k];
HeapSortDown(p, k, size, temp, SORT_PREFETCH) // PREFETCH_NO
}
while (--i != 0);
while (size != 3);
}
do
{
UInt32 temp = p[size];
p[size--] = p[1];
HeapSortRefDown(p, vals, 1, size, temp);
const UInt32 a2 = p[2];
const UInt32 a3 = p[3];
const size_t k = a2 < a3;
p[2] = p[1];
p[3] = p[0];
p[k] = a3;
p[k ^ 1] = a2;
}
while (size > 1);
}
*/

View file

@ -1,5 +1,5 @@
/* Sort.h -- Sort functions
2023-03-05 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_SORT_H
#define ZIP7_INC_SORT_H
@ -8,10 +8,7 @@
EXTERN_C_BEGIN
void HeapSort(UInt32 *p, size_t size);
void HeapSort64(UInt64 *p, size_t size);
/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */
void Z7_FASTCALL HeapSort(UInt32 *p, size_t size);
EXTERN_C_END

View file

@ -1,5 +1,5 @@
/* Threads.c -- multithreading library
2024-03-28 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -59,6 +59,100 @@ WRes Thread_Wait_Close(CThread *p)
return (res != 0 ? res : res2);
}
typedef struct MY_PROCESSOR_NUMBER {
WORD Group;
BYTE Number;
BYTE Reserved;
} MY_PROCESSOR_NUMBER, *MY_PPROCESSOR_NUMBER;
typedef struct MY_GROUP_AFFINITY {
#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 100000)
// KAFFINITY is not defined in old mingw
ULONG_PTR
#else
KAFFINITY
#endif
Mask;
WORD Group;
WORD Reserved[3];
} MY_GROUP_AFFINITY, *MY_PGROUP_AFFINITY;
typedef BOOL (WINAPI *Func_SetThreadGroupAffinity)(
HANDLE hThread,
CONST MY_GROUP_AFFINITY *GroupAffinity,
MY_PGROUP_AFFINITY PreviousGroupAffinity);
typedef BOOL (WINAPI *Func_GetThreadGroupAffinity)(
HANDLE hThread,
MY_PGROUP_AFFINITY GroupAffinity);
typedef BOOL (WINAPI *Func_GetProcessGroupAffinity)(
HANDLE hProcess,
PUSHORT GroupCount,
PUSHORT GroupArray);
Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
#if 0
#include <stdio.h>
#define PRF(x) x
/*
--
before call of SetThreadGroupAffinity()
GetProcessGroupAffinity return one group.
after call of SetThreadGroupAffinity():
GetProcessGroupAffinity return more than group,
if SetThreadGroupAffinity() was to another group.
--
GetProcessAffinityMask MS DOCs:
{
If the calling process contains threads in multiple groups,
the function returns zero for both affinity masks.
}
but tests in win10 with 2 groups (less than 64 cores total):
GetProcessAffinityMask() still returns non-zero affinity masks
even after SetThreadGroupAffinity() calls.
*/
static void PrintProcess_Info()
{
{
const
Func_GetProcessGroupAffinity fn_GetProcessGroupAffinity =
(Func_GetProcessGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetProcessGroupAffinity");
if (fn_GetProcessGroupAffinity)
{
unsigned i;
USHORT GroupCounts[64];
USHORT GroupCount = Z7_ARRAY_SIZE(GroupCounts);
BOOL boolRes = fn_GetProcessGroupAffinity(GetCurrentProcess(),
&GroupCount, GroupCounts);
printf("\n====== GetProcessGroupAffinity : "
"boolRes=%u GroupCounts = %u :",
boolRes, (unsigned)GroupCount);
for (i = 0; i < GroupCount; i++)
printf(" %u", GroupCounts[i]);
printf("\n");
}
}
{
DWORD_PTR processAffinityMask, systemAffinityMask;
if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask))
{
PRF(printf("\n====== GetProcessAffinityMask : "
": processAffinityMask=%x, systemAffinityMask=%x\n",
(UInt32)processAffinityMask, (UInt32)systemAffinityMask);)
}
else
printf("\n==GetProcessAffinityMask FAIL");
}
}
#else
#ifndef USE_THREADS_CreateThread
// #define PRF(x)
#endif
#endif
WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
{
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
@ -72,7 +166,43 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
unsigned threadId;
*p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId));
#if 0 // 1 : for debug
{
DWORD_PTR prevMask;
DWORD_PTR affinity = 1 << 0;
prevMask = SetThreadAffinityMask(*p, (DWORD_PTR)affinity);
prevMask = prevMask;
}
#endif
#if 0 // 1 : for debug
{
/* win10: new thread will be created in same group that is assigned to parent thread
but affinity mask will contain all allowed threads of that group,
even if affinity mask of parent group is not full
win11: what group it will be created, if we have set
affinity of parent thread with ThreadGroupAffinity?
*/
const
Func_GetThreadGroupAffinity fn =
(Func_GetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"GetThreadGroupAffinity");
if (fn)
{
// BOOL wres2;
MY_GROUP_AFFINITY groupAffinity;
memset(&groupAffinity, 0, sizeof(groupAffinity));
/* wres2 = */ fn(*p, &groupAffinity);
PRF(printf("\n==Thread_Create cur = %6u GetThreadGroupAffinity(): "
"wres2_BOOL = %u, group=%u mask=%x\n",
GetCurrentThreadId(),
wres2,
groupAffinity.Group,
(UInt32)groupAffinity.Mask);)
}
}
#endif
#endif
/* maybe we must use errno here, but probably GetLastError() is also OK. */
@ -110,7 +240,84 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
*/
}
{
DWORD prevSuspendCount = ResumeThread(h);
const DWORD prevSuspendCount = ResumeThread(h);
/* ResumeThread() returns:
0 : was_not_suspended
1 : was_resumed
-1 : error
*/
if (prevSuspendCount == (DWORD)-1)
wres = GetError();
}
}
/* maybe we must use errno here, but probably GetLastError() is also OK. */
return wres;
#endif
}
WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask)
{
#ifdef USE_THREADS_CreateThread
UNUSED_VAR(group)
UNUSED_VAR(affinityMask)
return Thread_Create(p, func, param);
#else
/* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */
HANDLE h;
WRes wres;
unsigned threadId;
h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId));
*p = h;
wres = HandleToWRes(h);
if (h)
{
// PrintProcess_Info();
{
const
Func_SetThreadGroupAffinity fn =
(Func_SetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
"SetThreadGroupAffinity");
if (fn)
{
// WRes wres2;
MY_GROUP_AFFINITY groupAffinity, prev_groupAffinity;
memset(&groupAffinity, 0, sizeof(groupAffinity));
// groupAffinity.Mask must use only bits that supported by current group
// (groupAffinity.Mask = 0) means all allowed bits
groupAffinity.Mask = affinityMask;
groupAffinity.Group = (WORD)group;
// wres2 =
fn(h, &groupAffinity, &prev_groupAffinity);
/*
if (groupAffinity.Group == prev_groupAffinity.Group)
wres2 = wres2;
else
wres2 = wres2;
if (wres2 == 0)
{
wres2 = GetError();
PRF(printf("\n==SetThreadGroupAffinity error: %u\n", wres2);)
}
else
{
PRF(printf("\n==Thread_Create_With_Group::SetThreadGroupAffinity()"
" threadId = %6u"
" group=%u mask=%x\n",
threadId,
prev_groupAffinity.Group,
(UInt32)prev_groupAffinity.Mask);)
}
*/
}
}
{
const DWORD prevSuspendCount = ResumeThread(h);
/* ResumeThread() returns:
0 : was_not_suspended
1 : was_resumed
@ -297,6 +504,13 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param)
return Thread_Create_With_CpuSet(p, func, param, NULL);
}
/*
WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinity)
{
UNUSED_VAR(group)
return Thread_Create_With_Affinity(p, func, param, affinity);
}
*/
WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity)
{
@ -577,5 +791,22 @@ WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p)
return AutoResetEvent_CreateNotSignaled(p);
}
void ThreadNextGroup_Init(CThreadNextGroup *p, UInt32 numGroups, UInt32 startGroup)
{
// printf("\n====== ThreadNextGroup_Init numGroups = %x: startGroup=%x\n", numGroups, startGroup);
if (numGroups == 0)
numGroups = 1;
p->NumGroups = numGroups;
p->NextGroup = startGroup % numGroups;
}
UInt32 ThreadNextGroup_GetNext(CThreadNextGroup *p)
{
const UInt32 next = p->NextGroup;
p->NextGroup = (next + 1) % p->NumGroups;
return next;
}
#undef PRF
#undef Print

View file

@ -1,5 +1,5 @@
/* Threads.h -- multithreading library
2024-03-28 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_THREADS_H
#define ZIP7_INC_THREADS_H
@ -140,12 +140,22 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param
WRes Thread_Wait_Close(CThread *p);
#ifdef _WIN32
WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask);
#define Thread_Create_With_CpuSet(p, func, param, cs) \
Thread_Create_With_Affinity(p, func, param, *cs)
#else
WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet);
#endif
typedef struct
{
unsigned NumGroups;
unsigned NextGroup;
} CThreadNextGroup;
void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup);
unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p);
#ifdef _WIN32

View file

@ -122,6 +122,10 @@ SOURCE=..\..\Compiler.h
# End Source File
# Begin Source File
SOURCE=..\..\CpuArch.c
# End Source File
# Begin Source File
SOURCE=..\..\CpuArch.h
# End Source File
# Begin Source File

View file

@ -43,7 +43,7 @@ RSC=rc.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /c
# ADD CPP /nologo /Gr /MT /W3 /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c
# ADD CPP /nologo /Gr /MT /W4 /WX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c
# SUBTRACT CPP /YX
# ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32
@ -71,7 +71,7 @@ LINK32=link.exe
# PROP Ignore_Export_Lib 0
# PROP Target_Dir ""
# ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /GZ /c
# ADD CPP /nologo /MTd /W3 /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c
# ADD CPP /nologo /MTd /W4 /WX /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c
# SUBTRACT CPP /YX
# ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32
# ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32
@ -128,6 +128,10 @@ SOURCE=..\..\Compiler.h
# End Source File
# Begin Source File
SOURCE=..\..\CpuArch.c
# End Source File
# Begin Source File
SOURCE=..\..\CpuArch.h
# End Source File
# Begin Source File

12
C/Xz.h
View file

@ -1,5 +1,5 @@
/* Xz.h - Xz interface
2024-01-26 : Igor Pavlov : Public domain */
Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_H
#define ZIP7_INC_XZ_H
@ -121,6 +121,7 @@ typedef struct
UInt64 startOffset;
} CXzStream;
#define Xz_CONSTRUCT(p) { (p)->numBlocks = 0; (p)->blocks = NULL; (p)->flags = 0; }
void Xz_Construct(CXzStream *p);
void Xz_Free(CXzStream *p, ISzAllocPtr alloc);
@ -136,8 +137,13 @@ typedef struct
CXzStream *streams;
} CXzs;
#define Xzs_CONSTRUCT(p) { (p)->num = 0; (p)->numAllocated = 0; (p)->streams = NULL; }
void Xzs_Construct(CXzs *p);
void Xzs_Free(CXzs *p, ISzAllocPtr alloc);
/*
Xzs_ReadBackward() must be called for empty CXzs object.
Xzs_ReadBackward() can return non empty object with (p->num != 0) even in case of error.
*/
SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc);
UInt64 Xzs_GetNumBlocks(const CXzs *p);
@ -268,8 +274,8 @@ typedef struct
size_t outBufSize;
size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked
Byte shaDigest[SHA256_DIGEST_SIZE];
Byte buf[XZ_BLOCK_HEADER_SIZE_MAX];
UInt32 shaDigest32[SHA256_DIGEST_SIZE / 4];
Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; // it must be aligned for 4-bytes
} CXzUnpacker;
/* alloc : aligned for cache line allocation is better */

View file

@ -1,5 +1,5 @@
/* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2023-12-08 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -235,7 +235,7 @@ CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
v = Q32BE(1, w1) ^ Q32BE(0, w0);
v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
#endif
#elif
#else
#error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
p += Z7_CRC64_NUM_TABLES_USE;

View file

@ -1,5 +1,5 @@
/* XzDec.c -- Xz Decode
2024-03-01 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -59,7 +59,7 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value)
for (i = 0; i < limit;)
{
Byte b = p[i];
const unsigned b = p[i];
*value |= (UInt64)(b & 0x7F) << (7 * i++);
if ((b & 0x80) == 0)
return (b == 0 && i != 1) ? 0 : i;
@ -796,11 +796,10 @@ SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf)
static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf)
{
return indexSize == (((UInt64)GetUi32(buf + 4) + 1) << 2)
&& GetUi32(buf) == CrcCalc(buf + 4, 6)
&& flags == GetBe16(buf + 8)
&& buf[10] == XZ_FOOTER_SIG_0
&& buf[11] == XZ_FOOTER_SIG_1;
return indexSize == (((UInt64)GetUi32a(buf + 4) + 1) << 2)
&& GetUi32a(buf) == CrcCalc(buf + 4, 6)
&& flags == GetBe16a(buf + 8)
&& GetUi16a(buf + 10) == (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8));
}
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
@ -1166,7 +1165,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
p->indexPreSize = 1 + Xz_WriteVarInt(p->buf + 1, p->numBlocks);
p->indexPos = p->indexPreSize;
p->indexSize += p->indexPreSize;
Sha256_Final(&p->sha, p->shaDigest);
Sha256_Final(&p->sha, (Byte *)(void *)p->shaDigest32);
Sha256_Init(&p->sha);
p->crc = CrcUpdate(CRC_INIT_VAL, p->buf, p->indexPreSize);
p->state = XZ_STATE_STREAM_INDEX;
@ -1241,10 +1240,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
break;
}
{
Byte digest[XZ_CHECK_SIZE_MAX];
UInt32 digest32[XZ_CHECK_SIZE_MAX / 4];
p->state = XZ_STATE_BLOCK_HEADER;
p->pos = 0;
if (XzCheck_Final(&p->check, digest) && memcmp(digest, p->buf, checkSize) != 0)
if (XzCheck_Final(&p->check, (void *)digest32) && memcmp(digest32, p->buf, checkSize) != 0)
return SZ_ERROR_CRC;
if (p->decodeOnlyOneBlock)
{
@ -1289,12 +1288,12 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
}
else
{
Byte digest[SHA256_DIGEST_SIZE];
UInt32 digest32[SHA256_DIGEST_SIZE / 4];
p->state = XZ_STATE_STREAM_INDEX_CRC;
p->indexSize += 4;
p->pos = 0;
Sha256_Final(&p->sha, digest);
if (memcmp(digest, p->shaDigest, SHA256_DIGEST_SIZE) != 0)
Sha256_Final(&p->sha, (void *)digest32);
if (memcmp(digest32, p->shaDigest32, SHA256_DIGEST_SIZE) != 0)
return SZ_ERROR_CRC;
}
}
@ -1313,7 +1312,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
const Byte *ptr = p->buf;
p->state = XZ_STATE_STREAM_FOOTER;
p->pos = 0;
if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr))
if (CRC_GET_DIGEST(p->crc) != GetUi32a(ptr))
return SZ_ERROR_CRC;
}
break;
@ -1343,7 +1342,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen,
{
if (*src != 0)
{
if (((UInt32)p->padSize & 3) != 0)
if ((unsigned)p->padSize & 3)
return SZ_ERROR_NO_ARCHIVE;
p->pos = 0;
p->state = XZ_STATE_STREAM_HEADER;

View file

@ -1,5 +1,5 @@
/* XzEnc.c -- Xz Encode
2024-03-01 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
@ -411,6 +411,7 @@ static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size)
}
}
Z7_FORCE_INLINE
static void SeqInFilter_Construct(CSeqInFilter *p)
{
p->buf = NULL;
@ -418,6 +419,7 @@ static void SeqInFilter_Construct(CSeqInFilter *p)
p->vt.Read = SeqInFilter_Read;
}
Z7_FORCE_INLINE
static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc)
{
if (p->StateCoder.p)
@ -507,6 +509,7 @@ void XzFilterProps_Init(CXzFilterProps *p)
void XzProps_Init(CXzProps *p)
{
p->checkId = XZ_CHECK_CRC32;
p->numThreadGroups = 0;
p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO;
p->numBlockThreads_Reduced = -1;
p->numBlockThreads_Max = -1;
@ -689,6 +692,7 @@ typedef struct
} CLzma2WithFilters;
Z7_FORCE_INLINE
static void Lzma2WithFilters_Construct(CLzma2WithFilters *p)
{
p->lzma2 = NULL;
@ -712,6 +716,7 @@ static SRes Lzma2WithFilters_Create(CLzma2WithFilters *p, ISzAllocPtr alloc, ISz
}
Z7_FORCE_INLINE
static void Lzma2WithFilters_Free(CLzma2WithFilters *p, ISzAllocPtr alloc)
{
#ifdef USE_SUBBLOCK
@ -1236,6 +1241,7 @@ SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr in
}
p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max;
p->mtCoder.numThreadGroups = props->numThreadGroups;
p->mtCoder.expectedDataSize = p->expectedDataSize;
RINOK(MtCoder_Code(&p->mtCoder))

View file

@ -1,5 +1,5 @@
/* XzEnc.h -- Xz Encode
2023-04-13 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#ifndef ZIP7_INC_XZ_ENC_H
#define ZIP7_INC_XZ_ENC_H
@ -31,6 +31,7 @@ typedef struct
CLzma2EncProps lzma2Props;
CXzFilterProps filterProps;
unsigned checkId;
unsigned numThreadGroups; // 0 : no groups
UInt64 blockSize;
int numBlockThreads_Reduced;
int numBlockThreads_Max;

265
C/XzIn.c
View file

@ -1,38 +1,39 @@
/* XzIn.c - Xz input
2023-09-07 : Igor Pavlov : Public domain */
: Igor Pavlov : Public domain */
#include "Precomp.h"
#include <string.h>
#include "7zCrc.h"
#include "CpuArch.h"
#include "Xz.h"
#include "CpuArch.h"
/*
#define XZ_FOOTER_SIG_CHECK(p) (memcmp((p), XZ_FOOTER_SIG, XZ_FOOTER_SIG_SIZE) == 0)
*/
#define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1)
#define XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(p) \
(GetUi16a((const Byte *)(const void *)(p) + 10) == \
(XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8)))
SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
{
Byte sig[XZ_STREAM_HEADER_SIZE];
UInt32 data32[XZ_STREAM_HEADER_SIZE / 4];
size_t processedSize = XZ_STREAM_HEADER_SIZE;
RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize))
RINOK(SeqInStream_ReadMax(inStream, data32, &processedSize))
if (processedSize != XZ_STREAM_HEADER_SIZE
|| memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0)
|| memcmp(data32, XZ_SIG, XZ_SIG_SIZE) != 0)
return SZ_ERROR_NO_ARCHIVE;
return Xz_ParseHeader(p, sig);
return Xz_ParseHeader(p, (const Byte *)(const void *)data32);
}
#define READ_VARINT_AND_CHECK(buf, pos, size, res) \
{ const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \
#define READ_VARINT_AND_CHECK(buf, size, res) \
{ const unsigned s = Xz_ReadVarInt(buf, size, res); \
if (s == 0) return SZ_ERROR_ARCHIVE; \
pos += s; }
size -= s; \
buf += s; \
}
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{
MY_ALIGN(4)
Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
unsigned headerSize;
*headerSizeRes = 0;
@ -57,8 +58,12 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex,
return XzBlock_Parse(p, header);
}
#define ADD_SIZE_CHECK(size, val) \
{ const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; }
{ const UInt64 newSize = size + (val); \
if (newSize < size) return XZ_SIZE_OVERFLOW; \
size = newSize; \
}
UInt64 Xz_GetUnpackSize(const CXzStream *p)
{
@ -82,76 +87,85 @@ UInt64 Xz_GetPackSize(const CXzStream *p)
return size;
}
/*
SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream)
{
return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f));
}
*/
static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc)
// input;
// CXzStream (p) is empty object.
// size != 0
// (size & 3) == 0
// (buf) is aligned for at least 4 bytes.
// output:
// p->numBlocks is number of allocated items in p->blocks
// p->blocks[*] values must be ignored, if function returns error.
static SRes Xz_ParseIndex(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc)
{
size_t numBlocks, pos = 1;
UInt32 crc;
size_t numBlocks;
if (size < 5 || buf[0] != 0)
return SZ_ERROR_ARCHIVE;
size -= 4;
crc = CrcCalc(buf, size);
if (crc != GetUi32(buf + size))
return SZ_ERROR_ARCHIVE;
{
const UInt32 crc = CrcCalc(buf, size);
if (crc != GetUi32a(buf + size))
return SZ_ERROR_ARCHIVE;
}
buf++;
size--;
{
UInt64 numBlocks64;
READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64)
READ_VARINT_AND_CHECK(buf, size, &numBlocks64)
// (numBlocks64) is 63-bit value, so we can calculate (numBlocks64 * 2):
if (numBlocks64 * 2 > size)
return SZ_ERROR_ARCHIVE;
if (numBlocks64 >= ((size_t)1 << (sizeof(size_t) * 8 - 1)) / sizeof(CXzBlockSizes))
return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
numBlocks = (size_t)numBlocks64;
if (numBlocks != numBlocks64 || numBlocks * 2 > size)
return SZ_ERROR_ARCHIVE;
}
Xz_Free(p, alloc);
if (numBlocks != 0)
// Xz_Free(p, alloc); // it's optional, because (p) is empty already
if (numBlocks)
{
size_t i;
p->numBlocks = numBlocks;
p->blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks);
if (!p->blocks)
CXzBlockSizes *blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks);
if (!blocks)
return SZ_ERROR_MEM;
for (i = 0; i < numBlocks; i++)
p->blocks = blocks;
p->numBlocks = numBlocks;
// the caller will call Xz_Free() in case of error
do
{
CXzBlockSizes *block = &p->blocks[i];
READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize)
READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize)
if (block->totalSize == 0)
READ_VARINT_AND_CHECK(buf, size, &blocks->totalSize)
READ_VARINT_AND_CHECK(buf, size, &blocks->unpackSize)
if (blocks->totalSize == 0)
return SZ_ERROR_ARCHIVE;
blocks++;
}
while (--numBlocks);
}
while ((pos & 3) != 0)
if (buf[pos++] != 0)
if (size >= 4)
return SZ_ERROR_ARCHIVE;
while (size)
if (buf[--size])
return SZ_ERROR_ARCHIVE;
return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE;
return SZ_OK;
}
/*
static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc)
{
SRes res;
size_t size;
Byte *buf;
if (indexSize > ((UInt32)1 << 31))
return SZ_ERROR_UNSUPPORTED;
if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1)))
return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
size = (size_t)indexSize;
if (size != indexSize)
return SZ_ERROR_UNSUPPORTED;
buf = (Byte *)ISzAlloc_Alloc(alloc, size);
if (!buf)
return SZ_ERROR_MEM;
res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED);
if (res == SZ_OK)
res = Xz_ReadIndex2(p, buf, size, alloc);
res = Xz_ParseIndex(p, buf, size, alloc);
ISzAlloc_Free(alloc, buf);
return res;
}
*/
static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size)
{
@ -160,84 +174,102 @@ static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset,
/* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */
}
/*
in:
(*startOffset) is position in (stream) where xz_stream must be finished.
out:
if returns SZ_OK, then (*startOffset) is position in stream that shows start of xz_stream.
*/
static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc)
{
UInt64 indexSize;
Byte buf[XZ_STREAM_FOOTER_SIZE];
#define TEMP_BUF_SIZE (1 << 10)
UInt32 buf32[TEMP_BUF_SIZE / 4];
UInt64 pos = (UInt64)*startOffset;
if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE)
if ((pos & 3) || pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_SIG_CHECK(buf + 10))
if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32))
{
UInt32 total = 0;
pos += XZ_STREAM_FOOTER_SIZE;
for (;;)
{
size_t i;
#define TEMP_BUF_SIZE (1 << 10)
Byte temp[TEMP_BUF_SIZE];
i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos;
// pos != 0
// (pos & 3) == 0
size_t i = pos >= TEMP_BUF_SIZE ? TEMP_BUF_SIZE : (size_t)pos;
pos -= i;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i))
total += (UInt32)i;
for (; i != 0; i--)
if (temp[i - 1] != 0)
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, i))
i /= 4;
do
if (buf32[i - 1] != 0)
break;
if (i != 0)
{
if ((i & 3) != 0)
return SZ_ERROR_NO_ARCHIVE;
pos += i;
break;
}
if (pos < XZ_STREAM_FOOTER_SIZE || total > (1 << 16))
while (--i);
pos += i * 4;
#define XZ_STREAM_BACKWARD_READING_PAD_MAX (1 << 16)
// here we don't support rare case with big padding for xz stream.
// so we have padding limit for backward reading.
if ((UInt64)*startOffset - pos > XZ_STREAM_BACKWARD_READING_PAD_MAX)
return SZ_ERROR_NO_ARCHIVE;
if (i)
break;
}
// we try to open xz stream after skipping zero padding.
// ((UInt64)*startOffset == pos) is possible here!
if (pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE;
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_SIG_CHECK(buf + 10))
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32))
return SZ_ERROR_NO_ARCHIVE;
}
p->flags = (CXzStreamFlags)GetBe16(buf + 8);
p->flags = (CXzStreamFlags)GetBe16a(buf32 + 2);
if (!XzFlags_IsSupported(p->flags))
return SZ_ERROR_UNSUPPORTED;
{
/* to eliminate GCC 6.3 warning:
dereferencing type-punned pointer will break strict-aliasing rules */
const Byte *buf_ptr = buf;
if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6))
const UInt32 *buf_ptr = buf32;
if (GetUi32a(buf_ptr) != CrcCalc(buf32 + 1, 6))
return SZ_ERROR_ARCHIVE;
}
indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2;
if (pos < indexSize)
return SZ_ERROR_ARCHIVE;
pos -= indexSize;
RINOK(LookInStream_SeekTo(stream, pos))
RINOK(Xz_ReadIndex(p, stream, indexSize, alloc))
{
UInt64 totalSize = Xz_GetPackSize(p);
if (totalSize == XZ_SIZE_OVERFLOW
|| totalSize >= ((UInt64)1 << 63)
|| pos < totalSize + XZ_STREAM_HEADER_SIZE)
const UInt64 indexSize = ((UInt64)GetUi32a(buf32 + 1) + 1) << 2;
if (pos < indexSize)
return SZ_ERROR_ARCHIVE;
pos -= (totalSize + XZ_STREAM_HEADER_SIZE);
pos -= indexSize;
// v25.00: relaxed indexSize check. We allow big index table.
// if (indexSize > ((UInt32)1 << 31))
if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1)))
return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
RINOK(LookInStream_SeekTo(stream, pos))
// RINOK(Xz_ReadIndex(p, stream, indexSize, alloc))
{
SRes res;
const size_t size = (size_t)indexSize;
// if (size != indexSize) return SZ_ERROR_UNSUPPORTED;
Byte *buf = (Byte *)ISzAlloc_Alloc(alloc, size);
if (!buf)
return SZ_ERROR_MEM;
res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED);
if (res == SZ_OK)
res = Xz_ParseIndex(p, buf, size, alloc);
ISzAlloc_Free(alloc, buf);
RINOK(res)
}
}
{
UInt64 total = Xz_GetPackSize(p);
if (total == XZ_SIZE_OVERFLOW || total >= ((UInt64)1 << 63))
return SZ_ERROR_ARCHIVE;
total += XZ_STREAM_HEADER_SIZE;
if (pos < total)
return SZ_ERROR_ARCHIVE;
pos -= total;
RINOK(LookInStream_SeekTo(stream, pos))
*startOffset = (Int64)pos;
}
@ -246,7 +278,6 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO
CSecToRead secToRead;
SecToRead_CreateVTable(&secToRead);
secToRead.realStream = stream;
RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt))
return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE;
}
@ -257,8 +288,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO
void Xzs_Construct(CXzs *p)
{
p->num = p->numAllocated = 0;
p->streams = 0;
Xzs_CONSTRUCT(p)
}
void Xzs_Free(CXzs *p, ISzAllocPtr alloc)
@ -268,7 +298,7 @@ void Xzs_Free(CXzs *p, ISzAllocPtr alloc)
Xz_Free(&p->streams[i], alloc);
ISzAlloc_Free(alloc, p->streams);
p->num = p->numAllocated = 0;
p->streams = 0;
p->streams = NULL;
}
UInt64 Xzs_GetNumBlocks(const CXzs *p)
@ -307,34 +337,49 @@ UInt64 Xzs_GetPackSize(const CXzs *p)
SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc)
{
Int64 endOffset = 0;
// it's supposed that CXzs object is empty here.
// if CXzs object is not empty, it will add new streams to that non-empty object.
// Xzs_Free(p, alloc); // it's optional call to empty CXzs object.
RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END))
*startOffset = endOffset;
for (;;)
{
CXzStream st;
SRes res;
Xz_Construct(&st);
Xz_CONSTRUCT(&st)
res = Xz_ReadBackward(&st, stream, startOffset, alloc);
// if (res == SZ_OK), then (*startOffset) is start offset of new stream if
// if (res != SZ_OK), then (*startOffset) is unchend or it's expected start offset of stream with error
st.startOffset = (UInt64)*startOffset;
RINOK(res)
// we must store (st) object to array, or we must free (st) local object.
if (res != SZ_OK)
{
Xz_Free(&st, alloc);
return res;
}
if (p->num == p->numAllocated)
{
const size_t newNum = p->num + p->num / 4 + 1;
void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
if (!data)
{
Xz_Free(&st, alloc);
return SZ_ERROR_MEM;
}
p->numAllocated = newNum;
if (p->num != 0)
memcpy(data, p->streams, p->num * sizeof(CXzStream));
ISzAlloc_Free(alloc, p->streams);
p->streams = (CXzStream *)data;
}
// we use direct copying of raw data from local variable (st) to object in array.
// so we don't need to call Xz_Free(&st, alloc) after copying and after p->num++
p->streams[p->num++] = st;
if (*startOffset == 0)
break;
RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset))
return SZ_OK;
// seek operation is optional:
// RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset))
if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
return SZ_ERROR_PROGRESS;
}
return SZ_OK;
}

View file

@ -1245,8 +1245,6 @@ $O/Sha512.o: ../../../../C/Sha512.c
$(CC) $(CFLAGS) $<
$O/Sha512Opt.o: ../../../../C/Sha512Opt.c
$(CC) $(CFLAGS) $<
$O/Sort.o: ../../../../C/Sort.c
$(CC) $(CFLAGS) $<
$O/SwapBytes.o: ../../../../C/SwapBytes.c
$(CC) $(CFLAGS) $<
$O/Xxh64.o: ../../../../C/Xxh64.c
@ -1285,6 +1283,8 @@ $O/Sha1Opt.o: ../../../../Asm/x86/Sha1Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sha256Opt.o: ../../../../Asm/x86/Sha256Opt.asm
$(MY_ASM) $(AFLAGS) $<
$O/Sort.o: ../../../../Asm/x86/Sort.asm
$(MY_ASM) $(AFLAGS) $<
ifndef USE_JWASM
USE_X86_ASM_AES=1
@ -1299,6 +1299,8 @@ $O/Sha1Opt.o: ../../../../C/Sha1Opt.c
$(CC) $(CFLAGS) $<
$O/Sha256Opt.o: ../../../../C/Sha256Opt.c
$(CC) $(CFLAGS) $<
$O/Sort.o: ../../../../C/Sort.c
$(CC) $(CFLAGS) $<
endif

View file

@ -59,6 +59,7 @@ struct CCompressionMethodMode
bool NumThreads_WasForced;
bool MultiThreadMixer;
UInt32 NumThreads;
UInt32 NumThreadGroups;
#endif
UString Password; // _Wipe
@ -74,6 +75,7 @@ struct CCompressionMethodMode
, NumThreads_WasForced(false)
, MultiThreadMixer(true)
, NumThreads(1)
, NumThreadGroups(0)
#endif
, MemoryUsageLimit((UInt64)1 << 30)
{}

View file

@ -111,8 +111,8 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode)
}
}
const UInt64 kSolidBytes_Min = (1 << 24);
const UInt64 kSolidBytes_Max = ((UInt64)1 << 32);
const UInt64 kSolidBytes_Min = 1 << 24;
const UInt64 kSolidBytes_Max = (UInt64)1 << 32; // for non-LZMA2 methods
bool needSolid = false;
@ -122,22 +122,24 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode)
SetGlobalLevelTo(oneMethodInfo);
#ifndef Z7_ST
#ifndef Z7_ST
const bool numThreads_WasSpecifiedInMethod = (oneMethodInfo.Get_NumThreads() >= 0);
if (!numThreads_WasSpecifiedInMethod)
{
// here we set the (NCoderPropID::kNumThreads) property in each method, only if there is no such property already
CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, methodMode.NumThreads);
}
#endif
if (methodMode.NumThreadGroups > 1)
CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(oneMethodInfo, methodMode.NumThreadGroups);
#endif
CMethodFull &methodFull = methodMode.Methods.AddNew();
RINOK(PropsMethod_To_FullMethod(methodFull, oneMethodInfo))
#ifndef Z7_ST
#ifndef Z7_ST
methodFull.Set_NumThreads = true;
methodFull.NumThreads = methodMode.NumThreads;
#endif
#endif
if (methodFull.Id != k_Copy)
needSolid = true;
@ -217,19 +219,18 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode)
// here we get real chunkSize
cs = oneMethodInfo.Get_Xz_BlockSize();
if (dicSize > cs)
dicSize = cs;
dicSize = cs;
const UInt64 kSolidBytes_Lzma2_Max = ((UInt64)1 << 34);
const UInt64 kSolidBytes_Lzma2_Max = (UInt64)1 << 34;
if (numSolidBytes > kSolidBytes_Lzma2_Max)
numSolidBytes = kSolidBytes_Lzma2_Max;
numSolidBytes = kSolidBytes_Lzma2_Max;
methodFull.Set_NumThreads = false; // we don't use ICompressSetCoderMt::SetNumberOfThreads() for LZMA2 encoder
#ifndef Z7_ST
if (!numThreads_WasSpecifiedInMethod
&& !methodMode.NumThreads_WasForced
&& methodMode.MemoryUsageLimit_WasSet
)
&& methodMode.MemoryUsageLimit_WasSet)
{
const UInt32 lzmaThreads = oneMethodInfo.Get_Lzma_NumThreads();
const UInt32 numBlockThreads_Original = methodMode.NumThreads / lzmaThreads;
@ -273,14 +274,14 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode)
{
numSolidBytes = (UInt64)dicSize << 7;
if (numSolidBytes > kSolidBytes_Max)
numSolidBytes = kSolidBytes_Max;
numSolidBytes = kSolidBytes_Max;
}
if (_numSolidBytesDefined)
continue;
if (numSolidBytes < kSolidBytes_Min)
numSolidBytes = kSolidBytes_Min;
numSolidBytes = kSolidBytes_Min;
_numSolidBytes = numSolidBytes;
_numSolidBytesDefined = true;
}
@ -704,6 +705,9 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
methodMode.NumThreads = numThreads;
methodMode.NumThreads_WasForced = _numThreads_WasForced;
methodMode.MultiThreadMixer = _useMultiThreadMixer;
#ifdef _WIN32
methodMode.NumThreadGroups = _numThreadGroups; // _change it
#endif
// headerMethod.NumThreads = 1;
headerMethod.MultiThreadMixer = _useMultiThreadMixer;
}

View file

@ -325,7 +325,7 @@ HRESULT CHandler::ParseLongNames(IInStream *stream)
{
unsigned i;
for (i = 0; i < _items.Size(); i++)
if (_items[i].Name == "//")
if (_items[i].Name.IsEqualTo("//"))
break;
if (i == _items.Size())
return S_OK;
@ -378,7 +378,7 @@ void CHandler::ChangeDuplicateNames()
if (item.Name[0] == '/')
continue;
CItem &prev = _items[i - 1];
if (item.Name == prev.Name)
if (item.Name.IsEqualTo(prev.Name))
{
if (prev.SameNameIndex < 0)
prev.SameNameIndex = 0;
@ -448,9 +448,9 @@ static UInt32 Get32(const Byte *p, unsigned be) { if (be) return GetBe32(p); ret
HRESULT CHandler::ParseLibSymbols(IInStream *stream, unsigned fileIndex)
{
CItem &item = _items[fileIndex];
if (item.Name != "/" &&
item.Name != "__.SYMDEF" &&
item.Name != "__.SYMDEF SORTED")
if (!item.Name.IsEqualTo("/") &&
!item.Name.IsEqualTo("__.SYMDEF") &&
!item.Name.IsEqualTo("__.SYMDEF SORTED"))
return S_OK;
if (item.Size > ((UInt32)1 << 30) ||
item.Size < 4)
@ -462,7 +462,7 @@ HRESULT CHandler::ParseLibSymbols(IInStream *stream, unsigned fileIndex)
size_t pos = 0;
if (item.Name != "/")
if (!item.Name.IsEqualTo("/"))
{
// "__.SYMDEF" parsing (BSD)
unsigned be;
@ -603,7 +603,7 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream,
if (_longNames_FileIndex >= 0)
_items.Delete((unsigned)_longNames_FileIndex);
if (!_items.IsEmpty() && _items[0].Name == "debian-binary")
if (!_items.IsEmpty() && _items[0].Name.IsEqualTo("debian-binary"))
{
_type = kType_Deb;
_items.DeleteFrontal(1);

View file

@ -427,9 +427,13 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
}
CMethodProps props2 = _props;
#ifndef Z7_ST
#ifndef Z7_ST
props2.AddProp_NumThreads(_props._numThreads);
#endif
#ifdef _WIN32
if (_props._numThreadGroups > 1)
props2.AddProp32(NCoderPropID::kNumThreadGroups, _props._numThreadGroups);
#endif
#endif
return UpdateArchive(size, outStream, props2, updateCallback);
}

View file

@ -68,7 +68,7 @@ namespace NItemType
static const Byte kRootStorage = 5;
}
static const UInt32 kNameSizeMax = 64;
static const unsigned kNameSizeMax = 64;
struct CItem
{
@ -98,30 +98,30 @@ struct CRef
class CDatabase
{
UInt32 NumSectorsInMiniStream;
CObjArray<UInt32> MiniSids;
HRESULT AddNode(int parent, UInt32 did);
public:
CObjArray<UInt32> Fat;
UInt32 FatSize;
CObjArray<UInt32> Mat;
UInt32 MatSize;
CObjectVector<CItem> Items;
CRecordVector<CRef> Refs;
private:
UInt32 NumSectorsInMiniStream;
public:
UInt32 MatSize;
UInt32 FatSize;
UInt32 LongStreamMinSize;
unsigned SectorSizeBits;
unsigned MiniSectorSizeBits;
Int32 MainSubfile;
EType Type;
UInt64 PhySize;
UInt64 PhySize_Aligned;
EType Type;
bool IsNotArcType() const
{
@ -148,14 +148,14 @@ public:
UInt64 GetItemPackSize(UInt64 size) const
{
UInt64 mask = ((UInt64)1 << (IsLargeStream(size) ? SectorSizeBits : MiniSectorSizeBits)) - 1;
const UInt64 mask = ((UInt32)1 << (IsLargeStream(size) ? SectorSizeBits : MiniSectorSizeBits)) - 1;
return (size + mask) & ~mask;
}
bool GetMiniCluster(UInt32 sid, UInt64 &res) const
{
unsigned subBits = SectorSizeBits - MiniSectorSizeBits;
UInt32 fid = sid >> subBits;
const unsigned subBits = SectorSizeBits - MiniSectorSizeBits;
const UInt32 fid = sid >> subBits;
if (fid >= NumSectorsInMiniStream)
return false;
res = (((UInt64)MiniSids[fid] + 1) << subBits) + (sid & ((1 << subBits) - 1));
@ -177,7 +177,7 @@ HRESULT CDatabase::ReadSector(IInStream *inStream, Byte *buf, unsigned sectorSiz
HRESULT CDatabase::ReadIDs(IInStream *inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid, UInt32 *dest)
{
RINOK(ReadSector(inStream, buf, sectorSizeBits, sid))
UInt32 sectorSize = (UInt32)1 << sectorSizeBits;
const UInt32 sectorSize = (UInt32)1 << sectorSizeBits;
for (UInt32 t = 0; t < sectorSize; t += 4)
*dest++ = Get32(buf + t);
return S_OK;
@ -373,7 +373,7 @@ UString CDatabase::GetItemPath(UInt32 index) const
HRESULT CDatabase::Update_PhySize_WithItem(unsigned index)
{
const CItem &item = Items[index];
bool isLargeStream = (index == 0 || IsLargeStream(item.Size));
const bool isLargeStream = (index == 0 || IsLargeStream(item.Size));
if (!isLargeStream)
return S_OK;
const unsigned bsLog = isLargeStream ? SectorSizeBits : MiniSectorSizeBits;
@ -527,6 +527,10 @@ HRESULT CDatabase::Open(IInStream *inStream)
{
CItem item;
item.Parse(sect + i, mode64bit);
// we use (item.Size) check here.
// so we don't need additional overflow checks for (item.Size +) in another code
if (item.Size >= ((UInt64)1 << 63))
return S_FALSE;
Items.Add(item);
}
sid = Fat[sid];
@ -767,11 +771,8 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
UInt64 totalPackSize;
totalSize = totalPackSize = 0;
NCompress::CCopyCoder *copyCoderSpec = new NCompress::CCopyCoder();
CMyComPtr<ICompressCoder> copyCoder = copyCoderSpec;
CLocalProgress *lps = new CLocalProgress;
CMyComPtr<ICompressProgressInfo> progress = lps;
CMyComPtr2_Create<ICompressCoder, NCompress::CCopyCoder> copyCoder;
CMyComPtr2_Create<ICompressProgressInfo, CLocalProgress> lps;
lps->Init(extractCallback, false);
for (i = 0; i < numItems; i++)
@ -781,7 +782,8 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
RINOK(lps->SetCur())
const UInt32 index = allFilesMode ? i : indices[i];
const CItem &item = _db.Items[_db.Refs[index].Did];
Int32 res;
{
CMyComPtr<ISequentialOutStream> outStream;
const Int32 askMode = testMode ?
NExtract::NAskMode::kTest :
@ -801,7 +803,7 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
if (!testMode && !outStream)
continue;
RINOK(extractCallback->PrepareOperation(askMode))
Int32 res = NExtract::NOperationResult::kDataError;
res = NExtract::NOperationResult::kDataError;
CMyComPtr<ISequentialInStream> inStream;
HRESULT hres = GetStream(index, &inStream);
if (hres == S_FALSE)
@ -813,12 +815,12 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
RINOK(hres)
if (inStream)
{
RINOK(copyCoder->Code(inStream, outStream, NULL, NULL, progress))
if (copyCoderSpec->TotalSize == item.Size)
RINOK(copyCoder.Interface()->Code(inStream, outStream, NULL, NULL, lps))
if (copyCoder->TotalSize == item.Size)
res = NExtract::NOperationResult::kOK;
}
}
outStream.Release();
}
RINOK(extractCallback->SetOperationResult(res))
}
return S_OK;

View file

@ -4,8 +4,6 @@
#include "../../../Common/StringToInt.h"
#include "../Common/ParseProperties.h"
#include "HandlerOut.h"
namespace NArchive {
@ -82,6 +80,7 @@ bool ParseSizeString(const wchar_t *s, const PROPVARIANT &prop, UInt64 percentsB
return true;
}
bool CCommonMethodProps::SetCommonProperty(const UString &name, const PROPVARIANT &value, HRESULT &hres)
{
hres = S_OK;
@ -151,6 +150,11 @@ void CMultiMethodProps::SetMethodThreadsTo_Replace(CMethodProps &oneMethodInfo,
SetMethodProp32_Replace(oneMethodInfo, NCoderPropID::kNumThreads, numThreads);
}
void CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &oneMethodInfo, UInt32 numThreadGroups)
{
SetMethodProp32(oneMethodInfo, NCoderPropID::kNumThreadGroups, numThreadGroups);
}
#endif // Z7_ST

View file

@ -17,11 +17,21 @@ protected:
void InitCommon()
{
// _Write_MTime = true;
#ifndef Z7_ST
_numProcessors = _numThreads = NWindows::NSystem::GetNumberOfProcessors();
_numThreads_WasForced = false;
#endif
{
#ifndef Z7_ST
_numThreads_WasForced = false;
UInt32 numThreads;
#ifdef _WIN32
NWindows::NSystem::CProcessAffinity aff;
numThreads = aff.Load_and_GetNumberOfThreads();
_numThreadGroups = aff.IsGroupMode ? aff.Groups.GroupSizes.Size() : 0;
#else
numThreads = NWindows::NSystem::GetNumberOfProcessors();
#endif // _WIN32
_numProcessors = _numThreads = numThreads;
#endif // Z7_ST
}
size_t memAvail = (size_t)sizeof(size_t) << 28;
_memAvail = memAvail;
_memUsage_Compress = memAvail;
@ -46,11 +56,14 @@ protected:
}
public:
#ifndef Z7_ST
#ifndef Z7_ST
UInt32 _numThreads;
UInt32 _numProcessors;
#ifdef _WIN32
UInt32 _numThreadGroups;
#endif
bool _numThreads_WasForced;
#endif
#endif
bool _memUsage_WasSet;
UInt64 _memUsage_Compress;
@ -80,10 +93,12 @@ public:
void SetGlobalLevelTo(COneMethodInfo &oneMethodInfo) const;
#ifndef Z7_ST
#ifndef Z7_ST
static void SetMethodThreadsTo_IfNotFinded(CMethodProps &props, UInt32 numThreads);
static void SetMethodThreadsTo_Replace(CMethodProps &props, UInt32 numThreads);
#endif
static void Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &props, UInt32 numThreadGroups);
#endif
unsigned GetNumEmptyMethods() const

View file

@ -47,6 +47,25 @@ UString GetOsPath_Remove_TailSlash(const UString &name)
}
#if WCHAR_PATH_SEPARATOR != L'/'
void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement)
{
// name.Replace(kUnixPathSepar, kOsPathSepar);
const unsigned len = name.Len();
for (unsigned i = 0; i < len; i++)
{
wchar_t c = name[i];
if (c == L'/')
c = WCHAR_PATH_SEPARATOR;
else if (useBackslashReplacement && c == L'\\')
c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme
else
continue;
name.ReplaceOneCharAtPos(i, c);
}
}
#endif
void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool
#if WCHAR_PATH_SEPARATOR != L'/'
useBackslashReplacement
@ -57,21 +76,7 @@ void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool
return;
#if WCHAR_PATH_SEPARATOR != L'/'
{
// name.Replace(kUnixPathSepar, kOsPathSepar);
const unsigned len = name.Len();
for (unsigned i = 0; i < len; i++)
{
wchar_t c = name[i];
if (c == L'/')
c = WCHAR_PATH_SEPARATOR;
else if (useBackslashReplacement && c == L'\\')
c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme
else
continue;
name.ReplaceOneCharAtPos(i, c);
}
}
ReplaceToWinSlashes(name, useBackslashReplacement);
#endif
if (name.Back() == kOsPathSepar)

View file

@ -13,6 +13,9 @@ void ReplaceSlashes_OsToUnix(UString &name);
UString GetOsPath(const UString &name);
UString GetOsPath_Remove_TailSlash(const UString &name);
#if WCHAR_PATH_SEPARATOR != L'/'
void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement);
#endif
void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool useBackslashReplacement = false);
void NormalizeSlashes_in_FileName_for_OsPath(wchar_t *s, unsigned len);
void NormalizeSlashes_in_FileName_for_OsPath(UString &name);

View file

@ -437,7 +437,14 @@ HRESULT CInArchive::GetNextItem()
return S_OK;
/* v23.02: we have disabled rDevMinor check because real file
from Apple contains rDevMinor==255 by some unknown reason */
from Apple contains rDevMinor==255 by some unknown reason
cpio 2.13 and older versions: it copies stat::st_rdev to archive.
and stat::st_rdev can be non-zero for some old linux/filesystems cases for regular files.
cpio 2.14 (2023) copies st_rdev to archive only if (S_ISBLK (st->st_mode) || S_ISCHR (st->st_mode))
v25.00: we have disabled RDevMajor check here to support some rare case created by cpio 2.13- with old linux.
But we still keep full check in IsArc_Cpio() to reduce false cpio detection cases.
*/
#if 0 // 0 : to disable check to support some old linux cpio archives.
if (item.RDevMajor != 0
// || item.RDevMinor != 0
)
@ -446,6 +453,7 @@ HRESULT CInArchive::GetNextItem()
!MY_LIN_S_ISBLK(item.Mode))
return S_OK;
}
#endif
// Size must be 0 for FIFOs and directories
if (item.IsDir() || MY_LIN_S_ISFIFO(item.Mode))
@ -873,17 +881,13 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val
{
case kpidPath:
{
UString res;
bool needConvert = true;
#ifdef _WIN32
// if (
ConvertUTF8ToUnicode(item.Name, res);
// )
needConvert = false;
#endif
if (needConvert)
res = MultiByteToUnicodeString(item.Name, CP_OEMCP);
prop = NItemName::GetOsPath(res);
#ifdef _WIN32
UString u;
ConvertUTF8ToUnicode(item.Name, u);
#else
const UString u = MultiByteToUnicodeString(item.Name, CP_OEMCP);
#endif
prop = NItemName::GetOsPath(u);
break;
}
case kpidIsDir: prop = item.IsDir(); break;
@ -921,16 +925,12 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val
s.SetFrom_CalcLen((const char *)(const void *)(const Byte *)item.Data, (unsigned)item.Data.Size());
if (s.Len() == item.Data.Size())
{
#ifdef _WIN32
UString u;
bool needConvert = true;
#ifdef _WIN32
// if (
ConvertUTF8ToUnicode(item.Name, u);
// )
needConvert = false;
#endif
if (needConvert)
u = MultiByteToUnicodeString(s, CP_OEMCP);
ConvertUTF8ToUnicode(item.Name, u);
#else
const UString u = MultiByteToUnicodeString(s, CP_OEMCP);
#endif
prop = u;
}
}

View file

@ -444,7 +444,7 @@ const char *Find_Apple_FS_Ext(const AString &name)
{
const CAppleName &a = k_Names[i];
if (a.Ext)
if (name == a.AppleName)
if (name.IsEqualTo(a.AppleName))
return a.Ext;
}
return NULL;
@ -784,7 +784,7 @@ static const CXmlItem *FindKeyPair(const CXmlItem &item, const char *key, const
for (unsigned i = 0; i + 1 < item.SubItems.Size(); i++)
{
const CXmlItem &si = item.SubItems[i];
if (si.IsTagged("key") && si.GetSubString() == key)
if (si.IsTagged("key") && si.GetSubString().IsEqualTo(key))
{
const CXmlItem *si_1 = &item.SubItems[i + 1];
if (si_1->IsTagged(nextTag))
@ -1251,7 +1251,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *openArchiveCall
#endif
}
if (xml.Root.Name != "plist")
if (!xml.Root.Name.IsEqualTo("plist"))
return S_FALSE;
const CXmlItem *dictItem = xml.Root.FindSubTag_GetPtr("dict");

File diff suppressed because it is too large Load diff

View file

@ -4005,7 +4005,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh)
AddParam_Var(params[0]);
AString temp;
ReadString2(temp, params[1]);
if (temp != "$TEMP")
if (!temp.IsEqualTo("$TEMP"))
SpaceQuStr(temp);
break;
}
@ -4410,7 +4410,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh)
}
else
{
if (func == "DllUnregisterServer")
if (func.IsEqualTo("DllUnregisterServer"))
{
s += "UnRegDLL";
printFunc = false;
@ -4418,7 +4418,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh)
else
{
s += "RegDLL";
if (func == "DllRegisterServer")
if (func.IsEqualTo("DllRegisterServer"))
printFunc = false;
}
AddParam(params[0]);
@ -4886,7 +4886,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh)
AddParam_Var(params[1]);
AddParam(params[2]);
AddParam(params[4]);
// if (params[2] == "0") AddCommentAndString("GetWinVer");
// if (params[2].IsEqualTo("0")) AddCommentAndString("GetWinVer");
}
else
s += "GetOsInfo";

View file

@ -1907,7 +1907,7 @@ HRESULT CDatabase::Open()
for (i = 0; i < SecurityAttrs.Size(); i++)
{
const CAttr &attr = SecurityAttrs[i];
if (attr.Name == L"$SII")
if (attr.Name.IsEqualTo("$SII"))
{
if (attr.Type == ATTR_TYPE_INDEX_ROOT)
{

View file

@ -2638,7 +2638,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
{
const CSection &sect = _sections[i];
if (IsOpt())
if (_parseResources && sect.Name == ".rsrc")
if (_parseResources && sect.Name.IsEqualTo(".rsrc"))
{
// 20.01: we try to parse only first copy of .rsrc section.
_parseResources = false;
@ -2727,7 +2727,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback)
for (i = 0; i < _mixItems.Size(); i++)
{
const CMixItem &mixItem = _mixItems[i];
if (mixItem.StringIndex < 0 && mixItem.ResourceIndex < 0 && _sections[mixItem.SectionIndex].Name == "_winzip_")
if (mixItem.StringIndex < 0 && mixItem.ResourceIndex < 0 && _sections[mixItem.SectionIndex].Name.IsEqualTo("_winzip_"))
{
_mainSubfile = (Int32)(int)i;
break;

View file

@ -393,6 +393,7 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop)
if (!FindExtra_Link(link))
return;
bool isWindows = (HostOS == kHost_Windows);
if (link.Type != linkType)
{
if (linkType != NLinkType::kUnixSymLink)
@ -400,8 +401,11 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop)
switch ((unsigned)link.Type)
{
case NLinkType::kUnixSymLink:
isWindows = false;
break;
case NLinkType::kWinSymLink:
case NLinkType::kWinJunction:
isWindows = true;
break;
default: return;
}
@ -409,10 +413,15 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop)
AString s;
s.SetFrom_CalcLen((const char *)(Extra + link.NameOffset), link.NameLen);
UString unicode;
ConvertUTF8ToUnicode(s, unicode);
prop = NItemName::GetOsPath(unicode);
// rar5.0 used '\\' separator for windows symlinks and \??\ prefix for abs paths.
// rar5.1+ uses '/' separator for windows symlinks and /??/ prefix for abs paths.
// v25.00: we convert Windows slashes to Linux slashes:
if (isWindows)
unicode.Replace(L'\\', L'/');
prop = unicode;
// prop = NItemName::GetOsPath(unicode);
}
bool CItem::GetAltStreamName(AString &name) const

View file

@ -286,10 +286,10 @@ struct CItem
bool IsService() const { return RecordType == NHeaderType::kService; }
bool Is_STM() const { return IsService() && Name == "STM"; }
bool Is_CMT() const { return IsService() && Name == "CMT"; }
bool Is_ACL() const { return IsService() && Name == "ACL"; }
// bool Is_QO() const { return IsService() && Name == "QO"; }
bool Is_STM() const { return IsService() && Name.IsEqualTo("STM"); }
bool Is_CMT() const { return IsService() && Name.IsEqualTo("CMT"); }
bool Is_ACL() const { return IsService() && Name.IsEqualTo("ACL"); }
// bool Is_QO() const { return IsService() && Name.IsEqualTo("QO"); }
int FindExtra(unsigned extraID, unsigned &recordDataSize) const;
void PrintInfo(AString &s) const;

View file

@ -435,13 +435,13 @@ bool CInArchive::ReadHeaderReal(const Byte *p, unsigned size, CItem &item)
size -= sizeof(item.Salt);
p += sizeof(item.Salt);
}
if (item.Name == "ACL" && size == 0)
if (item.Name.IsEqualTo("ACL") && size == 0)
{
item.IsAltStream = true;
item.Name.Empty();
item.UnicodeName.SetFromAscii(".ACL");
}
else if (item.Name == "STM" && size != 0 && (size & 1) == 0)
else if (item.Name.IsEqualTo("STM") && size != 0 && (size & 1) == 0)
{
item.IsAltStream = true;
item.Name.Empty();

View file

@ -330,11 +330,11 @@ void CHandler::AddSubFileExtension(AString &res) const
if (!_compressor.IsEmpty())
{
s = _compressor;
if (_compressor == "bzip2")
if (_compressor.IsEqualTo("bzip2"))
s = "bz2";
else if (_compressor == "gzip")
else if (_compressor.IsEqualTo("gzip"))
s = "gz";
else if (_compressor == "zstd")
else if (_compressor.IsEqualTo("zstd"))
s = "zst";
}
else

View file

@ -202,9 +202,12 @@ struct CExtentInfo
// PartitionUUID
// DeviceIdentifier
bool IsType_ZERO() const { return Type == "ZERO"; }
// bool IsType_FLAT() const { return Type == "FLAT"; }
bool IsType_Flat() const { return Type == "FLAT" || Type == "VMFS" || Type == "VMFSRAW"; }
bool IsType_ZERO() const { return Type.IsEqualTo("ZERO"); }
// bool IsType_FLAT() const { return Type.IsEqualTo("FLAT"); }
bool IsType_Flat() const
{ return Type.IsEqualTo("FLAT")
|| Type.IsEqualTo("VMFS")
|| Type.IsEqualTo("VMFSRAW"); }
bool Parse(const char *s);
};

View file

@ -1814,7 +1814,7 @@ bool CWimXml::Parse()
if (!Xml.Parse(utf))
return false;
if (Xml.Root.Name != "WIM")
if (!Xml.Root.Name.IsEqualTo("WIM"))
return false;
FOR_VECTOR (i, Xml.Root.SubItems)

View file

@ -266,7 +266,7 @@ struct CFile
bool IsCopyMethod() const
{
return Method.IsEmpty() || Method == "octet-stream";
return Method.IsEmpty() || Method.IsEqualTo("octet-stream");
}
void UpdateTotalPackSize(UInt64 &totalSize) const
@ -416,7 +416,7 @@ static bool AddItem(const CXmlItem &item, CObjectVector<CFile> &files, int paren
return true;
if (level >= 1024)
return false;
if (item.Name == "file")
if (item.Name.IsEqualTo("file"))
{
CFile file(parent);
parent = (int)files.Size();
@ -435,19 +435,19 @@ static bool AddItem(const CXmlItem &item, CObjectVector<CFile> &files, int paren
{
file.Type = typeItem->GetSubString();
// file.LinkFrom = typeItem->GetPropVal("link");
if (file.Type == "directory")
if (file.Type.IsEqualTo("directory"))
file.IsDir = true;
else
{
// file.IsDir = false;
/*
else if (file.Type == "file")
else if (file.Type.IsEqualTo("file"))
{}
else if (file.Type == "hardlink")
else if (file.Type.IsEqualTo("hardlink"))
{}
else
*/
if (file.Type == "symlink")
if (file.Type.IsEqualTo("symlink"))
file.Is_SymLink = true;
// file.IsDir = false;
}
@ -489,7 +489,7 @@ static bool AddItem(const CXmlItem &item, CObjectVector<CFile> &files, int paren
if (s.IsPrefixedBy(xx))
{
s.DeleteFrontal(xx.Len());
if (s == "gzip")
if (s.IsEqualTo("gzip"))
s = METHOD_NAME_ZLIB;
}
}
@ -692,12 +692,13 @@ HRESULT CHandler::Open2(IInStream *stream)
file.UpdateTotalPackSize(totalPackSize);
if (file.Parent == -1)
{
if (file.Name == "Payload" || file.Name == "Content")
if (file.Name.IsEqualTo("Payload") ||
file.Name.IsEqualTo("Content"))
{
_mainSubfile = (Int32)(int)i;
numMainFiles++;
}
else if (file.Name == "PackageInfo")
else if (file.Name.IsEqualTo("PackageInfo"))
_is_pkg = true;
}
}
@ -1210,9 +1211,9 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
else
opRes = NExtract::NOperationResult::kUnsupportedMethod;
}
else if (item.Method == METHOD_NAME_ZLIB)
else if (item.Method.IsEqualTo(METHOD_NAME_ZLIB))
coder = zlibCoder;
else if (item.Method == "bzip2")
else if (item.Method.IsEqualTo("bzip2"))
coder = bzip2Coder;
else
opRes = NExtract::NOperationResult::kUnsupportedMethod;

View file

@ -446,7 +446,7 @@ void COpenCallbackWrap::Init(IArchiveOpenCallback *callback)
struct CXzsCPP
{
CXzs p;
CXzsCPP() { Xzs_Construct(&p); }
CXzsCPP() { Xzs_CONSTRUCT(&p) }
~CXzsCPP() { Xzs_Free(&p, &g_Alloc); }
};
@ -536,6 +536,9 @@ HRESULT CHandler::Open2(IInStream *inStream, /* UInt32 flags, */ IArchiveOpenCal
if (res2 == SZ_ERROR_ARCHIVE)
return S_FALSE;
// what codes are possible here ?
// ?? res2 == SZ_ERROR_MEM : is possible here
// ?? res2 == SZ_ERROR_UNSUPPORTED : is possible here
}
else if (!isIndex)
{
@ -1159,6 +1162,13 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
*/
#ifndef Z7_ST
#ifdef _WIN32
// we don't use chunk multithreading inside lzma2 stream.
// so we don't set xzProps.lzma2Props.numThreadGroups.
if (_numThreadGroups > 1)
xzProps.numThreadGroups = _numThreadGroups;
#endif
UInt32 numThreads = _numThreads;
@ -1183,6 +1193,8 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, numThreads);
}
// printf("\n====== GetProcessGroupAffinity : \n");
UInt64 cs = _numSolidBytes;
if (cs != XZ_PROPS_BLOCK_SIZE_AUTO)
oneMethodInfo.AddProp_BlockSize2(cs);

View file

@ -250,13 +250,26 @@ struct CThreadInfo
HRESULT CreateEvents()
{
WRes wres = CompressEvent.CreateIfNotCreated_Reset();
const WRes wres = CompressEvent.CreateIfNotCreated_Reset();
return HRESULT_FROM_WIN32(wres);
}
HRESULT CreateThread()
// (group < 0) means no_group.
HRESULT CreateThread_with_group(
#ifdef _WIN32
int group
#endif
)
{
WRes wres = Thread.Create(CoderThread, this);
// tested in win10: If thread is created by another thread,
// child thread probably uses same group as parent thread.
// So we don't need to send (group) to encoder in created thread.
const WRes wres =
#ifdef _WIN32
group >= 0 ?
Thread.Create_With_Group(CoderThread, this, (unsigned)group) :
#endif
Thread.Create(CoderThread, this);
return HRESULT_FROM_WIN32(wres);
}
@ -450,8 +463,12 @@ static HRESULT UpdateItemOldData(
if (ui.NewProps)
{
if (item.HasDescriptor())
return E_NOTIMPL;
{
// we know compressed / uncompressed sizes and crc.
// so we remove descriptor here
item.Flags = (UInt16)(item.Flags & ~NFileHeader::NFlags::kDescriptorUsedMask);
// return E_NOTIMPL;
}
// we keep ExternalAttrib and some another properties from old archive
// item.ExternalAttrib = ui.Attrib;
// if we don't change Comment, we keep Comment from OldProperties
@ -1000,6 +1017,9 @@ static HRESULT Update2(
#ifndef Z7_ST
UInt32 numThreads = options._numThreads;
#ifdef _WIN32
const UInt32 numThreadGroups = options._numThreadGroups;
#endif
UInt32 numZipThreads_limit = numThreads;
if (numZipThreads_limit > numFilesToCompress)
@ -1014,12 +1034,10 @@ static HRESULT Update2(
}
{
// we reduce number of threads for 32-bit to reduce memory usege to 256 MB
const UInt32 kNumMaxThreads =
#ifdef _WIN32
64; // _WIN32 supports only 64 threads in one group. So no need for more threads here
#else
128;
#endif
// _WIN32 (64-bit) supports only 64 threads in one group.
8 << (sizeof(size_t) / 2); // 32 threads for 32-bit : 128 threads for 64-bit
if (numThreads > kNumMaxThreads)
numThreads = kNumMaxThreads;
}
@ -1264,7 +1282,14 @@ static HRESULT Update2(
threadInfo.Progress = threadInfo.ProgressSpec;
threadInfo.ProgressSpec->Init(&mtCompressProgressMixer, i);
threadInfo.MtSem = &mtSem;
RINOK(threadInfo.CreateThread())
const HRESULT hres =
threadInfo.CreateThread_with_group(
#ifdef _WIN32
(numThreadGroups > 1 && numThreads > 1) ?
(int)(i % numThreadGroups) : -1
#endif
);
RINOK(hres)
}
}

View file

@ -5,6 +5,7 @@ CFLAGS = $(CFLAGS) -DZ7_ZIP_LZFSE_DISABLE
# CONSOLE_VARIANT_FLAGS=-DZ7_PROG_VARIANT_A
# ZIP_FLAGS=-DZ7_ZIP_LZFSE_DISABLE
# USE_C_SORT=1
# USE_C_AES = 1
# USE_C_SHA = 1
# USE_C_LZFINDOPT = 1
@ -221,7 +222,6 @@ C_OBJS = \
$O\Ppmd8.obj \
$O\Ppmd8Dec.obj \
$O\Ppmd8Enc.obj \
$O\Sort.obj \
$O\SwapBytes.obj \
$O\Threads.obj \
$O\Xxh64.obj \
@ -240,5 +240,6 @@ C_OBJS = \
!include "../../LzmaDec.mak"
!include "../../Sha1.mak"
!include "../../Sha256.mak"
!include "../../Sort.mak"
!include "../../7zip.mak"

View file

@ -148,7 +148,6 @@ C_OBJS = \
$O\LzmaEnc.obj \
$O\MtCoder.obj \
$O\MtDec.obj \
$O\Sort.obj \
$O\SwapBytes.obj \
$O\Threads.obj \
$O\Xz.obj \
@ -164,5 +163,6 @@ C_OBJS = \
!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha256.mak"
!include "../../Sort.mak"
!include "../../7zip.mak"

View file

@ -135,7 +135,6 @@ C_OBJS = \
$O\Ppmd7.obj \
$O\Ppmd7Dec.obj \
$O\Ppmd7Enc.obj \
$O\Sort.obj \
$O\SwapBytes.obj \
$O\Threads.obj \
@ -144,5 +143,6 @@ C_OBJS = \
!include "../../LzFindOpt.mak"
!include "../../LzmaDec.mak"
!include "../../Sha256.mak"
!include "../../Sort.mak"
!include "../../7zip.mak"

View file

@ -291,7 +291,6 @@ C_OBJS = \
$O\Sha3.obj \
$O\Sha512.obj \
$O\Sha512Opt.obj \
$O\Sort.obj \
$O\SwapBytes.obj \
$O\Threads.obj \
$O\Xxh64.obj \
@ -308,3 +307,4 @@ C_OBJS = \
!include "../../LzmaDec.mak"
!include "../../Sha1.mak"
!include "../../Sha256.mak"
!include "../../Sort.mak"

View file

@ -229,7 +229,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */,
}
const FString tempDirPath = tempDir.GetPath();
// tempDirPath = L"M:\\1\\"; // to test low disk space
// tempDirPath = "M:\\1\\"; // to test low disk space
{
bool isCorrupt = false;
UString errorMessage;
@ -308,7 +308,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */,
{
if (appLaunched.IsEmpty())
{
appLaunched = L"setup.exe";
appLaunched = "setup.exe";
if (!NFind::DoesFileExist_FollowLink(us2fs(appLaunched)))
{
if (!assumeYes)

View file

@ -97,6 +97,16 @@ public:
size_t ReadBytesPart(Byte *buf, size_t size);
size_t ReadBytes(Byte *buf, size_t size);
const Byte *Lookahead(size_t &rem)
{
rem = (size_t)(_bufLim - _buf);
if (!rem)
{
ReadBlock();
rem = (size_t)(_bufLim - _buf);
}
return _buf;
}
size_t Skip(size_t size);
};

View file

@ -324,15 +324,22 @@ void CCoderProps::AddProp(const CProp &prop)
HRESULT CProps::SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce) const
{
return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL);
return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL, NULL, NULL);
}
HRESULT CProps::SetCoderProps_DSReduce_Aff(
ICompressSetCoderProperties *scp,
const UInt64 *dataSizeReduce,
const UInt64 *affinity) const
const UInt64 *affinity,
const UInt32 *affinityGroup,
const UInt64 *affinityInGroup) const
{
CCoderProps coderProps(Props.Size() + (dataSizeReduce ? 1 : 0) + (affinity ? 1 : 0) );
CCoderProps coderProps(Props.Size()
+ (dataSizeReduce ? 1 : 0)
+ (affinity ? 1 : 0)
+ (affinityGroup ? 1 : 0)
+ (affinityInGroup ? 1 : 0)
);
FOR_VECTOR (i, Props)
coderProps.AddProp(Props[i]);
if (dataSizeReduce)
@ -349,6 +356,20 @@ HRESULT CProps::SetCoderProps_DSReduce_Aff(
prop.Value = *affinity;
coderProps.AddProp(prop);
}
if (affinityGroup)
{
CProp prop;
prop.Id = NCoderPropID::kThreadGroup;
prop.Value = *affinityGroup;
coderProps.AddProp(prop);
}
if (affinityInGroup)
{
CProp prop;
prop.Id = NCoderPropID::kAffinityInGroup;
prop.Value = *affinityInGroup;
coderProps.AddProp(prop);
}
return coderProps.SetProps(scp);
}
@ -409,6 +430,11 @@ static const CNameToPropID g_NameToPropID[] =
{ VT_UI4, "offset" },
{ VT_UI4, "zhb" }
/*
, { VT_UI4, "tgn" }, // kNumThreadGroups
, { VT_UI4, "tgi" }, // kThreadGroup
, { VT_UI8, "tga" }, // kAffinityInGroup
*/
/*
,
// { VT_UI4, "zhc" },
// { VT_UI4, "zhd" },

View file

@ -80,7 +80,11 @@ struct CProps
}
HRESULT SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce = NULL) const;
HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce, const UInt64 *affinity) const;
HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp,
const UInt64 *dataSizeReduce,
const UInt64 *affinity,
const UInt32 *affinityGroup,
const UInt64 *affinityInGroup) const;
};
class CMethodProps: public CProps

View file

@ -45,6 +45,7 @@ public:
HRESULT Flush() throw();
void FlushWithCheck();
Z7_FORCE_INLINE
void WriteByte(Byte b)
{
UInt32 pos = _pos;
@ -54,10 +55,34 @@ public:
if (pos == _limitPos)
FlushWithCheck();
}
void WriteBytes(const void *data, size_t size)
{
for (size_t i = 0; i < size; i++)
WriteByte(((const Byte *)data)[i]);
while (size)
{
UInt32 pos = _pos;
size_t cur = (size_t)(_limitPos - pos);
if (cur >= size)
cur = size;
size -= cur;
Byte *dest = _buf + pos;
pos += (UInt32)cur;
_pos = pos;
#if 0
memcpy(dest, data, cur);
data = (const void *)((const Byte *)data + cur);
#else
const Byte * const lim = (const Byte *)data + cur;
do
{
*dest++ = *(const Byte *)data;
data = (const void *)((const Byte *)data + 1);
}
while (data != lim);
#endif
if (pos == _limitPos)
FlushWithCheck();
}
}
Byte *GetOutBuffer(size_t &avail)

View file

@ -46,7 +46,7 @@ const UInt32 kBlockSizeStep = 100000;
const UInt32 kBlockSizeMax = kBlockSizeMultMax * kBlockSizeStep;
const unsigned kNumSelectorsBits = 15;
const UInt32 kNumSelectorsMax = (2 + (kBlockSizeMax / kGroupSize));
const unsigned kNumSelectorsMax = 2 + kBlockSizeMax / kGroupSize;
const unsigned kRleModeRepSize = 4;

File diff suppressed because it is too large Load diff

View file

@ -3,7 +3,6 @@
#ifndef ZIP7_INC_COMPRESS_BZIP2_ENCODER_H
#define ZIP7_INC_COMPRESS_BZIP2_ENCODER_H
#include "../../Common/Defs.h"
#include "../../Common/MyCom.h"
#ifndef Z7_ST
@ -23,80 +22,114 @@
namespace NCompress {
namespace NBZip2 {
class CMsbfEncoderTemp
const unsigned kNumPassesMax = 10;
struct CMsbfEncoderTemp
{
UInt32 _pos;
unsigned _bitPos;
Byte _curByte;
unsigned _bitPos; // 0 < _bitPos <= 8 : number of non-filled low bits in _curByte
unsigned _curByte; // low (_bitPos) bits are zeros
// high (8 - _bitPos) bits are filled
Byte *_buf;
public:
void SetStream(Byte *buf) { _buf = buf; }
Byte *GetStream() const { return _buf; }
Byte *_buf_base;
void SetStream(Byte *buf) { _buf_base = _buf = buf; }
Byte *GetStream() const { return _buf_base; }
void Init()
{
_pos = 0;
_bitPos = 8;
_curByte = 0;
_buf = _buf_base;
}
void Flush()
{
if (_bitPos < 8)
WriteBits(0, _bitPos);
}
// required condition: (value >> numBits) == 0
// numBits == 0 is allowed
void WriteBits(UInt32 value, unsigned numBits)
{
while (numBits > 0)
do
{
unsigned numNewBits = MyMin(numBits, _bitPos);
numBits -= numNewBits;
_curByte = (Byte)(_curByte << numNewBits);
UInt32 newBits = value >> numBits;
_curByte |= Byte(newBits);
value -= (newBits << numBits);
_bitPos -= numNewBits;
if (_bitPos == 0)
unsigned bp = _bitPos;
unsigned curByte = _curByte;
if (numBits < bp)
{
_buf[_pos++] = _curByte;
_bitPos = 8;
bp -= numBits;
_curByte = curByte | (value << bp);
_bitPos = bp;
return;
}
numBits -= bp;
const UInt32 hi = value >> numBits;
value -= (hi << numBits);
Byte *buf = _buf;
_bitPos = 8;
_curByte = 0;
*buf++ = (Byte)(curByte | hi);
_buf = buf;
}
while (numBits);
}
void WriteBit(unsigned value)
{
const unsigned bp = _bitPos - 1;
const unsigned curByte = _curByte | (value << bp);
_curByte = curByte;
_bitPos = bp;
if (bp == 0)
{
*_buf++ = (Byte)curByte;
_curByte = 0;
_bitPos = 8;
}
}
UInt32 GetBytePos() const { return _pos ; }
UInt32 GetPos() const { return _pos * 8 + (8 - _bitPos); }
Byte GetCurByte() const { return _curByte; }
void WriteByte(unsigned b)
{
const unsigned bp = _bitPos;
const unsigned a = _curByte | (b >> (8 - bp));
_curByte = b << bp;
Byte *buf = _buf;
*buf++ = (Byte)a;
_buf = buf;
}
UInt32 GetBytePos() const { return (UInt32)(size_t)(_buf - _buf_base); }
UInt32 GetPos() const { return GetBytePos() * 8 + 8 - _bitPos; }
unsigned GetCurByte() const { return _curByte; }
unsigned GetNonFlushedByteBits() const { return _curByte >> _bitPos; }
void SetPos(UInt32 bitPos)
{
_pos = bitPos >> 3;
_buf = _buf_base + (bitPos >> 3);
_bitPos = 8 - ((unsigned)bitPos & 7);
}
void SetCurState(unsigned bitPos, Byte curByte)
void SetCurState(unsigned bitPos, unsigned curByte)
{
_bitPos = 8 - bitPos;
_curByte = curByte;
}
};
class CEncoder;
const unsigned kNumPassesMax = 10;
class CEncoder;
class CThreadInfo
{
private:
CMsbfEncoderTemp m_OutStreamCurrent;
public:
CEncoder *Encoder;
Byte *m_Block;
private:
Byte *m_MtfArray;
Byte *m_TempArray;
UInt32 *m_BlockSorterIndex;
CMsbfEncoderTemp *m_OutStreamCurrent;
public:
bool m_OptimizeNumTables;
UInt32 m_NumCrcs;
UInt32 m_BlockIndex;
UInt64 m_UnpackSize;
Byte *m_Block_Base;
Byte Lens[kNumTablesMax][kMaxAlphaSize];
UInt32 Freqs[kNumTablesMax][kMaxAlphaSize];
@ -105,20 +138,16 @@ private:
Byte m_Selectors[kNumSelectorsMax];
UInt32 m_CRCs[1 << kNumPassesMax];
UInt32 m_NumCrcs;
void WriteBits2(UInt32 value, unsigned numBits);
void WriteByte2(Byte b);
void WriteBit2(Byte v);
void WriteCrc2(UInt32 v);
void WriteByte2(unsigned b) { WriteBits2(b, 8); }
void WriteBit2(unsigned v) { m_OutStreamCurrent.WriteBit(v); }
void EncodeBlock(const Byte *block, UInt32 blockSize);
UInt32 EncodeBlockWithHeaders(const Byte *block, UInt32 blockSize);
void EncodeBlock2(const Byte *block, UInt32 blockSize, UInt32 numPasses);
public:
bool m_OptimizeNumTables;
CEncoder *Encoder;
#ifndef Z7_ST
#ifndef Z7_ST
NWindows::CThread Thread;
NWindows::NSynchronization::CAutoResetEvent StreamWasFinishedEvent;
@ -127,17 +156,14 @@ public:
// it's not member of this thread. We just need one event per thread
NWindows::NSynchronization::CAutoResetEvent CanWriteEvent;
private:
UInt32 m_BlockIndex;
UInt64 m_UnpackSize;
public:
Byte MtPad[1 << 8]; // It's pad for Multi-Threading. Must be >= Cache_Line_Size.
HRESULT Create();
void FinishStream(bool needLeave);
THREAD_FUNC_RET_TYPE ThreadFunc();
#endif
#endif
CThreadInfo(): m_Block(NULL), m_BlockSorterIndex(NULL) {}
CThreadInfo(): m_BlockSorterIndex(NULL), m_Block_Base(NULL) {}
~CThreadInfo() { Free(); }
bool Alloc();
void Free();
@ -145,16 +171,19 @@ public:
HRESULT EncodeBlock3(UInt32 blockSize);
};
struct CEncProps
{
UInt32 BlockSizeMult;
UInt32 NumPasses;
UInt32 NumThreadGroups;
UInt64 Affinity;
CEncProps()
{
BlockSizeMult = (UInt32)(Int32)-1;
NumPasses = (UInt32)(Int32)-1;
NumThreadGroups = 0;
Affinity = 0;
}
void Normalize(int level);
@ -206,6 +235,7 @@ public:
bool CloseThreads;
bool StreamWasFinished;
NWindows::NSynchronization::CManualResetEvent CanStartWaitingEvent;
CThreadNextGroup ThreadNextGroup;
HRESULT Result;
ICompressProgressInfo *Progress;
@ -218,12 +248,8 @@ public:
UInt64 GetInProcessedSize() const { return m_InStream.GetProcessedSize(); }
UInt32 ReadRleBlock(Byte *buf);
void WriteBytes(const Byte *data, UInt32 sizeInBits, Byte lastByte);
void WriteBits(UInt32 value, unsigned numBits);
void WriteBytes(const Byte *data, UInt32 sizeInBits, unsigned lastByteBits);
void WriteByte(Byte b);
// void WriteBit(Byte v);
void WriteCrc(UInt32 v);
#ifndef Z7_ST
HRESULT Create();

View file

@ -33,6 +33,7 @@ public:
_bitPos = 8;
_curByte = 0;
}
Z7_FORCE_INLINE
void WriteBits(UInt32 value, unsigned numBits)
{
while (numBits > 0)

View file

@ -8,8 +8,9 @@
template<class TOutByte>
class CBitmEncoder
{
unsigned _bitPos;
Byte _curByte;
unsigned _bitPos; // 0 < _bitPos <= 8 : number of non-filled low bits in _curByte
unsigned _curByte; // low (_bitPos) bits are zeros
// high (8 - _bitPos) bits are filled
TOutByte _stream;
public:
bool Create(UInt32 bufferSize) { return _stream.Create(bufferSize); }
@ -24,25 +25,65 @@ public:
HRESULT Flush()
{
if (_bitPos < 8)
WriteBits(0, _bitPos);
return _stream.Flush();
}
void WriteBits(UInt32 value, unsigned numBits)
{
while (numBits > 0)
{
if (numBits < _bitPos)
{
_curByte = (Byte)(_curByte | (value << (_bitPos -= numBits)));
return;
}
numBits -= _bitPos;
UInt32 newBits = (value >> numBits);
value -= (newBits << numBits);
_stream.WriteByte((Byte)(_curByte | newBits));
_stream.WriteByte((Byte)_curByte);
_bitPos = 8;
_curByte = 0;
}
return _stream.Flush();
}
// required condition: (value >> numBits) == 0
// numBits == 0 is allowed
void WriteBits(UInt32 value, unsigned numBits)
{
do
{
unsigned bp = _bitPos;
unsigned curByte = _curByte;
if (numBits < bp)
{
bp -= numBits;
_curByte = curByte | (value << bp);
_bitPos = bp;
return;
}
numBits -= bp;
const UInt32 hi = (value >> numBits);
value -= (hi << numBits);
_stream.WriteByte((Byte)(curByte | hi));
_bitPos = 8;
_curByte = 0;
}
while (numBits);
}
void WriteByte(unsigned b)
{
const unsigned bp = _bitPos;
const unsigned a = _curByte | (b >> (8 - bp));
_curByte = b << bp;
_stream.WriteByte((Byte)a);
}
void WriteBytes(const Byte *data, size_t num)
{
const unsigned bp = _bitPos;
#if 1 // 1 for optional speed-optimized code branch
if (bp == 8)
{
_stream.WriteBytes(data, num);
return;
}
#endif
unsigned c = _curByte;
const unsigned bp_rev = 8 - bp;
for (size_t i = 0; i < num; i++)
{
const unsigned b = data[i];
_stream.WriteByte((Byte)(c | (b >> bp_rev)));
c = b << bp;
}
_curByte = c;
}
};

View file

@ -117,15 +117,13 @@ bool CCoder::ReadTables(void)
if (_numDistLevels > kDistTableSize32)
return false;
Byte levelLevels[kLevelTableSize];
for (unsigned i = 0; i < kLevelTableSize; i++)
{
const unsigned position = kCodeLengthAlphabetOrder[i];
if (i < numLevelCodes)
levelLevels[position] = (Byte)ReadBits(kLevelFieldSize);
else
levelLevels[position] = 0;
}
const unsigned kLevelTableSize_aligned4 = kLevelTableSize + 1;
Byte levelLevels[kLevelTableSize_aligned4];
memset (levelLevels, 0, sizeof(levelLevels));
unsigned i = 0;
do
levelLevels[kCodeLengthAlphabetOrder[i++]] = (Byte)ReadBits(kLevelFieldSize);
while (i != numLevelCodes);
if (m_InBitStream.ExtraBitsWereRead())
return false;

View file

@ -19,12 +19,16 @@
#define NO_INLINE
#endif
#define MAX_HUF_LEN_12 12
namespace NCompress {
namespace NDeflate {
namespace NEncoder {
static const unsigned k_CodeValue_Len_Is_Literal_Flag = 1u << 15;
static const unsigned kNumDivPassesMax = 10; // [0, 16); ratio/speed/ram tradeoff; use big value for better compression ratio.
static const UInt32 kNumTables = (1 << kNumDivPassesMax);
static const unsigned kNumTables = 1u << kNumDivPassesMax;
static const UInt32 kFixedHuffmanCodeBlockSizeMax = (1 << 8); // [0, (1 << 32)); ratio/speed tradeoff; use big value for better compression ratio.
static const UInt32 kDivideCodeBlockSizeMin = (1 << 7); // [1, (1 << 32)); ratio/speed tradeoff; use small value for better compression ratio.
@ -77,7 +81,7 @@ public:
static CFastPosInit g_FastPosInit;
inline UInt32 GetPosSlot(UInt32 pos)
inline unsigned GetPosSlot(UInt32 pos)
{
/*
if (pos < 0x200)
@ -162,13 +166,13 @@ HRESULT CCoder::Create()
// COM_TRY_BEGIN
if (!m_Values)
{
m_Values = (CCodeValue *)MyAlloc((kMaxUncompressedBlockSize) * sizeof(CCodeValue));
m_Values = (CCodeValue *)MyAlloc(kMaxUncompressedBlockSize * sizeof(CCodeValue));
if (!m_Values)
return E_OUTOFMEMORY;
}
if (!m_Tables)
{
m_Tables = (CTables *)MyAlloc((kNumTables) * sizeof(CTables));
m_Tables = (CTables *)MyAlloc(kNumTables * sizeof(CTables));
if (!m_Tables)
return E_OUTOFMEMORY;
}
@ -268,19 +272,21 @@ NO_INLINE void CCoder::GetMatches()
UInt32 distanceTmp[kMatchMaxLen * 2 + 3];
const UInt32 numPairs = (UInt32)((_btMode ?
const size_t numPairs = (size_t)((_btMode ?
Bt3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp):
Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp)) - distanceTmp);
*m_MatchDistances = (UInt16)numPairs;
UInt16 *matchDistances = m_MatchDistances;
*matchDistances++ = (UInt16)numPairs;
if (numPairs != 0)
{
UInt32 i;
size_t i;
for (i = 0; i < numPairs; i += 2)
{
m_MatchDistances[(size_t)i + 1] = (UInt16)distanceTmp[i];
m_MatchDistances[(size_t)i + 2] = (UInt16)distanceTmp[(size_t)i + 1];
matchDistances[0] = (UInt16)distanceTmp[i];
matchDistances[1] = (UInt16)distanceTmp[(size_t)i + 1];
matchDistances += 2;
}
UInt32 len = distanceTmp[(size_t)numPairs - 2];
if (len == m_NumFastBytes && m_NumFastBytes != m_MatchMaxLen)
@ -291,11 +297,11 @@ NO_INLINE void CCoder::GetMatches()
if (numAvail > m_MatchMaxLen)
numAvail = m_MatchMaxLen;
for (; len < numAvail && pby[len] == pby2[len]; len++);
m_MatchDistances[(size_t)i - 1] = (UInt16)len;
matchDistances[-2] = (UInt16)len;
}
}
if (m_IsMultiPass)
m_Pos += numPairs + 1;
m_Pos += (UInt32)numPairs + 1;
if (!m_SecondPass)
m_AdditionalOffset++;
}
@ -535,6 +541,7 @@ NO_INLINE void CCoder::WriteBits(UInt32 value, unsigned numBits)
}
#define WRITE_HF2(codes, lens, i) m_OutStream.WriteBits(codes[i], lens[i])
#define WRITE_HF2_NO_INLINE(codes, lens, i) WriteBits(codes[i], lens[i])
#define WRITE_HF(i) WriteBits(codes[i], lens[i])
NO_INLINE void CCoder::LevelTableCode(const Byte *levels, unsigned numLevels, const Byte *lens, const UInt32 *codes)
@ -619,17 +626,22 @@ static NO_INLINE UInt32 Huffman_GetPrice(const UInt32 *freqs, const Byte *lens,
return price;
}
static NO_INLINE UInt32 Huffman_GetPrice_Spec(const UInt32 *freqs, const Byte *lens, UInt32 num, const Byte *extraBits, UInt32 extraBase)
static NO_INLINE UInt32 Huffman_GetPrice_Spec(
const UInt32 *freqs, const Byte *lens, UInt32 num,
const Byte *extraBits, UInt32 extraBase)
{
return Huffman_GetPrice(freqs, lens, num) +
return
Huffman_GetPrice(freqs, lens, num) +
Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase);
}
NO_INLINE UInt32 CCoder::GetLzBlockPrice() const
{
return
Huffman_GetPrice_Spec(mainFreqs, m_NewLevels.litLenLevels, kFixedMainTableSize, m_LenDirectBits, kSymbolMatch) +
Huffman_GetPrice_Spec(distFreqs, m_NewLevels.distLevels, kDistTableSize64, kDistDirectBits, 0);
Huffman_GetPrice_Spec(mainFreqs, m_NewLevels.litLenLevels,
kFixedMainTableSize, m_LenDirectBits, kSymbolMatch) +
Huffman_GetPrice_Spec(distFreqs, m_NewLevels.distLevels,
kDistTableSize64, kDistDirectBits, 0);
}
NO_INLINE void CCoder::TryBlock()
@ -658,7 +670,7 @@ NO_INLINE void CCoder::TryBlock()
CCodeValue &codeValue = m_Values[m_ValueIndex++];
if (len >= kMatchMinLen)
{
UInt32 newLen = len - kMatchMinLen;
const UInt32 newLen = len - kMatchMinLen;
codeValue.Len = (UInt16)newLen;
mainFreqs[kSymbolMatch + (size_t)g_LenSlots[newLen]]++;
codeValue.Pos = (UInt16)pos;
@ -666,10 +678,10 @@ NO_INLINE void CCoder::TryBlock()
}
else
{
Byte b = *(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset);
const unsigned b = *(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset);
mainFreqs[b]++;
codeValue.SetAsLiteral();
codeValue.Pos = b;
codeValue.Len = k_CodeValue_Len_Is_Literal_Flag;
codeValue.Pos = (UInt16)b;
}
m_AdditionalOffset -= len;
BlockSizeRes += len;
@ -704,16 +716,24 @@ NO_INLINE void CCoder::SetPrices(const CLevels &levels)
}
}
#if MAX_HUF_LEN_12 > 12
// Huffman_ReverseBits() now supports 12-bits values only.
#error Stop_Compiling_Bad_MAX_HUF_LEN_12
#endif
static NO_INLINE void Huffman_ReverseBits(UInt32 *codes, const Byte *lens, UInt32 num)
{
for (UInt32 i = 0; i < num; i++)
const Byte * const lens_lim = lens + num;
do
{
UInt32 x = codes[i];
x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1);
x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2);
x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4);
codes[i] = (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - lens[i]);
// we should change constants, if lens[*] can be larger than 12.
UInt32 x = *codes;
x = ((x & (0x555 )) << 2) + (x & (0xAAA ));
x = ((x & (0x333 << 1)) << 4) | (x & (0xCCC << 1));
x = ((x & (0xF0F << 3)) << 8) | (x & (0x0F0 << 3));
// we can use (x) instead of (x & (0xFF << 7)), if we support garabage data after (*lens) bits.
*codes++ = (((x & (0xFF << 7)) << 16) | x) >> (*lens ^ 31);
}
while (++lens != lens_lim);
}
NO_INLINE void CCoder::WriteBlock()
@ -721,24 +741,28 @@ NO_INLINE void CCoder::WriteBlock()
Huffman_ReverseBits(mainCodes, m_NewLevels.litLenLevels, kFixedMainTableSize);
Huffman_ReverseBits(distCodes, m_NewLevels.distLevels, kDistTableSize64);
for (UInt32 i = 0; i < m_ValueIndex; i++)
CCodeValue *values = m_Values;
const CCodeValue * const values_lim = values + m_ValueIndex;
if (values != values_lim)
do
{
const CCodeValue &codeValue = m_Values[i];
if (codeValue.IsLiteral())
WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, codeValue.Pos);
const UInt32 len = values->Len;
const UInt32 dist = values->Pos;
if (len == k_CodeValue_Len_Is_Literal_Flag)
WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, dist);
else
{
UInt32 len = codeValue.Len;
UInt32 lenSlot = g_LenSlots[len];
const unsigned lenSlot = g_LenSlots[len];
WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolMatch + lenSlot);
m_OutStream.WriteBits(len - m_LenStart[lenSlot], m_LenDirectBits[lenSlot]);
UInt32 dist = codeValue.Pos;
UInt32 posSlot = GetPosSlot(dist);
const unsigned posSlot = GetPosSlot(dist);
WRITE_HF2(distCodes, m_NewLevels.distLevels, posSlot);
m_OutStream.WriteBits(dist - kDistStart[posSlot], kDistDirectBits[posSlot]);
}
}
WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolEndOfBlock);
while (++values != values_lim);
WRITE_HF2_NO_INLINE(mainCodes, m_NewLevels.litLenLevels, kSymbolEndOfBlock);
}
static UInt32 GetStorePrice(UInt32 blockSize, unsigned bitPosition)
@ -787,10 +811,10 @@ NO_INLINE UInt32 CCoder::TryDynBlock(unsigned tableIndex, UInt32 numPasses)
{
m_Pos = posTemp;
TryBlock();
unsigned numHuffBits =
(m_ValueIndex > 18000 ? 12 :
(m_ValueIndex > 7000 ? 11 :
(m_ValueIndex > 2000 ? 10 : 9)));
const unsigned numHuffBits =
m_ValueIndex > 18000 ? MAX_HUF_LEN_12 :
m_ValueIndex > 7000 ? 11 :
m_ValueIndex > 2000 ? 10 : 9;
MakeTables(numHuffBits);
SetPrices(m_NewLevels);
}

View file

@ -52,7 +52,15 @@ HRESULT SetLzma2Prop(PROPID propID, const PROPVARIANT &prop, CLzma2EncProps &lzm
case NCoderPropID::kNumThreads:
if (prop.vt != VT_UI4)
return E_INVALIDARG;
lzma2Props.numTotalThreads = (int)(prop.ulVal);
lzma2Props.numTotalThreads = (int)prop.ulVal;
break;
case NCoderPropID::kNumThreadGroups:
if (prop.vt != VT_UI4)
return E_INVALIDARG;
// 16-bit value supported by Windows
if (prop.ulVal >= (1u << 16))
return E_INVALIDARG;
lzma2Props.numThreadGroups = (unsigned)prop.ulVal;
break;
default:
RINOK(NLzma::SetLzmaProp(propID, prop, lzma2Props.lzmaProps))

View file

@ -101,6 +101,24 @@ HRESULT SetLzmaProp(PROPID propID, const PROPVARIANT &prop, CLzmaEncProps &ep)
return S_OK;
}
if (propID == NCoderPropID::kAffinityInGroup)
{
if (prop.vt == VT_UI8)
ep.affinityInGroup = prop.uhVal.QuadPart;
else
return E_INVALIDARG;
return S_OK;
}
if (propID == NCoderPropID::kThreadGroup)
{
if (prop.vt == VT_UI4)
ep.affinityGroup = (Int32)(UInt32)prop.ulVal;
else
return E_INVALIDARG;
return S_OK;
}
if (propID == NCoderPropID::kHashBits)
{
if (prop.vt == VT_UI4)

View file

@ -13,6 +13,18 @@ struct CMtf8Encoder
unsigned FindAndMove(Byte v) throw()
{
#if 1
Byte b = Buf[0];
if (v == b)
return 0;
Buf[0] = v;
for (unsigned pos = 0;;)
{
Byte a;
a = Buf[++pos]; Buf[pos] = b; if (v == a) return pos;
b = Buf[++pos]; Buf[pos] = a; if (v == b) return pos;
}
#else
size_t pos;
for (pos = 0; Buf[pos] != v; pos++);
const unsigned resPos = (unsigned)pos;
@ -31,6 +43,7 @@ struct CMtf8Encoder
Buf[pos] = Buf[pos - 1];
Buf[0] = v;
return resPos;
#endif
}
};

View file

@ -936,31 +936,30 @@ HRESULT CDecoder::ExecuteFilter(const CFilter &f)
HRESULT CDecoder::WriteBuf()
{
DeleteUnusedFilters();
const UInt64 lzSize = _lzSize + _winPos;
for (unsigned i = 0; i < _numFilters;)
{
const CFilter &f = _filters[i];
const UInt64 blockStart = f.Start;
const size_t lzAvail = (size_t)(lzSize - _lzWritten);
if (lzAvail == 0)
break;
// (lzAvail != 0)
const CFilter &f = _filters[i];
const UInt64 blockStart = f.Start;
if (blockStart > _lzWritten)
{
const UInt64 rem = blockStart - _lzWritten;
// (rem != 0)
size_t size = lzAvail;
if (size > rem)
size = (size_t)rem;
if (size != 0) // is it true always ?
{
RINOK(WriteData(_window + _winPos - lzAvail, size))
_lzWritten += size;
}
// (size != 0)
RINOK(WriteData(_window + _winPos - lzAvail, size))
_lzWritten += size;
continue;
}
// (blockStart <= _lzWritten)
const UInt32 blockSize = f.Size;
size_t offset = (size_t)(_lzWritten - blockStart);
if (offset == 0)
@ -987,10 +986,8 @@ HRESULT CDecoder::WriteBuf()
}
DeleteUnusedFilters();
if (_numFilters)
return S_OK;
const size_t lzAvail = (size_t)(lzSize - _lzWritten);
RINOK(WriteData(_window + _winPos - lzAvail, lzAvail))
_lzWritten += lzAvail;
@ -1367,6 +1364,12 @@ enum enum_exit_type
Z7_HUFF_DECODE_CHECK(sym, huf, kNumHufBits, kNumTableBits, bitStream, { LZ_LOOP_BREAK_ERROR })
/*
DecodeLZ2() will stop decoding if it reaches limit when (_winPos >= _limit)
at return:
(_winPos < _limit + kMaxMatchLen)
also it can write up to (COPY_CHUNK_SIZE - 1) additional junk bytes after (_winPos).
*/
HRESULT CDecoder::DecodeLZ2(const CBitDecoder &bitStream) throw()
{
#if 0
@ -1656,6 +1659,13 @@ decode_error:
/*
input conditions:
_winPos < _winSize
return:
_winPos < _winSize is expected, if (return_res == S_OK)
_winPos >= _winSize is possible in (return_res != S_OK)
*/
HRESULT CDecoder::DecodeLZ()
{
CBitDecoder _bitStream;
@ -1679,6 +1689,8 @@ HRESULT CDecoder::DecodeLZ()
if (winPos >= limit)
{
_winPos = winPos < _winSize ? winPos : _winSize;
// _winPos == min(winPos, _winSize)
// we will not write data after _winSize
RINOK(WriteBuf())
if (_unpackSize_Defined && _writtenFileSize > _unpackSize)
break; // return S_FALSE;
@ -1854,7 +1866,15 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream
{
// if (_winPos > 100) _winPos -= 100; // for debug: corruption
const UInt64 lzSize = _lzSize + _winPos;
if (!_isSolid || !_wasInit
/*
if previous file was decoded with error or for some another cases, then
(lzSize > _lzEnd) is possible
(_winPos > _winSize) is possible
(_winPos < _winSize + kMaxMatchLen)
*/
if (!_window
|| !_isSolid
|| !_wasInit
|| (lzSize < _lzEnd
#if Z7_RAR_RECOVER_SOLID_LIMIT != 0
&& lzSize + Z7_RAR_RECOVER_SOLID_LIMIT < _lzEnd
@ -1863,9 +1883,9 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream
{
if (_isSolid)
_lzError = LZ_ERROR_TYPE_HEADER;
_lzEnd = 0;
_lzSize = 0;
_lzWritten = 0;
// _lzEnd = 0; // it will be set later
// _lzWritten = 0; // it will be set later
_winPos = 0;
for (unsigned i = 0; i < kNumReps; i++)
_reps[i] = (size_t)0 - 1;
@ -1873,51 +1893,67 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream
_tableWasFilled = false;
_wasInit = true;
}
#if Z7_RAR_RECOVER_SOLID_LIMIT != 0
else if (lzSize < _lzEnd)
else
{
#if 0
return S_FALSE;
#else
// we can report that recovering was made:
// _lzError = LZ_ERROR_TYPE_HEADER;
// We write zeros to area after corruption:
if (_window)
const size_t ws = _winSize;
if (_winPos >= ws)
{
UInt64 rem = _lzEnd - lzSize;
const size_t ws = _winSize;
if (rem >= ws)
// we must normalize (_winPos) and data in _window,
_winPos -= ws;
_lzSize += ws;
// (_winPos < kMaxMatchLen < _winSize)
// if (_window)
memcpy(_window, _window + ws, _winPos); // memmove is not required here
}
#if Z7_RAR_RECOVER_SOLID_LIMIT != 0
if (lzSize < _lzEnd)
{
#if 0
return S_FALSE;
#else
// we can report that recovering was made:
// _lzError = LZ_ERROR_TYPE_HEADER;
// We write zeros to area after corruption:
// if (_window)
{
My_ZeroMemory(_window, ws);
_lzSize = ws;
_winPos = 0;
}
else
{
const size_t cur = ws - _winPos;
if (cur <= rem)
UInt64 rem = _lzEnd - lzSize;
if (rem >= ws)
{
rem -= cur;
My_ZeroMemory(_window + _winPos, cur);
_lzSize += _winPos;
My_ZeroMemory(_window, ws);
_lzSize = ws;
_winPos = 0;
}
My_ZeroMemory(_window + _winPos, (size_t)rem);
_winPos += (size_t)rem;
else
{
// rem < _winSize
// _winPos <= ws
const size_t cur = ws - _winPos;
if (cur <= rem)
{
rem -= cur;
My_ZeroMemory(_window + _winPos, cur);
_lzSize = ws;
_winPos = 0;
}
My_ZeroMemory(_window + _winPos, (size_t)rem);
_winPos += (size_t)rem;
}
}
}
// else return S_FALSE;
// else return S_FALSE;
#endif
}
}
#endif
}
// _winPos < _winSize
// we don't want _lzSize overflow
if (_lzSize >= DICT_SIZE_MAX)
_lzSize = DICT_SIZE_MAX;
_lzEnd = _lzSize + _winPos;
// _lzSize <= DICT_SIZE_MAX
// _lzEnd <= DICT_SIZE_MAX * 2
// _lzEnd < DICT_SIZE_MAX + _winSize
size_t newSize = _dictSize;
if (newSize < kWinSize_Min)
@ -1941,10 +1977,11 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream
// If dictionary was increased in solid, we don't want grow.
return S_FALSE; // E_OUTOFMEMORY
}
// (newSize <= _winSize)
// (newSize <= _dictSize_forCheck)
}
else
{
// !_isSolid || !_window
_dictSize_forCheck = newSize;
{
size_t newSize_small = newSize;
@ -1964,7 +2001,7 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream
if (!_window || allocSize > _winSize_Allocated)
{
Z7_RAR_FREE_WINDOW
_window = NULL;
_window = NULL;
_winSize_Allocated = 0;
Byte *win = (Byte *)::BigAlloc(allocSize);
if (!win)

View file

@ -153,7 +153,26 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size))
#ifndef Z7_EXTRACT_ONLY
#ifdef MY_CPU_X86_OR_AMD64
#define USE_HW_AES
#if defined(__INTEL_COMPILER)
#if (__INTEL_COMPILER >= 1110)
#define USE_HW_AES
#if (__INTEL_COMPILER >= 1900)
#define USE_HW_VAES
#endif
#endif
#elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \
|| defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400)
#define USE_HW_AES
#if defined(__clang__) && (__clang_major__ >= 8) \
|| defined(__GNUC__) && (__GNUC__ >= 8)
#define USE_HW_VAES
#endif
#elif defined(_MSC_VER)
#define USE_HW_AES
#define USE_HW_VAES
#endif
#elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE)
#if defined(__ARM_FEATURE_AES) \
@ -186,15 +205,15 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size))
#define SET_AES_FUNC_2(f2) \
if (algo == 2) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW) \
{ f = f2; }
#ifdef MY_CPU_X86_OR_AMD64
#ifdef USE_HW_VAES
#define SET_AES_FUNC_23(f2, f3) \
SET_AES_FUNC_2(f2) \
if (algo == 3) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW_256) \
{ f = f3; }
#else // MY_CPU_X86_OR_AMD64
#else // USE_HW_VAES
#define SET_AES_FUNC_23(f2, f3) \
SET_AES_FUNC_2(f2)
#endif // MY_CPU_X86_OR_AMD64
#endif // USE_HW_VAES
#else // USE_HW_AES
#define SET_AES_FUNC_23(f2, f3)
#endif // USE_HW_AES

View file

@ -136,6 +136,9 @@ namespace NCoderPropID
kAffinity, // VT_UI8
kBranchOffset, // VT_UI4
kHashBits, // VT_UI4
kNumThreadGroups, // VT_UI4
kThreadGroup, // VT_UI4
kAffinityInGroup, // VT_UI8
/*
// kHash3Bits, // VT_UI4
// kHash2Bits, // VT_UI4

6
CPP/7zip/Sort.mak Normal file
View file

@ -0,0 +1,6 @@
!IF defined(USE_NO_ASM) || defined(USE_C_SORT) || "$(PLATFORM)" == "ia64" || "$(PLATFORM)" == "mips" || "$(PLATFORM)" == "arm" || "$(PLATFORM)" == "arm64"
C_OBJS = $(C_OBJS) \
!ELSE
ASM_OBJS = $(ASM_OBJS) \
!ENDIF
$O\Sort.obj

View file

@ -63,17 +63,46 @@ EXTERN_C_END
#else
// #define MY_isatty_fileno(x) (isatty(fileno(x)))
// #define MY_IS_TERMINAL(x) (MY_isatty_fileno(x) != 0);
static inline bool MY_IS_TERMINAL(FILE *x)
static bool MY_IS_TERMINAL(FILE *x)
{
return (
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
_isatty(_fileno(x))
#else
isatty(fileno(x))
#endif
!= 0);
#ifdef _WIN32
/*
crt/stdio.h:
typedef struct _iobuf FILE;
#define stdin (&_iob[0])
#define stdout (&_iob[1])
#define stderr (&_iob[2])
*/
// fprintf(stderr, "\nMY_IS_TERMINAL = %p", x);
const int fd = _fileno(x);
/* (fd) is 0, 1 or 2 in console program.
docs: If stdout or stderr is not associated with
an output stream (for example, in a Windows application
without a console window), the file descriptor returned is -2.
In previous versions, the file descriptor returned was -1.
*/
if (fd < 0) // is not associated with an output stream application (without a console window)
return false;
// fprintf(stderr, "\n\nstderr _fileno(%p) = %d", x, fd);
if (!_isatty(fd))
return false;
// fprintf(stderr, "\nisatty_val = true");
const HANDLE h = (HANDLE)_get_osfhandle(fd);
/* _get_osfhandle() returns intptr_t in new SDK, or long in MSVC6.
Also it can return (INVALID_HANDLE_VALUE).
docs: _get_osfhandle also returns the special value -2 when
the file descriptor is not associated with a stream
in old msvcrt.dll: it returns (-1) for incorrect value
*/
// fprintf(stderr, "\n_get_osfhandle() = %p", (void *)h);
if (h == NULL || h == INVALID_HANDLE_VALUE)
return false;
DWORD st;
// fprintf(stderr, "\nGetConsoleMode() = %u", (unsigned)GetConsoleMode(h, &st));
return GetConsoleMode(h, &st) != 0;
#else
return isatty(fileno(x)) != 0;
#endif
}
#endif
@ -1088,7 +1117,7 @@ void CArcCmdLineParser::Parse1(const UStringVector &commandStrings,
const UString &s = parser[NKey::kLargePages].PostStrings[0];
if (s.IsEmpty())
slp = 1;
else if (s != L"-")
else if (!s.IsEqualTo("-"))
{
if (!StringToUInt32(s, slp))
throw CArcCmdLineException("Unsupported switch postfix for -slp", s);
@ -1338,7 +1367,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options)
const UString &s = parser[NKey::kFullPathMode].PostStrings[0];
if (!s.IsEmpty())
{
if (s == L"2")
if (s.IsEqualTo("2"))
censorPathMode = NWildcard::k_FullPath;
else
throw CArcCmdLineException("Unsupported -spf:", s);
@ -1400,6 +1429,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options)
const bool isExtractGroupCommand = options.Command.IsFromExtractGroup();
const bool isExtractOrList = isExtractGroupCommand || options.Command.CommandType == NCommandType::kList;
const bool isRename = options.Command.CommandType == NCommandType::kRename;
options.UpdateOptions.RenameMode = isRename;
if ((isExtractOrList || isRename) && options.StdInMode)
thereIsArchiveName = false;
@ -1516,9 +1546,9 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options)
const UString &s = parser[NKey::kZoneFile].PostStrings[0];
if (!s.IsEmpty())
{
if (s == L"0") eo.ZoneMode = NExtract::NZoneIdMode::kNone;
else if (s == L"1") eo.ZoneMode = NExtract::NZoneIdMode::kAll;
else if (s == L"2") eo.ZoneMode = NExtract::NZoneIdMode::kOffice;
if (s.IsEqualTo("0")) eo.ZoneMode = NExtract::NZoneIdMode::kNone;
else if (s.IsEqualTo("1")) eo.ZoneMode = NExtract::NZoneIdMode::kAll;
else if (s.IsEqualTo("2")) eo.ZoneMode = NExtract::NZoneIdMode::kOffice;
else
throw CArcCmdLineException("Unsupported -snz:", s);
}

File diff suppressed because it is too large Load diff

View file

@ -178,36 +178,50 @@ struct CDirPathTime: public CFiTimesCAM
#ifdef SUPPORT_LINKS
enum ELinkType
{
k_LinkType_HardLink,
k_LinkType_PureSymLink,
k_LinkType_Junction,
k_LinkType_WSL
// , k_LinkType_CopyLink;
};
struct CLinkInfo
{
// bool isCopyLink;
bool isHardLink;
bool isJunction;
ELinkType LinkType;
bool isRelative;
bool isWSL;
UString linkPath;
// if (isRelative == false), then (LinkPath) is relative to root folder of archive
// if (isRelative == true ), then (LinkPath) is relative to current item
bool isWindowsPath;
UString LinkPath;
bool IsSymLink() const { return !isHardLink; }
bool Is_HardLink() const { return LinkType == k_LinkType_HardLink; }
bool Is_AnySymLink() const { return LinkType != k_LinkType_HardLink; }
bool Is_WSL() const { return LinkType == k_LinkType_WSL; }
CLinkInfo():
// IsCopyLink(false),
isHardLink(false),
isJunction(false),
LinkType(k_LinkType_PureSymLink),
isRelative(false),
isWSL(false)
isWindowsPath(false)
{}
void Clear()
{
// IsCopyLink = false;
isHardLink = false;
isJunction = false;
LinkType = k_LinkType_PureSymLink;
isRelative = false;
isWSL = false;
linkPath.Empty();
isWindowsPath = false;
LinkPath.Empty();
}
bool Parse(const Byte *data, size_t dataSize, bool isLinuxData);
bool Parse_from_WindowsReparseData(const Byte *data, size_t dataSize);
bool Parse_from_LinuxData(const Byte *data, size_t dataSize);
void Normalize_to_RelativeSafe(UStringVector &removePathParts);
private:
void Remove_AbsPathPrefixes();
};
#endif // SUPPORT_LINKS
@ -287,8 +301,8 @@ private:
bool _isRenamed;
bool _extractMode;
// bool _is_SymLink_in_Data;
bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX
bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX.
// _is_SymLink_in_Data_Linux is detected from Windows/Linux part of attributes of file.
bool _needSetAttrib;
bool _isSymLinkCreated;
bool _itemFailure;
@ -420,6 +434,7 @@ public:
HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path);
HRESULT SendMessageError_with_LastError(const char *message, const FString &path);
HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2);
HRESULT SendMessageError2_with_LastError(const char *message, const FString &path1, const FString &path2);
#if defined(_WIN32) && !defined(UNDER_CE) && !defined(Z7_SFX)
NExtract::NZoneIdMode::EEnum ZoneMode;
@ -487,11 +502,16 @@ public:
private:
CHardLinks _hardLinks;
CLinkInfo _link;
// const void *NtReparse_Data;
// UInt32 NtReparse_Size;
// FString _copyFile_Path;
// HRESULT MyCopyFile(ISequentialOutStream *outStream);
HRESULT Link(const FString &fullProcessedPath);
HRESULT ReadLink();
HRESULT SetLink(
const FString &fullProcessedPath_from,
const CLinkInfo &linkInfo,
bool &linkWasSet);
public:
// call PrepareHardLinks() after Init()
@ -538,16 +558,6 @@ private:
HRESULT CloseReparseAndFile();
HRESULT CloseReparseAndFile2();
HRESULT SetDirsTimes();
const void *NtReparse_Data;
UInt32 NtReparse_Size;
#ifdef SUPPORT_LINKS
HRESULT SetFromLinkPath(
const FString &fullProcessedPath,
const CLinkInfo &linkInfo,
bool &linkWasSet);
#endif
};

View file

@ -871,14 +871,27 @@ struct CAffinityMode
unsigned NumCoreThreads;
unsigned NumCores;
// unsigned DivideNum;
#ifdef _WIN32
unsigned NumGroups;
#endif
UInt32 Sizes[NUM_CPU_LEVELS_MAX];
void SetLevels(unsigned numCores, unsigned numCoreThreads);
DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
bool NeedAffinity() const { return NumBundleThreads != 0; }
#ifdef _WIN32
bool NeedGroupsMode() const { return NumGroups > 1; }
#endif
WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
{
#ifdef _WIN32
if (NeedGroupsMode()) // we need fix for bundleIndex usage
return thread.Create_With_Group(startAddress, parameter, bundleIndex % NumGroups);
#endif
if (NeedAffinity())
{
CCpuSet cpuSet;
@ -892,6 +905,9 @@ struct CAffinityMode
NumBundleThreads(0),
NumLevels(0),
NumCoreThreads(1)
#ifdef _WIN32
, NumGroups(0)
#endif
// DivideNum(1)
{}
};
@ -1288,22 +1304,28 @@ HRESULT CEncoderInfo::Generate()
if (scp)
{
const UInt64 reduceSize = kBufferSize;
/* in posix new thread uses same affinity as parent thread,
/* in posix : new thread uses same affinity as parent thread,
so we don't need to send affinity to coder in posix */
UInt64 affMask;
#if !defined(Z7_ST) && defined(_WIN32)
UInt64 affMask = 0;
UInt32 affinityGroup = (UInt32)(Int32)-1;
// UInt64 affinityInGroup = 0;
#if !defined(Z7_ST) && defined(_WIN32)
{
CCpuSet cpuSet;
affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
if (AffinityMode.NeedGroupsMode()) // we need fix for affinityInGroup also
affinityGroup = EncoderIndex % AffinityMode.NumGroups;
else
affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
}
#else
affMask = 0;
#endif
// affMask <<= 3; // debug line: to test no affinity in coder;
// affMask = 0;
RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
#endif
// affMask <<= 3; // debug line: to test no affinity in coder
// affMask = 0; // for debug
// affinityGroup = 0; // for debug
// affinityInGroup = 1; // for debug
RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize,
affMask != 0 ? &affMask : NULL,
affinityGroup != (UInt32)(Int32)-1 ? &affinityGroup : NULL,
/* affinityInGroup != 0 ? &affinityInGroup : */ NULL))
}
else
{
@ -2962,7 +2984,7 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
{
AString s;
// s.Add_UInt32(ti.numProcessThreads);
unsigned numSysThreads = ti.GetNumSystemThreads();
const unsigned numSysThreads = ti.GetNumSystemThreads();
if (ti.GetNumProcessThreads() != numSysThreads)
{
// if (ti.numProcessThreads != ti.numSysThreads)
@ -2992,6 +3014,35 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
}
#endif
}
#ifdef _WIN32
if (ti.Groups.GroupSizes.Size() > 1 ||
(ti.Groups.GroupSizes.Size() == 1
&& ti.Groups.NumThreadsTotal != numSysThreads))
{
s += " : ";
s.Add_UInt32(ti.Groups.GroupSizes.Size());
s += " groups : ";
if (ti.Groups.NumThreadsTotal == numSysThreads)
{
s.Add_UInt32(ti.Groups.NumThreadsTotal);
s += " c : ";
}
UInt32 minSize, maxSize;
ti.Groups.Get_GroupSize_Min_Max(minSize, maxSize);
if (minSize == maxSize)
{
s.Add_UInt32(ti.Groups.GroupSizes[0]);
s += " c/g";
}
else
FOR_VECTOR (i, ti.Groups.GroupSizes)
{
if (i != 0)
s.Add_Char(' ');
s.Add_UInt32(ti.Groups.GroupSizes[i]);
}
}
#endif
return s;
}
@ -3753,9 +3804,13 @@ HRESULT Bench(
UInt64 complexInCommands = kComplexInCommands;
UInt32 numThreads_Start = 1;
#ifndef Z7_ST
#ifndef Z7_ST
CAffinityMode affinityMode;
#endif
#ifdef _WIN32
if (threadsInfo.IsGroupMode && threadsInfo.Groups.GroupSizes.Size() > 1)
affinityMode.NumGroups = threadsInfo.Groups.GroupSizes.Size();
#endif
#endif
COneMethodInfo method;
@ -4861,7 +4916,7 @@ HRESULT Bench(
if (AreSameMethodNames(benchMethod, methodName))
{
if (benchProps.IsEmpty()
|| (benchProps == "x5" && method.PropsString.IsEmpty())
|| (benchProps.IsEqualTo("x5") && method.PropsString.IsEmpty())
|| method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
{
callback.BenchProps.EncComplex = h.EncComplex;

View file

@ -1213,11 +1213,13 @@ HRESULT CDirItems::FillFixedReparse()
// continue; // for debug
if (!item.Has_Attrib_ReparsePoint())
continue;
/*
We want to get properties of target file instead of properies of symbolic link.
Probably this code is unused, because
CFileInfo::Find(with followLink = true) called Fill_From_ByHandleFileInfo() already.
*/
// if (item.IsDir()) continue;
const FString phyPath = GetPhyPath(i);
NFind::CFileInfo fi;
if (fi.Fill_From_ByHandleFileInfo(phyPath)) // item.IsDir()
{
@ -1228,38 +1230,13 @@ HRESULT CDirItems::FillFixedReparse()
item.Attrib = fi.Attrib;
continue;
}
/*
// we request properties of target file instead of properies of symbolic link
// here we also can manually parse unsupported links (like WSL links)
NIO::CInFile inFile;
if (inFile.Open(phyPath))
{
BY_HANDLE_FILE_INFORMATION info;
if (inFile.GetFileInformation(&info))
{
// Stat.FilesSize doesn't contain item.Size already
// Stat.FilesSize -= item.Size;
item.Size = (((UInt64)info.nFileSizeHigh) << 32) + info.nFileSizeLow;
Stat.FilesSize += item.Size;
item.CTime = info.ftCreationTime;
item.ATime = info.ftLastAccessTime;
item.MTime = info.ftLastWriteTime;
item.Attrib = info.dwFileAttributes;
continue;
}
}
*/
RINOK(AddError(phyPath))
continue;
}
// (SymLinks == true) here
// (SymLinks == true)
if (item.ReparseData.Size() == 0)
continue;
// if (item.Size == 0)
{
// 20.03: we use Reparse Data instead of real data
@ -1277,7 +1254,7 @@ HRESULT CDirItems::FillFixedReparse()
/* imagex/WIM reduces absolute paths in links (raparse data),
if we archive non root folder. We do same thing here */
bool isWSL = false;
// bool isWSL = false;
if (attr.IsSymLink_WSL())
{
// isWSL = true;
@ -1314,21 +1291,27 @@ HRESULT CDirItems::FillFixedReparse()
continue;
if (rootPrefixSize == prefix.Len())
continue; // simple case: paths are from root
if (link.Len() <= prefix.Len())
continue;
if (CompareFileNames(link.Left(prefix.Len()), prefix) != 0)
continue;
UString newLink = prefix.Left(rootPrefixSize);
newLink += link.Ptr(prefix.Len());
CByteBuffer data;
bool isSymLink = !attr.IsMountPoint();
if (!FillLinkData(data, newLink, isSymLink, isWSL))
CByteBuffer &data = item.ReparseData2;
/*
if (isWSL)
{
Convert_WinPath_to_WslLinuxPath(newLink, true); // is absolute : change it
FillLinkData_WslLink(data, newLink);
}
else
*/
FillLinkData_WinLink(data, newLink, !attr.IsMountPoint());
if (data.Size() == 0)
continue;
item.ReparseData2 = data;
// item.ReparseData2 = data;
}
return S_OK;
}

View file

@ -389,7 +389,7 @@ HRESULT Extract(
{
UString s = arcPath.Ptr(pos + 1);
int index = codecs->FindFormatForExtension(s);
if (index >= 0 && s == L"001")
if (index >= 0 && s.IsEqualTo("001"))
{
s = arcPath.Left(pos);
pos = s.ReverseFind(L'.');

View file

@ -208,7 +208,7 @@ void Correct_FsPath(bool absIsAllowed, bool keepAndReplaceEmptyPrefixes, UString
if (parts.Size() > 1 && parts[1].IsEmpty())
{
i = 2;
if (parts.Size() > 2 && parts[2] == L"?")
if (parts.Size() > 2 && parts[2].IsEqualTo("?"))
{
i = 3;
if (parts.Size() > 3 && NWindows::NFile::NName::IsDrivePath2(parts[3]))

View file

@ -62,7 +62,7 @@ HRESULT CHashBundle::SetMethods(DECL_EXTERNAL_CODECS_LOC_VARS const UStringVecto
if (m.MethodName.IsEmpty())
m.MethodName = k_DefaultHashMethod;
if (m.MethodName == "*")
if (m.MethodName.IsEqualTo("*"))
{
CRecordVector<CMethodId> tempMethods;
GetHashMethods(EXTERNAL_CODECS_LOC_VARS tempMethods);
@ -431,6 +431,19 @@ static void WriteLine(CDynLimBuf &hashFileString,
}
static void Convert_TagName_to_MethodName(AString &method)
{
// we need to convert at least SHA512/256 to SHA512-256, and SHA512/224 to SHA512-224
// but we convert any '/' to '-'.
method.Replace('/', '-');
}
static void Convert_MethodName_to_TagName(AString &method)
{
if (method.IsPrefixedBy_Ascii_NoCase("SHA512-2"))
method.ReplaceOneCharAtPos(6, '/');
}
static void WriteLine(CDynLimBuf &hashFileString,
const CHashOptionsLocal &options,
@ -440,8 +453,10 @@ static void WriteLine(CDynLimBuf &hashFileString,
{
AString methodName;
if (!hb.Hashers.IsEmpty())
{
methodName = hb.Hashers[0].Name;
Convert_MethodName_to_TagName(methodName);
}
AString hashesString;
AddHashResultLine(hashesString, hb.Hashers);
WriteLine(hashFileString, options, path, isDir, methodName, hashesString);
@ -752,7 +767,7 @@ bool CHashPair::ParseCksum(const char *s)
Name = end;
Hash.Alloc(4);
SetBe32(Hash, crc)
SetBe32a(Hash, crc)
Size_from_Arc = size;
Size_from_Arc_Defined = true;
@ -773,56 +788,87 @@ static const char * const k_CsumMethodNames[] =
{
"sha256"
, "sha224"
// , "sha512-224"
// , "sha512-256"
, "sha512-224"
, "sha512-256"
, "sha384"
, "sha512"
// , "sha3-224"
, "sha3-224"
, "sha3-256"
// , "sha3-384"
// , "sha3-512"
, "sha3-384"
, "sha3-512"
// , "shake128"
// , "shake256"
, "sha1"
, "sha2"
, "sha3"
, "sha"
, "md5"
, "blake2sp"
, "blake2s"
, "blake2b"
, "blake2sp"
, "xxh64"
, "crc64"
, "crc32"
, "crc64"
, "cksum"
};
static UString GetMethod_from_FileName(const UString &name)
// returns true, if (method) is known hash method or hash method group name.
static bool GetMethod_from_FileName(const UString &name, AString &method)
{
method.Empty();
AString s;
ConvertUnicodeToUTF8(name, s);
const int dotPos = s.ReverseFind_Dot();
const char *src = s.Ptr();
bool isExtension = false;
if (dotPos >= 0)
{
isExtension = true;
src = s.Ptr(dotPos + 1);
method = s.Ptr(dotPos + 1);
if (method.IsEqualTo_Ascii_NoCase("txt") ||
method.IsEqualTo_Ascii_NoCase("asc"))
{
method.Empty();
const int dotPos2 = s.Find('.');
if (dotPos2 >= 0)
s.DeleteFrom(dotPos2);
}
}
const char *m = "";
if (method.IsEmpty())
{
// we support file names with "sum" and "sums" postfixes: "sha256sum", "sha256sums"
unsigned size;
if (s.Len() > 4 && StringsAreEqualNoCase_Ascii(s.RightPtr(4), "sums"))
size = 4;
else if (s.Len() > 3 && StringsAreEqualNoCase_Ascii(s.RightPtr(3), "sum"))
size = 3;
else
return false;
method = s;
method.DeleteFrom(s.Len() - size);
}
unsigned i;
for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++)
{
m = k_CsumMethodNames[i];
if (isExtension)
const char *m = k_CsumMethodNames[i];
if (method.IsEqualTo_Ascii_NoCase(m))
{
if (StringsAreEqual_Ascii(src, m))
break;
// method = m; // we can get lowcase
return true;
}
else if (IsString1PrefixedByString2_NoCase_Ascii(src, m))
if (StringsAreEqual_Ascii(src + strlen(m), "sums"))
break;
}
UString res;
if (i != Z7_ARRAY_SIZE(k_CsumMethodNames))
res = m;
return res;
/*
for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++)
{
const char *m = k_CsumMethodNames[i];
if (method.IsPrefixedBy_Ascii_NoCase(m))
{
method = m; // we get lowcase
return true;
}
}
*/
return false;
}
@ -1047,7 +1093,7 @@ Z7_COM7F_IMF(CHandler::GetRawProp(UInt32 index, PROPID propID, const void **data
if (propID == kpidChecksum)
{
const CHashPair &hp = HashPairs[index];
if (hp.Hash.Size() > 0)
if (hp.Hash.Size() != 0)
{
*data = hp.Hash;
*dataSize = (UInt32)hp.Hash.Size();
@ -1100,11 +1146,6 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value))
s.Add_UInt32(_hashSize * 8);
s += "-bit";
}
if (!_nameExtenstion.IsEmpty())
{
s.Add_Space_if_NotEmpty();
s += _nameExtenstion;
}
if (_is_PgpMethod)
{
Add_OptSpace_String(s, "PGP");
@ -1120,6 +1161,18 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value))
Add_OptSpace_String(s, "TAG");
if (_are_there_Dirs)
Add_OptSpace_String(s, "DIRS");
if (!_method_from_FileName.IsEmpty())
{
Add_OptSpace_String(s, "filename_method:");
s += _method_from_FileName;
if (!_is_KnownMethod_in_FileName)
s += ":UNKNOWN";
}
if (!_methods.IsEmpty())
{
Add_OptSpace_String(s, "cmd_method:");
s += _methods[0];
}
prop = s;
break;
}
@ -1228,6 +1281,15 @@ static HRESULT ReadStream_to_Buf(IInStream *stream, CByteBuffer &buf, IArchiveOp
}
static bool isThere_Zero_Byte(const Byte *data, size_t size)
{
for (size_t i = 0; i < size; i++)
if (data[i] == 0)
return true;
return false;
}
Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallback *openCallback))
{
COM_TRY_BEGIN
@ -1239,17 +1301,9 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb
CObjectVector<CHashPair> &pairs = HashPairs;
bool zeroMode = false;
bool cr_lf_Mode = false;
{
for (size_t i = 0; i < buf.Size(); i++)
if (buf.ConstData()[i] == 0)
{
zeroMode = true;
break;
}
}
const bool zeroMode = isThere_Zero_Byte(buf, buf.Size());
_is_ZeroMode = zeroMode;
bool cr_lf_Mode = false;
if (!zeroMode)
cr_lf_Mode = Is_CR_LF_Data(buf, buf.Size());
@ -1263,13 +1317,21 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb
NCOM::CPropVariant prop;
RINOK(openVolumeCallback->GetProperty(kpidName, &prop))
if (prop.vt == VT_BSTR)
_nameExtenstion = GetMethod_from_FileName(prop.bstrVal);
_is_KnownMethod_in_FileName = GetMethod_from_FileName(prop.bstrVal, _method_from_FileName);
}
}
bool cksumMode = false;
if (_nameExtenstion.IsEqualTo_Ascii_NoCase("cksum"))
cksumMode = true;
if (!_methods.IsEmpty())
{
ConvertUnicodeToUTF8(_methods[0], _method_for_Extraction);
}
if (_method_for_Extraction.IsEmpty())
{
// if (_is_KnownMethod_in_FileName)
_method_for_Extraction = _method_from_FileName;
}
const bool cksumMode = _method_for_Extraction.IsEqualTo_Ascii_NoCase("cksum");
_is_CksumMode = cksumMode;
size_t pos = 0;
@ -1366,6 +1428,7 @@ void CHandler::ClearVars()
_is_ZeroMode = false;
_are_there_Tags = false;
_are_there_Dirs = false;
_is_KnownMethod_in_FileName = false;
_hashSize_Defined = false;
_hashSize = 0;
}
@ -1374,7 +1437,8 @@ void CHandler::ClearVars()
Z7_COM7F_IMF(CHandler::Close())
{
ClearVars();
_nameExtenstion.Empty();
_method_from_FileName.Empty();
_method_for_Extraction.Empty();
_pgpMethod.Empty();
HashPairs.Clear();
return S_OK;
@ -1401,19 +1465,73 @@ static bool CheckDigests(const Byte *a, const Byte *b, size_t size)
}
static void AddDefaultMethod(UStringVector &methods, unsigned size)
static void AddDefaultMethod(UStringVector &methods,
const char *name, unsigned size)
{
int shaVersion = -1;
if (name)
{
if (StringsAreEqualNoCase_Ascii(name, "sha"))
{
shaVersion = 0;
if (size == 0)
size = 32;
}
else if (StringsAreEqualNoCase_Ascii(name, "sha1"))
{
shaVersion = 1;
if (size == 0)
size = 20;
}
else if (StringsAreEqualNoCase_Ascii(name, "sha2"))
{
shaVersion = 2;
if (size == 0)
size = 32;
}
else if (StringsAreEqualNoCase_Ascii(name, "sha3"))
{
if (size == 0 ||
size == 32) name = "sha3-256";
else if (size == 28) name = "sha3-224";
else if (size == 48) name = "sha3-384";
else if (size == 64) name = "sha3-512";
}
else if (StringsAreEqualNoCase_Ascii(name, "sha512"))
{
// we allow any sha512 derived hash inside .sha512 file:
if (size == 48) name = "sha384";
else if (size == 32) name = "sha512-256";
else if (size == 28) name = "sha512-224";
}
if (shaVersion >= 0)
name = NULL;
}
const char *m = NULL;
if (size == 32) m = "sha256";
else if (size == 20) m = "sha1";
else if (size == 16) m = "md5";
else if (size == 8) m = "crc64";
else if (size == 4) m = "crc32";
if (name)
m = name;
else
{
if (size == 64) m = "sha512";
else if (size == 48) m = "sha384";
else if (size == 32) m = "sha256";
else if (size == 28) m = "sha224";
else if (size == 20) m = "sha1";
else if (shaVersion < 0)
{
if (size == 16) m = "md5";
else if (size == 8) m = "crc64";
else if (size == 4) m = "crc32";
}
}
if (!m)
return;
#ifdef Z7_EXTERNAL_CODECS
#ifdef Z7_EXTERNAL_CODECS
const CExternalCodecs *_externalCodecs = g_ExternalCodecs_Ptr;
#endif
#endif
CMethodId id;
if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS
AString(m), id))
@ -1444,15 +1562,15 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
CHashBundle hb_Glob;
// UStringVector methods = options.Methods;
UStringVector methods;
if (methods.IsEmpty() && !_nameExtenstion.IsEmpty())
/*
if (methods.IsEmpty() && !utf_nameExtenstion.IsEmpty() && !_hashSize_Defined)
{
AString utf;
ConvertUnicodeToUTF8(_nameExtenstion, utf);
CMethodId id;
if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf, id))
if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf_nameExtenstion, id))
methods.Add(_nameExtenstion);
}
*/
if (methods.IsEmpty() && !_pgpMethod.IsEmpty())
{
@ -1461,12 +1579,21 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
methods.Add(UString(_pgpMethod));
}
/*
if (methods.IsEmpty() && _pgpMethod.IsEmpty() && _hashSize_Defined)
AddDefaultMethod(methods, _hashSize);
{
AddDefaultMethod(methods,
utf_nameExtenstion.IsEmpty() ? NULL : utf_nameExtenstion.Ptr(),
_hashSize);
}
*/
RINOK(hb_Glob.SetMethods(
if (!methods.IsEmpty())
{
RINOK(hb_Glob.SetMethods(
EXTERNAL_CODECS_LOC_VARS
methods))
}
Z7_DECL_CMyComPtr_QI_FROM(
IArchiveUpdateCallbackFile,
@ -1561,9 +1688,11 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
{
hb_Use = &hb_Loc;
CMethodId id;
if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS hp.Method, id))
AString methodName = hp.Method;
Convert_TagName_to_MethodName(methodName);
if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, id))
{
methods_loc.Add(UString(hp.Method));
methods_loc.Add(UString(methodName));
RINOK(hb_Loc.SetMethods(
EXTERNAL_CODECS_LOC_VARS
methods_loc))
@ -1573,7 +1702,10 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
}
else if (methods.IsEmpty())
{
AddDefaultMethod(methods_loc, (unsigned)hp.Hash.Size());
AddDefaultMethod(methods_loc,
_method_for_Extraction.IsEmpty() ? NULL :
_method_for_Extraction.Ptr(),
(unsigned)hp.Hash.Size());
if (!methods_loc.IsEmpty())
{
hb_Use = &hb_Loc;
@ -1621,7 +1753,7 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems,
Int32 opRes = NArchive::NExtract::NOperationResult::kUnsupportedMethod;
if (isSupportedMode
&& res_SetMethods != E_NOTIMPL
&& hb_Use->Hashers.Size() > 0
&& !hb_Use->Hashers.IsEmpty()
)
{
const CHasherState &hs = hb_Use->Hashers[0];
@ -1774,10 +1906,6 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
methods.Add(_methods[k]);
}
}
else if (_crcSize_WasSet)
{
AddDefaultMethod(methods, _crcSize);
}
else
{
Z7_DECL_CMyComPtr_QI_FROM(
@ -1789,12 +1917,23 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt
RINOK(getRootProps->GetRootProp(kpidArcFileName, &prop))
if (prop.vt == VT_BSTR)
{
const UString method = GetMethod_from_FileName(prop.bstrVal);
AString method;
/* const bool isKnownMethod = */ GetMethod_from_FileName(prop.bstrVal, method);
if (!method.IsEmpty())
methods.Add(method);
{
AddDefaultMethod(methods, method, _crcSize_WasSet ? _crcSize : 0);
if (methods.IsEmpty())
return E_NOTIMPL;
}
}
}
}
if (methods.IsEmpty() && _crcSize_WasSet)
{
AddDefaultMethod(methods,
NULL, // name
_crcSize);
}
RINOK(hb.SetMethods(EXTERNAL_CODECS_LOC_VARS methods))
@ -2038,6 +2177,15 @@ HRESULT CHandler::SetProperty(const wchar_t *nameSpec, const PROPVARIANT &value)
}
void CHandler::InitProps()
{
_supportWindowsBackslash = true;
_crcSize_WasSet = false;
_crcSize = 4;
_methods.Clear();
_options.Init_HashOptionsLocal();
}
Z7_COM7F_IMF(CHandler::SetProperties(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps))
{
COM_TRY_BEGIN
@ -2088,22 +2236,27 @@ void Codecs_AddHashArcHandler(CCodecs *codecs)
" sha512"
" sha384"
" sha224"
// " sha512-224"
// " sha512-256"
// " sha3-224"
" sha512-224"
" sha512-256"
" sha3-224"
" sha3-256"
// " sha3-384"
// " sha3-512"
" sha3-384"
" sha3-512"
// " shake128"
// " shake256"
" sha1"
" sha2"
" sha3"
" sha"
" md5"
" blake2s"
" blake2b"
" blake2sp"
" xxh64"
" crc32 crc64"
" asc"
" crc32"
" crc64"
" cksum"
" asc"
// " b2sum"
),
UString());

View file

@ -279,32 +279,25 @@ Z7_CLASS_IMP_CHandler_IInArchive_3(
bool _isArc;
bool _supportWindowsBackslash;
bool _crcSize_WasSet;
UInt64 _phySize;
CObjectVector<CHashPair> HashPairs;
UString _nameExtenstion;
// UString _method_fromName;
AString _pgpMethod;
bool _is_CksumMode;
bool _is_PgpMethod;
bool _is_ZeroMode;
bool _are_there_Tags;
bool _are_there_Dirs;
bool _is_KnownMethod_in_FileName;
bool _hashSize_Defined;
unsigned _hashSize;
UInt32 _crcSize;
UInt64 _phySize;
CObjectVector<CHashPair> HashPairs;
UStringVector _methods;
AString _method_from_FileName;
AString _pgpMethod;
AString _method_for_Extraction;
CHashOptionsLocal _options;
void ClearVars();
void InitProps()
{
_supportWindowsBackslash = true;
_crcSize_WasSet = false;
_crcSize = 4;
_methods.Clear();
_options.Init_HashOptionsLocal();
}
void InitProps();
bool CanUpdate() const
{

View file

@ -170,7 +170,7 @@ void CArcInfoEx::AddExts(const UString &ext, const UString &addExt)
if (i < addExts.Size())
{
extInfo.AddExt = addExts[i];
if (extInfo.AddExt == L"*")
if (extInfo.AddExt.IsEqualTo("*"))
extInfo.AddExt.Empty();
}
Exts.Add(extInfo);
@ -931,8 +931,8 @@ bool CCodecs::FindFormatForArchiveType(const UString &arcType, CIntVector &forma
const UString name = arcType.Mid(pos, (unsigned)pos2 - pos);
if (name.IsEmpty())
return false;
int index = FindFormatForArchiveType(name);
if (index < 0 && name != L"*")
const int index = FindFormatForArchiveType(name);
if (index < 0 && !name.IsEqualTo("*"))
{
formatIndices.Clear();
return false;

View file

@ -474,7 +474,7 @@ static HRESULT Compress(
CArcToDoStat stat2;
if (options.RenamePairs.Size() != 0)
if (options.RenameMode || options.RenamePairs.Size() != 0)
{
FOR_VECTOR (i, arcItems)
{
@ -1920,7 +1920,7 @@ Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION
if (NFind::DoesDirExist(phyPath))
{
RINOK(callback->DeletingAfterArchiving(phyPath, true))
RemoveDir(phyPath);
RemoveDirAlways_if_Empty(phyPath);
}
}

View file

@ -94,6 +94,7 @@ struct CUpdateOptions
bool DeleteAfterCompressing;
bool SetArcMTime;
bool RenameMode;
CBoolPair NtSecurity;
CBoolPair AltStreams;
@ -139,6 +140,7 @@ struct CUpdateOptions
DeleteAfterCompressing(false),
SetArcMTime(false),
RenameMode(false),
ArcNameMode(k_ArcNameMode_Smart),
PathMode(NWildcard::k_RelatPath)

View file

@ -32,6 +32,7 @@
#include "../../../Windows/PropVariant.h"
#include "../../Common/StreamObjects.h"
#include "../../Archive/Common/ItemNameUtils.h"
#include "UpdateCallback.h"
@ -306,7 +307,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetRawProp(UInt32 index, PROPID propID, con
#if defined(_WIN32) && !defined(UNDER_CE)
static UString GetRelativePath(const UString &to, const UString &from)
static UString GetRelativePath(const UString &to, const UString &from, bool isWSL)
{
UStringVector partsTo, partsFrom;
SplitPathToParts(to, partsTo);
@ -324,11 +325,12 @@ static UString GetRelativePath(const UString &to, const UString &from)
if (i == 0)
{
#ifdef _WIN32
if (NName::IsDrivePath(to) ||
NName::IsDrivePath(from))
#ifdef _WIN32
if (isWSL ||
(NName::IsDrivePath(to) ||
NName::IsDrivePath(from)))
return to;
#endif
#endif
}
UString s;
@ -373,54 +375,87 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR
return S_OK;
}
#if !defined(UNDER_CE)
#if !defined(UNDER_CE)
if (up.DirIndex >= 0)
{
const CDirItem &di = DirItems->Items[(unsigned)up.DirIndex];
#ifdef _WIN32
// if (di.IsDir())
if (di.ReparseData.Size())
{
#ifdef _WIN32
CReparseAttr attr;
if (attr.Parse(di.ReparseData, di.ReparseData.Size()))
{
const UString simpleName = attr.GetPath();
if (!attr.IsSymLink_WSL() && attr.IsRelative_Win())
prop = simpleName;
else
UString path = attr.GetPath();
if (!path.IsEmpty())
{
const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex);
FString fullPath;
if (NDir::MyGetFullPathName(phyPath, fullPath))
bool isWSL = attr.IsSymLink_WSL();
if (isWSL)
NArchive::NItemName::ReplaceToWinSlashes(path, true); // useBackslashReplacement
// it's expected that (path) now uses windows slashes.
// CReparseAttr::IsRelative_Win() returns true if FLAG_RELATIVE is set
// CReparseAttr::IsRelative_Win() returns true for "\dir1\path"
// but we want to store real relative paths without "\" root prefix.
// so we parse path instead of IsRelative_Win() calling.
if (// attr.IsRelative_Win() ||
(isWSL ?
IS_PATH_SEPAR(path[0]) :
NName::IsAbsolutePath(path)))
{
prop = GetRelativePath(simpleName, fs2us(fullPath));
// (path) is abolute path or relative to root: "\path"
// we try to convert (path) to relative path for writing to archive.
const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex);
FString fullPath;
if (NDir::MyGetFullPathName(phyPath, fullPath))
{
if (IS_PATH_SEPAR(path[0]) &&
!IS_PATH_SEPAR(path[1]))
{
// path is relative to root of (fullPath): "\path"
const unsigned prefixSize = NName::GetRootPrefixSize(fullPath);
if (prefixSize)
{
path.DeleteFrontal(1);
path.Insert(0, fs2us(fullPath.Left(prefixSize)));
// we have changed "\" prefix to drive prefix "c:\" in (path).
// (path) is Windows path now.
isWSL = false;
}
}
}
path = GetRelativePath(path, fs2us(fullPath), isWSL);
}
#if WCHAR_PATH_SEPARATOR != L'/'
// 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes.
// so we can return any slashes to TAR handler.
// or we can convert to linux slashes here,
// because input IInArchive handler uses linux slashes for kpidSymLink.
// path.Replace(WCHAR_PATH_SEPARATOR, L'/');
#endif
if (!path.IsEmpty())
prop = path;
}
prop.Detach(value);
return S_OK;
}
}
#else // _WIN32
if (di.ReparseData.Size() != 0)
{
#else // ! _WIN32
AString utf;
utf.SetFrom_CalcLen((const char *)(const Byte *)di.ReparseData, (unsigned)di.ReparseData.Size());
#if 0 // 0 - for debug
// it's expected that link data uses system codepage.
// fs2us() ignores conversion errors. But we want correct path
UString us (fs2us(utf));
#else
UString us;
if (ConvertUTF8ToUnicode(utf, us))
#endif
{
prop = us;
prop.Detach(value);
return S_OK;
if (!us.IsEmpty())
prop = us;
}
#endif // ! _WIN32
}
#endif // _WIN32
prop.Detach(value);
return S_OK;
}
#endif // !defined(UNDER_CE)
#endif // !defined(UNDER_CE)
}
else if (propID == kpidHardLink)
{
@ -428,7 +463,12 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR
{
const CKeyKeyValPair &pair = _map[_hardIndex_To];
const CUpdatePair2 &up2 = (*UpdatePairs)[pair.Value];
prop = DirItems->GetLogPath((unsigned)up2.DirIndex);
const UString path = DirItems->GetLogPath((unsigned)up2.DirIndex);
#if WCHAR_PATH_SEPARATOR != L'/'
// 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes.
// path.Replace(WCHAR_PATH_SEPARATOR, L'/');
#endif
prop = path;
prop.Detach(value);
return S_OK;
}
@ -438,7 +478,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR
return S_OK;
}
}
}
} // if (up.NewData)
if (up.IsAnti
&& propID != kpidIsDir

View file

@ -908,9 +908,12 @@ int Main2(
if (options.EnableHeaders)
{
ShowCopyrightAndHelp(g_StdStream, false);
if (!parser.Parse1Log.IsEmpty())
*g_StdStream << parser.Parse1Log;
if (g_StdStream)
{
ShowCopyrightAndHelp(g_StdStream, false);
if (!parser.Parse1Log.IsEmpty())
*g_StdStream << parser.Parse1Log;
}
}
parser.Parse2(options);

View file

@ -59,10 +59,10 @@ COMPRESS_OBJS = \
C_OBJS = $(C_OBJS) \
$O\Alloc.obj \
$O\CpuArch.obj \
$O\Sort.obj \
$O\Threads.obj \
!include "../../Crc.mak"
!include "../../Sort.mak"
!include "Console.mak"
!include "../../7zip.mak"

View file

@ -72,7 +72,7 @@ FM_OBJS = \
C_OBJS = \
$O\CpuArch.obj \
$O\Sort.obj \
$O\Threads.obj \
!include "../../Sort.mak"
!include "../../7zip.mak"

View file

@ -61,7 +61,6 @@ static void MyGetFileTime(IFolderFolder *folder, UInt32 itemIndex,
}
#define kDotsReplaceString "[[..]]"
#define kDotsReplaceStringU L"[[..]]"
static void CopyStrLimited(char *dest, const AString &src, unsigned len)
{
@ -84,7 +83,7 @@ void CPlugin::ReadPluginPanelItem(PluginPanelItem &panelItem, UInt32 itemIndex)
throw 272340;
AString oemString (UnicodeStringToMultiByte(prop.bstrVal, CP_OEMCP));
if (oemString == "..")
if (oemString.IsEqualTo(".."))
oemString = kDotsReplaceString;
COPY_STR_LIMITED(panelItem.FindData.cFileName, oemString);
@ -193,7 +192,7 @@ void CPlugin::EnterToDirectory(const UString &dirName)
{
CMyComPtr<IFolderFolder> newFolder;
UString s = dirName;
if (dirName == kDotsReplaceStringU)
if (dirName.IsEqualTo(kDotsReplaceString))
s = "..";
_folder->BindToFolder(s, &newFolder);
if (!newFolder)
@ -209,12 +208,12 @@ void CPlugin::EnterToDirectory(const UString &dirName)
int CPlugin::SetDirectory(const char *aszDir, int /* opMode */)
{
UString path = MultiByteToUnicodeString(aszDir, CP_OEMCP);
if (path == WSTRING_PATH_SEPARATOR)
if (path.IsEqualTo(STRING_PATH_SEPARATOR))
{
_folder.Release();
m_ArchiveHandler->BindToRootFolder(&_folder);
}
else if (path == L"..")
else if (path.IsEqualTo(".."))
{
CMyComPtr<IFolderFolder> newFolder;
_folder->BindToParentFolder(&newFolder);

View file

@ -99,9 +99,9 @@ COMPRESS_OBJS = \
C_OBJS = \
$O\Alloc.obj \
$O\CpuArch.obj \
$O\Sort.obj \
$O\Threads.obj \
!include "../../Crc.mak"
!include "../../Sort.mak"
!include "../../7zip.mak"

View file

@ -651,7 +651,7 @@ static int WINAPI WinMain2(int nCmdShow)
SplitStringToTwoStrings(commandsString, paramString, tailString);
paramString.Trim();
tailString.Trim();
if (tailString.IsPrefixedBy(L"-t"))
if (tailString.IsPrefixedBy("-t"))
g_ArcFormat = tailString.Ptr(2);
/*

View file

@ -309,15 +309,13 @@ void ReloadLang()
{
g_Lang.Clear();
ReadRegLang(g_LangID);
#ifndef _UNICODE
if (g_IsNT)
#endif
if (g_LangID.IsEmpty())
{
if (g_LangID.IsEmpty())
{
#ifndef _UNICODE
if (g_IsNT)
#endif
OpenDefaultLang();
return;
}
return;
}
if (g_LangID.Len() > 1 || g_LangID[0] != L'-')
{

View file

@ -45,28 +45,24 @@ static bool GetSymLink(CFSTR path, CReparseAttr &attr, UString &errorMessage)
CByteBuffer buf;
if (!NIO::GetReparseData(path, buf, NULL))
return false;
if (!attr.Parse(buf, buf.Size()))
{
SetLastError(attr.ErrorCode);
return false;
}
CByteBuffer data2;
if (!FillLinkData(data2, attr.GetPath(),
!attr.IsMountPoint(), attr.IsSymLink_WSL()))
FillLinkData(data2, attr.GetPath(),
!attr.IsMountPoint(), attr.IsSymLink_WSL());
if (data2.Size() == 0)
{
errorMessage = "Cannot reproduce reparse point";
return false;
}
if (data2.Size() != buf.Size() ||
memcmp(data2, buf, buf.Size()) != 0)
if (data2 != buf)
{
errorMessage = "mismatch for reproduced reparse point";
return false;
}
return true;
}
@ -113,8 +109,8 @@ bool CLinkDialog::OnInit()
const bool res = GetSymLink(us2fs(FilePath), attr, error);
if (!res && error.IsEmpty())
{
DWORD lastError = GetLastError();
if (lastError != 0)
const DWORD lastError = GetLastError();
if (lastError)
error = NError::MyFormatMessage(lastError);
}
@ -319,10 +315,10 @@ void CLinkDialog::OnButton_Link()
return;
}
const bool isSymLink = (idb != IDR_LINK_TYPE_JUNCTION);
CByteBuffer data;
if (!FillLinkData(data, to, isSymLink, isWSL))
const bool isSymLink = (idb != IDR_LINK_TYPE_JUNCTION);
FillLinkData(data, to, isSymLink, isWSL);
if (data.Size() == 0)
{
ShowError(L"Incorrect link");
return;
@ -386,6 +382,9 @@ void CApp::Link()
path = destPanel.GetFsPath();
}
CSelectedState srcSelState;
srcPanel.SaveSelectedState(srcSelState);
CLinkDialog dlg;
dlg.CurDirPrefix = fsPrefix;
dlg.FilePath = srcPath + itemName;
@ -394,7 +393,10 @@ void CApp::Link()
if (dlg.Create(srcPanel.GetParent()) != IDOK)
return;
// fix it: we should refresh panel with changed link
// we refresh srcPanel to show changes in "Link" (kpidNtReparse) column.
// maybe we should refresh another panel also?
if (srcPanel._visibleColumns.FindItem_for_PropID(kpidNtReparse) >= 0)
srcPanel.RefreshListCtrl(srcSelState);
RefreshTitleAlways();
}

View file

@ -711,8 +711,8 @@ public:
}
// bool IsFsOrDrivesFolder() const { return IsFSFolder() || IsFSDrivesFolder(); }
bool IsDeviceDrivesPrefix() const { return _currentFolderPrefix == L"\\\\.\\"; }
bool IsSuperDrivesPrefix() const { return _currentFolderPrefix == L"\\\\?\\"; }
bool IsDeviceDrivesPrefix() const { return _currentFolderPrefix.IsEqualTo("\\\\.\\"); }
bool IsSuperDrivesPrefix() const { return _currentFolderPrefix.IsEqualTo("\\\\?\\"); }
/*
c:\Dir

Some files were not shown because too many files have changed in this diff Show more