From 395149956d696e6e3099d8b76d797437f94a6942 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Sat, 5 Jul 2025 00:00:00 +0000 Subject: [PATCH 1/3] 25.00 --- Asm/x86/Sort.asm | 860 ++++++++++++++++++ C/7zVersion.h | 10 +- C/BwtSort.c | 470 ++++++---- C/BwtSort.h | 7 +- C/Compiler.h | 12 +- C/CpuArch.h | 8 + C/HuffEnc.c | 396 ++++++-- C/HuffEnc.h | 8 +- C/LzFind.c | 24 +- C/LzFindMt.c | 10 +- C/LzFindMt.h | 6 +- C/Lzma2Enc.c | 4 +- C/Lzma2Enc.h | 1 + C/LzmaEnc.c | 6 + C/LzmaEnc.h | 4 +- C/MtCoder.c | 61 +- C/MtCoder.h | 7 +- C/Sha512.c | 169 +++- C/Sort.c | 367 +++++--- C/Sort.h | 7 +- C/Threads.c | 237 ++++- C/Threads.h | 12 +- C/Util/Lzma/LzmaUtil.dsp | 4 + C/Util/LzmaLib/LzmaLib.dsp | 8 +- C/Xz.h | 12 +- C/XzCrc64Opt.c | 4 +- C/XzDec.c | 29 +- C/XzEnc.c | 8 +- C/XzEnc.h | 3 +- C/XzIn.c | 265 +++--- CPP/7zip/7zip_gcc.mak | 6 +- CPP/7zip/Archive/7z/7zCompressionMode.h | 2 + CPP/7zip/Archive/7z/7zHandlerOut.cpp | 30 +- CPP/7zip/Archive/ArHandler.cpp | 14 +- CPP/7zip/Archive/Bz2Handler.cpp | 8 +- CPP/7zip/Archive/ComHandler.cpp | 48 +- CPP/7zip/Archive/Common/HandlerOut.cpp | 8 +- CPP/7zip/Archive/Common/HandlerOut.h | 33 +- CPP/7zip/Archive/Common/ItemNameUtils.cpp | 35 +- CPP/7zip/Archive/Common/ItemNameUtils.h | 3 + CPP/7zip/Archive/CpioHandler.cpp | 42 +- CPP/7zip/Archive/DmgHandler.cpp | 6 +- CPP/7zip/Archive/FatHandler.cpp | 817 +++++++++++------ CPP/7zip/Archive/Nsis/NsisIn.cpp | 8 +- CPP/7zip/Archive/NtfsHandler.cpp | 2 +- CPP/7zip/Archive/PeHandler.cpp | 4 +- CPP/7zip/Archive/Rar/Rar5Handler.cpp | 13 +- CPP/7zip/Archive/Rar/Rar5Handler.h | 8 +- CPP/7zip/Archive/Rar/RarHandler.cpp | 4 +- CPP/7zip/Archive/RpmHandler.cpp | 6 +- CPP/7zip/Archive/VmdkHandler.cpp | 9 +- CPP/7zip/Archive/Wim/WimIn.cpp | 2 +- CPP/7zip/Archive/XarHandler.cpp | 23 +- CPP/7zip/Archive/XzHandler.cpp | 14 +- CPP/7zip/Archive/Zip/ZipUpdate.cpp | 47 +- CPP/7zip/Bundles/Alone/makefile | 3 +- CPP/7zip/Bundles/Alone7z/makefile | 2 +- 
CPP/7zip/Bundles/Format7z/makefile | 2 +- CPP/7zip/Bundles/Format7zF/Arc.mak | 2 +- CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp | 4 +- CPP/7zip/Common/InBuffer.h | 10 + CPP/7zip/Common/MethodProps.cpp | 32 +- CPP/7zip/Common/MethodProps.h | 6 +- CPP/7zip/Common/OutBuffer.h | 29 +- CPP/7zip/Compress/BZip2Const.h | 2 +- CPP/7zip/Compress/BZip2Encoder.cpp | 700 +++++++++----- CPP/7zip/Compress/BZip2Encoder.h | 136 +-- CPP/7zip/Compress/BitlEncoder.h | 1 + CPP/7zip/Compress/BitmEncoder.h | 75 +- CPP/7zip/Compress/DeflateDecoder.cpp | 16 +- CPP/7zip/Compress/DeflateEncoder.cpp | 100 +- CPP/7zip/Compress/Lzma2Encoder.cpp | 10 +- CPP/7zip/Compress/LzmaEncoder.cpp | 18 + CPP/7zip/Compress/Mtf8.h | 13 + CPP/7zip/Compress/Rar5Decoder.cpp | 127 ++- CPP/7zip/Crypto/MyAes.cpp | 27 +- CPP/7zip/ICoder.h | 3 + CPP/7zip/Sort.mak | 6 + CPP/7zip/UI/Common/ArchiveCommandLine.cpp | 60 +- CPP/7zip/UI/Common/ArchiveExtractCallback.cpp | 722 ++++++++------- CPP/7zip/UI/Common/ArchiveExtractCallback.h | 68 +- CPP/7zip/UI/Common/Bench.cpp | 87 +- CPP/7zip/UI/Common/EnumDirItems.cpp | 55 +- CPP/7zip/UI/Common/Extract.cpp | 2 +- CPP/7zip/UI/Common/ExtractingFilePath.cpp | 2 +- CPP/7zip/UI/Common/HashCalc.cpp | 313 +++++-- CPP/7zip/UI/Common/HashCalc.h | 21 +- CPP/7zip/UI/Common/LoadCodecs.cpp | 6 +- CPP/7zip/UI/Common/Update.cpp | 4 +- CPP/7zip/UI/Common/Update.h | 2 + CPP/7zip/UI/Common/UpdateCallback.cpp | 110 ++- CPP/7zip/UI/Console/Main.cpp | 9 +- CPP/7zip/UI/Console/makefile | 2 +- CPP/7zip/UI/Explorer/makefile | 2 +- CPP/7zip/UI/Far/Plugin.cpp | 9 +- CPP/7zip/UI/Far/makefile | 2 +- CPP/7zip/UI/FileManager/FM.cpp | 2 +- CPP/7zip/UI/FileManager/LangUtils.cpp | 12 +- CPP/7zip/UI/FileManager/LinkDialog.cpp | 30 +- CPP/7zip/UI/FileManager/Panel.h | 4 +- CPP/7zip/UI/FileManager/PanelCopy.cpp | 2 +- CPP/7zip/UI/FileManager/PanelFolderChange.cpp | 14 +- CPP/7zip/UI/FileManager/PanelOperations.cpp | 4 +- CPP/7zip/UI/FileManager/RootFolder.cpp | 4 +- CPP/7zip/UI/FileManager/makefile | 2 +- 
CPP/7zip/UI/GUI/BenchmarkDialog.cpp | 4 +- CPP/7zip/UI/GUI/BenchmarkDialog.rc | 2 +- CPP/7zip/UI/GUI/CompressDialog.cpp | 23 +- CPP/7zip/UI/GUI/makefile | 3 +- CPP/Build.mak | 8 +- CPP/Common/MyString.cpp | 29 - CPP/Common/MyString.h | 13 +- CPP/Common/MyXml.cpp | 4 +- CPP/Common/Sha3Reg.cpp | 2 +- CPP/Common/Wildcard.cpp | 13 +- CPP/Windows/FileDir.cpp | 29 + CPP/Windows/FileDir.h | 5 + CPP/Windows/FileFind.cpp | 9 +- CPP/Windows/FileIO.h | 43 +- CPP/Windows/FileLink.cpp | 246 +++-- CPP/Windows/FileName.cpp | 71 +- CPP/Windows/FileName.h | 13 +- CPP/Windows/System.cpp | 128 ++- CPP/Windows/System.h | 61 +- CPP/Windows/Thread.h | 8 +- CPP/Windows/TimeUtils.cpp | 3 +- DOC/7zip.wxs | 4 +- DOC/License.txt | 6 +- DOC/readme.txt | 25 +- DOC/src-history.txt | 12 + 130 files changed, 5532 insertions(+), 2317 deletions(-) create mode 100644 Asm/x86/Sort.asm create mode 100644 CPP/7zip/Sort.mak diff --git a/Asm/x86/Sort.asm b/Asm/x86/Sort.asm new file mode 100644 index 0000000..517c615 --- /dev/null +++ b/Asm/x86/Sort.asm @@ -0,0 +1,860 @@ +; SortTest.asm -- ASM version of HeapSort() function +; Igor Pavlov : Public domain + +include ../../../../Asm/x86/7zAsm.asm + +MY_ASM_START + +ifndef Z7_SORT_ASM_USE_SEGMENT +if (IS_LINUX gt 0) + ; Z7_SORT_ASM_USE_SEGMENT equ 1 +else + ; Z7_SORT_ASM_USE_SEGMENT equ 1 +endif +endif + +ifdef Z7_SORT_ASM_USE_SEGMENT +_TEXT$Z7_SORT SEGMENT ALIGN(64) 'CODE' +MY_ALIGN macro num:req + align num +endm +else +MY_ALIGN macro num:req + ; We expect that ".text" is aligned for 16-bytes. + ; So we don't need large alignment inside our function. 
+ align 16 +endm +endif + + +MY_ALIGN_16 macro + MY_ALIGN 16 +endm + +MY_ALIGN_32 macro + MY_ALIGN 32 +endm + +MY_ALIGN_64 macro + MY_ALIGN 64 +endm + +ifdef x64 + +NUM_PREFETCH_LEVELS equ 3 ; to prefetch 1x 64-bytes line (is good for most cases) +; NUM_PREFETCH_LEVELS equ 4 ; to prefetch 2x 64-bytes lines (better for big arrays) + +acc equ x0 +k equ r0 +k_x equ x0 + +p equ r1 + +s equ r2 +s_x equ x2 + +a0 equ x3 +t0 equ a0 + +a3 equ x5 +qq equ a3 + +a1 equ x6 +t1 equ a1 +t1_r equ r6 + +a2 equ x7 +t2 equ a2 + +i equ r8 +e0 equ x8 + +e1 equ x9 + +num_last equ r10 +num_last_x equ x10 + +next4_lim equ r11 +pref_lim equ r12 + + + +SORT_2_WITH_TEMP_REG macro b0, b1, temp_reg + mov temp_reg, b0 + cmp b0, b1 + cmovae b0, b1 ; min + cmovae b1, temp_reg ; max +endm + +SORT macro b0, b1 + SORT_2_WITH_TEMP_REG b0, b1, acc +endm + +LOAD macro dest:req, index:req + mov dest, [p + 4 * index] +endm + +STORE macro reg:req, index:req + mov [p + 4 * index], reg +endm + + +if (NUM_PREFETCH_LEVELS gt 3) + num_prefetches equ (1 SHL (NUM_PREFETCH_LEVELS - 3)) +else + num_prefetches equ 1 +endif + +PREFETCH_OP macro offs + cur_offset = 7 * 4 ; it's average offset in 64-bytes cache line. + ; cur_offset = 0 ; we can use zero offset, if we are sure that array is aligned for 64-bytes. 
+ rept num_prefetches + if 1 + prefetcht0 byte ptr [p + offs + cur_offset] + else + mov pref_x, dword ptr [p + offs + cur_offset] + endif + cur_offset = cur_offset + 64 + endm +endm + +PREFETCH_MY macro +if 1 + if 1 + shl k, NUM_PREFETCH_LEVELS + 3 + else + ; we delay prefetch instruction to improve main loads + shl k, NUM_PREFETCH_LEVELS + shl k, 3 + ; shl k, 0 + endif + PREFETCH_OP k +elseif 1 + shl k, 3 + PREFETCH_OP k * (1 SHL NUM_PREFETCH_LEVELS) ; change it +endif +endm + + +STEP_1 macro exit_label, prefetch_macro +use_cmov_1 equ 1 ; set 1 for cmov, but it's slower in some cases + ; set 0 for LOAD after adc s, 0 + cmp t0, t1 + if use_cmov_1 + cmovb t0, t1 + ; STORE t0, k + endif + adc s, 0 + if use_cmov_1 eq 0 + LOAD t0, s + endif + cmp qq, t0 + jae exit_label + if 1 ; use_cmov_1 eq 0 + STORE t0, k + endif + prefetch_macro + mov t0, [p + s * 8] + mov t1, [p + s * 8 + 4] + mov k, s + add s, s ; slower for some cpus + ; lea s, dword ptr [s + s] ; slower for some cpus + ; shl s, 1 ; faster for some cpus + ; lea s, dword ptr [s * 2] ; faster for some cpus + rept 0 ; 1000 for debug : 0 for normal + ; number of calls in generate_stage : ~0.6 of number of items + shl k, 0 + endm +endm + + +STEP_2 macro exit_label, prefetch_macro +use_cmov_2 equ 0 ; set 1 for cmov, but it's slower in some cases + ; set 0 for LOAD after adc s, 0 + cmp t0, t1 + if use_cmov_2 + mov t2, t0 + cmovb t2, t1 + ; STORE t2, k + endif + mov t0, [p + s * 8] + mov t1, [p + s * 8 + 4] + cmovb t0, [p + s * 8 + 8] + cmovb t1, [p + s * 8 + 12] + adc s, 0 + if use_cmov_2 eq 0 + LOAD t2, s + endif + cmp qq, t2 + jae exit_label + if 1 ; use_cmov_2 eq 0 + STORE t2, k + endif + prefetch_macro + mov k, s + ; add s, s + ; lea s, [s + s] + shl s, 1 + ; lea s, [s * 2] +endm + + +MOVE_SMALLEST_UP macro STEP, use_prefetch, num_unrolls + LOCAL exit_1, exit_2, leaves, opt_loop, last_nodes + + ; s == k * 2 + ; t0 == (p)[s] + ; t1 == (p)[s + 1] + cmp k, next4_lim + jae leaves + + rept num_unrolls + STEP exit_2 + 
cmp k, next4_lim + jae leaves + endm + + if use_prefetch + prefetch_macro equ PREFETCH_MY + pref_lim_2 equ pref_lim + ; lea pref_lim, dword ptr [num_last + 1] + ; shr pref_lim, NUM_PREFETCH_LEVELS + 1 + cmp k, pref_lim_2 + jae last_nodes + else + prefetch_macro equ + pref_lim_2 equ next4_lim + endif + +MY_ALIGN_16 +opt_loop: + STEP exit_2, prefetch_macro + cmp k, pref_lim_2 + jb opt_loop + +last_nodes: + ; k >= pref_lim_2 + ; 2 cases are possible: + ; case-1: num_after_prefetch_levels == 0 && next4_lim = pref_lim_2 + ; case-2: num_after_prefetch_levels == NUM_PREFETCH_LEVELS - 1 && + ; next4_lim = pref_lim_2 / (NUM_PREFETCH_LEVELS - 1) + if use_prefetch + yyy = NUM_PREFETCH_LEVELS - 1 + while yyy + yyy = yyy - 1 + STEP exit_2 + if yyy + cmp k, next4_lim + jae leaves + endif + endm + endif + +leaves: + ; k >= next4_lim == (num_last + 1) / 4 must be provided by previous code. + ; we have 2 nodes in (s) level : always + ; we can have some nodes in (s * 2) level : low probability case + ; we have no nodes in (s * 4) level + ; s == k * 2 + ; t0 == (p)[s] + ; t1 == (p)[s + 1] + cmp t0, t1 + cmovb t0, t1 + adc s, 0 + STORE t0, k + + ; t0 == (p)[s] + ; s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1] + ; we have 3 possible cases here: + ; s * 2 > num_last : (s) node has no childs + ; s * 2 == num_last : (s) node has 1 leaf child that is last item of array + ; s * 2 < num_last : (s) node has 2 leaf childs. We provide (s * 4 > num_last) + ; we check for (s * 2 > num_last) before "cmp qq, t0" check, because + ; we will replace conditional jump with cmov instruction later. 
+ lea t1_r, dword ptr [s + s] + cmp t1_r, num_last + ja exit_1 ; if (s * 2 > num_last), we have no childs : it's high probability branch + + ; it's low probability branch + ; s * 2 <= num_last + cmp qq, t0 + jae exit_2 + + ; qq < t0, so we go to next level + ; we check 1 or 2 childs in next level + mov t0, [p + s * 8] + mov k, s + mov s, t1_r + cmp t1_r, num_last + je @F ; (s == num_last) means that we have single child in tree + + ; (s < num_last) : so we must read both childs and select max of them. + mov t1, [p + k * 8 + 4] + cmp t0, t1 + cmovb t0, t1 + adc s, 0 +@@: + STORE t0, k +exit_1: + ; t0 == (p)[s], s / 2 == k : (s) is index of max item from (p)[k * 2], (p)[k * 2 + 1] + cmp qq, t0 + cmovb k, s +exit_2: + STORE qq, k +endm + + + + +ifdef Z7_SORT_ASM_USE_SEGMENT +; MY_ALIGN_64 +else + MY_ALIGN_16 +endif + +MY_PROC HeapSort, 2 + +if (IS_LINUX gt 0) + mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux +endif + mov num_last, REG_ABI_PARAM_1 ; r10 <- r6 : linux + ; r10 <- r2 : win64 + cmp num_last, 2 + jb end_1 + + ; MY_PUSH_PRESERVED_ABI_REGS + MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 + push r12 + + cmp num_last, 4 + ja sort_5 + + LOAD a0, 0 + LOAD a1, 1 + SORT a0, a1 + cmp num_last, 3 + jb end_2 + + LOAD a2, 2 + je sort_3 + + LOAD a3, 3 + SORT a2, a3 + SORT a1, a3 + STORE a3, 3 +sort_3: + SORT a0, a2 + SORT a1, a2 + STORE a2, 2 + jmp end_2 + +sort_5: + ; (num_last > 4) is required here + ; if (num_last >= 6) : we will use optimized loop for leaf nodes loop_down_1 + mov next4_lim, num_last + shr next4_lim, 2 + + dec num_last + mov k, num_last + shr k, 1 + mov i, num_last + shr i, 2 + test num_last, 1 + jnz size_even + + ; ODD number of items. 
So we compare parent with single child + LOAD t1, num_last + LOAD t0, k + SORT_2_WITH_TEMP_REG t1, t0, t2 + STORE t1, num_last + STORE t0, k + dec k + +size_even: + cmp k, i + jbe loop_down ; jump for num_last == 4 case + +if 0 ; 1 for debug + mov r15, k + mov r14d, 1 ; 100 +loop_benchmark: +endif + ; optimized loop for leaf nodes: + mov t0, [p + k * 8] + mov t1, [p + k * 8 + 4] + +MY_ALIGN_16 +loop_down_1: + ; we compare parent with max of childs: + ; lea s, dword ptr [2 * k] + mov s, k + cmp t0, t1 + cmovb t0, t1 + adc s, s + LOAD t2, k + STORE t0, k + cmp t2, t0 + cmovae s, k + dec k + ; we preload next items before STORE operation for calculated address + mov t0, [p + k * 8] + mov t1, [p + k * 8 + 4] + STORE t2, s + cmp k, i + jne loop_down_1 + +if 0 ; 1 for debug + mov k, r15 + dec r14d + jnz loop_benchmark + ; jmp end_debug +endif + +MY_ALIGN_16 +loop_down: + mov t0, [p + i * 8] + mov t1, [p + i * 8 + 4] + LOAD qq, i + mov k, i + lea s, dword ptr [i + i] + ; jmp end_debug + DOWN_use_prefetch equ 0 + DOWN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_1, DOWN_use_prefetch, DOWN_num_unrolls + sub i, 1 + jnb loop_down + + ; jmp end_debug + LOAD e0, 0 + LOAD e1, 1 + + LEVEL_3_LIMIT equ 8 ; 8 is default, but 7 also can work + + cmp num_last, LEVEL_3_LIMIT + 1 + jb main_loop_sort_5 + +MY_ALIGN_16 +main_loop_sort: + ; num_last > LEVEL_3_LIMIT + ; p[size--] = p[0]; + LOAD qq, num_last + STORE e0, num_last + mov e0, e1 + + mov next4_lim, num_last + shr next4_lim, 2 + mov pref_lim, num_last + shr pref_lim, NUM_PREFETCH_LEVELS + 1 + + dec num_last +if 0 ; 1 for debug + ; that optional optimization can improve the performance, if there are identical items in array + ; 3 times improvement : if all items in array are identical + ; 20% improvement : if items are different for 1 bit only + ; 1-10% improvement : if items are different for (2+) bits + ; no gain : if items are different + cmp qq, e1 + jae next_iter_main +endif + LOAD e1, 2 + LOAD t0, 3 + mov k_x, 2 + cmp e1, t0 + 
cmovb e1, t0 + mov t0, [p + 4 * (4 + 0)] + mov t1, [p + 4 * (4 + 1)] + cmovb t0, [p + 4 * (4 + 2)] + cmovb t1, [p + 4 * (4 + 3)] + adc k_x, 0 + ; (qq <= e1), because the tree is correctly sorted + ; also here we could check (qq >= e1) or (qq == e1) for faster exit + lea s, dword ptr [k + k] + MAIN_use_prefetch equ 1 + MAIN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_2, MAIN_use_prefetch, MAIN_num_unrolls + +next_iter_main: + cmp num_last, LEVEL_3_LIMIT + jne main_loop_sort + + ; num_last == LEVEL_3_LIMIT +main_loop_sort_5: + ; 4 <= num_last <= LEVEL_3_LIMIT + ; p[size--] = p[0]; + LOAD qq, num_last + STORE e0, num_last + mov e0, e1 + dec num_last_x + + LOAD e1, 2 + LOAD t0, 3 + mov k_x, 2 + cmp e1, t0 + cmovb e1, t0 + adc k_x, 0 + + lea s_x, dword ptr [k * 2] + cmp s_x, num_last_x + ja exit_2 + + mov t0, [p + k * 8] + je exit_1 + + ; s < num_last + mov t1, [p + k * 8 + 4] + cmp t0, t1 + cmovb t0, t1 + adc s_x, 0 +exit_1: + STORE t0, k + cmp qq, t0 + cmovb k_x, s_x +exit_2: + STORE qq, k + cmp num_last_x, 3 + jne main_loop_sort_5 + + ; num_last == 3 (real_size == 4) + LOAD a0, 2 + LOAD a1, 3 + STORE e1, 2 + STORE e0, 3 + SORT a0, a1 +end_2: + STORE a0, 0 + STORE a1, 1 +; end_debug: + ; MY_POP_PRESERVED_ABI_REGS + pop r12 + MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 +end_1: +MY_ENDP + + + +else +; ------------ x86 32-bit ------------ + +ifdef x64 +IS_CDECL = 0 +endif + +acc equ x0 +k equ r0 +k_x equ acc + +p equ r1 + +num_last equ r2 +num_last_x equ x2 + +a0 equ x3 +t0 equ a0 + +a3 equ x5 +i equ r5 +e0 equ a3 + +a1 equ x6 +qq equ a1 + +a2 equ x7 +s equ r7 +s_x equ a2 + + +SORT macro b0, b1 + cmp b1, b0 + jae @F + if 1 + xchg b0, b1 + else + mov acc, b0 + mov b0, b1 ; min + mov b1, acc ; max + endif +@@: +endm + +LOAD macro dest:req, index:req + mov dest, [p + 4 * index] +endm + +STORE macro reg:req, index:req + mov [p + 4 * index], reg +endm + + +STEP_1 macro exit_label + mov t0, [p + k * 8] + cmp t0, [p + k * 8 + 4] + adc s, 0 + LOAD t0, s + STORE t0, k ; we 
lookahed stooring for most expected branch + cmp qq, t0 + jae exit_label + ; STORE t0, k ; use if + mov k, s + add s, s + ; lea s, dword ptr [s + s] + ; shl s, 1 + ; lea s, dword ptr [s * 2] +endm + +STEP_BRANCH macro exit_label + mov t0, [p + k * 8] + cmp t0, [p + k * 8 + 4] + jae @F + inc s + mov t0, [p + k * 8 + 4] +@@: + cmp qq, t0 + jae exit_label + STORE t0, k + mov k, s + add s, s +endm + + + +MOVE_SMALLEST_UP macro STEP, num_unrolls, exit_2 + LOCAL leaves, opt_loop, single + + ; s == k * 2 + rept num_unrolls + cmp s, num_last + jae leaves + STEP_1 exit_2 + endm + cmp s, num_last + jb opt_loop + +leaves: + ; (s >= num_last) + jne exit_2 +single: + ; (s == num_last) + mov t0, [p + k * 8] + cmp qq, t0 + jae exit_2 + STORE t0, k + mov k, s + jmp exit_2 + +MY_ALIGN_16 +opt_loop: + STEP exit_2 + cmp s, num_last + jb opt_loop + je single +exit_2: + STORE qq, k +endm + + + + +ifdef Z7_SORT_ASM_USE_SEGMENT +; MY_ALIGN_64 +else + MY_ALIGN_16 +endif + +MY_PROC HeapSort, 2 + ifdef x64 + if (IS_LINUX gt 0) + mov num_last, REG_ABI_PARAM_1 ; r2 <- r6 : linux + mov p, REG_ABI_PARAM_0 ; r1 <- r7 : linux + endif + elseif (IS_CDECL gt 0) + mov num_last, [r4 + REG_SIZE * 2] + mov p, [r4 + REG_SIZE * 1] + endif + cmp num_last, 2 + jb end_1 + MY_PUSH_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 + + cmp num_last, 4 + ja sort_5 + + LOAD a0, 0 + LOAD a1, 1 + SORT a0, a1 + cmp num_last, 3 + jb end_2 + + LOAD a2, 2 + je sort_3 + + LOAD a3, 3 + SORT a2, a3 + SORT a1, a3 + STORE a3, 3 +sort_3: + SORT a0, a2 + SORT a1, a2 + STORE a2, 2 + jmp end_2 + +sort_5: + ; num_last > 4 + lea i, dword ptr [num_last - 2] + dec num_last + test i, 1 + jz loop_down + + ; single child + mov t0, [p + num_last * 4] + mov qq, [p + num_last * 2] + dec i + cmp qq, t0 + jae loop_down + + mov [p + num_last * 2], t0 + mov [p + num_last * 4], qq + +MY_ALIGN_16 +loop_down: + mov t0, [p + i * 4] + cmp t0, [p + i * 4 + 4] + mov k, i + mov qq, [p + i * 2] + adc k, 0 + LOAD t0, k + cmp qq, t0 + jae down_next + mov [p + i * 
2], t0 + lea s, dword ptr [k + k] + + DOWN_num_unrolls equ 0 + MOVE_SMALLEST_UP STEP_1, DOWN_num_unrolls, down_exit_label +down_next: + sub i, 2 + jnb loop_down + ; jmp end_debug + + LOAD e0, 0 + +MY_ALIGN_16 +main_loop_sort: + ; num_last > 3 + mov t0, [p + 2 * 4] + cmp t0, [p + 3 * 4] + LOAD qq, num_last + STORE e0, num_last + LOAD e0, 1 + mov s_x, 2 + mov k_x, 1 + adc s, 0 + LOAD t0, s + dec num_last + cmp qq, t0 + jae main_exit_label + STORE t0, 1 + mov k, s + add s, s + if 1 + ; for branch data prefetch mode : + ; it's faster for large arrays : larger than (1 << 13) items. + MAIN_num_unrolls equ 10 + STEP_LOOP equ STEP_BRANCH + else + MAIN_num_unrolls equ 0 + STEP_LOOP equ STEP_1 + endif + + MOVE_SMALLEST_UP STEP_LOOP, MAIN_num_unrolls, main_exit_label + + ; jmp end_debug + cmp num_last, 3 + jne main_loop_sort + + ; num_last == 3 (real_size == 4) + LOAD a0, 2 + LOAD a1, 3 + LOAD a2, 1 + STORE e0, 3 ; e0 is alias for a3 + STORE a2, 2 + SORT a0, a1 +end_2: + STORE a0, 0 + STORE a1, 1 +; end_debug: + MY_POP_PRESERVED_ABI_REGS_UP_TO_INCLUDING_R11 +end_1: +MY_ENDP + +endif + +ifdef Z7_SORT_ASM_USE_SEGMENT +_TEXT$Z7_SORT ENDS +endif + +if 0 +LEA_IS_D8 (R64) [R2 * 4 + 16] + Lat : TP + 2 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 hsw-rocket + 1 : 2 : p01 snb-ivb + 1 : 1 : p1 conroe-wsm + 1 : 4 : zen3,zen4 + 2 : 4 : zen1,zen2 + +LEA_B_IS (R64) [R2 + R3 * 4] + Lat : TP + 1 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 hsw-rocket + 1 : 2 : p01 snb-ivb + 1 : 1 : p1 nhm-wsm + 1 : 1 : p0 conroe-wsm + 1 : 4 : zen3,zen4 + 2 :2,4 : zen1,zen2 + +LEA_B_IS_D8 (R64) [R2 + R3 * 4 + 16] + Lat : TP + 2 : 1 : adl-e + 2 : 3 p056 adl-p + 1 : 2 : p15 ice-rocket + 3 : 1 : p1/p15 hsw-rocket + 3 : 1 : p01 snb-ivb + 1 : 1 : p1 nhm-wsm + 1 : 1 : p0 conroe-wsm + 2,1 : 2 : zen3,zen4 + 2 : 2 : zen1,zen2 + +CMOVB (R64, R64) + Lat : TP + 1,2 : 2 : adl-e + 1 : 2 p06 adl-p + 1 : 2 : p06 bwd-rocket + 1,2 : 2 : p0156+p06 hsw + 1,2 :1.5 : p015+p05 snb-ivb + 1,2 : 1 : p015+p05 nhm + 1 : 1 : 2*p015 
conroe + 1 : 2 : zen3,zen4 + 1 : 4 : zen1,zen2 + +ADC (R64, 0) + Lat : TP + 1,2 : 2 : adl-e + 1 : 2 p06 adl-p + 1 : 2 : p06 bwd-rocket + 1 :1.5 : p0156+p06 hsw + 1 :1.5 : p015+p05 snb-ivb + 2 : 1 : 2*p015 conroe-wstm + 1 : 2 : zen1,zen2,zen3,zen4 + +PREFETCHNTA : fetch data into non-temporal cache close to the processor, minimizing cache pollution. + L1 : Pentium3 + L2 : NetBurst + L1, not L2: Core duo, Core 2, Atom processors + L1, not L2, may fetch into L3 with fast replacement: Nehalem, Westmere, Sandy Bridge, ... + NEHALEM: Fills L1/L3, L1 LRU is not updated + L3 with fast replacement: Xeon Processors based on Nehalem, Westmere, Sandy Bridge, ... +PREFETCHT0 : fetch data into all cache levels. +PREFETCHT1 : fetch data into L2 and L3 +endif + +end diff --git a/C/7zVersion.h b/C/7zVersion.h index e82ba0b..72733f7 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 24 -#define MY_VER_MINOR 9 +#define MY_VER_MAJOR 25 +#define MY_VER_MINOR 0 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "24.09" +#define MY_VERSION_NUMBERS "25.00" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2024-11-29" +#define MY_DATE "2025-07-05" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2024 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/C/BwtSort.c b/C/BwtSort.c index 05ad6de..8f64f9d 100644 --- a/C/BwtSort.c +++ b/C/BwtSort.c @@ -1,5 +1,5 @@ /* BwtSort.c -- BWT block sorting -2023-04-02 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -7,6 +7,44 @@ #include "Sort.h" /* #define BLOCK_SORT_USE_HEAP_SORT */ +// #define BLOCK_SORT_USE_HEAP_SORT + +#ifdef BLOCK_SORT_USE_HEAP_SORT + 
+#define HeapSortRefDown(p, vals, n, size, temp) \ + { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ + size_t s = k << 1; \ + if (s > size) break; \ + if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ + if (val >= vals[p[s]]) break; \ + p[k] = p[s]; k = s; \ + } p[k] = temp; } + +void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) +{ + if (size <= 1) + return; + p--; + { + size_t i = size / 2; + do + { + UInt32 temp = p[i]; + HeapSortRefDown(p, vals, i, size, temp); + } + while (--i != 0); + } + do + { + UInt32 temp = p[size]; + p[size--] = p[1]; + HeapSortRefDown(p, vals, 1, size, temp); + } + while (size > 1); +} + +#endif // BLOCK_SORT_USE_HEAP_SORT + /* Don't change it !!! */ #define kNumHashBytes 2 @@ -27,26 +65,27 @@ #else -#define kNumBitsMax 20 -#define kIndexMask ((1 << kNumBitsMax) - 1) -#define kNumExtraBits (32 - kNumBitsMax) -#define kNumExtra0Bits (kNumExtraBits - 2) -#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) +#define kNumBitsMax 20 +#define kIndexMask (((UInt32)1 << kNumBitsMax) - 1) +#define kNumExtraBits (32 - kNumBitsMax) +#define kNumExtra0Bits (kNumExtraBits - 2) +#define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) #define SetFinishedGroupSize(p, size) \ - { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ + { *(p) |= ((((UInt32)(size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ if ((size) > (1 << kNumExtra0Bits)) { \ - *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ + *(p) |= 0x40000000; \ + *((p) + 1) |= (((UInt32)(size) - 1) >> kNumExtra0Bits) << kNumBitsMax; } } \ -static void SetGroupSize(UInt32 *p, UInt32 size) +static void SetGroupSize(UInt32 *p, size_t size) { if (--size == 0) return; - *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); + *p |= 0x80000000 | (((UInt32)size & kNumExtra0Mask) << kNumBitsMax); if (size >= (1 << kNumExtra0Bits)) { *p |= 0x40000000; - p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); + p[1] |= (((UInt32)size >> kNumExtra0Bits) << 
kNumBitsMax); } } @@ -59,12 +98,14 @@ returns: 1 - if there are groups, 0 - no more groups */ static -UInt32 +unsigned Z7_FASTCALL -SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices - #ifndef BLOCK_SORT_USE_HEAP_SORT - , UInt32 left, UInt32 range - #endif +SortGroup(size_t BlockSize, size_t NumSortedBytes, + size_t groupOffset, size_t groupSize, + unsigned NumRefBits, UInt32 *Indices +#ifndef BLOCK_SORT_USE_HEAP_SORT + , size_t left, size_t range +#endif ) { UInt32 *ind2 = Indices + groupOffset; @@ -79,90 +120,93 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr return 0; } Groups = Indices + BlockSize + BS_TEMP_SIZE; - if (groupSize <= ((UInt32)1 << NumRefBits) - #ifndef BLOCK_SORT_USE_HEAP_SORT + if (groupSize <= ((size_t)1 << NumRefBits) +#ifndef BLOCK_SORT_USE_HEAP_SORT && groupSize <= range - #endif +#endif ) { UInt32 *temp = Indices + BlockSize; - UInt32 j; - UInt32 mask, thereAreGroups, group, cg; + size_t j, group; + UInt32 mask, cg; + unsigned thereAreGroups; { UInt32 gPrev; UInt32 gRes = 0; { - UInt32 sp = ind2[0] + NumSortedBytes; - if (sp >= BlockSize) sp -= BlockSize; + size_t sp = ind2[0] + NumSortedBytes; + if (sp >= BlockSize) + sp -= BlockSize; gPrev = Groups[sp]; - temp[0] = (gPrev << NumRefBits); + temp[0] = gPrev << NumRefBits; } for (j = 1; j < groupSize; j++) { - UInt32 sp = ind2[j] + NumSortedBytes; + size_t sp = ind2[j] + NumSortedBytes; UInt32 g; - if (sp >= BlockSize) sp -= BlockSize; + if (sp >= BlockSize) + sp -= BlockSize; g = Groups[sp]; - temp[j] = (g << NumRefBits) | j; + temp[j] = (g << NumRefBits) | (UInt32)j; gRes |= (gPrev ^ g); } if (gRes == 0) { - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS SetGroupSize(ind2, groupSize); - #endif +#endif return 1; } } HeapSort(temp, groupSize); - mask = (((UInt32)1 << NumRefBits) - 1); + mask = ((UInt32)1 << NumRefBits) - 1; thereAreGroups = 0; group = groupOffset; - 
cg = (temp[0] >> NumRefBits); + cg = temp[0] >> NumRefBits; temp[0] = ind2[temp[0] & mask]; { - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS UInt32 *Flags = Groups + BlockSize; - #else - UInt32 prevGroupStart = 0; - #endif +#else + size_t prevGroupStart = 0; +#endif for (j = 1; j < groupSize; j++) { - UInt32 val = temp[j]; - UInt32 cgCur = (val >> NumRefBits); + const UInt32 val = temp[j]; + const UInt32 cgCur = val >> NumRefBits; if (cgCur != cg) { cg = cgCur; group = groupOffset + j; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS { - UInt32 t = group - 1; - Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + const size_t t = group - 1; + Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); } - #else +#else SetGroupSize(temp + prevGroupStart, j - prevGroupStart); prevGroupStart = j; - #endif +#endif } else thereAreGroups = 1; { - UInt32 ind = ind2[val & mask]; - temp[j] = ind; - Groups[ind] = group; + const UInt32 ind = ind2[val & mask]; + temp[j] = ind; + Groups[ind] = (UInt32)group; } } - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS SetGroupSize(temp + prevGroupStart, j - prevGroupStart); - #endif +#endif } for (j = 0; j < groupSize; j++) @@ -172,37 +216,42 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr /* Check that all strings are in one group (cannot sort) */ { - UInt32 group, j; - UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + UInt32 group; + size_t j; + size_t sp = ind2[0] + NumSortedBytes; + if (sp >= BlockSize) + sp -= BlockSize; group = Groups[sp]; for (j = 1; j < groupSize; j++) { - sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + sp = ind2[j] + NumSortedBytes; + if (sp >= BlockSize) + sp -= BlockSize; if (Groups[sp] != group) break; } if (j == groupSize) { - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS SetGroupSize(ind2, groupSize); - #endif +#endif return 1; 
} } - #ifndef BLOCK_SORT_USE_HEAP_SORT +#ifndef BLOCK_SORT_USE_HEAP_SORT { /* ---------- Range Sort ---------- */ - UInt32 i; - UInt32 mid; + size_t i; + size_t mid; for (;;) { - UInt32 j; + size_t j; if (range <= 1) { - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS SetGroupSize(ind2, groupSize); - #endif +#endif return 1; } mid = left + ((range + 1) >> 1); @@ -210,7 +259,7 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr i = 0; do { - UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; + size_t sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; if (Groups[sp] >= mid) { for (j--; j > i; j--) @@ -238,51 +287,53 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr break; } - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS { - UInt32 t = (groupOffset + i - 1); + const size_t t = groupOffset + i - 1; UInt32 *Flags = Groups + BlockSize; - Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); + Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); } - #endif +#endif { - UInt32 j; + size_t j; for (j = i; j < groupSize; j++) - Groups[ind2[j]] = groupOffset + i; + Groups[ind2[j]] = (UInt32)(groupOffset + i); } { - UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); - return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); + unsigned res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); + return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); } } - #else +#else // BLOCK_SORT_USE_HEAP_SORT /* ---------- Heap Sort ---------- */ { - UInt32 j; + size_t j; for (j = 0; j < groupSize; j++) { - UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; - ind2[j] = 
sp; + size_t sp = ind2[j] + NumSortedBytes; + if (sp >= BlockSize) + sp -= BlockSize; + ind2[j] = (UInt32)sp; } HeapSortRef(ind2, Groups, groupSize); /* Write Flags */ { - UInt32 sp = ind2[0]; + size_t sp = ind2[0]; UInt32 group = Groups[sp]; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS UInt32 *Flags = Groups + BlockSize; - #else - UInt32 prevGroupStart = 0; - #endif +#else + size_t prevGroupStart = 0; +#endif for (j = 1; j < groupSize; j++) { @@ -290,149 +341,210 @@ SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 gr if (Groups[sp] != group) { group = Groups[sp]; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS { - UInt32 t = groupOffset + j - 1; + const size_t t = groupOffset + j - 1; Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); } - #else +#else SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); prevGroupStart = j; - #endif +#endif } } - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); - #endif +#endif } { /* Write new Groups values and Check that there are groups */ - UInt32 thereAreGroups = 0; + unsigned thereAreGroups = 0; for (j = 0; j < groupSize; j++) { - UInt32 group = groupOffset + j; - #ifndef BLOCK_SORT_EXTERNAL_FLAGS + size_t group = groupOffset + j; +#ifndef BLOCK_SORT_EXTERNAL_FLAGS UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); - if ((ind2[j] & 0x40000000) != 0) + if (ind2[j] & 0x40000000) subGroupSize += ((ind2[(size_t)j + 1] >> kNumBitsMax) << kNumExtra0Bits); subGroupSize++; for (;;) { - UInt32 original = ind2[j]; - UInt32 sp = original & kIndexMask; - if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; - ind2[j] = sp | (original & ~kIndexMask); - Groups[sp] = group; + const UInt32 original = ind2[j]; + size_t sp = original & kIndexMask; + if (sp < NumSortedBytes) + sp += BlockSize; + sp -= NumSortedBytes; + ind2[j] = (UInt32)sp | (original & 
~kIndexMask); + Groups[sp] = (UInt32)group; if (--subGroupSize == 0) break; j++; thereAreGroups = 1; } - #else +#else UInt32 *Flags = Groups + BlockSize; for (;;) { - UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; - ind2[j] = sp; - Groups[sp] = group; + size_t sp = ind2[j]; + if (sp < NumSortedBytes) + sp += BlockSize; + sp -= NumSortedBytes; + ind2[j] = (UInt32)sp; + Groups[sp] = (UInt32)group; if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) break; j++; thereAreGroups = 1; } - #endif +#endif } return thereAreGroups; } } - #endif +#endif // BLOCK_SORT_USE_HEAP_SORT } + /* conditions: blockSize > 0 */ -UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) +UInt32 BlockSort(UInt32 *Indices, const Byte *data, size_t blockSize) { UInt32 *counters = Indices + blockSize; - UInt32 i; + size_t i; UInt32 *Groups; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS UInt32 *Flags; - #endif +#endif - /* Radix-Sort for 2 bytes */ +/* Radix-Sort for 2 bytes */ +// { UInt32 yyy; for (yyy = 0; yyy < 100; yyy++) { for (i = 0; i < kNumHashValues; i++) counters[i] = 0; - for (i = 0; i < blockSize - 1; i++) - counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++; - counters[((UInt32)data[i] << 8) | data[0]]++; + { + const Byte *data2 = data; + size_t a = data[(size_t)blockSize - 1]; + const Byte *data_lim = data + blockSize; + if (blockSize >= 4) + { + data_lim -= 3; + do + { + size_t b; + b = data2[0]; counters[(a << 8) | b]++; + a = data2[1]; counters[(b << 8) | a]++; + b = data2[2]; counters[(a << 8) | b]++; + a = data2[3]; counters[(b << 8) | a]++; + data2 += 4; + } + while (data2 < data_lim); + data_lim += 3; + } + while (data2 != data_lim) + { + size_t b = *data2++; + counters[(a << 8) | b]++; + a = b; + } + } +// }} Groups = counters + BS_TEMP_SIZE; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS +#ifdef BLOCK_SORT_EXTERNAL_FLAGS Flags = Groups + blockSize; - { - UInt32 
numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; - for (i = 0; i < numWords; i++) - Flags[i] = kAllFlags; - } - #endif + { + const size_t numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; + for (i = 0; i < numWords; i++) + Flags[i] = kAllFlags; + } +#endif { UInt32 sum = 0; for (i = 0; i < kNumHashValues; i++) { - UInt32 groupSize = counters[i]; - if (groupSize > 0) + const UInt32 groupSize = counters[i]; + counters[i] = sum; + sum += groupSize; +#ifdef BLOCK_SORT_EXTERNAL_FLAGS + if (groupSize) { - #ifdef BLOCK_SORT_EXTERNAL_FLAGS - UInt32 t = sum + groupSize - 1; - Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); - #endif - sum += groupSize; + const UInt32 t = sum - 1; + Flags[t >> kNumFlagsBits] &= ~((UInt32)1 << (t & kFlagsMask)); } - counters[i] = sum - groupSize; +#endif + } + } + + for (i = 0; i < blockSize - 1; i++) + Groups[i] = counters[((unsigned)data[i] << 8) | data[(size_t)i + 1]]; + Groups[i] = counters[((unsigned)data[i] << 8) | data[0]]; + + { +#define SET_Indices(a, b, i) \ + { UInt32 c; \ + a = (a << 8) | (b); \ + c = counters[a]; \ + Indices[c] = (UInt32)i++; \ + counters[a] = c + 1; \ } - for (i = 0; i < blockSize - 1; i++) - Groups[i] = counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]; - Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; - - for (i = 0; i < blockSize - 1; i++) - Indices[counters[((UInt32)data[i] << 8) | data[(size_t)i + 1]]++] = i; - Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; - - #ifndef BLOCK_SORT_EXTERNAL_FLAGS + size_t a = data[0]; + const Byte *data_ptr = data + 1; + i = 0; + if (blockSize >= 3) { + blockSize -= 2; + do + { + size_t b; + b = data_ptr[0]; SET_Indices(a, b, i) + a = data_ptr[1]; SET_Indices(b, a, i) + data_ptr += 2; + } + while (i < blockSize); + blockSize += 2; + } + if (i < blockSize - 1) + { + SET_Indices(a, data[(size_t)i + 1], i) + a = (Byte)a; + } + SET_Indices(a, data[0], i) + } + +#ifndef BLOCK_SORT_EXTERNAL_FLAGS + { UInt32 prev = 0; for (i = 0; i < 
kNumHashValues; i++) { - UInt32 prevGroupSize = counters[i] - prev; + const UInt32 prevGroupSize = counters[i] - prev; if (prevGroupSize == 0) continue; SetGroupSize(Indices + prev, prevGroupSize); prev = counters[i]; } - } - #endif } +#endif { - int NumRefBits; - UInt32 NumSortedBytes; - for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); + unsigned NumRefBits; + size_t NumSortedBytes; + for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++) + {} NumRefBits = 32 - NumRefBits; if (NumRefBits > kNumRefBitsMax) - NumRefBits = kNumRefBitsMax; + NumRefBits = kNumRefBitsMax; for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) { - #ifndef BLOCK_SORT_EXTERNAL_FLAGS - UInt32 finishedGroupSize = 0; - #endif - UInt32 newLimit = 0; +#ifndef BLOCK_SORT_EXTERNAL_FLAGS + size_t finishedGroupSize = 0; +#endif + size_t newLimit = 0; for (i = 0; i < blockSize;) { - UInt32 groupSize; - #ifdef BLOCK_SORT_EXTERNAL_FLAGS + size_t groupSize; +#ifdef BLOCK_SORT_EXTERNAL_FLAGS if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) { @@ -441,56 +553,56 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) } for (groupSize = 1; (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; - groupSize++); - + groupSize++) + {} groupSize++; - #else +#else - groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); + groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; { - BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); - if ((Indices[i] & 0x40000000) != 0) - { - groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); - Indices[(size_t)i + 1] &= kIndexMask; - } - Indices[i] &= kIndexMask; - groupSize++; - if (finishedGroup || groupSize == 1) - { - Indices[i - finishedGroupSize] &= kIndexMask; - if (finishedGroupSize > 1) - Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; + const BoolInt finishedGroup = ((Indices[i] & 0x80000000) == 0); + if (Indices[i] 
& 0x40000000) { - UInt32 newGroupSize = groupSize + finishedGroupSize; - SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) - finishedGroupSize = newGroupSize; + groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); + Indices[(size_t)i + 1] &= kIndexMask; } - i += groupSize; - continue; - } - finishedGroupSize = 0; + Indices[i] &= kIndexMask; + groupSize++; + if (finishedGroup || groupSize == 1) + { + Indices[i - finishedGroupSize] &= kIndexMask; + if (finishedGroupSize > 1) + Indices[(size_t)(i - finishedGroupSize) + 1] &= kIndexMask; + { + const size_t newGroupSize = groupSize + finishedGroupSize; + SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize) + finishedGroupSize = newGroupSize; + } + i += groupSize; + continue; + } + finishedGroupSize = 0; } - #endif +#endif if (NumSortedBytes >= blockSize) { - UInt32 j; + size_t j; for (j = 0; j < groupSize; j++) { - UInt32 t = (i + j); + size_t t = i + j; /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ - Groups[Indices[t]] = t; + Groups[Indices[t]] = (UInt32)t; } } else if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices - #ifndef BLOCK_SORT_USE_HEAP_SORT - , 0, blockSize - #endif - ) != 0) + #ifndef BLOCK_SORT_USE_HEAP_SORT + , 0, blockSize + #endif + )) newLimit = i + groupSize; i += groupSize; } @@ -498,19 +610,19 @@ UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) break; } } - #ifndef BLOCK_SORT_EXTERNAL_FLAGS +#ifndef BLOCK_SORT_EXTERNAL_FLAGS for (i = 0; i < blockSize;) { - UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); - if ((Indices[i] & 0x40000000) != 0) + size_t groupSize = (Indices[i] & ~0xC0000000) >> kNumBitsMax; + if (Indices[i] & 0x40000000) { - groupSize += ((Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits); + groupSize += (Indices[(size_t)i + 1] >> kNumBitsMax) << kNumExtra0Bits; Indices[(size_t)i + 1] &= kIndexMask; } Indices[i] &= kIndexMask; groupSize++; i += 
groupSize; } - #endif +#endif return Groups[0]; } diff --git a/C/BwtSort.h b/C/BwtSort.h index a34b243..1bd2316 100644 --- a/C/BwtSort.h +++ b/C/BwtSort.h @@ -1,5 +1,5 @@ /* BwtSort.h -- BWT block sorting -2023-03-03 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_BWT_SORT_H #define ZIP7_INC_BWT_SORT_H @@ -10,16 +10,17 @@ EXTERN_C_BEGIN /* use BLOCK_SORT_EXTERNAL_FLAGS if blockSize can be > 1M */ /* #define BLOCK_SORT_EXTERNAL_FLAGS */ +// #define BLOCK_SORT_EXTERNAL_FLAGS #ifdef BLOCK_SORT_EXTERNAL_FLAGS -#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) ((((blockSize) + 31) >> 5)) +#define BLOCK_SORT_EXTERNAL_SIZE(blockSize) (((blockSize) + 31) >> 5) #else #define BLOCK_SORT_EXTERNAL_SIZE(blockSize) 0 #endif #define BLOCK_SORT_BUF_SIZE(blockSize) ((blockSize) * 2 + BLOCK_SORT_EXTERNAL_SIZE(blockSize) + (1 << 16)) -UInt32 BlockSort(UInt32 *indices, const Byte *data, UInt32 blockSize); +UInt32 BlockSort(UInt32 *indices, const Byte *data, size_t blockSize); EXTERN_C_END diff --git a/C/Compiler.h b/C/Compiler.h index 2a9c2b7..b266b27 100644 --- a/C/Compiler.h +++ b/C/Compiler.h @@ -1,5 +1,5 @@ /* Compiler.h : Compiler specific defines and pragmas -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_COMPILER_H #define ZIP7_INC_COMPILER_H @@ -183,6 +183,16 @@ typedef void (*Z7_void_Function)(void); #define Z7_ATTRIB_NO_VECTORIZE #endif +#if defined(Z7_MSC_VER_ORIGINAL) && (Z7_MSC_VER_ORIGINAL >= 1920) + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE _Pragma("optimize ( \"s\", on )") + #define Z7_PRAGMA_OPTIMIZE_DEFAULT _Pragma("optimize ( \"\", on )") +#else + #define Z7_PRAGMA_OPTIMIZE_FOR_CODE_SIZE + #define Z7_PRAGMA_OPTIMIZE_DEFAULT +#endif + + + #if defined(MY_CPU_X86_OR_AMD64) && ( \ defined(__clang__) && (__clang_major__ >= 4) \ || defined(__GNUC__) && (__GNUC__ >= 5)) diff --git a/C/CpuArch.h b/C/CpuArch.h index a6297ea..1690a5b 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h @@ -47,6 +47,12 @@ 
MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_SIZEOF_POINTER 4 #endif +#if defined(__SSE2__) \ + || defined(MY_CPU_AMD64) \ + || defined(_M_IX86_FP) && (_M_IX86_FP >= 2) +#define MY_CPU_SSE2 +#endif + #if defined(_M_ARM64) \ || defined(_M_ARM64EC) \ @@ -571,10 +577,12 @@ problem-4 : performace: #define Z7_CONV_BE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_NATIVE_TO_BE_32(v) (v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b1) | ((b0) << 8)) #elif defined(MY_CPU_LE) #define Z7_CONV_BE_TO_NATIVE_CONST32(v) Z7_BSWAP32_CONST(v) #define Z7_CONV_LE_TO_NATIVE_CONST32(v) (v) #define Z7_CONV_NATIVE_TO_BE_32(v) Z7_BSWAP32(v) +// #define Z7_GET_NATIVE16_FROM_2_BYTES(b0, b1) ((b0) | ((b1) << 8)) #else #error Stop_Compiling_Unknown_Endian_CONV #endif diff --git a/C/HuffEnc.c b/C/HuffEnc.c index 996da30..cbf8c22 100644 --- a/C/HuffEnc.c +++ b/C/HuffEnc.c @@ -1,60 +1,125 @@ /* HuffEnc.c -- functions for Huffman encoding -2023-09-07 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #include "Precomp.h" +#include + #include "HuffEnc.h" #include "Sort.h" +#include "CpuArch.h" -#define kMaxLen 16 -#define NUM_BITS 10 -#define MASK ((1u << NUM_BITS) - 1) +#define kMaxLen Z7_HUFFMAN_LEN_MAX +#define NUM_BITS 10 +#define MASK ((1u << NUM_BITS) - 1) +#define FREQ_MASK (~(UInt32)MASK) +#define NUM_COUNTERS (48 * 2) -#define NUM_COUNTERS 64 +#if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE)) +#if defined(MY_CPU_LE) + #define HI_HALF_OFFSET 1 +#else + #define HI_HALF_OFFSET 0 +#endif +#define LOAD_PARENT(p) ((unsigned)*((const UInt16 *)(p) + HI_HALF_OFFSET)) +#define STORE_PARENT(p, fb, val) *((UInt16 *)(p) + HI_HALF_OFFSET) = (UInt16)(val); +#define STORE_PARENT_DIRECT(p, fb, hi) STORE_PARENT(p, fb, hi) +#define UPDATE_E(eHi) eHi++; +#else +#define LOAD_PARENT(p) ((unsigned)(*(p) >> NUM_BITS)) +#define STORE_PARENT_DIRECT(p, fb, hi) *(p) = ((fb) & MASK) | (hi); // set parent 
field +#define STORE_PARENT(p, fb, val) STORE_PARENT_DIRECT(p, fb, ((UInt32)(val) << NUM_BITS)) +#define UPDATE_E(eHi) eHi += 1 << NUM_BITS; +#endif -#define HUFFMAN_SPEED_OPT - -void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymbols, UInt32 maxLen) +void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSymbols, unsigned maxLen) { - UInt32 num = 0; - /* if (maxLen > 10) maxLen = 10; */ +#if NUM_COUNTERS > 2 + unsigned counters[NUM_COUNTERS]; +#endif +#if 1 && NUM_COUNTERS > (kMaxLen + 4) * 2 + #define lenCounters (counters) + #define codes (counters + kMaxLen + 4) +#else + unsigned lenCounters[kMaxLen + 1]; + UInt32 codes[kMaxLen + 1]; +#endif + + unsigned num; { - UInt32 i; + unsigned i; + // UInt32 sum = 0; + +#if NUM_COUNTERS > 2 - #ifdef HUFFMAN_SPEED_OPT - - UInt32 counters[NUM_COUNTERS]; +#define CTR_ITEM_FOR_FREQ(freq) \ + counters[(freq) >= NUM_COUNTERS - 1 ? NUM_COUNTERS - 1 : (unsigned)(freq)] + for (i = 0; i < NUM_COUNTERS; i++) counters[i] = 0; - for (i = 0; i < numSymbols; i++) + memset(lens, 0, numSymbols); { - UInt32 freq = freqs[i]; - counters[(freq < NUM_COUNTERS - 1) ? 
freq : NUM_COUNTERS - 1]++; + const UInt32 *fp = freqs + numSymbols; +#define NUM_UNROLLS 1 +#if NUM_UNROLLS > 1 // use 1 if odd (numSymbols) is possible + if (numSymbols & 1) + { + UInt32 f; + f = *--fp; CTR_ITEM_FOR_FREQ(f)++; + // sum += f; + } +#endif + do + { + UInt32 f; + fp -= NUM_UNROLLS; + f = fp[0]; CTR_ITEM_FOR_FREQ(f)++; + // sum += f; +#if NUM_UNROLLS > 1 + f = fp[1]; CTR_ITEM_FOR_FREQ(f)++; + // sum += f; +#endif + } + while (fp != freqs); } - - for (i = 1; i < NUM_COUNTERS; i++) +#if 0 + printf("\nsum=%8u numSymbols =%3u ctrs:", sum, numSymbols); { - UInt32 temp = counters[i]; - counters[i] = num; - num += temp; + unsigned k = 0; + for (k = 0; k < NUM_COUNTERS; k++) + printf(" %u", counters[k]); } - - for (i = 0; i < numSymbols; i++) +#endif + + num = counters[1]; + counters[1] = 0; + for (i = 2; i != NUM_COUNTERS; i += 2) { - UInt32 freq = freqs[i]; - if (freq == 0) - lens[i] = 0; - else - p[counters[((freq < NUM_COUNTERS - 1) ? freq : NUM_COUNTERS - 1)]++] = i | (freq << NUM_BITS); + unsigned c; + c = (counters )[i]; (counters )[i] = num; num += c; + c = (counters + 1)[i]; (counters + 1)[i] = num; num += c; + } + counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array + { + i = 0; + do + { + const UInt32 f = freqs[i]; +#if 0 + if (f == 0) lens[i] = 0; else +#endif + p[CTR_ITEM_FOR_FREQ(f)++] = i | (f << NUM_BITS); + } + while (++i != numSymbols); } - counters[0] = 0; HeapSort(p + counters[NUM_COUNTERS - 2], counters[NUM_COUNTERS - 1] - counters[NUM_COUNTERS - 2]); - #else - +#else // NUM_COUNTERS <= 2 + + num = 0; for (i = 0; i < numSymbols; i++) { - UInt32 freq = freqs[i]; + const UInt32 freq = freqs[i]; if (freq == 0) lens[i] = 0; else @@ -62,17 +127,27 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb } HeapSort(p, num); - #endif +#endif } - if (num < 2) + if (num <= 2) { unsigned minCode = 0; unsigned maxCode = 1; - if (num == 1) + if (num) { - maxCode = (unsigned)p[0] & MASK; - if
(maxCode == 0) + maxCode = (unsigned)p[(size_t)num - 1] & MASK; + if (num == 2) + { + minCode = (unsigned)p[0] & MASK; + if (minCode > maxCode) + { + const unsigned temp = minCode; + minCode = maxCode; + maxCode = temp; + } + } + else if (maxCode == 0) maxCode++; } p[minCode] = 0; @@ -80,75 +155,206 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 numSymb lens[minCode] = lens[maxCode] = 1; return; } - { - UInt32 b, e, i; - - i = b = e = 0; - do + unsigned i; + for (i = 0; i <= kMaxLen; i++) + lenCounters[i] = 0; + lenCounters[1] = 2; // by default root node has 2 child leaves at level 1. + } + // if (num != 2) + { + // num > 2 + // the binary tree will contain (num - 1) internal nodes. + // p[num - 2] will be root node of binary tree. + UInt32 *b; + UInt32 *n; + // first node will have two leaf childs: p[0] and p[1]: + // p[0] += p[1] & FREQ_MASK; // set frequency sum of child leafs + // if (pi == n) exit(0); + // if (pi != n) { - UInt32 n, m, freq; - n = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? i++ : b++; - freq = (p[n] & ~MASK); - p[n] = (p[n] & MASK) | (e << NUM_BITS); - m = (i != num && (b == e || (p[i] >> NUM_BITS) <= (p[b] >> NUM_BITS))) ? 
i++ : b++; - freq += (p[m] & ~MASK); - p[m] = (p[m] & MASK) | (e << NUM_BITS); - p[e] = (p[e] & MASK) | freq; - e++; - } - while (num - e > 1); - - { - UInt32 lenCounters[kMaxLen + 1]; - for (i = 0; i <= kMaxLen; i++) - lenCounters[i] = 0; - - p[--e] &= MASK; - lenCounters[1] = 2; - while (e != 0) + UInt32 fb = (p[1] & FREQ_MASK) + p[0]; + UInt32 f = p[2] & FREQ_MASK; + const UInt32 *pi = p + 2; + UInt32 *e = p; + UInt32 eHi = 0; + n = p + num; + b = p; + // p[0] = fb; + for (;;) { - UInt32 len = (p[p[--e] >> NUM_BITS] >> NUM_BITS) + 1; - p[e] = (p[e] & MASK) | (len << NUM_BITS); - if (len >= maxLen) - for (len = maxLen - 1; lenCounters[len] == 0; len--); - lenCounters[len]--; - lenCounters[(size_t)len + 1] += 2; - } - - { - UInt32 len; - i = 0; - for (len = maxLen; len != 0; len--) - { - UInt32 k; - for (k = lenCounters[len]; k != 0; k--) - lens[p[i++] & MASK] = (Byte)len; - } - } - - { - UInt32 nextCodes[kMaxLen + 1]; - { - UInt32 code = 0; - UInt32 len; - for (len = 1; len <= kMaxLen; len++) - nextCodes[len] = code = (code + lenCounters[(size_t)len - 1]) << 1; - } - /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ + // (b <= e) + UInt32 sum; + e++; + UPDATE_E(eHi) + // (b < e) + + // p range : high bits + // [0, b) : parent : processed nodes that have parent and childs + // [b, e) : FREQ : non-processed nodes that have no parent but have childs + // [e, pi) : FREQ : processed leaves for which parent node was created + // [pi, n) : FREQ : non-processed leaves for which parent node was not created + + // first child + // note : (*b < f) is same result as ((*b & FREQ_MASK) < f) + if (fb < f) { - UInt32 k; - for (k = 0; k < numSymbols; k++) - p[k] = nextCodes[lens[k]]++; + // node freq is smaller + sum = fb & FREQ_MASK; + STORE_PARENT_DIRECT (b, fb, eHi) + b++; + fb = *b; + if (b == e) + { + if (++pi == n) + break; + sum += f; + fb &= MASK; + fb |= sum; + *e = fb; + f = *pi & FREQ_MASK; + continue; + } + } + else if (++pi == n) + { + 
STORE_PARENT_DIRECT (b, fb, eHi) + b++; + break; + } + else + { + sum = f; + f = *pi & FREQ_MASK; + } + + // (b < e) + + // second child + if (fb < f) + { + sum += fb; + sum &= FREQ_MASK; + STORE_PARENT_DIRECT (b, fb, eHi) + b++; + *e = (*e & MASK) | sum; // set frequency sum + // (b <= e) is possible here + fb = *b; + } + else if (++pi == n) + break; + else + { + sum += f; + f = *pi & FREQ_MASK; + *e = (*e & MASK) | sum; // set frequency sum } } } + + // printf("\nnum-e=%3u, numSymbols=%3u, num=%3u, b=%3u", n - e, numSymbols, n - p, b - p); + { + n -= 2; + *n &= MASK; // root node : we clear high bits (zero bits mean level == 0) + if (n != b) + { + // We go here, if we have some number of non-created nodes up to root. + // We process them in simplified code: + // position of parent for each pair of nodes is known. + // n[-2], n[-1] : current pair of child nodes + // (p1) : parent node for current pair. + UInt32 *p1 = n; + do + { + const unsigned len = LOAD_PARENT(p1) + 1; + p1--; + (lenCounters )[len] -= 2; // we remove 2 leaves from level (len) + (lenCounters + 1)[len] += 2 * 2; // we add 4 leaves at level (len + 1) + n -= 2; + STORE_PARENT (n , n[0], len) + STORE_PARENT (n + 1, n[1], len) + } + while (n != b); + } + } + + if (b != p) + { + // we detect level of each node (relative to root), + // and update lenCounters[]. + // We process only intermediate nodes and we don't process leaves. + do + { + // if (ii < b) : parent_bits_of (p[ii]) == index of parent node : ii < (p[ii]) + // if (ii >= b) : parent_bits_of (p[ii]) == level of this (ii) node in tree + unsigned len; + b--; + len = (unsigned)LOAD_PARENT(p + LOAD_PARENT(b)) + 1; + STORE_PARENT (b, *b, len) + if (len >= maxLen) + { + // We are not allowed to create node at level (maxLen) and higher, + // because all leaves must be placed to level (maxLen) or lower.
+ // We find nearest allowed leaf and place current node to level of that leaf: + for (len = maxLen - 1; lenCounters[len] == 0; len--) {} + } + lenCounters[len]--; // we remove 1 leaf from level (len) + (lenCounters + 1)[len] += 2; // we add 2 leaves at level (len + 1) + } + while (b != p); + } + } + { + { + unsigned len = maxLen; + const UInt32 *p2 = p; + do + { + unsigned k = lenCounters[len]; + if (k) + do + lens[(unsigned)*p2++ & MASK] = (Byte)len; + while (--k); + } + while (--len); + } + codes[0] = 0; // we don't want garbage values to be written to p[] array. + // codes[1] = 0; + { + UInt32 code = 0; + unsigned len; + for (len = 0; len < kMaxLen; len++) + (codes + 1)[len] = code = (code + lenCounters[len]) << 1; + } + /* if (code + lenCounters[kMaxLen] - 1 != (1 << kMaxLen) - 1) throw 1; */ + { + const Byte * const limit = lens + numSymbols; + do + { + unsigned len; + UInt32 c; + len = lens[0]; c = codes[len]; p[0] = c; codes[len] = c + 1; + // len = lens[1]; c = codes[len]; p[1] = c; codes[len] = c + 1; + p += 1; + lens += 1; + } + while (lens != limit); + } } } #undef kMaxLen #undef NUM_BITS #undef MASK +#undef FREQ_MASK #undef NUM_COUNTERS -#undef HUFFMAN_SPEED_OPT +#undef CTR_ITEM_FOR_FREQ +#undef LOAD_PARENT +#undef STORE_PARENT +#undef STORE_PARENT_DIRECT +#undef UPDATE_E +#undef HI_HALF_OFFSET +#undef NUM_UNROLLS +#undef lenCounters +#undef codes diff --git a/C/HuffEnc.h b/C/HuffEnc.h index cbc5d11..2217f55 100644 --- a/C/HuffEnc.h +++ b/C/HuffEnc.h @@ -1,5 +1,5 @@ /* HuffEnc.h -- Huffman encoding -2023-03-05 : Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_HUFF_ENC_H #define ZIP7_INC_HUFF_ENC_H @@ -8,14 +8,14 @@ EXTERN_C_BEGIN +#define Z7_HUFFMAN_LEN_MAX 16 /* Conditions: - num <= 1024 = 2 ^ NUM_BITS + 2 <= num <= 1024 = 2 ^ NUM_BITS Sum(freqs) < 4M = 2 ^ (32 - NUM_BITS) - maxLen <= 16 = kMaxLen + 1 <= maxLen <= 16 = Z7_HUFFMAN_LEN_MAX Num_Items(p) >= HUFFMAN_TEMP_SIZE(num) */ - void Huffman_Generate(const UInt32 
*freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen); EXTERN_C_END diff --git a/C/LzFind.c b/C/LzFind.c index 1ce4046..6aba919 100644 --- a/C/LzFind.c +++ b/C/LzFind.c @@ -1,5 +1,5 @@ /* LzFind.c -- Match finder for LZ algorithms -2024-03-01 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -404,7 +404,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, const unsigned nbMax = (p->numHashBytes == 2 ? 16 : (p->numHashBytes == 3 ? 24 : 32)); - if (numBits > nbMax) + if (numBits >= nbMax) numBits = nbMax; if (numBits >= 32) hs = (UInt32)0 - 1; @@ -416,14 +416,14 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, hs |= (256 << kLzHash_CrcShift_2) - 1; { const UInt32 hs2 = MatchFinder_GetHashMask2(p, historySize); - if (hs > hs2) + if (hs >= hs2) hs = hs2; } hsCur = hs; if (p->expectedDataSize < historySize) { const UInt32 hs2 = MatchFinder_GetHashMask2(p, (UInt32)p->expectedDataSize); - if (hsCur > hs2) + if (hsCur >= hs2) hsCur = hs2; } } @@ -434,7 +434,7 @@ int MatchFinder_Create(CMatchFinder *p, UInt32 historySize, if (p->expectedDataSize < historySize) { hsCur = MatchFinder_GetHashMask(p, (UInt32)p->expectedDataSize); - if (hsCur > hs) // is it possible? + if (hsCur >= hs) // is it possible? hsCur = hs; } } @@ -890,7 +890,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, return d; { const Byte *pb = cur - delta; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { UInt32 len = 0; @@ -925,7 +925,7 @@ static UInt32 * Hc_GetMatchesSpec(size_t lenLimit, UInt32 curMatch, UInt32 pos, break; { ptrdiff_t diff; - curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)]; + curMatch = son[_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? 
_cyclicBufferSize : 0)]; diff = (ptrdiff_t)0 - (ptrdiff_t)delta; if (cur[maxLen] == cur[(ptrdiff_t)maxLen + diff]) { @@ -972,7 +972,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt // if (curMatch >= pos) { *ptr0 = *ptr1 = kEmptyHashValue; return NULL; } cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (cmCheck < curMatch) @@ -980,7 +980,7 @@ UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byt { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); const UInt32 pair0 = pair[0]; @@ -1039,7 +1039,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const UInt32 cmCheck; cmCheck = (UInt32)(pos - _cyclicBufferSize); - if ((UInt32)pos <= _cyclicBufferSize) + if ((UInt32)pos < _cyclicBufferSize) cmCheck = 0; if (// curMatch >= pos || // failure @@ -1048,7 +1048,7 @@ static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const { const UInt32 delta = pos - curMatch; { - CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); + CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + (_cyclicBufferPos < delta ? _cyclicBufferSize : 0)) << 1); const Byte *pb = cur - delta; unsigned len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) @@ -1595,7 +1595,7 @@ static void Bt5_MatchFinder_Skip(void *_p, UInt32 num) UInt32 pos = p->pos; \ UInt32 num2 = num; \ /* (p->pos == p->posLimit) is not allowed here !!! 
*/ \ - { const UInt32 rem = p->posLimit - pos; if (num2 > rem) num2 = rem; } \ + { const UInt32 rem = p->posLimit - pos; if (num2 >= rem) num2 = rem; } \ num -= num2; \ { const UInt32 cycPos = p->cyclicBufferPos; \ son = p->son + cycPos; \ diff --git a/C/LzFindMt.c b/C/LzFindMt.c index ac9d59d..25fcc46 100644 --- a/C/LzFindMt.c +++ b/C/LzFindMt.c @@ -1,5 +1,5 @@ /* LzFindMt.c -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -82,6 +82,8 @@ extern UInt64 g_NumIters_Bytes; Z7_NO_INLINE static void MtSync_Construct(CMtSync *p) { + p->affinityGroup = -1; + p->affinityInGroup = 0; p->affinity = 0; p->wasCreated = False; p->csWasInitialized = False; @@ -259,6 +261,12 @@ static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void * // return ERROR_TOO_MANY_POSTS; // for debug // return EINVAL; // for debug +#ifdef _WIN32 + if (p->affinityGroup >= 0) + wres = Thread_Create_With_Group(&p->thread, startAddress, obj, + (unsigned)(UInt32)p->affinityGroup, (CAffinityMask)p->affinityInGroup); + else +#endif if (p->affinity != 0) wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); else diff --git a/C/LzFindMt.h b/C/LzFindMt.h index fcb479d..89984f5 100644 --- a/C/LzFindMt.h +++ b/C/LzFindMt.h @@ -1,5 +1,5 @@ /* LzFindMt.h -- multithreaded Match finder for LZ algorithms -2024-01-22 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZ_FIND_MT_H #define ZIP7_INC_LZ_FIND_MT_H @@ -12,8 +12,10 @@ EXTERN_C_BEGIN typedef struct { UInt32 numProcessedBlocks; - CThread thread; + Int32 affinityGroup; + UInt64 affinityInGroup; UInt64 affinity; + CThread thread; BoolInt wasCreated; BoolInt needStart; diff --git a/C/Lzma2Enc.c b/C/Lzma2Enc.c index 703e146..72aec69 100644 --- a/C/Lzma2Enc.c +++ b/C/Lzma2Enc.c @@ -1,5 +1,5 @@ /* Lzma2Enc.c -- LZMA2 Encoder -2023-04-13 : Igor Pavlov : Public domain */ +: 
Igor Pavlov : Public domain */ #include "Precomp.h" @@ -235,6 +235,7 @@ void Lzma2EncProps_Init(CLzma2EncProps *p) p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; p->numTotalThreads = -1; + p->numThreadGroups = 0; } void Lzma2EncProps_Normalize(CLzma2EncProps *p) @@ -781,6 +782,7 @@ SRes Lzma2Enc_Encode2(CLzma2EncHandle p, } p->mtCoder.numThreadsMax = (unsigned)p->props.numBlockThreads_Max; + p->mtCoder.numThreadGroups = p->props.numThreadGroups; p->mtCoder.expectedDataSize = p->expectedDataSize; { diff --git a/C/Lzma2Enc.h b/C/Lzma2Enc.h index cb25275..1e6b50c 100644 --- a/C/Lzma2Enc.h +++ b/C/Lzma2Enc.h @@ -18,6 +18,7 @@ typedef struct int numBlockThreads_Reduced; int numBlockThreads_Max; int numTotalThreads; + unsigned numThreadGroups; // 0 : no groups } CLzma2EncProps; void Lzma2EncProps_Init(CLzma2EncProps *p); diff --git a/C/LzmaEnc.c b/C/LzmaEnc.c index 088b78f..84a29a5 100644 --- a/C/LzmaEnc.c +++ b/C/LzmaEnc.c @@ -62,7 +62,9 @@ void LzmaEncProps_Init(CLzmaEncProps *p) p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1; p->numHashOutBits = 0; p->writeEndMark = 0; + p->affinityGroup = -1; p->affinity = 0; + p->affinityInGroup = 0; } void LzmaEncProps_Normalize(CLzmaEncProps *p) @@ -598,6 +600,10 @@ SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props2) p->multiThread = (props.numThreads > 1); p->matchFinderMt.btSync.affinity = p->matchFinderMt.hashSync.affinity = props.affinity; + p->matchFinderMt.btSync.affinityGroup = + p->matchFinderMt.hashSync.affinityGroup = props.affinityGroup; + p->matchFinderMt.btSync.affinityInGroup = + p->matchFinderMt.hashSync.affinityInGroup = props.affinityInGroup; #endif return SZ_OK; diff --git a/C/LzmaEnc.h b/C/LzmaEnc.h index 9f8039a..3feb5b4 100644 --- a/C/LzmaEnc.h +++ b/C/LzmaEnc.h @@ -1,5 +1,5 @@ /* LzmaEnc.h -- LZMA Encoder -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_LZMA_ENC_H #define 
ZIP7_INC_LZMA_ENC_H @@ -29,11 +29,13 @@ typedef struct int numThreads; /* 1 or 2, default = 2 */ // int _pad; + Int32 affinityGroup; UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1. Encoder uses this value to reduce dictionary size */ UInt64 affinity; + UInt64 affinityInGroup; } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps *p); diff --git a/C/MtCoder.c b/C/MtCoder.c index 03959b6..923b19a 100644 --- a/C/MtCoder.c +++ b/C/MtCoder.c @@ -1,5 +1,5 @@ /* MtCoder.c -- Multi-thread Coder -2023-09-07 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -39,14 +39,28 @@ void MtProgressThunk_CreateVTable(CMtProgressThunk *p) static THREAD_FUNC_DECL ThreadFunc(void *pp); -static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) +static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t +#ifdef _WIN32 + , CMtCoder * const mtc +#endif + ) { WRes wres = AutoResetEvent_OptCreate_And_Reset(&t->startEvent); + // printf("\n====== MtCoderThread_CreateAndStart : \n"); if (wres == 0) { t->stop = False; if (!Thread_WasCreated(&t->thread)) - wres = Thread_Create(&t->thread, ThreadFunc, t); + { +#ifdef _WIN32 + if (mtc->numThreadGroups) + wres = Thread_Create_With_Group(&t->thread, ThreadFunc, t, + ThreadNextGroup_GetNext(&mtc->nextGroup), // group + 0); // affinityMask + else +#endif + wres = Thread_Create(&t->thread, ThreadFunc, t); + } if (wres == 0) wres = Event_Set(&t->startEvent); } @@ -56,6 +70,7 @@ static SRes MtCoderThread_CreateAndStart(CMtCoderThread *t) } +Z7_FORCE_INLINE static void MtCoderThread_Destruct(CMtCoderThread *t) { if (Thread_WasCreated(&t->thread)) @@ -85,7 +100,7 @@ static void MtCoderThread_Destruct(CMtCoderThread *t) static SRes ThreadFunc2(CMtCoderThread *t) { - CMtCoder *mtc = t->mtCoder; + CMtCoder * const mtc = t->mtCoder; for (;;) { @@ -185,7 +200,11 @@ static SRes ThreadFunc2(CMtCoderThread *t) if (mtc->numStartedThreads < mtc->numStartedThreadsLimit && 
mtc->expectedDataSize != readProcessed) { - res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads]); + res = MtCoderThread_CreateAndStart(&mtc->threads[mtc->numStartedThreads] +#ifdef _WIN32 + , mtc +#endif + ); if (res == SZ_OK) mtc->numStartedThreads++; else @@ -221,7 +240,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) } { - CMtCoderBlock *block = &mtc->blocks[bi]; + CMtCoderBlock * const block = &mtc->blocks[bi]; block->res = res; block->bufIndex = bufIndex; block->finished = finished; @@ -311,7 +330,7 @@ static SRes ThreadFunc2(CMtCoderThread *t) static THREAD_FUNC_DECL ThreadFunc(void *pp) { - CMtCoderThread *t = (CMtCoderThread *)pp; + CMtCoderThread * const t = (CMtCoderThread *)pp; for (;;) { if (Event_Wait(&t->startEvent) != 0) @@ -319,7 +338,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp) if (t->stop) return 0; { - SRes res = ThreadFunc2(t); + const SRes res = ThreadFunc2(t); CMtCoder *mtc = t->mtCoder; if (res != SZ_OK) { @@ -328,7 +347,7 @@ static THREAD_FUNC_DECL ThreadFunc(void *pp) #ifndef MTCODER_USE_WRITE_THREAD { - unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); + const unsigned numFinished = (unsigned)InterlockedIncrement(&mtc->numFinishedThreads); if (numFinished == mtc->numStartedThreads) if (Event_Set(&mtc->finishedEvent) != 0) return (THREAD_FUNC_RET_TYPE)SZ_ERROR_THREAD; @@ -346,6 +365,7 @@ void MtCoder_Construct(CMtCoder *p) p->blockSize = 0; p->numThreadsMax = 0; + p->numThreadGroups = 0; p->expectedDataSize = (UInt64)(Int64)-1; p->inStream = NULL; @@ -429,6 +449,8 @@ SRes MtCoder_Code(CMtCoder *p) unsigned i; SRes res = SZ_OK; + // printf("\n====== MtCoder_Code : \n"); + if (numThreads > MTCODER_THREADS_MAX) numThreads = MTCODER_THREADS_MAX; numBlocksMax = MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads); @@ -492,11 +514,22 @@ SRes MtCoder_Code(CMtCoder *p) p->numStartedThreadsLimit = numThreads; p->numStartedThreads = 0; + ThreadNextGroup_Init(&p->nextGroup, p->numThreadGroups, 0); // 
startGroup // for (i = 0; i < numThreads; i++) { + // here we create new thread for first block. + // And each new thread will create another new thread after block reading + // until numStartedThreadsLimit is reached. CMtCoderThread *nextThread = &p->threads[p->numStartedThreads++]; - RINOK(MtCoderThread_CreateAndStart(nextThread)) + { + const SRes res2 = MtCoderThread_CreateAndStart(nextThread +#ifdef _WIN32 + , p +#endif + ); + RINOK(res2) + } } RINOK_THREAD(Event_Set(&p->readEvent)) @@ -513,9 +546,9 @@ SRes MtCoder_Code(CMtCoder *p) RINOK_THREAD(Event_Wait(&p->writeEvents[bi])) { - const CMtCoderBlock *block = &p->blocks[bi]; - unsigned bufIndex = block->bufIndex; - BoolInt finished = block->finished; + const CMtCoderBlock * const block = &p->blocks[bi]; + const unsigned bufIndex = block->bufIndex; + const BoolInt finished = block->finished; if (res == SZ_OK && block->res != SZ_OK) res = block->res; @@ -545,7 +578,7 @@ SRes MtCoder_Code(CMtCoder *p) } #else { - WRes wres = Event_Wait(&p->finishedEvent); + const WRes wres = Event_Wait(&p->finishedEvent); res = MY_SRes_HRESULT_FROM_WRes(wres); } #endif diff --git a/C/MtCoder.h b/C/MtCoder.h index 1231d3c..8166cca 100644 --- a/C/MtCoder.h +++ b/C/MtCoder.h @@ -1,5 +1,5 @@ /* MtCoder.h -- Multi-thread Coder -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_MT_CODER_H #define ZIP7_INC_MT_CODER_H @@ -16,7 +16,7 @@ EXTERN_C_BEGIN #ifndef Z7_ST #define MTCODER_GET_NUM_BLOCKS_FROM_THREADS(numThreads) ((numThreads) + (numThreads) / 8 + 1) - #define MTCODER_THREADS_MAX 64 + #define MTCODER_THREADS_MAX 256 #define MTCODER_BLOCKS_MAX (MTCODER_GET_NUM_BLOCKS_FROM_THREADS(MTCODER_THREADS_MAX) + 3) #else #define MTCODER_THREADS_MAX 1 @@ -77,6 +77,7 @@ typedef struct CMtCoder_ size_t blockSize; /* size of input block */ unsigned numThreadsMax; + unsigned numThreadGroups; UInt64 expectedDataSize; ISeqInStreamPtr inStream; @@ -125,6 +126,8 @@ typedef struct CMtCoder_ CMtProgress 
mtProgress; CMtCoderBlock blocks[MTCODER_BLOCKS_MAX]; CMtCoderThread threads[MTCODER_THREADS_MAX]; + + CThreadNextGroup nextGroup; } CMtCoder; diff --git a/C/Sha512.c b/C/Sha512.c index 04827d6..f0787fd 100644 --- a/C/Sha512.c +++ b/C/Sha512.c @@ -439,26 +439,78 @@ void Sha512_Final(CSha512 *p, Byte *digest, unsigned digestSize) +// #define Z7_SHA512_PROBE_DEBUG // for debug -#if defined(_WIN32) && defined(Z7_COMPILER_SHA512_SUPPORTED) \ - && defined(MY_CPU_ARM64) // we can disable this check to debug in x64 +#if defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED) -#if 1 // 0 for debug - -#include "7zWindows.h" -// #include <stdio.h> -#if 0 && defined(MY_CPU_X86_OR_AMD64) -#include <intrin.h> // for debug : for __ud2() +#if defined(Z7_SHA512_PROBE_DEBUG) \ + || defined(_WIN32) && defined(MY_CPU_ARM64) +#ifndef Z7_SHA512_USE_PROBE +#define Z7_SHA512_USE_PROBE +#endif #endif -BoolInt CPU_IsSupported_SHA512(void) +#ifdef Z7_SHA512_USE_PROBE + +#ifdef Z7_SHA512_PROBE_DEBUG +#include <stdio.h> +#define PRF(x) x +#else +#define PRF(x) +#endif + +#if 0 || !defined(_MSC_VER) // 1 || : for debug LONGJMP mode +// MINGW doesn't support __try. So we use signal() / longjmp(). +// Note: signal() / longjmp() probably is not thread-safe. +// So we must call Sha512Prepare() from main thread at program start. 
+#ifndef Z7_SHA512_USE_LONGJMP +#define Z7_SHA512_USE_LONGJMP +#endif +#endif + +#ifdef Z7_SHA512_USE_LONGJMP +#include <signal.h> +#include <setjmp.h> +static jmp_buf g_Sha512_jmp_buf; +// static int g_Sha512_Unsupported; + +#if defined(__GNUC__) && (__GNUC__ >= 8) \ + || defined(__clang__) && (__clang_major__ >= 3) + __attribute__((noreturn)) +#endif +static void Z7_CDECL Sha512_signal_Handler(int v) { + PRF(printf("======== Sha512_signal_Handler = %x\n", (unsigned)v);) + // g_Sha512_Unsupported = 1; + longjmp(g_Sha512_jmp_buf, 1); +} +#endif // Z7_SHA512_USE_LONGJMP + + +#if defined(_WIN32) +#include "7zWindows.h" +#endif + #if defined(MY_CPU_ARM64) +// #define Z7_SHA512_USE_SIMPLIFIED_PROBE // for debug +#endif + +#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE +#include <arm_neon.h> +#if defined(__clang__) + __attribute__((__target__("sha3"))) +#elif !defined(_MSC_VER) + __attribute__((__target__("arch=armv8.2-a+sha3"))) +#endif +#endif +static BoolInt CPU_IsSupported_SHA512_Probe(void) +{ + PRF(printf("\n== CPU_IsSupported_SHA512_Probe\n");) +#if defined(_WIN32) && defined(MY_CPU_ARM64) // we have no SHA512 flag for IsProcessorFeaturePresent() still. if (!CPU_IsSupported_CRYPTO()) return False; -#endif - // printf("\nCPU_IsSupported_SHA512\n"); + PRF(printf("==== Registry check\n");) { // we can't read ID_AA64ISAR0_EL1 register from application. // but ID_AA64ISAR0_EL1 register is mapped to "CP 4030" registry value. @@ -486,6 +538,7 @@ BoolInt CPU_IsSupported_SHA512(void) // 2 : SHA256 and SHA512 implemented } } +#endif // defined(_WIN32) && defined(MY_CPU_ARM64) #if 1 // 0 for debug to disable SHA512 PROBE code @@ -509,59 +562,97 @@ Does this PROBE code work in native Windows-arm64 (with/without sha512 hw instru Are there any ways to fix the problems with arm64-wine and x64-SDE cases? 
*/ - // printf("\n========== CPU_IsSupported_SHA512 PROBE ========\n"); + PRF(printf("==== CPU_IsSupported_SHA512 PROBE\n");) { + BoolInt isSupported = False; +#ifdef Z7_SHA512_USE_LONGJMP + void (Z7_CDECL *signal_prev)(int); + /* + if (g_Sha512_Unsupported) + { + PRF(printf("==== g_Sha512_Unsupported\n");) + return False; + } + */ + PRF(printf("====== signal(SIGILL)\n");) + signal_prev = signal(SIGILL, Sha512_signal_Handler); + if (signal_prev == SIG_ERR) + { + PRF(printf("====== signal fail\n");) + return False; + } + // PRF(printf("==== signal_prev = %p\n", (void *)signal_prev);) + // docs: Before the specified function is executed, + // the value of func is set to SIG_DFL. + // So we can exit if (setjmp(g_Sha512_jmp_buf) != 0). + PRF(printf("====== setjmp\n");) + if (!setjmp(g_Sha512_jmp_buf)) +#else // Z7_SHA512_USE_LONGJMP + +#ifdef _MSC_VER #ifdef __clang_major__ #pragma GCC diagnostic ignored "-Wlanguage-extension-token" #endif __try +#endif +#endif // Z7_SHA512_USE_LONGJMP + { -#if 0 // 1 : for debug (reduced version to detect sha512) +#if defined(Z7_COMPILER_SHA512_SUPPORTED) +#ifdef Z7_SHA512_USE_SIMPLIFIED_PROBE + // simplified sha512 check for arm64: const uint64x2_t a = vdupq_n_u64(1); const uint64x2_t b = vsha512hq_u64(a, a, a); + PRF(printf("======== vsha512hq_u64 probe\n");) if ((UInt32)vgetq_lane_u64(b, 0) == 0x11800002) - return True; #else MY_ALIGN(16) UInt64 temp[SHA512_NUM_DIGEST_WORDS + SHA512_NUM_BLOCK_WORDS]; memset(temp, 0x5a, sizeof(temp)); -#if 0 && defined(MY_CPU_X86_OR_AMD64) - __ud2(); // for debug : that exception is not problem for SDE -#endif -#if 1 + PRF(printf("======== Sha512_UpdateBlocks_HW\n");) Sha512_UpdateBlocks_HW(temp, (const Byte *)(const void *)(temp + SHA512_NUM_DIGEST_WORDS), 1); - // printf("\n==== t = %x\n", (UInt32)temp[0]); + // PRF(printf("======== t = %x\n", (UInt32)temp[0]);) if ((UInt32)temp[0] == 0xa33cfdf7) +#endif { - // printf("\n=== PROBE SHA512: SHA512 supported\n"); - return True; + 
PRF(printf("======== 
PROBE SHA512: SHA512 is supported\n");) + isSupported = True; } +#else // Z7_COMPILER_SHA512_SUPPORTED + // for debug : we generate bad instruction or raise exception. + // __except() doesn't catch raise() calls. +#ifdef Z7_SHA512_USE_LONGJMP + PRF(printf("====== raise(SIGILL)\n");) + raise(SIGILL); +#else +#if defined(_MSC_VER) && defined(MY_CPU_X86) + __asm ud2 #endif -#endif +#endif // Z7_SHA512_USE_LONGJMP +#endif // Z7_COMPILER_SHA512_SUPPORTED } + +#ifdef Z7_SHA512_USE_LONGJMP + PRF(printf("====== restore signal SIGILL\n");) + signal(SIGILL, signal_prev); +#elif _MSC_VER __except (EXCEPTION_EXECUTE_HANDLER) { - // printf("\n==== CPU_IsSupported_SHA512 EXCEPTION_EXECUTE_HANDLER\n"); + PRF(printf("==== CPU_IsSupported_SHA512 __except(EXCEPTION_EXECUTE_HANDLER)\n");) } +#endif + PRF(printf("== return (sha512 supported) = %d\n", isSupported);) + return isSupported; } - return False; #else // without SHA512 PROBE code return True; #endif - } -#else - -BoolInt CPU_IsSupported_SHA512(void) -{ - return False; -} - -#endif -#endif // WIN32 arm64 +#endif // Z7_SHA512_USE_PROBE +#endif // defined(Z7_SHA512_PROBE_DEBUG) || defined(Z7_COMPILER_SHA512_SUPPORTED) void Sha512Prepare(void) @@ -570,10 +661,10 @@ void Sha512Prepare(void) SHA512_FUNC_UPDATE_BLOCKS f, f_hw; f = Sha512_UpdateBlocks; f_hw = NULL; -#ifdef MY_CPU_X86_OR_AMD64 - if (CPU_IsSupported_SHA512() - && CPU_IsSupported_AVX2() - ) +#ifdef Z7_SHA512_USE_PROBE + if (CPU_IsSupported_SHA512_Probe()) +#elif defined(MY_CPU_X86_OR_AMD64) + if (CPU_IsSupported_SHA512() && CPU_IsSupported_AVX2()) #else if (CPU_IsSupported_SHA512()) #endif @@ -583,6 +674,8 @@ void Sha512Prepare(void) } g_SHA512_FUNC_UPDATE_BLOCKS = f; g_SHA512_FUNC_UPDATE_BLOCKS_HW = f_hw; +#elif defined(Z7_SHA512_PROBE_DEBUG) + CPU_IsSupported_SHA512_Probe(); // for debug #endif } diff --git a/C/Sort.c b/C/Sort.c index e1097e3..20e3e69 100644 --- a/C/Sort.c +++ b/C/Sort.c @@ -1,141 +1,268 @@ /* Sort.c -- Sort functions -2014-04-05 : Igor Pavlov : Public 
domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" #include "Sort.h" +#include "CpuArch.h" -#define HeapSortDown(p, k, size, temp) \ - { for (;;) { \ - size_t s = (k << 1); \ - if (s > size) break; \ - if (s < size && p[s + 1] > p[s]) s++; \ - if (temp >= p[s]) break; \ - p[k] = p[s]; k = s; \ - } p[k] = temp; } +#if ( (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \ + || (defined(__clang__) && Z7_has_builtin(__builtin_prefetch)) \ + ) +// the code with prefetch is slow for small arrays on x86. +// So we disable prefetch for x86. +#ifndef MY_CPU_X86 + // #pragma message("Z7_PREFETCH : __builtin_prefetch") + #define Z7_PREFETCH(a) __builtin_prefetch((a)) +#endif -void HeapSort(UInt32 *p, size_t size) -{ - if (size <= 1) - return; - p--; - { - size_t i = size / 2; - do - { - UInt32 temp = p[i]; - size_t k = i; - HeapSortDown(p, k, size, temp) - } - while (--i != 0); - } - /* - do - { - size_t k = 1; - UInt32 temp = p[size]; - p[size--] = p[1]; - HeapSortDown(p, k, size, temp) - } - while (size > 1); - */ - while (size > 3) - { - UInt32 temp = p[size]; - size_t k = (p[3] > p[2]) ? 3 : 2; - p[size--] = p[1]; - p[1] = p[k]; - HeapSortDown(p, k, size, temp) - } - { - UInt32 temp = p[size]; - p[size] = p[1]; - if (size > 2 && p[2] < temp) - { - p[1] = p[2]; - p[2] = temp; - } - else - p[1] = temp; - } -} +#elif defined(_WIN32) // || defined(_MSC_VER) && (_MSC_VER >= 1200) -void HeapSort64(UInt64 *p, size_t size) -{ - if (size <= 1) - return; - p--; - { - size_t i = size / 2; - do - { - UInt64 temp = p[i]; - size_t k = i; - HeapSortDown(p, k, size, temp) - } - while (--i != 0); - } - /* - do - { - size_t k = 1; - UInt64 temp = p[size]; - p[size--] = p[1]; - HeapSortDown(p, k, size, temp) - } - while (size > 1); - */ - while (size > 3) - { - UInt64 temp = p[size]; - size_t k = (p[3] > p[2]) ? 
3 : 2; - p[size--] = p[1]; - p[1] = p[k]; - HeapSortDown(p, k, size, temp) - } - { - UInt64 temp = p[size]; - p[size] = p[1]; - if (size > 2 && p[2] < temp) - { - p[1] = p[2]; - p[2] = temp; - } - else - p[1] = temp; - } -} +#include "7zWindows.h" + +// NOTE: CLANG/GCC/MSVC can define different values for _MM_HINT_T0 / PF_TEMPORAL_LEVEL_1. +// For example, clang-cl can generate "prefetcht2" instruction for +// PreFetchCacheLine(PF_TEMPORAL_LEVEL_1) call. +// But we want to generate "prefetcht0" instruction. +// So for CLANG/GCC we must use __builtin_prefetch() in code branch above +// instead of PreFetchCacheLine() / _mm_prefetch(). + +// New msvc-x86 compiler generates "prefetcht0" instruction for PreFetchCacheLine() call. +// But old x86 cpus don't support "prefetcht0". +// So we will use PreFetchCacheLine(), only if we are sure that +// generated instruction is supported by all cpus of that isa. +#if defined(MY_CPU_AMD64) \ + || defined(MY_CPU_ARM64) \ + || defined(MY_CPU_IA64) +// we need to use additional braces for (a) in PreFetchCacheLine call, because +// PreFetchCacheLine macro doesn't use braces: +// #define PreFetchCacheLine(l, a) _mm_prefetch((CHAR CONST *) a, l) + // #pragma message("Z7_PREFETCH : PreFetchCacheLine") + #define Z7_PREFETCH(a) PreFetchCacheLine(PF_TEMPORAL_LEVEL_1, (a)) +#endif + +#endif // _WIN32 + + +#define PREFETCH_NO(p,k,s,size) + +#ifndef Z7_PREFETCH + #define SORT_PREFETCH(p,k,s,size) +#else + +// #define PREFETCH_LEVEL 2 // use it if cache line is 32-bytes +#define PREFETCH_LEVEL 3 // it is fast for most cases (64-bytes cache line prefetch) +// #define PREFETCH_LEVEL 4 // it can be faster for big array (128-bytes prefetch) + +#if PREFETCH_LEVEL == 0 + + #define SORT_PREFETCH(p,k,s,size) + +#else // PREFETCH_LEVEL != 0 /* -#define HeapSortRefDown(p, vals, n, size, temp) \ - { size_t k = n; UInt32 val = vals[temp]; for (;;) { \ - size_t s = (k << 1); \ - if (s > size) break; \ - if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \ - 
if (val >= vals[p[s]]) break; \ - p[k] = p[s]; k = s; \ - } p[k] = temp; } +if defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + we prefetch one value per cache line. + Use it if array is aligned for cache line size (64 bytes) + or if array is small (less than L1 cache size). -void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size) +if !defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + we prefetch all cache lines that can be required. + it can be faster for big unaligned arrays. +*/ + #define USE_PREFETCH_FOR_ALIGNED_ARRAY + +// s == k * 2 +#if 0 && PREFETCH_LEVEL <= 3 && defined(MY_CPU_X86_OR_AMD64) + // x86 supports (lea r1*8+offset) + #define PREFETCH_OFFSET(k,s) ((s) << PREFETCH_LEVEL) +#else + #define PREFETCH_OFFSET(k,s) ((k) << (PREFETCH_LEVEL + 1)) +#endif + +#if 1 && PREFETCH_LEVEL <= 3 && defined(USE_PREFETCH_FOR_ALIGNED_ARRAY) + #define PREFETCH_ADD_OFFSET 0 +#else + // last offset that can be required in PREFETCH_LEVEL step: + #define PREFETCH_RANGE ((2 << PREFETCH_LEVEL) - 1) + #define PREFETCH_ADD_OFFSET PREFETCH_RANGE / 2 +#endif + +#if PREFETCH_LEVEL <= 3 + +#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_ADD_OFFSET; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2)); \ + }} +#else /* for unaligned array */ + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \ + Z7_PREFETCH((p + s2)); \ + }} +#endif + +#else // PREFETCH_LEVEL > 3 + +#ifdef USE_PREFETCH_FOR_ALIGNED_ARRAY + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE - 16 / 2; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - 16)); \ + Z7_PREFETCH((p + s2)); \ + }} +#else /* for unaligned array */ + #define SORT_PREFETCH(p,k,s,size) \ + { const size_t s2 = PREFETCH_OFFSET(k,s) + PREFETCH_RANGE; \ + if (s2 <= size) { \ + Z7_PREFETCH((p + s2 - PREFETCH_RANGE)); \ + Z7_PREFETCH((p + 
s2 - PREFETCH_RANGE / 2)); \ + Z7_PREFETCH((p + s2)); \ + }} +#endif + +#endif // PREFETCH_LEVEL > 3 +#endif // PREFETCH_LEVEL != 0 +#endif // Z7_PREFETCH + + +#if defined(MY_CPU_ARM64) \ + /* || defined(MY_CPU_AMD64) */ \ + /* || defined(MY_CPU_ARM) && !defined(_MSC_VER) */ + // we want to use cmov, if cmov is very fast: + // - this cmov version is slower for clang-x64. + // - this cmov version is faster for gcc-arm64 for some fast arm64 cpus. + #define Z7_FAST_CMOV_SUPPORTED +#endif + +#ifdef Z7_FAST_CMOV_SUPPORTED + // we want to use cmov here, if cmov is fast: new arm64 cpus. + // we want the compiler to use conditional move for this branch + #define GET_MAX_VAL(n0, n1, max_val_slow) if (n0 < n1) n0 = n1; +#else + // use this branch, if cpu doesn't support fast conditional move. + // it uses slow array access reading: + #define GET_MAX_VAL(n0, n1, max_val_slow) n0 = max_val_slow; +#endif + +#define HeapSortDown(p, k, size, temp, macro_prefetch) \ +{ \ + for (;;) { \ + UInt32 n0, n1; \ + size_t s = k * 2; \ + if (s >= size) { \ + if (s == size) { \ + n0 = p[s]; \ + p[k] = n0; \ + if (temp < n0) k = s; \ + } \ + break; \ + } \ + n0 = p[k * 2]; \ + n1 = p[k * 2 + 1]; \ + s += n0 < n1; \ + GET_MAX_VAL(n0, n1, p[s]) \ + if (temp >= n0) break; \ + macro_prefetch(p, k, s, size) \ + p[k] = n0; \ + k = s; \ + } \ + p[k] = temp; \ +} + + +/* +stage-1 : O(n) : + we generate intermediate partially sorted binary tree: + p[0] : it's additional item for better alignment of tree structure in memory. + p[1] + p[2] p[3] + p[4] p[5] p[6] p[7] + ... + p[x] >= p[x * 2] + p[x] >= p[x * 2 + 1] + +stage-2 : O(n)*log2(N): + we move largest item p[0] from head of tree to the end of array + and insert last item to sorted binary tree. 
+*/ + +// (p) must be aligned for cache line size (64-bytes) for best performance + +void Z7_FASTCALL HeapSort(UInt32 *p, size_t size) { - if (size <= 1) + if (size < 2) return; - p--; + if (size == 2) { - size_t i = size / 2; + const UInt32 a0 = p[0]; + const UInt32 a1 = p[1]; + const unsigned k = a1 < a0; + p[k] = a0; + p[k ^ 1] = a1; + return; + } + { + // stage-1 : O(n) + // we transform array to partially sorted binary tree. + size_t i = --size / 2; + // (size) now is the index of the last item in tree, + // if (i) + { + do + { + const UInt32 temp = p[i]; + size_t k = i; + HeapSortDown(p, k, size, temp, PREFETCH_NO) + } + while (--i); + } + { + const UInt32 temp = p[0]; + const UInt32 a1 = p[1]; + if (temp < a1) + { + size_t k = 1; + p[0] = a1; + HeapSortDown(p, k, size, temp, PREFETCH_NO) + } + } + } + + if (size < 3) + { + // size == 2 + const UInt32 a0 = p[0]; + p[0] = p[2]; + p[2] = a0; + return; + } + if (size != 3) + { + // stage-2 : O(size) * log2(size): + // we move largest item p[0] from head to the end of array, + // and insert last item to sorted binary tree. do { - UInt32 temp = p[i]; - HeapSortRefDown(p, vals, i, size, temp); + const UInt32 temp = p[size]; + size_t k = p[2] < p[3] ? 
3 : 2; + p[size--] = p[0]; + p[0] = p[1]; + p[1] = p[k]; + HeapSortDown(p, k, size, temp, SORT_PREFETCH) // PREFETCH_NO } - while (--i != 0); + while (size != 3); } - do { - UInt32 temp = p[size]; - p[size--] = p[1]; - HeapSortRefDown(p, vals, 1, size, temp); + const UInt32 a2 = p[2]; + const UInt32 a3 = p[3]; + const size_t k = a2 < a3; + p[2] = p[1]; + p[3] = p[0]; + p[k] = a3; + p[k ^ 1] = a2; } - while (size > 1); } -*/ diff --git a/C/Sort.h b/C/Sort.h index 1817b65..de5a4e8 100644 --- a/C/Sort.h +++ b/C/Sort.h @@ -1,5 +1,5 @@ /* Sort.h -- Sort functions -2023-03-05 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_SORT_H #define ZIP7_INC_SORT_H @@ -8,10 +8,7 @@ EXTERN_C_BEGIN -void HeapSort(UInt32 *p, size_t size); -void HeapSort64(UInt64 *p, size_t size); - -/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */ +void Z7_FASTCALL HeapSort(UInt32 *p, size_t size); EXTERN_C_END diff --git a/C/Threads.c b/C/Threads.c index 464efec..177d1d9 100644 --- a/C/Threads.c +++ b/C/Threads.c @@ -1,5 +1,5 @@ /* Threads.c -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -59,6 +59,100 @@ WRes Thread_Wait_Close(CThread *p) return (res != 0 ? 
res : res2); } +typedef struct MY_PROCESSOR_NUMBER { + WORD Group; + BYTE Number; + BYTE Reserved; +} MY_PROCESSOR_NUMBER, *MY_PPROCESSOR_NUMBER; + +typedef struct MY_GROUP_AFFINITY { +#if defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION < 100000) + // KAFFINITY is not defined in old mingw + ULONG_PTR +#else + KAFFINITY +#endif + Mask; + WORD Group; + WORD Reserved[3]; +} MY_GROUP_AFFINITY, *MY_PGROUP_AFFINITY; + +typedef BOOL (WINAPI *Func_SetThreadGroupAffinity)( + HANDLE hThread, + CONST MY_GROUP_AFFINITY *GroupAffinity, + MY_PGROUP_AFFINITY PreviousGroupAffinity); + +typedef BOOL (WINAPI *Func_GetThreadGroupAffinity)( + HANDLE hThread, + MY_PGROUP_AFFINITY GroupAffinity); + +typedef BOOL (WINAPI *Func_GetProcessGroupAffinity)( + HANDLE hProcess, + PUSHORT GroupCount, + PUSHORT GroupArray); + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +#if 0 +#include <stdio.h> +#define PRF(x) x +/* +-- + before call of SetThreadGroupAffinity() + GetProcessGroupAffinity return one group. + after call of SetThreadGroupAffinity(): + GetProcessGroupAffinity return more than one group, + if SetThreadGroupAffinity() was to another group. +-- + GetProcessAffinityMask MS DOCs: + { + If the calling process contains threads in multiple groups, + the function returns zero for both affinity masks. + } + but tests in win10 with 2 groups (less than 64 cores total): + GetProcessAffinityMask() still returns non-zero affinity masks + even after SetThreadGroupAffinity() calls. 
+*/ +static void PrintProcess_Info() +{ + { + const + Func_GetProcessGroupAffinity fn_GetProcessGroupAffinity = + (Func_GetProcessGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetProcessGroupAffinity"); + if (fn_GetProcessGroupAffinity) + { + unsigned i; + USHORT GroupCounts[64]; + USHORT GroupCount = Z7_ARRAY_SIZE(GroupCounts); + BOOL boolRes = fn_GetProcessGroupAffinity(GetCurrentProcess(), + &GroupCount, GroupCounts); + printf("\n====== GetProcessGroupAffinity : " + "boolRes=%u GroupCounts = %u :", + boolRes, (unsigned)GroupCount); + for (i = 0; i < GroupCount; i++) + printf(" %u", GroupCounts[i]); + printf("\n"); + } + } + { + DWORD_PTR processAffinityMask, systemAffinityMask; + if (GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask)) + { + PRF(printf("\n====== GetProcessAffinityMask : " + ": processAffinityMask=%x, systemAffinityMask=%x\n", + (UInt32)processAffinityMask, (UInt32)systemAffinityMask);) + } + else + printf("\n==GetProcessAffinityMask FAIL"); + } +} +#else +#ifndef USE_THREADS_CreateThread +// #define PRF(x) +#endif +#endif + WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) { /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ @@ -72,7 +166,43 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) unsigned threadId; *p = (HANDLE)(_beginthreadex(NULL, 0, func, param, 0, &threadId)); - + +#if 0 // 1 : for debug + { + DWORD_PTR prevMask; + DWORD_PTR affinity = 1 << 0; + prevMask = SetThreadAffinityMask(*p, (DWORD_PTR)affinity); + prevMask = prevMask; + } +#endif +#if 0 // 1 : for debug + { + /* win10: new thread will be created in same group that is assigned to parent thread + but affinity mask will contain all allowed threads of that group, + even if affinity mask of parent group is not full + win11: what group it will be created, if we have set + affinity of parent thread with 
ThreadGroupAffinity? + */ + const + Func_GetThreadGroupAffinity fn = + (Func_GetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "GetThreadGroupAffinity"); + if (fn) + { + // BOOL wres2; + MY_GROUP_AFFINITY groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + /* wres2 = */ fn(*p, &groupAffinity); + PRF(printf("\n==Thread_Create cur = %6u GetThreadGroupAffinity(): " + "wres2_BOOL = %u, group=%u mask=%x\n", + GetCurrentThreadId(), + wres2, + groupAffinity.Group, + (UInt32)groupAffinity.Mask);) + } + } +#endif + #endif /* maybe we must use errno here, but probably GetLastError() is also OK. */ @@ -110,7 +240,84 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param */ } { - DWORD prevSuspendCount = ResumeThread(h); + const DWORD prevSuspendCount = ResumeThread(h); + /* ResumeThread() returns: + 0 : was_not_suspended + 1 : was_resumed + -1 : error + */ + if (prevSuspendCount == (DWORD)-1) + wres = GetError(); + } + } + + /* maybe we must use errno here, but probably GetLastError() is also OK. 
*/ + return wres; + + #endif +} + + +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask) +{ +#ifdef USE_THREADS_CreateThread + + UNUSED_VAR(group) + UNUSED_VAR(affinityMask) + return Thread_Create(p, func, param); + +#else + + /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ + HANDLE h; + WRes wres; + unsigned threadId; + h = (HANDLE)(_beginthreadex(NULL, 0, func, param, CREATE_SUSPENDED, &threadId)); + *p = h; + wres = HandleToWRes(h); + if (h) + { + // PrintProcess_Info(); + { + const + Func_SetThreadGroupAffinity fn = + (Func_SetThreadGroupAffinity) Z7_CAST_FUNC_C GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), + "SetThreadGroupAffinity"); + if (fn) + { + // WRes wres2; + MY_GROUP_AFFINITY groupAffinity, prev_groupAffinity; + memset(&groupAffinity, 0, sizeof(groupAffinity)); + // groupAffinity.Mask must use only bits that supported by current group + // (groupAffinity.Mask = 0) means all allowed bits + groupAffinity.Mask = affinityMask; + groupAffinity.Group = (WORD)group; + // wres2 = + fn(h, &groupAffinity, &prev_groupAffinity); + /* + if (groupAffinity.Group == prev_groupAffinity.Group) + wres2 = wres2; + else + wres2 = wres2; + if (wres2 == 0) + { + wres2 = GetError(); + PRF(printf("\n==SetThreadGroupAffinity error: %u\n", wres2);) + } + else + { + PRF(printf("\n==Thread_Create_With_Group::SetThreadGroupAffinity()" + " threadId = %6u" + " group=%u mask=%x\n", + threadId, + prev_groupAffinity.Group, + (UInt32)prev_groupAffinity.Mask);) + } + */ + } + } + { + const DWORD prevSuspendCount = ResumeThread(h); /* ResumeThread() returns: 0 : was_not_suspended 1 : was_resumed @@ -297,6 +504,13 @@ WRes Thread_Create(CThread *p, THREAD_FUNC_TYPE func, LPVOID param) return Thread_Create_With_CpuSet(p, func, param, NULL); } +/* +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask 
affinity) +{ + UNUSED_VAR(group) + return Thread_Create_With_Affinity(p, func, param, affinity); +} +*/ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, CAffinityMask affinity) { @@ -577,5 +791,22 @@ WRes AutoResetEvent_OptCreate_And_Reset(CAutoResetEvent *p) return AutoResetEvent_CreateNotSignaled(p); } +void ThreadNextGroup_Init(CThreadNextGroup *p, UInt32 numGroups, UInt32 startGroup) +{ + // printf("\n====== ThreadNextGroup_Init numGroups = %x: startGroup=%x\n", numGroups, startGroup); + if (numGroups == 0) + numGroups = 1; + p->NumGroups = numGroups; + p->NextGroup = startGroup % numGroups; +} + + +UInt32 ThreadNextGroup_GetNext(CThreadNextGroup *p) +{ + const UInt32 next = p->NextGroup; + p->NextGroup = (next + 1) % p->NumGroups; + return next; +} + #undef PRF #undef Print diff --git a/C/Threads.h b/C/Threads.h index c1484a2..be12e6e 100644 --- a/C/Threads.h +++ b/C/Threads.h @@ -1,5 +1,5 @@ /* Threads.h -- multithreading library -2024-03-28 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_THREADS_H #define ZIP7_INC_THREADS_H @@ -140,12 +140,22 @@ WRes Thread_Create_With_Affinity(CThread *p, THREAD_FUNC_TYPE func, LPVOID param WRes Thread_Wait_Close(CThread *p); #ifdef _WIN32 +WRes Thread_Create_With_Group(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, unsigned group, CAffinityMask affinityMask); #define Thread_Create_With_CpuSet(p, func, param, cs) \ Thread_Create_With_Affinity(p, func, param, *cs) #else WRes Thread_Create_With_CpuSet(CThread *p, THREAD_FUNC_TYPE func, LPVOID param, const CCpuSet *cpuSet); #endif +typedef struct +{ + unsigned NumGroups; + unsigned NextGroup; +} CThreadNextGroup; + +void ThreadNextGroup_Init(CThreadNextGroup *p, unsigned numGroups, unsigned startGroup); +unsigned ThreadNextGroup_GetNext(CThreadNextGroup *p); + #ifdef _WIN32 diff --git a/C/Util/Lzma/LzmaUtil.dsp b/C/Util/Lzma/LzmaUtil.dsp index e2e7d42..71de950 100644 --- a/C/Util/Lzma/LzmaUtil.dsp +++ 
b/C/Util/Lzma/LzmaUtil.dsp @@ -122,6 +122,10 @@ SOURCE=..\..\Compiler.h # End Source File # Begin Source File +SOURCE=..\..\CpuArch.c +# End Source File +# Begin Source File + SOURCE=..\..\CpuArch.h # End Source File # Begin Source File diff --git a/C/Util/LzmaLib/LzmaLib.dsp b/C/Util/LzmaLib/LzmaLib.dsp index bacd967..f413137 100644 --- a/C/Util/LzmaLib/LzmaLib.dsp +++ b/C/Util/LzmaLib/LzmaLib.dsp @@ -43,7 +43,7 @@ RSC=rc.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MT /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /c -# ADD CPP /nologo /Gr /MT /W3 /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c +# ADD CPP /nologo /Gr /MT /W4 /WX /O2 /D "NDEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /FD /c # SUBTRACT CPP /YX # ADD BASE MTL /nologo /D "NDEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "NDEBUG" /mktyplib203 /win32 @@ -71,7 +71,7 @@ LINK32=link.exe # PROP Ignore_Export_Lib 0 # PROP Target_Dir "" # ADD BASE CPP /nologo /MTd /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /YX /FD /GZ /c -# ADD CPP /nologo /MTd /W3 /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c +# ADD CPP /nologo /MTd /W4 /WX /Gm /ZI /Od /D "_DEBUG" /D "WIN32" /D "_WINDOWS" /D "_MBCS" /D "_USRDLL" /D "LZMALIB_EXPORTS" /D "COMPRESS_MF_MT" /FD /GZ /c # SUBTRACT CPP /YX # ADD BASE MTL /nologo /D "_DEBUG" /mktyplib203 /win32 # ADD MTL /nologo /D "_DEBUG" /mktyplib203 /win32 @@ -128,6 +128,10 @@ SOURCE=..\..\Compiler.h # End Source File # Begin Source File +SOURCE=..\..\CpuArch.c +# End Source File +# Begin Source File + SOURCE=..\..\CpuArch.h # End Source File # Begin Source File diff --git a/C/Xz.h b/C/Xz.h index 42bc685..ad63b48 100644 --- a/C/Xz.h +++ b/C/Xz.h @@ -1,5 +1,5 @@ /* Xz.h - Xz interface -2024-01-26 : 
Igor Pavlov : Public domain */ +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_H #define ZIP7_INC_XZ_H @@ -121,6 +121,7 @@ typedef struct UInt64 startOffset; } CXzStream; +#define Xz_CONSTRUCT(p) { (p)->numBlocks = 0; (p)->blocks = NULL; (p)->flags = 0; } void Xz_Construct(CXzStream *p); void Xz_Free(CXzStream *p, ISzAllocPtr alloc); @@ -136,8 +137,13 @@ typedef struct CXzStream *streams; } CXzs; +#define Xzs_CONSTRUCT(p) { (p)->num = 0; (p)->numAllocated = 0; (p)->streams = NULL; } void Xzs_Construct(CXzs *p); void Xzs_Free(CXzs *p, ISzAllocPtr alloc); +/* +Xzs_ReadBackward() must be called for empty CXzs object. +Xzs_ReadBackward() can return non empty object with (p->num != 0) even in case of error. +*/ SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr inStream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc); UInt64 Xzs_GetNumBlocks(const CXzs *p); @@ -268,8 +274,8 @@ typedef struct size_t outBufSize; size_t outDataWritten; // the size of data in (outBuf) that were fully unpacked - Byte shaDigest[SHA256_DIGEST_SIZE]; - Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; + UInt32 shaDigest32[SHA256_DIGEST_SIZE / 4]; + Byte buf[XZ_BLOCK_HEADER_SIZE_MAX]; // it must be aligned for 4-bytes } CXzUnpacker; /* alloc : aligned for cache line allocation is better */ diff --git a/C/XzCrc64Opt.c b/C/XzCrc64Opt.c index 0c1fc2f..6eea4a3 100644 --- a/C/XzCrc64Opt.c +++ b/C/XzCrc64Opt.c @@ -1,5 +1,5 @@ /* XzCrc64Opt.c -- CRC64 calculation (optimized functions) -2023-12-08 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -235,7 +235,7 @@ CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE) v = Q32BE(1, w1) ^ Q32BE(0, w0); v ^= Q32BE(3, d1) ^ Q32BE(2, d0); #endif -#elif +#else #error Stop_Compiling_Bad_CRC64_NUM_TABLES #endif p += Z7_CRC64_NUM_TABLES_USE; diff --git a/C/XzDec.c b/C/XzDec.c index 3d1c98e..2dac324 100644 --- a/C/XzDec.c +++ b/C/XzDec.c @@ -1,5 +1,5 @@ /* XzDec.c -- Xz Decode -2024-03-01 : Igor Pavlov : Public domain */ +: 
Igor Pavlov : Public domain */ #include "Precomp.h" @@ -59,7 +59,7 @@ unsigned Xz_ReadVarInt(const Byte *p, size_t maxSize, UInt64 *value) for (i = 0; i < limit;) { - Byte b = p[i]; + const unsigned b = p[i]; *value |= (UInt64)(b & 0x7F) << (7 * i++); if ((b & 0x80) == 0) return (b == 0 && i != 1) ? 0 : i; @@ -796,11 +796,10 @@ SRes Xz_ParseHeader(CXzStreamFlags *p, const Byte *buf) static BoolInt Xz_CheckFooter(CXzStreamFlags flags, UInt64 indexSize, const Byte *buf) { - return indexSize == (((UInt64)GetUi32(buf + 4) + 1) << 2) - && GetUi32(buf) == CrcCalc(buf + 4, 6) - && flags == GetBe16(buf + 8) - && buf[10] == XZ_FOOTER_SIG_0 - && buf[11] == XZ_FOOTER_SIG_1; + return indexSize == (((UInt64)GetUi32a(buf + 4) + 1) << 2) + && GetUi32a(buf) == CrcCalc(buf + 4, 6) + && flags == GetBe16a(buf + 8) + && GetUi16a(buf + 10) == (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8)); } #define READ_VARINT_AND_CHECK(buf, pos, size, res) \ @@ -1166,7 +1165,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, p->indexPreSize = 1 + Xz_WriteVarInt(p->buf + 1, p->numBlocks); p->indexPos = p->indexPreSize; p->indexSize += p->indexPreSize; - Sha256_Final(&p->sha, p->shaDigest); + Sha256_Final(&p->sha, (Byte *)(void *)p->shaDigest32); Sha256_Init(&p->sha); p->crc = CrcUpdate(CRC_INIT_VAL, p->buf, p->indexPreSize); p->state = XZ_STATE_STREAM_INDEX; @@ -1241,10 +1240,10 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, break; } { - Byte digest[XZ_CHECK_SIZE_MAX]; + UInt32 digest32[XZ_CHECK_SIZE_MAX / 4]; p->state = XZ_STATE_BLOCK_HEADER; p->pos = 0; - if (XzCheck_Final(&p->check, digest) && memcmp(digest, p->buf, checkSize) != 0) + if (XzCheck_Final(&p->check, (void *)digest32) && memcmp(digest32, p->buf, checkSize) != 0) return SZ_ERROR_CRC; if (p->decodeOnlyOneBlock) { @@ -1289,12 +1288,12 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, } else { - Byte digest[SHA256_DIGEST_SIZE]; + UInt32 digest32[SHA256_DIGEST_SIZE / 4]; p->state = 
XZ_STATE_STREAM_INDEX_CRC; p->indexSize += 4; p->pos = 0; - Sha256_Final(&p->sha, digest); - if (memcmp(digest, p->shaDigest, SHA256_DIGEST_SIZE) != 0) + Sha256_Final(&p->sha, (void *)digest32); + if (memcmp(digest32, p->shaDigest32, SHA256_DIGEST_SIZE) != 0) return SZ_ERROR_CRC; } } @@ -1313,7 +1312,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, const Byte *ptr = p->buf; p->state = XZ_STATE_STREAM_FOOTER; p->pos = 0; - if (CRC_GET_DIGEST(p->crc) != GetUi32(ptr)) + if (CRC_GET_DIGEST(p->crc) != GetUi32a(ptr)) return SZ_ERROR_CRC; } break; @@ -1343,7 +1342,7 @@ SRes XzUnpacker_Code(CXzUnpacker *p, Byte *dest, SizeT *destLen, { if (*src != 0) { - if (((UInt32)p->padSize & 3) != 0) + if ((unsigned)p->padSize & 3) return SZ_ERROR_NO_ARCHIVE; p->pos = 0; p->state = XZ_STATE_STREAM_HEADER; diff --git a/C/XzEnc.c b/C/XzEnc.c index c1affad..e40f0c8 100644 --- a/C/XzEnc.c +++ b/C/XzEnc.c @@ -1,5 +1,5 @@ /* XzEnc.c -- Xz Encode -2024-03-01 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" @@ -411,6 +411,7 @@ static SRes SeqInFilter_Read(ISeqInStreamPtr pp, void *data, size_t *size) } } +Z7_FORCE_INLINE static void SeqInFilter_Construct(CSeqInFilter *p) { p->buf = NULL; @@ -418,6 +419,7 @@ static void SeqInFilter_Construct(CSeqInFilter *p) p->vt.Read = SeqInFilter_Read; } +Z7_FORCE_INLINE static void SeqInFilter_Free(CSeqInFilter *p, ISzAllocPtr alloc) { if (p->StateCoder.p) @@ -507,6 +509,7 @@ void XzFilterProps_Init(CXzFilterProps *p) void XzProps_Init(CXzProps *p) { p->checkId = XZ_CHECK_CRC32; + p->numThreadGroups = 0; p->blockSize = XZ_PROPS_BLOCK_SIZE_AUTO; p->numBlockThreads_Reduced = -1; p->numBlockThreads_Max = -1; @@ -689,6 +692,7 @@ typedef struct } CLzma2WithFilters; +Z7_FORCE_INLINE static void Lzma2WithFilters_Construct(CLzma2WithFilters *p) { p->lzma2 = NULL; @@ -712,6 +716,7 @@ static SRes Lzma2WithFilters_Create(CLzma2WithFilters *p, ISzAllocPtr alloc, ISz } +Z7_FORCE_INLINE static void 
Lzma2WithFilters_Free(CLzma2WithFilters *p, ISzAllocPtr alloc) { #ifdef USE_SUBBLOCK @@ -1236,6 +1241,7 @@ SRes XzEnc_Encode(CXzEncHandle p, ISeqOutStreamPtr outStream, ISeqInStreamPtr in } p->mtCoder.numThreadsMax = (unsigned)props->numBlockThreads_Max; + p->mtCoder.numThreadGroups = props->numThreadGroups; p->mtCoder.expectedDataSize = p->expectedDataSize; RINOK(MtCoder_Code(&p->mtCoder)) diff --git a/C/XzEnc.h b/C/XzEnc.h index 77b78c0..ac6bbf7 100644 --- a/C/XzEnc.h +++ b/C/XzEnc.h @@ -1,5 +1,5 @@ /* XzEnc.h -- Xz Encode -2023-04-13 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #ifndef ZIP7_INC_XZ_ENC_H #define ZIP7_INC_XZ_ENC_H @@ -31,6 +31,7 @@ typedef struct CLzma2EncProps lzma2Props; CXzFilterProps filterProps; unsigned checkId; + unsigned numThreadGroups; // 0 : no groups UInt64 blockSize; int numBlockThreads_Reduced; int numBlockThreads_Max; diff --git a/C/XzIn.c b/C/XzIn.c index b68af96..ba31636 100644 --- a/C/XzIn.c +++ b/C/XzIn.c @@ -1,38 +1,39 @@ /* XzIn.c - Xz input -2023-09-07 : Igor Pavlov : Public domain */ +: Igor Pavlov : Public domain */ #include "Precomp.h" #include #include "7zCrc.h" -#include "CpuArch.h" #include "Xz.h" +#include "CpuArch.h" -/* -#define XZ_FOOTER_SIG_CHECK(p) (memcmp((p), XZ_FOOTER_SIG, XZ_FOOTER_SIG_SIZE) == 0) -*/ -#define XZ_FOOTER_SIG_CHECK(p) ((p)[0] == XZ_FOOTER_SIG_0 && (p)[1] == XZ_FOOTER_SIG_1) - +#define XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(p) \ + (GetUi16a((const Byte *)(const void *)(p) + 10) == \ + (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8))) SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream) { - Byte sig[XZ_STREAM_HEADER_SIZE]; + UInt32 data32[XZ_STREAM_HEADER_SIZE / 4]; size_t processedSize = XZ_STREAM_HEADER_SIZE; - RINOK(SeqInStream_ReadMax(inStream, sig, &processedSize)) + RINOK(SeqInStream_ReadMax(inStream, data32, &processedSize)) if (processedSize != XZ_STREAM_HEADER_SIZE - || memcmp(sig, XZ_SIG, XZ_SIG_SIZE) != 0) + || memcmp(data32, XZ_SIG, XZ_SIG_SIZE) != 0) return 
SZ_ERROR_NO_ARCHIVE; - return Xz_ParseHeader(p, sig); + return Xz_ParseHeader(p, (const Byte *)(const void *)data32); } -#define READ_VARINT_AND_CHECK(buf, pos, size, res) \ - { const unsigned s = Xz_ReadVarInt(buf + pos, size - pos, res); \ +#define READ_VARINT_AND_CHECK(buf, size, res) \ +{ const unsigned s = Xz_ReadVarInt(buf, size, res); \ if (s == 0) return SZ_ERROR_ARCHIVE; \ - pos += s; } + size -= s; \ + buf += s; \ +} SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes) { + MY_ALIGN(4) Byte header[XZ_BLOCK_HEADER_SIZE_MAX]; unsigned headerSize; *headerSizeRes = 0; @@ -57,8 +58,12 @@ SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, return XzBlock_Parse(p, header); } + #define ADD_SIZE_CHECK(size, val) \ - { const UInt64 newSize = size + (val); if (newSize < size) return XZ_SIZE_OVERFLOW; size = newSize; } +{ const UInt64 newSize = size + (val); \ + if (newSize < size) return XZ_SIZE_OVERFLOW; \ + size = newSize; \ +} UInt64 Xz_GetUnpackSize(const CXzStream *p) { @@ -82,76 +87,85 @@ UInt64 Xz_GetPackSize(const CXzStream *p) return size; } -/* -SRes XzBlock_ReadFooter(CXzBlock *p, CXzStreamFlags f, ISeqInStreamPtr inStream) -{ - return SeqInStream_Read(inStream, p->check, XzFlags_GetCheckSize(f)); -} -*/ -static SRes Xz_ReadIndex2(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc) +// input: +// CXzStream (p) is empty object. +// size != 0 +// (size & 3) == 0 +// (buf) is aligned for at least 4 bytes. +// output: +// p->numBlocks is number of allocated items in p->blocks +// p->blocks[*] values must be ignored, if function returns error. 
+static SRes Xz_ParseIndex(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc) { - size_t numBlocks, pos = 1; - UInt32 crc; - + size_t numBlocks; if (size < 5 || buf[0] != 0) return SZ_ERROR_ARCHIVE; - size -= 4; - crc = CrcCalc(buf, size); - if (crc != GetUi32(buf + size)) - return SZ_ERROR_ARCHIVE; - + { + const UInt32 crc = CrcCalc(buf, size); + if (crc != GetUi32a(buf + size)) + return SZ_ERROR_ARCHIVE; + } + buf++; + size--; { UInt64 numBlocks64; - READ_VARINT_AND_CHECK(buf, pos, size, &numBlocks64) + READ_VARINT_AND_CHECK(buf, size, &numBlocks64) + // (numBlocks64) is 63-bit value, so we can calculate (numBlocks64 * 2): + if (numBlocks64 * 2 > size) + return SZ_ERROR_ARCHIVE; + if (numBlocks64 >= ((size_t)1 << (sizeof(size_t) * 8 - 1)) / sizeof(CXzBlockSizes)) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE numBlocks = (size_t)numBlocks64; - if (numBlocks != numBlocks64 || numBlocks * 2 > size) - return SZ_ERROR_ARCHIVE; } - - Xz_Free(p, alloc); - if (numBlocks != 0) + // Xz_Free(p, alloc); // it's optional, because (p) is empty already + if (numBlocks) { - size_t i; - p->numBlocks = numBlocks; - p->blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks); - if (!p->blocks) + CXzBlockSizes *blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks); + if (!blocks) return SZ_ERROR_MEM; - for (i = 0; i < numBlocks; i++) + p->blocks = blocks; + p->numBlocks = numBlocks; + // the caller will call Xz_Free() in case of error + do { - CXzBlockSizes *block = &p->blocks[i]; - READ_VARINT_AND_CHECK(buf, pos, size, &block->totalSize) - READ_VARINT_AND_CHECK(buf, pos, size, &block->unpackSize) - if (block->totalSize == 0) + READ_VARINT_AND_CHECK(buf, size, &blocks->totalSize) + READ_VARINT_AND_CHECK(buf, size, &blocks->unpackSize) + if (blocks->totalSize == 0) return SZ_ERROR_ARCHIVE; + blocks++; } + while (--numBlocks); } - while ((pos & 3) != 0) - if (buf[pos++] != 0) + if (size >= 4) + return SZ_ERROR_ARCHIVE; + 
while (size) + if (buf[--size]) return SZ_ERROR_ARCHIVE; - return (pos == size) ? SZ_OK : SZ_ERROR_ARCHIVE; + return SZ_OK; } + +/* static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc) { SRes res; size_t size; Byte *buf; - if (indexSize > ((UInt32)1 << 31)) - return SZ_ERROR_UNSUPPORTED; + if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1))) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE size = (size_t)indexSize; - if (size != indexSize) - return SZ_ERROR_UNSUPPORTED; buf = (Byte *)ISzAlloc_Alloc(alloc, size); if (!buf) return SZ_ERROR_MEM; res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED); if (res == SZ_OK) - res = Xz_ReadIndex2(p, buf, size, alloc); + res = Xz_ParseIndex(p, buf, size, alloc); ISzAlloc_Free(alloc, buf); return res; } +*/ static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size) { @@ -160,84 +174,102 @@ static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */ } + +/* +in: + (*startOffset) is position in (stream) where xz_stream must be finished. +out: + if returns SZ_OK, then (*startOffset) is position in stream that shows start of xz_stream. 
+*/ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc) { - UInt64 indexSize; - Byte buf[XZ_STREAM_FOOTER_SIZE]; + #define TEMP_BUF_SIZE (1 << 10) + UInt32 buf32[TEMP_BUF_SIZE / 4]; UInt64 pos = (UInt64)*startOffset; - if ((pos & 3) != 0 || pos < XZ_STREAM_FOOTER_SIZE) + if ((pos & 3) || pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; - pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE)) - if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32)) { - UInt32 total = 0; pos += XZ_STREAM_FOOTER_SIZE; - for (;;) { - size_t i; - #define TEMP_BUF_SIZE (1 << 10) - Byte temp[TEMP_BUF_SIZE]; - - i = (pos > TEMP_BUF_SIZE) ? TEMP_BUF_SIZE : (size_t)pos; + // pos != 0 + // (pos & 3) == 0 + size_t i = pos >= TEMP_BUF_SIZE ? TEMP_BUF_SIZE : (size_t)pos; pos -= i; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, temp, i)) - total += (UInt32)i; - for (; i != 0; i--) - if (temp[i - 1] != 0) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, i)) + i /= 4; + do + if (buf32[i - 1] != 0) break; - if (i != 0) - { - if ((i & 3) != 0) - return SZ_ERROR_NO_ARCHIVE; - pos += i; - break; - } - if (pos < XZ_STREAM_FOOTER_SIZE || total > (1 << 16)) + while (--i); + + pos += i * 4; + #define XZ_STREAM_BACKWARD_READING_PAD_MAX (1 << 16) + // here we don't support rare case with big padding for xz stream. + // so we have padding limit for backward reading. + if ((UInt64)*startOffset - pos > XZ_STREAM_BACKWARD_READING_PAD_MAX) return SZ_ERROR_NO_ARCHIVE; + if (i) + break; } - + // we try to open xz stream after skipping zero padding. + // ((UInt64)*startOffset == pos) is possible here! 
if (pos < XZ_STREAM_FOOTER_SIZE) return SZ_ERROR_NO_ARCHIVE; pos -= XZ_STREAM_FOOTER_SIZE; - RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf, XZ_STREAM_FOOTER_SIZE)) - if (!XZ_FOOTER_SIG_CHECK(buf + 10)) + RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE)) + if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32)) return SZ_ERROR_NO_ARCHIVE; } - p->flags = (CXzStreamFlags)GetBe16(buf + 8); - + p->flags = (CXzStreamFlags)GetBe16a(buf32 + 2); if (!XzFlags_IsSupported(p->flags)) return SZ_ERROR_UNSUPPORTED; - { /* to eliminate GCC 6.3 warning: dereferencing type-punned pointer will break strict-aliasing rules */ - const Byte *buf_ptr = buf; - if (GetUi32(buf_ptr) != CrcCalc(buf + 4, 6)) + const UInt32 *buf_ptr = buf32; + if (GetUi32a(buf_ptr) != CrcCalc(buf32 + 1, 6)) return SZ_ERROR_ARCHIVE; } - - indexSize = ((UInt64)GetUi32(buf + 4) + 1) << 2; - - if (pos < indexSize) - return SZ_ERROR_ARCHIVE; - - pos -= indexSize; - RINOK(LookInStream_SeekTo(stream, pos)) - RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) - { - UInt64 totalSize = Xz_GetPackSize(p); - if (totalSize == XZ_SIZE_OVERFLOW - || totalSize >= ((UInt64)1 << 63) - || pos < totalSize + XZ_STREAM_HEADER_SIZE) + const UInt64 indexSize = ((UInt64)GetUi32a(buf32 + 1) + 1) << 2; + if (pos < indexSize) return SZ_ERROR_ARCHIVE; - pos -= (totalSize + XZ_STREAM_HEADER_SIZE); + pos -= indexSize; + // v25.00: relaxed indexSize check. We allow big index table. 
+ // if (indexSize > ((UInt32)1 << 31)) + if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1))) + return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE + RINOK(LookInStream_SeekTo(stream, pos)) + // RINOK(Xz_ReadIndex(p, stream, indexSize, alloc)) + { + SRes res; + const size_t size = (size_t)indexSize; + // if (size != indexSize) return SZ_ERROR_UNSUPPORTED; + Byte *buf = (Byte *)ISzAlloc_Alloc(alloc, size); + if (!buf) + return SZ_ERROR_MEM; + res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED); + if (res == SZ_OK) + res = Xz_ParseIndex(p, buf, size, alloc); + ISzAlloc_Free(alloc, buf); + RINOK(res) + } + } + { + UInt64 total = Xz_GetPackSize(p); + if (total == XZ_SIZE_OVERFLOW || total >= ((UInt64)1 << 63)) + return SZ_ERROR_ARCHIVE; + total += XZ_STREAM_HEADER_SIZE; + if (pos < total) + return SZ_ERROR_ARCHIVE; + pos -= total; RINOK(LookInStream_SeekTo(stream, pos)) *startOffset = (Int64)pos; } @@ -246,7 +278,6 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO CSecToRead secToRead; SecToRead_CreateVTable(&secToRead); secToRead.realStream = stream; - RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt)) return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE; } @@ -257,8 +288,7 @@ static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startO void Xzs_Construct(CXzs *p) { - p->num = p->numAllocated = 0; - p->streams = 0; + Xzs_CONSTRUCT(p) } void Xzs_Free(CXzs *p, ISzAllocPtr alloc) @@ -268,7 +298,7 @@ void Xzs_Free(CXzs *p, ISzAllocPtr alloc) Xz_Free(&p->streams[i], alloc); ISzAlloc_Free(alloc, p->streams); p->num = p->numAllocated = 0; - p->streams = 0; + p->streams = NULL; } UInt64 Xzs_GetNumBlocks(const CXzs *p) @@ -307,34 +337,49 @@ UInt64 Xzs_GetPackSize(const CXzs *p) SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc) { Int64 endOffset = 0; + // it's supposed that CXzs object is empty here. 
+ // if CXzs object is not empty, it will add new streams to that non-empty object. + // Xzs_Free(p, alloc); // it's optional call to empty CXzs object. RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END)) *startOffset = endOffset; for (;;) { CXzStream st; SRes res; - Xz_Construct(&st); + Xz_CONSTRUCT(&st) res = Xz_ReadBackward(&st, stream, startOffset, alloc); + // if (res == SZ_OK), then (*startOffset) is start offset of new stream + // if (res != SZ_OK), then (*startOffset) is unchanged or it's expected start offset of stream with error st.startOffset = (UInt64)*startOffset; - RINOK(res) + // we must store (st) object to array, or we must free (st) local object. + if (res != SZ_OK) + { + Xz_Free(&st, alloc); + return res; + } if (p->num == p->numAllocated) { const size_t newNum = p->num + p->num / 4 + 1; void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream)); if (!data) + { + Xz_Free(&st, alloc); return SZ_ERROR_MEM; + } p->numAllocated = newNum; if (p->num != 0) memcpy(data, p->streams, p->num * sizeof(CXzStream)); ISzAlloc_Free(alloc, p->streams); p->streams = (CXzStream *)data; } + // we use direct copying of raw data from local variable (st) to object in array. 
+ // so we don't need to call Xz_Free(&st, alloc) after copying and after p->num++ p->streams[p->num++] = st; if (*startOffset == 0) - break; - RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) + return SZ_OK; + // seek operation is optional: + // RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset)) if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK) return SZ_ERROR_PROGRESS; } - return SZ_OK; } diff --git a/CPP/7zip/7zip_gcc.mak b/CPP/7zip/7zip_gcc.mak index bcb06a0..12f1ef2 100644 --- a/CPP/7zip/7zip_gcc.mak +++ b/CPP/7zip/7zip_gcc.mak @@ -1245,8 +1245,6 @@ $O/Sha512.o: ../../../../C/Sha512.c $(CC) $(CFLAGS) $< $O/Sha512Opt.o: ../../../../C/Sha512Opt.c $(CC) $(CFLAGS) $< -$O/Sort.o: ../../../../C/Sort.c - $(CC) $(CFLAGS) $< $O/SwapBytes.o: ../../../../C/SwapBytes.c $(CC) $(CFLAGS) $< $O/Xxh64.o: ../../../../C/Xxh64.c @@ -1285,6 +1283,8 @@ $O/Sha1Opt.o: ../../../../Asm/x86/Sha1Opt.asm $(MY_ASM) $(AFLAGS) $< $O/Sha256Opt.o: ../../../../Asm/x86/Sha256Opt.asm $(MY_ASM) $(AFLAGS) $< +$O/Sort.o: ../../../../Asm/x86/Sort.asm + $(MY_ASM) $(AFLAGS) $< ifndef USE_JWASM USE_X86_ASM_AES=1 @@ -1299,6 +1299,8 @@ $O/Sha1Opt.o: ../../../../C/Sha1Opt.c $(CC) $(CFLAGS) $< $O/Sha256Opt.o: ../../../../C/Sha256Opt.c $(CC) $(CFLAGS) $< +$O/Sort.o: ../../../../C/Sort.c + $(CC) $(CFLAGS) $< endif diff --git a/CPP/7zip/Archive/7z/7zCompressionMode.h b/CPP/7zip/Archive/7z/7zCompressionMode.h index 737722d..ecfee7c 100644 --- a/CPP/7zip/Archive/7z/7zCompressionMode.h +++ b/CPP/7zip/Archive/7z/7zCompressionMode.h @@ -59,6 +59,7 @@ struct CCompressionMethodMode bool NumThreads_WasForced; bool MultiThreadMixer; UInt32 NumThreads; + UInt32 NumThreadGroups; #endif UString Password; // _Wipe @@ -74,6 +75,7 @@ struct CCompressionMethodMode , NumThreads_WasForced(false) , MultiThreadMixer(true) , NumThreads(1) + , NumThreadGroups(0) #endif , MemoryUsageLimit((UInt64)1 << 30) {} diff --git 
a/CPP/7zip/Archive/7z/7zHandlerOut.cpp b/CPP/7zip/Archive/7z/7zHandlerOut.cpp index ea5ea0f..c1c2b63 100644 --- a/CPP/7zip/Archive/7z/7zHandlerOut.cpp +++ b/CPP/7zip/Archive/7z/7zHandlerOut.cpp @@ -111,8 +111,8 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) } } - const UInt64 kSolidBytes_Min = (1 << 24); - const UInt64 kSolidBytes_Max = ((UInt64)1 << 32); + const UInt64 kSolidBytes_Min = 1 << 24; + const UInt64 kSolidBytes_Max = (UInt64)1 << 32; // for non-LZMA2 methods bool needSolid = false; @@ -122,22 +122,24 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) SetGlobalLevelTo(oneMethodInfo); - #ifndef Z7_ST +#ifndef Z7_ST const bool numThreads_WasSpecifiedInMethod = (oneMethodInfo.Get_NumThreads() >= 0); if (!numThreads_WasSpecifiedInMethod) { // here we set the (NCoderPropID::kNumThreads) property in each method, only if there is no such property already CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, methodMode.NumThreads); } - #endif + if (methodMode.NumThreadGroups > 1) + CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(oneMethodInfo, methodMode.NumThreadGroups); +#endif CMethodFull &methodFull = methodMode.Methods.AddNew(); RINOK(PropsMethod_To_FullMethod(methodFull, oneMethodInfo)) - #ifndef Z7_ST +#ifndef Z7_ST methodFull.Set_NumThreads = true; methodFull.NumThreads = methodMode.NumThreads; - #endif +#endif if (methodFull.Id != k_Copy) needSolid = true; @@ -217,19 +219,18 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) // here we get real chunkSize cs = oneMethodInfo.Get_Xz_BlockSize(); if (dicSize > cs) - dicSize = cs; + dicSize = cs; - const UInt64 kSolidBytes_Lzma2_Max = ((UInt64)1 << 34); + const UInt64 kSolidBytes_Lzma2_Max = (UInt64)1 << 34; if (numSolidBytes > kSolidBytes_Lzma2_Max) - numSolidBytes = kSolidBytes_Lzma2_Max; + numSolidBytes = kSolidBytes_Lzma2_Max; methodFull.Set_NumThreads = false; // we don't use ICompressSetCoderMt::SetNumberOfThreads() 
for LZMA2 encoder #ifndef Z7_ST if (!numThreads_WasSpecifiedInMethod && !methodMode.NumThreads_WasForced - && methodMode.MemoryUsageLimit_WasSet - ) + && methodMode.MemoryUsageLimit_WasSet) { const UInt32 lzmaThreads = oneMethodInfo.Get_Lzma_NumThreads(); const UInt32 numBlockThreads_Original = methodMode.NumThreads / lzmaThreads; @@ -273,14 +274,14 @@ HRESULT CHandler::SetMainMethod(CCompressionMethodMode &methodMode) { numSolidBytes = (UInt64)dicSize << 7; if (numSolidBytes > kSolidBytes_Max) - numSolidBytes = kSolidBytes_Max; + numSolidBytes = kSolidBytes_Max; } if (_numSolidBytesDefined) continue; if (numSolidBytes < kSolidBytes_Min) - numSolidBytes = kSolidBytes_Min; + numSolidBytes = kSolidBytes_Min; _numSolidBytes = numSolidBytes; _numSolidBytesDefined = true; } @@ -704,6 +705,9 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt methodMode.NumThreads = numThreads; methodMode.NumThreads_WasForced = _numThreads_WasForced; methodMode.MultiThreadMixer = _useMultiThreadMixer; +#ifdef _WIN32 + methodMode.NumThreadGroups = _numThreadGroups; // _change it +#endif // headerMethod.NumThreads = 1; headerMethod.MultiThreadMixer = _useMultiThreadMixer; } diff --git a/CPP/7zip/Archive/ArHandler.cpp b/CPP/7zip/Archive/ArHandler.cpp index 95e4719..944eec4 100644 --- a/CPP/7zip/Archive/ArHandler.cpp +++ b/CPP/7zip/Archive/ArHandler.cpp @@ -325,7 +325,7 @@ HRESULT CHandler::ParseLongNames(IInStream *stream) { unsigned i; for (i = 0; i < _items.Size(); i++) - if (_items[i].Name == "//") + if (_items[i].Name.IsEqualTo("//")) break; if (i == _items.Size()) return S_OK; @@ -378,7 +378,7 @@ void CHandler::ChangeDuplicateNames() if (item.Name[0] == '/') continue; CItem &prev = _items[i - 1]; - if (item.Name == prev.Name) + if (item.Name.IsEqualTo(prev.Name)) { if (prev.SameNameIndex < 0) prev.SameNameIndex = 0; @@ -448,9 +448,9 @@ static UInt32 Get32(const Byte *p, unsigned be) { if (be) return GetBe32(p); ret HRESULT 
CHandler::ParseLibSymbols(IInStream *stream, unsigned fileIndex) { CItem &item = _items[fileIndex]; - if (item.Name != "/" && - item.Name != "__.SYMDEF" && - item.Name != "__.SYMDEF SORTED") + if (!item.Name.IsEqualTo("/") && + !item.Name.IsEqualTo("__.SYMDEF") && + !item.Name.IsEqualTo("__.SYMDEF SORTED")) return S_OK; if (item.Size > ((UInt32)1 << 30) || item.Size < 4) @@ -462,7 +462,7 @@ HRESULT CHandler::ParseLibSymbols(IInStream *stream, unsigned fileIndex) size_t pos = 0; - if (item.Name != "/") + if (!item.Name.IsEqualTo("/")) { // "__.SYMDEF" parsing (BSD) unsigned be; @@ -603,7 +603,7 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, if (_longNames_FileIndex >= 0) _items.Delete((unsigned)_longNames_FileIndex); - if (!_items.IsEmpty() && _items[0].Name == "debian-binary") + if (!_items.IsEmpty() && _items[0].Name.IsEqualTo("debian-binary")) { _type = kType_Deb; _items.DeleteFrontal(1); diff --git a/CPP/7zip/Archive/Bz2Handler.cpp b/CPP/7zip/Archive/Bz2Handler.cpp index ffd7ad0..02eeee6 100644 --- a/CPP/7zip/Archive/Bz2Handler.cpp +++ b/CPP/7zip/Archive/Bz2Handler.cpp @@ -427,9 +427,13 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt } CMethodProps props2 = _props; - #ifndef Z7_ST +#ifndef Z7_ST props2.AddProp_NumThreads(_props._numThreads); - #endif +#ifdef _WIN32 + if (_props._numThreadGroups > 1) + props2.AddProp32(NCoderPropID::kNumThreadGroups, _props._numThreadGroups); +#endif +#endif return UpdateArchive(size, outStream, props2, updateCallback); } diff --git a/CPP/7zip/Archive/ComHandler.cpp b/CPP/7zip/Archive/ComHandler.cpp index 82d939d..144369e 100644 --- a/CPP/7zip/Archive/ComHandler.cpp +++ b/CPP/7zip/Archive/ComHandler.cpp @@ -68,7 +68,7 @@ namespace NItemType static const Byte kRootStorage = 5; } -static const UInt32 kNameSizeMax = 64; +static const unsigned kNameSizeMax = 64; struct CItem { @@ -98,30 +98,30 @@ struct CRef class CDatabase { - UInt32 NumSectorsInMiniStream; CObjArray MiniSids; HRESULT 
AddNode(int parent, UInt32 did); + public: - CObjArray Fat; - UInt32 FatSize; - CObjArray Mat; - UInt32 MatSize; - CObjectVector Items; CRecordVector Refs; +private: + UInt32 NumSectorsInMiniStream; +public: + UInt32 MatSize; + UInt32 FatSize; UInt32 LongStreamMinSize; unsigned SectorSizeBits; unsigned MiniSectorSizeBits; Int32 MainSubfile; + EType Type; UInt64 PhySize; UInt64 PhySize_Aligned; - EType Type; bool IsNotArcType() const { @@ -148,14 +148,14 @@ public: UInt64 GetItemPackSize(UInt64 size) const { - UInt64 mask = ((UInt64)1 << (IsLargeStream(size) ? SectorSizeBits : MiniSectorSizeBits)) - 1; + const UInt64 mask = ((UInt32)1 << (IsLargeStream(size) ? SectorSizeBits : MiniSectorSizeBits)) - 1; return (size + mask) & ~mask; } bool GetMiniCluster(UInt32 sid, UInt64 &res) const { - unsigned subBits = SectorSizeBits - MiniSectorSizeBits; - UInt32 fid = sid >> subBits; + const unsigned subBits = SectorSizeBits - MiniSectorSizeBits; + const UInt32 fid = sid >> subBits; if (fid >= NumSectorsInMiniStream) return false; res = (((UInt64)MiniSids[fid] + 1) << subBits) + (sid & ((1 << subBits) - 1)); @@ -177,7 +177,7 @@ HRESULT CDatabase::ReadSector(IInStream *inStream, Byte *buf, unsigned sectorSiz HRESULT CDatabase::ReadIDs(IInStream *inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid, UInt32 *dest) { RINOK(ReadSector(inStream, buf, sectorSizeBits, sid)) - UInt32 sectorSize = (UInt32)1 << sectorSizeBits; + const UInt32 sectorSize = (UInt32)1 << sectorSizeBits; for (UInt32 t = 0; t < sectorSize; t += 4) *dest++ = Get32(buf + t); return S_OK; @@ -373,7 +373,7 @@ UString CDatabase::GetItemPath(UInt32 index) const HRESULT CDatabase::Update_PhySize_WithItem(unsigned index) { const CItem &item = Items[index]; - bool isLargeStream = (index == 0 || IsLargeStream(item.Size)); + const bool isLargeStream = (index == 0 || IsLargeStream(item.Size)); if (!isLargeStream) return S_OK; const unsigned bsLog = isLargeStream ? 
SectorSizeBits : MiniSectorSizeBits; @@ -527,6 +527,10 @@ HRESULT CDatabase::Open(IInStream *inStream) { CItem item; item.Parse(sect + i, mode64bit); + // we use (item.Size) check here. + // so we don't need additional overflow checks for (item.Size +) in another code + if (item.Size >= ((UInt64)1 << 63)) + return S_FALSE; Items.Add(item); } sid = Fat[sid]; @@ -767,11 +771,8 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, UInt64 totalPackSize; totalSize = totalPackSize = 0; - NCompress::CCopyCoder *copyCoderSpec = new NCompress::CCopyCoder(); - CMyComPtr copyCoder = copyCoderSpec; - - CLocalProgress *lps = new CLocalProgress; - CMyComPtr progress = lps; + CMyComPtr2_Create copyCoder; + CMyComPtr2_Create lps; lps->Init(extractCallback, false); for (i = 0; i < numItems; i++) @@ -781,7 +782,8 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, RINOK(lps->SetCur()) const UInt32 index = allFilesMode ? i : indices[i]; const CItem &item = _db.Items[_db.Refs[index].Did]; - + Int32 res; + { CMyComPtr outStream; const Int32 askMode = testMode ? 
NExtract::NAskMode::kTest : @@ -801,7 +803,7 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, if (!testMode && !outStream) continue; RINOK(extractCallback->PrepareOperation(askMode)) - Int32 res = NExtract::NOperationResult::kDataError; + res = NExtract::NOperationResult::kDataError; CMyComPtr inStream; HRESULT hres = GetStream(index, &inStream); if (hres == S_FALSE) @@ -813,12 +815,12 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, RINOK(hres) if (inStream) { - RINOK(copyCoder->Code(inStream, outStream, NULL, NULL, progress)) - if (copyCoderSpec->TotalSize == item.Size) + RINOK(copyCoder.Interface()->Code(inStream, outStream, NULL, NULL, lps)) + if (copyCoder->TotalSize == item.Size) res = NExtract::NOperationResult::kOK; } } - outStream.Release(); + } RINOK(extractCallback->SetOperationResult(res)) } return S_OK; diff --git a/CPP/7zip/Archive/Common/HandlerOut.cpp b/CPP/7zip/Archive/Common/HandlerOut.cpp index 17fed67..5a11e30 100644 --- a/CPP/7zip/Archive/Common/HandlerOut.cpp +++ b/CPP/7zip/Archive/Common/HandlerOut.cpp @@ -4,8 +4,6 @@ #include "../../../Common/StringToInt.h" -#include "../Common/ParseProperties.h" - #include "HandlerOut.h" namespace NArchive { @@ -82,6 +80,7 @@ bool ParseSizeString(const wchar_t *s, const PROPVARIANT &prop, UInt64 percentsB return true; } + bool CCommonMethodProps::SetCommonProperty(const UString &name, const PROPVARIANT &value, HRESULT &hres) { hres = S_OK; @@ -151,6 +150,11 @@ void CMultiMethodProps::SetMethodThreadsTo_Replace(CMethodProps &oneMethodInfo, SetMethodProp32_Replace(oneMethodInfo, NCoderPropID::kNumThreads, numThreads); } +void CMultiMethodProps::Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &oneMethodInfo, UInt32 numThreadGroups) +{ + SetMethodProp32(oneMethodInfo, NCoderPropID::kNumThreadGroups, numThreadGroups); +} + #endif // Z7_ST diff --git a/CPP/7zip/Archive/Common/HandlerOut.h b/CPP/7zip/Archive/Common/HandlerOut.h index 9340e1b..3122b05 100644 --- 
a/CPP/7zip/Archive/Common/HandlerOut.h +++ b/CPP/7zip/Archive/Common/HandlerOut.h @@ -17,11 +17,21 @@ protected: void InitCommon() { // _Write_MTime = true; - #ifndef Z7_ST - _numProcessors = _numThreads = NWindows::NSystem::GetNumberOfProcessors(); - _numThreads_WasForced = false; - #endif - + { +#ifndef Z7_ST + _numThreads_WasForced = false; + UInt32 numThreads; +#ifdef _WIN32 + NWindows::NSystem::CProcessAffinity aff; + numThreads = aff.Load_and_GetNumberOfThreads(); + _numThreadGroups = aff.IsGroupMode ? aff.Groups.GroupSizes.Size() : 0; +#else + numThreads = NWindows::NSystem::GetNumberOfProcessors(); +#endif // _WIN32 + _numProcessors = _numThreads = numThreads; +#endif // Z7_ST + } + size_t memAvail = (size_t)sizeof(size_t) << 28; _memAvail = memAvail; _memUsage_Compress = memAvail; @@ -46,11 +56,14 @@ protected: } public: - #ifndef Z7_ST +#ifndef Z7_ST UInt32 _numThreads; UInt32 _numProcessors; +#ifdef _WIN32 + UInt32 _numThreadGroups; +#endif bool _numThreads_WasForced; - #endif +#endif bool _memUsage_WasSet; UInt64 _memUsage_Compress; @@ -80,10 +93,12 @@ public: void SetGlobalLevelTo(COneMethodInfo &oneMethodInfo) const; - #ifndef Z7_ST +#ifndef Z7_ST static void SetMethodThreadsTo_IfNotFinded(CMethodProps &props, UInt32 numThreads); static void SetMethodThreadsTo_Replace(CMethodProps &props, UInt32 numThreads); - #endif + + static void Set_Method_NumThreadGroups_IfNotFinded(CMethodProps &props, UInt32 numThreadGroups); +#endif unsigned GetNumEmptyMethods() const diff --git a/CPP/7zip/Archive/Common/ItemNameUtils.cpp b/CPP/7zip/Archive/Common/ItemNameUtils.cpp index 8caf1d1..150efc9 100644 --- a/CPP/7zip/Archive/Common/ItemNameUtils.cpp +++ b/CPP/7zip/Archive/Common/ItemNameUtils.cpp @@ -47,6 +47,25 @@ UString GetOsPath_Remove_TailSlash(const UString &name) } +#if WCHAR_PATH_SEPARATOR != L'/' +void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement) +{ + // name.Replace(kUnixPathSepar, kOsPathSepar); + const unsigned len = name.Len(); + for 
(unsigned i = 0; i < len; i++) + { + wchar_t c = name[i]; + if (c == L'/') + c = WCHAR_PATH_SEPARATOR; + else if (useBackslashReplacement && c == L'\\') + c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme + else + continue; + name.ReplaceOneCharAtPos(i, c); + } +} +#endif + void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool #if WCHAR_PATH_SEPARATOR != L'/' useBackslashReplacement @@ -57,21 +76,7 @@ void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool return; #if WCHAR_PATH_SEPARATOR != L'/' - { - // name.Replace(kUnixPathSepar, kOsPathSepar); - const unsigned len = name.Len(); - for (unsigned i = 0; i < len; i++) - { - wchar_t c = name[i]; - if (c == L'/') - c = WCHAR_PATH_SEPARATOR; - else if (useBackslashReplacement && c == L'\\') - c = WCHAR_IN_FILE_NAME_BACKSLASH_REPLACEMENT; // WSL scheme - else - continue; - name.ReplaceOneCharAtPos(i, c); - } - } + ReplaceToWinSlashes(name, useBackslashReplacement); #endif if (name.Back() == kOsPathSepar) diff --git a/CPP/7zip/Archive/Common/ItemNameUtils.h b/CPP/7zip/Archive/Common/ItemNameUtils.h index 8ab9b61..cea8dcc 100644 --- a/CPP/7zip/Archive/Common/ItemNameUtils.h +++ b/CPP/7zip/Archive/Common/ItemNameUtils.h @@ -13,6 +13,9 @@ void ReplaceSlashes_OsToUnix(UString &name); UString GetOsPath(const UString &name); UString GetOsPath_Remove_TailSlash(const UString &name); +#if WCHAR_PATH_SEPARATOR != L'/' +void ReplaceToWinSlashes(UString &name, bool useBackslashReplacement); +#endif void ReplaceToOsSlashes_Remove_TailSlash(UString &name, bool useBackslashReplacement = false); void NormalizeSlashes_in_FileName_for_OsPath(wchar_t *s, unsigned len); void NormalizeSlashes_in_FileName_for_OsPath(UString &name); diff --git a/CPP/7zip/Archive/CpioHandler.cpp b/CPP/7zip/Archive/CpioHandler.cpp index 2228040..62184f0 100644 --- a/CPP/7zip/Archive/CpioHandler.cpp +++ b/CPP/7zip/Archive/CpioHandler.cpp @@ -437,7 +437,14 @@ HRESULT CInArchive::GetNextItem() return S_OK; /* v23.02: we have disabled 
rDevMinor check because real file - from Apple contains rDevMinor==255 by some unknown reason */ + from Apple contains rDevMinor==255 by some unknown reason + cpio 2.13 and older versions: it copies stat::st_rdev to archive. + and stat::st_rdev can be non-zero for some old linux/filesystems cases for regular files. + cpio 2.14 (2023) copies st_rdev to archive only if (S_ISBLK (st->st_mode) || S_ISCHR (st->st_mode)) + v25.00: we have disabled RDevMajor check here to support some rare case created by cpio 2.13- with old linux. + But we still keep full check in IsArc_Cpio() to reduce false cpio detection cases. + */ +#if 0 // 0 : to disable check to support some old linux cpio archives. if (item.RDevMajor != 0 // || item.RDevMinor != 0 ) @@ -446,6 +453,7 @@ HRESULT CInArchive::GetNextItem() !MY_LIN_S_ISBLK(item.Mode)) return S_OK; } +#endif // Size must be 0 for FIFOs and directories if (item.IsDir() || MY_LIN_S_ISFIFO(item.Mode)) @@ -873,17 +881,13 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val { case kpidPath: { - UString res; - bool needConvert = true; - #ifdef _WIN32 - // if ( - ConvertUTF8ToUnicode(item.Name, res); - // ) - needConvert = false; - #endif - if (needConvert) - res = MultiByteToUnicodeString(item.Name, CP_OEMCP); - prop = NItemName::GetOsPath(res); +#ifdef _WIN32 + UString u; + ConvertUTF8ToUnicode(item.Name, u); +#else + const UString u = MultiByteToUnicodeString(item.Name, CP_OEMCP); +#endif + prop = NItemName::GetOsPath(u); break; } case kpidIsDir: prop = item.IsDir(); break; @@ -921,16 +925,12 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val s.SetFrom_CalcLen((const char *)(const void *)(const Byte *)item.Data, (unsigned)item.Data.Size()); if (s.Len() == item.Data.Size()) { +#ifdef _WIN32 UString u; - bool needConvert = true; - #ifdef _WIN32 - // if ( - ConvertUTF8ToUnicode(item.Name, u); - // ) - needConvert = false; - #endif - if (needConvert) - u = 
MultiByteToUnicodeString(s, CP_OEMCP); + ConvertUTF8ToUnicode(item.Name, u); +#else + const UString u = MultiByteToUnicodeString(s, CP_OEMCP); +#endif prop = u; } } diff --git a/CPP/7zip/Archive/DmgHandler.cpp b/CPP/7zip/Archive/DmgHandler.cpp index 3901192..9079dc7 100644 --- a/CPP/7zip/Archive/DmgHandler.cpp +++ b/CPP/7zip/Archive/DmgHandler.cpp @@ -444,7 +444,7 @@ const char *Find_Apple_FS_Ext(const AString &name) { const CAppleName &a = k_Names[i]; if (a.Ext) - if (name == a.AppleName) + if (name.IsEqualTo(a.AppleName)) return a.Ext; } return NULL; @@ -784,7 +784,7 @@ static const CXmlItem *FindKeyPair(const CXmlItem &item, const char *key, const for (unsigned i = 0; i + 1 < item.SubItems.Size(); i++) { const CXmlItem &si = item.SubItems[i]; - if (si.IsTagged("key") && si.GetSubString() == key) + if (si.IsTagged("key") && si.GetSubString().IsEqualTo(key)) { const CXmlItem *si_1 = &item.SubItems[i + 1]; if (si_1->IsTagged(nextTag)) @@ -1251,7 +1251,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *openArchiveCall #endif } - if (xml.Root.Name != "plist") + if (!xml.Root.Name.IsEqualTo("plist")) return S_FALSE; const CXmlItem *dictItem = xml.Root.FindSubTag_GetPtr("dict"); diff --git a/CPP/7zip/Archive/FatHandler.cpp b/CPP/7zip/Archive/FatHandler.cpp index b31ee4f..18fce91 100644 --- a/CPP/7zip/Archive/FatHandler.cpp +++ b/CPP/7zip/Archive/FatHandler.cpp @@ -2,13 +2,12 @@ #include "StdAfx.h" -// #include - #include "../../../C/CpuArch.h" #include "../../Common/ComTry.h" #include "../../Common/IntToString.h" #include "../../Common/MyBuffer.h" +#include "../../Common/MyBuffer2.h" #include "../../Common/MyCom.h" #include "../../Common/StringConvert.h" @@ -22,14 +21,19 @@ #include "../Compress/CopyCoder.h" -#include "Common/DummyOutStream.h" +#include "Common/ItemNameUtils.h" #define Get16(p) GetUi16(p) #define Get32(p) GetUi32(p) #define Get16a(p) GetUi16a(p) #define Get32a(p) GetUi32a(p) -#define PRF(x) /* x */ +#if 0 +#include +#define PRF(x) x 
+#else +#define PRF(x) +#endif namespace NArchive { namespace NFat { @@ -38,35 +42,34 @@ static const UInt32 kFatItemUsedByDirMask = (UInt32)1 << 31; struct CHeader { - UInt32 NumSectors; - UInt16 NumReservedSectors; + Byte NumFatBits; + Byte SectorSizeLog; + Byte SectorsPerClusterLog; + Byte ClusterSizeLog; Byte NumFats; + Byte MediaType; + + bool VolFieldsDefined; + bool HeadersWarning; + + UInt32 FatSize; + UInt32 BadCluster; + + UInt16 NumReservedSectors; + UInt32 NumSectors; UInt32 NumFatSectors; UInt32 RootDirSector; UInt32 NumRootDirSectors; UInt32 DataSector; - UInt32 FatSize; - UInt32 BadCluster; - - Byte NumFatBits; - Byte SectorSizeLog; - Byte SectorsPerClusterLog; - Byte ClusterSizeLog; - UInt16 SectorsPerTrack; UInt16 NumHeads; UInt32 NumHiddenSectors; - - bool VolFieldsDefined; - bool HeadersWarning; UInt32 VolId; // Byte VolName[11]; // Byte FileSys[8]; - // Byte OemName[5]; - Byte MediaType; // 32-bit FAT UInt16 Flags; @@ -104,15 +107,8 @@ struct CHeader bool Parse(const Byte *p); }; -static int GetLog(UInt32 num) -{ - for (int i = 0; i < 31; i++) - if (((UInt32)1 << i) == num) - return i; - return -1; -} -static const UInt32 kHeaderSize = 512; +static const unsigned kHeaderSize = 512; API_FUNC_IsArc IsArc_Fat(const Byte *p, size_t size); API_FUNC_IsArc IsArc_Fat(const Byte *p, size_t size) @@ -125,7 +121,7 @@ API_FUNC_IsArc IsArc_Fat(const Byte *p, size_t size) bool CHeader::Parse(const Byte *p) { - if (p[0x1FE] != 0x55 || p[0x1FF] != 0xAA) + if (Get16(p + 0x1FE) != 0xAA55) return false; HeadersWarning = false; @@ -139,22 +135,40 @@ bool CHeader::Parse(const Byte *p) } { { - const UInt32 val32 = Get16(p + 11); - const int s = GetLog(val32); - if (s < 9 || s > 12) - return false; - SectorSizeLog = (Byte)s; + const unsigned num = Get16(p + 11); + unsigned i = 9; + unsigned m = 1 << i; + for (;;) + { + if (m == num) + break; + m <<= 1; + if (++i > 12) + return false; + } + SectorSizeLog = (Byte)i; } { - const UInt32 val32 = p[13]; - const int s = 
GetLog(val32); - if (s < 0) + const unsigned num = p[13]; + unsigned i = 0; + unsigned m = 1 << i; + for (;;) + { + if (m == num) + break; + m <<= 1; + if (++i > 7) + return false; + } + SectorsPerClusterLog = (Byte)i; + i += SectorSizeLog; + ClusterSizeLog = (Byte)i; + // (2^15 = 32 KB is safe cluster size that is suported by all system. + // (2^16 = 64 KB is supported by some systems + // (128 KB / 256 KB) can be created by some tools, but it is not supported by many tools. + if (i > 18) // 256 KB return false; - SectorsPerClusterLog = (Byte)s; } - ClusterSizeLog = (Byte)(SectorSizeLog + SectorsPerClusterLog); - if (ClusterSizeLog > 24) - return false; } NumReservedSectors = Get16(p + 14); @@ -169,7 +183,7 @@ bool CHeader::Parse(const Byte *p) const bool isOkOffset = (codeOffset == 0) || (codeOffset == (p[0] == 0xEB ? 2 : 3)); - const UInt16 numRootDirEntries = Get16(p + 17); + const unsigned numRootDirEntries = Get16(p + 17); if (numRootDirEntries == 0) { if (codeOffset < 90 && !isOkOffset) @@ -183,10 +197,10 @@ bool CHeader::Parse(const Byte *p) if (codeOffset < 62 - 24 && !isOkOffset) return false; NumFatBits = 0; - const UInt32 mask = (1 << (SectorSizeLog - 5)) - 1; - if ((numRootDirEntries & mask) != 0) + const unsigned mask = (1u << (SectorSizeLog - 5)) - 1; + if (numRootDirEntries & mask) return false; - NumRootDirSectors = (numRootDirEntries + mask) >> (SectorSizeLog - 5); + NumRootDirSectors = (numRootDirEntries /* + mask */) >> (SectorSizeLog - 5); } NumSectors = Get16(p + 19); @@ -198,7 +212,6 @@ bool CHeader::Parse(const Byte *p) else if (IsFat32()) return false; */ - MediaType = p[21]; NumFatSectors = Get16(p + 22); SectorsPerTrack = Get16(p + 24); @@ -222,7 +235,7 @@ bool CHeader::Parse(const Byte *p) return false; RootCluster = Get32(p + 8); FsInfoSector = Get16(p + 12); - for (int i = 16; i < 28; i++) + for (unsigned i = 16; i < 28; i++) if (p[i] != 0) return false; p += 28; @@ -260,7 +273,7 @@ bool CHeader::Parse(const Byte *p) if (numClusters >= 
0xFFF5) return false; NumFatBits = (Byte)(numClusters < 0xFF5 ? 12 : 16); - BadCluster &= ((1 << NumFatBits) - 1); + BadCluster &= (((UInt32)1 << NumFatBits) - 1); } FatSize = numClusters + 2; @@ -283,103 +296,157 @@ bool CHeader::Parse(const Byte *p) return true; } -struct CItem + + +class CItem { - UString UName; - char DosName[11]; - Byte CTime2; - UInt32 CTime; - UInt32 MTime; - UInt16 ADate; - Byte Attrib; - Byte Flags; + Z7_CLASS_NO_COPY(CItem) +public: UInt32 Size; + Byte Attrib; + Byte CTime2; + UInt16 ADate; + CByteBuffer LongName; // if LongName.Size() == 0 : no long name + // if LongName.Size() != 0 : it's NULL terminated UTF16-LE string. + char DosName[11]; + Byte Flags; + UInt32 MTime; + UInt32 CTime; UInt32 Cluster; Int32 Parent; + CItem() {} + // NT uses Flags to store Low Case status bool NameIsLow() const { return (Flags & 0x8) != 0; } bool ExtIsLow() const { return (Flags & 0x10) != 0; } bool IsDir() const { return (Attrib & 0x10) != 0; } - UString GetShortName() const; - UString GetName() const; - UString GetVolName() const; + void GetShortName(UString &dest) const; + void GetName(UString &name) const; }; -static unsigned CopyAndTrim(char *dest, const char *src, unsigned size, bool toLower) + +static char *CopyAndTrim(char *dest, const char *src, + unsigned size, unsigned toLower) { - memcpy(dest, src, size); - if (toLower) + do { - for (unsigned i = 0; i < size; i++) + if (src[(size_t)size - 1] != ' ') { - char c = dest[i]; - if (c >= 'A' && c <= 'Z') - dest[i] = (char)(c + 0x20); + const unsigned range = toLower ? 
'Z' - 'A' + 1 : 0; + do + { + unsigned c = (Byte)*src++; + if ((unsigned)(c - 'A') < range) + c += 0x20; + *dest++ = (char)c; + } + while (--size); + break; } } - - for (unsigned i = size;;) - { - if (i == 0) - return 0; - if (dest[i - 1] != ' ') - return i; - i--; - } + while (--size); + *dest = 0; + return dest; } -static UString FatStringToUnicode(const char *s) + +static void FatStringToUnicode(UString &dest, const char *s) { - return MultiByteToUnicodeString(s, CP_OEMCP); + MultiByteToUnicodeString2(dest, AString(s), CP_OEMCP); } -UString CItem::GetShortName() const +void CItem::GetShortName(UString &shortName) const { char s[16]; - unsigned i = CopyAndTrim(s, DosName, 8, NameIsLow()); - s[i++] = '.'; - unsigned j = CopyAndTrim(s + i, DosName + 8, 3, ExtIsLow()); - if (j == 0) - i--; - s[i + j] = 0; - return FatStringToUnicode(s); + char *dest = CopyAndTrim(s, DosName, 8, NameIsLow()); + *dest++ = '.'; + char *dest2 = CopyAndTrim(dest, DosName + 8, 3, ExtIsLow()); + if (dest == dest2) + dest[-1] = 0; + FatStringToUnicode(shortName, s); } -UString CItem::GetName() const + + +// numWords != 0 +static unsigned ParseLongName(UInt16 *buf, unsigned numWords) { - if (!UName.IsEmpty()) - return UName; - return GetShortName(); + unsigned i; + for (i = 0; i < numWords; i++) + { + const unsigned c = buf[i]; + if (c == 0) + break; + if (c == 0xFFFF) + return 0; + } + if (i == 0) + return 0; + buf[i] = 0; + numWords -= i; + i++; + if (numWords > 1) + { + numWords--; + buf += i; + do + if (*buf++ != 0xFFFF) + return 0; + while (--numWords); + } + return i; // it includes NULL terminator } -UString CItem::GetVolName() const + +void CItem::GetName(UString &name) const +{ + if (LongName.Size() >= 2) + { + const Byte * const p = LongName; + const unsigned numWords = ((unsigned)LongName.Size() - 2) / 2; + wchar_t *dest = name.GetBuf(numWords); + for (unsigned i = 0; i < numWords; i++) + dest[i] = (wchar_t)Get16(p + (size_t)i * 2); + name.ReleaseBuf_SetEnd(numWords); + } + else + 
GetShortName(name); + if (name.IsEmpty()) // it's unexpected + name = '_'; + NItemName::NormalizeSlashes_in_FileName_for_OsPath(name); +} + + +static void GetVolName(const char dosName[11], NWindows::NCOM::CPropVariant &prop) { - if (!UName.IsEmpty()) - return UName; char s[12]; - unsigned i = CopyAndTrim(s, DosName, 11, false); - s[i] = 0; - return FatStringToUnicode(s); + CopyAndTrim(s, dosName, 11, false); + UString u; + FatStringToUnicode(u, AString(s)); + prop = u; } + struct CDatabase { - CHeader Header; CObjectVector Items; UInt32 *Fat; + CHeader Header; CMyComPtr InStream; IArchiveOpenCallback *OpenCallback; + CAlignedBuffer ByteBuf; + CByteBuffer LfnBuf; UInt32 NumFreeClusters; - bool VolItemDefined; - CItem VolItem; UInt32 NumDirClusters; - CByteBuffer ByteBuf; UInt64 NumCurUsedBytes; - UInt64 PhySize; + UInt32 Vol_MTime; + char VolLabel[11]; + bool VolItem_Defined; + CDatabase(): Fat(NULL) {} ~CDatabase() { ClearAndClose(); } @@ -388,7 +455,7 @@ struct CDatabase HRESULT OpenProgressFat(bool changeTotal = true); HRESULT OpenProgress(); - UString GetItemPath(UInt32 index) const; + void GetItemPath(UInt32 index, UString &s) const; HRESULT Open(); HRESULT ReadDir(Int32 parent, UInt32 cluster, unsigned level); @@ -400,6 +467,7 @@ struct CDatabase HRESULT SeekToCluster(UInt32 cluster) { return SeekToSector(Header.ClusterToSector(cluster)); } }; + HRESULT CDatabase::SeekToSector(UInt32 sector) { return InStream_SeekSet(InStream, (UInt64)sector << Header.SectorSizeLog); @@ -408,7 +476,7 @@ HRESULT CDatabase::SeekToSector(UInt32 sector) void CDatabase::Clear() { PhySize = 0; - VolItemDefined = false; + VolItem_Defined = false; NumDirClusters = 0; NumCurUsedBytes = 0; @@ -440,49 +508,35 @@ HRESULT CDatabase::OpenProgress() { if (!OpenCallback) return S_OK; - UInt64 numItems = Items.Size(); + const UInt64 numItems = Items.Size(); return OpenCallback->SetCompleted(&numItems, &NumCurUsedBytes); } -UString CDatabase::GetItemPath(UInt32 index) const +void 
CDatabase::GetItemPath(UInt32 index, UString &s) const { - const CItem *item = &Items[index]; - UString name = item->GetName(); + UString name; for (;;) { - index = (UInt32)item->Parent; - if (item->Parent < 0) - return name; - item = &Items[index]; - name.InsertAtFront(WCHAR_PATH_SEPARATOR); - if (item->UName.IsEmpty()) - name.Insert(0, item->GetShortName()); - else - name.Insert(0, item->UName); + const CItem &item = Items[index]; + item.GetName(name); + if (item.Parent >= 0) + name.InsertAtFront(WCHAR_PATH_SEPARATOR); + s.Insert(0, name); + index = (UInt32)item.Parent; + if (item.Parent < 0) + break; } } -static wchar_t *AddSubStringToName(wchar_t *dest, const Byte *p, unsigned numChars) -{ - for (unsigned i = 0; i < numChars; i++) - { - wchar_t c = Get16(p + i * 2); - if (c != 0 && c != 0xFFFF) - *dest++ = c; - } - *dest = 0; - return dest; -} HRESULT CDatabase::ReadDir(Int32 parent, UInt32 cluster, unsigned level) { - unsigned startIndex = Items.Size(); + const unsigned startIndex = Items.Size(); if (startIndex >= (1 << 30) || level > 256) return S_FALSE; - UInt32 sectorIndex = 0; UInt32 blockSize = Header.ClusterSize(); - bool clusterMode = (Header.IsFat32() || parent >= 0); + const bool clusterMode = (Header.IsFat32() || parent >= 0); if (!clusterMode) { blockSize = Header.SectorSize(); @@ -490,21 +544,26 @@ HRESULT CDatabase::ReadDir(Int32 parent, UInt32 cluster, unsigned level) } ByteBuf.Alloc(blockSize); - UString curName; - int checkSum = -1; - int numLongRecords = -1; + + const unsigned k_NumLfnRecords_MAX = 20; // 260 symbols limit (strict limit) + // const unsigned k_NumLfnRecords_MAX = 0x40 - 1; // 1260 symbols limit (relaxed limit) + const unsigned k_NumLfnBytes_in_Record = 13 * 2; + // we reserve 2 additional bytes for NULL terminator + LfnBuf.Alloc(k_NumLfnRecords_MAX * k_NumLfnBytes_in_Record + 2 * 1); + UInt32 curDirBytes_read = 0; + UInt32 sectorIndex = 0; + unsigned num_lfn_records = 0; + unsigned lfn_RecordIndex = 0; + int checkSum = -1; + 
bool is_record_error = false; + for (UInt32 pos = blockSize;; pos += 32) { if (pos == blockSize) { pos = 0; - if ((NumDirClusters & 0xFF) == 0) - { - RINOK(OpenProgress()) - } - if (clusterMode) { if (Header.IsEoc(cluster)) @@ -514,21 +573,37 @@ HRESULT CDatabase::ReadDir(Int32 parent, UInt32 cluster, unsigned level) PRF(printf("\nCluster = %4X", cluster)); RINOK(SeekToCluster(cluster)) const UInt32 newCluster = Fat[cluster]; - if ((newCluster & kFatItemUsedByDirMask) != 0) + if (newCluster & kFatItemUsedByDirMask) return S_FALSE; Fat[cluster] |= kFatItemUsedByDirMask; cluster = newCluster; NumDirClusters++; + if ((NumDirClusters & 0xFF) == 0) + { + RINOK(OpenProgress()) + } NumCurUsedBytes += Header.ClusterSize(); } else if (sectorIndex++ >= Header.NumRootDirSectors) break; + // if (curDirBytes_read > (1u << 28)) // do we need some relaxed limit for non-MS FATs? + if (curDirBytes_read >= (1u << 21)) // 2MB limit from FAT specification. + return S_FALSE; RINOK(ReadStream_FALSE(InStream, ByteBuf, blockSize)) + curDirBytes_read += blockSize; } - const Byte *p = ByteBuf + pos; - + if (is_record_error) + { + Header.HeadersWarning = true; + num_lfn_records = 0; + lfn_RecordIndex = 0; + checkSum = -1; + } + + const Byte * const p = ByteBuf + pos; + if (p[0] == 0) { /* @@ -538,125 +613,191 @@ HRESULT CDatabase::ReadDir(Int32 parent, UInt32 cluster, unsigned level) */ break; } - + + is_record_error = true; + if (p[0] == 0xE5) { - if (numLongRecords > 0) - return S_FALSE; + // deleted entry + if (num_lfn_records == 0) + is_record_error = false; continue; } - - Byte attrib = p[11]; - if ((attrib & 0x3F) == 0xF) + // else { - if (p[0] > 0x7F || Get16(p + 26) != 0) - return S_FALSE; - int longIndex = p[0] & 0x3F; - if (longIndex == 0) - return S_FALSE; - bool isLast = (p[0] & 0x40) != 0; - if (numLongRecords < 0) + const Byte attrib = p[11]; + // maybe we can use more strick check : (attrib == 0xF) ? 
+ if ((attrib & 0x3F) == 0xF) { - if (!isLast) + // long file name (LFN) entry + const unsigned longIndex = p[0] & 0x3F; + if (longIndex == 0 + || longIndex > k_NumLfnRecords_MAX + || p[0] > 0x7F + || Get16a(p + 26) != 0 // LDIR_FstClusLO + ) + { return S_FALSE; - numLongRecords = longIndex; - } - else if (isLast || numLongRecords != longIndex) - return S_FALSE; - - numLongRecords--; - - if (p[12] == 0) - { - wchar_t nameBuf[14]; - wchar_t *dest; + // break; + } + const bool isLast = (p[0] & 0x40) != 0; + if (num_lfn_records == 0) + { + if (!isLast) + continue; // orphan + num_lfn_records = longIndex; + } + else if (isLast || longIndex != lfn_RecordIndex) + { + return S_FALSE; + // break; + } - dest = AddSubStringToName(nameBuf, p + 1, 5); - dest = AddSubStringToName(dest, p + 14, 6); - AddSubStringToName(dest, p + 28, 2); - curName = nameBuf + curName; - if (isLast) - checkSum = p[13]; - if (checkSum != p[13]) - return S_FALSE; + lfn_RecordIndex = longIndex - 1; + + if (p[12] == 0) + { + Byte * const dest = LfnBuf + k_NumLfnBytes_in_Record * lfn_RecordIndex; + memcpy(dest, p + 1, 5 * 2); + memcpy(dest + 5 * 2, p + 14, 6 * 2); + memcpy(dest + 11 * 2, p + 28, 2 * 2); + if (isLast) + checkSum = p[13]; + if (checkSum == p[13]) + is_record_error = false; + // else return S_FALSE; + continue; + } + // else + checkSum = -1; // we will ignore LfnBuf in this case + continue; } - } - else - { - if (numLongRecords > 0) + + if (lfn_RecordIndex) + { + Header.HeadersWarning = true; + // return S_FALSE; + } + // lfn_RecordIndex = 0; + + const unsigned type_in_attrib = attrib & 0x18; + if (type_in_attrib == 0x18) + { + // invalid directory record (both flags are set: dir_flag and volume_flag) return S_FALSE; - CItem item; - memcpy(item.DosName, p, 11); - - if (checkSum >= 0) - { - Byte sum = 0; - for (unsigned i = 0; i < 11; i++) - sum = (Byte)(((sum & 1) ? 
0x80 : 0) + (sum >> 1) + (Byte)item.DosName[i]); - if (sum == checkSum) - item.UName = curName; + // break; + // continue; } - - if (item.DosName[0] == 5) - item.DosName[0] = (char)(Byte)0xE5; - item.Attrib = attrib; - item.Flags = p[12]; - item.Size = Get32(p + 28); - item.Cluster = Get16(p + 26); - if (Header.NumFatBits > 16) - item.Cluster |= ((UInt32)Get16(p + 20) << 16); - else + if (type_in_attrib == 8) // volume_flag { - // OS/2 and WinNT probably can store EA (extended atributes) in that field. + if (!VolItem_Defined && level == 0) + { + VolItem_Defined = true; + memcpy(VolLabel, p, 11); + Vol_MTime = Get32(p + 22); + is_record_error = false; + } } - - item.CTime = Get32(p + 14); - item.CTime2 = p[13]; - item.ADate = Get16(p + 18); - item.MTime = Get32(p + 22); - item.Parent = parent; - - if (attrib == 8) + else if (memcmp(p, ". ", 11) == 0 + || memcmp(p, ".. ", 11) == 0) { - VolItem = item; - VolItemDefined = true; + if (num_lfn_records == 0 && type_in_attrib == 0x10) // dir_flag + is_record_error = false; } else - if (memcmp(item.DosName, ". ", 11) != 0 && - memcmp(item.DosName, ".. ", 11) != 0) { - if (!item.IsDir()) - NumCurUsedBytes += Header.GetFilePackSize(item.Size); - Items.Add(item); - PRF(printf("\n%7d: %S", Items.Size(), GetItemPath(Items.Size() - 1))); + CItem &item = Items.AddNew(); + memcpy(item.DosName, p, 11); + if (item.DosName[0] == 5) + item.DosName[0] = (char)(Byte)0xE5; // 0xE5 is valid KANJI lead byte value. + item.Attrib = attrib; + item.Flags = p[12]; + item.Size = Get32a(p + 28); + item.Cluster = Get16a(p + 26); + if (Header.NumFatBits > 16) + item.Cluster |= ((UInt32)Get16a(p + 20) << 16); + else + { + // OS/2 and WinNT probably can store EA (extended atributes) in that field. 
+ } + item.CTime = Get32(p + 14); + item.CTime2 = p[13]; + item.ADate = Get16a(p + 18); + item.MTime = Get32(p + 22); + item.Parent = parent; + { + if (!item.IsDir()) + NumCurUsedBytes += Header.GetFilePackSize(item.Size); + // PRF(printf("\n%7d: %S", Items.Size(), GetItemPath(Items.Size() - 1))); + PRF(printf("\n%7d" /* ": %S" */, Items.Size() /* , item.GetShortName() */ );) + } + if (num_lfn_records == 0) + is_record_error = false; + else if (checkSum >= 0 && lfn_RecordIndex == 0) + { + Byte sum = 0; + for (unsigned i = 0; i < 11; i++) + sum = (Byte)((sum << 7) + (sum >> 1) + (Byte)item.DosName[i]); + if (sum == checkSum) + { + const unsigned numWords = ParseLongName((UInt16 *)(void *)(Byte *)LfnBuf, + num_lfn_records * k_NumLfnBytes_in_Record / 2); + if (numWords > 1) + { + // numWords includes NULL terminator + item.LongName.CopyFrom(LfnBuf, numWords * 2); + is_record_error = false; + } + } + } + + if ( + // item.LongName.Size() < 20 || // for debug + item.LongName.Size() <= 2 * 1 + && memcmp(p, " ", 11) == 0) + { + char s[16 + 16]; + const size_t numChars = (size_t)(ConvertUInt32ToString( + Items.Size() - 1 - startIndex, + MyStpCpy(s, "[NONAME]-")) - s) + 1; + item.LongName.Alloc(numChars * 2); + for (size_t i = 0; i < numChars; i++) + { + SetUi16a(item.LongName + i * 2, (Byte)s[i]) + } + Header.HeadersWarning = true; + } } - numLongRecords = -1; - curName.Empty(); - checkSum = -1; + num_lfn_records = 0; } } - unsigned finishIndex = Items.Size(); + if (is_record_error) + Header.HeadersWarning = true; + + const unsigned finishIndex = Items.Size(); for (unsigned i = startIndex; i < finishIndex; i++) { const CItem &item = Items[i]; if (item.IsDir()) { - PRF(printf("\n%S", GetItemPath(i))); - RINOK(CDatabase::ReadDir((int)i, item.Cluster, level + 1)) + PRF(printf("\n---- %c%c%c%c%c", item.DosName[0], item.DosName[1], item.DosName[2], item.DosName[3], item.DosName[4])); + RINOK(ReadDir((int)i, item.Cluster, level + 1)) } } return S_OK; } + + HRESULT 
CDatabase::Open() { Clear(); - bool numFreeClustersDefined = false; + bool numFreeClusters_Defined = false; { - Byte buf[kHeaderSize]; - RINOK(ReadStream_FALSE(InStream, buf, kHeaderSize)) - if (!Header.Parse(buf)) + UInt32 buf32[kHeaderSize / 4]; + RINOK(ReadStream_FALSE(InStream, buf32, kHeaderSize)) + if (!Header.Parse((Byte *)(void *)buf32)) return S_FALSE; UInt64 fileSize; RINOK(InStream_GetSize_SeekToEnd(InStream, fileSize)) @@ -671,21 +812,21 @@ HRESULT CDatabase::Open() { if (((UInt32)Header.FsInfoSector << Header.SectorSizeLog) + kHeaderSize <= fileSize && SeekToSector(Header.FsInfoSector) == S_OK - && ReadStream_FALSE(InStream, buf, kHeaderSize) == S_OK - && 0xaa550000 == Get32(buf + 508) - && 0x41615252 == Get32(buf) - && 0x61417272 == Get32(buf + 484)) + && ReadStream_FALSE(InStream, buf32, kHeaderSize) == S_OK + && 0xaa550000 == Get32a(buf32 + 508 / 4) + && 0x41615252 == Get32a(buf32) + && 0x61417272 == Get32a(buf32 + 484 / 4)) { - NumFreeClusters = Get32(buf + 488); - numFreeClustersDefined = (NumFreeClusters <= Header.FatSize); + NumFreeClusters = Get32a(buf32 + 488 / 4); + numFreeClusters_Defined = (NumFreeClusters <= Header.FatSize); } else Header.HeadersWarning = true; } } - // numFreeClustersDefined = false; // to recalculate NumFreeClusters - if (!numFreeClustersDefined) + // numFreeClusters_Defined = false; // to recalculate NumFreeClusters + if (!numFreeClusters_Defined) NumFreeClusters = 0; CByteBuffer byteBuf; @@ -695,7 +836,7 @@ HRESULT CDatabase::Open() RINOK(SeekToSector(Header.GetFatSector())) if (Header.NumFatBits == 32) { - const UInt32 kBufSize = (1 << 15); + const UInt32 kBufSize = 1 << 15; byteBuf.Alloc(kBufSize); for (UInt32 i = 0;;) { @@ -712,7 +853,7 @@ HRESULT CDatabase::Open() const UInt32 *src = (const UInt32 *)(const void *)(const Byte *)byteBuf; UInt32 *dest = Fat + i; const UInt32 *srcLim = src + size; - if (numFreeClustersDefined) + if (numFreeClusters_Defined) do *dest++ = Get32a(src) & 0x0FFFFFFF; while (++src != 
srcLim); @@ -731,7 +872,7 @@ HRESULT CDatabase::Open() i += size; if ((i & 0xFFFFF) == 0) { - RINOK(OpenProgressFat(!numFreeClustersDefined)) + RINOK(OpenProgressFat(!numFreeClusters_Defined)) } } } @@ -751,7 +892,7 @@ HRESULT CDatabase::Open() for (UInt32 j = 0; j < fatSize; j++) fat[j] = (Get16(p + j * 3 / 2) >> ((j & 1) << 2)) & 0xFFF; - if (!numFreeClustersDefined) + if (!numFreeClusters_Defined) { UInt32 numFreeClusters = 0; for (UInt32 i = 0; i < fatSize; i++) @@ -781,11 +922,12 @@ HRESULT CDatabase::Open() Z7_class_CHandler_final: public IInArchive, + public IArchiveGetRawProps, public IInArchiveGetStream, public CMyUnknownImp, CDatabase { - Z7_IFACES_IMP_UNK_2(IInArchive, IInArchiveGetStream) + Z7_IFACES_IMP_UNK_3(IInArchive, IArchiveGetRawProps, IInArchiveGetStream) }; Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) @@ -831,6 +973,8 @@ Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) COM_TRY_END } + + static const Byte kProps[] = { kpidPath, @@ -842,6 +986,7 @@ static const Byte kProps[] = kpidATime, kpidAttrib, kpidShortName + // , kpidCharacts }; enum @@ -922,15 +1067,16 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) case kpidPhySize: prop = PhySize; break; case kpidFreeSpace: prop = (UInt64)NumFreeClusters << Header.ClusterSizeLog; break; case kpidHeadersSize: prop = GetHeadersSize(); break; - case kpidMTime: if (VolItemDefined) PropVariant_SetFrom_DosTime(prop, VolItem.MTime); break; + case kpidMTime: if (VolItem_Defined) PropVariant_SetFrom_DosTime(prop, Vol_MTime); break; case kpidShortComment: - case kpidVolumeName: if (VolItemDefined) prop = VolItem.GetVolName(); break; + case kpidVolumeName: if (VolItem_Defined) GetVolName(VolLabel, prop); break; case kpidNumFats: if (Header.NumFats != 2) prop = Header.NumFats; break; case kpidSectorSize: prop = (UInt32)1 << Header.SectorSizeLog; break; // case kpidSectorsPerTrack: prop = Header.SectorsPerTrack; break; // 
case kpidNumHeads: prop = Header.NumHeads; break; // case kpidOemName: STRING_TO_PROP(Header.OemName, prop); break; case kpidId: if (Header.VolFieldsDefined) prop = Header.VolId; break; + case kpidIsTree: prop = true; break; // case kpidVolName: if (Header.VolFieldsDefined) STRING_TO_PROP(Header.VolName, prop); break; // case kpidFileSysType: if (Header.VolFieldsDefined) STRING_TO_PROP(Header.FileSys, prop); break; // case kpidHiddenSectors: prop = Header.NumHiddenSectors; break; @@ -948,6 +1094,52 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) COM_TRY_END } + +Z7_COM7F_IMF(CHandler::GetNumRawProps(UInt32 *numProps)) +{ + *numProps = 0; + return S_OK; +} + +Z7_COM7F_IMF(CHandler::GetRawPropInfo(UInt32 /* index */ , BSTR *name, PROPID *propID)) +{ + *name = NULL; + *propID = 0; + return S_OK; +} + +Z7_COM7F_IMF(CHandler::GetParent(UInt32 index, UInt32 *parent, UInt32 *parentType)) +{ + *parentType = NParentType::kDir; + int par = -1; + if (index < Items.Size()) + par = Items[index].Parent; + *parent = (UInt32)(Int32)par; + return S_OK; +} + +Z7_COM7F_IMF(CHandler::GetRawProp(UInt32 index, PROPID propID, const void **data, UInt32 *dataSize, UInt32 *propType)) +{ + *data = NULL; + *dataSize = 0; + *propType = 0; + + if (index < Items.Size() + && propID == kpidName) + { + CByteBuffer &buf = Items[index].LongName; + const UInt32 size = (UInt32)buf.Size(); + if (size != 0) + { + *dataSize = size; + *propType = NPropDataType::kUtf16z; + *data = (const void *)(const Byte *)buf; + } + } + return S_OK; +} + + Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *value)) { COM_TRY_BEGIN @@ -955,8 +1147,28 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val const CItem &item = Items[index]; switch (propID) { - case kpidPath: prop = GetItemPath(index); break; - case kpidShortName: prop = item.GetShortName(); break; + case kpidPath: + case kpidName: + case kpidShortName: + { + UString s; + 
if (propID == kpidPath) + GetItemPath(index, s); + else if (propID == kpidName) + item.GetName(s); + else + item.GetShortName(s); + prop = s; + break; + } +/* + case kpidCharacts: + { + if (item.LongName.Size()) + prop = "LFN"; + break; + } +*/ case kpidIsDir: prop = item.IsDir(); break; case kpidMTime: PropVariant_SetFrom_DosTime(prop, item.MTime); break; case kpidCTime: FatTimeToProp(item.CTime, item.CTime2, prop); break; @@ -1004,34 +1216,44 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, Int32 testMode, IArchiveExtractCallback *extractCallback)) { COM_TRY_BEGIN - const bool allFilesMode = (numItems == (UInt32)(Int32)-1); - if (allFilesMode) - numItems = Items.Size(); - if (numItems == 0) - return S_OK; - UInt32 i; - UInt64 totalSize = 0; - for (i = 0; i < numItems; i++) + if (numItems == (UInt32)(Int32)-1) { - const CItem &item = Items[allFilesMode ? i : indices[i]]; - if (!item.IsDir()) - totalSize += item.Size; + indices = NULL; + numItems = Items.Size(); + if (numItems == 0) + return S_OK; + } + else + { + if (numItems == 0) + return S_OK; + if (!indices) + return E_INVALIDARG; + } + UInt64 totalSize = 0; + { + UInt32 i = 0; + do + { + UInt32 index = i; + if (indices) + index = indices[i]; + const CItem &item = Items[index]; + if (!item.IsDir()) + totalSize += item.Size; + } + while (++i != numItems); } RINOK(extractCallback->SetTotal(totalSize)) + CMyComPtr2_Create lps; + lps->Init(extractCallback, false); + CMyComPtr2_Create copyCoder; + UInt64 totalPackSize; totalSize = totalPackSize = 0; - - NCompress::CCopyCoder *copyCoderSpec = new NCompress::CCopyCoder(); - CMyComPtr copyCoder = copyCoderSpec; - - CLocalProgress *lps = new CLocalProgress; - CMyComPtr progress = lps; - lps->Init(extractCallback, false); - - CDummyOutStream *outStreamSpec = new CDummyOutStream; - CMyComPtr outStream(outStreamSpec); + UInt32 i; for (i = 0;; i++) { lps->InSize = totalPackSize; @@ -1039,46 +1261,45 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 
*indices, UInt32 numItems, RINOK(lps->SetCur()) if (i == numItems) break; - CMyComPtr realOutStream; - const Int32 askMode = testMode ? - NExtract::NAskMode::kTest : - NExtract::NAskMode::kExtract; - const UInt32 index = allFilesMode ? i : indices[i]; - const CItem &item = Items[index]; - RINOK(extractCallback->GetStream(index, &realOutStream, askMode)) - - if (item.IsDir()) + int res; { - RINOK(extractCallback->PrepareOperation(askMode)) - RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kOK)) - continue; - } - - totalPackSize += Header.GetFilePackSize(item.Size); - totalSize += item.Size; - - if (!testMode && !realOutStream) - continue; - RINOK(extractCallback->PrepareOperation(askMode)) - - outStreamSpec->SetStream(realOutStream); - realOutStream.Release(); - outStreamSpec->Init(); - - int res = NExtract::NOperationResult::kDataError; - CMyComPtr inStream; - HRESULT hres = GetStream(index, &inStream); - if (hres != S_FALSE) - { - RINOK(hres) - if (inStream) + CMyComPtr realOutStream; + const Int32 askMode = testMode ? 
+ NExtract::NAskMode::kTest : + NExtract::NAskMode::kExtract; + UInt32 index = i; + if (indices) + index = indices[i]; + const CItem &item = Items[index]; + RINOK(extractCallback->GetStream(index, &realOutStream, askMode)) + + if (item.IsDir()) { - RINOK(copyCoder->Code(inStream, outStream, NULL, NULL, progress)) - if (copyCoderSpec->TotalSize == item.Size) - res = NExtract::NOperationResult::kOK; + RINOK(extractCallback->PrepareOperation(askMode)) + RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kOK)) + continue; + } + + totalPackSize += Header.GetFilePackSize(item.Size); + totalSize += item.Size; + + if (!testMode && !realOutStream) + continue; + RINOK(extractCallback->PrepareOperation(askMode)) + res = NExtract::NOperationResult::kDataError; + CMyComPtr inStream; + const HRESULT hres = GetStream(index, &inStream); + if (hres != S_FALSE) + { + RINOK(hres) + if (inStream) + { + RINOK(copyCoder.Interface()->Code(inStream, realOutStream, NULL, NULL, lps)) + if (copyCoder->TotalSize == item.Size) + res = NExtract::NOperationResult::kOK; + } } } - outStreamSpec->ReleaseStream(); RINOK(extractCallback->SetOperationResult(res)) } return S_OK; diff --git a/CPP/7zip/Archive/Nsis/NsisIn.cpp b/CPP/7zip/Archive/Nsis/NsisIn.cpp index c9e2c01..194e5bf 100644 --- a/CPP/7zip/Archive/Nsis/NsisIn.cpp +++ b/CPP/7zip/Archive/Nsis/NsisIn.cpp @@ -4005,7 +4005,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh) AddParam_Var(params[0]); AString temp; ReadString2(temp, params[1]); - if (temp != "$TEMP") + if (!temp.IsEqualTo("$TEMP")) SpaceQuStr(temp); break; } @@ -4410,7 +4410,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh) } else { - if (func == "DllUnregisterServer") + if (func.IsEqualTo("DllUnregisterServer")) { s += "UnRegDLL"; printFunc = false; @@ -4418,7 +4418,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh) else { s += "RegDLL"; - if (func == "DllRegisterServer") + if (func.IsEqualTo("DllRegisterServer")) printFunc = 
false; } AddParam(params[0]); @@ -4886,7 +4886,7 @@ HRESULT CInArchive::ReadEntries(const CBlockHeader &bh) AddParam_Var(params[1]); AddParam(params[2]); AddParam(params[4]); - // if (params[2] == "0") AddCommentAndString("GetWinVer"); + // if (params[2].IsEqualTo("0")) AddCommentAndString("GetWinVer"); } else s += "GetOsInfo"; diff --git a/CPP/7zip/Archive/NtfsHandler.cpp b/CPP/7zip/Archive/NtfsHandler.cpp index d55521d..05c177f 100644 --- a/CPP/7zip/Archive/NtfsHandler.cpp +++ b/CPP/7zip/Archive/NtfsHandler.cpp @@ -1907,7 +1907,7 @@ HRESULT CDatabase::Open() for (i = 0; i < SecurityAttrs.Size(); i++) { const CAttr &attr = SecurityAttrs[i]; - if (attr.Name == L"$SII") + if (attr.Name.IsEqualTo("$SII")) { if (attr.Type == ATTR_TYPE_INDEX_ROOT) { diff --git a/CPP/7zip/Archive/PeHandler.cpp b/CPP/7zip/Archive/PeHandler.cpp index 8a0ff05..79f89ef 100644 --- a/CPP/7zip/Archive/PeHandler.cpp +++ b/CPP/7zip/Archive/PeHandler.cpp @@ -2638,7 +2638,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback) { const CSection § = _sections[i]; if (IsOpt()) - if (_parseResources && sect.Name == ".rsrc") + if (_parseResources && sect.Name.IsEqualTo(".rsrc")) { // 20.01: we try to parse only first copy of .rsrc section. 
_parseResources = false; @@ -2727,7 +2727,7 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *callback) for (i = 0; i < _mixItems.Size(); i++) { const CMixItem &mixItem = _mixItems[i]; - if (mixItem.StringIndex < 0 && mixItem.ResourceIndex < 0 && _sections[mixItem.SectionIndex].Name == "_winzip_") + if (mixItem.StringIndex < 0 && mixItem.ResourceIndex < 0 && _sections[mixItem.SectionIndex].Name.IsEqualTo("_winzip_")) { _mainSubfile = (Int32)(int)i; break; diff --git a/CPP/7zip/Archive/Rar/Rar5Handler.cpp b/CPP/7zip/Archive/Rar/Rar5Handler.cpp index 7d75aae..a639d8b 100644 --- a/CPP/7zip/Archive/Rar/Rar5Handler.cpp +++ b/CPP/7zip/Archive/Rar/Rar5Handler.cpp @@ -393,6 +393,7 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop) if (!FindExtra_Link(link)) return; + bool isWindows = (HostOS == kHost_Windows); if (link.Type != linkType) { if (linkType != NLinkType::kUnixSymLink) @@ -400,8 +401,11 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop) switch ((unsigned)link.Type) { case NLinkType::kUnixSymLink: + isWindows = false; + break; case NLinkType::kWinSymLink: case NLinkType::kWinJunction: + isWindows = true; break; default: return; } @@ -409,10 +413,15 @@ void CItem::Link_to_Prop(unsigned linkType, NWindows::NCOM::CPropVariant &prop) AString s; s.SetFrom_CalcLen((const char *)(Extra + link.NameOffset), link.NameLen); - UString unicode; ConvertUTF8ToUnicode(s, unicode); - prop = NItemName::GetOsPath(unicode); + // rar5.0 used '\\' separator for windows symlinks and \??\ prefix for abs paths. + // rar5.1+ uses '/' separator for windows symlinks and /??/ prefix for abs paths. 
+ // v25.00: we convert Windows slashes to Linux slashes: + if (isWindows) + unicode.Replace(L'\\', L'/'); + prop = unicode; + // prop = NItemName::GetOsPath(unicode); } bool CItem::GetAltStreamName(AString &name) const diff --git a/CPP/7zip/Archive/Rar/Rar5Handler.h b/CPP/7zip/Archive/Rar/Rar5Handler.h index 8f6581a..913ba85 100644 --- a/CPP/7zip/Archive/Rar/Rar5Handler.h +++ b/CPP/7zip/Archive/Rar/Rar5Handler.h @@ -286,10 +286,10 @@ struct CItem bool IsService() const { return RecordType == NHeaderType::kService; } - bool Is_STM() const { return IsService() && Name == "STM"; } - bool Is_CMT() const { return IsService() && Name == "CMT"; } - bool Is_ACL() const { return IsService() && Name == "ACL"; } - // bool Is_QO() const { return IsService() && Name == "QO"; } + bool Is_STM() const { return IsService() && Name.IsEqualTo("STM"); } + bool Is_CMT() const { return IsService() && Name.IsEqualTo("CMT"); } + bool Is_ACL() const { return IsService() && Name.IsEqualTo("ACL"); } + // bool Is_QO() const { return IsService() && Name.IsEqualTo("QO"); } int FindExtra(unsigned extraID, unsigned &recordDataSize) const; void PrintInfo(AString &s) const; diff --git a/CPP/7zip/Archive/Rar/RarHandler.cpp b/CPP/7zip/Archive/Rar/RarHandler.cpp index 9157acc..dfbad33 100644 --- a/CPP/7zip/Archive/Rar/RarHandler.cpp +++ b/CPP/7zip/Archive/Rar/RarHandler.cpp @@ -435,13 +435,13 @@ bool CInArchive::ReadHeaderReal(const Byte *p, unsigned size, CItem &item) size -= sizeof(item.Salt); p += sizeof(item.Salt); } - if (item.Name == "ACL" && size == 0) + if (item.Name.IsEqualTo("ACL") && size == 0) { item.IsAltStream = true; item.Name.Empty(); item.UnicodeName.SetFromAscii(".ACL"); } - else if (item.Name == "STM" && size != 0 && (size & 1) == 0) + else if (item.Name.IsEqualTo("STM") && size != 0 && (size & 1) == 0) { item.IsAltStream = true; item.Name.Empty(); diff --git a/CPP/7zip/Archive/RpmHandler.cpp b/CPP/7zip/Archive/RpmHandler.cpp index da2b6ee..4f8aaaa 100644 --- 
a/CPP/7zip/Archive/RpmHandler.cpp +++ b/CPP/7zip/Archive/RpmHandler.cpp @@ -330,11 +330,11 @@ void CHandler::AddSubFileExtension(AString &res) const if (!_compressor.IsEmpty()) { s = _compressor; - if (_compressor == "bzip2") + if (_compressor.IsEqualTo("bzip2")) s = "bz2"; - else if (_compressor == "gzip") + else if (_compressor.IsEqualTo("gzip")) s = "gz"; - else if (_compressor == "zstd") + else if (_compressor.IsEqualTo("zstd")) s = "zst"; } else diff --git a/CPP/7zip/Archive/VmdkHandler.cpp b/CPP/7zip/Archive/VmdkHandler.cpp index 9c293a3..221af21 100644 --- a/CPP/7zip/Archive/VmdkHandler.cpp +++ b/CPP/7zip/Archive/VmdkHandler.cpp @@ -202,9 +202,12 @@ struct CExtentInfo // PartitionUUID // DeviceIdentifier - bool IsType_ZERO() const { return Type == "ZERO"; } - // bool IsType_FLAT() const { return Type == "FLAT"; } - bool IsType_Flat() const { return Type == "FLAT" || Type == "VMFS" || Type == "VMFSRAW"; } + bool IsType_ZERO() const { return Type.IsEqualTo("ZERO"); } + // bool IsType_FLAT() const { return Type.IsEqualTo("FLAT"); } + bool IsType_Flat() const + { return Type.IsEqualTo("FLAT") + || Type.IsEqualTo("VMFS") + || Type.IsEqualTo("VMFSRAW"); } bool Parse(const char *s); }; diff --git a/CPP/7zip/Archive/Wim/WimIn.cpp b/CPP/7zip/Archive/Wim/WimIn.cpp index 614755a..a748e99 100644 --- a/CPP/7zip/Archive/Wim/WimIn.cpp +++ b/CPP/7zip/Archive/Wim/WimIn.cpp @@ -1814,7 +1814,7 @@ bool CWimXml::Parse() if (!Xml.Parse(utf)) return false; - if (Xml.Root.Name != "WIM") + if (!Xml.Root.Name.IsEqualTo("WIM")) return false; FOR_VECTOR (i, Xml.Root.SubItems) diff --git a/CPP/7zip/Archive/XarHandler.cpp b/CPP/7zip/Archive/XarHandler.cpp index 6ef8941..cba546e 100644 --- a/CPP/7zip/Archive/XarHandler.cpp +++ b/CPP/7zip/Archive/XarHandler.cpp @@ -266,7 +266,7 @@ struct CFile bool IsCopyMethod() const { - return Method.IsEmpty() || Method == "octet-stream"; + return Method.IsEmpty() || Method.IsEqualTo("octet-stream"); } void UpdateTotalPackSize(UInt64 &totalSize) const 
@@ -416,7 +416,7 @@ static bool AddItem(const CXmlItem &item, CObjectVector &files, int paren return true; if (level >= 1024) return false; - if (item.Name == "file") + if (item.Name.IsEqualTo("file")) { CFile file(parent); parent = (int)files.Size(); @@ -435,19 +435,19 @@ static bool AddItem(const CXmlItem &item, CObjectVector &files, int paren { file.Type = typeItem->GetSubString(); // file.LinkFrom = typeItem->GetPropVal("link"); - if (file.Type == "directory") + if (file.Type.IsEqualTo("directory")) file.IsDir = true; else { // file.IsDir = false; /* - else if (file.Type == "file") + else if (file.Type.IsEqualTo("file")) {} - else if (file.Type == "hardlink") + else if (file.Type.IsEqualTo("hardlink")) {} else */ - if (file.Type == "symlink") + if (file.Type.IsEqualTo("symlink")) file.Is_SymLink = true; // file.IsDir = false; } @@ -489,7 +489,7 @@ static bool AddItem(const CXmlItem &item, CObjectVector &files, int paren if (s.IsPrefixedBy(xx)) { s.DeleteFrontal(xx.Len()); - if (s == "gzip") + if (s.IsEqualTo("gzip")) s = METHOD_NAME_ZLIB; } } @@ -692,12 +692,13 @@ HRESULT CHandler::Open2(IInStream *stream) file.UpdateTotalPackSize(totalPackSize); if (file.Parent == -1) { - if (file.Name == "Payload" || file.Name == "Content") + if (file.Name.IsEqualTo("Payload") || + file.Name.IsEqualTo("Content")) { _mainSubfile = (Int32)(int)i; numMainFiles++; } - else if (file.Name == "PackageInfo") + else if (file.Name.IsEqualTo("PackageInfo")) _is_pkg = true; } } @@ -1210,9 +1211,9 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, else opRes = NExtract::NOperationResult::kUnsupportedMethod; } - else if (item.Method == METHOD_NAME_ZLIB) + else if (item.Method.IsEqualTo(METHOD_NAME_ZLIB)) coder = zlibCoder; - else if (item.Method == "bzip2") + else if (item.Method.IsEqualTo("bzip2")) coder = bzip2Coder; else opRes = NExtract::NOperationResult::kUnsupportedMethod; diff --git a/CPP/7zip/Archive/XzHandler.cpp b/CPP/7zip/Archive/XzHandler.cpp index 
907376c..5aaa405 100644 --- a/CPP/7zip/Archive/XzHandler.cpp +++ b/CPP/7zip/Archive/XzHandler.cpp @@ -446,7 +446,7 @@ void COpenCallbackWrap::Init(IArchiveOpenCallback *callback) struct CXzsCPP { CXzs p; - CXzsCPP() { Xzs_Construct(&p); } + CXzsCPP() { Xzs_CONSTRUCT(&p) } ~CXzsCPP() { Xzs_Free(&p, &g_Alloc); } }; @@ -536,6 +536,9 @@ HRESULT CHandler::Open2(IInStream *inStream, /* UInt32 flags, */ IArchiveOpenCal if (res2 == SZ_ERROR_ARCHIVE) return S_FALSE; + // what codes are possible here ? + // ?? res2 == SZ_ERROR_MEM : is possible here + // ?? res2 == SZ_ERROR_UNSUPPORTED : is possible here } else if (!isIndex) { @@ -1159,6 +1162,13 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt */ #ifndef Z7_ST + +#ifdef _WIN32 + // we don't use chunk multithreading inside lzma2 stream. + // so we don't set xzProps.lzma2Props.numThreadGroups. + if (_numThreadGroups > 1) + xzProps.numThreadGroups = _numThreadGroups; +#endif UInt32 numThreads = _numThreads; @@ -1183,6 +1193,8 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt CMultiMethodProps::SetMethodThreadsTo_IfNotFinded(oneMethodInfo, numThreads); } + // printf("\n====== GetProcessGroupAffinity : \n"); + UInt64 cs = _numSolidBytes; if (cs != XZ_PROPS_BLOCK_SIZE_AUTO) oneMethodInfo.AddProp_BlockSize2(cs); diff --git a/CPP/7zip/Archive/Zip/ZipUpdate.cpp b/CPP/7zip/Archive/Zip/ZipUpdate.cpp index bc047b7..b2684dc 100644 --- a/CPP/7zip/Archive/Zip/ZipUpdate.cpp +++ b/CPP/7zip/Archive/Zip/ZipUpdate.cpp @@ -250,13 +250,26 @@ struct CThreadInfo HRESULT CreateEvents() { - WRes wres = CompressEvent.CreateIfNotCreated_Reset(); + const WRes wres = CompressEvent.CreateIfNotCreated_Reset(); return HRESULT_FROM_WIN32(wres); } - HRESULT CreateThread() + // (group < 0) means no_group. 
+ HRESULT CreateThread_with_group( +#ifdef _WIN32 + int group +#endif + ) { - WRes wres = Thread.Create(CoderThread, this); + // tested in win10: If thread is created by another thread, + // child thread probably uses same group as parent thread. + // So we don't need to send (group) to encoder in created thread. + const WRes wres = +#ifdef _WIN32 + group >= 0 ? + Thread.Create_With_Group(CoderThread, this, (unsigned)group) : +#endif + Thread.Create(CoderThread, this); return HRESULT_FROM_WIN32(wres); } @@ -450,8 +463,12 @@ static HRESULT UpdateItemOldData( if (ui.NewProps) { if (item.HasDescriptor()) - return E_NOTIMPL; - + { + // we know compressed / uncompressed sizes and crc. + // so we remove descriptor here + item.Flags = (UInt16)(item.Flags & ~NFileHeader::NFlags::kDescriptorUsedMask); + // return E_NOTIMPL; + } // we keep ExternalAttrib and some another properties from old archive // item.ExternalAttrib = ui.Attrib; // if we don't change Comment, we keep Comment from OldProperties @@ -1000,6 +1017,9 @@ static HRESULT Update2( #ifndef Z7_ST UInt32 numThreads = options._numThreads; +#ifdef _WIN32 + const UInt32 numThreadGroups = options._numThreadGroups; +#endif UInt32 numZipThreads_limit = numThreads; if (numZipThreads_limit > numFilesToCompress) @@ -1014,12 +1034,10 @@ static HRESULT Update2( } { + // we reduce number of threads for 32-bit to reduce memory usege to 256 MB const UInt32 kNumMaxThreads = - #ifdef _WIN32 - 64; // _WIN32 supports only 64 threads in one group. So no need for more threads here - #else - 128; - #endif + // _WIN32 (64-bit) supports only 64 threads in one group. 
+ 8 << (sizeof(size_t) / 2); // 32 threads for 32-bit : 128 threads for 64-bit if (numThreads > kNumMaxThreads) numThreads = kNumMaxThreads; } @@ -1264,7 +1282,14 @@ static HRESULT Update2( threadInfo.Progress = threadInfo.ProgressSpec; threadInfo.ProgressSpec->Init(&mtCompressProgressMixer, i); threadInfo.MtSem = &mtSem; - RINOK(threadInfo.CreateThread()) + const HRESULT hres = + threadInfo.CreateThread_with_group( +#ifdef _WIN32 + (numThreadGroups > 1 && numThreads > 1) ? + (int)(i % numThreadGroups) : -1 +#endif + ); + RINOK(hres) } } diff --git a/CPP/7zip/Bundles/Alone/makefile b/CPP/7zip/Bundles/Alone/makefile index 7547590..9f81f9e 100644 --- a/CPP/7zip/Bundles/Alone/makefile +++ b/CPP/7zip/Bundles/Alone/makefile @@ -5,6 +5,7 @@ CFLAGS = $(CFLAGS) -DZ7_ZIP_LZFSE_DISABLE # CONSOLE_VARIANT_FLAGS=-DZ7_PROG_VARIANT_A # ZIP_FLAGS=-DZ7_ZIP_LZFSE_DISABLE +# USE_C_SORT=1 # USE_C_AES = 1 # USE_C_SHA = 1 # USE_C_LZFINDOPT = 1 @@ -221,7 +222,6 @@ C_OBJS = \ $O\Ppmd8.obj \ $O\Ppmd8Dec.obj \ $O\Ppmd8Enc.obj \ - $O\Sort.obj \ $O\SwapBytes.obj \ $O\Threads.obj \ $O\Xxh64.obj \ @@ -240,5 +240,6 @@ C_OBJS = \ !include "../../LzmaDec.mak" !include "../../Sha1.mak" !include "../../Sha256.mak" +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Alone7z/makefile b/CPP/7zip/Bundles/Alone7z/makefile index 89584e1..f0a813a 100644 --- a/CPP/7zip/Bundles/Alone7z/makefile +++ b/CPP/7zip/Bundles/Alone7z/makefile @@ -148,7 +148,6 @@ C_OBJS = \ $O\LzmaEnc.obj \ $O\MtCoder.obj \ $O\MtDec.obj \ - $O\Sort.obj \ $O\SwapBytes.obj \ $O\Threads.obj \ $O\Xz.obj \ @@ -164,5 +163,6 @@ C_OBJS = \ !include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha256.mak" +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Format7z/makefile b/CPP/7zip/Bundles/Format7z/makefile index fe6f94d..3d4754c 100644 --- a/CPP/7zip/Bundles/Format7z/makefile +++ b/CPP/7zip/Bundles/Format7z/makefile @@ -135,7 +135,6 @@ C_OBJS = \ 
$O\Ppmd7.obj \ $O\Ppmd7Dec.obj \ $O\Ppmd7Enc.obj \ - $O\Sort.obj \ $O\SwapBytes.obj \ $O\Threads.obj \ @@ -144,5 +143,6 @@ C_OBJS = \ !include "../../LzFindOpt.mak" !include "../../LzmaDec.mak" !include "../../Sha256.mak" +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/Bundles/Format7zF/Arc.mak b/CPP/7zip/Bundles/Format7zF/Arc.mak index 7166ab3..b1c6fe2 100644 --- a/CPP/7zip/Bundles/Format7zF/Arc.mak +++ b/CPP/7zip/Bundles/Format7zF/Arc.mak @@ -291,7 +291,6 @@ C_OBJS = \ $O\Sha3.obj \ $O\Sha512.obj \ $O\Sha512Opt.obj \ - $O\Sort.obj \ $O\SwapBytes.obj \ $O\Threads.obj \ $O\Xxh64.obj \ @@ -308,3 +307,4 @@ C_OBJS = \ !include "../../LzmaDec.mak" !include "../../Sha1.mak" !include "../../Sha256.mak" +!include "../../Sort.mak" diff --git a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp index eb28f5d..0c09807 100644 --- a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp +++ b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp @@ -229,7 +229,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */, } const FString tempDirPath = tempDir.GetPath(); - // tempDirPath = L"M:\\1\\"; // to test low disk space + // tempDirPath = "M:\\1\\"; // to test low disk space { bool isCorrupt = false; UString errorMessage; @@ -308,7 +308,7 @@ int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE /* hPrevInstance */, { if (appLaunched.IsEmpty()) { - appLaunched = L"setup.exe"; + appLaunched = "setup.exe"; if (!NFind::DoesFileExist_FollowLink(us2fs(appLaunched))) { if (!assumeYes) diff --git a/CPP/7zip/Common/InBuffer.h b/CPP/7zip/Common/InBuffer.h index a8ccb40..13ec088 100644 --- a/CPP/7zip/Common/InBuffer.h +++ b/CPP/7zip/Common/InBuffer.h @@ -97,6 +97,16 @@ public: size_t ReadBytesPart(Byte *buf, size_t size); size_t ReadBytes(Byte *buf, size_t size); + const Byte *Lookahead(size_t &rem) + { + rem = (size_t)(_bufLim - _buf); + if (!rem) + { + ReadBlock(); + rem = (size_t)(_bufLim - _buf); + } + return _buf; + } size_t Skip(size_t 
size); }; diff --git a/CPP/7zip/Common/MethodProps.cpp b/CPP/7zip/Common/MethodProps.cpp index d87884c..a5d90cf 100644 --- a/CPP/7zip/Common/MethodProps.cpp +++ b/CPP/7zip/Common/MethodProps.cpp @@ -324,15 +324,22 @@ void CCoderProps::AddProp(const CProp &prop) HRESULT CProps::SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce) const { - return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL); + return SetCoderProps_DSReduce_Aff(scp, dataSizeReduce, NULL, NULL, NULL); } HRESULT CProps::SetCoderProps_DSReduce_Aff( ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce, - const UInt64 *affinity) const + const UInt64 *affinity, + const UInt32 *affinityGroup, + const UInt64 *affinityInGroup) const { - CCoderProps coderProps(Props.Size() + (dataSizeReduce ? 1 : 0) + (affinity ? 1 : 0) ); + CCoderProps coderProps(Props.Size() + + (dataSizeReduce ? 1 : 0) + + (affinity ? 1 : 0) + + (affinityGroup ? 1 : 0) + + (affinityInGroup ? 1 : 0) + ); FOR_VECTOR (i, Props) coderProps.AddProp(Props[i]); if (dataSizeReduce) @@ -349,6 +356,20 @@ HRESULT CProps::SetCoderProps_DSReduce_Aff( prop.Value = *affinity; coderProps.AddProp(prop); } + if (affinityGroup) + { + CProp prop; + prop.Id = NCoderPropID::kThreadGroup; + prop.Value = *affinityGroup; + coderProps.AddProp(prop); + } + if (affinityInGroup) + { + CProp prop; + prop.Id = NCoderPropID::kAffinityInGroup; + prop.Value = *affinityInGroup; + coderProps.AddProp(prop); + } return coderProps.SetProps(scp); } @@ -409,6 +430,11 @@ static const CNameToPropID g_NameToPropID[] = { VT_UI4, "offset" }, { VT_UI4, "zhb" } /* + , { VT_UI4, "tgn" }, // kNumThreadGroups + , { VT_UI4, "tgi" }, // kThreadGroup + , { VT_UI8, "tga" }, // kAffinityInGroup + */ + /* , // { VT_UI4, "zhc" }, // { VT_UI4, "zhd" }, diff --git a/CPP/7zip/Common/MethodProps.h b/CPP/7zip/Common/MethodProps.h index a52f4bc..be108fa 100644 --- a/CPP/7zip/Common/MethodProps.h +++ b/CPP/7zip/Common/MethodProps.h @@ -80,7 +80,11 @@ struct 
CProps } HRESULT SetCoderProps(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce = NULL) const; - HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp, const UInt64 *dataSizeReduce, const UInt64 *affinity) const; + HRESULT SetCoderProps_DSReduce_Aff(ICompressSetCoderProperties *scp, + const UInt64 *dataSizeReduce, + const UInt64 *affinity, + const UInt32 *affinityGroup, + const UInt64 *affinityInGroup) const; }; class CMethodProps: public CProps diff --git a/CPP/7zip/Common/OutBuffer.h b/CPP/7zip/Common/OutBuffer.h index 88f5787..af78c4f 100644 --- a/CPP/7zip/Common/OutBuffer.h +++ b/CPP/7zip/Common/OutBuffer.h @@ -45,6 +45,7 @@ public: HRESULT Flush() throw(); void FlushWithCheck(); + Z7_FORCE_INLINE void WriteByte(Byte b) { UInt32 pos = _pos; @@ -54,10 +55,34 @@ public: if (pos == _limitPos) FlushWithCheck(); } + void WriteBytes(const void *data, size_t size) { - for (size_t i = 0; i < size; i++) - WriteByte(((const Byte *)data)[i]); + while (size) + { + UInt32 pos = _pos; + size_t cur = (size_t)(_limitPos - pos); + if (cur >= size) + cur = size; + size -= cur; + Byte *dest = _buf + pos; + pos += (UInt32)cur; + _pos = pos; +#if 0 + memcpy(dest, data, cur); + data = (const void *)((const Byte *)data + cur); +#else + const Byte * const lim = (const Byte *)data + cur; + do + { + *dest++ = *(const Byte *)data; + data = (const void *)((const Byte *)data + 1); + } + while (data != lim); +#endif + if (pos == _limitPos) + FlushWithCheck(); + } } Byte *GetOutBuffer(size_t &avail) diff --git a/CPP/7zip/Compress/BZip2Const.h b/CPP/7zip/Compress/BZip2Const.h index 0dfcfe5..3380aaf 100644 --- a/CPP/7zip/Compress/BZip2Const.h +++ b/CPP/7zip/Compress/BZip2Const.h @@ -46,7 +46,7 @@ const UInt32 kBlockSizeStep = 100000; const UInt32 kBlockSizeMax = kBlockSizeMultMax * kBlockSizeStep; const unsigned kNumSelectorsBits = 15; -const UInt32 kNumSelectorsMax = (2 + (kBlockSizeMax / kGroupSize)); +const unsigned kNumSelectorsMax = 2 + kBlockSizeMax / 
kGroupSize; const unsigned kRleModeRepSize = 4; diff --git a/CPP/7zip/Compress/BZip2Encoder.cpp b/CPP/7zip/Compress/BZip2Encoder.cpp index ef2555a..f8ee0c9 100644 --- a/CPP/7zip/Compress/BZip2Encoder.cpp +++ b/CPP/7zip/Compress/BZip2Encoder.cpp @@ -6,18 +6,20 @@ #include "../../../C/BwtSort.h" #include "../../../C/HuffEnc.h" -#include "BZip2Crc.h" #include "BZip2Encoder.h" -#include "Mtf8.h" namespace NCompress { namespace NBZip2 { -const unsigned kMaxHuffmanLenForEncoding = 16; // it must be < kMaxHuffmanLen = 20 - -static const UInt32 kBufferSize = (1 << 17); +#define HUFFMAN_LEN 16 +#if HUFFMAN_LEN > Z7_HUFFMAN_LEN_MAX + #error Stop_Compiling_Bad_HUFFMAN_LEN_BZip2Encoder +#endif + +static const size_t kBufferSize = 1 << 17; static const unsigned kNumHuffPasses = 4; + bool CThreadInfo::Alloc() { if (!m_BlockSorterIndex) @@ -27,11 +29,15 @@ bool CThreadInfo::Alloc() return false; } - if (!m_Block) + if (!m_Block_Base) { - m_Block = (Byte *)::MidAlloc(kBlockSizeMax * 5 + kBlockSizeMax / 10 + (20 << 10)); - if (!m_Block) + const unsigned kPadSize = 1 << 7; // we need at least 1 byte backward padding, becuase we use (m_Block - 1) pointer; + m_Block_Base = (Byte *)::MidAlloc(kBlockSizeMax * 5 + + kBlockSizeMax / 10 + (20 << 10) + + kPadSize); + if (!m_Block_Base) return false; + m_Block = m_Block_Base + kPadSize; m_MtfArray = m_Block + kBlockSizeMax; m_TempArray = m_MtfArray + kBlockSizeMax * 2 + 2; } @@ -42,8 +48,8 @@ void CThreadInfo::Free() { ::BigFree(m_BlockSorterIndex); m_BlockSorterIndex = NULL; - ::MidFree(m_Block); - m_Block = NULL; + ::MidFree(m_Block_Base); + m_Block_Base = NULL; } #ifndef Z7_ST @@ -60,6 +66,14 @@ HRESULT CThreadInfo::Create() if (wres == 0) { wres = CanWriteEvent.Create(); if (wres == 0) { +#ifdef _WIN32 + if (Encoder->_props.NumThreadGroups != 0) + { + const UInt32 group = ThreadNextGroup_GetNext(&Encoder->ThreadNextGroup); + wres = Thread.Create_With_Group(MFThread, this, group, 0); // affinity + } + else +#endif if 
(Encoder->_props.Affinity != 0) wres = Thread.Create_With_Affinity(MFThread, this, (CAffinityMask)Encoder->_props.Affinity); else @@ -216,94 +230,251 @@ void CEncoder::Free() } #endif +struct CRleEncoder +{ + const Byte *_src; + const Byte *_srcLim; + Byte *_dest; + const Byte *_destLim; + Byte _prevByte; + unsigned _numReps; + + void Encode(); +}; + +Z7_NO_INLINE +void CRleEncoder::Encode() +{ + const Byte *src = _src; + const Byte * const srcLim = _srcLim; + Byte *dest = _dest; + const Byte * const destLim = _destLim; + Byte prev = _prevByte; + unsigned numReps = _numReps; + // (dest < destLim) + // src = srcLim; // for debug + while (dest < destLim) + { + if (src == srcLim) + break; + const Byte b = *src++; + if (b != prev) + { + if (numReps >= kRleModeRepSize) + *dest++ = (Byte)(numReps - kRleModeRepSize); + *dest++ = b; + numReps = 1; + prev = b; + /* + { // speed optimization code: + if (dest >= destLim || src == srcLim) + break; + const Byte b2 = *src++; + *dest++ = b2; + numReps += (prev == b2); + prev = b2; + } + */ + continue; + } + numReps++; + if (numReps <= kRleModeRepSize) + *dest++ = b; + else if (numReps == kRleModeRepSize + 255) + { + *dest++ = (Byte)(numReps - kRleModeRepSize); + numReps = 0; + } + } + _src = src; + _dest = dest; + _prevByte = prev; + _numReps = numReps; + // (dest <= destLim + 1) +} + + +// out: return value is blockSize: size of data filled in buffer[]: +// (returned_blockSize <= _props.BlockSizeMult * kBlockSizeStep) UInt32 CEncoder::ReadRleBlock(Byte *buffer) { + CRleEncoder rle; UInt32 i = 0; - Byte prevByte; - if (m_InStream.ReadByte(prevByte)) + if (m_InStream.ReadByte(rle._prevByte)) { NumBlocks++; - const UInt32 blockSize = _props.BlockSizeMult * kBlockSizeStep - 1; - unsigned numReps = 1; - buffer[i++] = prevByte; - while (i < blockSize) // "- 1" to support RLE + const UInt32 blockSize = _props.BlockSizeMult * kBlockSizeStep - 1; // -1 for RLE + rle._destLim = buffer + blockSize; + rle._numReps = 1; + buffer[i++] = 
rle._prevByte; + while (i < blockSize) { - Byte b; - if (!m_InStream.ReadByte(b)) + rle._dest = buffer + i; + size_t rem; + const Byte * const ptr = m_InStream.Lookahead(rem); + if (rem == 0) break; - if (b != prevByte) - { - if (numReps >= kRleModeRepSize) - buffer[i++] = (Byte)(numReps - kRleModeRepSize); - buffer[i++] = b; - numReps = 1; - prevByte = b; - continue; - } - numReps++; - if (numReps <= kRleModeRepSize) - buffer[i++] = b; - else if (numReps == kRleModeRepSize + 255) - { - buffer[i++] = (Byte)(numReps - kRleModeRepSize); - numReps = 0; - } + rle._src = ptr; + rle._srcLim = ptr + rem; + rle.Encode(); + m_InStream.Skip((size_t)(rle._src - ptr)); + i = (UInt32)(size_t)(rle._dest - buffer); + // (i <= blockSize + 1) } - // it's to support original BZip2 decoder - if (numReps >= kRleModeRepSize) - buffer[i++] = (Byte)(numReps - kRleModeRepSize); + const int n = (int)rle._numReps - (int)kRleModeRepSize; + if (n >= 0) + buffer[i++] = (Byte)n; } return i; } -void CThreadInfo::WriteBits2(UInt32 value, unsigned numBits) { m_OutStreamCurrent->WriteBits(value, numBits); } -void CThreadInfo::WriteByte2(Byte b) { WriteBits2(b, 8); } -void CThreadInfo::WriteBit2(Byte v) { WriteBits2(v, 1); } -void CThreadInfo::WriteCrc2(UInt32 v) -{ - for (unsigned i = 0; i < 4; i++) - WriteByte2(((Byte)(v >> (24 - i * 8)))); + + +Z7_NO_INLINE +void CThreadInfo::WriteBits2(UInt32 value, unsigned numBits) + { m_OutStreamCurrent.WriteBits(value, numBits); } +/* +Z7_NO_INLINE +void CThreadInfo::WriteByte2(unsigned b) + { m_OutStreamCurrent.WriteByte(b); } +*/ +// void CEncoder::WriteBits(UInt32 value, unsigned numBits) { m_OutStream.WriteBits(value, numBits); } +Z7_NO_INLINE +void CEncoder::WriteByte(Byte b) { m_OutStream.WriteByte(b); } + + +#define WRITE_BITS_UPDATE(value, numBits) \ +{ \ + numBits -= _bitPos; \ + const UInt32 hi = value >> numBits; \ + *_buf++ = (Byte)(_curByte | hi); \ + value -= hi << numBits; \ + _bitPos = 8; \ + _curByte = 0; \ } -void CEncoder::WriteBits(UInt32 
value, unsigned numBits) { m_OutStream.WriteBits(value, numBits); } -void CEncoder::WriteByte(Byte b) { WriteBits(b, 8); } -// void CEncoder::WriteBit(Byte v) { WriteBits(v, 1); } -void CEncoder::WriteCrc(UInt32 v) -{ - for (unsigned i = 0; i < 4; i++) - WriteByte(((Byte)(v >> (24 - i * 8)))); +#if HUFFMAN_LEN > 16 + +#define WRITE_BITS_HUFF(value2, numBits2) \ +{ \ + UInt32 value = value2; \ + unsigned numBits = numBits2; \ + while (numBits >= _bitPos) { \ + WRITE_BITS_UPDATE(value, numBits) \ + } \ + _bitPos -= numBits; \ + _curByte |= (value << _bitPos); \ +} + +#else // HUFFMAN_LEN <= 16 + +// numBits2 <= 16 is supported +#define WRITE_BITS_HUFF(value2, numBits2) \ +{ \ + UInt32 value = value2; \ + unsigned numBits = numBits2; \ + if (numBits >= _bitPos) \ + { \ + WRITE_BITS_UPDATE(value, numBits) \ + if (numBits >= _bitPos) \ + { \ + numBits -= _bitPos; \ + const UInt32 hi = value >> numBits; \ + *_buf++ = (Byte)hi; \ + value -= hi << numBits; \ + } \ + } \ + _bitPos -= numBits; \ + _curByte |= (value << _bitPos); \ +} + +#endif + +#define WRITE_BITS_8(value2, numBits2) \ +{ \ + UInt32 value = value2; \ + unsigned numBits = numBits2; \ + if (numBits >= _bitPos) \ + { \ + WRITE_BITS_UPDATE(value, numBits) \ + } \ + _bitPos -= numBits; \ + _curByte |= (value << _bitPos); \ +} + +#define WRITE_BIT_PRE \ + { _bitPos--; } + +#define WRITE_BIT_POST \ +{ \ + if (_bitPos == 0) \ + { \ + *_buf++ = (Byte)_curByte; \ + _curByte = 0; \ + _bitPos = 8; \ + } \ +} + +#define WRITE_BIT_0 \ +{ \ + WRITE_BIT_PRE \ + WRITE_BIT_POST \ +} + +#define WRITE_BIT_1 \ +{ \ + WRITE_BIT_PRE \ + _curByte |= 1u << _bitPos; \ + WRITE_BIT_POST \ } // blockSize > 0 void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) { - WriteBit2(0); // Randomised = false - + // WriteBit2(0); // Randomised = false { - UInt32 origPtr = BlockSort(m_BlockSorterIndex, block, blockSize); + const UInt32 origPtr = BlockSort(m_BlockSorterIndex, block, blockSize); // if (m_BlockSorterIndex[origPtr] != 
0) throw 1; m_BlockSorterIndex[origPtr] = blockSize; - WriteBits2(origPtr, kNumOrigBits); + WriteBits2(origPtr, kNumOrigBits + 1); // + 1 for additional high bit flag (Randomised = false) } - - CMtf8Encoder mtf; - unsigned numInUse = 0; + Byte mtfBuf[256]; + // memset(mtfBuf, 0, sizeof(mtfBuf)); // to disable MSVC warning + unsigned numInUse; { Byte inUse[256]; Byte inUse16[16]; - UInt32 i; + unsigned i; for (i = 0; i < 256; i++) inUse[i] = 0; for (i = 0; i < 16; i++) inUse16[i] = 0; - for (i = 0; i < blockSize; i++) - inUse[block[i]] = 1; + { + const Byte * cur = block; + block = block + (size_t)blockSize - 1; + if (cur != block) + { + do + { + const unsigned b0 = cur[0]; + const unsigned b1 = cur[1]; + cur += 2; + inUse[b0] = 1; + inUse[b1] = 1; + } + while (cur < block); + } + if (cur == block) + inUse[cur[0]] = 1; + block -= blockSize; // block pointer is (original_block - 1) + } + numInUse = 0; for (i = 0; i < 256; i++) if (inUse[i]) { inUse16[i >> 4] = 1; - mtf.Buf[numInUse++] = (Byte)i; + mtfBuf[numInUse++] = (Byte)i; } for (i = 0; i < 16; i++) WriteBit2(inUse16[i]); @@ -311,65 +482,88 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) if (inUse16[i >> 4]) WriteBit2(inUse[i]); } - unsigned alphaSize = numInUse + 2; + const unsigned alphaSize = numInUse + 2; - Byte *mtfs = m_MtfArray; - UInt32 mtfArraySize = 0; UInt32 symbolCounts[kMaxAlphaSize]; { for (unsigned i = 0; i < kMaxAlphaSize; i++) symbolCounts[i] = 0; + symbolCounts[(size_t)alphaSize - 1] = 1; } + Byte *mtfs = m_MtfArray; { - UInt32 rleSize = 0; - UInt32 i = 0; const UInt32 *bsIndex = m_BlockSorterIndex; - block--; + const UInt32 *bsIndex_rle = bsIndex; + const UInt32 * const bsIndex_end = bsIndex + blockSize; + // block--; // backward fix + // block pointer is (original_block - 1) do { - unsigned pos = mtf.FindAndMove(block[bsIndex[i]]); - if (pos == 0) - rleSize++; - else + const Byte v = block[*bsIndex++]; + Byte a = mtfBuf[0]; + if (v != a) { - while (rleSize != 0) + 
mtfBuf[0] = v; { - rleSize--; - mtfs[mtfArraySize++] = (Byte)(rleSize & 1); - symbolCounts[rleSize & 1]++; - rleSize >>= 1; - } - if (pos >= 0xFE) - { - mtfs[mtfArraySize++] = 0xFF; - mtfs[mtfArraySize++] = (Byte)(pos - 0xFE); + UInt32 rleSize = (UInt32)(size_t)(bsIndex - bsIndex_rle) - 1; + bsIndex_rle = bsIndex; + while (rleSize) + { + const unsigned sym = (unsigned)(--rleSize & 1); + *mtfs++ = (Byte)sym; + symbolCounts[sym]++; + rleSize >>= 1; + } } + unsigned pos1 = 2; // = real_pos + 1 + Byte b; + b = mtfBuf[1]; mtfBuf[1] = a; if (v != b) + { a = mtfBuf[2]; mtfBuf[2] = b; if (v == a) pos1 = 3; + else { b = mtfBuf[3]; mtfBuf[3] = a; if (v == b) pos1 = 4; else - mtfs[mtfArraySize++] = (Byte)(pos + 1); - symbolCounts[(size_t)pos + 1]++; + { + Byte *m = mtfBuf + 7; + for (;;) + { + a = m[-3]; m[-3] = b; if (v == a) { pos1 = (unsigned)(size_t)(m - (mtfBuf + 2)); break; } + b = m[-2]; m[-2] = a; if (v == b) { pos1 = (unsigned)(size_t)(m - (mtfBuf + 1)); break; } + a = m[-1]; m[-1] = b; if (v == a) { pos1 = (unsigned)(size_t)(m - (mtfBuf )); break; } + b = m[ 0]; m[ 0] = a; m += 4; if (v == b) { pos1 = (unsigned)(size_t)(m - (mtfBuf + 3)); break; } + } + }}} + symbolCounts[pos1]++; + if (pos1 >= 0xff) + { + *mtfs++ = 0xff; + // pos1 -= 0xff; + pos1++; // we need only low byte + } + *mtfs++ = (Byte)pos1; } } - while (++i < blockSize); + while (bsIndex < bsIndex_end); - while (rleSize != 0) + UInt32 rleSize = (UInt32)(size_t)(bsIndex - bsIndex_rle); + while (rleSize) { - rleSize--; - mtfs[mtfArraySize++] = (Byte)(rleSize & 1); - symbolCounts[rleSize & 1]++; + const unsigned sym = (unsigned)(--rleSize & 1); + *mtfs++ = (Byte)sym; + symbolCounts[sym]++; rleSize >>= 1; } - - if (alphaSize < 256) - mtfs[mtfArraySize++] = (Byte)(alphaSize - 1); - else + + unsigned d = alphaSize - 1; + if (alphaSize >= 256) { - mtfs[mtfArraySize++] = 0xFF; - mtfs[mtfArraySize++] = (Byte)(alphaSize - 256); + *mtfs++ = 0xff; + d = alphaSize; // (-256) } - symbolCounts[(size_t)alphaSize - 1]++; 
+ *mtfs++ = (Byte)d; } + const Byte * const mtf_lim = mtfs; + UInt32 numSymbols = 0; { for (unsigned i = 0; i < kMaxAlphaSize; i++) @@ -378,34 +572,30 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) unsigned bestNumTables = kNumTablesMin; UInt32 bestPrice = 0xFFFFFFFF; - UInt32 startPos = m_OutStreamCurrent->GetPos(); - Byte startCurByte = m_OutStreamCurrent->GetCurByte(); + const UInt32 startPos = m_OutStreamCurrent.GetPos(); + const unsigned startCurByte = m_OutStreamCurrent.GetCurByte(); for (unsigned nt = kNumTablesMin; nt <= kNumTablesMax + 1; nt++) { unsigned numTables; if (m_OptimizeNumTables) { - m_OutStreamCurrent->SetPos(startPos); - m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte); - if (nt <= kNumTablesMax) - numTables = nt; - else - numTables = bestNumTables; + m_OutStreamCurrent.SetPos(startPos); + m_OutStreamCurrent.SetCurState(startPos & 7, startCurByte); + numTables = (nt <= kNumTablesMax ? nt : bestNumTables); } else { - if (numSymbols < 200) numTables = 2; - else if (numSymbols < 600) numTables = 3; + if (numSymbols < 200) numTables = 2; + else if (numSymbols < 600) numTables = 3; else if (numSymbols < 1200) numTables = 4; else if (numSymbols < 2400) numTables = 5; - else numTables = 6; + else numTables = 6; } WriteBits2(numTables, kNumTablesBits); - - UInt32 numSelectors = (numSymbols + kGroupSize - 1) / kGroupSize; - WriteBits2(numSelectors, kNumSelectorsBits); + const unsigned numSelectors = (numSymbols + kGroupSize - 1) / kGroupSize; + WriteBits2((UInt32)numSelectors, kNumSelectorsBits); { UInt32 remFreq = numSymbols; @@ -436,28 +626,23 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) for (unsigned pass = 0; pass < kNumHuffPasses; pass++) { + memset(Freqs, 0, sizeof(Freqs[0]) * numTables); + // memset(Freqs, 0, sizeof(Freqs)); { - unsigned t = 0; - do - memset(Freqs[t], 0, sizeof(Freqs[t])); - while (++t < numTables); - } - - { - UInt32 mtfPos = 0; + mtfs = m_MtfArray; UInt32 g = 0; do { 
- UInt32 symbols[kGroupSize]; + unsigned symbols[kGroupSize]; unsigned i = 0; do { - UInt32 symbol = mtfs[mtfPos++]; + UInt32 symbol = *mtfs++; if (symbol >= 0xFF) - symbol += mtfs[mtfPos++]; + symbol += *mtfs++; symbols[i] = symbol; } - while (++i < kGroupSize && mtfPos < mtfArraySize); + while (++i < kGroupSize && mtfs < mtf_lim); UInt32 bestPrice2 = 0xFFFFFFFF; unsigned t = 0; @@ -482,7 +667,7 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) freqs[symbols[j]]++; while (++j < i); } - while (mtfPos < mtfArraySize); + while (mtfs < mtf_lim); } unsigned t = 0; @@ -494,11 +679,15 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) if (freqs[i] == 0) freqs[i] = 1; while (++i < alphaSize); - Huffman_Generate(freqs, Codes[t], Lens[t], kMaxAlphaSize, kMaxHuffmanLenForEncoding); + Huffman_Generate(freqs, Codes[t], Lens[t], kMaxAlphaSize, HUFFMAN_LEN); } while (++t < numTables); } + unsigned _bitPos; // 0 < _bitPos <= 8 : number of non-filled low bits in _curByte + unsigned _curByte; // low (_bitPos) bits are zeros + // high (8 - _bitPos) bits are filled + Byte *_buf; { Byte mtfSel[kNumTablesMax]; { @@ -507,81 +696,97 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) mtfSel[t] = (Byte)t; while (++t < numTables); } + + _bitPos = m_OutStreamCurrent._bitPos; + _curByte = m_OutStreamCurrent._curByte; + _buf = m_OutStreamCurrent._buf; + // stream.Init_from_Global(m_OutStreamCurrent); - UInt32 i = 0; + const Byte *selectors = m_Selectors; + const Byte * const selectors_lim = selectors + numSelectors; + Byte prev = 0; // mtfSel[0]; do { - Byte sel = m_Selectors[i]; - unsigned pos; - for (pos = 0; mtfSel[pos] != sel; pos++) - WriteBit2(1); - WriteBit2(0); - for (; pos > 0; pos--) - mtfSel[pos] = mtfSel[(size_t)pos - 1]; - mtfSel[0] = sel; + const Byte sel = *selectors++; + if (prev != sel) + { + Byte *mtfSel_cur = &mtfSel[1]; + for (;;) + { + WRITE_BIT_1 + const Byte next = *mtfSel_cur; + *mtfSel_cur++ = prev; + prev = 
next; + if (next == sel) + break; + } + // mtfSel[0] = sel; + } + WRITE_BIT_0 } - while (++i < numSelectors); + while (selectors != selectors_lim); } - { unsigned t = 0; do { const Byte *lens = Lens[t]; - UInt32 len = lens[0]; - WriteBits2(len, kNumLevelsBits); + unsigned len = lens[0]; + WRITE_BITS_8(len, kNumLevelsBits) unsigned i = 0; do { - UInt32 level = lens[i]; + const unsigned level = lens[i]; while (len != level) { - WriteBit2(1); + WRITE_BIT_1 if (len < level) { - WriteBit2(0); len++; + WRITE_BIT_0 } else { - WriteBit2(1); len--; + WRITE_BIT_1 } } - WriteBit2(0); + WRITE_BIT_0 } while (++i < alphaSize); } while (++t < numTables); } - { - UInt32 groupSize = 0; - UInt32 groupIndex = 0; + UInt32 groupSize = 1; + const Byte *selectors = m_Selectors; const Byte *lens = NULL; const UInt32 *codes = NULL; - UInt32 mtfPos = 0; + mtfs = m_MtfArray; do { - UInt32 symbol = mtfs[mtfPos++]; + unsigned symbol = *mtfs++; if (symbol >= 0xFF) - symbol += mtfs[mtfPos++]; - if (groupSize == 0) + symbol += *mtfs++; + if (--groupSize == 0) { groupSize = kGroupSize; - unsigned t = m_Selectors[groupIndex++]; + const unsigned t = *selectors++; lens = Lens[t]; codes = Codes[t]; } - groupSize--; - m_OutStreamCurrent->WriteBits(codes[symbol], lens[symbol]); + WRITE_BITS_HUFF(codes[symbol], lens[symbol]) } - while (mtfPos < mtfArraySize); + while (mtfs < mtf_lim); } + // Restore_from_Local: + m_OutStreamCurrent._bitPos = _bitPos; + m_OutStreamCurrent._curByte = _curByte; + m_OutStreamCurrent._buf = _buf; if (!m_OptimizeNumTables) break; - UInt32 price = m_OutStreamCurrent->GetPos() - startPos; + const UInt32 price = m_OutStreamCurrent.GetPos() - startPos; if (price <= bestPrice) { if (nt == kNumTablesMax) @@ -592,6 +797,7 @@ void CThreadInfo::EncodeBlock(const Byte *block, UInt32 blockSize) } } + // blockSize > 0 UInt32 CThreadInfo::EncodeBlockWithHeaders(const Byte *block, UInt32 blockSize) { @@ -603,148 +809,134 @@ UInt32 CThreadInfo::EncodeBlockWithHeaders(const Byte *block, 
UInt32 blockSize) WriteByte2(kBlockSig5); CBZip2Crc crc; - unsigned numReps = 0; - Byte prevByte = block[0]; - UInt32 i = 0; - do + const Byte * const lim = block + blockSize; + unsigned b = *block++; + crc.UpdateByte(b); + for (;;) { - Byte b = block[i]; - if (numReps == kRleModeRepSize) - { - for (; b > 0; b--) - crc.UpdateByte(prevByte); - numReps = 0; - continue; - } - if (prevByte == b) - numReps++; - else - { - numReps = 1; - prevByte = b; - } - crc.UpdateByte(b); + const unsigned prev = b; + if (block >= lim) { break; } b = *block++; crc.UpdateByte(b); if (prev != b) continue; + if (block >= lim) { break; } b = *block++; crc.UpdateByte(b); if (prev != b) continue; + if (block >= lim) { break; } b = *block++; crc.UpdateByte(b); if (prev != b) continue; + if (block >= lim) { break; } b = *block++; if (b) do crc.UpdateByte(prev); while (--b); + if (block >= lim) { break; } b = *block++; crc.UpdateByte(b); } - while (++i < blockSize); - UInt32 crcRes = crc.GetDigest(); - WriteCrc2(crcRes); - EncodeBlock(block, blockSize); + const UInt32 crcRes = crc.GetDigest(); + for (int i = 24; i >= 0; i -= 8) + WriteByte2((Byte)(crcRes >> i)); + EncodeBlock(lim - blockSize, blockSize); return crcRes; } + void CThreadInfo::EncodeBlock2(const Byte *block, UInt32 blockSize, UInt32 numPasses) { - UInt32 numCrcs = m_NumCrcs; - bool needCompare = false; + const UInt32 numCrcs = m_NumCrcs; - UInt32 startBytePos = m_OutStreamCurrent->GetBytePos(); - UInt32 startPos = m_OutStreamCurrent->GetPos(); - Byte startCurByte = m_OutStreamCurrent->GetCurByte(); - Byte endCurByte = 0; - UInt32 endPos = 0; + const UInt32 startBytePos = m_OutStreamCurrent.GetBytePos(); + const UInt32 startPos = m_OutStreamCurrent.GetPos(); + const unsigned startCurByte = m_OutStreamCurrent.GetCurByte(); + unsigned endCurByte = 0; + UInt32 endPos = 0; // 0 means no no additional passes if (numPasses > 1 && blockSize >= (1 << 10)) { - UInt32 blockSize0 = blockSize / 2; // ???? 
+ UInt32 bs0 = blockSize / 2; + for (; bs0 < blockSize && + (block[ bs0 ] == + block[(size_t)bs0 - 1] || + block[(size_t)bs0 - 1] == + block[(size_t)bs0 - 2]); + bs0++) + {} - for (; (block[blockSize0] == block[(size_t)blockSize0 - 1] - || block[(size_t)blockSize0 - 1] == block[(size_t)blockSize0 - 2]) - && blockSize0 < blockSize; - blockSize0++); - - if (blockSize0 < blockSize) + if (bs0 < blockSize) { - EncodeBlock2(block, blockSize0, numPasses - 1); - EncodeBlock2(block + blockSize0, blockSize - blockSize0, numPasses - 1); - endPos = m_OutStreamCurrent->GetPos(); - endCurByte = m_OutStreamCurrent->GetCurByte(); - if ((endPos & 7) > 0) + EncodeBlock2(block, bs0, numPasses - 1); + EncodeBlock2(block + bs0, blockSize - bs0, numPasses - 1); + endPos = m_OutStreamCurrent.GetPos(); + endCurByte = m_OutStreamCurrent.GetCurByte(); + // we prepare next byte as identical byte to starting byte for main encoding attempt: + if (endPos & 7) WriteBits2(0, 8 - (endPos & 7)); - m_OutStreamCurrent->SetCurState((startPos & 7), startCurByte); - needCompare = true; + m_OutStreamCurrent.SetCurState((startPos & 7), startCurByte); } } - UInt32 startBytePos2 = m_OutStreamCurrent->GetBytePos(); - UInt32 startPos2 = m_OutStreamCurrent->GetPos(); - UInt32 crcVal = EncodeBlockWithHeaders(block, blockSize); - UInt32 endPos2 = m_OutStreamCurrent->GetPos(); + const UInt32 startBytePos2 = m_OutStreamCurrent.GetBytePos(); + const UInt32 startPos2 = m_OutStreamCurrent.GetPos(); + const UInt32 crcVal = EncodeBlockWithHeaders(block, blockSize); - if (needCompare) + if (endPos) { - UInt32 size2 = endPos2 - startPos2; - if (size2 < endPos - startPos) + const UInt32 size2 = m_OutStreamCurrent.GetPos() - startPos2; + if (size2 >= endPos - startPos) { - UInt32 numBytes = m_OutStreamCurrent->GetBytePos() - startBytePos2; - Byte *buffer = m_OutStreamCurrent->GetStream(); - for (UInt32 i = 0; i < numBytes; i++) - buffer[startBytePos + i] = buffer[startBytePos2 + i]; - m_OutStreamCurrent->SetPos(startPos + 
endPos2 - startPos2); - m_NumCrcs = numCrcs; - m_CRCs[m_NumCrcs++] = crcVal; - } - else - { - m_OutStreamCurrent->SetPos(endPos); - m_OutStreamCurrent->SetCurState((endPos & 7), endCurByte); + m_OutStreamCurrent.SetPos(endPos); + m_OutStreamCurrent.SetCurState((endPos & 7), endCurByte); + return; } + const UInt32 numBytes = m_OutStreamCurrent.GetBytePos() - startBytePos2; + Byte * const buffer = m_OutStreamCurrent.GetStream(); + memmove(buffer + startBytePos, buffer + startBytePos2, numBytes); + m_OutStreamCurrent.SetPos(startPos + size2); + // we don't call m_OutStreamCurrent.SetCurState() here because + // m_OutStreamCurrent._curByte is correct already } - else - { - m_NumCrcs = numCrcs; - m_CRCs[m_NumCrcs++] = crcVal; - } + m_CRCs[numCrcs] = crcVal; + m_NumCrcs = numCrcs + 1; } + HRESULT CThreadInfo::EncodeBlock3(UInt32 blockSize) { - CMsbfEncoderTemp outStreamTemp; + CMsbfEncoderTemp &outStreamTemp = m_OutStreamCurrent; outStreamTemp.SetStream(m_TempArray); outStreamTemp.Init(); - m_OutStreamCurrent = &outStreamTemp; - m_NumCrcs = 0; EncodeBlock2(m_Block, blockSize, Encoder->_props.NumPasses); - #ifndef Z7_ST +#ifndef Z7_ST if (Encoder->MtMode) Encoder->ThreadsInfo[m_BlockIndex].CanWriteEvent.Lock(); - #endif +#endif + for (UInt32 i = 0; i < m_NumCrcs; i++) Encoder->CombinedCrc.Update(m_CRCs[i]); - Encoder->WriteBytes(m_TempArray, outStreamTemp.GetPos(), outStreamTemp.GetCurByte()); + Encoder->WriteBytes(m_TempArray, outStreamTemp.GetPos(), outStreamTemp.GetNonFlushedByteBits()); HRESULT res = S_OK; - #ifndef Z7_ST + +#ifndef Z7_ST if (Encoder->MtMode) { UInt32 blockIndex = m_BlockIndex + 1; if (blockIndex == Encoder->NumThreads) blockIndex = 0; - if (Encoder->Progress) { const UInt64 packSize = Encoder->m_OutStream.GetProcessedSize(); res = Encoder->Progress->SetRatioInfo(&m_UnpackSize, &packSize); } - Encoder->ThreadsInfo[blockIndex].CanWriteEvent.Set(); } - #endif +#endif return res; } -void CEncoder::WriteBytes(const Byte *data, UInt32 sizeInBits, Byte 
lastByte) +void CEncoder::WriteBytes(const Byte *data, UInt32 sizeInBits, unsigned lastByteBits) { - UInt32 bytesSize = (sizeInBits >> 3); - for (UInt32 i = 0; i < bytesSize; i++) - m_OutStream.WriteBits(data[i], 8); - WriteBits(lastByte, (sizeInBits & 7)); + m_OutStream.WriteBytes(data, sizeInBits >> 3); + sizeInBits &= 7; + if (sizeInBits) + m_OutStream.WriteBits(lastByteBits, sizeInBits); } HRESULT CEncoder::CodeReal(ISequentialInStream *inStream, ISequentialOutStream *outStream, const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress) { + ThreadNextGroup_Init(&ThreadNextGroup, _props.NumThreadGroups, 0); // startGroup + NumBlocks = 0; #ifndef Z7_ST Progress = progress; @@ -823,11 +1015,11 @@ HRESULT CEncoder::CodeReal(ISequentialInStream *inStream, ISequentialOutStream * { CThreadInfo &ti = #ifndef Z7_ST - ThreadsInfo[0]; + ThreadsInfo[0]; #else - ThreadsInfo; + ThreadsInfo; #endif - UInt32 blockSize = ReadRleBlock(ti.m_Block); + const UInt32 blockSize = ReadRleBlock(ti.m_Block); if (blockSize == 0) break; RINOK(ti.EncodeBlock3(blockSize)) @@ -845,8 +1037,11 @@ HRESULT CEncoder::CodeReal(ISequentialInStream *inStream, ISequentialOutStream * WriteByte(kFinSig3); WriteByte(kFinSig4); WriteByte(kFinSig5); - - WriteCrc(CombinedCrc.GetDigest()); + { + const UInt32 v = CombinedCrc.GetDigest(); + for (int i = 24; i >= 0; i -= 8) + WriteByte((Byte)(v >> i)); + } RINOK(Flush()) if (!m_InStream.WasFinished()) return E_FAIL; @@ -869,14 +1064,21 @@ Z7_COM7F_IMF(CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA for (UInt32 i = 0; i < numProps; i++) { const PROPVARIANT &prop = coderProps[i]; - PROPID propID = propIDs[i]; + const PROPID propID = propIDs[i]; if (propID == NCoderPropID::kAffinity) { - if (prop.vt == VT_UI8) - props.Affinity = prop.uhVal.QuadPart; - else + if (prop.vt != VT_UI8) return E_INVALIDARG; + props.Affinity = prop.uhVal.QuadPart; + continue; + } + + if (propID == NCoderPropID::kNumThreadGroups) + { + 
if (prop.vt != VT_UI4) + return E_INVALIDARG; + props.NumThreadGroups = (UInt32)prop.ulVal; continue; } @@ -884,7 +1086,7 @@ Z7_COM7F_IMF(CEncoder::SetCoderProperties(const PROPID *propIDs, const PROPVARIA continue; if (prop.vt != VT_UI4) return E_INVALIDARG; - UInt32 v = (UInt32)prop.ulVal; + const UInt32 v = (UInt32)prop.ulVal; switch (propID) { case NCoderPropID::kNumPasses: props.NumPasses = v; break; diff --git a/CPP/7zip/Compress/BZip2Encoder.h b/CPP/7zip/Compress/BZip2Encoder.h index 4a04fbd..bcb4025 100644 --- a/CPP/7zip/Compress/BZip2Encoder.h +++ b/CPP/7zip/Compress/BZip2Encoder.h @@ -3,7 +3,6 @@ #ifndef ZIP7_INC_COMPRESS_BZIP2_ENCODER_H #define ZIP7_INC_COMPRESS_BZIP2_ENCODER_H -#include "../../Common/Defs.h" #include "../../Common/MyCom.h" #ifndef Z7_ST @@ -23,80 +22,114 @@ namespace NCompress { namespace NBZip2 { -class CMsbfEncoderTemp +const unsigned kNumPassesMax = 10; + +struct CMsbfEncoderTemp { - UInt32 _pos; - unsigned _bitPos; - Byte _curByte; + unsigned _bitPos; // 0 < _bitPos <= 8 : number of non-filled low bits in _curByte + unsigned _curByte; // low (_bitPos) bits are zeros + // high (8 - _bitPos) bits are filled Byte *_buf; -public: - void SetStream(Byte *buf) { _buf = buf; } - Byte *GetStream() const { return _buf; } + Byte *_buf_base; + void SetStream(Byte *buf) { _buf_base = _buf = buf; } + Byte *GetStream() const { return _buf_base; } void Init() { - _pos = 0; _bitPos = 8; _curByte = 0; + _buf = _buf_base; } - void Flush() - { - if (_bitPos < 8) - WriteBits(0, _bitPos); - } - + // required condition: (value >> numBits) == 0 + // numBits == 0 is allowed void WriteBits(UInt32 value, unsigned numBits) { - while (numBits > 0) + do { - unsigned numNewBits = MyMin(numBits, _bitPos); - numBits -= numNewBits; - - _curByte = (Byte)(_curByte << numNewBits); - UInt32 newBits = value >> numBits; - _curByte |= Byte(newBits); - value -= (newBits << numBits); - - _bitPos -= numNewBits; - - if (_bitPos == 0) + unsigned bp = _bitPos; + unsigned curByte 
= _curByte; + if (numBits < bp) { - _buf[_pos++] = _curByte; - _bitPos = 8; + bp -= numBits; + _curByte = curByte | (value << bp); + _bitPos = bp; + return; } + numBits -= bp; + const UInt32 hi = value >> numBits; + value -= (hi << numBits); + Byte *buf = _buf; + _bitPos = 8; + _curByte = 0; + *buf++ = (Byte)(curByte | hi); + _buf = buf; + } + while (numBits); + } + + void WriteBit(unsigned value) + { + const unsigned bp = _bitPos - 1; + const unsigned curByte = _curByte | (value << bp); + _curByte = curByte; + _bitPos = bp; + if (bp == 0) + { + *_buf++ = (Byte)curByte; + _curByte = 0; + _bitPos = 8; } } - - UInt32 GetBytePos() const { return _pos ; } - UInt32 GetPos() const { return _pos * 8 + (8 - _bitPos); } - Byte GetCurByte() const { return _curByte; } + + void WriteByte(unsigned b) + { + const unsigned bp = _bitPos; + const unsigned a = _curByte | (b >> (8 - bp)); + _curByte = b << bp; + Byte *buf = _buf; + *buf++ = (Byte)a; + _buf = buf; + } + + UInt32 GetBytePos() const { return (UInt32)(size_t)(_buf - _buf_base); } + UInt32 GetPos() const { return GetBytePos() * 8 + 8 - _bitPos; } + unsigned GetCurByte() const { return _curByte; } + unsigned GetNonFlushedByteBits() const { return _curByte >> _bitPos; } void SetPos(UInt32 bitPos) { - _pos = bitPos >> 3; + _buf = _buf_base + (bitPos >> 3); _bitPos = 8 - ((unsigned)bitPos & 7); } - void SetCurState(unsigned bitPos, Byte curByte) + void SetCurState(unsigned bitPos, unsigned curByte) { _bitPos = 8 - bitPos; _curByte = curByte; } }; -class CEncoder; -const unsigned kNumPassesMax = 10; +class CEncoder; class CThreadInfo { +private: + CMsbfEncoderTemp m_OutStreamCurrent; public: + CEncoder *Encoder; Byte *m_Block; private: Byte *m_MtfArray; Byte *m_TempArray; UInt32 *m_BlockSorterIndex; - CMsbfEncoderTemp *m_OutStreamCurrent; +public: + bool m_OptimizeNumTables; + UInt32 m_NumCrcs; + UInt32 m_BlockIndex; + UInt64 m_UnpackSize; + + Byte *m_Block_Base; Byte Lens[kNumTablesMax][kMaxAlphaSize]; UInt32 
Freqs[kNumTablesMax][kMaxAlphaSize]; @@ -105,20 +138,16 @@ private: Byte m_Selectors[kNumSelectorsMax]; UInt32 m_CRCs[1 << kNumPassesMax]; - UInt32 m_NumCrcs; void WriteBits2(UInt32 value, unsigned numBits); - void WriteByte2(Byte b); - void WriteBit2(Byte v); - void WriteCrc2(UInt32 v); + void WriteByte2(unsigned b) { WriteBits2(b, 8); } + void WriteBit2(unsigned v) { m_OutStreamCurrent.WriteBit(v); } void EncodeBlock(const Byte *block, UInt32 blockSize); UInt32 EncodeBlockWithHeaders(const Byte *block, UInt32 blockSize); void EncodeBlock2(const Byte *block, UInt32 blockSize, UInt32 numPasses); public: - bool m_OptimizeNumTables; - CEncoder *Encoder; - #ifndef Z7_ST +#ifndef Z7_ST NWindows::CThread Thread; NWindows::NSynchronization::CAutoResetEvent StreamWasFinishedEvent; @@ -127,17 +156,14 @@ public: // it's not member of this thread. We just need one event per thread NWindows::NSynchronization::CAutoResetEvent CanWriteEvent; -private: - UInt32 m_BlockIndex; - UInt64 m_UnpackSize; public: Byte MtPad[1 << 8]; // It's pad for Multi-Threading. Must be >= Cache_Line_Size. 
HRESULT Create(); void FinishStream(bool needLeave); THREAD_FUNC_RET_TYPE ThreadFunc(); - #endif +#endif - CThreadInfo(): m_Block(NULL), m_BlockSorterIndex(NULL) {} + CThreadInfo(): m_BlockSorterIndex(NULL), m_Block_Base(NULL) {} ~CThreadInfo() { Free(); } bool Alloc(); void Free(); @@ -145,16 +171,19 @@ public: HRESULT EncodeBlock3(UInt32 blockSize); }; + struct CEncProps { UInt32 BlockSizeMult; UInt32 NumPasses; + UInt32 NumThreadGroups; UInt64 Affinity; CEncProps() { BlockSizeMult = (UInt32)(Int32)-1; NumPasses = (UInt32)(Int32)-1; + NumThreadGroups = 0; Affinity = 0; } void Normalize(int level); @@ -206,6 +235,7 @@ public: bool CloseThreads; bool StreamWasFinished; NWindows::NSynchronization::CManualResetEvent CanStartWaitingEvent; + CThreadNextGroup ThreadNextGroup; HRESULT Result; ICompressProgressInfo *Progress; @@ -218,12 +248,8 @@ public: UInt64 GetInProcessedSize() const { return m_InStream.GetProcessedSize(); } UInt32 ReadRleBlock(Byte *buf); - void WriteBytes(const Byte *data, UInt32 sizeInBits, Byte lastByte); - - void WriteBits(UInt32 value, unsigned numBits); + void WriteBytes(const Byte *data, UInt32 sizeInBits, unsigned lastByteBits); void WriteByte(Byte b); - // void WriteBit(Byte v); - void WriteCrc(UInt32 v); #ifndef Z7_ST HRESULT Create(); diff --git a/CPP/7zip/Compress/BitlEncoder.h b/CPP/7zip/Compress/BitlEncoder.h index 67b1428..364f84d 100644 --- a/CPP/7zip/Compress/BitlEncoder.h +++ b/CPP/7zip/Compress/BitlEncoder.h @@ -33,6 +33,7 @@ public: _bitPos = 8; _curByte = 0; } + Z7_FORCE_INLINE void WriteBits(UInt32 value, unsigned numBits) { while (numBits > 0) diff --git a/CPP/7zip/Compress/BitmEncoder.h b/CPP/7zip/Compress/BitmEncoder.h index 978ee1c..f7448cd 100644 --- a/CPP/7zip/Compress/BitmEncoder.h +++ b/CPP/7zip/Compress/BitmEncoder.h @@ -8,8 +8,9 @@ template class CBitmEncoder { - unsigned _bitPos; - Byte _curByte; + unsigned _bitPos; // 0 < _bitPos <= 8 : number of non-filled low bits in _curByte + unsigned _curByte; // low (_bitPos) 
bits are zeros + // high (8 - _bitPos) bits are filled TOutByte _stream; public: bool Create(UInt32 bufferSize) { return _stream.Create(bufferSize); } @@ -24,25 +25,65 @@ public: HRESULT Flush() { if (_bitPos < 8) - WriteBits(0, _bitPos); - return _stream.Flush(); - } - void WriteBits(UInt32 value, unsigned numBits) - { - while (numBits > 0) { - if (numBits < _bitPos) - { - _curByte = (Byte)(_curByte | (value << (_bitPos -= numBits))); - return; - } - numBits -= _bitPos; - UInt32 newBits = (value >> numBits); - value -= (newBits << numBits); - _stream.WriteByte((Byte)(_curByte | newBits)); + _stream.WriteByte((Byte)_curByte); _bitPos = 8; _curByte = 0; } + return _stream.Flush(); + } + + // required condition: (value >> numBits) == 0 + // numBits == 0 is allowed + void WriteBits(UInt32 value, unsigned numBits) + { + do + { + unsigned bp = _bitPos; + unsigned curByte = _curByte; + if (numBits < bp) + { + bp -= numBits; + _curByte = curByte | (value << bp); + _bitPos = bp; + return; + } + numBits -= bp; + const UInt32 hi = (value >> numBits); + value -= (hi << numBits); + _stream.WriteByte((Byte)(curByte | hi)); + _bitPos = 8; + _curByte = 0; + } + while (numBits); + } + void WriteByte(unsigned b) + { + const unsigned bp = _bitPos; + const unsigned a = _curByte | (b >> (8 - bp)); + _curByte = b << bp; + _stream.WriteByte((Byte)a); + } + + void WriteBytes(const Byte *data, size_t num) + { + const unsigned bp = _bitPos; +#if 1 // 1 for optional speed-optimized code branch + if (bp == 8) + { + _stream.WriteBytes(data, num); + return; + } +#endif + unsigned c = _curByte; + const unsigned bp_rev = 8 - bp; + for (size_t i = 0; i < num; i++) + { + const unsigned b = data[i]; + _stream.WriteByte((Byte)(c | (b >> bp_rev))); + c = b << bp; + } + _curByte = c; } }; diff --git a/CPP/7zip/Compress/DeflateDecoder.cpp b/CPP/7zip/Compress/DeflateDecoder.cpp index 73895fe..7993176 100644 --- a/CPP/7zip/Compress/DeflateDecoder.cpp +++ b/CPP/7zip/Compress/DeflateDecoder.cpp @@ -117,15 
+117,13 @@ bool CCoder::ReadTables(void) if (_numDistLevels > kDistTableSize32) return false; - Byte levelLevels[kLevelTableSize]; - for (unsigned i = 0; i < kLevelTableSize; i++) - { - const unsigned position = kCodeLengthAlphabetOrder[i]; - if (i < numLevelCodes) - levelLevels[position] = (Byte)ReadBits(kLevelFieldSize); - else - levelLevels[position] = 0; - } + const unsigned kLevelTableSize_aligned4 = kLevelTableSize + 1; + Byte levelLevels[kLevelTableSize_aligned4]; + memset (levelLevels, 0, sizeof(levelLevels)); + unsigned i = 0; + do + levelLevels[kCodeLengthAlphabetOrder[i++]] = (Byte)ReadBits(kLevelFieldSize); + while (i != numLevelCodes); if (m_InBitStream.ExtraBitsWereRead()) return false; diff --git a/CPP/7zip/Compress/DeflateEncoder.cpp b/CPP/7zip/Compress/DeflateEncoder.cpp index 87b4f83..afc5f12 100644 --- a/CPP/7zip/Compress/DeflateEncoder.cpp +++ b/CPP/7zip/Compress/DeflateEncoder.cpp @@ -19,12 +19,16 @@ #define NO_INLINE #endif +#define MAX_HUF_LEN_12 12 + namespace NCompress { namespace NDeflate { namespace NEncoder { +static const unsigned k_CodeValue_Len_Is_Literal_Flag = 1u << 15; + static const unsigned kNumDivPassesMax = 10; // [0, 16); ratio/speed/ram tradeoff; use big value for better compression ratio. -static const UInt32 kNumTables = (1 << kNumDivPassesMax); +static const unsigned kNumTables = 1u << kNumDivPassesMax; static const UInt32 kFixedHuffmanCodeBlockSizeMax = (1 << 8); // [0, (1 << 32)); ratio/speed tradeoff; use big value for better compression ratio. static const UInt32 kDivideCodeBlockSizeMin = (1 << 7); // [1, (1 << 32)); ratio/speed tradeoff; use small value for better compression ratio. 
@@ -77,7 +81,7 @@ public: static CFastPosInit g_FastPosInit; -inline UInt32 GetPosSlot(UInt32 pos) +inline unsigned GetPosSlot(UInt32 pos) { /* if (pos < 0x200) @@ -162,13 +166,13 @@ HRESULT CCoder::Create() // COM_TRY_BEGIN if (!m_Values) { - m_Values = (CCodeValue *)MyAlloc((kMaxUncompressedBlockSize) * sizeof(CCodeValue)); + m_Values = (CCodeValue *)MyAlloc(kMaxUncompressedBlockSize * sizeof(CCodeValue)); if (!m_Values) return E_OUTOFMEMORY; } if (!m_Tables) { - m_Tables = (CTables *)MyAlloc((kNumTables) * sizeof(CTables)); + m_Tables = (CTables *)MyAlloc(kNumTables * sizeof(CTables)); if (!m_Tables) return E_OUTOFMEMORY; } @@ -268,19 +272,21 @@ NO_INLINE void CCoder::GetMatches() UInt32 distanceTmp[kMatchMaxLen * 2 + 3]; - const UInt32 numPairs = (UInt32)((_btMode ? + const size_t numPairs = (size_t)((_btMode ? Bt3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp): Hc3Zip_MatchFinder_GetMatches(&_lzInWindow, distanceTmp)) - distanceTmp); - *m_MatchDistances = (UInt16)numPairs; + UInt16 *matchDistances = m_MatchDistances; + *matchDistances++ = (UInt16)numPairs; if (numPairs != 0) { - UInt32 i; + size_t i; for (i = 0; i < numPairs; i += 2) { - m_MatchDistances[(size_t)i + 1] = (UInt16)distanceTmp[i]; - m_MatchDistances[(size_t)i + 2] = (UInt16)distanceTmp[(size_t)i + 1]; + matchDistances[0] = (UInt16)distanceTmp[i]; + matchDistances[1] = (UInt16)distanceTmp[(size_t)i + 1]; + matchDistances += 2; } UInt32 len = distanceTmp[(size_t)numPairs - 2]; if (len == m_NumFastBytes && m_NumFastBytes != m_MatchMaxLen) @@ -291,11 +297,11 @@ NO_INLINE void CCoder::GetMatches() if (numAvail > m_MatchMaxLen) numAvail = m_MatchMaxLen; for (; len < numAvail && pby[len] == pby2[len]; len++); - m_MatchDistances[(size_t)i - 1] = (UInt16)len; + matchDistances[-2] = (UInt16)len; } } if (m_IsMultiPass) - m_Pos += numPairs + 1; + m_Pos += (UInt32)numPairs + 1; if (!m_SecondPass) m_AdditionalOffset++; } @@ -535,6 +541,7 @@ NO_INLINE void CCoder::WriteBits(UInt32 value, unsigned numBits) 
} #define WRITE_HF2(codes, lens, i) m_OutStream.WriteBits(codes[i], lens[i]) +#define WRITE_HF2_NO_INLINE(codes, lens, i) WriteBits(codes[i], lens[i]) #define WRITE_HF(i) WriteBits(codes[i], lens[i]) NO_INLINE void CCoder::LevelTableCode(const Byte *levels, unsigned numLevels, const Byte *lens, const UInt32 *codes) @@ -619,17 +626,22 @@ static NO_INLINE UInt32 Huffman_GetPrice(const UInt32 *freqs, const Byte *lens, return price; } -static NO_INLINE UInt32 Huffman_GetPrice_Spec(const UInt32 *freqs, const Byte *lens, UInt32 num, const Byte *extraBits, UInt32 extraBase) +static NO_INLINE UInt32 Huffman_GetPrice_Spec( + const UInt32 *freqs, const Byte *lens, UInt32 num, + const Byte *extraBits, UInt32 extraBase) { - return Huffman_GetPrice(freqs, lens, num) + + return + Huffman_GetPrice(freqs, lens, num) + Huffman_GetPrice(freqs + extraBase, extraBits, num - extraBase); } NO_INLINE UInt32 CCoder::GetLzBlockPrice() const { return - Huffman_GetPrice_Spec(mainFreqs, m_NewLevels.litLenLevels, kFixedMainTableSize, m_LenDirectBits, kSymbolMatch) + - Huffman_GetPrice_Spec(distFreqs, m_NewLevels.distLevels, kDistTableSize64, kDistDirectBits, 0); + Huffman_GetPrice_Spec(mainFreqs, m_NewLevels.litLenLevels, + kFixedMainTableSize, m_LenDirectBits, kSymbolMatch) + + Huffman_GetPrice_Spec(distFreqs, m_NewLevels.distLevels, + kDistTableSize64, kDistDirectBits, 0); } NO_INLINE void CCoder::TryBlock() @@ -658,7 +670,7 @@ NO_INLINE void CCoder::TryBlock() CCodeValue &codeValue = m_Values[m_ValueIndex++]; if (len >= kMatchMinLen) { - UInt32 newLen = len - kMatchMinLen; + const UInt32 newLen = len - kMatchMinLen; codeValue.Len = (UInt16)newLen; mainFreqs[kSymbolMatch + (size_t)g_LenSlots[newLen]]++; codeValue.Pos = (UInt16)pos; @@ -666,10 +678,10 @@ NO_INLINE void CCoder::TryBlock() } else { - Byte b = *(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset); + const unsigned b = *(Inline_MatchFinder_GetPointerToCurrentPos(&_lzInWindow) - m_AdditionalOffset); 
mainFreqs[b]++; - codeValue.SetAsLiteral(); - codeValue.Pos = b; + codeValue.Len = k_CodeValue_Len_Is_Literal_Flag; + codeValue.Pos = (UInt16)b; } m_AdditionalOffset -= len; BlockSizeRes += len; @@ -704,16 +716,24 @@ NO_INLINE void CCoder::SetPrices(const CLevels &levels) } } +#if MAX_HUF_LEN_12 > 12 +// Huffman_ReverseBits() now supports 12-bits values only. +#error Stop_Compiling_Bad_MAX_HUF_LEN_12 +#endif static NO_INLINE void Huffman_ReverseBits(UInt32 *codes, const Byte *lens, UInt32 num) { - for (UInt32 i = 0; i < num; i++) + const Byte * const lens_lim = lens + num; + do { - UInt32 x = codes[i]; - x = ((x & 0x5555) << 1) | ((x & 0xAAAA) >> 1); - x = ((x & 0x3333) << 2) | ((x & 0xCCCC) >> 2); - x = ((x & 0x0F0F) << 4) | ((x & 0xF0F0) >> 4); - codes[i] = (((x & 0x00FF) << 8) | ((x & 0xFF00) >> 8)) >> (16 - lens[i]); + // we should change constants, if lens[*] can be larger than 12. + UInt32 x = *codes; + x = ((x & (0x555 )) << 2) + (x & (0xAAA )); + x = ((x & (0x333 << 1)) << 4) | (x & (0xCCC << 1)); + x = ((x & (0xF0F << 3)) << 8) | (x & (0x0F0 << 3)); + // we can use (x) instead of (x & (0xFF << 7)), if we support garabage data after (*lens) bits. 
+ *codes++ = (((x & (0xFF << 7)) << 16) | x) >> (*lens ^ 31); } + while (++lens != lens_lim); } NO_INLINE void CCoder::WriteBlock() @@ -721,24 +741,28 @@ NO_INLINE void CCoder::WriteBlock() Huffman_ReverseBits(mainCodes, m_NewLevels.litLenLevels, kFixedMainTableSize); Huffman_ReverseBits(distCodes, m_NewLevels.distLevels, kDistTableSize64); - for (UInt32 i = 0; i < m_ValueIndex; i++) + CCodeValue *values = m_Values; + const CCodeValue * const values_lim = values + m_ValueIndex; + + if (values != values_lim) + do { - const CCodeValue &codeValue = m_Values[i]; - if (codeValue.IsLiteral()) - WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, codeValue.Pos); + const UInt32 len = values->Len; + const UInt32 dist = values->Pos; + if (len == k_CodeValue_Len_Is_Literal_Flag) + WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, dist); else { - UInt32 len = codeValue.Len; - UInt32 lenSlot = g_LenSlots[len]; + const unsigned lenSlot = g_LenSlots[len]; WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolMatch + lenSlot); m_OutStream.WriteBits(len - m_LenStart[lenSlot], m_LenDirectBits[lenSlot]); - UInt32 dist = codeValue.Pos; - UInt32 posSlot = GetPosSlot(dist); + const unsigned posSlot = GetPosSlot(dist); WRITE_HF2(distCodes, m_NewLevels.distLevels, posSlot); m_OutStream.WriteBits(dist - kDistStart[posSlot], kDistDirectBits[posSlot]); } } - WRITE_HF2(mainCodes, m_NewLevels.litLenLevels, kSymbolEndOfBlock); + while (++values != values_lim); + WRITE_HF2_NO_INLINE(mainCodes, m_NewLevels.litLenLevels, kSymbolEndOfBlock); } static UInt32 GetStorePrice(UInt32 blockSize, unsigned bitPosition) @@ -787,10 +811,10 @@ NO_INLINE UInt32 CCoder::TryDynBlock(unsigned tableIndex, UInt32 numPasses) { m_Pos = posTemp; TryBlock(); - unsigned numHuffBits = - (m_ValueIndex > 18000 ? 12 : - (m_ValueIndex > 7000 ? 11 : - (m_ValueIndex > 2000 ? 10 : 9))); + const unsigned numHuffBits = + m_ValueIndex > 18000 ? MAX_HUF_LEN_12 : + m_ValueIndex > 7000 ? 11 : + m_ValueIndex > 2000 ? 
10 : 9; MakeTables(numHuffBits); SetPrices(m_NewLevels); } diff --git a/CPP/7zip/Compress/Lzma2Encoder.cpp b/CPP/7zip/Compress/Lzma2Encoder.cpp index 0dc7e23..ffe1152 100644 --- a/CPP/7zip/Compress/Lzma2Encoder.cpp +++ b/CPP/7zip/Compress/Lzma2Encoder.cpp @@ -52,7 +52,15 @@ HRESULT SetLzma2Prop(PROPID propID, const PROPVARIANT &prop, CLzma2EncProps &lzm case NCoderPropID::kNumThreads: if (prop.vt != VT_UI4) return E_INVALIDARG; - lzma2Props.numTotalThreads = (int)(prop.ulVal); + lzma2Props.numTotalThreads = (int)prop.ulVal; + break; + case NCoderPropID::kNumThreadGroups: + if (prop.vt != VT_UI4) + return E_INVALIDARG; + // 16-bit value supported by Windows + if (prop.ulVal >= (1u << 16)) + return E_INVALIDARG; + lzma2Props.numThreadGroups = (unsigned)prop.ulVal; break; default: RINOK(NLzma::SetLzmaProp(propID, prop, lzma2Props.lzmaProps)) diff --git a/CPP/7zip/Compress/LzmaEncoder.cpp b/CPP/7zip/Compress/LzmaEncoder.cpp index 08e3ba5..bca2eee 100644 --- a/CPP/7zip/Compress/LzmaEncoder.cpp +++ b/CPP/7zip/Compress/LzmaEncoder.cpp @@ -101,6 +101,24 @@ HRESULT SetLzmaProp(PROPID propID, const PROPVARIANT &prop, CLzmaEncProps &ep) return S_OK; } + if (propID == NCoderPropID::kAffinityInGroup) + { + if (prop.vt == VT_UI8) + ep.affinityInGroup = prop.uhVal.QuadPart; + else + return E_INVALIDARG; + return S_OK; + } + + if (propID == NCoderPropID::kThreadGroup) + { + if (prop.vt == VT_UI4) + ep.affinityGroup = (Int32)(UInt32)prop.ulVal; + else + return E_INVALIDARG; + return S_OK; + } + if (propID == NCoderPropID::kHashBits) { if (prop.vt == VT_UI4) diff --git a/CPP/7zip/Compress/Mtf8.h b/CPP/7zip/Compress/Mtf8.h index 1b44d00..5fce30e 100644 --- a/CPP/7zip/Compress/Mtf8.h +++ b/CPP/7zip/Compress/Mtf8.h @@ -13,6 +13,18 @@ struct CMtf8Encoder unsigned FindAndMove(Byte v) throw() { +#if 1 + Byte b = Buf[0]; + if (v == b) + return 0; + Buf[0] = v; + for (unsigned pos = 0;;) + { + Byte a; + a = Buf[++pos]; Buf[pos] = b; if (v == a) return pos; + b = Buf[++pos]; Buf[pos] = a; if 
(v == b) return pos; + } +#else size_t pos; for (pos = 0; Buf[pos] != v; pos++); const unsigned resPos = (unsigned)pos; @@ -31,6 +43,7 @@ struct CMtf8Encoder Buf[pos] = Buf[pos - 1]; Buf[0] = v; return resPos; +#endif } }; diff --git a/CPP/7zip/Compress/Rar5Decoder.cpp b/CPP/7zip/Compress/Rar5Decoder.cpp index 8be24e2..7279b5a 100644 --- a/CPP/7zip/Compress/Rar5Decoder.cpp +++ b/CPP/7zip/Compress/Rar5Decoder.cpp @@ -936,31 +936,30 @@ HRESULT CDecoder::ExecuteFilter(const CFilter &f) HRESULT CDecoder::WriteBuf() { DeleteUnusedFilters(); - const UInt64 lzSize = _lzSize + _winPos; for (unsigned i = 0; i < _numFilters;) { - const CFilter &f = _filters[i]; - const UInt64 blockStart = f.Start; const size_t lzAvail = (size_t)(lzSize - _lzWritten); if (lzAvail == 0) break; - + // (lzAvail != 0) + const CFilter &f = _filters[i]; + const UInt64 blockStart = f.Start; if (blockStart > _lzWritten) { const UInt64 rem = blockStart - _lzWritten; + // (rem != 0) size_t size = lzAvail; if (size > rem) size = (size_t)rem; - if (size != 0) // is it true always ? - { - RINOK(WriteData(_window + _winPos - lzAvail, size)) - _lzWritten += size; - } + // (size != 0) + RINOK(WriteData(_window + _winPos - lzAvail, size)) + _lzWritten += size; continue; } - + + // (blockStart <= _lzWritten) const UInt32 blockSize = f.Size; size_t offset = (size_t)(_lzWritten - blockStart); if (offset == 0) @@ -987,10 +986,8 @@ HRESULT CDecoder::WriteBuf() } DeleteUnusedFilters(); - if (_numFilters) return S_OK; - const size_t lzAvail = (size_t)(lzSize - _lzWritten); RINOK(WriteData(_window + _winPos - lzAvail, lzAvail)) _lzWritten += lzAvail; @@ -1367,6 +1364,12 @@ enum enum_exit_type Z7_HUFF_DECODE_CHECK(sym, huf, kNumHufBits, kNumTableBits, bitStream, { LZ_LOOP_BREAK_ERROR }) +/* + DecodeLZ2() will stop decoding if it reaches limit when (_winPos >= _limit) + at return: + (_winPos < _limit + kMaxMatchLen) + also it can write up to (COPY_CHUNK_SIZE - 1) additional junk bytes after (_winPos). 
+*/ HRESULT CDecoder::DecodeLZ2(const CBitDecoder &bitStream) throw() { #if 0 @@ -1656,6 +1659,13 @@ decode_error: +/* +input conditions: + _winPos < _winSize +return: + _winPos < _winSize is expected, if (return_res == S_OK) + _winPos >= _winSize is possible in (return_res != S_OK) +*/ HRESULT CDecoder::DecodeLZ() { CBitDecoder _bitStream; @@ -1679,6 +1689,8 @@ HRESULT CDecoder::DecodeLZ() if (winPos >= limit) { _winPos = winPos < _winSize ? winPos : _winSize; + // _winPos == min(winPos, _winSize) + // we will not write data after _winSize RINOK(WriteBuf()) if (_unpackSize_Defined && _writtenFileSize > _unpackSize) break; // return S_FALSE; @@ -1854,7 +1866,15 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream { // if (_winPos > 100) _winPos -= 100; // for debug: corruption const UInt64 lzSize = _lzSize + _winPos; - if (!_isSolid || !_wasInit +/* + if previous file was decoded with error or for some another cases, then + (lzSize > _lzEnd) is possible + (_winPos > _winSize) is possible + (_winPos < _winSize + kMaxMatchLen) +*/ + if (!_window + || !_isSolid + || !_wasInit || (lzSize < _lzEnd #if Z7_RAR_RECOVER_SOLID_LIMIT != 0 && lzSize + Z7_RAR_RECOVER_SOLID_LIMIT < _lzEnd @@ -1863,9 +1883,9 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream { if (_isSolid) _lzError = LZ_ERROR_TYPE_HEADER; - _lzEnd = 0; _lzSize = 0; - _lzWritten = 0; + // _lzEnd = 0; // it will be set later + // _lzWritten = 0; // it will be set later _winPos = 0; for (unsigned i = 0; i < kNumReps; i++) _reps[i] = (size_t)0 - 1; @@ -1873,51 +1893,67 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream _tableWasFilled = false; _wasInit = true; } -#if Z7_RAR_RECOVER_SOLID_LIMIT != 0 - else if (lzSize < _lzEnd) + else { -#if 0 - return S_FALSE; -#else - // we can report that recovering was made: - // _lzError = LZ_ERROR_TYPE_HEADER; - // We write zeros to area after corruption: - if (_window) + const size_t 
ws = _winSize; + if (_winPos >= ws) { - UInt64 rem = _lzEnd - lzSize; - const size_t ws = _winSize; - if (rem >= ws) + // we must normalize (_winPos) and data in _window, + _winPos -= ws; + _lzSize += ws; + // (_winPos < kMaxMatchLen < _winSize) + // if (_window) + memcpy(_window, _window + ws, _winPos); // memmove is not required here + } + +#if Z7_RAR_RECOVER_SOLID_LIMIT != 0 + if (lzSize < _lzEnd) + { +#if 0 + return S_FALSE; +#else + // we can report that recovering was made: + // _lzError = LZ_ERROR_TYPE_HEADER; + // We write zeros to area after corruption: + // if (_window) { - My_ZeroMemory(_window, ws); - _lzSize = ws; - _winPos = 0; - } - else - { - const size_t cur = ws - _winPos; - if (cur <= rem) + UInt64 rem = _lzEnd - lzSize; + if (rem >= ws) { - rem -= cur; - My_ZeroMemory(_window + _winPos, cur); - _lzSize += _winPos; + My_ZeroMemory(_window, ws); + _lzSize = ws; _winPos = 0; } - My_ZeroMemory(_window + _winPos, (size_t)rem); - _winPos += (size_t)rem; + else + { + // rem < _winSize + // _winPos <= ws + const size_t cur = ws - _winPos; + if (cur <= rem) + { + rem -= cur; + My_ZeroMemory(_window + _winPos, cur); + _lzSize = ws; + _winPos = 0; + } + My_ZeroMemory(_window + _winPos, (size_t)rem); + _winPos += (size_t)rem; + } } - } - // else return S_FALSE; + // else return S_FALSE; #endif + } } #endif } + // _winPos < _winSize // we don't want _lzSize overflow if (_lzSize >= DICT_SIZE_MAX) _lzSize = DICT_SIZE_MAX; _lzEnd = _lzSize + _winPos; // _lzSize <= DICT_SIZE_MAX - // _lzEnd <= DICT_SIZE_MAX * 2 + // _lzEnd < DICT_SIZE_MAX + _winSize size_t newSize = _dictSize; if (newSize < kWinSize_Min) @@ -1941,10 +1977,11 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream // If dictionary was increased in solid, we don't want grow. 
return S_FALSE; // E_OUTOFMEMORY } - // (newSize <= _winSize) + // (newSize <= _dictSize_forCheck) } else { + // !_isSolid || !_window _dictSize_forCheck = newSize; { size_t newSize_small = newSize; @@ -1964,7 +2001,7 @@ Z7_COM7F_IMF(CDecoder::Code(ISequentialInStream *inStream, ISequentialOutStream if (!_window || allocSize > _winSize_Allocated) { Z7_RAR_FREE_WINDOW - _window = NULL; + _window = NULL; _winSize_Allocated = 0; Byte *win = (Byte *)::BigAlloc(allocSize); if (!win) diff --git a/CPP/7zip/Crypto/MyAes.cpp b/CPP/7zip/Crypto/MyAes.cpp index f84bca8..0ae0e16 100644 --- a/CPP/7zip/Crypto/MyAes.cpp +++ b/CPP/7zip/Crypto/MyAes.cpp @@ -153,7 +153,26 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size)) #ifndef Z7_EXTRACT_ONLY #ifdef MY_CPU_X86_OR_AMD64 - #define USE_HW_AES + + #if defined(__INTEL_COMPILER) + #if (__INTEL_COMPILER >= 1110) + #define USE_HW_AES + #if (__INTEL_COMPILER >= 1900) + #define USE_HW_VAES + #endif + #endif + #elif defined(Z7_CLANG_VERSION) && (Z7_CLANG_VERSION >= 30800) \ + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40400) + #define USE_HW_AES + #if defined(__clang__) && (__clang_major__ >= 8) \ + || defined(__GNUC__) && (__GNUC__ >= 8) + #define USE_HW_VAES + #endif + #elif defined(_MSC_VER) + #define USE_HW_AES + #define USE_HW_VAES + #endif + #elif defined(MY_CPU_ARM_OR_ARM64) && defined(MY_CPU_LE) #if defined(__ARM_FEATURE_AES) \ @@ -186,15 +205,15 @@ Z7_COM7F_IMF2(UInt32, CAesCtrCoder::Filter(Byte *data, UInt32 size)) #define SET_AES_FUNC_2(f2) \ if (algo == 2) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW) \ { f = f2; } - #ifdef MY_CPU_X86_OR_AMD64 + #ifdef USE_HW_VAES #define SET_AES_FUNC_23(f2, f3) \ SET_AES_FUNC_2(f2) \ if (algo == 3) if (g_Aes_SupportedFunctions_Flags & k_Aes_SupportedFunctions_HW_256) \ { f = f3; } - #else // MY_CPU_X86_OR_AMD64 + #else // USE_HW_VAES #define SET_AES_FUNC_23(f2, f3) \ SET_AES_FUNC_2(f2) - #endif // MY_CPU_X86_OR_AMD64 + #endif // USE_HW_VAES #else // 
USE_HW_AES #define SET_AES_FUNC_23(f2, f3) #endif // USE_HW_AES diff --git a/CPP/7zip/ICoder.h b/CPP/7zip/ICoder.h index aec2834..20d0ff7 100644 --- a/CPP/7zip/ICoder.h +++ b/CPP/7zip/ICoder.h @@ -136,6 +136,9 @@ namespace NCoderPropID kAffinity, // VT_UI8 kBranchOffset, // VT_UI4 kHashBits, // VT_UI4 + kNumThreadGroups, // VT_UI4 + kThreadGroup, // VT_UI4 + kAffinityInGroup, // VT_UI8 /* // kHash3Bits, // VT_UI4 // kHash2Bits, // VT_UI4 diff --git a/CPP/7zip/Sort.mak b/CPP/7zip/Sort.mak new file mode 100644 index 0000000..ca0ff59 --- /dev/null +++ b/CPP/7zip/Sort.mak @@ -0,0 +1,6 @@ +!IF defined(USE_NO_ASM) || defined(USE_C_SORT) || "$(PLATFORM)" == "ia64" || "$(PLATFORM)" == "mips" || "$(PLATFORM)" == "arm" || "$(PLATFORM)" == "arm64" +C_OBJS = $(C_OBJS) \ +!ELSE +ASM_OBJS = $(ASM_OBJS) \ +!ENDIF + $O\Sort.obj diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp index 556b25a..de9f43e 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp @@ -63,17 +63,46 @@ EXTERN_C_END #else -// #define MY_isatty_fileno(x) (isatty(fileno(x))) -// #define MY_IS_TERMINAL(x) (MY_isatty_fileno(x) != 0); -static inline bool MY_IS_TERMINAL(FILE *x) +static bool MY_IS_TERMINAL(FILE *x) { - return ( - #if defined(_MSC_VER) && (_MSC_VER >= 1400) - _isatty(_fileno(x)) - #else - isatty(fileno(x)) - #endif - != 0); +#ifdef _WIN32 + /* +crt/stdio.h: +typedef struct _iobuf FILE; +#define stdin (&_iob[0]) +#define stdout (&_iob[1]) +#define stderr (&_iob[2]) +*/ + // fprintf(stderr, "\nMY_IS_TERMINAL = %p", x); + const int fd = _fileno(x); + /* (fd) is 0, 1 or 2 in console program. + docs: If stdout or stderr is not associated with + an output stream (for example, in a Windows application + without a console window), the file descriptor returned is -2. + In previous versions, the file descriptor returned was -1. 
+ */ + if (fd < 0) // is not associated with an output stream application (without a console window) + return false; + // fprintf(stderr, "\n\nstderr _fileno(%p) = %d", x, fd); + if (!_isatty(fd)) + return false; + // fprintf(stderr, "\nisatty_val = true"); + const HANDLE h = (HANDLE)_get_osfhandle(fd); + /* _get_osfhandle() returns intptr_t in new SDK, or long in MSVC6. + Also it can return (INVALID_HANDLE_VALUE). + docs: _get_osfhandle also returns the special value -2 when + the file descriptor is not associated with a stream + in old msvcrt.dll: it returns (-1) for incorrect value + */ + // fprintf(stderr, "\n_get_osfhandle() = %p", (void *)h); + if (h == NULL || h == INVALID_HANDLE_VALUE) + return false; + DWORD st; + // fprintf(stderr, "\nGetConsoleMode() = %u", (unsigned)GetConsoleMode(h, &st)); + return GetConsoleMode(h, &st) != 0; +#else + return isatty(fileno(x)) != 0; +#endif } #endif @@ -1088,7 +1117,7 @@ void CArcCmdLineParser::Parse1(const UStringVector &commandStrings, const UString &s = parser[NKey::kLargePages].PostStrings[0]; if (s.IsEmpty()) slp = 1; - else if (s != L"-") + else if (!s.IsEqualTo("-")) { if (!StringToUInt32(s, slp)) throw CArcCmdLineException("Unsupported switch postfix for -slp", s); @@ -1338,7 +1367,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const UString &s = parser[NKey::kFullPathMode].PostStrings[0]; if (!s.IsEmpty()) { - if (s == L"2") + if (s.IsEqualTo("2")) censorPathMode = NWildcard::k_FullPath; else throw CArcCmdLineException("Unsupported -spf:", s); @@ -1400,6 +1429,7 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const bool isExtractGroupCommand = options.Command.IsFromExtractGroup(); const bool isExtractOrList = isExtractGroupCommand || options.Command.CommandType == NCommandType::kList; const bool isRename = options.Command.CommandType == NCommandType::kRename; + options.UpdateOptions.RenameMode = isRename; if ((isExtractOrList || isRename) && options.StdInMode) 
thereIsArchiveName = false; @@ -1516,9 +1546,9 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) const UString &s = parser[NKey::kZoneFile].PostStrings[0]; if (!s.IsEmpty()) { - if (s == L"0") eo.ZoneMode = NExtract::NZoneIdMode::kNone; - else if (s == L"1") eo.ZoneMode = NExtract::NZoneIdMode::kAll; - else if (s == L"2") eo.ZoneMode = NExtract::NZoneIdMode::kOffice; + if (s.IsEqualTo("0")) eo.ZoneMode = NExtract::NZoneIdMode::kNone; + else if (s.IsEqualTo("1")) eo.ZoneMode = NExtract::NZoneIdMode::kAll; + else if (s.IsEqualTo("2")) eo.ZoneMode = NExtract::NZoneIdMode::kOffice; else throw CArcCmdLineException("Unsupported -snz:", s); } diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp index 67ea29c..3abcd2d 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp @@ -6,12 +6,10 @@ #undef printf // #include -// #include "../../../../C/CpuTicks.h" #include "../../../../C/Alloc.h" #include "../../../../C/CpuArch.h" - #include "../../../Common/ComTry.h" #include "../../../Common/IntToString.h" #include "../../../Common/StringConvert.h" @@ -33,6 +31,8 @@ #include "../../Common/FilePathAutoRename.h" #include "../../Common/StreamUtils.h" +#include "../../Archive/Common/ItemNameUtils.h" + #include "../Common/ExtractingFilePath.h" #include "../Common/PropIDUtils.h" @@ -56,6 +56,19 @@ static const char * const kCantCreateHardLink = "Cannot create hard link"; static const char * const kCantCreateSymLink = "Cannot create symbolic link"; #endif +static const unsigned k_LinkDataSize_LIMIT = 1 << 12; + +#if WCHAR_PATH_SEPARATOR != L'/' + // we convert linux slashes to windows slashes for further processing. + // also we convert linux backslashes to BackslashReplacement character. 
+ #define REPLACE_SLASHES_from_Linux_to_Sys(s) \ + { NArchive::NItemName::ReplaceToWinSlashes(s, true); } // useBackslashReplacement + // { s.Replace(L'/', WCHAR_PATH_SEPARATOR); } +#else + #define REPLACE_SLASHES_from_Linux_to_Sys(s) +#endif + + #ifndef Z7_SFX Z7_COM7F_IMF(COutStreamWithHash::Write(const void *data, UInt32 size, UInt32 *processedSize)) @@ -217,7 +230,7 @@ HRESULT CArchiveExtractCallback::PrepareHardLinks(const CRecordVector *r if (!_arc->Ask_INode) return S_OK; - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; CRecordVector &hardIDs = _hardLinks.IDs; { @@ -574,6 +587,13 @@ HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char return _extractCallback2->MessageError(s); } +HRESULT CArchiveExtractCallback::SendMessageError2_with_LastError( + const char *message, const FString &path1, const FString &path2) +{ + const HRESULT errorCode = GetLastError_noZero_HRESULT(); + return SendMessageError2(errorCode, message, path1, path2); +} + #ifndef Z7_SFX Z7_CLASS_IMP_COM_1( @@ -604,36 +624,20 @@ Z7_COM7F_IMF(CGetProp::GetProp(PROPID propID, PROPVARIANT *value)) #endif // Z7_SFX -#ifdef SUPPORT_LINKS - -static UString GetDirPrefixOf(const UString &src) -{ - UString s (src); - if (!s.IsEmpty()) - { - if (IsPathSepar(s.Back())) - s.DeleteBack(); - int pos = s.ReverseFind_PathSepar(); - s.DeleteFrom((unsigned)(pos + 1)); - } - return s; -} - -#endif // SUPPORT_LINKS - struct CLinkLevelsInfo { bool IsAbsolute; int LowLevel; int FinalLevel; - void Parse(const UString &path); + void Parse(const UString &path, bool isWSL); }; -void CLinkLevelsInfo::Parse(const UString &path) +void CLinkLevelsInfo::Parse(const UString &path, bool isWSL) { - IsAbsolute = NName::IsAbsolutePath(path); - + IsAbsolute = isWSL ? 
+ IS_PATH_SEPAR(path[0]) : + NName::IsAbsolutePath(path); LowLevel = 0; FinalLevel = 0; @@ -650,9 +654,9 @@ void CLinkLevelsInfo::Parse(const UString &path) IsAbsolute = true; continue; } - if (s == L".") + if (s.IsEqualTo(".")) continue; - if (s == L"..") + if (s.IsEqualTo("..")) { level--; if (LowLevel > level) @@ -666,16 +670,20 @@ void CLinkLevelsInfo::Parse(const UString &path) } -bool IsSafePath(const UString &path); -bool IsSafePath(const UString &path) +static bool IsSafePath(const UString &path, bool isWSL) { CLinkLevelsInfo levelsInfo; - levelsInfo.Parse(path); + levelsInfo.Parse(path, isWSL); return !levelsInfo.IsAbsolute && levelsInfo.LowLevel >= 0 && levelsInfo.FinalLevel > 0; } +bool IsSafePath(const UString &path); +bool IsSafePath(const UString &path) +{ + return IsSafePath(path, false); // isWSL +} bool CensorNode_CheckPath2(const NWildcard::CCensorNode &node, const CReadArcItem &item, bool &include); bool CensorNode_CheckPath2(const NWildcard::CCensorNode &node, const CReadArcItem &item, bool &include) @@ -791,159 +799,113 @@ HRESULT CArchiveExtractCallback::MyCopyFile(ISequentialOutStream *outStream) HRESULT CArchiveExtractCallback::ReadLink() { - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; const UInt32 index = _index; - _link.Clear(); - + // _link.Clear(); // _link.Clear() was called already. { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidHardLink, &prop)) if (prop.vt == VT_BSTR) { - _link.isHardLink = true; - // _link.isCopyLink = false; + _link.LinkType = k_LinkType_HardLink; _link.isRelative = false; // RAR5, TAR: hard links are from root folder of archive - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkPath.SetFromBstr(prop.bstrVal); + // 7-Zip 24-: tar handler returned original path (with linux slash in most case) + // 7-Zip 24-: rar5 handler returned path with system slash. + // 7-Zip 25+: tar/rar5 handlers return linux path in most cases. 
} else if (prop.vt != VT_EMPTY) return E_FAIL; } - /* { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidCopyLink, &prop)); if (prop.vt == VT_BSTR) { - _link.isHardLink = false; - _link.isCopyLink = true; + _link.LinkType = k_LinkType_CopyLink; _link.isRelative = false; // RAR5: copy links are from root folder of archive - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkPath.SetFromBstr(prop.bstrVal); } else if (prop.vt != VT_EMPTY) return E_FAIL; } */ - { NCOM::CPropVariant prop; RINOK(archive->GetProperty(index, kpidSymLink, &prop)) if (prop.vt == VT_BSTR) { - _link.isHardLink = false; - // _link.isCopyLink = false; - _link.isRelative = true; // RAR5, TAR: symbolic links can be relative - _link.linkPath.SetFromBstr(prop.bstrVal); + _link.LinkType = k_LinkType_PureSymLink; + _link.isRelative = true; // RAR5, TAR: symbolic links are relative by default + _link.LinkPath.SetFromBstr(prop.bstrVal); + // 7-Zip 24-: (tar, cpio, xar, ext, iso) handlers returned returned original path (with linux slash in most case) + // 7-Zip 24-: rar5 handler returned path with system slash. + // 7-Zip 25+: all handlers return linux path in most cases. } else if (prop.vt != VT_EMPTY) return E_FAIL; } - NtReparse_Data = NULL; - NtReparse_Size = 0; - - if (_link.linkPath.IsEmpty() && _arc->GetRawProps) + // linux path separator in (_link.LinkPath) is expected for most cases, + // if new handler code is used, and if data in archive is correct. 
+ // NtReparse_Data = NULL; + // NtReparse_Size = 0; + if (!_link.LinkPath.IsEmpty()) + { + REPLACE_SLASHES_from_Linux_to_Sys(_link.LinkPath) + } + else if (_arc->GetRawProps) { const void *data; - UInt32 dataSize; - UInt32 propType; - - _arc->GetRawProps->GetRawProp(_index, kpidNtReparse, &data, &dataSize, &propType); - - // if (dataSize == 1234567) // for debug: unpacking without reparse - if (dataSize != 0) + UInt32 dataSize, propType; + if (_arc->GetRawProps->GetRawProp(_index, kpidNtReparse, &data, &dataSize, &propType) == S_OK + // && dataSize == 1234567 // for debug: unpacking without reparse + && dataSize) { if (propType != NPropDataType::kRaw) return E_FAIL; - // 21.06: we need kpidNtReparse in linux for wim archives created in Windows - // #ifdef _WIN32 - - NtReparse_Data = data; - NtReparse_Size = dataSize; - - CReparseAttr reparse; - bool isOkReparse = reparse.Parse((const Byte *)data, dataSize); - if (isOkReparse) - { - _link.isHardLink = false; - // _link.isCopyLink = false; - _link.linkPath = reparse.GetPath(); - _link.isJunction = reparse.IsMountPoint(); - - if (reparse.IsSymLink_WSL()) - { - _link.isWSL = true; - _link.isRelative = reparse.IsRelative_WSL(); - } - else - _link.isRelative = reparse.IsRelative_Win(); - - // const AString s = GetAnsiString(_link.linkPath); - // printf("\n_link.linkPath: %s\n", s.Ptr()); - - #ifndef _WIN32 - _link.linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); - #endif - } - // #endif + // NtReparse_Data = data; + // NtReparse_Size = dataSize; + // we ignore error code here, if there is failure of parsing: + _link.Parse_from_WindowsReparseData((const Byte *)data, dataSize); } } - if (_link.linkPath.IsEmpty()) + if (_link.LinkPath.IsEmpty()) return S_OK; - + // (_link.LinkPath) uses system path separator. + // windows: (_link.LinkPath) doesn't contain linux separator (slash). 
{ - #ifdef _WIN32 - _link.linkPath.Replace(L'/', WCHAR_PATH_SEPARATOR); - #endif - - // rar5 uses "\??\" prefix for absolute links - if (_link.linkPath.IsPrefixedBy(WSTRING_PATH_SEPARATOR L"??" WSTRING_PATH_SEPARATOR)) + // _link.LinkPath = "\\??\\r:\\1\\2"; // for debug + // rar5+ returns kpidSymLink absolute link path with "\??\" prefix. + // we normalize such prefix: + if (_link.LinkPath.IsPrefixedBy(STRING_PATH_SEPARATOR "??" STRING_PATH_SEPARATOR)) { _link.isRelative = false; - _link.linkPath.DeleteFrontal(4); - } - - for (;;) - // while (NName::IsAbsolutePath(linkPath)) - { - unsigned n = NName::GetRootPrefixSize(_link.linkPath); - if (n == 0) - break; - _link.isRelative = false; - _link.linkPath.DeleteFrontal(n); - } - } - - if (_link.linkPath.IsEmpty()) - return S_OK; - - if (!_link.isRelative && _removePathParts.Size() != 0) - { - UStringVector pathParts; - SplitPathToParts(_link.linkPath, pathParts); - bool badPrefix = false; - FOR_VECTOR (i, _removePathParts) - { - if (CompareFileNames(_removePathParts[i], pathParts[i]) != 0) + // we normalize prefix from "\??\" to "\\?\": + _link.LinkPath.ReplaceOneCharAtPos(1, WCHAR_PATH_SEPARATOR); + _link.isWindowsPath = true; + if (_link.LinkPath.IsPrefixedBy_Ascii_NoCase( + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" 
+ STRING_PATH_SEPARATOR "UNC" + STRING_PATH_SEPARATOR)) { - badPrefix = true; - break; + // we normalize prefix from "\\?\UNC\path" to "\\path": + _link.LinkPath.DeleteFrontal(6); + _link.LinkPath.ReplaceOneCharAtPos(0, WCHAR_PATH_SEPARATOR); + } + else + { + const unsigned k_prefix_Size = 4; + if (NName::IsDrivePath(_link.LinkPath.Ptr(k_prefix_Size))) + _link.LinkPath.DeleteFrontal(k_prefix_Size); } } - if (!badPrefix) - pathParts.DeleteFrontal(_removePathParts.Size()); - _link.linkPath = MakePathFromParts(pathParts); } - - /* - if (!_link.linkPath.IsEmpty()) - { - printf("\n_link %s to -> %s\n", GetOemString(_item.Path).Ptr(), GetOemString(_link.linkPath).Ptr()); - } - */ - + _link.Normalize_to_RelativeSafe(_removePathParts); return S_OK; } @@ -961,7 +923,7 @@ static HRESULT GetOwner(IInArchive *archive, if (prop.vt == VT_UI4) { res.Id_Defined = true; - res.Id = prop.ulVal; // for debug + res.Id = prop.ulVal; // res.Id++; // for debug // if (pidId == kpidGroupId) res.Id += 7; // for debug // res.Id = 0; // for debug @@ -993,7 +955,7 @@ static HRESULT GetOwner(IInArchive *archive, HRESULT CArchiveExtractCallback::Read_fi_Props() { - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; const UInt32 index = _index; _fi.Attrib_Defined = false; @@ -1134,7 +1096,7 @@ void CArchiveExtractCallback::CreateFolders() if (!_item.IsDir #ifdef SUPPORT_LINKS #ifndef WIN32 - || !_link.linkPath.IsEmpty() + || !_link.LinkPath.IsEmpty() #endif #endif ) @@ -1273,8 +1235,7 @@ HRESULT CArchiveExtractCallback::CheckExistFile(FString &fullProcessedPath, bool // MyMoveFile can rename folders. 
So it's OK to use it for folders too if (!MyMoveFile(fullProcessedPath, existPath)) { - HRESULT errorCode = GetLastError_noZero_HRESULT(); - RINOK(SendMessageError2(errorCode, kCantRenameFile, existPath, fullProcessedPath)) + RINOK(SendMessageError2_with_LastError(kCantRenameFile, existPath, fullProcessedPath)) return E_FAIL; } } @@ -1341,7 +1302,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrArchive; + IInArchive * const archive = _arc->Archive; #endif const UInt32 index = _index; @@ -1387,7 +1348,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtr 0 && _curSize < (1 << 12)) + if (_curSize_Defined && _curSize && _curSize < k_LinkDataSize_LIMIT) { if (_fi.IsLinuxSymLink()) { @@ -1513,7 +1469,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrInit(_outMemBuf, _outMemBuf.Size()); outStreamLoc = _bufPtrSeqOutStream; } - else // not reprase + else // not reparse { if (_ntOptions.PreAllocateOutFile && !_isSplit && _curSize_Defined && _curSize > (1 << 12)) { @@ -1568,7 +1524,7 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrSeek((Int64)_position, STREAM_SEEK_SET, NULL)) } outStreamLoc = outFileStream_Loc; - } // if not reprase + } // if not reparse _outFileStream = outFileStream_Loc; @@ -1620,8 +1576,7 @@ Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre _fileLength_WasSet = false; _isRenamed = false; // _fi.Clear(); - _extractMode = false; - // _is_SymLink_in_Data = false; + _extractMode = false; _is_SymLink_in_Data_Linux = false; _needSetAttrib = false; _isSymLinkCreated = false; @@ -1661,7 +1616,7 @@ Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre } - IInArchive *archive = _arc->Archive; + IInArchive * const archive = _arc->Archive; RINOK(GetItem(index)) @@ -1677,10 +1632,9 @@ Z7_COM7F_IMF(CArchiveExtractCallback::GetStream(UInt32 index, ISequentialOutStre } } - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS RINOK(ReadLink()) - #endif // SUPPORT_LINKS - 
+#endif RINOK(Archive_GetItemBoolProp(archive, index, kpidEncrypted, _encrypted)) @@ -2016,63 +1970,80 @@ HRESULT CArchiveExtractCallback::CloseFile() #ifdef SUPPORT_LINKS +/* +in: + link.LinkPath : must be relative (non-absolute) path in any case !!! + link.isRelative / target path that must stored as created link: + == false / _dirPathPrefix_Full + link.LinkPath + == true / link.LinkPath +*/ -HRESULT CArchiveExtractCallback::SetFromLinkPath( - const FString &fullProcessedPath, - const CLinkInfo &linkInfo, +HRESULT CArchiveExtractCallback::SetLink( + const FString &fullProcessedPath_from, + const CLinkInfo &link, bool &linkWasSet) { linkWasSet = false; - if (!_ntOptions.SymLinks.Val && !linkInfo.isHardLink) + if (link.LinkPath.IsEmpty()) + return S_OK; + if (!_ntOptions.SymLinks.Val && link.Is_AnySymLink()) return S_OK; - - UString relatPath; - - /* if (linkInfo.isRelative) - linkInfo.linkPath is final link path that must be stored to file link field - else - linkInfo.linkPath is path from root of archive. So we must add _dirPathPrefix_Full before linkPath. - */ - - if (linkInfo.isRelative) - relatPath = GetDirPrefixOf(_item.Path); - relatPath += linkInfo.linkPath; - - if (!IsSafePath(relatPath)) { - return SendMessageError2( - 0, // errorCode + UString path; + if (link.isRelative) + { + // _item.PathParts : parts that will be created in output folder. + // we want to get directory prefix of link item. 
+ // so we remove file name (last non-empty part) from PathParts: + UStringVector v = _item.PathParts; + while (!v.IsEmpty()) + { + const unsigned len = v.Back().Len(); + v.DeleteBack(); + if (len) + break; + } + path = MakePathFromParts(v); + NName::NormalizeDirPathPrefix(path); + } + path += link.LinkPath; + /* + path is calculated virtual target path of link + path is relative to root folder of extracted items + if (!link.isRelative), then (path == link.LinkPath) + */ + if (!IsSafePath(path, link.Is_WSL())) + return SendMessageError2(0, // errorCode "Dangerous link path was ignored", - us2fs(_item.Path), - us2fs(linkInfo.linkPath)); // us2fs(relatPath) + us2fs(_item.Path), us2fs(link.LinkPath)); } - FString existPath; - if (linkInfo.isHardLink /* || linkInfo.IsCopyLink */ || !linkInfo.isRelative) + FString target; // target path that will be stored to link field + if (link.Is_HardLink() /* || link.IsCopyLink */ || !link.isRelative) { - if (!NName::GetFullPath(_dirPathPrefix_Full, us2fs(relatPath), existPath)) - { - RINOK(SendMessageError("Incorrect path", us2fs(relatPath))) - } + // isRelative == false + // all hard links and absolute symbolic links + // relatPath == link.LinkPath + // we get absolute link path for target: + if (!NName::GetFullPath(_dirPathPrefix_Full, us2fs(link.LinkPath), target)) + return SendMessageError("Incorrect link path", us2fs(link.LinkPath)); + // (target) is (_dirPathPrefix_Full + relatPath) } else { - existPath = us2fs(linkInfo.linkPath); - // printf("\nlinkPath = : %s\n", GetOemString(linkInfo.linkPath).Ptr()); + // link.isRelative == true + // relative symbolic links only + target = us2fs(link.LinkPath); } - - if (existPath.IsEmpty()) - return SendMessageError("Empty link", fullProcessedPath); + if (target.IsEmpty()) + return SendMessageError("Empty link", fullProcessedPath_from); - if (linkInfo.isHardLink /* || linkInfo.IsCopyLink */) + if (link.Is_HardLink() /* || link.IsCopyLink */) { - // if (linkInfo.isHardLink) + // if 
(link.isHardLink) { - if (!MyCreateHardLink(fullProcessedPath, existPath)) - { - const HRESULT errorCode = GetLastError_noZero_HRESULT(); - RINOK(SendMessageError2(errorCode, kCantCreateHardLink, fullProcessedPath, existPath)) - } + if (!MyCreateHardLink(fullProcessedPath_from, target)) + return SendMessageError2_with_LastError(kCantCreateHardLink, fullProcessedPath_from, target); /* RINOK(PrepareOperation(NArchive::NExtract::NAskMode::kExtract)) _op_WasReported = true; @@ -2085,19 +2056,19 @@ HRESULT CArchiveExtractCallback::SetFromLinkPath( // IsCopyLink { NFind::CFileInfo fi; - if (!fi.Find(existPath)) + if (!fi.Find(target)) { - RINOK(SendMessageError2("Cannot find the file for copying", existPath, fullProcessedPath)); + RINOK(SendMessageError2("Cannot find the file for copying", target, fullProcessedPath)); } else { if (_curSize_Defined && _curSize == fi.Size) - _copyFile_Path = existPath; + _copyFile_Path = target; else { - RINOK(SendMessageError2("File size collision for file copying", existPath, fullProcessedPath)); + RINOK(SendMessageError2("File size collision for file copying", target, fullProcessedPath)); } - // RINOK(MyCopyFile(existPath, fullProcessedPath)); + // RINOK(MyCopyFile(target, fullProcessedPath)); } } */ @@ -2111,127 +2082,249 @@ HRESULT CArchiveExtractCallback::SetFromLinkPath( // Windows before Vista doesn't support symbolic links. 
// we could convert such symbolic links to Junction Points // isJunction = true; - // convertToAbs = true; } */ - if (!_ntOptions.SymLinks_AllowDangerous.Val) +#ifdef _WIN32 + const bool isDir = (_item.IsDir || link.LinkType == k_LinkType_Junction); +#endif + + if (!_ntOptions.SymLinks_AllowDangerous.Val && link.isRelative) { - #ifdef _WIN32 - if (_item.IsDir) - #endif - if (linkInfo.isRelative) - { - CLinkLevelsInfo levelsInfo; - levelsInfo.Parse(linkInfo.linkPath); - if (levelsInfo.FinalLevel < 1 || levelsInfo.IsAbsolute) - { - return SendMessageError2( - 0, // errorCode - "Dangerous symbolic link path was ignored", - us2fs(_item.Path), - us2fs(linkInfo.linkPath)); - } - } + /* + We want to use additional check for links that can link to directory. + - linux: all symbolic links are files. + - windows: we can have file/directory symbolic link, + but file symbolic link works like directory link in windows. + So we use additional check for all relative links. + + We don't allow decreasing of final level of link. + So if some another extracted file will use this link, + then number of real path parts (after link redirection) cannot be + smaller than number of requested path parts from archive records. + + Now we check only (link.LinkPath) without (_item.PathParts). 
+ */ + CLinkLevelsInfo levelsInfo; + levelsInfo.Parse(link.LinkPath, link.Is_WSL()); + if (levelsInfo.FinalLevel < 1 + // || levelsInfo.LowLevel < 0 // we allow negative temporary levels + || levelsInfo.IsAbsolute) + return SendMessageError2(0, // errorCode + "Dangerous symbolic link path was ignored", + us2fs(_item.Path), us2fs(link.LinkPath)); } - #ifdef _WIN32 - +#ifdef _WIN32 CByteBuffer data; - // printf("\nFillLinkData(): %s\n", GetOemString(existPath).Ptr()); - if (!FillLinkData(data, fs2us(existPath), !linkInfo.isJunction, linkInfo.isWSL)) + // printf("\nFillLinkData(): %s\n", GetOemString(target).Ptr()); + if (link.Is_WSL()) + { + Convert_WinPath_to_WslLinuxPath(target, !link.isRelative); + FillLinkData_WslLink(data, fs2us(target)); + } + else + FillLinkData_WinLink(data, fs2us(target), link.LinkType != k_LinkType_Junction); + if (data.Size() == 0) return SendMessageError("Cannot fill link data", us2fs(_item.Path)); - /* if (NtReparse_Size != data.Size() || memcmp(NtReparse_Data, data, data.Size()) != 0) - { - SendMessageError("reconstructed Reparse is different", fs2us(existPath)); - } + SendMessageError("reconstructed Reparse is different", fs2us(target)); */ - - CReparseAttr attr; - if (!attr.Parse(data, data.Size())) { - RINOK(SendMessageError("Internal error for symbolic link file", us2fs(_item.Path))) - return S_OK; + // we check that reparse data is correct, but we ignore attr.MinorError. + CReparseAttr attr; + if (!attr.Parse(data, data.Size())) + return SendMessageError("Internal error for symbolic link file", us2fs(_item.Path)); } - if (!NFile::NIO::SetReparseData(fullProcessedPath, _item.IsDir, data, (DWORD)data.Size())) + if (!NFile::NIO::SetReparseData(fullProcessedPath_from, isDir, data, (DWORD)data.Size())) +#else // ! _WIN32 + if (!NFile::NIO::SetSymLink(fullProcessedPath_from, target)) +#endif // ! 
_WIN32 { - RINOK(SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath)) - return S_OK; + return SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath_from); } linkWasSet = true; - return S_OK; - - - #else // ! _WIN32 - - if (!NFile::NIO::SetSymLink(fullProcessedPath, existPath)) - { - RINOK(SendMessageError_with_LastError(kCantCreateSymLink, fullProcessedPath)) - return S_OK; - } - linkWasSet = true; - - return S_OK; - - #endif // ! _WIN32 } -bool CLinkInfo::Parse(const Byte *data, size_t dataSize, bool isLinuxData) -{ - Clear(); - // this->isLinux = isLinuxData; - - if (isLinuxData) - { - isJunction = false; - isHardLink = false; - AString utf; - if (dataSize >= (1 << 12)) - return false; - utf.SetFrom_CalcLen((const char *)data, (unsigned)dataSize); - UString u; - if (!ConvertUTF8ToUnicode(utf, u)) - return false; - linkPath = u; - - // in linux symbolic data: we expect that linux separator '/' is used - // if windows link was created, then we also must use linux separator - if (u.IsEmpty()) - return false; - const wchar_t c = u[0]; - isRelative = !IS_PATH_SEPAR(c); - return true; - } +bool CLinkInfo::Parse_from_WindowsReparseData(const Byte *data, size_t dataSize) +{ CReparseAttr reparse; if (!reparse.Parse(data, dataSize)) return false; - isHardLink = false; - // isCopyLink = false; - linkPath = reparse.GetPath(); - isJunction = reparse.IsMountPoint(); - + // const AString s = GetAnsiString(LinkPath); + // printf("\nlinkPath: %s\n", s.Ptr()); + LinkPath = reparse.GetPath(); if (reparse.IsSymLink_WSL()) { - isWSL = true; - isRelative = reparse.IsRelative_WSL(); + LinkType = k_LinkType_WSL; + isRelative = reparse.IsRelative_WSL(); // detected from LinkPath[0] + // LinkPath is original raw name converted to UString from AString + // Linux separator '/' is expected here. + REPLACE_SLASHES_from_Linux_to_Sys(LinkPath) } else - isRelative = reparse.IsRelative_Win(); - - // FIXME !!! 
- #ifndef _WIN32 - linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); - #endif - + { + LinkType = reparse.IsMountPoint() ? k_LinkType_Junction : k_LinkType_PureSymLink; + isRelative = reparse.IsRelative_Win(); // detected by (Flags == Z7_WIN_SYMLINK_FLAG_RELATIVE) + isWindowsPath = true; + // LinkPath is original windows link path from raparse data with \??\ prefix removed. + // windows '\\' separator is expected here. + // linux '/' separator is not expected here. + // we translate both types of separators to system separator. + LinkPath.Replace( +#if WCHAR_PATH_SEPARATOR == L'\\' + L'/' +#else + L'\\' +#endif + , WCHAR_PATH_SEPARATOR); + } + // (LinkPath) uses system path separator. + // windows: (LinkPath) doesn't contain linux separator (slash). + return true; +} + + +bool CLinkInfo::Parse_from_LinuxData(const Byte *data, size_t dataSize) +{ + // Clear(); // *this object was cleared by constructor already. + LinkType = k_LinkType_PureSymLink; + AString utf; + if (dataSize >= k_LinkDataSize_LIMIT) + return false; + utf.SetFrom_CalcLen((const char *)data, (unsigned)dataSize); + UString u; + if (!ConvertUTF8ToUnicode(utf, u)) + return false; + if (u.IsEmpty()) + return false; + const wchar_t c = u[0]; + isRelative = (c != L'/'); + // linux path separator is expected + REPLACE_SLASHES_from_Linux_to_Sys(u) + LinkPath = u; + // (LinkPath) uses system path separator. + // windows: (LinkPath) doesn't contain linux separator (slash). return true; } + +// in/out: (LinkPath) uses system path separator +// in/out: windows: (LinkPath) doesn't contain linux separator (slash). +// out: (LinkPath) is relative path, and LinkPath[0] is not path separator +// out: isRelative changed to false, if any prefix was removed. +// note: absolute windows links "c:\" to root will be reduced to empty string: +void CLinkInfo::Remove_AbsPathPrefixes() +{ + while (!LinkPath.IsEmpty()) + { + unsigned n = 0; + if (!Is_WSL()) + { + n = +#ifndef _WIN32 + isWindowsPath ? 
+ NName::GetRootPrefixSize_WINDOWS(LinkPath) : +#endif + NName::GetRootPrefixSize(LinkPath); +/* + // "c:path" will be ignored later as "Dangerous absolute path" + // so check is not required + if (n == 0 +#ifndef _WIN32 + && isWindowsPath +#endif + && NName::IsDrivePath2(LinkPath)) + n = 2; +*/ + } + if (n == 0) + { + if (!IS_PATH_SEPAR(LinkPath[0])) + break; + n = 1; + } + isRelative = false; // (LinkPath) will be treated as relative to root folder of archive + LinkPath.DeleteFrontal(n); + } +} + + +/* + it removes redundant separators, if there are double separators, + but it keeps double separators at start of string //name/. + in/out: system path separator is used + windows: slash character (linux separator) is not treated as separator + windows: (path) doesn't contain linux separator (slash). +*/ +static void RemoveRedundantPathSeparators(UString &path) +{ + wchar_t *dest = path.GetBuf(); + const wchar_t * const start = dest; + const wchar_t *src = dest; + for (;;) + { + wchar_t c = *src++; + if (c == 0) + break; + // if (IS_PATH_SEPAR(c)) // for Windows: we can change (/) to (\). + if (c == WCHAR_PATH_SEPARATOR) + { + if (dest - start >= 2 && dest[-1] == WCHAR_PATH_SEPARATOR) + continue; + // c = WCHAR_PATH_SEPARATOR; // for Windows: we can change (/) to (\). + } + *dest++ = c; + } + *dest = 0; + path.ReleaseBuf_SetLen((unsigned)(dest - path.Ptr())); +} + + +// in/out: (LinkPath) uses system path separator +// in/out: windows: (LinkPath) doesn't contain linux separator (slash). +// out: (LinkPath) is relative path, and LinkPath[0] is not path separator +void CLinkInfo::Normalize_to_RelativeSafe(UStringVector &removePathParts) +{ + // We WILL NOT WRITE original absolute link path from archive to filesystem. + // So here we remove all root prefixes from (LinkPath). 
+ // If we see any absolute root prefix, then we suppose that this prefix is virtual prefix + // that shows that link is relative to root folder of archive + RemoveRedundantPathSeparators(LinkPath); + // LinkPath = "\\\\?\\r:test\\test2"; // for debug + Remove_AbsPathPrefixes(); + // (LinkPath) now is relative: + // if (isRelative == false), then (LinkPath) is relative to root folder of archive + // if (isRelative == true ), then (LinkPath) is relative to current item + if (LinkPath.IsEmpty() || isRelative || removePathParts.Size() == 0) + return; + + // if LinkPath is prefixed by _removePathParts, we remove these paths + UStringVector pathParts; + SplitPathToParts(LinkPath, pathParts); + bool badPrefix = false; + { + FOR_VECTOR (i, removePathParts) + { + if (i >= pathParts.Size() + || CompareFileNames(removePathParts[i], pathParts[i]) != 0) + { + badPrefix = true; + break; + } + } + } + if (!badPrefix) + pathParts.DeleteFrontal(removePathParts.Size()); + LinkPath = MakePathFromParts(pathParts); + Remove_AbsPathPrefixes(); +} + #endif // SUPPORT_LINKS @@ -2239,12 +2332,12 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() { HRESULT res = S_OK; - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS size_t reparseSize = 0; bool repraseMode = false; bool needSetReparse = false; - CLinkInfo linkInfo; + CLinkInfo link; if (_bufPtrSeqOutStream) { @@ -2258,15 +2351,19 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() needSetReparse = reparse.Parse(_outMemBuf, reparseSize, errorCode); if (needSetReparse) { - UString linkPath = reparse.GetPath(); + UString LinkPath = reparse.GetPath(); #ifndef _WIN32 - linkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); + LinkPath.Replace(L'\\', WCHAR_PATH_SEPARATOR); #endif } */ - needSetReparse = linkInfo.Parse(_outMemBuf, reparseSize, _is_SymLink_in_Data_Linux); + needSetReparse = _is_SymLink_in_Data_Linux ? 
+ link.Parse_from_LinuxData(_outMemBuf, reparseSize) : + link.Parse_from_WindowsReparseData(_outMemBuf, reparseSize); if (!needSetReparse) res = SendMessageError_with_LastError("Incorrect reparse stream", us2fs(_item.Path)); + // (link.LinkPath) uses system path separator. + // windows: (link.LinkPath) doesn't contain linux separator (slash). } else { @@ -2281,23 +2378,18 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() _bufPtrSeqOutStream.Release(); } - #endif // SUPPORT_LINKS - +#endif // SUPPORT_LINKS const HRESULT res2 = CloseFile(); - if (res == S_OK) res = res2; - RINOK(res) - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS if (repraseMode) { _curSize = reparseSize; _curSize_Defined = true; - - #ifdef SUPPORT_LINKS if (needSetReparse) { // in Linux : we must delete empty file before symbolic link creation @@ -2307,31 +2399,19 @@ HRESULT CArchiveExtractCallback::CloseReparseAndFile() RINOK(SendMessageError_with_LastError("can't delete file", _diskFilePath)) } { - /* - // for DEBUG ONLY: we can extract sym links as WSL links - // to eliminate (non-admin) errors for sym links. 
- #ifdef _WIN32 - if (!linkInfo.isHardLink && !linkInfo.isJunction) - linkInfo.isWSL = true; - #endif - */ bool linkWasSet = false; - RINOK(SetFromLinkPath(_diskFilePath, linkInfo, linkWasSet)) + // link.LinkPath = "r:\\1\\2"; // for debug + // link.isJunction = true; // for debug + link.Normalize_to_RelativeSafe(_removePathParts); + RINOK(SetLink(_diskFilePath, link, linkWasSet)) if (linkWasSet) - _isSymLinkCreated = linkInfo.IsSymLink(); + _isSymLinkCreated = true; // link.IsSymLink(); else _needSetAttrib = false; } - /* - if (!NFile::NIO::SetReparseData(_diskFilePath, _item.IsDir, )) - { - res = SendMessageError_with_LastError(kCantCreateSymLink, _diskFilePath); - } - */ } - #endif } - #endif +#endif // SUPPORT_LINKS return res; } diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.h b/CPP/7zip/UI/Common/ArchiveExtractCallback.h index f3ee01c..71fa3ef 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.h +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.h @@ -178,36 +178,50 @@ struct CDirPathTime: public CFiTimesCAM #ifdef SUPPORT_LINKS + +enum ELinkType +{ + k_LinkType_HardLink, + k_LinkType_PureSymLink, + k_LinkType_Junction, + k_LinkType_WSL + // , k_LinkType_CopyLink; +}; + + struct CLinkInfo { - // bool isCopyLink; - bool isHardLink; - bool isJunction; + ELinkType LinkType; bool isRelative; - bool isWSL; - UString linkPath; + // if (isRelative == false), then (LinkPath) is relative to root folder of archive + // if (isRelative == true ), then (LinkPath) is relative to current item + bool isWindowsPath; + UString LinkPath; - bool IsSymLink() const { return !isHardLink; } + bool Is_HardLink() const { return LinkType == k_LinkType_HardLink; } + bool Is_AnySymLink() const { return LinkType != k_LinkType_HardLink; } + + bool Is_WSL() const { return LinkType == k_LinkType_WSL; } CLinkInfo(): - // IsCopyLink(false), - isHardLink(false), - isJunction(false), + LinkType(k_LinkType_PureSymLink), isRelative(false), - isWSL(false) + isWindowsPath(false) {} void 
Clear() { - // IsCopyLink = false; - isHardLink = false; - isJunction = false; + LinkType = k_LinkType_PureSymLink; isRelative = false; - isWSL = false; - linkPath.Empty(); + isWindowsPath = false; + LinkPath.Empty(); } - bool Parse(const Byte *data, size_t dataSize, bool isLinuxData); + bool Parse_from_WindowsReparseData(const Byte *data, size_t dataSize); + bool Parse_from_LinuxData(const Byte *data, size_t dataSize); + void Normalize_to_RelativeSafe(UStringVector &removePathParts); +private: + void Remove_AbsPathPrefixes(); }; #endif // SUPPORT_LINKS @@ -287,8 +301,8 @@ private: bool _isRenamed; bool _extractMode; - // bool _is_SymLink_in_Data; - bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX + bool _is_SymLink_in_Data_Linux; // false = WIN32, true = LINUX. + // _is_SymLink_in_Data_Linux is detected from Windows/Linux part of attributes of file. bool _needSetAttrib; bool _isSymLinkCreated; bool _itemFailure; @@ -420,6 +434,7 @@ public: HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path); HRESULT SendMessageError_with_LastError(const char *message, const FString &path); HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2); + HRESULT SendMessageError2_with_LastError(const char *message, const FString &path1, const FString &path2); #if defined(_WIN32) && !defined(UNDER_CE) && !defined(Z7_SFX) NExtract::NZoneIdMode::EEnum ZoneMode; @@ -487,11 +502,16 @@ public: private: CHardLinks _hardLinks; CLinkInfo _link; + // const void *NtReparse_Data; + // UInt32 NtReparse_Size; // FString _copyFile_Path; // HRESULT MyCopyFile(ISequentialOutStream *outStream); - HRESULT Link(const FString &fullProcessedPath); HRESULT ReadLink(); + HRESULT SetLink( + const FString &fullProcessedPath_from, + const CLinkInfo &linkInfo, + bool &linkWasSet); public: // call PrepareHardLinks() after Init() @@ -538,16 +558,6 @@ private: HRESULT CloseReparseAndFile(); HRESULT 
CloseReparseAndFile2(); HRESULT SetDirsTimes(); - - const void *NtReparse_Data; - UInt32 NtReparse_Size; - - #ifdef SUPPORT_LINKS - HRESULT SetFromLinkPath( - const FString &fullProcessedPath, - const CLinkInfo &linkInfo, - bool &linkWasSet); - #endif }; diff --git a/CPP/7zip/UI/Common/Bench.cpp b/CPP/7zip/UI/Common/Bench.cpp index 05d66aa..eb24e7f 100644 --- a/CPP/7zip/UI/Common/Bench.cpp +++ b/CPP/7zip/UI/Common/Bench.cpp @@ -871,14 +871,27 @@ struct CAffinityMode unsigned NumCoreThreads; unsigned NumCores; // unsigned DivideNum; + +#ifdef _WIN32 + unsigned NumGroups; +#endif + UInt32 Sizes[NUM_CPU_LEVELS_MAX]; void SetLevels(unsigned numCores, unsigned numCoreThreads); DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const; bool NeedAffinity() const { return NumBundleThreads != 0; } +#ifdef _WIN32 + bool NeedGroupsMode() const { return NumGroups > 1; } +#endif + WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const { +#ifdef _WIN32 + if (NeedGroupsMode()) // we need fix for bundleIndex usage + return thread.Create_With_Group(startAddress, parameter, bundleIndex % NumGroups); +#endif if (NeedAffinity()) { CCpuSet cpuSet; @@ -892,6 +905,9 @@ struct CAffinityMode NumBundleThreads(0), NumLevels(0), NumCoreThreads(1) +#ifdef _WIN32 + , NumGroups(0) +#endif // DivideNum(1) {} }; @@ -1288,22 +1304,28 @@ HRESULT CEncoderInfo::Generate() if (scp) { const UInt64 reduceSize = kBufferSize; - - /* in posix new thread uses same affinity as parent thread, + /* in posix : new thread uses same affinity as parent thread, so we don't need to send affinity to coder in posix */ - UInt64 affMask; - #if !defined(Z7_ST) && defined(_WIN32) + UInt64 affMask = 0; + UInt32 affinityGroup = (UInt32)(Int32)-1; + // UInt64 affinityInGroup = 0; +#if !defined(Z7_ST) && defined(_WIN32) { CCpuSet cpuSet; - affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); + if (AffinityMode.NeedGroupsMode()) 
// we need fix for affinityInGroup also + affinityGroup = EncoderIndex % AffinityMode.NumGroups; + else + affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); } - #else - affMask = 0; - #endif - // affMask <<= 3; // debug line: to test no affinity in coder; - // affMask = 0; - - RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL))) +#endif + // affMask <<= 3; // debug line: to test no affinity in coder + // affMask = 0; // for debug + // affinityGroup = 0; // for debug + // affinityInGroup = 1; // for debug + RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, + affMask != 0 ? &affMask : NULL, + affinityGroup != (UInt32)(Int32)-1 ? &affinityGroup : NULL, + /* affinityInGroup != 0 ? &affinityInGroup : */ NULL)) } else { @@ -2962,7 +2984,7 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) { AString s; // s.Add_UInt32(ti.numProcessThreads); - unsigned numSysThreads = ti.GetNumSystemThreads(); + const unsigned numSysThreads = ti.GetNumSystemThreads(); if (ti.GetNumProcessThreads() != numSysThreads) { // if (ti.numProcessThreads != ti.numSysThreads) @@ -2992,6 +3014,35 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) } #endif } +#ifdef _WIN32 + if (ti.Groups.GroupSizes.Size() > 1 || + (ti.Groups.GroupSizes.Size() == 1 + && ti.Groups.NumThreadsTotal != numSysThreads)) + { + s += " : "; + s.Add_UInt32(ti.Groups.GroupSizes.Size()); + s += " groups : "; + if (ti.Groups.NumThreadsTotal == numSysThreads) + { + s.Add_UInt32(ti.Groups.NumThreadsTotal); + s += " c : "; + } + UInt32 minSize, maxSize; + ti.Groups.Get_GroupSize_Min_Max(minSize, maxSize); + if (minSize == maxSize) + { + s.Add_UInt32(ti.Groups.GroupSizes[0]); + s += " c/g"; + } + else + FOR_VECTOR (i, ti.Groups.GroupSizes) + { + if (i != 0) + s.Add_Char(' '); + s.Add_UInt32(ti.Groups.GroupSizes[i]); + } + } +#endif return s; } @@ -3753,9 +3804,13 @@ HRESULT Bench( UInt64 complexInCommands = kComplexInCommands; UInt32 
numThreads_Start = 1; - #ifndef Z7_ST +#ifndef Z7_ST CAffinityMode affinityMode; - #endif +#ifdef _WIN32 + if (threadsInfo.IsGroupMode && threadsInfo.Groups.GroupSizes.Size() > 1) + affinityMode.NumGroups = threadsInfo.Groups.GroupSizes.Size(); +#endif +#endif COneMethodInfo method; @@ -4861,7 +4916,7 @@ HRESULT Bench( if (AreSameMethodNames(benchMethod, methodName)) { if (benchProps.IsEmpty() - || (benchProps == "x5" && method.PropsString.IsEmpty()) + || (benchProps.IsEqualTo("x5") && method.PropsString.IsEmpty()) || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps)) { callback.BenchProps.EncComplex = h.EncComplex; diff --git a/CPP/7zip/UI/Common/EnumDirItems.cpp b/CPP/7zip/UI/Common/EnumDirItems.cpp index 11643ae..cada2e6 100644 --- a/CPP/7zip/UI/Common/EnumDirItems.cpp +++ b/CPP/7zip/UI/Common/EnumDirItems.cpp @@ -1213,11 +1213,13 @@ HRESULT CDirItems::FillFixedReparse() // continue; // for debug if (!item.Has_Attrib_ReparsePoint()) continue; - + /* + We want to get properties of target file instead of properies of symbolic link. + Probably this code is unused, because + CFileInfo::Find(with followLink = true) called Fill_From_ByHandleFileInfo() already. 
+ */ // if (item.IsDir()) continue; - const FString phyPath = GetPhyPath(i); - NFind::CFileInfo fi; if (fi.Fill_From_ByHandleFileInfo(phyPath)) // item.IsDir() { @@ -1228,38 +1230,13 @@ HRESULT CDirItems::FillFixedReparse() item.Attrib = fi.Attrib; continue; } - - /* - // we request properties of target file instead of properies of symbolic link - // here we also can manually parse unsupported links (like WSL links) - NIO::CInFile inFile; - if (inFile.Open(phyPath)) - { - BY_HANDLE_FILE_INFORMATION info; - if (inFile.GetFileInformation(&info)) - { - // Stat.FilesSize doesn't contain item.Size already - // Stat.FilesSize -= item.Size; - item.Size = (((UInt64)info.nFileSizeHigh) << 32) + info.nFileSizeLow; - Stat.FilesSize += item.Size; - item.CTime = info.ftCreationTime; - item.ATime = info.ftLastAccessTime; - item.MTime = info.ftLastWriteTime; - item.Attrib = info.dwFileAttributes; - continue; - } - } - */ - RINOK(AddError(phyPath)) continue; } - // (SymLinks == true) here - + // (SymLinks == true) if (item.ReparseData.Size() == 0) continue; - // if (item.Size == 0) { // 20.03: we use Reparse Data instead of real data @@ -1277,7 +1254,7 @@ HRESULT CDirItems::FillFixedReparse() /* imagex/WIM reduces absolute paths in links (raparse data), if we archive non root folder. 
We do same thing here */ - bool isWSL = false; + // bool isWSL = false; if (attr.IsSymLink_WSL()) { // isWSL = true; @@ -1314,21 +1291,27 @@ HRESULT CDirItems::FillFixedReparse() continue; if (rootPrefixSize == prefix.Len()) continue; // simple case: paths are from root - if (link.Len() <= prefix.Len()) continue; - if (CompareFileNames(link.Left(prefix.Len()), prefix) != 0) continue; UString newLink = prefix.Left(rootPrefixSize); newLink += link.Ptr(prefix.Len()); - CByteBuffer data; - bool isSymLink = !attr.IsMountPoint(); - if (!FillLinkData(data, newLink, isSymLink, isWSL)) + CByteBuffer &data = item.ReparseData2; +/* + if (isWSL) + { + Convert_WinPath_to_WslLinuxPath(newLink, true); // is absolute : change it + FillLinkData_WslLink(data, newLink); + } + else +*/ + FillLinkData_WinLink(data, newLink, !attr.IsMountPoint()); + if (data.Size() == 0) continue; - item.ReparseData2 = data; + // item.ReparseData2 = data; } return S_OK; } diff --git a/CPP/7zip/UI/Common/Extract.cpp b/CPP/7zip/UI/Common/Extract.cpp index 010b01c..0301976 100644 --- a/CPP/7zip/UI/Common/Extract.cpp +++ b/CPP/7zip/UI/Common/Extract.cpp @@ -389,7 +389,7 @@ HRESULT Extract( { UString s = arcPath.Ptr(pos + 1); int index = codecs->FindFormatForExtension(s); - if (index >= 0 && s == L"001") + if (index >= 0 && s.IsEqualTo("001")) { s = arcPath.Left(pos); pos = s.ReverseFind(L'.'); diff --git a/CPP/7zip/UI/Common/ExtractingFilePath.cpp b/CPP/7zip/UI/Common/ExtractingFilePath.cpp index 88da4ad..5ca5e66 100644 --- a/CPP/7zip/UI/Common/ExtractingFilePath.cpp +++ b/CPP/7zip/UI/Common/ExtractingFilePath.cpp @@ -208,7 +208,7 @@ void Correct_FsPath(bool absIsAllowed, bool keepAndReplaceEmptyPrefixes, UString if (parts.Size() > 1 && parts[1].IsEmpty()) { i = 2; - if (parts.Size() > 2 && parts[2] == L"?") + if (parts.Size() > 2 && parts[2].IsEqualTo("?")) { i = 3; if (parts.Size() > 3 && NWindows::NFile::NName::IsDrivePath2(parts[3])) diff --git a/CPP/7zip/UI/Common/HashCalc.cpp 
b/CPP/7zip/UI/Common/HashCalc.cpp index 9caac36..f026f80 100644 --- a/CPP/7zip/UI/Common/HashCalc.cpp +++ b/CPP/7zip/UI/Common/HashCalc.cpp @@ -62,7 +62,7 @@ HRESULT CHashBundle::SetMethods(DECL_EXTERNAL_CODECS_LOC_VARS const UStringVecto if (m.MethodName.IsEmpty()) m.MethodName = k_DefaultHashMethod; - if (m.MethodName == "*") + if (m.MethodName.IsEqualTo("*")) { CRecordVector tempMethods; GetHashMethods(EXTERNAL_CODECS_LOC_VARS tempMethods); @@ -431,6 +431,19 @@ static void WriteLine(CDynLimBuf &hashFileString, } +static void Convert_TagName_to_MethodName(AString &method) +{ + // we need to convert at least SHA512/256 to SHA512-256, and SHA512/224 to SHA512-224 + // but we convert any '/' to '-'. + method.Replace('/', '-'); +} + +static void Convert_MethodName_to_TagName(AString &method) +{ + if (method.IsPrefixedBy_Ascii_NoCase("SHA512-2")) + method.ReplaceOneCharAtPos(6, '/'); +} + static void WriteLine(CDynLimBuf &hashFileString, const CHashOptionsLocal &options, @@ -440,8 +453,10 @@ static void WriteLine(CDynLimBuf &hashFileString, { AString methodName; if (!hb.Hashers.IsEmpty()) + { methodName = hb.Hashers[0].Name; - + Convert_MethodName_to_TagName(methodName); + } AString hashesString; AddHashResultLine(hashesString, hb.Hashers); WriteLine(hashFileString, options, path, isDir, methodName, hashesString); @@ -752,7 +767,7 @@ bool CHashPair::ParseCksum(const char *s) Name = end; Hash.Alloc(4); - SetBe32(Hash, crc) + SetBe32a(Hash, crc) Size_from_Arc = size; Size_from_Arc_Defined = true; @@ -773,56 +788,87 @@ static const char * const k_CsumMethodNames[] = { "sha256" , "sha224" -// , "sha512-224" -// , "sha512-256" + , "sha512-224" + , "sha512-256" , "sha384" , "sha512" -// , "sha3-224" + , "sha3-224" , "sha3-256" -// , "sha3-384" -// , "sha3-512" + , "sha3-384" + , "sha3-512" // , "shake128" // , "shake256" , "sha1" + , "sha2" + , "sha3" + , "sha" , "md5" - , "blake2sp" + , "blake2s" , "blake2b" + , "blake2sp" , "xxh64" - , "crc64" , "crc32" + , "crc64" , 
"cksum" }; -static UString GetMethod_from_FileName(const UString &name) + +// returns true, if (method) is known hash method or hash method group name. +static bool GetMethod_from_FileName(const UString &name, AString &method) { + method.Empty(); AString s; ConvertUnicodeToUTF8(name, s); const int dotPos = s.ReverseFind_Dot(); - const char *src = s.Ptr(); - bool isExtension = false; if (dotPos >= 0) { - isExtension = true; - src = s.Ptr(dotPos + 1); + method = s.Ptr(dotPos + 1); + if (method.IsEqualTo_Ascii_NoCase("txt") || + method.IsEqualTo_Ascii_NoCase("asc")) + { + method.Empty(); + const int dotPos2 = s.Find('.'); + if (dotPos2 >= 0) + s.DeleteFrom(dotPos2); + } } - const char *m = ""; + if (method.IsEmpty()) + { + // we support file names with "sum" and "sums" postfixes: "sha256sum", "sha256sums" + unsigned size; + if (s.Len() > 4 && StringsAreEqualNoCase_Ascii(s.RightPtr(4), "sums")) + size = 4; + else if (s.Len() > 3 && StringsAreEqualNoCase_Ascii(s.RightPtr(3), "sum")) + size = 3; + else + return false; + method = s; + method.DeleteFrom(s.Len() - size); + } + unsigned i; for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++) { - m = k_CsumMethodNames[i]; - if (isExtension) + const char *m = k_CsumMethodNames[i]; + if (method.IsEqualTo_Ascii_NoCase(m)) { - if (StringsAreEqual_Ascii(src, m)) - break; + // method = m; // we can get lowcase + return true; } - else if (IsString1PrefixedByString2_NoCase_Ascii(src, m)) - if (StringsAreEqual_Ascii(src + strlen(m), "sums")) - break; } - UString res; - if (i != Z7_ARRAY_SIZE(k_CsumMethodNames)) - res = m; - return res; + +/* + for (i = 0; i < Z7_ARRAY_SIZE(k_CsumMethodNames); i++) + { + const char *m = k_CsumMethodNames[i]; + if (method.IsPrefixedBy_Ascii_NoCase(m)) + { + method = m; // we get lowcase + return true; + } + } +*/ + return false; } @@ -1047,7 +1093,7 @@ Z7_COM7F_IMF(CHandler::GetRawProp(UInt32 index, PROPID propID, const void **data if (propID == kpidChecksum) { const CHashPair &hp = HashPairs[index]; 
- if (hp.Hash.Size() > 0) + if (hp.Hash.Size() != 0) { *data = hp.Hash; *dataSize = (UInt32)hp.Hash.Size(); @@ -1100,11 +1146,6 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) s.Add_UInt32(_hashSize * 8); s += "-bit"; } - if (!_nameExtenstion.IsEmpty()) - { - s.Add_Space_if_NotEmpty(); - s += _nameExtenstion; - } if (_is_PgpMethod) { Add_OptSpace_String(s, "PGP"); @@ -1120,6 +1161,18 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) Add_OptSpace_String(s, "TAG"); if (_are_there_Dirs) Add_OptSpace_String(s, "DIRS"); + if (!_method_from_FileName.IsEmpty()) + { + Add_OptSpace_String(s, "filename_method:"); + s += _method_from_FileName; + if (!_is_KnownMethod_in_FileName) + s += ":UNKNOWN"; + } + if (!_methods.IsEmpty()) + { + Add_OptSpace_String(s, "cmd_method:"); + s += _methods[0]; + } prop = s; break; } @@ -1228,6 +1281,15 @@ static HRESULT ReadStream_to_Buf(IInStream *stream, CByteBuffer &buf, IArchiveOp } +static bool isThere_Zero_Byte(const Byte *data, size_t size) +{ + for (size_t i = 0; i < size; i++) + if (data[i] == 0) + return true; + return false; +} + + Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallback *openCallback)) { COM_TRY_BEGIN @@ -1239,17 +1301,9 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb CObjectVector &pairs = HashPairs; - bool zeroMode = false; - bool cr_lf_Mode = false; - { - for (size_t i = 0; i < buf.Size(); i++) - if (buf.ConstData()[i] == 0) - { - zeroMode = true; - break; - } - } + const bool zeroMode = isThere_Zero_Byte(buf, buf.Size()); _is_ZeroMode = zeroMode; + bool cr_lf_Mode = false; if (!zeroMode) cr_lf_Mode = Is_CR_LF_Data(buf, buf.Size()); @@ -1263,13 +1317,21 @@ Z7_COM7F_IMF(CHandler::Open(IInStream *stream, const UInt64 *, IArchiveOpenCallb NCOM::CPropVariant prop; RINOK(openVolumeCallback->GetProperty(kpidName, &prop)) if (prop.vt == VT_BSTR) - _nameExtenstion = 
GetMethod_from_FileName(prop.bstrVal); + _is_KnownMethod_in_FileName = GetMethod_from_FileName(prop.bstrVal, _method_from_FileName); } } - bool cksumMode = false; - if (_nameExtenstion.IsEqualTo_Ascii_NoCase("cksum")) - cksumMode = true; + if (!_methods.IsEmpty()) + { + ConvertUnicodeToUTF8(_methods[0], _method_for_Extraction); + } + if (_method_for_Extraction.IsEmpty()) + { + // if (_is_KnownMethod_in_FileName) + _method_for_Extraction = _method_from_FileName; + } + + const bool cksumMode = _method_for_Extraction.IsEqualTo_Ascii_NoCase("cksum"); _is_CksumMode = cksumMode; size_t pos = 0; @@ -1366,6 +1428,7 @@ void CHandler::ClearVars() _is_ZeroMode = false; _are_there_Tags = false; _are_there_Dirs = false; + _is_KnownMethod_in_FileName = false; _hashSize_Defined = false; _hashSize = 0; } @@ -1374,7 +1437,8 @@ void CHandler::ClearVars() Z7_COM7F_IMF(CHandler::Close()) { ClearVars(); - _nameExtenstion.Empty(); + _method_from_FileName.Empty(); + _method_for_Extraction.Empty(); _pgpMethod.Empty(); HashPairs.Clear(); return S_OK; @@ -1401,19 +1465,73 @@ static bool CheckDigests(const Byte *a, const Byte *b, size_t size) } -static void AddDefaultMethod(UStringVector &methods, unsigned size) +static void AddDefaultMethod(UStringVector &methods, + const char *name, unsigned size) { + int shaVersion = -1; + if (name) + { + if (StringsAreEqualNoCase_Ascii(name, "sha")) + { + shaVersion = 0; + if (size == 0) + size = 32; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha1")) + { + shaVersion = 1; + if (size == 0) + size = 20; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha2")) + { + shaVersion = 2; + if (size == 0) + size = 32; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha3")) + { + if (size == 0 || + size == 32) name = "sha3-256"; + else if (size == 28) name = "sha3-224"; + else if (size == 48) name = "sha3-384"; + else if (size == 64) name = "sha3-512"; + } + else if (StringsAreEqualNoCase_Ascii(name, "sha512")) + { + // we allow any sha512 derived hash 
inside .sha512 file: + if (size == 48) name = "sha384"; + else if (size == 32) name = "sha512-256"; + else if (size == 28) name = "sha512-224"; + } + if (shaVersion >= 0) + name = NULL; + } + const char *m = NULL; - if (size == 32) m = "sha256"; - else if (size == 20) m = "sha1"; - else if (size == 16) m = "md5"; - else if (size == 8) m = "crc64"; - else if (size == 4) m = "crc32"; + if (name) + m = name; else + { + if (size == 64) m = "sha512"; + else if (size == 48) m = "sha384"; + else if (size == 32) m = "sha256"; + else if (size == 28) m = "sha224"; + else if (size == 20) m = "sha1"; + else if (shaVersion < 0) + { + if (size == 16) m = "md5"; + else if (size == 8) m = "crc64"; + else if (size == 4) m = "crc32"; + } + } + + if (!m) return; - #ifdef Z7_EXTERNAL_CODECS + +#ifdef Z7_EXTERNAL_CODECS const CExternalCodecs *_externalCodecs = g_ExternalCodecs_Ptr; - #endif +#endif CMethodId id; if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS AString(m), id)) @@ -1444,15 +1562,15 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, CHashBundle hb_Glob; // UStringVector methods = options.Methods; UStringVector methods; - - if (methods.IsEmpty() && !_nameExtenstion.IsEmpty()) + +/* + if (methods.IsEmpty() && !utf_nameExtenstion.IsEmpty() && !_hashSize_Defined) { - AString utf; - ConvertUnicodeToUTF8(_nameExtenstion, utf); CMethodId id; - if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf, id)) + if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS utf_nameExtenstion, id)) methods.Add(_nameExtenstion); } +*/ if (methods.IsEmpty() && !_pgpMethod.IsEmpty()) { @@ -1461,12 +1579,21 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, methods.Add(UString(_pgpMethod)); } +/* if (methods.IsEmpty() && _pgpMethod.IsEmpty() && _hashSize_Defined) - AddDefaultMethod(methods, _hashSize); + { + AddDefaultMethod(methods, + utf_nameExtenstion.IsEmpty() ? 
NULL : utf_nameExtenstion.Ptr(), + _hashSize); + } +*/ - RINOK(hb_Glob.SetMethods( + if (!methods.IsEmpty()) + { + RINOK(hb_Glob.SetMethods( EXTERNAL_CODECS_LOC_VARS methods)) + } Z7_DECL_CMyComPtr_QI_FROM( IArchiveUpdateCallbackFile, @@ -1561,9 +1688,11 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, { hb_Use = &hb_Loc; CMethodId id; - if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS hp.Method, id)) + AString methodName = hp.Method; + Convert_TagName_to_MethodName(methodName); + if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, id)) { - methods_loc.Add(UString(hp.Method)); + methods_loc.Add(UString(methodName)); RINOK(hb_Loc.SetMethods( EXTERNAL_CODECS_LOC_VARS methods_loc)) @@ -1573,7 +1702,10 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, } else if (methods.IsEmpty()) { - AddDefaultMethod(methods_loc, (unsigned)hp.Hash.Size()); + AddDefaultMethod(methods_loc, + _method_for_Extraction.IsEmpty() ? NULL : + _method_for_Extraction.Ptr(), + (unsigned)hp.Hash.Size()); if (!methods_loc.IsEmpty()) { hb_Use = &hb_Loc; @@ -1621,7 +1753,7 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, Int32 opRes = NArchive::NExtract::NOperationResult::kUnsupportedMethod; if (isSupportedMode && res_SetMethods != E_NOTIMPL - && hb_Use->Hashers.Size() > 0 + && !hb_Use->Hashers.IsEmpty() ) { const CHasherState &hs = hb_Use->Hashers[0]; @@ -1774,10 +1906,6 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt methods.Add(_methods[k]); } } - else if (_crcSize_WasSet) - { - AddDefaultMethod(methods, _crcSize); - } else { Z7_DECL_CMyComPtr_QI_FROM( @@ -1789,12 +1917,23 @@ Z7_COM7F_IMF(CHandler::UpdateItems(ISequentialOutStream *outStream, UInt32 numIt RINOK(getRootProps->GetRootProp(kpidArcFileName, &prop)) if (prop.vt == VT_BSTR) { - const UString method = GetMethod_from_FileName(prop.bstrVal); + AString method; + /* const bool isKnownMethod = */ GetMethod_from_FileName(prop.bstrVal, 
method); if (!method.IsEmpty()) - methods.Add(method); + { + AddDefaultMethod(methods, method, _crcSize_WasSet ? _crcSize : 0); + if (methods.IsEmpty()) + return E_NOTIMPL; + } } } } + if (methods.IsEmpty() && _crcSize_WasSet) + { + AddDefaultMethod(methods, + NULL, // name + _crcSize); + } RINOK(hb.SetMethods(EXTERNAL_CODECS_LOC_VARS methods)) @@ -2038,6 +2177,15 @@ HRESULT CHandler::SetProperty(const wchar_t *nameSpec, const PROPVARIANT &value) } +void CHandler::InitProps() +{ + _supportWindowsBackslash = true; + _crcSize_WasSet = false; + _crcSize = 4; + _methods.Clear(); + _options.Init_HashOptionsLocal(); +} + Z7_COM7F_IMF(CHandler::SetProperties(const wchar_t * const *names, const PROPVARIANT *values, UInt32 numProps)) { COM_TRY_BEGIN @@ -2088,22 +2236,27 @@ void Codecs_AddHashArcHandler(CCodecs *codecs) " sha512" " sha384" " sha224" - // " sha512-224" - // " sha512-256" - // " sha3-224" + " sha512-224" + " sha512-256" + " sha3-224" " sha3-256" - // " sha3-384" - // " sha3-512" + " sha3-384" + " sha3-512" // " shake128" // " shake256" " sha1" + " sha2" + " sha3" " sha" " md5" + " blake2s" + " blake2b" " blake2sp" " xxh64" - " crc32 crc64" - " asc" + " crc32" + " crc64" " cksum" + " asc" // " b2sum" ), UString()); diff --git a/CPP/7zip/UI/Common/HashCalc.h b/CPP/7zip/UI/Common/HashCalc.h index 1e9dbf4..b8f867f 100644 --- a/CPP/7zip/UI/Common/HashCalc.h +++ b/CPP/7zip/UI/Common/HashCalc.h @@ -279,32 +279,25 @@ Z7_CLASS_IMP_CHandler_IInArchive_3( bool _isArc; bool _supportWindowsBackslash; bool _crcSize_WasSet; - UInt64 _phySize; - CObjectVector HashPairs; - UString _nameExtenstion; - // UString _method_fromName; - AString _pgpMethod; bool _is_CksumMode; bool _is_PgpMethod; bool _is_ZeroMode; bool _are_there_Tags; bool _are_there_Dirs; + bool _is_KnownMethod_in_FileName; bool _hashSize_Defined; unsigned _hashSize; UInt32 _crcSize; + UInt64 _phySize; + CObjectVector HashPairs; UStringVector _methods; + AString _method_from_FileName; + AString _pgpMethod; + 
AString _method_for_Extraction; CHashOptionsLocal _options; void ClearVars(); - - void InitProps() - { - _supportWindowsBackslash = true; - _crcSize_WasSet = false; - _crcSize = 4; - _methods.Clear(); - _options.Init_HashOptionsLocal(); - } + void InitProps(); bool CanUpdate() const { diff --git a/CPP/7zip/UI/Common/LoadCodecs.cpp b/CPP/7zip/UI/Common/LoadCodecs.cpp index 6bf53ea..943435a 100644 --- a/CPP/7zip/UI/Common/LoadCodecs.cpp +++ b/CPP/7zip/UI/Common/LoadCodecs.cpp @@ -170,7 +170,7 @@ void CArcInfoEx::AddExts(const UString &ext, const UString &addExt) if (i < addExts.Size()) { extInfo.AddExt = addExts[i]; - if (extInfo.AddExt == L"*") + if (extInfo.AddExt.IsEqualTo("*")) extInfo.AddExt.Empty(); } Exts.Add(extInfo); @@ -931,8 +931,8 @@ bool CCodecs::FindFormatForArchiveType(const UString &arcType, CIntVector &forma const UString name = arcType.Mid(pos, (unsigned)pos2 - pos); if (name.IsEmpty()) return false; - int index = FindFormatForArchiveType(name); - if (index < 0 && name != L"*") + const int index = FindFormatForArchiveType(name); + if (index < 0 && !name.IsEqualTo("*")) { formatIndices.Clear(); return false; diff --git a/CPP/7zip/UI/Common/Update.cpp b/CPP/7zip/UI/Common/Update.cpp index b959a3c..1c2754e 100644 --- a/CPP/7zip/UI/Common/Update.cpp +++ b/CPP/7zip/UI/Common/Update.cpp @@ -474,7 +474,7 @@ static HRESULT Compress( CArcToDoStat stat2; - if (options.RenamePairs.Size() != 0) + if (options.RenameMode || options.RenamePairs.Size() != 0) { FOR_VECTOR (i, arcItems) { @@ -1920,7 +1920,7 @@ Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION if (NFind::DoesDirExist(phyPath)) { RINOK(callback->DeletingAfterArchiving(phyPath, true)) - RemoveDir(phyPath); + RemoveDirAlways_if_Empty(phyPath); } } diff --git a/CPP/7zip/UI/Common/Update.h b/CPP/7zip/UI/Common/Update.h index 216339a..ae141e5 100644 --- a/CPP/7zip/UI/Common/Update.h +++ b/CPP/7zip/UI/Common/Update.h @@ -94,6 +94,7 @@ struct CUpdateOptions bool DeleteAfterCompressing; bool SetArcMTime; + bool RenameMode; 
CBoolPair NtSecurity; CBoolPair AltStreams; @@ -139,6 +140,7 @@ struct CUpdateOptions DeleteAfterCompressing(false), SetArcMTime(false), + RenameMode(false), ArcNameMode(k_ArcNameMode_Smart), PathMode(NWildcard::k_RelatPath) diff --git a/CPP/7zip/UI/Common/UpdateCallback.cpp b/CPP/7zip/UI/Common/UpdateCallback.cpp index d3ee639..e2f1866 100644 --- a/CPP/7zip/UI/Common/UpdateCallback.cpp +++ b/CPP/7zip/UI/Common/UpdateCallback.cpp @@ -32,6 +32,7 @@ #include "../../../Windows/PropVariant.h" #include "../../Common/StreamObjects.h" +#include "../../Archive/Common/ItemNameUtils.h" #include "UpdateCallback.h" @@ -306,7 +307,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetRawProp(UInt32 index, PROPID propID, con #if defined(_WIN32) && !defined(UNDER_CE) -static UString GetRelativePath(const UString &to, const UString &from) +static UString GetRelativePath(const UString &to, const UString &from, bool isWSL) { UStringVector partsTo, partsFrom; SplitPathToParts(to, partsTo); @@ -324,11 +325,12 @@ static UString GetRelativePath(const UString &to, const UString &from) if (i == 0) { - #ifdef _WIN32 - if (NName::IsDrivePath(to) || - NName::IsDrivePath(from)) +#ifdef _WIN32 + if (isWSL || + (NName::IsDrivePath(to) || + NName::IsDrivePath(from))) return to; - #endif +#endif } UString s; @@ -373,54 +375,87 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR return S_OK; } - #if !defined(UNDER_CE) - +#if !defined(UNDER_CE) if (up.DirIndex >= 0) { const CDirItem &di = DirItems->Items[(unsigned)up.DirIndex]; - - #ifdef _WIN32 - // if (di.IsDir()) + if (di.ReparseData.Size()) { +#ifdef _WIN32 CReparseAttr attr; if (attr.Parse(di.ReparseData, di.ReparseData.Size())) { - const UString simpleName = attr.GetPath(); - if (!attr.IsSymLink_WSL() && attr.IsRelative_Win()) - prop = simpleName; - else + UString path = attr.GetPath(); + if (!path.IsEmpty()) { - const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex); - FString fullPath; - if 
(NDir::MyGetFullPathName(phyPath, fullPath)) + bool isWSL = attr.IsSymLink_WSL(); + if (isWSL) + NArchive::NItemName::ReplaceToWinSlashes(path, true); // useBackslashReplacement + // it's expected that (path) now uses windows slashes. + // CReparseAttr::IsRelative_Win() returns true if FLAG_RELATIVE is set + // CReparseAttr::IsRelative_Win() returns true for "\dir1\path" + // but we want to store real relative paths without "\" root prefix. + // so we parse path instead of IsRelative_Win() calling. + if (// attr.IsRelative_Win() || + (isWSL ? + IS_PATH_SEPAR(path[0]) : + NName::IsAbsolutePath(path))) { - prop = GetRelativePath(simpleName, fs2us(fullPath)); + // (path) is abolute path or relative to root: "\path" + // we try to convert (path) to relative path for writing to archive. + const FString phyPath = DirItems->GetPhyPath((unsigned)up.DirIndex); + FString fullPath; + if (NDir::MyGetFullPathName(phyPath, fullPath)) + { + if (IS_PATH_SEPAR(path[0]) && + !IS_PATH_SEPAR(path[1])) + { + // path is relative to root of (fullPath): "\path" + const unsigned prefixSize = NName::GetRootPrefixSize(fullPath); + if (prefixSize) + { + path.DeleteFrontal(1); + path.Insert(0, fs2us(fullPath.Left(prefixSize))); + // we have changed "\" prefix to drive prefix "c:\" in (path). + // (path) is Windows path now. + isWSL = false; + } + } + } + path = GetRelativePath(path, fs2us(fullPath), isWSL); } +#if WCHAR_PATH_SEPARATOR != L'/' + // 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes. + // so we can return any slashes to TAR handler. + // or we can convert to linux slashes here, + // because input IInArchive handler uses linux slashes for kpidSymLink. + // path.Replace(WCHAR_PATH_SEPARATOR, L'/'); +#endif + if (!path.IsEmpty()) + prop = path; } - prop.Detach(value); - return S_OK; } - } - - #else // _WIN32 - - if (di.ReparseData.Size() != 0) - { +#else // ! 
_WIN32 AString utf; utf.SetFrom_CalcLen((const char *)(const Byte *)di.ReparseData, (unsigned)di.ReparseData.Size()); - + #if 0 // 0 - for debug + // it's expected that link data uses system codepage. + // fs2us() ignores conversion errors. But we want correct path + UString us (fs2us(utf)); + #else UString us; if (ConvertUTF8ToUnicode(utf, us)) + #endif { - prop = us; - prop.Detach(value); - return S_OK; + if (!us.IsEmpty()) + prop = us; } +#endif // ! _WIN32 } - - #endif // _WIN32 + prop.Detach(value); + return S_OK; } - #endif // !defined(UNDER_CE) +#endif // !defined(UNDER_CE) } else if (propID == kpidHardLink) { @@ -428,7 +463,12 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR { const CKeyKeyValPair &pair = _map[_hardIndex_To]; const CUpdatePair2 &up2 = (*UpdatePairs)[pair.Value]; - prop = DirItems->GetLogPath((unsigned)up2.DirIndex); + const UString path = DirItems->GetLogPath((unsigned)up2.DirIndex); +#if WCHAR_PATH_SEPARATOR != L'/' + // 7-Zip's TAR handler in Windows replaces windows slashes to linux slashes. 
+ // path.Replace(WCHAR_PATH_SEPARATOR, L'/'); +#endif + prop = path; prop.Detach(value); return S_OK; } @@ -438,7 +478,7 @@ Z7_COM7F_IMF(CArchiveUpdateCallback::GetProperty(UInt32 index, PROPID propID, PR return S_OK; } } - } + } // if (up.NewData) if (up.IsAnti && propID != kpidIsDir diff --git a/CPP/7zip/UI/Console/Main.cpp b/CPP/7zip/UI/Console/Main.cpp index dabd696..90e00a4 100644 --- a/CPP/7zip/UI/Console/Main.cpp +++ b/CPP/7zip/UI/Console/Main.cpp @@ -908,9 +908,12 @@ int Main2( if (options.EnableHeaders) { - ShowCopyrightAndHelp(g_StdStream, false); - if (!parser.Parse1Log.IsEmpty()) - *g_StdStream << parser.Parse1Log; + if (g_StdStream) + { + ShowCopyrightAndHelp(g_StdStream, false); + if (!parser.Parse1Log.IsEmpty()) + *g_StdStream << parser.Parse1Log; + } } parser.Parse2(options); diff --git a/CPP/7zip/UI/Console/makefile b/CPP/7zip/UI/Console/makefile index a20b0cc..d449b38 100644 --- a/CPP/7zip/UI/Console/makefile +++ b/CPP/7zip/UI/Console/makefile @@ -59,10 +59,10 @@ COMPRESS_OBJS = \ C_OBJS = $(C_OBJS) \ $O\Alloc.obj \ $O\CpuArch.obj \ - $O\Sort.obj \ $O\Threads.obj \ !include "../../Crc.mak" +!include "../../Sort.mak" !include "Console.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/UI/Explorer/makefile b/CPP/7zip/UI/Explorer/makefile index 3901d6b..311d70e 100644 --- a/CPP/7zip/UI/Explorer/makefile +++ b/CPP/7zip/UI/Explorer/makefile @@ -72,7 +72,7 @@ FM_OBJS = \ C_OBJS = \ $O\CpuArch.obj \ - $O\Sort.obj \ $O\Threads.obj \ +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/UI/Far/Plugin.cpp b/CPP/7zip/UI/Far/Plugin.cpp index 2d31b8a..b7f91d6 100644 --- a/CPP/7zip/UI/Far/Plugin.cpp +++ b/CPP/7zip/UI/Far/Plugin.cpp @@ -61,7 +61,6 @@ static void MyGetFileTime(IFolderFolder *folder, UInt32 itemIndex, } #define kDotsReplaceString "[[..]]" -#define kDotsReplaceStringU L"[[..]]" static void CopyStrLimited(char *dest, const AString &src, unsigned len) { @@ -84,7 +83,7 @@ void CPlugin::ReadPluginPanelItem(PluginPanelItem 
&panelItem, UInt32 itemIndex) throw 272340; AString oemString (UnicodeStringToMultiByte(prop.bstrVal, CP_OEMCP)); - if (oemString == "..") + if (oemString.IsEqualTo("..")) oemString = kDotsReplaceString; COPY_STR_LIMITED(panelItem.FindData.cFileName, oemString); @@ -193,7 +192,7 @@ void CPlugin::EnterToDirectory(const UString &dirName) { CMyComPtr newFolder; UString s = dirName; - if (dirName == kDotsReplaceStringU) + if (dirName.IsEqualTo(kDotsReplaceString)) s = ".."; _folder->BindToFolder(s, &newFolder); if (!newFolder) @@ -209,12 +208,12 @@ void CPlugin::EnterToDirectory(const UString &dirName) int CPlugin::SetDirectory(const char *aszDir, int /* opMode */) { UString path = MultiByteToUnicodeString(aszDir, CP_OEMCP); - if (path == WSTRING_PATH_SEPARATOR) + if (path.IsEqualTo(STRING_PATH_SEPARATOR)) { _folder.Release(); m_ArchiveHandler->BindToRootFolder(&_folder); } - else if (path == L"..") + else if (path.IsEqualTo("..")) { CMyComPtr newFolder; _folder->BindToParentFolder(&newFolder); diff --git a/CPP/7zip/UI/Far/makefile b/CPP/7zip/UI/Far/makefile index a66f9d7..7bc166b 100644 --- a/CPP/7zip/UI/Far/makefile +++ b/CPP/7zip/UI/Far/makefile @@ -99,9 +99,9 @@ COMPRESS_OBJS = \ C_OBJS = \ $O\Alloc.obj \ $O\CpuArch.obj \ - $O\Sort.obj \ $O\Threads.obj \ !include "../../Crc.mak" +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/UI/FileManager/FM.cpp b/CPP/7zip/UI/FileManager/FM.cpp index 7310802..b2f4c2b 100644 --- a/CPP/7zip/UI/FileManager/FM.cpp +++ b/CPP/7zip/UI/FileManager/FM.cpp @@ -651,7 +651,7 @@ static int WINAPI WinMain2(int nCmdShow) SplitStringToTwoStrings(commandsString, paramString, tailString); paramString.Trim(); tailString.Trim(); - if (tailString.IsPrefixedBy(L"-t")) + if (tailString.IsPrefixedBy("-t")) g_ArcFormat = tailString.Ptr(2); /* diff --git a/CPP/7zip/UI/FileManager/LangUtils.cpp b/CPP/7zip/UI/FileManager/LangUtils.cpp index 8fcb507..4712192 100644 --- a/CPP/7zip/UI/FileManager/LangUtils.cpp +++ 
b/CPP/7zip/UI/FileManager/LangUtils.cpp @@ -309,15 +309,13 @@ void ReloadLang() { g_Lang.Clear(); ReadRegLang(g_LangID); - #ifndef _UNICODE - if (g_IsNT) - #endif + if (g_LangID.IsEmpty()) { - if (g_LangID.IsEmpty()) - { +#ifndef _UNICODE + if (g_IsNT) +#endif OpenDefaultLang(); - return; - } + return; } if (g_LangID.Len() > 1 || g_LangID[0] != L'-') { diff --git a/CPP/7zip/UI/FileManager/LinkDialog.cpp b/CPP/7zip/UI/FileManager/LinkDialog.cpp index 0f24761..a92ee4d 100644 --- a/CPP/7zip/UI/FileManager/LinkDialog.cpp +++ b/CPP/7zip/UI/FileManager/LinkDialog.cpp @@ -45,28 +45,24 @@ static bool GetSymLink(CFSTR path, CReparseAttr &attr, UString &errorMessage) CByteBuffer buf; if (!NIO::GetReparseData(path, buf, NULL)) return false; - if (!attr.Parse(buf, buf.Size())) { SetLastError(attr.ErrorCode); return false; } - CByteBuffer data2; - if (!FillLinkData(data2, attr.GetPath(), - !attr.IsMountPoint(), attr.IsSymLink_WSL())) + FillLinkData(data2, attr.GetPath(), + !attr.IsMountPoint(), attr.IsSymLink_WSL()); + if (data2.Size() == 0) { errorMessage = "Cannot reproduce reparse point"; return false; } - - if (data2.Size() != buf.Size() || - memcmp(data2, buf, buf.Size()) != 0) + if (data2 != buf) { errorMessage = "mismatch for reproduced reparse point"; return false; } - return true; } @@ -113,8 +109,8 @@ bool CLinkDialog::OnInit() const bool res = GetSymLink(us2fs(FilePath), attr, error); if (!res && error.IsEmpty()) { - DWORD lastError = GetLastError(); - if (lastError != 0) + const DWORD lastError = GetLastError(); + if (lastError) error = NError::MyFormatMessage(lastError); } @@ -319,10 +315,10 @@ void CLinkDialog::OnButton_Link() return; } - const bool isSymLink = (idb != IDR_LINK_TYPE_JUNCTION); - CByteBuffer data; - if (!FillLinkData(data, to, isSymLink, isWSL)) + const bool isSymLink = (idb != IDR_LINK_TYPE_JUNCTION); + FillLinkData(data, to, isSymLink, isWSL); + if (data.Size() == 0) { ShowError(L"Incorrect link"); return; @@ -386,6 +382,9 @@ void CApp::Link() 
path = destPanel.GetFsPath(); } + CSelectedState srcSelState; + srcPanel.SaveSelectedState(srcSelState); + CLinkDialog dlg; dlg.CurDirPrefix = fsPrefix; dlg.FilePath = srcPath + itemName; @@ -394,7 +393,10 @@ void CApp::Link() if (dlg.Create(srcPanel.GetParent()) != IDOK) return; - // fix it: we should refresh panel with changed link + // we refresh srcPanel to show changes in "Link" (kpidNtReparse) column. + // maybe we should refresh another panel also? + if (srcPanel._visibleColumns.FindItem_for_PropID(kpidNtReparse) >= 0) + srcPanel.RefreshListCtrl(srcSelState); RefreshTitleAlways(); } diff --git a/CPP/7zip/UI/FileManager/Panel.h b/CPP/7zip/UI/FileManager/Panel.h index 9c53048..9ef0926 100644 --- a/CPP/7zip/UI/FileManager/Panel.h +++ b/CPP/7zip/UI/FileManager/Panel.h @@ -711,8 +711,8 @@ public: } // bool IsFsOrDrivesFolder() const { return IsFSFolder() || IsFSDrivesFolder(); } - bool IsDeviceDrivesPrefix() const { return _currentFolderPrefix == L"\\\\.\\"; } - bool IsSuperDrivesPrefix() const { return _currentFolderPrefix == L"\\\\?\\"; } + bool IsDeviceDrivesPrefix() const { return _currentFolderPrefix.IsEqualTo("\\\\.\\"); } + bool IsSuperDrivesPrefix() const { return _currentFolderPrefix.IsEqualTo("\\\\?\\"); } /* c:\Dir diff --git a/CPP/7zip/UI/FileManager/PanelCopy.cpp b/CPP/7zip/UI/FileManager/PanelCopy.cpp index d4f1db7..f070be9 100644 --- a/CPP/7zip/UI/FileManager/PanelCopy.cpp +++ b/CPP/7zip/UI/FileManager/PanelCopy.cpp @@ -284,7 +284,7 @@ HRESULT CPanel::CopyTo(CCopyToOptions &options, if (options.hashMethods.Size() == 1) { const UString &s = options.hashMethods[0]; - if (s != L"*") + if (!s.IsEqualTo("*")) title = s; } } diff --git a/CPP/7zip/UI/FileManager/PanelFolderChange.cpp b/CPP/7zip/UI/FileManager/PanelFolderChange.cpp index c34cb74..b0fb53e 100644 --- a/CPP/7zip/UI/FileManager/PanelFolderChange.cpp +++ b/CPP/7zip/UI/FileManager/PanelFolderChange.cpp @@ -428,7 +428,7 @@ void CPanel::LoadFullPathAndShow() UString name_Computer = 
RootFolder_GetName_Computer(iconIndex); name_Computer.Add_PathSepar(); if (path == name_Computer - || path == L"\\\\?\\") + || path.IsEqualTo("\\\\?\\")) item.iImage = iconIndex; else { @@ -639,7 +639,7 @@ bool CPanel::OnComboBoxCommand(UINT code, LPARAM /* param */, LRESULT &result) unsigned indent = 0; { UString path = _currentFolderPrefix; - // path = L"\\\\.\\y:\\"; // for debug + // path = "\\\\.\\y:\\"; // for debug UString prefix0; if (path.IsPrefixedBy_Ascii_NoCase("\\\\")) { @@ -702,7 +702,7 @@ bool CPanel::OnComboBoxCommand(UINT code, LPARAM /* param */, LRESULT &result) int iconIndex_Computer; const UString name_Computer = RootFolder_GetName_Computer(iconIndex_Computer); - // const bool is_devicePrefix = (sumPath == L"\\\\.\\"); + // const bool is_devicePrefix = (sumPath.IsEqualTo("\\\\.\\")); if (pathParts.Size() > 1) if (!sumPath.IsEmpty() @@ -901,8 +901,8 @@ UString CPanel::GetParentDirPrefix() const { s = _currentFolderPrefix; s.DeleteBack(); - if (s != L"\\\\." && - s != L"\\\\?") + if (!s.IsEqualTo("\\\\.") && + !s.IsEqualTo("\\\\?")) { int pos = s.ReverseFind_PathSepar(); if (pos >= 0) @@ -935,8 +935,8 @@ void CPanel::OpenParentFolder() } else */ - if (focusedName != L"\\\\." && - focusedName != L"\\\\?") + if (!focusedName.IsEqualTo("\\\\.") && + !focusedName.IsEqualTo("\\\\?")) { const int pos = focusedName.ReverseFind_PathSepar(); if (pos >= 0) diff --git a/CPP/7zip/UI/FileManager/PanelOperations.cpp b/CPP/7zip/UI/FileManager/PanelOperations.cpp index 8b16224..427464b 100644 --- a/CPP/7zip/UI/FileManager/PanelOperations.cpp +++ b/CPP/7zip/UI/FileManager/PanelOperations.cpp @@ -275,8 +275,8 @@ static bool IsCorrectFsName(const UString &name) { const UString lastPart = name.Ptr((unsigned)(name.ReverseFind_PathSepar() + 1)); return - lastPart != L"." 
&& - lastPart != L".."; + !lastPart.IsEqualTo(".") && + !lastPart.IsEqualTo(".."); } bool CorrectFsPath(const UString &relBase, const UString &path, UString &result); diff --git a/CPP/7zip/UI/FileManager/RootFolder.cpp b/CPP/7zip/UI/FileManager/RootFolder.cpp index 192f660..b512f3b 100644 --- a/CPP/7zip/UI/FileManager/RootFolder.cpp +++ b/CPP/7zip/UI/FileManager/RootFolder.cpp @@ -249,7 +249,7 @@ Z7_COM7F_IMF(CRootFolder::BindToFolder(const wchar_t *name, IFolderFolder **resu AreEqualNames(name2, L"Documents")) return BindToFolder((UInt32)ROOT_INDEX_DOCUMENTS, resultFolder); #else - if (name2 == WSTRING_PATH_SEPARATOR) + if (name2.IsEqualTo(STRING_PATH_SEPARATOR)) return BindToFolder((UInt32)ROOT_INDEX_COMPUTER, resultFolder); #endif @@ -257,7 +257,7 @@ Z7_COM7F_IMF(CRootFolder::BindToFolder(const wchar_t *name, IFolderFolder **resu AreEqualNames(name2, L"Computer")) return BindToFolder((UInt32)ROOT_INDEX_COMPUTER, resultFolder); - if (name2 == WSTRING_PATH_SEPARATOR) + if (name2.IsEqualTo(STRING_PATH_SEPARATOR)) { CMyComPtr subFolder = this; *resultFolder = subFolder.Detach(); diff --git a/CPP/7zip/UI/FileManager/makefile b/CPP/7zip/UI/FileManager/makefile index 0ca5caa..24dc4ca 100644 --- a/CPP/7zip/UI/FileManager/makefile +++ b/CPP/7zip/UI/FileManager/makefile @@ -104,7 +104,7 @@ AR_COMMON_OBJS = \ C_OBJS = $(C_OBJS) \ $O\Alloc.obj \ $O\CpuArch.obj \ - $O\Sort.obj \ $O\Threads.obj \ +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp index ce5473a..1686c69 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp @@ -1856,7 +1856,7 @@ HRESULT Benchmark( const CProperty &prop = props[i]; UString name = prop.Name; name.MakeLower_Ascii(); - if (name.IsEqualTo_Ascii_NoCase("m") && prop.Value == L"*") + if (name.IsEqualTo_Ascii_NoCase("m") && prop.Value.IsEqualTo("*")) { bd.TotalMode = true; continue; @@ -1865,7 +1865,7 @@ HRESULT Benchmark( 
NCOM::CPropVariant propVariant; if (!prop.Value.IsEmpty()) ParseNumberString(prop.Value, propVariant); - if (name.IsPrefixedBy(L"mt")) + if (name.IsPrefixedBy("mt")) { #ifndef Z7_ST RINOK(ParseMtProp(name.Ptr(2), propVariant, numCPUs, numThreads)) diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.rc b/CPP/7zip/UI/GUI/BenchmarkDialog.rc index 3e73e46..5df7ff2 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.rc +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.rc @@ -81,7 +81,7 @@ BEGIN LTEXT "&Number of CPU threads:", IDT_BENCH_NUM_THREADS, m, 30, g0xs, 8 COMBOBOX IDC_BENCH_NUM_THREADS, g1x, 29, g1xs, 140, MY_COMBO - LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 30, g7xs, MY_TEXT_NOPREFIX + LTEXT "", IDT_BENCH_HARDWARE_THREADS, gc2x, 30, g7xs, 24, SS_NOPREFIX RTEXT "Size", IDT_BENCH_SIZE, xSize, 54, sSize, MY_TEXT_NOPREFIX RTEXT "CPU Usage", IDT_BENCH_USAGE_LABEL, xUsage, 54, sUsage, MY_TEXT_NOPREFIX diff --git a/CPP/7zip/UI/GUI/CompressDialog.cpp b/CPP/7zip/UI/GUI/CompressDialog.cpp index 58f863e..85d7186 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.cpp +++ b/CPP/7zip/UI/GUI/CompressDialog.cpp @@ -2600,11 +2600,17 @@ void CCompressDialog::SetNumThreads2() UInt32 numAlgoThreadsMax = numHardwareThreads * 2; const int methodID = GetMethodID(); - switch (methodID) + const bool isZip = IsZipFormat(); + if (isZip) + numAlgoThreadsMax = + 8 << (sizeof(size_t) / 2); // 32 threads for 32-bit : 128 threads for 64-bit + else if (IsXzFormat()) + numAlgoThreadsMax = 256 * 2; + else switch (methodID) { case kLZMA: numAlgoThreadsMax = 2; break; case kLZMA2: numAlgoThreadsMax = 256; break; - case kBZip2: numAlgoThreadsMax = 32; break; + case kBZip2: numAlgoThreadsMax = 64; break; // case kZSTD: numAlgoThreadsMax = num_ZSTD_threads_MAX; break; case kCopy: case kPPMd: @@ -2613,17 +2619,6 @@ void CCompressDialog::SetNumThreads2() case kPPMdZip: numAlgoThreadsMax = 1; } - const bool isZip = IsZipFormat(); - if (isZip) - { - numAlgoThreadsMax = - #ifdef _WIN32 - 64; // _WIN32 supports only 64 threads in one 
group. So no need for more threads here - #else - 128; - #endif - } - UInt32 autoThreads = numHardwareThreads; if (autoThreads > numAlgoThreadsMax) autoThreads = numAlgoThreadsMax; @@ -3008,7 +3003,7 @@ UInt64 CCompressDialog::GetMemoryUsage_Threads_Dict_DecompMem(UInt32 numThreads, else { size += numBlockThreads * (size1 + chunkSize); - UInt32 numPackChunks = numBlockThreads + (numBlockThreads / 8) + 1; + const UInt32 numPackChunks = numBlockThreads + (numBlockThreads / 8) + 1; if (chunkSize < ((UInt32)1 << 26)) numBlockThreads++; if (chunkSize < ((UInt32)1 << 24)) numBlockThreads++; if (chunkSize < ((UInt32)1 << 22)) numBlockThreads++; diff --git a/CPP/7zip/UI/GUI/makefile b/CPP/7zip/UI/GUI/makefile index 22ae095..b879a5d 100644 --- a/CPP/7zip/UI/GUI/makefile +++ b/CPP/7zip/UI/GUI/makefile @@ -141,10 +141,9 @@ C_OBJS = \ $O\Alloc.obj \ $O\CpuArch.obj \ $O\DllSecur.obj \ - $O\Sort.obj \ $O\Threads.obj \ !include "../../Crc.mak" - +!include "../../Sort.mak" !include "../../7zip.mak" diff --git a/CPP/Build.mak b/CPP/Build.mak index afb7ae8..86cc2af 100644 --- a/CPP/Build.mak +++ b/CPP/Build.mak @@ -111,7 +111,13 @@ CFLAGS = $(CFLAGS) -Zc:forScope !IFNDEF UNDER_CE !IF "$(CC)" != "clang-cl" -CFLAGS = $(CFLAGS) -MP4 +MP_NPROC = 16 +!IFDEF NUMBER_OF_PROCESSORS +!IF $(NUMBER_OF_PROCESSORS) < $(MP_NPROC) +MP_NPROC = $(NUMBER_OF_PROCESSORS) +!ENDIF +!ENDIF +CFLAGS = $(CFLAGS) -MP$(MP_NPROC) !ENDIF !IFNDEF PLATFORM # CFLAGS = $(CFLAGS) -arch:IA32 diff --git a/CPP/Common/MyString.cpp b/CPP/Common/MyString.cpp index b5f7e52..10e2331 100644 --- a/CPP/Common/MyString.cpp +++ b/CPP/Common/MyString.cpp @@ -208,35 +208,6 @@ bool StringsAreEqualNoCase(const wchar_t *s1, const wchar_t *s2) throw() // ---------- ASCII ---------- -bool AString::IsPrefixedBy_Ascii_NoCase(const char *s) const throw() -{ - const char *s1 = _chars; - for (;;) - { - const char c2 = *s++; - if (c2 == 0) - return true; - const char c1 = *s1++; - if (MyCharLower_Ascii(c1) != - MyCharLower_Ascii(c2)) - return 
false; - } -} - -bool UString::IsPrefixedBy_Ascii_NoCase(const char *s) const throw() -{ - const wchar_t *s1 = _chars; - for (;;) - { - const char c2 = *s++; - if (c2 == 0) - return true; - const wchar_t c1 = *s1++; - if (MyCharLower_Ascii(c1) != (unsigned char)MyCharLower_Ascii(c2)) - return false; - } -} - bool StringsAreEqual_Ascii(const char *u, const char *a) throw() { for (;;) diff --git a/CPP/Common/MyString.h b/CPP/Common/MyString.h index ba9914e..639b874 100644 --- a/CPP/Common/MyString.h +++ b/CPP/Common/MyString.h @@ -429,11 +429,11 @@ public: // int CompareNoCase(const char *s) const { return MyStringCompareNoCase(_chars, s); } // int CompareNoCase(const AString &s) const { return MyStringCompareNoCase(_chars, s._chars); } bool IsPrefixedBy(const char *s) const { return IsString1PrefixedByString2(_chars, s); } - bool IsPrefixedBy_Ascii_NoCase(const char *s) const throw(); + bool IsPrefixedBy_Ascii_NoCase(const char *s) const { return IsString1PrefixedByString2_NoCase_Ascii(_chars, s); } bool IsAscii() const { - unsigned len = Len(); + const unsigned len = Len(); const char *s = _chars; for (unsigned i = 0; i < len; i++) if ((unsigned char)s[i] >= 0x80) @@ -727,22 +727,23 @@ public: // int CompareNoCase(const wchar_t *s) const { return MyStringCompareNoCase(_chars, s); } // int CompareNoCase(const UString &s) const { return MyStringCompareNoCase(_chars, s._chars); } bool IsPrefixedBy(const wchar_t *s) const { return IsString1PrefixedByString2(_chars, s); } + bool IsPrefixedBy(const char *s) const { return IsString1PrefixedByString2(_chars, s); } bool IsPrefixedBy_NoCase(const wchar_t *s) const { return IsString1PrefixedByString2_NoCase(_chars, s); } - bool IsPrefixedBy_Ascii_NoCase(const char *s) const throw(); + bool IsPrefixedBy_Ascii_NoCase(const char *s) const { return IsString1PrefixedByString2_NoCase_Ascii(_chars, s); } bool IsAscii() const { - unsigned len = Len(); + const unsigned len = Len(); const wchar_t *s = _chars; for (unsigned i = 0; i < 
len; i++) - if (s[i] >= 0x80) + if ((unsigned)(int)s[i] >= 0x80) return false; return true; } int Find(wchar_t c) const { return FindCharPosInString(_chars, c); } int Find(wchar_t c, unsigned startIndex) const { - int pos = FindCharPosInString(_chars + startIndex, c); + const int pos = FindCharPosInString(_chars + startIndex, c); return pos < 0 ? -1 : (int)startIndex + pos; } diff --git a/CPP/Common/MyXml.cpp b/CPP/Common/MyXml.cpp index cc891fc..8364aae 100644 --- a/CPP/Common/MyXml.cpp +++ b/CPP/Common/MyXml.cpp @@ -24,7 +24,7 @@ static bool IsSpaceChar(char c) int CXmlItem::FindProp(const char *propName) const throw() { FOR_VECTOR (i, Props) - if (Props[i].Name == propName) + if (Props[i].Name.IsEqualTo(propName)) return (int)i; return -1; } @@ -39,7 +39,7 @@ AString CXmlItem::GetPropVal(const char *propName) const bool CXmlItem::IsTagged(const char *tag) const throw() { - return (IsTag && Name == tag); + return (IsTag && Name.IsEqualTo(tag)); } int CXmlItem::FindSubTag(const char *tag) const throw() diff --git a/CPP/Common/Sha3Reg.cpp b/CPP/Common/Sha3Reg.cpp index 95db25e..cd2e288 100644 --- a/CPP/Common/Sha3Reg.cpp +++ b/CPP/Common/Sha3Reg.cpp @@ -58,7 +58,7 @@ Z7_COM7F_IMF2(UInt32, CSha3Hasher::GetDigestSize()) static IHasher *CreateHasherSpec() \ { return new CSha3Hasher(digestSize / 8, isShake, \ SHA3_BLOCK_SIZE_FROM_DIGEST_SIZE(digestSize_for_blockSize / 8)); } \ - static const CHasherInfo g_HasherInfo = { CreateHasherSpec, id, name, digestSize }; \ + static const CHasherInfo g_HasherInfo = { CreateHasherSpec, id, name, digestSize / 8 }; \ struct REGISTER_HASHER_NAME(cls) { REGISTER_HASHER_NAME(cls)() { RegisterHasher(&g_HasherInfo); }}; \ static REGISTER_HASHER_NAME(cls) g_RegisterHasher; } diff --git a/CPP/Common/Wildcard.cpp b/CPP/Common/Wildcard.cpp index 798cbd9..b561a89 100644 --- a/CPP/Common/Wildcard.cpp +++ b/CPP/Common/Wildcard.cpp @@ -255,7 +255,8 @@ ForDir nonrec [0, M) same as ForBoth-File bool CItem::AreAllAllowed() const { - return ForFile 
&& ForDir && WildcardMatching && PathParts.Size() == 1 && PathParts.Front() == L"*"; + return ForFile && ForDir && WildcardMatching + && PathParts.Size() == 1 && PathParts.Front().IsEqualTo("*"); } bool CItem::CheckPath(const UStringVector &pathParts, bool isFile) const @@ -542,7 +543,7 @@ unsigned GetNumPrefixParts_if_DrivePath(UStringVector &pathParts) { if (pathParts.Size() < 4 || !pathParts[1].IsEmpty() - || pathParts[2] != L"?") + || !pathParts[2].IsEqualTo("?")) return 0; testIndex = 3; } @@ -574,11 +575,11 @@ static unsigned GetNumPrefixParts(const UStringVector &pathParts) return 1; if (pathParts.Size() == 2) return 2; - if (pathParts[2] == L".") + if (pathParts[2].IsEqualTo(".")) return 3; unsigned networkParts = 2; - if (pathParts[2] == L"?") + if (pathParts[2].IsEqualTo("?")) { if (pathParts.Size() == 3) return 3; @@ -642,7 +643,7 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat if (pathParts.Size() >= 3 && pathParts[0].IsEmpty() && pathParts[1].IsEmpty() - && pathParts[2] == L"?") + && pathParts[2].IsEqualTo("?")) ignoreWildcardIndex = 2; // #endif @@ -665,7 +666,7 @@ void CCensor::AddItem(ECensorPathMode pathMode, bool include, const UString &pat for (unsigned i = numPrefixParts; i < pathParts.Size(); i++) { const UString &part = pathParts[i]; - if (part == L".." 
|| part == L".") + if (part.IsEqualTo("..") || part.IsEqualTo(".")) dotsIndex = (int)i; } diff --git a/CPP/Windows/FileDir.cpp b/CPP/Windows/FileDir.cpp index 2cb83b2..10c4e98 100644 --- a/CPP/Windows/FileDir.cpp +++ b/CPP/Windows/FileDir.cpp @@ -651,6 +651,35 @@ bool RemoveDirWithSubItems(const FString &path) return RemoveDir(path); } +bool RemoveDirAlways_if_Empty(const FString &path) +{ + const DWORD attrib = NFind::GetFileAttrib(path); + if (attrib != INVALID_FILE_ATTRIBUTES + && (attrib & FILE_ATTRIBUTE_READONLY)) + { + bool need_ClearAttrib = true; + if ((attrib & FILE_ATTRIBUTE_REPARSE_POINT) == 0) + { + FString s (path); + s.Add_PathSepar(); + NFind::CEnumerator enumerator; + enumerator.SetDirPrefix(s); + NFind::CDirEntry fi; + if (enumerator.Next(fi)) + { + // we don't want to change attributes, if there are files + // in directory, because RemoveDir(path) will fail. + need_ClearAttrib = false; + // SetLastError(ERROR_DIR_NOT_EMPTY); + // return false; + } + } + if (need_ClearAttrib) + SetFileAttrib(path, 0); // we clear read-only attrib to remove read-only dir + } + return RemoveDir(path); +} + #endif // _WIN32 #ifdef UNDER_CE diff --git a/CPP/Windows/FileDir.h b/CPP/Windows/FileDir.h index 74675ee..65e6368 100644 --- a/CPP/Windows/FileDir.h +++ b/CPP/Windows/FileDir.h @@ -78,6 +78,11 @@ bool CreateComplexDir(CFSTR path); bool DeleteFileAlways(CFSTR name); bool RemoveDirWithSubItems(const FString &path); +#ifdef _WIN32 +bool RemoveDirAlways_if_Empty(const FString &path); +#else +#define RemoveDirAlways_if_Empty RemoveDir +#endif bool MyGetFullPathName(CFSTR path, FString &resFullPath); bool GetFullPathAndSplit(CFSTR path, FString &resDirPrefix, FString &resFileName); diff --git a/CPP/Windows/FileFind.cpp b/CPP/Windows/FileFind.cpp index ca387f6..64075ab 100644 --- a/CPP/Windows/FileFind.cpp +++ b/CPP/Windows/FileFind.cpp @@ -731,7 +731,7 @@ bool CFileInfo::Find(CFSTR path, bool followLink) bool isOK = false; if (finder.FindFirst(s, *this)) { - if (Name == 
FTEXT(".")) + if (Name.IsEqualTo(".")) { Name = path + prefixSize; return true; @@ -769,6 +769,13 @@ bool CFileInfo::Find(CFSTR path, bool followLink) // return FollowReparse(path, IsDir()); return Fill_From_ByHandleFileInfo(path); +/* + // Fill_From_ByHandleFileInfo returns false (with Access Denied error), + // if there is reparse link file (not directory reparse item). + if (Fill_From_ByHandleFileInfo(path)) + return true; + return HasReparsePoint(); +*/ } bool CFileInfoBase::Fill_From_ByHandleFileInfo(CFSTR path) diff --git a/CPP/Windows/FileIO.h b/CPP/Windows/FileIO.h index 6ba40eb..26edef4 100644 --- a/CPP/Windows/FileIO.h +++ b/CPP/Windows/FileIO.h @@ -11,8 +11,7 @@ #define Z7_WIN_SYMLINK_FLAG_RELATIVE 1 -// what the meaning of that FLAG or field (2)? -#define Z7_WIN_LX_SYMLINK_FLAG 2 +#define Z7_WIN_LX_SYMLINK_VERSION_2 2 #ifdef _WIN32 @@ -44,7 +43,33 @@ namespace NWindows { namespace NFile { #if defined(_WIN32) && !defined(UNDER_CE) -bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL); +/* + in: (CByteBuffer &dest) is empty + in: (path) uses Windows path separator (\). + out: (path) uses Linux path separator (/). + if (isAbsPath == true), then "c:\\" prefix is replaced to "/mnt/c/" prefix +*/ +void Convert_WinPath_to_WslLinuxPath(FString &path, bool convertDrivePath); +// (path) must use Linux path separator (/). +void FillLinkData_WslLink(CByteBuffer &dest, const wchar_t *path); + +/* + in: (CByteBuffer &dest) is empty + if (isSymLink == false) : MOUNT_POINT : (path) must be absolute. + if (isSymLink == true) : SYMLINK : Windows + (path) must use Windows path separator (\). + (path) must be without link "\\??\\" prefix. + link "\\??\\" prefix will be added inside FillLinkData(), if path is absolute. 
+*/ +void FillLinkData_WinLink(CByteBuffer &dest, const wchar_t *path, bool isSymLink); +// in: (CByteBuffer &dest) is empty +inline void FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL) +{ + if (isWSL) + FillLinkData_WslLink(dest, path); + else + FillLinkData_WinLink(dest, path, isSymLink); +} #endif struct CReparseShortInfo @@ -61,7 +86,6 @@ struct CReparseAttr UInt32 Flags; UString SubsName; UString PrintName; - AString WslName; bool HeaderError; @@ -71,8 +95,7 @@ struct CReparseAttr CReparseAttr(): Tag(0), Flags(0) {} - // Parse() - // returns (true) and (ErrorCode = 0), if (it'a correct known link) + // returns (true) and (ErrorCode = 0), if (it's correct known link) // returns (false) and (ErrorCode = ERROR_REPARSE_TAG_INVALID), if unknown tag bool Parse(const Byte *p, size_t size); @@ -80,18 +103,14 @@ struct CReparseAttr bool IsSymLink_Win() const { return Tag == Z7_WIN_IO_REPARSE_TAG_SYMLINK; } bool IsSymLink_WSL() const { return Tag == Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK; } + // note: "/dir1/path" is marked as relative. bool IsRelative_Win() const { return Flags == Z7_WIN_SYMLINK_FLAG_RELATIVE; } bool IsRelative_WSL() const { - if (WslName.IsEmpty()) - return true; - char c = WslName[0]; - return !IS_PATH_SEPAR(c); + return WslName[0] != '/'; // WSL uses unix path separator } - // bool IsVolume() const; - bool IsOkNamePair() const; UString GetPath() const; }; diff --git a/CPP/Windows/FileLink.cpp b/CPP/Windows/FileLink.cpp index bb380ec..2883c82 100644 --- a/CPP/Windows/FileLink.cpp +++ b/CPP/Windows/FileLink.cpp @@ -38,13 +38,25 @@ namespace NFile { using namespace NName; +/* +Win10 Junctions/SymLinks: + - (/) slash doesn't work as path separator + - Win10 preinstalled junctions don't use tail backslash, but tail backslashes also work. + - double backslash works only after drive prefix "c:\\dir1\dir2\", + and doesn't work in another places. 
+ - absolute path without \??\ prefix doesn't work + - absolute path "c:" doesn't work +*/ + /* Reparse Points (Junctions and Symbolic Links): struct { UInt32 Tag; UInt16 Size; // not including starting 8 bytes - UInt16 Reserved; // = 0 + UInt16 Reserved; // = 0, DOCs: // Length, in bytes, of the unparsed portion of + // the file name pointed to by the FileName member of the associated file object. + // This member is only valid for create operations when the I/O fails with STATUS_REPARSE. UInt16 SubstituteOffset; // offset in bytes from start of namesChars UInt16 SubstituteLen; // size in bytes, it doesn't include tailed NUL @@ -68,6 +80,16 @@ using namespace NName; 2) Default Order in table: Print Path Substitute Path + +DOCS: + The print name SHOULD be an informative pathname, suitable for display + to a user, that also identifies the target of the mount point. + Neither of these pathnames can contain dot directory names. + +reparse tags, with the exception of IO_REPARSE_TAG_SYMLINK, +are processed on the server and are not processed by a client +after transmission over the wire. +Clients SHOULD treat associated reparse data as opaque data. 
*/ /* @@ -93,7 +115,8 @@ static const UInt32 kReparseFlags_Microsoft = ((UInt32)1 << 31); #define Get16(p) GetUi16(p) #define Get32(p) GetUi32(p) -static const wchar_t * const k_LinkPrefix = L"\\??\\"; +static const char * const k_LinkPrefix = "\\??\\"; +static const char * const k_LinkPrefix_UNC = "\\??\\UNC\\"; static const unsigned k_LinkPrefix_Size = 4; static bool IsLinkPrefix(const wchar_t *s) @@ -102,7 +125,7 @@ static bool IsLinkPrefix(const wchar_t *s) } /* -static const wchar_t * const k_VolumePrefix = L"Volume{"; +static const char * const k_VolumePrefix = "Volume{"; static const bool IsVolumeName(const wchar_t *s) { return IsString1PrefixedByString2(s, k_VolumePrefix); @@ -118,7 +141,7 @@ static void WriteString(Byte *dest, const wchar_t *path) { for (;;) { - wchar_t c = *path++; + const wchar_t c = *path++; if (c == 0) return; Set16(dest, (UInt16)c) @@ -126,62 +149,103 @@ static void WriteString(Byte *dest, const wchar_t *path) } } -bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool isWSL) +#ifdef _WIN32 +void Convert_WinPath_to_WslLinuxPath(FString &s, bool convertDrivePath) { - bool isAbs = IsAbsolutePath(path); - if (!isAbs && !isSymLink) - return false; - - if (isWSL) + if (convertDrivePath && IsDrivePath(s)) { - // unsupported characters probably use Replacement Character UTF-16 0xFFFD - AString utf; - ConvertUnicodeToUTF8(path, utf); - const size_t size = 4 + utf.Len(); - if (size != (UInt16)size) - return false; - dest.Alloc(8 + size); - Byte *p = dest; - Set32(p, Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK) - Set16(p + 4, (UInt16)(size)) - Set16(p + 6, 0) - Set32(p + 8, Z7_WIN_LX_SYMLINK_FLAG) - memcpy(p + 12, utf.Ptr(), utf.Len()); - return true; + FChar c = s[0]; + c = MyCharLower_Ascii(c); + s.DeleteFrontal(2); + s.InsertAtFront(c); + s.Insert(0, FTEXT("/mnt/")); } + s.Replace(FCHAR_PATH_SEPARATOR, FTEXT('/')); +} +#endif - // usual symbolic LINK (NOT WSL) + +static const unsigned k_Link_Size_Limit = 1u << 16; // 16-bit 
field is used for size. + +void FillLinkData_WslLink(CByteBuffer &dest, const wchar_t *path) +{ + // dest.Free(); // it's empty already + // WSL probably uses Replacement Character UTF-16 0xFFFD for unsupported characters? + AString utf; + ConvertUnicodeToUTF8(path, utf); + const unsigned size = 4 + utf.Len(); + if (size >= k_Link_Size_Limit) + return; + dest.Alloc(8 + size); + Byte *p = dest; + Set32(p, Z7_WIN_IO_REPARSE_TAG_LX_SYMLINK) + // Set32(p + 4, (UInt32)size) + Set16(p + 4, (UInt16)size) + Set16(p + 6, 0) + Set32(p + 8, Z7_WIN_LX_SYMLINK_VERSION_2) + memcpy(p + 12, utf.Ptr(), utf.Len()); +} + + +void FillLinkData_WinLink(CByteBuffer &dest, const wchar_t *path, bool isSymLink) +{ + // dest.Free(); // it's empty already + bool isAbs = false; + if (IS_PATH_SEPAR(path[0])) + { + // root paths "\dir1\path" are marked as relative + if (IS_PATH_SEPAR(path[1])) + isAbs = true; + } + else + isAbs = IsAbsolutePath(path); + if (!isAbs && !isSymLink) + { + // Win10 allows us to create relative MOUNT_POINT. + // But relative MOUNT_POINT will not work when accessing it. + // So we prevent useless creation of a relative MOUNT_POINT. + return; + } bool needPrintName = true; - - if (IsSuperPath(path)) + UString subs (path); + if (isAbs) { - path += kSuperPathPrefixSize; - if (!IsDrivePath(path)) - needPrintName = false; + const bool isSuperPath = IsSuperPath(path); + if (!isSuperPath && NName::IsNetworkPath(us2fs(path))) + { + subs = k_LinkPrefix_UNC; + subs += (path + 2); + } + else + { + if (isSuperPath) + { + // we remove super prefix: + path += kSuperPathPrefixSize; + // we want to get correct abolute path in PrintName still. + if (!IsDrivePath(path)) + needPrintName = false; // we need "\\server\path" for print name. + } + subs = k_LinkPrefix; + subs += path; + } } - - const unsigned add_Prefix_Len = isAbs ? 
k_LinkPrefix_Size : 0; - + const size_t len1 = subs.Len() * 2; size_t len2 = (size_t)MyStringLen(path) * 2; - const size_t len1 = len2 + add_Prefix_Len * 2; if (!needPrintName) len2 = 0; - - size_t totalNamesSize = (len1 + len2); - + size_t totalNamesSize = len1 + len2; /* some WIM imagex software uses old scheme for symbolic links. - so we can old scheme for byte to byte compatibility */ - - bool newOrderScheme = isSymLink; + so we can use old scheme for byte to byte compatibility */ + const bool newOrderScheme = isSymLink; // newOrderScheme = false; - if (!newOrderScheme) - totalNamesSize += 2 * 2; + totalNamesSize += 2 * 2; // we use NULL terminators in old scheme. const size_t size = 8 + 8 + (isSymLink ? 4 : 0) + totalNamesSize; - if (size != (UInt16)size) - return false; + if (size >= k_Link_Size_Limit) + return; dest.Alloc(size); memset(dest, 0, size); const UInt32 tag = isSymLink ? @@ -189,6 +253,7 @@ bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool i Z7_WIN_IO_REPARSE_TAG_MOUNT_POINT; Byte *p = dest; Set32(p, tag) + // Set32(p + 4, (UInt32)(size - 8)) Set16(p + 4, (UInt16)(size - 8)) Set16(p + 6, 0) p += 8; @@ -204,21 +269,16 @@ bool FillLinkData(CByteBuffer &dest, const wchar_t *path, bool isSymLink, bool i Set16(p + 2, (UInt16)len1) Set16(p + 4, (UInt16)printOffs) Set16(p + 6, (UInt16)len2) - p += 8; if (isSymLink) { - UInt32 flags = isAbs ? 0 : Z7_WIN_SYMLINK_FLAG_RELATIVE; + const UInt32 flags = isAbs ? 
0 : Z7_WIN_SYMLINK_FLAG_RELATIVE; Set32(p, flags) p += 4; } - - if (add_Prefix_Len != 0) - WriteString(p + subOffs, k_LinkPrefix); - WriteString(p + subOffs + add_Prefix_Len * 2, path); + WriteString(p + subOffs, subs); if (needPrintName) WriteString(p + printOffs, path); - return true; } #endif // defined(_WIN32) && !defined(UNDER_CE) @@ -230,7 +290,7 @@ static void GetString(const Byte *p, unsigned len, UString &res) unsigned i; for (i = 0; i < len; i++) { - wchar_t c = Get16(p + i * 2); + const wchar_t c = Get16(p + (size_t)i * 2); if (c == 0) break; s[i] = c; @@ -239,6 +299,7 @@ static void GetString(const Byte *p, unsigned len, UString &res) res.ReleaseBuf_SetLen(i); } + bool CReparseAttr::Parse(const Byte *p, size_t size) { ErrorCode = (DWORD)ERROR_INVALID_REPARSE_DATA; @@ -250,7 +311,12 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) return false; Tag = Get32(p); if (Get16(p + 6) != 0) // padding - return false; + { + // DOCs: Reserved : the field SHOULD be set to 0 + // and MUST be ignored (by parser). + // Win10 ignores it. 
+ MinorError = true; // optional + } unsigned len = Get16(p + 4); p += 8; size -= 8; @@ -262,8 +328,6 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) (type & kReparseFlags_Microsoft) == 0 || (type & 0xFFFF) != 3) */ - - HeaderError = false; if ( Tag != Z7_WIN_IO_REPARSE_TAG_MOUNT_POINT @@ -282,8 +346,7 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) { if (len < 4) return false; - Flags = Get32(p); // maybe it's not Flags - if (Flags != Z7_WIN_LX_SYMLINK_FLAG) + if (Get32(p) != Z7_WIN_LX_SYMLINK_VERSION_2) return false; len -= 4; p += 4; @@ -291,12 +354,13 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) unsigned i; for (i = 0; i < len; i++) { - char c = (char)p[i]; + const char c = (char)p[i]; s[i] = c; if (c == 0) break; } - WslName.ReleaseBuf_SetEnd(i); + s[i] = 0; + WslName.ReleaseBuf_SetLen(i); MinorError = (i != len); ErrorCode = 0; return true; @@ -304,10 +368,10 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) if (len < 8) return false; - unsigned subOffs = Get16(p); - unsigned subLen = Get16(p + 2); - unsigned printOffs = Get16(p + 4); - unsigned printLen = Get16(p + 6); + const unsigned subOffs = Get16(p); + const unsigned subLen = Get16(p + 2); + const unsigned printOffs = Get16(p + 4); + const unsigned printLen = Get16(p + 6); len -= 8; p += 8; @@ -335,15 +399,17 @@ bool CReparseAttr::Parse(const Byte *p, size_t size) bool CReparseShortInfo::Parse(const Byte *p, size_t size) { - const Byte *start = p; - Offset= 0; + const Byte * const start = p; + Offset = 0; Size = 0; if (size < 8) return false; - UInt32 Tag = Get32(p); + const UInt32 Tag = Get32(p); UInt32 len = Get16(p + 4); + /* if (len + 8 > size) return false; + */ /* if ((type & kReparseFlags_Alias) == 0 || (type & kReparseFlags_Microsoft) == 0 || @@ -353,16 +419,14 @@ bool CReparseShortInfo::Parse(const Byte *p, size_t size) Tag != Z7_WIN_IO_REPARSE_TAG_SYMLINK) // return true; return false; - + /* if (Get16(p + 6) != 0) // padding return false; - + */ p += 8; size 
-= 8; - if (len != size) // do we need that check? return false; - if (len < 8) return false; unsigned subOffs = Get16(p); @@ -396,10 +460,14 @@ bool CReparseAttr::IsOkNamePair() const { if (IsLinkPrefix(SubsName)) { + if (PrintName == GetPath()) + return true; +/* if (!IsDrivePath(SubsName.Ptr(k_LinkPrefix_Size))) return PrintName.IsEmpty(); if (wcscmp(SubsName.Ptr(k_LinkPrefix_Size), PrintName) == 0) return true; +*/ } return wcscmp(SubsName, PrintName) == 0; } @@ -415,21 +483,26 @@ bool CReparseAttr::IsVolume() const UString CReparseAttr::GetPath() const { + UString s (SubsName); if (IsSymLink_WSL()) { - UString u; // if (CheckUTF8(attr.WslName) - if (!ConvertUTF8ToUnicode(WslName, u)) - MultiByteToUnicodeString2(u, WslName); - return u; + if (!ConvertUTF8ToUnicode(WslName, s)) + MultiByteToUnicodeString2(s, WslName); } - - UString s (SubsName); - if (IsLinkPrefix(s)) + else if (IsLinkPrefix(s)) { - s.ReplaceOneCharAtPos(1, '\\'); // we normalize prefix from "\??\" to "\\?\" - if (IsDrivePath(s.Ptr(k_LinkPrefix_Size))) - s.DeleteFrontal(k_LinkPrefix_Size); + if (IsString1PrefixedByString2_NoCase_Ascii(s.Ptr(), k_LinkPrefix_UNC)) + { + s.DeleteFrontal(6); + s.ReplaceOneCharAtPos(0, '\\'); + } + else + { + s.ReplaceOneCharAtPos(1, '\\'); // we normalize prefix from "\??\" to "\\?\" + if (IsDrivePath(s.Ptr(k_LinkPrefix_Size))) + s.DeleteFrontal(k_LinkPrefix_Size); + } } return s; } @@ -468,7 +541,7 @@ bool GetReparseData(CFSTR path, CByteBuffer &reparseData, BY_HANDLE_FILE_INFORMA static bool CreatePrefixDirOfFile(CFSTR path) { FString path2 (path); - int pos = path2.ReverseFind_PathSepar(); + const int pos = path2.ReverseFind_PathSepar(); if (pos < 0) return true; #ifdef _WIN32 @@ -494,6 +567,8 @@ static bool OutIoReparseData(DWORD controlCode, CFSTR path, void *data, DWORD si } +// MOUNT_POINT (Junction Point) and LX_SYMLINK (WSL) can be written without administrator rights. +// SYMLINK requires administrator rights. 
// If there is Reparse data already, it still writes new Reparse data bool SetReparseData(CFSTR path, bool isDir, const void *data, DWORD size) { @@ -540,10 +615,11 @@ bool DeleteReparseData(CFSTR path) SetLastError(ERROR_INVALID_REPARSE_DATA); return false; } - BYTE buf[my_REPARSE_DATA_BUFFER_HEADER_SIZE]; - memset(buf, 0, sizeof(buf)); - memcpy(buf, reparseData, 4); // tag - return OutIoReparseData(my_FSCTL_DELETE_REPARSE_POINT, path, buf, sizeof(buf)); + // BYTE buf[my_REPARSE_DATA_BUFFER_HEADER_SIZE]; + // memset(buf, 0, sizeof(buf)); + // memcpy(buf, reparseData, 4); // tag + memset(reparseData + 4, 0, my_REPARSE_DATA_BUFFER_HEADER_SIZE - 4); + return OutIoReparseData(my_FSCTL_DELETE_REPARSE_POINT, path, reparseData, my_REPARSE_DATA_BUFFER_HEADER_SIZE); } } diff --git a/CPP/Windows/FileName.cpp b/CPP/Windows/FileName.cpp index 1f4a6da..eb62567 100644 --- a/CPP/Windows/FileName.cpp +++ b/CPP/Windows/FileName.cpp @@ -65,8 +65,15 @@ void NormalizeDirPathPrefix(UString &dirPath) dirPath.Add_PathSepar(); } + +#define IS_LETTER_CHAR(c) ((((unsigned)(int)(c) | 0x20) - (unsigned)'a' <= (unsigned)('z' - 'a'))) +bool IsDrivePath (const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && IS_SEPAR(s[2]); } +// bool IsDriveName2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && s[2] == 0; } + #ifdef _WIN32 +bool IsDrivePath2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':'; } + #ifndef USE_UNICODE_FSTRING #ifdef Z7_LONG_PATH static void NormalizeDirSeparators(UString &s) @@ -87,13 +94,6 @@ void NormalizeDirSeparators(FString &s) s.ReplaceOneCharAtPos(i, FCHAR_PATH_SEPARATOR); } -#endif - - -#define IS_LETTER_CHAR(c) ((((unsigned)(int)(c) | 0x20) - (unsigned)'a' <= (unsigned)('z' - 'a'))) - -bool IsDrivePath(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && IS_SEPAR(s[2]); } - bool IsAltPathPrefix(CFSTR s) throw() { unsigned len = MyStringLen(s); @@ -117,16 +117,23 @@ bool 
IsAltPathPrefix(CFSTR s) throw() return true; } -#if defined(_WIN32) && !defined(UNDER_CE) +#endif // _WIN32 -const char * const kSuperPathPrefix = "\\\\?\\"; + +const char * const kSuperPathPrefix = + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" + STRING_PATH_SEPARATOR; #ifdef Z7_LONG_PATH -static const char * const kSuperUncPrefix = "\\\\?\\UNC\\"; +static const char * const kSuperUncPrefix = + STRING_PATH_SEPARATOR + STRING_PATH_SEPARATOR "?" + STRING_PATH_SEPARATOR "UNC" + STRING_PATH_SEPARATOR; #endif #define IS_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && (s)[2] == '.' && IS_SEPAR((s)[3])) #define IS_SUPER_PREFIX(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && (s)[2] == '?' && IS_SEPAR((s)[3])) -#define IS_SUPER_OR_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && ((s)[2] == '?' || (s)[2] == '.') && IS_SEPAR((s)[3])) #define IS_UNC_WITH_SLASH(s) ( \ ((s)[0] == 'U' || (s)[0] == 'u') \ @@ -134,6 +141,16 @@ static const char * const kSuperUncPrefix = "\\\\?\\UNC\\"; && ((s)[2] == 'C' || (s)[2] == 'c') \ && IS_SEPAR((s)[3])) +static const unsigned kDrivePrefixSize = 3; /* c:\ */ + +bool IsSuperPath(const wchar_t *s) throw(); +bool IsSuperPath(const wchar_t *s) throw() { return IS_SUPER_PREFIX(s); } +// bool IsSuperUncPath(const wchar_t *s) throw() { return (IS_SUPER_PREFIX(s) && IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)); } + +#if defined(_WIN32) && !defined(UNDER_CE) + +#define IS_SUPER_OR_DEVICE_PATH(s) (IS_SEPAR((s)[0]) && IS_SEPAR((s)[1]) && ((s)[2] == '?' 
|| (s)[2] == '.') && IS_SEPAR((s)[3])) +bool IsSuperOrDevicePath(const wchar_t *s) throw() { return IS_SUPER_OR_DEVICE_PATH(s); } bool IsDevicePath(CFSTR s) throw() { #ifdef UNDER_CE @@ -154,7 +171,7 @@ bool IsDevicePath(CFSTR s) throw() if (!IS_DEVICE_PATH(s)) return false; - unsigned len = MyStringLen(s); + const unsigned len = MyStringLen(s); if (len == 6 && s[5] == ':') return true; if (len < 18 || len > 22 || !IsString1PrefixedByString2(s + kDevicePathPrefixSize, "PhysicalDrive")) @@ -174,7 +191,7 @@ bool IsNetworkPath(CFSTR s) throw() return false; if (IsSuperUncPath(s)) return true; - FChar c = s[2]; + const FChar c = s[2]; return (c != '.' && c != '?'); } @@ -187,7 +204,7 @@ unsigned GetNetworkServerPrefixSize(CFSTR s) throw() prefixSize = kSuperUncPathPrefixSize; else { - FChar c = s[2]; + const FChar c = s[2]; if (c == '.' || c == '?') return 0; } @@ -209,14 +226,6 @@ bool IsNetworkShareRootPath(CFSTR s) throw() return s[(unsigned)pos + 1] == 0; } -static const unsigned kDrivePrefixSize = 3; /* c:\ */ - -bool IsDrivePath2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':'; } -// bool IsDriveName2(const wchar_t *s) throw() { return IS_LETTER_CHAR(s[0]) && s[1] == ':' && s[2] == 0; } -bool IsSuperPath(const wchar_t *s) throw() { return IS_SUPER_PREFIX(s); } -bool IsSuperOrDevicePath(const wchar_t *s) throw() { return IS_SUPER_OR_DEVICE_PATH(s); } -// bool IsSuperUncPath(const wchar_t *s) throw() { return (IS_SUPER_PREFIX(s) && IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)); } - bool IsAltStreamPrefixWithColon(const UString &s) throw() { if (s.IsEmpty()) @@ -349,14 +358,16 @@ unsigned GetRootPrefixSize(CFSTR s) throw() } #endif // USE_UNICODE_FSTRING +#endif // _WIN32 + static unsigned GetRootPrefixSize_Of_NetworkPath(const wchar_t *s) throw() { // Network path: we look "server\path\" as root prefix - int pos = FindSepar(s); + const int pos = FindSepar(s); if (pos < 0) return 0; - int pos2 = FindSepar(s + (unsigned)pos + 1); + const int 
pos2 = FindSepar(s + (unsigned)pos + 1); if (pos2 < 0) return 0; return (unsigned)(pos + pos2 + 2); @@ -370,7 +381,7 @@ static unsigned GetRootPrefixSize_Of_SimplePath(const wchar_t *s) throw() return 0; if (s[1] == 0 || !IS_SEPAR(s[1])) return 1; - unsigned size = GetRootPrefixSize_Of_NetworkPath(s + 2); + const unsigned size = GetRootPrefixSize_Of_NetworkPath(s + 2); return (size == 0) ? 0 : 2 + size; } @@ -378,17 +389,21 @@ static unsigned GetRootPrefixSize_Of_SuperPath(const wchar_t *s) throw() { if (IS_UNC_WITH_SLASH(s + kSuperPathPrefixSize)) { - unsigned size = GetRootPrefixSize_Of_NetworkPath(s + kSuperUncPathPrefixSize); + const unsigned size = GetRootPrefixSize_Of_NetworkPath(s + kSuperUncPathPrefixSize); return (size == 0) ? 0 : kSuperUncPathPrefixSize + size; } // we support \\?\c:\ paths and volume GUID paths \\?\Volume{GUID}\" - int pos = FindSepar(s + kSuperPathPrefixSize); + const int pos = FindSepar(s + kSuperPathPrefixSize); if (pos < 0) return 0; return kSuperPathPrefixSize + (unsigned)(pos + 1); } +#ifdef _WIN32 unsigned GetRootPrefixSize(const wchar_t *s) throw() +#else +unsigned GetRootPrefixSize_WINDOWS(const wchar_t *s) throw() +#endif { if (IS_DEVICE_PATH(s)) return kDevicePathPrefixSize; @@ -397,7 +412,7 @@ unsigned GetRootPrefixSize(const wchar_t *s) throw() return GetRootPrefixSize_Of_SimplePath(s); } -#else // _WIN32 +#ifndef _WIN32 bool IsAbsolutePath(const wchar_t *s) throw() { return IS_SEPAR(s[0]); } diff --git a/CPP/Windows/FileName.h b/CPP/Windows/FileName.h index 219b656..ce26e78 100644 --- a/CPP/Windows/FileName.h +++ b/CPP/Windows/FileName.h @@ -25,13 +25,13 @@ bool IsDrivePath(const wchar_t *s) throw(); // first 3 chars are drive chars li bool IsAltPathPrefix(CFSTR s) throw(); /* name: */ -#if defined(_WIN32) && !defined(UNDER_CE) - extern const char * const kSuperPathPrefix; /* \\?\ */ const unsigned kDevicePathPrefixSize = 4; const unsigned kSuperPathPrefixSize = 4; const unsigned kSuperUncPathPrefixSize = 
kSuperPathPrefixSize + 4; +#if defined(_WIN32) && !defined(UNDER_CE) + bool IsDevicePath(CFSTR s) throw(); /* \\.\ */ bool IsSuperUncPath(CFSTR s) throw(); /* \\?\UNC\ */ bool IsNetworkPath(CFSTR s) throw(); /* \\?\UNC\ or \\SERVER */ @@ -86,6 +86,15 @@ int FindAltStreamColon(CFSTR path) throw(); bool IsAbsolutePath(const wchar_t *s) throw(); unsigned GetRootPrefixSize(const wchar_t *s) throw(); +#ifndef _WIN32 +/* GetRootPrefixSize_WINDOWS() is called in linux, but it parses path by windows rules. + It supports only paths system (linux) slash separators (STRING_PATH_SEPARATOR), + It doesn't parses paths with backslash (windows) separators. + "c:/dir/file" is supported. +*/ +unsigned GetRootPrefixSize_WINDOWS(const wchar_t *s) throw(); +#endif + #ifdef Z7_LONG_PATH const int kSuperPathType_UseOnlyMain = 0; diff --git a/CPP/Windows/System.cpp b/CPP/Windows/System.cpp index 5fa87f3..4745785 100644 --- a/CPP/Windows/System.cpp +++ b/CPP/Windows/System.cpp @@ -25,6 +25,69 @@ namespace NSystem { #ifdef _WIN32 +/* +note: returned value in 32-bit version can be limited by value 32. + while 64-bit version returns full value. +GetMaximumProcessorCount(groupNumber) can return higher value than +GetActiveProcessorCount(groupNumber) in some cases, because CPUs can be added. 
+*/ +// typedef DWORD (WINAPI *Func_GetMaximumProcessorCount)(WORD GroupNumber); +typedef DWORD (WINAPI *Func_GetActiveProcessorCount)(WORD GroupNumber); +typedef WORD (WINAPI *Func_GetActiveProcessorGroupCount)(VOID); +/* +#if 0 && defined(ALL_PROCESSOR_GROUPS) +#define MY_ALL_PROCESSOR_GROUPS ALL_PROCESSOR_GROUPS +#else +#define MY_ALL_PROCESSOR_GROUPS 0xffff +#endif +*/ + +Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION + +bool CCpuGroups::Load() +{ + NumThreadsTotal = 0; + GroupSizes.Clear(); + const HMODULE hmodule = ::GetModuleHandleA("kernel32.dll"); + // Is_Win11_Groups = GetProcAddress(hmodule, "SetThreadSelectedCpuSetMasks") != NULL; + const + Func_GetActiveProcessorGroupCount + fn_GetActiveProcessorGroupCount = Z7_GET_PROC_ADDRESS( + Func_GetActiveProcessorGroupCount, hmodule, + "GetActiveProcessorGroupCount"); + const + Func_GetActiveProcessorCount + fn_GetActiveProcessorCount = Z7_GET_PROC_ADDRESS( + Func_GetActiveProcessorCount, hmodule, + "GetActiveProcessorCount"); + if (!fn_GetActiveProcessorGroupCount || + !fn_GetActiveProcessorCount) + return false; + + const unsigned numGroups = fn_GetActiveProcessorGroupCount(); + if (numGroups == 0) + return false; + UInt32 sum = 0; + for (unsigned i = 0; i < numGroups; i++) + { + const UInt32 num = fn_GetActiveProcessorCount((WORD)i); + /* + if (num == 0) + { + // it means error + // but is it possible that some group is empty by some reason? 
+ // GroupSizes.Clear(); + // return false; + } + */ + sum += num; + GroupSizes.Add(num); + } + NumThreadsTotal = sum; + // NumThreadsTotal = fn_GetActiveProcessorCount(MY_ALL_PROCESSOR_GROUPS); + return true; +} + UInt32 CountAffinity(DWORD_PTR mask) { UInt32 num = 0; @@ -38,31 +101,62 @@ UInt32 CountAffinity(DWORD_PTR mask) BOOL CProcessAffinity::Get() { - #ifndef UNDER_CE - return GetProcessAffinityMask(GetCurrentProcess(), &processAffinityMask, &systemAffinityMask); - #else - return FALSE; - #endif + IsGroupMode = false; + Groups.Load(); + // SetThreadAffinityMask(GetCurrentThread(), 1); + // SetProcessAffinityMask(GetCurrentProcess(), 1); + BOOL res = GetProcessAffinityMask(GetCurrentProcess(), + &processAffinityMask, &systemAffinityMask); + /* DOCs: On a system with more than 64 processors, if the threads + of the calling process are in a single processor group, the + function sets the variables pointed to by lpProcessAffinityMask + and lpSystemAffinityMask to the process affinity mask and the + processor mask of active logical processors for that group. + If the calling process contains threads in multiple groups, + the function returns zero for both affinity masks + + note: tested in Win10: GetProcessAffinityMask() doesn't return 0 + in (processAffinityMask) and (systemAffinityMask) masks. + We need to test it in Win11: how to get mask==0 from GetProcessAffinityMask()? + */ + if (!res) + { + processAffinityMask = 0; + systemAffinityMask = 0; + } + if (Groups.GroupSizes.Size() > 1 && Groups.NumThreadsTotal) + if (// !res || + processAffinityMask == 0 || // to support case described in DOCs and for (!res) case + processAffinityMask == systemAffinityMask) // for default nonchanged affinity + { + // we set IsGroupMode only if processAffinity is default (not changed). 
+ res = TRUE; + IsGroupMode = true; + } + return res; } +UInt32 CProcessAffinity::Load_and_GetNumberOfThreads() +{ + if (Get()) + { + const UInt32 numProcessors = GetNumProcessThreads(); + if (numProcessors) + return numProcessors; + } + SYSTEM_INFO systemInfo; + GetSystemInfo(&systemInfo); + // the number of logical processors in the current group + return systemInfo.dwNumberOfProcessors; +} + UInt32 GetNumberOfProcessors() { // We need to know how many threads we can use. // By default the process is assigned to one group. - // So we get the number of logical processors (threads) - // assigned to current process in the current group. - // Group size can be smaller than total number logical processors, for exammple, 2x36 - CProcessAffinity pa; - - if (pa.Get() && pa.processAffinityMask != 0) - return pa.GetNumProcessThreads(); - - SYSTEM_INFO systemInfo; - GetSystemInfo(&systemInfo); - // the number of logical processors in the current group - return (UInt32)systemInfo.dwNumberOfProcessors; + return pa.Load_and_GetNumberOfThreads(); } #else diff --git a/CPP/Windows/System.h b/CPP/Windows/System.h index 9951b8b..0c80373 100644 --- a/CPP/Windows/System.h +++ b/CPP/Windows/System.h @@ -9,6 +9,7 @@ #endif #include "../Common/MyTypes.h" +#include "../Common/MyVector.h" #include "../Common/MyWindows.h" namespace NWindows { @@ -16,6 +17,34 @@ namespace NSystem { #ifdef _WIN32 +struct CCpuGroups +{ + CRecordVector GroupSizes; + UInt32 NumThreadsTotal; // sum of threads in all groups + // bool Is_Win11_Groups; // useless + + void Get_GroupSize_Min_Max(UInt32 &minSize, UInt32 &maxSize) const + { + unsigned num = GroupSizes.Size(); + UInt32 minSize2 = 0, maxSize2 = 0; + if (num) + { + minSize2 = (UInt32)0 - 1; + do + { + const UInt32 v = GroupSizes[--num]; + if (minSize2 > v) minSize2 = v; + if (maxSize2 < v) maxSize2 = v; + } + while (num); + } + minSize = minSize2; + maxSize = maxSize2; + } + bool Load(); + CCpuGroups(): NumThreadsTotal(0) {} +}; + UInt32 
+ CountAffinity(DWORD_PTR mask); struct CProcessAffinity @@ -25,14 +54,28 @@ struct CProcessAffinity DWORD_PTR processAffinityMask; DWORD_PTR systemAffinityMask; + CCpuGroups Groups; + bool IsGroupMode; + /* + IsGroupMode == true, if + (Groups.GroupSizes.Size() > 1) && { default affinity was not changed } + IsGroupMode == false, if single group or affinity was changed + */ + + UInt32 Load_and_GetNumberOfThreads(); + void InitST() { // numProcessThreads = 1; // numSysThreads = 1; processAffinityMask = 1; systemAffinityMask = 1; + IsGroupMode = false; + // Groups.NumThreadsTotal = 0; + // Groups.Is_Win11_Groups = false; } +/* void CpuZero() { processAffinityMask = 0; @@ -42,9 +85,23 @@ struct CProcessAffinity { processAffinityMask |= ((DWORD_PTR)1 << cpuIndex); } +*/ - UInt32 GetNumProcessThreads() const { return CountAffinity(processAffinityMask); } - UInt32 GetNumSystemThreads() const { return CountAffinity(systemAffinityMask); } + UInt32 GetNumProcessThreads() const + { + if (IsGroupMode) + return Groups.NumThreadsTotal; + // IsGroupMode == false + // so we don't want to use groups + // we return number of threads in default primary group: + return CountAffinity(processAffinityMask); + } + UInt32 GetNumSystemThreads() const + { + if (Groups.GroupSizes.Size() > 1 && Groups.NumThreadsTotal) + return Groups.NumThreadsTotal; + return CountAffinity(systemAffinityMask); + } BOOL Get(); diff --git a/CPP/Windows/Thread.h b/CPP/Windows/Thread.h index d72f64c..75c1616 100644 --- a/CPP/Windows/Thread.h +++ b/CPP/Windows/Thread.h @@ -26,8 +26,10 @@ public: { return Thread_Create_With_Affinity(&thread, startAddress, param, affinity); } WRes Create_With_CpuSet(THREAD_FUNC_TYPE startAddress, LPVOID param, const CCpuSet *cpuSet) { return Thread_Create_With_CpuSet(&thread, startAddress, param, cpuSet); } - - #ifdef _WIN32 + +#ifdef _WIN32 + WRes Create_With_Group(THREAD_FUNC_TYPE startAddress, LPVOID param, unsigned group, CAffinityMask affinity = 0) + { return 
Thread_Create_With_Group(&thread, startAddress, param, group, affinity); } operator HANDLE() { return thread; } void Attach(HANDLE handle) { thread = handle; } HANDLE Detach() { HANDLE h = thread; thread = NULL; return h; } @@ -36,7 +38,7 @@ public: bool Terminate(DWORD exitCode) { return BOOLToBool(::TerminateThread(thread, exitCode)); } int GetPriority() { return ::GetThreadPriority(thread); } bool SetPriority(int priority) { return BOOLToBool(::SetThreadPriority(thread, priority)); } - #endif +#endif }; } diff --git a/CPP/Windows/TimeUtils.cpp b/CPP/Windows/TimeUtils.cpp index bbd79ba..4e3bc59 100644 --- a/CPP/Windows/TimeUtils.cpp +++ b/CPP/Windows/TimeUtils.cpp @@ -258,8 +258,9 @@ bool GetSecondsSince1601(unsigned year, unsigned month, unsigned day, FreeBSD 11.0, NetBSD 7.1, OpenBSD 6.0, Minix 3.1.8, AIX 7.1, HP-UX 11.31, IRIX 6.5, Solaris 11.3, Cygwin 2.9, mingw, MSVC 14, Android 9.0. + Android NDK defines TIME_UTC but doesn't have the timespec_get(). */ -#if defined(TIME_UTC) +#if defined(TIME_UTC) && !defined(__ANDROID__) #define ZIP7_USE_timespec_get // #pragma message("ZIP7_USE_timespec_get") #elif defined(CLOCK_REALTIME) diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs index 867e3d1..d369074 100644 --- a/DOC/7zip.wxs +++ b/DOC/7zip.wxs @@ -1,7 +1,7 @@ - - + + diff --git a/DOC/License.txt b/DOC/License.txt index 8917dfc..bbb56a3 100644 --- a/DOC/License.txt +++ b/DOC/License.txt @@ -3,7 +3,7 @@ License for use and distribution ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 7-Zip Copyright (C) 1999-2024 Igor Pavlov. + 7-Zip Copyright (C) 1999-2025 Igor Pavlov. The licenses for files are: @@ -58,7 +58,7 @@ BSD 3-clause License in 7-Zip code Copyright (c) 2015-2016, Apple Inc. All rights reserved. Copyright (c) Facebook, Inc. All rights reserved. - Copyright (c) 2023-2024 Igor Pavlov. + Copyright (c) 2023-2025 Igor Pavlov. 
+ Text of the "BSD 3-clause License" ---------------------------------- @@ -102,7 +102,7 @@ BSD 2-clause License in 7-Zip code XXH64 code in 7-Zip was derived from the original XXH64 code developed by Yann Collet. Copyright (c) 2012-2021 Yann Collet. - Copyright (c) 2023-2024 Igor Pavlov. + Copyright (c) 2023-2025 Igor Pavlov. Text of the "BSD 2-clause License" ---------------------------------- diff --git a/DOC/readme.txt b/DOC/readme.txt index ad1d842..7fbbdc8 100644 --- a/DOC/readme.txt +++ b/DOC/readme.txt @@ -1,9 +1,9 @@ -7-Zip 24.09 Sources +7-Zip 25.00 Sources ------------------- 7-Zip is a file archiver for Windows. -7-Zip Copyright (C) 1999-2024 Igor Pavlov. +7-Zip Copyright (C) 1999-2025 Igor Pavlov. License Info @@ -73,8 +73,8 @@ All final 7-Zip binaries are compiled via makefiles, that provide best optimization options. -How to compile with makefile ----------------------------- +How to compile with makefile in Windows +--------------------------------------- Some macronames can be defined for compiling with makefile: @@ -88,6 +88,23 @@ MY_DYNAMIC_LINK for dynamic linking to the run-time library (msvcrt.dll). The default makefile option is static linking to the run-time library. +To compile all 7-Zip files for x64 with Visual Studio 2022, +use the following command sequence: + + cd SRC\CPP\7zip + %comspec% /k "C:\Program Files\VS2022\VC\Auxiliary\Build\vcvars64.bat" + nmake + +You can use other "vcvars*.bat" files from the "VS2022\VC\Auxiliary\Build" directory +to compile for other platforms: + vcvars64.bat + vcvarsamd64_arm64.bat + vcvarsamd64_x86.bat + +You can also compile a single binary from the directory of the related project. 
+For example, to compile 7za.exe, use the following command sequence: + cd SRC\CPP\7zip\Bundles\Alone\ + nmake Compiling 7-Zip for Unix/Linux diff --git a/DOC/src-history.txt b/DOC/src-history.txt index 6b57694..70b11b5 100644 --- a/DOC/src-history.txt +++ b/DOC/src-history.txt @@ -1,6 +1,18 @@ HISTORY of the 7-Zip source code -------------------------------- +25.00 2025-07-05 +------------------------- +- 7-Zip for Windows can now use more than 64 CPU threads for compression + to zip/7z/xz archives and for the 7-Zip benchmark. + If there is more than one processor group in Windows (on systems with more than + 64 CPU threads), 7-Zip distributes running CPU threads across different processor groups. +- bzip2 compression speed was increased by 15-40%. +- deflate (zip/gz) compression speed was increased by 1-3%. +- improved support for zip, cpio and fat archives. +- fixed some bugs and vulnerabilities. + + 24.09 2024-11-29 ------------------------- - The default dictionary size values for LZMA/LZMA2 compression methods were increased: From 5e96a8279489832924056b1fa82f29d5837c9469 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Sun, 3 Aug 2025 00:00:00 +0000 Subject: [PATCH 2/3] 25.01 --- C/7zVersion.h | 6 +- C/LzFind.c | 2 +- CPP/7zip/Bundles/Alone/Alone.dsp | 20 - CPP/7zip/Compress/BZip2Encoder.cpp | 23 +- CPP/7zip/UI/Agent/AgentProxy.cpp | 2 +- CPP/7zip/UI/Client7z/makefile.gcc | 2 +- CPP/7zip/UI/Common/ArchiveCommandLine.cpp | 20 +- CPP/7zip/UI/Common/ArchiveExtractCallback.cpp | 554 +++++++++++++----- CPP/7zip/UI/Common/ArchiveExtractCallback.h | 169 ++++-- CPP/Windows/FileDir.cpp | 44 +- CPP/Windows/FileDir.h | 13 +- DOC/7zip.wxs | 2 +- DOC/readme.txt | 2 +- DOC/src-history.txt | 10 + 14 files changed, 582 insertions(+), 287 deletions(-) diff --git a/C/7zVersion.h b/C/7zVersion.h index 72733f7..b6142e9 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ #define MY_VER_MAJOR 25 -#define MY_VER_MINOR 0 +#define 
MY_VER_MINOR 1 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "25.00" +#define MY_VERSION_NUMBERS "25.01" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,7 +10,7 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2025-07-05" +#define MY_DATE "2025-08-03" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" diff --git a/C/LzFind.c b/C/LzFind.c index 6aba919..330bc17 100644 --- a/C/LzFind.c +++ b/C/LzFind.c @@ -598,7 +598,7 @@ void MatchFinder_Init(void *_p) #ifdef MY_CPU_X86_OR_AMD64 #if defined(__clang__) && (__clang_major__ >= 4) \ - || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40701) + || defined(Z7_GCC_VERSION) && (Z7_GCC_VERSION >= 40900) // || defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 1900) #define USE_LZFIND_SATUR_SUB_128 diff --git a/CPP/7zip/Bundles/Alone/Alone.dsp b/CPP/7zip/Bundles/Alone/Alone.dsp index beed5a7..558c41e 100644 --- a/CPP/7zip/Bundles/Alone/Alone.dsp +++ b/CPP/7zip/Bundles/Alone/Alone.dsp @@ -1148,26 +1148,6 @@ SOURCE=..\..\Compress\PpmdZip.cpp SOURCE=..\..\Compress\PpmdZip.h # End Source File # End Group -# Begin Group "RangeCoder" - -# PROP Default_Filter "" -# Begin Source File - -SOURCE=..\..\Compress\RangeCoder.h -# End Source File -# Begin Source File - -SOURCE=..\..\Compress\RangeCoderBit.h -# End Source File -# Begin Source File - -SOURCE=..\..\Compress\RangeCoderBitTree.h -# End Source File -# Begin Source File - -SOURCE=..\..\Compress\RangeCoderOpt.h -# End Source File -# End Group # Begin Group "Shrink" # PROP Default_Filter "" diff --git a/CPP/7zip/Compress/BZip2Encoder.cpp b/CPP/7zip/Compress/BZip2Encoder.cpp index f8ee0c9..af0b312 100644 --- a/CPP/7zip/Compress/BZip2Encoder.cpp +++ b/CPP/7zip/Compress/BZip2Encoder.cpp @@ -66,18 +66,14 @@ HRESULT CThreadInfo::Create() if (wres == 0) { wres = CanWriteEvent.Create(); if (wres == 0) { + wres = #ifdef _WIN32 - if (Encoder->_props.NumThreadGroups != 0) - { - const UInt32 group = 
ThreadNextGroup_GetNext(&Encoder->ThreadNextGroup); - wres = Thread.Create_With_Group(MFThread, this, group, 0); // affinity - } - else + Encoder->_props.NumThreadGroups > 1 ? + Thread.Create_With_Group(MFThread, this, ThreadNextGroup_GetNext(&Encoder->ThreadNextGroup), 0) : // affinity #endif - if (Encoder->_props.Affinity != 0) - wres = Thread.Create_With_Affinity(MFThread, this, (CAffinityMask)Encoder->_props.Affinity); - else - wres = Thread.Create(MFThread, this); + Encoder->_props.Affinity != 0 ? + Thread.Create_With_Affinity(MFThread, this, (CAffinityMask)Encoder->_props.Affinity) : + Thread.Create(MFThread, this); }}} return HRESULT_FROM_WIN32(wres); } @@ -935,14 +931,13 @@ void CEncoder::WriteBytes(const Byte *data, UInt32 sizeInBits, unsigned lastByte HRESULT CEncoder::CodeReal(ISequentialInStream *inStream, ISequentialOutStream *outStream, const UInt64 * /* inSize */, const UInt64 * /* outSize */, ICompressProgressInfo *progress) { - ThreadNextGroup_Init(&ThreadNextGroup, _props.NumThreadGroups, 0); // startGroup - NumBlocks = 0; - #ifndef Z7_ST +#ifndef Z7_ST Progress = progress; + ThreadNextGroup_Init(&ThreadNextGroup, _props.NumThreadGroups, 0); // startGroup RINOK(Create()) for (UInt32 t = 0; t < NumThreads; t++) - #endif +#endif { #ifndef Z7_ST CThreadInfo &ti = ThreadsInfo[t]; diff --git a/CPP/7zip/UI/Agent/AgentProxy.cpp b/CPP/7zip/UI/Agent/AgentProxy.cpp index 176f39b..d04ddab 100644 --- a/CPP/7zip/UI/Agent/AgentProxy.cpp +++ b/CPP/7zip/UI/Agent/AgentProxy.cpp @@ -636,7 +636,7 @@ HRESULT CProxyArc2::Load(const CArc &arc, IProgress *progress) file.Name = (const wchar_t *)p; file.NameLen = 0; if (size >= sizeof(wchar_t)) - file.NameLen = size / sizeof(wchar_t) - 1; + file.NameLen = size / (unsigned)sizeof(wchar_t) - 1; } else #endif diff --git a/CPP/7zip/UI/Client7z/makefile.gcc b/CPP/7zip/UI/Client7z/makefile.gcc index fe27011..0f89cb0 100644 --- a/CPP/7zip/UI/Client7z/makefile.gcc +++ b/CPP/7zip/UI/Client7z/makefile.gcc @@ -24,7 +24,6 @@ else 
SYS_OBJS = \ $O/MyWindows.o \ - $O/TimeUtils.o \ endif @@ -53,6 +52,7 @@ WIN_OBJS = \ $O/FileName.o \ $O/PropVariant.o \ $O/PropVariantConv.o \ + $O/TimeUtils.o \ 7ZIP_COMMON_OBJS = \ $O/FileStreams.o \ diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp index de9f43e..7fe18fb 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp @@ -341,7 +341,7 @@ static const CSwitchForm kSwitchForms[] = { "spf", SWFRM_STRING_SINGL(0) }, { "snh", SWFRM_MINUS }, - { "snld", SWFRM_MINUS }, + { "snld", SWFRM_STRING }, { "snl", SWFRM_MINUS }, { "sni", SWFRM_SIMPLE }, @@ -1479,14 +1479,8 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) SetBoolPair(parser, NKey::kStoreOwnerId, options.StoreOwnerId); SetBoolPair(parser, NKey::kStoreOwnerName, options.StoreOwnerName); - - CBoolPair symLinks_AllowDangerous; - SetBoolPair(parser, NKey::kSymLinks_AllowDangerous, symLinks_AllowDangerous); - - /* bool supportSymLink = options.SymLinks.Val; - if (!options.SymLinks.Def) { if (isExtractOrList) @@ -1494,7 +1488,6 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) else supportSymLink = false; } - #ifdef ENV_HAVE_LSTAT if (supportSymLink) global_use_lstat = 1; @@ -1503,7 +1496,6 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) #endif */ - if (isExtractOrList) { CExtractOptionsBase &eo = options.ExtractOptions; @@ -1527,7 +1519,15 @@ void CArcCmdLineParser::Parse2(CArcCmdLineOptions &options) if (!options.SymLinks.Def) nt.SymLinks.Val = true; - nt.SymLinks_AllowDangerous = symLinks_AllowDangerous; + if (parser[NKey::kSymLinks_AllowDangerous].ThereIs) + { + const UString &s = parser[NKey::kSymLinks_AllowDangerous].PostStrings[0]; + UInt32 v = 9; // default value for "-snld" instead of default = 5 without "-snld". 
+ if (!s.IsEmpty()) + if (!StringToUInt32(s, v)) + throw CArcCmdLineException("Unsupported switch postfix -snld", s); + nt.SymLinks_DangerousLevel = (unsigned)v; + } nt.ReplaceColonForAltStream = parser[NKey::kReplaceColonForAltStream].ThereIs; nt.WriteToAltStreamIfColon = parser[NKey::kWriteToAltStreamIfColon].ThereIs; diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp index 3abcd2d..6631629 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.cpp @@ -54,10 +54,14 @@ static const char * const kCantSetFileLen = "Cannot set length for output file"; #ifdef SUPPORT_LINKS static const char * const kCantCreateHardLink = "Cannot create hard link"; static const char * const kCantCreateSymLink = "Cannot create symbolic link"; +static const char * const k_HardLink_to_SymLink_Ignored = "Hard link to symbolic link was ignored"; +static const char * const k_CantDelete_File_for_SymLink = "Cannot delete file for symbolic link creation"; +static const char * const k_CantDelete_Dir_for_SymLink = "Cannot delete directory for symbolic link creation"; #endif static const unsigned k_LinkDataSize_LIMIT = 1 << 12; +#ifdef SUPPORT_LINKS #if WCHAR_PATH_SEPARATOR != L'/' // we convert linux slashes to windows slashes for further processing. // also we convert linux backslashes to BackslashReplacement character. 
@@ -67,7 +71,7 @@ static const unsigned k_LinkDataSize_LIMIT = 1 << 12; #else #define REPLACE_SLASHES_from_Linux_to_Sys(s) #endif - +#endif #ifndef Z7_SFX @@ -326,13 +330,14 @@ void CArchiveExtractCallback::Init( _outFileStream.Release(); _bufPtrSeqOutStream.Release(); - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS _hardLinks.Clear(); - #endif + _postLinks.Clear(); +#endif - #ifdef SUPPORT_ALT_STREAMS +#ifdef SUPPORT_ALT_STREAMS _renamedFiles.Clear(); - #endif +#endif _ntOptions = ntOptions; _wildcardCensor = wildcardCensor; @@ -455,7 +460,8 @@ Z7_COM7F_IMF(CArchiveExtractCallback::SetRatioInfo(const UInt64 *inSize, const U } -void CArchiveExtractCallback::CreateComplexDirectory(const UStringVector &dirPathParts, FString &fullPath) +void CArchiveExtractCallback::CreateComplexDirectory( + const UStringVector &dirPathParts, bool isFinal, FString &fullPath) { // we use (_item.IsDir) in this function @@ -487,7 +493,7 @@ void CArchiveExtractCallback::CreateComplexDirectory(const UStringVector &dirPat const UString &s = dirPathParts[i]; fullPath += us2fs(s); - const bool isFinalDir = (i == dirPathParts.Size() - 1 && _item.IsDir); + const bool isFinalDir = (i == dirPathParts.Size() - 1 && isFinal && _item.IsDir); if (fullPath.IsEmpty()) { @@ -548,7 +554,7 @@ static void AddPathToMessage(UString &s, const FString &path) s += fs2us(path); } -HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FString &path) const { UString s (message); AddPathToMessage(s, path); @@ -556,7 +562,7 @@ HRESULT CArchiveExtractCallback::SendMessageError(const char *message, const FSt } -HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) const { UString s (message); if (errorCode != S_OK) @@ 
-568,13 +574,13 @@ HRESULT CArchiveExtractCallback::SendMessageError_with_Error(HRESULT errorCode, return _extractCallback2->MessageError(s); } -HRESULT CArchiveExtractCallback::SendMessageError_with_LastError(const char *message, const FString &path) +HRESULT CArchiveExtractCallback::SendMessageError_with_LastError(const char *message, const FString &path) const { const HRESULT errorCode = GetLastError_noZero_HRESULT(); return SendMessageError_with_Error(errorCode, message, path); } -HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) +HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) const { UString s (message); if (errorCode != 0) @@ -588,7 +594,7 @@ HRESULT CArchiveExtractCallback::SendMessageError2(HRESULT errorCode, const char } HRESULT CArchiveExtractCallback::SendMessageError2_with_LastError( - const char *message, const FString &path1, const FString &path2) + const char *message, const FString &path1, const FString &path2) const { const HRESULT errorCode = GetLastError_noZero_HRESULT(); return SendMessageError2(errorCode, message, path1, path2); @@ -627,6 +633,7 @@ Z7_COM7F_IMF(CGetProp::GetProp(PROPID propID, PROPVARIANT *value)) struct CLinkLevelsInfo { bool IsAbsolute; + bool ParentDirDots_after_NonParent; int LowLevel; int FinalLevel; @@ -640,6 +647,8 @@ void CLinkLevelsInfo::Parse(const UString &path, bool isWSL) NName::IsAbsolutePath(path); LowLevel = 0; FinalLevel = 0; + ParentDirDots_after_NonParent = false; + bool nonParentDir = false; UStringVector parts; SplitPathToParts(path, parts); @@ -658,12 +667,17 @@ void CLinkLevelsInfo::Parse(const UString &path, bool isWSL) continue; if (s.IsEqualTo("..")) { + if (IsAbsolute || nonParentDir) + ParentDirDots_after_NonParent = true; level--; if (LowLevel > level) - LowLevel = level; + LowLevel = level; } else + { + nonParentDir = true; 
level++; + } } FinalLevel = level; @@ -915,7 +929,7 @@ HRESULT CArchiveExtractCallback::ReadLink() #ifndef _WIN32 static HRESULT GetOwner(IInArchive *archive, - UInt32 index, UInt32 pidName, UInt32 pidId, COwnerInfo &res) + UInt32 index, UInt32 pidName, UInt32 pidId, CProcessedFileInfo::COwnerInfo &res) { { NWindows::NCOM::CPropVariant prop; @@ -1047,7 +1061,7 @@ void CArchiveExtractCallback::CorrectPathParts() } -void CArchiveExtractCallback::GetFiTimesCAM(CFiTimesCAM &pt) +static void GetFiTimesCAM(const CProcessedFileInfo &fi, CFiTimesCAM &pt, const CArc &arc) { pt.CTime_Defined = false; pt.ATime_Defined = false; @@ -1055,27 +1069,27 @@ void CArchiveExtractCallback::GetFiTimesCAM(CFiTimesCAM &pt) // if (Write_MTime) { - if (_fi.MTime.Def) + if (fi.MTime.Def) { - _fi.MTime.Write_To_FiTime(pt.MTime); + fi.MTime.Write_To_FiTime(pt.MTime); pt.MTime_Defined = true; } - else if (_arc->MTime.Def) + else if (arc.MTime.Def) { - _arc->MTime.Write_To_FiTime(pt.MTime); + arc.MTime.Write_To_FiTime(pt.MTime); pt.MTime_Defined = true; } } - if (/* Write_CTime && */ _fi.CTime.Def) + if (/* Write_CTime && */ fi.CTime.Def) { - _fi.CTime.Write_To_FiTime(pt.CTime); + fi.CTime.Write_To_FiTime(pt.CTime); pt.CTime_Defined = true; } - if (/* Write_ATime && */ _fi.ATime.Def) + if (/* Write_ATime && */ fi.ATime.Def) { - _fi.ATime.Write_To_FiTime(pt.ATime); + fi.ATime.Write_To_FiTime(pt.ATime); pt.ATime_Defined = true; } } @@ -1086,6 +1100,7 @@ void CArchiveExtractCallback::CreateFolders() // 21.04 : we don't change original (_item.PathParts) here UStringVector pathParts = _item.PathParts; + bool isFinal = true; // bool is_DirOp = false; if (!pathParts.IsEmpty()) { @@ -1095,12 +1110,15 @@ void CArchiveExtractCallback::CreateFolders() but if we create dir item here, it's not problem. 
*/ if (!_item.IsDir #ifdef SUPPORT_LINKS - #ifndef WIN32 + // #ifndef WIN32 || !_link.LinkPath.IsEmpty() - #endif + // #endif #endif ) + { pathParts.DeleteBack(); + isFinal = false; // last path part was excluded + } // else is_DirOp = true; } @@ -1124,7 +1142,7 @@ void CArchiveExtractCallback::CreateFolders() */ FString fullPathNew; - CreateComplexDirectory(pathParts, fullPathNew); + CreateComplexDirectory(pathParts, isFinal, fullPathNew); /* if (is_DirOp) @@ -1145,12 +1163,12 @@ void CArchiveExtractCallback::CreateFolders() return; CDirPathTime pt; - GetFiTimesCAM(pt); + GetFiTimesCAM(_fi, pt, *_arc); if (pt.IsSomeTimeDefined()) { pt.Path = fullPathNew; - pt.SetDirTime(); + pt.SetDirTime_to_FS_2(); _extractedFolders.Add(pt); } } @@ -1292,9 +1310,11 @@ HRESULT CArchiveExtractCallback::CheckExistFile(FString &fullProcessedPath, bool - - - +/* +return: + needExit = false: caller will use (outStreamLoc) and _hashStreamSpec + needExit = true : caller will not use (outStreamLoc) and _hashStreamSpec. +*/ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtr &outStreamLoc, bool &needExit) { needExit = true; @@ -1383,12 +1403,15 @@ HRESULT CArchiveExtractCallback::GetExtractStream(CMyComPtrGetRawProps) + { + const void *data; + UInt32 dataSize; + UInt32 propType; + _arc->GetRawProps->GetRawProp(indexInArc, kpidNtSecure, &data, &dataSize, &propType); + if (dataSize != 0) + { + if (propType != NPropDataType::kRaw) + return E_FAIL; + if (CheckNtSecure((const Byte *)data, dataSize)) + { + SECURITY_INFORMATION securInfo = DACL_SECURITY_INFORMATION | GROUP_SECURITY_INFORMATION | OWNER_SECURITY_INFORMATION; + if (_saclEnabled) + securInfo |= SACL_SECURITY_INFORMATION; + // if (! 
+ ::SetFileSecurityW(fs2us(path), securInfo, (PSECURITY_DESCRIPTOR)(void *)(const Byte *)(data)); + { + // RINOK(SendMessageError_with_LastError("SetFileSecurity FAILS", path)) + } + } + } + } + return S_OK; +} +#endif // Z7_USE_SECURITY_CODE + + Z7_COM7F_IMF(CArchiveExtractCallback::SetOperationResult(Int32 opRes)) { COM_TRY_BEGIN @@ -2490,27 +2705,9 @@ Z7_COM7F_IMF(CArchiveExtractCallback::SetOperationResult(Int32 opRes)) RINOK(CloseReparseAndFile()) - #ifdef Z7_USE_SECURITY_CODE - if (!_stdOutMode && _extractMode && _ntOptions.NtSecurity.Val && _arc->GetRawProps) - { - const void *data; - UInt32 dataSize; - UInt32 propType; - _arc->GetRawProps->GetRawProp(_index, kpidNtSecure, &data, &dataSize, &propType); - if (dataSize != 0) - { - if (propType != NPropDataType::kRaw) - return E_FAIL; - if (CheckNtSecure((const Byte *)data, dataSize)) - { - SECURITY_INFORMATION securInfo = DACL_SECURITY_INFORMATION | GROUP_SECURITY_INFORMATION | OWNER_SECURITY_INFORMATION; - if (_saclEnabled) - securInfo |= SACL_SECURITY_INFORMATION; - ::SetFileSecurityW(fs2us(_diskFilePath), securInfo, (PSECURITY_DESCRIPTOR)(void *)(const Byte *)(data)); - } - } - } - #endif // Z7_USE_SECURITY_CODE +#ifdef Z7_USE_SECURITY_CODE + RINOK(SetSecurityInfo(_index, _diskFilePath)) +#endif if (!_curSize_Defined) GetUnpackSize(); @@ -2754,15 +2951,58 @@ void CDirPathSortPair::SetNumSlashes(const FChar *s) } -bool CDirPathTime::SetDirTime() const +bool CFiTimesCAM::SetDirTime_to_FS(CFSTR path) const { - return NDir::SetDirTime(Path, + // it's same function for dir and for file + return NDir::SetDirTime(path, CTime_Defined ? &CTime : NULL, ATime_Defined ? &ATime : NULL, MTime_Defined ? &MTime : NULL); } +#ifdef SUPPORT_LINKS + +bool CFiTimesCAM::SetLinkFileTime_to_FS(CFSTR path) const +{ + // it's same function for dir and for file + return NDir::SetLinkFileTime(path, + CTime_Defined ? &CTime : NULL, + ATime_Defined ? &ATime : NULL, + MTime_Defined ? 
&MTime : NULL); +} + +HRESULT CArchiveExtractCallback::SetPostLinks() const +{ + FOR_VECTOR (i, _postLinks) + { + const CPostLink &link = _postLinks[i]; + bool linkWasSet = false; + RINOK(SetLink2(*this, link, linkWasSet)) + if (linkWasSet) + { +#ifdef _WIN32 + // Linux now doesn't support permissions for symlinks + SetAttrib_Base(link.fullProcessedPath_from, link.item_FileInfo, *this); +#endif + + CFiTimesCAM pt; + GetFiTimesCAM(link.item_FileInfo, pt, *_arc); + if (pt.IsSomeTimeDefined()) + pt.SetLinkFileTime_to_FS(link.fullProcessedPath_from); + +#ifdef Z7_USE_SECURITY_CODE + // we set security information after timestamps setting + RINOK(SetSecurityInfo(link.Index_in_Arc, link.fullProcessedPath_from)) +#endif + } + } + return S_OK; +} + +#endif + + HRESULT CArchiveExtractCallback::SetDirsTimes() { if (!_arc) @@ -2786,7 +3026,7 @@ HRESULT CArchiveExtractCallback::SetDirsTimes() for (i = 0; i < pairs.Size(); i++) { const CDirPathTime &dpt = _extractedFolders[pairs[i].Index]; - if (!dpt.SetDirTime()) + if (!dpt.SetDirTime_to_FS_2()) { // result = E_FAIL; // do we need error message here in Windows and in posix? @@ -2818,10 +3058,20 @@ HRESULT CArchiveExtractCallback::SetDirsTimes() HRESULT CArchiveExtractCallback::CloseArc() { + // we call CloseReparseAndFile() here because we can have non-closed file in some cases? 
HRESULT res = CloseReparseAndFile(); - const HRESULT res2 = SetDirsTimes(); - if (res == S_OK) - res = res2; +#ifdef SUPPORT_LINKS + { + const HRESULT res2 = SetPostLinks(); + if (res == S_OK) + res = res2; + } +#endif + { + const HRESULT res2 = SetDirsTimes(); + if (res == S_OK) + res = res2; + } _arc = NULL; return res; } diff --git a/CPP/7zip/UI/Common/ArchiveExtractCallback.h b/CPP/7zip/UI/Common/ArchiveExtractCallback.h index 71fa3ef..3c62763 100644 --- a/CPP/7zip/UI/Common/ArchiveExtractCallback.h +++ b/CPP/7zip/UI/Common/ArchiveExtractCallback.h @@ -52,7 +52,6 @@ struct CExtractNtOptions { CBoolPair NtSecurity; CBoolPair SymLinks; - CBoolPair SymLinks_AllowDangerous; CBoolPair HardLinks; CBoolPair AltStreams; bool ReplaceColonForAltStream; @@ -66,6 +65,8 @@ struct CExtractNtOptions bool PreserveATime; bool OpenShareForWrite; + unsigned SymLinks_DangerousLevel; + UInt64 MemLimit; CExtractNtOptions(): @@ -74,10 +75,10 @@ struct CExtractNtOptions ExtractOwner(false), PreserveATime(false), OpenShareForWrite(false), + SymLinks_DangerousLevel(5), MemLimit((UInt64)(Int64)-1) { SymLinks.Val = true; - SymLinks_AllowDangerous.Val = false; HardLinks.Val = true; AltStreams.Val = true; @@ -166,19 +167,22 @@ struct CFiTimesCAM ATime_Defined | MTime_Defined; } + bool SetDirTime_to_FS(CFSTR path) const; +#ifdef SUPPORT_LINKS + bool SetLinkFileTime_to_FS(CFSTR path) const; +#endif }; struct CDirPathTime: public CFiTimesCAM { FString Path; - bool SetDirTime() const; + bool SetDirTime_to_FS_2() const { return SetDirTime_to_FS(Path); } }; #ifdef SUPPORT_LINKS - enum ELinkType { k_LinkType_HardLink, @@ -227,6 +231,15 @@ private: #endif // SUPPORT_LINKS + +struct CProcessedFileInfo +{ + CArcTime CTime; + CArcTime ATime; + CArcTime MTime; + UInt32 Attrib; + bool Attrib_Defined; + #ifndef _WIN32 struct COwnerInfo @@ -243,8 +256,76 @@ struct COwnerInfo } }; + COwnerInfo Owner; + COwnerInfo Group; #endif + void Clear() + { +#ifndef _WIN32 + Attrib_Defined = false; + Owner.Clear(); 
+#endif + } + + bool IsReparse() const + { + return (Attrib_Defined && (Attrib & FILE_ATTRIBUTE_REPARSE_POINT) != 0); + } + + bool IsLinuxSymLink() const + { + return (Attrib_Defined && MY_LIN_S_ISLNK(Attrib >> 16)); + } + + void SetFromPosixAttrib(UInt32 a) + { + // here we set only part of combined attribute required by SetFileAttrib() call + #ifdef _WIN32 + // Windows sets FILE_ATTRIBUTE_NORMAL, if we try to set 0 as attribute. + Attrib = MY_LIN_S_ISDIR(a) ? + FILE_ATTRIBUTE_DIRECTORY : + FILE_ATTRIBUTE_ARCHIVE; + if ((a & 0222) == 0) // (& S_IWUSR) in p7zip + Attrib |= FILE_ATTRIBUTE_READONLY; + // 22.00 : we need type bits for (MY_LIN_S_IFLNK) for IsLinuxSymLink() + a &= MY_LIN_S_IFMT; + if (a == MY_LIN_S_IFLNK) + Attrib |= (a << 16); + #else + Attrib = (a << 16) | FILE_ATTRIBUTE_UNIX_EXTENSION; + #endif + Attrib_Defined = true; + } +}; + + +#ifdef SUPPORT_LINKS + +struct CPostLink +{ + UInt32 Index_in_Arc; + bool item_IsDir; // _item.IsDir + UString item_Path; // _item.Path; + UStringVector item_PathParts; // _item.PathParts; + CProcessedFileInfo item_FileInfo; // _fi + FString fullProcessedPath_from; // full file path in FS + CLinkInfo LinkInfo; +}; + +/* +struct CPostLinks +{ + void Clear() + { + Links.Clear(); + } +}; +*/ + +#endif // SUPPORT_LINKS + + class CArchiveExtractCallback Z7_final: public IArchiveExtractCallback, @@ -292,8 +373,9 @@ public: private: const CArc *_arc; +public: CExtractNtOptions _ntOptions; - +private: bool _encrypted; bool _isSplit; bool _curSize_Defined; @@ -325,7 +407,9 @@ private: CMyComPtr _cryptoGetTextPassword; FString _dirPathPrefix; +public: FString _dirPathPrefix_Full; +private: #ifndef Z7_SFX @@ -337,49 +421,7 @@ private: CReadArcItem _item; FString _diskFilePath; - struct CProcessedFileInfo - { - CArcTime CTime; - CArcTime ATime; - CArcTime MTime; - UInt32 Attrib; - bool Attrib_Defined; - - #ifndef _WIN32 - COwnerInfo Owner; - COwnerInfo Group; - #endif - - bool IsReparse() const - { - return (Attrib_Defined && (Attrib 
& FILE_ATTRIBUTE_REPARSE_POINT) != 0); - } - - bool IsLinuxSymLink() const - { - return (Attrib_Defined && MY_LIN_S_ISLNK(Attrib >> 16)); - } - - void SetFromPosixAttrib(UInt32 a) - { - // here we set only part of combined attribute required by SetFileAttrib() call - #ifdef _WIN32 - // Windows sets FILE_ATTRIBUTE_NORMAL, if we try to set 0 as attribute. - Attrib = MY_LIN_S_ISDIR(a) ? - FILE_ATTRIBUTE_DIRECTORY : - FILE_ATTRIBUTE_ARCHIVE; - if ((a & 0222) == 0) // (& S_IWUSR) in p7zip - Attrib |= FILE_ATTRIBUTE_READONLY; - // 22.00 : we need type bits for (MY_LIN_S_IFLNK) for IsLinuxSymLink() - a &= MY_LIN_S_IFMT; - if (a == MY_LIN_S_IFLNK) - Attrib |= (a << 16); - #else - Attrib = (a << 16) | FILE_ATTRIBUTE_UNIX_EXTENSION; - #endif - Attrib_Defined = true; - } - } _fi; + CProcessedFileInfo _fi; UInt64 _position; UInt64 _curSize; @@ -421,20 +463,21 @@ private: // CObjectVector _delayedSymLinks; #endif - void CreateComplexDirectory(const UStringVector &dirPathParts, FString &fullPath); + void CreateComplexDirectory( + const UStringVector &dirPathParts, bool isFinal, FString &fullPath); HRESULT GetTime(UInt32 index, PROPID propID, CArcTime &ft); HRESULT GetUnpackSize(); FString Hash_GetFullFilePath(); - void SetAttrib(); + void SetAttrib() const; public: - HRESULT SendMessageError(const char *message, const FString &path); - HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path); - HRESULT SendMessageError_with_LastError(const char *message, const FString &path); - HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2); - HRESULT SendMessageError2_with_LastError(const char *message, const FString &path1, const FString &path2); + HRESULT SendMessageError(const char *message, const FString &path) const; + HRESULT SendMessageError_with_Error(HRESULT errorCode, const char *message, const FString &path) const; + HRESULT SendMessageError_with_LastError(const char *message, const 
FString &path) const; + HRESULT SendMessageError2(HRESULT errorCode, const char *message, const FString &path1, const FString &path2) const; + HRESULT SendMessageError2_with_LastError(const char *message, const FString &path1, const FString &path2) const; #if defined(_WIN32) && !defined(UNDER_CE) && !defined(Z7_SFX) NExtract::NZoneIdMode::EEnum ZoneMode; @@ -497,10 +540,11 @@ public: UInt64 packSize); - #ifdef SUPPORT_LINKS +#ifdef SUPPORT_LINKS private: CHardLinks _hardLinks; + CObjectVector _postLinks; CLinkInfo _link; // const void *NtReparse_Data; // UInt32 NtReparse_Size; @@ -512,13 +556,16 @@ private: const FString &fullProcessedPath_from, const CLinkInfo &linkInfo, bool &linkWasSet); + HRESULT SetPostLinks() const; public: - // call PrepareHardLinks() after Init() + HRESULT CreateHardLink2(const FString &newFilePath, + const FString &existFilePath, bool &link_was_Created) const; + HRESULT DeleteLinkFileAlways_or_RemoveEmptyDir(const FString &path, bool checkThatFileIsEmpty) const; HRESULT PrepareHardLinks(const CRecordVector *realIndices); // NULL means all items +#endif - #endif - +private: #ifdef SUPPORT_ALT_STREAMS CObjectVector _renamedFiles; @@ -526,6 +573,7 @@ public: // call it after Init() +public: #ifndef Z7_SFX void SetBaseParentFolderIndex(UInt32 indexInArc) { @@ -547,7 +595,6 @@ private: HRESULT Read_fi_Props(); void CorrectPathParts(); - void GetFiTimesCAM(CFiTimesCAM &pt); void CreateFolders(); HRESULT CheckExistFile(FString &fullProcessedPath, bool &needExit); @@ -556,8 +603,8 @@ private: HRESULT CloseFile(); HRESULT CloseReparseAndFile(); - HRESULT CloseReparseAndFile2(); HRESULT SetDirsTimes(); + HRESULT SetSecurityInfo(UInt32 indexInArc, const FString &path) const; }; diff --git a/CPP/Windows/FileDir.cpp b/CPP/Windows/FileDir.cpp index 10c4e98..ad0d8c9 100644 --- a/CPP/Windows/FileDir.cpp +++ b/CPP/Windows/FileDir.cpp @@ -124,7 +124,7 @@ bool GetSystemDir(FString &path) #endif // UNDER_CE -bool SetDirTime(CFSTR path, const CFiTime *cTime, 
const CFiTime *aTime, const CFiTime *mTime) +static bool SetFileTime_Base(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime, DWORD dwFlagsAndAttributes) { #ifndef _UNICODE if (!g_IsNT) @@ -137,14 +137,14 @@ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CF HANDLE hDir = INVALID_HANDLE_VALUE; IF_USE_MAIN_PATH hDir = ::CreateFileW(fs2us(path), GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + NULL, OPEN_EXISTING, dwFlagsAndAttributes, NULL); #ifdef Z7_LONG_PATH if (hDir == INVALID_HANDLE_VALUE && USE_SUPER_PATH) { UString superPath; if (GetSuperPath(path, superPath, USE_MAIN_PATH)) hDir = ::CreateFileW(superPath, GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); + NULL, OPEN_EXISTING, dwFlagsAndAttributes, NULL); } #endif @@ -157,6 +157,15 @@ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CF return res; } +bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, FILE_FLAG_BACKUP_SEMANTICS); +} + +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OPEN_REPARSE_POINT); +} bool SetFileAttrib(CFSTR path, DWORD attrib) @@ -1173,17 +1182,15 @@ bool GetCurrentDir(FString &path) -bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +static bool SetFileTime_Base(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime, const int flags) { // need testing /* struct utimbuf buf; struct stat st; UNUSED_VAR(cTime) - printf("\nstat = %s\n", path); int ret = stat(path, &st); - if (ret == 0) { buf.actime = st.st_atime; @@ -1195,47 +1202,42 @@ bool SetDirTime(CFSTR path, const CFiTime 
*cTime, const CFiTime *aTime, const CF buf.actime = cur_time; buf.modtime = cur_time; } - if (aTime) { UInt32 ut; if (NTime::FileTimeToUnixTime(*aTime, ut)) buf.actime = ut; } - if (mTime) { UInt32 ut; if (NTime::FileTimeToUnixTime(*mTime, ut)) buf.modtime = ut; } - return utime(path, &buf) == 0; */ // if (!aTime && !mTime) return true; - struct timespec times[2]; UNUSED_VAR(cTime) - bool needChange; needChange = FiTime_To_timespec(aTime, times[0]); needChange |= FiTime_To_timespec(mTime, times[1]); - - /* - if (mTime) - { - printf("\n time = %ld.%9ld\n", mTime->tv_sec, mTime->tv_nsec); - } - */ - + // if (mTime) { printf("\n time = %ld.%9ld\n", mTime->tv_sec, mTime->tv_nsec); } if (!needChange) return true; - const int flags = 0; // follow link - // = AT_SYMLINK_NOFOLLOW; // don't follow link return utimensat(AT_FDCWD, path, times, flags) == 0; } +bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, 0); // (flags = 0) means follow_link +} + +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime) +{ + return SetFileTime_Base(path, cTime, aTime, mTime, AT_SYMLINK_NOFOLLOW); +} struct C_umask diff --git a/CPP/Windows/FileDir.h b/CPP/Windows/FileDir.h index 65e6368..9ba98fc 100644 --- a/CPP/Windows/FileDir.h +++ b/CPP/Windows/FileDir.h @@ -18,9 +18,20 @@ bool GetSystemDir(FString &path); WIN32 API : SetFileTime() doesn't allow to set zero timestamps in file but linux : allows unix time = 0 in filesystem */ - +/* +SetDirTime() can be used to set time for file or for dir. +If path is symbolic link, SetDirTime() will follow symbolic link, +and it will set timestamps of symbolic link's target file or dir. +*/ bool SetDirTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime); +/* +SetLinkFileTime() doesn't follow symbolic link, +and it sets timestamps for symbolic link file itself. 
+If (path) is not symbolic link, it still can work (at least in some new OS versions). +*/ +bool SetLinkFileTime(CFSTR path, const CFiTime *cTime, const CFiTime *aTime, const CFiTime *mTime); + #ifdef _WIN32 diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs index d369074..703e22e 100644 --- a/DOC/7zip.wxs +++ b/DOC/7zip.wxs @@ -1,7 +1,7 @@ - + diff --git a/DOC/readme.txt b/DOC/readme.txt index 7fbbdc8..cc89a39 100644 --- a/DOC/readme.txt +++ b/DOC/readme.txt @@ -1,4 +1,4 @@ -7-Zip 25.00 Sources +7-Zip 25.01 Sources ------------------- 7-Zip is a file archiver for Windows. diff --git a/DOC/src-history.txt b/DOC/src-history.txt index 70b11b5..48c9647 100644 --- a/DOC/src-history.txt +++ b/DOC/src-history.txt @@ -1,6 +1,14 @@ HISTORY of the 7-Zip source code -------------------------------- +25.01 2025-08-03 +------------------------- +- The code for handling symbolic links has been changed + to provide greater security when extracting files from archives. + Command line switch -snld20 can be used to bypass default security + checks when creating symbolic links. + + 25.00 2025-07-05 ------------------------- - 7-Zip for Windows can now use more than 64 CPU threads for compression @@ -11,6 +19,8 @@ HISTORY of the 7-Zip source code - deflate (zip/gz) compression speed was increased by 1-3%. - improved support for zip, cpio and fat archives. - fixed some bugs and vulnerabilities. +- the bug was fixed : CVE-2025-53816 : 7-Zip could work incorrectly for some incorrect RAR archives. +- the bug was fixed : CVE-2025-53817 : 7-Zip could crash for some incorrect COM (Compound File) archives. 
24.09 2024-11-29 From 839151eaaad24771892afaae6bac690e31e58384 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Thu, 12 Feb 2026 00:00:00 +0000 Subject: [PATCH 3/3] 26.00 --- C/7zVersion.h | 10 +- C/7zWindows.h | 10 +- C/7zip_gcc_c.mak | 2 +- C/CpuArch.c | 4 +- C/CpuArch.h | 19 +- C/HuffEnc.c | 9 +- C/HuffEnc.h | 2 +- C/Xxh64.c | 98 +- CPP/7zip/7zip_gcc.mak | 4 +- CPP/7zip/Archive/7z/7zUpdate.cpp | 2 +- CPP/7zip/Archive/ComHandler.cpp | 1364 ++++++++++++++------- CPP/7zip/Archive/CpioHandler.cpp | 2 +- CPP/7zip/Archive/QcowHandler.cpp | 5 +- CPP/7zip/Archive/Rar/Rar5Handler.cpp | 11 +- CPP/7zip/Archive/Rar/RarHandler.cpp | 22 +- CPP/7zip/Archive/Tar/TarHandler.cpp | 35 +- CPP/7zip/Archive/Tar/TarIn.cpp | 155 ++- CPP/7zip/Archive/Tar/TarItem.h | 1 + CPP/7zip/Archive/Udf/UdfIn.cpp | 18 +- CPP/7zip/Archive/Udf/UdfIn.h | 7 +- CPP/7zip/Archive/Zip/ZipIn.cpp | 90 +- CPP/7zip/Archive/Zip/ZipOut.cpp | 44 +- CPP/7zip/Bundles/SFXCon/SfxCon.cpp | 2 +- CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp | 50 +- CPP/7zip/Common/FileStreams.cpp | 2 +- CPP/7zip/Common/FileStreams.h | 4 +- CPP/7zip/UI/Common/ArchiveCommandLine.cpp | 2 +- CPP/7zip/UI/Common/ArchiveName.cpp | 4 +- CPP/7zip/UI/Common/Bench.cpp | 9 +- CPP/7zip/UI/Console/List.cpp | 4 +- CPP/7zip/UI/FileManager/BrowseDialog2.cpp | 94 +- CPP/7zip/UI/FileManager/FSFolder.cpp | 8 +- CPP/7zip/UI/FileManager/LangPage.cpp | 3 +- CPP/7zip/UI/FileManager/MenuPage.cpp | 3 +- CPP/7zip/UI/FileManager/PanelItemOpen.cpp | 25 +- CPP/7zip/UI/FileManager/PanelMenu.cpp | 4 +- CPP/7zip/UI/FileManager/PanelSort.cpp | 55 +- CPP/7zip/UI/GUI/BenchmarkDialog.cpp | 55 +- CPP/7zip/UI/GUI/CompressDialog.cpp | 129 +- CPP/7zip/UI/GUI/CompressDialog.rc | 4 +- CPP/7zip/UI/GUI/ExtractDialog.cpp | 3 +- CPP/Common/Common0.h | 5 +- CPP/Common/MyBuffer.h | 48 +- CPP/Windows/Control/ComboBox.cpp | 9 + CPP/Windows/Control/ComboBox.h | 2 + CPP/Windows/FileFind.cpp | 13 +- CPP/Windows/FileFind.h | 6 +- CPP/Windows/SecurityUtils.h 
| 4 + CPP/Windows/System.cpp | 10 +- CPP/Windows/System.h | 23 +- CPP/Windows/SystemInfo.cpp | 4 +- CPP/Windows/TimeUtils.h | 8 + DOC/7zip.wxs | 4 +- DOC/License.txt | 6 +- DOC/readme.txt | 50 +- DOC/src-history.txt | 182 +-- 56 files changed, 1764 insertions(+), 984 deletions(-) diff --git a/C/7zVersion.h b/C/7zVersion.h index b6142e9..770370a 100644 --- a/C/7zVersion.h +++ b/C/7zVersion.h @@ -1,7 +1,7 @@ -#define MY_VER_MAJOR 25 -#define MY_VER_MINOR 1 +#define MY_VER_MAJOR 26 +#define MY_VER_MINOR 0 #define MY_VER_BUILD 0 -#define MY_VERSION_NUMBERS "25.01" +#define MY_VERSION_NUMBERS "26.00" #define MY_VERSION MY_VERSION_NUMBERS #ifdef MY_CPU_NAME @@ -10,12 +10,12 @@ #define MY_VERSION_CPU MY_VERSION #endif -#define MY_DATE "2025-08-03" +#define MY_DATE "2026-02-12" #undef MY_COPYRIGHT #undef MY_VERSION_COPYRIGHT_DATE #define MY_AUTHOR_NAME "Igor Pavlov" #define MY_COPYRIGHT_PD "Igor Pavlov : Public domain" -#define MY_COPYRIGHT_CR "Copyright (c) 1999-2025 Igor Pavlov" +#define MY_COPYRIGHT_CR "Copyright (c) 1999-2026 Igor Pavlov" #ifdef USE_COPYRIGHT_CR #define MY_COPYRIGHT MY_COPYRIGHT_CR diff --git a/C/7zWindows.h b/C/7zWindows.h index 42c6db8..381159e 100644 --- a/C/7zWindows.h +++ b/C/7zWindows.h @@ -1,11 +1,17 @@ -/* 7zWindows.h -- StdAfx -2023-04-02 : Igor Pavlov : Public domain */ +/* 7zWindows.h -- Windows.h and related code +Igor Pavlov : Public domain */ #ifndef ZIP7_INC_7Z_WINDOWS_H #define ZIP7_INC_7Z_WINDOWS_H #ifdef _WIN32 +#if defined(_MSC_VER) && _MSC_VER >= 1950 && !defined(__clang__) // VS2026 +// and some other windows files need that option +// VS2026: wtypesbase.h: warning C4865: 'tagCLSCTX': the underlying type will change from 'int' to 'unsigned int' when '/Zc:enumTypes' is specified on the command line +#pragma warning(disable : 4865) +#endif + #if defined(__clang__) # pragma clang diagnostic push #endif diff --git a/C/7zip_gcc_c.mak b/C/7zip_gcc_c.mak index 195d23d..006cfe0 100644 --- a/C/7zip_gcc_c.mak +++ b/C/7zip_gcc_c.mak @@
-106,7 +106,7 @@ DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll endif -LIB2 = -lOle32 -loleaut32 -luuid -ladvapi32 -lUser32 -lShell32 +LIB2 = -lole32 -loleaut32 -luuid -ladvapi32 -luser32 -lshell32 CFLAGS_EXTRA = -DUNICODE -D_UNICODE # -Wno-delete-non-virtual-dtor diff --git a/C/CpuArch.c b/C/CpuArch.c index 6e02551..342280d 100644 --- a/C/CpuArch.c +++ b/C/CpuArch.c @@ -859,7 +859,7 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) #define Z7_GETAUXV_AVAILABLE -#else +#elif !defined(__QNXNTO__) // #pragma message("=== is not NEW GLIBC === ") #if defined __has_include #if __has_include () @@ -877,7 +877,7 @@ BoolInt CPU_IsSupported_AES (void) { return APPLE_CRYPTO_SUPPORT_VAL; } #ifdef USE_HWCAP -#if defined(__FreeBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) static unsigned long MY_getauxval(int aux) { unsigned long val; diff --git a/C/CpuArch.h b/C/CpuArch.h index 1690a5b..c682720 100644 --- a/C/CpuArch.h +++ b/C/CpuArch.h @@ -31,7 +31,12 @@ MY_CPU_64BIT means that processor can work with 64-bit registers. #define MY_CPU_NAME "x32" #define MY_CPU_SIZEOF_POINTER 4 #else - #define MY_CPU_NAME "x64" + #if defined(__APX_EGPR__) || defined(__EGPR__) + #define MY_CPU_NAME "x64-apx" + #define MY_CPU_AMD64_APX + #else + #define MY_CPU_NAME "x64" + #endif #define MY_CPU_SIZEOF_POINTER 8 #endif #define MY_CPU_64BIT @@ -596,8 +601,20 @@ problem-4 : performace: #define SetBe32a(p, v) { *(UInt32 *)(void *)(p) = (v); } #define SetBe16a(p, v) { *(UInt16 *)(void *)(p) = (v); } +// gcc and clang for powerpc can transform load byte access to load reverse word access. +// so we can use byte access instead of word access.
Z7_BSWAP64 can be slow +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_64BIT) +#define GetUi64a(p) Z7_BSWAP64 (*(const UInt64 *)(const void *)(p)) +#else #define GetUi64a(p) GetUi64(p) +#endif + +#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) +#define GetUi32a(p) Z7_BSWAP32 (*(const UInt32 *)(const void *)(p)) +#else #define GetUi32a(p) GetUi32(p) +#endif + #define GetUi16a(p) GetUi16(p) #define SetUi32a(p, v) SetUi32(p, v) #define SetUi16a(p, v) SetUi16(p, v) diff --git a/C/HuffEnc.c b/C/HuffEnc.c index cbf8c22..297b41a 100644 --- a/C/HuffEnc.c +++ b/C/HuffEnc.c @@ -13,7 +13,7 @@ Igor Pavlov : Public domain */ #define NUM_BITS 10 #define MASK ((1u << NUM_BITS) - 1) #define FREQ_MASK (~(UInt32)MASK) -#define NUM_COUNTERS (48 * 2) +#define NUM_COUNTERS (104 * 2) // (80 * 2) or (128 * 2) : ((prime_number + 1) * 2) for smaller code. #if 1 && (defined(MY_CPU_LE) || defined(MY_CPU_BE)) #if defined(MY_CPU_LE) @@ -95,9 +95,10 @@ void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned numSy counters[1] = 0; for (i = 2; i != NUM_COUNTERS; i += 2) { - unsigned c; - c = (counters )[i]; (counters )[i] = num; num += c; - c = (counters + 1)[i]; (counters + 1)[i] = num; num += c; + const unsigned c0 = (counters )[i]; + const unsigned c1 = (counters + 1)[i]; + (counters )[i] = num; num += c0; + (counters + 1)[i] = num; num += c1; } counters[0] = num; // we want to write (freq==0) symbols to the end of (p) array { diff --git a/C/HuffEnc.h b/C/HuffEnc.h index 2217f55..45567d0 100644 --- a/C/HuffEnc.h +++ b/C/HuffEnc.h @@ -16,7 +16,7 @@ Conditions: 1 <= maxLen <= 16 = Z7_HUFFMAN_LEN_MAX Num_Items(p) >= HUFFMAN_TEMP_SIZE(num) */ -void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, UInt32 num, UInt32 maxLen); +void Huffman_Generate(const UInt32 *freqs, UInt32 *p, Byte *lens, unsigned num, unsigned maxLen); EXTERN_C_END diff --git a/C/Xxh64.c b/C/Xxh64.c index dc02a02..660e0be 100644 --- a/C/Xxh64.c +++ b/C/Xxh64.c @@ -1,6 +1,6 @@ /*
Xxh64.c -- XXH64 hash calculation original code: Copyright (c) Yann Collet. -2023-08-18 : modified by Igor Pavlov. +modified by Igor Pavlov. This source code is licensed under BSD 2-Clause License. */ @@ -27,6 +27,14 @@ void Xxh64State_Init(CXxh64State *p) #if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) && defined(_MSC_VER) #define Z7_XXH64_USE_ASM +#elif !defined(MY_CPU_LE_UNALIGN_64) // && defined (MY_CPU_LE) + #define Z7_XXH64_USE_ALIGNED +#endif + +#ifdef Z7_XXH64_USE_ALIGNED + #define Xxh64State_UpdateBlocks_Unaligned_Select Xxh64State_UpdateBlocks_Unaligned +#else + #define Xxh64State_UpdateBlocks_Unaligned_Select Xxh64State_UpdateBlocks #endif #if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) \ @@ -188,32 +196,76 @@ Xxh64State_UpdateBlocks(CXxh64State *p, const void *data, const void *end) #else +#ifdef Z7_XXH64_USE_ALIGNED +static +#endif void Z7_NO_INLINE Z7_FASTCALL -Xxh64State_UpdateBlocks(CXxh64State *p, const void *_data, const void *end) +Xxh64State_UpdateBlocks_Unaligned_Select(CXxh64State *p, const void *_data, const void *end) { const Byte *data = (const Byte *)_data; - UInt64 v[4]; - v[0] = p->v[0]; - v[1] = p->v[1]; - v[2] = p->v[2]; - v[3] = p->v[3]; + UInt64 v0, v1, v2, v3; + v0 = p->v[0]; + v1 = p->v[1]; + v2 = p->v[2]; + v3 = p->v[3]; do { - v[0] = Xxh64_Round(v[0], GetUi64(data)); data += 8; - v[1] = Xxh64_Round(v[1], GetUi64(data)); data += 8; - v[2] = Xxh64_Round(v[2], GetUi64(data)); data += 8; - v[3] = Xxh64_Round(v[3], GetUi64(data)); data += 8; + v0 = Xxh64_Round(v0, GetUi64(data)); data += 8; + v1 = Xxh64_Round(v1, GetUi64(data)); data += 8; + v2 = Xxh64_Round(v2, GetUi64(data)); data += 8; + v3 = Xxh64_Round(v3, GetUi64(data)); data += 8; } while (data != end); - p->v[0] = v[0]; - p->v[1] = v[1]; - p->v[2] = v[2]; - p->v[3] = v[3]; + p->v[0] = v0; + p->v[1] = v1; + p->v[2] = v2; + p->v[3] = v3; } -#endif + +#ifdef Z7_XXH64_USE_ALIGNED + +static +void +Z7_NO_INLINE +Z7_FASTCALL +Xxh64State_UpdateBlocks_Aligned(CXxh64State *p, const 
void *_data, const void *end) +{ + const Byte *data = (const Byte *)_data; + UInt64 v0, v1, v2, v3; + v0 = p->v[0]; + v1 = p->v[1]; + v2 = p->v[2]; + v3 = p->v[3]; + do + { + v0 = Xxh64_Round(v0, GetUi64a(data)); data += 8; + v1 = Xxh64_Round(v1, GetUi64a(data)); data += 8; + v2 = Xxh64_Round(v2, GetUi64a(data)); data += 8; + v3 = Xxh64_Round(v3, GetUi64a(data)); data += 8; + } + while (data != end); + p->v[0] = v0; + p->v[1] = v1; + p->v[2] = v2; + p->v[3] = v3; +} + +void +Z7_NO_INLINE +Z7_FASTCALL +Xxh64State_UpdateBlocks(CXxh64State *p, const void *data, const void *end) +{ + if (((unsigned)(ptrdiff_t)data & 7) == 0) + Xxh64State_UpdateBlocks_Aligned(p, data, end); + else + Xxh64State_UpdateBlocks_Unaligned(p, data, end); +} + +#endif // Z7_XXH64_USE_ALIGNED +#endif // Z7_XXH64_USE_ASM UInt64 Xxh64State_Digest(const CXxh64State *p, const void *_data, UInt64 count) { @@ -306,12 +358,22 @@ void Xxh64_Update(CXxh64 *p, const void *_data, size_t size) while (--rem); if (cnt != 32) return; - Xxh64State_UpdateBlocks(&p->state, p->buf64, &p->buf64[4]); +#ifdef Z7_XXH64_USE_ALIGNED + Xxh64State_UpdateBlocks_Aligned +#else + Xxh64State_UpdateBlocks_Unaligned_Select +#endif + (&p->state, p->buf64, &p->buf64[4]); } if (size &= ~(size_t)31) { - Xxh64State_UpdateBlocks(&p->state, data, data + size); +#ifdef Z7_XXH64_USE_ALIGNED + if (((unsigned)(ptrdiff_t)data & 7) == 0) + Xxh64State_UpdateBlocks_Aligned(&p->state, data, data + size); + else +#endif + Xxh64State_UpdateBlocks_Unaligned_Select(&p->state, data, data + size); data += size; } diff --git a/CPP/7zip/7zip_gcc.mak b/CPP/7zip/7zip_gcc.mak index 12f1ef2..a78c0fa 100644 --- a/CPP/7zip/7zip_gcc.mak +++ b/CPP/7zip/7zip_gcc.mak @@ -142,8 +142,8 @@ MY_MKDIR=mkdir DEL_OBJ_EXE = -$(RM) $(O)\*.o $(O)\$(PROG).exe $(O)\$(PROG).dll endif -LIB2_GUI = -lOle32 -lGdi32 -lComctl32 -lComdlg32 -lShell32 $(LIB_HTMLHELP) -LIB2 = -loleaut32 -luuid -ladvapi32 -lUser32 $(LIB2_GUI) +LIB2_GUI = -lole32 -lgdi32 -lcomctl32 -lcomdlg32 -lshell32 
$(LIB_HTMLHELP) +LIB2 = -loleaut32 -luuid -ladvapi32 -luser32 $(LIB2_GUI) # v24.00: -DUNICODE and -D_UNICODE are defined in precompilation header files # CXXFLAGS_EXTRA = -DUNICODE -D_UNICODE diff --git a/CPP/7zip/Archive/7z/7zUpdate.cpp b/CPP/7zip/Archive/7z/7zUpdate.cpp index c8c5d26..6ba04a2 100644 --- a/CPP/7zip/Archive/7z/7zUpdate.cpp +++ b/CPP/7zip/Archive/7z/7zUpdate.cpp @@ -721,7 +721,7 @@ static int CompareEmptyItems(const unsigned *p1, const unsigned *p2, void *param return (u1.IsDir && u1.IsAnti) ? -n : n; } -static const char *g_Exts = +static const char * const g_Exts = " 7z xz lzma ace arc arj bz tbz bz2 tbz2 cab deb gz tgz ha lha lzh lzo lzx pak rar rpm sit zoo" " zip jar ear war msi" " 3gp avi mov mpeg mpg mpe wmv" diff --git a/CPP/7zip/Archive/ComHandler.cpp b/CPP/7zip/Archive/ComHandler.cpp index 144369e..40a5349 100644 --- a/CPP/7zip/Archive/ComHandler.cpp +++ b/CPP/7zip/Archive/ComHandler.cpp @@ -2,41 +2,62 @@ #include "StdAfx.h" -#include "../../../C/Alloc.h" #include "../../../C/CpuArch.h" -#include "../../Common/IntToString.h" #include "../../Common/ComTry.h" -#include "../../Common/MyCom.h" -#include "../../Common/MyBuffer.h" -#include "../../Common/MyString.h" #include "../../Windows/PropVariant.h" #include "../Common/LimitedStreams.h" #include "../Common/ProgressUtils.h" #include "../Common/RegisterArc.h" +#include "../Common/StreamObjects.h" #include "../Common/StreamUtils.h" #include "../Compress/CopyCoder.h" -#define Get16(p) GetUi16(p) -#define Get32(p) GetUi32(p) +#include "Common/ItemNameUtils.h" + +#define Get16(p) GetUi16a(p) +#define Get32(p) GetUi32a(p) + +// we don't expect to get deleted files in real files +// define Z7_COMPOUND_SHOW_DELETED for debug +// #define Z7_COMPOUND_SHOW_DELETED namespace NArchive { namespace NCom { -static const Byte kSignature[] = - { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 }; +static const unsigned k_Long_path_level_limit = 256; -enum EType +static const Byte kSignature[] = + { 0xd0, 0xcf, 
0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; + +// encoded "[!]MsiPatchSequence" name in "msp" file +static const Byte k_Sequence_msp[] = + { 0x40, 0x48, 0x96, 0x45, 0x6c, 0x3e, 0xe4, 0x45, + 0xe6, 0x42, 0x16, 0x42, 0x37, 0x41, 0x27, 0x41, + 0x37, 0x41, 0, 0 }; + +// encoded "MergeModule.CABinet" name in "msm" file +static const Byte k_Sequence_msm[] = + { 0x16, 0x42, 0xb5, 0x42, 0xa8, 0x3d, 0xf2, 0x41, + 0xf8, 0x43, 0xa8, 0x47, 0x8c, 0x3a, 0x0b, 0x43, + 0x31, 0x42, 0x37, 0x48, 0, 0 }; + +// static const Byte k_CLSID_AAF_V3[] = { 0x41, 0x41, 0x46, 0x42, 0x0d, 0x00, 0x4f, 0x4d, 0x06, 0x0e, 0x2b, 0x34, 0x01, 0x01, 0x01, 0xff }; +// static const Byte k_CLSID_AAF_V4[] = { 0x01, 0x02, 0x01, 0x0d, 0x00, 0x02, 0x00, 0x00, 0x06, 0x0e, 0x2b, 0x34, 0x03, 0x02, 0x01, 0x01 }; + +enum EArcType { k_Type_Common, k_Type_Msi, k_Type_Msp, + k_Type_Msm, k_Type_Doc, k_Type_Ppt, - k_Type_Xls + k_Type_Xls, + k_Type_Aaf }; static const char * const kExtensions[] = @@ -44,35 +65,63 @@ static const char * const kExtensions[] = "compound" , "msi" , "msp" + , "msm" , "doc" , "ppt" , "xls" + , "aaf" }; namespace NFatID { - // static const UInt32 kFree = 0xFFFFFFFF; - static const UInt32 kEndOfChain = 0xFFFFFFFE; - // static const UInt32 kFatSector = 0xFFFFFFFD; - // static const UInt32 kMatSector = 0xFFFFFFFC; - static const UInt32 kMaxValue = 0xFFFFFFFA; + static const UInt32 kFree = 0xffffffff; + static const UInt32 kEndOfChain = 0xfffffffe; + static const UInt32 kFatSector = 0xfffffffd; + static const UInt32 k_DIF_SECT = 0xfffffffc; // double-indirect file allocation table (DIFAT) + static const UInt32 kMaxValue = 0xfffffffa; } namespace NItemType { - static const Byte kEmpty = 0; - static const Byte kStorage = 1; - // static const Byte kStream = 2; - // static const Byte kLockBytes = 3; - // static const Byte kProperty = 4; - static const Byte kRootStorage = 5; + static const unsigned kEmpty = 0; + static const unsigned kStorage = 1; + static const unsigned kStream = 2; + // static const 
unsigned kLockBytes = 3; + // static const unsigned kProperty = 4; + static const unsigned kRootStorage = 5; +} + +static const unsigned k_MiniSectorSizeBits = 6; +static const UInt32 k_LongStreamMinSize = 1 << 12; + +static const unsigned k_Msi_NumBits = 6; +static const unsigned k_Msi_NumChars = 1 << k_Msi_NumBits; +static const unsigned k_Msi_CharMask = k_Msi_NumChars - 1; +static const unsigned k_Msi_UnicodeRange = k_Msi_NumChars * (k_Msi_NumChars + 1); +static const unsigned k_Msi_StartUnicodeChar = 0x3800; +static const unsigned k_Msi_SpecUnicodeChar = k_Msi_StartUnicodeChar + k_Msi_UnicodeRange; +// (k_Msi_SpecUnicodeChar == 0x4840) is used as special symbol that is used +// as first character in some names in dir entries +/* +static bool IsMsiName(const Byte *p) +{ + unsigned c = Get16(p); + c -= k_Msi_StartUnicodeChar; + return c <= k_Msi_UnicodeRange; +} +*/ + +Z7_FORCE_INLINE static bool IsLargeStream(UInt64 size) +{ + return size >= k_LongStreamMinSize; } static const unsigned kNameSizeMax = 64; +static const UInt32 k_Item_Level_Unused = (UInt32)0 - 1; struct CItem { - Byte Name[kNameSizeMax]; + Byte Name[kNameSizeMax]; // must be aligned for 2-bytes // UInt16 NameSize; // UInt32 Flags; FILETIME CTime; @@ -82,484 +131,792 @@ struct CItem UInt32 RightDid; UInt32 SonDid; UInt32 Sid; - Byte Type; + unsigned Type; // Byte : we use unsigned instead of Byte for alignment - bool IsEmpty() const { return Type == NItemType::kEmpty; } + UInt32 Level; + + bool IsEmptyType() const { return Type == NItemType::kEmpty; } bool IsDir() const { return Type == NItemType::kStorage || Type == NItemType::kRootStorage; } + bool IsStorage() const { return Type == NItemType::kStorage; } - void Parse(const Byte *p, bool mode64bit); + bool IsLevel_Unused() const { return Level == k_Item_Level_Unused; } + + // bool IsSpecMsiName() const { return Get16(Name) == k_Msi_SpecUnicodeChar; } + bool AreMsiChars() const + { + for (unsigned i = 0; i < kNameSizeMax; i += 2) + { + unsigned c 
= Get16(Name + i); + if (c == 0) + break; + c -= k_Msi_StartUnicodeChar; + if (c <= k_Msi_UnicodeRange) + return true; + } + return false; + } + bool Parse(const Byte *p, bool mode64bit); }; + +static const UInt32 k_Ref_Parent_Root = 0xffffffff; + struct CRef { - int Parent; - UInt32 Did; + UInt32 Parent; // index in Refs[] + UInt32 Did; // index in Items[] }; + class CDatabase { - CObjArray MiniSids; - - HRESULT AddNode(int parent, UInt32 did); - public: + CRecordVector Refs; + CObjectVector Items; CObjArray Fat; CObjArray Mat; - CObjectVector Items; - CRecordVector Refs; -private: - UInt32 NumSectorsInMiniStream; -public: - UInt32 MatSize; - UInt32 FatSize; + CObjArray MiniSids; - UInt32 LongStreamMinSize; + UInt32 FatSize; + UInt32 MatSize; + UInt32 NumSectors_in_MiniStream; + + // UInt32 LongStreamMinSize; unsigned SectorSizeBits; - unsigned MiniSectorSizeBits; Int32 MainSubfile; - EType Type; + EArcType Type; + + bool IsArc; + bool HeadersError; + // bool IsMsi; UInt64 PhySize; - UInt64 PhySize_Aligned; + UInt64 PhySize_Unaligned; + // UInt64 FreeSize; + IArchiveOpenCallback *OpenCallback; + UInt32 Callback_Cur; + +private: + /* + HRESULT IncreaseOpenTotal(UInt32 numSects) + { + if (!OpenCallback) + return S_OK; + const UInt64 total = (UInt64)(Callback_Cur + numSects) << SectorSizeBits; + return OpenCallback->SetTotal(NULL, &total); + } + */ + HRESULT AddNodes(); + HRESULT ReadSector(IInStream *inStream, Byte *buf, UInt32 sid); + HRESULT ReadIDs(IInStream *inStream, Byte *buf, UInt32 sid, UInt32 *dest); + HRESULT Check_Item(unsigned index); + +public: bool IsNotArcType() const { return Type != k_Type_Msi && - Type != k_Type_Msp; + Type != k_Type_Msp && + Type != k_Type_Msm; } - void UpdatePhySize(UInt64 val, UInt64 val_Aligned) - { - if (PhySize < val) - PhySize = val; - if (PhySize_Aligned < val_Aligned) - PhySize_Aligned = val_Aligned; - } - HRESULT ReadSector(IInStream *inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid); - HRESULT ReadIDs(IInStream 
*inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid, UInt32 *dest); - - HRESULT Update_PhySize_WithItem(unsigned index); - void Clear(); - bool IsLargeStream(UInt64 size) const { return size >= LongStreamMinSize; } UString GetItemPath(UInt32 index) const; UInt64 GetItemPackSize(UInt64 size) const { - const UInt64 mask = ((UInt32)1 << (IsLargeStream(size) ? SectorSizeBits : MiniSectorSizeBits)) - 1; - return (size + mask) & ~mask; - } - - bool GetMiniCluster(UInt32 sid, UInt64 &res) const - { - const unsigned subBits = SectorSizeBits - MiniSectorSizeBits; - const UInt32 fid = sid >> subBits; - if (fid >= NumSectorsInMiniStream) - return false; - res = (((UInt64)MiniSids[fid] + 1) << subBits) + (sid & ((1 << subBits) - 1)); - return true; + const UInt64 mask = ((UInt32)1 << (IsLargeStream(size) ? SectorSizeBits : k_MiniSectorSizeBits)) - 1; + return (size + mask) & ~(UInt64)mask; } HRESULT Open(IInStream *inStream); }; -HRESULT CDatabase::ReadSector(IInStream *inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid) +HRESULT CDatabase::ReadSector(IInStream *inStream, Byte *buf, UInt32 sid) { - const UInt64 end = ((UInt64)sid + 2) << sectorSizeBits; - UpdatePhySize(end, end); - RINOK(InStream_SeekSet(inStream, (((UInt64)sid + 1) << sectorSizeBits))) - return ReadStream_FALSE(inStream, buf, (size_t)1 << sectorSizeBits); + const unsigned sb = SectorSizeBits; + RINOK(InStream_SeekSet(inStream, ((UInt64)sid + 1) << sb)) + RINOK(ReadStream_FALSE(inStream, buf, (size_t)1 << sb)) + if (OpenCallback) + { + if ((++Callback_Cur & 0xfff) == 0) + { + const UInt64 processed = (UInt64)Callback_Cur << sb; + const UInt64 numFiles = Items.Size(); + RINOK(OpenCallback->SetCompleted(&numFiles, &processed)) + } + } + return S_OK; } -HRESULT CDatabase::ReadIDs(IInStream *inStream, Byte *buf, unsigned sectorSizeBits, UInt32 sid, UInt32 *dest) +HRESULT CDatabase::ReadIDs(IInStream *inStream, Byte *buf, UInt32 sid, UInt32 *dest) { - RINOK(ReadSector(inStream, buf, sectorSizeBits, 
sid)) - const UInt32 sectorSize = (UInt32)1 << sectorSizeBits; - for (UInt32 t = 0; t < sectorSize; t += 4) + RINOK(ReadSector(inStream, buf, sid)) + const size_t sectorSize = (size_t)1 << SectorSizeBits; + for (size_t t = 0; t < sectorSize; t += 4) *dest++ = Get32(buf + t); return S_OK; } + +Z7_FORCE_INLINE static void GetFileTimeFromMem(const Byte *p, FILETIME *ft) { ft->dwLowDateTime = Get32(p); ft->dwHighDateTime = Get32(p + 4); } -void CItem::Parse(const Byte *p, bool mode64bit) +bool CItem::Parse(const Byte *p, bool mode64bit) { memcpy(Name, p, kNameSizeMax); - // NameSize = Get16(p + 64); + unsigned i; + for (i = 0; i < kNameSizeMax; i += 2) + if (*(const UInt16 *)(const void *)(p + i) == 0) + break; +#if 0 // 1 : for debug : for more strict field check + { + for (unsigned k = i; k < kNameSizeMax; k += 2) + if (*(const UInt16 *)(const void *)(p + k) != 0) + return false; + } +#endif Type = p[66]; + // DOC: names are limited to 32 UTF-16 code points, including the terminating null character. 
+ if (!IsEmptyType()) + if (i == kNameSizeMax || i + 2 != Get16(p + 64)) // NameLength + return false; + if (p[67] >= 2) // Color: 0 (red) or 1 (black) + return false; LeftDid = Get32(p + 68); RightDid = Get32(p + 72); SonDid = Get32(p + 76); - // Flags = Get32(p + 96); + // if (Get32(p + 96) == 0) return false; // State / Flags GetFileTimeFromMem(p + 100, &CTime); GetFileTimeFromMem(p + 108, &MTime); Sid = Get32(p + 116); Size = Get32(p + 120); + /* MS DOC: it is recommended that parsers ignore the most + significant 32 bits of this field in version 3 compound files */ if (mode64bit) Size |= ((UInt64)Get32(p + 124) << 32); + return true; } + void CDatabase::Clear() { + Type = k_Type_Common; + MainSubfile = -1; + IsArc = false; + HeadersError = false; + // IsMsi = false; PhySize = 0; - PhySize_Aligned = 0; + PhySize_Unaligned = 0; + // FreeSize = 0; + Callback_Cur = 0; + // OpenCallback = NULL; + FatSize = 0; + MatSize = 0; + NumSectors_in_MiniStream = 0; + Fat.Free(); - MiniSids.Free(); Mat.Free(); + MiniSids.Free(); Items.Clear(); Refs.Clear(); } -static const UInt32 kNoDid = 0xFFFFFFFF; -HRESULT CDatabase::AddNode(int parent, UInt32 did) +static const UInt32 kNoDid = 0xffffffff; + +HRESULT CDatabase::AddNodes() { - if (did == kNoDid) + UInt32 index = Items[0].SonDid; // Items[0] is root item + if (index == kNoDid) // no files case return S_OK; - if (did >= (UInt32)Items.Size()) + if (index == 0 || index >= Items.Size()) return S_FALSE; - const CItem &item = Items[did]; - if (item.IsEmpty()) - return S_FALSE; - CRef ref; - ref.Parent = parent; - ref.Did = did; - const unsigned index = Refs.Add(ref); - if (Refs.Size() > Items.Size()) - return S_FALSE; - RINOK(AddNode(parent, item.LeftDid)) - RINOK(AddNode(parent, item.RightDid)) - if (item.IsDir()) + + CObjArray itemParents(Items.Size()); + CByteArr states(Items.Size()); + memset(itemParents, 0, (size_t)Items.Size() * sizeof(itemParents[0])); // optional + memset(states, 0, Items.Size()); + +#if 1 // 0 : for debug 
+ const UInt32 k_exitParent = 0; + const UInt32 k_startLevel = 1; + // we don't show "Root Entry" dir + states[0] = 3; // we mark root node as processed, also we block any cycle links to root node + // itemParents[0] = 0xffffffff; // optional / unused value +#else + // we show item[0] "Root Entry" dir + const UInt32 k_exitParent = 0xffffffff; + const UInt32 k_startLevel = 0; + index = 0; +#endif + + UInt32 level = k_startLevel; // directory level + unsigned state = 0; + UInt32 parent = k_exitParent; // in Items[], itemParents[], states[] + UInt32 refParent = k_Ref_Parent_Root; // in Refs[] + + for (;;) { - RINOK(AddNode((int)index, item.SonDid)) + if (state >= 3) + { + // we return to parent node + if (state != 3) + return E_FAIL; + index = parent; + if (index == k_exitParent) + break; + if (index >= Items.Size()) + return E_FAIL; // (index) was checked already + parent = itemParents[index]; + state = states[index]; + if (state == 0) + return E_FAIL; + if (state == 2) + { + // we return to parent Dir node + if (refParent >= Refs.Size()) + return E_FAIL; + refParent = Refs[refParent].Parent; + level--; + } + continue; + } + + if (index >= Items.Size()) + return S_FALSE; + CItem &item = Items[index]; + if (item.IsEmptyType()) + return S_FALSE; + item.Level = level; + state++; + states[index] = (Byte)state; // we mark current (index) node as used node + + UInt32 newIndex; + if (state != 2) + newIndex = (state < 2) ? 
item.LeftDid : item.RightDid; + else + { + CRef ref; + ref.Parent = refParent; + ref.Did = index; + const unsigned refIndex = Refs.Add(ref); + if (!item.IsDir()) + continue; + newIndex = item.SonDid; + if (newIndex != kNoDid) + { + level++; + refParent = refIndex; + } + } + + if (newIndex != kNoDid) + { + itemParents[index] = parent; + state = 0; + parent = index; + index = newIndex; + if (index >= Items.Size() || states[index]) + return S_FALSE; + } } + + if (level != k_startLevel || refParent != k_Ref_Parent_Root) + return E_FAIL; +#if 1 // 1 : optional + // we check that all non-empty items were processed correctly + FOR_VECTOR(i, Items) + { + const unsigned st = states[i]; + if (Items[i].IsEmptyType()) + { + if (st) + return E_FAIL; + } + else if (st == 3) + continue; + else if (st) + return E_FAIL; + else + return S_FALSE; // there is unused directory item + } +#endif return S_OK; } -static UString CompoundNameToFileName(const UString &s) -{ - UString res; - for (unsigned i = 0; i < s.Len(); i++) - { - const wchar_t c = s[i]; - if ((unsigned)(int)c < 0x20) - { - res.Add_Char('['); - res.Add_UInt32((UInt32)(unsigned)(int)c); - res.Add_Char(']'); - } - else - res += c; - } - return res; -} static const char k_Msi_Chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz._"; - -// static const char * const k_Msi_ID = ""; // "{msi}"; -static const char k_Msi_SpecChar = '!'; - -static const unsigned k_Msi_NumBits = 6; -static const unsigned k_Msi_NumChars = 1 << k_Msi_NumBits; -static const unsigned k_Msi_CharMask = k_Msi_NumChars - 1; -static const unsigned k_Msi_StartUnicodeChar = 0x3800; -static const unsigned k_Msi_UnicodeRange = k_Msi_NumChars * (k_Msi_NumChars + 1); - - -static bool IsMsiName(const Byte *p) -{ - UInt32 c = Get16(p); - return - c >= k_Msi_StartUnicodeChar && - c <= k_Msi_StartUnicodeChar + k_Msi_UnicodeRange; -} +static const char k_Msi_SpecChar_Replace = '!'; static bool AreEqualNames(const Byte *rawName, const char 
*asciiName) { - for (unsigned i = 0; i < kNameSizeMax / 2; i++) + for (;;) { - wchar_t c = Get16(rawName + i * 2); - wchar_t c2 = (Byte)asciiName[i]; + const unsigned c = Get16(rawName); + rawName += 2; + const unsigned c2 = (Byte)*asciiName; + asciiName++; if (c != c2) return false; - if (c == 0) + if (c2 == 0) return true; } - return false; } -static bool CompoundMsiNameToFileName(const UString &name, UString &res) + +static void MsiName_To_FileName(const Byte *p, UString &res) { res.Empty(); - for (unsigned i = 0; i < name.Len(); i++) - { - wchar_t c = name[i]; - if (c < (wchar_t)k_Msi_StartUnicodeChar || c > (wchar_t)(k_Msi_StartUnicodeChar + k_Msi_UnicodeRange)) - return false; - /* - if (i == 0) - res += k_Msi_ID; - */ - c -= (wchar_t)k_Msi_StartUnicodeChar; - - const unsigned c0 = (unsigned)c & k_Msi_CharMask; - const unsigned c1 = (unsigned)c >> k_Msi_NumBits; - - if (c1 <= k_Msi_NumChars) - { - res.Add_Char(k_Msi_Chars[c0]); - if (c1 == k_Msi_NumChars) - break; - res.Add_Char(k_Msi_Chars[c1]); - } - else - res.Add_Char(k_Msi_SpecChar); - } - return true; -} - -static UString ConvertName(const Byte *p, bool &isMsi) -{ - isMsi = false; - UString s; - for (unsigned i = 0; i < kNameSizeMax; i += 2) { - wchar_t c = Get16(p + i); + unsigned c = Get16(p + i); if (c == 0) break; - s += c; + if (c <= k_Msi_SpecUnicodeChar) + { + if (c < k_Msi_StartUnicodeChar) + { + if (c < 0x20) + { + res.Add_Char('['); + res.Add_UInt32((UInt32)c); + c = ']'; + } + } + else + { +#if 0 // 1 : for debug + if (i == 0) res += "{msi}"; +#endif + c -= k_Msi_StartUnicodeChar; + const unsigned c1 = (unsigned)c >> k_Msi_NumBits; + if (c1 <= k_Msi_NumChars) + { + res.Add_Char(k_Msi_Chars[(unsigned)c & k_Msi_CharMask]); + if (c1 == k_Msi_NumChars) + continue; + c = (Byte)k_Msi_Chars[c1]; + } + else + c = k_Msi_SpecChar_Replace; + } + } + res += (wchar_t)c; } - - UString msiName; - if (CompoundMsiNameToFileName(s, msiName)) - { - isMsi = true; - return msiName; - } - return 
CompoundNameToFileName(s); } -static UString ConvertName(const Byte *p) -{ - bool isMsi; - return ConvertName(p, isMsi); -} UString CDatabase::GetItemPath(UInt32 index) const { UString s; - while (index != kNoDid) + UString name; + unsigned level = 0; + while (index != k_Ref_Parent_Root) { const CRef &ref = Refs[index]; const CItem &item = Items[ref.Did]; if (!s.IsEmpty()) s.InsertAtFront(WCHAR_PATH_SEPARATOR); - s.Insert(0, ConvertName(item.Name)); - index = (unsigned)ref.Parent; + // if (IsMsi) + MsiName_To_FileName(item.Name, name); + // else NonMsiName_To_FileName(item.Name, name); + NItemName::NormalizeSlashes_in_FileName_for_OsPath(name); + if (name.IsEmpty()) + name = "[]"; + s.Insert(0, name); + index = ref.Parent; +#ifdef Z7_COMPOUND_SHOW_DELETED + if (item.IsLevel_Unused()) + { + s.Insert(0, L"[DELETED]" WSTRING_PATH_SEPARATOR); + break; + } +#endif + if (item.Level >= k_Long_path_level_limit && level) + { + s.Insert(0, L"[LONG_PATH]" WSTRING_PATH_SEPARATOR); + break; + } + level = 1; // level++; } return s; } -HRESULT CDatabase::Update_PhySize_WithItem(unsigned index) + +HRESULT CDatabase::Check_Item(unsigned index) { const CItem &item = Items[index]; - const bool isLargeStream = (index == 0 || IsLargeStream(item.Size)); - if (!isLargeStream) + if (item.IsEmptyType() || item.IsStorage()) return S_OK; - const unsigned bsLog = isLargeStream ? 
SectorSizeBits : MiniSectorSizeBits; - // streamSpec->Size = item.Size; - - const UInt32 clusterSize = (UInt32)1 << bsLog; - const UInt64 numClusters64 = (item.Size + clusterSize - 1) >> bsLog; - if (numClusters64 >= ((UInt32)1 << 31)) - return S_FALSE; - UInt32 sid = item.Sid; UInt64 size = item.Size; - - if (size != 0) + const bool isLargeStream = (index == 0 || IsLargeStream(size)); + if (!isLargeStream) { - for (;; size -= clusterSize) + const unsigned bsLog = k_MiniSectorSizeBits; + const UInt32 clusterSize = (UInt32)1 << bsLog; + const UInt64 numClusters = (size + clusterSize - 1) >> bsLog; + if (numClusters > MatSize) + return S_FALSE; + UInt32 sid = item.Sid; + if (size != 0) { - // if (isLargeStream) + for (;; size -= clusterSize) + { + if (sid >= MatSize) + return S_FALSE; + const unsigned subBits = SectorSizeBits - k_MiniSectorSizeBits; + const UInt32 fid = sid >> subBits; + if (fid >= NumSectors_in_MiniStream) + return false; + sid = Mat[sid]; + if (size <= clusterSize) + break; + } + } + if (sid != NFatID::kEndOfChain) + return S_FALSE; + } + else + { + const unsigned bsLog = SectorSizeBits; + const UInt32 clusterSize = (UInt32)1 << bsLog; + const UInt64 numClusters = (size + clusterSize - 1) >> bsLog; + if (numClusters > FatSize) + return S_FALSE; + UInt32 sid = item.Sid; + if (size != 0) + { + for (;; size -= clusterSize) { if (sid >= FatSize) return S_FALSE; - UInt64 end = ((UInt64)sid + 1) << bsLog; - const UInt64 end_Aligned = end + clusterSize; - if (size < clusterSize) - end += size; - else - end = end_Aligned; - UpdatePhySize(end, end_Aligned); + const UInt32 sidPrev = sid; sid = Fat[sid]; + if (size <= clusterSize) + { + const UInt64 phySize = (((UInt64)sidPrev + 1) << SectorSizeBits) + size; + if (PhySize_Unaligned < phySize) + PhySize_Unaligned = phySize; + break; + } } - if (size <= clusterSize) - break; } + if (sid != NFatID::kEndOfChain) + return S_FALSE; } - if (sid != NFatID::kEndOfChain) - return S_FALSE; return S_OK; } -// There is 
name "[!]MsiPatchSequence" in msp files -static const unsigned kMspSequence_Size = 18; -static const Byte kMspSequence[kMspSequence_Size] = - { 0x40, 0x48, 0x96, 0x45, 0x6C, 0x3E, 0xE4, 0x45, - 0xE6, 0x42, 0x16, 0x42, 0x37, 0x41, 0x27, 0x41, - 0x37, 0x41 }; HRESULT CDatabase::Open(IInStream *inStream) { - MainSubfile = -1; - Type = k_Type_Common; - const UInt32 kHeaderSize = 512; - Byte p[kHeaderSize]; - PhySize = kHeaderSize; - RINOK(ReadStream_FALSE(inStream, p, kHeaderSize)) - if (memcmp(p, kSignature, Z7_ARRAY_SIZE(kSignature)) != 0) + const unsigned kHeaderSize = 512; + UInt32 p32[kHeaderSize / 4]; + RINOK(ReadStream_FALSE(inStream, p32, kHeaderSize)) + const Byte *p = (const Byte *)(const void *)p32; + if (memcmp(p, kSignature, Z7_ARRAY_SIZE(kSignature))) return S_FALSE; - if (Get16(p + 0x1A) > 4) // majorVer + /* + if (memcmp(p + 8, k_CLSID_AAF_V3, Z7_ARRAY_SIZE(k_CLSID_AAF_V3)) == 0 || + memcmp(p + 8, k_CLSID_AAF_V4, Z7_ARRAY_SIZE(k_CLSID_AAF_V4)) == 0) + */ + if (Get32(p32 + 4) == 0x342b0e06) // simplified AAF signature check + Type = k_Type_Aaf; + if (Get16(p + 0x18) != 0x3e) // minorVer return S_FALSE; - if (Get16(p + 0x1C) != 0xFFFE) // Little-endian + const unsigned ver = Get16(p + 0x1a); // majorVer + if (ver < 3 || ver > 4) + return S_FALSE; + if (Get16(p + 0x1c) != 0xfffe) // Little-endian + return S_FALSE; + const unsigned sectorSizeBits = Get16(p + 0x1e); + if (sectorSizeBits != ver * 3) // (ver == 3 ? 
9 : 12) return S_FALSE; - unsigned sectorSizeBits = Get16(p + 0x1E); - bool mode64bit = (sectorSizeBits >= 12); - unsigned miniSectorSizeBits = Get16(p + 0x20); SectorSizeBits = sectorSizeBits; - MiniSectorSizeBits = miniSectorSizeBits; - - if (sectorSizeBits > 24 || - sectorSizeBits < 7 || - miniSectorSizeBits > 24 || - miniSectorSizeBits < 2 || - miniSectorSizeBits > sectorSizeBits) + if (Get16(p + 0x20) != k_MiniSectorSizeBits) return S_FALSE; - UInt32 numSectorsForFAT = Get32(p + 0x2C); // SAT - LongStreamMinSize = Get32(p + 0x38); - - UInt32 sectSize = (UInt32)1 << sectorSizeBits; - CByteBuffer sect(sectSize); + IsArc = true; + HeadersError = true; - unsigned ssb2 = sectorSizeBits - 2; - UInt32 numSidsInSec = (UInt32)1 << ssb2; - UInt32 numFatItems = numSectorsForFAT << ssb2; - if ((numFatItems >> ssb2) != numSectorsForFAT) + const bool mode64bit = (sectorSizeBits >= 12); // (ver == 4) + if (Get16(p + 0x22) || p32[9]) // reserved return S_FALSE; - FatSize = numFatItems; + + const UInt32 numDirSectors = Get32(p32 + 10); + // If (ver==3), the Number of Directory Sectors MUST be zero. + if (ver != 3 + (unsigned)(numDirSectors != 0)) + return S_FALSE; + if (numDirSectors > ((1u << (32 - 2)) >> (sectorSizeBits - (7 + 2)))) + return S_FALSE; + + const UInt32 numSectorsForFAT = Get32(p32 + 11); // SAT + + // MSDOC: A 512-byte sector compound file MUST be no greater than 2 GB in size for compatibility reasons. + // but actual restriction for windows compond creation code can be more strict: + // (numSectorsForFAT < (1 << 15)) : actual restriction in win10 for compound creation code + // (numSectorsForFAT <= (1 << 15)) : relaxed restriction to allow 2 GB files. 
+ if (sectorSizeBits == 9 && + numSectorsForFAT >= (1u << (31 - (9 + 9 - 2)))) // we use most strict check + return S_FALSE; + + // const UInt32 TransactionSignatureNumber = Get32(p32 + 13); + if (Get32(p32 + 14) != k_LongStreamMinSize) + return S_FALSE; + + const unsigned ssb2 = sectorSizeBits - 2; + const UInt32 numSidsInSec = (UInt32)1 << ssb2; + const UInt32 numFatItems = numSectorsForFAT << ssb2; + if (numFatItems == 0 || (numFatItems >> ssb2) != numSectorsForFAT) + return S_FALSE; + + const size_t sectSize = (size_t)1 << sectorSizeBits; + CByteArr sect(sectSize); + CByteArr used(numFatItems); + // don't change these const values. These values use same order as (0xffffffff - NFatID::const) + // const Byte k_Used_Free = 0; + const Byte k_Used_ChainTo = 1; + const Byte k_Used_FAT = 2; + const Byte k_Used_DIFAT = 3; + memset(used, 0, numFatItems); + UInt32 *fat; + { + // ========== READ FAT ========== + const UInt32 numSectorsForBat = Get32(p32 + 18); // master sector allocation table + const unsigned ssb2_m1 = ssb2 - 1; + if (numSectorsForBat > ((1u << 30) >> ssb2_m1 >> ssb2_m1)) + return S_FALSE; + const unsigned kNumHeaderBatItems = 109; + UInt32 numBatItems = kNumHeaderBatItems + (numSectorsForBat << ssb2); // real size can be smaller + CObjArray bat(numBatItems); + size_t i; + for (i = 0; i < kNumHeaderBatItems; i++) + bat[i] = Get32(p32 + 19 + i); + { + UInt32 sid = Get32(p32 + 17); + for (UInt32 s = 0; s < numSectorsForBat; s++) + { + if (sid >= numFatItems || used[sid]) + return S_FALSE; + used[sid] = k_Used_DIFAT; + RINOK(ReadIDs(inStream, sect, sid, bat + i)) + i += numSidsInSec - 1; + sid = bat[i]; + } + if (sid != NFatID::kEndOfChain // NFatID::kEndOfChain is expected value for most files + && sid != NFatID::kFree) // NFatID::kFree is used in some AAF files + return S_FALSE; + } + numBatItems = (UInt32)i; // corrected value + if (numSectorsForFAT > numBatItems) + return S_FALSE; + for (i = numSectorsForFAT; i < numBatItems; i++) + if (bat[i] != 
NFatID::kFree) + return S_FALSE; + + // RINOK(IncreaseOpenTotal(numSectorsForFAT + numDirSectors)) + + Fat.Alloc(numFatItems); + fat = Fat; + for (i = 0; i < numSectorsForFAT; i++) + { + const UInt32 sectorIndex = bat[i]; + if (sectorIndex >= numFatItems) + return S_FALSE; + if (used[sectorIndex]) + return S_FALSE; + used[sectorIndex] = k_Used_FAT; + UInt32 *fat2 = fat + ((size_t)i << ssb2); + RINOK(ReadIDs(inStream, sect, sectorIndex, fat2)) + for (size_t k = 0; k < numSidsInSec; k++) + { + const UInt32 sid = fat2[k]; + if (sid > NFatID::kMaxValue) + { + if (sid == NFatID::k_DIF_SECT + && used[((size_t)i << ssb2) + k] != k_Used_DIFAT) + return S_FALSE; + continue; + } + if (sid >= numFatItems || used[sid]) + return S_FALSE; // strict error check + used[sid] = k_Used_ChainTo; + } + } + { + for (i = 0; i < numSectorsForFAT; i++) + if (fat[bat[i]] != NFatID::kFatSector) + return S_FALSE; + } + FatSize = numFatItems; + } { - UInt32 numSectorsForBat = Get32(p + 0x48); // master sector allocation table - const UInt32 kNumHeaderBatItems = 109; - UInt32 numBatItems = kNumHeaderBatItems + (numSectorsForBat << ssb2); - if (numBatItems < kNumHeaderBatItems || ((numBatItems - kNumHeaderBatItems) >> ssb2) != numSectorsForBat) - return S_FALSE; - CObjArray bat(numBatItems); - UInt32 i; - for (i = 0; i < kNumHeaderBatItems; i++) - bat[i] = Get32(p + 0x4c + i * 4); - UInt32 sid = Get32(p + 0x44); - for (UInt32 s = 0; s < numSectorsForBat; s++) + size_t i = numFatItems; + do + if (fat[i - 1] != NFatID::kFree) + break; + while (--i); + PhySize = ((UInt64)i + 1) << sectorSizeBits; + /* + if (i) { - RINOK(ReadIDs(inStream, sect, sectorSizeBits, sid, bat + i)) - i += numSidsInSec - 1; - sid = bat[i]; + const UInt32 *lim = fat + i; + UInt32 num = 0; + do + if (*fat++ == NFatID::kFree) + num++; + while (fat != lim); + FreeSize = num << sectorSizeBits; } - numBatItems = i; - - Fat.Alloc(numFatItems); - UInt32 j = 0; - - for (i = 0; i < numFatItems; j++, i += numSidsInSec) - { - if (j >= 
numBatItems) - return S_FALSE; - RINOK(ReadIDs(inStream, sect, sectorSizeBits, bat[j], Fat + i)) - } - FatSize = numFatItems = i; + */ } UInt32 numMatItems; { - UInt32 numSectorsForMat = Get32(p + 0x40); + // ========== READ MAT ========== + const UInt32 numSectorsForMat = Get32(p32 + 16); numMatItems = (UInt32)numSectorsForMat << ssb2; if ((numMatItems >> ssb2) != numSectorsForMat) return S_FALSE; Mat.Alloc(numMatItems); - UInt32 i; - UInt32 sid = Get32(p + 0x3C); // short-sector table SID - for (i = 0; i < numMatItems; i += numSidsInSec) + UInt32 sid = Get32(p32 + 15); // short-sector table SID + if (numMatItems) + { + if (sid >= numFatItems || used[sid]) + return S_FALSE; + used[sid] = k_Used_ChainTo; + } + for (UInt32 i = 0; i < numMatItems; i += numSidsInSec) { - RINOK(ReadIDs(inStream, sect, sectorSizeBits, sid, Mat + i)) if (sid >= numFatItems) return S_FALSE; - sid = Fat[sid]; + RINOK(ReadIDs(inStream, sect, sid, Mat + i)) + sid = fat[sid]; } if (sid != NFatID::kEndOfChain) return S_FALSE; } { - CByteBuffer used(numFatItems); - for (UInt32 i = 0; i < numFatItems; i++) - used[i] = 0; - UInt32 sid = Get32(p + 0x30); // directory stream SID - for (;;) + // ========== READ DIR ITEMS ========== + UInt32 sid = Get32(p32 + 12); // directory stream SID + UInt32 numDirSectors_Processed = 0; + if (sid >= numFatItems || used[sid]) + return S_FALSE; + used[sid] = k_Used_ChainTo; + do { + // we need to check sid here becase kEndOfChain sid < numFatItems is required if (sid >= numFatItems) return S_FALSE; - if (used[sid]) + if (numDirSectors && numDirSectors_Processed >= numDirSectors) return S_FALSE; - used[sid] = 1; - RINOK(ReadSector(inStream, sect, sectorSizeBits, sid)) - for (UInt32 i = 0; i < sectSize; i += 128) + numDirSectors_Processed++; + RINOK(ReadSector(inStream, sect, sid)) + for (size_t i = 0; i < sectSize; i += (1 << 7)) { CItem item; - item.Parse(sect + i, mode64bit); + item.Level = k_Item_Level_Unused; + if (!item.Parse(sect + i, mode64bit)) + return 
S_FALSE; // we use (item.Size) check here. // so we don't need additional overflow checks for (item.Size +) in another code - if (item.Size >= ((UInt64)1 << 63)) + if ((UInt32)(item.Size >> 32) >= sectSize) // it's because FAT size is limited by (1 << 32) items. return S_FALSE; + + if (Items.IsEmpty()) + { + if (item.Type != NItemType::kRootStorage + || item.LeftDid != kNoDid + || item.RightDid != kNoDid + || item.SonDid == 0) + return S_FALSE; + if (item.Sid != NFatID::kEndOfChain) + { + if (item.Sid >= numFatItems || used[item.Sid]) + return S_FALSE; + used[item.Sid] = k_Used_ChainTo; + } + } + else if (item.IsStorage()) + { + if (item.Size != 0) // by specification + return S_FALSE; + if (item.Sid != 0 // by specification + && item.Sid != NFatID::kFree) // NFatID::kFree is used in some AAF files + return S_FALSE; + } + // else if (item.Type == NItemType::kRootStorage) return S_FALSE; + else if (item.IsEmptyType()) + { + // kNoDid is expected in *Did fileds, but rare case MSI contains zero in all fields + if ((item.Sid != 0 // expected value + && item.Sid != NFatID::kFree // NFatID::kFree is used in some AAF files + && item.Sid != NFatID::kEndOfChain) // used by some MSI file + || (item.LeftDid != kNoDid && item.LeftDid) + || (item.RightDid != kNoDid && item.RightDid) + || (item.SonDid != kNoDid && item.SonDid) + // || item.Size != 0 // the check is disabled because some MSI file contains non zero + // || Get16(item.Name) != 0 // the check is disabled because some MSI file contains some name + ) + return S_FALSE; + } + else + { + if (item.Type != NItemType::kStream) + return S_FALSE; + // NItemType::kStream case + if (item.SonDid != kNoDid) // optional check + return S_FALSE; + if (item.Size == 0) + { + if (item.Sid != NFatID::kEndOfChain) + return S_FALSE; + } + else if (IsLargeStream(item.Size)) + { + if (item.Sid >= numFatItems || used[item.Sid]) + return S_FALSE; + used[item.Sid] = k_Used_ChainTo; + } + } + Items.Add(item); } - sid = Fat[sid]; - if (sid == 
NFatID::kEndOfChain) - break; + sid = fat[sid]; } + while (sid != NFatID::kEndOfChain); } - const CItem &root = Items[0]; - { + // root stream contains all data that stored with mini Sectors + const CItem &root = Items[0]; UInt32 numSectorsInMiniStream; { - UInt64 numSatSects64 = (root.Size + sectSize - 1) >> sectorSizeBits; - if (numSatSects64 > NFatID::kMaxValue) + const UInt64 numSatSects64 = (root.Size + sectSize - 1) >> sectorSizeBits; + if (numSatSects64 > NFatID::kMaxValue + 1) return S_FALSE; numSectorsInMiniStream = (UInt32)numSatSects64; } - NumSectorsInMiniStream = numSectorsInMiniStream; - MiniSids.Alloc(numSectorsInMiniStream); { - UInt64 matSize64 = (root.Size + ((UInt64)1 << miniSectorSizeBits) - 1) >> miniSectorSizeBits; - if (matSize64 > NFatID::kMaxValue) + const UInt64 matSize64 = (root.Size + (1 << k_MiniSectorSizeBits) - 1) >> k_MiniSectorSizeBits; + if (matSize64 > numMatItems) return S_FALSE; MatSize = (UInt32)matSize64; - if (numMatItems < MatSize) - return S_FALSE; } - + MiniSids.Alloc(numSectorsInMiniStream); + UInt32 * const miniSids = MiniSids; UInt32 sid = root.Sid; for (UInt32 i = 0; ; i++) { @@ -571,95 +928,186 @@ HRESULT CDatabase::Open(IInStream *inStream) } if (i >= numSectorsInMiniStream) return S_FALSE; - MiniSids[i] = sid; if (sid >= numFatItems) return S_FALSE; - sid = Fat[sid]; + miniSids[i] = sid; + sid = fat[sid]; + } + NumSectors_in_MiniStream = numSectorsInMiniStream; + } + + + { +/* + MS DOCs: + The range lock sector covers file offsets 0x7FFFFF00-0x7FFFFFFF. + These offsets are reserved for byte-range locking to support + concurrency, transactions, and other compound file features. + The range lock sector MUST be allocated in the FAT and marked with + ENDOFCHAIN (0xFFFFFFFE), when the compound file grows beyond 2 GB. + If the compound file is greater than 2 GB and then shrinks to below 2 GB, + the range lock sector SHOULD be marked as FREESECT (0xFFFFFFFF) in the FAT. 
+*/ + { + const UInt32 lockSector = (0x7fffffff >> sectorSizeBits) - 1; + if (lockSector < numFatItems) + { + if (used[lockSector]) + return S_FALSE; + const UInt32 f = fat[lockSector]; + if (f == NFatID::kEndOfChain) + used[lockSector] = k_Used_ChainTo; // we use fake state to pass the check in loop below + else if (f != NFatID::kFree) + return S_FALSE; + } + } + for (size_t i = 0; i < numFatItems; i++) + { + UInt32 f = fat[i]; + const UInt32 u = ~(UInt32)used[i]; // (0xffffffff - used[i]) + if (f < NFatID::kMaxValue + 1) + f = NFatID::kEndOfChain; + if (f != u) + return S_FALSE; } } - RINOK(AddNode(-1, root.SonDid)) - - unsigned numCabs = 0; - - FOR_VECTOR (i, Refs) { - const CItem &item = Items[Refs[i].Did]; - if (item.IsDir() || numCabs > 1) - continue; - bool isMsiName; - const UString msiName = ConvertName(item.Name, isMsiName); - if (isMsiName && !msiName.IsEmpty()) + // Don't move that code up, becase Check_Item uses Mat[] array. + FOR_VECTOR(t, Items) { - // bool isThereExt = (msiName.Find(L'.') >= 0); - bool isMsiSpec = (msiName[0] == k_Msi_SpecChar); - if ((msiName.Len() >= 4 && StringsAreEqualNoCase_Ascii(msiName.RightPtr(4), ".cab")) - || (!isMsiSpec && msiName.Len() >= 3 && StringsAreEqualNoCase_Ascii(msiName.RightPtr(3), "exe")) - // || (!isMsiSpec && !isThereExt) - ) + RINOK(Check_Item(t)) + } + } + + RINOK(AddNodes()) + + { + // some msi (in rare cases) have unaligned size of archive, + // unaligned size of compond files is also possible if we create just one stream + // where there is no padding data after payload data in last cluster of archive + UInt64 fileSize; + RINOK(InStream_GetSize_SeekToEnd(inStream, fileSize)) + if ( fileSize < PhySize + && fileSize > PhySize - sectSize + && fileSize >= PhySize_Unaligned + && PhySize_Unaligned > PhySize - sectSize) + PhySize = PhySize_Unaligned; + } + + bool isMsi = false; + { + FOR_VECTOR (i, Refs) + { + const CItem &item = Items[Refs[i].Did]; + if (item.IsDir()) + continue; + if (item.AreMsiChars()) + 
// if (item.IsSpecMsiName()) + { + isMsi = true; + break; + } + } + } + + // IsMsi = isMsi; + if (isMsi) + { + unsigned numCabs = 0; + UString name; + FOR_VECTOR (i, Refs) + { + const CItem &item = Items[Refs[i].Did]; + if (item.IsDir() /* || item.IsSpecMsiName() */) + continue; + MsiName_To_FileName(item.Name, name); + if ( (name.Len() >= 4 && StringsAreEqualNoCase_Ascii(name.RightPtr(4), ".cab")) + || (name.Len() >= 3 && StringsAreEqualNoCase_Ascii(name.RightPtr(3), "exe")) + ) { numCabs++; + if (numCabs > 1) + { + MainSubfile = -1; + break; + } MainSubfile = (int)i; } } } - - if (numCabs > 1) - MainSubfile = -1; + if (isMsi) // we provide msi priority over AAF + Type = k_Type_Msi; + if (Type != k_Type_Aaf) { - FOR_VECTOR (t, Items) + FOR_VECTOR (i, Refs) { - Update_PhySize_WithItem(t); - } - } - { - if (PhySize != PhySize_Aligned) - { - /* some msi (in rare cases) have unaligned size of archive, - where there is no padding data after payload data in last cluster of archive */ - UInt64 fileSize; - RINOK(InStream_GetSize_SeekToEnd(inStream, fileSize)) - if (PhySize != fileSize) - PhySize = PhySize_Aligned; - } - } - { - FOR_VECTOR (t, Items) - { - const CItem &item = Items[t]; - - if (IsMsiName(item.Name)) + const CItem &item = Items[Refs[i].Did]; + if (item.IsDir()) + continue; + const Byte *name = item.Name; + // if (IsMsiName(name)) + if (isMsi) { - Type = k_Type_Msi; - if (memcmp(item.Name, kMspSequence, kMspSequence_Size) == 0) + if (memcmp(name, k_Sequence_msp, sizeof(k_Sequence_msp)) == 0) { Type = k_Type_Msp; break; } - continue; + if (memcmp(name, k_Sequence_msm, sizeof(k_Sequence_msm)) == 0) + { + Type = k_Type_Msm; + break; + } } - if (AreEqualNames(item.Name, "WordDocument")) + else { - Type = k_Type_Doc; - break; - } - if (AreEqualNames(item.Name, "PowerPoint Document")) - { - Type = k_Type_Ppt; - break; - } - if (AreEqualNames(item.Name, "Workbook")) - { - Type = k_Type_Xls; - break; + if (AreEqualNames(name, "WordDocument")) + { + Type = k_Type_Doc; 
+ break; + } + if (AreEqualNames(name, "PowerPoint Document")) + { + Type = k_Type_Ppt; + break; + } + if (AreEqualNames(name, "Workbook")) + { + Type = k_Type_Xls; + break; + } } } } +#ifdef Z7_COMPOUND_SHOW_DELETED + { + // we skip Items[0] that is root item + for (unsigned t = 1; t < Items.Size(); t++) + { + const CItem &item = Items[t]; + if ( +#if 1 // 0 for debug to show empty files + item.IsEmptyType() || +#endif + !item.IsLevel_Unused()) + continue; + CRef ref; + ref.Parent = k_Ref_Parent_Root; + ref.Did = t; + Refs.Add(ref); + } + } +#endif + + HeadersError = false; return S_OK; } + + Z7_CLASS_IMP_CHandler_IInArchive_1( IInArchiveGetStream ) @@ -674,13 +1122,15 @@ static const Byte kProps[] = kpidPackSize, kpidCTime, kpidMTime + // , kpidCharacts // for debug }; static const Byte kArcProps[] = { kpidExtension, - kpidClusterSize, - kpidSectorSize + kpidClusterSize + // , kpidSectorSize + // , kpidFreeSpace }; IMP_IInArchive_Props @@ -695,9 +1145,20 @@ Z7_COM7F_IMF(CHandler::GetArchiveProperty(PROPID propID, PROPVARIANT *value)) case kpidExtension: prop = kExtensions[(unsigned)_db.Type]; break; case kpidPhySize: prop = _db.PhySize; break; case kpidClusterSize: prop = (UInt32)1 << _db.SectorSizeBits; break; - case kpidSectorSize: prop = (UInt32)1 << _db.MiniSectorSizeBits; break; + // case kpidSectorSize: prop = (UInt32)1 << _db.MiniSectorSizeBits; break; case kpidMainSubfile: if (_db.MainSubfile >= 0) prop = (UInt32)_db.MainSubfile; break; + // case kpidFreeSpace: prop = _db.FreeSize; break; case kpidIsNotArcType: if (_db.IsNotArcType()) prop = true; break; + case kpidErrorFlags: + { + UInt32 v = 0; + if (!_db.IsArc) + v |= kpv_ErrorFlags_IsNotArc; + if (_db.HeadersError) + v |= kpv_ErrorFlags_HeadersError; + prop = v; + break; + } } prop.Detach(value); return S_OK; @@ -719,6 +1180,7 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val case kpidMTime: prop = item.MTime; break; case kpidPackSize: if (!item.IsDir()) prop = 
_db.GetItemPackSize(item.Size); break; case kpidSize: if (!item.IsDir()) prop = item.Size; break; + // case kpidCharacts: prop = item.Level; break; } prop.Detach(value); return S_OK; @@ -727,17 +1189,17 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val Z7_COM7F_IMF(CHandler::Open(IInStream *inStream, const UInt64 * /* maxCheckStartPosition */, - IArchiveOpenCallback * /* openArchiveCallback */)) + IArchiveOpenCallback *openArchiveCallback)) { COM_TRY_BEGIN Close(); - try + _db.OpenCallback = openArchiveCallback; + // try { - if (_db.Open(inStream) != S_OK) - return S_FALSE; + RINOK(_db.Open(inStream)) _stream = inStream; } - catch(...) { return S_FALSE; } + // catch(...) { return S_FALSE; } return S_OK; COM_TRY_END } @@ -775,52 +1237,57 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, CMyComPtr2_Create lps; lps->Init(extractCallback, false); - for (i = 0; i < numItems; i++) + for (i = 0;; i++) { lps->InSize = totalPackSize; lps->OutSize = totalSize; RINOK(lps->SetCur()) + if (i >= numItems) + break; + const UInt32 index = allFilesMode ? i : indices[i]; const CItem &item = _db.Items[_db.Refs[index].Did]; Int32 res; - { - CMyComPtr outStream; - const Int32 askMode = testMode ? 
- NExtract::NAskMode::kTest : - NExtract::NAskMode::kExtract; - RINOK(extractCallback->GetStream(index, &outStream, askMode)) - - if (item.IsDir()) { - RINOK(extractCallback->PrepareOperation(askMode)) - RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kOK)) - continue; - } - - totalPackSize += _db.GetItemPackSize(item.Size); - totalSize += item.Size; - - if (!testMode && !outStream) - continue; - RINOK(extractCallback->PrepareOperation(askMode)) - res = NExtract::NOperationResult::kDataError; - CMyComPtr inStream; - HRESULT hres = GetStream(index, &inStream); - if (hres == S_FALSE) - res = NExtract::NOperationResult::kDataError; - else if (hres == E_NOTIMPL) - res = NExtract::NOperationResult::kUnsupportedMethod; - else - { - RINOK(hres) - if (inStream) + CMyComPtr outStream; + const Int32 askMode = testMode ? + NExtract::NAskMode::kTest : + NExtract::NAskMode::kExtract; + RINOK(extractCallback->GetStream(index, &outStream, askMode)) + + if (item.IsDir()) { - RINOK(copyCoder.Interface()->Code(inStream, outStream, NULL, NULL, lps)) - if (copyCoder->TotalSize == item.Size) - res = NExtract::NOperationResult::kOK; + RINOK(extractCallback->PrepareOperation(askMode)) + RINOK(extractCallback->SetOperationResult(NExtract::NOperationResult::kOK)) + continue; + } + + totalPackSize += _db.GetItemPackSize(item.Size); + totalSize += item.Size; + + if (!testMode && !outStream) + continue; + RINOK(extractCallback->PrepareOperation(askMode)) + res = NExtract::NOperationResult::kDataError; + CMyComPtr inStream; + const HRESULT hres = GetStream(index, &inStream); + if (hres == S_FALSE) + res = NExtract::NOperationResult::kDataError; + /* + else if (hres == E_NOTIMPL) + res = NExtract::NOperationResult::kUnsupportedMethod; + */ + else + { + RINOK(hres) + if (inStream) + { + RINOK(copyCoder.Interface()->Code(inStream, outStream, NULL, NULL, lps)) + if (copyCoder->TotalSize == item.Size) + res = NExtract::NOperationResult::kOK; + } } } - } 
RINOK(extractCallback->SetOperationResult(res)) } return S_OK; @@ -839,20 +1306,64 @@ Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) *stream = NULL; const UInt32 itemIndex = _db.Refs[index].Did; const CItem &item = _db.Items[itemIndex]; + if (item.IsDir()) + return S_FALSE; + const bool isLargeStream = (itemIndex == 0 || IsLargeStream(item.Size)); + if (!isLargeStream) + { + CBufferInStream *streamSpec = new CBufferInStream; + CMyComPtr streamTemp = streamSpec; + + UInt32 size = (UInt32)item.Size; + streamSpec->Buf.Alloc(size); + streamSpec->Init(); + + UInt32 sid = item.Sid; + Byte *dest = streamSpec->Buf; + + UInt64 phyPos = 0; + while (size) + { + if (sid >= _db.MatSize) + return S_FALSE; + const unsigned subBits = _db.SectorSizeBits - k_MiniSectorSizeBits; + const UInt32 fid = sid >> subBits; + if (fid >= _db.NumSectors_in_MiniStream) + return false; + const UInt64 offset = (((UInt64)_db.MiniSids[fid] + 1) << _db.SectorSizeBits) + + ((sid & ((1u << subBits) - 1)) << k_MiniSectorSizeBits); + if (phyPos != offset) + { + RINOK(InStream_SeekSet(_stream, offset)) + phyPos = offset; + } + UInt32 readSize = (UInt32)1 << k_MiniSectorSizeBits; + if (readSize > size) + readSize = size; + RINOK(ReadStream_FALSE(_stream, dest, readSize)) + phyPos += readSize; + dest += readSize; + sid = _db.Mat[sid]; + size -= readSize; + } + if (sid != NFatID::kEndOfChain) + return S_FALSE; + *stream = streamTemp.Detach(); + return S_OK; + } + CClusterInStream *streamSpec = new CClusterInStream; CMyComPtr streamTemp = streamSpec; streamSpec->Stream = _stream; streamSpec->StartOffset = 0; - - const bool isLargeStream = (itemIndex == 0 || _db.IsLargeStream(item.Size)); - const unsigned bsLog = isLargeStream ? 
_db.SectorSizeBits : _db.MiniSectorSizeBits; + const unsigned bsLog = _db.SectorSizeBits; streamSpec->BlockSizeLog = bsLog; streamSpec->Size = item.Size; const UInt32 clusterSize = (UInt32)1 << bsLog; const UInt64 numClusters64 = (item.Size + clusterSize - 1) >> bsLog; - if (numClusters64 >= ((UInt32)1 << 31)) - return E_NOTIMPL; + if (numClusters64 > _db.FatSize) + return S_FALSE; streamSpec->Vector.ClearAndReserve((unsigned)numClusters64); UInt32 sid = item.Sid; UInt64 size = item.Size; @@ -861,21 +1372,10 @@ Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) { for (;; size -= clusterSize) { - if (isLargeStream) - { - if (sid >= _db.FatSize) - return S_FALSE; - streamSpec->Vector.AddInReserved(sid + 1); - sid = _db.Fat[sid]; - } - else - { - UInt64 val = 0; - if (sid >= _db.MatSize || !_db.GetMiniCluster(sid, val) || val >= (UInt64)1 << 32) - return S_FALSE; - streamSpec->Vector.AddInReserved((UInt32)val); - sid = _db.Mat[sid]; - } + if (sid >= _db.FatSize) + return S_FALSE; + streamSpec->Vector.AddInReserved(sid + 1); + sid = _db.Fat[sid]; if (size <= clusterSize) break; } @@ -889,7 +1389,7 @@ Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) } REGISTER_ARC_I( - "Compound", "msi msp doc xls ppt", NULL, 0xE5, + "Compound", "msi msp msm doc xls ppt aaf", NULL, 0xe5, kSignature, 0, 0, diff --git a/CPP/7zip/Archive/CpioHandler.cpp b/CPP/7zip/Archive/CpioHandler.cpp index 62184f0..e1d6d81 100644 --- a/CPP/7zip/Archive/CpioHandler.cpp +++ b/CPP/7zip/Archive/CpioHandler.cpp @@ -927,7 +927,7 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val { #ifdef _WIN32 UString u; - ConvertUTF8ToUnicode(item.Name, u); + ConvertUTF8ToUnicode(s, u); #else const UString u = MultiByteToUnicodeString(s, CP_OEMCP); #endif diff --git a/CPP/7zip/Archive/QcowHandler.cpp b/CPP/7zip/Archive/QcowHandler.cpp index b072880..6edf86d 100644 --- a/CPP/7zip/Archive/QcowHandler.cpp +++ 
b/CPP/7zip/Archive/QcowHandler.cpp @@ -482,6 +482,10 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *openCallback) if (_phySize < headerSize) _phySize = headerSize; + // we use 32 MiB limit for L1 size, as QEMU with QCOW_MAX_L1_SIZE limit. + if (l1Size > (1u << 22)) // if (l1Size > (1u << (sizeof(size_t) * 8 - 4))) + return S_FALSE; + _isArc = true; { const UInt64 backOffset = Get64((const Byte *)(const void *)buf64 + 8); @@ -519,7 +523,6 @@ HRESULT CHandler::Open2(IInStream *stream, IArchiveOpenCallback *openCallback) } CObjArray table64(l1Size); { - // if ((t1SizeBytes >> 3) != l1Size) return S_FALSE; RINOK(InStream_SeekSet(stream, l1Offset)) RINOK(ReadStream_FALSE(stream, table64, t1SizeBytes)) } diff --git a/CPP/7zip/Archive/Rar/Rar5Handler.cpp b/CPP/7zip/Archive/Rar/Rar5Handler.cpp index a639d8b..c15ff52 100644 --- a/CPP/7zip/Archive/Rar/Rar5Handler.cpp +++ b/CPP/7zip/Archive/Rar/Rar5Handler.cpp @@ -8,6 +8,7 @@ #include "../../../Common/ComTry.h" #include "../../../Common/IntToString.h" #include "../../../Common/MyBuffer2.h" +#include "../../../Common/MyLinux.h" #include "../../../Common/UTFConvert.h" #include "../../../Windows/PropVariantUtils.h" @@ -1184,7 +1185,15 @@ HRESULT CUnpacker::Code(const CItem &item, const CItem &lastItem, UInt64 packSiz const UInt64 processedSize = outStream->GetPos(); if (res == S_OK && !lastItem.Is_UnknownSize() && processedSize != lastItem.Size) - res = S_FALSE; + { + // rar_v7.13-: linux archive contains symLink with (packSize == 0 && lastItem.Size != 0) + // v25.02: we ignore such record in rar headers: + if (packSize != 0 + || method != 0 + || lastItem.HostOS != kHost_Unix + || !MY_LIN_S_ISLNK(lastItem.Attrib)) + res = S_FALSE; + } // if (res == S_OK) { diff --git a/CPP/7zip/Archive/Rar/RarHandler.cpp b/CPP/7zip/Archive/Rar/RarHandler.cpp index dfbad33..6c53847 100644 --- a/CPP/7zip/Archive/Rar/RarHandler.cpp +++ b/CPP/7zip/Archive/Rar/RarHandler.cpp @@ -7,6 +7,7 @@ #include "../../../Common/ComTry.h" 
#include "../../../Common/IntToString.h" #include "../../../Common/MyBuffer2.h" +#include "../../../Common/MyLinux.h" #include "../../../Common/UTFConvert.h" #include "../../../Windows/PropVariantUtils.h" @@ -70,8 +71,14 @@ bool CItem::IsDir() const case NHeader::NFile::kHostMSDOS: case NHeader::NFile::kHostOS2: case NHeader::NFile::kHostWin32: - if ((Attrib & FILE_ATTRIBUTE_DIRECTORY) != 0) + if (Attrib & FILE_ATTRIBUTE_DIRECTORY) return true; + break; + case NHeader::NFile::kHostUnix: + case NHeader::NFile::kHostBeOS: + if (MY_LIN_S_ISDIR(Attrib)) + return true; + break; } return false; } @@ -86,11 +93,20 @@ UInt32 CItem::GetWinAttrib() const case NHeader::NFile::kHostWin32: a = Attrib; break; + case NHeader::NFile::kHostUnix: + case NHeader::NFile::kHostBeOS: + a = Attrib << 16; + a |= 0x8000; // add posix mode marker + break; + // case NHeader::NFile::kHostMacOS: + // kHostMacOS was used only by some very old rare case rar. + // New rar4-rar7 for macos probably uses kHostUnix. + // So we process kHostMacOS without attribute parsing: default: - a = 0; // must be converted from unix value; + a = 0; } if (IsDir()) - a |= NHeader::NFile::kWinFileDirectoryAttributeMask; + a |= FILE_ATTRIBUTE_DIRECTORY; return a; } diff --git a/CPP/7zip/Archive/Tar/TarHandler.cpp b/CPP/7zip/Archive/Tar/TarHandler.cpp index 29f28e8..5761ea3 100644 --- a/CPP/7zip/Archive/Tar/TarHandler.cpp +++ b/CPP/7zip/Archive/Tar/TarHandler.cpp @@ -65,7 +65,7 @@ static const Byte kArcProps[] = kpidComment }; -static const char *k_Characts_Prefix = "PREFIX"; +static const char * const k_Characts_Prefix = "PREFIX"; IMP_IInArchive_Props IMP_IInArchive_ArcProps @@ -684,10 +684,14 @@ Z7_COM7F_IMF(CHandler::GetProperty(UInt32 index, PROPID propID, PROPVARIANT *val s.Add_OptSpaced("SCHILY.fflags="); s += item->SCHILY_fflags; } + if (item->Is_Sparse()) + s.Add_OptSpaced("SPARSE"); if (item->IsThereWarning()) s.Add_OptSpaced("WARNING"); if (item->HeaderError) s.Add_OptSpaced("ERROR"); + if 
(item->Method_Error) + s.Add_OptSpaced("METHOD_ERROR"); if (item->Pax_Error) s.Add_OptSpaced("PAX_error"); if (!item->PaxExtra.RawLines.IsEmpty()) @@ -812,11 +816,16 @@ Z7_COM7F_IMF(CHandler::Extract(const UInt32 *indices, UInt32 numItems, inStream2 = inStream; else { - GetStream(index, &inStream2); - if (!inStream2) - return E_FAIL; + const HRESULT hres = GetStream(index, &inStream2); + if (hres == E_NOTIMPL) + opRes = NExtract::NOperationResult::kHeadersError; // kUnsupportedMethod + else if (!inStream2) + { + opRes = NExtract::NOperationResult::kDataError; + // return E_FAIL; + } } - + if (opRes == NExtract::NOperationResult::kOK) { if (item->Is_SymLink()) { @@ -855,9 +864,9 @@ Z7_CLASS_IMP_IInStream( bool _needStartSeek; public: + unsigned ItemIndex; CHandler *Handler; CMyComPtr HandlerRef; - unsigned ItemIndex; CRecordVector PhyOffsets; void Init() @@ -879,7 +888,7 @@ Z7_COM7F_IMF(CSparseStream::Read(void *data, UInt32 size, UInt32 *processedSize) if (_virtPos >= item.Size) return S_OK; { - UInt64 rem = item.Size - _virtPos; + const UInt64 rem = item.Size - _virtPos; if (size > rem) size = (UInt32)rem; } @@ -903,17 +912,17 @@ Z7_COM7F_IMF(CSparseStream::Read(void *data, UInt32 size, UInt32 *processedSize) } const CSparseBlock &sb = item.SparseBlocks[left]; - UInt64 relat = _virtPos - sb.Offset; + const UInt64 relat = _virtPos - sb.Offset; if (_virtPos >= sb.Offset && relat < sb.Size) { - UInt64 rem = sb.Size - relat; + const UInt64 rem = sb.Size - relat; if (size > rem) size = (UInt32)rem; - UInt64 phyPos = PhyOffsets[left] + relat; + const UInt64 phyPos = PhyOffsets[left] + relat; if (_needStartSeek || _phyPos != phyPos) { - RINOK(InStream_SeekSet(Handler->_stream, (item.Get_DataPos() + phyPos))) + RINOK(InStream_SeekSet(Handler->_stream, item.Get_DataPos() + phyPos)) _needStartSeek = false; _phyPos = phyPos; } @@ -927,7 +936,7 @@ Z7_COM7F_IMF(CSparseStream::Read(void *data, UInt32 size, UInt32 *processedSize) next = sb.Offset; else if (left + 1 < 
item.SparseBlocks.Size()) next = item.SparseBlocks[left + 1].Offset; - UInt64 rem = next - _virtPos; + const UInt64 rem = next - _virtPos; if (size > rem) size = (UInt32)rem; memset(data, 0, size); @@ -965,6 +974,8 @@ Z7_COM7F_IMF(CHandler::GetStream(UInt32 index, ISequentialInStream **stream)) if (item.Is_Sparse()) { + if (item.Method_Error) + return E_NOTIMPL; // S_FALSE CSparseStream *streamSpec = new CSparseStream; CMyComPtr streamTemp = streamSpec; streamSpec->Init(); diff --git a/CPP/7zip/Archive/Tar/TarIn.cpp b/CPP/7zip/Archive/Tar/TarIn.cpp index 22b8902..e702b68 100644 --- a/CPP/7zip/Archive/Tar/TarIn.cpp +++ b/CPP/7zip/Archive/Tar/TarIn.cpp @@ -181,6 +181,7 @@ HRESULT CArchive::GetNextItemReal(CItemEx &item) { char buf[NFileHeader::kRecordSize]; + item.Method_Error = false; error = k_ErrorType_OK; filled = false; @@ -218,10 +219,7 @@ HRESULT CArchive::GetNextItemReal(CItemEx &item) break; item.HeaderSize += NFileHeader::kRecordSize; thereAreEmptyRecords = true; - if (OpenCallback) - { - RINOK(Progress(item, 0)) - } + RINOK(Progress(item, 0)) } if (thereAreEmptyRecords) { @@ -335,37 +333,60 @@ HRESULT CArchive::GetNextItemReal(CItemEx &item) if (item.LinkFlag == NFileHeader::NLinkFlag::kSparse) { - Byte isExtended = (Byte)buf[482]; - if (isExtended != 0 && isExtended != 1) - return S_OK; + // OLD GNU format: parse sparse file information: + // PackSize = cumulative size of all non-empty blocks of the file. 
+ // We read actual file size from 'realsize' member of oldgnu_header: RIF(ParseSize(buf + 483, item.Size, item.Size_IsBin)) - UInt64 min = 0; - for (unsigned i = 0; i < 4; i++) - { - p = buf + 386 + 24 * i; - if (GetBe32(p) == 0) - { - if (isExtended != 0) - return S_OK; - break; - } - CSparseBlock sb; - RIF(ParseSize(p, sb.Offset)) - RIF(ParseSize(p + 12, sb.Size)) - item.SparseBlocks.Add(sb); - if (sb.Offset < min || sb.Offset > item.Size) - return S_OK; - if ((sb.Offset & 0x1FF) != 0 || (sb.Size & 0x1FF) != 0) - return S_OK; - min = sb.Offset + sb.Size; - if (min < sb.Offset) - return S_OK; - } - if (min > item.Size) + if (item.Size < item.PackSize) // additional check return S_OK; - while (isExtended != 0) + p = buf + 386; + + UInt64 end = 0, packSum = 0; + unsigned numRecords = 4; + unsigned isExtended = (Byte)p[4 * 24]; // (Byte)p[numRecords * 24]; + // the list of blocks contains non-empty blocks. All another data is empty. + + for (;;) { + // const unsigned isExtended = (Byte)p[numRecords * 24]; + if (isExtended > 1) + return S_OK; + do + { + if (GetBe32(p) == 0) + { + if (isExtended) + return S_OK; + break; + } + CSparseBlock sb; + RIF(ParseSize(p, sb.Offset)) + RIF(ParseSize(p + 12, sb.Size)) + p += 24; + /* for all non-last blocks we expect : + ((sb.Size & 0x1ff) == 0) && ((sb.Offset & 0x1ff) == 0) + for last block : (sb.Size == 0) is possible. 
+ */ + if (sb.Offset < end + || item.Size < sb.Offset + || item.Size - sb.Offset < sb.Size) + return S_OK; + // optional check: + if (sb.Size && ((end & 0x1ff) || (sb.Offset & 0x1ff))) + { + item.Method_Error = true; // relaxed check + // return S_OK; + } + end = sb.Offset + sb.Size; + packSum += sb.Size; + item.SparseBlocks.Add(sb); + } + while (--numRecords); + + if (!isExtended) + break; + size_t processedSize = NFileHeader::kRecordSize; RINOK(ReadStream(SeqStream, buf, &processedSize)) if (processedSize != NFileHeader::kRecordSize) @@ -373,46 +394,22 @@ HRESULT CArchive::GetNextItemReal(CItemEx &item) error = k_ErrorType_UnexpectedEnd; return S_OK; } - item.HeaderSize += NFileHeader::kRecordSize; - - if (OpenCallback) - { - RINOK(Progress(item, 0)) - } - - isExtended = (Byte)buf[21 * 24]; - if (isExtended != 0 && isExtended != 1) - return S_OK; - for (unsigned i = 0; i < 21; i++) - { - p = buf + 24 * i; - if (GetBe32(p) == 0) - { - if (isExtended != 0) - return S_OK; - break; - } - CSparseBlock sb; - RIF(ParseSize(p, sb.Offset)) - RIF(ParseSize(p + 12, sb.Size)) - item.SparseBlocks.Add(sb); - if (sb.Offset < min || sb.Offset > item.Size) - return S_OK; - if ((sb.Offset & 0x1FF) != 0 || (sb.Size & 0x1FF) != 0) - return S_OK; - min = sb.Offset + sb.Size; - if (min < sb.Offset) - return S_OK; - } + RINOK(Progress(item, 0)) + p = buf; + numRecords = 21; + isExtended = (Byte)p[21 * 24]; // (Byte)p[numRecords * 24]; + } + // optional checks for strict size consistency: + if (end != item.Size || packSum != item.PackSize) + { + item.Method_Error = true; // relaxed check + // return S_OK; } - if (min > item.Size) - return S_OK; } - if (item.PackSize >= (UInt64)1 << 63) + if (item.PackSize >= (UInt64)1 << 63) // optional check. 
It was checked in ParseSize() already return S_OK; - filled = true; error = k_ErrorType_OK; return S_OK; @@ -421,6 +418,8 @@ HRESULT CArchive::GetNextItemReal(CItemEx &item) HRESULT CArchive::Progress(const CItemEx &item, UInt64 posOffset) { + if (!OpenCallback) + return S_OK; const UInt64 pos = item.Get_DataPos() + posOffset; if (NumFiles - NumFiles_Prev < (1 << 16) // && NumRecords - NumRecords_Prev < (1 << 16) @@ -500,10 +499,7 @@ HRESULT CArchive::ReadDataToBuffer(const CItemEx &item, do { - if (OpenCallback) - { - RINOK(Progress(item, pos)) - } + RINOK(Progress(item, pos)) unsigned size = kBufSize; if (size > packSize) @@ -813,6 +809,7 @@ HRESULT CArchive::ReadItem2(CItemEx &item) item.LongLink_WasUsed_2 = false; item.HeaderError = false; + item.Method_Error = false; item.IsSignedChecksum = false; item.Prefix_WasUsed = false; @@ -838,13 +835,8 @@ HRESULT CArchive::ReadItem2(CItemEx &item) for (;;) { - if (OpenCallback) - { - RINOK(Progress(item, 0)) - } - + RINOK(Progress(item, 0)) RINOK(GetNextItemReal(item)) - // NumRecords++; if (!filled) @@ -1064,9 +1056,14 @@ HRESULT CArchive::ReadItem2(CItemEx &item) // GNU TAR ignores (item.Size) in that case if (item.Size != 0 && item.Size != piSize) item.Pax_Error = true; - item.Size = piSize; - item.PackSize = piSize; - item.pax_size_WasUsed = true; + if (piSize >= ((UInt64)1 << 63)) + item.Pax_Error = true; + else + { + item.Size = piSize; + item.PackSize = piSize; + item.pax_size_WasUsed = true; + } } item.PaxTimes = paxInfo; diff --git a/CPP/7zip/Archive/Tar/TarItem.h b/CPP/7zip/Archive/Tar/TarItem.h index 112f38d..d4e2ea5 100644 --- a/CPP/7zip/Archive/Tar/TarItem.h +++ b/CPP/7zip/Archive/Tar/TarItem.h @@ -322,6 +322,7 @@ struct CPaxExtra struct CItemEx: public CItem { bool HeaderError; + bool Method_Error; bool IsSignedChecksum; bool Prefix_WasUsed; diff --git a/CPP/7zip/Archive/Udf/UdfIn.cpp b/CPP/7zip/Archive/Udf/UdfIn.cpp index ce87c54..a9e4ebf 100644 --- a/CPP/7zip/Archive/Udf/UdfIn.cpp +++ 
b/CPP/7zip/Archive/Udf/UdfIn.cpp @@ -500,10 +500,15 @@ size_t CFileId::Parse(const Byte *p, size_t size) processed += impLen; Id.Parse(p + processed, idLen); processed += idLen; + // const size_t processed2 = processed; for (;(processed & 3) != 0; processed++) if (p[processed] != 0) return 0; - if ((size_t)tag.CrcLen + 16 != processed) return 0; + // some program can create non-standard UDF file where CrcLen doesn't include Padding data + if ((size_t)tag.CrcLen + 16 != processed + // && (size_t)tag.CrcLen + 16 != processed2 // we can enable this check to support non-standard UDF + ) + return 0; return (processed <= size) ? processed : 0; } @@ -577,15 +582,20 @@ HRESULT CInArchive::ReadItem(unsigned volIndex, int fsIndex, const CLongAllocDes item.IcbTag.Parse(p + 16); + // maybe another FileType values are possible in rare cases. + // Shoud we ignore FileType here? if (fsIndex < 0) { + // if (item.IcbTag.FileType == ICB_FILE_TYPE_DIR) return S_FALSE; if (item.IcbTag.FileType != ICB_FILE_TYPE_METADATA && - item.IcbTag.FileType != ICB_FILE_TYPE_METADATA_MIRROR) + item.IcbTag.FileType != ICB_FILE_TYPE_METADATA_MIRROR && + item.IcbTag.FileType != ICB_FILE_TYPE_METADATA_BITMAP) return S_FALSE; } else if ( item.IcbTag.FileType != ICB_FILE_TYPE_DIR && - item.IcbTag.FileType != ICB_FILE_TYPE_FILE) + item.IcbTag.FileType != ICB_FILE_TYPE_FILE && + item.IcbTag.FileType != ICB_FILE_TYPE_REAL_TIME_FILE) // M2TS files in /BDMV/STREAM/ in Blu-ray movie return S_FALSE; item.Parse(p); @@ -1210,7 +1220,7 @@ HRESULT CInArchive::Open2() if (tag.Id != DESC_TYPE_FileSet) return S_FALSE; - PRF(printf("\n FileSet", volIndex)); + PRF(printf("\n FileSet")); CFileSet fs; fs.RecordingTime.Parse(p + 16); // fs.InterchangeLevel = Get16(p + 18); diff --git a/CPP/7zip/Archive/Udf/UdfIn.h b/CPP/7zip/Archive/Udf/UdfIn.h index 9ccbf74..cbe1a27 100644 --- a/CPP/7zip/Archive/Udf/UdfIn.h +++ b/CPP/7zip/Archive/Udf/UdfIn.h @@ -250,9 +250,10 @@ enum EIcbFileType { ICB_FILE_TYPE_DIR = 4, 
ICB_FILE_TYPE_FILE = 5, - - ICB_FILE_TYPE_METADATA = 250, // 2.2.13.1 Metadata File - ICB_FILE_TYPE_METADATA_MIRROR = 251 + ICB_FILE_TYPE_REAL_TIME_FILE = 249, // 2.3.5.2.1 + ICB_FILE_TYPE_METADATA = 250, // 2.2.13.1 + ICB_FILE_TYPE_METADATA_MIRROR = 251, // 2.2.13.1 + ICB_FILE_TYPE_METADATA_BITMAP = 252 // 2.2.13.2 }; enum EIcbDescriptorType diff --git a/CPP/7zip/Archive/Zip/ZipIn.cpp b/CPP/7zip/Archive/Zip/ZipIn.cpp index 788810f..9d77e87 100644 --- a/CPP/7zip/Archive/Zip/ZipIn.cpp +++ b/CPP/7zip/Archive/Zip/ZipIn.cpp @@ -1718,61 +1718,49 @@ HRESULT CInArchive::TryEcd64(UInt64 offset, CCdInfo &cdInfo) HRESULT CInArchive::FindCd(bool checkOffsetMode) { - CCdInfo &cdInfo = Vols.ecd; - - UInt64 endPos; - // There are no useful data in cache in most cases here. - // So here we don't use cache data from previous operations . - + // So here we don't use cache data from previous operations. InitBuf(); + UInt64 endPos; RINOK(InStream_GetSize_SeekToEnd(Stream, endPos)) _streamPos = endPos; - - // const UInt32 kBufSizeMax2 = ((UInt32)1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize; - const size_t kBufSizeMax = ((size_t)1 << 17); // must be larger than kBufSizeMax2 - + const size_t kBufSizeMax = (size_t)1 << 17; // must be larger than + // (1 << 16) + kEcdSize + kEcd64Locator_Size + kEcd64_FullSize const size_t bufSize = (endPos < kBufSizeMax) ? 
(size_t)endPos : kBufSizeMax; if (bufSize < kEcdSize) return S_FALSE; - // CByteArr byteBuffer(bufSize); - RINOK(AllocateBuffer(kBufSizeMax)) + { + RINOK(Seek_SavePos(endPos - bufSize)) + size_t processed = bufSize; + const HRESULT res = ReadStream(Stream, Buffer, &processed); + _streamPos += processed; + _bufCached = processed; + _bufPos = 0; + _cnt += processed; + if (res != S_OK) + return res; + if (processed != bufSize) + return S_FALSE; + } - RINOK(Seek_SavePos(endPos - bufSize)) - - size_t processed = bufSize; - HRESULT res = ReadStream(Stream, Buffer, &processed); - _streamPos += processed; - _bufCached = processed; - _bufPos = 0; - _cnt += processed; - if (res != S_OK) - return res; - if (processed != bufSize) - return S_FALSE; - + CCdInfo &cdInfo = Vols.ecd; for (size_t i = bufSize - kEcdSize + 1;;) { - if (i == 0) - return S_FALSE; - const Byte *buf = Buffer; - - for (;;) { - i--; - if (buf[i] == 0x50) - break; - if (i == 0) - return S_FALSE; - } - - if (Get32(buf + i) != NSignature::kEcd) - continue; + const Byte *p = buf + i; + do + if (p == buf) + return S_FALSE; + while (*(--p) != 0x50); - cdInfo.ParseEcd32(buf + i); + i = (size_t)(p - buf); + if (Get32(p) != NSignature::kEcd) + continue; + cdInfo.ParseEcd32(p); + } if (i >= kEcd64Locator_Size) { @@ -1793,29 +1781,24 @@ HRESULT CInArchive::FindCd(bool checkOffsetMode) // Most of the zip64 use fixed size Zip64 ECD // we try relative backward reading. 
- - UInt64 absEcd64 = endPos - bufSize + i - (kEcd64Locator_Size + kEcd64_FullSize); + const UInt64 absEcd64 = endPos - bufSize + i - (kEcd64Locator_Size + kEcd64_FullSize); if (locatorIndex >= kEcd64_FullSize) if (checkOffsetMode || absEcd64 == locator.Ecd64Offset) { const Byte *ecd64 = buf + locatorIndex - kEcd64_FullSize; - if (Get32(ecd64) == NSignature::kEcd64) + if (Get32(ecd64) == NSignature::kEcd64 && + Get64(ecd64 + 4) == kEcd64_MainSize) { - UInt64 mainEcd64Size = Get64(ecd64 + 4); - if (mainEcd64Size == kEcd64_MainSize) - { - cdInfo.ParseEcd64e(ecd64 + 12); - ArcInfo.Base = (Int64)(absEcd64 - locator.Ecd64Offset); - // ArcInfo.BaseVolIndex = cdInfo.ThisDisk; - return S_OK; - } + cdInfo.ParseEcd64e(ecd64 + 12); + ArcInfo.Base = (Int64)(absEcd64 - locator.Ecd64Offset); + // ArcInfo.BaseVolIndex = cdInfo.ThisDisk; + return S_OK; } } // some zip64 use variable size Zip64 ECD. // we try to use absolute offset from locator. - if (absEcd64 != locator.Ecd64Offset) { if (TryEcd64(locator.Ecd64Offset, cdInfo) == S_OK) @@ -1881,6 +1864,9 @@ HRESULT CInArchive::TryReadCd(CObjectVector &items, const CCdInfo &cdIn items.Clear(); IsCdUnsorted = false; + if ((Int64)cdOffset < 0) + return S_FALSE; + // _startLocalFromCd_Disk = (UInt32)(Int32)-1; // _startLocalFromCd_Offset = (UInt64)(Int64)-1; diff --git a/CPP/7zip/Archive/Zip/ZipOut.cpp b/CPP/7zip/Archive/Zip/ZipOut.cpp index 63f1a71..e8a21c2 100644 --- a/CPP/7zip/Archive/Zip/ZipOut.cpp +++ b/CPP/7zip/Archive/Zip/ZipOut.cpp @@ -49,42 +49,54 @@ void COutArchive::SeekToCurPos() // #define DOES_NEED_ZIP64(v) (v >= 0) +Z7_NO_INLINE void COutArchive::WriteBytes(const void *data, size_t size) { m_OutBuffer.WriteBytes(data, size); m_CurPos += size; } +Z7_NO_INLINE void COutArchive::Write8(Byte b) { m_OutBuffer.WriteByte(b); m_CurPos++; } +Z7_NO_INLINE void COutArchive::Write16(UInt16 val) { Write8((Byte)val); Write8((Byte)(val >> 8)); } +Z7_NO_INLINE void COutArchive::Write32(UInt32 val) { for (int i = 0; i < 4; i++) { - 
Write8((Byte)val); + // Write8((Byte)val); + m_OutBuffer.WriteByte((Byte)val); val >>= 8; } + m_CurPos += 4; } +#define WRITE_CONST_PAIR_16_16(a, b) { Write32((a) | ((UInt32)(b) << 16)); } + +Z7_NO_INLINE void COutArchive::Write64(UInt64 val) { for (int i = 0; i < 8; i++) { - Write8((Byte)val); + // Write8((Byte)val); + m_OutBuffer.WriteByte((Byte)val); val >>= 8; } + m_CurPos += 8; } +Z7_NO_INLINE void COutArchive::WriteExtra(const CExtraBlock &extra) { FOR_VECTOR (i, extra.SubBlocks) @@ -134,11 +146,9 @@ void COutArchive::WriteTimeExtra(const CItemOut &item, bool writeNtfs) if (writeNtfs) { // windows explorer ignores that extra - Write16(NFileHeader::NExtraID::kNTFS); - Write16(k_Ntfs_ExtraSize); + WRITE_CONST_PAIR_16_16(NFileHeader::NExtraID::kNTFS, k_Ntfs_ExtraSize) Write32(0); // reserved - Write16(NFileHeader::NNtfsExtra::kTagTime); - Write16(8 * 3); + WRITE_CONST_PAIR_16_16(NFileHeader::NNtfsExtra::kTagTime, 8 * 3) WriteNtfsTime(item.Ntfs_MTime); WriteNtfsTime(item.Ntfs_ATime); WriteNtfsTime(item.Ntfs_CTime); @@ -148,8 +158,7 @@ void COutArchive::WriteTimeExtra(const CItemOut &item, bool writeNtfs) { // windows explorer ignores that extra // by specification : should we write to local header also? 
- Write16(NFileHeader::NExtraID::kUnixTime); - Write16(k_UnixTime_ExtraSize); + WRITE_CONST_PAIR_16_16(NFileHeader::NExtraID::kUnixTime, k_UnixTime_ExtraSize) const Byte flags = (Byte)((unsigned)1 << NFileHeader::NUnixTime::kMTime); Write8(flags); UInt32 unixTime; @@ -217,8 +226,7 @@ void COutArchive::WriteLocalHeader(CItemOut &item, bool needCheck) if (isZip64) { - Write16(NFileHeader::NExtraID::kZip64); - Write16(8 + 8); + WRITE_CONST_PAIR_16_16(NFileHeader::NExtraID::kZip64, 8 + 8) Write64(size); Write64(packSize); } @@ -357,8 +365,9 @@ HRESULT COutArchive::WriteCentralDir(const CObjectVector &items, const const UInt64 cdSize = cd64EndOffset - cdOffset; const bool cdOffset64 = DOES_NEED_ZIP64(cdOffset); const bool cdSize64 = DOES_NEED_ZIP64(cdSize); - const bool items64 = items.Size() >= 0xFFFF; - const bool isZip64 = (cdOffset64 || cdSize64 || items64); + const bool need_Items_64 = items.Size() >= 0xFFFF; + const unsigned items16 = (UInt16)(need_Items_64 ? 0xFFFF: items.Size()); + const bool isZip64 = (cdOffset64 || cdSize64 || need_Items_64); // isZip64 = true; // to test Zip64 @@ -371,8 +380,8 @@ HRESULT COutArchive::WriteCentralDir(const CObjectVector &items, const // const UInt32 extraSize = 1 << 26; // Write64(kEcd64_MainSize + extraSize); - Write16(45); // made by version - Write16(45); // extract version + WRITE_CONST_PAIR_16_16(45, // made by version + 45) // extract version Write32(0); // ThisDiskNumber Write32(0); // StartCentralDirectoryDiskNumber Write64((UInt64)items.Size()); @@ -389,10 +398,9 @@ HRESULT COutArchive::WriteCentralDir(const CObjectVector &items, const } Write32(NSignature::kEcd); - Write16(0); // ThisDiskNumber - Write16(0); // StartCentralDirectoryDiskNumber - Write16((UInt16)(items64 ? 0xFFFF: items.Size())); - Write16((UInt16)(items64 ? 
0xFFFF: items.Size())); + WRITE_CONST_PAIR_16_16(0, 0) // ThisDiskNumber, StartCentralDirectoryDiskNumber + Write16((UInt16)items16); + Write16((UInt16)items16); WRITE_32_VAL_SPEC(cdSize, cdSize64) WRITE_32_VAL_SPEC(cdOffset, cdOffset64) diff --git a/CPP/7zip/Bundles/SFXCon/SfxCon.cpp b/CPP/7zip/Bundles/SFXCon/SfxCon.cpp index aac4e28..9e2d13d 100644 --- a/CPP/7zip/Bundles/SFXCon/SfxCon.cpp +++ b/CPP/7zip/Bundles/SFXCon/SfxCon.cpp @@ -153,7 +153,7 @@ namespace NCommandType }; } -static const char *g_Commands = "txl"; +static const char * const g_Commands = "txl"; struct CArchiveCommand { diff --git a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp index 0c09807..d4240d9 100644 --- a/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp +++ b/CPP/7zip/Bundles/SFXSetup/SfxSetup.cpp @@ -48,72 +48,60 @@ static bool ReadDataString(CFSTR fileName, LPCSTR startID, NIO::CInFile inFile; if (!inFile.Open(fileName)) return false; - const size_t kBufferSize = (1 << 12); + const size_t kBufferSize = 1 << 12; Byte buffer[kBufferSize]; - const unsigned signatureStartSize = MyStringLen(startID); - const unsigned signatureEndSize = MyStringLen(endID); + const size_t signatureStartSize = MyStringLen(startID + 1); + const size_t signatureEndSize = MyStringLen(endID + 1); size_t numBytesPrev = 0; bool writeMode = false; - UInt64 posTotal = 0; + UInt32 posTotal = 0; for (;;) { - if (posTotal > (1 << 20)) - return (stringResult.IsEmpty()); const size_t numReadBytes = kBufferSize - numBytesPrev; size_t processedSize; if (!inFile.ReadFull(buffer + numBytesPrev, numReadBytes, processedSize)) return false; if (processedSize == 0) return true; - const size_t numBytesInBuffer = numBytesPrev + processedSize; - UInt32 pos = 0; + numBytesPrev += processedSize; + size_t pos = 0; for (;;) { if (writeMode) { - if (pos + signatureEndSize > numBytesInBuffer) + if (pos + signatureEndSize > numBytesPrev) break; - if (memcmp(buffer + pos, endID, signatureEndSize) == 0) - return true; - 
const Byte b = buffer[pos]; + const Byte b = buffer[pos++]; if (b == 0) return false; + if (b == ';' && memcmp(buffer + pos, endID + 1, signatureEndSize) == 0) + return true; stringResult += (char)b; - pos++; } else { - if (pos + signatureStartSize > numBytesInBuffer) + if (pos + signatureStartSize > numBytesPrev) break; - if (memcmp(buffer + pos, startID, signatureStartSize) == 0) + const Byte b = buffer[pos++]; + if (b == ';' && memcmp(buffer + pos, startID + 1, signatureStartSize) == 0) { writeMode = true; pos += signatureStartSize; } - else - pos++; } } - numBytesPrev = numBytesInBuffer - pos; - posTotal += pos; + posTotal += (UInt32)pos; + if (posTotal > (1 << 21)) + return stringResult.IsEmpty(); + numBytesPrev -= pos; memmove(buffer, buffer + pos, numBytesPrev); } } -static char kStartID[] = { ',','!','@','I','n','s','t','a','l','l','@','!','U','T','F','-','8','!', 0 }; -static char kEndID[] = { ',','!','@','I','n','s','t','a','l','l','E','n','d','@','!', 0 }; - -static struct CInstallIDInit -{ - CInstallIDInit() - { - kStartID[0] = ';'; - kEndID[0] = ';'; - } -} g_CInstallIDInit; - +static const char * const kStartID = ",!@Install@!UTF-8!"; +static const char * const kEndID = ",!@InstallEnd@!"; #if defined(_WIN32) && defined(_UNICODE) && !defined(_WIN64) && !defined(UNDER_CE) #define NT_CHECK_FAIL_ACTION ShowErrorMessage(L"Unsupported Windows version"); return 1; diff --git a/CPP/7zip/Common/FileStreams.cpp b/CPP/7zip/Common/FileStreams.cpp index f90e280..b7e4fbe 100644 --- a/CPP/7zip/Common/FileStreams.cpp +++ b/CPP/7zip/Common/FileStreams.cpp @@ -753,7 +753,7 @@ Z7_COM7F_IMF(CInFileStream::GetProperty(PROPID propID, PROPVARIANT *value)) { if (StoreOwnerName) { - const uid_t gid = st.st_gid; + const gid_t gid = st.st_gid; { if (!OwnerGroup.IsEmpty() && _gid == gid) prop = OwnerGroup; diff --git a/CPP/7zip/Common/FileStreams.h b/CPP/7zip/Common/FileStreams.h index 212d4f0..7f465cf 100644 --- a/CPP/7zip/Common/FileStreams.h +++ 
b/CPP/7zip/Common/FileStreams.h @@ -84,8 +84,8 @@ public: BY_HANDLE_FILE_INFORMATION _info; #else struct stat _info; - UInt32 _uid; - UInt32 _gid; + uid_t _uid; // uid_t can be unsigned or signed int + gid_t _gid; UString OwnerName; UString OwnerGroup; bool StoreOwnerId; diff --git a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp index 7fe18fb..73974e6 100644 --- a/CPP/7zip/UI/Common/ArchiveCommandLine.cpp +++ b/CPP/7zip/UI/Common/ArchiveCommandLine.cpp @@ -426,7 +426,7 @@ static NRecursedType::EEnum GetRecursedTypeFromIndex(int index) } } -static const char *g_Commands = "audtexlbih"; +static const char * const g_Commands = "audtexlbih"; static bool ParseArchiveCommand(const UString &commandString, CArcCommand &command) { diff --git a/CPP/7zip/UI/Common/ArchiveName.cpp b/CPP/7zip/UI/Common/ArchiveName.cpp index 3c0976d..f859d94 100644 --- a/CPP/7zip/UI/Common/ArchiveName.cpp +++ b/CPP/7zip/UI/Common/ArchiveName.cpp @@ -17,14 +17,14 @@ using namespace NWindows; using namespace NFile; -static const char *g_ArcExts = +static const char * const g_ArcExts = "7z" "\0" "zip" "\0" "tar" "\0" "wim" "\0"; -static const char *g_HashExts = +static const char * const g_HashExts = "sha256" "\0"; diff --git a/CPP/7zip/UI/Common/Bench.cpp b/CPP/7zip/UI/Common/Bench.cpp index eb24e7f..316c980 100644 --- a/CPP/7zip/UI/Common/Bench.cpp +++ b/CPP/7zip/UI/Common/Bench.cpp @@ -3038,7 +3038,7 @@ AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) FOR_VECTOR (i, ti.Groups.GroupSizes) { if (i != 0) - s.Add_Char(' '); + s.Add_Space(); s.Add_UInt32(ti.Groups.GroupSizes[i]); } } @@ -3773,10 +3773,11 @@ HRESULT Bench( #ifndef Z7_ST - if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0) - numCPUs = threadsInfo.GetNumProcessThreads(); - else + if (!threadsInfo.Get() + || (numCPUs = threadsInfo.GetNumProcessThreads()) == 0) numCPUs = NSystem::GetNumberOfProcessors(); + // numCPUs : is number of threads assigned to process with 
affinity, + // or it's total number of threads in all groups, if IsGroupMode == true, and there is default affinity. #endif diff --git a/CPP/7zip/UI/Console/List.cpp b/CPP/7zip/UI/Console/List.cpp index 874caef..2d9f5a3 100644 --- a/CPP/7zip/UI/Console/List.cpp +++ b/CPP/7zip/UI/Console/List.cpp @@ -201,8 +201,8 @@ static const CFieldInfoInit kStandardFieldTable[] = { kpidPath, "Name", kLeft, kLeft, 2, 24 } }; -const unsigned kNumSpacesMax = 32; // it must be larger than max CFieldInfoInit.Width -static const char *g_Spaces = +static const unsigned kNumSpacesMax = 32; // it must be larger than max CFieldInfoInit.Width +static const char * const g_Spaces = " " ; static void PrintSpaces(unsigned numSpaces) diff --git a/CPP/7zip/UI/FileManager/BrowseDialog2.cpp b/CPP/7zip/UI/FileManager/BrowseDialog2.cpp index ee98ab4..9f083c5 100644 --- a/CPP/7zip/UI/FileManager/BrowseDialog2.cpp +++ b/CPP/7zip/UI/FileManager/BrowseDialog2.cpp @@ -9,6 +9,7 @@ #include #include "../../../Common/IntToString.h" +#include "../../../Common/MyCom.h" #include "../../../Common/StringConvert.h" #include "../../../Common/Wildcard.h" @@ -19,6 +20,7 @@ #include "../../../Windows/Menu.h" #include "../../../Windows/ProcessUtils.h" #include "../../../Windows/PropVariantConv.h" +#include "../../../Windows/Shell.h" #include "../../../Windows/Control/ComboBox.h" #include "../../../Windows/Control/Dialog.h" #include "../../../Windows/Control/Edit.h" @@ -57,7 +59,7 @@ static const int kParentIndex = -1; // static const UINT k_Message_RefreshPathEdit = WM_APP + 1; -static const wchar_t *k_Message_Link_operation_was_Blocked = +static const wchar_t * const k_Message_Link_operation_was_Blocked = L"link openning was blocked by 7-Zip"; extern UString HResultToMessage(HRESULT errorCode); @@ -978,35 +980,61 @@ void CBrowseDialog2::OnHelp() #endif +HRESULT ShellFolder_ParseDisplayName(IShellFolder *shellFolder, + HWND hwnd, const UString &path, LPITEMIDLIST *ppidl); + HRESULT StartApplication(const UString &dir, 
const UString &path, HWND window, CProcess &process); HRESULT StartApplication(const UString &dir, const UString &path, HWND window, CProcess &process) { UString path2 = path; - - #ifdef _WIN32 + UINT32 result; + { +#ifdef _WIN32 + NShell::CItemIDList pidl; + // SHELLEXECUTEINFO::pidl is more accurate way than SHELLEXECUTEINFO::lpFile + { + CMyComPtr desktop; + if (SHGetDesktopFolder(&desktop) == S_OK && desktop) + if (ShellFolder_ParseDisplayName(desktop, + NULL, // HWND : do we need (window) or NULL here? + path, + &pidl) != S_OK) + pidl.Detach(); + } { const int dot = path2.ReverseFind_Dot(); const int separ = path2.ReverseFind_PathSepar(); - if (dot < 0 || dot < separ) - path2.Add_Dot(); + if (separ != (int)path2.Len() - 1) + if (dot < 0 || dot < separ) + path2.Add_Dot(); } - #endif +#endif // _WIN32 - UINT32 result; - #ifndef _UNICODE if (g_IsNT) { SHELLEXECUTEINFOW execInfo; + memset(&execInfo, 0, sizeof(execInfo)); + // execInfo.hwnd = NULL; + // execInfo.lpVerb = NULL; + // execInfo.lpFile = NULL; + // execInfo.lpDirectory = NULL; + // execInfo.lpParameters = NULL; + // execInfo.hProcess = NULL; execInfo.cbSize = sizeof(execInfo); execInfo.fMask = SEE_MASK_NOCLOSEPROCESS | SEE_MASK_FLAG_DDEWAIT; - execInfo.hwnd = NULL; - execInfo.lpVerb = NULL; - execInfo.lpFile = path2; - execInfo.lpParameters = NULL; - execInfo.lpDirectory = dir.IsEmpty() ? 
NULL : (LPCWSTR)dir; + if (!dir.IsEmpty()) + execInfo.lpDirectory = dir; execInfo.nShow = SW_SHOWNORMAL; - execInfo.hProcess = NULL; + + if ((LPCITEMIDLIST)pidl) + { + execInfo.lpIDList = pidl; + execInfo.fMask |= SEE_MASK_IDLIST; + } + else + execInfo.lpFile = path2; + typedef BOOL (WINAPI * Func_ShellExecuteExW)(LPSHELLEXECUTEINFOW lpExecInfo); Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION const @@ -1024,34 +1052,40 @@ Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION #endif { SHELLEXECUTEINFO execInfo; + memset(&execInfo, 0, sizeof(execInfo)); + // execInfo.hwnd = NULL; + // execInfo.lpVerb = NULL; + // execInfo.lpFile = NULL; + // execInfo.lpDirectory = NULL; + // execInfo.lpParameters = NULL; + // execInfo.hProcess = NULL; execInfo.cbSize = sizeof(execInfo); execInfo.fMask = SEE_MASK_NOCLOSEPROCESS #ifndef UNDER_CE | SEE_MASK_FLAG_DDEWAIT #endif ; - execInfo.hwnd = NULL; - execInfo.lpVerb = NULL; + execInfo.nShow = SW_SHOWNORMAL; const CSysString sysPath (GetSystemString(path2)); const CSysString sysDir (GetSystemString(dir)); - execInfo.lpFile = sysPath; - execInfo.lpParameters = NULL; - execInfo.lpDirectory = - #ifdef UNDER_CE - NULL - #else - sysDir.IsEmpty() ? NULL : (LPCTSTR)sysDir - #endif - ; - execInfo.nShow = SW_SHOWNORMAL; - execInfo.hProcess = NULL; + #ifndef UNDER_CE + if (!sysDir.IsEmpty()) + execInfo.lpDirectory = sysDir; + #endif + + if ((LPCITEMIDLIST)pidl) + { + execInfo.lpIDList = pidl; + execInfo.fMask |= SEE_MASK_IDLIST; + } + else + execInfo.lpFile = sysPath; ::ShellExecuteEx(&execInfo); result = (UINT32)(UINT_PTR)execInfo.hInstApp; process.Attach(execInfo.hProcess); } - // DEBUG_PRINT_NUM("-- ShellExecuteEx -- execInfo.hInstApp = ", result) - + } if (result <= 32) { switch (result) @@ -1063,10 +1097,8 @@ Z7_DIAGNOSTIC_IGNORE_CAST_FUNCTION // L"There is no application associated with the given file name extension", ); } - return E_FAIL; // fixed in 15.13. Can we use it for any Windows version? 
} - return S_OK; } diff --git a/CPP/7zip/UI/FileManager/FSFolder.cpp b/CPP/7zip/UI/FileManager/FSFolder.cpp index 7956d86..51dfaa9 100644 --- a/CPP/7zip/UI/FileManager/FSFolder.cpp +++ b/CPP/7zip/UI/FileManager/FSFolder.cpp @@ -748,8 +748,8 @@ Z7_COM7F_IMF2(Int32, CFSFolder::CompareItems(UInt32 index1, UInt32 index2, PROPI case kpidMTime: return CompareFileTime(&fi1.MTime, &fi2.MTime); case kpidIsDir: { - bool isDir1 = /* ss1 ? false : */ fi1.IsDir(); - bool isDir2 = /* ss2 ? false : */ fi2.IsDir(); + const bool isDir1 = /* ss1 ? false : */ fi1.IsDir(); + const bool isDir2 = /* ss2 ? false : */ fi2.IsDir(); if (isDir1 == isDir2) return 0; return isDir1 ? -1 : 1; @@ -798,7 +798,9 @@ Z7_COM7F_IMF2(Int32, CFSFolder::CompareItems(UInt32 index1, UInt32 index2, PROPI return MyStringCompareNoCase(comment1, comment2); } case kpidPrefix: - if (fi1.Parent < 0) return (fi2.Parent < 0) ? 0 : -1; + if (fi1.Parent == fi2.Parent) + return 0; + if (fi1.Parent < 0) return -1; if (fi2.Parent < 0) return 1; return CompareFileNames_ForFolderList( Folders[fi1.Parent], diff --git a/CPP/7zip/UI/FileManager/LangPage.cpp b/CPP/7zip/UI/FileManager/LangPage.cpp index 3aeaf13..626c91b 100644 --- a/CPP/7zip/UI/FileManager/LangPage.cpp +++ b/CPP/7zip/UI/FileManager/LangPage.cpp @@ -253,8 +253,7 @@ bool CLangPage::OnInit() temp += " "; temp += rec.Mark; } - const int index = (int)_langCombo.AddString(temp); - _langCombo.SetItemData(index, (LPARAM)rec.LangInfoIndex); + const int index = (int)_langCombo.AddString_SetItemData(temp, (LPARAM)rec.LangInfoIndex); if (rec.IsSelected) _langCombo.SetCurSel(index); } diff --git a/CPP/7zip/UI/FileManager/MenuPage.cpp b/CPP/7zip/UI/FileManager/MenuPage.cpp index e8736b8..61dd8cb 100644 --- a/CPP/7zip/UI/FileManager/MenuPage.cpp +++ b/CPP/7zip/UI/FileManager/MenuPage.cpp @@ -222,8 +222,7 @@ bool CMenuPage::OnInit() s.Add_UInt32(val); if (i == 0) s.Insert(0, L"* "); - const int index = (int)_zoneCombo.AddString(s); - _zoneCombo.SetItemData(index, (LPARAM)val); 
+ const int index = (int)_zoneCombo.AddString_SetItemData(s, (LPARAM)val); if (val == wz) _zoneCombo.SetCurSel(index); } diff --git a/CPP/7zip/UI/FileManager/PanelItemOpen.cpp b/CPP/7zip/UI/FileManager/PanelItemOpen.cpp index aa56ef5..9d78368 100644 --- a/CPP/7zip/UI/FileManager/PanelItemOpen.cpp +++ b/CPP/7zip/UI/FileManager/PanelItemOpen.cpp @@ -825,7 +825,10 @@ void CPanel::EditItem(unsigned index, bool useEditor) return; } CProcess process; - StartEditApplication(GetItemFullPath(index), useEditor, (HWND)*this, process); + StartEditApplication(GetItemFullPath(index), useEditor, + // (HWND)*this, + GetParent(), + process); } @@ -854,7 +857,10 @@ void CPanel::OpenFolderExternal(unsigned index) path.Add_PathSepar(); } - StartApplicationDontWait(prefix, path, (HWND)*this); + StartApplicationDontWait(prefix, path, + // (HWND)*this + GetParent() + ); } @@ -981,7 +987,10 @@ void CPanel::OpenItem(unsigned index, bool tryInternal, bool tryExternal, const { // SetCurrentDirectory opens HANDLE to folder!!! 
// NDirectory::MySetCurrentDirectory(prefix); - StartApplicationDontWait(prefix, fullPath, (HWND)*this); + StartApplicationDontWait(prefix, fullPath, + // (HWND)*this + GetParent() + ); } } @@ -1732,9 +1741,15 @@ void CPanel::OpenItemInArchive(unsigned index, bool tryInternal, bool tryExterna CProcess process; HRESULT res; if (editMode) - res = StartEditApplication(fs2us(tempFilePath), useEditor, (HWND)*this, process); + res = StartEditApplication(fs2us(tempFilePath), useEditor, + // (HWND)*this, + GetParent(), + process); else - res = StartApplication(fs2us(tempDirNorm), fs2us(tempFilePath), (HWND)*this, process); + res = StartApplication(fs2us(tempDirNorm), fs2us(tempFilePath), + // (HWND)*this, + GetParent(), + process); if ((HANDLE)process == NULL) { diff --git a/CPP/7zip/UI/FileManager/PanelMenu.cpp b/CPP/7zip/UI/FileManager/PanelMenu.cpp index 9086996..e655843 100644 --- a/CPP/7zip/UI/FileManager/PanelMenu.cpp +++ b/CPP/7zip/UI/FileManager/PanelMenu.cpp @@ -488,7 +488,9 @@ struct CFolderPidls }; -static HRESULT ShellFolder_ParseDisplayName(IShellFolder *shellFolder, +HRESULT ShellFolder_ParseDisplayName(IShellFolder *shellFolder, + HWND hwnd, const UString &path, LPITEMIDLIST *ppidl); +HRESULT ShellFolder_ParseDisplayName(IShellFolder *shellFolder, HWND hwnd, const UString &path, LPITEMIDLIST *ppidl) { ULONG eaten = 0; diff --git a/CPP/7zip/UI/FileManager/PanelSort.cpp b/CPP/7zip/UI/FileManager/PanelSort.cpp index f95f8ee..57ac877 100644 --- a/CPP/7zip/UI/FileManager/PanelSort.cpp +++ b/CPP/7zip/UI/FileManager/PanelSort.cpp @@ -82,7 +82,7 @@ static inline const wchar_t *GetExtensionPtr(const UString &name) void CPanel::SetSortRawStatus() { - _isRawSortProp = false; + _isRawSortProp = 0; // false; FOR_VECTOR (i, _columns) { const CPropColumn &prop = _columns[i]; @@ -95,21 +95,15 @@ void CPanel::SetSortRawStatus() } -static int CALLBACK CompareItems2(LPARAM lParam1, LPARAM lParam2, LPARAM lpData) +static int CALLBACK CompareItems2(const LPARAM lParam1, const 
LPARAM lParam2, + const CPanel * const panel, const PROPID propID, const Int32 isRawProp) { - if (lpData == 0) - return 0; - CPanel *panel = (CPanel*)lpData; - - - PROPID propID = panel->_sortID; - if (propID == kpidNoProperty) return MyCompare(lParam1, lParam2); - if (panel->_isRawSortProp) + if (isRawProp) { - // Sha1, NtSecurity, NtReparse + // Sha1, Checksum, NtSecurity, NtReparse const void *data1; const void *data2; UInt32 dataSize1; @@ -135,7 +129,7 @@ static int CALLBACK CompareItems2(LPARAM lParam1, LPARAM lParam2, LPARAM lpData) } if (panel->_folderCompare) - return panel->_folderCompare->CompareItems((UInt32)lParam1, (UInt32)lParam2, propID, panel->_isRawSortProp); + return panel->_folderCompare->CompareItems((UInt32)lParam1, (UInt32)lParam2, propID, isRawProp); switch (propID) { @@ -189,16 +183,41 @@ int CALLBACK CompareItems(LPARAM lParam1, LPARAM lParam2, LPARAM lpData) if (lParam1 == (int)kParentIndex) return -1; if (lParam2 == (int)kParentIndex) return 1; - CPanel *panel = (CPanel*)lpData; + const CPanel *panel = (CPanel*)lpData; const bool isDir1 = panel->IsItem_Folder((unsigned)lParam1); const bool isDir2 = panel->IsItem_Folder((unsigned)lParam2); - - if (isDir1 && !isDir2) return -1; - if (isDir2 && !isDir1) return 1; + if (isDir1 != isDir2) + return isDir1 ? -1 : 1; - const int result = CompareItems2(lParam1, lParam2, lpData); - return panel->_ascending ? result: (-result); + /* + we have up to 3 iterations: + 1: prop, + 2: kpidName, kpidPrefix + 3: prop, kpidName, kpidPrefix + 3: kpidPrefix, kpidName, kpidPrefix : is some rare case + */ + PROPID propID = panel->_sortID; + int res = 0; + for (unsigned iter = 0; iter < 3; iter++) + { + res = CompareItems2(lParam1, lParam2, panel, propID, + iter ? 
0 : panel->_isRawSortProp); + if (res) + break; + if (propID == kpidName) + { + // if (!_flatMode.IsEmpty()) break; // !_flatMode ; + propID = kpidPrefix; + continue; + } + if (iter) + break; + propID = kpidName; + } + if (res == 0) + res = MyCompare(lParam1, lParam2); // order of LoadSubItems() + return panel->_ascending ? res: -res; } diff --git a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp index 1686c69..980161f 100644 --- a/CPP/7zip/UI/GUI/BenchmarkDialog.cpp +++ b/CPP/7zip/UI/GUI/BenchmarkDialog.cpp @@ -440,11 +440,9 @@ static const size_t kMaxDicSize = (size_t)1 << (22 + sizeof(size_t) / 4 * 5); static int ComboBox_Add_UInt32(NWindows::NControl::CComboBox &cb, UInt32 v) { - TCHAR s[16]; + WCHAR s[16]; ConvertUInt32ToString(v, s); - const int index = (int)cb.AddString(s); - cb.SetItemData(index, (LPARAM)v); - return index; + return (int)cb.AddString_SetItemData(s, (LPARAM)v); } @@ -481,21 +479,17 @@ bool CBenchmarkDialog::OnInit() _consoleEdit.SendMsg(WM_SETFONT, (WPARAM)_font._font, TRUE); } - UInt32 numCPUs = 1; + UInt32 numCPUs = 1; // process threads + UInt32 numCPUs_Sys = 1; // system threads { - AString s ("/ "); - NSystem::CProcessAffinity threadsInfo; threadsInfo.InitST(); +#ifndef Z7_ST + threadsInfo.Get_and_return_NumProcessThreads_and_SysThreads(numCPUs, numCPUs_Sys); +#endif - #ifndef Z7_ST - if (threadsInfo.Get() && threadsInfo.processAffinityMask != 0) - numCPUs = threadsInfo.GetNumProcessThreads(); - else - numCPUs = NSystem::GetNumberOfProcessors(); - #endif - + AString s ("/ "); s.Add_UInt32(numCPUs); s += GetProcessThreadsInfo(threadsInfo); SetItemTextA(IDT_BENCH_HARDWARE_THREADS, s); @@ -506,10 +500,8 @@ bool CBenchmarkDialog::OnInit() SetItemTextA(IDT_BENCH_SYS1, s); if (s != s2 && !s2.IsEmpty()) SetItemTextA(IDT_BENCH_SYS2, s2); - } - { - AString registers; - GetCpuName_MultiLine(s, registers); + + GetCpuName_MultiLine(s, s2); // s2==registers SetItemTextA(IDT_BENCH_CPU, s); } { @@ -526,22 +518,18 @@ bool 
CBenchmarkDialog::OnInit() // ----- Num Threads ---------- - if (numCPUs < 1) - numCPUs = 1; - numCPUs = MyMin(numCPUs, (UInt32)(1 << 6)); // it's WIN32 limit - UInt32 numThreads = Sync.NumThreads; - if (numThreads == (UInt32)(Int32)-1) numThreads = numCPUs; - if (numThreads > 1) - numThreads &= ~(UInt32)1; - const UInt32 kNumThreadsMax = (1 << 12); - if (numThreads > kNumThreadsMax) - numThreads = kNumThreadsMax; + numThreads &= ~(UInt32)1; + if (numThreads == 0) + numThreads = 1; + numThreads = MyMin(numThreads, (UInt32)(1u << 14)); m_NumThreads.Attach(GetItem(IDC_BENCH_NUM_THREADS)); - const UInt32 numTheads_Combo = numCPUs * 2; + if (numCPUs_Sys == 0) + numCPUs_Sys = 1; + const UInt32 numTheads_Combo = numCPUs_Sys * 2; UInt32 v = 1; int cur = 0; for (; v <= numTheads_Combo;) @@ -1069,16 +1057,17 @@ static void AddUsageString(UString &s, const CTotalBenchRes &info) numIter = 1000000; UInt64 usage = GetUsagePercents(info.Usage / numIter); - wchar_t w[64]; - ConvertUInt64ToString(usage, w); - unsigned len = MyStringLen(w); + wchar_t w[32]; + wchar_t *p = ConvertUInt64ToString(usage, w); + p[0] = '%'; + p[1] = 0; + unsigned len = (unsigned)(size_t)(p - w); while (len < 5) { s.Add_Space(); len++; } s += w; - s += "%"; } diff --git a/CPP/7zip/UI/GUI/CompressDialog.cpp b/CPP/7zip/UI/GUI/CompressDialog.cpp index 85d7186..53e56fe 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.cpp +++ b/CPP/7zip/UI/GUI/CompressDialog.cpp @@ -506,8 +506,7 @@ bool CCompressDialog::OnInit() { const unsigned arcIndex = ArcIndices[i]; const CArcInfoEx &ai = (*ArcFormats)[arcIndex]; - const int index = (int)m_Format.AddString(ai.Name); - m_Format.SetItemData(index, (LPARAM)arcIndex); + const int index = (int)m_Format.AddString_SetItemData(ai.Name, (LPARAM)arcIndex); if (!needSetMain) { if (Info.FormatIndex == (int)arcIndex) @@ -540,11 +539,6 @@ bool CCompressDialog::OnInit() AddComboItems(m_PathMode, k_PathMode_IDs, Z7_ARRAY_SIZE(k_PathMode_IDs), k_PathMode_Vals, Info.PathMode); - - TCHAR s[32] = 
{ TEXT('/'), TEXT(' '), 0 }; - ConvertUInt32ToString(NSystem::GetNumberOfProcessors(), s + 2); - SetItemText(IDT_COMPRESS_HARDWARE_THREADS, s); - CheckButton(IDX_COMPRESS_SHARED, Info.OpenShareForWrite); CheckButton(IDX_COMPRESS_DEL, Info.DeleteAfterCompressing); @@ -653,7 +647,19 @@ void CCompressDialog::EnableMultiCombo(unsigned id) EnableItem(id, enable); } -static LRESULT ComboBox_AddStringAscii(NControl::CComboBox &cb, const char *s); +static LRESULT ComboBox_AddStringAscii(NControl::CComboBox &cb, const char *s) +{ + return cb.AddString((CSysString)s); +} + +static LRESULT ComboBox_AddStringAscii_SetItemData(NControl::CComboBox &cb, + const char *s, LPARAM lParam) +{ + const LRESULT index = ComboBox_AddStringAscii(cb, s); + if (index >= 0) // optional check + cb.SetItemData((int)index, lParam); + return index; +} static void Combine_Two_BoolPairs(const CBoolPair &b1, const CBoolPair &b2, CBool1 &res) { @@ -1604,20 +1610,14 @@ void CCompressDialog::SetLevel2() AddLangString(s, langID); } } - const int index = (int)m_Level.AddString(s); - m_Level.SetItemData(index, (LPARAM)i); + m_Level.AddString_SetItemData(s, (LPARAM)i); } } SetNearestSelectComboBox(m_Level, level); } -static LRESULT ComboBox_AddStringAscii(NControl::CComboBox &cb, const char *s) -{ - return cb.AddString((CSysString)s); -} - -static const char *k_Auto_Prefix = "* "; +static const char * const k_Auto_Prefix = "* "; static void Modify_Auto(AString &s) { @@ -1690,8 +1690,8 @@ void CCompressDialog::SetMethod2(int keepMethodId) writtenMethodId = -1; Modify_Auto(s); } - const int itemIndex = (int)ComboBox_AddStringAscii(m_Method, s); - m_Method.SetItemData(itemIndex, writtenMethodId); + const int itemIndex = (int)ComboBox_AddStringAscii_SetItemData(m_Method, + s, writtenMethodId); if (keepMethodId == methodID) { m_Method.SetCurSel(itemIndex); @@ -1731,7 +1731,7 @@ void CCompressDialog::SetEncryptionMethod() } else if (ai.Is_Zip()) { - int index = FindRegistryFormat(ai.Name); + const int index = 
FindRegistryFormat(ai.Name); UString encryptionMethod; if (index >= 0) { @@ -1836,9 +1836,7 @@ static int Combo_AddDict2(NWindows::NControl::CComboBox &cb, size_t sizeReal, si s.Add_Char('B'); if (sizeReal == k_Auto_Dict) Modify_Auto(s); - const int index = (int)ComboBox_AddStringAscii(cb, s); - cb.SetItemData(index, (LPARAM)sizeReal); - return index; + return (int)ComboBox_AddStringAscii_SetItemData(cb, s, (LPARAM)sizeReal); } int CCompressDialog::AddDict2(size_t sizeReal, size_t sizeShow) @@ -2201,9 +2199,7 @@ int CCompressDialog::AddOrder(UInt32 size) { char s[32]; ConvertUInt32ToString(size, s); - const int index = (int)ComboBox_AddStringAscii(m_Order, s); - m_Order.SetItemData(index, (LPARAM)size); - return index; + return (int)ComboBox_AddStringAscii_SetItemData(m_Order, s, (LPARAM)size); } int CCompressDialog::AddOrder_Auto() @@ -2211,9 +2207,7 @@ int CCompressDialog::AddOrder_Auto() AString s; s.Add_UInt32(_auto_Order); Modify_Auto(s); - int index = (int)ComboBox_AddStringAscii(m_Order, s); - m_Order.SetItemData(index, (LPARAM)(INT_PTR)(-1)); - return index; + return (int)ComboBox_AddStringAscii_SetItemData(m_Order, s, (LPARAM)(INT_PTR)(-1)); } void CCompressDialog::SetOrder2() @@ -2490,9 +2484,7 @@ void CCompressDialog::SetSolidBlockSize2() AString s; Add_Size(s, _auto_Solid); Modify_Auto(s); - const int index = (int)ComboBox_AddStringAscii(m_Solid, s); - m_Solid.SetItemData(index, (LPARAM)(UInt32)(Int32)-1); - curSel = index; + curSel = (int)ComboBox_AddStringAscii_SetItemData(m_Solid, s, (LPARAM)(UInt32)(Int32)-1); } if (is7z) @@ -2501,8 +2493,7 @@ void CCompressDialog::SetSolidBlockSize2() // kSolidLog_NoSolid = 0 for xz means default blockSize if (is7z) LangString(IDS_COMPRESS_NON_SOLID, s); - const int index = (int)m_Solid.AddString(s); - m_Solid.SetItemData(index, (LPARAM)(UInt32)kSolidLog_NoSolid); + const int index = (int)m_Solid.AddString_SetItemData(s, (LPARAM)(UInt32)kSolidLog_NoSolid); if (defaultBlockSize == kSolidLog_NoSolid) curSel = index; 
} @@ -2511,16 +2502,15 @@ void CCompressDialog::SetSolidBlockSize2() { AString s; Add_Size(s, (UInt64)1 << i); - const int index = (int)ComboBox_AddStringAscii(m_Solid, s); - m_Solid.SetItemData(index, (LPARAM)(UInt32)i); + const int index = (int)ComboBox_AddStringAscii_SetItemData(m_Solid, s, (LPARAM)(UInt32)i); if (defaultBlockSize != (UInt32)(Int32)-1) if (i <= defaultBlockSize || index <= 1) curSel = index; } { - const int index = (int)m_Solid.AddString(LangString(IDS_COMPRESS_SOLID)); - m_Solid.SetItemData(index, (LPARAM)kSolidLog_FullSolid); + const int index = (int)m_Solid.AddString_SetItemData( + LangString(IDS_COMPRESS_SOLID), (LPARAM)kSolidLog_FullSolid); if (defaultBlockSize == kSolidLog_FullSolid) curSel = index; } @@ -2564,7 +2554,7 @@ static bool Is_Zstd_Mt_Supported() } */ -static const char *k_ST_Threads = " (ST)"; +static const char * const k_ST_Threads = " (ST)"; void CCompressDialog::SetNumThreads2() { @@ -2575,15 +2565,31 @@ void CCompressDialog::SetNumThreads2() if (!fi.MultiThread_()) return; - const UInt32 numHardwareThreads = NSystem::GetNumberOfProcessors(); - // 64; // for debug: + UInt32 numCPUs = 1; // process threads + UInt32 numHardwareThreads = 1; // system threads + NSystem::CProcessAffinity threadsInfo; + threadsInfo.InitST(); +#ifndef Z7_ST + threadsInfo.Get_and_return_NumProcessThreads_and_SysThreads(numCPUs, numHardwareThreads); +#endif - UInt32 defaultValue = numHardwareThreads; + AString s ("/ "); + { + s.Add_UInt32(numCPUs); + if (numCPUs != numHardwareThreads) + { + s += " / "; + s.Add_UInt32(numHardwareThreads); + } + SetItemTextA(IDT_COMPRESS_HARDWARE_THREADS, s.Ptr()); + } + + UInt32 defaultValue = numCPUs; bool useAutoThreads = true; { const CArcInfoEx &ai = Get_ArcInfoEx(); - int index = FindRegistryFormat(ai.Name); + const int index = FindRegistryFormat(ai.Name); if (index >= 0) { const NCompression::CFormatOptions &fo = m_RegistryInfo.Formats[index]; @@ -2597,19 +2603,19 @@ void CCompressDialog::SetNumThreads2() // 
const UInt32 num_ZSTD_threads_MAX = Is_Zstd_Mt_Supported() ? MY_ZSTDMT_NBWORKERS_MAX : 0; - UInt32 numAlgoThreadsMax = numHardwareThreads * 2; const int methodID = GetMethodID(); - const bool isZip = IsZipFormat(); + + UInt32 numAlgoThreadsMax = numHardwareThreads * 2; // for unknow methods if (isZip) numAlgoThreadsMax = 8 << (sizeof(size_t) / 2); // 32 threads for 32-bit : 128 threads for 64-bit else if (IsXzFormat()) - numAlgoThreadsMax = 256 * 2; + numAlgoThreadsMax = 256 * 2; // MTCODER_THREADS_MAX * 2 else switch (methodID) { case kLZMA: numAlgoThreadsMax = 2; break; - case kLZMA2: numAlgoThreadsMax = 256; break; + case kLZMA2: numAlgoThreadsMax = 256 * 2; break; // MTCODER_THREADS_MAX * 2 case kBZip2: numAlgoThreadsMax = 64; break; // case kZSTD: numAlgoThreadsMax = num_ZSTD_threads_MAX; break; case kCopy: @@ -2619,9 +2625,9 @@ void CCompressDialog::SetNumThreads2() case kPPMdZip: numAlgoThreadsMax = 1; } - UInt32 autoThreads = numHardwareThreads; + UInt32 autoThreads = numCPUs; if (autoThreads > numAlgoThreadsMax) - autoThreads = numAlgoThreadsMax; + autoThreads = numAlgoThreadsMax; const UInt64 memUse_Limit = Get_MemUse_Bytes(); @@ -2676,13 +2682,12 @@ void CCompressDialog::SetNumThreads2() int curSel = -1; { - AString s; + s.Empty(); s.Add_UInt32(autoThreads); if (autoThreads == 0) s += k_ST_Threads; Modify_Auto(s); - const int index = (int)ComboBox_AddStringAscii(m_NumThreads, s); - m_NumThreads.SetItemData(index, (LPARAM)(INT_PTR)(-1)); - // m_NumThreads.SetItemData(index, autoThreads); + const int index = (int)ComboBox_AddStringAscii_SetItemData(m_NumThreads, + s, (LPARAM)(INT_PTR)(-1)); if (useAutoThreads) curSel = index; } @@ -2693,11 +2698,11 @@ void CCompressDialog::SetNumThreads2() 1; i <= numHardwareThreads * 2 && i <= numAlgoThreadsMax; i++) { - AString s; + s.Empty(); s.Add_UInt32(i); if (i == 0) s += k_ST_Threads; - const int index = (int)ComboBox_AddStringAscii(m_NumThreads, s); - m_NumThreads.SetItemData(index, (LPARAM)(UInt32)i); + const int 
index = (int)ComboBox_AddStringAscii_SetItemData(m_NumThreads, + s, (LPARAM)(UInt32)i); if (!useAutoThreads && i == defaultValue) curSel = index; } @@ -2754,9 +2759,7 @@ int CCompressDialog::AddMemComboItem(UInt64 val, bool isPercent, bool isDefault) sRegistry.DeleteBack(); } const unsigned dataIndex = _memUse_Strings.Add(sRegistry); - const int index = (int)m_MemUse.AddString(sUser); - m_MemUse.SetItemData(index, (LPARAM)dataIndex); - return index; + return (int)m_MemUse.AddString_SetItemData(sUser, (LPARAM)dataIndex); } @@ -3439,11 +3442,7 @@ static const unsigned kTimePrec_1ns = 3; static void AddTimeOption(UString &s, UInt32 val, const UString &unit, const char *sys = NULL) { // s += " : "; - { - AString s2; - s2.Add_UInt32(val); - s += s2; - } + s.Add_UInt32(val); s.Add_Space(); s += unit; if (sys) @@ -3476,9 +3475,7 @@ int COptionsDialog::AddPrec(unsigned prec, bool isDefault) } else s.Add_UInt32(prec); - const int index = (int)m_Prec.AddString(s); - m_Prec.SetItemData(index, (LPARAM)writePrec); - return index; + return (int)m_Prec.AddString_SetItemData(s, (LPARAM)writePrec); } diff --git a/CPP/7zip/UI/GUI/CompressDialog.rc b/CPP/7zip/UI/GUI/CompressDialog.rc index 9c3ed88..df1516c 100644 --- a/CPP/7zip/UI/GUI/CompressDialog.rc +++ b/CPP/7zip/UI/GUI/CompressDialog.rc @@ -87,8 +87,8 @@ BEGIN COMBOBOX IDC_COMPRESS_SOLID, g1x, 144, g1xs, 140, MY_COMBO LTEXT "Number of CPU &threads:", IDT_COMPRESS_THREADS, m, 167, g0xs, 8 - COMBOBOX IDC_COMPRESS_THREADS, g1x, 165, g1xs - 35, 140, MY_COMBO - RTEXT "", IDT_COMPRESS_HARDWARE_THREADS, g1x + g1xs - 35 + 10, 167, 25, MY_TEXT_NOPREFIX + COMBOBOX IDC_COMPRESS_THREADS, g1x, 165, g1xs - 40, 140, MY_COMBO + RTEXT "", IDT_COMPRESS_HARDWARE_THREADS, g1x + g1xs - 40, 167, 40, 16, SS_NOPREFIX LTEXT "Memory usage for Compressing:", IDT_COMPRESS_MEMORY, m, 184, g2xs, 8 diff --git a/CPP/7zip/UI/GUI/ExtractDialog.cpp b/CPP/7zip/UI/GUI/ExtractDialog.cpp index 4628482..467cf18 100644 --- a/CPP/7zip/UI/GUI/ExtractDialog.cpp +++ 
b/CPP/7zip/UI/GUI/ExtractDialog.cpp @@ -102,8 +102,7 @@ void AddComboItems(NControl::CComboBox &combo, const UInt32 *langIDs, unsigned n { UString s = LangString(langIDs[i]); s.RemoveChar(L'&'); - const int index = (int)combo.AddString(s); - combo.SetItemData(index, (LPARAM)i); + combo.AddString_SetItemData(s, (LPARAM)i); if (values[i] == curVal) curSel = i; } diff --git a/CPP/Common/Common0.h b/CPP/Common/Common0.h index 55606cd..5781a95 100644 --- a/CPP/Common/Common0.h +++ b/CPP/Common/Common0.h @@ -126,8 +126,9 @@ if compiled with new GCC libstdc++, GCC libstdc++ can use: #pragma GCC diagnostic ignored "-Wglobal-constructors" #pragma GCC diagnostic ignored "-Wexit-time-destructors" -#if defined(Z7_LLVM_CLANG_VERSION) && __clang_major__ >= 18 // 18.1.0RC -#pragma GCC diagnostic ignored "-Wswitch-default" +#if defined(Z7_LLVM_CLANG_VERSION) && __clang_major__ >= 18 /* 18.1.0RC */ \ + || defined(Z7_APPLE_CLANG_VERSION) && __clang_major__ >= 16 // for APPLE=17 (LLVM=19) + #pragma GCC diagnostic ignored "-Wswitch-default" #endif // #pragma GCC diagnostic ignored "-Wunused-private-field" // #pragma GCC diagnostic ignored "-Wnonportable-system-include-path" diff --git a/CPP/Common/MyBuffer.h b/CPP/Common/MyBuffer.h index 80f0205..08c10a3 100644 --- a/CPP/Common/MyBuffer.h +++ b/CPP/Common/MyBuffer.h @@ -202,7 +202,53 @@ public: } }; -typedef CObjArray CByteArr; + +/* CSmallObjArray can be used for Byte arrays + or for arrays whose total size in bytes does not exceed size_t ranges. + So there is no need to use Z7_ARRAY_NEW macro in CSmallObjArray code. 
*/ +template class CSmallObjArray +{ +protected: + T *_items; +private: + // we disable copy + CSmallObjArray(const CSmallObjArray &buffer); + void operator=(const CSmallObjArray &buffer); +public: + void Free() + { + delete []_items; + _items = NULL; + } + CSmallObjArray(size_t size): _items(NULL) + { + if (size != 0) + { + // Z7_ARRAY_NEW(_items, T, size) + _items = new T[size]; + } + } + CSmallObjArray(): _items(NULL) {} + ~CSmallObjArray() { delete []_items; } + + operator T *() { return _items; } + operator const T *() const { return _items; } + const T* ConstData() const { return _items; } + T* NonConstData() const { return _items; } + T* NonConstData() { return _items; } + // const T* Data() const { return _items; } + // T* Data() { return _items; } + + void Alloc(size_t newSize) + { + delete []_items; + _items = NULL; + // Z7_ARRAY_NEW(_items, T, newSize) + _items = new T[newSize]; + } +}; + +typedef CSmallObjArray CByteArr; typedef CObjArray CBoolArr; typedef CObjArray CIntArr; typedef CObjArray CUIntArr; diff --git a/CPP/Windows/Control/ComboBox.cpp b/CPP/Windows/Control/ComboBox.cpp index 8da487d..2e9c8cb 100644 --- a/CPP/Windows/Control/ComboBox.cpp +++ b/CPP/Windows/Control/ComboBox.cpp @@ -63,4 +63,13 @@ LRESULT CComboBox::GetLBText(int index, UString &s) } #endif +LRESULT CComboBox::AddString_SetItemData(LPCWSTR s, LPARAM lParam) +{ + const LRESULT index = AddString(s); + // NOTE: SetItemData((int)-1, lParam) works as unexpected. 
+ if (index >= 0) // optional check, because (index < 0) is not expected for normal inputs + SetItemData((int)index, lParam); + return index; +} + }} diff --git a/CPP/Windows/Control/ComboBox.h b/CPP/Windows/Control/ComboBox.h index 2a60b8a..224efca 100644 --- a/CPP/Windows/Control/ComboBox.h +++ b/CPP/Windows/Control/ComboBox.h @@ -21,6 +21,8 @@ public: LRESULT AddString(LPCWSTR s); #endif + LRESULT AddString_SetItemData(LPCWSTR s, LPARAM lParam); + /* If this parameter is -1, any current selection in the list is removed and the edit control is cleared.*/ LRESULT SetCurSel(int index) { return SendMsg(CB_SETCURSEL, MY_int_TO_WPARAM(index), 0); } LRESULT SetCurSel(unsigned index) { return SendMsg(CB_SETCURSEL, index, 0); } diff --git a/CPP/Windows/FileFind.cpp b/CPP/Windows/FileFind.cpp index 64075ab..669541e 100644 --- a/CPP/Windows/FileFind.cpp +++ b/CPP/Windows/FileFind.cpp @@ -1162,6 +1162,15 @@ void CFileInfoBase::SetFrom_stat(const struct stat &st) MTime = st.st_mtimespec; ATime = st.st_atimespec; + #elif defined(__QNXNTO__) && defined(__ARM__) && !defined(__aarch64__) + + // CTime = ST_CTIME(st); + // MTime = ST_MTIME(st); + // ATime = ST_ATIME(st); + CTime.tv_sec = st.st_ctime; CTime.tv_nsec = 0; + MTime.tv_sec = st.st_mtime; MTime.tv_nsec = 0; + ATime.tv_sec = st.st_atime; ATime.tv_nsec = 0; + #else // timespec_To_FILETIME(st.st_ctim, CTime, &CTime_ns100); // timespec_To_FILETIME(st.st_mtim, MTime, &MTime_ns100); @@ -1312,7 +1321,7 @@ bool CDirEntry::IsDots() const throw() /* some systems (like CentOS 7.x on XFS) have (Type == DT_UNKNOWN) we can call fstatat() for that case, but we use only (Name) check here */ -#if !defined(_AIX) && !defined(__sun) +#if !defined(_AIX) && !defined(__sun) && !defined(__QNXNTO__) if (Type != DT_DIR && Type != DT_UNKNOWN) return false; #endif @@ -1352,7 +1361,7 @@ bool CEnumerator::NextAny(CDirEntry &fi, bool &found) fi.iNode = de->d_ino; -#if !defined(_AIX) && !defined(__sun) +#if !defined(_AIX) && !defined(__sun) && 
!defined(__QNXNTO__) fi.Type = de->d_type; /* some systems (like CentOS 7.x on XFS) have (Type == DT_UNKNOWN) we can set (Type) from fstatat() in that case. diff --git a/CPP/Windows/FileFind.h b/CPP/Windows/FileFind.h index f68673a..944bca2 100644 --- a/CPP/Windows/FileFind.h +++ b/CPP/Windows/FileFind.h @@ -277,13 +277,13 @@ typedef CFileInfo CDirEntry; struct CDirEntry { ino_t iNode; -#if !defined(_AIX) && !defined(__sun) +#if !defined(_AIX) && !defined(__sun) && !defined(__QNXNTO__) Byte Type; #endif FString Name; /* -#if !defined(_AIX) && !defined(__sun) +#if !defined(_AIX) && !defined(__sun) && !defined(__QNXNTO__) bool IsDir() const { // (Type == DT_UNKNOWN) on some systems @@ -310,7 +310,7 @@ public: bool Fill_FileInfo(const CDirEntry &de, CFileInfo &fileInfo, bool followLink) const; bool DirEntry_IsDir(const CDirEntry &de, bool followLink) const { -#if !defined(_AIX) && !defined(__sun) +#if !defined(_AIX) && !defined(__sun) && !defined(__QNXNTO__) if (de.Type == DT_DIR) return true; if (de.Type != DT_UNKNOWN) diff --git a/CPP/Windows/SecurityUtils.h b/CPP/Windows/SecurityUtils.h index 7219f06..022a8f3 100644 --- a/CPP/Windows/SecurityUtils.h +++ b/CPP/Windows/SecurityUtils.h @@ -3,7 +3,11 @@ #ifndef ZIP7_INC_WINDOWS_SECURITY_UTILS_H #define ZIP7_INC_WINDOWS_SECURITY_UTILS_H +#if defined(__MINGW32__) || defined(__MINGW64__) +#include +#else #include +#endif #include "Defs.h" diff --git a/CPP/Windows/System.cpp b/CPP/Windows/System.cpp index 4745785..6999ef9 100644 --- a/CPP/Windows/System.cpp +++ b/CPP/Windows/System.cpp @@ -5,8 +5,9 @@ #ifndef _WIN32 #include #include -#if defined(__APPLE__) || defined(__DragonFly__) || \ - defined(BSD) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) +#if defined(__APPLE__) || defined(__DragonFly__) \ + || defined(BSD) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) \ + || defined(__QNXNTO__) #include #else #include @@ -299,8 +300,9 @@ bool GetRamSize(size_t &size) size = 
(size_t)sizeof(size_t) << 29; size64 = size; -#if defined(__APPLE__) || defined(__DragonFly__) || \ - defined(BSD) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) +#if defined(__APPLE__) || defined(__DragonFly__) \ + || defined(BSD) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) \ + || defined(__QNXNTO__) uint64_t val = 0; int mib[2]; diff --git a/CPP/Windows/System.h b/CPP/Windows/System.h index 0c80373..041a44d 100644 --- a/CPP/Windows/System.h +++ b/CPP/Windows/System.h @@ -15,6 +15,8 @@ namespace NWindows { namespace NSystem { +UInt32 GetNumberOfProcessors(); + #ifdef _WIN32 struct CCpuGroups @@ -103,6 +105,25 @@ struct CProcessAffinity return CountAffinity(systemAffinityMask); } + // it returns normilized number of threads + void Get_and_return_NumProcessThreads_and_SysThreads(UInt32 &numProcessThreads, UInt32 &numSysThreads) + { + UInt32 num1 = 0, num2 = 0; + if (Get()) + { + num1 = GetNumProcessThreads(); + num2 = GetNumSystemThreads(); + } + if (num1 == 0) + num1 = NSystem::GetNumberOfProcessors(); + if (num1 == 0) + num1 = 1; + if (num2 < num1) + num2 = num1; + numProcessThreads = num1; + numSysThreads = num2; + } + BOOL Get(); BOOL SetProcAffinity() const @@ -177,8 +198,6 @@ struct CProcessAffinity #endif // _WIN32 -UInt32 GetNumberOfProcessors(); - bool GetRamSize(size_t &size); // returns false, if unknown ram size unsigned long Get_File_OPEN_MAX(); diff --git a/CPP/Windows/SystemInfo.cpp b/CPP/Windows/SystemInfo.cpp index 35846e0..2eced2a 100644 --- a/CPP/Windows/SystemInfo.cpp +++ b/CPP/Windows/SystemInfo.cpp @@ -22,7 +22,7 @@ #if defined(__GLIBC__) && (__GLIBC__ * 100 + __GLIBC_MINOR__ >= 216) #define Z7_GETAUXV_AVAILABLE -#else +#elif !defined(__QNXNTO__) // #pragma message("=== is not NEW GLIBC === ") #if defined __has_include #if __has_include () @@ -58,7 +58,7 @@ #ifdef USE_HWCAP -#if defined(__FreeBSD__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) // #if (__FreeBSD__ >= 13) // (FreeBSD 12.01 
is required for elf_aux_info() ???) static unsigned long MY_getauxval(int aux) diff --git a/CPP/Windows/TimeUtils.h b/CPP/Windows/TimeUtils.h index 4a9d0f2..8e1e478 100644 --- a/CPP/Windows/TimeUtils.h +++ b/CPP/Windows/TimeUtils.h @@ -65,6 +65,14 @@ inline bool FILETIME_IsZero(const FILETIME &ft) #define ST_MTIME(st) st.st_mtimespec #define ST_ATIME(st) st.st_atimespec #define ST_CTIME(st) st.st_ctimespec + #elif defined(__QNXNTO__) && defined(__ARM__) && !defined(__aarch64__) + // QNX armv7le (32-bit) for "struct stat" timestamps uses time_t instead of timespec + inline CFiTime ST_MTIME(const struct stat &st) + { timespec ts; ts.tv_sec = st.st_mtime; ts.tv_nsec = 0; return ts; } + inline CFiTime ST_ATIME(const struct stat &st) + { timespec ts; ts.tv_sec = st.st_atime; ts.tv_nsec = 0; return ts; } + inline CFiTime ST_CTIME(const struct stat &st) + { timespec ts; ts.tv_sec = st.st_ctime; ts.tv_nsec = 0; return ts; } #else #define ST_MTIME(st) st.st_mtim #define ST_ATIME(st) st.st_atim diff --git a/DOC/7zip.wxs b/DOC/7zip.wxs index 703e22e..8c6ef4c 100644 --- a/DOC/7zip.wxs +++ b/DOC/7zip.wxs @@ -1,7 +1,7 @@ - - + + diff --git a/DOC/License.txt b/DOC/License.txt index bbb56a3..b1a421a 100644 --- a/DOC/License.txt +++ b/DOC/License.txt @@ -3,7 +3,7 @@ License for use and distribution ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - 7-Zip Copyright (C) 1999-2025 Igor Pavlov. + 7-Zip Copyright (C) 1999-2026 Igor Pavlov. The licenses for files are: @@ -58,7 +58,7 @@ BSD 3-clause License in 7-Zip code Copyright (c) 2015-2016, Apple Inc. All rights reserved. Copyright (c) Facebook, Inc. All rights reserved. - Copyright (c) 2023-2025 Igor Pavlov. + Copyright (c) 2023-2026 Igor Pavlov. Text of the "BSD 3-clause License" ---------------------------------- @@ -102,7 +102,7 @@ BSD 2-clause License in 7-Zip code XXH64 code in 7-Zip was derived from the original XXH64 code developed by Yann Collet. Copyright (c) 2012-2021 Yann Collet. - Copyright (c) 2023-2025 Igor Pavlov. 
+ Copyright (c) 2023-2026 Igor Pavlov. Text of the "BSD 2-clause License" ---------------------------------- diff --git a/DOC/readme.txt b/DOC/readme.txt index cc89a39..26d0f5a 100644 --- a/DOC/readme.txt +++ b/DOC/readme.txt @@ -1,15 +1,15 @@ -7-Zip 25.01 Sources +7-Zip 26.00 Sources ------------------- -7-Zip is a file archiver for Windows. +7-Zip is a file archiver for Windows. -7-Zip Copyright (C) 1999-2025 Igor Pavlov. +7-Zip Copyright (C) 1999-2026 Igor Pavlov. License Info ------------ -7-Zip is free software distributed under the GNU LGPL +7-Zip is free software distributed under the GNU LGPL (except for unRar code). Also some code is licensed under the "BSD 3-clause License". Read "License.txt" for more infomation about license. @@ -27,7 +27,7 @@ Please check main restriction from unRar license: not be used to develop a RAR (WinRAR) compatible archiver. In brief it means: -1) You can compile and use compiled files under GNU LGPL rules, since +1) You can compile and use compiled files under GNU LGPL rules, since unRAR license almost has no restrictions for compiled files. You can link these compiled files to LGPL programs. 2) You can fix bugs in source code and use compiled fixed version. @@ -60,7 +60,7 @@ Tools / Options / Directories - Library files Also you need Microsoft Macro Assembler: - - ml.exe for x86 + - ml.exe for x86 - ml64.exe for x64 You can use ml.exe from Windows SDK for Windows Vista or some later versions. @@ -85,7 +85,7 @@ OLD_COMPILER for old VC compiler, like MSCV 6.0. MY_DYNAMIC_LINK - for dynamic linking to the run-time library (msvcrt.dll). + for dynamic linking to the run-time library (msvcrt.dll). The default makefile option is static linking to the run-time library. 
To compile all 7-Zip files for x64 with Visual Studio 2022, @@ -116,23 +116,23 @@ So if you compile the version with Assembeler code, you will get faster 7-Zip bi 7-Zip's assembler code uses the following syntax for different platforms: -1) x86 and x86-64 (AMD64): MASM syntax. +1) x86 and x86-64 (AMD64): MASM syntax. Now there are 3 programs that supports MASM syntax in Linux. -' 'Asmc Macro Assembler, JWasm, and UASM. Note that JWasm now doesn't support some +' 'Asmc Macro Assembler, JWasm, and UASM. Note that JWasm now doesn't support some cpu instructions used in 7-Zip. - So you must install Asmc Macro Assembler in Linux or UASM, if you want to compile + So you must install Asmc Macro Assembler in Linux or UASM, if you want to compile fastest version of 7-Zip x86 and x86-64: https://github.com/nidud/asmc https://github.com/Terraspace/UASM -2) arm64: GNU assembler for ARM64 with preprocessor. +2) arm64: GNU assembler for ARM64 with preprocessor. That systax is supported by GCC and CLANG for ARM64. There are different binaries that can be compiled from 7-Zip source. There are 2 main files in folder for compiling: makefile - that can be used for compiling Windows version of 7-Zip with nmake command - makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip or Windows version + makefile.gcc - that can be used for compiling Linux/macOS versions of 7-Zip or Windows version with MINGW (GCC) with make command. At first you must change the current folder to folder that contains `makefile.gcc`: @@ -143,7 +143,7 @@ Then you can compile `makefile.gcc` with the command: make -j -f makefile.gcc -Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile +Also there are additional "*.mak" files in folder "CPP/7zip/" that can be used to compile 7-Zip binaries with optimized code and optimzing options. 
To compile with GCC without assembler: @@ -171,10 +171,10 @@ makefile.gcc supports some variables that can change compile options USE_JWASM=1 use JWasm assembler instead of Asmc. - Note that JWasm doesn't support AES instructions. So AES code from C version AesOpt.c + Note that JWasm doesn't support AES instructions. So AES code from C version AesOpt.c will be used instead of assembler code from AesOpt.asm. -If you want to use UASM for x86-64 compiling, you can change 7zip_gcc.mak, +If you want to use UASM for x86-64 compiling, you can change 7zip_gcc.mak, or send IS_X64=1 USE_ASM=1 MY_ASM="$UASM" to make command calling: UASM="$PWD/GccUnixR/uasm" cd "7zip-src/CPP/7zip/Bundles/Alone2" @@ -187,11 +187,11 @@ DISABLE_RAR=1 DISABLE_RAR_COMPRESS=1 removes "not fully free" code of RAR decompression codecs from compilation. -RAR decompression codecs in 7-Zip code has some additional license restrictions, +RAR decompression codecs in 7-Zip code has some additional license restrictions, that can be treated as not fully compatible with free-software licenses. DISABLE_RAR_COMPRESS=1 allows to exclude such "not-fully-free" RAR code from compilation. -if DISABLE_RAR_COMPRESS=1 is specified, 7-zip will not be able to decompress files -from rar archives, but 7-zip still will be able to open rar archives to get list of +if DISABLE_RAR_COMPRESS=1 is specified, 7-zip will not be able to decompress files +from rar archives, but 7-zip still will be able to open rar archives to get list of files or to extract files that are stored without compression. if DISABLE_RAR=1 is specified, 7-zip will not be able to work with RAR archives. @@ -203,11 +203,11 @@ Now there are two different ports of 7-Zip for Linux/macOS: 1) p7zip - another port of 7-Zip for Linux, made by an independent developer. The latest version of p7zip now is 16.02, and that p7zip 16.02 is outdated now. 
- http://sourceforge.net/projects/p7zip/ + http://sourceforge.net/projects/p7zip/ 2) 7-Zip for Linux/macOS - this package - it's new code with all changes from latest 7-Zip for Windows. -These two ports are not identical. +These two ports are not identical. Note also that some Linux specific things can be implemented better in p7zip than in new 7-Zip for Linux. @@ -218,13 +218,13 @@ Notes: 7-Zip consists of COM modules (DLL files). But 7-Zip doesn't use standard COM interfaces for creating objects. Look at -7zip\UI\Client7z folder for example of using DLL files of 7-Zip. +7zip\UI\Client7z folder for example of using DLL files of 7-Zip. Some DLL files can use other DLL files from 7-Zip. If you don't like it, you must use standalone version of DLL. To compile standalone version of DLL you must include all used parts -to project and define some defs. -For example, 7zip\Bundles\Format7z is a standalone version of 7z.dll -that works with 7z format. So you can use such DLL in your project +to project and define some defs. +For example, 7zip\Bundles\Format7z is a standalone version of 7z.dll +that works with 7z format. So you can use such DLL in your project without additional DLL files. @@ -284,7 +284,7 @@ Windows common files for Windows related code UI Agent Intermediary modules for FAR plugin and Explorer plugin - Client7z Test application for 7za.dll + Client7z Test application for 7za.dll Common Common UI files Console 7z.exe : Console version Explorer 7-zip.dll: 7-Zip Shell extension diff --git a/DOC/src-history.txt b/DOC/src-history.txt index 48c9647..657f04f 100644 --- a/DOC/src-history.txt +++ b/DOC/src-history.txt @@ -1,11 +1,20 @@ HISTORY of the 7-Zip source code -------------------------------- +26.00 2026-02-12 +------------------------- +- improved code for ZIP, CPIO, RAR, UDF, QCOW, Compound. +- 7-Zip File Manager: improved sorting order of the file list. It uses file name as secondary sorting key.
+- 7-Zip File Manager: improved Benchmark to support systems with more than 64 CPU threads. +- the bug was fixed: 7-Zip could not correctly extract TAR archives containing sparse files. +- some bugs were fixed. + + 25.01 2025-08-03 ------------------------- -- The code for handling symbolic links has been changed +- CVE-2025-55188 : The code for handling symbolic links has been changed to provide greater security when extracting files from archives. - Command line switch -snld20 can be used to bypass default security + Command line switch -snld20 can be used to bypass default security checks when creating symbolic links. @@ -18,17 +27,19 @@ HISTORY of the 7-Zip source code - bzip2 compression speed was increased by 15-40%. - deflate (zip/gz) compression speed was increased by 1-3%. - improved support for zip, cpio and fat archives. -- fixed some bugs and vulnerabilities. -- the bug was fixed : CVE-2025-53816 : 7-Zip could work incorrectly for some incorrect RAR archives. -- the bug was fixed : CVE-2025-53817 : 7-Zip could crash for some incorrect COM (Compound File) archives. +- fixed some bugs. +- CVE-2025-11001 and CVE-2025-11002 : Vulnerabilities were fixed in symbolic link processing + when extracting files from archives. +- the bug was fixed : CVE-2025-53816 : 7-Zip could work incorrectly for some incorrect RAR archives. +- the bug was fixed : CVE-2025-53817 : 7-Zip could crash for some incorrect COM (Compound File) archives. 24.09 2024-11-29 ------------------------- - The default dictionary size values for LZMA/LZMA2 compression methods were increased: dictionary size compression level - v24.08 v24.09 v24.09 - 32-bit 64-bit + v24.08 v24.09 v24.09 + 32-bit 64-bit 8 MB 16 MB 16 MB -mx4 16 MB 32 MB 32 MB -mx5 : Normal 32 MB 64 MB 64 MB -mx6 @@ -38,11 +49,11 @@ HISTORY of the 7-Zip source code The default dictionary size values for 32-bit versions of LZMA/LZMA2 don't exceed 64 MB.
- 7-Zip now can calculate the following hash checksums: SHA-512, SHA-384, SHA3-256 and MD5. - APM and HFS support was improved. -- If an archive update operation uses a temporary archive folder and - the archive is moved to the destination folder, 7-Zip shows the progress of moving +- If an archive update operation uses a temporary archive folder and + the archive is moved to the destination folder, 7-Zip shows the progress of moving the archive file, as this operation can take a long time if the archive is large. - The bug was fixed: 7-Zip File Manager didn't propagate Zone.Identifier stream - for extacted files from nested archives (if there is open archive inside another open archive). + for extracted files from nested archives (if there is open archive inside another open archive). - Some bugs were fixed. @@ -76,7 +87,7 @@ HISTORY of the 7-Zip source code ------------------------- - New switch -myv={MMNN} to set decoder compatibility version for 7z archive creating. {MMNN} is 4-digit number that represents the version of 7-Zip without a dot. - If -myv={MMNN} switch is specified, 7-Zip will only use compression methods that can + If -myv={MMNN} switch is specified, 7-Zip will only use compression methods that can be decoded by the specified version {MMNN} of 7-Zip and newer versions. If -myv={MMNN} switch is not specified, -myv=2300 is used, and 7-Zip will only use compression methods that can be decoded by 7-Zip 23.00 and newer versions. @@ -127,15 +138,15 @@ HISTORY of the 7-Zip source code And some warning types are disabled in 2 files: - C/Compiler.h for C/C++ code warnings. - CPP/Common/Common.h for C++ code warnings. 
-- Linux/macOS versions of 7-Zip: IUnknown interface in new code doesn't use +- Linux/macOS versions of 7-Zip: IUnknown interface in new code doesn't use virtual destructor that was used in previous 7-Zip and p7zip: // virtual ~IUnknown() {} - So 7-Zip's dynamically linked shared libraries (codecs) are not compatible + So 7-Zip's dynamically linked shared libraries (codecs) are not compatible between new 7-Zip for Linux/macOS and old 7-Zip (and p7zip). - Some optimizations in filters code: BCJ, BCJ2, Swap* and opthers. -- If 7-Zip uses BCJ2 filter for big datasets compressing, it can use additional temp - files in system's TEMP folder. 7-Zip uses temp file for additional compressed - data stream, if size of such compressed stream is larger than predefined limit: +- If 7-Zip uses BCJ2 filter for big datasets compressing, it can use additional temp + files in system's TEMP folder. 7-Zip uses temp file for additional compressed + data stream, if size of such compressed stream is larger than predefined limit: 16 MiB in 32-bit version, 4 GiB in 64-bit version. - Some bugs were fixed. @@ -157,7 +168,7 @@ HISTORY of the 7-Zip source code 21.06 2021-11-24 ------------------------- - Bug in LZMA encoder in file LzmaEnc.c was fixed: - LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly, + LzmaEnc_MemEncode(), LzmaEncode() and LzmaCompress() could work incorrectly, if size value for output buffer is smaller than size required for all compressed data. LzmaEnc_Encode() could work incorrectly, if callback ISeqOutStream::Write() doesn't write all compressed data. @@ -171,8 +182,8 @@ HISTORY of the 7-Zip source code ------------------------- - 7-Zip now reduces the number of working CPU threads for compression, if RAM size is not enough for compression with big LZMA2 dictionary. 
-- 7-Zip now can create and check "file.sha256" and "file.sha1" text files - that contain the list of file names and SHA-1 / SHA-256 checksums in format +- 7-Zip now can create and check "file.sha256" and "file.sha1" text files + that contain the list of file names and SHA-1 / SHA-256 checksums in format compatible with sha1sum/sha256sum programs. @@ -187,7 +198,7 @@ HISTORY of the 7-Zip source code - 7-Zip now writes additional field for filename in UTF-8 encoding to zip archives. It allows to extract correct file name from zip archives on different systems. - The command line version of 7-Zip for macOS was released. -- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux +- The speed for LZMA and LZMA2 decompression in arm64 versions for macOS and Linux was increased by 20%-60%. - Some changes and improvements in ZIP, TAR and NSIS code. @@ -195,7 +206,7 @@ HISTORY of the 7-Zip source code 21.01 alpha 2021-03-09 ------------------------- - The command line version of 7-Zip for Linux was released. -- The improvements for speed of ARM64 version using hardware CPU instructions +- The improvements for speed of ARM64 version using hardware CPU instructions for AES, CRC-32, SHA-1 and SHA-256. - The bug in versions 18.02 - 21.00 was fixed: 7-Zip could not correctly extract some ZIP archives created with xz compression method. @@ -205,30 +216,30 @@ HISTORY of the 7-Zip source code 20.02 alpha 2020-08-08 ------------------------- - The default number of LZMA2 chunks per solid block in 7z archive was increased to 64. - It allows to increase the compression speed for big 7z archives, if there is a big number + It allows to increase the compression speed for big 7z archives, if there is a big number of CPU cores and threads. - The speed of PPMd compressing/decompressing was increased for 7z/ZIP/RAR archives. -- The new -ssp switch. 
If the switch -ssp is specified, 7-Zip doesn't allow the system - to modify "Last Access Time" property of source files for archiving and hashing operations. +- The new -ssp switch. If the switch -ssp is specified, 7-Zip doesn't allow the system + to modify "Last Access Time" property of source files for archiving and hashing operations. - Some bugs were fixed. 20.00 alpha 2020-02-06 ------------------------- -- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, +- 7-Zip now supports new optional match finders for LZMA/LZMA2 compression: bt5 and hc5, that can work faster than bt4 and hc4 match finders for the data with big redundancy. -- The compression ratio was improved for Fast and Fastest compression levels with the +- The compression ratio was improved for Fast and Fastest compression levels with the following default settings: - Fastest level (-mx1) : hc5 match finder with 256 KB dictionary. - Fast level (-mx3) : hc5 match finder with 4 MB dictionary. -- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra +- Minor speed optimizations in multithreaded LZMA/LZMA2 compression for Normal/Maximum/Ultra compression levels. - bzip2 decoding code was updated to support bzip2 archives, created by lbzip2 program. 19.02 2019-09-05 ------------------------- -- Support for SHA-1/SHA-256 optimized code in +- Support for SHA-1/SHA-256 optimized code in Sha1Opt.c, Sha256Opt.c, Sha256Opt.asm, Sha1Opt.asm. @@ -249,7 +260,7 @@ HISTORY of the 7-Zip source code There was memory leak in multithreading xz decoder - XzDecMt_Decode(), if xz stream contains only one block. - 7-Zip 18.02-18.05 used only one CPU thread for bz2 archive creation. -- The changes for MSVS compiler makefiles: +- The changes for MSVS compiler makefiles: - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64) instead of "CPU" macroname with values (AMD64, ARM64). 
- the makefiles by default now use static version of the run-time library. @@ -257,17 +268,17 @@ HISTORY of the 7-Zip source code 18.05 2018-04-30 ------------------------- -- The speed for LZMA/LZMA2 compressing was increased - by 8% for fastest/fast compression levels and +- The speed for LZMA/LZMA2 compressing was increased + by 8% for fastest/fast compression levels and by 3% for normal/maximum compression levels. - Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in - Windows 10 because of some BUG with "Large Pages" in Windows 10. + Windows 10 because of some BUG with "Large Pages" in Windows 10. Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299). 18.03 beta 2018-03-04 ------------------------- -- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm +- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm for x64 with about 30% higher speed than main version of LZMA decoder written in C. - The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%. - 7-Zip now can use multi-threading for 7z/LZMA2 decoding, @@ -278,7 +289,7 @@ HISTORY of the 7-Zip source code 17.00 beta 2017-04-29 ------------------------- -- NewHandler.h / NewHandler.cpp: +- NewHandler.h / NewHandler.cpp: now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900). - C/7zTypes.h : the names of variables in interface structures were changed (vt). - Some bugs were fixed. 7-Zip could crash in some cases. @@ -288,53 +299,53 @@ HISTORY of the 7-Zip source code 16.02 2016-05-21 ------------------------- - The BUG in 16.00 - 16.01 was fixed: - Split Handler (SplitHandler.cpp) returned incorrect + Split Handler (SplitHandler.cpp) returned incorrect total size value (kpidSize) for split archives. 16.01 2016-05-19 -------------------------- +------------------------- - Some bugs were fixed, - Some internal changes to reduce the number of compiler warnings. 
16.00 2016-05-10 -------------------------- +------------------------- - 7-Zip now can extract multivolume ZIP archives (z01, z02, ... , zip). - Some bugs were fixed, 15.12 2015-11-19 -------------------------- +------------------------- - The BUG in C version of 7z decoder was fixed: 7zDec.c : SzDecodeLzma2() 7z decoder could mistakenly report about decoding error for some 7z archives that use LZMA2 compression method. - The probability to get that mistaken decoding error report was about - one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). + The probability to get that mistaken decoding error report was about + one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). - The BUG (in 9.26-15.11) in C version of 7z decoder was fixed: 7zArcIn.c : SzReadHeader2() - 7z decoder worked incorrectly for 7z archives that contain - empty solid blocks, that can be placed to 7z archive, if some file is + 7z decoder worked incorrectly for 7z archives that contain + empty solid blocks, that can be placed to 7z archive, if some file is unavailable for reading during archive creation. 15.09 beta 2015-10-16 -------------------------- +------------------------- - The BUG in LZMA / LZMA2 encoding code was fixed. The BUG in LzFind.c::MatchFinder_ReadBlock() function. If input data size is larger than (4 GiB - dictionary_size), the following code worked incorrectly: - - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions - for compressing from memory to memory. + - LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions + for compressing from memory to memory. That BUG is not related to LZMA encoder version that works via streams. 
- - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if - default value of chunk size (CLzma2EncProps::blockSize) is changed + - LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if + default value of chunk size (CLzma2EncProps::blockSize) is changed to value larger than (4 GiB - dictionary_size). 9.38 beta 2015-01-03 -------------------------- +------------------------- - The BUG in 9.31-9.37 was fixed: IArchiveGetRawProps interface was disabled for 7z archives. - The BUG in 9.26-9.36 was fixed: @@ -342,10 +353,10 @@ HISTORY of the 7-Zip source code 9.36 beta 2014-12-26 -------------------------- +------------------------- - The BUG in command line version was fixed: 7-Zip created temporary archive in current folder during update archive - operation, if -w{Path} switch was not specified. + operation, if -w{Path} switch was not specified. The fixed 7-Zip creates temporary archive in folder that contains updated archive. - The BUG in 9.33-9.35 was fixed: 7-Zip silently ignored file reading errors during 7z or gz archive creation, @@ -355,8 +366,8 @@ HISTORY of the 7-Zip source code 9.31 2012-10-31 ------------------------- -- InBuffer.h : CInBuffer uses ISequentialInStream *_stream; instead of CMyComPtr - OutBuffer.h: COutBuffer uses ISequentialOutStream *_stream; instead of CMyComPtr +- InBuffer.h : CInBuffer uses ISequentialInStream *_stream; instead of CMyComPtr + OutBuffer.h: COutBuffer uses ISequentialOutStream *_stream; instead of CMyComPtr 9.26 2011-04-11 @@ -366,13 +377,13 @@ HISTORY of the 7-Zip source code 9.21 2011-04-11 -------------------------- +------------------------- - New class FString for file names at file systems. - Speed optimization in CRC code for big-endian CPUs. 9.18 2010-11-02 -------------------------- +------------------------- - New small SFX module for installers (C/Util/SfxSetup). @@ -409,7 +420,7 @@ HISTORY of the 7-Zip source code 4.61 2008-11-23 ------------------------- - Bug in ver. 
4.58+ was fixed: - 7-Zip didn't use any -m* switch after -mtc, -mcl or -mcu for .zip archives. + 7-Zip didn't use any -m* switch after -mtc, -mcl or -mcu for .zip archives. - Bug in .CAB code was fixed. 7-Zip didn't show some empty files, if .CAB archive contains more than one empty file. @@ -417,13 +428,13 @@ HISTORY of the 7-Zip source code 4.59 2008-07-27 ------------------------- - Bug was fixed: - LZMA Encoder in fast compression mode could access memory outside of + LZMA Encoder in fast compression mode could access memory outside of allocated range in some rare cases. 4.59 alpha 2008-05-30 ------------------------- -- BUGS was fixed: +- BUGS was fixed: 7zOut.cpp: 7-Zip incorrectly wrote size of property records in some cases. 7zIn.cpp: 7-Zip incorrectly work with archive, containg archive properties. @@ -440,13 +451,13 @@ HISTORY of the 7-Zip source code 1) Default mode: 7-Zip uses UTF-8, if the local code page doesn't contain required symbols. 2) -mcu switch: 7-Zip uses UTF-8, if there are non-ASCII symbols. 3) -mcl switch: 7-Zip uses local code page. -- Now it's possible to use -mSW- and -mSW+ switches instead of -mSW=off and -mSW=on +- Now it's possible to use -mSW- and -mSW+ switches instead of -mSW=off and -mSW=on 4.58 alpha 7 2008-04-08 ------------------------- -- BUG was fixed: BZip2Encoder and BZip2Decoder used CEvent objects without - creating, when BZip2 code was called with one thread (with -mmt1 switch or with +- BUG was fixed: BZip2Encoder and BZip2Decoder used CEvent objects without + creating, when BZip2 code was called with one thread (with -mmt1 switch or with default switches on single thread CPU). - .lzma support. - RPM and NSIS support was improved. @@ -472,7 +483,7 @@ HISTORY of the 7-Zip source code - 7-Zip now has 128 MB dictionary limit for 32-bit version: It's for speed optimization: kNumLogBits = 9 + sizeof(size_t) / 2; - TAR: 'D' link flag support. 
-- 7-Zip now can unpack multivolume RAR archives created with +- 7-Zip now can unpack multivolume RAR archives created with "old style volume names" scheme (-vn switch) and names *.001, *.002, ... - Fixed bugs: - 7-Zip FM could not copy / move files to root network folders like \\COMPNAME\FOLDERNAME\ @@ -484,7 +495,7 @@ HISTORY of the 7-Zip source code 7-zip tries to delete all extra fileds (except for WzAES). And that code could hang. - 7-Zip GUI didn't suggest BZip2 dictionary size used in previous run. - - If creation time stamp was included in .RAR archive, 7-zip used creation time stamp + - If creation time stamp was included in .RAR archive, 7-zip used creation time stamp as modification time stamp. 4.58 alpha 2 2007-12-31 @@ -531,7 +542,7 @@ HISTORY of the 7-Zip source code 4.45 beta 2007-04-16 ------------------------- -- 7-Zip now uses C version of CRC, so you must call CrcGenerateTable at +- 7-Zip now uses C version of CRC, so you must call CrcGenerateTable at stratup code, or you must add CPP/Common/CRC.cpp to your project. - Method ID in .7z now is 63-bit integer (UInt64). - Open error messages @@ -606,7 +617,7 @@ HISTORY of the 7-Zip source code 4.07 beta 2004-10-03 ------------------------- -- some interfaces were changed slightly to support +- some interfaces were changed slightly to support -stdin -stdout mode. - FilterCoder for simple filters - Wildcard censor class was changed. @@ -682,7 +693,7 @@ HISTORY of the 7-Zip source code 2.30 Beta 24 2002-11-01 ------------------------- - SDK/Windows/Synchronization.h + SDK/Windows/Synchronization.h SDK/Windows/Synchronization.cpp - some changes. @@ -711,9 +722,9 @@ HISTORY of the 7-Zip source code 2.30 Beta 20 2002-07-01 ------------------------- -- SDK/Stream/WindowOut.h +- SDK/Stream/WindowOut.h now it uses only required memory (dictionary size). 
-- Project/Archiver/Resource +- Project/Archiver/Resource contains common resurces @@ -727,8 +738,8 @@ HISTORY of the 7-Zip source code - SDK/Archive/Cab/MSZipDecoder.cpp SDK/Archive/Cab/LZXDecoder.cpp: bug with corrupted archives was fixed -- Project/Compress/LZ/MatchFinder/BinTree/BinTree.h -- Project/Compress/LZ/MatchFinder/BinTree/BinTreeMain.h +- Project/Compress/LZ/MatchFinder/BinTree/BinTree.h +- Project/Compress/LZ/MatchFinder/BinTree/BinTreeMain.h some speed optimization (using prefetching) @@ -743,7 +754,7 @@ HISTORY of the 7-Zip source code Bug was fixed: LZMA could not extract more than 4 GB. - RPM and CPIO formats. - Project/Compress/LZ/LZMA/Encoder.* - Project/Archiver/Format/7z/OutHandler.cpp + Project/Archiver/Format/7z/OutHandler.cpp New fast compression mode for LZMA: -m0a=0. - New match finders for LZMA: bt4b, hc3, hc4. @@ -752,23 +763,23 @@ HISTORY of the 7-Zip source code ------------------------- - Compression ratio in LZMA was slightly improved: Project/Compress/LZ/LZMA/Encoder.* - Project/Archiver/Format/7z/OutHandler.cpp + Project/Archiver/Format/7z/OutHandler.cpp 2.30 Beta 14 2002-02-10 ------------------------- - Supporting multithreading for LZMA: - Project/Compress/LZ/MatchFinder/MT + Project/Compress/LZ/MatchFinder/MT - Common/String.h: CStringBase::Replace function was fixed. 2.30 Beta 13 2002-01-27 ------------------------- -- Compress/LZ/MatchFinder/BinTree3.h: +- Compress/LZ/MatchFinder/BinTree3.h: method -- Compress/LZ/MatchFinder/BinTreemain.h: - - one VirtualAlloc array was splitted to +- Compress/LZ/MatchFinder/BinTreemain.h: + - one VirtualAlloc array was splitted to the for 3 arrays. - Hash-functions were changed. 
@@ -776,23 +787,23 @@ HISTORY of the 7-Zip source code 2.30 Beta 12 2002-01-16 ------------------------- -- Compress/LZ/MatchFinder/BinTreemain.h: - Compress/LZ/MatchFinder/Patricia.h: - Compress/PPM/PPMd/SubAlloc.h: +- Compress/LZ/MatchFinder/BinTreemain.h: + Compress/LZ/MatchFinder/Patricia.h: + Compress/PPM/PPMd/SubAlloc.h: Beta 11 bugs were fixed: - VirtualFree was used incorrectly - checking WIN32 instead _WINDOWS. - Compress/LZ/MatchFinder/Patricia.h: + Compress/LZ/MatchFinder/Patricia.h: Beta 11 bug with deleting m_Hash2Descendants was fixed. 2.30 Beta 11 2002-01-15 ------------------------- -- Compress/LZ/MatchFinder/BinTreemain.h: - Compress/LZ/MatchFinder/Patricia.h: - Compress/PPM/PPMd/SubAlloc.h: +- Compress/LZ/MatchFinder/BinTreemain.h: + Compress/LZ/MatchFinder/Patricia.h: + Compress/PPM/PPMd/SubAlloc.h: using VirtualAlloc for memory allocating -- Exlorer/ContextMenu.cpp: +- Exlorer/ContextMenu.cpp: Testing supporting. CreateProcess instead WinExec - Format/Common/IArchiveHandler.h: @@ -808,9 +819,9 @@ HISTORY of the 7-Zip source code 2.30 Beta 10 2002-01-11 ------------------------- -- Exlorer/ContextMenu.cpp: bug with context menu on +- Exlorer/ContextMenu.cpp: bug with context menu on Windows NT4 in Unicode version was fixed. -- Format/7z/UpdateArchiveEngine.cpp: bug was fixed - +- Format/7z/UpdateArchiveEngine.cpp: bug was fixed - Updating in Beta 8 and 9 didn't work. - Exlorer/CCompressDialog.cpp: history growing bug was fixed. @@ -823,4 +834,3 @@ HISTORY of the 7-Zip source code - SDK/Archive/Zip/InEngine.cpp: bug was fixed. - SDK/Windows/FileDir.cpp: function CreateComplexDirectory was changed. -