diff --git a/ChangeLog b/ChangeLog index 71fca57..4afa085 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,13 @@ lrzip ChangeLog +NOVEMBER 2019, updates to version 0.631, Peter Hyman + +* Fixups to Assembler code in configure.ac and lzma/C/Makefile.am by using +optimized code from p7zip LZMA SDK 16.02. Now works for 64 and 32 bit using +one optimized source. Hack to allow libtool to compile and link assembler +code. + + JUNE 2016, version 0.630 Con Kolivas, Peter Hyman, Petr Písař, Joel Fredrikson * checksum.buf should only be changed after the semaphore wait diff --git a/TODO b/TODO index 6653cbb..a3e7adc 100644 --- a/TODO +++ b/TODO @@ -21,5 +21,3 @@ Consider ncurses version or even GUI one. Consider using LZMA Filters for processor-optimised coding to increase compression. - -Get the ASM working on 64bit - it's only the CRC check so probably no point. diff --git a/WHATS-NEW b/WHATS-NEW index 0864bce..d859e0e 100644 --- a/WHATS-NEW +++ b/WHATS-NEW @@ -1,3 +1,7 @@ +lrzip-0.631 + +Assembler code is back and works with x86_64 + lrzip-0.621 Substantial speed ups for the rzip stage in both regular and unlimited modes. diff --git a/configure.ac b/configure.ac index 5adb6fd..29e8d35 100644 --- a/configure.ac +++ b/configure.ac @@ -51,23 +51,25 @@ AC_PROG_INSTALL AC_PROG_LN_S AC_SUBST(SHELL) AC_SYS_LARGEFILE +AC_FUNC_FSEEKO AC_FUNC_ALLOCA AC_PROG_CC_C99 -AS_IF([test "x$ac_cv_prog_cc_c99" = "xno"], +AS_IF([test x"$ac_cv_prog_cc_c99" = x"no"], AC_MSG_ERROR([C compiler does not support C99], 1)) AC_CHECK_PROG([HAVE_POD2MAN], [pod2man], [yes]) AS_IF([test "$HAVE_POD2MAN" != "yes"], AC_MSG_FAILURE([pod2man is needed to generate manual from POD])) - AC_ARG_ENABLE( +AC_ARG_ENABLE( asm, [AC_HELP_STRING([--enable-asm],[Enable native Assembly code])], ASM=$enableval, ASM=yes ) -if test x"$ASM" = xyes; then - AC_CHECK_PROG( ASM_PROG, nasm, yes, no ) + +if test x"$ASM" = x"yes"; then + AC_CHECK_PROG( ASM_PROG, nasm, nasm, no ) # fix to set ASM_PROG to nasm, not yes. 
if test x"$ASM_PROG" = x"no "; then ASM=no fi @@ -78,7 +80,7 @@ AC_ARG_ENABLE([static-bin], [AC_HELP_STRING([--enable-static-bin],[Build statically linked binary @<:@default=no@:>@])], [static=$enableval] ) -AM_CONDITIONAL([STATIC], [test "x$static" = "xyes"]) +AM_CONDITIONAL([STATIC], [test x"$static" = x"yes"]) AC_CHECK_HEADERS(fcntl.h sys/time.h unistd.h sys/mman.h) AC_CHECK_HEADERS(ctype.h errno.h sys/resource.h) @@ -92,12 +94,6 @@ AC_CHECK_SIZEOF(int) AC_CHECK_SIZEOF(long) AC_CHECK_SIZEOF(short) -if test $ac_cv_sizeof_long != 4 -a "x$ASM" = "xyes" ; then - AC_MSG_WARN([64bit arch detected, disabling ASM]) - ASM=no -fi - - AC_CACHE_CHECK([for large file support],rzip_cv_HAVE_LARGE_FILES,[ AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include @@ -130,24 +126,23 @@ LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" CXXFLAGS="$CXXFLAGS $PTHREAD_CXXFLAGS" -# final checks for x86 and/or assembler -if test x"$ASM" = x"no"; then - ASM_OBJ=7zCrc.o - ASM=no -else +# final checks for assembler; hosts that are not x86/x86_64 turn ASM off so USE_ASM is false +# ASM is back for x86_64 by using newer CRC code from p7zip-16.02 +# object files handled in lzma/C/Makefile.am +if test x"$ASM" = x"yes"; then + ASM_OPT="-I../ASM/x86/" case $host in i?86-*) - ASM_OBJ="7zCrcT8.o 7zCrcT8U.o" - ASM_CMD="$ASM_PROG -f elf" ;; -# x86_64 code is broken still -# x86_64-*) -# ASM_OBJ="7zCrcT8.o 7zCrcT8U_64.o" -# ASM_CMD="$ASM_PROG -f elf64" ;; - *) ASM_OBJ=7zCrc.o ;; + ASM_OPT="$ASM_OPT -f elf" ;; + x86_64-*) + ASM_OPT="$ASM_OPT -Dx64 -f elf64" ;; + *) ASM_OPT=; ASM=no ;; esac +else + ASM_OPT= fi -AM_CONDITIONAL([USE_ASM], [test "x$ASM" != "xyes" -a "x$ASM" != "xno"]) -AC_SUBST([ASM_OBJ]) +AM_CONDITIONAL([USE_ASM], [test x"$ASM" = x"yes"]) +AC_SUBST([ASM_OPT]) AC_SUBST([ASM_CMD]) EFL_CHECK_DOXYGEN([build_doc="yes"], [build_doc="no"]) @@ -171,7 +166,7 @@ echo echo echo "Configuration Options Summary:" echo -echo " ASM.(32 bit only)..: $ASM" +echo " ASM................: $ASM" echo " Static binary......: $static" echo echo "Documentation..........: ${build_doc}" diff 
--git a/doc/README.Assembler b/doc/README.Assembler index 2e5df4b..d7a9b53 100644 --- a/doc/README.Assembler +++ b/doc/README.Assembler @@ -1,5 +1,20 @@ README.Assembler +Update November 2019 + +Assembler is enabled by +./configure --enable-asm +and disabled by +./configure --disable-asm +and not by +ASM=no ./configure + +New files replace the old 32 and 64 bit assembler code. +Fixes to lzma/C/Makefile.am permit libtool linking. + +Original text follows. +========================== + Notes about CRC Assembly Language Coding. lrzip-0.21 makes use of an x86 assembly language file diff --git a/lzma/ASM/x86/7zAsm.asm b/lzma/ASM/x86/7zAsm.asm new file mode 100644 index 0000000..a0a254c --- /dev/null +++ b/lzma/ASM/x86/7zAsm.asm @@ -0,0 +1,100 @@ +; 7zAsm.asm -- ASM macros +; 2009-12-12 : Igor Pavlov : Public domain +; 2011-10-12 : P7ZIP : Public domain + +%define NOT ~ + +%macro MY_ASM_START 0 + SECTION .text +%endmacro + +%macro MY_PROC 2 ; macro name:req, numParams:req + align 16 + %define proc_numParams %2 ; numParams + global %1 + global _%1 + %1: + _%1: +%endmacro + +%macro MY_ENDP 0 + %ifdef x64 + ret + ; proc_name ENDP + %else + ret ; (proc_numParams - 2) * 4 + %endif +%endmacro + +%ifdef x64 + REG_SIZE equ 8 +%else + REG_SIZE equ 4 +%endif + + %define x0 EAX + %define x1 ECX + %define x2 EDX + %define x3 EBX + %define x4 ESP + %define x5 EBP + %define x6 ESI + %define x7 EDI + + %define x0_L AL + %define x1_L CL + %define x2_L DL + %define x3_L BL + + %define x0_H AH + %define x1_H CH + %define x2_H DH + %define x3_H BH + +%ifdef x64 + %define r0 RAX + %define r1 RCX + %define r2 RDX + %define r3 RBX + %define r4 RSP + %define r5 RBP + %define r6 RSI + %define r7 RDI +%else + %define r0 x0 + %define r1 x1 + %define r2 x2 + %define r3 x3 + %define r4 x4 + %define r5 x5 + %define r6 x6 + %define r7 x7 +%endif + +%macro MY_PUSH_4_REGS 0 + push r3 + push r5 +%ifdef x64 + %ifdef CYGWIN64 + push r6 + push r7 + %endif +%else + push r6 + push r7 +%endif +%endmacro + +%macro 
MY_POP_4_REGS 0 +%ifdef x64 + %ifdef CYGWIN64 + pop r7 + pop r6 + %endif +%else + pop r7 + pop r6 +%endif + pop r5 + pop r3 +%endmacro diff --git a/lzma/ASM/x86/7zCrcOpt_asm.asm b/lzma/ASM/x86/7zCrcOpt_asm.asm new file mode 100644 index 0000000..37465ac --- /dev/null +++ b/lzma/ASM/x86/7zCrcOpt_asm.asm @@ -0,0 +1,141 @@ +; 7zCrcOpt.asm -- CRC32 calculation : optimized version +; 2009-12-12 : Igor Pavlov : Public domain + +%include "7zAsm.asm" + +MY_ASM_START + +%define rD r2 +%define rN r7 + +%ifdef x64 + %define num_VAR r8 + %define table_VAR r9 +%else + data_size equ (REG_SIZE * 7) + crc_table equ (REG_SIZE + data_size) + %define num_VAR [r4 + data_size] + %define table_VAR [r4 + crc_table] +%endif + +%define SRCDAT rN + rD + 4 * + +%macro CRC 4 ;CRC macro op:req, dest:req, src:req, t:req + %1 %2, DWORD [r5 + %3 * 4 + 0400h * %4] ; op dest, DWORD [r5 + src * 4 + 0400h * t] +%endmacro + +%macro CRC_XOR 3 ; CRC_XOR macro dest:req, src:req, t:req + CRC xor, %1, %2, %3 +%endmacro + +%macro CRC_MOV 3 ; CRC_MOV macro dest:req, src:req, t:req + CRC mov, %1, %2, %3 ; CRC mov, dest, src, t +%endmacro + +%macro CRC1b 0 + movzx x6, BYTE [rD] + inc rD + movzx x3, x0_L + xor x6, x3 + shr x0, 8 + CRC xor, x0, r6, 0 + dec rN +%endmacro + +%macro MY_PROLOG 1 ; MY_PROLOG macro crc_end:req + MY_PUSH_4_REGS + + +%ifdef x64 + %ifdef CYGWIN64 + ;ECX=CRC, RDX=buf, R8=size R9=table + ; already in R8 : mov num_VAR,R8 ; LEN + ; already in RDX : mov rD, RDX ; BUF + ; already in R9 : mov table_VAR,R9; table + mov x0, ECX ; CRC + %else + ;EDI=CRC, RSI=buf, RDX=size RCX=table + mov num_VAR,RDX ; LEN + mov rD, RSI ; BUF + mov table_VAR,RCX; table + mov x0, EDI ; CRC + %endif +%else + mov x0, [r4 + 20] ; CRC + mov rD, [r4 + 24] ; buf +%endif + mov rN, num_VAR + mov r5, table_VAR + test rN, rN + jz near %1 ; crc_end + %%sl: + test rD, 7 + jz %%sl_end + CRC1b + jnz %%sl + %%sl_end: + cmp rN, 16 + jb near %1; crc_end + add rN, rD + mov num_VAR, rN + sub rN, 8 + and rN, NOT 7 + sub rD, rN + xor 
x0, [SRCDAT 0] +%endmacro + +%macro MY_EPILOG 1 ; MY_EPILOG macro crc_end:req + xor x0, [SRCDAT 0] + mov rD, rN + mov rN, num_VAR + sub rN, rD + %1: ; crc_end: + test rN, rN + jz %%end ; @F + CRC1b + jmp %1 ; crc_end + %%end: + MY_POP_4_REGS +%endmacro + +MY_PROC CrcUpdateT8, 4 + MY_PROLOG crc_end_8 + mov x1, [SRCDAT 1] + align 16 + main_loop_8: + mov x6, [SRCDAT 2] + movzx x3, x1_L + CRC_XOR x6, r3, 3 + movzx x3, x1_H + CRC_XOR x6, r3, 2 + shr x1, 16 + movzx x3, x1_L + movzx x1, x1_H + CRC_XOR x6, r3, 1 + movzx x3, x0_L + CRC_XOR x6, r1, 0 + + mov x1, [SRCDAT 3] + CRC_XOR x6, r3, 7 + movzx x3, x0_H + shr x0, 16 + CRC_XOR x6, r3, 6 + movzx x3, x0_L + CRC_XOR x6, r3, 5 + movzx x3, x0_H + CRC_MOV x0, r3, 4 + xor x0, x6 + add rD, 8 + jnz main_loop_8 + + MY_EPILOG crc_end_8 +MY_ENDP + +; T4 CRC deleted + +; end + +%ifidn __OUTPUT_FORMAT__,elf +section .note.GNU-stack noalloc noexec nowrite progbits +%endif + diff --git a/lzma/ASM/x86/7zCrcT8U.s b/lzma/ASM/x86/7zCrcT8U.s deleted file mode 100644 index b066b76..0000000 --- a/lzma/ASM/x86/7zCrcT8U.s +++ /dev/null @@ -1,102 +0,0 @@ - -SECTION .text - -%macro CRC1b 0 - movzx EDX, BYTE [ESI] - inc ESI - movzx EBX, AL - xor EDX, EBX - shr EAX, 8 - xor EAX, [EBP + EDX * 4] - dec EDI -%endmacro - -data_size equ (28) -crc_table equ (data_size + 4) - - align 16 - global CrcUpdateT8 - global _CrcUpdateT8 -CrcUpdateT8: -_CrcUpdateT8: - push EBX - push ESI - push EDI - push EBP - - mov EAX, [ESP + 20] - mov ESI, [ESP + 24] - mov EDI, [ESP + data_size] - mov EBP, [ESP + crc_table] - - test EDI, EDI - jz sl_end - sl: - test ESI, 7 - jz sl_end - CRC1b - jnz sl - sl_end: - - cmp EDI, 16 - jb NEAR crc_end - mov [ESP + data_size], EDI - sub EDI, 8 - and EDI, ~ 7 - sub [ESP + data_size], EDI - - add EDI, ESI - xor EAX, [ESI] - mov EBX, [ESI + 4] - movzx ECX, BL - align 16 - main_loop: - mov EDX, [EBP + ECX*4 + 0C00h] - movzx ECX, BH - xor EDX, [EBP + ECX*4 + 0800h] - shr EBX, 16 - movzx ECX, BL - xor EDX, [EBP + ECX*4 + 0400h] - xor EDX, 
[ESI + 8] - movzx ECX, AL - movzx EBX, BH - xor EDX, [EBP + EBX*4 + 0000h] - - mov EBX, [ESI + 12] - - xor EDX, [EBP + ECX*4 + 01C00h] - movzx ECX, AH - add ESI, 8 - shr EAX, 16 - xor EDX, [EBP + ECX*4 + 01800h] - movzx ECX, AL - xor EDX, [EBP + ECX*4 + 01400h] - movzx ECX, AH - mov EAX, [EBP + ECX*4 + 01000h] - movzx ECX, BL - xor EAX,EDX - - cmp ESI, EDI - jne main_loop - xor EAX, [ESI] - - mov EDI, [ESP + data_size] - - crc_end: - - test EDI, EDI - jz fl_end - fl: - CRC1b - jnz fl - fl_end: - - pop EBP - pop EDI - pop ESI - pop EBX - ret - -%ifidn __OUTPUT_FORMAT__,elf - section .note.GNU-stack noalloc noexec nowrite progbits -%endif diff --git a/lzma/ASM/x86_64/7zCrcT8U_64.s b/lzma/ASM/x86_64/7zCrcT8U_64.s deleted file mode 100644 index d68bde0..0000000 --- a/lzma/ASM/x86_64/7zCrcT8U_64.s +++ /dev/null @@ -1,105 +0,0 @@ - -SECTION .text - - - - -%macro CRC1b 0 - movzx EDX, BYTE [RSI] - inc RSI - movzx EBX, AL - xor EDX, EBX - shr EAX, 8 - xor EAX, [RDI + RDX * 4] - dec R8 -%endmacro - - - - -align 16 -global CrcUpdateT8 - -CrcUpdateT8: - - push RBX - push RSI - push RDI - push RBP - - mov EAX, ECX - mov RSI, RDX - mov RDI, R9 - - - test R8, R8 - jz sl_end - sl: - test RSI, 7 - jz sl_end - CRC1b - jnz sl - sl_end: - - cmp R8, 16 - jb crc_end - mov R9, R8 - and R8, 7 - add R8, 8 - sub R9, R8 - - add R9, RSI - xor EAX, [RSI] - mov EBX, [RSI + 4] - movzx ECX, BL - align 16 - main_loop: - mov EDX, [RDI + RCX*4 + 0C00h] - movzx EBP, BH - xor EDX, [RDI + RBP*4 + 0800h] - shr EBX, 16 - movzx ECX, BL - xor EDX, [RSI + 8] - xor EDX, [RDI + RCX*4 + 0400h] - movzx ECX, AL - movzx EBP, BH - xor EDX, [RDI + RBP*4 + 0000h] - - mov EBX, [RSI + 12] - - xor EDX, [RDI + RCX*4 + 01C00h] - movzx EBP, AH - shr EAX, 16 - movzx ECX, AL - xor EDX, [RDI + RBP*4 + 01800h] - movzx EBP, AH - mov EAX, [RDI + RCX*4 + 01400h] - add RSI, 8 - xor EAX, [RDI + RBP*4 + 01000h] - movzx ECX, BL - xor EAX,EDX - - cmp RSI, R9 - jne main_loop - xor EAX, [RSI] - - - - crc_end: - - test R8, R8 - jz 
fl_end - fl: - CRC1b - jnz fl - fl_end: - - pop RBP - pop RDI - pop RSI - pop RBX - ret - -%ifidn __OUTPUT_FORMAT__,elf - section .note.GNU-stack noalloc noexec nowrite progbits -%endif diff --git a/lzma/C/Makefile.am b/lzma/C/Makefile.am index f7f271a..c710d9c 100644 --- a/lzma/C/Makefile.am +++ b/lzma/C/Makefile.am @@ -1,22 +1,30 @@ MAINTAINERCLEANFILES = Makefile.in +# Update -D AM_CFLAGS = \ - -DCOMPRESS_MF_MT \ -D_REENTRANT \ -I@top_builddir@ \ -I@top_srcdir@ -ASM_LIBS = ASM_S = +ASM_H = +ASM_7z = +C_S = if USE_ASM - ASM_LIBS += @ASM_OBJ@ + ASM_7z += 7zCrcOpt_asm + ASM_S += @top_srcdir@/lzma/ASM/x86/$(ASM_7z).asm + ASM_H += @top_srcdir@/lzma/ASM/x86/7zAsm.asm + C_S += 7zCrcT8.c else - ASM_S += 7zCrc.c 7zCrc.h + C_S += 7zCrc.c endif noinst_LTLIBRARIES = liblzma.la +# need separate variable for ASM so that make will compile later +# to prevent an error even if -j## is used. liblzma_la_SOURCES = \ - $(ASM_S) \ + $(C_S) \ + 7zCrc.h \ LzmaDec.h \ LzmaEnc.h \ LzFind.c \ @@ -36,11 +44,22 @@ liblzma_la_SOURCES = \ windows.h \ basetyps.h \ MyWindows.h \ - MyGuidDef.h -liblzma_so_LIBS = $(ASM_LIBS) + MyGuidDef.h \ + $(ASM_S) $(ASM_H) -7zCrcT8U.o: @top_srcdir@/lzma/ASM/x86/7zCrcT8U.s - @ASM_CMD@ -o 7zCrcT8U.o @top_srcdir@/lzma/ASM/x86/7zCrcT8U.s +## hack: assemble with nasm, then hand-write a .lo file so libtool will link the object +if USE_ASM +liblzma_la_LIBADD = $(ASM_7z).lo -7zCrcT8U_64.o: @top_srcdir@/lzma/ASM/x86_64/7zCrcT8U_64.s - @ASM_CMD@ -o 7zCrcT8U_64.o @top_srcdir@/lzma/ASM/x86_64/7zCrcT8U_64.s +7ZIPASMLOFILE := \ +\# $(ASM_7z).lo - a libtool object file\ +\n\# Generated by libtool -- hack to allow asm linking\ +\n\# Peter Hyman\ +\npic_object='.libs/$(ASM_7z).o'\ +\nnon_pic_object='$(ASM_7z).o' + +$(ASM_7z).lo: $(ASM_S) + $(ASM_PROG) $(ASM_OPT) -o $(ASM_7z).o $(ASM_S) + mkdir -p .libs && cp $(ASM_7z).o .libs/ + @printf '%b\n' "$(7ZIPASMLOFILE)" > $(ASM_7z).lo +endif