Update and bring back Assembler code for CRC check.

This commit is contained in:
Peter Hyman 2019-11-25 08:44:46 -06:00
parent 02a7bdc6c4
commit 9f16f65705
10 changed files with 319 additions and 246 deletions

View file

@ -1,5 +1,13 @@
lrzip ChangeLog
NOVEMBER 2019, updates to version 0.631, Peter Hyman
* Fixups to Assembler code in configure.ac lzma/C/Makefile.am by using
optimized code from p7zip LZMA SDK 16.02. Now works for 64 and 32 bit using
one optimized source. Hack to allow libtool to compile and link assembler
code.
JUNE 2016, version 0.630 Con Kolivas, Peter Hyman, Petr Písař, Joel Fredrikson
* checksum.buf should only be changed after the semaphore wait

2
TODO
View file

@ -21,5 +21,3 @@ Consider ncurses version or even GUI one.
Consider using LZMA Filters for processor-optimised
coding to increase compression.
Get the ASM working on 64bit - it's only the CRC check so probably no point.

View file

@ -1,3 +1,7 @@
lrzip-0.631
Assembler code is back and works with x86_64
lrzip-0.621
Substantial speed ups for the rzip stage in both regular and unlimited modes.

View file

@ -51,23 +51,25 @@ AC_PROG_INSTALL
AC_PROG_LN_S
AC_SUBST(SHELL)
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
AC_FUNC_ALLOCA
AC_PROG_CC_C99
AS_IF([test "x$ac_cv_prog_cc_c99" = "xno"],
AS_IF([test x"$ac_cv_prog_cc_c99" = x"no"],
AC_MSG_ERROR([C compiler does not support C99], 1))
AC_CHECK_PROG([HAVE_POD2MAN], [pod2man], [yes])
AS_IF([test "$HAVE_POD2MAN" != "yes"],
AC_MSG_FAILURE([pod2man is needed to generate manual from POD]))
AC_ARG_ENABLE(
AC_ARG_ENABLE(
asm,
[AC_HELP_STRING([--enable-asm],[Enable native Assembly code])],
ASM=$enableval,
ASM=yes
)
if test x"$ASM" = xyes; then
AC_CHECK_PROG( ASM_PROG, nasm, yes, no )
if test x"$ASM" = x"yes"; then
AC_CHECK_PROG( ASM_PROG, nasm, nasm, no ) # fix to set ASM_PROG to nasm, not yes.
if test x"$ASM_PROG" = x"no "; then
ASM=no
fi
@ -78,7 +80,7 @@ AC_ARG_ENABLE([static-bin],
[AC_HELP_STRING([--enable-static-bin],[Build statically linked binary @<:@default=no@:>@])],
[static=$enableval]
)
AM_CONDITIONAL([STATIC], [test "x$static" = "xyes"])
AM_CONDITIONAL([STATIC], [test x"$static" = x"yes"])
AC_CHECK_HEADERS(fcntl.h sys/time.h unistd.h sys/mman.h)
AC_CHECK_HEADERS(ctype.h errno.h sys/resource.h)
@ -92,12 +94,6 @@ AC_CHECK_SIZEOF(int)
AC_CHECK_SIZEOF(long)
AC_CHECK_SIZEOF(short)
if test $ac_cv_sizeof_long != 4 -a "x$ASM" = "xyes" ; then
AC_MSG_WARN([64bit arch detected, disabling ASM])
ASM=no
fi
AC_CACHE_CHECK([for large file support],rzip_cv_HAVE_LARGE_FILES,[
AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <stdio.h>
@ -130,24 +126,23 @@ LIBS="$PTHREAD_LIBS $LIBS"
CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
CXXFLAGS="$CXXFLAGS $PTHREAD_CXXFLAGS"
# final checks for x86 and/or assembler
if test x"$ASM" = x"no"; then
ASM_OBJ=7zCrc.o
ASM=no
else
# final checks for assembler
# ASM is back for x86_64 by using newer CRC code from p7zip-16.02
# object files handled in lzma/C/Makefile.am
if test x"$ASM" = x"yes"; then
ASM_OPT="-I../ASM/x86/"
case $host in
i?86-*)
ASM_OBJ="7zCrcT8.o 7zCrcT8U.o"
ASM_CMD="$ASM_PROG -f elf" ;;
# x86_64 code is broken still
# x86_64-*)
# ASM_OBJ="7zCrcT8.o 7zCrcT8U_64.o"
# ASM_CMD="$ASM_PROG -f elf64" ;;
*) ASM_OBJ=7zCrc.o ;;
ASM_OPT="$ASM_OPT -f elf" ;;
x86_64-*)
ASM_OPT="$ASM_OPT -Dx64 -f elf64" ;;
*) ASM_OPT= ;;
esac
else
ASM_OPT=
fi
AM_CONDITIONAL([USE_ASM], [test "x$ASM" != "xyes" -a "x$ASM" != "xno"])
AC_SUBST([ASM_OBJ])
AM_CONDITIONAL([USE_ASM], [test x"$ASM" = x"yes"])
AC_SUBST([ASM_OPT])
AC_SUBST([ASM_CMD])
EFL_CHECK_DOXYGEN([build_doc="yes"], [build_doc="no"])
@ -171,7 +166,7 @@ echo
echo
echo "Configuration Options Summary:"
echo
echo " ASM.(32 bit only)..: $ASM"
echo " ASM................: $ASM"
echo " Static binary......: $static"
echo
echo "Documentation..........: ${build_doc}"

View file

@ -1,5 +1,20 @@
README.Assembler
Update November 2019
Assembler is enabled by
./configure --enable-asm
and disabled by
./configure --disable-asm
not
ASM=no ./configure
New files replace 32 and 64 bit assembler code.
fixes to lzma/C/Makefile.am permit libtool linking.
Original text follows.
==========================
Notes about CRC Assembly Language Coding.
lrzip-0.21 makes use of an x86 assembly language file

100
lzma/ASM/x86/7zAsm.asm Normal file
View file

@ -0,0 +1,100 @@
; 7zAsm.asm -- ASM macros
; 2009-12-12 : Igor Pavlov : Public domain
; 2011-10-12 : P7ZIP : Public domain
%define NOT ~
%macro MY_ASM_START 0
SECTION .text
%endmacro
%macro MY_PROC 2 ; macro name:req, numParams:req
align 16
%define proc_numParams %2 ; numParams
global %1
global _%1
%1:
_%1:
%endmacro
%macro MY_ENDP 0
%ifdef x64
ret
; proc_name ENDP
%else
ret ; (proc_numParams - 2) * 4
%endif
%endmacro
%ifdef x64
REG_SIZE equ 8
%else
REG_SIZE equ 4
%endif
%define x0 EAX
%define x1 ECX
%define x2 EDX
%define x3 EBX
%define x4 ESP
%define x5 EBP
%define x6 ESI
%define x7 EDI
%define x0_L AL
%define x1_L CL
%define x2_L DL
%define x3_L BL
%define x0_H AH
%define x1_H CH
%define x2_H DH
%define x3_H BH
%ifdef x64
%define r0 RAX
%define r1 RCX
%define r2 RDX
%define r3 RBX
%define r4 RSP
%define r5 RBP
%define r6 RSI
%define r7 RDI
%else
%define r0 x0
%define r1 x1
%define r2 x2
%define r3 x3
%define r4 x4
%define r5 x5
%define r6 x6
%define r7 x7
%endif
%macro MY_PUSH_4_REGS 0
push r3
push r5
%ifdef x64
%ifdef CYGWIN64
push r6
push r7
%endif
%else
push r6
push r7
%endif
%endmacro
%macro MY_POP_4_REGS 0
%ifdef x64
%ifdef CYGWIN64
pop r7
pop r6
%endif
%else
pop r7
pop r6
%endif
pop r5
pop r3
%endmacro

View file

@ -0,0 +1,141 @@
; 7zCrcOpt.asm -- CRC32 calculation : optimized version
; 2009-12-12 : Igor Pavlov : Public domain
%include "7zAsm.asm"
MY_ASM_START
%define rD r2
%define rN r7
%ifdef x64
%define num_VAR r8
%define table_VAR r9
%else
data_size equ (REG_SIZE * 7)
crc_table equ (REG_SIZE + data_size)
%define num_VAR [r4 + data_size]
%define table_VAR [r4 + crc_table]
%endif
%define SRCDAT rN + rD + 4 *
%macro CRC 4 ;CRC macro op:req, dest:req, src:req, t:req
%1 %2, DWORD [r5 + %3 * 4 + 0400h * %4] ; op dest, DWORD [r5 + src * 4 + 0400h * t]
%endmacro
%macro CRC_XOR 3 ; CRC_XOR macro dest:req, src:req, t:req
CRC xor, %1, %2, %3
%endmacro
%macro CRC_MOV 3 ; CRC_MOV macro dest:req, src:req, t:req
CRC mov, %1, %2, %3 ; CRC mov, dest, src, t
%endmacro
%macro CRC1b 0
movzx x6, BYTE [rD]
inc rD
movzx x3, x0_L
xor x6, x3
shr x0, 8
CRC xor, x0, r6, 0
dec rN
%endmacro
%macro MY_PROLOG 1 ; MY_PROLOG macro crc_end:req
MY_PUSH_4_REGS
%ifdef x64
%ifdef CYGWIN64
;ECX=CRC, RDX=buf, R8=size R9=table
; already in R8 : mov num_VAR,R8 ; LEN
; already in RDX : mov rD, RDX ; BUF
; already in R9 : mov table_VAR,R9; table
mov x0, ECX ; CRC
%else
;EDI=CRC, RSI=buf, RDX=size RCX=table
mov num_VAR,RDX ; LEN
mov rD, RSI ; BUF
mov table_VAR,RCX; table
mov x0, EDI ; CRC
%endif
%else
mov x0, [r4 + 20] ; CRC
mov rD, [r4 + 24] ; buf
%endif
mov rN, num_VAR
mov r5, table_VAR
test rN, rN
jz near %1 ; crc_end
%%sl:
test rD, 7
jz %%sl_end
CRC1b
jnz %%sl
%%sl_end:
cmp rN, 16
jb near %1; crc_end
add rN, rD
mov num_VAR, rN
sub rN, 8
and rN, NOT 7
sub rD, rN
xor x0, [SRCDAT 0]
%endmacro
%macro MY_EPILOG 1 ; MY_EPILOG macro crc_end:req
xor x0, [SRCDAT 0]
mov rD, rN
mov rN, num_VAR
sub rN, rD
%1: ; crc_end:
test rN, rN
jz %%end ; @F
CRC1b
jmp %1 ; crc_end
%%end:
MY_POP_4_REGS
%endmacro
MY_PROC CrcUpdateT8, 4
MY_PROLOG crc_end_8
mov x1, [SRCDAT 1]
align 16
main_loop_8:
mov x6, [SRCDAT 2]
movzx x3, x1_L
CRC_XOR x6, r3, 3
movzx x3, x1_H
CRC_XOR x6, r3, 2
shr x1, 16
movzx x3, x1_L
movzx x1, x1_H
CRC_XOR x6, r3, 1
movzx x3, x0_L
CRC_XOR x6, r1, 0
mov x1, [SRCDAT 3]
CRC_XOR x6, r3, 7
movzx x3, x0_H
shr x0, 16
CRC_XOR x6, r3, 6
movzx x3, x0_L
CRC_XOR x6, r3, 5
movzx x3, x0_H
CRC_MOV x0, r3, 4
xor x0, x6
add rD, 8
jnz main_loop_8
MY_EPILOG crc_end_8
MY_ENDP
; T4 CRC deleted
; end
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View file

@ -1,102 +0,0 @@
SECTION .text
%macro CRC1b 0
movzx EDX, BYTE [ESI]
inc ESI
movzx EBX, AL
xor EDX, EBX
shr EAX, 8
xor EAX, [EBP + EDX * 4]
dec EDI
%endmacro
data_size equ (28)
crc_table equ (data_size + 4)
align 16
global CrcUpdateT8
global _CrcUpdateT8
CrcUpdateT8:
_CrcUpdateT8:
push EBX
push ESI
push EDI
push EBP
mov EAX, [ESP + 20]
mov ESI, [ESP + 24]
mov EDI, [ESP + data_size]
mov EBP, [ESP + crc_table]
test EDI, EDI
jz sl_end
sl:
test ESI, 7
jz sl_end
CRC1b
jnz sl
sl_end:
cmp EDI, 16
jb NEAR crc_end
mov [ESP + data_size], EDI
sub EDI, 8
and EDI, ~ 7
sub [ESP + data_size], EDI
add EDI, ESI
xor EAX, [ESI]
mov EBX, [ESI + 4]
movzx ECX, BL
align 16
main_loop:
mov EDX, [EBP + ECX*4 + 0C00h]
movzx ECX, BH
xor EDX, [EBP + ECX*4 + 0800h]
shr EBX, 16
movzx ECX, BL
xor EDX, [EBP + ECX*4 + 0400h]
xor EDX, [ESI + 8]
movzx ECX, AL
movzx EBX, BH
xor EDX, [EBP + EBX*4 + 0000h]
mov EBX, [ESI + 12]
xor EDX, [EBP + ECX*4 + 01C00h]
movzx ECX, AH
add ESI, 8
shr EAX, 16
xor EDX, [EBP + ECX*4 + 01800h]
movzx ECX, AL
xor EDX, [EBP + ECX*4 + 01400h]
movzx ECX, AH
mov EAX, [EBP + ECX*4 + 01000h]
movzx ECX, BL
xor EAX,EDX
cmp ESI, EDI
jne main_loop
xor EAX, [ESI]
mov EDI, [ESP + data_size]
crc_end:
test EDI, EDI
jz fl_end
fl:
CRC1b
jnz fl
fl_end:
pop EBP
pop EDI
pop ESI
pop EBX
ret
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View file

@ -1,105 +0,0 @@
SECTION .text
%macro CRC1b 0
movzx EDX, BYTE [RSI]
inc RSI
movzx EBX, AL
xor EDX, EBX
shr EAX, 8
xor EAX, [RDI + RDX * 4]
dec R8
%endmacro
align 16
global CrcUpdateT8
CrcUpdateT8:
push RBX
push RSI
push RDI
push RBP
mov EAX, ECX
mov RSI, RDX
mov RDI, R9
test R8, R8
jz sl_end
sl:
test RSI, 7
jz sl_end
CRC1b
jnz sl
sl_end:
cmp R8, 16
jb crc_end
mov R9, R8
and R8, 7
add R8, 8
sub R9, R8
add R9, RSI
xor EAX, [RSI]
mov EBX, [RSI + 4]
movzx ECX, BL
align 16
main_loop:
mov EDX, [RDI + RCX*4 + 0C00h]
movzx EBP, BH
xor EDX, [RDI + RBP*4 + 0800h]
shr EBX, 16
movzx ECX, BL
xor EDX, [RSI + 8]
xor EDX, [RDI + RCX*4 + 0400h]
movzx ECX, AL
movzx EBP, BH
xor EDX, [RDI + RBP*4 + 0000h]
mov EBX, [RSI + 12]
xor EDX, [RDI + RCX*4 + 01C00h]
movzx EBP, AH
shr EAX, 16
movzx ECX, AL
xor EDX, [RDI + RBP*4 + 01800h]
movzx EBP, AH
mov EAX, [RDI + RCX*4 + 01400h]
add RSI, 8
xor EAX, [RDI + RBP*4 + 01000h]
movzx ECX, BL
xor EAX,EDX
cmp RSI, R9
jne main_loop
xor EAX, [RSI]
crc_end:
test R8, R8
jz fl_end
fl:
CRC1b
jnz fl
fl_end:
pop RBP
pop RDI
pop RSI
pop RBX
ret
%ifidn __OUTPUT_FORMAT__,elf
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View file

@ -1,22 +1,30 @@
MAINTAINERCLEANFILES = Makefile.in
# Update -D
AM_CFLAGS = \
-DCOMPRESS_MF_MT \
-D_REENTRANT \
-I@top_builddir@ \
-I@top_srcdir@
ASM_LIBS =
ASM_S =
ASM_H =
ASM_7z =
C_S =
if USE_ASM
ASM_LIBS += @ASM_OBJ@
ASM_7z += 7zCrcOpt_asm
ASM_S += @top_srcdir@/lzma/ASM/x86/$(ASM_7z).asm
ASM_H += @top_srcdir@/lzma/ASM/x86/7zAsm.asm
C_S += 7zCrcT8.c
else
ASM_S += 7zCrc.c 7zCrc.h
C_S += 7zCrcT8.c
endif
noinst_LTLIBRARIES = liblzma.la
# need separate variable for ASM so that make will compile later
# to prevent an error even if -j## is used.
liblzma_la_SOURCES = \
$(ASM_S) \
$(C_S) \
7z.Crc.h \
LzmaDec.h \
LzmaEnc.h \
LzFind.c \
@ -36,11 +44,22 @@ liblzma_la_SOURCES = \
windows.h \
basetyps.h \
MyWindows.h \
MyGuidDef.h
liblzma_so_LIBS = $(ASM_LIBS)
MyGuidDef.h \
$(ASM_S) $(ASM_H)
7zCrcT8U.o: @top_srcdir@/lzma/ASM/x86/7zCrcT8U.s
@ASM_CMD@ -o 7zCrcT8U.o @top_srcdir@/lzma/ASM/x86/7zCrcT8U.s
## hack to force asm compilation and to trick libtool with .lo file
if USE_ASM
liblzma_la_LIBADD = $(ASM_7z).lo
7zCrcT8U_64.o: @top_srcdir@/lzma/ASM/x86_64/7zCrcT8U_64.s
@ASM_CMD@ -o 7zCrcT8U_64.o @top_srcdir@/lzma/ASM/x86_64/7zCrcT8U_64.s
7ZIPASMLOFILE := \
\# $(ASM_7z).lo - a libtool object file\
\n\# Generated by libtool -- hack to allow asm linking\
\n\# Peter Hyman\
\npic_object='.libs/$(ASM_7z).o'\
\nnon_pic_object='$(ASM_7z).o'
$(ASM_7z).lo: $(ASM_S)
$(ASM_PROG) $(ASM_OPT) -o $(ASM_7z).o $(ASM_S)
cp $(ASM_7z).o .libs/
@echo -e "$(7ZIPASMLOFILE)" > $(ASM_7z).lo
endif