/* Xxh64.c -- XXH64 hash calculation
original code: Copyright (c) Yann Collet.
2023-08-18 : modified by Igor Pavlov.
This source code is licensed under BSD 2-Clause License.
*/

#include "Precomp.h"

#include "CpuArch.h"
#include "RotateDefs.h"
#include "Xxh64.h"

#define Z7_XXH_PRIME64_1 UINT64_CONST(0x9E3779B185EBCA87)
#define Z7_XXH_PRIME64_2 UINT64_CONST(0xC2B2AE3D27D4EB4F)
#define Z7_XXH_PRIME64_3 UINT64_CONST(0x165667B19E3779F9)
#define Z7_XXH_PRIME64_4 UINT64_CONST(0x85EBCA77C2B2AE63)
#define Z7_XXH_PRIME64_5 UINT64_CONST(0x27D4EB2F165667C5)

void Xxh64State_Init(CXxh64State *p)
{
  const UInt64 seed = 0;
  p->v[0] = seed + Z7_XXH_PRIME64_1 + Z7_XXH_PRIME64_2;
  p->v[1] = seed + Z7_XXH_PRIME64_2;
  p->v[2] = seed;
  p->v[3] = seed - Z7_XXH_PRIME64_1;
}
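
/* Note: the seed is fixed to 0 above, so this computes unseeded XXH64;
   the four lane initializers (seed + PRIME1 + PRIME2, seed + PRIME2,
   seed, seed - PRIME1) follow the XXH64 reference layout. */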

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) && defined(_MSC_VER)
#define Z7_XXH64_USE_ASM
#endif
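
/* Note: when Z7_XXH64_USE_ASM is set (32-bit x86 MSVC), the block loop is
   the hand-written inline-asm Xxh64State_UpdateBlocks below; every other
   target compiles the portable C version in the #else branch. */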

#if !defined(MY_CPU_64BIT) && defined(MY_CPU_X86) \
    && defined(Z7_MSC_VER_ORIGINAL) && Z7_MSC_VER_ORIGINAL > 1200
/* We try to avoid __allmul calls in MSVC for 64-bit multiplication,
   but MSVC6 still emits __allmul for this code.
   So for MSVC6 we use the default 64-bit multiply without this optimization.
*/
#define LOW32(b) ((UInt32)(b & 0xffffffff))
/* MSVC compilers newer than MSVC6 (version > 1200) can use the "mul"
   instruction without __allmul for our MY_emulu macro.
   MY_emulu is similar to the __emulu(a, b) intrinsic. */
#define MY_emulu(a, b) ((UInt64)(a) * (b))
#define MY_SET_HIGH32(a) ((UInt64)(a) << 32)
#define MY_MUL32_SET_HIGH32(a, b) MY_SET_HIGH32((UInt32)(a) * (UInt32)(b))
// /*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) + \
    MY_SET_HIGH32( \
      (UInt32)((a) >> 32) * LOW32(b) + \
      (UInt32)(a) * (UInt32)((b) >> 32) \
  ))
// */
/*
#define MY_MUL64(a, b) \
  ( MY_emulu((UInt32)(a), LOW32(b)) \
  + MY_MUL32_SET_HIGH32((a) >> 32, LOW32(b)) + \
  + MY_MUL32_SET_HIGH32(a, (b) >> 32) \
  )
*/

#define MY_MUL_32_64(a32, b) \
  ( MY_emulu((UInt32)(a32), LOW32(b)) \
  + MY_MUL32_SET_HIGH32(a32, (b) >> 32) \
  )

#else

#define MY_MUL64(a, b) ((a) * (b))
#define MY_MUL_32_64(a32, b) ((a32) * (UInt64)(b))

#endif
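
/* Note (illustrative): with 32-bit halves a = aH:aL and b = bH:bL,
     (a * b) mod 2^64 == aL*bL + ((aH*bL + aL*bH) << 32),
   because the aH*bH term only affects bits >= 64. MY_MUL64 above is
   exactly that decomposition (one widening "mul" plus two 32-bit "imul"s),
   and MY_MUL_32_64 is the special case aH == 0. */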


static
Z7_FORCE_INLINE
UInt64 Xxh64_Round(UInt64 acc, UInt64 input)
{
  acc += MY_MUL64(input, Z7_XXH_PRIME64_2);
  acc = Z7_ROTL64(acc, 31);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1);
}

static UInt64 Xxh64_Merge(UInt64 acc, UInt64 val)
{
  acc ^= Xxh64_Round(0, val);
  return MY_MUL64(acc, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
}
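
/* Xxh64_Round is the standard XXH64 lane step (acc + input * PRIME64_2,
   rotl 31, then * PRIME64_1); Xxh64_Merge folds one lane accumulator into
   the combined hash during finalization. Xxh64State_UpdateBlocks below
   consumes whole 32-byte blocks, one round per lane per 8 input bytes,
   so (end - data) must be a multiple of 32. */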


#ifdef Z7_XXH64_USE_ASM

#define Z7_XXH_PRIME64_1_HIGH 0x9E3779B1
#define Z7_XXH_PRIME64_1_LOW  0x85EBCA87
#define Z7_XXH_PRIME64_2_HIGH 0xC2B2AE3D
#define Z7_XXH_PRIME64_2_LOW  0x27D4EB4F
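
/* __fastcall + naked: p arrives in ecx and data in edx, while "end" is
   passed on the stack. That is why the loop bound is read from
   [esp + STACK_OFFSET + 4 * 4 + 4] (after 4 register pushes and the local
   frame) and the function returns with "ret 4". */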

void
Z7_NO_INLINE
__declspec(naked)
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *data, const void *end)
{
  #if !defined(__clang__)
  UNUSED_VAR(p)
  UNUSED_VAR(data)
  UNUSED_VAR(end)
  #endif
  __asm   push  ebx
  __asm   push  ebp
  __asm   push  esi
  __asm   push  edi

  #define STACK_OFFSET 4 * 8
  __asm   sub   esp, STACK_OFFSET

#define COPY_1(n) \
  __asm   mov   eax, [ecx + n * 4] \
  __asm   mov   [esp + n * 4], eax \

#define COPY_2(n) \
  __asm   mov   eax, [esp + n * 4] \
  __asm   mov   [ecx + n * 4], eax \

  COPY_1(0)
  __asm   mov   edi, [ecx + 1 * 4] \
  COPY_1(2)
  COPY_1(3)
  COPY_1(4)
  COPY_1(5)
  COPY_1(6)
  COPY_1(7)

  __asm   mov   esi, edx \
  __asm   mov   [esp + 0 * 8 + 4], ecx
  __asm   mov   ecx, Z7_XXH_PRIME64_2_LOW \
  __asm   mov   ebp, Z7_XXH_PRIME64_1_LOW \
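
/* R(n, state1, state1_reg) below is one Xxh64_Round for lane n done in
   32-bit registers: edx:eax accumulates state + input * PRIME64_2 via the
   MY_MUL64 decomposition (ecx/ebp hold the PRIME low halves), the shld
   pair performs the 31-bit rotate, and the final multiply by PRIME64_1
   leaves the low half in [esp + n * 8] and the high half in state1_reg. */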
#define R(n, state1, state1_reg) \
  __asm   mov   eax, [esi + n * 8] \
  __asm   imul  ebx, eax, Z7_XXH_PRIME64_2_HIGH \
  __asm   add   ebx, state1 \
  __asm   mul   ecx \
  __asm   add   edx, ebx \
  __asm   mov   ebx, [esi + n * 8 + 4] \
  __asm   imul  ebx, ecx \
  __asm   add   eax, [esp + n * 8] \
  __asm   adc   edx, ebx \
  __asm   mov   ebx, eax \
  __asm   shld  eax, edx, 31 \
  __asm   shld  edx, ebx, 31 \
  __asm   imul  state1_reg, eax, Z7_XXH_PRIME64_1_HIGH \
  __asm   imul  edx, ebp \
  __asm   add   state1_reg, edx \
  __asm   mul   ebp \
  __asm   add   state1_reg, edx \
  __asm   mov   [esp + n * 8], eax \

#define R2(n) \
  R(n, [esp + n * 8 + 4], ebx) \
  __asm   mov   [esp + n * 8 + 4], ebx \

  __asm   align 16
  __asm   main_loop:
  R(0, edi, edi)
  R2(1)
  R2(2)
  R2(3)
  __asm   add   esi, 32
  __asm   cmp   esi, [esp + STACK_OFFSET + 4 * 4 + 4]
  __asm   jne   main_loop

  __asm   mov   ecx, [esp + 0 * 8 + 4]

  COPY_2(0)
  __asm   mov   [ecx + 1 * 4], edi
  COPY_2(2)
  COPY_2(3)
  COPY_2(4)
  COPY_2(5)
  COPY_2(6)
  COPY_2(7)

  __asm   add   esp, STACK_OFFSET
  __asm   pop   edi
  __asm   pop   esi
  __asm   pop   ebp
  __asm   pop   ebx
  __asm   ret   4
}

#else

void
Z7_NO_INLINE
Z7_FASTCALL
Xxh64State_UpdateBlocks(CXxh64State *p, const void *_data, const void *end)
{
  const Byte *data = (const Byte *)_data;
  UInt64 v[4];
  v[0] = p->v[0];
  v[1] = p->v[1];
  v[2] = p->v[2];
  v[3] = p->v[3];
  do
  {
    v[0] = Xxh64_Round(v[0], GetUi64(data));  data += 8;
    v[1] = Xxh64_Round(v[1], GetUi64(data));  data += 8;
    v[2] = Xxh64_Round(v[2], GetUi64(data));  data += 8;
    v[3] = Xxh64_Round(v[3], GetUi64(data));  data += 8;
  }
  while (data != end);
  p->v[0] = v[0];
  p->v[1] = v[1];
  p->v[2] = v[2];
  p->v[3] = v[3];
}

#endif

UInt64 Xxh64State_Digest(const CXxh64State *p, const void *_data, UInt64 count)
{
  UInt64 h = p->v[2];

  if (count >= 32)
  {
    h = Z7_ROTL64(p->v[0], 1) +
        Z7_ROTL64(p->v[1], 7) +
        Z7_ROTL64(h, 12) +
        Z7_ROTL64(p->v[3], 18);
    h = Xxh64_Merge(h, p->v[0]);
    h = Xxh64_Merge(h, p->v[1]);
    h = Xxh64_Merge(h, p->v[2]);
    h = Xxh64_Merge(h, p->v[3]);
  }
  else
    h += Z7_XXH_PRIME64_5;

  h += count;

  // XXH64_finalize():
  {
    unsigned cnt = (unsigned)count & 31;
    const Byte *data = (const Byte *)_data;
    while (cnt >= 8)
    {
      h ^= Xxh64_Round(0, GetUi64(data));
      data += 8;
      h = Z7_ROTL64(h, 27);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1) + Z7_XXH_PRIME64_4;
      cnt -= 8;
    }
    if (cnt >= 4)
    {
      const UInt32 v = GetUi32(data);
      data += 4;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_1);
      h = Z7_ROTL64(h, 23);
      h = MY_MUL64(h, Z7_XXH_PRIME64_2) + Z7_XXH_PRIME64_3;
      cnt -= 4;
    }
    while (cnt)
    {
      const UInt32 v = *data++;
      h ^= MY_MUL_32_64(v, Z7_XXH_PRIME64_5);
      h = Z7_ROTL64(h, 11);
      h = MY_MUL64(h, Z7_XXH_PRIME64_1);
      cnt--;
    }
    // XXH64_avalanche(h):
    h ^= h >> 33;  h = MY_MUL64(h, Z7_XXH_PRIME64_2);
    h ^= h >> 29;  h = MY_MUL64(h, Z7_XXH_PRIME64_3);
    h ^= h >> 32;
    return h;
  }
}
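
/* Xxh64State_Digest does not modify the state: it merges the four lanes
   (or starts from seed + PRIME64_5 when fewer than 32 bytes were hashed),
   then consumes the final (count & 31) tail bytes from _data. In the
   streamed CXxh64 usage below, that tail is what Xxh64_Update leaves
   buffered in p->buf64. */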

void Xxh64_Init(CXxh64 *p)
{
  Xxh64State_Init(&p->state);
  p->count = 0;
  p->buf64[0] = 0;
  p->buf64[1] = 0;
  p->buf64[2] = 0;
  p->buf64[3] = 0;
}
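
/* Xxh64_Update keeps the tail (p->count mod 32 bytes) buffered in p->buf64
   and feeds Xxh64State_UpdateBlocks only whole 32-byte blocks; p->count
   records the total number of bytes hashed so far. */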

void Xxh64_Update(CXxh64 *p, const void *_data, size_t size)
{
  const Byte *data = (const Byte *)_data;
  unsigned cnt;
  if (size == 0)
    return;
  cnt = (unsigned)p->count;
  p->count += size;

  if (cnt &= 31)
  {
    unsigned rem = 32 - cnt;
    Byte *dest = (Byte *)p->buf64 + cnt;
    if (rem > size)
      rem = (unsigned)size;
    size -= rem;
    cnt += rem;
    // memcpy((Byte *)p->buf64 + cnt, data, rem);
    do
      *dest++ = *data++;
    while (--rem);
    if (cnt != 32)
      return;
    Xxh64State_UpdateBlocks(&p->state, p->buf64, &p->buf64[4]);
  }

  if (size &= ~(size_t)31)
  {
    Xxh64State_UpdateBlocks(&p->state, data, data + size);
    data += size;
  }

  cnt = (unsigned)p->count & 31;
  if (cnt)
  {
    // memcpy(p->buf64, data, cnt);
    Byte *dest = (Byte *)p->buf64;
    do
      *dest++ = *data++;
    while (--cnt);
  }
}
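
/* Usage sketch (illustrative; the CXxh64 type and these functions are
   declared in Xxh64.h, and part1/part2 are hypothetical input buffers):

     CXxh64 h;
     Xxh64_Init(&h);
     Xxh64_Update(&h, part1, part1Size);  // may be called any number of times
     Xxh64_Update(&h, part2, part2Size);
     UInt64 digest = Xxh64State_Digest(&h.state, h.buf64, h.count);
*/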