7zip/C/XzIn.c

386 lines
11 KiB
C
Raw Permalink Normal View History

2021-12-27 01:00:00 +01:00
/* XzIn.c - Xz input
2025-07-05 02:00:00 +02:00
: Igor Pavlov : Public domain */
2021-12-27 01:00:00 +01:00
#include "Precomp.h"
#include <string.h>
#include "7zCrc.h"
#include "Xz.h"
2025-07-05 02:00:00 +02:00
#include "CpuArch.h"
2021-12-27 01:00:00 +01:00
2025-07-05 02:00:00 +02:00
// Footer signature test: compares the 16-bit value that GetUi16a() reads at byte
// offset 10 of (p) with the two footer signature bytes packed as
// (XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8)).
// NOTE(review): the name suggests (p) must point to a 12-byte footer buffer that
// is aligned for 16-bit access, and that GetUi16a() is an aligned little-endian
// read - confirm against CpuArch.h.
#define XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(p) \
(GetUi16a((const Byte *)(const void *)(p) + 10) == \
(XZ_FOOTER_SIG_0 | (XZ_FOOTER_SIG_1 << 8)))
2021-12-27 01:00:00 +01:00
2023-06-21 02:00:00 +02:00
/*
  Reads the xz stream header (XZ_STREAM_HEADER_SIZE bytes) from (inStream)
  and parses it into (*p).

  returns:
    SZ_ERROR_NO_ARCHIVE : fewer than XZ_STREAM_HEADER_SIZE bytes were read,
                          or the data does not start with XZ_SIG.
    otherwise           : the result of Xz_ParseHeader().
  A failing stream read is passed through RINOK (expected to return the
  non-SZ_OK code to the caller).
*/
SRes Xz_ReadHeader(CXzStreamFlags *p, ISeqInStreamPtr inStream)
{
  // UInt32 array is used as storage, so the header bytes are aligned for UInt32
  UInt32 data32[XZ_STREAM_HEADER_SIZE / 4];
  size_t processedSize = XZ_STREAM_HEADER_SIZE;
  RINOK(SeqInStream_ReadMax(inStream, data32, &processedSize))
  if (processedSize != XZ_STREAM_HEADER_SIZE
      || memcmp(data32, XZ_SIG, XZ_SIG_SIZE) != 0)
    return SZ_ERROR_NO_ARCHIVE;
  return Xz_ParseHeader(p, (const Byte *)(const void *)data32);
}
2025-07-05 02:00:00 +02:00
// Reads one variable-length integer with Xz_ReadVarInt(buf, size, res),
// then advances (buf) and decreases (size) by the number of consumed bytes.
// If Xz_ReadVarInt() returns 0, the enclosing function returns SZ_ERROR_ARCHIVE.
// (buf) and (size) must be modifiable lvalues: the macro updates them in place.
#define READ_VARINT_AND_CHECK(buf, size, res) \
{ const unsigned s = Xz_ReadVarInt(buf, size, res); \
  if (s == 0) return SZ_ERROR_ARCHIVE; \
  size -= s; \
  buf += s; \
}
2021-12-27 01:00:00 +01:00
2023-06-21 02:00:00 +02:00
/*
  Reads a block header from (inStream) and parses it into (*p).

  out:
    (*isIndex)       : True, if the first byte is 0 (no block header was read
                       and (*p) was not touched); False otherwise.
    (*headerSizeRes) : 0, if the first byte could not be read,
                       1, if the first byte is 0,
                       otherwise the full header size: (firstByte * 4 + 4).

  returns:
    SZ_ERROR_INPUT_EOF : stream ended before the full header was read.
    otherwise          : SZ_OK for the (isIndex) case, or the result of XzBlock_Parse().
*/
SRes XzBlock_ReadHeader(CXzBlock *p, ISeqInStreamPtr inStream, BoolInt *isIndex, UInt32 *headerSizeRes)
{
  MY_ALIGN(4)
  Byte header[XZ_BLOCK_HEADER_SIZE_MAX];
  unsigned headerSize;
  *headerSizeRes = 0;
  RINOK(SeqInStream_ReadByte(inStream, &header[0]))
  headerSize = header[0];
  if (headerSize == 0)
  {
    *headerSizeRes = 1;
    *isIndex = True;
    return SZ_OK;
  }

  *isIndex = False;
  // the first byte encodes the header size in 4-byte units, minus one unit
  headerSize = (headerSize << 2) + 4;
  *headerSizeRes = (UInt32)headerSize;
  {
    // the first byte is already in header[0], so we read the rest of the header
    size_t processedSize = headerSize - 1;
    RINOK(SeqInStream_ReadMax(inStream, header + 1, &processedSize))
    if (processedSize != headerSize - 1)
      return SZ_ERROR_INPUT_EOF;
  }
  return XzBlock_Parse(p, header);
}
2025-07-05 02:00:00 +02:00
2021-12-27 01:00:00 +01:00
// Adds (val) to the UInt64 lvalue (size).
// If the unsigned sum wraps around, the enclosing function returns XZ_SIZE_OVERFLOW.
// (val) is evaluated once.
#define ADD_SIZE_CHECK(size, val) \
{ const UInt64 newSize = size + (val); \
  if (newSize < size) return XZ_SIZE_OVERFLOW; \
  size = newSize; \
}
2021-12-27 01:00:00 +01:00
UInt64 Xz_GetUnpackSize(const CXzStream *p)
{
UInt64 size = 0;
size_t i;
for (i = 0; i < p->numBlocks; i++)
2023-06-21 02:00:00 +02:00
{
ADD_SIZE_CHECK(size, p->blocks[i].unpackSize)
}
2021-12-27 01:00:00 +01:00
return size;
}
UInt64 Xz_GetPackSize(const CXzStream *p)
{
UInt64 size = 0;
size_t i;
for (i = 0; i < p->numBlocks; i++)
2023-06-21 02:00:00 +02:00
{
ADD_SIZE_CHECK(size, (p->blocks[i].totalSize + 3) & ~(UInt64)3)
}
2021-12-27 01:00:00 +01:00
return size;
}
2025-07-05 02:00:00 +02:00
// Parses the index record stored in (buf, size) and fills p->blocks / p->numBlocks.
// input:
//   CXzStream (p) is empty object.
//   size != 0
//   (size & 3) == 0
//   (buf) is aligned for at least 4 bytes.
// output:
//   p->numBlocks is number of allocated items in p->blocks
//   p->blocks[*] values must be ignored, if function returns error.
// returns:
//   SZ_ERROR_ARCHIVE : bad indicator byte, CRC mismatch, bad varint,
//                      zero (totalSize), or bad padding.
//   SZ_ERROR_MEM     : number of blocks is too big, or allocation failed.
static SRes Xz_ParseIndex(CXzStream *p, const Byte *buf, size_t size, ISzAllocPtr alloc)
{
  size_t numBlocks;
  // the first byte (index indicator) must be 0
  if (size < 5 || buf[0] != 0)
    return SZ_ERROR_ARCHIVE;
  // the last 4 bytes are CRC32 of all preceding bytes
  size -= 4;
  {
    const UInt32 crc = CrcCalc(buf, size);
    if (crc != GetUi32a(buf + size))
      return SZ_ERROR_ARCHIVE;
  }
  // skip the indicator byte
  buf++;
  size--;
  {
    UInt64 numBlocks64;
    READ_VARINT_AND_CHECK(buf, size, &numBlocks64)
    // each record needs at least 2 bytes (two varints).
    // (numBlocks64) is 63-bit value, so we can calculate (numBlocks64 * 2):
    if (numBlocks64 * 2 > size)
      return SZ_ERROR_ARCHIVE;
    // keeps (sizeof(CXzBlockSizes) * numBlocks) below half of size_t range
    if (numBlocks64 >= ((size_t)1 << (sizeof(size_t) * 8 - 1)) / sizeof(CXzBlockSizes))
      return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
    numBlocks = (size_t)numBlocks64;
  }
  // Xz_Free(p, alloc); // it's optional, because (p) is empty already
  if (numBlocks)
  {
    CXzBlockSizes *blocks = (CXzBlockSizes *)ISzAlloc_Alloc(alloc, sizeof(CXzBlockSizes) * numBlocks);
    if (!blocks)
      return SZ_ERROR_MEM;
    p->blocks = blocks;
    p->numBlocks = numBlocks;
    // the caller will call Xz_Free() in case of error
    do
    {
      READ_VARINT_AND_CHECK(buf, size, &blocks->totalSize)
      READ_VARINT_AND_CHECK(buf, size, &blocks->unpackSize)
      if (blocks->totalSize == 0)
        return SZ_ERROR_ARCHIVE;
      blocks++;
    }
    while (--numBlocks);
  }
  // the rest is padding: less than 4 bytes, and all bytes must be zero
  if (size >= 4)
    return SZ_ERROR_ARCHIVE;
  while (size)
    if (buf[--size])
      return SZ_ERROR_ARCHIVE;
  return SZ_OK;
}
2025-07-05 02:00:00 +02:00
/*
2023-06-21 02:00:00 +02:00
static SRes Xz_ReadIndex(CXzStream *p, ILookInStreamPtr stream, UInt64 indexSize, ISzAllocPtr alloc)
2021-12-27 01:00:00 +01:00
{
SRes res;
size_t size;
Byte *buf;
2025-07-05 02:00:00 +02:00
if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1)))
return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
2021-12-27 01:00:00 +01:00
size = (size_t)indexSize;
buf = (Byte *)ISzAlloc_Alloc(alloc, size);
if (!buf)
return SZ_ERROR_MEM;
res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED);
if (res == SZ_OK)
2025-07-05 02:00:00 +02:00
res = Xz_ParseIndex(p, buf, size, alloc);
2021-12-27 01:00:00 +01:00
ISzAlloc_Free(alloc, buf);
return res;
}
2025-07-05 02:00:00 +02:00
*/
2021-12-27 01:00:00 +01:00
2023-06-21 02:00:00 +02:00
// Seeks (stream) to (offset) with LookInStream_SeekTo() and then reads (size)
// bytes to (buf) with LookInStream_Read().
// A seek failure is passed through RINOK; otherwise the result of the read is returned.
static SRes LookInStream_SeekRead_ForArc(ILookInStreamPtr stream, UInt64 offset, void *buf, size_t size)
{
  RINOK(LookInStream_SeekTo(stream, offset))
  return LookInStream_Read(stream, buf, size);
  /* return LookInStream_Read2(stream, buf, size, SZ_ERROR_NO_ARCHIVE); */
}
2025-07-05 02:00:00 +02:00
/*
in:
(*startOffset) is position in (stream) where xz_stream must be finished.
out:
if returns SZ_OK, then (*startOffset) is position in stream that shows start of xz_stream.
*/
2023-06-21 02:00:00 +02:00
static SRes Xz_ReadBackward(CXzStream *p, ILookInStreamPtr stream, Int64 *startOffset, ISzAllocPtr alloc)
2021-12-27 01:00:00 +01:00
{
2025-07-05 02:00:00 +02:00
#define TEMP_BUF_SIZE (1 << 10)
UInt32 buf32[TEMP_BUF_SIZE / 4];
2021-12-27 01:00:00 +01:00
UInt64 pos = (UInt64)*startOffset;
2025-07-05 02:00:00 +02:00
if ((pos & 3) || pos < XZ_STREAM_FOOTER_SIZE)
2021-12-27 01:00:00 +01:00
return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE;
2025-07-05 02:00:00 +02:00
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE))
2021-12-27 01:00:00 +01:00
2025-07-05 02:00:00 +02:00
if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32))
2021-12-27 01:00:00 +01:00
{
pos += XZ_STREAM_FOOTER_SIZE;
for (;;)
{
2025-07-05 02:00:00 +02:00
// pos != 0
// (pos & 3) == 0
size_t i = pos >= TEMP_BUF_SIZE ? TEMP_BUF_SIZE : (size_t)pos;
2021-12-27 01:00:00 +01:00
pos -= i;
2025-07-05 02:00:00 +02:00
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, i))
i /= 4;
do
if (buf32[i - 1] != 0)
2021-12-27 01:00:00 +01:00
break;
2025-07-05 02:00:00 +02:00
while (--i);
pos += i * 4;
#define XZ_STREAM_BACKWARD_READING_PAD_MAX (1 << 16)
// here we don't support rare case with big padding for xz stream.
// so we have padding limit for backward reading.
if ((UInt64)*startOffset - pos > XZ_STREAM_BACKWARD_READING_PAD_MAX)
2021-12-27 01:00:00 +01:00
return SZ_ERROR_NO_ARCHIVE;
2025-07-05 02:00:00 +02:00
if (i)
break;
2021-12-27 01:00:00 +01:00
}
2025-07-05 02:00:00 +02:00
// we try to open xz stream after skipping zero padding.
// ((UInt64)*startOffset == pos) is possible here!
2021-12-27 01:00:00 +01:00
if (pos < XZ_STREAM_FOOTER_SIZE)
return SZ_ERROR_NO_ARCHIVE;
pos -= XZ_STREAM_FOOTER_SIZE;
2025-07-05 02:00:00 +02:00
RINOK(LookInStream_SeekRead_ForArc(stream, pos, buf32, XZ_STREAM_FOOTER_SIZE))
if (!XZ_FOOTER_12B_ALIGNED16_SIG_CHECK(buf32))
2021-12-27 01:00:00 +01:00
return SZ_ERROR_NO_ARCHIVE;
}
2025-07-05 02:00:00 +02:00
p->flags = (CXzStreamFlags)GetBe16a(buf32 + 2);
2021-12-27 01:00:00 +01:00
if (!XzFlags_IsSupported(p->flags))
return SZ_ERROR_UNSUPPORTED;
{
/* to eliminate GCC 6.3 warning:
dereferencing type-punned pointer will break strict-aliasing rules */
2025-07-05 02:00:00 +02:00
const UInt32 *buf_ptr = buf32;
if (GetUi32a(buf_ptr) != CrcCalc(buf32 + 1, 6))
2021-12-27 01:00:00 +01:00
return SZ_ERROR_ARCHIVE;
}
{
2025-07-05 02:00:00 +02:00
const UInt64 indexSize = ((UInt64)GetUi32a(buf32 + 1) + 1) << 2;
if (pos < indexSize)
2021-12-27 01:00:00 +01:00
return SZ_ERROR_ARCHIVE;
2025-07-05 02:00:00 +02:00
pos -= indexSize;
// v25.00: relaxed indexSize check. We allow big index table.
// if (indexSize > ((UInt32)1 << 31))
if (indexSize >= ((size_t)1 << (sizeof(size_t) * 8 - 1)))
return SZ_ERROR_MEM; // SZ_ERROR_ARCHIVE
RINOK(LookInStream_SeekTo(stream, pos))
// RINOK(Xz_ReadIndex(p, stream, indexSize, alloc))
{
SRes res;
const size_t size = (size_t)indexSize;
// if (size != indexSize) return SZ_ERROR_UNSUPPORTED;
Byte *buf = (Byte *)ISzAlloc_Alloc(alloc, size);
if (!buf)
return SZ_ERROR_MEM;
res = LookInStream_Read2(stream, buf, size, SZ_ERROR_UNSUPPORTED);
if (res == SZ_OK)
res = Xz_ParseIndex(p, buf, size, alloc);
ISzAlloc_Free(alloc, buf);
RINOK(res)
}
}
{
UInt64 total = Xz_GetPackSize(p);
if (total == XZ_SIZE_OVERFLOW || total >= ((UInt64)1 << 63))
return SZ_ERROR_ARCHIVE;
total += XZ_STREAM_HEADER_SIZE;
if (pos < total)
return SZ_ERROR_ARCHIVE;
pos -= total;
2023-06-21 02:00:00 +02:00
RINOK(LookInStream_SeekTo(stream, pos))
2021-12-27 01:00:00 +01:00
*startOffset = (Int64)pos;
}
{
CXzStreamFlags headerFlags;
CSecToRead secToRead;
SecToRead_CreateVTable(&secToRead);
secToRead.realStream = stream;
2023-06-21 02:00:00 +02:00
RINOK(Xz_ReadHeader(&headerFlags, &secToRead.vt))
2021-12-27 01:00:00 +01:00
return (p->flags == headerFlags) ? SZ_OK : SZ_ERROR_ARCHIVE;
}
}
/* ---------- Xz Streams ---------- */
// Initializes (p) with the Xzs_CONSTRUCT() macro.
// NOTE(review): presumably sets (p) to empty state without allocation - confirm in Xz.h.
void Xzs_Construct(CXzs *p)
{
  Xzs_CONSTRUCT(p)
}
// Calls Xz_Free() for each of p->num streams, frees p->streams array with (alloc),
// and resets (p) to empty state: (num = numAllocated = 0), (streams = NULL).
void Xzs_Free(CXzs *p, ISzAllocPtr alloc)
{
  size_t i;
  for (i = 0; i < p->num; i++)
    Xz_Free(&p->streams[i], alloc);
  ISzAlloc_Free(alloc, p->streams);
  p->num = p->numAllocated = 0;
  p->streams = NULL;
}
// Returns the sum of (numBlocks) over all p->num streams in p->streams.
// There is no overflow check here: the 64-bit sum just wraps around.
UInt64 Xzs_GetNumBlocks(const CXzs *p)
{
  UInt64 total = 0;
  size_t rem = p->num;
  // order of summation doesn't change the result of unsigned addition
  while (rem != 0)
    total += p->streams[--rem].numBlocks;
  return total;
}
UInt64 Xzs_GetUnpackSize(const CXzs *p)
{
UInt64 size = 0;
size_t i;
for (i = 0; i < p->num; i++)
2023-06-21 02:00:00 +02:00
{
ADD_SIZE_CHECK(size, Xz_GetUnpackSize(&p->streams[i]))
}
2021-12-27 01:00:00 +01:00
return size;
}
/*
UInt64 Xzs_GetPackSize(const CXzs *p)
{
UInt64 size = 0;
size_t i;
for (i = 0; i < p->num; i++)
2023-06-21 02:00:00 +02:00
{
ADD_SIZE_CHECK(size, Xz_GetTotalSize(&p->streams[i]))
}
2021-12-27 01:00:00 +01:00
return size;
}
*/
2023-06-21 02:00:00 +02:00
/*
  Reads xz streams backward from the end of (stream) and appends them to (p),
  until a stream that starts at offset 0 is stored or an error occurs.

  in:
    (progress) can be NULL.
  out:
    (*startOffset) is set to end offset of (stream) at first, and then it's
    updated by each Xz_ReadBackward() call.
    streams that were stored to (p) before an error stay in (p).
  returns:
    SZ_OK             : a stream that starts at offset 0 was stored.
    SZ_ERROR_MEM      : allocation for p->streams array failed.
    SZ_ERROR_PROGRESS : (progress) callback returned non-SZ_OK.
    otherwise         : error from seek operation or from Xz_ReadBackward().
*/
SRes Xzs_ReadBackward(CXzs *p, ILookInStreamPtr stream, Int64 *startOffset, ICompressProgressPtr progress, ISzAllocPtr alloc)
{
  Int64 endOffset = 0;
  // it's supposed that CXzs object is empty here.
  // if CXzs object is not empty, it will add new streams to that non-empty object.
  // Xzs_Free(p, alloc); // it's optional call to empty CXzs object.
  RINOK(ILookInStream_Seek(stream, &endOffset, SZ_SEEK_END))
  *startOffset = endOffset;
  for (;;)
  {
    CXzStream st;
    SRes res;
    Xz_CONSTRUCT(&st)
    res = Xz_ReadBackward(&st, stream, startOffset, alloc);
    // if (res == SZ_OK), then (*startOffset) is start offset of new stream
    // if (res != SZ_OK), then (*startOffset) is unchanged or it's expected start offset of stream with error
    st.startOffset = (UInt64)*startOffset;
    // we must store (st) object to array, or we must free (st) local object.
    if (res != SZ_OK)
    {
      Xz_Free(&st, alloc);
      return res;
    }
    if (p->num == p->numAllocated)
    {
      // we grow the array by 25% + 1 item
      const size_t newNum = p->num + p->num / 4 + 1;
      void *data = ISzAlloc_Alloc(alloc, newNum * sizeof(CXzStream));
      if (!data)
      {
        Xz_Free(&st, alloc);
        return SZ_ERROR_MEM;
      }
      p->numAllocated = newNum;
      if (p->num != 0)
        memcpy(data, p->streams, p->num * sizeof(CXzStream));
      ISzAlloc_Free(alloc, p->streams);
      p->streams = (CXzStream *)data;
    }
    // we use direct copying of raw data from local variable (st) to object in array.
    // so we don't need to call Xz_Free(&st, alloc) after copying and after p->num++
    p->streams[p->num++] = st;
    if (*startOffset == 0)
      return SZ_OK;
    // seek operation is optional:
    // RINOK(LookInStream_SeekTo(stream, (UInt64)*startOffset))
    // progress: number of bytes from the end of (stream) that were handled; total size is unknown (-1)
    if (progress && ICompressProgress_Progress(progress, (UInt64)(endOffset - *startOffset), (UInt64)(Int64)-1) != SZ_OK)
      return SZ_ERROR_PROGRESS;
  }
}