Skip to content

Commit 741b87b

Browse files
authored
Fuzzing and bugfixes for magicless-format decoding (#3976)
* fuzzing and bugfixes for magicless format * reset dctx before each decompression * do not memcmp empty buffers * nit: decompressor errata
1 parent 6a0052a commit 741b87b

File tree

6 files changed

+163
-4
lines changed

6 files changed

+163
-4
lines changed

CHANGELOG

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln
99
lib: fix CCtx size estimation with external sequence producer, by @embg
1010
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
1111
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
12+
lib: fix several bugs in magicless-format decoding, by @embg
1213
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
1314
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
1415
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte

doc/decompressor_errata.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
125125
See the compressor workaround code:
126126

127127
https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682
128+
129+
Magicless format
130+
----------------------
131+
132+
**Last affected version**: v1.5.5
133+
134+
**Affected decompressor component(s)**: Library
135+
136+
**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)
137+
138+
**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`
139+
140+
v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
141+
These include but are not limited to:
142+
* Valid frames that happen to begin with a legacy magic number (little-endian)
143+
* Valid frames that happen to begin with a skippable magic number (little-endian)
144+
145+
If you are affected by this issue and cannot update to v1.5.6 or later, there is a
146+
workaround to recover affected data. Simply prepend the ZSTD magic number
147+
`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
148+
decoder.

lib/decompress/zstd_decompress.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
10851085
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
10861086

10871087
#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
1088-
if (ZSTD_isLegacy(src, srcSize)) {
1088+
if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
10891089
size_t decodedSize;
10901090
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
10911091
if (ZSTD_isError(frameSize)) return frameSize;
@@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
11151115
}
11161116
#endif
11171117

1118-
if (srcSize >= 4) {
1118+
if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
11191119
U32 const magicNumber = MEM_readLE32(src);
11201120
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
11211121
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
14121412
case ZSTDds_decodeSkippableHeader:
14131413
assert(src != NULL);
14141414
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
1415+
assert(dctx->format != ZSTD_f_zstd1_magicless);
14151416
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
14161417
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
14171418
dctx->stage = ZSTDds_skipFrame;
@@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
22092210
DEBUGLOG(4, "Consume header");
22102211
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
22112212

2212-
if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2213+
if (zds->format == ZSTD_f_zstd1
2214+
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
22132215
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
22142216
zds->stage = ZSTDds_skipFrame;
22152217
} else {

tests/fuzz/Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ FUZZ_TARGETS := \
124124
sequence_compression_api \
125125
seekable_roundtrip \
126126
huf_round_trip \
127-
huf_decompress
127+
huf_decompress \
128+
decompress_cross_format
128129

129130
all: libregression.a $(FUZZ_TARGETS)
130131

@@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o
238239

239240
huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
240241
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@
242+
243+
decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
244+
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@
241245

242246
libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
243247
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o

tests/fuzz/decompress_cross_format.c

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under both the BSD-style license (found in the
6+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
7+
* in the COPYING file in the root directory of this source tree).
8+
* You may select, at your option, one of the above-listed licenses.
9+
*/
10+
11+
// This fuzz target validates decompression of magicless-format compressed data.
12+
13+
#include <stddef.h>
14+
#include <stdlib.h>
15+
#include <stdio.h>
16+
#include <string.h>
17+
#include "fuzz_helpers.h"
18+
#define ZSTD_STATIC_LINKING_ONLY
19+
#include "zstd.h"
20+
#include "fuzz_data_producer.h"
21+
22+
static ZSTD_DCtx *dctx = NULL;
23+
24+
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
25+
{
26+
// Give a random portion of src data to the producer, to use for parameter generation.
27+
// The rest will be interpreted as magicless compressed data.
28+
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
29+
size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
30+
const void* const magiclessSrc = src;
31+
size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
32+
void* const standardDst = FUZZ_malloc(dstSize);
33+
void* const magiclessDst = FUZZ_malloc(dstSize);
34+
35+
// Create standard-format src from magicless-format src
36+
const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
37+
size_t standardSize = sizeof(zstd_magic) + magiclessSize;
38+
void* const standardSrc = FUZZ_malloc(standardSize);
39+
memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
40+
memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);
41+
42+
// Truncate to a single frame
43+
{
44+
const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
45+
if (ZSTD_isError(standardFrameCompressedSize)) {
46+
goto cleanup_and_return;
47+
}
48+
standardSize = standardFrameCompressedSize;
49+
magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
50+
}
51+
52+
// Create DCtx if needed
53+
if (!dctx) {
54+
dctx = ZSTD_createDCtx();
55+
FUZZ_ASSERT(dctx);
56+
}
57+
58+
// Test one-shot decompression
59+
{
60+
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
61+
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
62+
const size_t standardRet = ZSTD_decompressDCtx(
63+
dctx, standardDst, dstSize, standardSrc, standardSize);
64+
65+
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
66+
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
67+
const size_t magiclessRet = ZSTD_decompressDCtx(
68+
dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);
69+
70+
// Standard accepts => magicless should accept
71+
if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);
72+
73+
// Magicless accepts => standard should accept
74+
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
75+
if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);
76+
77+
// If both accept, decompressed size and data should match
78+
if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
79+
FUZZ_ASSERT(standardRet == magiclessRet);
80+
if (standardRet > 0) {
81+
FUZZ_ASSERT(
82+
memcmp(standardDst, magiclessDst, standardRet) == 0
83+
);
84+
}
85+
}
86+
}
87+
88+
// Test streaming decompression
89+
{
90+
ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
91+
ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
92+
ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
93+
ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };
94+
95+
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
96+
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
97+
const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);
98+
99+
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
100+
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
101+
const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);
102+
103+
// Standard accepts => magicless should accept
104+
if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);
105+
106+
// Magicless accepts => standard should accept
107+
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
108+
if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);
109+
110+
// If both accept, decompressed size and data should match
111+
if (standardRet == 0 && magiclessRet == 0) {
112+
FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
113+
if (standardOut.pos > 0) {
114+
FUZZ_ASSERT(
115+
memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
116+
);
117+
}
118+
}
119+
}
120+
121+
cleanup_and_return:
122+
#ifndef STATEFUL_FUZZING
123+
ZSTD_freeDCtx(dctx); dctx = NULL;
124+
#endif
125+
free(standardSrc);
126+
free(standardDst);
127+
free(magiclessDst);
128+
FUZZ_dataProducer_free(producer);
129+
return 0;
130+
}

tests/fuzz/fuzz.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
6565
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
6666
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
6767
'huf_decompress': TargetInfo(InputType.RAW_DATA),
68+
'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
6869
}
6970
TARGETS = list(TARGET_INFO.keys())
7071
ALL_TARGETS = TARGETS + ['all']

0 commit comments

Comments
 (0)