Skip to content

Commit 5bae43b

Browse files
authored
Merge pull request #4178 from facebook/split_fromBorders
Add fastest block-splitter variant
2 parents 7fb5347 + 4e1a879 commit 5bae43b

File tree

6 files changed

+229
-177
lines changed

6 files changed

+229
-177
lines changed

lib/compress/hist.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@ unsigned HIST_isError(size_t code) { return ERR_isError(code); }
2626
/*-**************************************************************
2727
* Histogram functions
2828
****************************************************************/
/*! HIST_add() :
 *  Lowest-level histogram helper: adds the number of occurrences of each
 *  byte of @src into @count. @count is not reset beforehand, and is
 *  presumed large enough to index every byte value (256 entries minimum).
 *  Uses no additional stack or heap memory.
 */
void HIST_add(unsigned* count, const void* src, size_t srcSize)
{
    const unsigned char* p = (const unsigned char*)src;
    const unsigned char* const limit = p + srcSize;
    while (p < limit) {
        count[*p] += 1;
        p++;
    }
}
2939
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
3040
const void* src, size_t srcSize)
3141
{

lib/compress/hist.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,10 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
7373
*/
7474
unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
7575
const void* src, size_t srcSize);
76+
77+
/*! HIST_add() :
78+
* Lowest level: just add nb of occurrences of characters from @src into @count.
79+
* @count is not reset. @count array is presumed large enough (i.e. 1 KB).
80+
* This function does not need any additional stack memory.
81+
*/
82+
void HIST_add(unsigned* count, const void* src, size_t srcSize);

lib/compress/zstd_compress.c

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4493,32 +4493,22 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
44934493

44944494
static size_t ZSTD_optimalBlockSize(ZSTD_CCtx* cctx, const void* src, size_t srcSize, size_t blockSizeMax, ZSTD_strategy strat, S64 savings)
44954495
{
4496+
/* split level based on compression strategy, from `fast` to `btultra2` */
4497+
static const int splitLevels[] = { 0, 0, 1, 2, 2, 3, 3, 4, 4, 4 };
44964498
/* note: conservatively only split full blocks (128 KB) currently.
44974499
* While it's possible to go lower, let's keep it simple for a first implementation.
44984500
* Besides, benefits of splitting are reduced when blocks are already small.
44994501
*/
45004502
if (srcSize < 128 KB || blockSizeMax < 128 KB)
45014503
return MIN(srcSize, blockSizeMax);
45024504
/* do not split incompressible data though:
4503-
* ensure a 3 bytes per full block overhead limit.
4504-
* Note: as a consequence, the first full block skips the splitting detector.
4505+
* require verified savings to allow pre-splitting.
4506+
* Note: as a consequence, the first full block is not split.
45054507
*/
45064508
if (savings < 3) return 128 KB;
45074509
/* dynamic splitting has a cpu cost for analysis,
4508-
* due to that cost it's only used for higher levels */
4509-
if (strat >= ZSTD_btopt)
4510-
return ZSTD_splitBlock(src, blockSizeMax, 3, cctx->tmpWorkspace, cctx->tmpWkspSize);
4511-
if (strat >= ZSTD_lazy2)
4512-
return ZSTD_splitBlock(src, blockSizeMax, 2, cctx->tmpWorkspace, cctx->tmpWkspSize);
4513-
if (strat >= ZSTD_greedy)
4514-
return ZSTD_splitBlock(src, blockSizeMax, 1, cctx->tmpWorkspace, cctx->tmpWkspSize);
4515-
if (strat >= ZSTD_dfast)
4516-
return ZSTD_splitBlock(src, blockSizeMax, 0, cctx->tmpWorkspace, cctx->tmpWkspSize);
4517-
/* blind split strategy
4518-
* heuristic value, tested as being "generally better".
4519-
* no cpu cost, but can over-split homegeneous data.
4520-
*/
4521-
return 92 KB;
4510+
* select a variant among multiple gradual speed/accuracy tradeoffs */
4511+
return ZSTD_splitBlock(src, blockSizeMax, splitLevels[strat], cctx->tmpWorkspace, cctx->tmpWkspSize);
45224512
}
45234513

45244514
/*! ZSTD_compress_frameChunk() :

lib/compress/zstd_preSplit.c

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "../common/mem.h" /* S64 */
1313
#include "../common/zstd_deps.h" /* ZSTD_memset */
1414
#include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
15+
#include "hist.h" /* HIST_add */
1516
#include "zstd_preSplit.h"
1617

1718

@@ -77,10 +78,10 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)
7778

7879
#define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
7980

80-
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
81+
#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \
8182
static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
82-
{ \
83-
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
83+
{ \
84+
recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \
8485
}
8586

8687
ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
@@ -185,10 +186,52 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
185186
(void)flushEvents; (void)removeEvents;
186187
}
187188

189+
/* ZSTD_splitBlock_fromBorders(): very fast strategy :
190+
* compare fingerprint from beginning and end of the block,
191+
* derive from their difference if it's preferable to split in the middle,
192+
* repeat the process a second time, for finer grained decision.
193+
* 3 times did not brought improvements, so I stopped at 2.
194+
* Benefits are good enough for a cheap heuristic.
195+
* More accurate splitting saves more, but speed impact is also more perceptible.
196+
* For better accuracy, use more elaborate variant *_byChunks.
197+
*/
198+
static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize,
199+
void* workspace, size_t wkspSize)
200+
{
201+
#define SEGMENT_SIZE 512
202+
FPStats* const fpstats = (FPStats*)workspace;
203+
Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned));
204+
assert(blockSize == (128 << 10));
205+
assert(workspace != NULL);
206+
assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0);
207+
ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats));
208+
assert(wkspSize >= sizeof(FPStats)); (void)wkspSize;
209+
210+
initStats(fpstats);
211+
HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE);
212+
HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE);
213+
fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE;
214+
if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8))
215+
return blockSize;
216+
217+
HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE);
218+
middleEvents->nbEvents = SEGMENT_SIZE;
219+
{ U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8);
220+
U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8);
221+
U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3;
222+
if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance)
223+
return 64 KB;
224+
return (distFromBegin > distFromEnd) ? 32 KB : 96 KB;
225+
}
226+
}
227+
188228
/*! ZSTD_splitBlock():
 *  Dispatch to a block-splitting variant depending on @level :
 *  level 0 maps to the fastest heuristic (borders comparison),
 *  levels 1-4 map to the increasingly accurate chunk-based analysis.
 */
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
                       int level,
                       void* workspace, size_t wkspSize)
{
    assert(0<=level && level<=4);
    if (level == 0)
        return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize);
    /* level >= 1 */
    return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize);
}

lib/compress/zstd_preSplit.h

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@ extern "C" {
1919

2020
#define ZSTD_SLIPBLOCK_WORKSPACESIZE 8208
2121

22-
/* @level must be a value between 0 and 3.
23-
* higher levels spend more energy to find block boundaries
24-
* @workspace must be aligned on 8-bytes boundaries
22+
/* ZSTD_splitBlock():
23+
* @level must be a value between 0 and 4.
24+
* higher levels spend more energy to detect block boundaries.
25+
* @workspace must be aligned for size_t.
2526
* @wkspSize must be >= ZSTD_SLIPBLOCK_WORKSPACESIZE
26-
* note2:
27-
* for the time being, this function only accepts full 128 KB blocks,
28-
* therefore @blockSizeMax must be == 128 KB.
29-
* This could be extended to smaller sizes in the future.
27+
* note:
28+
* For the time being, this function only accepts full 128 KB blocks.
29+
* Therefore, @blockSize must be == 128 KB.
30+
* While this could be extended to smaller sizes in the future,
31+
* it is not yet clear if this would be useful. TBD.
3032
*/
3133
size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
3234
int level,

0 commit comments

Comments
 (0)