|
12 | 12 | #include "../common/mem.h" /* S64 */
|
13 | 13 | #include "../common/zstd_deps.h" /* ZSTD_memset */
|
14 | 14 | #include "../common/zstd_internal.h" /* ZSTD_STATIC_ASSERT */
|
| 15 | +#include "hist.h" /* HIST_add */ |
15 | 16 | #include "zstd_preSplit.h"
|
16 | 17 |
|
17 | 18 |
|
@@ -77,10 +78,10 @@ typedef void (*RecordEvents_f)(Fingerprint* fp, const void* src, size_t srcSize)
|
77 | 78 |
|
78 | 79 | #define FP_RECORD(_rate) ZSTD_recordFingerprint_##_rate
|
79 | 80 |
|
80 |
| -#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \ |
| 81 | +#define ZSTD_GEN_RECORD_FINGERPRINT(_rate, _hSize) \ |
81 | 82 | static void FP_RECORD(_rate)(Fingerprint* fp, const void* src, size_t srcSize) \
|
82 |
| - { \ |
83 |
| - recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \ |
| 83 | + { \ |
| 84 | + recordFingerprint_generic(fp, src, srcSize, _rate, _hSize); \ |
84 | 85 | }
|
85 | 86 |
|
86 | 87 | ZSTD_GEN_RECORD_FINGERPRINT(1, 10)
|
@@ -185,10 +186,52 @@ static size_t ZSTD_splitBlock_byChunks(const void* blockStart, size_t blockSize,
|
185 | 186 | (void)flushEvents; (void)removeEvents;
|
186 | 187 | }
|
187 | 188 |
|
| 189 | +/* ZSTD_splitBlock_fromBorders(): very fast strategy : |
| 190 | + * compare fingerprint from beginning and end of the block, |
| 191 | + * derive from their difference whether it's preferable to split in the middle, |
| 192 | + * repeat the process a second time, for finer grained decision. |
| 193 | + * 3 times did not bring improvements, so I stopped at 2. |
| 194 | + * Benefits are good enough for a cheap heuristic. |
| 195 | + * More accurate splitting saves more, but speed impact is also more perceptible. |
| 196 | + * For better accuracy, use more elaborate variant *_byChunks. |
| 197 | + */ |
| 198 | +static size_t ZSTD_splitBlock_fromBorders(const void* blockStart, size_t blockSize, |
| 199 | + void* workspace, size_t wkspSize) |
| 200 | +{ |
| 201 | +#define SEGMENT_SIZE 512 |
| 202 | + FPStats* const fpstats = (FPStats*)workspace; |
| 203 | + Fingerprint* middleEvents = (Fingerprint*)(void*)((char*)workspace + 512 * sizeof(unsigned)); |
| 204 | + assert(blockSize == (128 << 10)); |
| 205 | + assert(workspace != NULL); |
| 206 | + assert((size_t)workspace % ZSTD_ALIGNOF(FPStats) == 0); |
| 207 | + ZSTD_STATIC_ASSERT(ZSTD_SLIPBLOCK_WORKSPACESIZE >= sizeof(FPStats)); |
| 208 | + assert(wkspSize >= sizeof(FPStats)); (void)wkspSize; |
| 209 | + |
| 210 | + initStats(fpstats); |
| 211 | + HIST_add(fpstats->pastEvents.events, blockStart, SEGMENT_SIZE); |
| 212 | + HIST_add(fpstats->newEvents.events, (const char*)blockStart + blockSize - SEGMENT_SIZE, SEGMENT_SIZE); |
| 213 | + fpstats->pastEvents.nbEvents = fpstats->newEvents.nbEvents = SEGMENT_SIZE; |
| 214 | + if (!compareFingerprints(&fpstats->pastEvents, &fpstats->newEvents, 0, 8)) |
| 215 | + return blockSize; |
| 216 | + |
| 217 | + HIST_add(middleEvents->events, (const char*)blockStart + blockSize/2 - SEGMENT_SIZE/2, SEGMENT_SIZE); |
| 218 | + middleEvents->nbEvents = SEGMENT_SIZE; |
| 219 | + { U64 const distFromBegin = fpDistance(&fpstats->pastEvents, middleEvents, 8); |
| 220 | + U64 const distFromEnd = fpDistance(&fpstats->newEvents, middleEvents, 8); |
| 221 | + U64 const minDistance = SEGMENT_SIZE * SEGMENT_SIZE / 3; |
| 222 | + if (abs64((S64)distFromBegin - (S64)distFromEnd) < minDistance) |
| 223 | + return 64 KB; |
| 224 | + return (distFromBegin > distFromEnd) ? 32 KB : 96 KB; |
| 225 | + } |
| 226 | +} |
| 227 | + |
188 | 228 | size_t ZSTD_splitBlock(const void* blockStart, size_t blockSize,
|
189 | 229 | int level,
|
190 | 230 | void* workspace, size_t wkspSize)
|
191 | 231 | {
|
192 |
| - assert(0<=level && level<=3); |
193 |
| - return ZSTD_splitBlock_byChunks(blockStart, blockSize, level, workspace, wkspSize); |
| 232 | + assert(0<=level && level<=4); |
| 233 | + if (level == 0) |
| 234 | + return ZSTD_splitBlock_fromBorders(blockStart, blockSize, workspace, wkspSize); |
| 235 | + /* level >= 1*/ |
| 236 | + return ZSTD_splitBlock_byChunks(blockStart, blockSize, level-1, workspace, wkspSize); |
194 | 237 | }
|
0 commit comments