Skip to content

Commit f349d18

Browse files
authored
Merge pull request #3127 from embg/repcode_history
Correct and clarify repcode offset history logic
2 parents 8af64f4 + 3620a0a commit f349d18

File tree

3 files changed

+53
-26
lines changed

3 files changed

+53
-26
lines changed

lib/compress/zstd_double_fast.c

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
6767
const BYTE* const iend = istart + srcSize;
6868
const BYTE* const ilimit = iend - HASH_READ_SIZE;
6969
U32 offset_1=rep[0], offset_2=rep[1];
70-
U32 offsetSaved = 0;
70+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
7171

7272
size_t mLength;
7373
U32 offset;
@@ -100,8 +100,8 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
100100
U32 const current = (U32)(ip - base);
101101
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
102102
U32 const maxRep = current - windowLow;
103-
if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
104-
if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
103+
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
104+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
105105
}
106106

107107
/* Outer Loop: one iteration per match found and stored */
@@ -175,9 +175,13 @@ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
175175
} while (ip1 <= ilimit);
176176

177177
_cleanup:
178+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
179+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
180+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
181+
178182
/* save reps for next block */
179-
rep[0] = offset_1 ? offset_1 : offsetSaved;
180-
rep[1] = offset_2 ? offset_2 : offsetSaved;
183+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
184+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
181185

182186
/* Return the last literals size */
183187
return (size_t)(iend - anchor);
@@ -275,7 +279,6 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
275279
const BYTE* const iend = istart + srcSize;
276280
const BYTE* const ilimit = iend - HASH_READ_SIZE;
277281
U32 offset_1=rep[0], offset_2=rep[1];
278-
U32 offsetSaved = 0;
279282

280283
const ZSTD_matchState_t* const dms = ms->dictMatchState;
281284
const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
@@ -461,8 +464,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
461464
} /* while (ip < ilimit) */
462465

463466
/* save reps for next block */
464-
rep[0] = offset_1 ? offset_1 : offsetSaved;
465-
rep[1] = offset_2 ? offset_2 : offsetSaved;
467+
rep[0] = offset_1;
468+
rep[1] = offset_2;
466469

467470
/* Return the last literals size */
468471
return (size_t)(iend - anchor);

lib/compress/zstd_fast.c

Lines changed: 31 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ ZSTD_compressBlock_fast_noDict_generic(
117117

118118
U32 rep_offset1 = rep[0];
119119
U32 rep_offset2 = rep[1];
120-
U32 offsetSaved = 0;
120+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
121121

122122
size_t hash0; /* hash for ip0 */
123123
size_t hash1; /* hash for ip1 */
@@ -141,8 +141,8 @@ ZSTD_compressBlock_fast_noDict_generic(
141141
{ U32 const curr = (U32)(ip0 - base);
142142
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
143143
U32 const maxRep = curr - windowLow;
144-
if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
145-
if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
144+
if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0;
145+
if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0;
146146
}
147147

148148
/* start each op */
@@ -281,9 +281,24 @@ ZSTD_compressBlock_fast_noDict_generic(
281281
* However, it seems to be a meaningful performance hit to try to search
282282
* them. So let's not. */
283283

284+
/* When the repcodes are outside of the prefix, we set them to zero before the loop.
285+
* When the offsets are still zero, we need to restore them after the block to have a correct
286+
* repcode history. If only one offset was invalid, it is easy. The tricky case is when both
287+
* offsets were invalid. We need to figure out which offset to refill with.
288+
* - If both offsets are zero they are in the same order.
289+
* - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
290+
* - If only one is zero, we need to decide which offset to restore.
291+
* - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
292+
* - It is impossible for rep_offset2 to be non-zero.
293+
*
294+
* So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
295+
* set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
296+
*/
297+
offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2;
298+
284299
/* save reps for next block */
285-
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
286-
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
300+
rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1;
301+
rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2;
287302

288303
/* Return the last literals size */
289304
return (size_t)(iend - anchor);
@@ -410,7 +425,6 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
410425
const BYTE* const iend = istart + srcSize;
411426
const BYTE* const ilimit = iend - HASH_READ_SIZE;
412427
U32 offset_1=rep[0], offset_2=rep[1];
413-
U32 offsetSaved = 0;
414428

415429
const ZSTD_matchState_t* const dms = ms->dictMatchState;
416430
const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
@@ -567,8 +581,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
567581

568582
_cleanup:
569583
/* save reps for next block */
570-
rep[0] = offset_1 ? offset_1 : offsetSaved;
571-
rep[1] = offset_2 ? offset_2 : offsetSaved;
584+
rep[0] = offset_1;
585+
rep[1] = offset_2;
572586

573587
/* Return the last literals size */
574588
return (size_t)(iend - anchor);
@@ -625,6 +639,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
625639
const BYTE* const iend = istart + srcSize;
626640
const BYTE* const ilimit = iend - 8;
627641
U32 offset_1=rep[0], offset_2=rep[1];
642+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
628643

629644
const BYTE* ip0 = istart;
630645
const BYTE* ip1;
@@ -657,8 +672,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
657672

658673
{ U32 const curr = (U32)(ip0 - base);
659674
U32 const maxRep = curr - dictStartIndex;
660-
if (offset_2 >= maxRep) offset_2 = 0;
661-
if (offset_1 >= maxRep) offset_1 = 0;
675+
if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0;
676+
if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0;
662677
}
663678

664679
/* start each op */
@@ -780,9 +795,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
780795
* However, it seems to be a meaningful performance hit to try to search
781796
* them. So let's not. */
782797

798+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
799+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
800+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
801+
783802
/* save reps for next block */
784-
rep[0] = offset_1 ? offset_1 : rep[0];
785-
rep[1] = offset_2 ? offset_2 : rep[1];
803+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
804+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
786805

787806
/* Return the last literals size */
788807
return (size_t)(iend - anchor);

lib/compress/zstd_lazy.c

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1461,7 +1461,8 @@ ZSTD_compressBlock_lazy_generic(
14611461
const BYTE* const prefixLowest = base + prefixLowestIndex;
14621462

14631463
searchMax_f const searchMax = ZSTD_selectLazyVTable(ms, searchMethod, dictMode)->searchMax;
1464-
U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
1464+
U32 offset_1 = rep[0], offset_2 = rep[1];
1465+
U32 offsetSaved1 = 0, offsetSaved2 = 0;
14651466

14661467
const int isDMS = dictMode == ZSTD_dictMatchState;
14671468
const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
@@ -1484,8 +1485,8 @@ ZSTD_compressBlock_lazy_generic(
14841485
U32 const curr = (U32)(ip - base);
14851486
U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
14861487
U32 const maxRep = curr - windowLow;
1487-
if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1488-
if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1488+
if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
1489+
if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
14891490
}
14901491
if (isDxS) {
14911492
/* dictMatchState repCode checks don't currently handle repCode == 0
@@ -1681,9 +1682,13 @@ ZSTD_compressBlock_lazy_generic(
16811682
continue; /* faster when present ... (?) */
16821683
} } }
16831684

1684-
/* Save reps for next block */
1685-
rep[0] = offset_1 ? offset_1 : savedOffset;
1686-
rep[1] = offset_2 ? offset_2 : savedOffset;
1685+
/* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
1686+
* rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
1687+
offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
1688+
1689+
/* save reps for next block */
1690+
rep[0] = offset_1 ? offset_1 : offsetSaved1;
1691+
rep[1] = offset_2 ? offset_2 : offsetSaved2;
16871692

16881693
/* Return the last literals size */
16891694
return (size_t)(iend - anchor);

0 commit comments

Comments
 (0)