@@ -117,7 +117,7 @@ ZSTD_compressBlock_fast_noDict_generic(
117
117
118
118
U32 rep_offset1 = rep [0 ];
119
119
U32 rep_offset2 = rep [1 ];
120
- U32 offsetSaved = 0 ;
120
+ U32 offsetSaved1 = 0 , offsetSaved2 = 0 ;
121
121
122
122
size_t hash0 ; /* hash for ip0 */
123
123
size_t hash1 ; /* hash for ip1 */
@@ -141,8 +141,8 @@ ZSTD_compressBlock_fast_noDict_generic(
141
141
{ U32 const curr = (U32 )(ip0 - base );
142
142
U32 const windowLow = ZSTD_getLowestPrefixIndex (ms , curr , cParams -> windowLog );
143
143
U32 const maxRep = curr - windowLow ;
144
- if (rep_offset2 > maxRep ) offsetSaved = rep_offset2 , rep_offset2 = 0 ;
145
- if (rep_offset1 > maxRep ) offsetSaved = rep_offset1 , rep_offset1 = 0 ;
144
+ if (rep_offset2 > maxRep ) offsetSaved2 = rep_offset2 , rep_offset2 = 0 ;
145
+ if (rep_offset1 > maxRep ) offsetSaved1 = rep_offset1 , rep_offset1 = 0 ;
146
146
}
147
147
148
148
/* start each op */
@@ -281,9 +281,24 @@ ZSTD_compressBlock_fast_noDict_generic(
281
281
* However, it seems to be a meaningful performance hit to try to search
282
282
* them. So let's not. */
283
283
284
+ /* When the repcodes are outside of the prefix, we set them to zero before the loop.
285
+ * When the offsets are still zero, we need to restore them after the block to have a correct
286
+ * repcode history. If only one offset was invalid, it is easy. The tricky case is when both
287
+ * offsets were invalid. We need to figure out which offset to refill with.
288
+ * - If both offsets are still zero, both saved offsets are restored in their original order.
289
+ * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`.
290
+ * - If only one is zero, we need to decide which offset to restore.
291
+ * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1.
292
+ * - It is impossible for rep_offset2 to be non-zero while rep_offset1 is still zero.
293
+ *
294
+ * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then
295
+ * set rep[0] = rep_offset1 and rep[1] = offsetSaved1.
296
+ */
297
+ offsetSaved2 = ((offsetSaved1 != 0 ) && (rep_offset1 != 0 )) ? offsetSaved1 : offsetSaved2 ;
298
+
284
299
/* save reps for next block */
285
- rep [0 ] = rep_offset1 ? rep_offset1 : offsetSaved ;
286
- rep [1 ] = rep_offset2 ? rep_offset2 : offsetSaved ;
300
+ rep [0 ] = rep_offset1 ? rep_offset1 : offsetSaved1 ;
301
+ rep [1 ] = rep_offset2 ? rep_offset2 : offsetSaved2 ;
287
302
288
303
/* Return the last literals size */
289
304
return (size_t )(iend - anchor );
@@ -410,7 +425,6 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
410
425
const BYTE * const iend = istart + srcSize ;
411
426
const BYTE * const ilimit = iend - HASH_READ_SIZE ;
412
427
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
413
- U32 offsetSaved = 0 ;
414
428
415
429
const ZSTD_matchState_t * const dms = ms -> dictMatchState ;
416
430
const ZSTD_compressionParameters * const dictCParams = & dms -> cParams ;
@@ -567,8 +581,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
567
581
568
582
_cleanup :
569
583
/* save reps for next block */
570
- rep [0 ] = offset_1 ? offset_1 : offsetSaved ;
571
- rep [1 ] = offset_2 ? offset_2 : offsetSaved ;
584
+ rep [0 ] = offset_1 ;
585
+ rep [1 ] = offset_2 ;
572
586
573
587
/* Return the last literals size */
574
588
return (size_t )(iend - anchor );
@@ -625,6 +639,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
625
639
const BYTE * const iend = istart + srcSize ;
626
640
const BYTE * const ilimit = iend - 8 ;
627
641
U32 offset_1 = rep [0 ], offset_2 = rep [1 ];
642
+ U32 offsetSaved1 = 0 , offsetSaved2 = 0 ;
628
643
629
644
const BYTE * ip0 = istart ;
630
645
const BYTE * ip1 ;
@@ -657,8 +672,8 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
657
672
658
673
{ U32 const curr = (U32 )(ip0 - base );
659
674
U32 const maxRep = curr - dictStartIndex ;
660
- if (offset_2 >= maxRep ) offset_2 = 0 ;
661
- if (offset_1 >= maxRep ) offset_1 = 0 ;
675
+ if (offset_2 >= maxRep ) offsetSaved2 = offset_2 , offset_2 = 0 ;
676
+ if (offset_1 >= maxRep ) offsetSaved1 = offset_1 , offset_1 = 0 ;
662
677
}
663
678
664
679
/* start each op */
@@ -780,9 +795,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
780
795
* However, it seems to be a meaningful performance hit to try to search
781
796
* them. So let's not. */
782
797
798
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
799
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict_generic for more context. */
800
+ offsetSaved2 = ((offsetSaved1 != 0 ) && (offset_1 != 0 )) ? offsetSaved1 : offsetSaved2 ;
801
+
783
802
/* save reps for next block */
784
- rep [0 ] = offset_1 ? offset_1 : rep [ 0 ] ;
785
- rep [1 ] = offset_2 ? offset_2 : rep [ 1 ] ;
803
+ rep [0 ] = offset_1 ? offset_1 : offsetSaved1 ;
804
+ rep [1 ] = offset_2 ? offset_2 : offsetSaved2 ;
786
805
787
806
/* Return the last literals size */
788
807
return (size_t )(iend - anchor );
0 commit comments