@@ -3361,29 +3361,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
     return ZSTDbss_compress;
 }
 
-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
 {
-    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
-    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
-    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
-    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
-    size_t literalsRead = 0;
-    size_t lastLLSize;
+    const seqDef* inSeqs = seqStore->sequencesStart;
+    const size_t nbInSequences = seqStore->sequences - inSeqs;
+    const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
 
-    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
+    const size_t nbOutSequences = nbInSequences + 1;
+    size_t nbOutLiterals = 0;
+    repcodes_t repcodes;
     size_t i;
-    repcodes_t updatedRepcodes;
 
-    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
-    /* Ensure we have enough space for last literals "sequence" */
-    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
-    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    for (i = 0; i < seqStoreSeqSize; ++i) {
-        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
-        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
-        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
+    /* Bounds check that we have enough space for every input sequence
+     * and the block delimiter
+     */
+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+    RETURN_ERROR_IF(
+        nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
+        dstSize_tooSmall,
+        "Not enough space to copy sequences");
+
+    ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
+    for (i = 0; i < nbInSequences; ++i) {
+        U32 rawOffset;
+        outSeqs[i].litLength = inSeqs[i].litLength;
+        outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
         outSeqs[i].rep = 0;
 
+        /* Handle the possible single length >= 64K
+         * There can only be one because we add MINMATCH to every match length,
+         * and blocks are at most 128K.
+         */
         if (i == seqStore->longLengthPos) {
             if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                 outSeqs[i].litLength += 0x10000;
@@ -3392,41 +3401,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
             }
         }
 
-        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
-            /* Derive the correct offset corresponding to a repcode */
-            outSeqs[i].rep = seqStoreSeqs[i].offBase;
+        /* Determine the raw offset given the offBase, which may be a repcode. */
+        if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
+            const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
+            assert(repcode > 0);
+            outSeqs[i].rep = repcode;
             if (outSeqs[i].litLength != 0) {
-                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+                rawOffset = repcodes.rep[repcode - 1];
             } else {
-                if (outSeqs[i].rep == 3) {
-                    rawOffset = updatedRepcodes.rep[0] - 1;
+                if (repcode == 3) {
+                    assert(repcodes.rep[0] > 1);
+                    rawOffset = repcodes.rep[0] - 1;
                 } else {
-                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+                    rawOffset = repcodes.rep[repcode];
                 }
             }
+        } else {
+            rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
         }
         outSeqs[i].offset = rawOffset;
-        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
-           so we provide seqStoreSeqs[i].offset - 1 */
-        ZSTD_updateRep(updatedRepcodes.rep,
-                       seqStoreSeqs[i].offBase,
-                       seqStoreSeqs[i].litLength == 0);
-        literalsRead += outSeqs[i].litLength;
+
+        /* Update repcode history for the sequence */
+        ZSTD_updateRep(repcodes.rep,
+                       inSeqs[i].offBase,
+                       inSeqs[i].litLength == 0);
+
+        nbOutLiterals += outSeqs[i].litLength;
     }
     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
      * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
      * for the block boundary, according to the API.
      */
-    assert(seqStoreLiteralsSize >= literalsRead);
-    lastLLSize = seqStoreLiteralsSize - literalsRead;
-    outSeqs[i].litLength = (U32)lastLLSize;
-    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
-    seqStoreSeqSize++;
-    zc->seqCollector.seqIndex += seqStoreSeqSize;
+    assert(nbInLiterals >= nbOutLiterals);
+    {
+        const size_t lastLLSize = nbInLiterals - nbOutLiterals;
+        outSeqs[nbInSequences].litLength = (U32)lastLLSize;
+        outSeqs[nbInSequences].matchLength = 0;
+        outSeqs[nbInSequences].offset = 0;
+        assert(nbOutSequences == nbInSequences + 1);
+    }
+    seqCollector->seqIndex += nbOutSequences;
+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+
+    return 0;
 }
 
 size_t ZSTD_sequenceBound(size_t srcSize) {
-    return (srcSize / ZSTD_MINMATCH_MIN) + 1;
+    const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
+    const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
+    return maxNbSeq + maxNbDelims;
 }
 
 size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
@@ -3435,6 +3458,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
     const size_t dstCapacity = ZSTD_compressBound(srcSize);
     void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
     SeqCollector seqCollector;
+    {
+        int targetCBlockSize;
+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
+        RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
+    }
+    {
+        int nbWorkers;
+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
+        RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
+    }
 
     RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
 
@@ -3444,8 +3477,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
     seqCollector.maxSequences = outSeqsSize;
     zc->seqCollector = seqCollector;
 
-    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
-    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    {
+        const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+        ZSTD_customFree(dst, ZSTD_defaultCMem);
+        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
+    }
+    assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
     return zc->seqCollector.seqIndex;
 }
 
@@ -4038,8 +4075,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
         cSeqsSize = 1;
     }
 
+    /* Sequence collection not supported when block splitting */
     if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
@@ -4261,6 +4299,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
         if (bss == ZSTDbss_noCompress) {
             if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
             cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
             DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
@@ -4293,11 +4332,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
     { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
         FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
-        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+        if (bss == ZSTDbss_noCompress) {
+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
+            cSize = 0;
+            goto out;
+        }
     }
 
     if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
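For reference, a minimal caller-side sketch of how the revised sequence-collection API is meant to be used after this change: ZSTD_sequenceBound() sizes the ZSTD_Sequence buffer (now also budgeting one block delimiter per block), and ZSTD_generateSequences() can now return an error (e.g. nbWorkers != 0, targetCBlockSize != 0, or an incompressible block), so its result must be checked with ZSTD_isError(). The function and variable names below (collect_sequences, seqs) are illustrative, not part of this patch; the sketch assumes the experimental static-linking API declared in zstd.h.

```c
#include <stdio.h>
#include <stdlib.h>
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_Sequence, ZSTD_sequenceBound, ZSTD_generateSequences */
#include <zstd.h>

/* Collect the sequences zstd would emit for `src`; returns the sequence count, or 0 on failure. */
static size_t collect_sequences(const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    const size_t maxNbSeqs = ZSTD_sequenceBound(srcSize);           /* upper bound, incl. block delimiters */
    ZSTD_Sequence* const seqs = malloc(maxNbSeqs * sizeof(ZSTD_Sequence));
    size_t nbSeqs = 0;
    if (cctx == NULL || seqs == NULL) goto cleanup;

    nbSeqs = ZSTD_generateSequences(cctx, seqs, maxNbSeqs, src, srcSize);
    if (ZSTD_isError(nbSeqs)) {                                     /* errors are reported rather than asserted with this patch */
        fprintf(stderr, "ZSTD_generateSequences failed: %s\n", ZSTD_getErrorName(nbSeqs));
        nbSeqs = 0;
    }
cleanup:
    free(seqs);
    ZSTD_freeCCtx(cctx);
    return nbSeqs;
}
```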