Skip to content

Commit c65eaea

Browse files
committed
dec: adjust seqSymbol load on aarch64
ZSTD_seqSymbol is a structure that is 64 bits wide in total, so on aarch64 it can be loaded in a single operation and its fields extracted with simple shift or bit-extract instructions. GCC doesn't recognize this and generates unnecessary ldr/ldrb/ldrh operations that cause a performance drop. With this change, an uplift of 2~4% on silesia and 2.5~6% on cantrbry @L8 is observed on Arm N1. Signed-off-by: Jun He <[email protected]> Change-Id: I7748909204cf78a17eb9d4f2333692d53239daa8
1 parent fda537b commit c65eaea

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

lib/decompress/zstd_decompress_block.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,9 +1170,19 @@ FORCE_INLINE_TEMPLATE seq_t
11701170
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
11711171
{
11721172
seq_t seq;
1173+
#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1174+
ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1175+
ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1176+
ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1177+
ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1178+
ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1179+
ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1180+
ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1181+
#else
11731182
const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
11741183
const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
11751184
const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1185+
#endif
11761186
seq.matchLength = mlDInfo->baseValue;
11771187
seq.litLength = llDInfo->baseValue;
11781188
{ U32 const ofBase = ofDInfo->baseValue;

0 commit comments

Comments
 (0)