Skip to content

Commit 8cee7a2

Browse files
committed
perf(changelog): optimize enforceSizeBoundary
These optimizations make the enforceSizeBoundary function more efficient across various scenarios, leading to overall better performance while maintaining the same behaviour (*). Running tool: /home/gg/.goenv/versions/1.22.4/bin/go test -benchmem -run=^$ -tags ebpf -bench ^(BenchmarkEnforceSizeBoundaryOld|BenchmarkEnforceSizeBoundary)$ github.com/aquasecurity/tracee/pkg/changelog -benchtime=100000000x goos: linux goarch: amd64 pkg: github.com/aquasecurity/tracee/pkg/changelog cpu: AMD Ryzen 9 7950X 16-Core Processor | Test Case | Old (ns/op) | New (ns/op) | (%) | |------------------------|-------------|-------------|--------| | No change needed | 1.446 | 1.434 | 0.83% | | Trim excess duplicates | 30.18 | 21.97 | 27.21% | | Remove oldest entries | 27.29 | 21.39 | 21.60% | Since enforceSizeBoundary is called by setAt, the performance improvements will be reflected in the latter: -bench ^(setAtOld|setAt)$ | Test Case | Old (ns/op) | New (ns/op) | (%) | |--------------------|-------------|-------------|--------| | All Scenarios | 1267 | 1163 | 8.21% | | Within Limit | 1770 | 1747 | 1.30% | Tests were fixed and added to ensure the correctness of the implementation. Benchmarks were also added to track the performance improvements. (*) The behaviour of the function is the same, but the implementation fixes a bug that when coalescing duplicate values, the removal of the oldest entry was not being done correctly. Instead of removing the oldest entry, the function was removing the newest one.
1 parent 3bc61a8 commit 8cee7a2

File tree

3 files changed

+448
-38
lines changed

3 files changed

+448
-38
lines changed

pkg/changelog/changelog.go

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
package changelog
22

33
import (
4-
"slices"
54
"time"
65

76
"github.com/aquasecurity/tracee/pkg/logger"
@@ -165,33 +164,57 @@ func (clv *Changelog[T]) findIndex(target time.Time) int {
165164

166165
// enforceSizeBoundary ensures that the size of the inner array doesn't exceed the limit.
167166
// It applies two methods to reduce the log size to the maximum allowed:
168-
// 1. Unite duplicate values that are trailing one another.
167+
// 1. Unite duplicate values that are trailing one another, removing the oldest of the pair.
169168
// 2. Remove the oldest logs as they are likely less important.
170169

171170
func (clv *Changelog[T]) enforceSizeBoundary() {
172-
if len(clv.changes) > clv.maxSize {
173-
// Get rid of oldest changes to keep max size boundary
174-
boundaryDiff := len(clv.changes) - clv.maxSize
175-
176-
// First try to unite states
177-
clv.changes = slices.CompactFunc(clv.changes, func(i item[T], i2 item[T]) bool {
178-
if i.value == i2.value && boundaryDiff > 0 {
179-
delete(clv.timestamps, i2.timestamp)
180-
boundaryDiff--
181-
return true
182-
}
183-
return false
184-
})
185-
186-
if boundaryDiff == 0 {
187-
return
171+
if len(clv.changes) <= clv.maxSize {
172+
// Nothing to do
173+
return
174+
}
175+
176+
boundaryDiff := len(clv.changes) - clv.maxSize
177+
changed := false
178+
179+
// Compact the slice in place
180+
writeIdx := 0
181+
for readIdx := 0; readIdx < len(clv.changes); readIdx++ {
182+
nextIdx := readIdx + 1
183+
if nextIdx < len(clv.changes) &&
184+
clv.changes[nextIdx].value == clv.changes[readIdx].value &&
185+
boundaryDiff > 0 {
186+
// Remove the oldest (readIdx) from the duplicate pair
187+
delete(clv.timestamps, clv.changes[readIdx].timestamp)
188+
boundaryDiff--
189+
changed = true
190+
continue
188191
}
189-
removedChanges := clv.changes[:boundaryDiff]
190-
clv.changes = clv.changes[boundaryDiff:]
191-
for _, removedChange := range removedChanges {
192-
delete(clv.timestamps, removedChange.timestamp)
192+
193+
// If elements have been removed or moved, update the map and the slice
194+
if changed {
195+
clv.changes[writeIdx] = clv.changes[readIdx]
193196
}
197+
198+
writeIdx++
199+
}
200+
201+
if changed {
202+
clear(clv.changes[writeIdx:])
203+
clv.changes = clv.changes[:writeIdx]
204+
}
205+
206+
if len(clv.changes) <= clv.maxSize {
207+
// Size is within limits after compaction
208+
return
209+
}
210+
211+
// As it still exceeds maxSize, remove the oldest entries in the remaining slice
212+
boundaryDiff = len(clv.changes) - clv.maxSize
213+
for i := 0; i < boundaryDiff; i++ {
214+
delete(clv.timestamps, clv.changes[i].timestamp)
194215
}
216+
clear(clv.changes[:boundaryDiff])
217+
clv.changes = clv.changes[boundaryDiff:]
195218
}
196219

197220
// returnZero returns the zero value of the type T.
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
package changelog
2+
3+
import (
4+
"testing"
5+
"time"
6+
)
7+
8+
func Benchmark_enforceSizeBoundary(b *testing.B) {
9+
testCases := []struct {
10+
name string
11+
changelog Changelog[int]
12+
}{
13+
{
14+
name: "No change needed",
15+
changelog: Changelog[int]{
16+
changes: []item[int]{
17+
{value: 1, timestamp: getTimeFromSec(1)},
18+
{value: 2, timestamp: getTimeFromSec(2)},
19+
},
20+
timestamps: map[time.Time]struct{}{
21+
getTimeFromSec(1): {},
22+
getTimeFromSec(2): {},
23+
},
24+
maxSize: 5,
25+
},
26+
},
27+
{
28+
name: "Trim excess with duplicates",
29+
changelog: Changelog[int]{
30+
changes: []item[int]{
31+
{value: 1, timestamp: getTimeFromSec(1)},
32+
{value: 1, timestamp: getTimeFromSec(2)},
33+
{value: 2, timestamp: getTimeFromSec(3)},
34+
{value: 3, timestamp: getTimeFromSec(4)},
35+
{value: 3, timestamp: getTimeFromSec(5)},
36+
},
37+
timestamps: map[time.Time]struct{}{
38+
getTimeFromSec(1): {},
39+
getTimeFromSec(2): {},
40+
getTimeFromSec(3): {},
41+
getTimeFromSec(4): {},
42+
getTimeFromSec(5): {},
43+
},
44+
maxSize: 3,
45+
},
46+
},
47+
{
48+
name: "Remove oldest entries",
49+
changelog: Changelog[int]{
50+
changes: []item[int]{
51+
{value: 1, timestamp: getTimeFromSec(1)},
52+
{value: 2, timestamp: getTimeFromSec(2)},
53+
{value: 3, timestamp: getTimeFromSec(3)},
54+
{value: 4, timestamp: getTimeFromSec(4)},
55+
},
56+
timestamps: map[time.Time]struct{}{
57+
getTimeFromSec(1): {},
58+
getTimeFromSec(2): {},
59+
getTimeFromSec(3): {},
60+
getTimeFromSec(4): {},
61+
},
62+
maxSize: 2,
63+
},
64+
},
65+
}
66+
67+
for _, tc := range testCases {
68+
b.Run(tc.name, func(b *testing.B) {
69+
for i := 0; i < b.N; i++ {
70+
clv := tc.changelog // Create a copy for each iteration
71+
clv.enforceSizeBoundary()
72+
}
73+
})
74+
}
75+
}
76+
77+
func Benchmark_setAt(b *testing.B) {
78+
// Test cases where the Changelog needs to enforce the size boundary
79+
testCasesAllScenarios := []struct {
80+
value int
81+
time time.Time
82+
}{
83+
{
84+
value: 42,
85+
time: getTimeFromSec(0),
86+
},
87+
{
88+
value: 72,
89+
time: getTimeFromSec(1),
90+
},
91+
{
92+
value: 642,
93+
time: getTimeFromSec(2),
94+
},
95+
{
96+
value: 672,
97+
time: getTimeFromSec(3), // will trigger removal of oldest entry
98+
},
99+
{
100+
value: 642,
101+
time: getTimeFromSec(4), // will trigger coalescing of duplicate values
102+
},
103+
{
104+
value: 672,
105+
time: getTimeFromSec(5), // will trigger coalescing of duplicate values
106+
},
107+
{
108+
value: 6642,
109+
time: getTimeFromSec(6), // will trigger removal of oldest entry
110+
},
111+
{
112+
value: 672,
113+
time: getTimeFromSec(7), // will trigger coalescing of duplicate values
114+
},
115+
{
116+
value: 642,
117+
time: getTimeFromSec(8), // will trigger coalescing of duplicate values
118+
},
119+
{
120+
value: 6672,
121+
time: getTimeFromSec(9), // will trigger removal of oldest entry
122+
},
123+
{
124+
value: 9642,
125+
time: getTimeFromSec(10), // will trigger removal of oldest entry
126+
},
127+
{
128+
value: 0,
129+
time: getTimeFromSec(0), // will just update the value
130+
},
131+
{
132+
value: 0,
133+
time: getTimeFromSec(1), // will just update the value
134+
},
135+
{
136+
value: 0,
137+
time: getTimeFromSec(2), // will just update the value
138+
},
139+
{
140+
value: 0,
141+
time: getTimeFromSec(3), // will just update the value
142+
},
143+
}
144+
145+
b.Run("All Scenarios", func(b *testing.B) {
146+
for i := 0; i < b.N; i++ {
147+
b.StopTimer()
148+
clv := NewChangelog[int](3)
149+
b.StartTimer()
150+
for _, tc := range testCasesAllScenarios {
151+
clv.setAt(tc.value, tc.time)
152+
}
153+
}
154+
})
155+
156+
// Test cases where the changelog is within the maximum size limit
157+
testCasesWithinLimit := []struct {
158+
value int
159+
time time.Time
160+
}{
161+
{
162+
value: 0,
163+
time: getTimeFromSec(0),
164+
},
165+
{
166+
value: 1,
167+
time: getTimeFromSec(1),
168+
},
169+
{
170+
value: 2,
171+
time: getTimeFromSec(2),
172+
},
173+
{
174+
value: 3,
175+
time: getTimeFromSec(3),
176+
},
177+
{
178+
value: 4,
179+
time: getTimeFromSec(4),
180+
},
181+
{
182+
value: 5,
183+
time: getTimeFromSec(5),
184+
},
185+
{
186+
value: 6,
187+
time: getTimeFromSec(6),
188+
},
189+
{
190+
value: 7,
191+
time: getTimeFromSec(7),
192+
},
193+
{
194+
value: 8,
195+
time: getTimeFromSec(8),
196+
},
197+
{
198+
value: 9,
199+
time: getTimeFromSec(9),
200+
},
201+
{
202+
value: 10,
203+
time: getTimeFromSec(10),
204+
},
205+
{
206+
value: 11,
207+
time: getTimeFromSec(11),
208+
},
209+
{
210+
value: 12,
211+
time: getTimeFromSec(12),
212+
},
213+
{
214+
value: 13,
215+
time: getTimeFromSec(13),
216+
},
217+
{
218+
value: 14,
219+
time: getTimeFromSec(14),
220+
},
221+
}
222+
223+
b.Run("Within Limit", func(b *testing.B) {
224+
for i := 0; i < b.N; i++ {
225+
b.StopTimer()
226+
clv := NewChangelog[int](15)
227+
b.StartTimer()
228+
for _, tc := range testCasesWithinLimit {
229+
clv.setAt(tc.value, tc.time)
230+
}
231+
}
232+
})
233+
}

0 commit comments

Comments
 (0)