Skip to content

Commit dbeeaaf

Browse files
authored
Add a benchmark to measure multi-stage block serde cost (#13336)
1 parent cae5e4e commit dbeeaaf

File tree

1 file changed

+244
-0
lines changed

1 file changed

+244
-0
lines changed
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.pinot.perf;
20+
21+
import java.io.File;
22+
import java.io.IOException;
23+
import java.math.BigDecimal;
24+
import java.nio.ByteBuffer;
25+
import java.util.ArrayList;
26+
import java.util.Collections;
27+
import java.util.List;
28+
import java.util.Random;
29+
import java.util.concurrent.TimeUnit;
30+
import java.util.function.Consumer;
31+
import org.apache.commons.lang3.SystemUtils;
32+
import org.apache.pinot.common.datablock.DataBlock;
33+
import org.apache.pinot.common.datablock.DataBlockUtils;
34+
import org.apache.pinot.common.utils.DataSchema;
35+
import org.apache.pinot.core.common.datablock.DataBlockBuilder;
36+
import org.apache.pinot.spi.utils.ByteArray;
37+
import org.openjdk.jmh.annotations.Benchmark;
38+
import org.openjdk.jmh.annotations.Fork;
39+
import org.openjdk.jmh.annotations.Level;
40+
import org.openjdk.jmh.annotations.Measurement;
41+
import org.openjdk.jmh.annotations.OutputTimeUnit;
42+
import org.openjdk.jmh.annotations.Param;
43+
import org.openjdk.jmh.annotations.Scope;
44+
import org.openjdk.jmh.annotations.Setup;
45+
import org.openjdk.jmh.annotations.State;
46+
import org.openjdk.jmh.annotations.Threads;
47+
import org.openjdk.jmh.annotations.Warmup;
48+
import org.openjdk.jmh.profile.GCProfiler;
49+
import org.openjdk.jmh.results.format.ResultFormatType;
50+
import org.openjdk.jmh.runner.Runner;
51+
import org.openjdk.jmh.runner.RunnerException;
52+
import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
53+
import org.openjdk.jmh.runner.options.OptionsBuilder;
54+
55+
@Warmup(iterations = 2, time = 2)
56+
@Measurement(iterations = 5, time = 1)
57+
@Fork(value = 1, jvmArgsPrepend = {
58+
"--add-opens=java.base/java.nio=ALL-UNNAMED",
59+
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED",
60+
"--add-opens=java.base/java.lang=ALL-UNNAMED",
61+
"--add-opens=java.base/java.util=ALL-UNNAMED",
62+
"--add-opens=java.base/java.lang.reflect=ALL-UNNAMED"
63+
})
64+
@Threads(5)
65+
@State(Scope.Benchmark)
66+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
67+
public class BenchmarkDataBlock {
68+
69+
public static void main(String[] args)
70+
throws RunnerException {
71+
start(BenchmarkDataBlock.class, opt -> opt
72+
// .addProfiler(LinuxPerfAsmProfiler.class)
73+
// .addProfiler(JavaFlightRecorderProfiler.class)
74+
.addProfiler(GCProfiler.class));
75+
}
76+
77+
@Param(value = {"INT", "LONG", "STRING", "BYTES", "BIG_DECIMAL", "LONG_ARRAY", "STRING_ARRAY"})
78+
DataSchema.ColumnDataType _dataType;
79+
@Param(value = {"COLUMNAR", "ROW"})
80+
DataBlock.Type _blockType = DataBlock.Type.COLUMNAR;
81+
// @Param(value = {"0", "10", "90"})
82+
int _nullPerCent = 10;
83+
84+
@Param(value = {"10000", "1000000"})
85+
int _rows;
86+
87+
BenchmarkState _state;
88+
89+
@Setup(Level.Trial)
90+
public void setup()
91+
throws IOException {
92+
_state = new BenchmarkState(_rows, _dataType, _nullPerCent, _blockType);
93+
}
94+
95+
public static void start(Class<? extends BenchmarkDataBlock> benchmarkClass,
96+
Consumer<ChainedOptionsBuilder> builderConsumer)
97+
throws RunnerException {
98+
ChainedOptionsBuilder opt = new OptionsBuilder()
99+
.include(benchmarkClass.getSimpleName());
100+
101+
File sdkJava = SystemUtils.getUserHome().toPath().resolve(".sdkman/candidates/java/current/bin/java").toFile();
102+
System.out.println("Java SDK: " + sdkJava);
103+
if (sdkJava.canExecute()) {
104+
opt.jvm(sdkJava.getAbsolutePath());
105+
}
106+
opt.resultFormat(ResultFormatType.CSV);
107+
108+
builderConsumer.accept(opt);
109+
110+
new Runner(opt.build()).run();
111+
}
112+
113+
@Benchmark
114+
public DataBlock buildBlock()
115+
throws IOException {
116+
return _state.createDataBlock();
117+
}
118+
119+
@Benchmark
120+
public Object serialize()
121+
throws IOException {
122+
return _state._dataBlock.toBytes();
123+
}
124+
125+
@Benchmark
126+
public DataBlock deserialize()
127+
throws IOException {
128+
_state._bytes.clear();
129+
return DataBlockUtils.getDataBlock(_state._bytes);
130+
}
131+
132+
@Benchmark
133+
public DataBlock all()
134+
throws IOException {
135+
DataBlock dataBlock = _state.createDataBlock();
136+
ByteBuffer buffers = ByteBuffer.wrap(dataBlock.toBytes());
137+
return DataBlockUtils.getDataBlock(buffers);
138+
}
139+
140+
public static class BenchmarkState {
141+
142+
private final DataSchema.ColumnDataType _columnDataType;
143+
private final int _nullPerCent;
144+
private final DataBlock.Type _blockType;
145+
146+
private final DataSchema _schema;
147+
private final List<Object[]> _data;
148+
private final DataBlock _dataBlock;
149+
private final ByteBuffer _bytes;
150+
151+
private final CheckedFunction<List<Object[]>, DataBlock> _generateBlock;
152+
153+
public BenchmarkState(int rows, DataSchema.ColumnDataType columnDataType, int nullPerCent,
154+
DataBlock.Type blockType)
155+
throws IOException {
156+
_nullPerCent = nullPerCent;
157+
_blockType = blockType;
158+
_columnDataType = columnDataType;
159+
160+
_schema = new DataSchema(new String[]{"value"}, new DataSchema.ColumnDataType[]{columnDataType});
161+
_data = createData(rows);
162+
163+
if (blockType == DataBlock.Type.COLUMNAR) {
164+
_dataBlock = DataBlockBuilder.buildFromColumns(_data, _schema);
165+
} else {
166+
_dataBlock = DataBlockBuilder.buildFromRows(_data, _schema);
167+
}
168+
_bytes = ByteBuffer.wrap(_dataBlock.toBytes());
169+
170+
if (blockType == DataBlock.Type.COLUMNAR) {
171+
_generateBlock = (data) -> DataBlockBuilder.buildFromColumns(data, _schema);
172+
} else {
173+
_generateBlock = (data) -> DataBlockBuilder.buildFromRows(data, _schema);
174+
}
175+
}
176+
177+
private List<Object[]> createData(int numRows) {
178+
Random r = new Random(42);
179+
switch (_blockType) {
180+
case COLUMNAR:
181+
Object[] column = new Object[numRows];
182+
for (int i = 0; i < numRows; i++) {
183+
column[i] = generateValue(r);
184+
}
185+
return Collections.singletonList(column);
186+
case ROW:
187+
ArrayList<Object[]> data = new ArrayList<>();
188+
for (int i = 0; i < numRows; i++) {
189+
data.add(new Object[]{generateValue(r)});
190+
}
191+
return data;
192+
default:
193+
throw new IllegalArgumentException("Unsupported data block type: " + _blockType);
194+
}
195+
}
196+
197+
private Object generateValue(Random r) {
198+
if (r.nextInt(100) < _nullPerCent) {
199+
return null;
200+
}
201+
int distinctStrings = 100;
202+
switch (_columnDataType) {
203+
case INT:
204+
return r.nextInt();
205+
case LONG:
206+
return r.nextLong();
207+
case STRING:
208+
return "string" + r.nextInt(distinctStrings);
209+
case BYTES:
210+
byte[] bytes = new byte[100];
211+
r.nextBytes(bytes);
212+
return new ByteArray(bytes);
213+
case BIG_DECIMAL:
214+
return new BigDecimal(r.nextDouble());
215+
case BOOLEAN:
216+
return r.nextBoolean() ? 1 : 0;
217+
case LONG_ARRAY:
218+
long[] longArray = new long[10];
219+
for (int i = 0; i < longArray.length; i++) {
220+
longArray[i] = r.nextLong();
221+
}
222+
return longArray;
223+
case STRING_ARRAY:
224+
String[] stringArray = new String[10];
225+
for (int i = 0; i < stringArray.length; i++) {
226+
stringArray[i] = "string" + r.nextInt(distinctStrings);
227+
}
228+
return stringArray;
229+
default:
230+
throw new IllegalArgumentException("Unsupported column data type: " + _columnDataType);
231+
}
232+
}
233+
234+
private DataBlock createDataBlock()
235+
throws IOException {
236+
return _generateBlock.apply(_data);
237+
}
238+
}
239+
240+
interface CheckedFunction<I, O> {
241+
O apply(I input)
242+
throws IOException;
243+
}
244+
}

0 commit comments

Comments
 (0)