Skip to content

Commit 6de4fea

Browse files
edwinsolisfsyurkevi
authored andcommitted
Updated benchmarks and docs
1 parent 381672c commit 6de4fea

17 files changed

+386
-53
lines changed

benchmarks/src/README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,18 @@ Benchmarks
33

44
## Setting up environment
55

6+
Create a Python environment and install pytest and the compute libraries:
67
```sh
78
python -m pip install -r requirements.txt
89
```
910

11+
If running `dpnp` with NVIDIA or AMD devices, you must install the oneAPI toolkit along with the corresponding oneAPI plugin:
12+
13+
```sh
14+
# install oneapi toolkit and plugins
15+
source /opt/intel/oneapi/setvars.sh
16+
```
17+
1018
## Benchmark parameters
1119

1220
The benchmark packages, rounds, array sizes, and numeric type may be specified via the constants at the top of [pytest_benchmark/common.py](pytest_benchmark/common.py).
@@ -20,16 +28,18 @@ These are the steps to run the benchmarks, and produce the graphs
2028

2129
Run the benchmarks and store the results in `results.json`
2230
```sh
23-
pytest .\pytest_benchmark --benchmark-json=results.json
31+
pytest ./pytest_benchmark --benchmark-json=results.json
2432
```
2533

2634
After `results.json` has been created, generate the graphs and store the timing results by running:
2735
```sh
36+
mkdir img
2837
python graphs.py
2938
```
3039

3140
To modify the tests being shown, modify the `TESTS` list at the top of the `graphs.py` file.
32-
To modify the labels shown, modify `PKG_LABELS`
41+
To modify the package labels shown in the legend, modify `PKG_LABELS`.
42+
To modify the names of the tests shown, modify `TESTS_GRAPH_NAME`.
3343
To modify the hardware display, modify `HARDWARE`
3444

3545
## Notes

benchmarks/src/graphs.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
BENCHMARKS_JSON = "results.json"
88

99
# Hardware details shown in title
10-
HARDWARE = "AMD Ryzen 9 9900X 12-Core Processor 63032 MB (fp64 fp16)\noneAPI 2025.1.3 Intel(R) OpenCL Graphics: Intel(R) Arc(TM) B580 Graphics, 11873 MB (fp64 fp16)"
10+
HARDWARE = "Intel Xeon Gold 5315Y (8 Processors) @ 3.201GHz 63032 MB\noneAPI 2025.2.1 NVIDIA RTX A4000, 16222 MB, CUDA 12.8 Compute 8.6"
1111

1212
# Show speedup in graph
1313
SHOW_NUMBERS = True
@@ -16,12 +16,13 @@
1616
ROUND_NUMBERS = 1
1717

1818
# package list in graph order; arrayfire packages are added later
19-
PKG_NAMES = ["numpy", "dpnp", "cupy"]
19+
PKG_NAMES = ["numpy", "dpnp", "cupy", "cupynumeric"]
2020

2121
# color used in graphs
2222
PKG_COLOR = {
2323
"numpy": "tab:blue",
2424
"cupy": "tab:green",
25+
"cupynumeric": "green",
2526
"dpnp": "tab:red",
2627
"afcpu": "tab:orange",
2728
"afopencl": "tab:orange",
@@ -32,8 +33,9 @@
3233
# labels displayed in the graph
3334
PKG_LABELS = {
3435
"numpy": "numpy[cpu]",
35-
"dpnp": "dpnp[level_zero:gpu]",
36+
"dpnp": "dpnp[cuda:gpu]",
3637
"cupy": "cupy",
38+
"cupynumeric": "cupynumeric",
3739
"afcpu": "afcpu",
3840
"afcuda": "afcuda",
3941
"afopencl": "afopencl[opencl:gpu]",
@@ -44,16 +46,16 @@
4446

4547
# Tests to be shown in graphs
4648
TESTS = [
47-
"qr",
49+
"group_elementwise",
4850
"neural_network",
49-
"gemm",
51+
"black_scholes",
5052
"mandelbrot",
5153
"nbody",
5254
"pi",
53-
"black_scholes",
54-
"fft",
5555
"normal",
56-
"group_elementwise",
56+
"gemm",
57+
"fft",
58+
"qr",
5759
# Other tests
5860
# 'svd',
5961
# 'cholesky',
@@ -63,6 +65,25 @@
6365
# 'inv'
6466
]
6567

68+
# Reverse the list so the tests appear in the listed order on the graph
69+
TESTS.reverse()
70+
71+
TESTS_GRAPH_NAME = {
72+
"group_elementwise": "Group_elementwise (JIT)",
73+
"neural_network": "Neural Network (JIT)",
74+
"black_scholes": "Black Scholes (JIT)",
75+
"mandelbrot": "Mandelbrot (JIT)",
76+
"nbody": "Nbody (JIT)",
77+
"pi": "Montecarlo Pi (JIT)",
78+
"normal": "Normal Distribution",
79+
"gemm": "General Matrix Multiplication",
80+
"fft": "2D FFT",
81+
"qr": "QR Decomposition",
82+
}
83+
84+
for name in TESTS:
85+
if name not in TESTS_GRAPH_NAME:
86+
TESTS_GRAPH_NAME[name] = name
6687

6788
def get_benchmark_data():
6889
results = {}
@@ -189,7 +210,7 @@ def generate_group_graph(test_list=None, show_numbers=False, filename="compariso
189210

190211
xlabels = []
191212
for test in tests:
192-
xlabels.append(test + "\n" + descriptions[test])
213+
xlabels.append(TESTS_GRAPH_NAME[test] + "\n" + descriptions[test])
193214

194215
ax.set_xlabel("Speedup")
195216
ax.set_xscale("log")

benchmarks/src/pytest_benchmark/common.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import math
3030

3131
import cupy
32+
import cupynumeric
3233
import dpctl
3334
import dpnp
3435
import numpy as np
@@ -38,19 +39,19 @@
3839

3940
# modify parameters for most benchmarks
4041
ROUNDS = 30
41-
NSIZE = 2**13
42+
NSIZE = 2**11
4243
NNSIZE = NSIZE**2
4344
DTYPE = "float32"
4445

4546
# comment out a line to remove that package from testing
4647
PKGDICT = {
47-
"dpnp": dpnp,
4848
"numpy": np,
4949
"cupy": cupy,
5050
# "afcpu": af,
5151
"afopencl": af,
52-
"afcuda": af,
5352
"afoneapi": af,
53+
"dpnp": dpnp,
54+
"cupynumeric": cupynumeric,
5455
}
5556

5657
PKGS = []
@@ -66,11 +67,13 @@ def initialize_package(PKG_ID):
6667
pkg = PKGDICT[PKG_ID]
6768

6869
try:
70+
# Free all unused memory
71+
gc.collect()
6972
af.device_gc()
7073
mempool = cupy.get_default_memory_pool()
7174
mempool.free_all_blocks()
72-
except:
73-
pass
75+
except Exception as e:
76+
print(e)
7477

7578
if PKG_ID == "afcpu":
7679
af.set_backend(af.BackendType.cpu)
@@ -98,8 +101,7 @@ def initialize_package(PKG_ID):
98101
print(cupy.cuda.Device())
99102
mempool = cupy.get_default_memory_pool()
100103
mempool.free_all_blocks()
104+
elif PKG_ID == "cupynumeric":
105+
pass
101106
else:
102-
raise NotImplementedError()
103-
104-
# Free all unused memory
105-
gc.collect()
107+
raise NotImplementedError()

benchmarks/src/pytest_benchmark/test_blackscholes.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,29 @@ def cnd(x):
9292

9393
return (C, P)
9494

95+
def black_scholes_cupynumeric(S, X, R, V, T):
96+
# S = Underlying stock price
97+
# X = Strike Price
98+
# R = Risk free rate of interest
99+
# V = Volatility
100+
# T = Time to maturity
101+
def cnd(x):
102+
temp = x > 0
103+
erf = lambda arr: cupynumeric.exp(-arr * arr)
104+
return temp * (0.5 + erf(x / sqrt2) / 2) + (1 - temp) * (0.5 - erf((-x) / sqrt2) / 2)
105+
106+
d1 = cupynumeric.log(S / X)
107+
d1 = d1 + (R + (V * V) * 0.5) * T
108+
d1 = d1 / (V * cupynumeric.sqrt(T))
109+
110+
d2 = d1 - (V * cupynumeric.sqrt(T))
111+
cnd_d1 = cnd(d1)
112+
cnd_d2 = cnd(d2)
113+
114+
C = S * cnd_d1 - (X * cupynumeric.exp((-R) * T) * cnd_d2)
115+
P = X * cupynumeric.exp((-R) * T) * (1 - cnd_d2) - (S * (1 - cnd_d1))
116+
117+
return (C, P)
95118

96119
def black_scholes_arrayfire(S, X, R, V, T):
97120
def cnd(x):
@@ -137,6 +160,9 @@ def generate_arrays(pkgid, count):
137160
elif "numpy" == pkg:
138161
for i in range(count):
139162
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
163+
elif "cupynumeric" == pkg:
164+
for i in range(count):
165+
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))
140166

141167
return arr_list
142168

@@ -146,4 +172,5 @@ def generate_arrays(pkgid, count):
146172
"numpy": black_scholes_numpy,
147173
"cupy": black_scholes_cupy,
148174
"arrayfire": black_scholes_arrayfire,
175+
"cupynumeric": black_scholes_cupynumeric
149176
}

benchmarks/src/pytest_benchmark/test_elementwise.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def func_cupy(arr):
5252
cupy.cuda.runtime.deviceSynchronize()
5353
return x
5454

55-
GROUP_FUNCS = {"numpy": func, "cupy": func_cupy, "arrayfire": func_af, "dpnp": func}
55+
GROUP_FUNCS = {"numpy": func, "cupy": func_cupy, "arrayfire": func_af, "dpnp": func, "cupynumeric": func}
5656

5757
benchmark.extra_info["description"] = f"{NSIZE}x{NSIZE} Matrix"
5858
result = benchmark.pedantic(
@@ -312,5 +312,8 @@ def generate_arrays(pkgid, count):
312312
elif "numpy" == pkg:
313313
for i in range(count):
314314
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
315+
elif "cupynumeric" == pkg:
316+
for i in range(count):
317+
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))
315318

316319
return arr_list

benchmarks/src/pytest_benchmark/test_fft.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ def generate_arrays(pkgid, count):
5050
elif "numpy" == pkg:
5151
for i in range(count):
5252
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
53+
elif "cupynumeric" == pkg:
54+
for i in range(count):
55+
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))
5356

5457
return arr_list
5558

@@ -86,5 +89,7 @@ def fft_cupy(arr):
8689
cupy.cuda.runtime.deviceSynchronize()
8790
return res
8891

92+
def fft_cupynumeric(arr):
93+
return cupynumeric.fft.fft(arr)
8994

90-
FUNCS = {"dpnp": fft_dpnp, "numpy": fft_np, "cupy": fft_cupy, "arrayfire": fft_af}
95+
FUNCS = {"dpnp": fft_dpnp, "numpy": fft_np, "cupy": fft_cupy, "arrayfire": fft_af, "cupynumeric": fft_cupynumeric}

benchmarks/src/pytest_benchmark/test_gemm.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ def generate_arrays(pkgid, count):
8181
np.random.rand(1)
8282
for i in range(count):
8383
arr_list.append(np.random.rand(NSIZE, NSIZE).astype(DTYPE))
84+
elif "cupynumeric" == pkg:
85+
for i in range(count):
86+
arr_list.append(cupynumeric.random.rand(NSIZE, NSIZE).astype(DTYPE))
8487

8588
return arr_list
8689

@@ -117,5 +120,7 @@ def gemm_cupy(A, B, C):
117120
cupy.cuda.runtime.deviceSynchronize()
118121
return C
119122

123+
def gemm_cupynumeric(A, B, C):
124+
return alpha * cupynumeric.matmul(A, B) + beta * C
120125

121-
FUNCS = {"numpy": gemm_np, "cupy": gemm_cupy, "arrayfire": gemm_af, "dpnp": gemm_dpnp}
126+
FUNCS = {"numpy": gemm_np, "cupy": gemm_cupy, "arrayfire": gemm_af, "dpnp": gemm_dpnp, "cupynumeric": gemm_cupynumeric}

benchmarks/src/pytest_benchmark/test_kmeans.py

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ class TestKmeans:
1212
def test_kmeans(self, benchmark, pkgid):
1313
initialize_package(pkgid)
1414
pkg = PKGDICT[pkgid]
15-
kmean_class = {"dpnp": kmeans_dpnp, "numpy": kmeans_numpy, "cupy": kmeans_cupy, "arrayfire": kmeans_af}
15+
kmean_class = {"dpnp": kmeans_dpnp, "numpy": kmeans_numpy, "cupy": kmeans_cupy, "arrayfire": kmeans_af,
16+
"cupynumeric": kmeans_cupynumeric}
1617
obj = kmean_class[pkg.__name__]()
1718

1819
benchmark.extra_info["description"] = f"{NSAMPLES}x{NFEATURES} over {K} centers"
@@ -189,6 +190,94 @@ def kmeans(self):
189190
return centroids, cluster_assignments
190191

191192

193+
194+
class kmeans_cupynumeric:
195+
def __init__(self):
196+
self.data = cupynumeric.random.random((NSAMPLES, NFEATURES))
197+
self.centroid_indices = cupynumeric.random.choice(self.data.shape[0], K, replace=False)
198+
199+
def initialize_centroids(self):
200+
"""
201+
Randomly initializes k centroids from the data points.
202+
203+
Args:
204+
data (np.ndarray): The input data points (n_samples, n_features).
205+
k (int): The number of clusters.
206+
207+
Returns:
208+
np.ndarray: Initial centroids (k, n_features).
209+
"""
210+
211+
return self.data[self.centroid_indices, :]
212+
213+
def assign_to_clusters(self, centroids):
214+
"""
215+
Assigns each data point to the closest centroid.
216+
217+
Args:
218+
data (np.ndarray): The input data points (n_samples, n_features).
219+
centroids (np.ndarray): The current centroids (k, n_features).
220+
221+
Returns:
222+
np.ndarray: An array of cluster assignments for each data point (n_samples,).
223+
"""
224+
distances = cupynumeric.sqrt(((self.data[:, cupynumeric.newaxis, :] - centroids[cupynumeric.newaxis, :, :]) ** 2).sum(axis=2))
225+
cluster_assignments = cupynumeric.argmin(distances, axis=1)
226+
return cluster_assignments
227+
228+
def update_centroids(self, cluster_assignments):
229+
"""
230+
Recalculates the centroids based on the mean of the assigned data points.
231+
232+
Args:
233+
data (np.ndarray): The input data points (n_samples, n_features).
234+
cluster_assignments (np.ndarray): An array of cluster assignments.
235+
k (int): The number of clusters.
236+
237+
Returns:
238+
np.ndarray: Updated centroids (k, n_features).
239+
"""
240+
new_centroids = cupynumeric.zeros((K, self.data.shape[1]))
241+
for i in range(K):
242+
points_in_cluster = self.data[cluster_assignments == i]
243+
if len(points_in_cluster) > 0:
244+
new_centroids[i] = cupynumeric.mean(points_in_cluster, axis=0)
245+
return new_centroids
246+
247+
def kmeans(self):
248+
"""
249+
Performs the K-Means clustering algorithm.
250+
251+
Args:
252+
data (np.ndarray): The input data points (n_samples, n_features).
253+
k (int): The number of clusters.
254+
max_iterations (int): Maximum number of iterations to run the algorithm.
255+
tolerance (float): The tolerance for convergence (change in centroids).
256+
257+
Returns:
258+
tuple: A tuple containing:
259+
- np.ndarray: Final centroids (k, n_features).
260+
- np.ndarray: Final cluster assignments for each data point (n_samples,).
261+
"""
262+
centroids = self.initialize_centroids()
263+
cluster_assignments = None
264+
265+
for i in range(ITERATIONS):
266+
old_centroids = cupynumeric.copy(centroids)
267+
268+
# E-step: Assign points to clusters
269+
cluster_assignments = self.assign_to_clusters(centroids)
270+
271+
# M-step: Update centroids
272+
centroids = self.update_centroids(cluster_assignments)
273+
274+
# Check for convergence
275+
if cupynumeric.linalg.norm(centroids - old_centroids) < TOLERANCE:
276+
break
277+
278+
return centroids, cluster_assignments
279+
280+
192281
class kmeans_af:
193282
def __init__(self):
194283
self.data = af.Array(np.random.random((NSAMPLES, NFEATURES)).flatten().tolist(), shape=(NSAMPLES, NFEATURES))

0 commit comments

Comments
 (0)