Skip to content

Commit 84bf287

Browse files
authored
[Sync] Upstream 20250910 commit e408272
[Sync] Upstream 20250910 commit `e408272`
2 parents 294874d + 5910bc5 commit 84bf287

File tree

1,084 files changed

+63890
-31283
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,084 files changed

+63890
-31283
lines changed

.buildkite/check-wheel-size.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
import sys
66
import zipfile
77

8-
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 400 MiB
9-
# Note that we have 400 MiB quota, please use it wisely.
10-
# See https://github.com/pypi/support/issues/3792 .
8+
# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 450 MiB
9+
# Note that we have 800 MiB quota, please use it wisely.
10+
# See https://github.com/pypi/support/issues/6326 .
1111
# Please also sync the value with the one in Dockerfile.
12-
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
12+
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 450))
1313

1414

1515
def print_top_10_largest_files(zip_file):

.buildkite/generate_index.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
<html>
99
<body>
1010
<h1>Links for vLLM</h1/>
11-
<a href="../{wheel_html_escaped}">{wheel}</a><br/>
11+
<a href="../{x86_wheel_html_escaped}">{x86_wheel}</a><br/>
12+
<a href="../{arm_wheel_html_escaped}">{arm_wheel}</a><br/>
1213
</body>
1314
</html>
1415
"""
@@ -21,7 +22,25 @@
2122

2223
with open("index.html", "w") as f:
2324
print(f"Generated index.html for {args.wheel}")
25+
# sync the abi tag with .buildkite/scripts/upload-wheels.sh
26+
if "x86_64" in filename:
27+
x86_wheel = filename
28+
arm_wheel = filename.replace("x86_64", "aarch64").replace(
29+
"manylinux1", "manylinux2014"
30+
)
31+
elif "aarch64" in filename:
32+
x86_wheel = filename.replace("aarch64", "x86_64").replace(
33+
"manylinux2014", "manylinux1"
34+
)
35+
arm_wheel = filename
36+
else:
37+
raise ValueError(f"Unsupported wheel: {filename}")
2438
# cloudfront requires escaping the '+' character
2539
f.write(
26-
template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
40+
template.format(
41+
x86_wheel=x86_wheel,
42+
x86_wheel_html_escaped=x86_wheel.replace("+", "%2B"),
43+
arm_wheel=arm_wheel,
44+
arm_wheel_html_escaped=arm_wheel.replace("+", "%2B"),
45+
)
2746
)

.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml

Lines changed: 0 additions & 12 deletions
This file was deleted.

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,3 @@ Meta-Llama-3-70B-Instruct.yaml
33
Mixtral-8x7B-Instruct-v0.1.yaml
44
Qwen2-57B-A14-Instruct.yaml
55
DeepSeek-V2-Lite-Chat.yaml
6-
Meta-Llama-3-8B-QQQ.yaml

.buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# We can use this script to compute baseline accuracy on GSM for transformers.
33
#
44
# Make sure you have lm-eval-harness installed:
5-
# pip install lm-eval==0.4.4
5+
# pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api]
66

77
usage() {
88
echo``

.buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# We use this for fp8, which HF does not support.
44
#
55
# Make sure you have lm-eval-harness installed:
6-
# pip install lm-eval==0.4.4
6+
# pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d#egg=lm-eval[api]
77

88
usage() {
99
echo``

.buildkite/nightly-benchmarks/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ When run, benchmark script generates results under `benchmark/results` folder, a
141141
`compare-json-results.py` compares two `benchmark_results.json` files and provides performance ratio e.g. for Output Tput, Median TTFT and Median TPOT.
142142
If only one benchmark_results.json is passed, `compare-json-results.py` compares different TP and PP configurations in the benchmark_results.json instead.
143143

144-
Here is an example using the script to compare result_a and result_b with Model, Dataset name, input/output lenght, max concurrency and qps.
144+
Here is an example using the script to compare result_a and result_b with Model, Dataset name, input/output length, max concurrency and qps.
145145
`python3 compare-json-results.py -f results_a/benchmark_results.json -f results_b/benchmark_results.json`
146146

147147
| | Model | Dataset Name | Input Len | Output Len | # of max concurrency | qps | results_a/benchmark_results.json | results_b/benchmark_results.json | perf_ratio |

.buildkite/nightly-benchmarks/nightly-descriptions.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Latest reproduction guilde: [github issue link](https://github.com/vllm-project/
1717
- SGLang: `lmsysorg/sglang:v0.3.2-cu121`
1818
- LMDeploy: `openmmlab/lmdeploy:v0.6.1-cu12`
1919
- TensorRT-LLM: `nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3`
20-
- *NOTE: we uses r24.07 as the current implementation only works for this version. We are going to bump this up.*
20+
- *NOTE: we use r24.07 as the current implementation only works for this version. We are going to bump this up.*
2121
- Check [nightly-pipeline.yaml](nightly-pipeline.yaml) for the concrete docker images, specs and commands we use for the benchmark.
2222
- Hardware
2323
- 8x Nvidia A100 GPUs

.buildkite/nightly-benchmarks/scripts/compare-json-results.py

Lines changed: 120 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3,44 +3,129 @@
33
import argparse
44
import json
55
import os
6+
from importlib import util
67

78
import pandas as pd
89

10+
plotly_found = util.find_spec("plotly.express") is not None
11+
912

1013
def compare_data_columns(
1114
files, name_column, data_column, info_cols, drop_column, debug=False
1215
):
13-
print("\ncompare_data_column: " + data_column)
16+
"""
17+
Align concatenation by keys derived from info_cols instead of row order.
18+
- Pick one canonical key list: subset of info_cols present in ALL files.
19+
- For each file: set index to those keys, aggregate duplicates
20+
- (mean for metric, first for names).
21+
- Concat along axis=1 (indexes align), then reset_index so callers can
22+
- group by columns.
23+
- If --debug, add a <file_label>_name column per file.
24+
"""
25+
print("\ncompare_data_column:", data_column)
26+
1427
frames = []
1528
raw_data_cols = []
1629
compare_frames = []
30+
31+
# 1) choose a canonical key list from info_cols that exists in ALL files
32+
cols_per_file = []
33+
for f in files:
34+
try:
35+
df_tmp = pd.read_json(f, orient="records")
36+
except Exception as err:
37+
raise ValueError(f"Failed to read {f}") from err
38+
cols_per_file.append(set(df_tmp.columns))
39+
40+
key_cols = [c for c in info_cols if all(c in cset for cset in cols_per_file)]
41+
if not key_cols:
42+
# soft fallback: use any info_cols present in the first file
43+
key_cols = [c for c in info_cols if c in list(cols_per_file[0])]
44+
if not key_cols:
45+
raise ValueError(
46+
"No common key columns found from info_cols across the input files."
47+
)
48+
49+
# 2) build a single "meta" block (keys as columns) once, aligned by the key index
50+
meta_added = False
51+
1752
for file in files:
18-
data_df = pd.read_json(file)
19-
serving_df = data_df.dropna(subset=[drop_column], ignore_index=True)
20-
# Show all info columns in the first couple columns
21-
if not frames:
22-
for col in info_cols:
23-
if col not in serving_df.columns:
24-
print(f"Skipping missing column: {col}")
25-
continue
26-
frames.append(serving_df[col])
27-
# only show test name under debug mode
28-
if debug is True:
29-
serving_df = serving_df.rename(columns={name_column: file + "_name"})
30-
frames.append(serving_df[file + "_name"])
31-
32-
file = "/".join(file.split("/")[:-1])
33-
serving_df = serving_df.rename(columns={data_column: file})
34-
frames.append(serving_df[file])
35-
raw_data_cols.append(file)
36-
compare_frames.append(serving_df[file])
53+
df = pd.read_json(file, orient="records")
54+
55+
# Keep rows that actually have the compared metric (same as original behavior)
56+
if drop_column in df.columns:
57+
df = df.dropna(subset=[drop_column], ignore_index=True)
58+
59+
# Stabilize numeric key columns (harmless if missing)
60+
for c in (
61+
"Input Len",
62+
"Output Len",
63+
"TP Size",
64+
"PP Size",
65+
"# of max concurrency.",
66+
"qps",
67+
):
68+
if c in df.columns:
69+
df[c] = pd.to_numeric(df[c], errors="coerce")
70+
71+
# Ensure all key columns exist
72+
for c in key_cols:
73+
if c not in df.columns:
74+
df[c] = pd.NA
75+
76+
# Set index = key_cols and aggregate duplicates → unique MultiIndex
77+
df_idx = df.set_index(key_cols, drop=False)
78+
79+
# meta (key columns), unique per key
80+
meta = df_idx[key_cols]
81+
if not meta.index.is_unique:
82+
meta = meta.groupby(level=key_cols, dropna=False).first()
83+
84+
# metric series for this file, aggregated to one row per key
85+
file_label = "/".join(file.split("/")[:-1]) or os.path.basename(file)
86+
s = df_idx[data_column]
87+
if not s.index.is_unique:
88+
s = s.groupby(level=key_cols, dropna=False).mean()
89+
s.name = file_label # column label like original
90+
91+
# add meta once (from first file) so keys are the leftmost columns
92+
if not meta_added:
93+
frames.append(meta)
94+
meta_added = True
95+
96+
# (NEW) debug: aligned test-name column per file
97+
if debug and name_column in df_idx.columns:
98+
name_s = df_idx[name_column]
99+
if not name_s.index.is_unique:
100+
name_s = name_s.groupby(level=key_cols, dropna=False).first()
101+
name_s.name = f"{file_label}_name"
102+
frames.append(name_s)
103+
104+
frames.append(s)
105+
raw_data_cols.append(file_label)
106+
compare_frames.append(s)
107+
108+
# Generalize ratio: for any file N>=2, add ratio (fileN / file1)
37109
if len(compare_frames) >= 2:
38-
# Compare numbers among two files
39-
ratio_df = compare_frames[1] / compare_frames[0]
40-
frames.append(ratio_df)
41-
compare_frames.pop(1)
110+
base = compare_frames[0]
111+
current = compare_frames[-1]
112+
ratio = current / base
113+
ratio = ratio.mask(base == 0) # avoid inf when baseline is 0
114+
ratio.name = f"Ratio 1 vs {len(compare_frames)}"
115+
frames.append(ratio)
42116

117+
# 4) concat on columns with aligned MultiIndex;
118+
# then reset_index to return keys as columns
43119
concat_df = pd.concat(frames, axis=1)
120+
concat_df = concat_df.reset_index(drop=True).reset_index()
121+
if "index" in concat_df.columns:
122+
concat_df = concat_df.drop(columns=["index"])
123+
124+
# Ensure key/info columns appear first (in your info_cols order)
125+
front = [c for c in info_cols if c in concat_df.columns]
126+
rest = [c for c in concat_df.columns if c not in front]
127+
concat_df = concat_df[front + rest]
128+
44129
print(raw_data_cols)
45130
return concat_df, raw_data_cols
46131

@@ -67,6 +152,15 @@ def split_json_by_tp_pp(
67152

68153
df = pd.DataFrame(data)
69154

155+
# Keep only "serving" tests
156+
name_col = next(
157+
(c for c in ["Test name", "test_name", "Test Name"] if c in df.columns), None
158+
)
159+
if name_col:
160+
df = df[
161+
df[name_col].astype(str).str.contains(r"serving", case=False, na=False)
162+
].copy()
163+
70164
# Handle alias column names
71165
rename_map = {
72166
"tp_size": "TP Size",
@@ -124,7 +218,7 @@ def split_json_by_tp_pp(
124218
"--xaxis",
125219
type=str,
126220
default="# of max concurrency.",
127-
help="column name to use as X Axis in comparision graph",
221+
help="column name to use as X Axis in comparison graph",
128222
)
129223
args = parser.parse_args()
130224

@@ -181,16 +275,14 @@ def split_json_by_tp_pp(
181275
f"Expected subset: {filtered_info_cols}, "
182276
f"but DataFrame has: {list(output_df.columns)}"
183277
)
184-
185278
output_df_sorted = output_df.sort_values(by=existing_group_cols)
186279
output_groups = output_df_sorted.groupby(existing_group_cols, dropna=False)
187280
for name, group in output_groups:
188281
html = group.to_html()
189282
text_file.write(html_msgs_for_data_cols[i])
190283
text_file.write(html)
191284

192-
if plot is True:
193-
import pandas as pd
285+
if plot and plotly_found:
194286
import plotly.express as px
195287

196288
df = group[raw_data_cols]

.buildkite/nightly-benchmarks/scripts/run-nightly-benchmarks.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ run_genai_perf_tests() {
382382
client_command="genai-perf profile \
383383
-m $model \
384384
--service-kind openai \
385-
--backend vllm \
385+
--backend "$backend" \
386386
--endpoint-type chat \
387387
--streaming \
388388
--url localhost:$port \

0 commit comments

Comments
 (0)