Skip to content

Commit ede847b

Browse files
rickyyx authored and Isotr0py committed
[v1][stats][1/n] Add RequestStatsUpdate and RequestStats types (vllm-project#10907)
Signed-off-by: rickyx <[email protected]> Signed-off-by: Isotr0py <[email protected]>
1 parent ccb3e86 commit ede847b

File tree

3 files changed

+749
-0
lines changed

3 files changed

+749
-0
lines changed

tests/v1/test_stats.py

Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
import pytest
2+
3+
from vllm.sampling_params import SamplingParams
4+
from vllm.v1.stats.common import RequestStats, RequestStatsUpdate
5+
6+
7+
def make_update(
    request_id: str,
    update_type: RequestStatsUpdate.Type,
    monotonic_ts_s: float,
    **kwargs,
):
    """Build a ``RequestStatsUpdate`` of ``update_type`` for the tests.

    For INPUT_PROCESSED, PREFILLING, DETOKENIZED and FINISHED updates,
    placeholder values are supplied for the type-specific fields unless
    the caller passes them explicitly through ``kwargs``. Other update
    types receive ``kwargs`` unchanged.
    """
    update_types = RequestStatsUpdate.Type
    # Factories rather than plain dicts, so that placeholders (notably
    # SamplingParams) are only built for the update type that uses them.
    placeholder_factories = {
        update_types.INPUT_PROCESSED: lambda: {
            "sampling_params": SamplingParams(n=1),
            "num_prompt_tokens": 10,
        },
        update_types.PREFILLING: lambda: {
            "num_computed_tokens": 10,
            "num_cached_tokens": 10,
        },
        update_types.DETOKENIZED: lambda: {"num_new_tokens": 10},
        update_types.FINISHED: lambda: {"finish_reason": "test_reason"},
    }

    build_placeholders = placeholder_factories.get(update_type)
    placeholders = build_placeholders() if build_placeholders else {}
    # Caller-supplied values take precedence over the placeholders.
    fields = {**placeholders, **kwargs}

    return RequestStatsUpdate(
        request_id=request_id,
        type=update_type,
        monotonic_ts_s=monotonic_ts_s,
        **fields,
    )
30+
31+
32+
def test_invalid_request_update():
    """Constructing an update without a required field raises ValueError.

    For every update type that has type-specific fields, each field is
    omitted in turn while the remaining ones are filled with opaque
    placeholder objects.
    """
    request_id = "test_request"
    required_fields_by_type = {
        RequestStatsUpdate.Type.INPUT_PROCESSED: [
            "sampling_params",
            "num_prompt_tokens",
        ],
        RequestStatsUpdate.Type.PREFILLING: [
            "num_computed_tokens",
            "num_cached_tokens",
        ],
        RequestStatsUpdate.Type.DETOKENIZED: ["num_new_tokens"],
        RequestStatsUpdate.Type.FINISHED: ["finish_reason"],
    }

    for update_type in RequestStatsUpdate.Type:
        # Types without an entry have nothing to omit, so nothing is
        # checked for them.
        field_names = required_fields_by_type.get(update_type, [])

        for omitted in field_names:
            # Provide every type-specific field except the omitted one.
            partial_kwargs = {
                name: object()
                for name in field_names if name != omitted
            }
            with pytest.raises(ValueError):
                RequestStatsUpdate(
                    request_id=request_id,
                    type=update_type,
                    **partial_kwargs,
                )
62+
63+
64+
def test_invalid_request_update_transition():
    """Check ``RequestStatsUpdate.check_valid_update`` on bad input.

    Every (previous type, new type) pair is tried: pairs absent from
    ``_VALID_TRANSITIONS`` must raise ``AssertionError`` and the others
    must be accepted. An update whose timestamp is older than the last
    recorded one must also raise ``AssertionError``.
    """
    update_types = RequestStatsUpdate.Type

    def check_transition(src, dst):
        # The update is always newer (ts=1) than the last one (ts=0), so
        # only the transition itself can make the check fail.
        RequestStatsUpdate.check_valid_update(
            make_update(
                request_id="test_request",
                update_type=dst,
                monotonic_ts_s=1,
            ),
            last_update_type=src,
            last_updated_ts_s=0,
        )

    # Invalid transition type.
    for src in update_types:
        allowed_next = RequestStatsUpdate._VALID_TRANSITIONS[src]
        for dst in update_types:
            if dst in allowed_next:
                check_transition(src, dst)
                continue
            with pytest.raises(AssertionError):
                check_transition(src, dst)

    # Invalid timestamp: the update (ts=1) predates the last update (ts=2).
    with pytest.raises(AssertionError):
        stale_update = make_update(
            request_id="test_request",
            update_type=update_types.ARRIVED,
            monotonic_ts_s=1,
        )
        RequestStatsUpdate.check_valid_update(
            stale_update,
            last_update_type=None,
            last_updated_ts_s=2,
        )
101+
102+
103+
def test_lifecycle_updates():
    """Drive one ``RequestStats`` through a full request lifecycle.

    After each update is applied with ``update_from``, the timestamps,
    counters and derived latencies exposed by ``RequestStats`` are
    checked against the timestamps used to build the updates.
    """
    request_id = "test_request"
    stats = RequestStats(request_id=request_id)

    # Scenario under test (one timestamp per update, in order):
    #   ARRIVED -> INPUT_PROCESSED -> QUEUED -> PREFILLING
    #   -> DECODING -> DETOKENIZED -> DECODING -> DETOKENIZED
    #   -> PREEMPTED -> PREFILLING (resumed)
    #   -> DECODING -> DETOKENIZED -> FINISHED
    arrived_ts = 0
    input_processed_ts = 1
    queued_ts = 2
    prefilling_ts = 3
    decoded_ts = 5
    detokenized_ts = 6
    decoded_2_ts = 7
    detokenized_2_ts = 8
    preempted_ts = 9
    resumed_ts = 10
    decoded_3_ts = 11
    detokenized_3_ts = 12
    finished_ts = 13

    # Test ARRIVED
    arrived_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.ARRIVED,
        monotonic_ts_s=arrived_ts,
    )
    stats.update_from(arrived_update)
    assert stats.arrival_ts_s == arrived_ts
    assert stats.last_updated_ts_s == arrived_ts

    # Test INPUT_PROCESSED
    sampling_params = SamplingParams(n=1)
    input_processed_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.INPUT_PROCESSED,
        monotonic_ts_s=input_processed_ts,
        sampling_params=sampling_params,
        num_prompt_tokens=6,
    )
    stats.update_from(input_processed_update)
    assert stats.input_processor_end_ts_s == input_processed_ts
    assert stats.last_updated_ts_s == input_processed_ts
    assert stats.num_prompt_tokens == 6
    assert stats.sampling_params == sampling_params

    # Nothing has been scheduled or generated yet.
    assert stats.first_token_ts_s is None
    assert stats.prefill_ts_s is None

    # Test QUEUED
    queued_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.QUEUED,
        monotonic_ts_s=queued_ts,
    )
    stats.update_from(queued_update)
    assert stats.queued_ts_s == queued_ts
    assert stats.last_updated_ts_s == queued_ts

    # Test PREFILLING
    prefilling_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.PREFILLING,
        monotonic_ts_s=prefilling_ts,
        num_computed_tokens=3,
        num_cached_tokens=1,
    )
    stats.update_from(prefilling_update)
    assert stats.prefill_ts_s == prefilling_ts
    assert stats.num_computed_tokens == 3
    assert stats.num_cached_tokens == 1
    assert stats.queue_duration_s == prefilling_ts - queued_ts

    # Test DECODING
    decoded_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DECODING,
        monotonic_ts_s=decoded_ts,
    )
    stats.update_from(decoded_update)
    assert stats.last_updated_ts_s == decoded_ts

    # Test DETOKENIZED
    detokenized_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DETOKENIZED,
        monotonic_ts_s=detokenized_ts,
        num_new_tokens=1,
    )
    stats.update_from(detokenized_update)
    assert stats.last_updated_ts_s == detokenized_ts
    assert stats.num_output_tokens == 1
    # Since arrival
    assert stats.first_token_latency_s == detokenized_ts - arrived_ts
    # Since first scheduled
    assert stats.prefill_latency_s == detokenized_ts - prefilling_ts

    # Test another DECODING and DETOKENIZED should
    # yield correct inter token latency
    decoded_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DECODING,
        monotonic_ts_s=decoded_2_ts,
    )
    stats.update_from(decoded_update)

    detokenized_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DETOKENIZED,
        monotonic_ts_s=detokenized_2_ts,
        num_new_tokens=1,
    )
    stats.update_from(detokenized_update)
    assert stats.output_token_latency_s_lst == [
        detokenized_2_ts - detokenized_ts,
    ]
    assert stats.num_output_tokens == 2

    # Test PREEMPTED
    preempted_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.PREEMPTED,
        monotonic_ts_s=preempted_ts,
    )
    stats.update_from(preempted_update)
    assert stats.last_updated_ts_s == preempted_ts
    assert stats.preempted_ts_s_lst == [preempted_ts]
    # States should be reset
    assert stats.num_computed_tokens == 0
    assert stats.num_cached_tokens == 0
    # These states should not be reset
    assert stats.num_output_tokens == 2
    assert stats.output_token_latency_s_lst == [
        detokenized_2_ts - detokenized_ts,
    ]
    # Same expression as asserted right after the first DETOKENIZED, so
    # this really checks that preemption left the value untouched rather
    # than matching an unrelated difference that happens to be equal.
    assert stats.prefill_latency_s == detokenized_ts - prefilling_ts
    assert stats.num_prompt_tokens == 6
    assert stats.prefill_start_ts_s_lst == [prefilling_ts]

    # Test resumed
    resumed_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.PREFILLING,
        monotonic_ts_s=resumed_ts,
        num_computed_tokens=6,
        num_cached_tokens=2,
    )
    stats.update_from(resumed_update)
    # prefill timestamp should not be updated since it's a resumed prefill
    assert stats.prefill_ts_s == prefilling_ts
    assert stats.num_computed_tokens == 6
    assert stats.num_cached_tokens == 2
    assert stats.prefill_start_ts_s_lst == [
        prefilling_ts,
        resumed_ts,
    ]
    assert stats.last_updated_ts_s == resumed_ts

    # Test another DECODING/DETOKENIZED after the resume should leave the
    # first-token timestamp and latency pinned to the first DETOKENIZED.
    decoded_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DECODING,
        monotonic_ts_s=decoded_3_ts,
    )
    detokenized_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.DETOKENIZED,
        monotonic_ts_s=detokenized_3_ts,
        num_new_tokens=1,
    )
    stats.update_from(decoded_update)
    stats.update_from(detokenized_update)
    # Compare the timestamp with a timestamp and the latency with a
    # duration; mixing the two only works while arrived_ts is 0.
    assert stats.first_token_ts_s == detokenized_ts
    assert stats.first_token_latency_s == detokenized_ts - arrived_ts
    assert stats.num_output_tokens == 3
    assert stats.output_token_latency_s_lst == [
        detokenized_2_ts - detokenized_ts,
        detokenized_3_ts - detokenized_2_ts,
    ]

    # Test FINISHED
    finished_update = RequestStatsUpdate(
        request_id=request_id,
        type=RequestStatsUpdate.Type.FINISHED,
        monotonic_ts_s=finished_ts,
        finish_reason="test_reason",
    )
    stats.update_from(finished_update)
    assert stats.last_updated_ts_s == finished_ts
    assert stats.e2e_latency_s == finished_ts - arrived_ts
    assert stats.inference_latency_s == finished_ts - prefilling_ts
    assert stats.prefill_latency_s == detokenized_ts - prefilling_ts
    assert stats.decode_latency_s == finished_ts - detokenized_ts
    assert stats.first_token_latency_s == detokenized_ts - arrived_ts
    assert stats.queue_duration_s == prefilling_ts - queued_ts
    assert stats.is_finished
    assert stats.finish_reason == "test_reason"

    # TODO(rickyx): Add model forward/execute time.
    assert stats.model_forward_duration_s == 0.0
    assert stats.model_execute_duration_s == 0.0

vllm/v1/stats/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)