|
10 | 10 | from vllm.sampling_params import SamplingParams
|
11 | 11 |
|
12 | 12 |
|
13 |
| -class RequestStatsUpdate(msgspec.Struct, |
14 |
| - array_like=True, |
15 |
| - omit_defaults=True, |
16 |
| - gc=False): |
| 13 | +class RequestStatsUpdate( |
| 14 | + msgspec.Struct, # type: ignore |
| 15 | + array_like=True, |
| 16 | + omit_defaults=True, |
| 17 | + gc=False): |
17 | 18 | """
|
18 | 19 | An update to the request stats.
|
19 | 20 |
|
@@ -341,16 +342,16 @@ def update_from(self, update: "RequestStatsUpdate"):
|
341 | 342 | self.queued_ts_s = ts
|
342 | 343 | elif update.type == RequestStatsUpdate.Type.PREFILLING:
|
343 | 344 | self.prefill_start_ts_s_lst.append(ts)
|
344 |
| - self.num_cached_tokens = update.num_cached_tokens |
345 |
| - self.num_computed_tokens = update.num_computed_tokens |
| 345 | + self.num_cached_tokens = update.num_cached_tokens or 0 |
| 346 | + self.num_computed_tokens = update.num_computed_tokens or 0 |
346 | 347 | elif update.type == RequestStatsUpdate.Type.PREEMPTED:
|
347 | 348 | self._reset_for_preemption(ts)
|
348 | 349 | elif update.type == RequestStatsUpdate.Type.DECODING:
|
349 | 350 | self.decoding_ts_s_lst.append(ts)
|
350 | 351 | elif update.type == RequestStatsUpdate.Type.DETOKENIZED:
|
351 | 352 | self._record_detokenized_output(
|
352 | 353 | ts,
|
353 |
| - update.num_new_tokens, |
| 354 | + update.num_new_tokens or 0, |
354 | 355 | )
|
355 | 356 | elif update.type == RequestStatsUpdate.Type.FINISHED:
|
356 | 357 | self.finished_ts_s = ts
|
@@ -425,10 +426,11 @@ class EngineCoreProcessStats:
|
425 | 426 | output_queue_size: Optional[int] = None
|
426 | 427 |
|
427 | 428 |
|
428 |
| -class EngineCoreStatsSnapshot(msgspec.Struct, |
429 |
| - array_like=True, |
430 |
| - omit_defaults=True, |
431 |
| - gc=False): |
| 429 | +class EngineCoreStatsSnapshot( |
| 430 | + msgspec.Struct, # type: ignore |
| 431 | + array_like=True, |
| 432 | + omit_defaults=True, |
| 433 | + gc=False): |
432 | 434 | """
|
433 | 435 | A snapshot of the EngineCore's current stats over a period of time.
|
434 | 436 | """
|
|
0 commit comments