|
1 | 1 | # SPDX-License-Identifier: Apache-2.0
|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
3 | 3 | import asyncio
|
| 4 | +import os |
| 5 | +import socket |
4 | 6 | import time
|
5 | 7 | from collections.abc import AsyncGenerator, Iterable, Mapping
|
6 | 8 | from copy import copy
|
7 | 9 | from typing import Any, Optional, Union
|
8 | 10 |
|
9 | 11 | import numpy as np
|
| 12 | +import torch |
10 | 13 |
|
11 | 14 | import vllm.envs as envs
|
12 | 15 | from vllm.config import ModelConfig, VllmConfig
|
@@ -144,6 +147,26 @@ def __init__(
|
144 | 147 | except RuntimeError:
|
145 | 148 | pass
|
146 | 149 |
|
| 150 | + if envs.VLLM_TORCH_PROFILER_DIR: |
| 151 | + logger.info( |
| 152 | + "Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501 |
| 153 | + envs.VLLM_TORCH_PROFILER_DIR) |
| 154 | + worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm" |
| 155 | + self.profiler = torch.profiler.profile( |
| 156 | + activities=[ |
| 157 | + torch.profiler.ProfilerActivity.CPU, |
| 158 | + ], |
| 159 | + with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK, |
| 160 | + on_trace_ready=torch.profiler.tensorboard_trace_handler( |
| 161 | + envs.VLLM_TORCH_PROFILER_DIR, |
| 162 | + worker_name=worker_name, |
| 163 | + use_gzip=True)) |
| 164 | + else: |
| 165 | + logger.info( |
| 166 | + "Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501 |
| 167 | + ) |
| 168 | + self.profiler = None |
| 169 | + |
147 | 170 | @classmethod
|
148 | 171 | @deprecate_kwargs(
|
149 | 172 | "disable_log_requests",
|
@@ -562,10 +585,16 @@ async def check_health(self) -> None:
|
562 | 585 | raise self.dead_error
|
563 | 586 |
|
async def start_profile(self) -> None:
    """Begin a profiling session.

    Asks the engine core to start profiling via ``profile_async(True)``
    and, when ``self.profiler`` is set, also calls that profiler's
    ``start`` method. Both operations are awaited together.
    """
    pending = [self.engine_core.profile_async(True)]
    frontend_profiler = self.profiler
    if frontend_profiler is not None:
        # ``start`` is a plain synchronous call; hand it to a worker thread
        # so it does not run on the event loop thread.
        pending += [asyncio.to_thread(frontend_profiler.start)]
    await asyncio.gather(*pending)
566 | 592 |
|
async def stop_profile(self) -> None:
    """End a profiling session.

    Asks the engine core to stop profiling via ``profile_async(False)``
    and, when ``self.profiler`` is set, also calls that profiler's
    ``stop`` method. Both operations are awaited together.
    """
    pending = [self.engine_core.profile_async(False)]
    frontend_profiler = self.profiler
    if frontend_profiler is not None:
        # ``stop`` is a plain synchronous call; hand it to a worker thread
        # so it does not run on the event loop thread.
        pending += [asyncio.to_thread(frontend_profiler.stop)]
    await asyncio.gather(*pending)
569 | 598 |
|
570 | 599 | async def reset_mm_cache(self) -> None:
|
571 | 600 | self.processor.mm_registry.reset_processor_cache(self.model_config)
|
|
0 commit comments