
Commit b4651d1

ruisearch42 authored and mzusman committed
[V1] Unify VLLM_ENABLE_V1_MULTIPROCESSING handling in RayExecutor (vllm-project#11472)
1 parent 36219d2 commit b4651d1

File tree

3 files changed: +4 -8 lines changed

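Taken together, the three hunks below drop the test-side override of VLLM_ENABLE_V1_MULTIPROCESSING and move Ray cluster initialization out of LLMEngine._get_executor_cls into RayExecutor.__init__, so the Ray backend handles both concerns itself. For orientation, here is a hedged sketch of the user-facing path this commit touches; it is not part of the diff, the model name, parallel size, and prompt are placeholders, and the keyword arguments are assumed to be forwarded to vLLM's EngineArgs:

# Illustrative only: run the V1 engine with the Ray distributed executor,
# which instantiates RayExecutor from vllm/v1/executor/ray_executor.py.
import os

os.environ["VLLM_USE_V1"] = "1"  # opt into the V1 engine before importing vllm

from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",            # placeholder model
    tensor_parallel_size=2,               # placeholder parallel size
    distributed_executor_backend="ray",   # selects the Ray executor path
)
outputs = llm.generate(["Hello"], SamplingParams(max_tokens=5))
print(outputs[0].outputs[0].text)

With this change the caller (and the distributed test below) no longer sets VLLM_ENABLE_V1_MULTIPROCESSING for the Ray backend; per the commit title, that handling is unified inside the Ray executor path.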

tests/basic_correctness/test_basic_correctness.py

Lines changed: 0 additions & 5 deletions
@@ -127,11 +127,6 @@ def test_models_distributed(
     if attention_backend:
         os.environ["VLLM_ATTENTION_BACKEND"] = attention_backend
 
-    # Import VLLM_USE_V1 dynamically to handle patching
-    from vllm.envs import VLLM_USE_V1
-    if VLLM_USE_V1 and distributed_executor_backend != "mp":
-        os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
-
     dtype = "half"
     max_tokens = 5

vllm/v1/engine/llm_engine.py

Lines changed: 0 additions & 2 deletions
@@ -21,7 +21,6 @@
 from vllm.v1.engine.detokenizer import Detokenizer
 from vllm.v1.engine.processor import Processor
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.executor.ray_utils import initialize_ray_cluster
 
 logger = init_logger(__name__)
 
@@ -112,7 +111,6 @@ def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
         distributed_executor_backend = (
             vllm_config.parallel_config.distributed_executor_backend)
         if distributed_executor_backend == "ray":
-            initialize_ray_cluster(vllm_config.parallel_config)
             from vllm.v1.executor.ray_executor import RayExecutor
             executor_class = RayExecutor
         elif distributed_executor_backend == "mp":
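After the deletion, executor selection and Ray initialization are decoupled: _get_executor_cls only picks the class. A hedged sketch of the resulting method follows; only the lines present in the hunk are confirmed, while the mp branch body and the return are assumed context:

@classmethod
def _get_executor_cls(cls, vllm_config: VllmConfig) -> Type[Executor]:
    distributed_executor_backend = (
        vllm_config.parallel_config.distributed_executor_backend)
    if distributed_executor_backend == "ray":
        # initialize_ray_cluster() is no longer called here; RayExecutor
        # now runs it in its own __init__ (see the next file).
        from vllm.v1.executor.ray_executor import RayExecutor
        executor_class = RayExecutor
    elif distributed_executor_backend == "mp":
        ...  # multiprocessing executor selection, unchanged and not shown in the diff
    return executor_class  # assumed: the method returns the selected class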

vllm/v1/executor/ray_executor.py

Lines changed: 4 additions & 1 deletion
@@ -8,7 +8,8 @@
 from vllm.logger import init_logger
 from vllm.utils import get_distributed_init_method, get_ip, get_open_port
 from vllm.v1.executor.abstract import Executor
-from vllm.v1.executor.ray_utils import RayWorkerWrapper, ray
+from vllm.v1.executor.ray_utils import (RayWorkerWrapper,
+                                        initialize_ray_cluster, ray)
 from vllm.v1.outputs import ModelRunnerOutput
 
 if ray is not None:
@@ -33,7 +34,9 @@ def __init__(self, vllm_config: VllmConfig) -> None:
         if ray_usage != "1":
             os.environ["RAY_USAGE_STATS_ENABLED"] = "0"
 
+        initialize_ray_cluster(self.parallel_config)
         placement_group = self.parallel_config.placement_group
+
         # Create the parallel GPU workers.
         self._init_workers_ray(placement_group)
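Read together, the two hunks above make the constructor self-sufficient: it initializes the Ray cluster itself before reading the placement group. A hedged sketch of how __init__ reads after this commit; only the diffed lines are confirmed, and the attribute setup and the ray_usage lookup are assumed context:

import os

from vllm.config import VllmConfig
from vllm.v1.executor.abstract import Executor
from vllm.v1.executor.ray_utils import (RayWorkerWrapper,
                                        initialize_ray_cluster, ray)


class RayExecutor(Executor):

    def __init__(self, vllm_config: VllmConfig) -> None:
        self.parallel_config = vllm_config.parallel_config  # assumed context

        ray_usage = os.environ.get("RAY_USAGE_STATS_ENABLED", "0")  # assumed context
        if ray_usage != "1":
            os.environ["RAY_USAGE_STATS_ENABLED"] = "0"

        # New in this commit: the executor initializes the Ray cluster itself
        # instead of relying on LLMEngine._get_executor_cls having done it.
        initialize_ray_cluster(self.parallel_config)
        placement_group = self.parallel_config.placement_group

        # Create the parallel GPU workers.
        self._init_workers_ray(placement_group)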

0 commit comments