
Commit 2561d8d

Lucaskabela authored and mercykid committed
[Bugfix] Remove VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE (vllm-project#2969)

### What this PR does / why we need it?
This PR prepares for the removal of the environment variable `VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE`, as vLLM now requires `fullgraph=True` to run.

- Fixes vllm-project/vllm#21834

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
See CI.

- vLLM version: v0.10.2
- vLLM main: vllm-project/vllm@99cc41a

Signed-off-by: Lucas Kabela <[email protected]>
Signed-off-by: Che Ruan <[email protected]>
1 parent 81215b9 commit 2561d8d
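
For context, `fullgraph=True` makes Dynamo raise on any graph break instead of silently splitting the model into multiple graphs, which is the behavior vLLM now assumes everywhere. A minimal sketch of that semantics (the toy function below is hypothetical, not from this repo):

```python
import torch

def toy_forward(x: torch.Tensor) -> torch.Tensor:
    # Pure tensor ops: Dynamo can capture this as a single graph.
    return torch.relu(x) * 2.0

# With fullgraph=True, any graph break raises at compile time instead of
# falling back to piecewise eager execution, so an on/off toggle for full
# graph capture no longer makes sense.
compiled = torch.compile(toy_forward, dynamic=True, fullgraph=True)
print(compiled(torch.randn(4)))
```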

File tree: 2 files changed, +10 −14 lines

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 5 additions & 7 deletions
@@ -3,7 +3,6 @@
 import torch
 import torch.nn as nn
 import torchair
-import vllm.envs as envs_vllm
 from torchair import patch_for_hcom
 from vllm.attention.layer import Attention
 from vllm.config import (VllmConfig, get_layers_from_vllm_config,
@@ -596,11 +595,10 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
         torch.npu.set_compile_mode(jit_compile=False)
         if not self.runner.use_cached_npu_graph:
             npu_backend = torchair.get_npu_backend(compiler_config=config)
-            self.torchair_compiled_model = torch.compile(
-                self.model,
-                dynamic=True,
-                fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=npu_backend)
+            self.torchair_compiled_model = torch.compile(self.model,
+                                                         dynamic=True,
+                                                         fullgraph=True,
+                                                         backend=npu_backend)
             return self.torchair_compiled_model
         else:
             # Generate a new forward proxy code object to prevent the invalidation of
@@ -622,7 +620,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
                     batch_size] = torchair.inference.cache_compile(
                         self.model.__dict__[forward_proxy_name],
                         dynamic=True,
-                        fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
+                        fullgraph=True,
                         cache_dir=TORCHAIR_CACHE_DIR,
                         config=config,
                         ge_cache=False)
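
In isolation, the non-cached path now reads as below. This is only a sketch: the placeholder model and config values are not from the repo, and actually running it requires an Ascend environment with `torch_npu` and `torchair` installed.

```python
import torch
import torchair

# Placeholder module standing in for the runner's real model.
model = torch.nn.Linear(8, 8)

# Same shape as the patched call site: the backend comes from torchair and
# fullgraph is now hard-coded to True rather than read from
# VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE.
config = torchair.CompilerConfig()
npu_backend = torchair.get_npu_backend(compiler_config=config)
compiled_model = torch.compile(model,
                               dynamic=True,
                               fullgraph=True,
                               backend=npu_backend)
```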

vllm_ascend/torchair/torchair_model_runner.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch_npu
-import vllm.envs as envs_vllm
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.distributed.parallel_state import get_dp_group
@@ -373,11 +372,10 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
         torch.npu.set_compile_mode(jit_compile=False)
         if not self.use_cached_npu_graph:
             npu_backend = torchair.get_npu_backend(compiler_config=config)
-            self.torchair_compiled_model = torch.compile(
-                self.model,
-                dynamic=True,
-                fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=npu_backend)
+            self.torchair_compiled_model = torch.compile(self.model,
+                                                         dynamic=True,
+                                                         fullgraph=True,
+                                                         backend=npu_backend)
             return self.torchair_compiled_model
         else:
             # Generate a new forward proxy code object to prevent the invalidation of
@@ -399,7 +397,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
                     batch_size] = torchair.inference.cache_compile(
                         self.model.__dict__[forward_proxy_name],
                         dynamic=True,
-                        fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
+                        fullgraph=True,
                         cache_dir=TORCHAIR_CACHE_DIR,
                         config=config,
                         ge_cache=False)
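
The cached-graph branch changes the same way. A hedged sketch of that call, mirroring the diff; the module, cache directory, and config below are placeholders, and `cache_compile` persists the captured graph under `cache_dir` so later runs can reuse it instead of recompiling.

```python
import torch
import torchair
import torchair.inference

class TinyModel(torch.nn.Module):
    # Placeholder for the runner's real model.
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(8, 8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)

model = TinyModel()
config = torchair.CompilerConfig()

# Mirrors the patched call site: fullgraph=True is passed unconditionally.
cached_forward = torchair.inference.cache_compile(
    model.forward,
    dynamic=True,
    fullgraph=True,
    cache_dir="/tmp/torchair_cache",  # stand-in for TORCHAIR_CACHE_DIR
    config=config,
    ge_cache=False)
```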
