File tree Expand file tree Collapse file tree 2 files changed +2
-9
lines changed Expand file tree Collapse file tree 2 files changed +2
-9
lines changed Original file line number Diff line number Diff line change @@ -25,7 +25,7 @@ outlines == 0.1.11 ; platform_machine == "s390x"
25
25
# required for outlines backend disk cache
26
26
diskcache == 5.6.3
27
27
lark == 1.2.2
28
- xgrammar == 0.1.21 ; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
28
+ xgrammar == 0.1.23 ; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
29
29
typing_extensions >= 4.10
30
30
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
31
31
partial-json-parser # used for parsing partial JSON outputs
Original file line number Diff line number Diff line change 90
90
91
91
if TYPE_CHECKING :
92
92
import xgrammar as xgr
93
- import xgrammar .kernels .apply_token_bitmask_inplace_torch_compile as xgr_torch_compile # noqa: E501
94
93
95
94
from vllm .model_executor .model_loader .tensorizer import TensorizerConfig
96
95
from vllm .v1 .core .sched .output import SchedulerOutput
97
96
else :
98
97
xgr = LazyLoader ("xgr" , globals (), "xgrammar" )
99
- xgr_torch_compile = LazyLoader (
100
- "xgr_torch_compile" , globals (),
101
- "xgrammar.kernels.apply_token_bitmask_inplace_torch_compile" )
102
98
103
99
logger = init_logger (__name__ )
104
100
@@ -1333,10 +1329,7 @@ def apply_grammar_bitmask(
1333
1329
# so we receive it in that format.
1334
1330
grammar_bitmask = torch .from_numpy (grammar_bitmask ).contiguous ()
1335
1331
1336
- # Force use of the torch.compile implementation from xgrammar to work
1337
- # around issues with the Triton kernel in concurrent structured output
1338
- # scenarios. See PR #19565 and issues #19493, #18376 for details.
1339
- xgr_torch_compile .apply_token_bitmask_inplace_torch_compile (
1332
+ xgr .apply_token_bitmask_inplace (
1340
1333
logits ,
1341
1334
grammar_bitmask .to (self .device , non_blocking = True ),
1342
1335
indices = out_indices if not skip_out_indices else None ,
You can’t perform that action at this time.
0 commit comments