Skip to content

Commit 8d1f70e

Browse files
russellb842974287
authored and committed
Upgrade xgrammar to 0.1.23 (vllm-project#22988)
Signed-off-by: Russell Bryant <[email protected]> Signed-off-by: Shiyan Deng <[email protected]>
1 parent 0f9a316 commit 8d1f70e

File tree

2 files changed

+2
-9
lines changed

2 files changed

+2
-9
lines changed

requirements/common.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ outlines == 0.1.11 ; platform_machine == "s390x"
2525
# required for outlines backend disk cache
2626
diskcache == 5.6.3
2727
lark == 1.2.2
28-
xgrammar == 0.1.21; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
28+
xgrammar == 0.1.23; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64"
2929
typing_extensions >= 4.10
3030
filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317
3131
partial-json-parser # used for parsing partial JSON outputs

vllm/v1/worker/gpu_model_runner.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,11 @@
9090

9191
if TYPE_CHECKING:
9292
import xgrammar as xgr
93-
import xgrammar.kernels.apply_token_bitmask_inplace_torch_compile as xgr_torch_compile # noqa: E501
9493

9594
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
9695
from vllm.v1.core.sched.output import SchedulerOutput
9796
else:
9897
xgr = LazyLoader("xgr", globals(), "xgrammar")
99-
xgr_torch_compile = LazyLoader(
100-
"xgr_torch_compile", globals(),
101-
"xgrammar.kernels.apply_token_bitmask_inplace_torch_compile")
10298

10399
logger = init_logger(__name__)
104100

@@ -1333,10 +1329,7 @@ def apply_grammar_bitmask(
13331329
# so we receive it in that format.
13341330
grammar_bitmask = torch.from_numpy(grammar_bitmask).contiguous()
13351331

1336-
# Force use of the torch.compile implementation from xgrammar to work
1337-
# around issues with the Triton kernel in concurrent structured output
1338-
# scenarios. See PR #19565 and issues #19493, #18376 for details.
1339-
xgr_torch_compile.apply_token_bitmask_inplace_torch_compile(
1332+
xgr.apply_token_bitmask_inplace(
13401333
logits,
13411334
grammar_bitmask.to(self.device, non_blocking=True),
13421335
indices=out_indices if not skip_out_indices else None,

0 commit comments

Comments (0)