1 parent 4e70d7c commit 44f56fd
vllm/config.py
@@ -1015,11 +1015,6 @@ def _verify_args(self) -> None:
             raise ValueError(
                 "GPU memory utilization must be less than 1.0. Got "
                 f"{self.gpu_memory_utilization}.")
-        from vllm.platforms import current_platform
-        if (current_platform.is_cuda() and self.block_size is not None
-                and self.block_size > 32):
-            raise ValueError("CUDA Paged Attention kernel only supports "
-                             f"block sizes up to 32. Got {self.block_size}.")
 
     def _verify_cache_dtype(self) -> None:
         if self.cache_dtype == "auto":
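For context, the deleted lines enforced a config-level cap on the paged-attention block size for CUDA; after this commit, _verify_args no longer rejects CUDA block sizes above 32. Below is a minimal standalone sketch of the removed guard, with the platform probe stubbed out so it runs without vLLM or a GPU (verify_block_size and the is_cuda flag are illustrative names, not vLLM API; the condition and error message are taken from the deleted lines above):

def verify_block_size(block_size, is_cuda=True):
    # Replicates the guard deleted by this commit: on CUDA, block sizes
    # above 32 were rejected during config validation.
    if is_cuda and block_size is not None and block_size > 32:
        raise ValueError("CUDA Paged Attention kernel only supports "
                         f"block sizes up to 32. Got {block_size}.")

verify_block_size(16)      # accepted both before and after this commit
try:
    verify_block_size(64)  # rejected before this commit; now allowed
except ValueError as err:
    print(err)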