1 parent 2e0e017 commit a2d2acb
vllm/attention/backends/blocksparse_attn.py
@@ -89,8 +89,7 @@ class BlocksparseFlashAttentionBackend(AttentionBackend):
 
     @staticmethod
     def get_name() -> str:
-        # For attention layer compatibility
-        return "FLASH_ATTN"
+        return "BLOCK_SPARSE_FLASH_ATTN"
 
     @staticmethod
     def get_impl_cls() -> Type["BlocksparseFlashAttentionImpl"]:
vllm/platforms/interface.py
@@ -33,6 +33,7 @@ class _Backend(enum.Enum):
     HPU_ATTN = enum.auto()
     PALLAS = enum.auto()
     IPEX = enum.auto()
+    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
     NO_ATTENTION = enum.auto()
 
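
For context, a minimal sketch of why the two hunks go together: backend selection in vLLM resolves the string returned by a backend's get_name() against the _Backend enum, so once the blocksparse backend stops reporting itself as "FLASH_ATTN" it needs a matching BLOCK_SPARSE_FLASH_ATTN member. The backend_name_to_enum helper below is an assumption used for illustration, not necessarily the exact resolution path in vLLM.

# Illustrative sketch only; backend_name_to_enum is a hypothetical helper
# standing in for however vLLM maps a backend's get_name() string to _Backend.
import enum


class _Backend(enum.Enum):
    FLASH_ATTN = enum.auto()
    BLOCK_SPARSE_FLASH_ATTN = enum.auto()
    NO_ATTENTION = enum.auto()


def backend_name_to_enum(backend_name: str) -> _Backend:
    # Look the name up directly in the enum; a name with no matching
    # member would raise a KeyError here, which is why the rename and
    # the new enum member must land in the same commit.
    return _Backend[backend_name]


# After this commit the blocksparse backend resolves to its own member
# instead of aliasing FLASH_ATTN.
assert (backend_name_to_enum("BLOCK_SPARSE_FLASH_ATTN")
        is _Backend.BLOCK_SPARSE_FLASH_ATTN)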