Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion vllm/model_executor/layers/quantization/fp8.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,8 @@ def apply(self,
input_scale=layer.input_scale,
bias=bias,
cutlass_fp8_supported=self.cutlass_fp8_supported,
use_per_token_if_dynamic=False)
# Default to per-token quantization if cutlass fp8 is supported.
use_per_token_if_dynamic=self.cutlass_fp8_supported)


class Fp8MoEMethod(FusedMoEMethodBase):
Expand Down
Loading