
Commit 262f740

kylesayrs authored and lulmer committed
[Quant] Add SupportsQuant to phi3 and clip (vllm-project#13104)
Signed-off-by: Louis Ulmer <[email protected]>
1 parent 8510e9d commit 262f740

25 files changed: +67 -13 lines

vllm/model_executor/layers/quantization/aqlm.py

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ def __init__(
         num_codebooks: int,
         out_group_size: int,
     ) -> None:
+        super().__init__()
         self.in_group_size = in_group_size
         self.nbits_per_codebook = nbits_per_codebook
         self.num_codebooks = num_codebooks

vllm/model_executor/layers/quantization/awq.py

Lines changed: 1 addition & 0 deletions
@@ -26,6 +26,7 @@ def __init__(
         zero_point: bool,
         modules_to_not_convert: Optional[List[str]] = None,
     ) -> None:
+        super().__init__()
         self.weight_bits = weight_bits
         self.group_size = group_size
         self.zero_point = zero_point

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ def __init__(self, weight_bits: int, group_size: int, zero_point: bool,
                  lm_head_quantized: bool,
                  modules_to_not_convert: Optional[List[str]],
                  full_config: Dict[str, Any]) -> None:
+        super().__init__()
         self.pack_factor = 32 // weight_bits  # packed into int32
         self.group_size = group_size
         self.zero_point = zero_point
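
As an aside on one of the unchanged context lines above: pack_factor is simply how many quantized values fit into a single 32-bit word. A quick illustration (not part of the diff):

for weight_bits in (4, 8):
    pack_factor = 32 // weight_bits  # values packed into one int32
    print(f"{weight_bits}-bit weights -> {pack_factor} values per int32")
# 4-bit weights -> 8 values per int32
# 8-bit weights -> 4 values per int32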

vllm/model_executor/layers/quantization/base_config.py

Lines changed: 6 additions & 2 deletions
@@ -2,7 +2,7 @@
 
 import inspect
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Mapping, Optional, Type
+from typing import Any, Dict, List, Optional, Type
 
 import torch
 from torch import nn
@@ -59,7 +59,11 @@ def method_has_implemented_embedding(
 
 class QuantizationConfig(ABC):
     """Base class for quantization configs."""
-    packed_modules_mapping: Mapping[str, List[str]] = dict()
+
+    def __init__(self):
+        super().__init__()
+        # mapping is updated by models as they initialize
+        self.packed_modules_mapping: Dict[str, List[str]] = dict()
 
     @abstractmethod
     def get_name(self) -> str:
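
This second hunk is the substance of the commit: packed_modules_mapping moves from a class attribute shared by every config to a per-instance dict created in __init__, which is why each config subclass in the other files gains a one-line super().__init__() call. A minimal sketch of that contract (stand-in class names, not the vLLM source) showing why the chaining matters:

from abc import ABC
from typing import Dict, List

class QuantizationConfigSketch(ABC):  # stand-in for vLLM's QuantizationConfig
    def __init__(self):
        super().__init__()
        # Per-instance mapping; previously a class-level dict that was
        # shared (and mutated) across every config object.
        self.packed_modules_mapping: Dict[str, List[str]] = dict()

class GoodConfig(QuantizationConfigSketch):
    def __init__(self, weight_bits: int) -> None:
        super().__init__()  # creates self.packed_modules_mapping
        self.weight_bits = weight_bits

class BadConfig(QuantizationConfigSketch):
    def __init__(self, weight_bits: int) -> None:
        self.weight_bits = weight_bits  # forgot super().__init__()

print(GoodConfig(4).packed_modules_mapping)  # {} -- present, per instance
# BadConfig(4).packed_modules_mapping would raise AttributeError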

vllm/model_executor/layers/quantization/bitsandbytes.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ def __init__(
         llm_int8_skip_modules: Optional[List[str]] = None,
         llm_int8_threshold: float = 6.0,
     ) -> None:
-
+        super().__init__()
         self.load_in_8bit = load_in_8bit
         self.load_in_4bit = load_in_4bit
         self.bnb_4bit_compute_dtype = bnb_4bit_compute_dtype

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py

Lines changed: 1 addition & 1 deletion
@@ -51,7 +51,7 @@ def __init__(
         kv_cache_scheme: Optional[Dict[str, Any]] = None,
         config: Optional[Dict[str, Any]] = None,
     ):
-
+        super().__init__()
         self.ignore = ignore
         self.quant_format = quant_format
         # Map from [target -> scheme]

vllm/model_executor/layers/quantization/deepspeedfp.py

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ def __init__(
         weight_bits: int = 8,
         group_size: int = 512,
     ) -> None:
+        super().__init__()
         self.weight_bits = weight_bits
         self.group_size = group_size
         self.valid_types = [torch.bfloat16, torch.float16]

vllm/model_executor/layers/quantization/experts_int8.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ class ExpertsInt8Config(QuantizationConfig):
     """Config class for Int8 experts quantization."""
 
     def __init__(self) -> None:
-        pass
+        super().__init__()
 
     @classmethod
     def get_name(cls) -> str:

vllm/model_executor/layers/quantization/fbgemm_fp8.py

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ class FBGEMMFp8Config(QuantizationConfig):
     """Config class for FBGEMM Fp8."""
 
     def __init__(self, ignore_list: List[str], input_scale_ub: float):
+        super().__init__()
         self.ignore_list = ignore_list if ignore_list else []
         self.input_scale_ub = input_scale_ub
 
vllm/model_executor/layers/quantization/fp8.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ def __init__(
         ignored_layers: Optional[List[str]] = None,
         weight_block_size: Optional[List[int]] = None,
     ) -> None:
+        super().__init__()
         self.is_checkpoint_fp8_serialized = is_checkpoint_fp8_serialized
         if is_checkpoint_fp8_serialized:
             logger.warning("Detected fp8 checkpoint. Please note that the "
