Skip to content

Commit ddf3449

Browse files
committed
v1: Set num_cpu_blocks on VllmConfig
This commit sets `vllm_config.cache_config.num_cpu_blocks` according to `vllm_config.cache_config.swap_space`: the swap space in bytes is integer-divided by the new `KVCacheConfig.kv_bytes_per_block` field.

Signed-off-by: Or Ozeri <[email protected]>
1 parent 6adaed4 commit ddf3449

File tree

3 files changed: 15 lines added (+15), 2 lines removed (-2).

vllm/v1/core/kv_cache_utils.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,7 @@ def _get_kv_cache_config_uniform_type(vllm_config: VllmConfig,
843843
kv_cache_tensors=kv_cache_tensors,
844844
kv_cache_groups=create_kv_cache_group_specs(kv_cache_spec,
845845
grouped_layer_names),
846+
kv_bytes_per_block=len(kv_cache_tensors) * page_size,
846847
)
847848

848849
num_tokens = num_blocks * vllm_config.cache_config.block_size
@@ -1003,6 +1004,7 @@ def _get_kv_cache_config_uniform_page_size(
10031004
num_blocks=num_blocks,
10041005
kv_cache_tensors=kv_cache_tensors,
10051006
kv_cache_groups=kv_cache_groups,
1007+
kv_bytes_per_block=len(kv_cache_tensors) * page_size,
10061008
)
10071009

10081010
min_block_size = min(
@@ -1021,7 +1023,10 @@ def _get_kv_cache_config_uniform_page_size(
10211023

10221024

10231025
def _get_kv_cache_config_attention_free() -> KVCacheConfig:
1024-
return KVCacheConfig(num_blocks=1, kv_cache_tensors=[], kv_cache_groups=[])
1026+
return KVCacheConfig(num_blocks=1,
1027+
kv_cache_tensors=[],
1028+
kv_cache_groups=[],
1029+
kv_bytes_per_block=0)
10251030

10261031

10271032
def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]):
@@ -1149,7 +1154,12 @@ def unify_kv_cache_configs(kv_cache_configs: list[KVCacheConfig]):
11491154
# first `num_blocks` blocks of the tensor.
11501155
min_num_blocks = min(kv_cache_config.num_blocks
11511156
for kv_cache_config in kv_cache_configs)
1157+
kv_bytes_per_block = sum([
1158+
kv_cache_config.kv_bytes_per_block
1159+
for kv_cache_config in kv_cache_configs
1160+
])
11521161
for kv_cache_config in kv_cache_configs:
11531162
kv_cache_config.num_blocks = min_num_blocks
1163+
kv_cache_config.kv_bytes_per_block = kv_bytes_per_block
11541164

11551165
return kv_cache_configs

vllm/v1/engine/core.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ def _initialize_kv_caches(
207207
for cfg in kv_cache_configs
208208
])
209209
num_gpu_blocks = kv_cache_configs[0].num_blocks
210-
num_cpu_blocks = 0
210+
num_cpu_blocks = (int(vllm_config.cache_config.swap_space_bytes) //
211+
kv_cache_configs[0].kv_bytes_per_block)
211212
scheduler_kv_cache_config = kv_cache_configs[0]
212213

213214
# Initialize kv cache and warmup the execution

vllm/v1/kv_cache_interface.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,3 +264,5 @@ class KVCacheConfig:
264264
see `_get_kv_cache_config_uniform_page_size` for more details.
265265
"""
266266
kv_cache_groups: list[KVCacheGroupSpec]
267+
"""The number of KV bytes per block"""
268+
kv_bytes_per_block: int

0 commit comments

Comments
 (0)