Skip to content

Commit eeff9b3

Browse files
youkaichao and lulmer
authored and committed
[core] fix sleep mode and pytorch checkpoint compatibility (vllm-project#13001)
Signed-off-by: youkaichao <[email protected]> Signed-off-by: Louis Ulmer <[email protected]>
1 parent 6e07e81 commit eeff9b3

File tree

2 files changed

+8
-3
lines changed

2 files changed

+8
-3
lines changed

tests/basic_correctness/test_cumem.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,10 +115,16 @@ def model(x):
115115

116116

117117
@fork_new_process_for_each_test
118-
def test_end_to_end():
118+
@pytest.mark.parametrize(
119+
"model",
120+
[
121+
"meta-llama/Llama-3.2-1B", # sleep mode with safetensors
122+
"facebook/opt-125m" # sleep mode with pytorch checkpoint
123+
])
124+
def test_end_to_end(model):
119125
free, total = torch.cuda.mem_get_info()
120126
used_bytes_baseline = total - free # in case other process is running
121-
llm = LLM("meta-llama/Llama-3.2-1B", enable_sleep_mode=True)
127+
llm = LLM(model, enable_sleep_mode=True)
122128
prompt = "How are you?"
123129
sampling_params = SamplingParams(temperature=0, max_tokens=10)
124130
output = llm.generate(prompt, sampling_params)

vllm/model_executor/model_loader/weight_utils.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -462,7 +462,6 @@ def pt_weights_iterator(
462462
state = torch.load(bin_file, map_location="cpu", weights_only=True)
463463
yield from state.items()
464464
del state
465-
torch.cuda.empty_cache()
466465

467466

468467
def get_gguf_extra_tensor_names(

0 commit comments

Comments (0)