2 files changed: +8 -3
Sleep-mode end-to-end test:

@@ -115,10 +115,16 @@ def model(x):
 
 
 @fork_new_process_for_each_test
-def test_end_to_end():
+@pytest.mark.parametrize(
+    "model",
+    [
+        "meta-llama/Llama-3.2-1B",  # sleep mode with safetensors
+        "facebook/opt-125m"  # sleep mode with pytorch checkpoint
+    ])
+def test_end_to_end(model):
     free, total = torch.cuda.mem_get_info()
     used_bytes_baseline = total - free  # in case other process is running
-    llm = LLM("meta-llama/Llama-3.2-1B", enable_sleep_mode=True)
+    llm = LLM(model, enable_sleep_mode=True)
     prompt = "How are you?"
     sampling_params = SamplingParams(temperature=0, max_tokens=10)
     output = llm.generate(prompt, sampling_params)
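
The parametrization exercises sleep mode against both a safetensors checkpoint and a legacy PyTorch .bin checkpoint. For context, a minimal sketch of how such a check could be driven with vLLM's public sleep/wake_up API; the level-1 sleep, the 2 GiB slack, and the driver loop are illustrative assumptions, not taken from the diff:

import torch

from vllm import LLM, SamplingParams


def check_sleep_mode(model: str) -> None:
    # Record baseline GPU usage in case another process already holds memory.
    free, total = torch.cuda.mem_get_info()
    used_bytes_baseline = total - free

    llm = LLM(model, enable_sleep_mode=True)
    llm.generate("How are you?", SamplingParams(temperature=0, max_tokens=10))

    # Level-1 sleep offloads weights and drops the KV cache, so GPU usage
    # should fall back toward the baseline until wake_up() restores the engine.
    llm.sleep(level=1)
    free, total = torch.cuda.mem_get_info()
    assert total - free < used_bytes_baseline + 2 * 2**30  # illustrative 2 GiB slack
    llm.wake_up()
    llm.generate("How are you?", SamplingParams(temperature=0, max_tokens=10))


# Covers both checkpoint formats named in the parametrization above.
for name in ["meta-llama/Llama-3.2-1B", "facebook/opt-125m"]:
    check_sleep_mode(name)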
vllm/model_executor/model_loader:

@@ -462,7 +462,6 @@ def pt_weights_iterator(
         state = torch.load(bin_file, map_location="cpu", weights_only=True)
         yield from state.items()
         del state
-        torch.cuda.empty_cache()
 
 
 def get_gguf_extra_tensor_names(
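
For reference, a sketch of how the iterator reads after the removal; the signature and type hints are simplified assumptions, since only the hunk context is visible. Each shard is loaded to CPU, so the per-shard cache flush was not releasing anything this loop allocated on the GPU (that reading is an assumption, not something the diff states):

from collections.abc import Generator

import torch


def pt_weights_iterator(
    hf_weights_files: list[str],
) -> Generator[tuple[str, torch.Tensor], None, None]:
    """Yield (name, tensor) pairs from PyTorch .bin checkpoint shards."""
    for bin_file in hf_weights_files:
        # Tensors are mapped to CPU, so no torch.cuda.empty_cache() call is
        # needed after each shard.
        state = torch.load(bin_file, map_location="cpu", weights_only=True)
        yield from state.items()
        del state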