File tree Expand file tree Collapse file tree 2 files changed +14
-1
lines changed
models/language/generation Expand file tree Collapse file tree 2 files changed +14
-1
lines changed Original file line number Diff line number Diff line change @@ -129,6 +129,18 @@ if [ $? -ne 0 ]; then
129
129
fi
130
130
echo " Test with granite-8b passed"
131
131
132
+ # used to check asynchronous scheduling
133
+ echo " Testing GSM8K on granite-8b with async scheduling"
134
+ echo VLLM_CONTIGUOUS_PA=False VLLM_SKIP_WARMUP=True PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 ASYNC_SCHEDULING=1 \
135
+ pytest -v -s vllm-gaudi/tests/models/language/generation/test_common.py --model_card_path vllm-gaudi/tests/full_tests/model_cards/granite-8b.yaml
136
+ VLLM_CONTIGUOUS_PA=False VLLM_SKIP_WARMUP=True PT_HPU_LAZY_MODE=1 VLLM_USE_V1=1 ASYNC_SCHEDULING=1 \
137
+ pytest -v -s vllm-gaudi/tests/models/language/generation/test_common.py --model_card_path vllm-gaudi/tests/full_tests/model_cards/granite-8b.yaml
138
+ if [ $? -ne 0 ]; then
139
+ echo " Error: Test failed for granite-8b + async_scheduling" >&2
140
+ exit -1
141
+ fi
142
+ echo " Test with granite-8b + async_scheduling passed"
143
+
132
144
# used to check MLA + MOE
133
145
echo " Testing GSM8K on deepseek v2 lite"
134
146
# deepseek-R1
Original file line number Diff line number Diff line change @@ -23,10 +23,11 @@ def launch_lm_eval(eval_config):
23
23
enforce_eager = os .environ .get ('ENFORCE_EAGER' , 'False' ).lower () in ['true' , '1' ]
24
24
kv_cache_dtype = os .environ .get ('KV_CACHE_DTYPE' , None )
25
25
task = eval_config .get ('tasks' , 'gsm8k' )
26
+ async_scheduling = os .environ .get ('ASYNC_SCHEDULING' , 'False' ).lower () in ['true' , '1' ]
26
27
model_args = {
27
28
'pretrained' : eval_config ['model_name' ],
28
29
'tensor_parallel_size' : tp_size ,
29
- 'async_scheduling' : True ,
30
+ 'async_scheduling' : async_scheduling ,
30
31
'enforce_eager' : enforce_eager ,
31
32
'enable_prefix_caching' : enable_apc ,
32
33
'add_bos_token' : True ,
You can’t perform that action at this time.
0 commit comments