@@ -1187,8 +1187,9 @@ def profile_run(self) -> None:
1187
1187
# NOTE: Currently the model is profiled with a single non-text
1188
1188
# modality with the max possible input tokens even when
1189
1189
# it supports multiple.
1190
- max_tokens_by_modality_dict = MULTIMODAL_REGISTRY .get_max_tokens_per_item_by_nonzero_modality ( # noqa: E501
1191
- self .model_config )
1190
+ max_tokens_by_modality_dict = (
1191
+ MULTIMODAL_REGISTRY .
1192
+ get_max_tokens_per_item_by_nonzero_modality (self .model_config ))
1192
1193
dummy_data_modality , max_tokens_per_mm_item = max (
1193
1194
max_tokens_by_modality_dict .items (), key = lambda item : item [1 ])
1194
1195
@@ -1275,15 +1276,15 @@ def profile_run(self) -> None:
1275
1276
# maximum num_tokens.
1276
1277
num_reqs = self .scheduler_config .max_num_seqs
1277
1278
num_tokens = self .max_num_tokens
1278
- min_tokens_per_req : int = num_tokens // num_reqs
1279
+ min_tokens_per_req = num_tokens // num_reqs
1279
1280
1280
- num_scheduled_tokens_list : List [ int ] = [min_tokens_per_req ] * num_reqs
1281
+ num_scheduled_tokens_list = [min_tokens_per_req ] * num_reqs
1281
1282
num_scheduled_tokens_list [- 1 ] += num_tokens % num_reqs
1282
1283
assert sum (num_scheduled_tokens_list ) == num_tokens
1283
1284
assert len (num_scheduled_tokens_list ) == num_reqs
1284
1285
1285
- num_scheduled_tokens : np . ndarray = np .array (num_scheduled_tokens_list ,
1286
- dtype = np .int32 )
1286
+ num_scheduled_tokens = np .array (num_scheduled_tokens_list ,
1287
+ dtype = np .int32 )
1287
1288
logit_indices = np .cumsum (num_scheduled_tokens ) - 1
1288
1289
1289
1290
with self .maybe_profile_with_lora (self .lora_config ,
0 commit comments