File tree Expand file tree Collapse file tree 2 files changed +8
-4
lines changed Expand file tree Collapse file tree 2 files changed +8
-4
lines changed Original file line number Diff line number Diff line change 16
16
NUM_PROMPTS = [10 ]
17
17
18
18
DEFAULT_SERVER_ARGS : List [str ] = [
19
- "--disable-log-requests" ,
20
19
"--worker-use-ray" ,
21
20
"--gpu-memory-utilization" ,
22
21
"0.85" ,
@@ -110,7 +109,7 @@ async def test_multi_step(
110
109
111
110
# Spin up client/server & issue completion API requests.
112
111
# Default `max_wait_seconds` is 240 but was empirically
113
- # was raised 3x to 720 *just for this test* due to
112
+ # raised 5x to 1200 *just for this test* due to
114
113
# observed timeouts in GHA CI
115
114
ref_completions = await completions_with_server_args (
116
115
prompts ,
Original file line number Diff line number Diff line change @@ -157,13 +157,19 @@ def url_root(self) -> str:
157
157
def url_for (self , * parts : str ) -> str :
158
158
return self .url_root + "/" + "/" .join (parts )
159
159
160
- def get_client (self ):
160
+ def get_client (self , ** kwargs ):
161
+ if "timeout" not in kwargs :
162
+ kwargs ["timeout" ] = 600
161
163
return openai .OpenAI (
162
164
base_url = self .url_for ("v1" ),
163
165
api_key = self .DUMMY_API_KEY ,
166
+ max_retries = 0 ,
167
+ ** kwargs ,
164
168
)
165
169
166
170
def get_async_client (self , ** kwargs ):
171
+ if "timeout" not in kwargs :
172
+ kwargs ["timeout" ] = 600
167
173
return openai .AsyncOpenAI (base_url = self .url_for ("v1" ),
168
174
api_key = self .DUMMY_API_KEY ,
169
175
max_retries = 0 ,
@@ -780,7 +786,6 @@ async def completions_with_server_args(
780
786
assert len (max_tokens ) == len (prompts )
781
787
782
788
outputs = None
783
- max_wait_seconds = 240 * 3 # 240 is default
784
789
with RemoteOpenAIServer (model_name ,
785
790
server_cli_args ,
786
791
max_wait_seconds = max_wait_seconds ) as server :
You can’t perform that action at this time.
0 commit comments