Skip to content

Commit e799966

Browse files
committed
Remove unnecessary flags and tests
Signed-off-by: Sahithi Chigurupati <[email protected]>
1 parent 0d92875 commit e799966

File tree

6 files changed

+4
-67
lines changed

6 files changed

+4
-67
lines changed

tests/entrypoints/openai/basic_tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
BASIC_SERVER_ARGS = [
99
"--dtype", "bfloat16", "--max-model-len", "1024", "--enforce-eager",
1010
"--max-num-seqs", "32", "--gpu-memory-utilization", "0.7",
11-
"--disable-log-stats", "--disable-log-requests",
1211
"--enable-server-load-tracking", "--chat-template",
1312
"{% for message in messages %}{{message['role'] + ': ' \
1413
+ message['content'] + '\\n'}}{% endfor %}", "--enable-auto-tool-choice",

tests/entrypoints/openai/embedding_tests/conftest.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
UNIVERSAL_EMBEDDING_ARGS = [
1010
"--runner", "pooling", "--dtype", "bfloat16", "--enforce-eager",
1111
"--max-model-len", "512", "--gpu-memory-utilization", "0.7",
12-
"--max-num-seqs", "4", "--disable-log-stats", "--disable-log-requests",
13-
"--chat-template", DUMMY_CHAT_TEMPLATE
12+
"--max-num-seqs", "4", "--chat-template", DUMMY_CHAT_TEMPLATE
1413
]
1514

1615

tests/entrypoints/openai/embedding_tests/test_encoder_decoder.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

tests/entrypoints/openai/embedding_tests/test_optional_middleware.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# Use a small embeddings model for faster startup and smaller memory footprint.
1616
# Since we are not testing any chat functionality,
1717
# using a chat capable model is overkill.
18-
MODEL_NAME = "intfloat/multilingual-e5-small"
18+
MODEL_NAME = "hmellor/tiny-random-LlamaForCausalLM"
1919

2020

2121
@pytest.fixture(scope="module")
@@ -27,12 +27,7 @@ def server(request: pytest.FixtureRequest, embedding_server):
2727
passed_params = [passed_params]
2828

2929
if passed_params:
30-
args = [
31-
"--runner", "pooling", "--dtype", "bfloat16", "--enforce-eager",
32-
"--max-model-len", "512", "--max-num-seqs", "4",
33-
"--gpu-memory-utilization", "0.7", "--disable-log-stats",
34-
"--disable-log-requests", *passed_params
35-
]
30+
args = ["--enforce-eager", *passed_params]
3631
with RemoteOpenAIServer(MODEL_NAME, args) as custom_server:
3732
yield custom_server
3833
else:

tests/entrypoints/openai/individual_tests/test_metrics.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ def default_server_args():
3939
@pytest.fixture(scope="module",
4040
params=[
4141
"",
42-
"--disable-frontend-multiprocessing",
4342
f"--show-hidden-metrics-for-version={PREV_MINOR_VERSION}",
4443
])
4544
def server(default_server_args, request):

tests/entrypoints/openai/lora_tests/test_default_mm_loras.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
import pytest_asyncio
99
from huggingface_hub import snapshot_download
1010

11+
from ....conftest import AudioTestAssets
1112
from ....utils import RemoteOpenAIServer
12-
from ...conftest import AudioTestAssets
1313

1414
# NOTE - the tests in this module are currently analogous to test_chat, but are
1515
# separated to avoid OOM killing due to module-scoped servers, since we

0 commit comments

Comments (0)