Commit 1b41751

Optimize entrypoints API server tests
Signed-off-by: Sahithi Chigurupati <[email protected]>
1 parent d3d2aad commit 1b41751

Some content is hidden: large commits have some content hidden by default, so not every changed file is shown below.

57 files changed (+257, -427 lines)

.buildkite/test-pipeline.yaml

Lines changed: 12 additions & 2 deletions
@@ -122,12 +122,22 @@ steps:
   torch_nightly: true
   source_file_dependencies:
   - vllm/
-  - tests/entrypoints/openai
+  - tests/entrypoints/openai/basic_tests
+  - tests/entrypoints/openai/embedding_tests
+  - tests/entrypoints/openai/individual_tests
+  - tests/entrypoints/openai/lora_tests
+  - tests/entrypoints/openai/multimodal_tests
+  - tests/entrypoints/openai/tool_parsers
   - tests/entrypoints/test_chat_utils
   commands:
   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
   - PYTHONPATH=/vllm-workspace pytest -v -s entrypoints/openai/test_collective_rpc.py # PYTHONPATH is needed to import custom Worker extension
-  - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py
+  - pytest -v -s entrypoints/openai/basic_tests/
+  - pytest -v -s entrypoints/openai/embedding_tests/
+  - pytest -v -s entrypoints/openai/individual_tests/
+  - pytest -v -s entrypoints/openai/lora_tests/
+  - pytest -v -s entrypoints/openai/multimodal_tests/
+  - pytest -v -s entrypoints/openai/tool_parsers/
   - pytest -v -s entrypoints/test_chat_utils.py
 
 - label: Distributed Tests (4 GPUs) # 10min

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+# Basic API tests package

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from ....utils import RemoteOpenAIServer
+
+BASIC_SERVER_ARGS = [
+    "--dtype", "bfloat16", "--max-model-len", "1024", "--enforce-eager",
+    "--max-num-seqs", "32", "--gpu-memory-utilization", "0.7",
+    "--disable-log-stats", "--disable-log-requests",
+    "--enable-server-load-tracking", "--chat-template",
+    "{% for message in messages %}{{message['role'] + ': ' \
+    + message['content'] + '\\n'}}{% endfor %}", "--enable-auto-tool-choice",
+    "--tool-call-parser", "hermes", "--trust-remote-code"
+]
+
+
+@pytest.fixture(scope="package")
+def server():
+    with RemoteOpenAIServer("microsoft/DialoGPT-small",
+                            BASIC_SERVER_ARGS,
+                            max_wait_seconds=120) as server:
+        yield server
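
The package-scoped fixture above is the core of the restructuring: every test module under tests/entrypoints/openai/basic_tests/ shares a single running server instead of starting its own. A minimal usage sketch, assuming only what this conftest provides (the test below is hypothetical and not part of the commit):

import requests


def test_health_example(server):
    # `server` is the shared RemoteOpenAIServer started once for the package
    response = requests.get(server.url_for("health"))
    assert response.status_code == 200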

tests/entrypoints/openai/test_basic.py renamed to tests/entrypoints/openai/basic_tests/test_basic.py

Lines changed: 27 additions & 106 deletions
@@ -11,63 +11,7 @@
 
 from vllm.version import __version__ as VLLM_VERSION
 
-from ...utils import RemoteOpenAIServer
-
-MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
-
-
-@pytest.fixture(scope='module')
-def server_args(request: pytest.FixtureRequest) -> list[str]:
-    """ Provide extra arguments to the server via indirect parametrization
-
-    Usage:
-
-    >>> @pytest.mark.parametrize(
-    >>>     "server_args",
-    >>>     [
-    >>>         ["--disable-frontend-multiprocessing"],
-    >>>         [
-    >>>             "--model=NousResearch/Hermes-3-Llama-3.1-70B",
-    >>>             "--enable-auto-tool-choice",
-    >>>         ],
-    >>>     ],
-    >>>     indirect=True,
-    >>> )
-    >>> def test_foo(server, client):
-    >>>     ...
-
-    This will run `test_foo` twice with servers with:
-    - `--disable-frontend-multiprocessing`
-    - `--model=NousResearch/Hermes-3-Llama-3.1-70B --enable-auto-tool-choice`.
-
-    """
-    if not hasattr(request, "param"):
-        return []
-
-    val = request.param
-
-    if isinstance(val, str):
-        return [val]
-
-    return request.param
-
-
-@pytest.fixture(scope="module")
-def server(server_args):
-    args = [
-        # use half precision for speed and memory savings in CI environment
-        "--dtype",
-        "bfloat16",
-        "--max-model-len",
-        "8192",
-        "--enforce-eager",
-        "--max-num-seqs",
-        "128",
-        *server_args,
-    ]
-
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
+MODEL_NAME = "microsoft/DialoGPT-small"
 
 
 @pytest_asyncio.fixture
@@ -76,52 +20,23 @@ async def client(server):
         yield async_client
 
 
-@pytest.mark.parametrize(
-    "server_args",
-    [
-        pytest.param([], id="default-frontend-multiprocessing"),
-        pytest.param(["--disable-frontend-multiprocessing"],
-                     id="disable-frontend-multiprocessing")
-    ],
-    indirect=True,
-)
 @pytest.mark.asyncio
-async def test_show_version(server: RemoteOpenAIServer):
+async def test_show_version(server):
     response = requests.get(server.url_for("version"))
     response.raise_for_status()
 
     assert response.json() == {"version": VLLM_VERSION}
 
 
-@pytest.mark.parametrize(
-    "server_args",
-    [
-        pytest.param([], id="default-frontend-multiprocessing"),
-        pytest.param(["--disable-frontend-multiprocessing"],
-                     id="disable-frontend-multiprocessing")
-    ],
-    indirect=True,
-)
 @pytest.mark.asyncio
-async def test_check_health(server: RemoteOpenAIServer):
+async def test_check_health(server):
     response = requests.get(server.url_for("health"))
 
     assert response.status_code == HTTPStatus.OK
 
 
-@pytest.mark.parametrize(
-    "server_args",
-    [
-        pytest.param(["--max-model-len", "10100"],
-                     id="default-frontend-multiprocessing"),
-        pytest.param(
-            ["--disable-frontend-multiprocessing", "--max-model-len", "10100"],
-            id="disable-frontend-multiprocessing")
-    ],
-    indirect=True,
-)
 @pytest.mark.asyncio
-async def test_request_cancellation(server: RemoteOpenAIServer):
+async def test_request_cancellation(server):
     # clunky test: send an ungodly amount of load in with short timeouts
     # then ensure that it still responds quickly afterwards
 
@@ -159,7 +74,7 @@ async def test_request_cancellation(server: RemoteOpenAIServer):
 
 
 @pytest.mark.asyncio
-async def test_request_wrong_content_type(server: RemoteOpenAIServer):
+async def test_request_wrong_content_type(server):
 
     chat_input = [{"role": "user", "content": "Write a long story"}]
     client = server.get_async_client()
@@ -174,38 +89,44 @@ async def test_request_wrong_content_type(server: RemoteOpenAIServer):
         })
 
 
-@pytest.mark.parametrize(
-    "server_args",
-    [
-        pytest.param(["--enable-server-load-tracking"],
-                     id="enable-server-load-tracking")
-    ],
-    indirect=True,
-)
 @pytest.mark.asyncio
-async def test_server_load(server: RemoteOpenAIServer):
+async def test_server_load(server):
     # Check initial server load
     response = requests.get(server.url_for("load"))
     assert response.status_code == HTTPStatus.OK
-    assert response.json().get("server_load") == 0
+    initial_load = response.json().get("server_load")
+    print(f"Initial server load: {initial_load}")
+    assert initial_load == 0, \
+        f"Expected initial server_load to be 0, but got {initial_load}"
 
     def make_long_completion_request():
         return requests.post(
-            server.url_for("v1/completions"),
+            server.url_for("v1/chat/completions"),
             headers={"Content-Type": "application/json"},
             json={
-                "prompt": "Give me a long story",
-                "max_tokens": 1000,
-                "temperature": 0,
+                "model": MODEL_NAME,
+                "messages": [{
+                    "role": "user",
+                    "content": "Give me a very long story with many details"
+                }],
+                "max_tokens": 1000,
+                "temperature": 0,
+                "stream": True,
             },
+            stream=True,
         )
 
     # Start the completion request in a background thread.
     completion_future = asyncio.create_task(
         asyncio.to_thread(make_long_completion_request))
 
-    # Give a short delay to ensure the request has started.
-    await asyncio.sleep(0.1)
+    await asyncio.sleep(0.5)
 
     # Check server load while the completion request is running.
     response = requests.get(server.url_for("load"))
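
The reworked test_server_load now generates load with a streaming request to /v1/chat/completions rather than a plain text completion. For illustration only (not part of this commit), the same request could be driven through the async OpenAI client that RemoteOpenAIServer exposes via get_async_client(); the model name and message mirror the values used in the test above:

async def stream_long_chat(server):
    # Sketch of an equivalent load-generating request via the OpenAI async client
    client = server.get_async_client()
    stream = await client.chat.completions.create(
        model="microsoft/DialoGPT-small",
        messages=[{
            "role": "user",
            "content": "Give me a very long story with many details"
        }],
        max_tokens=1000,
        temperature=0,
        stream=True,
    )
    async for _chunk in stream:
        # Consume chunks so the request stays in flight while /load is polled
        pass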

tests/entrypoints/openai/test_chat_echo.py renamed to tests/entrypoints/openai/basic_tests/test_chat_echo.py

Lines changed: 1 addition & 18 deletions
@@ -7,25 +7,8 @@
 import pytest
 import pytest_asyncio
 
-from ...utils import RemoteOpenAIServer
-
 # # any model with a chat template should work here
-MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
-
-
-@pytest.fixture(scope="module")
-def server():
-    args = [
-        # use half precision for speed and memory savings in CI environment
-        "--dtype",
-        "float16",
-        "--enforce-eager",
-        "--max-model-len",
-        "4080",
-    ]
-
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
+MODEL_NAME = "microsoft/DialoGPT-small"
 
 
 @pytest_asyncio.fixture

tests/entrypoints/openai/test_chat_logit_bias_validation.py renamed to tests/entrypoints/openai/basic_tests/test_chat_logit_bias_validation.py

Lines changed: 1 addition & 17 deletions
@@ -7,9 +7,7 @@
 
 from vllm.config import ModelConfig
 
-from ...utils import RemoteOpenAIServer
-
-MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
+MODEL_NAME = "microsoft/DialoGPT-small"
 
 
 def get_vocab_size(model_name):
@@ -21,20 +19,6 @@ def get_vocab_size(model_name):
     return config.get_vocab_size()
 
 
-@pytest.fixture(scope="module")
-def server():
-    args = [
-        "--dtype",
-        "bfloat16",
-        "--max-model-len",
-        "1024",
-        "--enforce-eager",
-    ]
-
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
-
-
 @pytest_asyncio.fixture
 async def client(server):
     async with server.get_async_client() as async_client:

tests/entrypoints/openai/test_chat_template.py renamed to tests/entrypoints/openai/basic_tests/test_chat_template.py

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,8 @@
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...models.registry import HF_EXAMPLE_MODELS
-from ...utils import VLLM_PATH
+from ....models.registry import HF_EXAMPLE_MODELS
+from ....utils import VLLM_PATH
 
 chatml_jinja_path = VLLM_PATH / "examples/template_chatml.jinja"
 assert chatml_jinja_path.exists()

tests/entrypoints/openai/test_cli_args.py renamed to tests/entrypoints/openai/basic_tests/test_cli_args.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 from vllm.entrypoints.openai.serving_models import LoRAModulePath
 from vllm.utils import FlexibleArgumentParser
 
-from ...utils import VLLM_PATH
+from ....utils import VLLM_PATH
 
 LORA_MODULE = {
     "name": "module2",

tests/entrypoints/openai/test_return_token_ids.py renamed to tests/entrypoints/openai/basic_tests/test_return_token_ids.py

Lines changed: 1 addition & 19 deletions
@@ -5,25 +5,7 @@
 
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...utils import RemoteOpenAIServer
-
-MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"
-
-
-@pytest.fixture(scope="module")
-def server():
-    args = [
-        "--max-model-len",
-        "2048",
-        "--max-num-seqs",
-        "128",
-        "--enable-auto-tool-choice",
-        "--tool-call-parser",
-        "hermes",
-        "--enforce-eager",
-    ]
-    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
-        yield remote_server
+MODEL_NAME = "microsoft/DialoGPT-small"
 
 
 @pytest.mark.asyncio

tests/entrypoints/openai/test_serving_chat.py renamed to tests/entrypoints/openai/basic_tests/test_serving_chat.py

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@
                                                     OpenAIServingModels)
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-MODEL_NAME = "openai-community/gpt2"
+MODEL_NAME = "microsoft/DialoGPT-small"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"
 BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
