Skip to content

Commit d8b736f

Browse files
russellb and taneem-ibrahim
authored and committed
Limit HTTP header count and size (#23267)
Signed-off-by: Taneem Ibrahim <[email protected]> Signed-off-by: Russell Bryant <[email protected]> Co-authored-by: Taneem Ibrahim <[email protected]> Signed-off-by: simon-mo <[email protected]>
1 parent 3a8708f commit d8b736f

File tree

4 files changed

+41
-0
lines changed

4 files changed

+41
-0
lines changed

vllm/entrypoints/constants.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
"""
4+
Shared constants for vLLM entrypoints.
5+
"""
6+
7+
# HTTP header limits for h11 parser
8+
# These constants help mitigate header abuse attacks
9+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304 # 4 MB
10+
H11_MAX_HEADER_COUNT_DEFAULT = 256

vllm/entrypoints/launcher.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from vllm.engine.async_llm_engine import AsyncEngineDeadError
1515
from vllm.engine.multiprocessing import MQEngineDeadError
1616
from vllm.engine.protocol import EngineClient
17+
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
18+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
1719
from vllm.entrypoints.ssl import SSLCertRefresher
1820
from vllm.logger import init_logger
1921
from vllm.utils import find_process_using_port
@@ -26,6 +28,11 @@ async def serve_http(app: FastAPI,
2628
sock: Optional[socket.socket],
2729
enable_ssl_refresh: bool = False,
2830
**uvicorn_kwargs: Any):
31+
"""
32+
Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
33+
options. Supports http header limits via h11_max_incomplete_event_size and
34+
h11_max_header_count.
35+
"""
2936
logger.info("Available routes are:")
3037
for route in app.routes:
3138
methods = getattr(route, "methods", None)
@@ -36,7 +43,21 @@ async def serve_http(app: FastAPI,
3643

3744
logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
3845

46+
# Extract header limit options if present
47+
h11_max_incomplete_event_size = uvicorn_kwargs.pop(
48+
"h11_max_incomplete_event_size", None)
49+
h11_max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)
50+
51+
# Set safe defaults if not provided
52+
if h11_max_incomplete_event_size is None:
53+
h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
54+
if h11_max_header_count is None:
55+
h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT
56+
3957
config = uvicorn.Config(app, **uvicorn_kwargs)
58+
# Set header limits
59+
config.h11_max_incomplete_event_size = h11_max_incomplete_event_size
60+
config.h11_max_header_count = h11_max_header_count
4061
config.load()
4162
server = uvicorn.Server(config)
4263
_add_shutdown_handlers(app, server)

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1894,6 +1894,8 @@ async def run_server_worker(listen_address,
18941894
ssl_certfile=args.ssl_certfile,
18951895
ssl_ca_certs=args.ssl_ca_certs,
18961896
ssl_cert_reqs=args.ssl_cert_reqs,
1897+
h11_max_incomplete_event_size=args.h11_max_incomplete_event_size,
1898+
h11_max_header_count=args.h11_max_header_count,
18971899
**uvicorn_kwargs,
18981900
)
18991901

vllm/entrypoints/openai/cli_args.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
2121
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
2222
validate_chat_template)
23+
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
24+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
2325
from vllm.entrypoints.openai.serving_models import LoRAModulePath
2426
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
2527
from vllm.logger import init_logger
@@ -172,6 +174,12 @@ class FrontendArgs:
172174
enable_log_outputs: bool = False
173175
"""If set to True, enable logging of model outputs (generations)
174176
in addition to the input logging that is enabled by default."""
177+
h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
178+
"""Maximum size (bytes) of an incomplete HTTP event (header or body) for
179+
h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
180+
h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
181+
"""Maximum number of HTTP headers allowed in a request for h11 parser.
182+
Helps mitigate header abuse. Default: 256."""
175183

176184
@staticmethod
177185
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:

0 commit comments

Comments
 (0)