Skip to content

Commit 6346ee2

Browse files
Limit HTTP header count and size (vllm-project#23267)
Manually applied cherry-pick of commit d8b736f.

Signed-off-by: Taneem Ibrahim <[email protected]>
Signed-off-by: Russell Bryant <[email protected]>
Co-authored-by: Taneem Ibrahim <[email protected]>
Signed-off-by: simon-mo <[email protected]>
1 parent 9e6754e commit 6346ee2

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

vllm/entrypoints/constants.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
"""
4+
Shared constants for vLLM entrypoints.
5+
"""
6+
7+
# HTTP header limits for h11 parser
8+
# These constants help mitigate header abuse attacks
9+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304 # 4 MB
10+
H11_MAX_HEADER_COUNT_DEFAULT = 256

vllm/entrypoints/launcher.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,20 @@
99
from vllm import envs
1010
from vllm.engine.async_llm_engine import AsyncEngineDeadError
1111
from vllm.engine.multiprocessing import MQEngineDeadError
12+
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
13+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
1214
from vllm.logger import init_logger
1315
from vllm.utils import find_process_using_port
1416

1517
logger = init_logger(__name__)
1618

1719

1820
async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
21+
"""
22+
Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
23+
options. Supports HTTP header limits via h11_max_incomplete_event_size and
24+
h11_max_header_count.
25+
"""
1926
logger.info("Available routes are:")
2027
for route in app.routes:
2128
methods = getattr(route, "methods", None)
@@ -26,7 +33,21 @@ async def serve_http(app: FastAPI, **uvicorn_kwargs: Any):
2633

2734
logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
2835

36+
# Extract header limit options if present
37+
h11_max_incomplete_event_size = uvicorn_kwargs.pop(
38+
"h11_max_incomplete_event_size", None)
39+
h11_max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)
40+
41+
# Set safe defaults if not provided
42+
if h11_max_incomplete_event_size is None:
43+
h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
44+
if h11_max_header_count is None:
45+
h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT
46+
2947
config = uvicorn.Config(app, **uvicorn_kwargs)
48+
# Set header limits
49+
config.h11_max_incomplete_event_size = h11_max_incomplete_event_size
50+
config.h11_max_header_count = h11_max_header_count
3051
server = uvicorn.Server(config)
3152
_add_shutdown_handlers(app, server)
3253

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,8 @@ def signal_handler(*_) -> None:
753753
ssl_certfile=args.ssl_certfile,
754754
ssl_ca_certs=args.ssl_ca_certs,
755755
ssl_cert_reqs=args.ssl_cert_reqs,
756+
h11_max_incomplete_event_size=args.h11_max_incomplete_event_size,
757+
h11_max_header_count=args.h11_max_header_count,
756758
**uvicorn_kwargs,
757759
)
758760

vllm/entrypoints/openai/cli_args.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
1313
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
1414
validate_chat_template)
15+
from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
16+
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
1517
from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
1618
PromptAdapterPath)
1719
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
@@ -251,6 +253,23 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
251253
default=False,
252254
help="If set to True, enable prompt_tokens_details in usage.")
253255

256+
parser.add_argument(
257+
"--h11-max-incomplete-event-size",
258+
type=int,
259+
default=H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT,
260+
help="Maximum size (bytes) of an incomplete HTTP event (header or body)"
261+
" for h11 parser. Helps mitigate header abuse. "
262+
f"Default: {H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT}"
263+
f" ({H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT / (1024*1024):.1f} MB).")
264+
265+
parser.add_argument(
266+
"--h11-max-header-count",
267+
type=int,
268+
default=H11_MAX_HEADER_COUNT_DEFAULT,
269+
help="Maximum number of HTTP headers allowed in a request for h11"
270+
" parser. Helps mitigate header abuse. "
271+
f"Default: {H11_MAX_HEADER_COUNT_DEFAULT}.")
272+
254273
return parser
255274

256275

0 commit comments

Comments
 (0)