14
14
from vllm .engine .async_llm_engine import AsyncEngineDeadError
15
15
from vllm .engine .multiprocessing import MQEngineDeadError
16
16
from vllm .engine .protocol import EngineClient
17
+ from vllm .entrypoints .constants import (H11_MAX_HEADER_COUNT_DEFAULT ,
18
+ H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT )
17
19
from vllm .entrypoints .ssl import SSLCertRefresher
18
20
from vllm .logger import init_logger
19
21
from vllm .utils import find_process_using_port
@@ -26,6 +28,11 @@ async def serve_http(app: FastAPI,
26
28
sock : Optional [socket .socket ],
27
29
enable_ssl_refresh : bool = False ,
28
30
** uvicorn_kwargs : Any ):
31
+ """
32
+ Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
33
+ options. Supports HTTP header limits via h11_max_incomplete_event_size and
34
+ h11_max_header_count.
35
+ """
29
36
logger .info ("Available routes are:" )
30
37
for route in app .routes :
31
38
methods = getattr (route , "methods" , None )
@@ -36,7 +43,21 @@ async def serve_http(app: FastAPI,
36
43
37
44
logger .info ("Route: %s, Methods: %s" , path , ', ' .join (methods ))
38
45
46
+ # Extract header limit options if present
47
+ h11_max_incomplete_event_size = uvicorn_kwargs .pop (
48
+ "h11_max_incomplete_event_size" , None )
49
+ h11_max_header_count = uvicorn_kwargs .pop ("h11_max_header_count" , None )
50
+
51
+ # Set safe defaults if not provided
52
+ if h11_max_incomplete_event_size is None :
53
+ h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
54
+ if h11_max_header_count is None :
55
+ h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT
56
+
39
57
config = uvicorn .Config (app , ** uvicorn_kwargs )
58
+ # Set header limits
59
+ config .h11_max_incomplete_event_size = h11_max_incomplete_event_size
60
+ config .h11_max_header_count = h11_max_header_count
40
61
config .load ()
41
62
server = uvicorn .Server (config )
42
63
_add_shutdown_handlers (app , server )
0 commit comments