Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,9 @@
"libcst",
"rich",
"opentelemetry-api",
"opentelemetry-exporter-otlp",
"opentelemetry-instrumentation-fastapi",
"opentelemetry-sdk",
"mistral-common[opencv]>=1.6.3",
]

Expand Down Expand Up @@ -442,8 +445,13 @@ def run(self):

extras["benchmark"] = deps_list("optimum-benchmark")

# OpenTelemetry dependencies for metrics collection in continuous batching
extras["open-telemetry"] = deps_list("opentelemetry-api") + ["opentelemetry-exporter-otlp", "opentelemetry-sdk"]
# OpenTelemetry dependencies for metrics collection
# "open-telemetry-api" is necessary if you want to get metrics from Continuous Batching
extras["open-telemetry-api"] = deps_list("opentelemetry-api")
# "open-telemetry" is necessary if you want to export the collected metrics to a backend, needed for the CB example code
extras["open-telemetry"] = deps_list("opentelemetry-exporter-otlp", "opentelemetry-sdk") + extras["open-telemetry-api"]
# "open-telemetry-serving" is necessary if you want to run `transformers serve` instrumented with OpenTelemetry
extras["open-telemetry-serving"] = deps_list("opentelemetry-instrumentation-fastapi") + extras["open-telemetry"]

# when modifying the following list, make sure to update src/transformers/dependency_versions_check.py
install_requires = [
Expand Down
8 changes: 8 additions & 0 deletions src/transformers/commands/serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

from huggingface_hub import model_info
from huggingface_hub.constants import HF_HUB_OFFLINE
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from tokenizers.decoders import DecodeStream

import transformers
Expand Down Expand Up @@ -744,6 +745,13 @@ async def get_or_set_request_id(request: Request, call_next):
response.headers[X_REQUEST_ID] = request_id
return response

FastAPIInstrumentor.instrument_app(
app,
excluded_urls="health",
http_capture_headers_server_request=[X_REQUEST_ID],
http_capture_headers_server_response=[X_REQUEST_ID],
)

uvicorn.run(app, host=self.args.host, port=self.args.port, log_level=self.args.log_level)

@functools.cache
Expand Down