From 363fead70f391decea9710e3898a60bedbd74bad Mon Sep 17 00:00:00 2001 From: Luc Georges Date: Fri, 5 Sep 2025 17:51:41 +0200 Subject: [PATCH] feat(serve): add OTEL --- setup.py | 12 ++++++++++-- src/transformers/commands/serving.py | 8 ++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index b4feedbc77a1..bfc07e80d188 100644 --- a/setup.py +++ b/setup.py @@ -200,6 +200,9 @@ "libcst", "rich", "opentelemetry-api", + "opentelemetry-exporter-otlp", + "opentelemetry-instrumentation-fastapi", + "opentelemetry-sdk", "mistral-common[opencv]>=1.6.3", ] @@ -442,8 +445,13 @@ def run(self): extras["benchmark"] = deps_list("optimum-benchmark") -# OpenTelemetry dependencies for metrics collection in continuous batching -extras["open-telemetry"] = deps_list("opentelemetry-api") + ["opentelemetry-exporter-otlp", "opentelemetry-sdk"] +# OpenTelemetry dependencies for metrics collection +# "open-telemetry-api" is necessary if you want to get metrics from Continuous Batching +extras["open-telemetry-api"] = deps_list("opentelemetry-api") +# "open-telemetry" is necessary if you want to export the collected metrics to a backend, needed for the CB example code +extras["open-telemetry"] = deps_list("opentelemetry-exporter-otlp", "opentelemetry-sdk") + extras["open-telemetry-api"] +# "open-telemetry-serving" is necessary if you want to run `transformers serve` instrumented with OpenTelemetry +extras["open-telemetry-serving"] = deps_list("opentelemetry-instrumentation-fastapi") + extras["open-telemetry"] # when modifying the following list, make sure to update src/transformers/dependency_versions_check.py install_requires = [ diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index 622f50378dfd..f1250b000ca7 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -35,6 +35,7 @@ from huggingface_hub import model_info from huggingface_hub.constants import HF_HUB_OFFLINE 
+from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from tokenizers.decoders import DecodeStream import transformers @@ -744,6 +745,13 @@ async def get_or_set_request_id(request: Request, call_next): response.headers[X_REQUEST_ID] = request_id return response + FastAPIInstrumentor.instrument_app( + app, + excluded_urls="health", + http_capture_headers_server_request=[X_REQUEST_ID], + http_capture_headers_server_response=[X_REQUEST_ID], + ) + uvicorn.run(app, host=self.args.host, port=self.args.port, log_level=self.args.log_level) @functools.cache