-
Notifications
You must be signed in to change notification settings - Fork 558
Implement new POTel span processor #3223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
28effd6
e7cbb59
618d6ca
7dba029
1a35dae
c523182
9bfab81
e7a20f2
436626b
5d04d3d
048acc9
8a08fb3
2e2e5b9
2c29711
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,26 @@ | ||
from opentelemetry.context.context import Context | ||
from opentelemetry.context import Context, create_key, get_value, set_value | ||
from opentelemetry.context.contextvars_context import ContextVarsRuntimeContext | ||
|
||
from sentry_sdk.scope import Scope | ||
|
||
|
||
_SCOPES_KEY = create_key("sentry_scopes") | ||
|
||
|
||
class SentryContextVarsRuntimeContext(ContextVarsRuntimeContext): | ||
def attach(self, context): | ||
# type: (Context) -> object | ||
# TODO-neel-potel do scope management | ||
return super().attach(context) | ||
scopes = get_value(_SCOPES_KEY, context) | ||
|
||
if scopes and isinstance(scopes, tuple): | ||
(current_scope, isolation_scope) = scopes | ||
else: | ||
current_scope = Scope.get_current_scope() | ||
isolation_scope = Scope.get_isolation_scope() | ||
|
||
# TODO-neel-potel fork isolation_scope too like JS | ||
# once we setup our own apis to pass through to otel | ||
new_scopes = (current_scope.fork(), isolation_scope) | ||
new_context = set_value(_SCOPES_KEY, new_scopes, context) | ||
|
||
def detach(self, token): | ||
# type: (object) -> None | ||
# TODO-neel-potel not sure if we need anything here, see later | ||
super().detach(token) | ||
return super().attach(new_context) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,23 @@ | ||
from opentelemetry.sdk.trace import SpanProcessor | ||
from collections import deque | ||
|
||
from opentelemetry.trace import format_trace_id, format_span_id | ||
from opentelemetry.context import Context | ||
from opentelemetry.sdk.trace import Span, ReadableSpan, SpanProcessor | ||
|
||
from sentry_sdk import capture_event | ||
from sentry_sdk.integrations.opentelemetry.utils import ( | ||
is_sentry_span, | ||
convert_otel_timestamp, | ||
) | ||
from sentry_sdk.integrations.opentelemetry.consts import ( | ||
OTEL_SENTRY_CONTEXT, | ||
SPAN_ORIGIN, | ||
) | ||
from sentry_sdk._types import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from typing import Optional | ||
from opentelemetry.sdk.trace import ReadableSpan | ||
from typing import Optional, List, Any, Deque | ||
from sentry_sdk._types import Event | ||
|
||
|
||
class PotelSentrySpanProcessor(SpanProcessor): | ||
|
@@ -22,15 +34,23 @@ def __new__(cls): | |
|
||
def __init__(self): | ||
# type: () -> None | ||
pass | ||
self._children_spans = {} # type: dict[int, List[ReadableSpan]] | ||
|
||
def on_start(self, span, parent_context=None): | ||
# type: (ReadableSpan, Optional[Context]) -> None | ||
# type: (Span, Optional[Context]) -> None | ||
pass | ||
|
||
def on_end(self, span): | ||
# type: (ReadableSpan) -> None | ||
pass | ||
if is_sentry_span(span): | ||
return | ||
|
||
# TODO-neel-potel-remote only take parent if not remote | ||
if span.parent: | ||
self._children_spans.setdefault(span.parent.span_id, []).append(span) | ||
else: | ||
# if have a root span ending, we build a transaction and send it | ||
self._flush_root_span(span) | ||
Comment on lines
+55
to
+56
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking what happens if there's a child span that, for whatever reason (weird async cases? wonky instrumentation?), hasn't finished before the root span. Since we're not using Do we need some sort of cleanup of orphaned spans? Should we wait a bit before flushing the transaction to account for child spans possibly ending very close to the parent span, but a bit late? IIRC I've noticed JS also having a small sleep in place. TBH not sure how much of a real world problem late child spans are, but I can imagine they happen. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another option: Also use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. some heuristic cleanup logic will be done in a follow up PR yes, I haven't thought through exactly how we'll do it but JS just has a cutoff logic of 5 minutes |
||
|
||
# TODO-neel-potel not sure we need a clear like JS | ||
def shutdown(self): | ||
|
@@ -42,3 +62,113 @@ def shutdown(self): | |
def force_flush(self, timeout_millis=30000):
    # type: (int) -> bool
    """
    Report a successful flush without doing any work.

    ``timeout_millis`` is accepted but not used by this implementation; the
    method unconditionally returns ``True``.
    """
    return True
|
||
def _flush_root_span(self, span): | ||
# type: (ReadableSpan) -> None | ||
transaction_event = self._root_span_to_transaction_event(span) | ||
if not transaction_event: | ||
return | ||
|
||
spans = [] | ||
for child in self._collect_children(span): | ||
span_json = self._span_to_json(child) | ||
if span_json: | ||
spans.append(span_json) | ||
transaction_event.setdefault("spans", []).extend(spans) | ||
# TODO-neel-potel sort and cutoff max spans | ||
|
||
capture_event(transaction_event) | ||
|
||
def _collect_children(self, span): | ||
# type: (ReadableSpan) -> List[ReadableSpan] | ||
if not span.context: | ||
return [] | ||
|
||
children = [] | ||
bfs_queue = deque() # type: Deque[int] | ||
bfs_queue.append(span.context.span_id) | ||
|
||
while bfs_queue: | ||
parent_span_id = bfs_queue.popleft() | ||
node_children = self._children_spans.pop(parent_span_id, []) | ||
children.extend(node_children) | ||
bfs_queue.extend( | ||
[child.context.span_id for child in node_children if child.context] | ||
) | ||
|
||
return children | ||
|
||
# we construct the event from scratch here | ||
# and do not use the current Transaction class, for easier refactoring | ||
Comment on lines
+104
to
+105
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is cool, haven't thought of completely bypassing this. I'll have to think about the implications for the granular instrumenter if we isolate OTel and our instrumentation like this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so for now I'm just creating a raw dict, but I'm thinking eventually of a |
||
# TODO-neel-potel op, description, status logic | ||
def _root_span_to_transaction_event(self, span): | ||
# type: (ReadableSpan) -> Optional[Event] | ||
if not span.context: | ||
return None | ||
if not span.start_time: | ||
return None | ||
if not span.end_time: | ||
return None | ||
|
||
trace_id = format_trace_id(span.context.trace_id) | ||
span_id = format_span_id(span.context.span_id) | ||
parent_span_id = format_span_id(span.parent.span_id) if span.parent else None | ||
|
||
trace_context = { | ||
"trace_id": trace_id, | ||
"span_id": span_id, | ||
"origin": SPAN_ORIGIN, | ||
"op": span.name, # TODO | ||
"status": "ok", # TODO | ||
} # type: dict[str, Any] | ||
|
||
if parent_span_id: | ||
trace_context["parent_span_id"] = parent_span_id | ||
if span.attributes: | ||
trace_context["data"] = dict(span.attributes) | ||
|
||
contexts = {"trace": trace_context} | ||
if span.resource.attributes: | ||
contexts[OTEL_SENTRY_CONTEXT] = {"resource": dict(span.resource.attributes)} | ||
|
||
event = { | ||
"type": "transaction", | ||
"transaction": span.name, # TODO | ||
"transaction_info": {"source": "custom"}, # TODO | ||
"contexts": contexts, | ||
"start_timestamp": convert_otel_timestamp(span.start_time), | ||
"timestamp": convert_otel_timestamp(span.end_time), | ||
} # type: Event | ||
|
||
return event | ||
|
||
def _span_to_json(self, span): | ||
# type: (ReadableSpan) -> Optional[dict[str, Any]] | ||
if not span.context: | ||
return None | ||
if not span.start_time: | ||
return None | ||
if not span.end_time: | ||
return None | ||
|
||
trace_id = format_trace_id(span.context.trace_id) | ||
span_id = format_span_id(span.context.span_id) | ||
parent_span_id = format_span_id(span.parent.span_id) if span.parent else None | ||
|
||
span_json = { | ||
"trace_id": trace_id, | ||
"span_id": span_id, | ||
"origin": SPAN_ORIGIN, | ||
"op": span.name, # TODO | ||
"description": span.name, # TODO | ||
"status": "ok", # TODO | ||
"start_timestamp": convert_otel_timestamp(span.start_time), | ||
"timestamp": convert_otel_timestamp(span.end_time), | ||
} # type: dict[str, Any] | ||
|
||
if parent_span_id: | ||
span_json["parent_span_id"] = parent_span_id | ||
if span.attributes: | ||
span_json["data"] = dict(span.attributes) | ||
|
||
return span_json |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
from typing import cast | ||
from datetime import datetime, timezone | ||
|
||
from opentelemetry.semconv.trace import SpanAttributes | ||
from opentelemetry.sdk.trace import ReadableSpan | ||
|
||
from sentry_sdk import get_client | ||
from sentry_sdk.utils import Dsn | ||
|
||
from sentry_sdk._types import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from typing import Optional | ||
|
||
|
||
def is_sentry_span(span):
    # type: (ReadableSpan) -> bool
    """
    Tell whether ``span`` describes an HTTP request made to Sentry itself.

    This breaks an infinite loop: HTTP requests to Sentry are caught by OTel
    and would otherwise be sent again to Sentry.

    The span's ``SpanAttributes.HTTP_URL`` attribute is compared against the
    network location of the current client's DSN; the span counts as a Sentry
    span when that location occurs in the URL. A span without attributes or
    URL, a client without a DSN, or a DSN that cannot be parsed all give
    ``False``.
    """
    attributes = span.attributes
    if not attributes:
        return False

    span_url = cast("Optional[str]", attributes.get(SpanAttributes.HTTP_URL, None))
    if not span_url:
        return False

    client = get_client()
    if not client.dsn:
        return False

    try:
        dsn_netloc = Dsn(client.dsn).netloc
    except Exception:
        # Best effort: a DSN that fails to parse means no match is possible.
        return False

    if not dsn_netloc:
        return False

    return dsn_netloc in span_url
|
||
|
||
def convert_otel_timestamp(time):
    # type: (int) -> datetime
    """
    Convert a nanosecond epoch timestamp into a UTC-aware ``datetime``.

    ``time`` is divided by ``1e9`` to get (float) seconds since the epoch.
    """
    epoch_seconds = time / 1e9
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
Uh oh!
There was an error while loading. Please reload this page.