Skip to content

Commit f16de5c

Browse files
P403n1x87 and tylfin authored
chore(debugger): send debugger snapshots to debugger track (#14402)
Send debugger snapshots to the debugger track. Logs will still go through the default logs intake. Refs: [DEBUG-4339](https://datadoghq.atlassian.net/browse/DEBUG-4339) ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) [DEBUG-4338]: https://datadoghq.atlassian.net/browse/DEBUG-4338?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ [DEBUG-4339]: https://datadoghq.atlassian.net/browse/DEBUG-4339?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ --------- Co-authored-by: Tyler Finethy <[email protected]>
1 parent 1e3ceeb commit f16de5c

File tree

16 files changed

+144
-52
lines changed

16 files changed

+144
-52
lines changed

ddtrace/debugging/_encoding.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,18 @@ def __init__(self, service: str, host: Optional[str] = None) -> None:
249249
self._service = service
250250
self._host = host
251251

252+
def _encode(self, item: LogSignal) -> str:
253+
return json.dumps(_build_log_track_payload(self._service, item, self._host))
254+
255+
def encode(self, item: LogSignal) -> bytes:
256+
return self._encode(item).encode("utf-8")
257+
258+
259+
class SnapshotJsonEncoder(LogSignalJsonEncoder):
260+
"""Encoder for snapshot signals, with automatic pruning of large snapshots."""
261+
252262
def encode(self, item: LogSignal) -> bytes:
253-
return self.pruned(json.dumps(_build_log_track_payload(self._service, item, self._host))).encode("utf-8")
263+
return self.pruned(self._encode(item)).encode("utf-8")
254264

255265
def pruned(self, log_signal_json: str) -> str:
256266
if len(log_signal_json) <= self.MAX_SIGNAL_SIZE:
@@ -310,24 +320,26 @@ def __init__(
310320
) -> None:
311321
self._encoder = encoder
312322
self._buffer = JsonBuffer(buffer_size)
313-
self._lock = forksafe.Lock()
323+
self._lock = forksafe.RLock()
314324
self._on_full = on_full
315325
self.count = 0
316326
self.max_size = buffer_size - self._buffer.size
327+
self._full = False
317328

318329
def put(self, item: Snapshot) -> int:
319330
return self.put_encoded(item, self._encoder.encode(item))
320331

321332
def put_encoded(self, item: Snapshot, encoded: bytes) -> int:
322-
try:
323-
with self._lock:
333+
with self._lock:
334+
try:
324335
size = self._buffer.put(encoded)
325336
self.count += 1
326337
return size
327-
except BufferFull:
328-
if self._on_full is not None:
329-
self._on_full(item, encoded)
330-
raise
338+
except BufferFull:
339+
self._full = True
340+
if self._on_full is not None:
341+
self._on_full(item, encoded)
342+
raise
331343

332344
def flush(self) -> Optional[Union[bytes, bytearray]]:
333345
with self._lock:
@@ -338,4 +350,9 @@ def flush(self) -> Optional[Union[bytes, bytearray]]:
338350

339351
encoded = self._buffer.flush()
340352
self.count = 0
353+
self._full = False
341354
return encoded
355+
356+
def is_full(self) -> bool:
357+
with self._lock:
358+
return self._full

ddtrace/debugging/_exception/replay.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ def build(cls, exc_id: uuid.UUID, frame: FrameType) -> "SpanExceptionProbe":
189189

190190
@dataclass
191191
class SpanExceptionSnapshot(Snapshot):
192+
__type__ = "er_snapshot"
193+
192194
exc_id: t.Optional[uuid.UUID] = None
193195

194196
@property

ddtrace/debugging/_probe/status.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def _payload(
4949
"timestamp": int(timestamp * 1e3), # milliseconds
5050
"message": message,
5151
"ddsource": "dd_debugger",
52+
"type": "diagnostic",
5253
"debugger": {
5354
"diagnostics": {
5455
"probeId": probe.probe_id,

ddtrace/debugging/_signal/collector.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22
from typing import Any
33
from typing import Callable
4+
from typing import Dict
45
from typing import List
56
from typing import Tuple
67

@@ -9,6 +10,7 @@
910
from ddtrace.debugging._signal.log import LogSignal
1011
from ddtrace.debugging._signal.model import Signal
1112
from ddtrace.debugging._signal.model import SignalState
13+
from ddtrace.debugging._signal.model import SignalTrack
1214
from ddtrace.internal._encoding import BufferFull
1315
from ddtrace.internal.compat import ExcInfoType
1416
from ddtrace.internal.logger import get_logger
@@ -29,18 +31,20 @@ class SignalCollector(object):
2931
encoded, or the signal status indicate it should be skipped.
3032
"""
3133

32-
def __init__(self, encoder: BufferedEncoder) -> None:
33-
self._encoder = encoder
34+
def __init__(self, tracks: Dict[SignalTrack, BufferedEncoder]) -> None:
35+
self._tracks = tracks
3436

3537
def _enqueue(self, log_signal: LogSignal) -> None:
3638
try:
3739
log.debug(
38-
"[%s][P: %s] SignalCollector. _encoder (%s) _enqueue signal", os.getpid(), os.getppid(), self._encoder
40+
"[%s][P: %s] SignalCollector enqueu signal on track %s", os.getpid(), os.getppid(), log_signal.__track__
3941
)
40-
self._encoder.put(log_signal)
42+
self._tracks[log_signal.__track__].put(log_signal)
4143
except BufferFull:
4244
log.debug("Encoder buffer full")
4345
meter.increment("encoder.buffer.full")
46+
except KeyError:
47+
log.error("No encoder for signal track %s", log_signal.__track__)
4448

4549
def push(self, signal: Signal) -> None:
4650
if signal.state is SignalState.SKIP_COND:

ddtrace/debugging/_signal/log.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from ddtrace.debugging._probe.model import FunctionLocationMixin
66
from ddtrace.debugging._probe.model import LineLocationMixin
77
from ddtrace.debugging._signal.model import Signal
8+
from ddtrace.debugging._signal.model import SignalTrack
89

910

1011
@dataclass
@@ -16,6 +17,9 @@ class LogSignal(Signal):
1617
(e.g. conditions) might need to be reported.
1718
"""
1819

20+
__type__ = "di_snapshot"
21+
__track__: t.ClassVar[SignalTrack] = SignalTrack.LOGS
22+
1923
@property
2024
@abc.abstractmethod
2125
def message(self) -> t.Optional[str]:
@@ -61,6 +65,7 @@ def snapshot(self) -> t.Dict[str, t.Any]:
6165
"evaluationErrors": [{"expr": e.expr, "message": e.message} for e in self.errors],
6266
"probe": self._probe_details(),
6367
"language": "python",
68+
"type": self.__type__,
6469
}
6570
full_data.update(self.data)
6671

ddtrace/debugging/_signal/model.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ class SignalState(str, Enum):
5050
DONE = "DONE"
5151

5252

53+
class SignalTrack(str, Enum):
54+
DEFAULT = "default"
55+
LOGS = "logs"
56+
SNAPSHOT = "snapshot"
57+
58+
5359
@dataclass
5460
class Signal(abc.ABC):
5561
"""Debugger signal base class.
@@ -72,6 +78,7 @@ class Signal(abc.ABC):
7278
"""
7379

7480
__default_timing__: ClassVar[ProbeEvalTiming] = ProbeEvalTiming.EXIT
81+
__track__: ClassVar[SignalTrack] = SignalTrack.DEFAULT
7582

7683
probe: Probe
7784
frame: FrameType

ddtrace/debugging/_signal/snapshot.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from types import FunctionType
77
from types import ModuleType
88
from typing import Any
9+
from typing import ClassVar
910
from typing import Dict
1011
from typing import Mapping
1112
from typing import Optional
@@ -29,6 +30,7 @@
2930
from ddtrace.debugging._signal import utils
3031
from ddtrace.debugging._signal.log import LogSignal
3132
from ddtrace.debugging._signal.model import EvaluationError
33+
from ddtrace.debugging._signal.model import SignalTrack
3234
from ddtrace.debugging._signal.model import probe_to_signal
3335
from ddtrace.debugging._signal.utils import serialize
3436
from ddtrace.internal.compat import ExcInfoType
@@ -95,6 +97,8 @@ class Snapshot(LogSignal):
9597
Used to collect the minimum amount of information from a firing probe.
9698
"""
9799

100+
__track__: ClassVar[SignalTrack] = SignalTrack.SNAPSHOT
101+
98102
entry_capture: Optional[dict] = field(default=None)
99103
return_capture: Optional[dict] = field(default=None)
100104
line_capture: Optional[dict] = field(default=None)

ddtrace/debugging/_uploader.py

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from dataclasses import dataclass
12
from enum import Enum
23
from typing import Any
34
from typing import Optional
@@ -7,8 +8,11 @@
78
from ddtrace.debugging._config import di_config
89
from ddtrace.debugging._encoding import LogSignalJsonEncoder
910
from ddtrace.debugging._encoding import SignalQueue
11+
from ddtrace.debugging._encoding import SnapshotJsonEncoder
1012
from ddtrace.debugging._metrics import metrics
1113
from ddtrace.debugging._signal.collector import SignalCollector
14+
from ddtrace.debugging._signal.model import SignalTrack
15+
from ddtrace.internal import agent
1216
from ddtrace.internal.logger import get_logger
1317
from ddtrace.internal.periodic import ForksafeAwakeablePeriodicService
1418
from ddtrace.internal.utils.http import connector
@@ -27,6 +31,12 @@ class UploaderProduct(str, Enum):
2731
CODE_ORIGIN_SPAN = "code_origin.span"
2832

2933

34+
@dataclass
35+
class UploaderTrack:
36+
endpoint: str
37+
queue: SignalQueue
38+
39+
3040
class LogsIntakeUploaderV1(ForksafeAwakeablePeriodicService):
3141
"""Logs intake uploader.
3242
@@ -36,26 +46,48 @@ class LogsIntakeUploaderV1(ForksafeAwakeablePeriodicService):
3646

3747
_instance: Optional["LogsIntakeUploaderV1"] = None
3848
_products: Set[UploaderProduct] = set()
49+
_agent_endpoints: Set[str] = set()
3950

4051
__queue__ = SignalQueue
4152
__collector__ = SignalCollector
4253

43-
ENDPOINT = di_config._intake_endpoint
44-
4554
RETRY_ATTEMPTS = 3
4655

4756
def __init__(self, interval: Optional[float] = None) -> None:
4857
super().__init__(interval if interval is not None else di_config.upload_interval_seconds)
4958

50-
self._queue = self.__queue__(encoder=LogSignalJsonEncoder(di_config.service_name), on_full=self._on_buffer_full)
51-
self._collector = self.__collector__(self._queue)
59+
endpoint_suffix = f"?ddtags={quote(di_config.tags)}" if di_config._tags_in_qs and di_config.tags else ""
60+
if not self._agent_endpoints:
61+
try:
62+
agent_info = agent.info()
63+
self._agent_endpoints = set(agent_info.get("endpoints", [])) if agent_info is not None else set()
64+
except Exception:
65+
pass # nosec B110
66+
67+
snapshot_track = "/debugger/v1/input"
68+
if "/debugger/v2/input" in self._agent_endpoints:
69+
snapshot_track = "/debugger/v2/input"
70+
elif "/debugger/v1/diagnostics" in self._agent_endpoints:
71+
snapshot_track = "/debugger/v1/diagnostics"
72+
73+
self._tracks = {
74+
SignalTrack.LOGS: UploaderTrack(
75+
endpoint=f"/debugger/v1/input{endpoint_suffix}",
76+
queue=self.__queue__(
77+
encoder=LogSignalJsonEncoder(di_config.service_name), on_full=self._on_buffer_full
78+
),
79+
),
80+
SignalTrack.SNAPSHOT: UploaderTrack(
81+
endpoint=f"{snapshot_track}{endpoint_suffix}",
82+
queue=self.__queue__(encoder=SnapshotJsonEncoder(di_config.service_name), on_full=self._on_buffer_full),
83+
),
84+
}
85+
self._collector = self.__collector__({t: ut.queue for t, ut in self._tracks.items()})
5286
self._headers = {
5387
"Content-type": "application/json; charset=utf-8",
5488
"Accept": "text/plain",
5589
}
5690

57-
if di_config._tags_in_qs and di_config.tags:
58-
self.ENDPOINT += f"?ddtags={quote(di_config.tags)}"
5991
self._connect = connector(di_config._intake_url, timeout=di_config.upload_timeout)
6092

6193
# Make it retry-able
@@ -65,33 +97,31 @@ def __init__(self, interval: Optional[float] = None) -> None:
6597
)(self._write)
6698

6799
log.debug(
68-
"Logs intake uploader initialized (url: %s, endpoint: %s, interval: %f)",
100+
"Logs intake uploader initialized (url: %s, endpoints: %s, interval: %f)",
69101
di_config._intake_url,
70-
self.ENDPOINT,
102+
{t: ut.endpoint for t, ut in self._tracks.items()},
71103
self.interval,
72104
)
73105

74-
def _write(self, payload: bytes) -> None:
106+
self._flush_full = False
107+
108+
def _write(self, payload: bytes, endpoint: str) -> None:
75109
try:
76110
with self._connect() as conn:
77-
conn.request(
78-
"POST",
79-
self.ENDPOINT,
80-
payload,
81-
headers=self._headers,
82-
)
111+
conn.request("POST", endpoint, payload, headers=self._headers)
83112
resp = conn.getresponse()
84113
if not (200 <= resp.status < 300):
85-
log.error("Failed to upload payload: [%d] %r", resp.status, resp.read())
114+
log.error("Failed to upload payload to endpoint %s: [%d] %r", endpoint, resp.status, resp.read())
86115
meter.increment("upload.error", tags={"status": str(resp.status)})
87116
else:
88117
meter.increment("upload.success")
89118
meter.distribution("upload.size", len(payload))
90119
except Exception:
91-
log.error("Failed to write payload", exc_info=True)
120+
log.error("Failed to write payload to endpoint %s", endpoint, exc_info=True)
92121
meter.increment("error")
93122

94123
def _on_buffer_full(self, _item: Any, _encoded: bytes) -> None:
124+
self._flush_full = True
95125
self.upload()
96126

97127
def upload(self) -> None:
@@ -100,20 +130,32 @@ def upload(self) -> None:
100130

101131
def reset(self) -> None:
102132
"""Reset the buffer on fork."""
103-
self._queue = self.__queue__(encoder=self._queue._encoder, on_full=self._on_buffer_full)
104-
self._collector._encoder = self._queue
133+
for track in self._tracks.values():
134+
track.queue = self.__queue__(encoder=track.queue._encoder, on_full=self._on_buffer_full)
135+
self._collector._tracks = {t: ut.queue for t, ut in self._tracks.items()}
136+
137+
def _flush_track(self, track: UploaderTrack) -> None:
138+
queue = track.queue
139+
payload = queue.flush()
140+
if payload is not None:
141+
try:
142+
self._write_with_backoff(payload, track.endpoint)
143+
meter.distribution("batch.cardinality", queue.count)
144+
except Exception:
145+
log.debug("Cannot upload logs payload", exc_info=True)
105146

106147
def periodic(self) -> None:
107148
"""Upload the buffer content to the logs intake."""
108-
count = self._queue.count
109-
if count:
110-
payload = self._queue.flush()
111-
if payload is not None:
112-
try:
113-
self._write_with_backoff(payload)
114-
meter.distribution("batch.cardinality", count)
115-
except Exception:
116-
log.debug("Cannot upload logs payload", exc_info=True)
149+
if self._flush_full:
150+
# We received the signal to flush a full buffer
151+
self._flush_full = False
152+
for track in self._tracks.values():
153+
if track.queue.is_full():
154+
self._flush_track(track)
155+
156+
for track in self._tracks.values():
157+
if track.queue.count:
158+
self._flush_track(track)
117159

118160
on_shutdown = periodic
119161

ddtrace/internal/symbol_db/symbols.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,7 @@ def __init__(self, scopes: t.Optional[t.List[Scope]] = None) -> None:
468468
"ddsource": "python",
469469
"service": config.service or DEFAULT_SERVICE_NAME,
470470
"runtimeId": get_runtime_id(),
471+
"type": "symdb",
471472
}
472473

473474
def add_scope(self, scope: Scope) -> None:

ddtrace/settings/dynamic_instrumentation.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ class DynamicInstrumentationConfig(DDConfig):
5252
max_probes = DDConfig.d(int, lambda _: DEFAULT_MAX_PROBES)
5353
global_rate_limit = DDConfig.d(float, lambda _: DEFAULT_GLOBAL_RATE_LIMIT)
5454
_tags_in_qs = DDConfig.d(bool, lambda _: True)
55-
_intake_endpoint = DDConfig.d(str, lambda _: "/debugger/v1/input")
5655
tags = DDConfig.d(str, _derive_tags)
5756

5857
enabled = DDConfig.v(

0 commit comments

Comments
 (0)