Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- Normalize trace context information before writing it into transaction and span data. This ensures the correct sampling rates are stored for extrapolation in Sentry. ([#4625](https://github.com/getsentry/relay/pull/4625))
- Adds u16 validation to the replay protocol's segment_id field. ([#4635](https://github.com/getsentry/relay/pull/4635))
- Exposes all service utilization with instance labels instead of the last. ([#4654](https://github.com/getsentry/relay/pull/4654))
- Ensure that every span's parent exists. ([#4661](https://github.com/getsentry/relay/pull/4661))

**Internal**:

Expand Down
4 changes: 4 additions & 0 deletions relay-event-normalization/src/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) {
// Some contexts need to be normalized before metrics extraction takes place.
normalize_contexts(&mut event.contexts);

if event.ty.value() == Some(&EventType::Transaction) {
span::fix_trees::fix_trees(event);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not put this inside the conditional block below?


if config.normalize_spans && event.ty.value() == Some(&EventType::Transaction) {
crate::normalize::normalize_app_start_spans(event);
span::exclusive_time::compute_span_exclusive_time(event);
Expand Down
164 changes: 164 additions & 0 deletions relay-event-normalization/src/normalize/span/fix_trees.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
//! Span tree normalization.
use std::collections::BTreeSet;
use std::mem;

use relay_event_schema::protocol::{Event, TraceContext};
use relay_protocol::Error;

/// Enforce that every span has a valid parent. If any `parent_span_id` is pointing nowhere, the
/// span is re-parented onto the root span.
///
/// This is to avoid any nasty surprises in the span buffer specifically. Other readers of spans
/// (such as the frontend's tree component) were already able to deal with detached spans.
pub fn fix_trees(event: &mut Event) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a name along the lines of reparent_spans_to_root is more expressive?

let Some(ref mut spans) = event.spans.value_mut() else {
return;
};

let Some(contexts) = event.contexts.value_mut() else {
return;
};

let Some(trace_context) = contexts.get_mut::<TraceContext>() else {
return;
};

let Some(root_span_id) = trace_context.span_id.value() else {
return;
};

let valid_span_ids = spans
.iter()
.filter_map(|span| span.value())
.filter_map(|span| span.span_id.value())
.chain(Some(root_span_id))
.cloned()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really dislike this. In this expression we access span_id, in the for-loop we only access parent_span_id. In theory those are mutually exclusive accesses, and should be able to be borrowed at the same time. But it's tricky to convince the compiler of that, and so I have to clone here.

It would be better to make SpanId a u16 anyway...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Either this or we serialize like in ProjectKey which supports as_str and as_ref::<str>:

/// The public key used in a DSN to identify and authenticate a project at Sentry.
///
/// Project keys are always 32-character hexadecimal strings.
#[derive(Clone, Copy, Eq, Hash, Ord, PartialOrd, PartialEq)]
pub struct ProjectKey([u8; 32]);
)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tried to change this but too much stuff has to be adjusted to make it work since .0 is pub. let's just ship as-is

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, part of this would be refactoring all usage to remove access to the field.

.collect::<BTreeSet<_>>();

for span in spans {
let Some(span) = span.value_mut() else {
continue;
};

let Some(parent_span_id) = span.parent_span_id.value_mut() else {
continue;
};

if valid_span_ids.contains(&*parent_span_id) {
continue;
};

let invalid_parent = mem::replace(parent_span_id, root_span_id.clone());
let meta = span.parent_span_id.meta_mut();
meta.add_error(Error::invalid("span ID does not exist"));
meta.set_original_value(Some(invalid_parent));
}
}

#[cfg(test)]
mod tests {
use relay_protocol::FromValue;
use relay_protocol::SerializableAnnotated;

use super::*;

#[test]
fn basic() {
let mut data = Event::from_value(
serde_json::json!({
"type": "transaction",
"start_timestamp": 1609455600,
"end_timestamp": 1609455605,
"contexts": {
"trace": {
"trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
"span_id": "aaaaaaaaaaaaaaaa",
"parent_span_id": "ffffffffffffffff",
}
},
"spans": [
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "aaaaaaaaaaaaaaaa"
},
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "dddddddddddddddd",
},
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "eeeeeeeeeeeeeeee",
},
],
})
.into(),
);

fix_trees(data.value_mut().as_mut().unwrap());

insta::assert_json_snapshot!(SerializableAnnotated(&data), @r###"
{
"type": "transaction",
"start_timestamp": 1609455600.0,
"contexts": {
"trace": {
"trace_id": "4c79f60c11214eb38604f4ae0781bfb2",
"span_id": "aaaaaaaaaaaaaaaa",
"parent_span_id": "ffffffffffffffff",
"type": "trace"
}
},
"spans": [
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "aaaaaaaaaaaaaaaa"
},
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "aaaaaaaaaaaaaaaa"
},
{
"span_id": "bbbbbbbbbbbbbbbb",
"parent_span_id": "aaaaaaaaaaaaaaaa"
}
],
"end_timestamp": 1609455605,
"_meta": {
"spans": {
"1": {
"parent_span_id": {
"": {
"err": [
[
"invalid_data",
{
"reason": "span ID does not exist"
}
]
],
"val": "dddddddddddddddd"
}
}
},
"2": {
"parent_span_id": {
"": {
"err": [
[
"invalid_data",
{
"reason": "span ID does not exist"
}
]
],
"val": "eeeeeeeeeeeeeeee"
}
}
}
}
}
}
"###);
}
}
1 change: 1 addition & 0 deletions relay-event-normalization/src/normalize/span/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub mod ai;
pub mod country_subregion;
pub mod description;
pub mod exclusive_time;
pub mod fix_trees;
pub mod tag_extraction;

/// Regex used to scrub hex IDs and multi-digit numbers from table names and other identifiers.
Expand Down
4 changes: 3 additions & 1 deletion relay-event-schema/src/protocol/contexts/trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ impl AsRef<str> for TraceId {
}

/// A 16-character hex string as described in the W3C trace context spec.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq, Empty, IntoValue, ProcessValue)]
#[derive(
Clone, Debug, Default, Eq, Hash, PartialEq, Ord, PartialOrd, Empty, IntoValue, ProcessValue,
)]
pub struct SpanId(pub String);

relay_common::impl_str_serde!(SpanId, "a span identifier");
Expand Down
Loading
Loading