Skip to content

Commit cbf72c2

Browse files
authored
fix(autoscaling): expose services with their instance ids (#4654)
Fixes a bug where only a single utilization metric was exported per service name, even when multiple instances of that service, each with a different instance ID, were running.
1 parent 885c6bc commit cbf72c2

File tree

4 files changed

+63
-11
lines changed

4 files changed

+63
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
- Separates profiles into backend and ui profiles. ([#4595](https://github.com/getsentry/relay/pull/4595))
1818
- Normalize trace context information before writing it into transaction and span data. This ensures the correct sampling rates are stored for extrapolation in Sentry. ([#4625](https://github.com/getsentry/relay/pull/4625))
1919
- Adds u16 validation to the replay protocol's segment_id field. ([#4635](https://github.com/getsentry/relay/pull/4635))
20+
- Exposes the utilization of every service instance, labeled with its instance ID, instead of only the last instance. ([#4654](https://github.com/getsentry/relay/pull/4654))
2021

2122
**Internal**:
2223

relay-server/src/endpoints/autoscaling.rs

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,15 @@ fn to_prometheus_string(data: &AutoscalingData) -> String {
3232
append_data_row(&mut result, "spool_item_count", data.item_count, &[]);
3333
append_data_row(&mut result, "spool_total_size", data.total_size, &[]);
3434
for utilization in &data.services_metrics {
35-
let service_name = extract_service_name(utilization.0);
35+
let service_name = extract_service_name(utilization.name);
3636
append_data_row(
3737
&mut result,
3838
"service_utilization",
39-
utilization.1,
40-
&[("relay_service", service_name)],
39+
utilization.utilization,
40+
&[
41+
("relay_service", service_name),
42+
("instance_id", &format!("{}", utilization.instance_id)),
43+
],
4144
);
4245
}
4346

@@ -138,8 +141,21 @@ mod test {
138141
item_count: 10,
139142
total_size: 30,
140143
services_metrics: vec![
141-
ServiceUtilization("test", 10),
142-
ServiceUtilization("envelope", 50),
144+
ServiceUtilization {
145+
name: "test",
146+
instance_id: 0,
147+
utilization: 10,
148+
},
149+
ServiceUtilization {
150+
name: "test",
151+
instance_id: 1,
152+
utilization: 30,
153+
},
154+
ServiceUtilization {
155+
name: "envelope",
156+
instance_id: 1,
157+
utilization: 50,
158+
},
143159
],
144160
worker_pool_utilization: 61,
145161
runtime_utilization: 41,
@@ -151,8 +167,9 @@ mod test {
151167
relay_up 1
152168
relay_spool_item_count 10
153169
relay_spool_total_size 30
154-
relay_service_utilization{relay_service="test"} 10
155-
relay_service_utilization{relay_service="envelope"} 50
170+
relay_service_utilization{relay_service="test", instance_id="0"} 10
171+
relay_service_utilization{relay_service="test", instance_id="1"} 30
172+
relay_service_utilization{relay_service="envelope", instance_id="1"} 50
156173
relay_worker_pool_utilization 61
157174
relay_runtime_utilization 41
158175
"#

relay-server/src/services/autoscaling.rs

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@ impl Service for AutoscalingMetricService {
6161
let metrics = self.handle
6262
.current_services_metrics()
6363
.iter()
64-
.map(|(id, metric)| ServiceUtilization(id.name(), metric.utilization))
64+
.map(|(id, metric)| ServiceUtilization {
65+
name: id.name(),
66+
instance_id: id.instance_id(),
67+
utilization: metric.utilization
68+
}
69+
)
6570
.collect();
6671
let worker_pool_utilization = self.async_pool.metrics().utilization() as u8;
6772
let runtime_utilization = self.runtime_utilization();
@@ -127,14 +132,43 @@ impl FromMessage<AutoscalingMessageKind> for AutoscalingMetrics {
127132
}
128133
}
129134

135+
/// Contains data that is used for autoscaling.
130136
pub struct AutoscalingData {
137+
/// Memory usage of relay.
131138
pub memory_usage: f32,
139+
/// Is `1` if relay is running, `0` if it's shutting down.
132140
pub up: u8,
141+
/// The total number of bytes used by the spooler.
133142
pub total_size: u64,
143+
/// The total number of envelopes in the spooler.
134144
pub item_count: u64,
145+
/// Worker pool utilization in percent.
135146
pub worker_pool_utilization: u8,
147+
/// List of service utilization.
136148
pub services_metrics: Vec<ServiceUtilization>,
149+
/// Utilization of the async runtime.
137150
pub runtime_utilization: u8,
138151
}
139152

140-
pub struct ServiceUtilization(pub &'static str, pub u8);
153+
/// Contains the minimal required information for service utilization.
154+
///
155+
/// A service can have multiple instances which will all have the same name.
156+
/// Those instances are distinguished by the `instance_id`.
157+
pub struct ServiceUtilization {
158+
/// The service name.
159+
pub name: &'static str,
160+
/// The id of the specific service instance.
161+
pub instance_id: u32,
162+
/// Utilization as percentage.
163+
pub utilization: u8,
164+
}
165+
166+
impl ServiceUtilization {
167+
pub fn new(name: &'static str, instance_id: u32, utilization: u8) -> Self {
168+
Self {
169+
name,
170+
instance_id,
171+
utilization,
172+
}
173+
}
174+
}

tests/integration/test_autoscaling.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
def parse_prometheus(input_string):
1414
result = {}
1515
for line in input_string.splitlines():
16-
parts = line.split(" ")
16+
parts = line.rsplit(" ", 1)
1717
result[parts[0]] = parts[1]
1818
return result
1919

@@ -79,7 +79,7 @@ def test_memory_spooling_metrics(mini_sentry, relay):
7979
@pytest.mark.parametrize(
8080
"metric_name",
8181
(
82-
'relay_service_utilization{relay_service="AggregatorService"}',
82+
'relay_service_utilization{relay_service="AggregatorService", instance_id="0"}',
8383
"relay_worker_pool_utilization",
8484
"relay_runtime_utilization",
8585
),

0 commit comments

Comments
 (0)