@@ -14,6 +14,11 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
14
14
{
15
15
private const double One = 1.0 ;
16
16
private const long Hundred = 100L ;
17
+ private const double CpuLimitThreshold110Percent = 1.1 ;
18
+
19
+ // Counters that record how often CPU utilization exceeds 100% and 110% of the CPU limit
20
+ private readonly Counter < long > ? _cpuUtilizationLimit100PercentExceededCounter ;
21
+ private readonly Counter < long > ? _cpuUtilizationLimit110PercentExceededCounter ;
17
22
18
23
private readonly object _cpuLocker = new ( ) ;
19
24
private readonly object _memoryLocker = new ( ) ;
@@ -38,6 +43,8 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
38
43
private double _memoryPercentage ;
39
44
private long _previousCgroupCpuTime ;
40
45
private long _previousHostCpuTime ;
46
+ private long _cpuUtilizationLimit100PercentExceeded ;
47
+ private long _cpuUtilizationLimit110PercentExceeded ;
41
48
public SystemResources Resources { get ; }
42
49
43
50
public LinuxUtilizationProvider ( IOptions < ResourceMonitoringOptions > options , ILinuxUtilizationParser parser ,
@@ -77,17 +84,21 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
77
84
78
85
// Try to get the CPU request from cgroup
79
86
cpuRequest = _parser . GetCgroupRequestCpuV2 ( ) ;
80
- _ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization , observeValue : ( ) => CpuUtilizationWithoutHostDelta ( ) / cpuLimit , unit : "1" ) ;
87
+
88
+ // Initialize the counters
89
+ _cpuUtilizationLimit100PercentExceededCounter = meter . CreateCounter < long > ( "cpu_utilization_limit_100_percent_exceeded" ) ;
90
+ _cpuUtilizationLimit110PercentExceededCounter = meter . CreateCounter < long > ( "cpu_utilization_limit_110_percent_exceeded" ) ;
91
+ _ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization , observeValue : ( ) => CpuUtilizationLimit ( cpuLimit ) , unit : "1" ) ;
81
92
_ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization , observeValue : ( ) => CpuUtilizationWithoutHostDelta ( ) / cpuRequest , unit : "1" ) ;
82
93
}
83
94
else
84
95
{
85
96
_ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerCpuLimitUtilization , observeValue : ( ) => CpuUtilization ( ) * _scaleRelativeToCpuLimit , unit : "1" ) ;
86
97
_ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerCpuRequestUtilization , observeValue : ( ) => CpuUtilization ( ) * _scaleRelativeToCpuRequest , unit : "1" ) ;
98
+ _ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ProcessCpuUtilization , observeValue : ( ) => CpuUtilization ( ) * _scaleRelativeToCpuRequest , unit : "1" ) ;
87
99
}
88
100
89
101
_ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ContainerMemoryLimitUtilization , observeValue : MemoryUtilization , unit : "1" ) ;
90
- _ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ProcessCpuUtilization , observeValue : ( ) => CpuUtilization ( ) * _scaleRelativeToCpuRequest , unit : "1" ) ;
91
102
_ = meter . CreateObservableGauge ( name : ResourceUtilizationInstruments . ProcessMemoryUtilization , observeValue : MemoryUtilization , unit : "1" ) ;
92
103
93
104
// cpuRequest is the CPU request (i.e. the guaranteed number of CPU units) for a pod; for a host it's 1 core
@@ -138,6 +149,34 @@ public double CpuUtilizationWithoutHostDelta()
138
149
return _lastCpuCoresUsed ;
139
150
}
140
151
152
/// <summary>
/// Calculates CPU utilization relative to the CPU limit and records threshold exceedances.
/// </summary>
/// <remarks>
/// Each call that observes a ratio above 100% of the limit adds one to the 100% counter
/// instrument, bumps the matching running total and logs it; the same happens independently
/// for the 110% threshold. The running totals are incremented atomically so that overlapping
/// calls cannot lose an update, and the value that is logged is the one produced by this call.
/// </remarks>
/// <param name="cpuLimit">The CPU limit to use for the calculation.</param>
/// <returns>CPU usage as a ratio of the limit.</returns>
public double CpuUtilizationLimit(float cpuLimit)
{
    double utilization = CpuUtilizationWithoutHostDelta() / cpuLimit;

    // Utilization above 100% of the limit.
    if (utilization > One)
    {
        _cpuUtilizationLimit100PercentExceededCounter?.Add(1);

        // Atomic increment: a plain ++ is a read-modify-write and may drop updates
        // if two observations run at the same time.
        long exceeded100 = System.Threading.Interlocked.Increment(ref _cpuUtilizationLimit100PercentExceeded);
        Log.CounterMessage100(_logger, exceeded100);
    }

    // Utilization above 110% of the limit.
    if (utilization > CpuLimitThreshold110Percent)
    {
        _cpuUtilizationLimit110PercentExceededCounter?.Add(1);

        long exceeded110 = System.Threading.Interlocked.Increment(ref _cpuUtilizationLimit110PercentExceeded);
        Log.CounterMessage110(_logger, exceeded110);
    }

    return utilization;
}
179
+
141
180
public double CpuUtilization ( )
142
181
{
143
182
DateTimeOffset now = _timeProvider . GetUtcNow ( ) ;
0 commit comments