Skip to content

Commit 00644d3

Browse files
Introduce Content Safety evaluators
1 parent 80131f8 commit 00644d3

26 files changed

+2541
-336
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/RelevanceTruthAndCompletenessEvaluator.cs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,31 +207,33 @@ void UpdateResult()
207207
const string Rationales = "Rationales";
208208
const string Separator = "; ";
209209

210-
var commonMetadata = new Dictionary<string, string> { ["rtc_evaluation_duration"] = duration };
210+
var commonMetadata = new Dictionary<string, string>();
211211

212212
if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
213213
{
214-
commonMetadata["rtc_evaluation_model_used"] = evaluationResponse.ModelId!;
214+
commonMetadata["rtc-evaluation-model-used"] = evaluationResponse.ModelId!;
215215
}
216216

217217
if (evaluationResponse.Usage is UsageDetails usage)
218218
{
219219
if (usage.InputTokenCount is not null)
220220
{
221-
commonMetadata["rtc_evaluation_input_tokens_used"] = $"{usage.InputTokenCount}";
221+
commonMetadata["rtc-evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
222222
}
223223

224224
if (usage.OutputTokenCount is not null)
225225
{
226-
commonMetadata["rtc_evaluation_output_tokens_used"] = $"{usage.OutputTokenCount}";
226+
commonMetadata["rtc-evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
227227
}
228228

229229
if (usage.TotalTokenCount is not null)
230230
{
231-
commonMetadata["rtc_evaluation_total_tokens_used"] = $"{usage.TotalTokenCount}";
231+
commonMetadata["rtc-evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
232232
}
233233
}
234234

235+
commonMetadata["rtc-evaluation-duration"] = duration;
236+
235237
NumericMetric relevance = result.Get<NumericMetric>(RelevanceMetricName);
236238
relevance.Value = rating.Relevance;
237239
relevance.Interpretation = relevance.InterpretScore();

src/Libraries/Microsoft.Extensions.AI.Evaluation.Quality/SingleNumericMetricEvaluator.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,24 +80,24 @@ await chatConfiguration.ChatClient.GetResponseAsync(
8080

8181
if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
8282
{
83-
metric.AddOrUpdateMetadata(name: "evaluation_model_used", value: evaluationResponse.ModelId!);
83+
metric.AddOrUpdateMetadata(name: "evaluation-model-used", value: evaluationResponse.ModelId!);
8484
}
8585

8686
if (evaluationResponse.Usage is UsageDetails usage)
8787
{
8888
if (usage.InputTokenCount is not null)
8989
{
90-
metric.AddOrUpdateMetadata(name: "evaluation_input_tokens_used", value: $"{usage.InputTokenCount}");
90+
metric.AddOrUpdateMetadata(name: "evaluation-input-tokens-used", value: $"{usage.InputTokenCount}");
9191
}
9292

9393
if (usage.OutputTokenCount is not null)
9494
{
95-
metric.AddOrUpdateMetadata(name: "evaluation_output_tokens_used", value: $"{usage.OutputTokenCount}");
95+
metric.AddOrUpdateMetadata(name: "evaluation-output-tokens-used", value: $"{usage.OutputTokenCount}");
9696
}
9797

9898
if (usage.TotalTokenCount is not null)
9999
{
100-
metric.AddOrUpdateMetadata(name: "evaluation_total_tokens_used", value: $"{usage.TotalTokenCount}");
100+
metric.AddOrUpdateMetadata(name: "evaluation-total-tokens-used", value: $"{usage.TotalTokenCount}");
101101
}
102102
}
103103

@@ -126,7 +126,7 @@ await chatConfiguration.ChatClient.GetResponseAsync(
126126
{
127127
stopwatch.Stop();
128128
string duration = $"{stopwatch.Elapsed.TotalSeconds.ToString("F2", CultureInfo.InvariantCulture)} s";
129-
metric.AddOrUpdateMetadata(name: "evaluation_duration", value: duration);
129+
metric.AddOrUpdateMetadata(name: "evaluation-duration", value: duration);
130130
}
131131
}
132132
}

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const useCardStyles = makeStyles({
4242
padding: '.75rem',
4343
border: `1px solid ${tokens.colorNeutralStroke2}`,
4444
borderRadius: '4px',
45-
width: '8rem',
45+
width: '12.5rem',
4646
cursor: 'pointer',
4747
transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
4848
position: 'relative',
@@ -241,4 +241,4 @@ export const MetricDisplay = ({metric}: {metric: MetricWithNoValue | NumericMetr
241241
classes.metricPill,
242242
);
243243
return (<div className={pillClass}><span className={fg}>{metricValue}</span></div>);
244-
};
244+
};
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Linq;
6+
using System.Net.Http;
7+
using System.Threading;
8+
using System.Threading.Tasks;
9+
10+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
11+
12+
/// <summary>
/// An <see cref="IEvaluator"/> that utilizes the Azure AI Content Safety service to evaluate code completion responses
/// produced by an AI model for the presence of vulnerable code.
/// </summary>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project and the credentials that should be used when this
/// <see cref="CodeVulnerabilityEvaluator"/> communicates with the Azure AI Content Safety service to perform
/// evaluations.
/// </param>
/// <param name="httpClient">
/// The <see cref="HttpClient"/> that should be used when communicating with the Azure AI Content Safety service. While
/// the parameter is optional, it is recommended to supply an <see cref="HttpClient"/> that is configured with robust
/// resilience and retry policies.
/// </param>
/// <param name="timeoutInSecondsForRetries">
/// The timeout (in seconds) after which this <see cref="CodeVulnerabilityEvaluator"/> should stop retrying failed
/// attempts to communicate with the Azure AI Content Safety service when performing evaluations.
/// </param>
public sealed class CodeVulnerabilityEvaluator(
    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
    HttpClient? httpClient = null,
    int timeoutInSecondsForRetries = Defaults.TimeoutInSecondsForRetries)
        : ContentSafetyEvaluator(
            contentSafetyServiceConfiguration,
            contentSafetyServiceAnnotationTask: "code vulnerability",
            evaluatorName: nameof(CodeVulnerabilityEvaluator),
            httpClient,
            timeoutInSecondsForRetries)
{
    /// <summary>
    /// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="BooleanMetric"/> returned by
    /// <see cref="CodeVulnerabilityEvaluator"/>.
    /// </summary>
    public static string CodeVulnerabilityMetricName => "Code Vulnerability";

    /// <inheritdoc/>
    public override IReadOnlyCollection<string> EvaluationMetricNames => [CodeVulnerabilityMetricName];

    /// <inheritdoc/>
    /// <remarks>
    /// <paramref name="chatConfiguration"/> and <paramref name="additionalContext"/> are not forwarded to the
    /// Azure AI Content Safety service by this evaluator.
    /// </remarks>
    public override async ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Name under which the service reports the metric; it is replaced with the public metric name below.
        const string CodeVulnerabilityContentSafetyServiceMetricName = "code_vulnerability";

        EvaluationResult result =
            await EvaluateContentSafetyAsync(
                messages,
                modelResponse,
                contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.ContextCompletion.ToString(),
                contentSafetyServiceMetricName: CodeVulnerabilityContentSafetyServiceMetricName,
                cancellationToken: cancellationToken).ConfigureAwait(false);

        // Snapshot the metrics and rename them in an explicit loop. Doing the rename as a side effect of a deferred
        // LINQ projection would only run it when (and every time) the sequence happens to be enumerated.
        List<EvaluationMetric> updatedMetrics = result.Metrics.Values.ToList();
        foreach (EvaluationMetric metric in updatedMetrics)
        {
            if (metric.Name == CodeVulnerabilityContentSafetyServiceMetricName)
            {
                metric.Name = CodeVulnerabilityMetricName;
            }
        }

        // A new result is built from the renamed metrics rather than reusing the one returned by the service call.
        result = new EvaluationResult(updatedMetrics);
        result.Interpret(metric => metric is BooleanMetric booleanMetric ? booleanMetric.InterpretScore() : null);
        return result;
    }
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Linq;
6+
using System.Net.Http;
7+
using System.Threading;
8+
using System.Threading.Tasks;
9+
10+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
11+
12+
#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
/// <summary>
/// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
/// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
/// harmful content such as violence, hate speech, etc.
/// </summary>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project and the credentials that should be used when this
/// <see cref="ContentHarmEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServiceMetricName">
/// The name of the metric that should be used when this <see cref="ContentHarmEvaluator"/> communicates with the
/// Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="metricName">
/// The name of the <see cref="EvaluationMetric"/> produced by this <see cref="ContentHarmEvaluator"/>.
/// </param>
/// <param name="evaluatorName">The name of the derived <see cref="ContentHarmEvaluator"/>.</param>
/// <param name="httpClient">
/// The <see cref="HttpClient"/> that should be used when communicating with the Azure AI Content Safety service. While
/// the parameter is optional, it is recommended to supply an <see cref="HttpClient"/> that is configured with robust
/// resilience and retry policies.
/// </param>
/// <param name="timeoutInSecondsForRetries">
/// The timeout (in seconds) after which this <see cref="ContentHarmEvaluator"/> should stop retrying failed attempts
/// to communicate with the Azure AI Content Safety service when performing evaluations.
/// </param>
public abstract class ContentHarmEvaluator(
    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
    string contentSafetyServiceMetricName,
    string metricName,
    string evaluatorName,
    HttpClient? httpClient = null,
    int timeoutInSecondsForRetries = Defaults.TimeoutInSecondsForRetries)
        : ContentSafetyEvaluator(
            contentSafetyServiceConfiguration,
            contentSafetyServiceAnnotationTask: "content harm",
            evaluatorName,
            httpClient,
            timeoutInSecondsForRetries)
#pragma warning restore S1694
{
    /// <inheritdoc/>
    public override IReadOnlyCollection<string> EvaluationMetricNames => [metricName];

    /// <inheritdoc/>
    /// <remarks>
    /// <paramref name="chatConfiguration"/> and <paramref name="additionalContext"/> are not forwarded to the
    /// Azure AI Content Safety service by this evaluator.
    /// </remarks>
    public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        EvaluationResult result =
            await EvaluateContentSafetyAsync(
                messages,
                modelResponse,
                contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.Conversation.ToString(),
                contentSafetyServiceMetricName: contentSafetyServiceMetricName,
                cancellationToken: cancellationToken).ConfigureAwait(false);

        // Snapshot the metrics and rename them in an explicit loop. Doing the rename as a side effect of a deferred
        // LINQ projection would only run it when (and every time) the sequence happens to be enumerated.
        List<EvaluationMetric> updatedMetrics = result.Metrics.Values.ToList();
        foreach (EvaluationMetric metric in updatedMetrics)
        {
            // Replace the service-side metric name with the name this evaluator exposes to callers.
            if (metric.Name == contentSafetyServiceMetricName)
            {
                metric.Name = metricName;
            }
        }

        // A new result is built from the renamed metrics rather than reusing the one returned by the service call.
        result = new EvaluationResult(updatedMetrics);
        result.Interpret(metric => metric is NumericMetric numericMetric ? numericMetric.InterpretHarmScore() : null);
        return result;
    }
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#pragma warning disable S3604
5+
// S3604: Member initializer values should not be redundant.
6+
// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
7+
// constructor syntax.
8+
9+
using System;
10+
using System.Collections.Generic;
11+
using System.Net.Http;
12+
using System.Threading;
13+
using System.Threading.Tasks;
14+
15+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
16+
17+
/// <summary>
18+
/// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
19+
/// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
20+
/// unsafe content such as protected material, vulnerable code, harmful content etc.
21+
/// </summary>
22+
/// <param name="contentSafetyServiceConfiguration">
23+
/// Specifies the Azure AI project that should be used and credentials that should be used when this
24+
/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
25+
/// </param>
26+
/// <param name="contentSafetyServiceAnnotationTask">
27+
/// The name of the annotation task that should be used when this <see cref="ContentSafetyEvaluator"/> communicates
28+
/// with the Azure AI Content Safety service to perform evaluations.
29+
/// </param>
30+
/// <param name="evaluatorName">The name of the derived <see cref="ContentSafetyEvaluator"/>.</param>
31+
/// <param name="httpClient">
32+
/// The <see cref="HttpClient"/> that should be used when communicating with the Azure AI Content Safety service. While
33+
/// the parameter is optional, it is recommended to supply an <see cref="HttpClient"/> that is configured with robust
34+
/// resilience and retry policies.
35+
/// </param>
36+
/// <param name="timeoutInSecondsForRetries">
37+
/// The timeout (in seconds) after which this <see cref="ContentSafetyEvaluator"/> should stop retrying failed attempts
38+
/// to communicate with the Azure AI Content Safety service when performing evaluations.
39+
/// </param>
40+
public abstract class ContentSafetyEvaluator(
41+
ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
42+
string contentSafetyServiceAnnotationTask,
43+
string evaluatorName,
44+
HttpClient? httpClient = null,
45+
int timeoutInSecondsForRetries = Defaults.TimeoutInSecondsForRetries) : IEvaluator
46+
{
47+
private readonly ContentSafetyService _service =
48+
new ContentSafetyService(
49+
contentSafetyServiceConfiguration,
50+
contentSafetyServiceAnnotationTask,
51+
evaluatorName,
52+
httpClient,
53+
timeoutInSecondsForRetries);
54+
55+
/// <inheritdoc/>
56+
public abstract IReadOnlyCollection<string> EvaluationMetricNames { get; }
57+
58+
/// <inheritdoc/>
59+
public abstract ValueTask<EvaluationResult> EvaluateAsync(
60+
IEnumerable<ChatMessage> messages,
61+
ChatResponse modelResponse,
62+
ChatConfiguration? chatConfiguration = null,
63+
IEnumerable<EvaluationContext>? additionalContext = null,
64+
CancellationToken cancellationToken = default);
65+
66+
/// <summary>
67+
/// Evaluates the supplied <paramref name="modelResponse"/> using the Azure AI Content Safety Service and returns
68+
/// an <see cref="EvaluationResult"/> containing one or more <see cref="EvaluationMetric"/>s.
69+
/// </summary>
70+
/// <param name="messages">
71+
/// The conversation history including the request that produced the supplied <paramref name="modelResponse"/>.
72+
/// </param>
73+
/// <param name="modelResponse">The response that is to be evaluated.</param>
74+
/// <param name="additionalContext">
75+
/// Per conversation turn contextual information (beyond that which is available in <paramref name="messages"/>)
76+
/// that the <see cref="IEvaluator"/> may need to accurately evaluate the supplied
77+
/// <paramref name="modelResponse"/>.
78+
/// </param>
79+
/// <param name="contentSafetyServicePayloadFormat">
80+
/// An identifier that specifies the format of the payload that should be used when communicating with the Azure AI
81+
/// Content Safety service to perform evaluations.
82+
/// </param>
83+
/// <param name="contentSafetyServiceMetricName">
84+
/// The name of the metric that should be used in the payload when communicating with the Azure AI Content Safety
85+
/// service to perform evaluations.
86+
/// </param>
87+
/// <param name="cancellationToken">
88+
/// A <see cref="CancellationToken"/> that can cancel the evaluation operation.
89+
/// </param>
90+
/// <returns>An <see cref="EvaluationResult"/> containing one or more <see cref="EvaluationMetric"/>s.</returns>
91+
protected ValueTask<EvaluationResult> EvaluateContentSafetyAsync(
92+
IEnumerable<ChatMessage> messages,
93+
ChatResponse modelResponse,
94+
IEnumerable<string?>? additionalContext = null,
95+
string contentSafetyServicePayloadFormat = "QuestionAnswer", // ContentSafetyServicePayloadFormat.QuestionAnswer
96+
string? contentSafetyServiceMetricName = null,
97+
CancellationToken cancellationToken = default)
98+
{
99+
ContentSafetyServicePayloadFormat payloadFormat =
100+
#if NET
101+
Enum.Parse<ContentSafetyServicePayloadFormat>(contentSafetyServicePayloadFormat);
102+
#else
103+
(ContentSafetyServicePayloadFormat)Enum.Parse(
104+
typeof(ContentSafetyServicePayloadFormat),
105+
contentSafetyServicePayloadFormat);
106+
#endif
107+
108+
return _service.EvaluateAsync(
109+
messages,
110+
modelResponse,
111+
additionalContext,
112+
payloadFormat,
113+
metricNames: string.IsNullOrWhiteSpace(contentSafetyServiceMetricName) ? null : [contentSafetyServiceMetricName!],
114+
cancellationToken);
115+
}
116+
}

0 commit comments

Comments
 (0)