Skip to content

Commit cc86de1

Browse files
Introduce Content Safety evaluators
1 parent 84f0a7a commit cc86de1

17 files changed

+1721
-2
lines changed

src/Libraries/Microsoft.Extensions.AI.Evaluation.Reporting/TypeScript/components/MetricCard.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ const useCardStyles = makeStyles({
4242
padding: '.75rem',
4343
border: `1px solid ${tokens.colorNeutralStroke2}`,
4444
borderRadius: '4px',
45-
width: '8rem',
45+
width: '12.5rem',
4646
cursor: 'pointer',
4747
transition: 'box-shadow 0.2s ease-in-out, outline 0.2s ease-in-out',
4848
position: 'relative',
@@ -241,4 +241,4 @@ export const MetricDisplay = ({metric}: {metric: MetricWithNoValue | NumericMetr
241241
classes.metricPill,
242242
);
243243
return (<div className={pillClass}><span className={fg}>{metricValue}</span></div>);
244-
};
244+
};
Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Net.Http;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
10+
/// <summary>
/// An <see cref="IEvaluator"/> that relies on the Azure AI Content Safety service to check whether code completion
/// responses generated by an AI model contain vulnerable code.
/// </summary>
/// <param name="contentSafetyServiceConfiguration">
/// Identifies the Azure AI project, together with the credentials, that this <see cref="ContentSafetyEvaluator"/>
/// uses when it calls the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="httpClient">
/// The <see cref="HttpClient"/> used to communicate with the Azure AI Content Safety service. The parameter is
/// optional; supplying an <see cref="HttpClient"/> that is configured with robust resilience and retry policies is
/// nevertheless recommended.
/// </param>
public sealed class CodeVulnerabilityEvaluator(
    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
    HttpClient? httpClient = null)
        : ContentSafetyEvaluator(
            evaluatorName: nameof(CodeVulnerabilityEvaluator),
            metricName: CodeVulnerabilityMetricName,
            contentSafetyServiceConfiguration,
            contentSafetyServiceAnnotationTask: "code vulnerability",
            contentSafetyServiceMetricName: "code_vulnerability",
            contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.ContextCompletion.ToString(),
            httpClient)
{
    /// <summary>
    /// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="BooleanMetric"/> that
    /// <see cref="CodeVulnerabilityEvaluator"/> returns.
    /// </summary>
    public static string CodeVulnerabilityMetricName => "Code Vulnerability";

    /// <inheritdoc/>
    public override async ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Only the conversation, the response and the cancellation token are forwarded to the base helper.
        ValueTask<EvaluationResult> pendingEvaluation =
            EvaluateContentSafetyAsync(messages, modelResponse, cancellationToken: cancellationToken);

        EvaluationResult evaluationResult = await pendingEvaluation.ConfigureAwait(false);

        // Boolean metrics are interpreted via InterpretScore(); the callback yields null for any other metric.
        evaluationResult.Interpret(
            metric => metric switch
            {
                BooleanMetric booleanMetric => booleanMetric.InterpretScore(),
                _ => null,
            });

        return evaluationResult;
    }
}
Lines changed: 72 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,72 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Net.Http;
6+
using System.Threading;
7+
using System.Threading.Tasks;
8+
9+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
10+
#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
/// <summary>
/// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
/// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
/// harmful content such as violence, hate speech, etc.
/// </summary>
/// <remarks>
/// The annotation task passed to <see cref="ContentSafetyEvaluator"/> is always <c>"content harm"</c> and the payload
/// format is always <c>Conversation</c>; derived types only choose the evaluator name, the metric names and the
/// service configuration.
/// </remarks>
/// <param name="evaluatorName">The name of the derived <see cref="ContentHarmEvaluator"/>.</param>
/// <param name="metricName">
/// The name of the <see cref="EvaluationMetric"/> produced by the derived <see cref="ContentHarmEvaluator"/>.
/// </param>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project that should be used and credentials that should be used when this
/// <see cref="ContentHarmEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServiceMetricName">
/// The name of the metric that should be used when this <see cref="ContentHarmEvaluator"/> communicates with the
/// Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="httpClient">
/// The <see cref="System.Net.Http.HttpClient"/> that should be used when communicating with the Azure AI Content
/// Safety service. While the parameter is optional, it is recommended to supply an
/// <see cref="System.Net.Http.HttpClient"/> that is configured with robust resilience and retry policies.
/// </param>
public abstract class ContentHarmEvaluator(
    string evaluatorName,
    string metricName,
    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
    string contentSafetyServiceMetricName,
    HttpClient? httpClient = null)
        : ContentSafetyEvaluator(
            evaluatorName,
            metricName,
            contentSafetyServiceConfiguration,
            contentSafetyServiceAnnotationTask: "content harm",
            contentSafetyServiceMetricName,
            contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.Conversation.ToString(),
            httpClient)
#pragma warning restore S1694
{
    /// <inheritdoc/>
    public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Only the conversation, the response and the cancellation token are forwarded to the base helper.
        EvaluationResult result =
            await EvaluateContentSafetyAsync(
                messages,
                modelResponse,
                cancellationToken: cancellationToken).ConfigureAwait(false);

        // Numeric metrics are interpreted via InterpretHarmScore(); the callback yields null for any other metric.
        result.Interpret(metric => metric is NumericMetric numericMetric ? numericMetric.InterpretHarmScore() : null);

        return result;
    }
}
Lines changed: 103 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,103 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#pragma warning disable S3604
5+
// S3604: Member initializer values should not be redundant.
6+
// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
7+
// constructor syntax.
8+
9+
using System;
10+
using System.Collections.Generic;
11+
using System.Net.Http;
12+
using System.Threading;
13+
using System.Threading.Tasks;
14+
15+
namespace Microsoft.Extensions.AI.Evaluation.Safety;
16+
/// <summary>
/// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
/// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
/// unsafe content such as protected material, ungrounded answers, harmful content etc.
/// </summary>
/// <param name="evaluatorName">The name of the derived <see cref="ContentSafetyEvaluator"/>.</param>
/// <param name="metricName">
/// The name of the <see cref="EvaluationMetric"/> produced by the derived <see cref="ContentSafetyEvaluator"/>.
/// </param>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project that should be used and credentials that should be used when this
/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServiceAnnotationTask">
/// The name of the annotation task that should be used when this <see cref="ContentSafetyEvaluator"/> communicates
/// with the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServiceMetricName">
/// The name of the metric that should be used when this <see cref="ContentSafetyEvaluator"/> communicates with the
/// Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServicePayloadFormat">
/// An identifier that specifies the format of the payload that should be used when this
/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
/// The value is parsed as the name of a <c>ContentSafetyServicePayloadFormat</c> member when the evaluator is
/// constructed.
/// </param>
/// <param name="httpClient">
/// The <see cref="System.Net.Http.HttpClient"/> that should be used when communicating with the Azure AI Content
/// Safety service. While the parameter is optional, it is recommended to supply an
/// <see cref="System.Net.Http.HttpClient"/> that is configured with robust resilience and retry policies.
/// </param>
public abstract class ContentSafetyEvaluator(
    string evaluatorName,
    string metricName,
    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
    string contentSafetyServiceAnnotationTask,
    string contentSafetyServiceMetricName,
    string contentSafetyServicePayloadFormat,
    HttpClient? httpClient = null) : IEvaluator
{
    private readonly ContentSafetyService _service =
        new ContentSafetyService(
            contentSafetyServiceConfiguration,
            evaluatorName,
            contentSafetyServiceAnnotationTask,
            contentSafetyServiceMetricName,
#if NET
            Enum.Parse<ContentSafetyServicePayloadFormat>(contentSafetyServicePayloadFormat),
#else
            (ContentSafetyServicePayloadFormat)Enum.Parse(typeof(ContentSafetyServicePayloadFormat), contentSafetyServicePayloadFormat),
#endif
            metricName,
            httpClient);

    /// <inheritdoc/>
    // Built once at construction instead of allocating a new single-element collection on every read.
    public IReadOnlyCollection<string> EvaluationMetricNames { get; } = [metricName];

    /// <inheritdoc/>
    public abstract ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default);

    /// <summary>
    /// Evaluates the supplied <paramref name="modelResponse"/> using the Azure AI Content Safety Service and returns
    /// an <see cref="EvaluationResult"/> containing one or more <see cref="EvaluationMetric"/>s.
    /// </summary>
    /// <remarks>
    /// All arguments are forwarded unchanged to the underlying content safety service instance created for this
    /// evaluator.
    /// </remarks>
    /// <param name="messages">
    /// The conversation history including the request that produced the supplied <paramref name="modelResponse"/>.
    /// </param>
    /// <param name="modelResponse">The response that is to be evaluated.</param>
    /// <param name="additionalContext">
    /// Additional contextual information (beyond that which is available in <paramref name="messages"/>) that the
    /// <see cref="IEvaluator"/> may need to accurately evaluate the supplied <paramref name="modelResponse"/>.
    /// </param>
    /// <param name="cancellationToken">
    /// A <see cref="CancellationToken"/> that can cancel the evaluation operation.
    /// </param>
    /// <returns>An <see cref="EvaluationResult"/> containing one or more <see cref="EvaluationMetric"/>s.</returns>
    protected ValueTask<EvaluationResult> EvaluateContentSafetyAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        IEnumerable<string?>? additionalContext = null,
        CancellationToken cancellationToken = default)
            => _service.EvaluateAsync(messages, modelResponse, additionalContext, cancellationToken);
}

0 commit comments

Comments
 (0)