Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
// constructor syntax.

using System.Collections.Generic;

namespace Microsoft.Extensions.AI.Evaluation.Quality;

/// <summary>
Expand All @@ -29,4 +31,8 @@ public sealed class EquivalenceEvaluatorContext(string groundTruth) : Evaluation
/// the response supplied via <see cref="GroundTruth"/>.
/// </remarks>
public string GroundTruth { get; } = groundTruth;

/// <inheritdoc/>
public override IReadOnlyList<AIContent> GetContents()
{
    // The ground truth is exposed as a single text content item.
    TextContent groundTruthContent = new TextContent(GroundTruth);
    return [groundTruthContent];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
// We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
// constructor syntax.

using System.Collections.Generic;

namespace Microsoft.Extensions.AI.Evaluation.Quality;

/// <summary>
Expand All @@ -29,4 +31,8 @@ public sealed class GroundednessEvaluatorContext(string groundingContext) : Eval
/// in the information present in the supplied <see cref="GroundingContext"/>.
/// </remarks>
public string GroundingContext { get; } = groundingContext;

/// <inheritdoc/>
public override IReadOnlyList<AIContent> GetContents()
{
    // The grounding context is exposed as a single text content item.
    TextContent groundingContextContent = new TextContent(GroundingContext);
    return [groundingContextContent];
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,9 @@ await JsonOutputFixer.RepairJsonAsync(
result.AddDiagnosticToAllMetrics(
EvaluationDiagnostic.Error(
$"""
Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
{evaluationResponseText}
"""));
Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
{evaluationResponseText}
"""));
}
else
{
Expand All @@ -186,10 +186,10 @@ await JsonOutputFixer.RepairJsonAsync(
result.AddDiagnosticToAllMetrics(
EvaluationDiagnostic.Error(
$"""
Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
{evaluationResponseText}
{ex}
"""));
Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
{evaluationResponseText}
{ex}
"""));
}
}
}
Expand All @@ -211,28 +211,28 @@ void UpdateResult()

if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
{
commonMetadata["rtc-evaluation-model-used"] = evaluationResponse.ModelId!;
commonMetadata["evaluation-model-used"] = evaluationResponse.ModelId!;
}

if (evaluationResponse.Usage is UsageDetails usage)
{
if (usage.InputTokenCount is not null)
{
commonMetadata["rtc-evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
commonMetadata["evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
}

if (usage.OutputTokenCount is not null)
{
commonMetadata["rtc-evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
commonMetadata["evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
}

if (usage.TotalTokenCount is not null)
{
commonMetadata["rtc-evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
commonMetadata["evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
}
}

commonMetadata["rtc-evaluation-duration"] = duration;
commonMetadata["evaluation-duration"] = duration;

NumericMetric relevance = result.Get<NumericMetric>(RelevanceMetricName);
relevance.Value = rating.Relevance;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ await ResponseCacheProvider.GetCacheAsync(

private static IEnumerable<string> GetCachingKeysForChatClient(IChatClient chatClient)
{
var metadata = chatClient.GetService<ChatClientMetadata>();
ChatClientMetadata? metadata = chatClient.GetService<ChatClientMetadata>();

string? providerName = metadata?.ProviderName;
if (!string.IsNullOrWhiteSpace(providerName))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails;
<div className={classes.section}>
<div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
{isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
<h3 className={classes.sectionHeaderText}>LLM Chat Diagnostic Details</h3>
<h3 className={classes.sectionHeaderText}>Diagnostic Data</h3>
{hasCacheStatus && (
<div className={classes.hint}>
{cachedTurns != totalTurns ?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.AI.Evaluation.Safety;

Expand All @@ -31,26 +31,17 @@ namespace Microsoft.Extensions.AI.Evaluation.Safety;
/// will be ignored.
/// </para>
/// </remarks>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project that should be used and credentials that should be used when this
/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform
/// evaluations.
/// </param>
public sealed class CodeVulnerabilityEvaluator(ContentSafetyServiceConfiguration contentSafetyServiceConfiguration)
public sealed class CodeVulnerabilityEvaluator()
: ContentSafetyEvaluator(
contentSafetyServiceConfiguration,
contentSafetyServiceAnnotationTask: "code vulnerability",
evaluatorName: nameof(CodeVulnerabilityEvaluator))
metricNames: new Dictionary<string, string> { ["code_vulnerability"] = CodeVulnerabilityMetricName })
{
/// <summary>
/// Gets the <see cref="EvaluationMetric.Name"/> under which <see cref="CodeVulnerabilityEvaluator"/> reports its
/// <see cref="BooleanMetric"/>.
/// </summary>
public static string CodeVulnerabilityMetricName
{
    get { return "Code Vulnerability"; }
}

/// <inheritdoc/>
public override IReadOnlyCollection<string> EvaluationMetricNames => [CodeVulnerabilityMetricName];

/// <inheritdoc/>
public override async ValueTask<EvaluationResult> EvaluateAsync(
IEnumerable<ChatMessage> messages,
Expand All @@ -59,30 +50,18 @@ public override async ValueTask<EvaluationResult> EvaluateAsync(
IEnumerable<EvaluationContext>? additionalContext = null,
CancellationToken cancellationToken = default)
{
const string CodeVulnerabilityContentSafetyServiceMetricName = "code_vulnerability";
_ = Throw.IfNull(chatConfiguration);
_ = Throw.IfNull(modelResponse);

EvaluationResult result =
await EvaluateContentSafetyAsync(
chatConfiguration.ChatClient,
messages,
modelResponse,
additionalContext,
contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.ContextCompletion.ToString(),
contentSafetyServiceMetricName: CodeVulnerabilityContentSafetyServiceMetricName,
cancellationToken: cancellationToken).ConfigureAwait(false);

IEnumerable<EvaluationMetric> updatedMetrics =
result.Metrics.Values.Select(
metric =>
{
if (metric.Name == CodeVulnerabilityContentSafetyServiceMetricName)
{
metric.Name = CodeVulnerabilityMetricName;
}

return metric;
});

result = new EvaluationResult(updatedMetrics);
result.Interpret(metric => metric is BooleanMetric booleanMetric ? booleanMetric.InterpretScore() : null);
return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,27 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;

namespace Microsoft.Extensions.AI.Evaluation.Safety;

#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
/// <summary>
/// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
/// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
/// harmful content such as violence, hate speech, etc.
/// </summary>
/// <param name="contentSafetyServiceConfiguration">
/// Specifies the Azure AI project that should be used and credentials that should be used when this
/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
/// </param>
/// <param name="contentSafetyServiceMetricName">
/// The name of the metric that should be used when this <see cref="ContentSafetyEvaluator"/> communicates with the
/// Azure AI Content Safety service to perform evaluations.
/// <param name="metricNames">
/// A dictionary that maps the names of the metrics used when communicating with the Azure AI Content Safety
/// service to the <see cref="EvaluationMetric.Name"/>s of the <see cref="EvaluationMetric"/>s returned by this
/// <see cref="IEvaluator"/>.
/// </param>
/// <param name="metricName">
/// The name of the <see cref="EvaluationMetric"/> produced by this <see cref="ContentSafetyEvaluator"/>.
/// </param>
/// <param name="evaluatorName">The name of the derived <see cref="ContentSafetyEvaluator"/>.</param>
public abstract class ContentHarmEvaluator(
ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
string contentSafetyServiceMetricName,
string metricName,
string evaluatorName)
: ContentSafetyEvaluator(
contentSafetyServiceConfiguration,
contentSafetyServiceAnnotationTask: "content harm",
evaluatorName)
#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
public abstract class ContentHarmEvaluator(IDictionary<string, string> metricNames)
: ContentSafetyEvaluator(contentSafetyServiceAnnotationTask: "content harm", metricNames)
#pragma warning restore S1694
{
/// <inheritdoc/>
public override IReadOnlyCollection<string> EvaluationMetricNames => [metricName];

/// <inheritdoc/>
public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
IEnumerable<ChatMessage> messages,
Expand All @@ -48,28 +31,21 @@ public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
IEnumerable<EvaluationContext>? additionalContext = null,
CancellationToken cancellationToken = default)
{
_ = Throw.IfNull(chatConfiguration);
_ = Throw.IfNull(modelResponse);

EvaluationResult result =
await EvaluateContentSafetyAsync(
chatConfiguration.ChatClient,
messages,
modelResponse,
additionalContext,
contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.Conversation.ToString(),
contentSafetyServiceMetricName: contentSafetyServiceMetricName,
cancellationToken: cancellationToken).ConfigureAwait(false);

IEnumerable<EvaluationMetric> updatedMetrics =
result.Metrics.Values.Select(
metric =>
{
if (metric.Name == contentSafetyServiceMetricName)
{
metric.Name = metricName;
}

return metric;
});
result.Interpret(
metric => metric is NumericMetric numericMetric ? numericMetric.InterpretContentHarmScore() : null);

result = new EvaluationResult(updatedMetrics);
result.Interpret(metric => metric is NumericMetric numericMetric ? numericMetric.InterpretHarmScore() : null);
return result;
}
}
Loading
Loading