dotnet · shyamnamboodiripad · Apr 11, 2025 · Apr 8, 2025 · Apr 8, 2025 · Apr 9, 2025
@@ -6,6 +6,8 @@
 // We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
 // constructor syntax.
 
+using System.Collections.Generic;
+
 namespace Microsoft.Extensions.AI.Evaluation.Quality;
 
 /// <summary>
@@ -29,4 +31,8 @@ public sealed class EquivalenceEvaluatorContext(string groundTruth) : Evaluation
     /// the response supplied via <see cref="GroundTruth"/>.
     /// </remarks>
     public string GroundTruth { get; } = groundTruth;
+
+    /// <inheritdoc/>
+    public override IReadOnlyList<AIContent> GetContents()
+        => [new TextContent(GroundTruth)];
 }
@@ -6,6 +6,8 @@
 // We disable this warning because it is a false positive arising from the analyzer's lack of support for C#'s primary
 // constructor syntax.
 
+using System.Collections.Generic;
+
 namespace Microsoft.Extensions.AI.Evaluation.Quality;
 
 /// <summary>
@@ -29,4 +31,8 @@ public sealed class GroundednessEvaluatorContext(string groundingContext) : Eval
     /// in the information present in the supplied <see cref="GroundingContext"/>.
     /// </remarks>
     public string GroundingContext { get; } = groundingContext;
+
+    /// <inheritdoc/>
+    public override IReadOnlyList<AIContent> GetContents()
+        => [new TextContent(GroundingContext)];
 }
@@ -171,9 +171,9 @@ await JsonOutputFixer.RepairJsonAsync(
                             result.AddDiagnosticToAllMetrics(
                                 EvaluationDiagnostic.Error(
                                     $"""
-                                Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
-                                {evaluationResponseText}
-                                """));
+                                    Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
+                                    {evaluationResponseText}
+                                    """));
                         }
                         else
                         {
@@ -186,10 +186,10 @@ await JsonOutputFixer.RepairJsonAsync(
                         result.AddDiagnosticToAllMetrics(
                             EvaluationDiagnostic.Error(
                                 $"""
-                            Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
-                            {evaluationResponseText}
-                            {ex}
-                            """));
+                                Failed to repair the following response from the model and parse scores for '{RelevanceMetricName}', '{TruthMetricName}' and '{CompletenessMetricName}'.:
+                                {evaluationResponseText}
+                                {ex}
+                                """));
                     }
                 }
             }
@@ -211,28 +211,28 @@ void UpdateResult()
 
             if (!string.IsNullOrWhiteSpace(evaluationResponse.ModelId))
             {
-                commonMetadata["rtc-evaluation-model-used"] = evaluationResponse.ModelId!;
+                commonMetadata["evaluation-model-used"] = evaluationResponse.ModelId!;
             }
 
             if (evaluationResponse.Usage is UsageDetails usage)
             {
                 if (usage.InputTokenCount is not null)
                 {
-                    commonMetadata["rtc-evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
+                    commonMetadata["evaluation-input-tokens-used"] = $"{usage.InputTokenCount}";
                 }
 
                 if (usage.OutputTokenCount is not null)
                 {
-                    commonMetadata["rtc-evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
+                    commonMetadata["evaluation-output-tokens-used"] = $"{usage.OutputTokenCount}";
                 }
 
                 if (usage.TotalTokenCount is not null)
                 {
-                    commonMetadata["rtc-evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
+                    commonMetadata["evaluation-total-tokens-used"] = $"{usage.TotalTokenCount}";
                 }
             }
 
-            commonMetadata["rtc-evaluation-duration"] = duration;
+            commonMetadata["evaluation-duration"] = duration;
 
             NumericMetric relevance = result.Get<NumericMetric>(RelevanceMetricName);
             relevance.Value = rating.Relevance;

@@ -263,7 +263,7 @@ await ResponseCacheProvider.GetCacheAsync(
 
     private static IEnumerable<string> GetCachingKeysForChatClient(IChatClient chatClient)
     {
-        var metadata = chatClient.GetService<ChatClientMetadata>();
+        ChatClientMetadata? metadata = chatClient.GetService<ChatClientMetadata>();
 
         string? providerName = metadata?.ProviderName;
         if (!string.IsNullOrWhiteSpace(providerName))

@@ -24,7 +24,7 @@ export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails;
         <div className={classes.section}>
             <div className={classes.sectionHeader} onClick={() => setIsExpanded(!isExpanded)}>
                 {isExpanded ? <ChevronDown12Regular /> : <ChevronRight12Regular />}
-                <h3 className={classes.sectionHeaderText}>LLM Chat Diagnostic Details</h3>
+                <h3 className={classes.sectionHeaderText}>Diagnostic Data</h3>
                 {hasCacheStatus && (
                     <div className={classes.hint}>
                         {cachedTurns != totalTurns ?

@@ -2,9 +2,9 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Linq;
 using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.Shared.Diagnostics;
 
 namespace Microsoft.Extensions.AI.Evaluation.Safety;
 
@@ -31,26 +31,17 @@ namespace Microsoft.Extensions.AI.Evaluation.Safety;
 /// will be ignored.
 /// </para>
 /// </remarks>
-/// <param name="contentSafetyServiceConfiguration">
-/// Specifies the Azure AI project that should be used and credentials that should be used when this
-/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform
-/// evaluations.
-/// </param>
-public sealed class CodeVulnerabilityEvaluator(ContentSafetyServiceConfiguration contentSafetyServiceConfiguration)
+public sealed class CodeVulnerabilityEvaluator()
     : ContentSafetyEvaluator(
-        contentSafetyServiceConfiguration,
         contentSafetyServiceAnnotationTask: "code vulnerability",
-        evaluatorName: nameof(CodeVulnerabilityEvaluator))
+        metricNames: new Dictionary<string, string> { ["code_vulnerability"] = CodeVulnerabilityMetricName })
 {
     /// <summary>
     /// Gets the <see cref="EvaluationMetric.Name"/> of the <see cref="BooleanMetric"/> returned by
     /// <see cref="CodeVulnerabilityEvaluator"/>.
     /// </summary>
     public static string CodeVulnerabilityMetricName => "Code Vulnerability";
 
-    /// <inheritdoc/>
-    public override IReadOnlyCollection<string> EvaluationMetricNames => [CodeVulnerabilityMetricName];
-
     /// <inheritdoc/>
     public override async ValueTask<EvaluationResult> EvaluateAsync(
         IEnumerable<ChatMessage> messages,
@@ -59,30 +50,18 @@ public override async ValueTask<EvaluationResult> EvaluateAsync(
         IEnumerable<EvaluationContext>? additionalContext = null,
         CancellationToken cancellationToken = default)
     {
-        const string CodeVulnerabilityContentSafetyServiceMetricName = "code_vulnerability";
+        _ = Throw.IfNull(chatConfiguration);
+        _ = Throw.IfNull(modelResponse);
 
         EvaluationResult result =
             await EvaluateContentSafetyAsync(
+                chatConfiguration.ChatClient,
                 messages,
                 modelResponse,
+                additionalContext,
                 contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.ContextCompletion.ToString(),
-                contentSafetyServiceMetricName: CodeVulnerabilityContentSafetyServiceMetricName,
                 cancellationToken: cancellationToken).ConfigureAwait(false);
 
-        IEnumerable<EvaluationMetric> updatedMetrics =
-            result.Metrics.Values.Select(
-                metric =>
-                {
-                    if (metric.Name == CodeVulnerabilityContentSafetyServiceMetricName)
-                    {
-                        metric.Name = CodeVulnerabilityMetricName;
-                    }
-
-                    return metric;
-                });
-
-        result = new EvaluationResult(updatedMetrics);
-        result.Interpret(metric => metric is BooleanMetric booleanMetric ? booleanMetric.InterpretScore() : null);
         return result;
     }
 }
@@ -2,44 +2,27 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System.Collections.Generic;
-using System.Linq;
 using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.Shared.Diagnostics;
 
 namespace Microsoft.Extensions.AI.Evaluation.Safety;
 
-#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
 /// <summary>
 /// An <see langword="abstract"/> base class that can be used to implement <see cref="IEvaluator"/>s that utilize the
 /// Azure AI Content Safety service to evaluate responses produced by an AI model for the presence of a variety of
 /// harmful content such as violence, hate speech, etc.
 /// </summary>
-/// <param name="contentSafetyServiceConfiguration">
-/// Specifies the Azure AI project that should be used and credentials that should be used when this
-/// <see cref="ContentSafetyEvaluator"/> communicates with the Azure AI Content Safety service to perform evaluations.
-/// </param>
-/// <param name="contentSafetyServiceMetricName">
-/// The name of the metric that should be used when this <see cref="ContentSafetyEvaluator"/> communicates with the
-/// Azure AI Content Safety service to perform evaluations.
+/// <param name="metricNames">
+/// A dictionary containing the mapping from the names of the metrics that are used when communicating with the Azure
+/// AI Content Safety service to the <see cref="EvaluationMetric.Name"/>s of the <see cref="EvaluationMetric"/>s
+/// returned by this <see cref="IEvaluator"/>.
 /// </param>
-/// <param name="metricName">
-/// The name of the <see cref="EvaluationMetric"/> produced by this <see cref="ContentSafetyEvaluator"/>.
-/// </param>
-/// <param name="evaluatorName">The name of the derived <see cref="ContentSafetyEvaluator"/>.</param>
-public abstract class ContentHarmEvaluator(
-    ContentSafetyServiceConfiguration contentSafetyServiceConfiguration,
-    string contentSafetyServiceMetricName,
-    string metricName,
-    string evaluatorName)
-        : ContentSafetyEvaluator(
-            contentSafetyServiceConfiguration,
-            contentSafetyServiceAnnotationTask: "content harm",
-            evaluatorName)
+#pragma warning disable S1694 // An abstract class should have both abstract and concrete methods
+public abstract class ContentHarmEvaluator(IDictionary<string, string> metricNames)
+    : ContentSafetyEvaluator(contentSafetyServiceAnnotationTask: "content harm", metricNames)
 #pragma warning restore S1694
 {
-    /// <inheritdoc/>
-    public override IReadOnlyCollection<string> EvaluationMetricNames => [metricName];
-
     /// <inheritdoc/>
     public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
         IEnumerable<ChatMessage> messages,
@@ -48,28 +31,21 @@ public sealed override async ValueTask<EvaluationResult> EvaluateAsync(
         IEnumerable<EvaluationContext>? additionalContext = null,
         CancellationToken cancellationToken = default)
     {
+        _ = Throw.IfNull(chatConfiguration);
+        _ = Throw.IfNull(modelResponse);
+
         EvaluationResult result =
             await EvaluateContentSafetyAsync(
+                chatConfiguration.ChatClient,
                 messages,
                 modelResponse,
+                additionalContext,
                 contentSafetyServicePayloadFormat: ContentSafetyServicePayloadFormat.Conversation.ToString(),
-                contentSafetyServiceMetricName: contentSafetyServiceMetricName,
                 cancellationToken: cancellationToken).ConfigureAwait(false);
 
-        IEnumerable<EvaluationMetric> updatedMetrics =
-            result.Metrics.Values.Select(
-                metric =>
-                {
-                    if (metric.Name == contentSafetyServiceMetricName)
-                    {
-                        metric.Name = metricName;
-                    }
-
-                    return metric;
-                });
+        result.Interpret(
+            metric => metric is NumericMetric numericMetric ? numericMetric.InterpretContentHarmScore() : null);
 
-        result = new EvaluationResult(updatedMetrics);
-        result.Interpret(metric => metric is NumericMetric numericMetric ? numericMetric.InterpretHarmScore() : null);
         return result;
     }
 }