feat(ai): support URL to base64 conversion in tool result outputs

xinyao27 · xinyao27 · commit 51221ff90942 · 2025-08-24T20:13:57.000+08:00
Add support for converting URLs to base64 in tool result outputs when the model
doesn't support URLs. This enables tools to return media content as URLs which
will be automatically downloaded and converted to base64 during prompt conversion.

Changes:
- Extract media URLs from tool result content for downloading
- Convert downloaded media to base64 in tool result outputs
- Add test coverage for URL conversion in tool results

This allows tools like screenshot or image generation tools to return URLs
instead of base64 data, making the tool implementation cleaner while ensuring
compatibility with models that don't support URLs.
diff --git a/packages/ai/src/prompt/convert-to-language-model-prompt.test.ts b/packages/ai/src/prompt/convert-to-language-model-prompt.test.ts
@@ -1366,5 +1366,70 @@ describe('convertToLanguageModelMessage', () => {
         }
       `);
     });
+
+    it('should convert URL in tool result content to base64', async () => {
+      const result = await convertToLanguageModelPrompt({
+        prompt: {
+          messages: [
+            {
+              role: 'tool',
+              content: [
+                {
+                  type: 'tool-result',
+                  toolName: 'screenshot',
+                  toolCallId: 'call-123',
+                  output: {
+                    type: 'content',
+                    value: [
+                      { type: 'text', text: 'Screenshot captured' },
+                      {
+                        type: 'media',
+                        data: 'https://example.com/screenshot.png', // URL, not base64
+                        mediaType: 'image/png',
+                      },
+                    ],
+                  },
+                },
+              ],
+            },
+          ],
+        },
+        supportedUrls: {}, // empty: no URLs are declared as supported
+        downloadImplementation: async ({ url }) => { // stub: returns fixed bytes
+          expect(url).toEqual(new URL('https://example.com/screenshot.png'));
+          return {
+            data: new Uint8Array([137, 80, 78, 71]), // PNG magic bytes
+            mediaType: 'image/png',
+          };
+        },
+      });
+
+      expect(result).toEqual([ // text part unchanged; media URL replaced by base64
+        {
+          role: 'tool',
+          content: [
+            {
+              type: 'tool-result',
+              toolCallId: 'call-123',
+              toolName: 'screenshot',
+              output: {
+                type: 'content',
+                value: [
+                  { type: 'text', text: 'Screenshot captured' },
+                  {
+                    type: 'media',
+                    data: 'iVBORw==', // base64 of [137, 80, 78, 71]
+                    mediaType: 'image/png',
+                  },
+                ],
+              },
+              providerOptions: undefined,
+            },
+          ],
+          providerOptions: undefined,
+        },
+      ]);
+    });
+
   });
 });
diff --git a/packages/ai/src/prompt/convert-to-language-model-prompt.ts b/packages/ai/src/prompt/convert-to-language-model-prompt.ts
@@ -3,8 +3,10 @@ import {
   LanguageModelV2Message,
   LanguageModelV2Prompt,
   LanguageModelV2TextPart,
+  LanguageModelV2ToolResultOutput,
 } from '@ai-sdk/provider';
 import {
+  convertToBase64,
   DataContent,
   FilePart,
   ImagePart,
@@ -153,7 +155,10 @@ export function convertToLanguageModelMessage({
                   type: 'tool-result' as const,
                   toolCallId: part.toolCallId,
                   toolName: part.toolName,
-                  output: part.output,
+                  output: convertOutputToLanguageModelOutput(
+                    part.output,
+                    downloadedAssets,
+                  ),
                   providerOptions,
                 };
               }
@@ -170,7 +175,10 @@ export function convertToLanguageModelMessage({
           type: 'tool-result' as const,
           toolCallId: part.toolCallId,
           toolName: part.toolName,
-          output: part.output,
+          output: convertOutputToLanguageModelOutput(
+            part.output,
+            downloadedAssets,
+          ),
           providerOptions: part.providerOptions,
         })),
         providerOptions: message.providerOptions,
@@ -233,9 +241,45 @@ async function downloadAssets(
     )
     .map(part => part.data);
 
+  // Media parts in tool results hold either a URL or inline base64 data, so
+  // only strings that actually parse as URLs are candidates for download.
+  const toolUrls = messages
+    .filter(message => message.role === 'tool')
+    .flatMap(message => message.content)
+    .filter(part => part.type === 'tool-result')
+    .flatMap(part => (part.output.type === 'content' ? part.output.value : []))
+    .flatMap(result => {
+      if (
+        result.type !== 'media' ||
+        typeof result.data !== 'string' ||
+        !result.data ||
+        !result.mediaType
+      ) {
+        return [];
+      }
+      try {
+        return [{ url: new URL(result.data), mediaType: result.mediaType }];
+      } catch {
+        // `new URL` throws a TypeError for strings that are not URLs, e.g.
+        // base64 payloads. Those are already inline, so skip them instead
+        // of throwing.
+        return [];
+      }
+    })
+    // URLs the model supports natively are left untouched, not downloaded.
+    .filter(
+      ({ url, mediaType }) =>
+        !isUrlSupported({ url: url.toString(), mediaType, supportedUrls }),
+    )
+    .map(({ url }) => url);
+
+  // Tool-result URLs are fetched in the same parallel batch as the
+  // previously collected `urls`.
+  const allUrls = [...urls, ...toolUrls];
+
   // download in parallel:
   const downloadedImages = await Promise.all(
-    urls.map(async url => ({
+    allUrls.map(async url => ({
       url,
       data: await downloadImplementation({ url }),
     })),
@@ -336,3 +380,38 @@ function convertPartToLanguageModelPart(
     }
   }
 }
+
+/** Inlines downloaded media URLs of a tool result output as base64 data. */
+function convertOutputToLanguageModelOutput(
+  output: LanguageModelV2ToolResultOutput,
+  downloadedAssets: Record<
+    string,
+    { mediaType: string | undefined; data: Uint8Array }
+  >,
+): LanguageModelV2ToolResultOutput {
+  if (output.type !== 'content') {
+    return output;
+  }
+
+  const value = output.value.map(part => {
+    if (part.type !== 'media' || !part.data || !part.mediaType) {
+      return part;
+    }
+    const converted = convertToLanguageModelV2DataContent(part.data);
+    // Only URL data that was downloaded is inlined; the rest passes through.
+    const asset =
+      converted.data instanceof URL
+        ? downloadedAssets[converted.data.toString()]
+        : undefined;
+    if (!asset) {
+      return part;
+    }
+    return {
+      type: 'media' as const,
+      data: convertToBase64(asset.data),
+      mediaType: asset.mediaType ?? converted.mediaType ?? part.mediaType,
+    };
+  });
+
+  return { type: 'content' as const, value };
+}