Skip to content

Commit 51221ff

Browse files
committed
feat(ai): support URL to base64 conversion in tool result outputs
Add support for converting URLs to base64 in tool result outputs when the model doesn't support URLs. This enables tools to return media content as URLs, which are automatically downloaded and converted to base64 during prompt conversion.

Changes:
- Extract media URLs from tool result content for downloading
- Convert downloaded media to base64 in tool result outputs
- Add test coverage for URL conversion in tool results

This allows tools such as screenshot or image-generation tools to return URLs instead of base64 data, keeping the tool implementation cleaner while ensuring compatibility with models that don't support URLs.
1 parent ca67e5a commit 51221ff

File tree

2 files changed

+147
-3
lines changed

2 files changed

+147
-3
lines changed

packages/ai/src/prompt/convert-to-language-model-prompt.test.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1366,5 +1366,70 @@ describe('convertToLanguageModelMessage', () => {
13661366
}
13671367
`);
13681368
});
1369+
1370+
it('should convert URL in tool result content to base64', async () => {
1371+
const result = await convertToLanguageModelPrompt({
1372+
prompt: {
1373+
messages: [
1374+
{
1375+
role: 'tool',
1376+
content: [
1377+
{
1378+
type: 'tool-result',
1379+
toolName: 'screenshot',
1380+
toolCallId: 'call-123',
1381+
output: {
1382+
type: 'content',
1383+
value: [
1384+
{ type: 'text', text: 'Screenshot captured' },
1385+
{
1386+
type: 'media',
1387+
data: 'https://example.com/screenshot.png',
1388+
mediaType: 'image/png',
1389+
},
1390+
],
1391+
},
1392+
},
1393+
],
1394+
},
1395+
],
1396+
},
1397+
supportedUrls: {},
1398+
downloadImplementation: async ({ url }) => {
1399+
expect(url).toEqual(new URL('https://example.com/screenshot.png'));
1400+
return {
1401+
data: new Uint8Array([137, 80, 78, 71]), // PNG magic bytes
1402+
mediaType: 'image/png',
1403+
};
1404+
},
1405+
});
1406+
1407+
expect(result).toEqual([
1408+
{
1409+
role: 'tool',
1410+
content: [
1411+
{
1412+
type: 'tool-result',
1413+
toolCallId: 'call-123',
1414+
toolName: 'screenshot',
1415+
output: {
1416+
type: 'content',
1417+
value: [
1418+
{ type: 'text', text: 'Screenshot captured' },
1419+
{
1420+
type: 'media',
1421+
data: 'iVBORw==', // base64 of [137, 80, 78, 71]
1422+
mediaType: 'image/png',
1423+
},
1424+
],
1425+
},
1426+
providerOptions: undefined,
1427+
},
1428+
],
1429+
providerOptions: undefined,
1430+
},
1431+
]);
1432+
});
1433+
13691434
});
13701435
});

packages/ai/src/prompt/convert-to-language-model-prompt.ts

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@ import {
33
LanguageModelV2Message,
44
LanguageModelV2Prompt,
55
LanguageModelV2TextPart,
6+
LanguageModelV2ToolResultOutput,
67
} from '@ai-sdk/provider';
78
import {
9+
convertToBase64,
810
DataContent,
911
FilePart,
1012
ImagePart,
@@ -153,7 +155,10 @@ export function convertToLanguageModelMessage({
153155
type: 'tool-result' as const,
154156
toolCallId: part.toolCallId,
155157
toolName: part.toolName,
156-
output: part.output,
158+
output: convertOutputToLanguageModelOutput(
159+
part.output,
160+
downloadedAssets,
161+
),
157162
providerOptions,
158163
};
159164
}
@@ -170,7 +175,10 @@ export function convertToLanguageModelMessage({
170175
type: 'tool-result' as const,
171176
toolCallId: part.toolCallId,
172177
toolName: part.toolName,
173-
output: part.output,
178+
output: convertOutputToLanguageModelOutput(
179+
part.output,
180+
downloadedAssets,
181+
),
174182
providerOptions: part.providerOptions,
175183
})),
176184
providerOptions: message.providerOptions,
@@ -233,9 +241,45 @@ async function downloadAssets(
233241
)
234242
.map(part => part.data);
235243

244+
const toolUrls = messages
245+
.filter(message => message.role === 'tool')
246+
.map(message => message.content)
247+
.flat()
248+
.filter(item => item.type === 'tool-result')
249+
.flatMap(item => {
250+
if (item.output.type === 'content') {
251+
const results = item.output.value;
252+
return results
253+
.map(result => {
254+
if (result.type === 'media' && result.data && result.mediaType) {
255+
const url =
256+
typeof result.data === 'string' ? new URL(result.data) : null;
257+
if (url instanceof URL) {
258+
return { ...result, data: url };
259+
}
260+
}
261+
return null;
262+
})
263+
.filter(url => url !== null);
264+
}
265+
return null;
266+
})
267+
.filter(
268+
item =>
269+
item &&
270+
!isUrlSupported({
271+
url: item.data.toString(),
272+
mediaType: item.mediaType,
273+
supportedUrls,
274+
}),
275+
)
276+
.map(item => item!.data);
277+
278+
const allUrls = [...urls, ...toolUrls];
279+
236280
// download in parallel:
237281
const downloadedImages = await Promise.all(
238-
urls.map(async url => ({
282+
allUrls.map(async url => ({
239283
url,
240284
data: await downloadImplementation({ url }),
241285
})),
@@ -336,3 +380,38 @@ function convertPartToLanguageModelPart(
336380
}
337381
}
338382
}
383+
384+
/**
 * Rewrites a tool result output so that media parts referencing a downloaded
 * URL carry the downloaded bytes as base64 instead.
 *
 * Only outputs of type `content` are inspected; any other output is returned
 * unchanged. Within a `content` output, a `media` part is replaced only when
 * its data converts to a `URL` that has an entry in `downloadedAssets`; every
 * other part is passed through as-is.
 *
 * @param output - The tool result output to convert.
 * @param downloadedAssets - Downloaded files, looked up by URL string.
 * @returns The output with downloaded media URLs inlined as base64.
 */
function convertOutputToLanguageModelOutput(
  output: LanguageModelV2ToolResultOutput,
  downloadedAssets: Record<
    string,
    { mediaType: string | undefined; data: Uint8Array }
  >,
): LanguageModelV2ToolResultOutput {
  if (output.type !== 'content') {
    return output;
  }

  const value = output.value.map(part => {
    // Only media parts that carry both data and a media type are candidates.
    if (part.type !== 'media' || !part.data || !part.mediaType) {
      return part;
    }

    const converted = convertToLanguageModelV2DataContent(part.data);
    if (!(converted.data instanceof URL)) {
      return part;
    }

    // No downloaded entry for this URL: keep the part untouched.
    const asset = downloadedAssets[converted.data.toString()];
    if (!asset) {
      return part;
    }

    return {
      type: 'media' as const,
      data: convertToBase64(asset.data),
      // Prefer the media type reported by the download, then the one derived
      // from the data content, then the one declared on the part.
      mediaType: asset.mediaType ?? converted.mediaType ?? part.mediaType,
    };
  });

  return { type: 'content' as const, value };
}

0 commit comments

Comments
 (0)