diff --git a/src/tools/look-at/tools.test.ts b/src/tools/look-at/tools.test.ts
index ee6fcc388..8b2f040d9 100644
--- a/src/tools/look-at/tools.test.ts
+++ b/src/tools/look-at/tools.test.ts
@@ -31,25 +31,52 @@ describe("look-at tool", () => {
       const normalized = normalizeArgs(args as any)
       expect(normalized.file_path).toBe("/preferred.png")
     })
+
+    // given image_data provided
+    // when called with base64 image data
+    // then preserve image_data in normalized args
+    test("preserves image_data when provided", () => {
+      const args = { image_data: "data:image/png;base64,iVBORw0KGgo=", goal: "analyze" }
+      const normalized = normalizeArgs(args as any)
+      expect(normalized.image_data).toBe("data:image/png;base64,iVBORw0KGgo=")
+      expect(normalized.file_path).toBeUndefined()
+    })
   })
 
   describe("validateArgs", () => {
-    // given valid arguments
+    // given valid arguments with file_path
     // when validated
     // then return null (no error)
-    test("returns null for valid args", () => {
+    test("returns null for valid args with file_path", () => {
       const args = { file_path: "/valid/path.png", goal: "analyze" }
       expect(validateArgs(args)).toBeNull()
     })
 
-    // given file_path missing
+    // given valid arguments with image_data
+    // when validated
+    // then return null (no error)
+    test("returns null for valid args with image_data", () => {
+      const args = { image_data: "data:image/png;base64,iVBORw0KGgo=", goal: "analyze" }
+      expect(validateArgs(args)).toBeNull()
+    })
+
+    // given neither file_path nor image_data
     // when validated
     // then clear error message
-    test("returns error when file_path is missing", () => {
+    test("returns error when neither file_path nor image_data provided", () => {
       const args = { goal: "analyze" } as any
       const error = validateArgs(args)
       expect(error).toContain("file_path")
-      expect(error).toContain("required")
+      expect(error).toContain("image_data")
+    })
+
+    // given both file_path and image_data
+    // when validated
+    // then return error (mutually exclusive)
+    test("returns error when both file_path and image_data provided", () => {
+      const args = { file_path: "/path.png", image_data: "base64data", goal: "analyze" }
+      const error = validateArgs(args)
+      expect(error).toContain("only one")
     })
 
     // given goal missing
@@ -69,6 +96,17 @@ describe("look-at tool", () => {
       const args = { file_path: "", goal: "analyze" }
       const error = validateArgs(args)
       expect(error).toContain("file_path")
+      expect(error).toContain("image_data")
+    })
+
+    // given image_data is empty string
+    // when validated
+    // then return error
+    test("returns error when image_data is empty string", () => {
+      const args = { image_data: "", goal: "analyze" }
+      const error = validateArgs(args)
+      expect(error).toContain("file_path")
+      expect(error).toContain("image_data")
     })
   })
 
@@ -109,7 +147,7 @@ describe("look-at tool", () => {
         toolContext
       )
 
-      expect(result).toContain("Error: Failed to analyze file")
+      expect(result).toContain("Error: Failed to analyze")
       expect(result).toContain("malformed response")
       expect(result).toContain("multimodal-looker")
       expect(result).toContain("image/png")
@@ -217,4 +255,111 @@ describe("look-at tool", () => {
       })
     })
   })
+
+  describe("createLookAt with image_data", () => {
+    // given base64 image data is provided
+    // when LookAt tool executed
+    // then should send data URL to session.prompt
+    test("sends data URL when image_data provided", async () => {
+      let promptBody: any
+
+      const mockClient = {
+        app: {
+          agents: async () => ({ data: [] }),
+        },
+        session: {
+          get: async () => ({ data: { directory: "/project" } }),
+          create: async () => ({ data: { id: "ses_image_data_test" } }),
+          prompt: async (input: any) => {
+            promptBody = input.body
+            return { data: {} }
+          },
+          messages: async () => ({
+            data: [
+              { info: { role: "assistant", time: { created: 1 } }, parts: [{ type: "text", text: "analyzed" }] },
+            ],
+          }),
+        },
+      }
+
+      const tool = createLookAt({
+        client: mockClient,
+        directory: "/project",
+      } as any)
+
+      const toolContext: ToolContext = {
+        sessionID: "parent-session",
+        messageID: "parent-message",
+        agent: "sisyphus",
+        directory: "/project",
+        worktree: "/project",
+        abort: new AbortController().signal,
+        metadata: () => {},
+        ask: async () => {},
+      }
+
+      await tool.execute(
+        { image_data: "data:image/png;base64,iVBORw0KGgo=", goal: "describe this image" },
+        toolContext
+      )
+
+      const filePart = promptBody.parts.find((p: any) => p.type === "file")
+      expect(filePart).toBeDefined()
+      expect(filePart.url).toBe("data:image/png;base64,iVBORw0KGgo=")
+      expect(filePart.mime).toBe("image/png")
+      expect(filePart.filename).toContain("clipboard-image")
+    })
+
+    // given raw base64 without data URI prefix
+    // when LookAt tool executed
+    // then should detect mime type and create proper data URL
+    test("handles raw base64 without data URI prefix", async () => {
+      let promptBody: any
+
+      const mockClient = {
+        app: {
+          agents: async () => ({ data: [] }),
+        },
+        session: {
+          get: async () => ({ data: { directory: "/project" } }),
+          create: async () => ({ data: { id: "ses_raw_base64_test" } }),
+          prompt: async (input: any) => {
+            promptBody = input.body
+            return { data: {} }
+          },
+          messages: async () => ({
+            data: [
+              { info: { role: "assistant", time: { created: 1 } }, parts: [{ type: "text", text: "analyzed" }] },
+            ],
+          }),
+        },
+      }
+
+      const tool = createLookAt({
+        client: mockClient,
+        directory: "/project",
+      } as any)
+
+      const toolContext: ToolContext = {
+        sessionID: "parent-session",
+        messageID: "parent-message",
+        agent: "sisyphus",
+        directory: "/project",
+        worktree: "/project",
+        abort: new AbortController().signal,
+        metadata: () => {},
+        ask: async () => {},
+      }
+
+      await tool.execute(
+        { image_data: "/9j/4AAQSkZJRg==", goal: "analyze" },
+        toolContext
+      )
+
+      const filePart = promptBody.parts.find((p: any) => p.type === "file")
+      expect(filePart).toBeDefined()
+      expect(filePart.mime).toBe("image/jpeg")
+      expect(filePart.url).toBe("data:image/jpeg;base64,/9j/4AAQSkZJRg==")
+    })
+  })
 })
diff --git 
a/src/tools/look-at/tools.ts b/src/tools/look-at/tools.ts
index ef64ad86a..c9ad89738 100644
--- a/src/tools/look-at/tools.ts
+++ b/src/tools/look-at/tools.ts
@@ -11,14 +11,27 @@ interface LookAtArgsWithAlias extends LookAtArgs {
 
 export function normalizeArgs(args: LookAtArgsWithAlias): LookAtArgs {
   return {
-    file_path: args.file_path ?? args.path ?? "",
+    file_path: args.file_path ?? args.path,
+    image_data: args.image_data,
     goal: args.goal ?? "",
   }
 }
 
+/**
+ * Checks that exactly one of `file_path` / `image_data` is non-empty and that
+ * `goal` is present. Returns an error message, or null when the args are valid.
+ */
 export function validateArgs(args: LookAtArgs): string | null {
-  if (!args.file_path) {
-    return `Error: Missing required parameter 'file_path'. Usage: look_at(file_path="/path/to/file", goal="what to extract")`
+  const hasFilePath = Boolean(args.file_path)
+  const hasImageData = Boolean(args.image_data)
+
+  if (!hasFilePath && !hasImageData) {
+    return `Error: Must provide either 'file_path' or 'image_data'. Usage:
+- look_at(file_path="/path/to/file", goal="what to extract")
+- look_at(image_data="base64_encoded_data", goal="what to extract")`
+  }
+  if (hasFilePath && hasImageData) {
+    return `Error: Provide only one of 'file_path' or 'image_data', not both.`
   }
   if (!args.goal) {
     return `Error: Missing required parameter 'goal'. Usage: look_at(file_path="/path/to/file", goal="what to extract")`
@@ -26,6 +39,36 @@ export function validateArgs(args: LookAtArgs): string | null {
   return null
 }
 
+/**
+ * Infers the MIME type of inline image data.
+ *
+ * A MIME type declared in a `data:` URL prefix wins; otherwise the first
+ * decoded bytes are compared against known file signatures. Falls back to
+ * "image/png" when nothing matches or the payload is not valid base64.
+ */
+function inferMimeTypeFromBase64(base64Data: string): string {
+  // [^;,] keeps the capture inside the media type even for `data:<mime>,<payload>` URLs.
+  const declared = base64Data.match(/^data:([^;,]+)[;,]/)
+  if (declared) return declared[1].toLowerCase()
+
+  try {
+    // Drop any `data:...,` prefix (including one without a MIME type) before sniffing.
+    const payload = base64Data.startsWith("data:") ? base64Data.slice(base64Data.indexOf(",") + 1) : base64Data
+    // 16 base64 characters decode to 12 bytes, enough to reach the "WEBP" tag at offset 8.
+    const header = atob(payload.slice(0, 16))
+
+    if (header.startsWith("\x89PNG")) return "image/png"
+    if (header.startsWith("\xFF\xD8\xFF")) return "image/jpeg"
+    if (header.startsWith("GIF8")) return "image/gif"
+    if (header.startsWith("RIFF") && header.includes("WEBP")) return "image/webp"
+    if (header.startsWith("%PDF")) return "application/pdf"
+  } catch {
+    // Invalid base64 - fall through to default
+  }
+
+  return "image/png"
+}
+
 function inferMimeType(filePath: string): string {
   const ext = extname(filePath).toLowerCase()
   const mimeTypes: Record<string, string> = {
@@ -64,11 +107,26 @@ function inferMimeType(filePath: string): string {
   return mimeTypes[ext] || "application/octet-stream"
 }
 
+/**
+ * Returns the payload after the first comma of a `data:` URL; any other
+ * input is returned unchanged.
+ */
+function extractBase64Data(imageData: string): string {
+  if (imageData.startsWith("data:")) {
+    const commaIndex = imageData.indexOf(",")
+    if (commaIndex !== -1) {
+      return imageData.slice(commaIndex + 1)
+    }
+  }
+  return imageData
+}
+
 export function createLookAt(ctx: PluginInput): ToolDefinition {
   return tool({
     description: LOOK_AT_DESCRIPTION,
     args: {
-      file_path: tool.schema.string().describe("Absolute path to the file to analyze"),
+      file_path: tool.schema.string().optional().describe("Absolute path to the file to analyze"),
+      image_data: tool.schema.string().optional().describe("Base64 encoded image data (for clipboard/pasted images)"),
       goal: tool.schema.string().describe("What specific information to extract from the file"),
     },
     async execute(rawArgs: LookAtArgs, toolContext) {
@@ -79,12 +137,37 @@ export function createLookAt(ctx: PluginInput): ToolDefinition {
         return validationError
       }
 
-      log(`[look_at] Analyzing file: ${args.file_path}, goal: ${args.goal}`)
+      // validateArgs guarantees exactly one of image_data / file_path is non-empty.
+      const imageData = args.image_data
+      const isBase64Input = Boolean(imageData)
+      const sourceDescription = isBase64Input ? "clipboard/pasted image" : args.file_path
+      log(`[look_at] Analyzing ${sourceDescription}, goal: ${args.goal}`)
 
-      const mimeType = inferMimeType(args.file_path)
-      const filename = basename(args.file_path)
+      let mimeType: string
+      let filePart: { type: "file"; mime: string; url: string; filename: string }
 
-      const prompt = `Analyze this file and extract the requested information.
+      if (imageData) {
+        mimeType = inferMimeTypeFromBase64(imageData)
+        // "image/svg+xml" -> "svg": drop the structured-syntax suffix from the extension.
+        const extension = mimeType.split("/")[1]?.split("+")[0] || "png"
+        filePart = {
+          type: "file",
+          mime: mimeType,
+          url: `data:${mimeType};base64,${extractBase64Data(imageData)}`,
+          filename: `clipboard-image.${extension}`,
+        }
+      } else {
+        const filePath = args.file_path ?? ""
+        mimeType = inferMimeType(filePath)
+        filePart = {
+          type: "file",
+          mime: mimeType,
+          url: pathToFileURL(filePath).href,
+          filename: basename(filePath),
+        }
+      }
+
+      const prompt = `Analyze this ${isBase64Input ? "image" : "file"} and extract the requested information.
 
 Goal: ${args.goal}
 
@@ -157,7 +240,7 @@ Original error: ${createResult.error}`
         log("[look_at] Failed to resolve multimodal-looker model info", error)
       }
 
-      log(`[look_at] Sending prompt with file passthrough to session ${sessionID}`)
+      log(`[look_at] Sending prompt with ${isBase64Input ? "base64 image" : "file"} to session ${sessionID}`)
       try {
         await promptWithModelSuggestionRetry(ctx.client, {
           path: { id: sessionID },
@@ -171,7 +254,7 @@ Original error: ${createResult.error}`
             },
             parts: [
               { type: "text", text: prompt },
-              { type: "file", mime: mimeType, url: pathToFileURL(args.file_path).href, filename },
+              filePart,
             ],
             ...(agentModel ? { model: { providerID: agentModel.providerID, modelID: agentModel.modelID } } : {}),
             ...(agentVariant ? { variant: agentVariant } : {}),
@@ -183,20 +266,19 @@ Original error: ${createResult.error}`
 
         const isJsonParseError = errorMessage.includes("JSON") && (errorMessage.includes("EOF") || errorMessage.includes("parse"))
         if (isJsonParseError) {
-          return `Error: Failed to analyze file - received malformed response from multimodal-looker agent.
+          return `Error: Failed to analyze ${isBase64Input ? "image" : "file"} - received malformed response from multimodal-looker agent.
 
 This typically occurs when:
 1. The multimodal-looker model is not available or not connected
-2. The model does not support this file type (${mimeType})
+2. The model does not support this ${isBase64Input ? "image format" : `file type (${mimeType})`}
 3. The API returned an empty or truncated response
 
-File: ${args.file_path}
+${isBase64Input ? "Source: clipboard/pasted image" : `File: ${args.file_path}`}
 MIME type: ${mimeType}
 
 Try:
 - Ensure a vision-capable model (e.g., gemini-3-flash, gpt-5.2) is available
-- Check provider connections in opencode settings
-- For text files like .md, .txt, use the Read tool instead
+- Check provider connections in opencode settings${isBase64Input ? "" : "\n- For text files like .md, .txt, use the Read tool instead"}
 
 Original error: ${errorMessage}`
         }
diff --git a/src/tools/look-at/types.ts b/src/tools/look-at/types.ts
index 3d3f0b0ee..0cfa1c5db 100644
--- a/src/tools/look-at/types.ts
+++ b/src/tools/look-at/types.ts
@@ -1,4 +1,7 @@
 export interface LookAtArgs {
-  file_path: string
+  /** Absolute path of the file to analyze. Mutually exclusive with `image_data`. */
+  file_path?: string
+  /** Base64 payload or `data:` URL of a clipboard/pasted image. Mutually exclusive with `file_path`. */
+  image_data?: string
   goal: string
 }