diff --git a/src/tools/look-at/multimodal-agent-metadata.test.ts b/src/tools/look-at/multimodal-agent-metadata.test.ts index a92b33c4b..aa057eb34 100644 --- a/src/tools/look-at/multimodal-agent-metadata.test.ts +++ b/src/tools/look-at/multimodal-agent-metadata.test.ts @@ -65,8 +65,8 @@ describe("resolveMultimodalLookerAgentMetadata", () => { }) }) - test("preserves hardcoded fallback variant when the registered model matches a cache-derived entry", async () => { - // given + test("returns registered model variant directly without merging from dynamic resolution", async () => { + // given - registered model is in the vision-capable cache setVisionCapableModelsCache(new Map([ [ "openai/gpt-5.4", @@ -87,15 +87,15 @@ describe("resolveMultimodalLookerAgentMetadata", () => { // when const result = await resolveMultimodalLookerAgentMetadata(ctx) - // then + // then - returns registered metadata directly, variant is undefined since none was set expect(result).toEqual({ agentModel: { providerID: "openai", modelID: "gpt-5.4" }, - agentVariant: "medium", + agentVariant: undefined, }) }) - test("prefers connected vision-capable provider models before the hardcoded fallback chain", async () => { - // given + test("prefers registered model over dynamically resolved vision-capable model", async () => { + // given - registered model is openai/gpt-5.4, dynamic would resolve to rundao model setVisionCapableModelsCache(new Map([ [ "rundao/public/qwen3.5-397b", @@ -117,10 +117,10 @@ describe("resolveMultimodalLookerAgentMetadata", () => { // when const result = await resolveMultimodalLookerAgentMetadata(ctx) - // then + // then - registered model takes priority even when not in vision cache expect(result).toEqual({ - agentModel: { providerID: "rundao", modelID: "public/qwen3.5-397b" }, - agentVariant: undefined, + agentModel: { providerID: "openai", modelID: "gpt-5.4" }, + agentVariant: "medium", }) }) @@ -148,8 +148,8 @@ describe("resolveMultimodalLookerAgentMetadata", () => { }) }) - test("does not return a registered model when no vision-capable model is available", async () => { - // given + test("returns registered model even when not in vision-capable cache", async () => { + // given - registered model exists but is NOT in the vision-capable cache spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue( new Set(["openai/gpt-5.4"]), ) @@ -164,7 +164,10 @@ describe("resolveMultimodalLookerAgentMetadata", () => { // when const result = await resolveMultimodalLookerAgentMetadata(ctx) - // then - expect(result).toEqual({}) + // then - trusts user's configured model regardless of vision cache + expect(result).toEqual({ + agentModel: { providerID: "openai", modelID: "gpt-5.4" }, + agentVariant: undefined, + }) }) }) diff --git a/src/tools/look-at/multimodal-agent-metadata.ts b/src/tools/look-at/multimodal-agent-metadata.ts index c2c249b41..454e372c1 100644 --- a/src/tools/look-at/multimodal-agent-metadata.ts +++ b/src/tools/look-at/multimodal-agent-metadata.ts @@ -130,31 +130,34 @@ export async function resolveMultimodalLookerAgentMetadata( try { const registeredMetadata = await resolveRegisteredAgentMetadata(ctx) const visionCapableModels = readVisionCapableModelsCache() - const registeredModelIsVisionCapable = isVisionCapableAgentModel( - registeredMetadata.agentModel, - visionCapableModels, - ) + + if (registeredMetadata.agentModel) { + const registeredModelIsVisionCapable = isVisionCapableAgentModel( + registeredMetadata.agentModel, + visionCapableModels, + ) + + if (registeredModelIsVisionCapable) { + log("[look_at] Using registered multimodal-looker model (vision-capable)", { + model: getFullModelKey(registeredMetadata.agentModel), + }) + return registeredMetadata + } + + log("[look_at] Registered multimodal-looker model not in vision-capable cache, using it anyway", { + model: getFullModelKey(registeredMetadata.agentModel), + }) + return registeredMetadata + } const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels) - - if ( - registeredModelIsVisionCapable && - isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel) - ) { - return { - agentModel: registeredMetadata.agentModel, - agentVariant: registeredMetadata.agentVariant ?? dynamicMetadata.agentVariant, - } - } - if (dynamicMetadata.agentModel) { + log("[look_at] No registered model, using dynamic resolution", { + model: getFullModelKey(dynamicMetadata.agentModel), + }) return dynamicMetadata } - if (registeredModelIsVisionCapable) { - return registeredMetadata - } - return {} } catch (error) { log("[look_at] Failed to resolve multimodal-looker model info", error) diff --git a/src/tools/look-at/tools.ts b/src/tools/look-at/tools.ts index a5fdb6075..773d334d0 100644 --- a/src/tools/look-at/tools.ts +++ b/src/tools/look-at/tools.ts @@ -4,7 +4,6 @@ import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants" import type { LookAtArgs } from "./types" import { log, promptSyncWithModelSuggestionRetry } from "../../shared" -import { readVisionCapableModelsCache } from "../../shared/vision-capable-models-cache" import { extractLatestAssistantText } from "./assistant-message-extractor" import type { LookAtArgsWithAlias } from "./look-at-arguments" import { normalizeArgs, validateArgs } from "./look-at-arguments" @@ -39,15 +38,6 @@ function getTemporaryConversionPath(error: unknown): string | null { return null } -function isVisionCapableResolvedModel(model: { - providerID: string - modelID: string -}): boolean { - return readVisionCapableModelsCache().some((visionCapableModel) => - visionCapableModel.providerID === model.providerID && - visionCapableModel.modelID === model.modelID, - ) -} export { normalizeArgs, validateArgs } from "./look-at-arguments" @@ -148,12 +138,6 @@ Be thorough on what was requested, concise on everything else. If the requested information is not found, clearly state what is missing.` const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx) - if (agentModel && !isVisionCapableResolvedModel(agentModel)) { - log("[look_at] Resolved model is not vision-capable, blocking", { - resolvedModel: agentModel, - }) - return "Error: Resolved multimodal-looker model is not vision-capable" - } log(`[look_at] Creating session with parent: ${toolContext.sessionID}`) const parentSession = await ctx.client.session.get({