fix(look-at): respect configured multimodal-looker model instead of overriding via dynamic fallback

This commit is contained in:
Sami Jawhar
2026-03-19 01:36:22 +00:00
parent 30dc50d880
commit db32bad004
3 changed files with 38 additions and 48 deletions

View File

@@ -65,8 +65,8 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
}) })
}) })
test("preserves hardcoded fallback variant when the registered model matches a cache-derived entry", async () => { test("returns registered model variant directly without merging from dynamic resolution", async () => {
// given // given - registered model is in the vision-capable cache
setVisionCapableModelsCache(new Map([ setVisionCapableModelsCache(new Map([
[ [
"openai/gpt-5.4", "openai/gpt-5.4",
@@ -87,15 +87,15 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
// when // when
const result = await resolveMultimodalLookerAgentMetadata(ctx) const result = await resolveMultimodalLookerAgentMetadata(ctx)
// then // then - returns registered metadata directly, variant is undefined since none was set
expect(result).toEqual({ expect(result).toEqual({
agentModel: { providerID: "openai", modelID: "gpt-5.4" }, agentModel: { providerID: "openai", modelID: "gpt-5.4" },
agentVariant: "medium", agentVariant: undefined,
}) })
}) })
test("prefers connected vision-capable provider models before the hardcoded fallback chain", async () => { test("prefers registered model over dynamically resolved vision-capable model", async () => {
// given // given - registered model is openai/gpt-5.4, dynamic would resolve to rundao model
setVisionCapableModelsCache(new Map([ setVisionCapableModelsCache(new Map([
[ [
"rundao/public/qwen3.5-397b", "rundao/public/qwen3.5-397b",
@@ -117,10 +117,10 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
// when // when
const result = await resolveMultimodalLookerAgentMetadata(ctx) const result = await resolveMultimodalLookerAgentMetadata(ctx)
// then // then - registered model takes priority even when not in vision cache
expect(result).toEqual({ expect(result).toEqual({
agentModel: { providerID: "rundao", modelID: "public/qwen3.5-397b" }, agentModel: { providerID: "openai", modelID: "gpt-5.4" },
agentVariant: undefined, agentVariant: "medium",
}) })
}) })
@@ -148,8 +148,8 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
}) })
}) })
test("does not return a registered model when no vision-capable model is available", async () => { test("returns registered model even when not in vision-capable cache", async () => {
// given // given - registered model exists but is NOT in the vision-capable cache
spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue( spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(
new Set(["openai/gpt-5.4"]), new Set(["openai/gpt-5.4"]),
) )
@@ -164,7 +164,10 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
// when // when
const result = await resolveMultimodalLookerAgentMetadata(ctx) const result = await resolveMultimodalLookerAgentMetadata(ctx)
// then // then - trusts user's configured model regardless of vision cache
expect(result).toEqual({}) expect(result).toEqual({
agentModel: { providerID: "openai", modelID: "gpt-5.4" },
agentVariant: undefined,
})
}) })
}) })

View File

@@ -130,31 +130,34 @@ export async function resolveMultimodalLookerAgentMetadata(
try { try {
const registeredMetadata = await resolveRegisteredAgentMetadata(ctx) const registeredMetadata = await resolveRegisteredAgentMetadata(ctx)
const visionCapableModels = readVisionCapableModelsCache() const visionCapableModels = readVisionCapableModelsCache()
const registeredModelIsVisionCapable = isVisionCapableAgentModel(
registeredMetadata.agentModel, if (registeredMetadata.agentModel) {
visionCapableModels, const registeredModelIsVisionCapable = isVisionCapableAgentModel(
) registeredMetadata.agentModel,
visionCapableModels,
)
if (registeredModelIsVisionCapable) {
log("[look_at] Using registered multimodal-looker model (vision-capable)", {
model: getFullModelKey(registeredMetadata.agentModel),
})
return registeredMetadata
}
log("[look_at] Registered multimodal-looker model not in vision-capable cache, using it anyway", {
model: getFullModelKey(registeredMetadata.agentModel),
})
return registeredMetadata
}
const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels) const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels)
if (
registeredModelIsVisionCapable &&
isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel)
) {
return {
agentModel: registeredMetadata.agentModel,
agentVariant: registeredMetadata.agentVariant ?? dynamicMetadata.agentVariant,
}
}
if (dynamicMetadata.agentModel) { if (dynamicMetadata.agentModel) {
log("[look_at] No registered model, using dynamic resolution", {
model: getFullModelKey(dynamicMetadata.agentModel),
})
return dynamicMetadata return dynamicMetadata
} }
if (registeredModelIsVisionCapable) {
return registeredMetadata
}
return {} return {}
} catch (error) { } catch (error) {
log("[look_at] Failed to resolve multimodal-looker model info", error) log("[look_at] Failed to resolve multimodal-looker model info", error)

View File

@@ -4,7 +4,6 @@ import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin
import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants" import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants"
import type { LookAtArgs } from "./types" import type { LookAtArgs } from "./types"
import { log, promptSyncWithModelSuggestionRetry } from "../../shared" import { log, promptSyncWithModelSuggestionRetry } from "../../shared"
import { readVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
import { extractLatestAssistantText } from "./assistant-message-extractor" import { extractLatestAssistantText } from "./assistant-message-extractor"
import type { LookAtArgsWithAlias } from "./look-at-arguments" import type { LookAtArgsWithAlias } from "./look-at-arguments"
import { normalizeArgs, validateArgs } from "./look-at-arguments" import { normalizeArgs, validateArgs } from "./look-at-arguments"
@@ -39,15 +38,6 @@ function getTemporaryConversionPath(error: unknown): string | null {
return null return null
} }
/**
 * Reports whether a resolved model appears in the vision-capable models cache.
 *
 * The cache is read through readVisionCapableModelsCache() on each call, and an
 * entry counts as a match only when both its providerID and its modelID equal
 * the corresponding fields of `model`.
 *
 * NOTE(review): in this diff view the function appears only on the removed
 * side of the hunk (-39,15 +38,6), so the commit deletes it.
 *
 * @param model - Provider/model identifier pair to look up.
 * @returns true when some cache entry has the same providerID and modelID.
 */
function isVisionCapableResolvedModel(model: {
providerID: string
modelID: string
}): boolean {
return readVisionCapableModelsCache().some((visionCapableModel) =>
visionCapableModel.providerID === model.providerID &&
visionCapableModel.modelID === model.modelID,
)
}
export { normalizeArgs, validateArgs } from "./look-at-arguments" export { normalizeArgs, validateArgs } from "./look-at-arguments"
@@ -148,12 +138,6 @@ Be thorough on what was requested, concise on everything else.
If the requested information is not found, clearly state what is missing.` If the requested information is not found, clearly state what is missing.`
const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx) const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx)
if (agentModel && !isVisionCapableResolvedModel(agentModel)) {
log("[look_at] Resolved model is not vision-capable, blocking", {
resolvedModel: agentModel,
})
return "Error: Resolved multimodal-looker model is not vision-capable"
}
log(`[look_at] Creating session with parent: ${toolContext.sessionID}`) log(`[look_at] Creating session with parent: ${toolContext.sessionID}`)
const parentSession = await ctx.client.session.get({ const parentSession = await ctx.client.session.get({