fix(look-at): preserve variant metadata in fallback chain and block non-vision models
- fallback-chain.ts: cache-derived entries inherit the variant from the matching hardcoded entry
- agent-metadata.ts: new isVisionCapableAgentModel() guard blocks registered models that are not vision-capable
- tools.ts: check vision capability early, before session creation
- Added regression tests for variant preservation and non-vision model rejection
This commit is contained in:
@@ -65,6 +65,35 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
|
||||
})
|
||||
})
|
||||
|
||||
test("preserves hardcoded fallback variant when the registered model matches a cache-derived entry", async () => {
  // given: the vision-capable cache holds exactly one model, openai/gpt-5.4 ...
  const cachedVisionModels = new Map([
    ["openai/gpt-5.4", { providerID: "openai", modelID: "gpt-5.4" }],
  ])
  setVisionCapableModelsCache(cachedVisionModels)

  // ... that same model is reported as available, with "openai" as the connected provider ...
  spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(new Set(["openai/gpt-5.4"]))
  spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"])

  // ... and the multimodal-looker agent is registered with that very model.
  const pluginInput = createPluginInput([
    {
      name: "multimodal-looker",
      model: { providerID: "openai", modelID: "gpt-5.4" },
    },
  ])

  // when
  const metadata = await resolveMultimodalLookerAgentMetadata(pluginInput)

  // then: the model is kept and a "medium" variant is attached even though
  // neither the cache entry nor the registered agent above specifies one.
  const expectedMetadata = {
    agentModel: { providerID: "openai", modelID: "gpt-5.4" },
    agentVariant: "medium",
  }
  expect(metadata).toEqual(expectedMetadata)
})
|
||||
|
||||
test("prefers connected vision-capable provider models before the hardcoded fallback chain", async () => {
|
||||
// given
|
||||
setVisionCapableModelsCache(new Map([
|
||||
@@ -97,6 +126,12 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
|
||||
|
||||
test("falls back to the hardcoded multimodal chain when no dynamic vision model exists", async () => {
|
||||
// given
|
||||
setVisionCapableModelsCache(new Map([
|
||||
[
|
||||
"google/gemini-3-flash",
|
||||
{ providerID: "google", modelID: "gemini-3-flash" },
|
||||
],
|
||||
]))
|
||||
spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set(["google/gemini-3-flash"]),
|
||||
)
|
||||
@@ -112,4 +147,24 @@ describe("resolveMultimodalLookerAgentMetadata", () => {
|
||||
agentVariant: undefined,
|
||||
})
|
||||
})
|
||||
|
||||
test("does not return a registered model when no vision-capable model is available", async () => {
  // given: openai/gpt-5.4 is available and registered for multimodal-looker,
  // but this test never puts anything into the vision-capable models cache.
  spyOn(modelAvailability, "fetchAvailableModels").mockResolvedValue(new Set(["openai/gpt-5.4"]))
  spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["openai"])
  const pluginInput = createPluginInput([
    {
      name: "multimodal-looker",
      model: { providerID: "openai", modelID: "gpt-5.4" },
    },
  ])

  // when
  const metadata = await resolveMultimodalLookerAgentMetadata(pluginInput)

  // then: nothing is resolved, i.e. the registered model is not handed back.
  expect(metadata).toEqual({})
})
|
||||
})
|
||||
|
||||
@@ -28,6 +28,19 @@ function getFullModelKey(model: AgentModel): string {
|
||||
return `${model.providerID}/${model.modelID}`
|
||||
}
|
||||
|
||||
/**
 * Type guard telling whether `agentModel` is present and appears in the list
 * of vision-capable models.
 *
 * Two models are considered the same when `getFullModelKey` yields the same
 * key for both.
 *
 * @param agentModel - Candidate model; may be undefined when nothing was resolved.
 * @param visionCapableModels - Models known to be vision-capable.
 * @returns `true` (narrowing `agentModel` to `AgentModel`) when a model with
 *   the same key exists in `visionCapableModels`; `false` when `agentModel`
 *   is missing or no entry matches.
 */
function isVisionCapableAgentModel(
  agentModel: AgentModel | undefined,
  visionCapableModels: Array<AgentModel>,
): agentModel is AgentModel {
  if (!agentModel) {
    return false
  }

  const wantedKey = getFullModelKey(agentModel)
  for (const candidate of visionCapableModels) {
    if (getFullModelKey(candidate) === wantedKey) {
      return true
    }
  }

  return false
}
|
||||
|
||||
function parseAgentModel(model: string): AgentModel | undefined {
|
||||
const [providerID, ...modelIDParts] = model.split("/")
|
||||
const modelID = modelIDParts.join("/")
|
||||
@@ -90,6 +103,10 @@ async function resolveDynamicAgentMetadata(
|
||||
})
|
||||
|
||||
const agentModel = resolution ? parseAgentModel(resolution.model) : undefined
|
||||
if (!isVisionCapableAgentModel(agentModel, visionCapableModels)) {
|
||||
return {}
|
||||
}
|
||||
|
||||
return {
|
||||
agentModel,
|
||||
agentVariant: resolution?.variant,
|
||||
@@ -113,22 +130,32 @@ export async function resolveMultimodalLookerAgentMetadata(
|
||||
try {
|
||||
const registeredMetadata = await resolveRegisteredAgentMetadata(ctx)
|
||||
const visionCapableModels = readVisionCapableModelsCache()
|
||||
|
||||
if (registeredMetadata.agentModel && visionCapableModels.length === 0) {
|
||||
return registeredMetadata
|
||||
}
|
||||
const registeredModelIsVisionCapable = isVisionCapableAgentModel(
|
||||
registeredMetadata.agentModel,
|
||||
visionCapableModels,
|
||||
)
|
||||
|
||||
const dynamicMetadata = await resolveDynamicAgentMetadata(ctx, visionCapableModels)
|
||||
|
||||
if (isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel)) {
|
||||
return registeredMetadata
|
||||
if (
|
||||
registeredModelIsVisionCapable &&
|
||||
isConfiguredVisionModel(registeredMetadata.agentModel, dynamicMetadata.agentModel)
|
||||
) {
|
||||
return {
|
||||
agentModel: registeredMetadata.agentModel,
|
||||
agentVariant: registeredMetadata.agentVariant ?? dynamicMetadata.agentVariant,
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamicMetadata.agentModel) {
|
||||
return dynamicMetadata
|
||||
}
|
||||
|
||||
return registeredMetadata
|
||||
if (registeredModelIsVisionCapable) {
|
||||
return registeredMetadata
|
||||
}
|
||||
|
||||
return {}
|
||||
} catch (error) {
|
||||
log("[look_at] Failed to resolve multimodal-looker model info", error)
|
||||
return {}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { describe, expect, it } from "bun:test"
|
||||
|
||||
describe("buildMultimodalLookerFallbackChain", () => {
|
||||
it("builds fallback chain from vision-capable models", async () => {
|
||||
// given
|
||||
@@ -28,4 +30,20 @@ describe("buildMultimodalLookerFallbackChain", () => {
|
||||
expect(result[0].model).toBe("gpt-5.4")
|
||||
expect(result[0].providers).toContain("openai")
|
||||
})
|
||||
|
||||
it("preserves hardcoded variant metadata for cache-derived entries", async () => {
  // given: a single vision-capable model that carries no variant of its own
  const { buildMultimodalLookerFallbackChain: buildChain } = await import("./multimodal-fallback-chain")
  const cachedModels = [{ providerID: "openai", modelID: "gpt-5.4" }]

  // when
  const chain = buildChain(cachedModels)

  // then: the first chain entry is built from that model and carries a "medium" variant
  const expectedFirstEntry = {
    providers: ["openai"],
    model: "gpt-5.4",
    variant: "medium",
  }
  expect(chain[0]).toEqual(expectedFirstEntry)
})
|
||||
})
|
||||
|
||||
@@ -8,6 +8,15 @@ function getFullModelKey(providerID: string, modelID: string): string {
|
||||
return `${providerID}/${modelID}`
|
||||
}
|
||||
|
||||
/**
 * Looks up the entry of `MULTIMODAL_LOOKER_REQUIREMENT.fallbackChain` that
 * corresponds to the given provider/model pair.
 *
 * @param providerID - Provider that must be listed in the entry's `providers`.
 * @param modelID - Model id that must equal the entry's `model`.
 * @returns The first matching entry in chain order, or `undefined` when the
 *   chain has no entry for this pair.
 */
function findHardcodedFallbackEntry(
  providerID: string,
  modelID: string,
): FallbackEntry | undefined {
  for (const candidate of MULTIMODAL_LOOKER_REQUIREMENT.fallbackChain) {
    // The model id is checked first; the provider list is only consulted on a model match.
    if (candidate.model !== modelID) {
      continue
    }
    if (candidate.providers.includes(providerID)) {
      return candidate
    }
  }

  return undefined
}
|
||||
|
||||
export function isHardcodedMultimodalFallbackModel(model: VisionCapableModel): boolean {
|
||||
return MULTIMODAL_LOOKER_REQUIREMENT.fallbackChain.some((entry) =>
|
||||
entry.providers.some((providerID) =>
|
||||
@@ -26,10 +35,16 @@ export function buildMultimodalLookerFallbackChain(
|
||||
const key = getFullModelKey(visionCapableModel.providerID, visionCapableModel.modelID)
|
||||
if (seen.has(key)) continue
|
||||
|
||||
const hardcodedEntry = findHardcodedFallbackEntry(
|
||||
visionCapableModel.providerID,
|
||||
visionCapableModel.modelID,
|
||||
)
|
||||
|
||||
seen.add(key)
|
||||
fallbackChain.push({
|
||||
providers: [visionCapableModel.providerID],
|
||||
model: visionCapableModel.modelID,
|
||||
...(hardcodedEntry?.variant ? { variant: hardcodedEntry.variant } : {}),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -41,7 +56,9 @@ export function buildMultimodalLookerFallbackChain(
|
||||
continue
|
||||
}
|
||||
|
||||
providerModelKeys.forEach((key) => seen.add(key))
|
||||
providerModelKeys.forEach((key) => {
|
||||
seen.add(key)
|
||||
})
|
||||
fallbackChain.push(entry)
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin
|
||||
import { LOOK_AT_DESCRIPTION, MULTIMODAL_LOOKER_AGENT } from "./constants"
|
||||
import type { LookAtArgs } from "./types"
|
||||
import { log, promptSyncWithModelSuggestionRetry } from "../../shared"
|
||||
import { readVisionCapableModelsCache } from "../../shared/vision-capable-models-cache"
|
||||
import { extractLatestAssistantText } from "./assistant-message-extractor"
|
||||
import type { LookAtArgsWithAlias } from "./look-at-arguments"
|
||||
import { normalizeArgs, validateArgs } from "./look-at-arguments"
|
||||
@@ -38,6 +39,16 @@ function getTemporaryConversionPath(error: unknown): string | null {
|
||||
return null
|
||||
}
|
||||
|
||||
/**
 * Checks a resolved model against the vision-capable models cache.
 *
 * @param model - Provider/model pair to look up.
 * @returns `true` when `readVisionCapableModelsCache()` contains an entry
 *   whose `providerID` and `modelID` both equal those of `model`.
 */
function isVisionCapableResolvedModel(model: {
  providerID: string
  modelID: string
}): boolean {
  // Provider is compared before model id, each with strict equality.
  const isSameModel = (cached: { providerID: string; modelID: string }): boolean => {
    if (cached.providerID !== model.providerID) {
      return false
    }
    return cached.modelID === model.modelID
  }

  return readVisionCapableModelsCache().some(isSameModel)
}
|
||||
|
||||
export { normalizeArgs, validateArgs } from "./look-at-arguments"
|
||||
|
||||
export function createLookAt(ctx: PluginInput): ToolDefinition {
|
||||
@@ -136,6 +147,14 @@ Provide ONLY the extracted information that matches the goal.
|
||||
Be thorough on what was requested, concise on everything else.
|
||||
If the requested information is not found, clearly state what is missing.`
|
||||
|
||||
const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx)
|
||||
if (!agentModel || !isVisionCapableResolvedModel(agentModel)) {
|
||||
log("[look_at] No vision-capable multimodal-looker model resolved", {
|
||||
resolvedModel: agentModel,
|
||||
})
|
||||
return "Error: No vision-capable multimodal-looker model available"
|
||||
}
|
||||
|
||||
log(`[look_at] Creating session with parent: ${toolContext.sessionID}`)
|
||||
const parentSession = await ctx.client.session.get({
|
||||
path: { id: toolContext.sessionID },
|
||||
@@ -169,8 +188,6 @@ Original error: ${createResult.error}`
|
||||
const sessionID = createResult.data.id
|
||||
log(`[look_at] Created session: ${sessionID}`)
|
||||
|
||||
const { agentModel, agentVariant } = await resolveMultimodalLookerAgentMetadata(ctx)
|
||||
|
||||
log(`[look_at] Sending prompt with ${isBase64Input ? "base64 image" : "file"} to session ${sessionID}`)
|
||||
try {
|
||||
await promptSyncWithModelSuggestionRetry(ctx.client, {
|
||||
@@ -187,7 +204,7 @@ Original error: ${createResult.error}`
|
||||
{ type: "text", text: prompt },
|
||||
filePart,
|
||||
],
|
||||
...(agentModel ? { model: { providerID: agentModel.providerID, modelID: agentModel.modelID } } : {}),
|
||||
model: { providerID: agentModel.providerID, modelID: agentModel.modelID },
|
||||
...(agentVariant ? { variant: agentVariant } : {}),
|
||||
},
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user