diff --git a/src/hooks/model-fallback/hook.test.ts b/src/hooks/model-fallback/hook.test.ts index aa1f70fd1..bcd720c56 100644 --- a/src/hooks/model-fallback/hook.test.ts +++ b/src/hooks/model-fallback/hook.test.ts @@ -140,6 +140,121 @@ describe("model fallback hook", () => { expect(secondOutput.message["variant"]).toBeUndefined() }) + test("does not re-arm fallback when one is already pending", () => { + //#given + const sessionID = "ses_model_fallback_pending_guard" + clearPendingModelFallback(sessionID) + + //#when + const firstSet = setPendingModelFallback( + sessionID, + "Sisyphus (Ultraworker)", + "anthropic", + "claude-opus-4-6-thinking", + ) + const secondSet = setPendingModelFallback( + sessionID, + "Sisyphus (Ultraworker)", + "anthropic", + "claude-opus-4-6-thinking", + ) + + //#then + expect(firstSet).toBe(true) + expect(secondSet).toBe(false) + clearPendingModelFallback(sessionID) + }) + + test("skips no-op fallback entries that resolve to same provider/model", async () => { + //#given + const sessionID = "ses_model_fallback_noop_skip" + clearPendingModelFallback(sessionID) + + const hook = createModelFallbackHook() as unknown as { + "chat.message"?: ( + input: { sessionID: string }, + output: { message: Record; parts: Array<{ type: string; text?: string }> }, + ) => Promise + } + + setSessionFallbackChain(sessionID, [ + { providers: ["anthropic"], model: "claude-opus-4-6" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + ]) + + expect( + setPendingModelFallback( + sessionID, + "Sisyphus (Ultraworker)", + "anthropic", + "claude-opus-4-6", + ), + ).toBe(true) + + const output = { + message: { + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, + parts: [{ type: "text", text: "continue" }], + } + + //#when + await hook["chat.message"]?.({ sessionID }, output) + + //#then + expect(output.message["model"]).toEqual({ + providerID: "opencode", + modelID: "kimi-k2.5-free", + }) + clearPendingModelFallback(sessionID) + }) + + test("skips no-op fallback entries even when variant differs", async () => { + //#given + const sessionID = "ses_model_fallback_noop_variant_skip" + clearPendingModelFallback(sessionID) + + const hook = createModelFallbackHook() as unknown as { + "chat.message"?: ( + input: { sessionID: string }, + output: { message: Record; parts: Array<{ type: string; text?: string }> }, + ) => Promise + } + + setSessionFallbackChain(sessionID, [ + { providers: ["quotio"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["quotio"], model: "gpt-5.2" }, + ]) + + expect( + setPendingModelFallback( + sessionID, + "Sisyphus (Ultraworker)", + "quotio", + "claude-opus-4-6", + ), + ).toBe(true) + + const output = { + message: { + model: { providerID: "quotio", modelID: "claude-opus-4-6" }, + variant: "max", + }, + parts: [{ type: "text", text: "continue" }], + } + + //#when + await hook["chat.message"]?.({ sessionID }, output) + + //#then + expect(output.message["model"]).toEqual({ + providerID: "quotio", + modelID: "gpt-5.2", + }) + expect(output.message["variant"]).toBeUndefined() + clearPendingModelFallback(sessionID) + }) + test("shows toast when fallback is applied", async () => { //#given const toastCalls: Array<{ title: string; message: string }> = [] @@ -199,7 +314,7 @@ describe("model fallback hook", () => { sessionID, "Atlas (Plan Executor)", "github-copilot", - "claude-sonnet-4-6", + "claude-sonnet-4-5", ) expect(set).toBe(true) diff --git a/src/hooks/model-fallback/hook.ts b/src/hooks/model-fallback/hook.ts index bbb01825e..dbb4aa46d 100644 --- a/src/hooks/model-fallback/hook.ts +++ b/src/hooks/model-fallback/hook.ts @@ -39,6 +39,12 @@ const pendingModelFallbacks = new Map() const lastToastKey = new Map() const sessionFallbackChains = new Map() +function canonicalizeModelID(modelID: string): string { + return modelID + .toLowerCase() + .replace(/\./g, "-") +} + export function setSessionFallbackChain(sessionID: string, fallbackChain: FallbackEntry[] | undefined): void { if (!sessionID) return if (!fallbackChain || fallbackChain.length === 0) { @@ -77,6 +83,11 @@ export function setPendingModelFallback( const existing = pendingModelFallbacks.get(sessionID) if (existing) { + if (existing.pending) { + log("[model-fallback] Pending fallback already armed for session: " + sessionID) + return false + } + // Preserve progression across repeated session.error retries in same session. // We only mark the next turn as pending fallback application. existing.providerID = currentProviderID @@ -140,13 +151,24 @@ export function getNextFallback( } const providerID = selectFallbackProvider(fallback.providers, state.providerID) + const modelID = transformModelForProvider(providerID, fallback.model) + + const isNoOpFallback = + providerID.toLowerCase() === state.providerID.toLowerCase() && + canonicalizeModelID(modelID) === canonicalizeModelID(state.modelID) + + if (isNoOpFallback) { + log("[model-fallback] Skipping no-op fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model) + continue + } + state.pending = false log("[model-fallback] Using fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model) return { providerID, - modelID: transformModelForProvider(providerID, fallback.model), + modelID, variant: fallback.variant, } } diff --git a/src/hooks/runtime-fallback/constants.ts b/src/hooks/runtime-fallback/constants.ts index 60da6fb53..3f011b333 100644 --- a/src/hooks/runtime-fallback/constants.ts +++ b/src/hooks/runtime-fallback/constants.ts @@ -26,6 +26,10 @@ export const RETRYABLE_ERROR_PATTERNS = [ /rate.?limit/i, /too.?many.?requests/i, /quota.?exceeded/i, + /quota\s+will\s+reset\s+after/i, + /all\s+credentials\s+for\s+model/i, + /cool(?:ing)?\s+down/i, + /exhausted\s+your\s+capacity/i, /usage\s+limit\s+has\s+been\s+reached/i, /service.?unavailable/i, /overloaded/i, diff --git a/src/hooks/runtime-fallback/error-classifier.test.ts b/src/hooks/runtime-fallback/error-classifier.test.ts new file mode 100644 index 000000000..52c9bb502 --- /dev/null +++ b/src/hooks/runtime-fallback/error-classifier.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, test } from "bun:test" + +import { extractAutoRetrySignal, isRetryableError } from "./error-classifier" + +describe("runtime-fallback error classifier", () => { + test("detects cooling-down auto-retry status signals", () => { + //#given + const info = { + status: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + } + + //#when + const signal = extractAutoRetrySignal(info) + + //#then + expect(signal).toBeDefined() + }) + + test("detects single-word cooldown auto-retry status signals", () => { + //#given + const info = { + status: + "All credentials for model claude-opus-4-6 are cooldown [retrying in 7m 56s attempt #1]", + } + + //#when + const signal = extractAutoRetrySignal(info) + + //#then + expect(signal).toBeDefined() + }) + + test("treats cooling-down retry messages as retryable", () => { + //#given + const error = { + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + } + + //#when + const retryable = isRetryableError(error, [400, 403, 408, 429, 500, 502, 503, 504, 529]) + + //#then + expect(retryable).toBe(true) + }) + + test("ignores non-retry assistant status text", () => { + //#given + const info = { + status: "Thinking...", + } + + //#when + const signal = extractAutoRetrySignal(info) + + //#then + expect(signal).toBeUndefined() + }) +}) diff --git a/src/hooks/runtime-fallback/error-classifier.ts b/src/hooks/runtime-fallback/error-classifier.ts index f35819b76..39b6ecdbb 100644 --- a/src/hooks/runtime-fallback/error-classifier.ts +++ b/src/hooks/runtime-fallback/error-classifier.ts @@ -102,7 +102,7 @@ export interface AutoRetrySignal { export const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [ (combined) => /retrying\s+in/i.test(combined), (combined) => - /(?:too\s+many\s+requests|quota\s*exceeded|usage\s+limit|rate\s+limit|limit\s+reached)/i.test(combined), + /(?:too\s+many\s+requests|quota\s*exceeded|quota\s+will\s+reset\s+after|usage\s+limit|rate\s+limit|limit\s+reached|all\s+credentials\s+for\s+model|cool(?:ing)?\s*down|exhausted\s+your\s+capacity)/i.test(combined), ] export function extractAutoRetrySignal(info: Record | undefined): AutoRetrySignal | undefined { diff --git a/src/hooks/runtime-fallback/event-handler.ts b/src/hooks/runtime-fallback/event-handler.ts index f73e6557f..c631c8cd5 100644 --- a/src/hooks/runtime-fallback/event-handler.ts +++ b/src/hooks/runtime-fallback/event-handler.ts @@ -2,13 +2,15 @@ import type { HookDeps } from "./types" import type { AutoRetryHelpers } from "./auto-retry" import { HOOK_NAME } from "./constants" import { log } from "../../shared/logger" -import { extractStatusCode, extractErrorName, classifyErrorType, isRetryableError } from "./error-classifier" +import { extractStatusCode, extractErrorName, classifyErrorType, isRetryableError, extractAutoRetrySignal } from "./error-classifier" import { createFallbackState, prepareFallback } from "./fallback-state" import { getFallbackModelsForSession } from "./fallback-models" import { SessionCategoryRegistry } from "../../shared/session-category-registry" +import { normalizeRetryStatusMessage, extractRetryAttempt } from "../../shared/retry-status-utils" export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { const { config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult, sessionFallbackTimeouts } = deps + const sessionStatusRetryKeys = new Map() const handleSessionCreated = (props: Record | undefined) => { const sessionInfo = props?.info as { id?: string; model?: string } | undefined @@ -33,6 +35,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { sessionRetryInFlight.delete(sessionID) sessionAwaitingFallbackResult.delete(sessionID) helpers.clearSessionFallbackTimeout(sessionID) + sessionStatusRetryKeys.delete(sessionID) SessionCategoryRegistry.remove(sessionID) } } @@ -182,6 +185,88 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { } } + const handleSessionStatus = async (props: Record | undefined) => { + const sessionID = props?.sessionID as string | undefined + const status = props?.status as { type?: string; message?: string; attempt?: number } | undefined + const agent = props?.agent as string | undefined + const model = props?.model as string | undefined + + if (!sessionID || status?.type !== "retry") return + + const retryMessage = typeof status.message === "string" ? status.message : "" + const retrySignal = extractAutoRetrySignal({ status: retryMessage, message: retryMessage }) + if (!retrySignal) return + + const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}` + if (sessionStatusRetryKeys.get(sessionID) === retryKey) { + return + } + sessionStatusRetryKeys.set(sessionID, retryKey) + + if (sessionRetryInFlight.has(sessionID)) { + log(`[${HOOK_NAME}] session.status retry skipped — retry already in flight`, { sessionID }) + return + } + + const resolvedAgent = await helpers.resolveAgentForSessionFromContext(sessionID, agent) + const fallbackModels = getFallbackModelsForSession(sessionID, resolvedAgent, pluginConfig) + if (fallbackModels.length === 0) return + + let state = sessionStates.get(sessionID) + if (!state) { + const detectedAgent = resolvedAgent + const agentConfig = detectedAgent + ? pluginConfig?.agents?.[detectedAgent as keyof typeof pluginConfig.agents] + : undefined + const inferredModel = model || (agentConfig?.model as string | undefined) + if (!inferredModel) { + log(`[${HOOK_NAME}] session.status retry missing model info, cannot fallback`, { sessionID }) + return + } + state = createFallbackState(inferredModel) + sessionStates.set(sessionID, state) + } + sessionLastAccess.set(sessionID, Date.now()) + + if (state.pendingFallbackModel) { + log(`[${HOOK_NAME}] session.status retry skipped (pending fallback in progress)`, { + sessionID, + pendingFallbackModel: state.pendingFallbackModel, + }) + return + } + + log(`[${HOOK_NAME}] Detected provider auto-retry signal in session.status`, { + sessionID, + model: state.currentModel, + retryAttempt: status.attempt, + }) + + await helpers.abortSessionRequest(sessionID, "session.status.retry-signal") + + const result = prepareFallback(sessionID, state, fallbackModels, config) + if (result.success && config.notify_on_fallback) { + await deps.ctx.client.tui + .showToast({ + body: { + title: "Model Fallback", + message: `Switching to ${result.newModel?.split("/").pop() || result.newModel} for next request`, + variant: "warning", + duration: 5000, + }, + }) + .catch(() => {}) + } + + if (result.success && result.newModel) { + await helpers.autoRetryWithFallback(sessionID, result.newModel, resolvedAgent, "session.status") + } + + if (!result.success) { + log(`[${HOOK_NAME}] Fallback preparation failed`, { sessionID, error: result.error }) + } + } + return async ({ event }: { event: { type: string; properties?: unknown } }) => { if (!config.enabled) return @@ -191,6 +276,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) { if (event.type === "session.deleted") { handleSessionDeleted(props); return } if (event.type === "session.stop") { await handleSessionStop(props); return } if (event.type === "session.idle") { handleSessionIdle(props); return } + if (event.type === "session.status") { await handleSessionStatus(props); return } if (event.type === "session.error") { await handleSessionError(props); return } } } diff --git a/src/hooks/runtime-fallback/fallback-models.test.ts b/src/hooks/runtime-fallback/fallback-models.test.ts new file mode 100644 index 000000000..7cf3f8e32 --- /dev/null +++ b/src/hooks/runtime-fallback/fallback-models.test.ts @@ -0,0 +1,66 @@ +import { afterEach, describe, expect, test } from "bun:test" + +import { getFallbackModelsForSession } from "./fallback-models" +import { SessionCategoryRegistry } from "../../shared/session-category-registry" + +describe("runtime-fallback fallback-models", () => { + afterEach(() => { + SessionCategoryRegistry.clear() + }) + + test("uses category fallback_models when session category is registered", () => { + //#given + const sessionID = "ses_runtime_fallback_category" + SessionCategoryRegistry.register(sessionID, "quick") + const pluginConfig = { + categories: { + quick: { + fallback_models: ["openai/gpt-5.2", "anthropic/claude-opus-4-6"], + }, + }, + } as any + + //#when + const result = getFallbackModelsForSession(sessionID, undefined, pluginConfig) + + //#then + expect(result).toEqual(["openai/gpt-5.2", "anthropic/claude-opus-4-6"]) + }) + + test("uses agent-specific fallback_models when agent is resolved", () => { + //#given + const pluginConfig = { + agents: { + oracle: { + fallback_models: ["openai/gpt-5.2", "anthropic/claude-opus-4-6"], + }, + }, + } as any + + //#when + const result = getFallbackModelsForSession("ses_runtime_fallback_agent", "oracle", pluginConfig) + + //#then + expect(result).toEqual(["openai/gpt-5.2", "anthropic/claude-opus-4-6"]) + }) + + test("does not fall back to another agent chain when agent cannot be resolved", () => { + //#given + const pluginConfig = { + agents: { + sisyphus: { + fallback_models: ["quotio/gpt-5.2", "quotio/glm-5", "quotio/kimi-k2.5"], + }, + oracle: { + fallback_models: ["openai/gpt-5.2", "anthropic/claude-opus-4-6"], + }, + }, + } as any + + //#when + const result = getFallbackModelsForSession("ses_runtime_fallback_unknown", undefined, pluginConfig) + + //#then + expect(result).toEqual([]) + }) +}) diff --git a/src/hooks/runtime-fallback/fallback-models.ts b/src/hooks/runtime-fallback/fallback-models.ts index e7af3b40e..fb984bbec 100644 --- a/src/hooks/runtime-fallback/fallback-models.ts +++ b/src/hooks/runtime-fallback/fallback-models.ts @@ -1,5 +1,5 @@ import type { OhMyOpenCodeConfig } from "../../config" -import { AGENT_NAMES, agentPattern } from "./agent-resolver" +import { agentPattern } from "./agent-resolver" import { HOOK_NAME } from "./constants" import { log } from "../../shared/logger" import { SessionCategoryRegistry } from "../../shared/session-category-registry" @@ -51,19 +51,7 @@ export function getFallbackModelsForSession( if (result) return result } - const sisyphusFallback = tryGetFallbackFromAgent("sisyphus") - if (sisyphusFallback) { - log(`[${HOOK_NAME}] Using sisyphus fallback models (no agent detected)`, { sessionID }) - return sisyphusFallback - } - - for (const agentName of AGENT_NAMES) { - const result = tryGetFallbackFromAgent(agentName) - if (result) { - log(`[${HOOK_NAME}] Using ${agentName} fallback models (no agent detected)`, { sessionID }) - return result - } - } + log(`[${HOOK_NAME}] No category/agent fallback models resolved for session`, { sessionID, agent }) return [] } diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index dbb6e29f1..73e1586f5 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -387,6 +387,219 @@ describe("runtime-fallback", () => { expect(fallbackLog?.data).toMatchObject({ from: "openai/gpt-5.3-codex", to: "anthropic/claude-opus-4-6" }) }) + test("should trigger fallback on auto-retry signal in assistant text parts", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + + const sessionID = "test-session-parts-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "quotio/claude-opus-4-6", + }, + parts: [ + { + type: "text", + text: "This request would exceed your account's rate limit. Please try again later. [retrying in 2s attempt #2]", + }, + ], + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "quotio/claude-opus-4-6", to: "openai/gpt-5.2" }) + }) + + test("should trigger fallback when auto-retry text parts are nested under info.parts", async () => { + const hook = createRuntimeFallbackHook(createMockPluginInput(), { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + }) + + const sessionID = "test-session-info-parts-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + sessionID, + role: "assistant", + model: "quotio/claude-opus-4-6", + parts: [ + { + type: "text", + text: "This request would exceed your account's rate limit. Please try again later. [retrying in 2s attempt #2]", + }, + ], + }, + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "quotio/claude-opus-4-6", to: "openai/gpt-5.2" }) + }) + + test("should trigger fallback on session.status auto-retry signal", async () => { + const promptCalls: unknown[] = [] + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [ + { + info: { role: "user" }, + parts: [{ type: "text", text: "continue" }], + }, + ], + }), + promptAsync: async (args) => { + promptCalls.push(args) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + } + ) + + const sessionID = "test-session-status-auto-retry" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + + await hook.event({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + next: 476, + attempt: 1, + message: "All credentials for model claude-opus-4-6 are cooling down [retrying in 7m 56s attempt #1]", + }, + }, + }, + }) + + const signalLog = logCalls.find((c) => c.msg.includes("Detected provider auto-retry signal in session.status")) + expect(signalLog).toBeDefined() + + const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback")) + expect(fallbackLog).toBeDefined() + expect(fallbackLog?.data).toMatchObject({ from: "quotio/claude-opus-4-6", to: "openai/gpt-5.2" }) + expect(promptCalls.length).toBe(1) + }) + + test("should deduplicate session.status countdown updates for the same retry attempt", async () => { + const promptCalls: unknown[] = [] + const hook = createRuntimeFallbackHook( + createMockPluginInput({ + session: { + messages: async () => ({ + data: [ + { + info: { role: "user" }, + parts: [{ type: "text", text: "continue" }], + }, + ], + }), + promptAsync: async (args) => { + promptCalls.push(args) + return {} + }, + }, + }), + { + config: createMockConfig({ notify_on_fallback: false }), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2"]), + } + ) + + const sessionID = "test-session-status-dedup" + SessionCategoryRegistry.register(sessionID, "test") + + await hook.event({ + event: { + type: "session.created", + properties: { info: { id: sessionID, model: "quotio/claude-opus-4-6" } }, + }, + }) + + await hook.event({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + next: 476, + attempt: 1, + message: "All credentials for model claude-opus-4-6 are cooling down [retrying in 7m 56s attempt #1]", + }, + }, + }, + }) + + await hook.event({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + next: 475, + attempt: 1, + message: "All credentials for model claude-opus-4-6 are cooling down [retrying in 7m 55s attempt #1]", + }, + }, + }, + }) + + expect(promptCalls.length).toBe(1) + }) + test("should NOT trigger fallback on auto-retry signal when timeout_seconds is 0", async () => { const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig({ notify_on_fallback: false, timeout_seconds: 0 }), diff --git a/src/hooks/runtime-fallback/message-update-handler.ts b/src/hooks/runtime-fallback/message-update-handler.ts index 7e6130955..9252b7918 100644 --- a/src/hooks/runtime-fallback/message-update-handler.ts +++ b/src/hooks/runtime-fallback/message-update-handler.ts @@ -57,10 +57,20 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel return async (props: Record | undefined) => { const info = props?.info as Record | undefined const sessionID = info?.sessionID as string | undefined - const retrySignalResult = extractAutoRetrySignal(info) - const retrySignal = retrySignalResult?.signal const timeoutEnabled = config.timeout_seconds > 0 - const parts = props?.parts as Array<{ type?: string; text?: string }> | undefined + const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined + const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined + const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts + const retrySignalResult = extractAutoRetrySignal(info) + const partsText = (parts ?? []) + .filter((p) => typeof p?.text === "string") + .map((p) => (p.text ?? "").trim()) + .filter((text) => text.length > 0) + .join("\n") + const retrySignalFromParts = partsText + ? extractAutoRetrySignal({ message: partsText, status: partsText, summary: partsText })?.signal + : undefined + const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts const errorContentResult = containsErrorContent(parts) const error = info?.error ?? (retrySignal && timeoutEnabled ? { name: "ProviderRateLimitError", message: retrySignal } : undefined) ?? diff --git a/src/plugin/event.model-fallback.test.ts b/src/plugin/event.model-fallback.test.ts index 66b933c85..f8a92b6f1 100644 --- a/src/plugin/event.model-fallback.test.ts +++ b/src/plugin/event.model-fallback.test.ts @@ -1,12 +1,17 @@ -import { afterEach, describe, expect, test } from "bun:test" +declare const require: (name: string) => any +const { afterEach, describe, expect, mock, test } = require("bun:test") + +mock.module("../shared/connected-providers-cache", () => ({ + readConnectedProvidersCache: () => null, + readProviderModelsCache: () => null, +})) import { createEventHandler } from "./event" import { createChatMessageHandler } from "./chat-message" import { _resetForTesting, setMainSession } from "../features/claude-code-session-state" import { createModelFallbackHook, clearPendingModelFallback } from "../hooks/model-fallback/hook" - describe("createEventHandler - model fallback", () => { - const createHandler = (args?: { hooks?: any }) => { + const createHandler = (args?: { hooks?: any; pluginConfig?: any }) => { const abortCalls: string[] = [] const promptCalls: string[] = [] @@ -26,7 +31,7 @@ describe("createEventHandler - model fallback", () => { }, }, } as any, - pluginConfig: {} as any, + pluginConfig: (args?.pluginConfig ?? {}) as any, firstMessageVariantGate: { markSessionCreated: () => {}, clear: () => {}, @@ -206,11 +211,222 @@ describe("createEventHandler - model fallback", () => { //#then expect(abortCalls).toEqual([sessionID]) expect(promptCalls).toEqual([sessionID]) - expect(output.message["model"]).toEqual({ - providerID: "anthropic", - modelID: "claude-opus-4-6", + expect(output.message["model"]).toMatchObject({ + providerID: "kimi-for-coding", + modelID: "k2p5", }) - expect(output.message["variant"]).toBe("max") + expect(output.message["variant"]).toBeUndefined() + }) + + test("does not spam abort/prompt when session.status retry countdown updates", async () => { + //#given + const sessionID = "ses_status_retry_dedup" + setMainSession(sessionID) + clearPendingModelFallback(sessionID) + const modelFallback = createModelFallbackHook() + const { handler, abortCalls, promptCalls } = createHandler({ hooks: { modelFallback } }) + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "msg_user_status_dedup", + sessionID, + role: "user", + modelID: "claude-opus-4-6-thinking", + providerID: "anthropic", + agent: "Sisyphus (Ultraworker)", + }, + }, + }, + }) + + //#when + await handler({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + next: 300, + }, + }, + }, + }) + await handler({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~4 days attempt #1]", + next: 299, + }, + }, + }, + }) + + //#then + expect(abortCalls).toEqual([sessionID]) + expect(promptCalls).toEqual([sessionID]) + }) + + test("does not trigger model-fallback from session.status when runtime_fallback is enabled", async () => { + //#given + const sessionID = "ses_status_retry_runtime_enabled" + setMainSession(sessionID) + clearPendingModelFallback(sessionID) + const modelFallback = createModelFallbackHook() + const runtimeFallback = { + event: async () => {}, + "chat.message": async () => {}, + } + const { handler, abortCalls, promptCalls } = createHandler({ + hooks: { modelFallback, runtimeFallback }, + pluginConfig: { runtime_fallback: { enabled: true } }, + }) + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "msg_user_status_runtime_enabled", + sessionID, + role: "user", + modelID: "claude-opus-4-6", + providerID: "quotio", + agent: "Sisyphus (Ultraworker)", + }, + }, + }, + }) + + //#when + await handler({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: + "All credentials for model claude-opus-4-6 are cooling down [retrying in 7m 56s attempt #1]", + next: 476, + }, + }, + }, + }) + + //#then + expect(abortCalls).toEqual([]) + expect(promptCalls).toEqual([]) + }) + + test("prefers user-configured fallback_models over hardcoded chain on session.status retry", async () => { + //#given + const sessionID = "ses_status_retry_user_fallback" + setMainSession(sessionID) + clearPendingModelFallback(sessionID) + + const modelFallback = createModelFallbackHook() + const pluginConfig = { + agents: { + sisyphus: { + fallback_models: ["quotio/gpt-5.2", "quotio/kimi-k2.5"], + }, + }, + } + + const { handler, abortCalls, promptCalls } = createHandler({ hooks: { modelFallback }, pluginConfig }) + + const chatMessageHandler = createChatMessageHandler({ + ctx: { + client: { + tui: { + showToast: async () => ({}), + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + shouldOverride: () => false, + markApplied: () => {}, + }, + hooks: { + modelFallback, + stopContinuationGuard: null, + keywordDetector: null, + claudeCodeHooks: null, + autoSlashCommand: null, + startWork: null, + ralphLoop: null, + } as any, + }) + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "msg_user_status_user_fallback", + sessionID, + role: "user", + time: { created: 1 }, + content: [], + modelID: "claude-opus-4-6", + providerID: "quotio", + agent: "Sisyphus (Ultraworker)", + path: { cwd: "/tmp", root: "/tmp" }, + }, + }, + }, + }) + + //#when + await handler({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + next: 300, + }, + }, + }, + }) + + const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> } + await chatMessageHandler( + { + sessionID, + agent: "sisyphus", + model: { providerID: "quotio", modelID: "claude-opus-4-6" }, + }, + output, + ) + + //#then + expect(abortCalls).toEqual([sessionID]) + expect(promptCalls).toEqual([sessionID]) + expect(output.message["model"]).toEqual({ + providerID: "quotio", + modelID: "gpt-5.2", + }) + expect(output.message["variant"]).toBeUndefined() }) test("advances main-session fallback chain across repeated session.error retries end-to-end", async () => { @@ -322,21 +538,21 @@ describe("createEventHandler - model fallback", () => { //#when - first retry cycle const first = await triggerRetryCycle() - //#then - first fallback entry applied (prefers current provider when available) - expect(first.message["model"]).toEqual({ - providerID: "anthropic", - modelID: "claude-opus-4-6", + //#then - first fallback entry applied (no-op skip: claude-opus-4-6 matches current model after normalization) + expect(first.message["model"]).toMatchObject({ + providerID: "kimi-for-coding", + modelID: "k2p5", }) - expect(first.message["variant"]).toBe("max") + expect(first.message["variant"]).toBeUndefined() //#when - second retry cycle const second = await triggerRetryCycle() - //#then - second fallback entry applied (chain advanced) - expect(second.message["model"]).toEqual({ - providerID: "kimi-for-coding", - modelID: "k2p5", + //#then - second fallback entry applied (chain advanced past k2p5) + expect(second.message["model"]).toMatchObject({ + modelID: "kimi-k2.5", }) + expect((second.message["model"] as { providerID?: string })?.providerID).toBeTruthy() expect(second.message["variant"]).toBeUndefined() expect(abortCalls).toEqual([sessionID, sessionID]) expect(promptCalls).toEqual([sessionID, sessionID]) diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 1a9356194..0b7c3c2ad 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -13,11 +13,16 @@ import { import { clearPendingModelFallback, clearSessionFallbackChain, + setSessionFallbackChain, setPendingModelFallback, } from "../hooks/model-fallback/hook"; +import { getFallbackModelsForSession } from "../hooks/runtime-fallback/fallback-models"; import { resetMessageCursor } from "../shared"; +import { getAgentConfigKey } from "../shared/agent-display-names"; import { log } from "../shared/logger"; import { shouldRetryError } from "../shared/model-error-classifier"; +import { buildFallbackChainFromModels } from "../shared/fallback-chain-from-models"; +import { extractRetryAttempt, normalizeRetryStatusMessage } from "../shared/retry-status-utils"; import { clearSessionModel, setSessionModel } from "../shared/session-model-state"; import { deleteSessionTools } from "../shared/session-tools-store"; import { lspManager } from "../tools"; @@ -97,6 +102,22 @@ function extractProviderModelFromErrorMessage(message: string): { providerID?: s return {}; } +function applyUserConfiguredFallbackChain( + sessionID: string, + agentName: string, + currentProviderID: string, + pluginConfig: OhMyOpenCodeConfig, +): void { + const agentKey = getAgentConfigKey(agentName); + const configuredFallbackModels = getFallbackModelsForSession(sessionID, agentKey, pluginConfig); + if (configuredFallbackModels.length === 0) return; + + const fallbackChain = buildFallbackChainFromModels(configuredFallbackModels, currentProviderID); + + if (fallbackChain && fallbackChain.length > 0) { + setSessionFallbackChain(sessionID, fallbackChain); + } +} function isCompactionAgent(agent: string): boolean { return agent.toLowerCase() === "compaction"; @@ -116,6 +137,11 @@ export function createEventHandler(args: { client: { session: { abort: (input: { path: { id: string } }) => Promise; + promptAsync?: (input: { + path: { id: string }; + body: { parts: Array<{ type: "text"; text: string }> }; + query: { directory: string }; + }) => Promise; prompt: (input: { path: { id: string }; body: { parts: Array<{ type: "text"; text: string }> }; @@ -176,6 +202,29 @@ export function createEventHandler(args: { return !subagentSessions.has(sessionID); }; + const autoContinueAfterFallback = async (sessionID: string, source: string): Promise => { + await pluginContext.client.session.abort({ path: { id: sessionID } }).catch((error) => { + log("[event] model-fallback abort failed", { sessionID, source, error }); + }); + + const promptBody = { + path: { id: sessionID }, + body: { parts: [{ type: "text" as const, text: "continue" }] }, + query: { directory: pluginContext.directory }, + }; + + if (typeof pluginContext.client.session.promptAsync === "function") { + await pluginContext.client.session.promptAsync(promptBody).catch((error) => { + log("[event] model-fallback promptAsync failed", { sessionID, source, error }); + }); + return; + } + + await pluginContext.client.session.prompt(promptBody).catch((error) => { + log("[event] model-fallback prompt failed", { sessionID, source, error }); + }); + }; + return async (input): Promise => { pruneRecentSyntheticIdles({ recentSyntheticIdles, @@ -310,6 +359,7 @@ export function createEventHandler(args: { const currentProvider = (info?.providerID as string | undefined) ?? "opencode"; const rawModel = (info?.modelID as string | undefined) ?? "claude-opus-4-6"; const currentModel = normalizeFallbackModelID(rawModel); + applyUserConfiguredFallbackChain(sessionID, agentName, currentProvider, args.pluginConfig); const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel); @@ -319,15 +369,7 @@ export function createEventHandler(args: { !hooks.stopContinuationGuard?.isStopped(sessionID) ) { lastHandledModelErrorMessageID.set(sessionID, assistantMessageID); - - await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {}); - await pluginContext.client.session - .prompt({ - path: { id: sessionID }, - body: { parts: [{ type: "text", text: "continue" }] }, - query: { directory: pluginContext.directory }, - }) - .catch(() => {}); + await autoContinueAfterFallback(sessionID, "message.updated"); } } } @@ -342,10 +384,14 @@ export function createEventHandler(args: { const sessionID = props?.sessionID as string | undefined; const status = props?.status as { type?: string; attempt?: number; message?: string; next?: number } | undefined; - if (sessionID && status?.type === "retry" && isModelFallbackEnabled) { + if (sessionID && status?.type === "retry" && isModelFallbackEnabled && !isRuntimeFallbackEnabled) { try { const retryMessage = typeof status.message === "string" ? status.message : ""; - const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}`; + const parsedForKey = extractProviderModelFromErrorMessage(retryMessage); + const retryAttempt = extractRetryAttempt(status.attempt, retryMessage); + // Deduplicate countdown updates for the same retry attempt/model. + // Messages like "retrying in 7m 56s" change every second but should only trigger once. + const retryKey = `${retryAttempt}:${parsedForKey.providerID ?? ""}/${parsedForKey.modelID ?? ""}:${normalizeRetryStatusMessage(retryMessage)}`; if (lastHandledRetryStatusKey.get(sessionID) === retryKey) { return; } @@ -370,6 +416,7 @@ export function createEventHandler(args: { const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode"; let currentModel = parsed.modelID ?? lastKnown?.modelID ?? "claude-opus-4-6"; currentModel = normalizeFallbackModelID(currentModel); + applyUserConfiguredFallbackChain(sessionID, agentName, currentProvider, args.pluginConfig); const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel); @@ -378,14 +425,7 @@ export function createEventHandler(args: { shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID) ) { - await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {}); - await pluginContext.client.session - .prompt({ - path: { id: sessionID }, - body: { parts: [{ type: "text", text: "continue" }] }, - query: { directory: pluginContext.directory }, - }) - .catch(() => {}); + await autoContinueAfterFallback(sessionID, "session.status"); } } } @@ -448,6 +488,7 @@ export function createEventHandler(args: { const currentProvider = (props?.providerID as string) || parsed.providerID || "opencode"; let currentModel = (props?.modelID as string) || parsed.modelID || "claude-opus-4-6"; currentModel = normalizeFallbackModelID(currentModel); + applyUserConfiguredFallbackChain(sessionID, agentName, currentProvider, args.pluginConfig); const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel); @@ -456,15 +497,7 @@ export function createEventHandler(args: { shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID) ) { - await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {}); - - await pluginContext.client.session - .prompt({ - path: { id: sessionID }, - body: { parts: [{ type: "text", text: "continue" }] }, - query: { directory: pluginContext.directory }, - }) - .catch(() => {}); + await autoContinueAfterFallback(sessionID, "session.error"); } } } diff --git a/src/plugin/tool-registry.ts b/src/plugin/tool-registry.ts index 3b441c197..23e8009e5 100644 --- a/src/plugin/tool-registry.ts +++ b/src/plugin/tool-registry.ts @@ -48,7 +48,13 @@ export function createToolRegistry(args: { const { ctx, pluginConfig, managers, skillContext, availableCategories } = args const backgroundTools = createBackgroundTools(managers.backgroundManager, ctx.client) - const callOmoAgent = createCallOmoAgent(ctx, managers.backgroundManager, pluginConfig.disabled_agents ?? []) + const callOmoAgent = createCallOmoAgent( + ctx, + managers.backgroundManager, + pluginConfig.disabled_agents ?? [], + pluginConfig.agents, + pluginConfig.categories, + ) const isMultimodalLookerEnabled = !(pluginConfig.disabled_agents ?? []).some( (agent) => agent.toLowerCase() === "multimodal-looker", diff --git a/src/shared/fallback-chain-from-models.test.ts b/src/shared/fallback-chain-from-models.test.ts new file mode 100644 index 000000000..096364594 --- /dev/null +++ b/src/shared/fallback-chain-from-models.test.ts @@ -0,0 +1,48 @@ +import { describe, test, expect } from "bun:test" +import { buildFallbackChainFromModels, parseFallbackModelEntry } from "./fallback-chain-from-models" + +describe("fallback-chain-from-models", () => { + test("parses provider/model entry with parenthesized variant", () => { + //#given + const fallbackModel = "openai/gpt-5.2(high)" + + //#when + const parsed = parseFallbackModelEntry(fallbackModel, "quotio") + + //#then + expect(parsed).toEqual({ + providers: ["openai"], + model: "gpt-5.2", + variant: "high", + }) + }) + + test("uses default provider when fallback model omits provider prefix", () => { + //#given + const fallbackModel = "glm-5" + + //#when + const parsed = parseFallbackModelEntry(fallbackModel, "quotio") + + //#then + expect(parsed).toEqual({ + providers: ["quotio"], + model: "glm-5", + variant: undefined, + }) + }) + + test("builds fallback chain from normalized fallback_models input", () => { + //#given + const fallbackModels = ["quotio/kimi-k2.5", "gpt-5.2 medium"] + + //#when + const chain = buildFallbackChainFromModels(fallbackModels, "quotio") + + //#then + expect(chain).toEqual([ + { providers: ["quotio"], model: "kimi-k2.5", variant: undefined }, + { providers: ["quotio"], model: "gpt-5.2", variant: "medium" }, + ]) + }) +}) diff --git a/src/shared/fallback-chain-from-models.ts b/src/shared/fallback-chain-from-models.ts new file mode 100644 index 000000000..d09b4e5fe --- /dev/null +++ b/src/shared/fallback-chain-from-models.ts @@ -0,0 +1,75 @@ +import type { FallbackEntry } from "./model-requirements" +import { normalizeFallbackModels } from "./model-resolver" + +const KNOWN_VARIANTS = new Set([ + "low", + "medium", + "high", + "xhigh", + "max", + "none", + "auto", + "thinking", +]) + +function parseVariantFromModel(rawModel: string): { modelID: string; variant?: string } { + const trimmedModel = rawModel.trim() + if (!trimmedModel) { + return { modelID: "" } + } + + const parenthesizedVariant = trimmedModel.match(/^(.*)\(([^()]+)\)\s*$/) + if (parenthesizedVariant) { + const modelID = parenthesizedVariant[1]?.trim() ?? "" + const variant = parenthesizedVariant[2]?.trim() + return variant ? { modelID, variant } : { modelID } + } + + const spaceVariant = trimmedModel.match(/^(.*\S)\s+([a-z][a-z0-9_-]*)$/i) + if (spaceVariant) { + const modelID = spaceVariant[1]?.trim() ?? "" + const variant = spaceVariant[2]?.trim().toLowerCase() + if (variant && KNOWN_VARIANTS.has(variant)) { + return { modelID, variant } + } + } + + return { modelID: trimmedModel } +} + +export function parseFallbackModelEntry( + model: string, + defaultProviderID: string, +): FallbackEntry | undefined { + const trimmed = model.trim() + if (!trimmed) return undefined + + const parts = trimmed.split("/") + const providerID = parts.length >= 2 ? parts[0].trim() : defaultProviderID + const rawModelID = parts.length >= 2 ? parts.slice(1).join("/").trim() : trimmed + if (!providerID || !rawModelID) return undefined + + const parsed = parseVariantFromModel(rawModelID) + if (!parsed.modelID) return undefined + + return { + providers: [providerID], + model: parsed.modelID, + variant: parsed.variant, + } +} + +export function buildFallbackChainFromModels( + fallbackModels: string | string[] | undefined, + defaultProviderID: string, +): FallbackEntry[] | undefined { + const normalized = normalizeFallbackModels(fallbackModels) + if (!normalized || normalized.length === 0) return undefined + + const parsed = normalized + .map((model) => parseFallbackModelEntry(model, defaultProviderID)) + .filter((entry): entry is FallbackEntry => entry !== undefined) + + if (parsed.length === 0) return undefined + return parsed +} diff --git a/src/shared/model-error-classifier.test.ts b/src/shared/model-error-classifier.test.ts index 016819f1e..17470199c 100644 --- a/src/shared/model-error-classifier.test.ts +++ b/src/shared/model-error-classifier.test.ts @@ -36,6 +36,20 @@ describe("model-error-classifier", () => { expect(result).toBe(true) }) + test("treats cooling-down auto-retry messages as retryable", () => { + //#given + const error = { + message: + "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in ~5 days attempt #1]", + } + + //#when + const result = shouldRetryError(error) + + //#then + expect(result).toBe(true) + }) + test("selectFallbackProvider prefers first connected provider in preference order", () => { //#given writeFileSync( @@ -73,4 +87,18 @@ describe("model-error-classifier", () => { //#then expect(provider).toBe("anthropic") }) + + test("selectFallbackProvider uses connected preferred provider when fallback providers are unavailable", () => { + //#given + writeFileSync( + join(TEST_CACHE_DIR, "connected-providers.json"), + JSON.stringify({ connected: ["provider-x"], updatedAt: new Date().toISOString() }, null, 2), + ) + + //#when + const provider = selectFallbackProvider(["provider-y"], "provider-x") + + //#then + expect(provider).toBe("provider-x") + }) }) diff --git a/src/shared/model-error-classifier.ts b/src/shared/model-error-classifier.ts index defcef670..22d5606c7 100644 --- a/src/shared/model-error-classifier.ts +++ b/src/shared/model-error-classifier.ts @@ -36,6 +36,11 @@ const RETRYABLE_MESSAGE_PATTERNS = [ "rate_limit", "rate limit", "quota", + "quota will reset after", + "usage limit has been reached", + "all credentials for model", + "cooling down", + "exhausted your capacity", "not found", "unavailable", "insufficient", @@ -55,6 +60,23 @@ const RETRYABLE_MESSAGE_PATTERNS = [ "504", ] +const AUTO_RETRY_GATE_PATTERNS = [ + "rate limit", + "quota", + "usage limit", + "limit reached", + "cooling down", + "credentials for model", + "exhausted your capacity", +] + +function hasProviderAutoRetrySignal(message: string): boolean { + if (!message.includes("retrying in")) { + return false + } + return AUTO_RETRY_GATE_PATTERNS.some((pattern) => message.includes(pattern)) +} + export interface ErrorInfo { name?: string message?: string @@ -79,6 +101,9 @@ export function isRetryableModelError(error: ErrorInfo): boolean { // Check message patterns for unknown errors const msg = error.message?.toLowerCase() ?? "" + if (hasProviderAutoRetrySignal(msg)) { + return true + } return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern)) } @@ -115,7 +140,8 @@ export function hasMoreFallbacks( * Selects the best provider for a fallback entry. * Priority: * 1) First connected provider in the entry's provider preference order - * 2) First provider listed in the fallback entry (when cache is missing) + * 2) Preferred provider when connected (and entry providers are unavailable) + * 3) First provider listed in the fallback entry */ export function selectFallbackProvider( providers: string[], @@ -124,11 +150,19 @@ export function selectFallbackProvider( const connectedProviders = readConnectedProvidersCache() if (connectedProviders) { const connectedSet = new Set(connectedProviders.map(p => p.toLowerCase())) + for (const provider of providers) { if (connectedSet.has(provider.toLowerCase())) { return provider } } + + if ( + preferredProviderID && + connectedSet.has(preferredProviderID.toLowerCase()) + ) { + return preferredProviderID + } } return providers[0] || preferredProviderID || "opencode" diff --git a/src/shared/retry-status-utils.ts b/src/shared/retry-status-utils.ts new file mode 100644 index 000000000..3b93ddf02 --- /dev/null +++ b/src/shared/retry-status-utils.ts @@ -0,0 +1,19 @@ +export function normalizeRetryStatusMessage(message: string): string { + return message + .replace(/\[retrying in [^\]]*attempt\s*#\d+\]/gi, "[retrying]") + .replace(/retrying in\s+[^(]*attempt\s*#\d+/gi, "retrying") + .replace(/\s+/g, " ") + .trim() + .toLowerCase() +} + +export function extractRetryAttempt(statusAttempt: unknown, message: string): string { + if (typeof statusAttempt === "number" && Number.isFinite(statusAttempt)) { + return String(statusAttempt) + } + const attemptMatch = message.match(/attempt\s*#\s*(\d+)/i) + if (attemptMatch?.[1]) { + return attemptMatch[1] + } + return "?" +} diff --git a/src/tools/call-omo-agent/background-executor.test.ts b/src/tools/call-omo-agent/background-executor.test.ts index 970b9c135..53ea45d44 100644 --- a/src/tools/call-omo-agent/background-executor.test.ts +++ b/src/tools/call-omo-agent/background-executor.test.ts @@ -64,4 +64,26 @@ describe("executeBackground", () => { expect(result).toContain("interrupt") expect(result).toContain("test-task-id") }) + + test("passes fallbackChain to background manager launch", async () => { + //#given + const fallbackChain = [ + { providers: ["quotio"], model: "kimi-k2.5", variant: undefined }, + { providers: ["openai"], model: "gpt-5.2", variant: "high" }, + ] + launchMock.mockResolvedValueOnce({ + id: "test-task-id", + sessionID: "sub-session", + description: "Test task", + agent: "test-agent", + status: "pending", + }) + + //#when + await executeBackground(testArgs, testContext, mockManager, mockClient, fallbackChain) + + //#then + const launchArgs = launchMock.mock.calls.at(-1)?.[0] + expect(launchArgs.fallbackChain).toEqual(fallbackChain) + }) }) diff --git a/src/tools/call-omo-agent/background-executor.ts b/src/tools/call-omo-agent/background-executor.ts index b59473d2a..ad0c2b1c9 100644 --- a/src/tools/call-omo-agent/background-executor.ts +++ b/src/tools/call-omo-agent/background-executor.ts @@ -2,6 +2,7 @@ import type { CallOmoAgentArgs } from "./types" import type { BackgroundManager } from "../../features/background-agent" import type { PluginInput } from "@opencode-ai/plugin" import { log } from "../../shared" +import type { FallbackEntry } from "../../shared/model-requirements" import { resolveMessageContext } from "../../features/hook-message-injector" import { getSessionAgent } from "../../features/claude-code-session-state" import { getMessageDir } from "./message-dir" @@ -17,7 +18,8 @@ export async function executeBackground( metadata?: (input: { title?: string; metadata?: Record }) => void }, manager: BackgroundManager, - client: PluginInput["client"] + client: PluginInput["client"], + fallbackChain?: FallbackEntry[], ): Promise { try { const messageDir = getMessageDir(toolContext.sessionID) @@ -48,6 +50,7 @@ export async function executeBackground( parentMessageID: toolContext.messageID, parentAgent, parentTools: getSessionTools(toolContext.sessionID), + fallbackChain, }) const WAIT_FOR_SESSION_INTERVAL_MS = 50 diff --git a/src/tools/call-omo-agent/sync-executor.test.ts b/src/tools/call-omo-agent/sync-executor.test.ts index de243aeed..37df05a36 100644 --- a/src/tools/call-omo-agent/sync-executor.test.ts +++ b/src/tools/call-omo-agent/sync-executor.test.ts @@ -9,6 +9,7 @@ describe("executeSync", () => { createOrGetSession: mock(async () => ({ sessionID: "ses-test-123", isNew: true })), waitForCompletion: mock(async () => {}), processMessages: mock(async () => "agent response"), + setSessionFallbackChain: mock(() => {}), } let promptArgs: any @@ -53,6 +54,7 @@ describe("executeSync", () => { createOrGetSession: mock(async () => ({ sessionID: "ses-test-123", isNew: true })), waitForCompletion: mock(async () => {}), processMessages: mock(async () => "agent response"), + setSessionFallbackChain: mock(() => {}), } let promptArgs: any @@ -88,4 +90,48 @@ describe("executeSync", () => { expect(promptAsync).toHaveBeenCalled() expect(promptArgs.body.tools.task).toBe(false) }) + + test("applies fallbackChain to sync sessions", async () => { + //#given + const { executeSync } = require("./sync-executor") + + const setSessionFallbackChain = mock(() => {}) + const deps = { + createOrGetSession: mock(async () => ({ sessionID: "ses-test-456", isNew: true })), + waitForCompletion: mock(async () => {}), + processMessages: mock(async () => "agent response"), + setSessionFallbackChain, + } + + const args = { + subagent_type: "explore", + description: "test task", + prompt: "find something", + } + + const toolContext = { + sessionID: "parent-session", + messageID: "msg-3", + agent: "sisyphus", + abort: new AbortController().signal, + metadata: mock(async () => {}), + } + + const ctx = { + client: { + session: { promptAsync: mock(async () => ({ data: {} })) }, + }, + } + + const fallbackChain = [ + { providers: ["quotio"], model: "kimi-k2.5", variant: undefined }, + { providers: ["openai"], model: "gpt-5.2", variant: "high" }, + ] + + //#when + await executeSync(args, toolContext, ctx as any, deps, fallbackChain) + + //#then + expect(setSessionFallbackChain).toHaveBeenCalledWith("ses-test-456", fallbackChain) + }) }) diff --git a/src/tools/call-omo-agent/sync-executor.ts b/src/tools/call-omo-agent/sync-executor.ts index af356cc9a..971a9e410 100644 --- a/src/tools/call-omo-agent/sync-executor.ts +++ b/src/tools/call-omo-agent/sync-executor.ts @@ -1,7 +1,9 @@ import type { CallOmoAgentArgs } from "./types" import type { PluginInput } from "@opencode-ai/plugin" import { log } from "../../shared" +import type { FallbackEntry } from "../../shared/model-requirements" import { getAgentToolRestrictions } from "../../shared" +import { setSessionFallbackChain } from "../../hooks/model-fallback/hook" import { createOrGetSession } from "./session-creator" import { waitForCompletion } from "./completion-poller" import { processMessages } from "./message-processor" @@ -14,12 +16,14 @@ type ExecuteSyncDeps = { createOrGetSession: typeof createOrGetSession waitForCompletion: typeof waitForCompletion processMessages: typeof processMessages + setSessionFallbackChain: typeof setSessionFallbackChain } const defaultDeps: ExecuteSyncDeps = { createOrGetSession, waitForCompletion, processMessages, + setSessionFallbackChain, } export async function executeSync( @@ -32,10 +36,15 @@ export async function executeSync( metadata?: (input: { title?: string; metadata?: Record }) => void }, ctx: PluginInput, - deps: ExecuteSyncDeps = defaultDeps + deps: ExecuteSyncDeps = defaultDeps, + fallbackChain?: FallbackEntry[], ): Promise { const { sessionID } = await deps.createOrGetSession(args, toolContext, ctx) + if (fallbackChain && fallbackChain.length > 0) { + deps.setSessionFallbackChain(sessionID, fallbackChain) + } + await toolContext.metadata?.({ title: args.description, metadata: { sessionId: sessionID }, diff --git a/src/tools/call-omo-agent/tools.test.ts b/src/tools/call-omo-agent/tools.test.ts index a560c8bea..4efbe9657 100644 --- a/src/tools/call-omo-agent/tools.test.ts +++ b/src/tools/call-omo-agent/tools.test.ts @@ -99,4 +99,48 @@ describe("createCallOmoAgent", () => { //#then expect(result).not.toContain("disabled via disabled_agents") }) + + test("uses agent override fallback_models when launching background subagent", async () => { + //#given + const launch = mock(() => Promise.resolve({ + id: "task-fallback", + sessionID: "sub-session", + description: "Test task", + agent: "explore", + status: "pending", + })) + const managerWithLaunch = { + launch, + getTask: mock(() => undefined), + } as unknown as BackgroundManager + const toolDef = createCallOmoAgent( + mockCtx, + managerWithLaunch, + [], + { + explore: { + fallback_models: ["quotio/kimi-k2.5", "openai/gpt-5.2(high)"], + }, + }, + ) + const executeFunc = toolDef.execute as Function + + //#when + await executeFunc( + { + description: "Test fallback", + prompt: "Test prompt", + subagent_type: "explore", + run_in_background: true, + }, + { sessionID: "test", messageID: "msg", agent: "test", abort: new AbortController().signal } + ) + + //#then + const launchArgs = launch.mock.calls[0]?.[0] + expect(launchArgs.fallbackChain).toEqual([ + { providers: ["quotio"], model: "kimi-k2.5", variant: undefined }, + { providers: ["openai"], model: "gpt-5.2", variant: "high" }, + ]) + }) }) diff --git a/src/tools/call-omo-agent/tools.ts b/src/tools/call-omo-agent/tools.ts index c2a169e64..1338282f1 100644 --- a/src/tools/call-omo-agent/tools.ts +++ b/src/tools/call-omo-agent/tools.ts @@ -2,14 +2,46 @@ import { tool, type PluginInput, type ToolDefinition } from "@opencode-ai/plugin import { ALLOWED_AGENTS, CALL_OMO_AGENT_DESCRIPTION } from "./constants" import type { AllowedAgentType, CallOmoAgentArgs, ToolContextWithMetadata } from "./types" import type { BackgroundManager } from "../../features/background-agent" +import type { CategoriesConfig, AgentOverrides } from "../../config/schema" +import type { FallbackEntry } from "../../shared/model-requirements" +import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements" +import { getAgentConfigKey } from "../../shared/agent-display-names" +import { normalizeFallbackModels } from "../../shared/model-resolver" +import { buildFallbackChainFromModels } from "../../shared/fallback-chain-from-models" import { log } from "../../shared" import { executeBackground } from "./background-executor" import { executeSync } from "./sync-executor" +function resolveFallbackChainForCallOmoAgent(args: { + subagentType: string + agentOverrides?: AgentOverrides + userCategories?: CategoriesConfig +}): FallbackEntry[] | undefined { + const { subagentType, agentOverrides, userCategories } = args + const agentConfigKey = getAgentConfigKey(subagentType) + const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentConfigKey] + + const agentOverride = agentOverrides?.[agentConfigKey as keyof AgentOverrides] + ?? (agentOverrides + ? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentConfigKey)?.[1] + : undefined) + + const normalizedFallbackModels = normalizeFallbackModels( + agentOverride?.fallback_models + ?? (agentOverride?.category ? userCategories?.[agentOverride.category]?.fallback_models : undefined) + ) + const defaultProviderID = agentRequirement?.fallbackChain?.[0]?.providers?.[0] ?? "opencode" + const configuredFallbackChain = buildFallbackChainFromModels(normalizedFallbackModels, defaultProviderID) + + return configuredFallbackChain ?? agentRequirement?.fallbackChain +} + export function createCallOmoAgent( ctx: PluginInput, backgroundManager: BackgroundManager, - disabledAgents: string[] = [] + disabledAgents: string[] = [], + agentOverrides?: AgentOverrides, + userCategories?: CategoriesConfig, ): ToolDefinition { const agentDescriptions = ALLOWED_AGENTS.map( (name) => `- ${name}: Specialized agent for ${name} tasks` @@ -50,14 +82,20 @@ export function createCallOmoAgent( return `Error: Agent "${normalizedAgent}" is disabled via disabled_agents configuration. Remove it from disabled_agents in your oh-my-opencode.json to use it.` } + const fallbackChain = resolveFallbackChainForCallOmoAgent({ + subagentType: args.subagent_type, + agentOverrides, + userCategories, + }) + if (args.run_in_background) { if (args.session_id) { return `Error: session_id is not supported in background mode. Use run_in_background=false to continue an existing session.` } - return await executeBackground(args, toolCtx, backgroundManager, ctx.client) + return await executeBackground(args, toolCtx, backgroundManager, ctx.client, fallbackChain) } - return await executeSync(args, toolCtx, ctx) + return await executeSync(args, toolCtx, ctx, undefined, fallbackChain) }, }) } diff --git a/src/tools/delegate-task/category-resolver.test.ts b/src/tools/delegate-task/category-resolver.test.ts index a2397c4d2..2257978f0 100644 --- a/src/tools/delegate-task/category-resolver.test.ts +++ b/src/tools/delegate-task/category-resolver.test.ts @@ -75,4 +75,34 @@ describe("resolveCategoryExecution", () => { expect(result.error).toContain("Unknown category") expect(result.error).toContain("definitely-not-a-real-category-xyz123") }) + + test("uses category fallback_models for background/runtime fallback chain", async () => { + //#given + const args = { + category: "deep", + prompt: "test prompt", + description: "Test task", + run_in_background: false, + load_skills: [], + blockedBy: undefined, + enableSkillTools: false, + } + const executorCtx = createMockExecutorContext() + executorCtx.userCategories = { + deep: { + model: "quotio/claude-opus-4-6", + fallback_models: ["quotio/kimi-k2.5", "openai/gpt-5.2(high)"], + }, + } + + //#when + const result = await resolveCategoryExecution(args, executorCtx, undefined, "anthropic/claude-sonnet-4-6") + + //#then + expect(result.error).toBeUndefined() + expect(result.fallbackChain).toEqual([ + { providers: ["quotio"], model: "kimi-k2.5", variant: undefined }, + { providers: ["openai"], model: "gpt-5.2", variant: "high" }, + ]) + }) }) diff --git a/src/tools/delegate-task/category-resolver.ts b/src/tools/delegate-task/category-resolver.ts index bc516dce7..01901ec8c 100644 --- a/src/tools/delegate-task/category-resolver.ts +++ b/src/tools/delegate-task/category-resolver.ts @@ -7,6 +7,8 @@ import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent" import { resolveCategoryConfig } from "./categories" import { parseModelString } from "./model-string-parser" import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements" +import { normalizeFallbackModels } from "../../shared/model-resolver" +import { buildFallbackChainFromModels } from "../../shared/fallback-chain-from-models" import { getAvailableModelsForDelegateTask } from "./available-models" import { resolveModelForDelegateTask } from "./model-selection" @@ -79,6 +81,7 @@ Available categories: ${allCategoryNames}`, } const requirement = CATEGORY_MODEL_REQUIREMENTS[args.category!] + const normalizedConfiguredFallbackModels = normalizeFallbackModels(resolved.config.fallback_models) let actualModel: string | undefined let modelInfo: ModelFallbackInfo | undefined let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined @@ -99,6 +102,7 @@ Available categories: ${allCategoryNames}`, } else { const resolution = resolveModelForDelegateTask({ userModel: explicitCategoryModel ?? overrideModel, + userFallbackModels: normalizedConfiguredFallbackModels, categoryDefaultModel: resolved.model, fallbackChain: requirement.fallbackChain, availableModels, @@ -178,6 +182,14 @@ Available categories: ${categoryNames.join(", ")}`, const categoryConfigModel = resolved.config.model?.toLowerCase() const isUnstableAgent = resolved.config.is_unstable_agent === true || [unstableModel, categoryConfigModel].some(m => m ? m.includes("gemini") || m.includes("minimax") || m.includes("kimi") : false) + const defaultProviderID = categoryModel?.providerID + ?? parseModelString(actualModel ?? "")?.providerID + ?? "opencode" + const configuredFallbackChain = buildFallbackChainFromModels( + normalizedConfiguredFallbackModels, + defaultProviderID, + ) + return { agentToUse: SISYPHUS_JUNIOR_AGENT, categoryModel, @@ -186,6 +198,6 @@ Available categories: ${categoryNames.join(", ")}`, modelInfo, actualModel, isUnstableAgent, - fallbackChain: requirement?.fallbackChain, + fallbackChain: configuredFallbackChain ?? requirement?.fallbackChain, } } diff --git a/src/tools/delegate-task/model-selection.ts b/src/tools/delegate-task/model-selection.ts index f84d96ad1..0d085d868 100644 --- a/src/tools/delegate-task/model-selection.ts +++ b/src/tools/delegate-task/model-selection.ts @@ -14,6 +14,7 @@ function getExplicitHighBaseModel(model: string): string | null { export function resolveModelForDelegateTask(input: { userModel?: string + userFallbackModels?: string[] categoryDefaultModel?: string fallbackChain?: FallbackEntry[] availableModels: Set @@ -44,6 +45,28 @@ export function resolveModelForDelegateTask(input: { } } + const userFallbackModels = input.userFallbackModels + if (userFallbackModels && userFallbackModels.length > 0) { + if (input.availableModels.size === 0) { + const first = normalizeModel(userFallbackModels[0]) + if (first) { + return { model: first } + } + } else { + for (const fallbackModel of userFallbackModels) { + const normalizedFallback = normalizeModel(fallbackModel) + if (!normalizedFallback) continue + + const parts = normalizedFallback.split("/") + const providerHint = parts.length >= 2 ? [parts[0]] : undefined + const match = fuzzyMatchModel(normalizedFallback, input.availableModels, providerHint) + if (match) { + return { model: match } + } + } + } + } + const fallbackChain = input.fallbackChain if (fallbackChain && fallbackChain.length > 0) { if (input.availableModels.size === 0) { diff --git a/src/tools/delegate-task/subagent-resolver.test.ts b/src/tools/delegate-task/subagent-resolver.test.ts index b1c03d73b..28c7a4731 100644 --- a/src/tools/delegate-task/subagent-resolver.test.ts +++ b/src/tools/delegate-task/subagent-resolver.test.ts @@ -17,7 +17,10 @@ function createBaseArgs(overrides?: Partial): DelegateTaskArgs } } -function createExecutorContext(agentsFn: () => Promise): ExecutorContext { +function createExecutorContext( + agentsFn: () => Promise, + overrides?: Partial, +): ExecutorContext { const client = { app: { agents: agentsFn, @@ -28,6 +31,7 @@ function createExecutorContext(agentsFn: () => Promise): ExecutorContex client, manager: {} as ExecutorContext["manager"], directory: "/tmp/test", + ...overrides, } } @@ -101,4 +105,74 @@ describe("resolveSubagentExecution", () => { expect(result.categoryModel).toEqual({ providerID: "openai", modelID: "gpt-5.3-codex" }) cacheSpy.mockRestore() }) + + test("uses agent override fallback_models for subagent runtime fallback chain", async () => { + //#given + const cacheSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({ + models: { quotio: ["claude-haiku-4-5"] }, + connected: ["quotio"], + updatedAt: "2026-03-03T00:00:00.000Z", + }) + const args = createBaseArgs({ subagent_type: "explore" }) + const executorCtx = createExecutorContext( + async () => ([ + { name: "explore", mode: "subagent", model: "quotio/claude-haiku-4-5" }, + ]), + { + agentOverrides: { + explore: { + fallback_models: ["quotio/gpt-5.2", "glm-5(max)"], + }, + } as ExecutorContext["agentOverrides"], + } + ) + + //#when + const result = await resolveSubagentExecution(args, executorCtx, "sisyphus", "deep") + + //#then + expect(result.error).toBeUndefined() + expect(result.fallbackChain).toEqual([ + { providers: ["quotio"], model: "gpt-5.2", variant: undefined }, + { providers: ["quotio"], model: "glm-5", variant: "max" }, + ]) + cacheSpy.mockRestore() + }) + + test("uses category fallback_models when agent override points at category", async () => { + //#given + const cacheSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({ + models: { anthropic: ["claude-haiku-4-5"] }, + connected: ["anthropic"], + updatedAt: "2026-03-03T00:00:00.000Z", + }) + const args = createBaseArgs({ subagent_type: "explore" }) + const executorCtx = createExecutorContext( + async () => ([ + { name: "explore", mode: "subagent", model: "quotio/claude-haiku-4-5" }, + ]), + { + agentOverrides: { + explore: { + category: "research", + }, + } as ExecutorContext["agentOverrides"], + userCategories: { + research: { + fallback_models: ["anthropic/claude-haiku-4-5"], + }, + } as ExecutorContext["userCategories"], + } + ) + + //#when + const result = await resolveSubagentExecution(args, executorCtx, "sisyphus", "deep") + + //#then + expect(result.error).toBeUndefined() + expect(result.fallbackChain).toEqual([ + { providers: ["anthropic"], model: "claude-haiku-4-5", variant: undefined }, + ]) + cacheSpy.mockRestore() + }) }) diff --git a/src/tools/delegate-task/subagent-resolver.ts b/src/tools/delegate-task/subagent-resolver.ts index 907cd5c7d..7e2a5c055 100644 --- a/src/tools/delegate-task/subagent-resolver.ts +++ b/src/tools/delegate-task/subagent-resolver.ts @@ -4,6 +4,8 @@ import { isPlanFamily } from "./constants" import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent" import { normalizeModelFormat } from "../../shared/model-format-normalizer" import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements" +import { normalizeFallbackModels } from "../../shared/model-resolver" +import { buildFallbackChainFromModels } from "../../shared/fallback-chain-from-models" import { getAgentDisplayName, getAgentConfigKey } from "../../shared/agent-display-names" import { normalizeSDKResponse } from "../../shared" import { log } from "../../shared/logger" @@ -17,7 +19,7 @@ export async function resolveSubagentExecution( parentAgent: string | undefined, categoryExamples: string ): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; fallbackChain?: FallbackEntry[]; error?: string }> { - const { client, agentOverrides } = executorCtx + const { client, agentOverrides, userCategories } = executorCtx if (!args.subagent_type?.trim()) { return { agentToUse: "", categoryModel: undefined, error: `Agent name cannot be empty.` } @@ -98,7 +100,10 @@ Create the work plan directly - that's your job as the planning agent.`, const agentOverride = agentOverrides?.[agentConfigKey as keyof typeof agentOverrides] ?? (agentOverrides ? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentConfigKey)?.[1] : undefined) const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentConfigKey] - fallbackChain = agentRequirement?.fallbackChain + const normalizedAgentFallbackModels = normalizeFallbackModels( + agentOverride?.fallback_models + ?? (agentOverride?.category ? userCategories?.[agentOverride.category]?.fallback_models : undefined) + ) if (agentOverride?.model || agentRequirement || matchedAgent.model) { const availableModels = await getAvailableModelsForDelegateTask(client) @@ -112,6 +117,7 @@ Create the work plan directly - that's your job as the planning agent.`, const resolution = resolveModelForDelegateTask({ userModel: agentOverride?.model, + userFallbackModels: normalizedAgentFallbackModels, categoryDefaultModel: matchedAgentModelStr, fallbackChain: agentRequirement?.fallbackChain, availableModels, @@ -125,6 +131,15 @@ Create the work plan directly - that's your job as the planning agent.`, categoryModel = variantToUse ? { ...normalized, variant: variantToUse } : normalized } } + + const defaultProviderID = categoryModel?.providerID + ?? normalizedMatchedModel?.providerID + ?? "opencode" + const configuredFallbackChain = buildFallbackChainFromModels( + normalizedAgentFallbackModels, + defaultProviderID, + ) + fallbackChain = configuredFallbackChain ?? agentRequirement?.fallbackChain } if (!categoryModel && matchedAgent.model) {