diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index b22b3feac..2762ec30e 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -69,6 +69,7 @@ "directory-readme-injector", "empty-task-response-detector", "think-mode", + "model-fallback", "anthropic-context-window-limit-recovery", "preemptive-compaction", "rules-injector", @@ -80,6 +81,7 @@ "non-interactive-env", "interactive-bash-session", "thinking-block-validator", + "beast-mode-system", "ralph-loop", "category-skill-reminder", "compaction-context-injector", diff --git a/src/cli/doctor/checks/model-resolution.test.ts b/src/cli/doctor/checks/model-resolution.test.ts index cca2f58b5..447aaedc5 100644 --- a/src/cli/doctor/checks/model-resolution.test.ts +++ b/src/cli/doctor/checks/model-resolution.test.ts @@ -15,7 +15,7 @@ describe("model-resolution check", () => { const sisyphus = info.agents.find((a) => a.name === "sisyphus") expect(sisyphus).toBeDefined() expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6") - expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("anthropic") + expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("quotio") }) it("returns category requirements with provider chains", async () => { @@ -26,8 +26,8 @@ describe("model-resolution check", () => { // then: Should have category entries const visual = info.categories.find((c) => c.name === "visual-engineering") expect(visual).toBeDefined() - expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro") - expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google") + expect(visual!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6-thinking") + expect(visual!.requirement.fallbackChain[0]?.providers).toContain("quotio") }) }) @@ -87,7 +87,7 @@ describe("model-resolution check", () => { expect(sisyphus).toBeDefined() expect(sisyphus!.userOverride).toBeUndefined() 
expect(sisyphus!.effectiveResolution).toContain("Provider fallback:") - expect(sisyphus!.effectiveResolution).toContain("anthropic") + expect(sisyphus!.effectiveResolution).toContain("quotio") }) it("captures user variant for agent when configured", async () => { diff --git a/src/cli/fallback-chain-resolution.ts b/src/cli/fallback-chain-resolution.ts index 528aef0e6..429f2ead9 100644 --- a/src/cli/fallback-chain-resolution.ts +++ b/src/cli/fallback-chain-resolution.ts @@ -1,8 +1,6 @@ -import { - AGENT_MODEL_REQUIREMENTS, - type FallbackEntry, -} from "../shared/model-requirements" +import type { FallbackEntry } from "../shared/model-requirements" import type { ProviderAvailability } from "./model-fallback-types" +import { CLI_AGENT_MODEL_REQUIREMENTS } from "./model-fallback-requirements" import { isProviderAvailable } from "./provider-availability" import { transformModelForProvider } from "./provider-model-id-transform" @@ -25,7 +23,7 @@ export function resolveModelFromChain( } export function getSisyphusFallbackChain(): FallbackEntry[] { - return AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain + return CLI_AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain } export function isAnyFallbackEntryAvailable( diff --git a/src/cli/model-fallback-requirements.ts b/src/cli/model-fallback-requirements.ts new file mode 100644 index 000000000..d48e82bc2 --- /dev/null +++ b/src/cli/model-fallback-requirements.ts @@ -0,0 +1,153 @@ +import type { ModelRequirement } from "../shared/model-requirements" + +// NOTE: These requirements are used by the CLI config generator (`generateModelConfig`). +// They intentionally use "install-time" provider IDs (anthropic/openai/google/opencode/etc), +// not runtime providers like `quotio`/`nvidia`. 
+ +export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { + sisyphus: { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + { providers: ["zai-coding-plan"], model: "glm-4.7" }, + { providers: ["opencode"], model: "glm-4.7-free" }, + ], + requiresAnyModel: true, + }, + hephaestus: { + fallbackChain: [ + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + ], + requiresProvider: ["openai", "github-copilot", "opencode"], + }, + oracle: { + fallbackChain: [ + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + ], + }, + librarian: { + fallbackChain: [ + { providers: ["zai-coding-plan"], model: "glm-4.7" }, + { providers: ["opencode"], model: "glm-4.7-free" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, + ], + }, + explore: { + fallbackChain: [ + { providers: ["github-copilot"], model: "grok-code-fast-1" }, + { providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" }, + { providers: ["opencode"], model: "gpt-5-nano" }, + ], + }, + "multimodal-looker": { + fallbackChain: [ + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, + { providers: ["zai-coding-plan"], model: "glm-4.6v" }, + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" }, + { providers: ["opencode"], model: "gpt-5-nano" }, + ], + }, + 
prometheus: { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + ], + }, + metis: { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + ], + }, + momus: { + fallbackChain: [ + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + ], + }, + atlas: { + fallbackChain: [ + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["opencode"], model: "kimi-k2.5-free" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + ], + }, +} + +export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = { + "visual-engineering": { + fallbackChain: [ + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["zai-coding-plan"], model: "glm-5" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: 
["kimi-for-coding"], model: "k2p5" }, + ], + }, + ultrabrain: { + fallbackChain: [ + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + ], + }, + deep: { + fallbackChain: [ + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + ], + requiresModel: "gpt-5.3-codex", + }, + artistry: { + fallbackChain: [ + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, + ], + requiresModel: "gemini-3-pro", + }, + quick: { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, + { providers: ["opencode"], model: "gpt-5-nano" }, + ], + }, + "unspecified-low": { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, + ], + }, + "unspecified-high": { + fallbackChain: [ + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, + { providers: ["google", 
"github-copilot", "opencode"], model: "gemini-3-pro" }, + ], + }, + writing: { + fallbackChain: [ + { providers: ["kimi-for-coding"], model: "k2p5" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, + { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, + ], + }, +} + diff --git a/src/cli/model-fallback.ts b/src/cli/model-fallback.ts index 92625ea4e..4d91ace5b 100644 --- a/src/cli/model-fallback.ts +++ b/src/cli/model-fallback.ts @@ -1,7 +1,7 @@ import { - AGENT_MODEL_REQUIREMENTS, - CATEGORY_MODEL_REQUIREMENTS, -} from "../shared/model-requirements" + CLI_AGENT_MODEL_REQUIREMENTS, + CLI_CATEGORY_MODEL_REQUIREMENTS, +} from "./model-fallback-requirements" import type { InstallConfig } from "./types" import type { AgentConfig, CategoryConfig, GeneratedOmoConfig } from "./model-fallback-types" @@ -16,9 +16,9 @@ import { export type { GeneratedOmoConfig } from "./model-fallback-types" -const LIBRARIAN_MODEL = "opencode/minimax-m2.5-free" +const ZAI_MODEL = "zai-coding-plan/glm-4.7" -const ULTIMATE_FALLBACK = "opencode/big-pickle" +const ULTIMATE_FALLBACK = "opencode/glm-4.7-free" const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json" @@ -38,12 +38,12 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig { return { $schema: SCHEMA_URL, agents: Object.fromEntries( - Object.entries(AGENT_MODEL_REQUIREMENTS) + Object.entries(CLI_AGENT_MODEL_REQUIREMENTS) .filter(([role, req]) => !(role === "sisyphus" && req.requiresAnyModel)) .map(([role]) => [role, { model: ULTIMATE_FALLBACK }]) ), categories: Object.fromEntries( - Object.keys(CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }]) + Object.keys(CLI_CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }]) ), } } @@ -51,9 +51,9 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig { 
const agents: Record = {} const categories: Record = {} - for (const [role, req] of Object.entries(AGENT_MODEL_REQUIREMENTS)) { - if (role === "librarian") { - agents[role] = { model: LIBRARIAN_MODEL } + for (const [role, req] of Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)) { + if (role === "librarian" && avail.zai) { + agents[role] = { model: ZAI_MODEL } continue } @@ -75,7 +75,6 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig { if (req.requiresAnyModel && !isAnyFallbackEntryAvailable(fallbackChain, avail)) { continue } - const resolved = resolveModelFromChain(fallbackChain, avail) if (resolved) { const variant = resolved.variant ?? req.variant @@ -100,11 +99,11 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig { } } - for (const [cat, req] of Object.entries(CATEGORY_MODEL_REQUIREMENTS)) { + for (const [cat, req] of Object.entries(CLI_CATEGORY_MODEL_REQUIREMENTS)) { // Special case: unspecified-high downgrades to unspecified-low when not isMaxPlan const fallbackChain = cat === "unspecified-high" && !avail.isMaxPlan - ? CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain + ? 
CLI_CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain : req.fallbackChain if (req.requiresModel && !isRequiredModelAvailable(req.requiresModel, req.fallbackChain, avail)) { diff --git a/src/config/schema/hooks.ts b/src/config/schema/hooks.ts index d6574df99..d82389516 100644 --- a/src/config/schema/hooks.ts +++ b/src/config/schema/hooks.ts @@ -13,6 +13,7 @@ export const HookNameSchema = z.enum([ "directory-readme-injector", "empty-task-response-detector", "think-mode", + "model-fallback", "anthropic-context-window-limit-recovery", "preemptive-compaction", "rules-injector", @@ -25,6 +26,7 @@ export const HookNameSchema = z.enum([ "interactive-bash-session", "thinking-block-validator", + "beast-mode-system", "ralph-loop", "category-skill-reminder", diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index c5395852d..ad09ad051 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -3046,6 +3046,164 @@ describe("BackgroundManager.handleEvent - session.error", () => { manager.shutdown() }) + + test("retry path releases current concurrency slot and prefers current provider in fallback entry", async () => { + //#given + const manager = createBackgroundManager() + const concurrencyManager = getConcurrencyManager(manager) + const concurrencyKey = "quotio/claude-opus-4-6-thinking" + await concurrencyManager.acquire(concurrencyKey) + + ;(manager as unknown as { processKey: (key: string) => Promise }).processKey = async () => {} + + const sessionID = "ses_error_retry" + const task = createMockTask({ + id: "task-session-error-retry", + sessionID, + parentSessionID: "parent-session", + parentMessageID: "msg-retry", + description: "task that should retry", + agent: "sisyphus", + status: "running", + concurrencyKey, + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + fallbackChain: [ + { providers: ["quotio"], model: "claude-opus-4-6", 
variant: "max" }, + { providers: ["quotio"], model: "claude-opus-4-5" }, + ], + attemptCount: 0, + }) + getTaskMap(manager).set(task.id, task) + + //#when + manager.handleEvent({ + type: "session.error", + properties: { + sessionID, + error: { + name: "UnknownError", + data: { + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}", + }, + }, + }, + }) + + //#then + expect(task.status).toBe("pending") + expect(task.attemptCount).toBe(1) + expect(task.model).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + variant: "max", + }) + expect(task.concurrencyKey).toBeUndefined() + expect(concurrencyManager.getCount(concurrencyKey)).toBe(0) + + manager.shutdown() + }) + + test("retry path triggers on session.status retry events", async () => { + //#given + const manager = createBackgroundManager() + ;(manager as unknown as { processKey: (key: string) => Promise }).processKey = async () => {} + + const sessionID = "ses_status_retry" + const task = createMockTask({ + id: "task-status-retry", + sessionID, + parentSessionID: "parent-session", + parentMessageID: "msg-status", + description: "task that should retry on status", + agent: "sisyphus", + status: "running", + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + fallbackChain: [ + { providers: ["quotio"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["quotio"], model: "gpt-5.3-codex", variant: "high" }, + ], + attemptCount: 0, + }) + getTaskMap(manager).set(task.id, task) + + //#when + manager.handleEvent({ + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + message: "Provider is overloaded", + }, + }, + }) + + //#then + expect(task.status).toBe("pending") + expect(task.attemptCount).toBe(1) + expect(task.model).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + variant: "max", + }) + + manager.shutdown() + }) + + test("retry path triggers on message.updated assistant 
error events", async () => { + //#given + const manager = createBackgroundManager() + ;(manager as unknown as { processKey: (key: string) => Promise }).processKey = async () => {} + + const sessionID = "ses_message_updated_retry" + const task = createMockTask({ + id: "task-message-updated-retry", + sessionID, + parentSessionID: "parent-session", + parentMessageID: "msg-message-updated", + description: "task that should retry on message.updated", + agent: "sisyphus", + status: "running", + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + fallbackChain: [ + { providers: ["quotio"], model: "claude-opus-4-6", variant: "max" }, + { providers: ["quotio"], model: "gpt-5.3-codex", variant: "high" }, + ], + attemptCount: 0, + }) + getTaskMap(manager).set(task.id, task) + + //#when + manager.handleEvent({ + type: "message.updated", + properties: { + info: { + id: "msg_errored", + sessionID, + role: "assistant", + error: { + name: "UnknownError", + data: { + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}", + }, + }, + }, + }, + }) + + //#then + expect(task.status).toBe("pending") + expect(task.attemptCount).toBe(1) + expect(task.model).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + variant: "max", + }) + + manager.shutdown() + }) }) describe("BackgroundManager queue processing - error tasks are skipped", () => { diff --git a/src/features/background-agent/manager.ts b/src/features/background-agent/manager.ts index 4d0682e30..30adadb7c 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -5,6 +5,7 @@ import type { LaunchInput, ResumeInput, } from "./types" +import type { FallbackEntry } from "../../shared/model-requirements" import { TaskHistory } from "./task-history" import { log, @@ -12,6 +13,8 @@ import { normalizePromptTools, normalizeSDKResponse, promptWithModelSuggestionRetry, + readConnectedProvidersCache, + 
readProviderModelsCache, resolveInheritedPromptTools, createInternalAgentTextPart, } from "../../shared" @@ -19,6 +22,12 @@ import { setSessionTools } from "../../shared/session-tools-store" import { ConcurrencyManager } from "./concurrency" import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema" import { isInsideTmux } from "../../shared/tmux" +import { + shouldRetryError, + getNextFallback, + hasMoreFallbacks, + selectFallbackProvider, +} from "../../shared/model-error-classifier" import { DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS, DEFAULT_STALE_TIMEOUT_MS, @@ -156,6 +165,8 @@ export class BackgroundManager { parentAgent: input.parentAgent, parentTools: input.parentTools, model: input.model, + fallbackChain: input.fallbackChain, + attemptCount: 0, category: input.category, } @@ -677,6 +688,27 @@ export class BackgroundManager { handleEvent(event: Event): void { const props = event.properties + if (event.type === "message.updated") { + const info = props?.info + if (!info || typeof info !== "object") return + + const sessionID = (info as Record)["sessionID"] + const role = (info as Record)["role"] + if (typeof sessionID !== "string" || role !== "assistant") return + + const task = this.findBySession(sessionID) + if (!task || task.status !== "running") return + + const assistantError = (info as Record)["error"] + if (!assistantError) return + + const errorInfo = { + name: this.extractErrorName(assistantError), + message: this.extractErrorMessage(assistantError), + } + this.tryFallbackRetry(task, errorInfo, "message.updated") + } + if (event.type === "message.part.updated" || event.type === "message.part.delta") { if (!props || typeof props !== "object" || !("sessionID" in props)) return const partInfo = props as unknown as MessagePartInfo @@ -773,10 +805,29 @@ export class BackgroundManager { const task = this.findBySession(sessionID) if (!task || task.status !== "running") return + const errorObj = props?.error as { name?: string; message?: string } 
| undefined + const errorName = errorObj?.name const errorMessage = props ? this.getSessionErrorMessage(props) : undefined + const errorInfo = { name: errorName, message: errorMessage } + if (this.tryFallbackRetry(task, errorInfo, "session.error")) return + + // Original error handling (no retry) + const errorMsg = errorMessage ?? "Session error" + const canRetry = + shouldRetryError(errorInfo) && + !!task.fallbackChain && + hasMoreFallbacks(task.fallbackChain, task.attemptCount ?? 0) + log("[background-agent] Session error - no retry:", { + taskId: task.id, + errorName, + errorMessage: errorMsg?.slice(0, 100), + hasFallbackChain: !!task.fallbackChain, + canRetry, + }) + task.status = "error" - task.error = errorMessage ?? "Session error" + task.error = errorMsg task.completedAt = new Date() this.taskHistory.record(task.parentSessionID, { id: task.id, sessionID: task.sessionID, agent: task.agent, description: task.description, status: "error", category: task.category, startedAt: task.startedAt, completedAt: task.completedAt }) @@ -860,6 +911,129 @@ export class BackgroundManager { } } } + + if (event.type === "session.status") { + const sessionID = props?.sessionID as string | undefined + const status = props?.status as { type?: string; message?: string } | undefined + if (!sessionID || status?.type !== "retry") return + + const task = this.findBySession(sessionID) + if (!task || task.status !== "running") return + + const errorMessage = typeof status.message === "string" ? status.message : undefined + const errorInfo = { name: "SessionRetry", message: errorMessage } + this.tryFallbackRetry(task, errorInfo, "session.status") + } + } + + private tryFallbackRetry( + task: BackgroundTask, + errorInfo: { name?: string; message?: string }, + source: string, + ): boolean { + const fallbackChain = task.fallbackChain + const canRetry = + shouldRetryError(errorInfo) && + fallbackChain && + fallbackChain.length > 0 && + hasMoreFallbacks(fallbackChain, task.attemptCount ?? 
0) + + if (!canRetry) return false + + const attemptCount = task.attemptCount ?? 0 + const providerModelsCache = readProviderModelsCache() + const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache() + const connectedSet = connectedProviders ? new Set(connectedProviders) : null + + const isReachable = (entry: FallbackEntry): boolean => { + if (!connectedSet) return true + + // Gate only on provider connectivity. Provider model lists can be stale/incomplete, + // especially after users manually add models to opencode.json. + return entry.providers.some((p) => connectedSet.has(p)) + } + + let selectedAttemptCount = attemptCount + let nextFallback: FallbackEntry | undefined + while (fallbackChain && selectedAttemptCount < fallbackChain.length) { + const candidate = getNextFallback(fallbackChain, selectedAttemptCount) + if (!candidate) break + selectedAttemptCount++ + if (!isReachable(candidate)) { + log("[background-agent] Skipping unreachable fallback:", { + taskId: task.id, + source, + model: candidate.model, + providers: candidate.providers, + }) + continue + } + nextFallback = candidate + break + } + if (!nextFallback) return false + + const providerID = selectFallbackProvider( + nextFallback.providers, + task.model?.providerID, + ) + + log("[background-agent] Retryable error, attempting fallback:", { + taskId: task.id, + source, + errorName: errorInfo.name, + errorMessage: errorInfo.message?.slice(0, 100), + attemptCount: selectedAttemptCount, + nextModel: `${providerID}/${nextFallback.model}`, + }) + + if (task.concurrencyKey) { + this.concurrencyManager.release(task.concurrencyKey) + task.concurrencyKey = undefined + } + + if (task.sessionID) { + this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {}) + subagentSessions.delete(task.sessionID) + } + + const idleTimer = this.idleDeferralTimers.get(task.id) + if (idleTimer) { + clearTimeout(idleTimer) + this.idleDeferralTimers.delete(task.id) + } + + 
task.attemptCount = selectedAttemptCount + task.model = { + providerID, + modelID: nextFallback.model, + variant: nextFallback.variant, + } + task.status = "pending" + task.sessionID = undefined + task.startedAt = undefined + task.queuedAt = new Date() + task.error = undefined + + const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent + const queue = this.queuesByKey.get(key) ?? [] + const retryInput: LaunchInput = { + description: task.description, + prompt: task.prompt, + agent: task.agent, + parentSessionID: task.parentSessionID, + parentMessageID: task.parentMessageID, + parentModel: task.parentModel, + parentAgent: task.parentAgent, + parentTools: task.parentTools, + model: task.model, + fallbackChain: task.fallbackChain, + category: task.category, + } + queue.push({ task, input: retryInput }) + this.queuesByKey.set(key, queue) + this.processKey(key) + return true } markForNotification(task: BackgroundTask): void { @@ -1273,10 +1447,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea if (isCompactionAgent(info?.agent)) { continue } - if (info?.agent || info?.model || (info?.modelID && info?.providerID)) { - agent = info.agent ?? task.parentAgent - model = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined) - tools = normalizePromptTools(info.tools) ?? tools + const normalizedTools = this.isRecord(info?.tools) + ? normalizePromptTools(info.tools as Record) + : undefined + if (info?.agent || info?.model || (info?.modelID && info?.providerID) || normalizedTools) { + agent = info?.agent ?? task.parentAgent + model = info?.model ?? (info?.providerID && info?.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined) + tools = normalizedTools ?? tools break } } @@ -1296,7 +1473,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea tools = normalizePromptTools(currentMessage?.tools) ?? 
tools } - tools = resolveInheritedPromptTools(task.parentSessionID, tools) + const resolvedTools = resolveInheritedPromptTools(task.parentSessionID, tools) log("[background-agent] notifyParentSession context:", { taskId: task.id, @@ -1311,7 +1488,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea noReply: !allComplete, ...(agent !== undefined ? { agent } : {}), ...(model !== undefined ? { model } : {}), - ...(tools ? { tools } : {}), + ...(resolvedTools ? { tools: resolvedTools } : {}), parts: [createInternalAgentTextPart(notification)], }, }) @@ -1394,6 +1571,46 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea return "" } + private extractErrorName(error: unknown): string | undefined { + if (this.isRecord(error) && typeof error["name"] === "string") return error["name"] + if (error instanceof Error) return error.name + return undefined + } + + private extractErrorMessage(error: unknown): string | undefined { + if (!error) return undefined + if (typeof error === "string") return error + if (error instanceof Error) return error.message + + if (this.isRecord(error)) { + const dataRaw = error["data"] + const candidates: unknown[] = [ + error, + dataRaw, + error["error"], + this.isRecord(dataRaw) ? 
(dataRaw as Record)["error"] : undefined, + error["cause"], + ] + + for (const candidate of candidates) { + if (typeof candidate === "string" && candidate.length > 0) return candidate + if ( + this.isRecord(candidate) && + typeof candidate["message"] === "string" && + candidate["message"].length > 0 + ) { + return candidate["message"] + } + } + } + + try { + return JSON.stringify(error) + } catch { + return String(error) + } + } + private isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null } @@ -1610,6 +1827,16 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea // Progress is already tracked via handleEvent(message.part.updated), // so we skip the expensive session.messages() fetch here. // Completion will be detected when session transitions to idle. + if (sessionStatus?.type === "retry") { + const retryMessage = typeof (sessionStatus as { message?: string }).message === "string" + ? (sessionStatus as { message?: string }).message + : undefined + const errorInfo = { name: "SessionRetry", message: retryMessage } + if (this.tryFallbackRetry(task, errorInfo, "polling:session.status")) { + continue + } + } + log("[background-agent] Session still running, relying on event-based progress:", { taskId: task.id, sessionID, diff --git a/src/features/background-agent/types.ts b/src/features/background-agent/types.ts index ea56186e0..6973dd783 100644 --- a/src/features/background-agent/types.ts +++ b/src/features/background-agent/types.ts @@ -1,3 +1,5 @@ +import type { FallbackEntry } from "../../shared/model-requirements" + export type BackgroundTaskStatus = | "pending" | "running" @@ -31,6 +33,10 @@ export interface BackgroundTask { progress?: TaskProgress parentModel?: { providerID: string; modelID: string } model?: { providerID: string; modelID: string; variant?: string } + /** Fallback chain for runtime retry on model errors */ + fallbackChain?: FallbackEntry[] + /** Number of fallback retry 
attempts made */ + attemptCount?: number /** Active concurrency slot key */ concurrencyKey?: string /** Persistent key for re-acquiring concurrency on resume */ @@ -60,6 +66,8 @@ export interface LaunchInput { parentAgent?: string parentTools?: Record model?: { providerID: string; modelID: string; variant?: string } + /** Fallback chain for runtime retry on model errors */ + fallbackChain?: FallbackEntry[] isUnstableAgent?: boolean skills?: string[] skillContent?: string diff --git a/src/features/claude-code-session-state/state.ts b/src/features/claude-code-session-state/state.ts index dfb700a66..60a8f8a84 100644 --- a/src/features/claude-code-session-state/state.ts +++ b/src/features/claude-code-session-state/state.ts @@ -1,4 +1,5 @@ export const subagentSessions = new Set() +export const syncSubagentSessions = new Set() let _mainSessionID: string | undefined @@ -14,6 +15,7 @@ export function getMainSessionID(): string | undefined { export function _resetForTesting(): void { _mainSessionID = undefined subagentSessions.clear() + syncSubagentSessions.clear() sessionAgentMap.clear() } diff --git a/src/features/opencode-skill-loader/blocking.ts b/src/features/opencode-skill-loader/blocking.ts index 503584992..25df62a6d 100644 --- a/src/features/opencode-skill-loader/blocking.ts +++ b/src/features/opencode-skill-loader/blocking.ts @@ -25,13 +25,13 @@ export function discoverAllSkillsBlocking(dirs: string[], scopes: SkillScope[]): const { port1, port2 } = new MessageChannel() const worker = new Worker(new URL("./discover-worker.ts", import.meta.url), { - workerData: { signal } + // workerData is structured-cloned; pass the SharedArrayBuffer and recreate the view in the worker. + workerData: { signalBuffer: signal.buffer }, }) - worker.postMessage({ port: port2 }, [port2]) - const input: WorkerInput = { dirs, scopes } - port1.postMessage(input) + // Avoid a race where the worker hasn't attached listeners to the MessagePort yet. 
+ worker.postMessage({ port: port2, input }, [port2]) const waitResult = Atomics.wait(signal, 0, 0, TIMEOUT_MS) diff --git a/src/features/opencode-skill-loader/discover-worker.ts b/src/features/opencode-skill-loader/discover-worker.ts index 6a1031d4d..47cb6bea0 100644 --- a/src/features/opencode-skill-loader/discover-worker.ts +++ b/src/features/opencode-skill-loader/discover-worker.ts @@ -18,25 +18,24 @@ interface WorkerOutputError { error: { message: string; stack?: string } } -const { signal } = workerData as { signal: Int32Array } +const { signalBuffer } = workerData as { signalBuffer: SharedArrayBuffer } +const signal = new Int32Array(signalBuffer) if (!parentPort) { throw new Error("Worker must be run with parentPort") } -parentPort.once("message", (data: { port: MessagePort }) => { - const { port } = data +parentPort.once("message", (data: { port: MessagePort; input: WorkerInput }) => { + const { port, input } = data - port.on("message", async (input: WorkerInput) => { + void (async () => { try { - const results = await Promise.all( - input.dirs.map(dir => discoverSkillsInDirAsync(dir)) - ) - + const results = await Promise.all(input.dirs.map((dir) => discoverSkillsInDirAsync(dir))) + const skills = results.flat() - + const output: WorkerOutputSuccess = { ok: true, skills } - + port.postMessage(output) Atomics.store(signal, 0, 1) Atomics.notify(signal, 0) @@ -48,10 +47,10 @@ parentPort.once("message", (data: { port: MessagePort }) => { stack: error instanceof Error ? 
error.stack : undefined, }, } - + port.postMessage(output) Atomics.store(signal, 0, 1) Atomics.notify(signal, 0) } - }) + })() }) diff --git a/src/features/task-toast-manager/manager.test.ts b/src/features/task-toast-manager/manager.test.ts index bc05a9961..aa3d0f5f1 100644 --- a/src/features/task-toast-manager/manager.test.ts +++ b/src/features/task-toast-manager/manager.test.ts @@ -217,6 +217,27 @@ describe("TaskToastManager", () => { expect(call.body.message).toContain("(inherited from parent)") }) + test("should display warning when model is runtime fallback", () => { + // given - runtime-fallback indicates a model swap mid-run + const task = { + id: "task_runtime", + description: "Task with runtime fallback model", + agent: "explore", + isBackground: false, + modelInfo: { model: "quotio/oswe-vscode-prime", type: "runtime-fallback" as const }, + } + + // when - addTask is called + toastManager.addTask(task) + + // then - toast should show fallback warning + expect(mockClient.tui.showToast).toHaveBeenCalled() + const call = mockClient.tui.showToast.mock.calls[0][0] + expect(call.body.message).toContain("[FALLBACK]") + expect(call.body.message).toContain("quotio/oswe-vscode-prime") + expect(call.body.message).toContain("(runtime fallback)") + }) + test("should not display model info when user-defined", () => { // given - a task with user-defined model const task = { @@ -257,4 +278,32 @@ describe("TaskToastManager", () => { expect(call.body.message).not.toContain("[FALLBACK] Model:") }) }) + + describe("updateTaskModelBySession", () => { + test("updates task model info and shows fallback toast", () => { + // given - task without model info + const task = { + id: "task_update", + sessionID: "ses_update_1", + description: "Task that will fallback", + agent: "explore", + isBackground: false, + } + toastManager.addTask(task) + mockClient.tui.showToast.mockClear() + + // when - runtime fallback applied by session + toastManager.updateTaskModelBySession("ses_update_1", 
{ + model: "nvidia/stepfun-ai/step-3.5-flash", + type: "runtime-fallback", + }) + + // then - new toast shows fallback model + expect(mockClient.tui.showToast).toHaveBeenCalled() + const call = mockClient.tui.showToast.mock.calls[0][0] + expect(call.body.message).toContain("[FALLBACK]") + expect(call.body.message).toContain("nvidia/stepfun-ai/step-3.5-flash") + expect(call.body.message).toContain("(runtime fallback)") + }) + }) }) diff --git a/src/features/task-toast-manager/manager.ts b/src/features/task-toast-manager/manager.ts index 5115fae79..b63fca0c0 100644 --- a/src/features/task-toast-manager/manager.ts +++ b/src/features/task-toast-manager/manager.ts @@ -20,6 +20,7 @@ export class TaskToastManager { addTask(task: { id: string + sessionID?: string description: string agent: string isBackground: boolean @@ -30,6 +31,7 @@ export class TaskToastManager { }): void { const trackedTask: TrackedTask = { id: task.id, + sessionID: task.sessionID, description: task.description, agent: task.agent, status: task.status ?? 
"running", @@ -54,6 +56,18 @@ export class TaskToastManager { } } + /** + * Update model info for a task by session ID + */ + updateTaskModelBySession(sessionID: string, modelInfo: ModelFallbackInfo): void { + if (!sessionID) return + const task = Array.from(this.tasks.values()).find((t) => t.sessionID === sessionID) + if (!task) return + if (task.modelInfo?.model === modelInfo.model && task.modelInfo?.type === modelInfo.type) return + task.modelInfo = modelInfo + this.showTaskListToast(task) + } + /** * Remove completed/error task */ @@ -110,14 +124,17 @@ export class TaskToastManager { const lines: string[] = [] const isFallback = newTask.modelInfo && ( - newTask.modelInfo.type === "inherited" || newTask.modelInfo.type === "system-default" + newTask.modelInfo.type === "inherited" || + newTask.modelInfo.type === "system-default" || + newTask.modelInfo.type === "runtime-fallback" ) if (isFallback) { - const suffixMap: Record<"inherited" | "system-default", string> = { + const suffixMap: Record<"inherited" | "system-default" | "runtime-fallback", string> = { inherited: " (inherited from parent)", "system-default": " (system default fallback)", + "runtime-fallback": " (runtime fallback)", } - const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default"] + const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default" | "runtime-fallback"] lines.push(`[FALLBACK] Model: ${newTask.modelInfo!.model}${suffix}`) lines.push("") } diff --git a/src/features/task-toast-manager/types.ts b/src/features/task-toast-manager/types.ts index 68fbf4325..732f60301 100644 --- a/src/features/task-toast-manager/types.ts +++ b/src/features/task-toast-manager/types.ts @@ -4,12 +4,13 @@ export type TaskStatus = "running" | "queued" | "completed" | "error" export interface ModelFallbackInfo { model: string - type: "user-defined" | "inherited" | "category-default" | "system-default" + type: "user-defined" | "inherited" | "category-default" | 
"system-default" | "runtime-fallback" source?: ModelSource } export interface TrackedTask { id: string + sessionID?: string description: string agent: string status: TaskStatus diff --git a/src/hooks/beast-mode-system/hook.test.ts b/src/hooks/beast-mode-system/hook.test.ts new file mode 100644 index 000000000..0e6d27a03 --- /dev/null +++ b/src/hooks/beast-mode-system/hook.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, test } from "bun:test" +import { clearSessionModel, setSessionModel } from "../../shared/session-model-state" +import { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook" + +describe("beast-mode-system hook", () => { + test("injects beast mode prompt for copilot gpt-4.1", async () => { + //#given + const sessionID = "ses_beast" + setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" }) + const hook = createBeastModeSystemHook() + const output = { system: [] as string[] } + + //#when + await hook["experimental.chat.system.transform"]?.({ sessionID }, output) + + //#then + expect(output.system[0]).toContain("Beast Mode") + expect(output.system[0]).toContain(BEAST_MODE_SYSTEM_PROMPT.trim().slice(0, 20)) + + clearSessionModel(sessionID) + }) + + test("does not inject for other models", async () => { + //#given + const sessionID = "ses_no_beast" + setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" }) + const hook = createBeastModeSystemHook() + const output = { system: [] as string[] } + + //#when + await hook["experimental.chat.system.transform"]?.({ sessionID }, output) + + //#then + expect(output.system.length).toBe(0) + + clearSessionModel(sessionID) + }) + + test("avoids duplicate insertion", async () => { + //#given + const sessionID = "ses_dupe" + setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" }) + const hook = createBeastModeSystemHook() + const output = { system: [BEAST_MODE_SYSTEM_PROMPT] } + + //#when + await 
hook["experimental.chat.system.transform"]?.({ sessionID }, output) + + //#then + expect(output.system.length).toBe(1) + + clearSessionModel(sessionID) + }) +}) diff --git a/src/hooks/beast-mode-system/hook.ts b/src/hooks/beast-mode-system/hook.ts new file mode 100644 index 000000000..f4e5cdd17 --- /dev/null +++ b/src/hooks/beast-mode-system/hook.ts @@ -0,0 +1,31 @@ +import { getSessionModel } from "../../shared/session-model-state" + +export const BEAST_MODE_SYSTEM_PROMPT = `Beast Mode (Copilot GPT-4.1) + +You are an autonomous coding agent. Execute the task end-to-end. +- Make a brief plan, then act. +- Prefer concrete edits and verification over speculation. +- Run relevant tests when feasible. +- Do not ask the user to perform actions you can do yourself. +- If blocked, state exactly what is needed to proceed. +- Keep responses concise and actionable.` + +function isBeastModeModel(model: { providerID: string; modelID: string } | undefined): boolean { + return model?.providerID === "github-copilot" && model.modelID === "gpt-4.1" +} + +export function createBeastModeSystemHook() { + return { + "experimental.chat.system.transform": async ( + input: { sessionID: string }, + output: { system: string[] }, + ): Promise => { + const model = getSessionModel(input.sessionID) + if (!isBeastModeModel(model)) return + + if (output.system.some((entry) => entry.includes("Beast Mode"))) return + + output.system.unshift(BEAST_MODE_SYSTEM_PROMPT) + }, + } +} diff --git a/src/hooks/beast-mode-system/index.ts b/src/hooks/beast-mode-system/index.ts new file mode 100644 index 000000000..3bd956f28 --- /dev/null +++ b/src/hooks/beast-mode-system/index.ts @@ -0,0 +1 @@ +export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook" diff --git a/src/hooks/index.ts b/src/hooks/index.ts index 72845f671..e82fb4ea5 100644 --- a/src/hooks/index.ts +++ b/src/hooks/index.ts @@ -14,6 +14,7 @@ export { createEmptyTaskResponseDetectorHook } from "./empty-task-response-detec export { 
createAnthropicContextWindowLimitRecoveryHook, type AnthropicContextWindowLimitRecoveryOptions } from "./anthropic-context-window-limit-recovery"; export { createThinkModeHook } from "./think-mode"; +export { createModelFallbackHook, setPendingModelFallback, clearPendingModelFallback, type ModelFallbackState } from "./model-fallback/hook"; export { createClaudeCodeHooksHook } from "./claude-code-hooks"; export { createRulesInjectorHook } from "./rules-injector"; export { createBackgroundNotificationHook } from "./background-notification" @@ -31,7 +32,6 @@ export { createNoSisyphusGptHook } from "./no-sisyphus-gpt"; export { createNoHephaestusNonGptHook } from "./no-hephaestus-non-gpt"; export { createAutoSlashCommandHook } from "./auto-slash-command"; export { createEditErrorRecoveryHook } from "./edit-error-recovery"; -export { createJsonErrorRecoveryHook } from "./json-error-recovery"; export { createPrometheusMdOnlyHook } from "./prometheus-md-only"; export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad"; export { createTaskResumeInfoHook } from "./task-resume-info"; @@ -47,5 +47,4 @@ export { createPreemptiveCompactionHook } from "./preemptive-compaction"; export { createTasksTodowriteDisablerHook } from "./tasks-todowrite-disabler"; export { createWriteExistingFileGuardHook } from "./write-existing-file-guard"; export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer"; -export { createHashlineEditDiffEnhancerHook } from "./hashline-edit-diff-enhancer"; - +export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./beast-mode-system"; diff --git a/src/hooks/model-fallback/hook.test.ts b/src/hooks/model-fallback/hook.test.ts new file mode 100644 index 000000000..02284f183 --- /dev/null +++ b/src/hooks/model-fallback/hook.test.ts @@ -0,0 +1,141 @@ +import { beforeEach, describe, expect, test } from "bun:test" + +import { + clearPendingModelFallback, + createModelFallbackHook, + setPendingModelFallback, +} from 
"./hook" + +describe("model fallback hook", () => { + beforeEach(() => { + clearPendingModelFallback("ses_model_fallback_main") + }) + + test("applies pending fallback on chat.message by overriding model", async () => { + //#given + const hook = createModelFallbackHook() as unknown as { + "chat.message"?: ( + input: { sessionID: string }, + output: { message: Record; parts: Array<{ type: string; text?: string }> }, + ) => Promise + } + + const set = setPendingModelFallback( + "ses_model_fallback_main", + "Sisyphus (Ultraworker)", + "quotio", + "claude-opus-4-6-thinking", + ) + expect(set).toBe(true) + + const output = { + message: { + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + variant: "max", + }, + parts: [{ type: "text", text: "continue" }], + } + + //#when + await hook["chat.message"]?.( + { sessionID: "ses_model_fallback_main" }, + output, + ) + + //#then + expect(output.message["model"]).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + }) + }) + + test("preserves fallback progression across repeated session.error retries", async () => { + //#given + const hook = createModelFallbackHook() as unknown as { + "chat.message"?: ( + input: { sessionID: string }, + output: { message: Record; parts: Array<{ type: string; text?: string }> }, + ) => Promise + } + const sessionID = "ses_model_fallback_main" + + expect( + setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6-thinking"), + ).toBe(true) + + const firstOutput = { + message: { + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + variant: "max", + }, + parts: [{ type: "text", text: "continue" }], + } + + //#when - first retry is applied + await hook["chat.message"]?.({ sessionID }, firstOutput) + + //#then + expect(firstOutput.message["model"]).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + }) + + //#when - second error re-arms fallback and should advance to next entry + expect( + 
setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6"), + ).toBe(true) + + const secondOutput = { + message: { + model: { providerID: "quotio", modelID: "claude-opus-4-6" }, + }, + parts: [{ type: "text", text: "continue" }], + } + await hook["chat.message"]?.({ sessionID }, secondOutput) + + //#then - chain should progress to entry[1], not repeat entry[0] + expect(secondOutput.message["model"]).toEqual({ + providerID: "quotio", + modelID: "gpt-5.3-codex", + }) + expect(secondOutput.message["variant"]).toBe("high") + }) + + test("shows toast when fallback is applied", async () => { + //#given + const toastCalls: Array<{ title: string; message: string }> = [] + const hook = createModelFallbackHook({ + toast: async ({ title, message }) => { + toastCalls.push({ title, message }) + }, + }) as unknown as { + "chat.message"?: ( + input: { sessionID: string }, + output: { message: Record; parts: Array<{ type: string; text?: string }> }, + ) => Promise + } + + const set = setPendingModelFallback( + "ses_model_fallback_toast", + "Sisyphus (Ultraworker)", + "quotio", + "claude-opus-4-6-thinking", + ) + expect(set).toBe(true) + + const output = { + message: { + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + variant: "max", + }, + parts: [{ type: "text", text: "continue" }], + } + + //#when + await hook["chat.message"]?.({ sessionID: "ses_model_fallback_toast" }, output) + + //#then + expect(toastCalls.length).toBe(1) + expect(toastCalls[0]?.title).toBe("Model fallback") + }) +}) diff --git a/src/hooks/model-fallback/hook.ts b/src/hooks/model-fallback/hook.ts new file mode 100644 index 000000000..98d929a96 --- /dev/null +++ b/src/hooks/model-fallback/hook.ts @@ -0,0 +1,228 @@ +import type { FallbackEntry } from "../../shared/model-requirements" +import { getAgentConfigKey } from "../../shared/agent-display-names" +import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements" +import { 
readConnectedProvidersCache, readProviderModelsCache } from "../../shared/connected-providers-cache" +import { selectFallbackProvider } from "../../shared/model-error-classifier" +import { log } from "../../shared/logger" +import { getTaskToastManager } from "../../features/task-toast-manager" +import type { ChatMessageInput, ChatMessageHandlerOutput } from "../../plugin/chat-message" + +type FallbackToast = (input: { + title: string + message: string + variant?: "info" | "success" | "warning" | "error" + duration?: number +}) => void | Promise<void> + +type FallbackCallback = (input: { + sessionID: string + providerID: string + modelID: string + variant?: string +}) => void | Promise<void> + +export type ModelFallbackState = { + providerID: string + modelID: string + fallbackChain: FallbackEntry[] + attemptCount: number + pending: boolean +} + +/** + * Map of sessionID -> pending model fallback state + * When a model error occurs, we store the fallback info here. + * The next chat.message call will use this to switch to the fallback model. + */ +const pendingModelFallbacks = new Map<string, ModelFallbackState>() +const lastToastKey = new Map<string, string>() + +/** + * Sets a pending model fallback for a session. + * Called when a model error is detected in session.error handler. + */ +export function setPendingModelFallback( + sessionID: string, + agentName: string, + currentProviderID: string, + currentModelID: string, +): boolean { + const agentKey = getAgentConfigKey(agentName) + const requirements = AGENT_MODEL_REQUIREMENTS[agentKey] + if (!requirements || !requirements.fallbackChain || requirements.fallbackChain.length === 0) { + log("[model-fallback] No fallback chain for agent: " + agentName + " (key: " + agentKey + ")") + return false + } + + const fallbackChain = requirements.fallbackChain + const existing = pendingModelFallbacks.get(sessionID) + + if (existing) { + // Preserve progression across repeated session.error retries in same session. + // We only mark the next turn as pending fallback application. 
+ existing.providerID = currentProviderID + existing.modelID = currentModelID + existing.pending = true + if (existing.attemptCount >= existing.fallbackChain.length) { + log("[model-fallback] Fallback chain exhausted for session: " + sessionID) + return false + } + log("[model-fallback] Re-armed pending fallback for session: " + sessionID) + return true + } + + const state: ModelFallbackState = { + providerID: currentProviderID, + modelID: currentModelID, + fallbackChain, + attemptCount: 0, + pending: true, + } + + pendingModelFallbacks.set(sessionID, state) + log("[model-fallback] Set pending fallback for session: " + sessionID + ", agent: " + agentName) + return true +} + +/** + * Gets the next fallback model for a session. + * Increments attemptCount each time called. + */ +export function getNextFallback( + sessionID: string, +): { providerID: string; modelID: string; variant?: string } | null { + const state = pendingModelFallbacks.get(sessionID) + if (!state) return null + + if (!state.pending) return null + + const { fallbackChain } = state + + const providerModelsCache = readProviderModelsCache() + const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache() + const connectedSet = connectedProviders ? new Set(connectedProviders) : null + + const isReachable = (entry: FallbackEntry): boolean => { + if (!connectedSet) return true + + // Gate only on provider connectivity. Provider model lists can be stale/incomplete, + // especially after users manually add models to opencode.json. 
+ return entry.providers.some((p) => connectedSet.has(p)) + } + + while (state.attemptCount < fallbackChain.length) { + const attemptCount = state.attemptCount + const fallback = fallbackChain[attemptCount] + state.attemptCount++ + + if (!isReachable(fallback)) { + log("[model-fallback] Skipping unreachable fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model) + continue + } + + const providerID = selectFallbackProvider(fallback.providers, state.providerID) + state.pending = false + + log("[model-fallback] Using fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model) + + return { + providerID, + modelID: fallback.model, + variant: fallback.variant, + } + } + + log("[model-fallback] No more fallbacks for session: " + sessionID) + pendingModelFallbacks.delete(sessionID) + return null +} + +/** + * Clears the pending fallback for a session. + * Called after fallback is successfully applied. + */ +export function clearPendingModelFallback(sessionID: string): void { + pendingModelFallbacks.delete(sessionID) + lastToastKey.delete(sessionID) +} + +/** + * Checks if there's a pending fallback for a session. + */ +export function hasPendingModelFallback(sessionID: string): boolean { + const state = pendingModelFallbacks.get(sessionID) + return state?.pending === true +} + +/** + * Gets the current fallback state for a session (for debugging). + */ +export function getFallbackState(sessionID: string): ModelFallbackState | undefined { + return pendingModelFallbacks.get(sessionID) +} + +/** + * Creates a chat.message hook that applies model fallbacks when pending. 
+ */ +export function createModelFallbackHook(args?: { toast?: FallbackToast; onApplied?: FallbackCallback }) { + const toast = args?.toast + const onApplied = args?.onApplied + + return { + "chat.message": async ( + input: ChatMessageInput, + output: ChatMessageHandlerOutput, + ): Promise<void> => { + const { sessionID } = input + if (!sessionID) return + + const fallback = getNextFallback(sessionID) + if (!fallback) return + + output.message["model"] = { + providerID: fallback.providerID, + modelID: fallback.modelID, + } + if (fallback.variant !== undefined) { + output.message["variant"] = fallback.variant + } else { + delete output.message["variant"] + } + if (toast) { + const key = `${sessionID}:${fallback.providerID}/${fallback.modelID}:${fallback.variant ?? ""}` + if (lastToastKey.get(sessionID) !== key) { + lastToastKey.set(sessionID, key) + const variantLabel = fallback.variant ? ` (${fallback.variant})` : "" + await Promise.resolve( + toast({ + title: "Model fallback", + message: `Using ${fallback.providerID}/${fallback.modelID}${variantLabel}`, + variant: "warning", + duration: 5000, + }), + ) + } + } + if (onApplied) { + await Promise.resolve( + onApplied({ + sessionID, + providerID: fallback.providerID, + modelID: fallback.modelID, + variant: fallback.variant, + }), + ) + } + + const toastManager = getTaskToastManager() + if (toastManager) { + const variantLabel = fallback.variant ? 
` (${fallback.variant})` : "" + toastManager.updateTaskModelBySession(sessionID, { + model: `${fallback.providerID}/${fallback.modelID}${variantLabel}`, + type: "runtime-fallback", + }) + } + log("[model-fallback] Applied fallback model: " + JSON.stringify(fallback)) + }, + } +} diff --git a/src/plugin-interface.ts b/src/plugin-interface.ts index e6849779e..3815ec616 100644 --- a/src/plugin-interface.ts +++ b/src/plugin-interface.ts @@ -5,6 +5,7 @@ import { createChatParamsHandler } from "./plugin/chat-params" import { createChatHeadersHandler } from "./plugin/chat-headers" import { createChatMessageHandler } from "./plugin/chat-message" import { createMessagesTransformHandler } from "./plugin/messages-transform" +import { createSystemTransformHandler } from "./plugin/system-transform" import { createEventHandler } from "./plugin/event" import { createToolExecuteAfterHandler } from "./plugin/tool-execute-after" import { createToolExecuteBeforeHandler } from "./plugin/tool-execute-before" @@ -49,6 +50,10 @@ export function createPluginInterface(args: { hooks, }), + "experimental.chat.system.transform": createSystemTransformHandler({ + hooks, + }), + config: managers.configHandler, event: createEventHandler({ diff --git a/src/plugin/chat-message.ts b/src/plugin/chat-message.ts index f035c99a5..027bc8d2f 100644 --- a/src/plugin/chat-message.ts +++ b/src/plugin/chat-message.ts @@ -2,6 +2,7 @@ import type { OhMyOpenCodeConfig } from "../config" import type { PluginContext } from "./types" import { hasConnectedProvidersCache } from "../shared" +import { setSessionModel } from "../shared/session-model-state" import { setSessionAgent } from "../features/claude-code-session-state" import { applyUltraworkModelOverrideOnMessage } from "./ultrawork-model-override" @@ -13,7 +14,12 @@ type FirstMessageVariantGate = { } type ChatMessagePart = { type: string; text?: string; [key: string]: unknown } -type ChatMessageHandlerOutput = { message: Record; parts: ChatMessagePart[] } 
+export type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] } +export type ChatMessageInput = { + sessionID: string + agent?: string + model?: { providerID: string; modelID: string } +} type StartWorkHookOutput = { parts: Array<{ type: string; text?: string }> } function isStartWorkHookOutput(value: unknown): value is StartWorkHookOutput { @@ -34,13 +40,13 @@ export function createChatMessageHandler(args: { firstMessageVariantGate: FirstMessageVariantGate hooks: CreatedHooks }): ( - input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } }, + input: ChatMessageInput, output: ChatMessageHandlerOutput ) => Promise<void> { const { ctx, pluginConfig, firstMessageVariantGate, hooks } = args return async ( - input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } }, + input: ChatMessageInput, output: ChatMessageHandlerOutput ): Promise<void> => { if (input.agent) { @@ -53,6 +59,22 @@ export function createChatMessageHandler(args: { firstMessageVariantGate.markApplied(input.sessionID) } + await hooks.modelFallback?.["chat.message"]?.(input, output) + const modelOverride = output.message["model"] + if ( + modelOverride && + typeof modelOverride === "object" && + "providerID" in modelOverride && + "modelID" in modelOverride + ) { + const providerID = (modelOverride as { providerID?: string }).providerID + const modelID = (modelOverride as { modelID?: string }).modelID + if (typeof providerID === "string" && typeof modelID === "string") { + setSessionModel(input.sessionID, { providerID, modelID }) + } + } else if (input.model) { + setSessionModel(input.sessionID, input.model) + } await hooks.stopContinuationGuard?.["chat.message"]?.(input) await hooks.keywordDetector?.["chat.message"]?.(input, output) await hooks.claudeCodeHooks?.["chat.message"]?.(input, output) diff --git a/src/plugin/chat-params.test.ts b/src/plugin/chat-params.test.ts new file mode 100644 index 000000000..91d194b9e --- 
/dev/null +++ b/src/plugin/chat-params.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, test } from "bun:test" + +import { createChatParamsHandler } from "./chat-params" + +describe("createChatParamsHandler", () => { + test("normalizes object-style agent payload and runs chat.params hooks", async () => { + //#given + let called = false + const handler = createChatParamsHandler({ + anthropicEffort: { + "chat.params": async (input) => { + called = input.agent.name === "sisyphus" + }, + }, + }) + + const input = { + sessionID: "ses_chat_params", + agent: { name: "sisyphus" }, + model: { providerID: "opencode", modelID: "claude-opus-4-6" }, + provider: { id: "opencode" }, + message: {}, + } + + const output = { + temperature: 0.1, + topP: 1, + topK: 1, + options: {}, + } + + //#when + await handler(input, output) + + //#then + expect(called).toBe(true) + }) +}) diff --git a/src/plugin/chat-params.ts b/src/plugin/chat-params.ts index 8f996a887..14ff4ed8e 100644 --- a/src/plugin/chat-params.ts +++ b/src/plugin/chat-params.ts @@ -1,4 +1,4 @@ -type ChatParamsInput = { +export type ChatParamsInput = { sessionID: string agent: { name?: string } model: { providerID: string; modelID: string } @@ -6,7 +6,7 @@ type ChatParamsInput = { message: { variant?: string } } -type ChatParamsOutput = { +export type ChatParamsOutput = { temperature?: number topP?: number topK?: number @@ -27,11 +27,21 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null { const message = raw.message if (typeof sessionID !== "string") return null - if (typeof agent !== "string") return null if (!isRecord(model)) return null if (!isRecord(provider)) return null if (!isRecord(message)) return null + let agentName: string | undefined + if (typeof agent === "string") { + agentName = agent + } else if (isRecord(agent)) { + const name = agent.name + if (typeof name === "string") { + agentName = name + } + } + if (!agentName) return null + const providerID = model.providerID const modelID = 
model.modelID const providerId = provider.id @@ -43,7 +53,7 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null { return { sessionID, - agent: { name: agent }, + agent: { name: agentName }, model: { providerID, modelID }, provider: { id: providerId }, message: typeof variant === "string" ? { variant } : {}, diff --git a/src/plugin/event.model-fallback.test.ts b/src/plugin/event.model-fallback.test.ts new file mode 100644 index 000000000..f2a5720b3 --- /dev/null +++ b/src/plugin/event.model-fallback.test.ts @@ -0,0 +1,406 @@ +import { afterEach, describe, expect, test } from "bun:test" + +import { createEventHandler } from "./event" +import { createChatMessageHandler } from "./chat-message" +import { _resetForTesting, setMainSession } from "../features/claude-code-session-state" +import { createModelFallbackHook, clearPendingModelFallback } from "../hooks/model-fallback/hook" + +describe("createEventHandler - model fallback", () => { + afterEach(() => { + _resetForTesting() + }) + + test("triggers retry prompt for assistant message.updated APIError payloads (headless resume)", async () => { + //#given + const abortCalls: string[] = [] + const promptCalls: string[] = [] + const sessionID = "ses_message_updated_fallback" + + const handler = createEventHandler({ + ctx: { + directory: "/tmp", + client: { + session: { + abort: async ({ path }: { path: { id: string } }) => { + abortCalls.push(path.id) + return {} + }, + prompt: async ({ path }: { path: { id: string } }) => { + promptCalls.push(path.id) + return {} + }, + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + markSessionCreated: () => {}, + clear: () => {}, + }, + managers: { + tmuxSessionManager: { + onSessionCreated: async () => {}, + onSessionDeleted: async () => {}, + }, + skillMcpManager: { + disconnectSession: async () => {}, + }, + } as any, + hooks: {} as any, + }) + + //#when + await handler({ + event: { + type: "message.updated", + properties: { + info: { 
+ id: "msg_err_1", + sessionID, + role: "assistant", + time: { created: 1, completed: 2 }, + error: { + name: "APIError", + data: { + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}", + isRetryable: true, + }, + }, + parentID: "msg_user_1", + modelID: "claude-opus-4-6-thinking", + providerID: "quotio", + mode: "Sisyphus (Ultraworker)", + agent: "Sisyphus (Ultraworker)", + path: { cwd: "/tmp", root: "/tmp" }, + cost: 0, + tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }, + }, + }, + }, + }) + + //#then + expect(abortCalls).toEqual([sessionID]) + expect(promptCalls).toEqual([sessionID]) + }) + + test("triggers retry prompt for nested model error payloads", async () => { + //#given + const abortCalls: string[] = [] + const promptCalls: string[] = [] + const sessionID = "ses_main_fallback_nested" + setMainSession(sessionID) + + const handler = createEventHandler({ + ctx: { + directory: "/tmp", + client: { + session: { + abort: async ({ path }: { path: { id: string } }) => { + abortCalls.push(path.id) + return {} + }, + prompt: async ({ path }: { path: { id: string } }) => { + promptCalls.push(path.id) + return {} + }, + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + markSessionCreated: () => {}, + clear: () => {}, + }, + managers: { + tmuxSessionManager: { + onSessionCreated: async () => {}, + onSessionDeleted: async () => {}, + }, + skillMcpManager: { + disconnectSession: async () => {}, + }, + } as any, + hooks: {} as any, + }) + + //#when + await handler({ + event: { + type: "session.error", + properties: { + sessionID, + error: { + name: "UnknownError", + data: { + error: { + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}", + }, + }, + }, + }, + }, + }) + + //#then + expect(abortCalls).toEqual([sessionID]) + expect(promptCalls).toEqual([sessionID]) + }) + + test("triggers retry prompt on 
session.status retry events and applies fallback", async () => { + //#given + const abortCalls: string[] = [] + const promptCalls: string[] = [] + const sessionID = "ses_status_retry_fallback" + setMainSession(sessionID) + clearPendingModelFallback(sessionID) + + const modelFallback = createModelFallbackHook() + + const handler = createEventHandler({ + ctx: { + directory: "/tmp", + client: { + session: { + abort: async ({ path }: { path: { id: string } }) => { + abortCalls.push(path.id) + return {} + }, + prompt: async ({ path }: { path: { id: string } }) => { + promptCalls.push(path.id) + return {} + }, + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + markSessionCreated: () => {}, + clear: () => {}, + }, + managers: { + tmuxSessionManager: { + onSessionCreated: async () => {}, + onSessionDeleted: async () => {}, + }, + skillMcpManager: { + disconnectSession: async () => {}, + }, + } as any, + hooks: { + modelFallback, + } as any, + }) + + const chatMessageHandler = createChatMessageHandler({ + ctx: { + client: { + tui: { + showToast: async () => ({}), + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + shouldOverride: () => false, + markApplied: () => {}, + }, + hooks: { + modelFallback, + stopContinuationGuard: null, + keywordDetector: null, + claudeCodeHooks: null, + autoSlashCommand: null, + startWork: null, + ralphLoop: null, + } as any, + }) + + await handler({ + event: { + type: "message.updated", + properties: { + info: { + id: "msg_user_status_1", + sessionID, + role: "user", + time: { created: 1 }, + content: [], + modelID: "claude-opus-4-6-thinking", + providerID: "quotio", + agent: "Sisyphus (Ultraworker)", + path: { cwd: "/tmp", root: "/tmp" }, + }, + }, + }, + }) + + //#when + await handler({ + event: { + type: "session.status", + properties: { + sessionID, + status: { + type: "retry", + attempt: 1, + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model 
claude-opus-4-6-thinking\"}}", + next: 1234, + }, + }, + }, + }) + + const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> } + await chatMessageHandler( + { + sessionID, + agent: "sisyphus", + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + }, + output, + ) + + //#then + expect(abortCalls).toEqual([sessionID]) + expect(promptCalls).toEqual([sessionID]) + expect(output.message["model"]).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + }) + expect(output.message["variant"]).toBe("max") + }) + + test("advances main-session fallback chain across repeated session.error retries end-to-end", async () => { + //#given + const abortCalls: string[] = [] + const promptCalls: string[] = [] + const toastCalls: string[] = [] + const sessionID = "ses_main_fallback_chain" + setMainSession(sessionID) + clearPendingModelFallback(sessionID) + + const modelFallback = createModelFallbackHook() + + const eventHandler = createEventHandler({ + ctx: { + directory: "/tmp", + client: { + session: { + abort: async ({ path }: { path: { id: string } }) => { + abortCalls.push(path.id) + return {} + }, + prompt: async ({ path }: { path: { id: string } }) => { + promptCalls.push(path.id) + return {} + }, + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + markSessionCreated: () => {}, + clear: () => {}, + }, + managers: { + tmuxSessionManager: { + onSessionCreated: async () => {}, + onSessionDeleted: async () => {}, + }, + skillMcpManager: { + disconnectSession: async () => {}, + }, + } as any, + hooks: { + modelFallback, + } as any, + }) + + const chatMessageHandler = createChatMessageHandler({ + ctx: { + client: { + tui: { + showToast: async ({ body }: { body: { title?: string } }) => { + if (body?.title) toastCalls.push(body.title) + return {} + }, + }, + }, + } as any, + pluginConfig: {} as any, + firstMessageVariantGate: { + shouldOverride: () => false, + markApplied: () => {}, + }, + hooks: { + 
modelFallback, + stopContinuationGuard: null, + keywordDetector: null, + claudeCodeHooks: null, + autoSlashCommand: null, + startWork: null, + ralphLoop: null, + } as any, + }) + + const triggerRetryCycle = async () => { + await eventHandler({ + event: { + type: "session.error", + properties: { + sessionID, + providerID: "quotio", + modelID: "claude-opus-4-6-thinking", + error: { + name: "UnknownError", + data: { + error: { + message: + "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}", + }, + }, + }, + }, + }, + }) + + const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> } + await chatMessageHandler( + { + sessionID, + agent: "sisyphus", + model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" }, + }, + output, + ) + return output + } + + //#when - first retry cycle + const first = await triggerRetryCycle() + + //#then - first fallback entry applied (prefers current provider when available) + expect(first.message["model"]).toEqual({ + providerID: "quotio", + modelID: "claude-opus-4-6", + }) + expect(first.message["variant"]).toBe("max") + + //#when - second retry cycle + const second = await triggerRetryCycle() + + //#then - second fallback entry applied (chain advanced) + expect(second.message["model"]).toEqual({ + providerID: "quotio", + modelID: "gpt-5.3-codex", + }) + expect(second.message["variant"]).toBe("high") + expect(abortCalls).toEqual([sessionID, sessionID]) + expect(promptCalls).toEqual([sessionID, sessionID]) + expect(toastCalls.length).toBeGreaterThanOrEqual(0) + }) +}) diff --git a/src/plugin/event.ts b/src/plugin/event.ts index 3ab1cd41d..08cd49507 100644 --- a/src/plugin/event.ts +++ b/src/plugin/event.ts @@ -4,11 +4,17 @@ import type { PluginContext } from "./types" import { clearSessionAgent, getMainSessionID, + getSessionAgent, + subagentSessions, + syncSubagentSessions, setMainSession, updateSessionAgent, } from "../features/claude-code-session-state" 
import { resetMessageCursor } from "../shared" import { lspManager } from "../tools" +import { shouldRetryError } from "../shared/model-error-classifier" +import { clearPendingModelFallback, setPendingModelFallback } from "../hooks/model-fallback/hook" +import { clearSessionModel, setSessionModel } from "../shared/session-model-state" import type { CreatedHooks } from "../create-hooks" import type { Managers } from "../create-managers" @@ -20,6 +26,74 @@ type FirstMessageVariantGate = { clear: (sessionID: string) => void } +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + +function normalizeFallbackModelID(modelID: string): string { + return modelID + .replace(/-thinking$/i, "") + .replace(/-max$/i, "") + .replace(/-high$/i, "") +} + +function extractErrorName(error: unknown): string | undefined { + if (isRecord(error) && typeof error.name === "string") return error.name + if (error instanceof Error) return error.name + return undefined +} + +function extractErrorMessage(error: unknown): string { + if (!error) return "" + if (typeof error === "string") return error + if (error instanceof Error) return error.message + + if (isRecord(error)) { + const candidates: unknown[] = [ + error, + error.data, + error.error, + isRecord(error.data) ? 
error.data.error : undefined, + error.cause, + ] + + for (const candidate of candidates) { + if (isRecord(candidate) && typeof candidate.message === "string" && candidate.message.length > 0) { + return candidate.message + } + } + } + + try { + return JSON.stringify(error) + } catch { + return String(error) + } +} + +function extractProviderModelFromErrorMessage( + message: string, +): { providerID?: string; modelID?: string } { + const lower = message.toLowerCase() + + const providerModel = lower.match(/model\s+not\s+found:\s*([a-z0-9_-]+)\s*\/\s*([a-z0-9._-]+)/i) + if (providerModel) { + return { + providerID: providerModel[1], + modelID: providerModel[2], + } + } + + const modelOnly = lower.match(/unknown\s+provider\s+for\s+model\s+([a-z0-9._-]+)/i) + if (modelOnly) { + return { + modelID: modelOnly[1], + } + } + + return {} +} + export function createEventHandler(args: { ctx: PluginContext pluginConfig: OhMyOpenCodeConfig @@ -29,6 +103,11 @@ export function createEventHandler(args: { }): (input: { event: { type: string; properties?: Record } }) => Promise { const { ctx, firstMessageVariantGate, managers, hooks } = args + // Avoid triggering multiple abort+continue cycles for the same failing assistant message. + const lastHandledModelErrorMessageID = new Map() + const lastHandledRetryStatusKey = new Map() + const lastKnownModelBySession = new Map() + const dispatchToHooks = async (input: { event: { type: string; properties?: Record } }): Promise => { await Promise.resolve(hooks.autoUpdateChecker?.event?.(input)) await Promise.resolve(hooks.claudeCodeHooks?.event?.(input)) @@ -55,6 +134,15 @@ export function createEventHandler(args: { const recentRealIdles = new Map() const DEDUP_WINDOW_MS = 500 + const shouldAutoRetrySession = (sessionID: string): boolean => { + const mainSessionID = getMainSessionID() + if (mainSessionID) return sessionID === mainSessionID + // Headless runs (or resumed sessions) may not emit session.created, so mainSessionID can be unset. 
+ // In that case, treat any non-subagent session as the "main" interactive session. + if (syncSubagentSessions.has(sessionID)) return true + return !subagentSessions.has(sessionID) + } + return async (input): Promise => { pruneRecentSyntheticIdles({ recentSyntheticIdles, @@ -121,8 +209,14 @@ export function createEventHandler(args: { if (sessionInfo?.id) { clearSessionAgent(sessionInfo.id) + lastHandledModelErrorMessageID.delete(sessionInfo.id) + lastHandledRetryStatusKey.delete(sessionInfo.id) + lastKnownModelBySession.delete(sessionInfo.id) + clearPendingModelFallback(sessionInfo.id) resetMessageCursor(sessionInfo.id) firstMessageVariantGate.clear(sessionInfo.id) + clearSessionModel(sessionInfo.id) + syncSubagentSessions.delete(sessionInfo.id) await managers.skillMcpManager.disconnectSession(sessionInfo.id) await lspManager.cleanupTempDirectoryClients() await managers.tmuxSessionManager.onSessionDeleted({ @@ -136,8 +230,129 @@ export function createEventHandler(args: { const sessionID = info?.sessionID as string | undefined const agent = info?.agent as string | undefined const role = info?.role as string | undefined - if (sessionID && agent && role === "user") { - updateSessionAgent(sessionID, agent) + if (sessionID && role === "user") { + if (agent) { + updateSessionAgent(sessionID, agent) + } + const providerID = info?.providerID as string | undefined + const modelID = info?.modelID as string | undefined + if (providerID && modelID) { + lastKnownModelBySession.set(sessionID, { providerID, modelID }) + setSessionModel(sessionID, { providerID, modelID }) + } + } + + // Model fallback: in practice, API/model failures often surface as assistant message errors. + // session.error events are not guaranteed for all providers, so we also observe message.updated. 
+ if (sessionID && role === "assistant") { + const assistantMessageID = info?.id as string | undefined + const assistantError = info?.error + if (assistantMessageID && assistantError) { + const lastHandled = lastHandledModelErrorMessageID.get(sessionID) + if (lastHandled === assistantMessageID) { + return + } + + const errorName = extractErrorName(assistantError) + const errorMessage = extractErrorMessage(assistantError) + const errorInfo = { name: errorName, message: errorMessage } + + if (shouldRetryError(errorInfo)) { + // Prefer the agent/model/provider from the assistant message payload. + let agentName = agent ?? getSessionAgent(sessionID) + if (!agentName && sessionID === getMainSessionID()) { + if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) { + agentName = "sisyphus" + } else if (errorMessage.includes("gpt-5")) { + agentName = "hephaestus" + } else { + agentName = "sisyphus" + } + } + + if (agentName) { + const currentProvider = (info?.providerID as string | undefined) ?? "opencode" + const rawModel = (info?.modelID as string | undefined) ?? 
"claude-opus-4-6" + const currentModel = normalizeFallbackModelID(rawModel) + + const setFallback = setPendingModelFallback( + sessionID, + agentName, + currentProvider, + currentModel, + ) + + if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) { + lastHandledModelErrorMessageID.set(sessionID, assistantMessageID) + + await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {}) + await ctx.client.session + .prompt({ + path: { id: sessionID }, + body: { parts: [{ type: "text", text: "continue" }] }, + query: { directory: ctx.directory }, + }) + .catch(() => {}) + } + } + } + } + } + } + + if (event.type === "session.status") { + const sessionID = props?.sessionID as string | undefined + const status = props?.status as + | { type?: string; attempt?: number; message?: string; next?: number } + | undefined + + if (sessionID && status?.type === "retry") { + const retryMessage = typeof status.message === "string" ? status.message : "" + const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}` + if (lastHandledRetryStatusKey.get(sessionID) === retryKey) { + return + } + lastHandledRetryStatusKey.set(sessionID, retryKey) + + const errorInfo = { name: undefined, message: retryMessage } + if (shouldRetryError(errorInfo)) { + let agentName = getSessionAgent(sessionID) + if (!agentName && sessionID === getMainSessionID()) { + if (retryMessage.includes("claude-opus") || retryMessage.includes("opus")) { + agentName = "sisyphus" + } else if (retryMessage.includes("gpt-5")) { + agentName = "hephaestus" + } else { + agentName = "sisyphus" + } + } + + if (agentName) { + const parsed = extractProviderModelFromErrorMessage(retryMessage) + const lastKnown = lastKnownModelBySession.get(sessionID) + const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode" + let currentModel = parsed.modelID ?? lastKnown?.modelID ?? 
"claude-opus-4-6" + currentModel = normalizeFallbackModelID(currentModel) + + const setFallback = setPendingModelFallback( + sessionID, + agentName, + currentProvider, + currentModel, + ) + + if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) { + await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {}) + await ctx.client.session + .prompt({ + path: { id: sessionID }, + body: { parts: [{ type: "text", text: "continue" }] }, + query: { directory: ctx.directory }, + }) + .catch(() => {}) + } + } + } } } @@ -145,6 +360,11 @@ export function createEventHandler(args: { const sessionID = props?.sessionID as string | undefined const error = props?.error + const errorName = extractErrorName(error) + const errorMessage = extractErrorMessage(error) + const errorInfo = { name: errorName, message: errorMessage } + + // First, try session recovery for internal errors (thinking blocks, tool results, etc.) if (hooks.sessionRecovery?.isRecoverableError(error)) { const messageInfo = { id: props?.messageID as string | undefined, @@ -168,6 +388,52 @@ export function createEventHandler(args: { }) .catch(() => {}) } + } + // Second, try model fallback for model errors (rate limit, quota, provider issues, etc.) 
+ else if (sessionID && shouldRetryError(errorInfo)) { + // Get the current agent for this session, or default to "sisyphus" for main sessions + let agentName = getSessionAgent(sessionID) + + // For main sessions, if no agent is set, try to infer from the error or default to sisyphus + if (!agentName && sessionID === getMainSessionID()) { + // Try to infer agent from model in error message + if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) { + agentName = "sisyphus" + } else if (errorMessage.includes("gpt-5")) { + agentName = "hephaestus" + } else { + // Default to sisyphus for main session errors + agentName = "sisyphus" + } + } + + if (agentName) { + const parsed = extractProviderModelFromErrorMessage(errorMessage) + const currentProvider = props?.providerID as string || parsed.providerID || "opencode" + let currentModel = props?.modelID as string || parsed.modelID || "claude-opus-4-6" + currentModel = normalizeFallbackModelID(currentModel) + + // Try to set pending model fallback + const setFallback = setPendingModelFallback( + sessionID, + agentName, + currentProvider, + currentModel, + ) + + if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) { + // Abort the current session and prompt with "continue" to trigger the fallback + await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {}) + + await ctx.client.session + .prompt({ + path: { id: sessionID }, + body: { parts: [{ type: "text", text: "continue" }] }, + query: { directory: ctx.directory }, + }) + .catch(() => {}) + } + } } } } diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index e2596011c..0d2cdf68d 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -7,6 +7,7 @@ import { createSessionRecoveryHook, createSessionNotification, createThinkModeHook, + createModelFallbackHook, 
createAnthropicContextWindowLimitRecoveryHook, createAutoUpdateCheckerHook, createAgentUsageReminderHook, @@ -30,6 +31,7 @@ import { detectExternalNotificationPlugin, getNotificationConflictWarning, log, + normalizeSDKResponse, } from "../../shared" import { safeCreateHook } from "../../shared/safe-create-hook" import { sessionExists } from "../../tools" @@ -40,6 +42,7 @@ export type SessionHooks = { sessionRecovery: ReturnType | null sessionNotification: ReturnType | null thinkMode: ReturnType | null + modelFallback: ReturnType | null anthropicContextWindowLimitRecovery: ReturnType | null autoUpdateChecker: ReturnType | null agentUsageReminder: ReturnType | null @@ -102,6 +105,64 @@ export function createSessionHooks(args: { ? safeHook("think-mode", () => createThinkModeHook()) : null + const fallbackTitleState = new Map() + const updateFallbackTitle = async (input: { + sessionID: string + providerID: string + modelID: string + variant?: string + }) => { + const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}` + const existing = fallbackTitleState.get(input.sessionID) ?? {} + if (existing.lastKey === key) return + + if (!existing.baseTitle) { + const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null) + const sessionInfo = sessionResp + ? normalizeSDKResponse(sessionResp, null as { title?: string } | null, { preferResponseOnMissingData: true }) + : null + const rawTitle = sessionInfo?.title + if (typeof rawTitle === "string" && rawTitle.length > 0) { + existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim() + } else { + existing.baseTitle = "Session" + } + } + + const variantLabel = input.variant ? 
` ${input.variant}` : "" + const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]` + + await ctx.client.session + .update({ + path: { id: input.sessionID }, + body: { title: newTitle }, + query: { directory: ctx.directory }, + }) + .catch(() => {}) + + existing.lastKey = key + fallbackTitleState.set(input.sessionID, existing) + } + + // Model fallback hook - always enabled (no feature flag) + // This handles automatic model switching when model errors occur + const modelFallback = safeHook("model-fallback", () => + createModelFallbackHook({ + toast: async ({ title, message, variant, duration }) => { + await ctx.client.tui + .showToast({ + body: { + title, + message, + variant: variant ?? "warning", + duration: duration ?? 5000, + }, + }) + .catch(() => {}) + }, + onApplied: updateFallbackTitle, + })) + const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery") ? safeHook("anthropic-context-window-limit-recovery", () => createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental: pluginConfig.experimental })) @@ -181,6 +242,7 @@ export function createSessionHooks(args: { sessionRecovery, sessionNotification, thinkMode, + modelFallback, anthropicContextWindowLimitRecovery, autoUpdateChecker, agentUsageReminder, diff --git a/src/plugin/hooks/create-transform-hooks.ts b/src/plugin/hooks/create-transform-hooks.ts index 8001d0ab1..816a55b70 100644 --- a/src/plugin/hooks/create-transform-hooks.ts +++ b/src/plugin/hooks/create-transform-hooks.ts @@ -5,6 +5,7 @@ import { createClaudeCodeHooksHook, createKeywordDetectorHook, createThinkingBlockValidatorHook, + createBeastModeSystemHook, } from "../../hooks" import { contextCollector, @@ -17,6 +18,7 @@ export type TransformHooks = { keywordDetector: ReturnType | null contextInjectorMessagesTransform: ReturnType thinkingBlockValidator: ReturnType | null + beastModeSystem: ReturnType | null } export function 
createTransformHooks(args: { @@ -56,10 +58,19 @@ export function createTransformHooks(args: { ) : null + const beastModeSystem = isHookEnabled("beast-mode-system") + ? safeCreateHook( + "beast-mode-system", + () => createBeastModeSystemHook(), + { enabled: safeHookEnabled }, + ) + : null + return { claudeCodeHooks, keywordDetector, contextInjectorMessagesTransform, thinkingBlockValidator, + beastModeSystem, } } diff --git a/src/plugin/system-transform.ts b/src/plugin/system-transform.ts new file mode 100644 index 000000000..b8bc8c60f --- /dev/null +++ b/src/plugin/system-transform.ts @@ -0,0 +1,12 @@ +import type { CreatedHooks } from "../create-hooks" + +export function createSystemTransformHandler(args: { + hooks: CreatedHooks +}): (input: { sessionID: string }, output: { system: string[] }) => Promise { + return async (input, output): Promise => { + await args.hooks.beastModeSystem?.["experimental.chat.system.transform"]?.( + input, + output, + ) + } +} diff --git a/src/shared/agent-variant.ts b/src/shared/agent-variant.ts index 756f503e9..4551e3d3a 100644 --- a/src/shared/agent-variant.ts +++ b/src/shared/agent-variant.ts @@ -75,6 +75,14 @@ function findVariantInChain( return entry.variant } } + + // Some providers expose identical model IDs (e.g. OpenAI models via different providers). + // If we didn't find an exact provider+model match, fall back to model-only matching. 
+ for (const entry of fallbackChain) { + if (entry.model === currentModel.modelID) { + return entry.variant + } + } return undefined } diff --git a/src/shared/model-error-classifier.test.ts b/src/shared/model-error-classifier.test.ts new file mode 100644 index 000000000..9718d53e6 --- /dev/null +++ b/src/shared/model-error-classifier.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test" + +import { mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs" +import { join } from "node:path" +import * as dataPath from "./data-path" +import { shouldRetryError, selectFallbackProvider } from "./model-error-classifier" + +const TEST_CACHE_DIR = join(import.meta.dir, "__test-cache__") + +describe("model-error-classifier", () => { + let cacheDirSpy: ReturnType + + beforeEach(() => { + cacheDirSpy = spyOn(dataPath, "getOmoOpenCodeCacheDir").mockReturnValue(TEST_CACHE_DIR) + if (existsSync(TEST_CACHE_DIR)) { + rmSync(TEST_CACHE_DIR, { recursive: true }) + } + mkdirSync(TEST_CACHE_DIR, { recursive: true }) + }) + + afterEach(() => { + cacheDirSpy.mockRestore() + if (existsSync(TEST_CACHE_DIR)) { + rmSync(TEST_CACHE_DIR, { recursive: true }) + } + }) + + test("treats overloaded retry messages as retryable", () => { + //#given + const error = { message: "Provider is overloaded" } + + //#when + const result = shouldRetryError(error) + + //#then + expect(result).toBe(true) + }) + + test("selectFallbackProvider prefers first connected provider in preference order", () => { + //#given + writeFileSync( + join(TEST_CACHE_DIR, "connected-providers.json"), + JSON.stringify({ connected: ["quotio", "nvidia"], updatedAt: new Date().toISOString() }, null, 2), + ) + + //#when + const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia") + + //#then + expect(provider).toBe("quotio") + }) + + test("selectFallbackProvider falls back to next connected provider when first is disconnected", () => { + //#given + writeFileSync( + 
join(TEST_CACHE_DIR, "connected-providers.json"), + JSON.stringify({ connected: ["nvidia"], updatedAt: new Date().toISOString() }, null, 2), + ) + + //#when + const provider = selectFallbackProvider(["quotio", "nvidia"]) + + //#then + expect(provider).toBe("nvidia") + }) + + test("selectFallbackProvider uses provider preference order when cache is missing", () => { + //#given - no cache file + + //#when + const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia") + + //#then + expect(provider).toBe("quotio") + }) +}) diff --git a/src/shared/model-error-classifier.ts b/src/shared/model-error-classifier.ts new file mode 100644 index 000000000..9ff47fb5e --- /dev/null +++ b/src/shared/model-error-classifier.ts @@ -0,0 +1,135 @@ +import type { FallbackEntry } from "./model-requirements" +import { readConnectedProvidersCache } from "./connected-providers-cache" + +/** + * Error names that indicate a retryable model error (deadstop). + * These errors completely halt the action loop and should trigger fallback retry. + */ +const RETRYABLE_ERROR_NAMES = new Set([ + "ProviderModelNotFoundError", + "RateLimitError", + "QuotaExceededError", + "InsufficientCreditsError", + "ModelUnavailableError", + "ProviderConnectionError", + "AuthenticationError", +]) + +/** + * Error names that should NOT trigger retry. + * These errors are typically user-induced or fixable without switching models. + */ +const NON_RETRYABLE_ERROR_NAMES = new Set([ + "MessageAbortedError", + "PermissionDeniedError", + "ContextLengthError", + "TimeoutError", + "ValidationError", + "SyntaxError", + "UserError", +]) + +/** + * Message patterns that indicate a retryable error even without a known error name. 
+ */ +const RETRYABLE_MESSAGE_PATTERNS = [ + "rate_limit", + "rate limit", + "quota", + "not found", + "unavailable", + "insufficient", + "too many requests", + "over limit", + "overloaded", + "bad gateway", + "unknown provider", + "provider not found", + "connection error", + "network error", + "timeout", + "service unavailable", + "internal_server_error", + "503", + "502", + "504", +] + +export interface ErrorInfo { + name?: string + message?: string +} + +/** + * Determines if an error is a retryable model error. + * Returns true if the error is a known retryable type OR matches retryable message patterns. + */ +export function isRetryableModelError(error: ErrorInfo): boolean { + // If we have an error name, check against known lists + if (error.name) { + // Explicit non-retryable takes precedence + if (NON_RETRYABLE_ERROR_NAMES.has(error.name)) { + return false + } + // Check if it's a known retryable error + if (RETRYABLE_ERROR_NAMES.has(error.name)) { + return true + } + } + + // Check message patterns for unknown errors + const msg = error.message?.toLowerCase() ?? "" + return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern)) +} + +/** + * Determines if an error should trigger a fallback retry. + * Returns true for deadstop errors that completely halt the action loop. + */ +export function shouldRetryError(error: ErrorInfo): boolean { + return isRetryableModelError(error) +} + +/** + * Gets the next fallback model from the chain based on attempt count. + * Returns undefined if all fallbacks have been exhausted. + */ +export function getNextFallback( + fallbackChain: FallbackEntry[], + attemptCount: number, +): FallbackEntry | undefined { + return fallbackChain[attemptCount] +} + +/** + * Checks if there are more fallbacks available after the current attempt. 
+ */ +export function hasMoreFallbacks( + fallbackChain: FallbackEntry[], + attemptCount: number, +): boolean { + return attemptCount < fallbackChain.length +} + +/** + * Selects the best provider for a fallback entry. + * Priority: + * 1) First connected provider in the entry's provider preference order + * 2) First provider listed in the fallback entry (when cache is missing) + */ +export function selectFallbackProvider( + providers: string[], + preferredProviderID?: string, +): string { + const connectedProviders = readConnectedProvidersCache() + if (connectedProviders) { + const connectedSet = new Set(connectedProviders) + for (const provider of providers) { + if (connectedSet.has(provider)) { + return provider + } + } + } + + return providers[0] || preferredProviderID || "quotio" +} diff --git a/src/shared/model-requirements.test.ts b/src/shared/model-requirements.test.ts index acd78fc6f..793d11721 100644 --- a/src/shared/model-requirements.test.ts +++ b/src/shared/model-requirements.test.ts @@ -6,493 +6,158 @@ import { type ModelRequirement, } from "./model-requirements" +function flattenChains(): FallbackEntry[] { + return [ + ...Object.values(AGENT_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain), + ...Object.values(CATEGORY_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain), + ] +} + +function assertNoExcludedModels(entry: FallbackEntry): void { + // User exclusions. + expect(entry.model).not.toBe("grok-code-fast-1") + if (entry.providers.includes("quotio")) { + expect(entry.model).not.toBe("tstars2.0") + expect(entry.model).not.toMatch(/^kiro-/i) + expect(entry.model).not.toMatch(/^tab_/i) + } + // Remove codex-mini models per request. 
+ expect(entry.model).not.toMatch(/codex-mini/i) +} + +function assertNoOpencodeProvider(entry: FallbackEntry): void { + expect(entry.providers).not.toContain("opencode") +} + +function assertNoProviderPrefixForNonNamespacedProviders(entry: FallbackEntry): void { + // For these providers, model IDs should not be written as "provider/model". + const nonNamespaced = ["quotio", "openai", "github-copilot", "minimax", "minimax-coding-plan"] + for (const provider of entry.providers) { + if (!nonNamespaced.includes(provider)) continue + expect(entry.model.startsWith(`${provider}/`)).toBe(false) + } +} + describe("AGENT_MODEL_REQUIREMENTS", () => { - test("oracle has valid fallbackChain with gpt-5.2 as primary", () => { - // given - oracle agent requirement - const oracle = AGENT_MODEL_REQUIREMENTS["oracle"] - - // when - accessing oracle requirement - // then - fallbackChain exists with gpt-5.2 as first entry - expect(oracle).toBeDefined() - expect(oracle.fallbackChain).toBeArray() - expect(oracle.fallbackChain.length).toBeGreaterThan(0) - - const primary = oracle.fallbackChain[0] - expect(primary.providers).toContain("openai") - expect(primary.model).toBe("gpt-5.2") - expect(primary.variant).toBe("high") - }) - - test("sisyphus has claude-opus-4-6 as primary and requiresAnyModel", () => { - // #given - sisyphus agent requirement - const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"] - - // #when - accessing Sisyphus requirement - // #then - fallbackChain has claude-opus-4-6 first, big-pickle last - expect(sisyphus).toBeDefined() - expect(sisyphus.fallbackChain).toBeArray() - expect(sisyphus.fallbackChain).toHaveLength(5) - expect(sisyphus.requiresAnyModel).toBe(true) - - const primary = sisyphus.fallbackChain[0] - expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) - expect(primary.model).toBe("claude-opus-4-6") - expect(primary.variant).toBe("max") - - const last = sisyphus.fallbackChain[4] - expect(last.providers[0]).toBe("opencode") - 
expect(last.model).toBe("big-pickle") - }) - - test("librarian has valid fallbackChain with gemini-3-flash as primary", () => { - // given - librarian agent requirement - const librarian = AGENT_MODEL_REQUIREMENTS["librarian"] - - // when - accessing librarian requirement - // then - fallbackChain exists with gemini-3-flash as first entry - expect(librarian).toBeDefined() - expect(librarian.fallbackChain).toBeArray() - expect(librarian.fallbackChain.length).toBeGreaterThan(0) - - const primary = librarian.fallbackChain[0] - expect(primary.providers[0]).toBe("google") - expect(primary.model).toBe("gemini-3-flash") - }) - - test("explore has valid fallbackChain with grok-code-fast-1 as primary", () => { - // given - explore agent requirement - const explore = AGENT_MODEL_REQUIREMENTS["explore"] - - // when - accessing explore requirement - // then - fallbackChain: grok → minimax-free → haiku → nano - expect(explore).toBeDefined() - expect(explore.fallbackChain).toBeArray() - expect(explore.fallbackChain).toHaveLength(4) - - const primary = explore.fallbackChain[0] - expect(primary.providers).toContain("github-copilot") - expect(primary.model).toBe("grok-code-fast-1") - - const secondary = explore.fallbackChain[1] - expect(secondary.providers).toContain("opencode") - expect(secondary.model).toBe("minimax-m2.5-free") - - const tertiary = explore.fallbackChain[2] - expect(tertiary.providers).toContain("anthropic") - expect(tertiary.model).toBe("claude-haiku-4-5") - - const quaternary = explore.fallbackChain[3] - expect(quaternary.providers).toContain("opencode") - expect(quaternary.model).toBe("gpt-5-nano") - }) - - test("multimodal-looker has valid fallbackChain with k2p5 as primary", () => { - // given - multimodal-looker agent requirement - const multimodalLooker = AGENT_MODEL_REQUIREMENTS["multimodal-looker"] - - // when - accessing multimodal-looker requirement - // then - fallbackChain exists with k2p5 as first entry - expect(multimodalLooker).toBeDefined() - 
expect(multimodalLooker.fallbackChain).toBeArray() - expect(multimodalLooker.fallbackChain.length).toBeGreaterThan(0) - - const primary = multimodalLooker.fallbackChain[0] - expect(primary.providers[0]).toBe("kimi-for-coding") - expect(primary.model).toBe("k2p5") - }) - - test("prometheus has claude-opus-4-6 as primary", () => { - // #given - prometheus agent requirement - const prometheus = AGENT_MODEL_REQUIREMENTS["prometheus"] - - // #when - accessing Prometheus requirement - // #then - claude-opus-4-6 is first - expect(prometheus).toBeDefined() - expect(prometheus.fallbackChain).toBeArray() - expect(prometheus.fallbackChain.length).toBeGreaterThan(1) - - const primary = prometheus.fallbackChain[0] - expect(primary.model).toBe("claude-opus-4-6") - expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) - expect(primary.variant).toBe("max") - }) - - test("metis has claude-opus-4-6 as primary", () => { - // #given - metis agent requirement - const metis = AGENT_MODEL_REQUIREMENTS["metis"] - - // #when - accessing Metis requirement - // #then - claude-opus-4-6 is first - expect(metis).toBeDefined() - expect(metis.fallbackChain).toBeArray() - expect(metis.fallbackChain.length).toBeGreaterThan(1) - - const primary = metis.fallbackChain[0] - expect(primary.model).toBe("claude-opus-4-6") - expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) - expect(primary.variant).toBe("max") - }) - - test("momus has valid fallbackChain with gpt-5.2 as primary", () => { - // given - momus agent requirement - const momus = AGENT_MODEL_REQUIREMENTS["momus"] - - // when - accessing Momus requirement - // then - fallbackChain exists with gpt-5.2 as first entry, variant medium - expect(momus).toBeDefined() - expect(momus.fallbackChain).toBeArray() - expect(momus.fallbackChain.length).toBeGreaterThan(0) - - const primary = momus.fallbackChain[0] - expect(primary.model).toBe("gpt-5.2") - expect(primary.variant).toBe("medium") - 
expect(primary.providers[0]).toBe("openai") - }) - - test("atlas has valid fallbackChain with k2p5 as primary (kimi-for-coding prioritized)", () => { - // given - atlas agent requirement - const atlas = AGENT_MODEL_REQUIREMENTS["atlas"] - - // when - accessing Atlas requirement - // then - fallbackChain exists with k2p5 as first entry (kimi-for-coding prioritized) - expect(atlas).toBeDefined() - expect(atlas.fallbackChain).toBeArray() - expect(atlas.fallbackChain.length).toBeGreaterThan(0) - - const primary = atlas.fallbackChain[0] - expect(primary.model).toBe("k2p5") - expect(primary.providers[0]).toBe("kimi-for-coding") - }) - - test("hephaestus requires openai/github-copilot/opencode provider", () => { - // #given - hephaestus agent requirement - const hephaestus = AGENT_MODEL_REQUIREMENTS["hephaestus"] - - // #when - accessing hephaestus requirement - // #then - requiresProvider is set to openai, github-copilot, opencode (not requiresModel) - expect(hephaestus).toBeDefined() - expect(hephaestus.requiresProvider).toEqual(["openai", "github-copilot", "opencode"]) - expect(hephaestus.requiresModel).toBeUndefined() - }) - - test("all 10 builtin agents have valid fallbackChain arrays", () => { - // #given - list of 10 agent names - const expectedAgents = [ - "sisyphus", - "hephaestus", - "oracle", - "librarian", + test("defines all 10 builtin agents", () => { + expect(Object.keys(AGENT_MODEL_REQUIREMENTS).sort()).toEqual([ + "atlas", "explore", - "multimodal-looker", - "prometheus", + "hephaestus", + "librarian", "metis", "momus", - "atlas", - ] + "multimodal-looker", + "oracle", + "prometheus", + "sisyphus", + ]) + }) - // when - checking AGENT_MODEL_REQUIREMENTS - const definedAgents = Object.keys(AGENT_MODEL_REQUIREMENTS) + test("sisyphus: 2nd fallback is quotio gpt-5.3-codex (high)", () => { + const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"] + expect(sisyphus.requiresAnyModel).toBe(true) + expect(sisyphus.fallbackChain.length).toBeGreaterThan(2) - // #then 
- all agents present with valid fallbackChain - expect(definedAgents).toHaveLength(10) - for (const agent of expectedAgents) { - const requirement = AGENT_MODEL_REQUIREMENTS[agent] - expect(requirement).toBeDefined() - expect(requirement.fallbackChain).toBeArray() - expect(requirement.fallbackChain.length).toBeGreaterThan(0) + expect(sisyphus.fallbackChain[0]).toEqual({ + providers: ["quotio"], + model: "claude-opus-4-6", + variant: "max", + }) - for (const entry of requirement.fallbackChain) { - expect(entry.providers).toBeArray() - expect(entry.providers.length).toBeGreaterThan(0) - expect(typeof entry.model).toBe("string") - expect(entry.model.length).toBeGreaterThan(0) - } - } + expect(sisyphus.fallbackChain[1]).toEqual({ + providers: ["quotio"], + model: "gpt-5.3-codex", + variant: "high", + }) + }) + + test("explore: uses speed chain, includes rome, and gpt-5-mini is copilot-first", () => { + const explore = AGENT_MODEL_REQUIREMENTS["explore"] + expect(explore.fallbackChain.length).toBeGreaterThan(4) + expect(explore.fallbackChain[0].model).toBe("claude-haiku-4-5") + expect(explore.fallbackChain.some((e) => e.model === "iflow-rome-30ba3b")).toBe(true) + + const gptMini = explore.fallbackChain.find((e) => e.model === "gpt-5-mini") + expect(gptMini).toBeDefined() + expect(gptMini!.providers[0]).toBe("github-copilot") + expect(gptMini!.variant).toBe("high") + }) + + test("multimodal-looker: prefers gemini image model first", () => { + const multimodal = AGENT_MODEL_REQUIREMENTS["multimodal-looker"] + expect(multimodal.fallbackChain[0]).toEqual({ + providers: ["quotio"], + model: "gemini-3-pro-image", + }) + }) + + test("includes NVIDIA NIM additions in at least one agent chain", () => { + const all = Object.values(AGENT_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain) + expect(all.some((e) => e.providers.includes("nvidia") && e.model === "qwen/qwen3.5-397b-a17b")).toBe(true) + expect(all.some((e) => e.providers.includes("nvidia") && e.model === 
"stepfun-ai/step-3.5-flash")).toBe(true) + expect(all.some((e) => e.providers.includes("nvidia") && e.model === "bytedance/seed-oss-36b-instruct")).toBe(true) }) }) describe("CATEGORY_MODEL_REQUIREMENTS", () => { - test("ultrabrain has valid fallbackChain with gpt-5.3-codex as primary", () => { - // given - ultrabrain category requirement - const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"] - - // when - accessing ultrabrain requirement - // then - fallbackChain exists with gpt-5.3-codex as first entry - expect(ultrabrain).toBeDefined() - expect(ultrabrain.fallbackChain).toBeArray() - expect(ultrabrain.fallbackChain.length).toBeGreaterThan(0) - - const primary = ultrabrain.fallbackChain[0] - expect(primary.variant).toBe("xhigh") - expect(primary.model).toBe("gpt-5.3-codex") - expect(primary.providers[0]).toBe("openai") - }) - - test("deep has valid fallbackChain with gpt-5.3-codex as primary", () => { - // given - deep category requirement - const deep = CATEGORY_MODEL_REQUIREMENTS["deep"] - - // when - accessing deep requirement - // then - fallbackChain exists with gpt-5.3-codex as first entry, medium variant - expect(deep).toBeDefined() - expect(deep.fallbackChain).toBeArray() - expect(deep.fallbackChain.length).toBeGreaterThan(0) - - const primary = deep.fallbackChain[0] - expect(primary.variant).toBe("medium") - expect(primary.model).toBe("gpt-5.3-codex") - expect(primary.providers[0]).toBe("openai") - }) - - test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => { - // given - visual-engineering category requirement - const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"] - - // when - accessing visual-engineering requirement - // then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) → k2p5 - expect(visualEngineering).toBeDefined() - expect(visualEngineering.fallbackChain).toBeArray() - expect(visualEngineering.fallbackChain).toHaveLength(4) - - const primary = 
visualEngineering.fallbackChain[0] - expect(primary.providers[0]).toBe("google") - expect(primary.model).toBe("gemini-3-pro") - expect(primary.variant).toBe("high") - - const second = visualEngineering.fallbackChain[1] - expect(second.providers[0]).toBe("zai-coding-plan") - expect(second.model).toBe("glm-5") - - const third = visualEngineering.fallbackChain[2] - expect(third.model).toBe("claude-opus-4-6") - expect(third.variant).toBe("max") - - const fourth = visualEngineering.fallbackChain[3] - expect(fourth.providers[0]).toBe("kimi-for-coding") - expect(fourth.model).toBe("k2p5") - }) - - test("quick has valid fallbackChain with claude-haiku-4-5 as primary", () => { - // given - quick category requirement - const quick = CATEGORY_MODEL_REQUIREMENTS["quick"] - - // when - accessing quick requirement - // then - fallbackChain exists with claude-haiku-4-5 as first entry - expect(quick).toBeDefined() - expect(quick.fallbackChain).toBeArray() - expect(quick.fallbackChain.length).toBeGreaterThan(0) - - const primary = quick.fallbackChain[0] - expect(primary.model).toBe("claude-haiku-4-5") - expect(primary.providers[0]).toBe("anthropic") - }) - - test("unspecified-low has valid fallbackChain with claude-sonnet-4-6 as primary", () => { - // given - unspecified-low category requirement - const unspecifiedLow = CATEGORY_MODEL_REQUIREMENTS["unspecified-low"] - - // when - accessing unspecified-low requirement - // then - fallbackChain exists with claude-sonnet-4-6 as first entry - expect(unspecifiedLow).toBeDefined() - expect(unspecifiedLow.fallbackChain).toBeArray() - expect(unspecifiedLow.fallbackChain.length).toBeGreaterThan(0) - - const primary = unspecifiedLow.fallbackChain[0] - expect(primary.model).toBe("claude-sonnet-4-6") - expect(primary.providers[0]).toBe("anthropic") - }) - - test("unspecified-high has claude-opus-4-6 as primary", () => { - // #given - unspecified-high category requirement - const unspecifiedHigh = CATEGORY_MODEL_REQUIREMENTS["unspecified-high"] 
- - // #when - accessing unspecified-high requirement - // #then - claude-opus-4-6 is first - expect(unspecifiedHigh).toBeDefined() - expect(unspecifiedHigh.fallbackChain).toBeArray() - expect(unspecifiedHigh.fallbackChain.length).toBeGreaterThan(1) - - const primary = unspecifiedHigh.fallbackChain[0] - expect(primary.model).toBe("claude-opus-4-6") - expect(primary.variant).toBe("max") - expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) - }) - - test("artistry has valid fallbackChain with gemini-3-pro as primary", () => { - // given - artistry category requirement - const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] - - // when - accessing artistry requirement - // then - fallbackChain exists with gemini-3-pro as first entry - expect(artistry).toBeDefined() - expect(artistry.fallbackChain).toBeArray() - expect(artistry.fallbackChain.length).toBeGreaterThan(0) - - const primary = artistry.fallbackChain[0] - expect(primary.model).toBe("gemini-3-pro") - expect(primary.variant).toBe("high") - expect(primary.providers[0]).toBe("google") - }) - - test("writing has valid fallbackChain with k2p5 as primary (kimi-for-coding)", () => { - // given - writing category requirement - const writing = CATEGORY_MODEL_REQUIREMENTS["writing"] - - // when - accessing writing requirement - // then - fallbackChain: k2p5 → gemini-3-flash → claude-sonnet-4-6 - expect(writing).toBeDefined() - expect(writing.fallbackChain).toBeArray() - expect(writing.fallbackChain).toHaveLength(3) - - const primary = writing.fallbackChain[0] - expect(primary.model).toBe("k2p5") - expect(primary.providers[0]).toBe("kimi-for-coding") - - const second = writing.fallbackChain[1] - expect(second.model).toBe("gemini-3-flash") - expect(second.providers[0]).toBe("google") - }) - - test("all 8 categories have valid fallbackChain arrays", () => { - // given - list of 8 category names - const expectedCategories = [ - "visual-engineering", - "ultrabrain", - "deep", + test("defines all 
8 categories", () => { + expect(Object.keys(CATEGORY_MODEL_REQUIREMENTS).sort()).toEqual([ "artistry", + "deep", "quick", - "unspecified-low", + "ultrabrain", "unspecified-high", + "unspecified-low", + "visual-engineering", "writing", - ] + ]) + }) - // when - checking CATEGORY_MODEL_REQUIREMENTS - const definedCategories = Object.keys(CATEGORY_MODEL_REQUIREMENTS) + test("deep requires gpt-5.3-codex", () => { + expect(CATEGORY_MODEL_REQUIREMENTS["deep"].requiresModel).toBe("gpt-5.3-codex") + }) - // then - all categories present with valid fallbackChain - expect(definedCategories).toHaveLength(8) - for (const category of expectedCategories) { - const requirement = CATEGORY_MODEL_REQUIREMENTS[category] - expect(requirement).toBeDefined() - expect(requirement.fallbackChain).toBeArray() - expect(requirement.fallbackChain.length).toBeGreaterThan(0) + test("quick uses the speed chain (haiku primary)", () => { + expect(CATEGORY_MODEL_REQUIREMENTS["quick"].fallbackChain[0].model).toBe("claude-haiku-4-5") + }) - for (const entry of requirement.fallbackChain) { - expect(entry.providers).toBeArray() - expect(entry.providers.length).toBeGreaterThan(0) - expect(typeof entry.model).toBe("string") - expect(entry.model.length).toBeGreaterThan(0) - } + test("ultrabrain starts with gpt-5.3-codex (high)", () => { + const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"] + expect(ultrabrain.fallbackChain[0]).toEqual({ + providers: ["quotio"], + model: "gpt-5.3-codex", + variant: "high", + }) + }) +}) + +describe("ModelRequirements invariants", () => { + test("all entries have non-empty providers and a non-empty model", () => { + for (const entry of flattenChains()) { + expect(entry.providers.length).toBeGreaterThan(0) + expect(typeof entry.model).toBe("string") + expect(entry.model.length).toBeGreaterThan(0) + } + }) + + test("no entry uses opencode provider and no excluded models are present", () => { + for (const entry of flattenChains()) { + assertNoOpencodeProvider(entry) + 
assertNoExcludedModels(entry) + assertNoProviderPrefixForNonNamespacedProviders(entry) } }) }) -describe("FallbackEntry type", () => { - test("FallbackEntry structure is correct", () => { - // given - a valid FallbackEntry object - const entry: FallbackEntry = { - providers: ["anthropic", "github-copilot", "opencode"], - model: "claude-opus-4-6", - variant: "high", - } - - // when - accessing properties - // then - all properties are accessible - expect(entry.providers).toEqual(["anthropic", "github-copilot", "opencode"]) - expect(entry.model).toBe("claude-opus-4-6") - expect(entry.variant).toBe("high") - }) - - test("FallbackEntry variant is optional", () => { - // given - a FallbackEntry without variant - const entry: FallbackEntry = { - providers: ["opencode", "anthropic"], - model: "big-pickle", - } - - // when - accessing variant - // then - variant is undefined +describe("Type sanity", () => { + test("FallbackEntry.variant is optional", () => { + const entry: FallbackEntry = { providers: ["quotio"], model: "claude-haiku-4-5" } expect(entry.variant).toBeUndefined() }) -}) -describe("ModelRequirement type", () => { - test("ModelRequirement structure with fallbackChain is correct", () => { - // given - a valid ModelRequirement object - const requirement: ModelRequirement = { - fallbackChain: [ - { providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["openai", "github-copilot"], model: "gpt-5.2", variant: "high" }, - ], - } - - // when - accessing properties - // then - fallbackChain is accessible with correct structure - expect(requirement.fallbackChain).toBeArray() - expect(requirement.fallbackChain).toHaveLength(2) - expect(requirement.fallbackChain[0].model).toBe("claude-opus-4-6") - expect(requirement.fallbackChain[1].model).toBe("gpt-5.2") - }) - - test("ModelRequirement variant is optional", () => { - // given - a ModelRequirement without top-level variant - const requirement: ModelRequirement = { - 
fallbackChain: [{ providers: ["opencode"], model: "big-pickle" }], - } - - // when - accessing variant - // then - variant is undefined - expect(requirement.variant).toBeUndefined() - }) - - test("no model in fallbackChain has provider prefix", () => { - // given - all agent and category requirements - const allRequirements = [ - ...Object.values(AGENT_MODEL_REQUIREMENTS), - ...Object.values(CATEGORY_MODEL_REQUIREMENTS), - ] - - // when - checking each model in fallbackChain - // then - none contain "/" (provider prefix) - for (const req of allRequirements) { - for (const entry of req.fallbackChain) { - expect(entry.model).not.toContain("/") - } - } - }) - - test("all fallbackChain entries have non-empty providers array", () => { - // given - all agent and category requirements - const allRequirements = [ - ...Object.values(AGENT_MODEL_REQUIREMENTS), - ...Object.values(CATEGORY_MODEL_REQUIREMENTS), - ] - - // when - checking each entry in fallbackChain - // then - all have non-empty providers array - for (const req of allRequirements) { - for (const entry of req.fallbackChain) { - expect(entry.providers).toBeArray() - expect(entry.providers.length).toBeGreaterThan(0) - } - } - }) -}) - -describe("requiresModel field in categories", () => { - test("deep category has requiresModel set to gpt-5.3-codex", () => { - // given - const deep = CATEGORY_MODEL_REQUIREMENTS["deep"] - - // when / #then - expect(deep.requiresModel).toBe("gpt-5.3-codex") - }) - - test("artistry category has requiresModel set to gemini-3-pro", () => { - // given - const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] - - // when / #then - expect(artistry.requiresModel).toBe("gemini-3-pro") + test("ModelRequirement.variant is optional", () => { + const req: ModelRequirement = { fallbackChain: [{ providers: ["quotio"], model: "claude-haiku-4-5" }] } + expect(req.variant).toBeUndefined() }) }) diff --git a/src/shared/model-requirements.ts b/src/shared/model-requirements.ts index 
703749f2c..21a939781 100644 --- a/src/shared/model-requirements.ts +++ b/src/shared/model-requirements.ts @@ -12,85 +12,133 @@ export type ModelRequirement = { requiresProvider?: string[] // If set, only activates when any of these providers is connected } +function fb(providers: string[] | string, model: string, variant?: string): FallbackEntry { + return { + providers: Array.isArray(providers) ? providers : [providers], + model, + ...(variant !== undefined ? { variant } : {}), + } +} + +// Provider preference rules: +// - Never use the paid `opencode` provider as an automatic fallback. +// - Prefer `quotio` when the same model exists across multiple providers. +// - Prefer `github-copilot` first for `gpt-5-mini` (unlimited), fall back to `quotio`. +// Note: user requested "Quotio-first" and to avoid the OpenCode provider; we keep runtime fallbacks on +// `quotio` + `nvidia` (+ `github-copilot` for unlimited GPT mini) unless explicitly requested otherwise. +const P_GPT: string[] = ["quotio"] +const P_GPT_MINI: string[] = ["github-copilot", "quotio"] + +// Benchmark-driven ordering (user-provided table + NVIDIA NIM docs), tuned per-agent for quality vs speed. 
+ +const SPEED_CHAIN: FallbackEntry[] = [ + fb("quotio", "claude-haiku-4-5"), fb("quotio", "oswe-vscode-prime"), + fb(P_GPT_MINI, "gpt-5-mini", "high"), fb(P_GPT_MINI, "gpt-4.1"), + fb("nvidia", "nvidia/nemotron-3-nano-30b-a3b"), fb("quotio", "iflow-rome-30ba3b"), + fb("minimax-coding-plan", "MiniMax-M2.5"), fb("nvidia", "bytedance/seed-oss-36b-instruct"), + fb("quotio", "claude-sonnet-4-5"), +] + +const QUALITY_CODING_CHAIN: FallbackEntry[] = [ + fb("quotio", "claude-opus-4-6-thinking"), + fb("nvidia", "stepfun-ai/step-3.5-flash"), + fb("nvidia", "qwen/qwen3.5-397b-a17b"), + fb("quotio", "glm-5"), + fb("nvidia", "z-ai/glm5"), + fb("quotio", "deepseek-v3.2-reasoner"), + fb("quotio", "deepseek-r1"), + fb("nvidia", "deepseek-ai/deepseek-r1"), + fb("quotio", "qwen3-235b-a22b-thinking-2507"), + fb("nvidia", "qwen/qwen3-next-80b-a3b-thinking"), + fb("nvidia", "qwen/qwen3-coder-480b-a35b-instruct"), + fb("nvidia", "bytedance/seed-oss-36b-instruct"), + fb("quotio", "kimi-k2-thinking"), + fb("quotio", "kimi-k2.5"), + fb("nvidia", "moonshotai/kimi-k2.5"), + fb("minimax-coding-plan", "MiniMax-M2.5"), + fb("minimax-coding-plan", "MiniMax-M2.5-highspeed"), + fb("minimax", "MiniMax-M2.5"), + fb("quotio", "minimax-m2.5"), + fb("quotio", "claude-sonnet-4-5-thinking"), +] + export const AGENT_MODEL_REQUIREMENTS: Record = { sisyphus: { fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["zai-coding-plan", "opencode"], model: "glm-5" }, - { providers: ["opencode"], model: "big-pickle" }, + // 1st fallback: switch away from Opus Thinking to the non-thinking model (often more available). + fb("quotio", "claude-opus-4-6", "max"), + // 2nd fallback: user-requested. 
+ fb("quotio", "gpt-5.3-codex", "high"), + ...QUALITY_CODING_CHAIN, + ...SPEED_CHAIN, ], requiresAnyModel: true, }, hephaestus: { fallbackChain: [ - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + fb("quotio", "gpt-5.3-codex", "high"), + ...QUALITY_CODING_CHAIN, ], - requiresProvider: ["openai", "github-copilot", "opencode"], + requiresAnyModel: true, }, oracle: { fallbackChain: [ - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + fb("quotio", "gpt-5.3-codex", "high"), + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "claude-sonnet-4-5-thinking"), + ...QUALITY_CODING_CHAIN, ], }, - librarian: { - fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, - { providers: ["opencode"], model: "minimax-m2.5-free" }, - { providers: ["opencode"], model: "big-pickle" }, - ], - }, - explore: { + librarian: { fallbackChain: [ - { providers: ["github-copilot"], model: "grok-code-fast-1" }, - { providers: ["opencode"], model: "minimax-m2.5-free" }, - { providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" }, - { providers: ["opencode"], model: "gpt-5-nano" }, + fb("quotio", "claude-sonnet-4-5"), + ...SPEED_CHAIN, + ...QUALITY_CODING_CHAIN, ], }, + explore: { + fallbackChain: SPEED_CHAIN, + }, "multimodal-looker": { fallbackChain: [ - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, - { providers: ["zai-coding-plan"], model: "glm-4.6v" }, + fb("quotio", "gemini-3-pro-image"), + fb("quotio", 
"gemini-3-pro-high"), + fb("quotio", "gemini-3-flash"), + fb("quotio", "kimi-k2.5"), + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "claude-sonnet-4-5-thinking"), + fb("quotio", "claude-haiku-4-5"), ], }, prometheus: { fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "gpt-5.3-codex", "high"), + fb("quotio", "claude-sonnet-4-5-thinking"), + ...QUALITY_CODING_CHAIN, ], }, metis: { fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "gpt-5.3-codex", "high"), + fb("quotio", "claude-sonnet-4-5-thinking"), + ...QUALITY_CODING_CHAIN, ], }, momus: { fallbackChain: [ - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + fb("quotio", "gpt-5.3-codex", "high"), + fb("quotio", "claude-opus-4-6-thinking"), + ...QUALITY_CODING_CHAIN, ], }, atlas: { fallbackChain: [ - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: 
["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, + fb("quotio", "claude-sonnet-4-5-thinking"), + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "gpt-5.3-codex", "medium"), + ...QUALITY_CODING_CHAIN, ], }, } @@ -98,61 +146,60 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { export const CATEGORY_MODEL_REQUIREMENTS: Record = { "visual-engineering": { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, - { providers: ["zai-coding-plan", "opencode"], model: "glm-5" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["kimi-for-coding"], model: "k2p5" }, + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "gemini-3-pro-image"), + fb("quotio", "kimi-k2-thinking"), + fb("quotio", "kimi-k2.5"), + fb("quotio", "claude-sonnet-4-5-thinking"), + fb("quotio", "gpt-5.3-codex", "medium"), ], }, ultrabrain: { fallbackChain: [ - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, + fb("quotio", "gpt-5.3-codex", "high"), + fb("quotio", "claude-opus-4-6-thinking"), + fb("nvidia", "stepfun-ai/step-3.5-flash"), + fb("nvidia", "qwen/qwen3.5-397b-a17b"), + ...QUALITY_CODING_CHAIN, ], }, deep: { fallbackChain: [ - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + fb("quotio", "gpt-5.3-codex", "medium"), + fb("quotio", 
"claude-opus-4-6-thinking"), + fb("quotio", "claude-sonnet-4-5-thinking"), + ...QUALITY_CODING_CHAIN, ], requiresModel: "gpt-5.3-codex", }, artistry: { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "claude-sonnet-4-5-thinking"), + fb("quotio", "claude-sonnet-4-5"), ], - requiresModel: "gemini-3-pro", + requiresModel: "claude-opus-4-6", }, quick: { - fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, - { providers: ["opencode"], model: "gpt-5-nano" }, - ], + fallbackChain: SPEED_CHAIN, }, "unspecified-low": { - fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, - ], + fallbackChain: SPEED_CHAIN, }, "unspecified-high": { fallbackChain: [ - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + fb("quotio", "claude-opus-4-6-thinking"), + fb("quotio", "gpt-5.3-codex", "high"), + ...QUALITY_CODING_CHAIN, ], }, writing: { fallbackChain: [ - { providers: ["kimi-for-coding"], model: "k2p5" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, - { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" }, + 
fb("quotio", "claude-sonnet-4-5"), + fb("quotio", "glm-5"), + fb("quotio", "kimi-k2.5"), + fb("quotio", "claude-haiku-4-5"), + fb("quotio", "gemini-3-flash"), ], }, } diff --git a/src/shared/session-model-state.test.ts b/src/shared/session-model-state.test.ts new file mode 100644 index 000000000..76f36127d --- /dev/null +++ b/src/shared/session-model-state.test.ts @@ -0,0 +1,30 @@ +import { describe, expect, test } from "bun:test" +import { clearSessionModel, getSessionModel, setSessionModel } from "./session-model-state" + +describe("session-model-state", () => { + test("stores and retrieves a session model", () => { + //#given + const sessionID = "ses_test" + + //#when + setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" }) + + //#then + expect(getSessionModel(sessionID)).toEqual({ + providerID: "github-copilot", + modelID: "gpt-4.1", + }) + }) + + test("clears a session model", () => { + //#given + const sessionID = "ses_clear" + setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" }) + + //#when + clearSessionModel(sessionID) + + //#then + expect(getSessionModel(sessionID)).toBeUndefined() + }) +}) diff --git a/src/shared/session-model-state.ts b/src/shared/session-model-state.ts new file mode 100644 index 000000000..551bc5e58 --- /dev/null +++ b/src/shared/session-model-state.ts @@ -0,0 +1,15 @@ +export type SessionModel = { providerID: string; modelID: string } + +const sessionModels = new Map() + +export function setSessionModel(sessionID: string, model: SessionModel): void { + sessionModels.set(sessionID, model) +} + +export function getSessionModel(sessionID: string): SessionModel | undefined { + return sessionModels.get(sessionID) +} + +export function clearSessionModel(sessionID: string): void { + sessionModels.delete(sessionID) +} diff --git a/src/tools/call-omo-agent/session-creator.ts b/src/tools/call-omo-agent/session-creator.ts index 37afad3f3..766b4a5f9 100644 --- 
a/src/tools/call-omo-agent/session-creator.ts +++ b/src/tools/call-omo-agent/session-creator.ts @@ -1,6 +1,6 @@ import type { CallOmoAgentArgs } from "./types" import type { PluginInput } from "@opencode-ai/plugin" -import { subagentSessions } from "../../features/claude-code-session-state" +import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state" import { log } from "../../shared" export async function createOrGetSession( @@ -64,6 +64,7 @@ Original error: ${createResult.error}`) const sessionID = createResult.data.id log(`[call_omo_agent] Created session: ${sessionID}`) subagentSessions.add(sessionID) + syncSubagentSessions.add(sessionID) return { sessionID, isNew: true } } } diff --git a/src/tools/call-omo-agent/subagent-session-creator.ts b/src/tools/call-omo-agent/subagent-session-creator.ts index 383cae638..432bc1f7b 100644 --- a/src/tools/call-omo-agent/subagent-session-creator.ts +++ b/src/tools/call-omo-agent/subagent-session-creator.ts @@ -1,7 +1,7 @@ import type { PluginInput } from "@opencode-ai/plugin" import { log } from "../../shared" import { resolveSessionDirectory } from "../../shared" -import { subagentSessions } from "../../features/claude-code-session-state" +import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state" import type { CallOmoAgentArgs } from "./types" import type { ToolContextWithMetadata } from "./tool-context-with-metadata" @@ -69,5 +69,6 @@ Original error: ${createResult.error}`, const sessionID = createResult.data.id log(`[call_omo_agent] Created session: ${sessionID}`) subagentSessions.add(sessionID) + syncSubagentSessions.add(sessionID) return { ok: true, sessionID } } diff --git a/src/tools/delegate-task/background-task.ts b/src/tools/delegate-task/background-task.ts index e724695b2..957200f75 100644 --- a/src/tools/delegate-task/background-task.ts +++ b/src/tools/delegate-task/background-task.ts @@ -1,5 +1,6 @@ import type { DelegateTaskArgs, 
ToolContextWithMetadata } from "./types" import type { ExecutorContext, ParentContext } from "./executor-types" +import type { FallbackEntry } from "../../shared/model-requirements" import { getTimingConfig } from "./timing" import { storeToolMetadata } from "../../features/tool-metadata-store" import { formatDetailedError } from "./error-formatting" @@ -12,7 +13,8 @@ export async function executeBackgroundTask( parentContext: ParentContext, agentToUse: string, categoryModel: { providerID: string; modelID: string; variant?: string } | undefined, - systemContent: string | undefined + systemContent: string | undefined, + fallbackChain?: FallbackEntry[], ): Promise { const { manager } = executorCtx @@ -27,6 +29,7 @@ export async function executeBackgroundTask( parentAgent: parentContext.agent, parentTools: getSessionTools(parentContext.sessionID), model: categoryModel, + fallbackChain, skills: args.load_skills.length > 0 ? args.load_skills : undefined, skillContent: systemContent, category: args.category, diff --git a/src/tools/delegate-task/category-resolver.ts b/src/tools/delegate-task/category-resolver.ts index 8a226558d..ba7ce7c7f 100644 --- a/src/tools/delegate-task/category-resolver.ts +++ b/src/tools/delegate-task/category-resolver.ts @@ -1,6 +1,7 @@ import type { ModelFallbackInfo } from "../../features/task-toast-manager/types" import type { DelegateTaskArgs } from "./types" import type { ExecutorContext } from "./executor-types" +import type { FallbackEntry } from "../../shared/model-requirements" import { mergeCategories } from "../../shared/merge-categories" import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent" import { resolveCategoryConfig } from "./categories" @@ -16,6 +17,7 @@ export interface CategoryResolutionResult { modelInfo: ModelFallbackInfo | undefined actualModel: string | undefined isUnstableAgent: boolean + fallbackChain?: FallbackEntry[] // For runtime retry on model errors error?: string } @@ -177,5 +179,6 @@ Available 
categories: ${categoryNames.join(", ")}`, modelInfo, actualModel, isUnstableAgent, + fallbackChain: requirement?.fallbackChain, } } diff --git a/src/tools/delegate-task/subagent-resolver.ts b/src/tools/delegate-task/subagent-resolver.ts index ad48b714e..043243db2 100644 --- a/src/tools/delegate-task/subagent-resolver.ts +++ b/src/tools/delegate-task/subagent-resolver.ts @@ -8,6 +8,7 @@ import { getAgentDisplayName, getAgentConfigKey } from "../../shared/agent-displ import { normalizeSDKResponse } from "../../shared" import { log } from "../../shared/logger" import { getAvailableModelsForDelegateTask } from "./available-models" +import type { FallbackEntry } from "../../shared/model-requirements" import { resolveModelForDelegateTask } from "./model-selection" export async function resolveSubagentExecution( @@ -15,7 +16,7 @@ export async function resolveSubagentExecution( executorCtx: ExecutorContext, parentAgent: string | undefined, categoryExamples: string -): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; error?: string }> { +): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; fallbackChain?: FallbackEntry[]; error?: string }> { const { client, agentOverrides } = executorCtx if (!args.subagent_type?.trim()) { @@ -46,6 +47,7 @@ Create the work plan directly - that's your job as the planning agent.`, let agentToUse = agentName let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined + let fallbackChain: FallbackEntry[] | undefined = undefined try { const agentsResult = await client.app.agents() @@ -92,6 +94,7 @@ Create the work plan directly - that's your job as the planning agent.`, const agentOverride = agentOverrides?.[agentConfigKey as keyof typeof agentOverrides] ?? (agentOverrides ? 
Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentConfigKey)?.[1] : undefined) const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentConfigKey] + fallbackChain = agentRequirement?.fallbackChain if (agentOverride?.model || agentRequirement || matchedAgent.model) { const availableModels = await getAvailableModelsForDelegateTask(client) @@ -135,5 +138,5 @@ Create the work plan directly - that's your job as the planning agent.`, } } - return { agentToUse, categoryModel } + return { agentToUse, categoryModel, fallbackChain } } diff --git a/src/tools/delegate-task/sync-task.ts b/src/tools/delegate-task/sync-task.ts index d95437865..19000134e 100644 --- a/src/tools/delegate-task/sync-task.ts +++ b/src/tools/delegate-task/sync-task.ts @@ -3,7 +3,7 @@ import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types" import type { ExecutorContext, ParentContext } from "./executor-types" import { getTaskToastManager } from "../../features/task-toast-manager" import { storeToolMetadata } from "../../features/tool-metadata-store" -import { subagentSessions } from "../../features/claude-code-session-state" +import { subagentSessions, syncSubagentSessions, setSessionAgent } from "../../features/claude-code-session-state" import { log } from "../../shared/logger" import { formatDuration } from "./time-formatter" import { formatDetailedError } from "./error-formatting" @@ -40,6 +40,8 @@ export async function executeSyncTask( const sessionID = createSessionResult.sessionID syncSessionID = sessionID subagentSessions.add(sessionID) + syncSubagentSessions.add(sessionID) + setSessionAgent(sessionID, agentToUse) if (onSyncSessionCreated) { log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID }) @@ -59,6 +61,7 @@ export async function executeSyncTask( if (toastManager) { toastManager.addTask({ id: taskId, + sessionID, description: args.description, agent: agentToUse, isBackground: false, @@ -145,6 +148,7 @@ 
session_id: ${sessionID} } finally { if (syncSessionID) { subagentSessions.delete(syncSessionID) + syncSubagentSessions.delete(syncSessionID) } } } diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 073c360b6..ce753eb3a 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -164,6 +164,7 @@ Prompts MUST be in English.` let modelInfo: import("../../features/task-toast-manager/types").ModelFallbackInfo | undefined let actualModel: string | undefined let isUnstableAgent = false + let fallbackChain: import("../../shared/model-requirements").FallbackEntry[] | undefined if (args.category) { const resolution = await resolveCategoryExecution(args, options, inheritedModel, systemDefaultModel) @@ -176,6 +177,7 @@ Prompts MUST be in English.` modelInfo = resolution.modelInfo actualModel = resolution.actualModel isUnstableAgent = resolution.isUnstableAgent + fallbackChain = resolution.fallbackChain const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean @@ -206,6 +208,7 @@ Prompts MUST be in English.` } agentToUse = resolution.agentToUse categoryModel = resolution.categoryModel + fallbackChain = resolution.fallbackChain } const systemContent = buildSystemContent({ @@ -217,7 +220,7 @@ Prompts MUST be in English.` }) if (runInBackground) { - return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent) + return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, fallbackChain) } return executeSyncTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, modelInfo)