Compare commits

...

3 Commits

Author SHA1 Message Date
YeonGyu-Kim
2bb211c979 fix: add max retry protection and session cleanup for model fallback 2026-02-21 02:27:27 +09:00
VespianRex
bf51919a79 Address review feedback for fallback fixes 2026-02-20 17:46:12 +02:00
VespianRex
f5f1d1d4c2 Fix model fallback across main/background/sync agents 2026-02-20 17:45:53 +02:00
46 changed files with 2511 additions and 622 deletions

View File

@@ -69,6 +69,7 @@
"directory-readme-injector",
"empty-task-response-detector",
"think-mode",
"model-fallback",
"anthropic-context-window-limit-recovery",
"preemptive-compaction",
"rules-injector",
@@ -80,6 +81,7 @@
"non-interactive-env",
"interactive-bash-session",
"thinking-block-validator",
"beast-mode-system",
"ralph-loop",
"category-skill-reminder",
"compaction-context-injector",
@@ -3009,6 +3011,9 @@
},
"disable_omo_env": {
"type": "boolean"
},
"model_fallback_title": {
"type": "boolean"
}
},
"additionalProperties": false

View File

@@ -15,7 +15,7 @@ describe("model-resolution check", () => {
const sisyphus = info.agents.find((a) => a.name === "sisyphus")
expect(sisyphus).toBeDefined()
expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6")
expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("anthropic")
expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("quotio")
})
it("returns category requirements with provider chains", async () => {
@@ -26,8 +26,8 @@ describe("model-resolution check", () => {
// then: Should have category entries
const visual = info.categories.find((c) => c.name === "visual-engineering")
expect(visual).toBeDefined()
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
expect(visual!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6-thinking")
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("quotio")
})
})
@@ -87,7 +87,7 @@ describe("model-resolution check", () => {
expect(sisyphus).toBeDefined()
expect(sisyphus!.userOverride).toBeUndefined()
expect(sisyphus!.effectiveResolution).toContain("Provider fallback:")
expect(sisyphus!.effectiveResolution).toContain("anthropic")
expect(sisyphus!.effectiveResolution).toContain("quotio")
})
it("captures user variant for agent when configured", async () => {

View File

@@ -1,8 +1,6 @@
import {
AGENT_MODEL_REQUIREMENTS,
type FallbackEntry,
} from "../shared/model-requirements"
import type { FallbackEntry } from "../shared/model-requirements"
import type { ProviderAvailability } from "./model-fallback-types"
import { CLI_AGENT_MODEL_REQUIREMENTS } from "./model-fallback-requirements"
import { isProviderAvailable } from "./provider-availability"
import { transformModelForProvider } from "./provider-model-id-transform"
@@ -25,7 +23,7 @@ export function resolveModelFromChain(
}
export function getSisyphusFallbackChain(): FallbackEntry[] {
return AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain
return CLI_AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain
}
export function isAnyFallbackEntryAvailable(

View File

@@ -0,0 +1,153 @@
import type { ModelRequirement } from "../shared/model-requirements"
// NOTE: These requirements are used by the CLI config generator (`generateModelConfig`).
// They intentionally use "install-time" provider IDs (anthropic/openai/google/opencode/etc),
// not runtime providers like `quotio`/`nvidia`.
// Install-time fallback chains per agent role. Entries are tried in order:
// `providers` lists the provider IDs that can serve `model`, and the optional
// `variant` selects a reasoning-effort preset for that model.
export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
  sisyphus: {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["opencode"], model: "kimi-k2.5-free" },
      { providers: ["zai-coding-plan"], model: "glm-4.7" },
      { providers: ["opencode"], model: "glm-4.7-free" },
    ],
    // requiresAnyModel: generateModelConfig omits this agent entirely when no
    // chain entry resolves, instead of writing the ultimate-fallback model.
    requiresAnyModel: true,
  },
  hephaestus: {
    fallbackChain: [
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
    ],
    // NOTE(review): the consumer of requiresProvider is not visible in this
    // chunk — presumably gates the agent on one of these providers; verify.
    requiresProvider: ["openai", "github-copilot", "opencode"],
  },
  oracle: {
    fallbackChain: [
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
    ],
  },
  librarian: {
    fallbackChain: [
      { providers: ["zai-coding-plan"], model: "glm-4.7" },
      { providers: ["opencode"], model: "glm-4.7-free" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
    ],
  },
  explore: {
    fallbackChain: [
      { providers: ["github-copilot"], model: "grok-code-fast-1" },
      { providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
      { providers: ["opencode"], model: "gpt-5-nano" },
    ],
  },
  "multimodal-looker": {
    fallbackChain: [
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
      { providers: ["zai-coding-plan"], model: "glm-4.6v" },
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["opencode"], model: "kimi-k2.5-free" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
      { providers: ["opencode"], model: "gpt-5-nano" },
    ],
  },
  prometheus: {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["opencode"], model: "kimi-k2.5-free" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
    ],
  },
  metis: {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["opencode"], model: "kimi-k2.5-free" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
    ],
  },
  momus: {
    fallbackChain: [
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
    ],
  },
  atlas: {
    fallbackChain: [
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["opencode"], model: "kimi-k2.5-free" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
    ],
  },
}
// Install-time fallback chains per task category (same entry semantics as the
// agent table: `providers` tried in order for `model`, optional `variant`).
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
  "visual-engineering": {
    fallbackChain: [
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
      { providers: ["zai-coding-plan"], model: "glm-5" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["kimi-for-coding"], model: "k2p5" },
    ],
  },
  ultrabrain: {
    fallbackChain: [
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
    ],
  },
  deep: {
    fallbackChain: [
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
    ],
    // requiresModel: generateModelConfig guards this category with
    // isRequiredModelAvailable before emitting it.
    requiresModel: "gpt-5.3-codex",
  },
  artistry: {
    fallbackChain: [
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
    ],
    // Same isRequiredModelAvailable guard as `deep`, keyed on gemini-3-pro.
    requiresModel: "gemini-3-pro",
  },
  quick: {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
      { providers: ["opencode"], model: "gpt-5-nano" },
    ],
  },
  "unspecified-low": {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
    ],
  },
  // NOTE: generateModelConfig substitutes the "unspecified-low" chain for this
  // category when the user is not on a max plan (avail.isMaxPlan is false).
  "unspecified-high": {
    fallbackChain: [
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
    ],
  },
  writing: {
    fallbackChain: [
      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
    ],
  },
}

View File

@@ -1,7 +1,7 @@
import {
AGENT_MODEL_REQUIREMENTS,
CATEGORY_MODEL_REQUIREMENTS,
} from "../shared/model-requirements"
CLI_AGENT_MODEL_REQUIREMENTS,
CLI_CATEGORY_MODEL_REQUIREMENTS,
} from "./model-fallback-requirements"
import type { InstallConfig } from "./types"
import type { AgentConfig, CategoryConfig, GeneratedOmoConfig } from "./model-fallback-types"
@@ -16,9 +16,9 @@ import {
export type { GeneratedOmoConfig } from "./model-fallback-types"
const LIBRARIAN_MODEL = "opencode/minimax-m2.5-free"
const ZAI_MODEL = "zai-coding-plan/glm-4.7"
const ULTIMATE_FALLBACK = "opencode/big-pickle"
const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"
@@ -38,12 +38,12 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
return {
$schema: SCHEMA_URL,
agents: Object.fromEntries(
Object.entries(AGENT_MODEL_REQUIREMENTS)
Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)
.filter(([role, req]) => !(role === "sisyphus" && req.requiresAnyModel))
.map(([role]) => [role, { model: ULTIMATE_FALLBACK }])
),
categories: Object.fromEntries(
Object.keys(CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }])
Object.keys(CLI_CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }])
),
}
}
@@ -51,9 +51,9 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
const agents: Record<string, AgentConfig> = {}
const categories: Record<string, CategoryConfig> = {}
for (const [role, req] of Object.entries(AGENT_MODEL_REQUIREMENTS)) {
if (role === "librarian") {
agents[role] = { model: LIBRARIAN_MODEL }
for (const [role, req] of Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)) {
if (role === "librarian" && avail.zai) {
agents[role] = { model: ZAI_MODEL }
continue
}
@@ -75,7 +75,6 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
if (req.requiresAnyModel && !isAnyFallbackEntryAvailable(fallbackChain, avail)) {
continue
}
const resolved = resolveModelFromChain(fallbackChain, avail)
if (resolved) {
const variant = resolved.variant ?? req.variant
@@ -100,11 +99,11 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
}
}
for (const [cat, req] of Object.entries(CATEGORY_MODEL_REQUIREMENTS)) {
for (const [cat, req] of Object.entries(CLI_CATEGORY_MODEL_REQUIREMENTS)) {
// Special case: unspecified-high downgrades to unspecified-low when not isMaxPlan
const fallbackChain =
cat === "unspecified-high" && !avail.isMaxPlan
? CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain
? CLI_CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain
: req.fallbackChain
if (req.requiresModel && !isRequiredModelAvailable(req.requiresModel, req.fallbackChain, avail)) {

View File

@@ -17,6 +17,10 @@ export const ExperimentalConfigSchema = z.object({
safe_hook_creation: z.boolean().optional(),
/** Disable auto-injected <omo-env> context in prompts (experimental) */
disable_omo_env: z.boolean().optional(),
/** Enable hashline_edit tool for improved file editing with hash-based line anchors */
hashline_edit: z.boolean().optional(),
/** Append fallback model info to session title when a runtime fallback occurs (default: false) */
model_fallback_title: z.boolean().optional(),
})
export type ExperimentalConfig = z.infer<typeof ExperimentalConfigSchema>

View File

@@ -13,6 +13,7 @@ export const HookNameSchema = z.enum([
"directory-readme-injector",
"empty-task-response-detector",
"think-mode",
"model-fallback",
"anthropic-context-window-limit-recovery",
"preemptive-compaction",
"rules-injector",
@@ -25,6 +26,7 @@ export const HookNameSchema = z.enum([
"interactive-bash-session",
"thinking-block-validator",
"beast-mode-system",
"ralph-loop",
"category-skill-reminder",

View File

@@ -2920,6 +2920,39 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
})
describe("BackgroundManager.handleEvent - session.error", () => {
// Shared fixtures for the session-error retry tests below.
// Chain used when a test does not supply its own fallback chain.
const defaultRetryFallbackChain = [
  { providers: ["quotio"], model: "claude-opus-4-6", variant: "max" },
  { providers: ["quotio"], model: "gpt-5.3-codex", variant: "high" },
]
// Replace the private queue processor with a no-op so a retried task stays
// queued and its state can be asserted synchronously.
const stubProcessKey = (manager: BackgroundManager) => {
  ;(manager as unknown as { processKey: (key: string) => Promise<void> }).processKey = async () => {}
}
// Register a running sisyphus task in the manager's task map, primed with a
// fallback chain and attemptCount 0 so a retry can fire on the first error.
const createRetryTask = (manager: BackgroundManager, input: {
  id: string
  sessionID: string
  description: string
  concurrencyKey?: string
  fallbackChain?: typeof defaultRetryFallbackChain
}) => {
  const task = createMockTask({
    id: input.id,
    sessionID: input.sessionID,
    parentSessionID: "parent-session",
    parentMessageID: "msg-retry",
    description: input.description,
    agent: "sisyphus",
    status: "running",
    concurrencyKey: input.concurrencyKey,
    model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
    fallbackChain: input.fallbackChain ?? defaultRetryFallbackChain,
    attemptCount: 0,
  })
  getTaskMap(manager).set(task.id, task)
  return task
}
test("sets task to error, releases concurrency, and cleans up", async () => {
//#given
const manager = createBackgroundManager()
@@ -3046,6 +3079,135 @@ describe("BackgroundManager.handleEvent - session.error", () => {
manager.shutdown()
})
test("retry path releases current concurrency slot and prefers current provider in fallback entry", async () => {
//#given
const manager = createBackgroundManager()
const concurrencyManager = getConcurrencyManager(manager)
const concurrencyKey = "quotio/claude-opus-4-6-thinking"
await concurrencyManager.acquire(concurrencyKey)
stubProcessKey(manager)
const sessionID = "ses_error_retry"
const task = createRetryTask(manager, {
id: "task-session-error-retry",
sessionID,
description: "task that should retry",
concurrencyKey,
fallbackChain: [
{ providers: ["quotio"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["quotio"], model: "claude-opus-4-5" },
],
})
//#when
manager.handleEvent({
type: "session.error",
properties: {
sessionID,
error: {
name: "UnknownError",
data: {
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
},
},
},
})
//#then
expect(task.status).toBe("pending")
expect(task.attemptCount).toBe(1)
expect(task.model).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
variant: "max",
})
expect(task.concurrencyKey).toBeUndefined()
expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
manager.shutdown()
})
test("retry path triggers on session.status retry events", async () => {
//#given
const manager = createBackgroundManager()
stubProcessKey(manager)
const sessionID = "ses_status_retry"
const task = createRetryTask(manager, {
id: "task-status-retry",
sessionID,
description: "task that should retry on status",
})
//#when
manager.handleEvent({
type: "session.status",
properties: {
sessionID,
status: {
type: "retry",
message: "Provider is overloaded",
},
},
})
//#then
expect(task.status).toBe("pending")
expect(task.attemptCount).toBe(1)
expect(task.model).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
variant: "max",
})
manager.shutdown()
})
test("retry path triggers on message.updated assistant error events", async () => {
//#given
const manager = createBackgroundManager()
stubProcessKey(manager)
const sessionID = "ses_message_updated_retry"
const task = createRetryTask(manager, {
id: "task-message-updated-retry",
sessionID,
description: "task that should retry on message.updated",
})
//#when
manager.handleEvent({
type: "message.updated",
properties: {
info: {
id: "msg_errored",
sessionID,
role: "assistant",
error: {
name: "UnknownError",
data: {
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
},
},
},
},
})
//#then
expect(task.status).toBe("pending")
expect(task.attemptCount).toBe(1)
expect(task.model).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
variant: "max",
})
manager.shutdown()
})
})
describe("BackgroundManager queue processing - error tasks are skipped", () => {

View File

@@ -5,6 +5,7 @@ import type {
LaunchInput,
ResumeInput,
} from "./types"
import type { FallbackEntry } from "../../shared/model-requirements"
import { TaskHistory } from "./task-history"
import {
log,
@@ -12,6 +13,8 @@ import {
normalizePromptTools,
normalizeSDKResponse,
promptWithModelSuggestionRetry,
readConnectedProvidersCache,
readProviderModelsCache,
resolveInheritedPromptTools,
createInternalAgentTextPart,
} from "../../shared"
@@ -19,6 +22,12 @@ import { setSessionTools } from "../../shared/session-tools-store"
import { ConcurrencyManager } from "./concurrency"
import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
import { isInsideTmux } from "../../shared/tmux"
import {
shouldRetryError,
getNextFallback,
hasMoreFallbacks,
selectFallbackProvider,
} from "../../shared/model-error-classifier"
import {
DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS,
DEFAULT_STALE_TIMEOUT_MS,
@@ -156,6 +165,8 @@ export class BackgroundManager {
parentAgent: input.parentAgent,
parentTools: input.parentTools,
model: input.model,
fallbackChain: input.fallbackChain,
attemptCount: 0,
category: input.category,
}
@@ -677,6 +688,27 @@ export class BackgroundManager {
handleEvent(event: Event): void {
const props = event.properties
if (event.type === "message.updated") {
const info = props?.info
if (!info || typeof info !== "object") return
const sessionID = (info as Record<string, unknown>)["sessionID"]
const role = (info as Record<string, unknown>)["role"]
if (typeof sessionID !== "string" || role !== "assistant") return
const task = this.findBySession(sessionID)
if (!task || task.status !== "running") return
const assistantError = (info as Record<string, unknown>)["error"]
if (!assistantError) return
const errorInfo = {
name: this.extractErrorName(assistantError),
message: this.extractErrorMessage(assistantError),
}
this.tryFallbackRetry(task, errorInfo, "message.updated")
}
if (event.type === "message.part.updated" || event.type === "message.part.delta") {
if (!props || typeof props !== "object" || !("sessionID" in props)) return
const partInfo = props as unknown as MessagePartInfo
@@ -773,10 +805,29 @@ export class BackgroundManager {
const task = this.findBySession(sessionID)
if (!task || task.status !== "running") return
const errorObj = props?.error as { name?: string; message?: string } | undefined
const errorName = errorObj?.name
const errorMessage = props ? this.getSessionErrorMessage(props) : undefined
const errorInfo = { name: errorName, message: errorMessage }
if (this.tryFallbackRetry(task, errorInfo, "session.error")) return
// Original error handling (no retry)
const errorMsg = errorMessage ?? "Session error"
const canRetry =
shouldRetryError(errorInfo) &&
!!task.fallbackChain &&
hasMoreFallbacks(task.fallbackChain, task.attemptCount ?? 0)
log("[background-agent] Session error - no retry:", {
taskId: task.id,
errorName,
errorMessage: errorMsg?.slice(0, 100),
hasFallbackChain: !!task.fallbackChain,
canRetry,
})
task.status = "error"
task.error = errorMessage ?? "Session error"
task.error = errorMsg
task.completedAt = new Date()
this.taskHistory.record(task.parentSessionID, { id: task.id, sessionID: task.sessionID, agent: task.agent, description: task.description, status: "error", category: task.category, startedAt: task.startedAt, completedAt: task.completedAt })
@@ -860,6 +911,129 @@ export class BackgroundManager {
}
}
}
if (event.type === "session.status") {
const sessionID = props?.sessionID as string | undefined
const status = props?.status as { type?: string; message?: string } | undefined
if (!sessionID || status?.type !== "retry") return
const task = this.findBySession(sessionID)
if (!task || task.status !== "running") return
const errorMessage = typeof status.message === "string" ? status.message : undefined
const errorInfo = { name: "SessionRetry", message: errorMessage }
this.tryFallbackRetry(task, errorInfo, "session.status")
}
}
/**
 * Attempt a runtime model-fallback retry for a failed background task.
 *
 * Returns true when the task was reset and re-queued on the next reachable
 * fallback entry; false when the error is not retryable, the chain is
 * exhausted, or no remaining entry has a connected provider — in which case
 * the caller applies its normal (non-retry) error handling.
 */
private tryFallbackRetry(
  task: BackgroundTask,
  errorInfo: { name?: string; message?: string },
  source: string,
): boolean {
  const fallbackChain = task.fallbackChain
  // Retry only when the error is classified retryable AND the chain still
  // has entries beyond the attempts already consumed.
  const canRetry =
    shouldRetryError(errorInfo) &&
    fallbackChain &&
    fallbackChain.length > 0 &&
    hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
  if (!canRetry) return false
  const attemptCount = task.attemptCount ?? 0
  // Prefer the richer provider-models cache; fall back to the plain
  // connected-providers cache when it is absent.
  const providerModelsCache = readProviderModelsCache()
  const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
  const connectedSet = connectedProviders ? new Set(connectedProviders) : null
  const isReachable = (entry: FallbackEntry): boolean => {
    // No cache at all -> assume reachable rather than blocking every retry.
    if (!connectedSet) return true
    // Gate only on provider connectivity. Provider model lists can be stale/incomplete,
    // especially after users manually add models to opencode.json.
    return entry.providers.some((p) => connectedSet.has(p))
  }
  // Walk the chain from the current attempt index, skipping entries whose
  // providers are not connected; each skipped entry still consumes an attempt.
  let selectedAttemptCount = attemptCount
  let nextFallback: FallbackEntry | undefined
  while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
    const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
    if (!candidate) break
    selectedAttemptCount++
    if (!isReachable(candidate)) {
      log("[background-agent] Skipping unreachable fallback:", {
        taskId: task.id,
        source,
        model: candidate.model,
        providers: candidate.providers,
      })
      continue
    }
    nextFallback = candidate
    break
  }
  if (!nextFallback) return false
  // Keep the task on its current provider when that provider also serves the
  // fallback entry (selectFallbackProvider prefers task.model.providerID).
  const providerID = selectFallbackProvider(
    nextFallback.providers,
    task.model?.providerID,
  )
  log("[background-agent] Retryable error, attempting fallback:", {
    taskId: task.id,
    source,
    errorName: errorInfo.name,
    errorMessage: errorInfo.message?.slice(0, 100),
    attemptCount: selectedAttemptCount,
    nextModel: `${providerID}/${nextFallback.model}`,
  })
  // Release the old concurrency slot before re-queueing under the new key.
  if (task.concurrencyKey) {
    this.concurrencyManager.release(task.concurrencyKey)
    task.concurrencyKey = undefined
  }
  // Abort the errored session (best effort) and drop its subagent tracking.
  if (task.sessionID) {
    this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
    subagentSessions.delete(task.sessionID)
  }
  // Cancel any pending idle-deferral timer tied to the old run.
  const idleTimer = this.idleDeferralTimers.get(task.id)
  if (idleTimer) {
    clearTimeout(idleTimer)
    this.idleDeferralTimers.delete(task.id)
  }
  // Reset the task to a fresh pending state on the fallback model.
  task.attemptCount = selectedAttemptCount
  task.model = {
    providerID,
    modelID: nextFallback.model,
    variant: nextFallback.variant,
  }
  task.status = "pending"
  task.sessionID = undefined
  task.startedAt = undefined
  task.queuedAt = new Date()
  task.error = undefined
  // Re-queue under the provider/model key (task.model was just assigned, so
  // the agent-name fallback for the key is defensive only).
  const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent
  const queue = this.queuesByKey.get(key) ?? []
  const retryInput: LaunchInput = {
    description: task.description,
    prompt: task.prompt,
    agent: task.agent,
    parentSessionID: task.parentSessionID,
    parentMessageID: task.parentMessageID,
    parentModel: task.parentModel,
    parentAgent: task.parentAgent,
    parentTools: task.parentTools,
    model: task.model,
    fallbackChain: task.fallbackChain,
    category: task.category,
  }
  queue.push({ task, input: retryInput })
  this.queuesByKey.set(key, queue)
  this.processKey(key)
  return true
}
markForNotification(task: BackgroundTask): void {
@@ -1273,10 +1447,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
if (isCompactionAgent(info?.agent)) {
continue
}
if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
agent = info.agent ?? task.parentAgent
model = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
tools = normalizePromptTools(info.tools) ?? tools
const normalizedTools = this.isRecord(info?.tools)
? normalizePromptTools(info.tools as Record<string, boolean | "allow" | "deny" | "ask">)
: undefined
if (info?.agent || info?.model || (info?.modelID && info?.providerID) || normalizedTools) {
agent = info?.agent ?? task.parentAgent
model = info?.model ?? (info?.providerID && info?.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
tools = normalizedTools ?? tools
break
}
}
@@ -1296,7 +1473,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
tools = normalizePromptTools(currentMessage?.tools) ?? tools
}
tools = resolveInheritedPromptTools(task.parentSessionID, tools)
const resolvedTools = resolveInheritedPromptTools(task.parentSessionID, tools)
log("[background-agent] notifyParentSession context:", {
taskId: task.id,
@@ -1311,7 +1488,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
noReply: !allComplete,
...(agent !== undefined ? { agent } : {}),
...(model !== undefined ? { model } : {}),
...(tools ? { tools } : {}),
...(resolvedTools ? { tools: resolvedTools } : {}),
parts: [createInternalAgentTextPart(notification)],
},
})
@@ -1394,6 +1571,46 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
return ""
}
/** Best-effort extraction of an error's `name`: Error instances and plain
 * objects carrying a string `name` are both supported. */
private extractErrorName(error: unknown): string | undefined {
  if (error instanceof Error) return error.name
  if (this.isRecord(error)) {
    const name = error["name"]
    if (typeof name === "string") return name
  }
  return undefined
}
/**
 * Pull a human-readable message out of the many error shapes seen at runtime:
 * plain strings, Error instances, and nested `{ data / error / cause }`
 * envelopes. Falls back to JSON-serializing the value so callers always get
 * something loggable.
 */
private extractErrorMessage(error: unknown): string | undefined {
  if (!error) return undefined
  if (typeof error === "string") return error
  if (error instanceof Error) return error.message
  if (this.isRecord(error)) {
    const dataRaw = error["data"]
    // Probe order: the object itself, then the common nesting spots
    // (`data`, `error`, `data.error`, `cause`).
    const candidates: unknown[] = [
      error,
      dataRaw,
      error["error"],
      this.isRecord(dataRaw) ? (dataRaw as Record<string, unknown>)["error"] : undefined,
      error["cause"],
    ]
    for (const candidate of candidates) {
      // Accept a non-empty string, or an object with a non-empty string `message`.
      if (typeof candidate === "string" && candidate.length > 0) return candidate
      if (
        this.isRecord(candidate) &&
        typeof candidate["message"] === "string" &&
        candidate["message"].length > 0
      ) {
        return candidate["message"]
      }
    }
  }
  // Last resort: serialize whatever was thrown (guarded against cyclic values).
  try {
    return JSON.stringify(error)
  } catch {
    return String(error)
  }
}
/** Type guard: true for any non-null object value (arrays included). */
private isRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === "object"
}
@@ -1610,6 +1827,16 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
// Progress is already tracked via handleEvent(message.part.updated),
// so we skip the expensive session.messages() fetch here.
// Completion will be detected when session transitions to idle.
if (sessionStatus?.type === "retry") {
const retryMessage = typeof (sessionStatus as { message?: string }).message === "string"
? (sessionStatus as { message?: string }).message
: undefined
const errorInfo = { name: "SessionRetry", message: retryMessage }
if (this.tryFallbackRetry(task, errorInfo, "polling:session.status")) {
continue
}
}
log("[background-agent] Session still running, relying on event-based progress:", {
taskId: task.id,
sessionID,

View File

@@ -1,3 +1,5 @@
import type { FallbackEntry } from "../../shared/model-requirements"
export type BackgroundTaskStatus =
| "pending"
| "running"
@@ -31,6 +33,10 @@ export interface BackgroundTask {
progress?: TaskProgress
parentModel?: { providerID: string; modelID: string }
model?: { providerID: string; modelID: string; variant?: string }
/** Fallback chain for runtime retry on model errors */
fallbackChain?: FallbackEntry[]
/** Number of fallback retry attempts made */
attemptCount?: number
/** Active concurrency slot key */
concurrencyKey?: string
/** Persistent key for re-acquiring concurrency on resume */
@@ -60,6 +66,8 @@ export interface LaunchInput {
parentAgent?: string
parentTools?: Record<string, boolean>
model?: { providerID: string; modelID: string; variant?: string }
/** Fallback chain for runtime retry on model errors */
fallbackChain?: FallbackEntry[]
isUnstableAgent?: boolean
skills?: string[]
skillContent?: string

View File

@@ -1,4 +1,5 @@
export const subagentSessions = new Set<string>()
export const syncSubagentSessions = new Set<string>()
let _mainSessionID: string | undefined
@@ -14,6 +15,7 @@ export function getMainSessionID(): string | undefined {
export function _resetForTesting(): void {
_mainSessionID = undefined
subagentSessions.clear()
syncSubagentSessions.clear()
sessionAgentMap.clear()
}

View File

@@ -25,13 +25,13 @@ export function discoverAllSkillsBlocking(dirs: string[], scopes: SkillScope[]):
const { port1, port2 } = new MessageChannel()
const worker = new Worker(new URL("./discover-worker.ts", import.meta.url), {
workerData: { signal }
// workerData is structured-cloned; pass the SharedArrayBuffer and recreate the view in the worker.
workerData: { signalBuffer: signal.buffer },
})
worker.postMessage({ port: port2 }, [port2])
const input: WorkerInput = { dirs, scopes }
port1.postMessage(input)
// Avoid a race where the worker hasn't attached listeners to the MessagePort yet.
worker.postMessage({ port: port2, input }, [port2])
const waitResult = Atomics.wait(signal, 0, 0, TIMEOUT_MS)

View File

@@ -18,25 +18,24 @@ interface WorkerOutputError {
error: { message: string; stack?: string }
}
const { signal } = workerData as { signal: Int32Array }
const { signalBuffer } = workerData as { signalBuffer: SharedArrayBuffer }
const signal = new Int32Array(signalBuffer)
if (!parentPort) {
throw new Error("Worker must be run with parentPort")
}
parentPort.once("message", (data: { port: MessagePort }) => {
const { port } = data
parentPort.once("message", (data: { port: MessagePort; input: WorkerInput }) => {
const { port, input } = data
port.on("message", async (input: WorkerInput) => {
void (async () => {
try {
const results = await Promise.all(
input.dirs.map(dir => discoverSkillsInDirAsync(dir))
)
const results = await Promise.all(input.dirs.map((dir) => discoverSkillsInDirAsync(dir)))
const skills = results.flat()
const output: WorkerOutputSuccess = { ok: true, skills }
port.postMessage(output)
Atomics.store(signal, 0, 1)
Atomics.notify(signal, 0)
@@ -48,10 +47,10 @@ parentPort.once("message", (data: { port: MessagePort }) => {
stack: error instanceof Error ? error.stack : undefined,
},
}
port.postMessage(output)
Atomics.store(signal, 0, 1)
Atomics.notify(signal, 0)
}
})
})()
})

View File

@@ -217,6 +217,27 @@ describe("TaskToastManager", () => {
expect(call.body.message).toContain("(inherited from parent)")
})
test("should display warning when model is runtime fallback", () => {
// given - runtime-fallback indicates a model swap mid-run
const task = {
id: "task_runtime",
description: "Task with runtime fallback model",
agent: "explore",
isBackground: false,
modelInfo: { model: "quotio/oswe-vscode-prime", type: "runtime-fallback" as const },
}
// when - addTask is called
toastManager.addTask(task)
// then - toast should show fallback warning
expect(mockClient.tui.showToast).toHaveBeenCalled()
const call = mockClient.tui.showToast.mock.calls[0][0]
expect(call.body.message).toContain("[FALLBACK]")
expect(call.body.message).toContain("quotio/oswe-vscode-prime")
expect(call.body.message).toContain("(runtime fallback)")
})
test("should not display model info when user-defined", () => {
// given - a task with user-defined model
const task = {
@@ -257,4 +278,32 @@ describe("TaskToastManager", () => {
expect(call.body.message).not.toContain("[FALLBACK] Model:")
})
})
describe("updateTaskModelBySession", () => {
test("updates task model info and shows fallback toast", () => {
// given - task without model info
const task = {
id: "task_update",
sessionID: "ses_update_1",
description: "Task that will fallback",
agent: "explore",
isBackground: false,
}
toastManager.addTask(task)
mockClient.tui.showToast.mockClear()
// when - runtime fallback applied by session
toastManager.updateTaskModelBySession("ses_update_1", {
model: "nvidia/stepfun-ai/step-3.5-flash",
type: "runtime-fallback",
})
// then - new toast shows fallback model
expect(mockClient.tui.showToast).toHaveBeenCalled()
const call = mockClient.tui.showToast.mock.calls[0][0]
expect(call.body.message).toContain("[FALLBACK]")
expect(call.body.message).toContain("nvidia/stepfun-ai/step-3.5-flash")
expect(call.body.message).toContain("(runtime fallback)")
})
})
})

View File

@@ -20,6 +20,7 @@ export class TaskToastManager {
addTask(task: {
id: string
sessionID?: string
description: string
agent: string
isBackground: boolean
@@ -30,6 +31,7 @@ export class TaskToastManager {
}): void {
const trackedTask: TrackedTask = {
id: task.id,
sessionID: task.sessionID,
description: task.description,
agent: task.agent,
status: task.status ?? "running",
@@ -54,6 +56,18 @@ export class TaskToastManager {
}
}
/**
 * Updates the tracked model info for the task tied to a session and re-shows
 * the task-list toast so the change is visible (used for runtime fallbacks).
 * No-op when sessionID is empty, no tracked task matches, or the model info
 * is unchanged (prevents duplicate toasts on repeated fallback events).
 */
updateTaskModelBySession(sessionID: string, modelInfo: ModelFallbackInfo): void {
if (!sessionID) return
// Linear scan: tasks are keyed by task id, not session id.
const task = Array.from(this.tasks.values()).find((t) => t.sessionID === sessionID)
if (!task) return
// Same model + type already recorded — nothing new to announce.
if (task.modelInfo?.model === modelInfo.model && task.modelInfo?.type === modelInfo.type) return
task.modelInfo = modelInfo
this.showTaskListToast(task)
}
/**
* Remove completed/error task
*/
@@ -110,14 +124,17 @@ export class TaskToastManager {
const lines: string[] = []
const isFallback = newTask.modelInfo && (
newTask.modelInfo.type === "inherited" || newTask.modelInfo.type === "system-default"
newTask.modelInfo.type === "inherited" ||
newTask.modelInfo.type === "system-default" ||
newTask.modelInfo.type === "runtime-fallback"
)
if (isFallback) {
const suffixMap: Record<"inherited" | "system-default", string> = {
const suffixMap: Record<"inherited" | "system-default" | "runtime-fallback", string> = {
inherited: " (inherited from parent)",
"system-default": " (system default fallback)",
"runtime-fallback": " (runtime fallback)",
}
const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default"]
const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default" | "runtime-fallback"]
lines.push(`[FALLBACK] Model: ${newTask.modelInfo!.model}${suffix}`)
lines.push("")
}

View File

@@ -4,12 +4,13 @@ export type TaskStatus = "running" | "queued" | "completed" | "error"
export interface ModelFallbackInfo {
model: string
type: "user-defined" | "inherited" | "category-default" | "system-default"
type: "user-defined" | "inherited" | "category-default" | "system-default" | "runtime-fallback"
source?: ModelSource
}
export interface TrackedTask {
id: string
sessionID?: string
description: string
agent: string
status: TaskStatus

View File

@@ -0,0 +1,54 @@
import { describe, expect, test } from "bun:test"
import { clearSessionModel, setSessionModel } from "../../shared/session-model-state"
import { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook"
describe("beast-mode-system hook", () => {
test("injects beast mode prompt for copilot gpt-4.1", async () => {
//#given
const sessionID = "ses_beast"
setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
const hook = createBeastModeSystemHook()
const output = { system: [] as string[] }
//#when
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
//#then
expect(output.system[0]).toContain("Beast Mode")
expect(output.system[0]).toContain(BEAST_MODE_SYSTEM_PROMPT.trim().slice(0, 20))
clearSessionModel(sessionID)
})
test("does not inject for other models", async () => {
//#given
const sessionID = "ses_no_beast"
setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" })
const hook = createBeastModeSystemHook()
const output = { system: [] as string[] }
//#when
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
//#then
expect(output.system.length).toBe(0)
clearSessionModel(sessionID)
})
test("avoids duplicate insertion", async () => {
//#given
const sessionID = "ses_dupe"
setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
const hook = createBeastModeSystemHook()
const output = { system: [BEAST_MODE_SYSTEM_PROMPT] }
//#when
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
//#then
expect(output.system.length).toBe(1)
clearSessionModel(sessionID)
})
})

View File

@@ -0,0 +1,31 @@
import { getSessionModel } from "../../shared/session-model-state"
export const BEAST_MODE_SYSTEM_PROMPT = `Beast Mode (Copilot GPT-4.1)
You are an autonomous coding agent. Execute the task end-to-end.
- Make a brief plan, then act.
- Prefer concrete edits and verification over speculation.
- Run relevant tests when feasible.
- Do not ask the user to perform actions you can do yourself.
- If blocked, state exactly what is needed to proceed.
- Keep responses concise and actionable.`
/**
 * True only for GitHub Copilot's GPT-4.1 — the sole model that receives the
 * Beast Mode system prompt.
 */
function isBeastModeModel(model: { providerID: string; modelID: string } | undefined): boolean {
  if (model === undefined) return false
  const { providerID, modelID } = model
  return providerID === "github-copilot" && modelID === "gpt-4.1"
}
/**
 * Creates the hook that prepends the Beast Mode system prompt for sessions
 * running GitHub Copilot GPT-4.1. Injection is skipped when any existing
 * system entry already contains "Beast Mode", keeping repeated transforms
 * idempotent.
 */
export function createBeastModeSystemHook() {
  const transform = async (
    input: { sessionID: string },
    output: { system: string[] },
  ): Promise<void> => {
    if (!isBeastModeModel(getSessionModel(input.sessionID))) return
    const alreadyInjected = output.system.some((entry) => entry.includes("Beast Mode"))
    if (alreadyInjected) return
    output.system.unshift(BEAST_MODE_SYSTEM_PROMPT)
  }
  return { "experimental.chat.system.transform": transform }
}

View File

@@ -0,0 +1 @@
export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook"

View File

@@ -14,6 +14,7 @@ export { createEmptyTaskResponseDetectorHook } from "./empty-task-response-detec
export { createAnthropicContextWindowLimitRecoveryHook, type AnthropicContextWindowLimitRecoveryOptions } from "./anthropic-context-window-limit-recovery";
export { createThinkModeHook } from "./think-mode";
export { createModelFallbackHook, setPendingModelFallback, clearPendingModelFallback, type ModelFallbackState } from "./model-fallback/hook";
export { createClaudeCodeHooksHook } from "./claude-code-hooks";
export { createRulesInjectorHook } from "./rules-injector";
export { createBackgroundNotificationHook } from "./background-notification"
@@ -31,7 +32,6 @@ export { createNoSisyphusGptHook } from "./no-sisyphus-gpt";
export { createNoHephaestusNonGptHook } from "./no-hephaestus-non-gpt";
export { createAutoSlashCommandHook } from "./auto-slash-command";
export { createEditErrorRecoveryHook } from "./edit-error-recovery";
export { createJsonErrorRecoveryHook } from "./json-error-recovery";
export { createPrometheusMdOnlyHook } from "./prometheus-md-only";
export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad";
export { createTaskResumeInfoHook } from "./task-resume-info";
@@ -47,5 +47,4 @@ export { createPreemptiveCompactionHook } from "./preemptive-compaction";
export { createTasksTodowriteDisablerHook } from "./tasks-todowrite-disabler";
export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
export { createHashlineEditDiffEnhancerHook } from "./hashline-edit-diff-enhancer";
export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./beast-mode-system";

View File

@@ -0,0 +1,141 @@
import { beforeEach, describe, expect, test } from "bun:test"
import {
clearPendingModelFallback,
createModelFallbackHook,
setPendingModelFallback,
} from "./hook"
describe("model fallback hook", () => {
beforeEach(() => {
clearPendingModelFallback("ses_model_fallback_main")
})
test("applies pending fallback on chat.message by overriding model", async () => {
//#given
const hook = createModelFallbackHook() as unknown as {
"chat.message"?: (
input: { sessionID: string },
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
) => Promise<void>
}
const set = setPendingModelFallback(
"ses_model_fallback_main",
"Sisyphus (Ultraworker)",
"quotio",
"claude-opus-4-6-thinking",
)
expect(set).toBe(true)
const output = {
message: {
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
variant: "max",
},
parts: [{ type: "text", text: "continue" }],
}
//#when
await hook["chat.message"]?.(
{ sessionID: "ses_model_fallback_main" },
output,
)
//#then
expect(output.message["model"]).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
})
})
test("preserves fallback progression across repeated session.error retries", async () => {
//#given
const hook = createModelFallbackHook() as unknown as {
"chat.message"?: (
input: { sessionID: string },
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
) => Promise<void>
}
const sessionID = "ses_model_fallback_main"
expect(
setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6-thinking"),
).toBe(true)
const firstOutput = {
message: {
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
variant: "max",
},
parts: [{ type: "text", text: "continue" }],
}
//#when - first retry is applied
await hook["chat.message"]?.({ sessionID }, firstOutput)
//#then
expect(firstOutput.message["model"]).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
})
//#when - second error re-arms fallback and should advance to next entry
expect(
setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6"),
).toBe(true)
const secondOutput = {
message: {
model: { providerID: "quotio", modelID: "claude-opus-4-6" },
},
parts: [{ type: "text", text: "continue" }],
}
await hook["chat.message"]?.({ sessionID }, secondOutput)
//#then - chain should progress to entry[1], not repeat entry[0]
expect(secondOutput.message["model"]).toEqual({
providerID: "quotio",
modelID: "gpt-5.3-codex",
})
expect(secondOutput.message["variant"]).toBe("high")
})
test("shows toast when fallback is applied", async () => {
//#given
const toastCalls: Array<{ title: string; message: string }> = []
const hook = createModelFallbackHook({
toast: async ({ title, message }) => {
toastCalls.push({ title, message })
},
}) as unknown as {
"chat.message"?: (
input: { sessionID: string },
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
) => Promise<void>
}
const set = setPendingModelFallback(
"ses_model_fallback_toast",
"Sisyphus (Ultraworker)",
"quotio",
"claude-opus-4-6-thinking",
)
expect(set).toBe(true)
const output = {
message: {
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
variant: "max",
},
parts: [{ type: "text", text: "continue" }],
}
//#when
await hook["chat.message"]?.({ sessionID: "ses_model_fallback_toast" }, output)
//#then
expect(toastCalls.length).toBe(1)
expect(toastCalls[0]?.title).toBe("Model fallback")
})
})

View File

@@ -0,0 +1,246 @@
import type { FallbackEntry } from "../../shared/model-requirements"
import { getAgentConfigKey } from "../../shared/agent-display-names"
import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements"
import { readConnectedProvidersCache, readProviderModelsCache } from "../../shared/connected-providers-cache"
import { selectFallbackProvider } from "../../shared/model-error-classifier"
import { log } from "../../shared/logger"
import { getTaskToastManager } from "../../features/task-toast-manager"
import type { ChatMessageInput, ChatMessageHandlerOutput } from "../../plugin/chat-message"
type FallbackToast = (input: {
title: string
message: string
variant?: "info" | "success" | "warning" | "error"
duration?: number
}) => void | Promise<void>
type FallbackCallback = (input: {
sessionID: string
providerID: string
modelID: string
variant?: string
}) => void | Promise<void>
export type ModelFallbackState = {
providerID: string
modelID: string
fallbackChain: FallbackEntry[]
attemptCount: number
pending: boolean
}
/**
 * Map of sessionID -> pending model fallback state
 * When a model error occurs, we store the fallback info here.
 * The next chat.message call will use this to switch to the fallback model.
 */
const pendingModelFallbacks = new Map<string, ModelFallbackState>()
// sessionID -> key of the last fallback toast shown; used to avoid showing the
// same "Model fallback" toast twice for the same provider/model/variant.
const lastToastKey = new Map<string, string>()
// sessionID -> per-session fallback chain that overrides the static
// AGENT_MODEL_REQUIREMENTS chain when present (see setSessionFallbackChain).
const sessionFallbackChains = new Map<string, FallbackEntry[]>()
/**
 * Registers (or clears) a per-session fallback chain that takes precedence
 * over the static agent requirements. Passing undefined or an empty chain
 * removes any existing override for the session.
 */
export function setSessionFallbackChain(sessionID: string, fallbackChain: FallbackEntry[] | undefined): void {
  if (!sessionID) return
  const hasEntries = !!fallbackChain && fallbackChain.length > 0
  if (hasEntries) {
    sessionFallbackChains.set(sessionID, fallbackChain)
  } else {
    sessionFallbackChains.delete(sessionID)
  }
}
/** Removes any per-session fallback chain override for this session. */
export function clearSessionFallbackChain(sessionID: string): void {
sessionFallbackChains.delete(sessionID)
}
/**
 * Sets a pending model fallback for a session.
 * Called when a model error is detected in session.error handler.
 *
 * Chain resolution: a non-empty per-session chain registered via
 * setSessionFallbackChain() wins; otherwise the agent's static
 * AGENT_MODEL_REQUIREMENTS chain is used.
 *
 * @returns true when a fallback is armed for the next chat.message; false when
 *          no chain exists for the agent or the chain is already exhausted.
 */
export function setPendingModelFallback(
  sessionID: string,
  agentName: string,
  currentProviderID: string,
  currentModelID: string,
): boolean {
  const agentKey = getAgentConfigKey(agentName)
  const requirements = AGENT_MODEL_REQUIREMENTS[agentKey]
  const sessionFallback = sessionFallbackChains.get(sessionID)
  const fallbackChain = sessionFallback && sessionFallback.length > 0
    ? sessionFallback
    : requirements?.fallbackChain
  if (!fallbackChain || fallbackChain.length === 0) {
    log("[model-fallback] No fallback chain for agent: " + agentName + " (key: " + agentKey + ")")
    return false
  }
  const existing = pendingModelFallbacks.get(sessionID)
  if (existing) {
    // Preserve progression across repeated session.error retries in the same
    // session. Check exhaustion BEFORE re-arming: the previous code set
    // pending = true first and then returned false on exhaustion, leaving a
    // stale pending state behind that made hasPendingModelFallback() report
    // true forever even though no retry would ever be issued.
    if (existing.attemptCount >= existing.fallbackChain.length) {
      log("[model-fallback] Fallback chain exhausted for session: " + sessionID)
      pendingModelFallbacks.delete(sessionID)
      return false
    }
    existing.providerID = currentProviderID
    existing.modelID = currentModelID
    existing.pending = true
    log("[model-fallback] Re-armed pending fallback for session: " + sessionID)
    return true
  }
  const state: ModelFallbackState = {
    providerID: currentProviderID,
    modelID: currentModelID,
    fallbackChain,
    attemptCount: 0,
    pending: true,
  }
  pendingModelFallbacks.set(sessionID, state)
  log("[model-fallback] Set pending fallback for session: " + sessionID + ", agent: " + agentName)
  return true
}
/**
 * Gets the next fallback model for a session.
 * Increments attemptCount each time called.
 *
 * Walks the armed fallback chain, skipping entries whose providers are all
 * known to be disconnected, and returns the first reachable entry. Returns
 * null when no fallback is pending, or when the chain is exhausted (in which
 * case the session's state is dropped entirely).
 */
export function getNextFallback(
sessionID: string,
): { providerID: string; modelID: string; variant?: string } | null {
const state = pendingModelFallbacks.get(sessionID)
if (!state) return null
// Only consume a fallback that was explicitly armed by setPendingModelFallback.
if (!state.pending) return null
const { fallbackChain } = state
// Prefer the provider-models cache's connectivity view; fall back to the
// standalone connected-providers cache when it is absent.
const providerModelsCache = readProviderModelsCache()
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
const connectedSet = connectedProviders ? new Set(connectedProviders) : null
const isReachable = (entry: FallbackEntry): boolean => {
// No connectivity info at all -> optimistically treat every entry as reachable.
if (!connectedSet) return true
// Gate only on provider connectivity. Provider model lists can be stale/incomplete,
// especially after users manually add models to opencode.json.
return entry.providers.some((p) => connectedSet.has(p))
}
while (state.attemptCount < fallbackChain.length) {
const attemptCount = state.attemptCount
const fallback = fallbackChain[attemptCount]
// Advance the cursor even for skipped entries so unreachable ones are not retried.
state.attemptCount++
if (!isReachable(fallback)) {
log("[model-fallback] Skipping unreachable fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model)
continue
}
const providerID = selectFallbackProvider(fallback.providers, state.providerID)
// Mark consumed; the next error must re-arm via setPendingModelFallback.
state.pending = false
log("[model-fallback] Using fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model)
return {
providerID,
modelID: fallback.model,
variant: fallback.variant,
}
}
// Chain exhausted: drop the state so future calls start clean.
log("[model-fallback] No more fallbacks for session: " + sessionID)
pendingModelFallbacks.delete(sessionID)
return null
}
/**
 * Clears the pending fallback for a session.
 * Called after fallback is successfully applied.
 * Also drops the toast-dedup key so a later fallback in the same session can
 * surface a fresh toast.
 */
export function clearPendingModelFallback(sessionID: string): void {
pendingModelFallbacks.delete(sessionID)
lastToastKey.delete(sessionID)
}
/**
 * Reports whether a fallback has been armed (and not yet consumed) for the
 * given session.
 */
export function hasPendingModelFallback(sessionID: string): boolean {
  return pendingModelFallbacks.get(sessionID)?.pending === true
}
/**
 * Gets the current fallback state for a session (for debugging).
 * Returns undefined when no fallback has ever been armed (or it was cleared).
 */
export function getFallbackState(sessionID: string): ModelFallbackState | undefined {
return pendingModelFallbacks.get(sessionID)
}
/**
 * Creates a chat.message hook that applies model fallbacks when pending.
 *
 * On each chat.message with an armed fallback it: overrides the outgoing
 * message's model (and variant), optionally shows a deduplicated toast,
 * invokes the onApplied callback, and updates the task-toast manager so the
 * fallback is visible in the task list.
 */
export function createModelFallbackHook(args?: { toast?: FallbackToast; onApplied?: FallbackCallback }) {
const toast = args?.toast
const onApplied = args?.onApplied
return {
"chat.message": async (
input: ChatMessageInput,
output: ChatMessageHandlerOutput,
): Promise<void> => {
const { sessionID } = input
if (!sessionID) return
// Consumes the pending state (sets pending = false) — see getNextFallback.
const fallback = getNextFallback(sessionID)
if (!fallback) return
output.message["model"] = {
providerID: fallback.providerID,
modelID: fallback.modelID,
}
// Variant must track the fallback entry: set it when specified, otherwise
// remove any stale variant left over from the failed model.
if (fallback.variant !== undefined) {
output.message["variant"] = fallback.variant
} else {
delete output.message["variant"]
}
if (toast) {
// Dedup key covers provider, model and variant so only a *changed*
// fallback produces a new toast for this session.
const key = `${sessionID}:${fallback.providerID}/${fallback.modelID}:${fallback.variant ?? ""}`
if (lastToastKey.get(sessionID) !== key) {
lastToastKey.set(sessionID, key)
const variantLabel = fallback.variant ? ` (${fallback.variant})` : ""
await Promise.resolve(
toast({
title: "Model fallback",
message: `Using ${fallback.providerID}/${fallback.modelID}${variantLabel}`,
variant: "warning",
duration: 5000,
}),
)
}
}
if (onApplied) {
await Promise.resolve(
onApplied({
sessionID,
providerID: fallback.providerID,
modelID: fallback.modelID,
variant: fallback.variant,
}),
)
}
// Mirror the fallback into the task list UI when a toast manager exists.
const toastManager = getTaskToastManager()
if (toastManager) {
const variantLabel = fallback.variant ? ` (${fallback.variant})` : ""
toastManager.updateTaskModelBySession(sessionID, {
model: `${fallback.providerID}/${fallback.modelID}${variantLabel}`,
type: "runtime-fallback",
})
}
log("[model-fallback] Applied fallback model: " + JSON.stringify(fallback))
},
}
}

View File

@@ -5,6 +5,7 @@ import { createChatParamsHandler } from "./plugin/chat-params"
import { createChatHeadersHandler } from "./plugin/chat-headers"
import { createChatMessageHandler } from "./plugin/chat-message"
import { createMessagesTransformHandler } from "./plugin/messages-transform"
import { createSystemTransformHandler } from "./plugin/system-transform"
import { createEventHandler } from "./plugin/event"
import { createToolExecuteAfterHandler } from "./plugin/tool-execute-after"
import { createToolExecuteBeforeHandler } from "./plugin/tool-execute-before"
@@ -49,6 +50,10 @@ export function createPluginInterface(args: {
hooks,
}),
"experimental.chat.system.transform": createSystemTransformHandler({
hooks,
}),
config: managers.configHandler,
event: createEventHandler({

View File

@@ -2,6 +2,7 @@ import type { OhMyOpenCodeConfig } from "../config"
import type { PluginContext } from "./types"
import { hasConnectedProvidersCache } from "../shared"
import { setSessionModel } from "../shared/session-model-state"
import { setSessionAgent } from "../features/claude-code-session-state"
import { applyUltraworkModelOverrideOnMessage } from "./ultrawork-model-override"
@@ -13,7 +14,12 @@ type FirstMessageVariantGate = {
}
type ChatMessagePart = { type: string; text?: string; [key: string]: unknown }
type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
export type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
export type ChatMessageInput = {
sessionID: string
agent?: string
model?: { providerID: string; modelID: string }
}
type StartWorkHookOutput = { parts: Array<{ type: string; text?: string }> }
function isStartWorkHookOutput(value: unknown): value is StartWorkHookOutput {
@@ -34,13 +40,13 @@ export function createChatMessageHandler(args: {
firstMessageVariantGate: FirstMessageVariantGate
hooks: CreatedHooks
}): (
input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
input: ChatMessageInput,
output: ChatMessageHandlerOutput
) => Promise<void> {
const { ctx, pluginConfig, firstMessageVariantGate, hooks } = args
return async (
input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
input: ChatMessageInput,
output: ChatMessageHandlerOutput
): Promise<void> => {
if (input.agent) {
@@ -53,6 +59,22 @@ export function createChatMessageHandler(args: {
firstMessageVariantGate.markApplied(input.sessionID)
}
await hooks.modelFallback?.["chat.message"]?.(input, output)
const modelOverride = output.message["model"]
if (
modelOverride &&
typeof modelOverride === "object" &&
"providerID" in modelOverride &&
"modelID" in modelOverride
) {
const providerID = (modelOverride as { providerID?: string }).providerID
const modelID = (modelOverride as { modelID?: string }).modelID
if (typeof providerID === "string" && typeof modelID === "string") {
setSessionModel(input.sessionID, { providerID, modelID })
}
} else if (input.model) {
setSessionModel(input.sessionID, input.model)
}
await hooks.stopContinuationGuard?.["chat.message"]?.(input)
await hooks.keywordDetector?.["chat.message"]?.(input, output)
await hooks.claudeCodeHooks?.["chat.message"]?.(input, output)

View File

@@ -0,0 +1,38 @@
import { describe, expect, test } from "bun:test"
import { createChatParamsHandler } from "./chat-params"
describe("createChatParamsHandler", () => {
test("normalizes object-style agent payload and runs chat.params hooks", async () => {
//#given
let called = false
const handler = createChatParamsHandler({
anthropicEffort: {
"chat.params": async (input) => {
called = input.agent.name === "sisyphus"
},
},
})
const input = {
sessionID: "ses_chat_params",
agent: { name: "sisyphus" },
model: { providerID: "opencode", modelID: "claude-opus-4-6" },
provider: { id: "opencode" },
message: {},
}
const output = {
temperature: 0.1,
topP: 1,
topK: 1,
options: {},
}
//#when
await handler(input, output)
//#then
expect(called).toBe(true)
})
})

View File

@@ -1,4 +1,4 @@
type ChatParamsInput = {
export type ChatParamsInput = {
sessionID: string
agent: { name?: string }
model: { providerID: string; modelID: string }
@@ -6,7 +6,7 @@ type ChatParamsInput = {
message: { variant?: string }
}
type ChatParamsOutput = {
export type ChatParamsOutput = {
temperature?: number
topP?: number
topK?: number
@@ -27,11 +27,21 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null {
const message = raw.message
if (typeof sessionID !== "string") return null
if (typeof agent !== "string") return null
if (!isRecord(model)) return null
if (!isRecord(provider)) return null
if (!isRecord(message)) return null
let agentName: string | undefined
if (typeof agent === "string") {
agentName = agent
} else if (isRecord(agent)) {
const name = agent.name
if (typeof name === "string") {
agentName = name
}
}
if (!agentName) return null
const providerID = model.providerID
const modelID = model.modelID
const providerId = provider.id
@@ -43,7 +53,7 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null {
return {
sessionID,
agent: { name: agent },
agent: { name: agentName },
model: { providerID, modelID },
provider: { id: providerId },
message: typeof variant === "string" ? { variant } : {},

View File

@@ -0,0 +1,343 @@
import { afterEach, describe, expect, test } from "bun:test"
import { createEventHandler } from "./event"
import { createChatMessageHandler } from "./chat-message"
import { _resetForTesting, setMainSession } from "../features/claude-code-session-state"
import { createModelFallbackHook, clearPendingModelFallback } from "../hooks/model-fallback/hook"
describe("createEventHandler - model fallback", () => {
const createHandler = (args?: { hooks?: any }) => {
const abortCalls: string[] = []
const promptCalls: string[] = []
const handler = createEventHandler({
ctx: {
directory: "/tmp",
client: {
session: {
abort: async ({ path }: { path: { id: string } }) => {
abortCalls.push(path.id)
return {}
},
prompt: async ({ path }: { path: { id: string } }) => {
promptCalls.push(path.id)
return {}
},
},
},
} as any,
pluginConfig: {} as any,
firstMessageVariantGate: {
markSessionCreated: () => {},
clear: () => {},
},
managers: {
tmuxSessionManager: {
onSessionCreated: async () => {},
onSessionDeleted: async () => {},
},
skillMcpManager: {
disconnectSession: async () => {},
},
} as any,
hooks: args?.hooks ?? ({} as any),
})
return { handler, abortCalls, promptCalls }
}
afterEach(() => {
_resetForTesting()
})
test("triggers retry prompt for assistant message.updated APIError payloads (headless resume)", async () => {
//#given
const sessionID = "ses_message_updated_fallback"
const { handler, abortCalls, promptCalls } = createHandler()
//#when
await handler({
event: {
type: "message.updated",
properties: {
info: {
id: "msg_err_1",
sessionID,
role: "assistant",
time: { created: 1, completed: 2 },
error: {
name: "APIError",
data: {
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
isRetryable: true,
},
},
parentID: "msg_user_1",
modelID: "claude-opus-4-6-thinking",
providerID: "quotio",
mode: "Sisyphus (Ultraworker)",
agent: "Sisyphus (Ultraworker)",
path: { cwd: "/tmp", root: "/tmp" },
cost: 0,
tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
},
},
},
})
//#then
expect(abortCalls).toEqual([sessionID])
expect(promptCalls).toEqual([sessionID])
})
test("triggers retry prompt for nested model error payloads", async () => {
//#given
const sessionID = "ses_main_fallback_nested"
setMainSession(sessionID)
const { handler, abortCalls, promptCalls } = createHandler()
//#when
await handler({
event: {
type: "session.error",
properties: {
sessionID,
error: {
name: "UnknownError",
data: {
error: {
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
},
},
},
},
},
})
//#then
expect(abortCalls).toEqual([sessionID])
expect(promptCalls).toEqual([sessionID])
})
test("triggers retry prompt on session.status retry events and applies fallback", async () => {
//#given
const sessionID = "ses_status_retry_fallback"
setMainSession(sessionID)
clearPendingModelFallback(sessionID)
const modelFallback = createModelFallbackHook()
const { handler, abortCalls, promptCalls } = createHandler({ hooks: { modelFallback } })
const chatMessageHandler = createChatMessageHandler({
ctx: {
client: {
tui: {
showToast: async () => ({}),
},
},
} as any,
pluginConfig: {} as any,
firstMessageVariantGate: {
shouldOverride: () => false,
markApplied: () => {},
},
hooks: {
modelFallback,
stopContinuationGuard: null,
keywordDetector: null,
claudeCodeHooks: null,
autoSlashCommand: null,
startWork: null,
ralphLoop: null,
} as any,
})
await handler({
event: {
type: "message.updated",
properties: {
info: {
id: "msg_user_status_1",
sessionID,
role: "user",
time: { created: 1 },
content: [],
modelID: "claude-opus-4-6-thinking",
providerID: "quotio",
agent: "Sisyphus (Ultraworker)",
path: { cwd: "/tmp", root: "/tmp" },
},
},
},
})
//#when
await handler({
event: {
type: "session.status",
properties: {
sessionID,
status: {
type: "retry",
attempt: 1,
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
next: 1234,
},
},
},
})
const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> }
await chatMessageHandler(
{
sessionID,
agent: "sisyphus",
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
},
output,
)
//#then
expect(abortCalls).toEqual([sessionID])
expect(promptCalls).toEqual([sessionID])
expect(output.message["model"]).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
})
expect(output.message["variant"]).toBe("max")
})
test("advances main-session fallback chain across repeated session.error retries end-to-end", async () => {
//#given
const abortCalls: string[] = []
const promptCalls: string[] = []
const toastCalls: string[] = []
const sessionID = "ses_main_fallback_chain"
setMainSession(sessionID)
clearPendingModelFallback(sessionID)
const modelFallback = createModelFallbackHook()
const eventHandler = createEventHandler({
ctx: {
directory: "/tmp",
client: {
session: {
abort: async ({ path }: { path: { id: string } }) => {
abortCalls.push(path.id)
return {}
},
prompt: async ({ path }: { path: { id: string } }) => {
promptCalls.push(path.id)
return {}
},
},
},
} as any,
pluginConfig: {} as any,
firstMessageVariantGate: {
markSessionCreated: () => {},
clear: () => {},
},
managers: {
tmuxSessionManager: {
onSessionCreated: async () => {},
onSessionDeleted: async () => {},
},
skillMcpManager: {
disconnectSession: async () => {},
},
} as any,
hooks: {
modelFallback,
} as any,
})
const chatMessageHandler = createChatMessageHandler({
ctx: {
client: {
tui: {
showToast: async ({ body }: { body: { title?: string } }) => {
if (body?.title) toastCalls.push(body.title)
return {}
},
},
},
} as any,
pluginConfig: {} as any,
firstMessageVariantGate: {
shouldOverride: () => false,
markApplied: () => {},
},
hooks: {
modelFallback,
stopContinuationGuard: null,
keywordDetector: null,
claudeCodeHooks: null,
autoSlashCommand: null,
startWork: null,
ralphLoop: null,
} as any,
})
const triggerRetryCycle = async () => {
await eventHandler({
event: {
type: "session.error",
properties: {
sessionID,
providerID: "quotio",
modelID: "claude-opus-4-6-thinking",
error: {
name: "UnknownError",
data: {
error: {
message:
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
},
},
},
},
},
})
const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> }
await chatMessageHandler(
{
sessionID,
agent: "sisyphus",
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
},
output,
)
return output
}
//#when - first retry cycle
const first = await triggerRetryCycle()
//#then - first fallback entry applied (prefers current provider when available)
expect(first.message["model"]).toEqual({
providerID: "quotio",
modelID: "claude-opus-4-6",
})
expect(first.message["variant"]).toBe("max")
//#when - second retry cycle
const second = await triggerRetryCycle()
//#then - second fallback entry applied (chain advanced)
expect(second.message["model"]).toEqual({
providerID: "quotio",
modelID: "gpt-5.3-codex",
})
expect(second.message["variant"]).toBe("high")
expect(abortCalls).toEqual([sessionID, sessionID])
expect(promptCalls).toEqual([sessionID, sessionID])
expect(toastCalls.length).toBeGreaterThanOrEqual(0)
})
})

View File

@@ -4,11 +4,17 @@ import type { PluginContext } from "./types"
import {
clearSessionAgent,
getMainSessionID,
getSessionAgent,
subagentSessions,
syncSubagentSessions,
setMainSession,
updateSessionAgent,
} from "../features/claude-code-session-state"
import { resetMessageCursor } from "../shared"
import { lspManager } from "../tools"
import { shouldRetryError } from "../shared/model-error-classifier"
import { clearPendingModelFallback, clearSessionFallbackChain, setPendingModelFallback } from "../hooks/model-fallback/hook"
import { clearSessionModel, setSessionModel } from "../shared/session-model-state"
import type { CreatedHooks } from "../create-hooks"
import type { Managers } from "../create-managers"
@@ -20,6 +26,74 @@ type FirstMessageVariantGate = {
clear: (sessionID: string) => void
}
// Type guard: true for any non-null object (plain objects and arrays alike).
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null) return false
  return typeof value === "object"
}
// Strips variant suffixes from a model ID, case-insensitively, in the fixed
// order -thinking, -max, -high (each applied at most once, cascading).
function normalizeFallbackModelID(modelID: string): string {
  let normalized = modelID
  for (const suffix of ["-thinking", "-max", "-high"]) {
    if (normalized.toLowerCase().endsWith(suffix)) {
      normalized = normalized.slice(0, -suffix.length)
    }
  }
  return normalized
}
// Pulls an error name out of an arbitrary error value: a string `name`
// property wins, then Error#name; anything else yields undefined.
function extractErrorName(error: unknown): string | undefined {
  if (typeof error === "object" && error !== null) {
    const candidate = (error as { name?: unknown }).name
    if (typeof candidate === "string") return candidate
  }
  if (error instanceof Error) return error.name
  return undefined
}
// Best-effort extraction of a human-readable message from an arbitrary error
// value. Checks, in order: falsy -> "", raw string, Error#message, then a
// fixed list of nested locations (error, error.data, error.error,
// error.data.error, error.cause) for a non-empty string `message`. Falls back
// to JSON serialization, then String().
function extractErrorMessage(error: unknown): string {
  if (!error) return ""
  if (typeof error === "string") return error
  if (error instanceof Error) return error.message
  const isObj = (v: unknown): v is Record<string, unknown> =>
    typeof v === "object" && v !== null
  if (isObj(error)) {
    const nested = isObj(error.data) ? error.data.error : undefined
    const candidates: unknown[] = [error, error.data, error.error, nested, error.cause]
    for (const candidate of candidates) {
      if (isObj(candidate) && typeof candidate.message === "string" && candidate.message.length > 0) {
        return candidate.message
      }
    }
  }
  try {
    return JSON.stringify(error)
  } catch {
    return String(error)
  }
}
// Parses provider/model identifiers out of a raw error message. Matching is
// done on a lowercased copy, so returned IDs are always lowercase.
// Recognized shapes:
//   "model not found: <provider>/<model>" -> { providerID, modelID }
//   "unknown provider for model <model>"  -> { modelID }
// Anything else yields an empty object.
function extractProviderModelFromErrorMessage(
  message: string,
): { providerID?: string; modelID?: string } {
  const normalized = message.toLowerCase()
  const pairMatch = normalized.match(/model\s+not\s+found:\s*([a-z0-9_-]+)\s*\/\s*([a-z0-9._-]+)/i)
  if (pairMatch) {
    const [, providerID, modelID] = pairMatch
    return { providerID, modelID }
  }
  const soloMatch = normalized.match(/unknown\s+provider\s+for\s+model\s+([a-z0-9._-]+)/i)
  if (soloMatch) {
    return { modelID: soloMatch[1] }
  }
  return {}
}
export function createEventHandler(args: {
ctx: PluginContext
pluginConfig: OhMyOpenCodeConfig
@@ -29,6 +103,11 @@ export function createEventHandler(args: {
}): (input: { event: { type: string; properties?: Record<string, unknown> } }) => Promise<void> {
const { ctx, firstMessageVariantGate, managers, hooks } = args
// Avoid triggering multiple abort+continue cycles for the same failing assistant message.
const lastHandledModelErrorMessageID = new Map<string, string>()
const lastHandledRetryStatusKey = new Map<string, string>()
const lastKnownModelBySession = new Map<string, { providerID: string; modelID: string }>()
const dispatchToHooks = async (input: { event: { type: string; properties?: Record<string, unknown> } }): Promise<void> => {
await Promise.resolve(hooks.autoUpdateChecker?.event?.(input))
await Promise.resolve(hooks.claudeCodeHooks?.event?.(input))
@@ -55,6 +134,15 @@ export function createEventHandler(args: {
const recentRealIdles = new Map<string, number>()
const DEDUP_WINDOW_MS = 500
const shouldAutoRetrySession = (sessionID: string): boolean => {
if (syncSubagentSessions.has(sessionID)) return true
const mainSessionID = getMainSessionID()
if (mainSessionID) return sessionID === mainSessionID
// Headless runs (or resumed sessions) may not emit session.created, so mainSessionID can be unset.
// In that case, treat any non-subagent session as the "main" interactive session.
return !subagentSessions.has(sessionID)
}
return async (input): Promise<void> => {
pruneRecentSyntheticIdles({
recentSyntheticIdles,
@@ -121,8 +209,15 @@ export function createEventHandler(args: {
if (sessionInfo?.id) {
clearSessionAgent(sessionInfo.id)
lastHandledModelErrorMessageID.delete(sessionInfo.id)
lastHandledRetryStatusKey.delete(sessionInfo.id)
lastKnownModelBySession.delete(sessionInfo.id)
clearPendingModelFallback(sessionInfo.id)
clearSessionFallbackChain(sessionInfo.id)
resetMessageCursor(sessionInfo.id)
firstMessageVariantGate.clear(sessionInfo.id)
clearSessionModel(sessionInfo.id)
syncSubagentSessions.delete(sessionInfo.id)
await managers.skillMcpManager.disconnectSession(sessionInfo.id)
await lspManager.cleanupTempDirectoryClients()
await managers.tmuxSessionManager.onSessionDeleted({
@@ -136,8 +231,129 @@ export function createEventHandler(args: {
const sessionID = info?.sessionID as string | undefined
const agent = info?.agent as string | undefined
const role = info?.role as string | undefined
if (sessionID && agent && role === "user") {
updateSessionAgent(sessionID, agent)
if (sessionID && role === "user") {
if (agent) {
updateSessionAgent(sessionID, agent)
}
const providerID = info?.providerID as string | undefined
const modelID = info?.modelID as string | undefined
if (providerID && modelID) {
lastKnownModelBySession.set(sessionID, { providerID, modelID })
setSessionModel(sessionID, { providerID, modelID })
}
}
// Model fallback: in practice, API/model failures often surface as assistant message errors.
// session.error events are not guaranteed for all providers, so we also observe message.updated.
if (sessionID && role === "assistant") {
const assistantMessageID = info?.id as string | undefined
const assistantError = info?.error
if (assistantMessageID && assistantError) {
const lastHandled = lastHandledModelErrorMessageID.get(sessionID)
if (lastHandled === assistantMessageID) {
return
}
const errorName = extractErrorName(assistantError)
const errorMessage = extractErrorMessage(assistantError)
const errorInfo = { name: errorName, message: errorMessage }
if (shouldRetryError(errorInfo)) {
// Prefer the agent/model/provider from the assistant message payload.
let agentName = agent ?? getSessionAgent(sessionID)
if (!agentName && sessionID === getMainSessionID()) {
if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
agentName = "sisyphus"
} else if (errorMessage.includes("gpt-5")) {
agentName = "hephaestus"
} else {
agentName = "sisyphus"
}
}
if (agentName) {
const currentProvider = (info?.providerID as string | undefined) ?? "opencode"
const rawModel = (info?.modelID as string | undefined) ?? "claude-opus-4-6"
const currentModel = normalizeFallbackModelID(rawModel)
const setFallback = setPendingModelFallback(
sessionID,
agentName,
currentProvider,
currentModel,
)
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
lastHandledModelErrorMessageID.set(sessionID, assistantMessageID)
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
await ctx.client.session
.prompt({
path: { id: sessionID },
body: { parts: [{ type: "text", text: "continue" }] },
query: { directory: ctx.directory },
})
.catch(() => {})
}
}
}
}
}
}
if (event.type === "session.status") {
const sessionID = props?.sessionID as string | undefined
const status = props?.status as
| { type?: string; attempt?: number; message?: string; next?: number }
| undefined
if (sessionID && status?.type === "retry") {
const retryMessage = typeof status.message === "string" ? status.message : ""
const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}`
if (lastHandledRetryStatusKey.get(sessionID) === retryKey) {
return
}
lastHandledRetryStatusKey.set(sessionID, retryKey)
const errorInfo = { name: undefined, message: retryMessage }
if (shouldRetryError(errorInfo)) {
let agentName = getSessionAgent(sessionID)
if (!agentName && sessionID === getMainSessionID()) {
if (retryMessage.includes("claude-opus") || retryMessage.includes("opus")) {
agentName = "sisyphus"
} else if (retryMessage.includes("gpt-5")) {
agentName = "hephaestus"
} else {
agentName = "sisyphus"
}
}
if (agentName) {
const parsed = extractProviderModelFromErrorMessage(retryMessage)
const lastKnown = lastKnownModelBySession.get(sessionID)
const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode"
let currentModel = parsed.modelID ?? lastKnown?.modelID ?? "claude-opus-4-6"
currentModel = normalizeFallbackModelID(currentModel)
const setFallback = setPendingModelFallback(
sessionID,
agentName,
currentProvider,
currentModel,
)
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
await ctx.client.session
.prompt({
path: { id: sessionID },
body: { parts: [{ type: "text", text: "continue" }] },
query: { directory: ctx.directory },
})
.catch(() => {})
}
}
}
}
}
@@ -145,6 +361,11 @@ export function createEventHandler(args: {
const sessionID = props?.sessionID as string | undefined
const error = props?.error
const errorName = extractErrorName(error)
const errorMessage = extractErrorMessage(error)
const errorInfo = { name: errorName, message: errorMessage }
// First, try session recovery for internal errors (thinking blocks, tool results, etc.)
if (hooks.sessionRecovery?.isRecoverableError(error)) {
const messageInfo = {
id: props?.messageID as string | undefined,
@@ -168,6 +389,52 @@ export function createEventHandler(args: {
})
.catch(() => {})
}
}
// Second, try model fallback for model errors (rate limit, quota, provider issues, etc.)
else if (sessionID && shouldRetryError(errorInfo)) {
// Get the current agent for this session, or default to "sisyphus" for main sessions
let agentName = getSessionAgent(sessionID)
// For main sessions, if no agent is set, try to infer from the error or default to sisyphus
if (!agentName && sessionID === getMainSessionID()) {
// Try to infer agent from model in error message
if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
agentName = "sisyphus"
} else if (errorMessage.includes("gpt-5")) {
agentName = "hephaestus"
} else {
// Default to sisyphus for main session errors
agentName = "sisyphus"
}
}
if (agentName) {
const parsed = extractProviderModelFromErrorMessage(errorMessage)
const currentProvider = props?.providerID as string || parsed.providerID || "opencode"
let currentModel = props?.modelID as string || parsed.modelID || "claude-opus-4-6"
currentModel = normalizeFallbackModelID(currentModel)
// Try to set pending model fallback
const setFallback = setPendingModelFallback(
sessionID,
agentName,
currentProvider,
currentModel,
)
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
// Abort the current session and prompt with "continue" to trigger the fallback
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
await ctx.client.session
.prompt({
path: { id: sessionID },
body: { parts: [{ type: "text", text: "continue" }] },
query: { directory: ctx.directory },
})
.catch(() => {})
}
}
}
}
}

View File

@@ -7,6 +7,7 @@ import {
createSessionRecoveryHook,
createSessionNotification,
createThinkModeHook,
createModelFallbackHook,
createAnthropicContextWindowLimitRecoveryHook,
createAutoUpdateCheckerHook,
createAgentUsageReminderHook,
@@ -30,6 +31,7 @@ import {
detectExternalNotificationPlugin,
getNotificationConflictWarning,
log,
normalizeSDKResponse,
} from "../../shared"
import { safeCreateHook } from "../../shared/safe-create-hook"
import { sessionExists } from "../../tools"
@@ -40,6 +42,7 @@ export type SessionHooks = {
sessionRecovery: ReturnType<typeof createSessionRecoveryHook> | null
sessionNotification: ReturnType<typeof createSessionNotification> | null
thinkMode: ReturnType<typeof createThinkModeHook> | null
modelFallback: ReturnType<typeof createModelFallbackHook> | null
anthropicContextWindowLimitRecovery: ReturnType<typeof createAnthropicContextWindowLimitRecoveryHook> | null
autoUpdateChecker: ReturnType<typeof createAutoUpdateCheckerHook> | null
agentUsageReminder: ReturnType<typeof createAgentUsageReminderHook> | null
@@ -102,6 +105,73 @@ export function createSessionHooks(args: {
? safeHook("think-mode", () => createThinkModeHook())
: null
const enableFallbackTitle = pluginConfig.experimental?.model_fallback_title ?? false
const fallbackTitleMaxEntries = 200
const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
const updateFallbackTitle = async (input: {
sessionID: string
providerID: string
modelID: string
variant?: string
}) => {
if (!enableFallbackTitle) return
const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
const existing = fallbackTitleState.get(input.sessionID) ?? {}
if (existing.lastKey === key) return
if (!existing.baseTitle) {
const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
const sessionInfo = sessionResp
? normalizeSDKResponse(sessionResp, null as { title?: string } | null, { preferResponseOnMissingData: true })
: null
const rawTitle = sessionInfo?.title
if (typeof rawTitle === "string" && rawTitle.length > 0) {
existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
} else {
existing.baseTitle = "Session"
}
}
const variantLabel = input.variant ? ` ${input.variant}` : ""
const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
await ctx.client.session
.update({
path: { id: input.sessionID },
body: { title: newTitle },
query: { directory: ctx.directory },
})
.catch(() => {})
existing.lastKey = key
fallbackTitleState.set(input.sessionID, existing)
if (fallbackTitleState.size > fallbackTitleMaxEntries) {
const oldestKey = fallbackTitleState.keys().next().value
if (oldestKey) fallbackTitleState.delete(oldestKey)
}
}
// Model fallback hook (configurable via disabled_hooks)
// This handles automatic model switching when model errors occur
const modelFallback = isHookEnabled("model-fallback")
? safeHook("model-fallback", () =>
createModelFallbackHook({
toast: async ({ title, message, variant, duration }) => {
await ctx.client.tui
.showToast({
body: {
title,
message,
variant: variant ?? "warning",
duration: duration ?? 5000,
},
})
.catch(() => {})
},
onApplied: enableFallbackTitle ? updateFallbackTitle : undefined,
}))
: null
const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery")
? safeHook("anthropic-context-window-limit-recovery", () =>
createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental: pluginConfig.experimental }))
@@ -181,6 +251,7 @@ export function createSessionHooks(args: {
sessionRecovery,
sessionNotification,
thinkMode,
modelFallback,
anthropicContextWindowLimitRecovery,
autoUpdateChecker,
agentUsageReminder,

View File

@@ -5,6 +5,7 @@ import {
createClaudeCodeHooksHook,
createKeywordDetectorHook,
createThinkingBlockValidatorHook,
createBeastModeSystemHook,
} from "../../hooks"
import {
contextCollector,
@@ -17,6 +18,7 @@ export type TransformHooks = {
keywordDetector: ReturnType<typeof createKeywordDetectorHook> | null
contextInjectorMessagesTransform: ReturnType<typeof createContextInjectorMessagesTransformHook>
thinkingBlockValidator: ReturnType<typeof createThinkingBlockValidatorHook> | null
beastModeSystem: ReturnType<typeof createBeastModeSystemHook> | null
}
export function createTransformHooks(args: {
@@ -56,10 +58,19 @@ export function createTransformHooks(args: {
)
: null
const beastModeSystem = isHookEnabled("beast-mode-system")
? safeCreateHook(
"beast-mode-system",
() => createBeastModeSystemHook(),
{ enabled: safeHookEnabled },
)
: null
return {
claudeCodeHooks,
keywordDetector,
contextInjectorMessagesTransform,
thinkingBlockValidator,
beastModeSystem,
}
}

View File

@@ -0,0 +1,12 @@
import type { CreatedHooks } from "../create-hooks"
/**
 * Builds the handler for the `experimental.chat.system.transform` extension
 * point. The returned async function delegates to the beast-mode-system hook
 * when it is present; with the hook disabled it is a no-op.
 */
export function createSystemTransformHandler(args: {
  hooks: CreatedHooks
}): (input: { sessionID: string }, output: { system: string[] }) => Promise<void> {
  return async (input, output): Promise<void> => {
    const beastMode = args.hooks.beastModeSystem
    if (!beastMode) return
    // Keep the method invocation on the hook object so `this` stays bound.
    await beastMode["experimental.chat.system.transform"]?.(input, output)
  }
}

View File

@@ -75,6 +75,14 @@ function findVariantInChain(
return entry.variant
}
}
// Some providers expose identical model IDs (e.g. OpenAI models via different providers).
// If we didn't find an exact provider+model match, fall back to model-only matching.
for (const entry of fallbackChain) {
if (entry.model === currentModel.modelID) {
return entry.variant
}
}
return undefined
}

View File

@@ -0,0 +1,76 @@
import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
import { mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs"
import { join } from "node:path"
import * as dataPath from "./data-path"
import { shouldRetryError, selectFallbackProvider } from "./model-error-classifier"
// Isolated cache directory so tests never touch the real user cache.
const TEST_CACHE_DIR = join(import.meta.dir, "__test-cache__")

describe("model-error-classifier", () => {
  let cacheDirSpy: ReturnType<typeof spyOn>

  beforeEach(() => {
    // Point the cache-dir lookup at the throwaway directory and start clean.
    cacheDirSpy = spyOn(dataPath, "getOmoOpenCodeCacheDir").mockReturnValue(TEST_CACHE_DIR)
    if (existsSync(TEST_CACHE_DIR)) {
      rmSync(TEST_CACHE_DIR, { recursive: true })
    }
    mkdirSync(TEST_CACHE_DIR, { recursive: true })
  })

  afterEach(() => {
    // Restore the real cache-dir resolution and remove any test residue.
    cacheDirSpy.mockRestore()
    if (existsSync(TEST_CACHE_DIR)) {
      rmSync(TEST_CACHE_DIR, { recursive: true })
    }
  })

  test("treats overloaded retry messages as retryable", () => {
    //#given
    const error = { message: "Provider is overloaded" }
    //#when
    const result = shouldRetryError(error)
    //#then
    expect(result).toBe(true)
  })

  test("selectFallbackProvider prefers first connected provider in preference order", () => {
    //#given - cache reports both providers connected
    writeFileSync(
      join(TEST_CACHE_DIR, "connected-providers.json"),
      JSON.stringify({ connected: ["quotio", "nvidia"], updatedAt: new Date().toISOString() }, null, 2),
    )
    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia")
    //#then
    expect(provider).toBe("quotio")
  })

  test("selectFallbackProvider falls back to next connected provider when first is disconnected", () => {
    //#given - only nvidia is connected
    writeFileSync(
      join(TEST_CACHE_DIR, "connected-providers.json"),
      JSON.stringify({ connected: ["nvidia"], updatedAt: new Date().toISOString() }, null, 2),
    )
    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"])
    //#then
    expect(provider).toBe("nvidia")
  })

  test("selectFallbackProvider uses provider preference order when cache is missing", () => {
    //#given - no cache file
    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia")
    //#then
    expect(provider).toBe("quotio")
  })
})

View File

@@ -0,0 +1,140 @@
import type { FallbackEntry } from "./model-requirements"
import { readConnectedProvidersCache } from "./connected-providers-cache"
/**
 * Error names that indicate a retryable model error (deadstop).
 * These completely halt the action loop and should trigger a fallback retry.
 */
const RETRYABLE_ERROR_NAMES = new Set([
  "ProviderModelNotFoundError",
  "RateLimitError",
  "QuotaExceededError",
  "InsufficientCreditsError",
  "ModelUnavailableError",
  "ProviderConnectionError",
  "AuthenticationError",
])

/**
 * Error names that must NOT trigger a retry. These failures are typically
 * user-induced or fixable without switching models.
 */
const NON_RETRYABLE_ERROR_NAMES = new Set([
  "MessageAbortedError",
  "PermissionDeniedError",
  "ContextLengthError",
  "TimeoutError",
  "ValidationError",
  "SyntaxError",
  "UserError",
])

/**
 * Lowercase substrings that mark an error message as retryable even when the
 * error name is unknown.
 */
const RETRYABLE_MESSAGE_PATTERNS = [
  "rate_limit",
  "rate limit",
  "quota",
  "not found",
  "unavailable",
  "insufficient",
  "too many requests",
  "over limit",
  "overloaded",
  "bad gateway",
  "unknown provider",
  "provider not found",
  "connection error",
  "network error",
  "timeout",
  "service unavailable",
  "internal_server_error",
  "503",
  "502",
  "504",
]

// Hard cap on fallback attempts, independent of chain length.
const MAX_FALLBACK_ATTEMPTS = 10

export interface ErrorInfo {
  name?: string
  message?: string
}

/**
 * Determines whether an error is a retryable model error.
 *
 * Resolution order: an explicitly non-retryable name wins, then an explicitly
 * retryable name, then a case-insensitive substring scan of the message
 * against RETRYABLE_MESSAGE_PATTERNS.
 */
export function isRetryableModelError(error: ErrorInfo): boolean {
  const { name, message } = error
  if (name) {
    if (NON_RETRYABLE_ERROR_NAMES.has(name)) return false
    if (RETRYABLE_ERROR_NAMES.has(name)) return true
  }
  const haystack = (message ?? "").toLowerCase()
  return RETRYABLE_MESSAGE_PATTERNS.some((needle) => haystack.includes(needle))
}

/**
 * Determines whether an error should trigger a fallback retry.
 * Currently an alias for {@link isRetryableModelError}.
 */
export function shouldRetryError(error: ErrorInfo): boolean {
  return isRetryableModelError(error)
}
/**
 * Returns the fallback entry for the given attempt number, or undefined once
 * the chain has been exhausted.
 */
export function getNextFallback(
  fallbackChain: FallbackEntry[],
  attemptCount: number,
): FallbackEntry | undefined {
  if (attemptCount >= fallbackChain.length) return undefined
  return fallbackChain[attemptCount]
}
/**
 * Reports whether another fallback can still be attempted. Bounded both by
 * the chain length and by the global MAX_FALLBACK_ATTEMPTS cap.
 */
export function hasMoreFallbacks(
  fallbackChain: FallbackEntry[],
  attemptCount: number,
): boolean {
  const limit = Math.min(fallbackChain.length, MAX_FALLBACK_ATTEMPTS)
  return attemptCount < limit
}
/**
* Selects the best provider for a fallback entry.
* Priority:
* 1) First connected provider in the entry's provider preference order
* 2) First provider listed in the fallback entry (when cache is missing)
*/
export function selectFallbackProvider(
providers: string[],
preferredProviderID?: string,
): string {
const connectedProviders = readConnectedProvidersCache()
if (connectedProviders) {
const connectedSet = new Set(connectedProviders)
for (const provider of providers) {
if (connectedSet.has(provider)) {
return provider
}
}
}
return providers[0] || preferredProviderID || "quotio"
}

View File

@@ -6,493 +6,158 @@ import {
type ModelRequirement,
} from "./model-requirements"
// Collects every fallback entry across all agent chains followed by all
// category chains, in declaration order.
function flattenChains(): FallbackEntry[] {
  const entries: FallbackEntry[] = []
  for (const requirement of Object.values(AGENT_MODEL_REQUIREMENTS)) {
    entries.push(...requirement.fallbackChain)
  }
  for (const requirement of Object.values(CATEGORY_MODEL_REQUIREMENTS)) {
    entries.push(...requirement.fallbackChain)
  }
  return entries
}
// Asserts a fallback entry never references user-excluded models:
// grok-code-fast-1 anywhere; quotio-specific exclusions (tstars2.0, kiro-*,
// tab_*); and any codex-mini variant.
function assertNoExcludedModels(entry: FallbackEntry): void {
  // User exclusions.
  expect(entry.model).not.toBe("grok-code-fast-1")
  if (entry.providers.includes("quotio")) {
    expect(entry.model).not.toBe("tstars2.0")
    expect(entry.model).not.toMatch(/^kiro-/i)
    expect(entry.model).not.toMatch(/^tab_/i)
  }
  // Remove codex-mini models per request.
  expect(entry.model).not.toMatch(/codex-mini/i)
}
// Asserts that no fallback entry routes through the "opencode" provider.
function assertNoOpencodeProvider(entry: FallbackEntry): void {
  expect(entry.providers).not.toContain("opencode")
}
// Asserts that model IDs are not written with a "provider/" prefix for
// providers whose model namespace is flat.
function assertNoProviderPrefixForNonNamespacedProviders(entry: FallbackEntry): void {
  // For these providers, model IDs should not be written as "provider/model".
  const nonNamespaced = ["quotio", "openai", "github-copilot", "minimax", "minimax-coding-plan"]
  for (const provider of entry.providers) {
    if (!nonNamespaced.includes(provider)) continue
    expect(entry.model.startsWith(`${provider}/`)).toBe(false)
  }
}
describe("AGENT_MODEL_REQUIREMENTS", () => {
test("oracle has valid fallbackChain with gpt-5.2 as primary", () => {
// given - oracle agent requirement
const oracle = AGENT_MODEL_REQUIREMENTS["oracle"]
// when - accessing oracle requirement
// then - fallbackChain exists with gpt-5.2 as first entry
expect(oracle).toBeDefined()
expect(oracle.fallbackChain).toBeArray()
expect(oracle.fallbackChain.length).toBeGreaterThan(0)
const primary = oracle.fallbackChain[0]
expect(primary.providers).toContain("openai")
expect(primary.model).toBe("gpt-5.2")
expect(primary.variant).toBe("high")
})
test("sisyphus has claude-opus-4-6 as primary and requiresAnyModel", () => {
// #given - sisyphus agent requirement
const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"]
// #when - accessing Sisyphus requirement
// #then - fallbackChain has claude-opus-4-6 first, big-pickle last
expect(sisyphus).toBeDefined()
expect(sisyphus.fallbackChain).toBeArray()
expect(sisyphus.fallbackChain).toHaveLength(5)
expect(sisyphus.requiresAnyModel).toBe(true)
const primary = sisyphus.fallbackChain[0]
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.variant).toBe("max")
const last = sisyphus.fallbackChain[4]
expect(last.providers[0]).toBe("opencode")
expect(last.model).toBe("big-pickle")
})
test("librarian has valid fallbackChain with gemini-3-flash as primary", () => {
// given - librarian agent requirement
const librarian = AGENT_MODEL_REQUIREMENTS["librarian"]
// when - accessing librarian requirement
// then - fallbackChain exists with gemini-3-flash as first entry
expect(librarian).toBeDefined()
expect(librarian.fallbackChain).toBeArray()
expect(librarian.fallbackChain.length).toBeGreaterThan(0)
const primary = librarian.fallbackChain[0]
expect(primary.providers[0]).toBe("google")
expect(primary.model).toBe("gemini-3-flash")
})
test("explore has valid fallbackChain with grok-code-fast-1 as primary", () => {
// given - explore agent requirement
const explore = AGENT_MODEL_REQUIREMENTS["explore"]
// when - accessing explore requirement
// then - fallbackChain: grok → minimax-free → haiku → nano
expect(explore).toBeDefined()
expect(explore.fallbackChain).toBeArray()
expect(explore.fallbackChain).toHaveLength(4)
const primary = explore.fallbackChain[0]
expect(primary.providers).toContain("github-copilot")
expect(primary.model).toBe("grok-code-fast-1")
const secondary = explore.fallbackChain[1]
expect(secondary.providers).toContain("opencode")
expect(secondary.model).toBe("minimax-m2.5-free")
const tertiary = explore.fallbackChain[2]
expect(tertiary.providers).toContain("anthropic")
expect(tertiary.model).toBe("claude-haiku-4-5")
const quaternary = explore.fallbackChain[3]
expect(quaternary.providers).toContain("opencode")
expect(quaternary.model).toBe("gpt-5-nano")
})
test("multimodal-looker has valid fallbackChain with k2p5 as primary", () => {
// given - multimodal-looker agent requirement
const multimodalLooker = AGENT_MODEL_REQUIREMENTS["multimodal-looker"]
// when - accessing multimodal-looker requirement
// then - fallbackChain exists with k2p5 as first entry
expect(multimodalLooker).toBeDefined()
expect(multimodalLooker.fallbackChain).toBeArray()
expect(multimodalLooker.fallbackChain.length).toBeGreaterThan(0)
const primary = multimodalLooker.fallbackChain[0]
expect(primary.providers[0]).toBe("kimi-for-coding")
expect(primary.model).toBe("k2p5")
})
test("prometheus has claude-opus-4-6 as primary", () => {
// #given - prometheus agent requirement
const prometheus = AGENT_MODEL_REQUIREMENTS["prometheus"]
// #when - accessing Prometheus requirement
// #then - claude-opus-4-6 is first
expect(prometheus).toBeDefined()
expect(prometheus.fallbackChain).toBeArray()
expect(prometheus.fallbackChain.length).toBeGreaterThan(1)
const primary = prometheus.fallbackChain[0]
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
expect(primary.variant).toBe("max")
})
test("metis has claude-opus-4-6 as primary", () => {
// #given - metis agent requirement
const metis = AGENT_MODEL_REQUIREMENTS["metis"]
// #when - accessing Metis requirement
// #then - claude-opus-4-6 is first
expect(metis).toBeDefined()
expect(metis.fallbackChain).toBeArray()
expect(metis.fallbackChain.length).toBeGreaterThan(1)
const primary = metis.fallbackChain[0]
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
expect(primary.variant).toBe("max")
})
test("momus has valid fallbackChain with gpt-5.2 as primary", () => {
// given - momus agent requirement
const momus = AGENT_MODEL_REQUIREMENTS["momus"]
// when - accessing Momus requirement
// then - fallbackChain exists with gpt-5.2 as first entry, variant medium
expect(momus).toBeDefined()
expect(momus.fallbackChain).toBeArray()
expect(momus.fallbackChain.length).toBeGreaterThan(0)
const primary = momus.fallbackChain[0]
expect(primary.model).toBe("gpt-5.2")
expect(primary.variant).toBe("medium")
expect(primary.providers[0]).toBe("openai")
})
test("atlas has valid fallbackChain with k2p5 as primary (kimi-for-coding prioritized)", () => {
// given - atlas agent requirement
const atlas = AGENT_MODEL_REQUIREMENTS["atlas"]
// when - accessing Atlas requirement
// then - fallbackChain exists with k2p5 as first entry (kimi-for-coding prioritized)
expect(atlas).toBeDefined()
expect(atlas.fallbackChain).toBeArray()
expect(atlas.fallbackChain.length).toBeGreaterThan(0)
const primary = atlas.fallbackChain[0]
expect(primary.model).toBe("k2p5")
expect(primary.providers[0]).toBe("kimi-for-coding")
})
test("hephaestus requires openai/github-copilot/opencode provider", () => {
// #given - hephaestus agent requirement
const hephaestus = AGENT_MODEL_REQUIREMENTS["hephaestus"]
// #when - accessing hephaestus requirement
// #then - requiresProvider is set to openai, github-copilot, opencode (not requiresModel)
expect(hephaestus).toBeDefined()
expect(hephaestus.requiresProvider).toEqual(["openai", "github-copilot", "opencode"])
expect(hephaestus.requiresModel).toBeUndefined()
})
test("all 10 builtin agents have valid fallbackChain arrays", () => {
// #given - list of 10 agent names
const expectedAgents = [
"sisyphus",
"hephaestus",
"oracle",
"librarian",
test("defines all 10 builtin agents", () => {
expect(Object.keys(AGENT_MODEL_REQUIREMENTS).sort()).toEqual([
"atlas",
"explore",
"multimodal-looker",
"prometheus",
"hephaestus",
"librarian",
"metis",
"momus",
"atlas",
]
"multimodal-looker",
"oracle",
"prometheus",
"sisyphus",
])
})
// when - checking AGENT_MODEL_REQUIREMENTS
const definedAgents = Object.keys(AGENT_MODEL_REQUIREMENTS)
test("sisyphus: 2nd fallback is quotio gpt-5.3-codex (high)", () => {
const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"]
expect(sisyphus.requiresAnyModel).toBe(true)
expect(sisyphus.fallbackChain.length).toBeGreaterThan(2)
// #then - all agents present with valid fallbackChain
expect(definedAgents).toHaveLength(10)
for (const agent of expectedAgents) {
const requirement = AGENT_MODEL_REQUIREMENTS[agent]
expect(requirement).toBeDefined()
expect(requirement.fallbackChain).toBeArray()
expect(requirement.fallbackChain.length).toBeGreaterThan(0)
expect(sisyphus.fallbackChain[0]).toEqual({
providers: ["quotio"],
model: "claude-opus-4-6",
variant: "max",
})
for (const entry of requirement.fallbackChain) {
expect(entry.providers).toBeArray()
expect(entry.providers.length).toBeGreaterThan(0)
expect(typeof entry.model).toBe("string")
expect(entry.model.length).toBeGreaterThan(0)
}
}
expect(sisyphus.fallbackChain[1]).toEqual({
providers: ["quotio"],
model: "gpt-5.3-codex",
variant: "high",
})
})
test("explore: uses speed chain, includes rome, and gpt-5-mini is copilot-first", () => {
const explore = AGENT_MODEL_REQUIREMENTS["explore"]
expect(explore.fallbackChain.length).toBeGreaterThan(4)
expect(explore.fallbackChain[0].model).toBe("claude-haiku-4-5")
expect(explore.fallbackChain.some((e) => e.model === "iflow-rome-30ba3b")).toBe(true)
const gptMini = explore.fallbackChain.find((e) => e.model === "gpt-5-mini")
expect(gptMini).toBeDefined()
expect(gptMini!.providers[0]).toBe("github-copilot")
expect(gptMini!.variant).toBe("high")
})
test("multimodal-looker: prefers gemini image model first", () => {
const multimodal = AGENT_MODEL_REQUIREMENTS["multimodal-looker"]
expect(multimodal.fallbackChain[0]).toEqual({
providers: ["quotio"],
model: "gemini-3-pro-image",
})
})
test("includes NVIDIA NIM additions in at least one agent chain", () => {
const all = Object.values(AGENT_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain)
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "qwen/qwen3.5-397b-a17b")).toBe(true)
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "stepfun-ai/step-3.5-flash")).toBe(true)
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "bytedance/seed-oss-36b-instruct")).toBe(true)
})
})
describe("CATEGORY_MODEL_REQUIREMENTS", () => {
test("ultrabrain has valid fallbackChain with gpt-5.3-codex as primary", () => {
  // given - ultrabrain category requirement
  const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
  // when - accessing ultrabrain requirement
  // then - fallbackChain exists with gpt-5.3-codex (xhigh, openai-first) as first entry
  expect(ultrabrain).toBeDefined()
  expect(ultrabrain.fallbackChain).toBeArray()
  expect(ultrabrain.fallbackChain.length).toBeGreaterThan(0)
  const primary = ultrabrain.fallbackChain[0]
  expect(primary.variant).toBe("xhigh")
  expect(primary.model).toBe("gpt-5.3-codex")
  expect(primary.providers[0]).toBe("openai")
})
test("deep has valid fallbackChain with gpt-5.3-codex as primary", () => {
// given - deep category requirement
const deep = CATEGORY_MODEL_REQUIREMENTS["deep"]
// when - accessing deep requirement
// then - fallbackChain exists with gpt-5.3-codex as first entry, medium variant
expect(deep).toBeDefined()
expect(deep.fallbackChain).toBeArray()
expect(deep.fallbackChain.length).toBeGreaterThan(0)
const primary = deep.fallbackChain[0]
expect(primary.variant).toBe("medium")
expect(primary.model).toBe("gpt-5.3-codex")
expect(primary.providers[0]).toBe("openai")
})
test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => {
// given - visual-engineering category requirement
const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"]
// when - accessing visual-engineering requirement
// then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) → k2p5
expect(visualEngineering).toBeDefined()
expect(visualEngineering.fallbackChain).toBeArray()
expect(visualEngineering.fallbackChain).toHaveLength(4)
const primary = visualEngineering.fallbackChain[0]
expect(primary.providers[0]).toBe("google")
expect(primary.model).toBe("gemini-3-pro")
expect(primary.variant).toBe("high")
const second = visualEngineering.fallbackChain[1]
expect(second.providers[0]).toBe("zai-coding-plan")
expect(second.model).toBe("glm-5")
const third = visualEngineering.fallbackChain[2]
expect(third.model).toBe("claude-opus-4-6")
expect(third.variant).toBe("max")
const fourth = visualEngineering.fallbackChain[3]
expect(fourth.providers[0]).toBe("kimi-for-coding")
expect(fourth.model).toBe("k2p5")
})
test("quick has valid fallbackChain with claude-haiku-4-5 as primary", () => {
// given - quick category requirement
const quick = CATEGORY_MODEL_REQUIREMENTS["quick"]
// when - accessing quick requirement
// then - fallbackChain exists with claude-haiku-4-5 as first entry
expect(quick).toBeDefined()
expect(quick.fallbackChain).toBeArray()
expect(quick.fallbackChain.length).toBeGreaterThan(0)
const primary = quick.fallbackChain[0]
expect(primary.model).toBe("claude-haiku-4-5")
expect(primary.providers[0]).toBe("anthropic")
})
test("unspecified-low has valid fallbackChain with claude-sonnet-4-6 as primary", () => {
// given - unspecified-low category requirement
const unspecifiedLow = CATEGORY_MODEL_REQUIREMENTS["unspecified-low"]
// when - accessing unspecified-low requirement
// then - fallbackChain exists with claude-sonnet-4-6 as first entry
expect(unspecifiedLow).toBeDefined()
expect(unspecifiedLow.fallbackChain).toBeArray()
expect(unspecifiedLow.fallbackChain.length).toBeGreaterThan(0)
const primary = unspecifiedLow.fallbackChain[0]
expect(primary.model).toBe("claude-sonnet-4-6")
expect(primary.providers[0]).toBe("anthropic")
})
test("unspecified-high has claude-opus-4-6 as primary", () => {
// #given - unspecified-high category requirement
const unspecifiedHigh = CATEGORY_MODEL_REQUIREMENTS["unspecified-high"]
// #when - accessing unspecified-high requirement
// #then - claude-opus-4-6 is first
expect(unspecifiedHigh).toBeDefined()
expect(unspecifiedHigh.fallbackChain).toBeArray()
expect(unspecifiedHigh.fallbackChain.length).toBeGreaterThan(1)
const primary = unspecifiedHigh.fallbackChain[0]
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.variant).toBe("max")
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
})
test("artistry has valid fallbackChain with gemini-3-pro as primary", () => {
// given - artistry category requirement
const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"]
// when - accessing artistry requirement
// then - fallbackChain exists with gemini-3-pro as first entry
expect(artistry).toBeDefined()
expect(artistry.fallbackChain).toBeArray()
expect(artistry.fallbackChain.length).toBeGreaterThan(0)
const primary = artistry.fallbackChain[0]
expect(primary.model).toBe("gemini-3-pro")
expect(primary.variant).toBe("high")
expect(primary.providers[0]).toBe("google")
})
test("writing has valid fallbackChain with k2p5 as primary (kimi-for-coding)", () => {
// given - writing category requirement
const writing = CATEGORY_MODEL_REQUIREMENTS["writing"]
// when - accessing writing requirement
// then - fallbackChain: k2p5 → gemini-3-flash → claude-sonnet-4-6
expect(writing).toBeDefined()
expect(writing.fallbackChain).toBeArray()
expect(writing.fallbackChain).toHaveLength(3)
const primary = writing.fallbackChain[0]
expect(primary.model).toBe("k2p5")
expect(primary.providers[0]).toBe("kimi-for-coding")
const second = writing.fallbackChain[1]
expect(second.model).toBe("gemini-3-flash")
expect(second.providers[0]).toBe("google")
})
test("all 8 categories have valid fallbackChain arrays", () => {
// given - list of 8 category names
const expectedCategories = [
"visual-engineering",
"ultrabrain",
"deep",
test("defines all 8 categories", () => {
expect(Object.keys(CATEGORY_MODEL_REQUIREMENTS).sort()).toEqual([
"artistry",
"deep",
"quick",
"unspecified-low",
"ultrabrain",
"unspecified-high",
"unspecified-low",
"visual-engineering",
"writing",
]
])
})
// when - checking CATEGORY_MODEL_REQUIREMENTS
const definedCategories = Object.keys(CATEGORY_MODEL_REQUIREMENTS)
test("deep requires gpt-5.3-codex", () => {
expect(CATEGORY_MODEL_REQUIREMENTS["deep"].requiresModel).toBe("gpt-5.3-codex")
})
// then - all categories present with valid fallbackChain
expect(definedCategories).toHaveLength(8)
for (const category of expectedCategories) {
const requirement = CATEGORY_MODEL_REQUIREMENTS[category]
expect(requirement).toBeDefined()
expect(requirement.fallbackChain).toBeArray()
expect(requirement.fallbackChain.length).toBeGreaterThan(0)
test("quick uses the speed chain (haiku primary)", () => {
expect(CATEGORY_MODEL_REQUIREMENTS["quick"].fallbackChain[0].model).toBe("claude-haiku-4-5")
})
for (const entry of requirement.fallbackChain) {
expect(entry.providers).toBeArray()
expect(entry.providers.length).toBeGreaterThan(0)
expect(typeof entry.model).toBe("string")
expect(entry.model.length).toBeGreaterThan(0)
}
test("ultrabrain starts with gpt-5.3-codex (high)", () => {
const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
expect(ultrabrain.fallbackChain[0]).toEqual({
providers: ["quotio"],
model: "gpt-5.3-codex",
variant: "high",
})
})
})
describe("ModelRequirements invariants", () => {
test("all entries have non-empty providers and a non-empty model", () => {
for (const entry of flattenChains()) {
expect(entry.providers.length).toBeGreaterThan(0)
expect(typeof entry.model).toBe("string")
expect(entry.model.length).toBeGreaterThan(0)
}
})
test("no entry uses opencode provider and no excluded models are present", () => {
for (const entry of flattenChains()) {
assertNoOpencodeProvider(entry)
assertNoExcludedModels(entry)
assertNoProviderPrefixForNonNamespacedProviders(entry)
}
})
})
describe("FallbackEntry type", () => {
test("FallbackEntry structure is correct", () => {
// given - a valid FallbackEntry object
const entry: FallbackEntry = {
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "high",
}
// when - accessing properties
// then - all properties are accessible
expect(entry.providers).toEqual(["anthropic", "github-copilot", "opencode"])
expect(entry.model).toBe("claude-opus-4-6")
expect(entry.variant).toBe("high")
})
test("FallbackEntry variant is optional", () => {
// given - a FallbackEntry without variant
const entry: FallbackEntry = {
providers: ["opencode", "anthropic"],
model: "big-pickle",
}
// when - accessing variant
// then - variant is undefined
describe("Type sanity", () => {
test("FallbackEntry.variant is optional", () => {
const entry: FallbackEntry = { providers: ["quotio"], model: "claude-haiku-4-5" }
expect(entry.variant).toBeUndefined()
})
})
describe("ModelRequirement type", () => {
test("ModelRequirement structure with fallbackChain is correct", () => {
// given - a valid ModelRequirement object
const requirement: ModelRequirement = {
fallbackChain: [
{ providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot"], model: "gpt-5.2", variant: "high" },
],
}
// when - accessing properties
// then - fallbackChain is accessible with correct structure
expect(requirement.fallbackChain).toBeArray()
expect(requirement.fallbackChain).toHaveLength(2)
expect(requirement.fallbackChain[0].model).toBe("claude-opus-4-6")
expect(requirement.fallbackChain[1].model).toBe("gpt-5.2")
})
test("ModelRequirement variant is optional", () => {
// given - a ModelRequirement without top-level variant
const requirement: ModelRequirement = {
fallbackChain: [{ providers: ["opencode"], model: "big-pickle" }],
}
// when - accessing variant
// then - variant is undefined
expect(requirement.variant).toBeUndefined()
})
test("no model in fallbackChain has provider prefix", () => {
// given - all agent and category requirements
const allRequirements = [
...Object.values(AGENT_MODEL_REQUIREMENTS),
...Object.values(CATEGORY_MODEL_REQUIREMENTS),
]
// when - checking each model in fallbackChain
// then - none contain "/" (provider prefix)
for (const req of allRequirements) {
for (const entry of req.fallbackChain) {
expect(entry.model).not.toContain("/")
}
}
})
test("all fallbackChain entries have non-empty providers array", () => {
// given - all agent and category requirements
const allRequirements = [
...Object.values(AGENT_MODEL_REQUIREMENTS),
...Object.values(CATEGORY_MODEL_REQUIREMENTS),
]
// when - checking each entry in fallbackChain
// then - all have non-empty providers array
for (const req of allRequirements) {
for (const entry of req.fallbackChain) {
expect(entry.providers).toBeArray()
expect(entry.providers.length).toBeGreaterThan(0)
}
}
})
})
describe("requiresModel field in categories", () => {
test("deep category has requiresModel set to gpt-5.3-codex", () => {
// given
const deep = CATEGORY_MODEL_REQUIREMENTS["deep"]
// when / #then
expect(deep.requiresModel).toBe("gpt-5.3-codex")
})
test("artistry category has requiresModel set to gemini-3-pro", () => {
// given
const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"]
// when / #then
expect(artistry.requiresModel).toBe("gemini-3-pro")
test("ModelRequirement.variant is optional", () => {
const req: ModelRequirement = { fallbackChain: [{ providers: ["quotio"], model: "claude-haiku-4-5" }] }
expect(req.variant).toBeUndefined()
})
})

View File

@@ -12,147 +12,203 @@ export type ModelRequirement = {
requiresProvider?: string[] // If set, only activates when any of these providers is connected
}
/**
 * Shorthand constructor for a FallbackEntry.
 * Accepts either a single provider id or a list of providers; the `variant`
 * key is included only when a variant was actually supplied, so entries
 * without a variant stay structurally minimal.
 */
function fb(providers: string[] | string, model: string, variant?: string): FallbackEntry {
  const providerList = typeof providers === "string" ? [providers] : providers
  if (variant === undefined) {
    return { providers: providerList, model }
  }
  return { providers: providerList, model, variant }
}
/**
 * Remove duplicate fallback entries from a chain, keeping the first
 * occurrence of each (model, variant) pair and preserving order.
 * Entries differing only in providers are considered duplicates.
 */
function dedupeChain(chain: FallbackEntry[]): FallbackEntry[] {
  const seenKeys = new Set<string>()
  return chain.filter((entry) => {
    // Key on model + variant; an absent variant hashes as the empty string.
    const key = `${entry.model}:${entry.variant ?? ""}`
    if (seenKeys.has(key)) return false
    seenKeys.add(key)
    return true
  })
}
// Provider preference rules:
// - Never use the paid `opencode` provider as an automatic fallback.
// - Prefer `quotio` when the same model exists across multiple providers.
// - Prefer `github-copilot` first for `gpt-5-mini` (unlimited), fall back to `quotio`.
// Note: user requested "Quotio-first" and to avoid the OpenCode provider; we keep runtime fallbacks on
// `quotio` + `nvidia` (+ `github-copilot` for unlimited GPT mini) unless explicitly requested otherwise.
const P_GPT: string[] = ["quotio"]
const P_GPT_MINI: string[] = ["github-copilot", "quotio"]
// Benchmark-driven ordering (user-provided table + NVIDIA NIM docs), tuned per-agent for quality vs speed.
const SPEED_CHAIN: FallbackEntry[] = [
fb("quotio", "claude-haiku-4-5"), fb("quotio", "oswe-vscode-prime"),
fb(P_GPT_MINI, "gpt-5-mini", "high"), fb(P_GPT_MINI, "gpt-4.1"),
fb("nvidia", "nvidia/nemotron-3-nano-30b-a3b"), fb("quotio", "iflow-rome-30ba3b"),
fb("minimax-coding-plan", "MiniMax-M2.5"), fb("nvidia", "bytedance/seed-oss-36b-instruct"),
fb("quotio", "claude-sonnet-4-5"),
]
const QUALITY_CODING_CHAIN: FallbackEntry[] = [
fb("quotio", "claude-opus-4-6-thinking"),
fb("nvidia", "stepfun-ai/step-3.5-flash"),
fb("nvidia", "qwen/qwen3.5-397b-a17b"),
fb("quotio", "glm-5"),
fb("nvidia", "z-ai/glm5"),
fb("quotio", "deepseek-v3.2-reasoner"),
fb("quotio", "deepseek-r1"),
fb("nvidia", "deepseek-ai/deepseek-r1"),
fb("quotio", "qwen3-235b-a22b-thinking-2507"),
fb("nvidia", "qwen/qwen3-next-80b-a3b-thinking"),
fb("nvidia", "qwen/qwen3-coder-480b-a35b-instruct"),
fb("nvidia", "bytedance/seed-oss-36b-instruct"),
fb("quotio", "kimi-k2-thinking"),
fb("quotio", "kimi-k2.5"),
fb("nvidia", "moonshotai/kimi-k2.5"),
fb("minimax-coding-plan", "MiniMax-M2.5"),
fb("minimax-coding-plan", "MiniMax-M2.5-highspeed"),
fb("minimax", "MiniMax-M2.5"),
fb("quotio", "minimax-m2.5"),
fb("quotio", "claude-sonnet-4-5-thinking"),
]
export const AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
sisyphus: {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["opencode"], model: "big-pickle" },
// 1st fallback: switch away from Opus Thinking to the non-thinking model (often more available).
fb("quotio", "claude-opus-4-6", "max"),
// 2nd fallback: user-requested.
fb("quotio", "gpt-5.3-codex", "high"),
...QUALITY_CODING_CHAIN,
...SPEED_CHAIN,
],
requiresAnyModel: true,
},
hephaestus: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
fb("quotio", "gpt-5.3-codex", "high"),
...QUALITY_CODING_CHAIN,
],
requiresProvider: ["openai", "github-copilot", "opencode"],
requiresAnyModel: true,
},
oracle: {
fallbackChain: dedupeChain([
fb("quotio", "gpt-5.3-codex", "high"),
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "claude-sonnet-4-5-thinking"),
...QUALITY_CODING_CHAIN,
]),
},
librarian: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
fb("quotio", "claude-sonnet-4-5"),
...SPEED_CHAIN,
...QUALITY_CODING_CHAIN,
],
},
librarian: {
fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
{ providers: ["opencode"], model: "minimax-m2.5-free" },
{ providers: ["opencode"], model: "big-pickle" },
],
},
explore: {
fallbackChain: [
{ providers: ["github-copilot"], model: "grok-code-fast-1" },
{ providers: ["opencode"], model: "minimax-m2.5-free" },
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
fallbackChain: SPEED_CHAIN,
},
"multimodal-looker": {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
{ providers: ["zai-coding-plan"], model: "glm-4.6v" },
fb("quotio", "gemini-3-pro-image"),
fb("quotio", "gemini-3-pro-high"),
fb("quotio", "gemini-3-flash"),
fb("quotio", "kimi-k2.5"),
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "claude-sonnet-4-5-thinking"),
fb("quotio", "claude-haiku-4-5"),
],
},
prometheus: {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
],
fallbackChain: dedupeChain([
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "gpt-5.3-codex", "high"),
fb("quotio", "claude-sonnet-4-5-thinking"),
...QUALITY_CODING_CHAIN,
]),
},
metis: {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
],
fallbackChain: dedupeChain([
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "gpt-5.3-codex", "high"),
fb("quotio", "claude-sonnet-4-5-thinking"),
...QUALITY_CODING_CHAIN,
]),
},
momus: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
],
fallbackChain: dedupeChain([
fb("quotio", "gpt-5.3-codex", "high"),
fb("quotio", "claude-opus-4-6-thinking"),
...QUALITY_CODING_CHAIN,
]),
},
atlas: {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["opencode"], model: "kimi-k2.5-free" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
],
fallbackChain: dedupeChain([
fb("quotio", "claude-sonnet-4-5-thinking"),
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "gpt-5.3-codex", "medium"),
...QUALITY_CODING_CHAIN,
]),
},
}
export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
"visual-engineering": {
fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "gemini-3-pro-image"),
fb("quotio", "kimi-k2-thinking"),
fb("quotio", "kimi-k2.5"),
fb("quotio", "claude-sonnet-4-5-thinking"),
fb("quotio", "gpt-5.3-codex", "medium"),
],
},
ultrabrain: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
],
fallbackChain: dedupeChain([
fb("quotio", "gpt-5.3-codex", "high"),
...QUALITY_CODING_CHAIN,
]),
},
deep: {
fallbackChain: [
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
fb("quotio", "gpt-5.3-codex", "medium"),
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "claude-sonnet-4-5-thinking"),
...QUALITY_CODING_CHAIN,
],
requiresModel: "gpt-5.3-codex",
},
artistry: {
fallbackChain: [
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "claude-sonnet-4-5-thinking"),
fb("quotio", "claude-sonnet-4-5"),
],
requiresModel: "gemini-3-pro",
requiresModel: "claude-opus-4-6",
},
quick: {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
{ providers: ["opencode"], model: "gpt-5-nano" },
],
fallbackChain: SPEED_CHAIN,
},
"unspecified-low": {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
],
fallbackChain: SPEED_CHAIN,
},
"unspecified-high": {
fallbackChain: [
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
],
fallbackChain: dedupeChain([
fb("quotio", "claude-opus-4-6-thinking"),
fb("quotio", "gpt-5.3-codex", "high"),
...QUALITY_CODING_CHAIN,
]),
},
writing: {
fallbackChain: [
{ providers: ["kimi-for-coding"], model: "k2p5" },
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
fb("quotio", "claude-sonnet-4-5"),
fb("quotio", "glm-5"),
fb("quotio", "kimi-k2.5"),
fb("quotio", "claude-haiku-4-5"),
fb("quotio", "gemini-3-flash"),
],
},
}

View File

@@ -0,0 +1,30 @@
import { describe, expect, test } from "bun:test"
import { clearSessionModel, getSessionModel, setSessionModel } from "./session-model-state"
// Unit tests for the in-memory session → model registry used by the
// model-fallback hook. Covers the set/get round-trip and clearing.
describe("session-model-state", () => {
  test("stores and retrieves a session model", () => {
    //#given
    const sessionID = "ses_test"
    //#when
    setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
    //#then - the exact provider/model pair set above comes back
    expect(getSessionModel(sessionID)).toEqual({
      providerID: "github-copilot",
      modelID: "gpt-4.1",
    })
  })
  test("clears a session model", () => {
    //#given
    const sessionID = "ses_clear"
    setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" })
    //#when
    clearSessionModel(sessionID)
    //#then - lookup after clearing yields undefined, not a stale entry
    expect(getSessionModel(sessionID)).toBeUndefined()
  })
})

View File

@@ -0,0 +1,15 @@
/** Provider/model pair pinned to a particular session. */
export type SessionModel = { providerID: string; modelID: string }

// In-memory registry of per-session model overrides, keyed by session ID.
// Module-level state: shared by all importers within one process.
const registry = new Map<string, SessionModel>()

/** Pin `model` as the active model for `sessionID` (overwrites any prior pin). */
export function setSessionModel(sessionID: string, model: SessionModel): void {
  registry.set(sessionID, model)
}

/** Look up the model pinned to `sessionID`; undefined when none was set. */
export function getSessionModel(sessionID: string): SessionModel | undefined {
  return registry.get(sessionID)
}

/** Drop any model pinned to `sessionID`; a no-op when none was set. */
export function clearSessionModel(sessionID: string): void {
  registry.delete(sessionID)
}

View File

@@ -1,6 +1,6 @@
import type { CallOmoAgentArgs } from "./types"
import type { PluginInput } from "@opencode-ai/plugin"
import { subagentSessions } from "../../features/claude-code-session-state"
import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state"
import { log } from "../../shared"
export async function createOrGetSession(
@@ -64,6 +64,7 @@ Original error: ${createResult.error}`)
const sessionID = createResult.data.id
log(`[call_omo_agent] Created session: ${sessionID}`)
subagentSessions.add(sessionID)
syncSubagentSessions.add(sessionID)
return { sessionID, isNew: true }
}
}

View File

@@ -1,7 +1,7 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { log } from "../../shared"
import { resolveSessionDirectory } from "../../shared"
import { subagentSessions } from "../../features/claude-code-session-state"
import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state"
import type { CallOmoAgentArgs } from "./types"
import type { ToolContextWithMetadata } from "./tool-context-with-metadata"
@@ -69,5 +69,6 @@ Original error: ${createResult.error}`,
const sessionID = createResult.data.id
log(`[call_omo_agent] Created session: ${sessionID}`)
subagentSessions.add(sessionID)
syncSubagentSessions.add(sessionID)
return { ok: true, sessionID }
}

View File

@@ -1,5 +1,6 @@
import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
import type { ExecutorContext, ParentContext } from "./executor-types"
import type { FallbackEntry } from "../../shared/model-requirements"
import { getTimingConfig } from "./timing"
import { storeToolMetadata } from "../../features/tool-metadata-store"
import { formatDetailedError } from "./error-formatting"
@@ -12,7 +13,8 @@ export async function executeBackgroundTask(
parentContext: ParentContext,
agentToUse: string,
categoryModel: { providerID: string; modelID: string; variant?: string } | undefined,
systemContent: string | undefined
systemContent: string | undefined,
fallbackChain?: FallbackEntry[],
): Promise<string> {
const { manager } = executorCtx
@@ -27,6 +29,7 @@ export async function executeBackgroundTask(
parentAgent: parentContext.agent,
parentTools: getSessionTools(parentContext.sessionID),
model: categoryModel,
fallbackChain,
skills: args.load_skills.length > 0 ? args.load_skills : undefined,
skillContent: systemContent,
category: args.category,

View File

@@ -1,6 +1,7 @@
import type { ModelFallbackInfo } from "../../features/task-toast-manager/types"
import type { DelegateTaskArgs } from "./types"
import type { ExecutorContext } from "./executor-types"
import type { FallbackEntry } from "../../shared/model-requirements"
import { mergeCategories } from "../../shared/merge-categories"
import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent"
import { resolveCategoryConfig } from "./categories"
@@ -16,6 +17,7 @@ export interface CategoryResolutionResult {
modelInfo: ModelFallbackInfo | undefined
actualModel: string | undefined
isUnstableAgent: boolean
fallbackChain?: FallbackEntry[] // For runtime retry on model errors
error?: string
}
@@ -177,5 +179,6 @@ Available categories: ${categoryNames.join(", ")}`,
modelInfo,
actualModel,
isUnstableAgent,
fallbackChain: requirement?.fallbackChain,
}
}

View File

@@ -8,6 +8,7 @@ import { getAgentDisplayName, getAgentConfigKey } from "../../shared/agent-displ
import { normalizeSDKResponse } from "../../shared"
import { log } from "../../shared/logger"
import { getAvailableModelsForDelegateTask } from "./available-models"
import type { FallbackEntry } from "../../shared/model-requirements"
import { resolveModelForDelegateTask } from "./model-selection"
export async function resolveSubagentExecution(
@@ -15,7 +16,7 @@ export async function resolveSubagentExecution(
executorCtx: ExecutorContext,
parentAgent: string | undefined,
categoryExamples: string
): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; error?: string }> {
): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; fallbackChain?: FallbackEntry[]; error?: string }> {
const { client, agentOverrides } = executorCtx
if (!args.subagent_type?.trim()) {
@@ -46,6 +47,7 @@ Create the work plan directly - that's your job as the planning agent.`,
let agentToUse = agentName
let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
let fallbackChain: FallbackEntry[] | undefined = undefined
try {
const agentsResult = await client.app.agents()
@@ -92,6 +94,7 @@ Create the work plan directly - that's your job as the planning agent.`,
const agentOverride = agentOverrides?.[agentConfigKey as keyof typeof agentOverrides]
?? (agentOverrides ? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentConfigKey)?.[1] : undefined)
const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentConfigKey]
fallbackChain = agentRequirement?.fallbackChain
if (agentOverride?.model || agentRequirement || matchedAgent.model) {
const availableModels = await getAvailableModelsForDelegateTask(client)
@@ -135,5 +138,5 @@ Create the work plan directly - that's your job as the planning agent.`,
}
}
return { agentToUse, categoryModel }
return { agentToUse, categoryModel, fallbackChain }
}

View File

@@ -100,7 +100,7 @@ describe("executeSyncTask - cleanup on error paths", () => {
//#when - executeSyncTask with fetchSyncResult failing
const result = await executeSyncTask(args, mockCtx, mockExecutorCtx, {
sessionID: "parent-session",
}, "test-agent", undefined, undefined, undefined, deps)
}, "test-agent", undefined, undefined, undefined, undefined, deps)
//#then - should return error and cleanup resources
expect(result).toBe("Fetch failed")
@@ -150,7 +150,7 @@ describe("executeSyncTask - cleanup on error paths", () => {
//#when - executeSyncTask with pollSyncSession failing
const result = await executeSyncTask(args, mockCtx, mockExecutorCtx, {
sessionID: "parent-session",
}, "test-agent", undefined, undefined, undefined, deps)
}, "test-agent", undefined, undefined, undefined, undefined, deps)
//#then - should return error and cleanup resources
expect(result).toBe("Poll error")
@@ -200,7 +200,7 @@ describe("executeSyncTask - cleanup on error paths", () => {
//#when - executeSyncTask completes successfully
const result = await executeSyncTask(args, mockCtx, mockExecutorCtx, {
sessionID: "parent-session",
}, "test-agent", undefined, undefined, undefined, deps)
}, "test-agent", undefined, undefined, undefined, undefined, deps)
//#then - should complete and cleanup resources
expect(result).toContain("Task completed")

View File

@@ -3,11 +3,12 @@ import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
import type { ExecutorContext, ParentContext } from "./executor-types"
import { getTaskToastManager } from "../../features/task-toast-manager"
import { storeToolMetadata } from "../../features/tool-metadata-store"
import { subagentSessions } from "../../features/claude-code-session-state"
import { subagentSessions, syncSubagentSessions, setSessionAgent } from "../../features/claude-code-session-state"
import { log } from "../../shared/logger"
import { formatDuration } from "./time-formatter"
import { formatDetailedError } from "./error-formatting"
import { syncTaskDeps, type SyncTaskDeps } from "./sync-task-deps"
import { setSessionFallbackChain, clearSessionFallbackChain } from "../../hooks/model-fallback/hook"
export async function executeSyncTask(
args: DelegateTaskArgs,
@@ -18,6 +19,7 @@ export async function executeSyncTask(
categoryModel: { providerID: string; modelID: string; variant?: string } | undefined,
systemContent: string | undefined,
modelInfo?: ModelFallbackInfo,
fallbackChain?: import("../../shared/model-requirements").FallbackEntry[],
deps: SyncTaskDeps = syncTaskDeps
): Promise<string> {
const { client, directory, onSyncSessionCreated } = executorCtx
@@ -40,6 +42,9 @@ export async function executeSyncTask(
const sessionID = createSessionResult.sessionID
syncSessionID = sessionID
subagentSessions.add(sessionID)
syncSubagentSessions.add(sessionID)
setSessionAgent(sessionID, agentToUse)
setSessionFallbackChain(sessionID, fallbackChain)
if (onSyncSessionCreated) {
log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID })
@@ -59,6 +64,7 @@ export async function executeSyncTask(
if (toastManager) {
toastManager.addTask({
id: taskId,
sessionID,
description: args.description,
agent: agentToUse,
isBackground: false,
@@ -145,6 +151,8 @@ session_id: ${sessionID}
} finally {
if (syncSessionID) {
subagentSessions.delete(syncSessionID)
syncSubagentSessions.delete(syncSessionID)
clearSessionFallbackChain(syncSessionID)
}
}
}

View File

@@ -164,6 +164,7 @@ Prompts MUST be in English.`
let modelInfo: import("../../features/task-toast-manager/types").ModelFallbackInfo | undefined
let actualModel: string | undefined
let isUnstableAgent = false
let fallbackChain: import("../../shared/model-requirements").FallbackEntry[] | undefined
if (args.category) {
const resolution = await resolveCategoryExecution(args, options, inheritedModel, systemDefaultModel)
@@ -176,6 +177,7 @@ Prompts MUST be in English.`
modelInfo = resolution.modelInfo
actualModel = resolution.actualModel
isUnstableAgent = resolution.isUnstableAgent
fallbackChain = resolution.fallbackChain
const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
@@ -206,6 +208,7 @@ Prompts MUST be in English.`
}
agentToUse = resolution.agentToUse
categoryModel = resolution.categoryModel
fallbackChain = resolution.fallbackChain
}
const systemContent = buildSystemContent({
@@ -217,10 +220,10 @@ Prompts MUST be in English.`
})
if (runInBackground) {
return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent)
return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, fallbackChain)
}
return executeSyncTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, modelInfo)
return executeSyncTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, modelInfo, fallbackChain)
},
})
}