Fix model fallback across main/background/sync agents
This commit is contained in:
@@ -69,6 +69,7 @@
|
||||
"directory-readme-injector",
|
||||
"empty-task-response-detector",
|
||||
"think-mode",
|
||||
"model-fallback",
|
||||
"anthropic-context-window-limit-recovery",
|
||||
"preemptive-compaction",
|
||||
"rules-injector",
|
||||
@@ -80,6 +81,7 @@
|
||||
"non-interactive-env",
|
||||
"interactive-bash-session",
|
||||
"thinking-block-validator",
|
||||
"beast-mode-system",
|
||||
"ralph-loop",
|
||||
"category-skill-reminder",
|
||||
"compaction-context-injector",
|
||||
|
||||
@@ -15,7 +15,7 @@ describe("model-resolution check", () => {
|
||||
const sisyphus = info.agents.find((a) => a.name === "sisyphus")
|
||||
expect(sisyphus).toBeDefined()
|
||||
expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6")
|
||||
expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("anthropic")
|
||||
expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("quotio")
|
||||
})
|
||||
|
||||
it("returns category requirements with provider chains", async () => {
|
||||
@@ -26,8 +26,8 @@ describe("model-resolution check", () => {
|
||||
// then: Should have category entries
|
||||
const visual = info.categories.find((c) => c.name === "visual-engineering")
|
||||
expect(visual).toBeDefined()
|
||||
expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
|
||||
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
|
||||
expect(visual!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6-thinking")
|
||||
expect(visual!.requirement.fallbackChain[0]?.providers).toContain("quotio")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -87,7 +87,7 @@ describe("model-resolution check", () => {
|
||||
expect(sisyphus).toBeDefined()
|
||||
expect(sisyphus!.userOverride).toBeUndefined()
|
||||
expect(sisyphus!.effectiveResolution).toContain("Provider fallback:")
|
||||
expect(sisyphus!.effectiveResolution).toContain("anthropic")
|
||||
expect(sisyphus!.effectiveResolution).toContain("quotio")
|
||||
})
|
||||
|
||||
it("captures user variant for agent when configured", async () => {
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import {
|
||||
AGENT_MODEL_REQUIREMENTS,
|
||||
type FallbackEntry,
|
||||
} from "../shared/model-requirements"
|
||||
import type { FallbackEntry } from "../shared/model-requirements"
|
||||
import type { ProviderAvailability } from "./model-fallback-types"
|
||||
import { CLI_AGENT_MODEL_REQUIREMENTS } from "./model-fallback-requirements"
|
||||
import { isProviderAvailable } from "./provider-availability"
|
||||
import { transformModelForProvider } from "./provider-model-id-transform"
|
||||
|
||||
@@ -25,7 +23,7 @@ export function resolveModelFromChain(
|
||||
}
|
||||
|
||||
export function getSisyphusFallbackChain(): FallbackEntry[] {
|
||||
return AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain
|
||||
return CLI_AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain
|
||||
}
|
||||
|
||||
export function isAnyFallbackEntryAvailable(
|
||||
|
||||
153
src/cli/model-fallback-requirements.ts
Normal file
153
src/cli/model-fallback-requirements.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import type { ModelRequirement } from "../shared/model-requirements"
|
||||
|
||||
// NOTE: These requirements are used by the CLI config generator (`generateModelConfig`).
|
||||
// They intentionally use "install-time" provider IDs (anthropic/openai/google/opencode/etc),
|
||||
// not runtime providers like `quotio`/`nvidia`.
|
||||
|
||||
export const CLI_AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
sisyphus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
|
||||
{ providers: ["opencode"], model: "glm-4.7-free" },
|
||||
],
|
||||
requiresAnyModel: true,
|
||||
},
|
||||
hephaestus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
],
|
||||
requiresProvider: ["openai", "github-copilot", "opencode"],
|
||||
},
|
||||
oracle: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
],
|
||||
},
|
||||
librarian: {
|
||||
fallbackChain: [
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
|
||||
{ providers: ["opencode"], model: "glm-4.7-free" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
],
|
||||
},
|
||||
explore: {
|
||||
fallbackChain: [
|
||||
{ providers: ["github-copilot"], model: "grok-code-fast-1" },
|
||||
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
|
||||
{ providers: ["opencode"], model: "gpt-5-nano" },
|
||||
],
|
||||
},
|
||||
"multimodal-looker": {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.6v" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
|
||||
{ providers: ["opencode"], model: "gpt-5-nano" },
|
||||
],
|
||||
},
|
||||
prometheus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
],
|
||||
},
|
||||
metis: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
],
|
||||
},
|
||||
momus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
],
|
||||
},
|
||||
atlas: {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
"visual-engineering": {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-5" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
],
|
||||
},
|
||||
ultrabrain: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
],
|
||||
},
|
||||
deep: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
],
|
||||
requiresModel: "gpt-5.3-codex",
|
||||
},
|
||||
artistry: {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
],
|
||||
requiresModel: "gemini-3-pro",
|
||||
},
|
||||
quick: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["opencode"], model: "gpt-5-nano" },
|
||||
],
|
||||
},
|
||||
"unspecified-low": {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
],
|
||||
},
|
||||
"unspecified-high": {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
],
|
||||
},
|
||||
writing: {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import {
|
||||
AGENT_MODEL_REQUIREMENTS,
|
||||
CATEGORY_MODEL_REQUIREMENTS,
|
||||
} from "../shared/model-requirements"
|
||||
CLI_AGENT_MODEL_REQUIREMENTS,
|
||||
CLI_CATEGORY_MODEL_REQUIREMENTS,
|
||||
} from "./model-fallback-requirements"
|
||||
import type { InstallConfig } from "./types"
|
||||
|
||||
import type { AgentConfig, CategoryConfig, GeneratedOmoConfig } from "./model-fallback-types"
|
||||
@@ -16,9 +16,9 @@ import {
|
||||
|
||||
export type { GeneratedOmoConfig } from "./model-fallback-types"
|
||||
|
||||
const LIBRARIAN_MODEL = "opencode/minimax-m2.5-free"
|
||||
const ZAI_MODEL = "zai-coding-plan/glm-4.7"
|
||||
|
||||
const ULTIMATE_FALLBACK = "opencode/big-pickle"
|
||||
const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
|
||||
const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"
|
||||
|
||||
|
||||
@@ -38,12 +38,12 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
|
||||
return {
|
||||
$schema: SCHEMA_URL,
|
||||
agents: Object.fromEntries(
|
||||
Object.entries(AGENT_MODEL_REQUIREMENTS)
|
||||
Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)
|
||||
.filter(([role, req]) => !(role === "sisyphus" && req.requiresAnyModel))
|
||||
.map(([role]) => [role, { model: ULTIMATE_FALLBACK }])
|
||||
),
|
||||
categories: Object.fromEntries(
|
||||
Object.keys(CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }])
|
||||
Object.keys(CLI_CATEGORY_MODEL_REQUIREMENTS).map((cat) => [cat, { model: ULTIMATE_FALLBACK }])
|
||||
),
|
||||
}
|
||||
}
|
||||
@@ -51,9 +51,9 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
|
||||
const agents: Record<string, AgentConfig> = {}
|
||||
const categories: Record<string, CategoryConfig> = {}
|
||||
|
||||
for (const [role, req] of Object.entries(AGENT_MODEL_REQUIREMENTS)) {
|
||||
if (role === "librarian") {
|
||||
agents[role] = { model: LIBRARIAN_MODEL }
|
||||
for (const [role, req] of Object.entries(CLI_AGENT_MODEL_REQUIREMENTS)) {
|
||||
if (role === "librarian" && avail.zai) {
|
||||
agents[role] = { model: ZAI_MODEL }
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -75,7 +75,6 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
|
||||
if (req.requiresAnyModel && !isAnyFallbackEntryAvailable(fallbackChain, avail)) {
|
||||
continue
|
||||
}
|
||||
|
||||
const resolved = resolveModelFromChain(fallbackChain, avail)
|
||||
if (resolved) {
|
||||
const variant = resolved.variant ?? req.variant
|
||||
@@ -100,11 +99,11 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
|
||||
}
|
||||
}
|
||||
|
||||
for (const [cat, req] of Object.entries(CATEGORY_MODEL_REQUIREMENTS)) {
|
||||
for (const [cat, req] of Object.entries(CLI_CATEGORY_MODEL_REQUIREMENTS)) {
|
||||
// Special case: unspecified-high downgrades to unspecified-low when not isMaxPlan
|
||||
const fallbackChain =
|
||||
cat === "unspecified-high" && !avail.isMaxPlan
|
||||
? CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain
|
||||
? CLI_CATEGORY_MODEL_REQUIREMENTS["unspecified-low"].fallbackChain
|
||||
: req.fallbackChain
|
||||
|
||||
if (req.requiresModel && !isRequiredModelAvailable(req.requiresModel, req.fallbackChain, avail)) {
|
||||
|
||||
@@ -13,6 +13,7 @@ export const HookNameSchema = z.enum([
|
||||
"directory-readme-injector",
|
||||
"empty-task-response-detector",
|
||||
"think-mode",
|
||||
"model-fallback",
|
||||
"anthropic-context-window-limit-recovery",
|
||||
"preemptive-compaction",
|
||||
"rules-injector",
|
||||
@@ -25,6 +26,7 @@ export const HookNameSchema = z.enum([
|
||||
"interactive-bash-session",
|
||||
|
||||
"thinking-block-validator",
|
||||
"beast-mode-system",
|
||||
"ralph-loop",
|
||||
"category-skill-reminder",
|
||||
|
||||
|
||||
@@ -3046,6 +3046,164 @@ describe("BackgroundManager.handleEvent - session.error", () => {
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("retry path releases current concurrency slot and prefers current provider in fallback entry", async () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
const concurrencyManager = getConcurrencyManager(manager)
|
||||
const concurrencyKey = "quotio/claude-opus-4-6-thinking"
|
||||
await concurrencyManager.acquire(concurrencyKey)
|
||||
|
||||
;(manager as unknown as { processKey: (key: string) => Promise<void> }).processKey = async () => {}
|
||||
|
||||
const sessionID = "ses_error_retry"
|
||||
const task = createMockTask({
|
||||
id: "task-session-error-retry",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-retry",
|
||||
description: "task that should retry",
|
||||
agent: "sisyphus",
|
||||
status: "running",
|
||||
concurrencyKey,
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
fallbackChain: [
|
||||
{ providers: ["quotio"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["quotio"], model: "claude-opus-4-5" },
|
||||
],
|
||||
attemptCount: 0,
|
||||
})
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.error",
|
||||
properties: {
|
||||
sessionID,
|
||||
error: {
|
||||
name: "UnknownError",
|
||||
data: {
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(task.status).toBe("pending")
|
||||
expect(task.attemptCount).toBe(1)
|
||||
expect(task.model).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
variant: "max",
|
||||
})
|
||||
expect(task.concurrencyKey).toBeUndefined()
|
||||
expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("retry path triggers on session.status retry events", async () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
;(manager as unknown as { processKey: (key: string) => Promise<void> }).processKey = async () => {}
|
||||
|
||||
const sessionID = "ses_status_retry"
|
||||
const task = createMockTask({
|
||||
id: "task-status-retry",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-status",
|
||||
description: "task that should retry on status",
|
||||
agent: "sisyphus",
|
||||
status: "running",
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
fallbackChain: [
|
||||
{ providers: ["quotio"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["quotio"], model: "gpt-5.3-codex", variant: "high" },
|
||||
],
|
||||
attemptCount: 0,
|
||||
})
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.status",
|
||||
properties: {
|
||||
sessionID,
|
||||
status: {
|
||||
type: "retry",
|
||||
message: "Provider is overloaded",
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(task.status).toBe("pending")
|
||||
expect(task.attemptCount).toBe(1)
|
||||
expect(task.model).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
variant: "max",
|
||||
})
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("retry path triggers on message.updated assistant error events", async () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
;(manager as unknown as { processKey: (key: string) => Promise<void> }).processKey = async () => {}
|
||||
|
||||
const sessionID = "ses_message_updated_retry"
|
||||
const task = createMockTask({
|
||||
id: "task-message-updated-retry",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-message-updated",
|
||||
description: "task that should retry on message.updated",
|
||||
agent: "sisyphus",
|
||||
status: "running",
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
fallbackChain: [
|
||||
{ providers: ["quotio"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["quotio"], model: "gpt-5.3-codex", variant: "high" },
|
||||
],
|
||||
attemptCount: 0,
|
||||
})
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
id: "msg_errored",
|
||||
sessionID,
|
||||
role: "assistant",
|
||||
error: {
|
||||
name: "UnknownError",
|
||||
data: {
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(task.status).toBe("pending")
|
||||
expect(task.attemptCount).toBe(1)
|
||||
expect(task.model).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
variant: "max",
|
||||
})
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager queue processing - error tasks are skipped", () => {
|
||||
|
||||
@@ -5,6 +5,7 @@ import type {
|
||||
LaunchInput,
|
||||
ResumeInput,
|
||||
} from "./types"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { TaskHistory } from "./task-history"
|
||||
import {
|
||||
log,
|
||||
@@ -12,6 +13,8 @@ import {
|
||||
normalizePromptTools,
|
||||
normalizeSDKResponse,
|
||||
promptWithModelSuggestionRetry,
|
||||
readConnectedProvidersCache,
|
||||
readProviderModelsCache,
|
||||
resolveInheritedPromptTools,
|
||||
createInternalAgentTextPart,
|
||||
} from "../../shared"
|
||||
@@ -19,6 +22,12 @@ import { setSessionTools } from "../../shared/session-tools-store"
|
||||
import { ConcurrencyManager } from "./concurrency"
|
||||
import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
|
||||
import { isInsideTmux } from "../../shared/tmux"
|
||||
import {
|
||||
shouldRetryError,
|
||||
getNextFallback,
|
||||
hasMoreFallbacks,
|
||||
selectFallbackProvider,
|
||||
} from "../../shared/model-error-classifier"
|
||||
import {
|
||||
DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS,
|
||||
DEFAULT_STALE_TIMEOUT_MS,
|
||||
@@ -156,6 +165,8 @@ export class BackgroundManager {
|
||||
parentAgent: input.parentAgent,
|
||||
parentTools: input.parentTools,
|
||||
model: input.model,
|
||||
fallbackChain: input.fallbackChain,
|
||||
attemptCount: 0,
|
||||
category: input.category,
|
||||
}
|
||||
|
||||
@@ -677,6 +688,27 @@ export class BackgroundManager {
|
||||
handleEvent(event: Event): void {
|
||||
const props = event.properties
|
||||
|
||||
if (event.type === "message.updated") {
|
||||
const info = props?.info
|
||||
if (!info || typeof info !== "object") return
|
||||
|
||||
const sessionID = (info as Record<string, unknown>)["sessionID"]
|
||||
const role = (info as Record<string, unknown>)["role"]
|
||||
if (typeof sessionID !== "string" || role !== "assistant") return
|
||||
|
||||
const task = this.findBySession(sessionID)
|
||||
if (!task || task.status !== "running") return
|
||||
|
||||
const assistantError = (info as Record<string, unknown>)["error"]
|
||||
if (!assistantError) return
|
||||
|
||||
const errorInfo = {
|
||||
name: this.extractErrorName(assistantError),
|
||||
message: this.extractErrorMessage(assistantError),
|
||||
}
|
||||
this.tryFallbackRetry(task, errorInfo, "message.updated")
|
||||
}
|
||||
|
||||
if (event.type === "message.part.updated" || event.type === "message.part.delta") {
|
||||
if (!props || typeof props !== "object" || !("sessionID" in props)) return
|
||||
const partInfo = props as unknown as MessagePartInfo
|
||||
@@ -773,10 +805,29 @@ export class BackgroundManager {
|
||||
const task = this.findBySession(sessionID)
|
||||
if (!task || task.status !== "running") return
|
||||
|
||||
const errorObj = props?.error as { name?: string; message?: string } | undefined
|
||||
const errorName = errorObj?.name
|
||||
const errorMessage = props ? this.getSessionErrorMessage(props) : undefined
|
||||
|
||||
const errorInfo = { name: errorName, message: errorMessage }
|
||||
if (this.tryFallbackRetry(task, errorInfo, "session.error")) return
|
||||
|
||||
// Original error handling (no retry)
|
||||
const errorMsg = errorMessage ?? "Session error"
|
||||
const canRetry =
|
||||
shouldRetryError(errorInfo) &&
|
||||
!!task.fallbackChain &&
|
||||
hasMoreFallbacks(task.fallbackChain, task.attemptCount ?? 0)
|
||||
log("[background-agent] Session error - no retry:", {
|
||||
taskId: task.id,
|
||||
errorName,
|
||||
errorMessage: errorMsg?.slice(0, 100),
|
||||
hasFallbackChain: !!task.fallbackChain,
|
||||
canRetry,
|
||||
})
|
||||
|
||||
task.status = "error"
|
||||
task.error = errorMessage ?? "Session error"
|
||||
task.error = errorMsg
|
||||
task.completedAt = new Date()
|
||||
this.taskHistory.record(task.parentSessionID, { id: task.id, sessionID: task.sessionID, agent: task.agent, description: task.description, status: "error", category: task.category, startedAt: task.startedAt, completedAt: task.completedAt })
|
||||
|
||||
@@ -860,6 +911,129 @@ export class BackgroundManager {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (event.type === "session.status") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
const status = props?.status as { type?: string; message?: string } | undefined
|
||||
if (!sessionID || status?.type !== "retry") return
|
||||
|
||||
const task = this.findBySession(sessionID)
|
||||
if (!task || task.status !== "running") return
|
||||
|
||||
const errorMessage = typeof status.message === "string" ? status.message : undefined
|
||||
const errorInfo = { name: "SessionRetry", message: errorMessage }
|
||||
this.tryFallbackRetry(task, errorInfo, "session.status")
|
||||
}
|
||||
}
|
||||
|
||||
private tryFallbackRetry(
|
||||
task: BackgroundTask,
|
||||
errorInfo: { name?: string; message?: string },
|
||||
source: string,
|
||||
): boolean {
|
||||
const fallbackChain = task.fallbackChain
|
||||
const canRetry =
|
||||
shouldRetryError(errorInfo) &&
|
||||
fallbackChain &&
|
||||
fallbackChain.length > 0 &&
|
||||
hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
|
||||
|
||||
if (!canRetry) return false
|
||||
|
||||
const attemptCount = task.attemptCount ?? 0
|
||||
const providerModelsCache = readProviderModelsCache()
|
||||
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
|
||||
const connectedSet = connectedProviders ? new Set(connectedProviders) : null
|
||||
|
||||
const isReachable = (entry: FallbackEntry): boolean => {
|
||||
if (!connectedSet) return true
|
||||
|
||||
// Gate only on provider connectivity. Provider model lists can be stale/incomplete,
|
||||
// especially after users manually add models to opencode.json.
|
||||
return entry.providers.some((p) => connectedSet.has(p))
|
||||
}
|
||||
|
||||
let selectedAttemptCount = attemptCount
|
||||
let nextFallback: FallbackEntry | undefined
|
||||
while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
|
||||
const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
|
||||
if (!candidate) break
|
||||
selectedAttemptCount++
|
||||
if (!isReachable(candidate)) {
|
||||
log("[background-agent] Skipping unreachable fallback:", {
|
||||
taskId: task.id,
|
||||
source,
|
||||
model: candidate.model,
|
||||
providers: candidate.providers,
|
||||
})
|
||||
continue
|
||||
}
|
||||
nextFallback = candidate
|
||||
break
|
||||
}
|
||||
if (!nextFallback) return false
|
||||
|
||||
const providerID = selectFallbackProvider(
|
||||
nextFallback.providers,
|
||||
task.model?.providerID,
|
||||
)
|
||||
|
||||
log("[background-agent] Retryable error, attempting fallback:", {
|
||||
taskId: task.id,
|
||||
source,
|
||||
errorName: errorInfo.name,
|
||||
errorMessage: errorInfo.message?.slice(0, 100),
|
||||
attemptCount: selectedAttemptCount,
|
||||
nextModel: `${providerID}/${nextFallback.model}`,
|
||||
})
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
|
||||
if (task.sessionID) {
|
||||
this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
|
||||
const idleTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
task.attemptCount = selectedAttemptCount
|
||||
task.model = {
|
||||
providerID,
|
||||
modelID: nextFallback.model,
|
||||
variant: nextFallback.variant,
|
||||
}
|
||||
task.status = "pending"
|
||||
task.sessionID = undefined
|
||||
task.startedAt = undefined
|
||||
task.queuedAt = new Date()
|
||||
task.error = undefined
|
||||
|
||||
const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent
|
||||
const queue = this.queuesByKey.get(key) ?? []
|
||||
const retryInput: LaunchInput = {
|
||||
description: task.description,
|
||||
prompt: task.prompt,
|
||||
agent: task.agent,
|
||||
parentSessionID: task.parentSessionID,
|
||||
parentMessageID: task.parentMessageID,
|
||||
parentModel: task.parentModel,
|
||||
parentAgent: task.parentAgent,
|
||||
parentTools: task.parentTools,
|
||||
model: task.model,
|
||||
fallbackChain: task.fallbackChain,
|
||||
category: task.category,
|
||||
}
|
||||
queue.push({ task, input: retryInput })
|
||||
this.queuesByKey.set(key, queue)
|
||||
this.processKey(key)
|
||||
return true
|
||||
}
|
||||
|
||||
markForNotification(task: BackgroundTask): void {
|
||||
@@ -1273,10 +1447,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
if (isCompactionAgent(info?.agent)) {
|
||||
continue
|
||||
}
|
||||
if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
|
||||
agent = info.agent ?? task.parentAgent
|
||||
model = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
|
||||
tools = normalizePromptTools(info.tools) ?? tools
|
||||
const normalizedTools = this.isRecord(info?.tools)
|
||||
? normalizePromptTools(info.tools as Record<string, boolean | "allow" | "deny" | "ask">)
|
||||
: undefined
|
||||
if (info?.agent || info?.model || (info?.modelID && info?.providerID) || normalizedTools) {
|
||||
agent = info?.agent ?? task.parentAgent
|
||||
model = info?.model ?? (info?.providerID && info?.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
|
||||
tools = normalizedTools ?? tools
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -1296,7 +1473,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
tools = normalizePromptTools(currentMessage?.tools) ?? tools
|
||||
}
|
||||
|
||||
tools = resolveInheritedPromptTools(task.parentSessionID, tools)
|
||||
const resolvedTools = resolveInheritedPromptTools(task.parentSessionID, tools)
|
||||
|
||||
log("[background-agent] notifyParentSession context:", {
|
||||
taskId: task.id,
|
||||
@@ -1311,7 +1488,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
noReply: !allComplete,
|
||||
...(agent !== undefined ? { agent } : {}),
|
||||
...(model !== undefined ? { model } : {}),
|
||||
...(tools ? { tools } : {}),
|
||||
...(resolvedTools ? { tools: resolvedTools } : {}),
|
||||
parts: [createInternalAgentTextPart(notification)],
|
||||
},
|
||||
})
|
||||
@@ -1394,6 +1571,46 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
return ""
|
||||
}
|
||||
|
||||
private extractErrorName(error: unknown): string | undefined {
|
||||
if (this.isRecord(error) && typeof error["name"] === "string") return error["name"]
|
||||
if (error instanceof Error) return error.name
|
||||
return undefined
|
||||
}
|
||||
|
||||
private extractErrorMessage(error: unknown): string | undefined {
|
||||
if (!error) return undefined
|
||||
if (typeof error === "string") return error
|
||||
if (error instanceof Error) return error.message
|
||||
|
||||
if (this.isRecord(error)) {
|
||||
const dataRaw = error["data"]
|
||||
const candidates: unknown[] = [
|
||||
error,
|
||||
dataRaw,
|
||||
error["error"],
|
||||
this.isRecord(dataRaw) ? (dataRaw as Record<string, unknown>)["error"] : undefined,
|
||||
error["cause"],
|
||||
]
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (typeof candidate === "string" && candidate.length > 0) return candidate
|
||||
if (
|
||||
this.isRecord(candidate) &&
|
||||
typeof candidate["message"] === "string" &&
|
||||
candidate["message"].length > 0
|
||||
) {
|
||||
return candidate["message"]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.stringify(error)
|
||||
} catch {
|
||||
return String(error)
|
||||
}
|
||||
}
|
||||
|
||||
private isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
@@ -1610,6 +1827,16 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
// Progress is already tracked via handleEvent(message.part.updated),
|
||||
// so we skip the expensive session.messages() fetch here.
|
||||
// Completion will be detected when session transitions to idle.
|
||||
if (sessionStatus?.type === "retry") {
|
||||
const retryMessage = typeof (sessionStatus as { message?: string }).message === "string"
|
||||
? (sessionStatus as { message?: string }).message
|
||||
: undefined
|
||||
const errorInfo = { name: "SessionRetry", message: retryMessage }
|
||||
if (this.tryFallbackRetry(task, errorInfo, "polling:session.status")) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
log("[background-agent] Session still running, relying on event-based progress:", {
|
||||
taskId: task.id,
|
||||
sessionID,
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
|
||||
export type BackgroundTaskStatus =
|
||||
| "pending"
|
||||
| "running"
|
||||
@@ -31,6 +33,10 @@ export interface BackgroundTask {
|
||||
progress?: TaskProgress
|
||||
parentModel?: { providerID: string; modelID: string }
|
||||
model?: { providerID: string; modelID: string; variant?: string }
|
||||
/** Fallback chain for runtime retry on model errors */
|
||||
fallbackChain?: FallbackEntry[]
|
||||
/** Number of fallback retry attempts made */
|
||||
attemptCount?: number
|
||||
/** Active concurrency slot key */
|
||||
concurrencyKey?: string
|
||||
/** Persistent key for re-acquiring concurrency on resume */
|
||||
@@ -60,6 +66,8 @@ export interface LaunchInput {
|
||||
parentAgent?: string
|
||||
parentTools?: Record<string, boolean>
|
||||
model?: { providerID: string; modelID: string; variant?: string }
|
||||
/** Fallback chain for runtime retry on model errors */
|
||||
fallbackChain?: FallbackEntry[]
|
||||
isUnstableAgent?: boolean
|
||||
skills?: string[]
|
||||
skillContent?: string
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
export const subagentSessions = new Set<string>()
|
||||
export const syncSubagentSessions = new Set<string>()
|
||||
|
||||
let _mainSessionID: string | undefined
|
||||
|
||||
@@ -14,6 +15,7 @@ export function getMainSessionID(): string | undefined {
|
||||
export function _resetForTesting(): void {
|
||||
_mainSessionID = undefined
|
||||
subagentSessions.clear()
|
||||
syncSubagentSessions.clear()
|
||||
sessionAgentMap.clear()
|
||||
}
|
||||
|
||||
|
||||
@@ -25,13 +25,13 @@ export function discoverAllSkillsBlocking(dirs: string[], scopes: SkillScope[]):
|
||||
const { port1, port2 } = new MessageChannel()
|
||||
|
||||
const worker = new Worker(new URL("./discover-worker.ts", import.meta.url), {
|
||||
workerData: { signal }
|
||||
// workerData is structured-cloned; pass the SharedArrayBuffer and recreate the view in the worker.
|
||||
workerData: { signalBuffer: signal.buffer },
|
||||
})
|
||||
|
||||
worker.postMessage({ port: port2 }, [port2])
|
||||
|
||||
const input: WorkerInput = { dirs, scopes }
|
||||
port1.postMessage(input)
|
||||
// Avoid a race where the worker hasn't attached listeners to the MessagePort yet.
|
||||
worker.postMessage({ port: port2, input }, [port2])
|
||||
|
||||
const waitResult = Atomics.wait(signal, 0, 0, TIMEOUT_MS)
|
||||
|
||||
|
||||
@@ -18,25 +18,24 @@ interface WorkerOutputError {
|
||||
error: { message: string; stack?: string }
|
||||
}
|
||||
|
||||
const { signal } = workerData as { signal: Int32Array }
|
||||
const { signalBuffer } = workerData as { signalBuffer: SharedArrayBuffer }
|
||||
const signal = new Int32Array(signalBuffer)
|
||||
|
||||
if (!parentPort) {
|
||||
throw new Error("Worker must be run with parentPort")
|
||||
}
|
||||
|
||||
parentPort.once("message", (data: { port: MessagePort }) => {
|
||||
const { port } = data
|
||||
parentPort.once("message", (data: { port: MessagePort; input: WorkerInput }) => {
|
||||
const { port, input } = data
|
||||
|
||||
port.on("message", async (input: WorkerInput) => {
|
||||
void (async () => {
|
||||
try {
|
||||
const results = await Promise.all(
|
||||
input.dirs.map(dir => discoverSkillsInDirAsync(dir))
|
||||
)
|
||||
|
||||
const results = await Promise.all(input.dirs.map((dir) => discoverSkillsInDirAsync(dir)))
|
||||
|
||||
const skills = results.flat()
|
||||
|
||||
|
||||
const output: WorkerOutputSuccess = { ok: true, skills }
|
||||
|
||||
|
||||
port.postMessage(output)
|
||||
Atomics.store(signal, 0, 1)
|
||||
Atomics.notify(signal, 0)
|
||||
@@ -48,10 +47,10 @@ parentPort.once("message", (data: { port: MessagePort }) => {
|
||||
stack: error instanceof Error ? error.stack : undefined,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
port.postMessage(output)
|
||||
Atomics.store(signal, 0, 1)
|
||||
Atomics.notify(signal, 0)
|
||||
}
|
||||
})
|
||||
})()
|
||||
})
|
||||
|
||||
@@ -217,6 +217,27 @@ describe("TaskToastManager", () => {
|
||||
expect(call.body.message).toContain("(inherited from parent)")
|
||||
})
|
||||
|
||||
test("should display warning when model is runtime fallback", () => {
|
||||
// given - runtime-fallback indicates a model swap mid-run
|
||||
const task = {
|
||||
id: "task_runtime",
|
||||
description: "Task with runtime fallback model",
|
||||
agent: "explore",
|
||||
isBackground: false,
|
||||
modelInfo: { model: "quotio/oswe-vscode-prime", type: "runtime-fallback" as const },
|
||||
}
|
||||
|
||||
// when - addTask is called
|
||||
toastManager.addTask(task)
|
||||
|
||||
// then - toast should show fallback warning
|
||||
expect(mockClient.tui.showToast).toHaveBeenCalled()
|
||||
const call = mockClient.tui.showToast.mock.calls[0][0]
|
||||
expect(call.body.message).toContain("[FALLBACK]")
|
||||
expect(call.body.message).toContain("quotio/oswe-vscode-prime")
|
||||
expect(call.body.message).toContain("(runtime fallback)")
|
||||
})
|
||||
|
||||
test("should not display model info when user-defined", () => {
|
||||
// given - a task with user-defined model
|
||||
const task = {
|
||||
@@ -257,4 +278,32 @@ describe("TaskToastManager", () => {
|
||||
expect(call.body.message).not.toContain("[FALLBACK] Model:")
|
||||
})
|
||||
})
|
||||
|
||||
describe("updateTaskModelBySession", () => {
|
||||
test("updates task model info and shows fallback toast", () => {
|
||||
// given - task without model info
|
||||
const task = {
|
||||
id: "task_update",
|
||||
sessionID: "ses_update_1",
|
||||
description: "Task that will fallback",
|
||||
agent: "explore",
|
||||
isBackground: false,
|
||||
}
|
||||
toastManager.addTask(task)
|
||||
mockClient.tui.showToast.mockClear()
|
||||
|
||||
// when - runtime fallback applied by session
|
||||
toastManager.updateTaskModelBySession("ses_update_1", {
|
||||
model: "nvidia/stepfun-ai/step-3.5-flash",
|
||||
type: "runtime-fallback",
|
||||
})
|
||||
|
||||
// then - new toast shows fallback model
|
||||
expect(mockClient.tui.showToast).toHaveBeenCalled()
|
||||
const call = mockClient.tui.showToast.mock.calls[0][0]
|
||||
expect(call.body.message).toContain("[FALLBACK]")
|
||||
expect(call.body.message).toContain("nvidia/stepfun-ai/step-3.5-flash")
|
||||
expect(call.body.message).toContain("(runtime fallback)")
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -20,6 +20,7 @@ export class TaskToastManager {
|
||||
|
||||
addTask(task: {
|
||||
id: string
|
||||
sessionID?: string
|
||||
description: string
|
||||
agent: string
|
||||
isBackground: boolean
|
||||
@@ -30,6 +31,7 @@ export class TaskToastManager {
|
||||
}): void {
|
||||
const trackedTask: TrackedTask = {
|
||||
id: task.id,
|
||||
sessionID: task.sessionID,
|
||||
description: task.description,
|
||||
agent: task.agent,
|
||||
status: task.status ?? "running",
|
||||
@@ -54,6 +56,18 @@ export class TaskToastManager {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Update model info for a task by session ID
|
||||
*/
|
||||
updateTaskModelBySession(sessionID: string, modelInfo: ModelFallbackInfo): void {
|
||||
if (!sessionID) return
|
||||
const task = Array.from(this.tasks.values()).find((t) => t.sessionID === sessionID)
|
||||
if (!task) return
|
||||
if (task.modelInfo?.model === modelInfo.model && task.modelInfo?.type === modelInfo.type) return
|
||||
task.modelInfo = modelInfo
|
||||
this.showTaskListToast(task)
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove completed/error task
|
||||
*/
|
||||
@@ -110,14 +124,17 @@ export class TaskToastManager {
|
||||
const lines: string[] = []
|
||||
|
||||
const isFallback = newTask.modelInfo && (
|
||||
newTask.modelInfo.type === "inherited" || newTask.modelInfo.type === "system-default"
|
||||
newTask.modelInfo.type === "inherited" ||
|
||||
newTask.modelInfo.type === "system-default" ||
|
||||
newTask.modelInfo.type === "runtime-fallback"
|
||||
)
|
||||
if (isFallback) {
|
||||
const suffixMap: Record<"inherited" | "system-default", string> = {
|
||||
const suffixMap: Record<"inherited" | "system-default" | "runtime-fallback", string> = {
|
||||
inherited: " (inherited from parent)",
|
||||
"system-default": " (system default fallback)",
|
||||
"runtime-fallback": " (runtime fallback)",
|
||||
}
|
||||
const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default"]
|
||||
const suffix = suffixMap[newTask.modelInfo!.type as "inherited" | "system-default" | "runtime-fallback"]
|
||||
lines.push(`[FALLBACK] Model: ${newTask.modelInfo!.model}${suffix}`)
|
||||
lines.push("")
|
||||
}
|
||||
|
||||
@@ -4,12 +4,13 @@ export type TaskStatus = "running" | "queued" | "completed" | "error"
|
||||
|
||||
export interface ModelFallbackInfo {
|
||||
model: string
|
||||
type: "user-defined" | "inherited" | "category-default" | "system-default"
|
||||
type: "user-defined" | "inherited" | "category-default" | "system-default" | "runtime-fallback"
|
||||
source?: ModelSource
|
||||
}
|
||||
|
||||
export interface TrackedTask {
|
||||
id: string
|
||||
sessionID?: string
|
||||
description: string
|
||||
agent: string
|
||||
status: TaskStatus
|
||||
|
||||
54
src/hooks/beast-mode-system/hook.test.ts
Normal file
54
src/hooks/beast-mode-system/hook.test.ts
Normal file
@@ -0,0 +1,54 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import { clearSessionModel, setSessionModel } from "../../shared/session-model-state"
|
||||
import { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook"
|
||||
|
||||
describe("beast-mode-system hook", () => {
|
||||
test("injects beast mode prompt for copilot gpt-4.1", async () => {
|
||||
//#given
|
||||
const sessionID = "ses_beast"
|
||||
setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
|
||||
const hook = createBeastModeSystemHook()
|
||||
const output = { system: [] as string[] }
|
||||
|
||||
//#when
|
||||
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
|
||||
|
||||
//#then
|
||||
expect(output.system[0]).toContain("Beast Mode")
|
||||
expect(output.system[0]).toContain(BEAST_MODE_SYSTEM_PROMPT.trim().slice(0, 20))
|
||||
|
||||
clearSessionModel(sessionID)
|
||||
})
|
||||
|
||||
test("does not inject for other models", async () => {
|
||||
//#given
|
||||
const sessionID = "ses_no_beast"
|
||||
setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" })
|
||||
const hook = createBeastModeSystemHook()
|
||||
const output = { system: [] as string[] }
|
||||
|
||||
//#when
|
||||
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
|
||||
|
||||
//#then
|
||||
expect(output.system.length).toBe(0)
|
||||
|
||||
clearSessionModel(sessionID)
|
||||
})
|
||||
|
||||
test("avoids duplicate insertion", async () => {
|
||||
//#given
|
||||
const sessionID = "ses_dupe"
|
||||
setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
|
||||
const hook = createBeastModeSystemHook()
|
||||
const output = { system: [BEAST_MODE_SYSTEM_PROMPT] }
|
||||
|
||||
//#when
|
||||
await hook["experimental.chat.system.transform"]?.({ sessionID }, output)
|
||||
|
||||
//#then
|
||||
expect(output.system.length).toBe(1)
|
||||
|
||||
clearSessionModel(sessionID)
|
||||
})
|
||||
})
|
||||
31
src/hooks/beast-mode-system/hook.ts
Normal file
31
src/hooks/beast-mode-system/hook.ts
Normal file
@@ -0,0 +1,31 @@
|
||||
import { getSessionModel } from "../../shared/session-model-state"
|
||||
|
||||
export const BEAST_MODE_SYSTEM_PROMPT = `Beast Mode (Copilot GPT-4.1)
|
||||
|
||||
You are an autonomous coding agent. Execute the task end-to-end.
|
||||
- Make a brief plan, then act.
|
||||
- Prefer concrete edits and verification over speculation.
|
||||
- Run relevant tests when feasible.
|
||||
- Do not ask the user to perform actions you can do yourself.
|
||||
- If blocked, state exactly what is needed to proceed.
|
||||
- Keep responses concise and actionable.`
|
||||
|
||||
function isBeastModeModel(model: { providerID: string; modelID: string } | undefined): boolean {
|
||||
return model?.providerID === "github-copilot" && model.modelID === "gpt-4.1"
|
||||
}
|
||||
|
||||
export function createBeastModeSystemHook() {
|
||||
return {
|
||||
"experimental.chat.system.transform": async (
|
||||
input: { sessionID: string },
|
||||
output: { system: string[] },
|
||||
): Promise<void> => {
|
||||
const model = getSessionModel(input.sessionID)
|
||||
if (!isBeastModeModel(model)) return
|
||||
|
||||
if (output.system.some((entry) => entry.includes("Beast Mode"))) return
|
||||
|
||||
output.system.unshift(BEAST_MODE_SYSTEM_PROMPT)
|
||||
},
|
||||
}
|
||||
}
|
||||
1
src/hooks/beast-mode-system/index.ts
Normal file
1
src/hooks/beast-mode-system/index.ts
Normal file
@@ -0,0 +1 @@
|
||||
export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./hook"
|
||||
@@ -14,6 +14,7 @@ export { createEmptyTaskResponseDetectorHook } from "./empty-task-response-detec
|
||||
export { createAnthropicContextWindowLimitRecoveryHook, type AnthropicContextWindowLimitRecoveryOptions } from "./anthropic-context-window-limit-recovery";
|
||||
|
||||
export { createThinkModeHook } from "./think-mode";
|
||||
export { createModelFallbackHook, setPendingModelFallback, clearPendingModelFallback, type ModelFallbackState } from "./model-fallback/hook";
|
||||
export { createClaudeCodeHooksHook } from "./claude-code-hooks";
|
||||
export { createRulesInjectorHook } from "./rules-injector";
|
||||
export { createBackgroundNotificationHook } from "./background-notification"
|
||||
@@ -31,7 +32,6 @@ export { createNoSisyphusGptHook } from "./no-sisyphus-gpt";
|
||||
export { createNoHephaestusNonGptHook } from "./no-hephaestus-non-gpt";
|
||||
export { createAutoSlashCommandHook } from "./auto-slash-command";
|
||||
export { createEditErrorRecoveryHook } from "./edit-error-recovery";
|
||||
export { createJsonErrorRecoveryHook } from "./json-error-recovery";
|
||||
export { createPrometheusMdOnlyHook } from "./prometheus-md-only";
|
||||
export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad";
|
||||
export { createTaskResumeInfoHook } from "./task-resume-info";
|
||||
@@ -47,5 +47,4 @@ export { createPreemptiveCompactionHook } from "./preemptive-compaction";
|
||||
export { createTasksTodowriteDisablerHook } from "./tasks-todowrite-disabler";
|
||||
export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
|
||||
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
|
||||
export { createHashlineEditDiffEnhancerHook } from "./hashline-edit-diff-enhancer";
|
||||
|
||||
export { createBeastModeSystemHook, BEAST_MODE_SYSTEM_PROMPT } from "./beast-mode-system";
|
||||
|
||||
141
src/hooks/model-fallback/hook.test.ts
Normal file
141
src/hooks/model-fallback/hook.test.ts
Normal file
@@ -0,0 +1,141 @@
|
||||
import { beforeEach, describe, expect, test } from "bun:test"
|
||||
|
||||
import {
|
||||
clearPendingModelFallback,
|
||||
createModelFallbackHook,
|
||||
setPendingModelFallback,
|
||||
} from "./hook"
|
||||
|
||||
describe("model fallback hook", () => {
|
||||
beforeEach(() => {
|
||||
clearPendingModelFallback("ses_model_fallback_main")
|
||||
})
|
||||
|
||||
test("applies pending fallback on chat.message by overriding model", async () => {
|
||||
//#given
|
||||
const hook = createModelFallbackHook() as unknown as {
|
||||
"chat.message"?: (
|
||||
input: { sessionID: string },
|
||||
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
|
||||
) => Promise<void>
|
||||
}
|
||||
|
||||
const set = setPendingModelFallback(
|
||||
"ses_model_fallback_main",
|
||||
"Sisyphus (Ultraworker)",
|
||||
"quotio",
|
||||
"claude-opus-4-6-thinking",
|
||||
)
|
||||
expect(set).toBe(true)
|
||||
|
||||
const output = {
|
||||
message: {
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
variant: "max",
|
||||
},
|
||||
parts: [{ type: "text", text: "continue" }],
|
||||
}
|
||||
|
||||
//#when
|
||||
await hook["chat.message"]?.(
|
||||
{ sessionID: "ses_model_fallback_main" },
|
||||
output,
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(output.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
})
|
||||
|
||||
test("preserves fallback progression across repeated session.error retries", async () => {
|
||||
//#given
|
||||
const hook = createModelFallbackHook() as unknown as {
|
||||
"chat.message"?: (
|
||||
input: { sessionID: string },
|
||||
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
|
||||
) => Promise<void>
|
||||
}
|
||||
const sessionID = "ses_model_fallback_main"
|
||||
|
||||
expect(
|
||||
setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6-thinking"),
|
||||
).toBe(true)
|
||||
|
||||
const firstOutput = {
|
||||
message: {
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
variant: "max",
|
||||
},
|
||||
parts: [{ type: "text", text: "continue" }],
|
||||
}
|
||||
|
||||
//#when - first retry is applied
|
||||
await hook["chat.message"]?.({ sessionID }, firstOutput)
|
||||
|
||||
//#then
|
||||
expect(firstOutput.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
|
||||
//#when - second error re-arms fallback and should advance to next entry
|
||||
expect(
|
||||
setPendingModelFallback(sessionID, "Sisyphus (Ultraworker)", "quotio", "claude-opus-4-6"),
|
||||
).toBe(true)
|
||||
|
||||
const secondOutput = {
|
||||
message: {
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6" },
|
||||
},
|
||||
parts: [{ type: "text", text: "continue" }],
|
||||
}
|
||||
await hook["chat.message"]?.({ sessionID }, secondOutput)
|
||||
|
||||
//#then - chain should progress to entry[1], not repeat entry[0]
|
||||
expect(secondOutput.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "gpt-5.3-codex",
|
||||
})
|
||||
expect(secondOutput.message["variant"]).toBe("high")
|
||||
})
|
||||
|
||||
test("shows toast when fallback is applied", async () => {
|
||||
//#given
|
||||
const toastCalls: Array<{ title: string; message: string }> = []
|
||||
const hook = createModelFallbackHook({
|
||||
toast: async ({ title, message }) => {
|
||||
toastCalls.push({ title, message })
|
||||
},
|
||||
}) as unknown as {
|
||||
"chat.message"?: (
|
||||
input: { sessionID: string },
|
||||
output: { message: Record<string, unknown>; parts: Array<{ type: string; text?: string }> },
|
||||
) => Promise<void>
|
||||
}
|
||||
|
||||
const set = setPendingModelFallback(
|
||||
"ses_model_fallback_toast",
|
||||
"Sisyphus (Ultraworker)",
|
||||
"quotio",
|
||||
"claude-opus-4-6-thinking",
|
||||
)
|
||||
expect(set).toBe(true)
|
||||
|
||||
const output = {
|
||||
message: {
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
variant: "max",
|
||||
},
|
||||
parts: [{ type: "text", text: "continue" }],
|
||||
}
|
||||
|
||||
//#when
|
||||
await hook["chat.message"]?.({ sessionID: "ses_model_fallback_toast" }, output)
|
||||
|
||||
//#then
|
||||
expect(toastCalls.length).toBe(1)
|
||||
expect(toastCalls[0]?.title).toBe("Model fallback")
|
||||
})
|
||||
})
|
||||
228
src/hooks/model-fallback/hook.ts
Normal file
228
src/hooks/model-fallback/hook.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { getAgentConfigKey } from "../../shared/agent-display-names"
|
||||
import { AGENT_MODEL_REQUIREMENTS } from "../../shared/model-requirements"
|
||||
import { readConnectedProvidersCache, readProviderModelsCache } from "../../shared/connected-providers-cache"
|
||||
import { selectFallbackProvider } from "../../shared/model-error-classifier"
|
||||
import { log } from "../../shared/logger"
|
||||
import { getTaskToastManager } from "../../features/task-toast-manager"
|
||||
import type { ChatMessageInput, ChatMessageHandlerOutput } from "../../plugin/chat-message"
|
||||
|
||||
type FallbackToast = (input: {
|
||||
title: string
|
||||
message: string
|
||||
variant?: "info" | "success" | "warning" | "error"
|
||||
duration?: number
|
||||
}) => void | Promise<void>
|
||||
|
||||
type FallbackCallback = (input: {
|
||||
sessionID: string
|
||||
providerID: string
|
||||
modelID: string
|
||||
variant?: string
|
||||
}) => void | Promise<void>
|
||||
|
||||
export type ModelFallbackState = {
|
||||
providerID: string
|
||||
modelID: string
|
||||
fallbackChain: FallbackEntry[]
|
||||
attemptCount: number
|
||||
pending: boolean
|
||||
}
|
||||
|
||||
/**
|
||||
* Map of sessionID -> pending model fallback state
|
||||
* When a model error occurs, we store the fallback info here.
|
||||
* The next chat.message call will use this to switch to the fallback model.
|
||||
*/
|
||||
const pendingModelFallbacks = new Map<string, ModelFallbackState>()
|
||||
const lastToastKey = new Map<string, string>()
|
||||
|
||||
/**
|
||||
* Sets a pending model fallback for a session.
|
||||
* Called when a model error is detected in session.error handler.
|
||||
*/
|
||||
export function setPendingModelFallback(
|
||||
sessionID: string,
|
||||
agentName: string,
|
||||
currentProviderID: string,
|
||||
currentModelID: string,
|
||||
): boolean {
|
||||
const agentKey = getAgentConfigKey(agentName)
|
||||
const requirements = AGENT_MODEL_REQUIREMENTS[agentKey]
|
||||
if (!requirements || !requirements.fallbackChain || requirements.fallbackChain.length === 0) {
|
||||
log("[model-fallback] No fallback chain for agent: " + agentName + " (key: " + agentKey + ")")
|
||||
return false
|
||||
}
|
||||
|
||||
const fallbackChain = requirements.fallbackChain
|
||||
const existing = pendingModelFallbacks.get(sessionID)
|
||||
|
||||
if (existing) {
|
||||
// Preserve progression across repeated session.error retries in same session.
|
||||
// We only mark the next turn as pending fallback application.
|
||||
existing.providerID = currentProviderID
|
||||
existing.modelID = currentModelID
|
||||
existing.pending = true
|
||||
if (existing.attemptCount >= existing.fallbackChain.length) {
|
||||
log("[model-fallback] Fallback chain exhausted for session: " + sessionID)
|
||||
return false
|
||||
}
|
||||
log("[model-fallback] Re-armed pending fallback for session: " + sessionID)
|
||||
return true
|
||||
}
|
||||
|
||||
const state: ModelFallbackState = {
|
||||
providerID: currentProviderID,
|
||||
modelID: currentModelID,
|
||||
fallbackChain,
|
||||
attemptCount: 0,
|
||||
pending: true,
|
||||
}
|
||||
|
||||
pendingModelFallbacks.set(sessionID, state)
|
||||
log("[model-fallback] Set pending fallback for session: " + sessionID + ", agent: " + agentName)
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next fallback model for a session.
|
||||
* Increments attemptCount each time called.
|
||||
*/
|
||||
export function getNextFallback(
|
||||
sessionID: string,
|
||||
): { providerID: string; modelID: string; variant?: string } | null {
|
||||
const state = pendingModelFallbacks.get(sessionID)
|
||||
if (!state) return null
|
||||
|
||||
if (!state.pending) return null
|
||||
|
||||
const { fallbackChain } = state
|
||||
|
||||
const providerModelsCache = readProviderModelsCache()
|
||||
const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
|
||||
const connectedSet = connectedProviders ? new Set(connectedProviders) : null
|
||||
|
||||
const isReachable = (entry: FallbackEntry): boolean => {
|
||||
if (!connectedSet) return true
|
||||
|
||||
// Gate only on provider connectivity. Provider model lists can be stale/incomplete,
|
||||
// especially after users manually add models to opencode.json.
|
||||
return entry.providers.some((p) => connectedSet.has(p))
|
||||
}
|
||||
|
||||
while (state.attemptCount < fallbackChain.length) {
|
||||
const attemptCount = state.attemptCount
|
||||
const fallback = fallbackChain[attemptCount]
|
||||
state.attemptCount++
|
||||
|
||||
if (!isReachable(fallback)) {
|
||||
log("[model-fallback] Skipping unreachable fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model)
|
||||
continue
|
||||
}
|
||||
|
||||
const providerID = selectFallbackProvider(fallback.providers, state.providerID)
|
||||
state.pending = false
|
||||
|
||||
log("[model-fallback] Using fallback for session: " + sessionID + ", attempt: " + attemptCount + ", model: " + fallback.model)
|
||||
|
||||
return {
|
||||
providerID,
|
||||
modelID: fallback.model,
|
||||
variant: fallback.variant,
|
||||
}
|
||||
}
|
||||
|
||||
log("[model-fallback] No more fallbacks for session: " + sessionID)
|
||||
pendingModelFallbacks.delete(sessionID)
|
||||
return null
|
||||
}
|
||||
|
||||
/**
|
||||
* Clears the pending fallback for a session.
|
||||
* Called after fallback is successfully applied.
|
||||
*/
|
||||
export function clearPendingModelFallback(sessionID: string): void {
|
||||
pendingModelFallbacks.delete(sessionID)
|
||||
lastToastKey.delete(sessionID)
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if there's a pending fallback for a session.
|
||||
*/
|
||||
export function hasPendingModelFallback(sessionID: string): boolean {
|
||||
const state = pendingModelFallbacks.get(sessionID)
|
||||
return state?.pending === true
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current fallback state for a session (for debugging).
|
||||
*/
|
||||
export function getFallbackState(sessionID: string): ModelFallbackState | undefined {
|
||||
return pendingModelFallbacks.get(sessionID)
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a chat.message hook that applies model fallbacks when pending.
|
||||
*/
|
||||
export function createModelFallbackHook(args?: { toast?: FallbackToast; onApplied?: FallbackCallback }) {
|
||||
const toast = args?.toast
|
||||
const onApplied = args?.onApplied
|
||||
|
||||
return {
|
||||
"chat.message": async (
|
||||
input: ChatMessageInput,
|
||||
output: ChatMessageHandlerOutput,
|
||||
): Promise<void> => {
|
||||
const { sessionID } = input
|
||||
if (!sessionID) return
|
||||
|
||||
const fallback = getNextFallback(sessionID)
|
||||
if (!fallback) return
|
||||
|
||||
output.message["model"] = {
|
||||
providerID: fallback.providerID,
|
||||
modelID: fallback.modelID,
|
||||
}
|
||||
if (fallback.variant !== undefined) {
|
||||
output.message["variant"] = fallback.variant
|
||||
} else {
|
||||
delete output.message["variant"]
|
||||
}
|
||||
if (toast) {
|
||||
const key = `${sessionID}:${fallback.providerID}/${fallback.modelID}:${fallback.variant ?? ""}`
|
||||
if (lastToastKey.get(sessionID) !== key) {
|
||||
lastToastKey.set(sessionID, key)
|
||||
const variantLabel = fallback.variant ? ` (${fallback.variant})` : ""
|
||||
await Promise.resolve(
|
||||
toast({
|
||||
title: "Model fallback",
|
||||
message: `Using ${fallback.providerID}/${fallback.modelID}${variantLabel}`,
|
||||
variant: "warning",
|
||||
duration: 5000,
|
||||
}),
|
||||
)
|
||||
}
|
||||
}
|
||||
if (onApplied) {
|
||||
await Promise.resolve(
|
||||
onApplied({
|
||||
sessionID,
|
||||
providerID: fallback.providerID,
|
||||
modelID: fallback.modelID,
|
||||
variant: fallback.variant,
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
const toastManager = getTaskToastManager()
|
||||
if (toastManager) {
|
||||
const variantLabel = fallback.variant ? ` (${fallback.variant})` : ""
|
||||
toastManager.updateTaskModelBySession(sessionID, {
|
||||
model: `${fallback.providerID}/${fallback.modelID}${variantLabel}`,
|
||||
type: "runtime-fallback",
|
||||
})
|
||||
}
|
||||
log("[model-fallback] Applied fallback model: " + JSON.stringify(fallback))
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import { createChatParamsHandler } from "./plugin/chat-params"
|
||||
import { createChatHeadersHandler } from "./plugin/chat-headers"
|
||||
import { createChatMessageHandler } from "./plugin/chat-message"
|
||||
import { createMessagesTransformHandler } from "./plugin/messages-transform"
|
||||
import { createSystemTransformHandler } from "./plugin/system-transform"
|
||||
import { createEventHandler } from "./plugin/event"
|
||||
import { createToolExecuteAfterHandler } from "./plugin/tool-execute-after"
|
||||
import { createToolExecuteBeforeHandler } from "./plugin/tool-execute-before"
|
||||
@@ -49,6 +50,10 @@ export function createPluginInterface(args: {
|
||||
hooks,
|
||||
}),
|
||||
|
||||
"experimental.chat.system.transform": createSystemTransformHandler({
|
||||
hooks,
|
||||
}),
|
||||
|
||||
config: managers.configHandler,
|
||||
|
||||
event: createEventHandler({
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { OhMyOpenCodeConfig } from "../config"
|
||||
import type { PluginContext } from "./types"
|
||||
|
||||
import { hasConnectedProvidersCache } from "../shared"
|
||||
import { setSessionModel } from "../shared/session-model-state"
|
||||
import { setSessionAgent } from "../features/claude-code-session-state"
|
||||
import { applyUltraworkModelOverrideOnMessage } from "./ultrawork-model-override"
|
||||
|
||||
@@ -13,7 +14,12 @@ type FirstMessageVariantGate = {
|
||||
}
|
||||
|
||||
type ChatMessagePart = { type: string; text?: string; [key: string]: unknown }
|
||||
type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
|
||||
export type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
|
||||
export type ChatMessageInput = {
|
||||
sessionID: string
|
||||
agent?: string
|
||||
model?: { providerID: string; modelID: string }
|
||||
}
|
||||
type StartWorkHookOutput = { parts: Array<{ type: string; text?: string }> }
|
||||
|
||||
function isStartWorkHookOutput(value: unknown): value is StartWorkHookOutput {
|
||||
@@ -34,13 +40,13 @@ export function createChatMessageHandler(args: {
|
||||
firstMessageVariantGate: FirstMessageVariantGate
|
||||
hooks: CreatedHooks
|
||||
}): (
|
||||
input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
|
||||
input: ChatMessageInput,
|
||||
output: ChatMessageHandlerOutput
|
||||
) => Promise<void> {
|
||||
const { ctx, pluginConfig, firstMessageVariantGate, hooks } = args
|
||||
|
||||
return async (
|
||||
input: { sessionID: string; agent?: string; model?: { providerID: string; modelID: string } },
|
||||
input: ChatMessageInput,
|
||||
output: ChatMessageHandlerOutput
|
||||
): Promise<void> => {
|
||||
if (input.agent) {
|
||||
@@ -53,6 +59,22 @@ export function createChatMessageHandler(args: {
|
||||
firstMessageVariantGate.markApplied(input.sessionID)
|
||||
}
|
||||
|
||||
await hooks.modelFallback?.["chat.message"]?.(input, output)
|
||||
const modelOverride = output.message["model"]
|
||||
if (
|
||||
modelOverride &&
|
||||
typeof modelOverride === "object" &&
|
||||
"providerID" in modelOverride &&
|
||||
"modelID" in modelOverride
|
||||
) {
|
||||
const providerID = (modelOverride as { providerID?: string }).providerID
|
||||
const modelID = (modelOverride as { modelID?: string }).modelID
|
||||
if (typeof providerID === "string" && typeof modelID === "string") {
|
||||
setSessionModel(input.sessionID, { providerID, modelID })
|
||||
}
|
||||
} else if (input.model) {
|
||||
setSessionModel(input.sessionID, input.model)
|
||||
}
|
||||
await hooks.stopContinuationGuard?.["chat.message"]?.(input)
|
||||
await hooks.keywordDetector?.["chat.message"]?.(input, output)
|
||||
await hooks.claudeCodeHooks?.["chat.message"]?.(input, output)
|
||||
|
||||
38
src/plugin/chat-params.test.ts
Normal file
38
src/plugin/chat-params.test.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
|
||||
import { createChatParamsHandler } from "./chat-params"
|
||||
|
||||
describe("createChatParamsHandler", () => {
|
||||
test("normalizes object-style agent payload and runs chat.params hooks", async () => {
|
||||
//#given
|
||||
let called = false
|
||||
const handler = createChatParamsHandler({
|
||||
anthropicEffort: {
|
||||
"chat.params": async (input) => {
|
||||
called = input.agent.name === "sisyphus"
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const input = {
|
||||
sessionID: "ses_chat_params",
|
||||
agent: { name: "sisyphus" },
|
||||
model: { providerID: "opencode", modelID: "claude-opus-4-6" },
|
||||
provider: { id: "opencode" },
|
||||
message: {},
|
||||
}
|
||||
|
||||
const output = {
|
||||
temperature: 0.1,
|
||||
topP: 1,
|
||||
topK: 1,
|
||||
options: {},
|
||||
}
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then
|
||||
expect(called).toBe(true)
|
||||
})
|
||||
})
|
||||
@@ -1,4 +1,4 @@
|
||||
type ChatParamsInput = {
|
||||
export type ChatParamsInput = {
|
||||
sessionID: string
|
||||
agent: { name?: string }
|
||||
model: { providerID: string; modelID: string }
|
||||
@@ -6,7 +6,7 @@ type ChatParamsInput = {
|
||||
message: { variant?: string }
|
||||
}
|
||||
|
||||
type ChatParamsOutput = {
|
||||
export type ChatParamsOutput = {
|
||||
temperature?: number
|
||||
topP?: number
|
||||
topK?: number
|
||||
@@ -27,11 +27,21 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null {
|
||||
const message = raw.message
|
||||
|
||||
if (typeof sessionID !== "string") return null
|
||||
if (typeof agent !== "string") return null
|
||||
if (!isRecord(model)) return null
|
||||
if (!isRecord(provider)) return null
|
||||
if (!isRecord(message)) return null
|
||||
|
||||
let agentName: string | undefined
|
||||
if (typeof agent === "string") {
|
||||
agentName = agent
|
||||
} else if (isRecord(agent)) {
|
||||
const name = agent.name
|
||||
if (typeof name === "string") {
|
||||
agentName = name
|
||||
}
|
||||
}
|
||||
if (!agentName) return null
|
||||
|
||||
const providerID = model.providerID
|
||||
const modelID = model.modelID
|
||||
const providerId = provider.id
|
||||
@@ -43,7 +53,7 @@ function buildChatParamsInput(raw: unknown): ChatParamsInput | null {
|
||||
|
||||
return {
|
||||
sessionID,
|
||||
agent: { name: agent },
|
||||
agent: { name: agentName },
|
||||
model: { providerID, modelID },
|
||||
provider: { id: providerId },
|
||||
message: typeof variant === "string" ? { variant } : {},
|
||||
|
||||
406
src/plugin/event.model-fallback.test.ts
Normal file
406
src/plugin/event.model-fallback.test.ts
Normal file
@@ -0,0 +1,406 @@
|
||||
import { afterEach, describe, expect, test } from "bun:test"
|
||||
|
||||
import { createEventHandler } from "./event"
|
||||
import { createChatMessageHandler } from "./chat-message"
|
||||
import { _resetForTesting, setMainSession } from "../features/claude-code-session-state"
|
||||
import { createModelFallbackHook, clearPendingModelFallback } from "../hooks/model-fallback/hook"
|
||||
|
||||
describe("createEventHandler - model fallback", () => {
|
||||
afterEach(() => {
|
||||
_resetForTesting()
|
||||
})
|
||||
|
||||
test("triggers retry prompt for assistant message.updated APIError payloads (headless resume)", async () => {
|
||||
//#given
|
||||
const abortCalls: string[] = []
|
||||
const promptCalls: string[] = []
|
||||
const sessionID = "ses_message_updated_fallback"
|
||||
|
||||
const handler = createEventHandler({
|
||||
ctx: {
|
||||
directory: "/tmp",
|
||||
client: {
|
||||
session: {
|
||||
abort: async ({ path }: { path: { id: string } }) => {
|
||||
abortCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
prompt: async ({ path }: { path: { id: string } }) => {
|
||||
promptCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
markSessionCreated: () => {},
|
||||
clear: () => {},
|
||||
},
|
||||
managers: {
|
||||
tmuxSessionManager: {
|
||||
onSessionCreated: async () => {},
|
||||
onSessionDeleted: async () => {},
|
||||
},
|
||||
skillMcpManager: {
|
||||
disconnectSession: async () => {},
|
||||
},
|
||||
} as any,
|
||||
hooks: {} as any,
|
||||
})
|
||||
|
||||
//#when
|
||||
await handler({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
id: "msg_err_1",
|
||||
sessionID,
|
||||
role: "assistant",
|
||||
time: { created: 1, completed: 2 },
|
||||
error: {
|
||||
name: "APIError",
|
||||
data: {
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
isRetryable: true,
|
||||
},
|
||||
},
|
||||
parentID: "msg_user_1",
|
||||
modelID: "claude-opus-4-6-thinking",
|
||||
providerID: "quotio",
|
||||
mode: "Sisyphus (Ultraworker)",
|
||||
agent: "Sisyphus (Ultraworker)",
|
||||
path: { cwd: "/tmp", root: "/tmp" },
|
||||
cost: 0,
|
||||
tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(abortCalls).toEqual([sessionID])
|
||||
expect(promptCalls).toEqual([sessionID])
|
||||
})
|
||||
|
||||
test("triggers retry prompt for nested model error payloads", async () => {
|
||||
//#given
|
||||
const abortCalls: string[] = []
|
||||
const promptCalls: string[] = []
|
||||
const sessionID = "ses_main_fallback_nested"
|
||||
setMainSession(sessionID)
|
||||
|
||||
const handler = createEventHandler({
|
||||
ctx: {
|
||||
directory: "/tmp",
|
||||
client: {
|
||||
session: {
|
||||
abort: async ({ path }: { path: { id: string } }) => {
|
||||
abortCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
prompt: async ({ path }: { path: { id: string } }) => {
|
||||
promptCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
markSessionCreated: () => {},
|
||||
clear: () => {},
|
||||
},
|
||||
managers: {
|
||||
tmuxSessionManager: {
|
||||
onSessionCreated: async () => {},
|
||||
onSessionDeleted: async () => {},
|
||||
},
|
||||
skillMcpManager: {
|
||||
disconnectSession: async () => {},
|
||||
},
|
||||
} as any,
|
||||
hooks: {} as any,
|
||||
})
|
||||
|
||||
//#when
|
||||
await handler({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: {
|
||||
sessionID,
|
||||
error: {
|
||||
name: "UnknownError",
|
||||
data: {
|
||||
error: {
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(abortCalls).toEqual([sessionID])
|
||||
expect(promptCalls).toEqual([sessionID])
|
||||
})
|
||||
|
||||
test("triggers retry prompt on session.status retry events and applies fallback", async () => {
|
||||
//#given
|
||||
const abortCalls: string[] = []
|
||||
const promptCalls: string[] = []
|
||||
const sessionID = "ses_status_retry_fallback"
|
||||
setMainSession(sessionID)
|
||||
clearPendingModelFallback(sessionID)
|
||||
|
||||
const modelFallback = createModelFallbackHook()
|
||||
|
||||
const handler = createEventHandler({
|
||||
ctx: {
|
||||
directory: "/tmp",
|
||||
client: {
|
||||
session: {
|
||||
abort: async ({ path }: { path: { id: string } }) => {
|
||||
abortCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
prompt: async ({ path }: { path: { id: string } }) => {
|
||||
promptCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
markSessionCreated: () => {},
|
||||
clear: () => {},
|
||||
},
|
||||
managers: {
|
||||
tmuxSessionManager: {
|
||||
onSessionCreated: async () => {},
|
||||
onSessionDeleted: async () => {},
|
||||
},
|
||||
skillMcpManager: {
|
||||
disconnectSession: async () => {},
|
||||
},
|
||||
} as any,
|
||||
hooks: {
|
||||
modelFallback,
|
||||
} as any,
|
||||
})
|
||||
|
||||
const chatMessageHandler = createChatMessageHandler({
|
||||
ctx: {
|
||||
client: {
|
||||
tui: {
|
||||
showToast: async () => ({}),
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
shouldOverride: () => false,
|
||||
markApplied: () => {},
|
||||
},
|
||||
hooks: {
|
||||
modelFallback,
|
||||
stopContinuationGuard: null,
|
||||
keywordDetector: null,
|
||||
claudeCodeHooks: null,
|
||||
autoSlashCommand: null,
|
||||
startWork: null,
|
||||
ralphLoop: null,
|
||||
} as any,
|
||||
})
|
||||
|
||||
await handler({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
id: "msg_user_status_1",
|
||||
sessionID,
|
||||
role: "user",
|
||||
time: { created: 1 },
|
||||
content: [],
|
||||
modelID: "claude-opus-4-6-thinking",
|
||||
providerID: "quotio",
|
||||
agent: "Sisyphus (Ultraworker)",
|
||||
path: { cwd: "/tmp", root: "/tmp" },
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#when
|
||||
await handler({
|
||||
event: {
|
||||
type: "session.status",
|
||||
properties: {
|
||||
sessionID,
|
||||
status: {
|
||||
type: "retry",
|
||||
attempt: 1,
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
next: 1234,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> }
|
||||
await chatMessageHandler(
|
||||
{
|
||||
sessionID,
|
||||
agent: "sisyphus",
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
},
|
||||
output,
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(abortCalls).toEqual([sessionID])
|
||||
expect(promptCalls).toEqual([sessionID])
|
||||
expect(output.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
expect(output.message["variant"]).toBe("max")
|
||||
})
|
||||
|
||||
test("advances main-session fallback chain across repeated session.error retries end-to-end", async () => {
|
||||
//#given
|
||||
const abortCalls: string[] = []
|
||||
const promptCalls: string[] = []
|
||||
const toastCalls: string[] = []
|
||||
const sessionID = "ses_main_fallback_chain"
|
||||
setMainSession(sessionID)
|
||||
clearPendingModelFallback(sessionID)
|
||||
|
||||
const modelFallback = createModelFallbackHook()
|
||||
|
||||
const eventHandler = createEventHandler({
|
||||
ctx: {
|
||||
directory: "/tmp",
|
||||
client: {
|
||||
session: {
|
||||
abort: async ({ path }: { path: { id: string } }) => {
|
||||
abortCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
prompt: async ({ path }: { path: { id: string } }) => {
|
||||
promptCalls.push(path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
markSessionCreated: () => {},
|
||||
clear: () => {},
|
||||
},
|
||||
managers: {
|
||||
tmuxSessionManager: {
|
||||
onSessionCreated: async () => {},
|
||||
onSessionDeleted: async () => {},
|
||||
},
|
||||
skillMcpManager: {
|
||||
disconnectSession: async () => {},
|
||||
},
|
||||
} as any,
|
||||
hooks: {
|
||||
modelFallback,
|
||||
} as any,
|
||||
})
|
||||
|
||||
const chatMessageHandler = createChatMessageHandler({
|
||||
ctx: {
|
||||
client: {
|
||||
tui: {
|
||||
showToast: async ({ body }: { body: { title?: string } }) => {
|
||||
if (body?.title) toastCalls.push(body.title)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
},
|
||||
} as any,
|
||||
pluginConfig: {} as any,
|
||||
firstMessageVariantGate: {
|
||||
shouldOverride: () => false,
|
||||
markApplied: () => {},
|
||||
},
|
||||
hooks: {
|
||||
modelFallback,
|
||||
stopContinuationGuard: null,
|
||||
keywordDetector: null,
|
||||
claudeCodeHooks: null,
|
||||
autoSlashCommand: null,
|
||||
startWork: null,
|
||||
ralphLoop: null,
|
||||
} as any,
|
||||
})
|
||||
|
||||
const triggerRetryCycle = async () => {
|
||||
await eventHandler({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: {
|
||||
sessionID,
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6-thinking",
|
||||
error: {
|
||||
name: "UnknownError",
|
||||
data: {
|
||||
error: {
|
||||
message:
|
||||
"Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
const output = { message: {}, parts: [] as Array<{ type: string; text?: string }> }
|
||||
await chatMessageHandler(
|
||||
{
|
||||
sessionID,
|
||||
agent: "sisyphus",
|
||||
model: { providerID: "quotio", modelID: "claude-opus-4-6-thinking" },
|
||||
},
|
||||
output,
|
||||
)
|
||||
return output
|
||||
}
|
||||
|
||||
//#when - first retry cycle
|
||||
const first = await triggerRetryCycle()
|
||||
|
||||
//#then - first fallback entry applied (prefers current provider when available)
|
||||
expect(first.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
expect(first.message["variant"]).toBe("max")
|
||||
|
||||
//#when - second retry cycle
|
||||
const second = await triggerRetryCycle()
|
||||
|
||||
//#then - second fallback entry applied (chain advanced)
|
||||
expect(second.message["model"]).toEqual({
|
||||
providerID: "quotio",
|
||||
modelID: "gpt-5.3-codex",
|
||||
})
|
||||
expect(second.message["variant"]).toBe("high")
|
||||
expect(abortCalls).toEqual([sessionID, sessionID])
|
||||
expect(promptCalls).toEqual([sessionID, sessionID])
|
||||
expect(toastCalls.length).toBeGreaterThanOrEqual(0)
|
||||
})
|
||||
})
|
||||
@@ -4,11 +4,17 @@ import type { PluginContext } from "./types"
|
||||
import {
|
||||
clearSessionAgent,
|
||||
getMainSessionID,
|
||||
getSessionAgent,
|
||||
subagentSessions,
|
||||
syncSubagentSessions,
|
||||
setMainSession,
|
||||
updateSessionAgent,
|
||||
} from "../features/claude-code-session-state"
|
||||
import { resetMessageCursor } from "../shared"
|
||||
import { lspManager } from "../tools"
|
||||
import { shouldRetryError } from "../shared/model-error-classifier"
|
||||
import { clearPendingModelFallback, setPendingModelFallback } from "../hooks/model-fallback/hook"
|
||||
import { clearSessionModel, setSessionModel } from "../shared/session-model-state"
|
||||
|
||||
import type { CreatedHooks } from "../create-hooks"
|
||||
import type { Managers } from "../create-managers"
|
||||
@@ -20,6 +26,74 @@ type FirstMessageVariantGate = {
|
||||
clear: (sessionID: string) => void
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null
|
||||
}
|
||||
|
||||
function normalizeFallbackModelID(modelID: string): string {
|
||||
return modelID
|
||||
.replace(/-thinking$/i, "")
|
||||
.replace(/-max$/i, "")
|
||||
.replace(/-high$/i, "")
|
||||
}
|
||||
|
||||
function extractErrorName(error: unknown): string | undefined {
|
||||
if (isRecord(error) && typeof error.name === "string") return error.name
|
||||
if (error instanceof Error) return error.name
|
||||
return undefined
|
||||
}
|
||||
|
||||
function extractErrorMessage(error: unknown): string {
|
||||
if (!error) return ""
|
||||
if (typeof error === "string") return error
|
||||
if (error instanceof Error) return error.message
|
||||
|
||||
if (isRecord(error)) {
|
||||
const candidates: unknown[] = [
|
||||
error,
|
||||
error.data,
|
||||
error.error,
|
||||
isRecord(error.data) ? error.data.error : undefined,
|
||||
error.cause,
|
||||
]
|
||||
|
||||
for (const candidate of candidates) {
|
||||
if (isRecord(candidate) && typeof candidate.message === "string" && candidate.message.length > 0) {
|
||||
return candidate.message
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.stringify(error)
|
||||
} catch {
|
||||
return String(error)
|
||||
}
|
||||
}
|
||||
|
||||
function extractProviderModelFromErrorMessage(
|
||||
message: string,
|
||||
): { providerID?: string; modelID?: string } {
|
||||
const lower = message.toLowerCase()
|
||||
|
||||
const providerModel = lower.match(/model\s+not\s+found:\s*([a-z0-9_-]+)\s*\/\s*([a-z0-9._-]+)/i)
|
||||
if (providerModel) {
|
||||
return {
|
||||
providerID: providerModel[1],
|
||||
modelID: providerModel[2],
|
||||
}
|
||||
}
|
||||
|
||||
const modelOnly = lower.match(/unknown\s+provider\s+for\s+model\s+([a-z0-9._-]+)/i)
|
||||
if (modelOnly) {
|
||||
return {
|
||||
modelID: modelOnly[1],
|
||||
}
|
||||
}
|
||||
|
||||
return {}
|
||||
}
|
||||
|
||||
export function createEventHandler(args: {
|
||||
ctx: PluginContext
|
||||
pluginConfig: OhMyOpenCodeConfig
|
||||
@@ -29,6 +103,11 @@ export function createEventHandler(args: {
|
||||
}): (input: { event: { type: string; properties?: Record<string, unknown> } }) => Promise<void> {
|
||||
const { ctx, firstMessageVariantGate, managers, hooks } = args
|
||||
|
||||
// Avoid triggering multiple abort+continue cycles for the same failing assistant message.
|
||||
const lastHandledModelErrorMessageID = new Map<string, string>()
|
||||
const lastHandledRetryStatusKey = new Map<string, string>()
|
||||
const lastKnownModelBySession = new Map<string, { providerID: string; modelID: string }>()
|
||||
|
||||
const dispatchToHooks = async (input: { event: { type: string; properties?: Record<string, unknown> } }): Promise<void> => {
|
||||
await Promise.resolve(hooks.autoUpdateChecker?.event?.(input))
|
||||
await Promise.resolve(hooks.claudeCodeHooks?.event?.(input))
|
||||
@@ -55,6 +134,15 @@ export function createEventHandler(args: {
|
||||
const recentRealIdles = new Map<string, number>()
|
||||
const DEDUP_WINDOW_MS = 500
|
||||
|
||||
const shouldAutoRetrySession = (sessionID: string): boolean => {
|
||||
const mainSessionID = getMainSessionID()
|
||||
if (mainSessionID) return sessionID === mainSessionID
|
||||
// Headless runs (or resumed sessions) may not emit session.created, so mainSessionID can be unset.
|
||||
// In that case, treat any non-subagent session as the "main" interactive session.
|
||||
if (syncSubagentSessions.has(sessionID)) return true
|
||||
return !subagentSessions.has(sessionID)
|
||||
}
|
||||
|
||||
return async (input): Promise<void> => {
|
||||
pruneRecentSyntheticIdles({
|
||||
recentSyntheticIdles,
|
||||
@@ -121,8 +209,14 @@ export function createEventHandler(args: {
|
||||
|
||||
if (sessionInfo?.id) {
|
||||
clearSessionAgent(sessionInfo.id)
|
||||
lastHandledModelErrorMessageID.delete(sessionInfo.id)
|
||||
lastHandledRetryStatusKey.delete(sessionInfo.id)
|
||||
lastKnownModelBySession.delete(sessionInfo.id)
|
||||
clearPendingModelFallback(sessionInfo.id)
|
||||
resetMessageCursor(sessionInfo.id)
|
||||
firstMessageVariantGate.clear(sessionInfo.id)
|
||||
clearSessionModel(sessionInfo.id)
|
||||
syncSubagentSessions.delete(sessionInfo.id)
|
||||
await managers.skillMcpManager.disconnectSession(sessionInfo.id)
|
||||
await lspManager.cleanupTempDirectoryClients()
|
||||
await managers.tmuxSessionManager.onSessionDeleted({
|
||||
@@ -136,8 +230,129 @@ export function createEventHandler(args: {
|
||||
const sessionID = info?.sessionID as string | undefined
|
||||
const agent = info?.agent as string | undefined
|
||||
const role = info?.role as string | undefined
|
||||
if (sessionID && agent && role === "user") {
|
||||
updateSessionAgent(sessionID, agent)
|
||||
if (sessionID && role === "user") {
|
||||
if (agent) {
|
||||
updateSessionAgent(sessionID, agent)
|
||||
}
|
||||
const providerID = info?.providerID as string | undefined
|
||||
const modelID = info?.modelID as string | undefined
|
||||
if (providerID && modelID) {
|
||||
lastKnownModelBySession.set(sessionID, { providerID, modelID })
|
||||
setSessionModel(sessionID, { providerID, modelID })
|
||||
}
|
||||
}
|
||||
|
||||
// Model fallback: in practice, API/model failures often surface as assistant message errors.
|
||||
// session.error events are not guaranteed for all providers, so we also observe message.updated.
|
||||
if (sessionID && role === "assistant") {
|
||||
const assistantMessageID = info?.id as string | undefined
|
||||
const assistantError = info?.error
|
||||
if (assistantMessageID && assistantError) {
|
||||
const lastHandled = lastHandledModelErrorMessageID.get(sessionID)
|
||||
if (lastHandled === assistantMessageID) {
|
||||
return
|
||||
}
|
||||
|
||||
const errorName = extractErrorName(assistantError)
|
||||
const errorMessage = extractErrorMessage(assistantError)
|
||||
const errorInfo = { name: errorName, message: errorMessage }
|
||||
|
||||
if (shouldRetryError(errorInfo)) {
|
||||
// Prefer the agent/model/provider from the assistant message payload.
|
||||
let agentName = agent ?? getSessionAgent(sessionID)
|
||||
if (!agentName && sessionID === getMainSessionID()) {
|
||||
if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
|
||||
agentName = "sisyphus"
|
||||
} else if (errorMessage.includes("gpt-5")) {
|
||||
agentName = "hephaestus"
|
||||
} else {
|
||||
agentName = "sisyphus"
|
||||
}
|
||||
}
|
||||
|
||||
if (agentName) {
|
||||
const currentProvider = (info?.providerID as string | undefined) ?? "opencode"
|
||||
const rawModel = (info?.modelID as string | undefined) ?? "claude-opus-4-6"
|
||||
const currentModel = normalizeFallbackModelID(rawModel)
|
||||
|
||||
const setFallback = setPendingModelFallback(
|
||||
sessionID,
|
||||
agentName,
|
||||
currentProvider,
|
||||
currentModel,
|
||||
)
|
||||
|
||||
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
|
||||
lastHandledModelErrorMessageID.set(sessionID, assistantMessageID)
|
||||
|
||||
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
|
||||
await ctx.client.session
|
||||
.prompt({
|
||||
path: { id: sessionID },
|
||||
body: { parts: [{ type: "text", text: "continue" }] },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
.catch(() => {})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (event.type === "session.status") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
const status = props?.status as
|
||||
| { type?: string; attempt?: number; message?: string; next?: number }
|
||||
| undefined
|
||||
|
||||
if (sessionID && status?.type === "retry") {
|
||||
const retryMessage = typeof status.message === "string" ? status.message : ""
|
||||
const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}`
|
||||
if (lastHandledRetryStatusKey.get(sessionID) === retryKey) {
|
||||
return
|
||||
}
|
||||
lastHandledRetryStatusKey.set(sessionID, retryKey)
|
||||
|
||||
const errorInfo = { name: undefined, message: retryMessage }
|
||||
if (shouldRetryError(errorInfo)) {
|
||||
let agentName = getSessionAgent(sessionID)
|
||||
if (!agentName && sessionID === getMainSessionID()) {
|
||||
if (retryMessage.includes("claude-opus") || retryMessage.includes("opus")) {
|
||||
agentName = "sisyphus"
|
||||
} else if (retryMessage.includes("gpt-5")) {
|
||||
agentName = "hephaestus"
|
||||
} else {
|
||||
agentName = "sisyphus"
|
||||
}
|
||||
}
|
||||
|
||||
if (agentName) {
|
||||
const parsed = extractProviderModelFromErrorMessage(retryMessage)
|
||||
const lastKnown = lastKnownModelBySession.get(sessionID)
|
||||
const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode"
|
||||
let currentModel = parsed.modelID ?? lastKnown?.modelID ?? "claude-opus-4-6"
|
||||
currentModel = normalizeFallbackModelID(currentModel)
|
||||
|
||||
const setFallback = setPendingModelFallback(
|
||||
sessionID,
|
||||
agentName,
|
||||
currentProvider,
|
||||
currentModel,
|
||||
)
|
||||
|
||||
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
|
||||
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
|
||||
await ctx.client.session
|
||||
.prompt({
|
||||
path: { id: sessionID },
|
||||
body: { parts: [{ type: "text", text: "continue" }] },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
.catch(() => {})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,6 +360,11 @@ export function createEventHandler(args: {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
const error = props?.error
|
||||
|
||||
const errorName = extractErrorName(error)
|
||||
const errorMessage = extractErrorMessage(error)
|
||||
const errorInfo = { name: errorName, message: errorMessage }
|
||||
|
||||
// First, try session recovery for internal errors (thinking blocks, tool results, etc.)
|
||||
if (hooks.sessionRecovery?.isRecoverableError(error)) {
|
||||
const messageInfo = {
|
||||
id: props?.messageID as string | undefined,
|
||||
@@ -168,6 +388,52 @@ export function createEventHandler(args: {
|
||||
})
|
||||
.catch(() => {})
|
||||
}
|
||||
}
|
||||
// Second, try model fallback for model errors (rate limit, quota, provider issues, etc.)
|
||||
else if (sessionID && shouldRetryError(errorInfo)) {
|
||||
// Get the current agent for this session, or default to "sisyphus" for main sessions
|
||||
let agentName = getSessionAgent(sessionID)
|
||||
|
||||
// For main sessions, if no agent is set, try to infer from the error or default to sisyphus
|
||||
if (!agentName && sessionID === getMainSessionID()) {
|
||||
// Try to infer agent from model in error message
|
||||
if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
|
||||
agentName = "sisyphus"
|
||||
} else if (errorMessage.includes("gpt-5")) {
|
||||
agentName = "hephaestus"
|
||||
} else {
|
||||
// Default to sisyphus for main session errors
|
||||
agentName = "sisyphus"
|
||||
}
|
||||
}
|
||||
|
||||
if (agentName) {
|
||||
const parsed = extractProviderModelFromErrorMessage(errorMessage)
|
||||
const currentProvider = props?.providerID as string || parsed.providerID || "opencode"
|
||||
let currentModel = props?.modelID as string || parsed.modelID || "claude-opus-4-6"
|
||||
currentModel = normalizeFallbackModelID(currentModel)
|
||||
|
||||
// Try to set pending model fallback
|
||||
const setFallback = setPendingModelFallback(
|
||||
sessionID,
|
||||
agentName,
|
||||
currentProvider,
|
||||
currentModel,
|
||||
)
|
||||
|
||||
if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
|
||||
// Abort the current session and prompt with "continue" to trigger the fallback
|
||||
await ctx.client.session.abort({ path: { id: sessionID } }).catch(() => {})
|
||||
|
||||
await ctx.client.session
|
||||
.prompt({
|
||||
path: { id: sessionID },
|
||||
body: { parts: [{ type: "text", text: "continue" }] },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
.catch(() => {})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
createSessionRecoveryHook,
|
||||
createSessionNotification,
|
||||
createThinkModeHook,
|
||||
createModelFallbackHook,
|
||||
createAnthropicContextWindowLimitRecoveryHook,
|
||||
createAutoUpdateCheckerHook,
|
||||
createAgentUsageReminderHook,
|
||||
@@ -30,6 +31,7 @@ import {
|
||||
detectExternalNotificationPlugin,
|
||||
getNotificationConflictWarning,
|
||||
log,
|
||||
normalizeSDKResponse,
|
||||
} from "../../shared"
|
||||
import { safeCreateHook } from "../../shared/safe-create-hook"
|
||||
import { sessionExists } from "../../tools"
|
||||
@@ -40,6 +42,7 @@ export type SessionHooks = {
|
||||
sessionRecovery: ReturnType<typeof createSessionRecoveryHook> | null
|
||||
sessionNotification: ReturnType<typeof createSessionNotification> | null
|
||||
thinkMode: ReturnType<typeof createThinkModeHook> | null
|
||||
modelFallback: ReturnType<typeof createModelFallbackHook> | null
|
||||
anthropicContextWindowLimitRecovery: ReturnType<typeof createAnthropicContextWindowLimitRecoveryHook> | null
|
||||
autoUpdateChecker: ReturnType<typeof createAutoUpdateCheckerHook> | null
|
||||
agentUsageReminder: ReturnType<typeof createAgentUsageReminderHook> | null
|
||||
@@ -102,6 +105,64 @@ export function createSessionHooks(args: {
|
||||
? safeHook("think-mode", () => createThinkModeHook())
|
||||
: null
|
||||
|
||||
const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
|
||||
const updateFallbackTitle = async (input: {
|
||||
sessionID: string
|
||||
providerID: string
|
||||
modelID: string
|
||||
variant?: string
|
||||
}) => {
|
||||
const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
|
||||
const existing = fallbackTitleState.get(input.sessionID) ?? {}
|
||||
if (existing.lastKey === key) return
|
||||
|
||||
if (!existing.baseTitle) {
|
||||
const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
|
||||
const sessionInfo = sessionResp
|
||||
? normalizeSDKResponse(sessionResp, null as { title?: string } | null, { preferResponseOnMissingData: true })
|
||||
: null
|
||||
const rawTitle = sessionInfo?.title
|
||||
if (typeof rawTitle === "string" && rawTitle.length > 0) {
|
||||
existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
|
||||
} else {
|
||||
existing.baseTitle = "Session"
|
||||
}
|
||||
}
|
||||
|
||||
const variantLabel = input.variant ? ` ${input.variant}` : ""
|
||||
const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
|
||||
|
||||
await ctx.client.session
|
||||
.update({
|
||||
path: { id: input.sessionID },
|
||||
body: { title: newTitle },
|
||||
query: { directory: ctx.directory },
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
existing.lastKey = key
|
||||
fallbackTitleState.set(input.sessionID, existing)
|
||||
}
|
||||
|
||||
// Model fallback hook - always enabled (no feature flag)
|
||||
// This handles automatic model switching when model errors occur
|
||||
const modelFallback = safeHook("model-fallback", () =>
|
||||
createModelFallbackHook({
|
||||
toast: async ({ title, message, variant, duration }) => {
|
||||
await ctx.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title,
|
||||
message,
|
||||
variant: variant ?? "warning",
|
||||
duration: duration ?? 5000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
},
|
||||
onApplied: updateFallbackTitle,
|
||||
}))
|
||||
|
||||
const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery")
|
||||
? safeHook("anthropic-context-window-limit-recovery", () =>
|
||||
createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental: pluginConfig.experimental }))
|
||||
@@ -181,6 +242,7 @@ export function createSessionHooks(args: {
|
||||
sessionRecovery,
|
||||
sessionNotification,
|
||||
thinkMode,
|
||||
modelFallback,
|
||||
anthropicContextWindowLimitRecovery,
|
||||
autoUpdateChecker,
|
||||
agentUsageReminder,
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
createClaudeCodeHooksHook,
|
||||
createKeywordDetectorHook,
|
||||
createThinkingBlockValidatorHook,
|
||||
createBeastModeSystemHook,
|
||||
} from "../../hooks"
|
||||
import {
|
||||
contextCollector,
|
||||
@@ -17,6 +18,7 @@ export type TransformHooks = {
|
||||
keywordDetector: ReturnType<typeof createKeywordDetectorHook> | null
|
||||
contextInjectorMessagesTransform: ReturnType<typeof createContextInjectorMessagesTransformHook>
|
||||
thinkingBlockValidator: ReturnType<typeof createThinkingBlockValidatorHook> | null
|
||||
beastModeSystem: ReturnType<typeof createBeastModeSystemHook> | null
|
||||
}
|
||||
|
||||
export function createTransformHooks(args: {
|
||||
@@ -56,10 +58,19 @@ export function createTransformHooks(args: {
|
||||
)
|
||||
: null
|
||||
|
||||
// Beast-mode hook, gated behind the "beast-mode-system" flag; null when the
// flag is off so consumers can skip it with optional chaining.
const beastModeSystem = isHookEnabled("beast-mode-system")
  ? safeCreateHook(
      "beast-mode-system",
      () => createBeastModeSystemHook(),
      { enabled: safeHookEnabled },
    )
  : null
|
||||
|
||||
return {
|
||||
claudeCodeHooks,
|
||||
keywordDetector,
|
||||
contextInjectorMessagesTransform,
|
||||
thinkingBlockValidator,
|
||||
beastModeSystem,
|
||||
}
|
||||
}
|
||||
|
||||
12
src/plugin/system-transform.ts
Normal file
12
src/plugin/system-transform.ts
Normal file
@@ -0,0 +1,12 @@
|
||||
import type { CreatedHooks } from "../create-hooks"
|
||||
|
||||
export function createSystemTransformHandler(args: {
|
||||
hooks: CreatedHooks
|
||||
}): (input: { sessionID: string }, output: { system: string[] }) => Promise<void> {
|
||||
return async (input, output): Promise<void> => {
|
||||
await args.hooks.beastModeSystem?.["experimental.chat.system.transform"]?.(
|
||||
input,
|
||||
output,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -75,6 +75,14 @@ function findVariantInChain(
|
||||
return entry.variant
|
||||
}
|
||||
}
|
||||
|
||||
// Some providers expose identical model IDs (e.g. OpenAI models via different providers).
|
||||
// If we didn't find an exact provider+model match, fall back to model-only matching.
|
||||
for (const entry of fallbackChain) {
|
||||
if (entry.model === currentModel.modelID) {
|
||||
return entry.variant
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
|
||||
76
src/shared/model-error-classifier.test.ts
Normal file
76
src/shared/model-error-classifier.test.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
|
||||
|
||||
import { mkdirSync, rmSync, writeFileSync, existsSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import * as dataPath from "./data-path"
|
||||
import { shouldRetryError, selectFallbackProvider } from "./model-error-classifier"
|
||||
|
||||
const TEST_CACHE_DIR = join(import.meta.dir, "__test-cache__")
|
||||
|
||||
// Unit tests for model-error-classifier. The connected-providers cache is
// redirected into a throwaway directory by spying on getOmoOpenCodeCacheDir,
// so selectFallbackProvider only ever reads fixture data written by each test.
describe("model-error-classifier", () => {
  // Spy handle kept so afterEach can restore the real cache-dir resolver.
  let cacheDirSpy: ReturnType<typeof spyOn>

  beforeEach(() => {
    // Point the cache dir at an isolated folder and start from a clean slate.
    cacheDirSpy = spyOn(dataPath, "getOmoOpenCodeCacheDir").mockReturnValue(TEST_CACHE_DIR)
    if (existsSync(TEST_CACHE_DIR)) {
      rmSync(TEST_CACHE_DIR, { recursive: true })
    }
    mkdirSync(TEST_CACHE_DIR, { recursive: true })
  })

  afterEach(() => {
    // Undo the spy and delete fixtures so tests stay order-independent.
    cacheDirSpy.mockRestore()
    if (existsSync(TEST_CACHE_DIR)) {
      rmSync(TEST_CACHE_DIR, { recursive: true })
    }
  })

  test("treats overloaded retry messages as retryable", () => {
    //#given - an error with no name, only an "overloaded" message
    const error = { message: "Provider is overloaded" }

    //#when
    const result = shouldRetryError(error)

    //#then - message-pattern matching marks it retryable
    expect(result).toBe(true)
  })

  test("selectFallbackProvider prefers first connected provider in preference order", () => {
    //#given - both candidate providers are connected
    writeFileSync(
      join(TEST_CACHE_DIR, "connected-providers.json"),
      JSON.stringify({ connected: ["quotio", "nvidia"], updatedAt: new Date().toISOString() }, null, 2),
    )

    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia")

    //#then - preference order wins over preferredProviderID
    expect(provider).toBe("quotio")
  })

  test("selectFallbackProvider falls back to next connected provider when first is disconnected", () => {
    //#given - only the second candidate is connected
    writeFileSync(
      join(TEST_CACHE_DIR, "connected-providers.json"),
      JSON.stringify({ connected: ["nvidia"], updatedAt: new Date().toISOString() }, null, 2),
    )

    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"])

    //#then
    expect(provider).toBe("nvidia")
  })

  test("selectFallbackProvider uses provider preference order when cache is missing", () => {
    //#given - no cache file

    //#when
    const provider = selectFallbackProvider(["quotio", "nvidia"], "nvidia")

    //#then - first listed provider is used when connectivity is unknown
    expect(provider).toBe("quotio")
  })
})
|
||||
135
src/shared/model-error-classifier.ts
Normal file
135
src/shared/model-error-classifier.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import type { FallbackEntry } from "./model-requirements"
|
||||
import { readConnectedProvidersCache } from "./connected-providers-cache"
|
||||
|
||||
/**
|
||||
* Error names that indicate a retryable model error (deadstop).
|
||||
* These errors completely halt the action loop and should trigger fallback retry.
|
||||
*/
|
||||
const RETRYABLE_ERROR_NAMES = new Set([
|
||||
"ProviderModelNotFoundError",
|
||||
"RateLimitError",
|
||||
"QuotaExceededError",
|
||||
"InsufficientCreditsError",
|
||||
"ModelUnavailableError",
|
||||
"ProviderConnectionError",
|
||||
"AuthenticationError",
|
||||
])
|
||||
|
||||
/**
|
||||
* Error names that should NOT trigger retry.
|
||||
* These errors are typically user-induced or fixable without switching models.
|
||||
*/
|
||||
const NON_RETRYABLE_ERROR_NAMES = new Set([
|
||||
"MessageAbortedError",
|
||||
"PermissionDeniedError",
|
||||
"ContextLengthError",
|
||||
"TimeoutError",
|
||||
"ValidationError",
|
||||
"SyntaxError",
|
||||
"UserError",
|
||||
])
|
||||
|
||||
/**
|
||||
* Message patterns that indicate a retryable error even without a known error name.
|
||||
*/
|
||||
const RETRYABLE_MESSAGE_PATTERNS = [
|
||||
"rate_limit",
|
||||
"rate limit",
|
||||
"quota",
|
||||
"not found",
|
||||
"unavailable",
|
||||
"insufficient",
|
||||
"too many requests",
|
||||
"over limit",
|
||||
"overloaded",
|
||||
"bad gateway",
|
||||
"unknown provider",
|
||||
"provider not found",
|
||||
"connection error",
|
||||
"network error",
|
||||
"timeout",
|
||||
"service unavailable",
|
||||
"internal_server_error",
|
||||
"503",
|
||||
"502",
|
||||
"504",
|
||||
]
|
||||
|
||||
export interface ErrorInfo {
|
||||
name?: string
|
||||
message?: string
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if an error is a retryable model error.
|
||||
* Returns true if the error is a known retryable type OR matches retryable message patterns.
|
||||
*/
|
||||
export function isRetryableModelError(error: ErrorInfo): boolean {
|
||||
// If we have an error name, check against known lists
|
||||
if (error.name) {
|
||||
// Explicit non-retryable takes precedence
|
||||
if (NON_RETRYABLE_ERROR_NAMES.has(error.name)) {
|
||||
return false
|
||||
}
|
||||
// Check if it's a known retryable error
|
||||
if (RETRYABLE_ERROR_NAMES.has(error.name)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
// Check message patterns for unknown errors
|
||||
const msg = error.message?.toLowerCase() ?? ""
|
||||
return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern))
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if an error should trigger a fallback retry.
|
||||
* Returns true for deadstop errors that completely halt the action loop.
|
||||
*/
|
||||
export function shouldRetryError(error: ErrorInfo): boolean {
|
||||
return isRetryableModelError(error)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next fallback model from the chain based on attempt count.
|
||||
* Returns undefined if all fallbacks have been exhausted.
|
||||
*/
|
||||
export function getNextFallback(
|
||||
fallbackChain: FallbackEntry[],
|
||||
attemptCount: number,
|
||||
): FallbackEntry | undefined {
|
||||
return fallbackChain[attemptCount]
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if there are more fallbacks available after the current attempt.
|
||||
*/
|
||||
export function hasMoreFallbacks(
|
||||
fallbackChain: FallbackEntry[],
|
||||
attemptCount: number,
|
||||
): boolean {
|
||||
return attemptCount < fallbackChain.length
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects the best provider for a fallback entry.
|
||||
* Priority:
|
||||
* 1) First connected provider in the entry's provider preference order
|
||||
* 2) First provider listed in the fallback entry (when cache is missing)
|
||||
*/
|
||||
export function selectFallbackProvider(
|
||||
providers: string[],
|
||||
preferredProviderID?: string,
|
||||
): string {
|
||||
const connectedProviders = readConnectedProvidersCache()
|
||||
if (connectedProviders) {
|
||||
const connectedSet = new Set(connectedProviders)
|
||||
for (const provider of providers) {
|
||||
if (connectedSet.has(provider)) {
|
||||
return provider
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return providers[0] || preferredProviderID || "quotio"
|
||||
}
|
||||
@@ -6,493 +6,158 @@ import {
|
||||
type ModelRequirement,
|
||||
} from "./model-requirements"
|
||||
|
||||
function flattenChains(): FallbackEntry[] {
|
||||
return [
|
||||
...Object.values(AGENT_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain),
|
||||
...Object.values(CATEGORY_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain),
|
||||
]
|
||||
}
|
||||
|
||||
function assertNoExcludedModels(entry: FallbackEntry): void {
|
||||
// User exclusions.
|
||||
expect(entry.model).not.toBe("grok-code-fast-1")
|
||||
if (entry.providers.includes("quotio")) {
|
||||
expect(entry.model).not.toBe("tstars2.0")
|
||||
expect(entry.model).not.toMatch(/^kiro-/i)
|
||||
expect(entry.model).not.toMatch(/^tab_/i)
|
||||
}
|
||||
// Remove codex-mini models per request.
|
||||
expect(entry.model).not.toMatch(/codex-mini/i)
|
||||
}
|
||||
|
||||
function assertNoOpencodeProvider(entry: FallbackEntry): void {
|
||||
expect(entry.providers).not.toContain("opencode")
|
||||
}
|
||||
|
||||
function assertNoProviderPrefixForNonNamespacedProviders(entry: FallbackEntry): void {
|
||||
// For these providers, model IDs should not be written as "provider/model".
|
||||
const nonNamespaced = ["quotio", "openai", "github-copilot", "minimax", "minimax-coding-plan"]
|
||||
for (const provider of entry.providers) {
|
||||
if (!nonNamespaced.includes(provider)) continue
|
||||
expect(entry.model.startsWith(`${provider}/`)).toBe(false)
|
||||
}
|
||||
}
|
||||
|
||||
describe("AGENT_MODEL_REQUIREMENTS", () => {
|
||||
test("oracle has valid fallbackChain with gpt-5.2 as primary", () => {
|
||||
// given - oracle agent requirement
|
||||
const oracle = AGENT_MODEL_REQUIREMENTS["oracle"]
|
||||
|
||||
// when - accessing oracle requirement
|
||||
// then - fallbackChain exists with gpt-5.2 as first entry
|
||||
expect(oracle).toBeDefined()
|
||||
expect(oracle.fallbackChain).toBeArray()
|
||||
expect(oracle.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = oracle.fallbackChain[0]
|
||||
expect(primary.providers).toContain("openai")
|
||||
expect(primary.model).toBe("gpt-5.2")
|
||||
expect(primary.variant).toBe("high")
|
||||
})
|
||||
|
||||
test("sisyphus has claude-opus-4-6 as primary and requiresAnyModel", () => {
|
||||
// #given - sisyphus agent requirement
|
||||
const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"]
|
||||
|
||||
// #when - accessing Sisyphus requirement
|
||||
// #then - fallbackChain has claude-opus-4-6 first, big-pickle last
|
||||
expect(sisyphus).toBeDefined()
|
||||
expect(sisyphus.fallbackChain).toBeArray()
|
||||
expect(sisyphus.fallbackChain).toHaveLength(5)
|
||||
expect(sisyphus.requiresAnyModel).toBe(true)
|
||||
|
||||
const primary = sisyphus.fallbackChain[0]
|
||||
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
|
||||
expect(primary.model).toBe("claude-opus-4-6")
|
||||
expect(primary.variant).toBe("max")
|
||||
|
||||
const last = sisyphus.fallbackChain[4]
|
||||
expect(last.providers[0]).toBe("opencode")
|
||||
expect(last.model).toBe("big-pickle")
|
||||
})
|
||||
|
||||
test("librarian has valid fallbackChain with gemini-3-flash as primary", () => {
|
||||
// given - librarian agent requirement
|
||||
const librarian = AGENT_MODEL_REQUIREMENTS["librarian"]
|
||||
|
||||
// when - accessing librarian requirement
|
||||
// then - fallbackChain exists with gemini-3-flash as first entry
|
||||
expect(librarian).toBeDefined()
|
||||
expect(librarian.fallbackChain).toBeArray()
|
||||
expect(librarian.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = librarian.fallbackChain[0]
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
expect(primary.model).toBe("gemini-3-flash")
|
||||
})
|
||||
|
||||
test("explore has valid fallbackChain with grok-code-fast-1 as primary", () => {
|
||||
// given - explore agent requirement
|
||||
const explore = AGENT_MODEL_REQUIREMENTS["explore"]
|
||||
|
||||
// when - accessing explore requirement
|
||||
// then - fallbackChain: grok → minimax-free → haiku → nano
|
||||
expect(explore).toBeDefined()
|
||||
expect(explore.fallbackChain).toBeArray()
|
||||
expect(explore.fallbackChain).toHaveLength(4)
|
||||
|
||||
const primary = explore.fallbackChain[0]
|
||||
expect(primary.providers).toContain("github-copilot")
|
||||
expect(primary.model).toBe("grok-code-fast-1")
|
||||
|
||||
const secondary = explore.fallbackChain[1]
|
||||
expect(secondary.providers).toContain("opencode")
|
||||
expect(secondary.model).toBe("minimax-m2.5-free")
|
||||
|
||||
const tertiary = explore.fallbackChain[2]
|
||||
expect(tertiary.providers).toContain("anthropic")
|
||||
expect(tertiary.model).toBe("claude-haiku-4-5")
|
||||
|
||||
const quaternary = explore.fallbackChain[3]
|
||||
expect(quaternary.providers).toContain("opencode")
|
||||
expect(quaternary.model).toBe("gpt-5-nano")
|
||||
})
|
||||
|
||||
test("multimodal-looker has valid fallbackChain with k2p5 as primary", () => {
|
||||
// given - multimodal-looker agent requirement
|
||||
const multimodalLooker = AGENT_MODEL_REQUIREMENTS["multimodal-looker"]
|
||||
|
||||
// when - accessing multimodal-looker requirement
|
||||
// then - fallbackChain exists with k2p5 as first entry
|
||||
expect(multimodalLooker).toBeDefined()
|
||||
expect(multimodalLooker.fallbackChain).toBeArray()
|
||||
expect(multimodalLooker.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = multimodalLooker.fallbackChain[0]
|
||||
expect(primary.providers[0]).toBe("kimi-for-coding")
|
||||
expect(primary.model).toBe("k2p5")
|
||||
})
|
||||
|
||||
test("prometheus has claude-opus-4-6 as primary", () => {
|
||||
// #given - prometheus agent requirement
|
||||
const prometheus = AGENT_MODEL_REQUIREMENTS["prometheus"]
|
||||
|
||||
// #when - accessing Prometheus requirement
|
||||
// #then - claude-opus-4-6 is first
|
||||
expect(prometheus).toBeDefined()
|
||||
expect(prometheus.fallbackChain).toBeArray()
|
||||
expect(prometheus.fallbackChain.length).toBeGreaterThan(1)
|
||||
|
||||
const primary = prometheus.fallbackChain[0]
|
||||
expect(primary.model).toBe("claude-opus-4-6")
|
||||
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
|
||||
expect(primary.variant).toBe("max")
|
||||
})
|
||||
|
||||
test("metis has claude-opus-4-6 as primary", () => {
|
||||
// #given - metis agent requirement
|
||||
const metis = AGENT_MODEL_REQUIREMENTS["metis"]
|
||||
|
||||
// #when - accessing Metis requirement
|
||||
// #then - claude-opus-4-6 is first
|
||||
expect(metis).toBeDefined()
|
||||
expect(metis.fallbackChain).toBeArray()
|
||||
expect(metis.fallbackChain.length).toBeGreaterThan(1)
|
||||
|
||||
const primary = metis.fallbackChain[0]
|
||||
expect(primary.model).toBe("claude-opus-4-6")
|
||||
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
|
||||
expect(primary.variant).toBe("max")
|
||||
})
|
||||
|
||||
test("momus has valid fallbackChain with gpt-5.2 as primary", () => {
|
||||
// given - momus agent requirement
|
||||
const momus = AGENT_MODEL_REQUIREMENTS["momus"]
|
||||
|
||||
// when - accessing Momus requirement
|
||||
// then - fallbackChain exists with gpt-5.2 as first entry, variant medium
|
||||
expect(momus).toBeDefined()
|
||||
expect(momus.fallbackChain).toBeArray()
|
||||
expect(momus.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = momus.fallbackChain[0]
|
||||
expect(primary.model).toBe("gpt-5.2")
|
||||
expect(primary.variant).toBe("medium")
|
||||
expect(primary.providers[0]).toBe("openai")
|
||||
})
|
||||
|
||||
test("atlas has valid fallbackChain with k2p5 as primary (kimi-for-coding prioritized)", () => {
|
||||
// given - atlas agent requirement
|
||||
const atlas = AGENT_MODEL_REQUIREMENTS["atlas"]
|
||||
|
||||
// when - accessing Atlas requirement
|
||||
// then - fallbackChain exists with k2p5 as first entry (kimi-for-coding prioritized)
|
||||
expect(atlas).toBeDefined()
|
||||
expect(atlas.fallbackChain).toBeArray()
|
||||
expect(atlas.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = atlas.fallbackChain[0]
|
||||
expect(primary.model).toBe("k2p5")
|
||||
expect(primary.providers[0]).toBe("kimi-for-coding")
|
||||
})
|
||||
|
||||
test("hephaestus requires openai/github-copilot/opencode provider", () => {
|
||||
// #given - hephaestus agent requirement
|
||||
const hephaestus = AGENT_MODEL_REQUIREMENTS["hephaestus"]
|
||||
|
||||
// #when - accessing hephaestus requirement
|
||||
// #then - requiresProvider is set to openai, github-copilot, opencode (not requiresModel)
|
||||
expect(hephaestus).toBeDefined()
|
||||
expect(hephaestus.requiresProvider).toEqual(["openai", "github-copilot", "opencode"])
|
||||
expect(hephaestus.requiresModel).toBeUndefined()
|
||||
})
|
||||
|
||||
test("all 10 builtin agents have valid fallbackChain arrays", () => {
|
||||
// #given - list of 10 agent names
|
||||
const expectedAgents = [
|
||||
"sisyphus",
|
||||
"hephaestus",
|
||||
"oracle",
|
||||
"librarian",
|
||||
test("defines all 10 builtin agents", () => {
|
||||
expect(Object.keys(AGENT_MODEL_REQUIREMENTS).sort()).toEqual([
|
||||
"atlas",
|
||||
"explore",
|
||||
"multimodal-looker",
|
||||
"prometheus",
|
||||
"hephaestus",
|
||||
"librarian",
|
||||
"metis",
|
||||
"momus",
|
||||
"atlas",
|
||||
]
|
||||
"multimodal-looker",
|
||||
"oracle",
|
||||
"prometheus",
|
||||
"sisyphus",
|
||||
])
|
||||
})
|
||||
|
||||
// when - checking AGENT_MODEL_REQUIREMENTS
|
||||
const definedAgents = Object.keys(AGENT_MODEL_REQUIREMENTS)
|
||||
test("sisyphus: 2nd fallback is quotio gpt-5.3-codex (high)", () => {
|
||||
const sisyphus = AGENT_MODEL_REQUIREMENTS["sisyphus"]
|
||||
expect(sisyphus.requiresAnyModel).toBe(true)
|
||||
expect(sisyphus.fallbackChain.length).toBeGreaterThan(2)
|
||||
|
||||
// #then - all agents present with valid fallbackChain
|
||||
expect(definedAgents).toHaveLength(10)
|
||||
for (const agent of expectedAgents) {
|
||||
const requirement = AGENT_MODEL_REQUIREMENTS[agent]
|
||||
expect(requirement).toBeDefined()
|
||||
expect(requirement.fallbackChain).toBeArray()
|
||||
expect(requirement.fallbackChain.length).toBeGreaterThan(0)
|
||||
expect(sisyphus.fallbackChain[0]).toEqual({
|
||||
providers: ["quotio"],
|
||||
model: "claude-opus-4-6",
|
||||
variant: "max",
|
||||
})
|
||||
|
||||
for (const entry of requirement.fallbackChain) {
|
||||
expect(entry.providers).toBeArray()
|
||||
expect(entry.providers.length).toBeGreaterThan(0)
|
||||
expect(typeof entry.model).toBe("string")
|
||||
expect(entry.model.length).toBeGreaterThan(0)
|
||||
}
|
||||
}
|
||||
expect(sisyphus.fallbackChain[1]).toEqual({
|
||||
providers: ["quotio"],
|
||||
model: "gpt-5.3-codex",
|
||||
variant: "high",
|
||||
})
|
||||
})
|
||||
|
||||
test("explore: uses speed chain, includes rome, and gpt-5-mini is copilot-first", () => {
|
||||
const explore = AGENT_MODEL_REQUIREMENTS["explore"]
|
||||
expect(explore.fallbackChain.length).toBeGreaterThan(4)
|
||||
expect(explore.fallbackChain[0].model).toBe("claude-haiku-4-5")
|
||||
expect(explore.fallbackChain.some((e) => e.model === "iflow-rome-30ba3b")).toBe(true)
|
||||
|
||||
const gptMini = explore.fallbackChain.find((e) => e.model === "gpt-5-mini")
|
||||
expect(gptMini).toBeDefined()
|
||||
expect(gptMini!.providers[0]).toBe("github-copilot")
|
||||
expect(gptMini!.variant).toBe("high")
|
||||
})
|
||||
|
||||
test("multimodal-looker: prefers gemini image model first", () => {
|
||||
const multimodal = AGENT_MODEL_REQUIREMENTS["multimodal-looker"]
|
||||
expect(multimodal.fallbackChain[0]).toEqual({
|
||||
providers: ["quotio"],
|
||||
model: "gemini-3-pro-image",
|
||||
})
|
||||
})
|
||||
|
||||
test("includes NVIDIA NIM additions in at least one agent chain", () => {
|
||||
const all = Object.values(AGENT_MODEL_REQUIREMENTS).flatMap((r) => r.fallbackChain)
|
||||
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "qwen/qwen3.5-397b-a17b")).toBe(true)
|
||||
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "stepfun-ai/step-3.5-flash")).toBe(true)
|
||||
expect(all.some((e) => e.providers.includes("nvidia") && e.model === "bytedance/seed-oss-36b-instruct")).toBe(true)
|
||||
})
|
||||
})
|
||||
|
||||
describe("CATEGORY_MODEL_REQUIREMENTS", () => {
|
||||
test("ultrabrain has valid fallbackChain with gpt-5.3-codex as primary", () => {
|
||||
// given - ultrabrain category requirement
|
||||
const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
|
||||
|
||||
// when - accessing ultrabrain requirement
|
||||
// then - fallbackChain exists with gpt-5.3-codex as first entry
|
||||
expect(ultrabrain).toBeDefined()
|
||||
expect(ultrabrain.fallbackChain).toBeArray()
|
||||
expect(ultrabrain.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = ultrabrain.fallbackChain[0]
|
||||
expect(primary.variant).toBe("xhigh")
|
||||
expect(primary.model).toBe("gpt-5.3-codex")
|
||||
expect(primary.providers[0]).toBe("openai")
|
||||
})
|
||||
|
||||
test("deep has valid fallbackChain with gpt-5.3-codex as primary", () => {
|
||||
// given - deep category requirement
|
||||
const deep = CATEGORY_MODEL_REQUIREMENTS["deep"]
|
||||
|
||||
// when - accessing deep requirement
|
||||
// then - fallbackChain exists with gpt-5.3-codex as first entry, medium variant
|
||||
expect(deep).toBeDefined()
|
||||
expect(deep.fallbackChain).toBeArray()
|
||||
expect(deep.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = deep.fallbackChain[0]
|
||||
expect(primary.variant).toBe("medium")
|
||||
expect(primary.model).toBe("gpt-5.3-codex")
|
||||
expect(primary.providers[0]).toBe("openai")
|
||||
})
|
||||
|
||||
test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => {
|
||||
// given - visual-engineering category requirement
|
||||
const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"]
|
||||
|
||||
// when - accessing visual-engineering requirement
|
||||
// then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) → k2p5
|
||||
expect(visualEngineering).toBeDefined()
|
||||
expect(visualEngineering.fallbackChain).toBeArray()
|
||||
expect(visualEngineering.fallbackChain).toHaveLength(4)
|
||||
|
||||
const primary = visualEngineering.fallbackChain[0]
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
expect(primary.model).toBe("gemini-3-pro")
|
||||
expect(primary.variant).toBe("high")
|
||||
|
||||
const second = visualEngineering.fallbackChain[1]
|
||||
expect(second.providers[0]).toBe("zai-coding-plan")
|
||||
expect(second.model).toBe("glm-5")
|
||||
|
||||
const third = visualEngineering.fallbackChain[2]
|
||||
expect(third.model).toBe("claude-opus-4-6")
|
||||
expect(third.variant).toBe("max")
|
||||
|
||||
const fourth = visualEngineering.fallbackChain[3]
|
||||
expect(fourth.providers[0]).toBe("kimi-for-coding")
|
||||
expect(fourth.model).toBe("k2p5")
|
||||
})
|
||||
|
||||
test("quick has valid fallbackChain with claude-haiku-4-5 as primary", () => {
|
||||
// given - quick category requirement
|
||||
const quick = CATEGORY_MODEL_REQUIREMENTS["quick"]
|
||||
|
||||
// when - accessing quick requirement
|
||||
// then - fallbackChain exists with claude-haiku-4-5 as first entry
|
||||
expect(quick).toBeDefined()
|
||||
expect(quick.fallbackChain).toBeArray()
|
||||
expect(quick.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = quick.fallbackChain[0]
|
||||
expect(primary.model).toBe("claude-haiku-4-5")
|
||||
expect(primary.providers[0]).toBe("anthropic")
|
||||
})
|
||||
|
||||
test("unspecified-low has valid fallbackChain with claude-sonnet-4-6 as primary", () => {
|
||||
// given - unspecified-low category requirement
|
||||
const unspecifiedLow = CATEGORY_MODEL_REQUIREMENTS["unspecified-low"]
|
||||
|
||||
// when - accessing unspecified-low requirement
|
||||
// then - fallbackChain exists with claude-sonnet-4-6 as first entry
|
||||
expect(unspecifiedLow).toBeDefined()
|
||||
expect(unspecifiedLow.fallbackChain).toBeArray()
|
||||
expect(unspecifiedLow.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = unspecifiedLow.fallbackChain[0]
|
||||
expect(primary.model).toBe("claude-sonnet-4-6")
|
||||
expect(primary.providers[0]).toBe("anthropic")
|
||||
})
|
||||
|
||||
test("unspecified-high has claude-opus-4-6 as primary", () => {
|
||||
// #given - unspecified-high category requirement
|
||||
const unspecifiedHigh = CATEGORY_MODEL_REQUIREMENTS["unspecified-high"]
|
||||
|
||||
// #when - accessing unspecified-high requirement
|
||||
// #then - claude-opus-4-6 is first
|
||||
expect(unspecifiedHigh).toBeDefined()
|
||||
expect(unspecifiedHigh.fallbackChain).toBeArray()
|
||||
expect(unspecifiedHigh.fallbackChain.length).toBeGreaterThan(1)
|
||||
|
||||
const primary = unspecifiedHigh.fallbackChain[0]
|
||||
expect(primary.model).toBe("claude-opus-4-6")
|
||||
expect(primary.variant).toBe("max")
|
||||
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
|
||||
})
|
||||
|
||||
test("artistry has valid fallbackChain with gemini-3-pro as primary", () => {
|
||||
// given - artistry category requirement
|
||||
const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"]
|
||||
|
||||
// when - accessing artistry requirement
|
||||
// then - fallbackChain exists with gemini-3-pro as first entry
|
||||
expect(artistry).toBeDefined()
|
||||
expect(artistry.fallbackChain).toBeArray()
|
||||
expect(artistry.fallbackChain.length).toBeGreaterThan(0)
|
||||
|
||||
const primary = artistry.fallbackChain[0]
|
||||
expect(primary.model).toBe("gemini-3-pro")
|
||||
expect(primary.variant).toBe("high")
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
})
|
||||
|
||||
test("writing has valid fallbackChain with k2p5 as primary (kimi-for-coding)", () => {
|
||||
// given - writing category requirement
|
||||
const writing = CATEGORY_MODEL_REQUIREMENTS["writing"]
|
||||
|
||||
// when - accessing writing requirement
|
||||
// then - fallbackChain: k2p5 → gemini-3-flash → claude-sonnet-4-6
|
||||
expect(writing).toBeDefined()
|
||||
expect(writing.fallbackChain).toBeArray()
|
||||
expect(writing.fallbackChain).toHaveLength(3)
|
||||
|
||||
const primary = writing.fallbackChain[0]
|
||||
expect(primary.model).toBe("k2p5")
|
||||
expect(primary.providers[0]).toBe("kimi-for-coding")
|
||||
|
||||
const second = writing.fallbackChain[1]
|
||||
expect(second.model).toBe("gemini-3-flash")
|
||||
expect(second.providers[0]).toBe("google")
|
||||
})
|
||||
|
||||
test("all 8 categories have valid fallbackChain arrays", () => {
|
||||
// given - list of 8 category names
|
||||
const expectedCategories = [
|
||||
"visual-engineering",
|
||||
"ultrabrain",
|
||||
"deep",
|
||||
test("defines all 8 categories", () => {
|
||||
expect(Object.keys(CATEGORY_MODEL_REQUIREMENTS).sort()).toEqual([
|
||||
"artistry",
|
||||
"deep",
|
||||
"quick",
|
||||
"unspecified-low",
|
||||
"ultrabrain",
|
||||
"unspecified-high",
|
||||
"unspecified-low",
|
||||
"visual-engineering",
|
||||
"writing",
|
||||
]
|
||||
])
|
||||
})
|
||||
|
||||
// when - checking CATEGORY_MODEL_REQUIREMENTS
|
||||
const definedCategories = Object.keys(CATEGORY_MODEL_REQUIREMENTS)
|
||||
test("deep requires gpt-5.3-codex", () => {
|
||||
expect(CATEGORY_MODEL_REQUIREMENTS["deep"].requiresModel).toBe("gpt-5.3-codex")
|
||||
})
|
||||
|
||||
// then - all categories present with valid fallbackChain
|
||||
expect(definedCategories).toHaveLength(8)
|
||||
for (const category of expectedCategories) {
|
||||
const requirement = CATEGORY_MODEL_REQUIREMENTS[category]
|
||||
expect(requirement).toBeDefined()
|
||||
expect(requirement.fallbackChain).toBeArray()
|
||||
expect(requirement.fallbackChain.length).toBeGreaterThan(0)
|
||||
test("quick uses the speed chain (haiku primary)", () => {
|
||||
expect(CATEGORY_MODEL_REQUIREMENTS["quick"].fallbackChain[0].model).toBe("claude-haiku-4-5")
|
||||
})
|
||||
|
||||
for (const entry of requirement.fallbackChain) {
|
||||
expect(entry.providers).toBeArray()
|
||||
expect(entry.providers.length).toBeGreaterThan(0)
|
||||
expect(typeof entry.model).toBe("string")
|
||||
expect(entry.model.length).toBeGreaterThan(0)
|
||||
}
|
||||
test("ultrabrain starts with gpt-5.3-codex (high)", () => {
|
||||
const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
|
||||
expect(ultrabrain.fallbackChain[0]).toEqual({
|
||||
providers: ["quotio"],
|
||||
model: "gpt-5.3-codex",
|
||||
variant: "high",
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
describe("ModelRequirements invariants", () => {
|
||||
test("all entries have non-empty providers and a non-empty model", () => {
|
||||
for (const entry of flattenChains()) {
|
||||
expect(entry.providers.length).toBeGreaterThan(0)
|
||||
expect(typeof entry.model).toBe("string")
|
||||
expect(entry.model.length).toBeGreaterThan(0)
|
||||
}
|
||||
})
|
||||
|
||||
test("no entry uses opencode provider and no excluded models are present", () => {
|
||||
for (const entry of flattenChains()) {
|
||||
assertNoOpencodeProvider(entry)
|
||||
assertNoExcludedModels(entry)
|
||||
assertNoProviderPrefixForNonNamespacedProviders(entry)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe("FallbackEntry type", () => {
|
||||
test("FallbackEntry structure is correct", () => {
|
||||
// given - a valid FallbackEntry object
|
||||
const entry: FallbackEntry = {
|
||||
providers: ["anthropic", "github-copilot", "opencode"],
|
||||
model: "claude-opus-4-6",
|
||||
variant: "high",
|
||||
}
|
||||
|
||||
// when - accessing properties
|
||||
// then - all properties are accessible
|
||||
expect(entry.providers).toEqual(["anthropic", "github-copilot", "opencode"])
|
||||
expect(entry.model).toBe("claude-opus-4-6")
|
||||
expect(entry.variant).toBe("high")
|
||||
})
|
||||
|
||||
test("FallbackEntry variant is optional", () => {
|
||||
// given - a FallbackEntry without variant
|
||||
const entry: FallbackEntry = {
|
||||
providers: ["opencode", "anthropic"],
|
||||
model: "big-pickle",
|
||||
}
|
||||
|
||||
// when - accessing variant
|
||||
// then - variant is undefined
|
||||
describe("Type sanity", () => {
|
||||
test("FallbackEntry.variant is optional", () => {
|
||||
const entry: FallbackEntry = { providers: ["quotio"], model: "claude-haiku-4-5" }
|
||||
expect(entry.variant).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe("ModelRequirement type", () => {
|
||||
test("ModelRequirement structure with fallbackChain is correct", () => {
|
||||
// given - a valid ModelRequirement object
|
||||
const requirement: ModelRequirement = {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot"], model: "gpt-5.2", variant: "high" },
|
||||
],
|
||||
}
|
||||
|
||||
// when - accessing properties
|
||||
// then - fallbackChain is accessible with correct structure
|
||||
expect(requirement.fallbackChain).toBeArray()
|
||||
expect(requirement.fallbackChain).toHaveLength(2)
|
||||
expect(requirement.fallbackChain[0].model).toBe("claude-opus-4-6")
|
||||
expect(requirement.fallbackChain[1].model).toBe("gpt-5.2")
|
||||
})
|
||||
|
||||
test("ModelRequirement variant is optional", () => {
|
||||
// given - a ModelRequirement without top-level variant
|
||||
const requirement: ModelRequirement = {
|
||||
fallbackChain: [{ providers: ["opencode"], model: "big-pickle" }],
|
||||
}
|
||||
|
||||
// when - accessing variant
|
||||
// then - variant is undefined
|
||||
expect(requirement.variant).toBeUndefined()
|
||||
})
|
||||
|
||||
test("no model in fallbackChain has provider prefix", () => {
|
||||
// given - all agent and category requirements
|
||||
const allRequirements = [
|
||||
...Object.values(AGENT_MODEL_REQUIREMENTS),
|
||||
...Object.values(CATEGORY_MODEL_REQUIREMENTS),
|
||||
]
|
||||
|
||||
// when - checking each model in fallbackChain
|
||||
// then - none contain "/" (provider prefix)
|
||||
for (const req of allRequirements) {
|
||||
for (const entry of req.fallbackChain) {
|
||||
expect(entry.model).not.toContain("/")
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
test("all fallbackChain entries have non-empty providers array", () => {
|
||||
// given - all agent and category requirements
|
||||
const allRequirements = [
|
||||
...Object.values(AGENT_MODEL_REQUIREMENTS),
|
||||
...Object.values(CATEGORY_MODEL_REQUIREMENTS),
|
||||
]
|
||||
|
||||
// when - checking each entry in fallbackChain
|
||||
// then - all have non-empty providers array
|
||||
for (const req of allRequirements) {
|
||||
for (const entry of req.fallbackChain) {
|
||||
expect(entry.providers).toBeArray()
|
||||
expect(entry.providers.length).toBeGreaterThan(0)
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe("requiresModel field in categories", () => {
|
||||
test("deep category has requiresModel set to gpt-5.3-codex", () => {
|
||||
// given
|
||||
const deep = CATEGORY_MODEL_REQUIREMENTS["deep"]
|
||||
|
||||
// when / #then
|
||||
expect(deep.requiresModel).toBe("gpt-5.3-codex")
|
||||
})
|
||||
|
||||
test("artistry category has requiresModel set to gemini-3-pro", () => {
|
||||
// given
|
||||
const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"]
|
||||
|
||||
// when / #then
|
||||
expect(artistry.requiresModel).toBe("gemini-3-pro")
|
||||
test("ModelRequirement.variant is optional", () => {
|
||||
const req: ModelRequirement = { fallbackChain: [{ providers: ["quotio"], model: "claude-haiku-4-5" }] }
|
||||
expect(req.variant).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -12,85 +12,133 @@ export type ModelRequirement = {
|
||||
requiresProvider?: string[] // If set, only activates when any of these providers is connected
|
||||
}
|
||||
|
||||
function fb(providers: string[] | string, model: string, variant?: string): FallbackEntry {
|
||||
return {
|
||||
providers: Array.isArray(providers) ? providers : [providers],
|
||||
model,
|
||||
...(variant !== undefined ? { variant } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
// Provider preference rules:
|
||||
// - Never use the paid `opencode` provider as an automatic fallback.
|
||||
// - Prefer `quotio` when the same model exists across multiple providers.
|
||||
// - Prefer `github-copilot` first for `gpt-5-mini` (unlimited), fall back to `quotio`.
|
||||
// Note: user requested "Quotio-first" and to avoid the OpenCode provider; we keep runtime fallbacks on
|
||||
// `quotio` + `nvidia` (+ `github-copilot` for unlimited GPT mini) unless explicitly requested otherwise.
|
||||
const P_GPT: string[] = ["quotio"]
|
||||
const P_GPT_MINI: string[] = ["github-copilot", "quotio"]
|
||||
|
||||
// Benchmark-driven ordering (user-provided table + NVIDIA NIM docs), tuned per-agent for quality vs speed.
|
||||
|
||||
const SPEED_CHAIN: FallbackEntry[] = [
|
||||
fb("quotio", "claude-haiku-4-5"), fb("quotio", "oswe-vscode-prime"),
|
||||
fb(P_GPT_MINI, "gpt-5-mini", "high"), fb(P_GPT_MINI, "gpt-4.1"),
|
||||
fb("nvidia", "nvidia/nemotron-3-nano-30b-a3b"), fb("quotio", "iflow-rome-30ba3b"),
|
||||
fb("minimax-coding-plan", "MiniMax-M2.5"), fb("nvidia", "bytedance/seed-oss-36b-instruct"),
|
||||
fb("quotio", "claude-sonnet-4-5"),
|
||||
]
|
||||
|
||||
const QUALITY_CODING_CHAIN: FallbackEntry[] = [
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("nvidia", "stepfun-ai/step-3.5-flash"),
|
||||
fb("nvidia", "qwen/qwen3.5-397b-a17b"),
|
||||
fb("quotio", "glm-5"),
|
||||
fb("nvidia", "z-ai/glm5"),
|
||||
fb("quotio", "deepseek-v3.2-reasoner"),
|
||||
fb("quotio", "deepseek-r1"),
|
||||
fb("nvidia", "deepseek-ai/deepseek-r1"),
|
||||
fb("quotio", "qwen3-235b-a22b-thinking-2507"),
|
||||
fb("nvidia", "qwen/qwen3-next-80b-a3b-thinking"),
|
||||
fb("nvidia", "qwen/qwen3-coder-480b-a35b-instruct"),
|
||||
fb("nvidia", "bytedance/seed-oss-36b-instruct"),
|
||||
fb("quotio", "kimi-k2-thinking"),
|
||||
fb("quotio", "kimi-k2.5"),
|
||||
fb("nvidia", "moonshotai/kimi-k2.5"),
|
||||
fb("minimax-coding-plan", "MiniMax-M2.5"),
|
||||
fb("minimax-coding-plan", "MiniMax-M2.5-highspeed"),
|
||||
fb("minimax", "MiniMax-M2.5"),
|
||||
fb("quotio", "minimax-m2.5"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
]
|
||||
|
||||
export const AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
sisyphus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
|
||||
{ providers: ["opencode"], model: "big-pickle" },
|
||||
// 1st fallback: switch away from Opus Thinking to the non-thinking model (often more available).
|
||||
fb("quotio", "claude-opus-4-6", "max"),
|
||||
// 2nd fallback: user-requested.
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
...SPEED_CHAIN,
|
||||
],
|
||||
requiresAnyModel: true,
|
||||
},
|
||||
hephaestus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
requiresProvider: ["openai", "github-copilot", "opencode"],
|
||||
requiresAnyModel: true,
|
||||
},
|
||||
oracle: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
librarian: {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["opencode"], model: "minimax-m2.5-free" },
|
||||
{ providers: ["opencode"], model: "big-pickle" },
|
||||
],
|
||||
},
|
||||
explore: {
|
||||
librarian: {
|
||||
fallbackChain: [
|
||||
{ providers: ["github-copilot"], model: "grok-code-fast-1" },
|
||||
{ providers: ["opencode"], model: "minimax-m2.5-free" },
|
||||
{ providers: ["anthropic", "opencode"], model: "claude-haiku-4-5" },
|
||||
{ providers: ["opencode"], model: "gpt-5-nano" },
|
||||
fb("quotio", "claude-sonnet-4-5"),
|
||||
...SPEED_CHAIN,
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
explore: {
|
||||
fallbackChain: SPEED_CHAIN,
|
||||
},
|
||||
"multimodal-looker": {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.6v" },
|
||||
fb("quotio", "gemini-3-pro-image"),
|
||||
fb("quotio", "gemini-3-pro-high"),
|
||||
fb("quotio", "gemini-3-flash"),
|
||||
fb("quotio", "kimi-k2.5"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
fb("quotio", "claude-haiku-4-5"),
|
||||
],
|
||||
},
|
||||
prometheus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
metis: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
momus: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
atlas: {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["opencode"], model: "kimi-k2.5-free" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "gpt-5.3-codex", "medium"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
}
|
||||
@@ -98,61 +146,60 @@ export const AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
"visual-engineering": {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "gemini-3-pro-image"),
|
||||
fb("quotio", "kimi-k2-thinking"),
|
||||
fb("quotio", "kimi-k2.5"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
fb("quotio", "gpt-5.3-codex", "medium"),
|
||||
],
|
||||
},
|
||||
ultrabrain: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("nvidia", "stepfun-ai/step-3.5-flash"),
|
||||
fb("nvidia", "qwen/qwen3.5-397b-a17b"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
deep: {
|
||||
fallbackChain: [
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
fb("quotio", "gpt-5.3-codex", "medium"),
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
requiresModel: "gpt-5.3-codex",
|
||||
},
|
||||
artistry: {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "claude-sonnet-4-5-thinking"),
|
||||
fb("quotio", "claude-sonnet-4-5"),
|
||||
],
|
||||
requiresModel: "gemini-3-pro",
|
||||
requiresModel: "claude-opus-4-6",
|
||||
},
|
||||
quick: {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-haiku-4-5" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["opencode"], model: "gpt-5-nano" },
|
||||
],
|
||||
fallbackChain: SPEED_CHAIN,
|
||||
},
|
||||
"unspecified-low": {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.3-codex", variant: "medium" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
],
|
||||
fallbackChain: SPEED_CHAIN,
|
||||
},
|
||||
"unspecified-high": {
|
||||
fallbackChain: [
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
fb("quotio", "claude-opus-4-6-thinking"),
|
||||
fb("quotio", "gpt-5.3-codex", "high"),
|
||||
...QUALITY_CODING_CHAIN,
|
||||
],
|
||||
},
|
||||
writing: {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" },
|
||||
fb("quotio", "claude-sonnet-4-5"),
|
||||
fb("quotio", "glm-5"),
|
||||
fb("quotio", "kimi-k2.5"),
|
||||
fb("quotio", "claude-haiku-4-5"),
|
||||
fb("quotio", "gemini-3-flash"),
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
30
src/shared/session-model-state.test.ts
Normal file
30
src/shared/session-model-state.test.ts
Normal file
@@ -0,0 +1,30 @@
|
||||
import { describe, expect, test } from "bun:test"
|
||||
import { clearSessionModel, getSessionModel, setSessionModel } from "./session-model-state"
|
||||
|
||||
describe("session-model-state", () => {
|
||||
test("stores and retrieves a session model", () => {
|
||||
//#given
|
||||
const sessionID = "ses_test"
|
||||
|
||||
//#when
|
||||
setSessionModel(sessionID, { providerID: "github-copilot", modelID: "gpt-4.1" })
|
||||
|
||||
//#then
|
||||
expect(getSessionModel(sessionID)).toEqual({
|
||||
providerID: "github-copilot",
|
||||
modelID: "gpt-4.1",
|
||||
})
|
||||
})
|
||||
|
||||
test("clears a session model", () => {
|
||||
//#given
|
||||
const sessionID = "ses_clear"
|
||||
setSessionModel(sessionID, { providerID: "quotio", modelID: "gpt-5.3-codex" })
|
||||
|
||||
//#when
|
||||
clearSessionModel(sessionID)
|
||||
|
||||
//#then
|
||||
expect(getSessionModel(sessionID)).toBeUndefined()
|
||||
})
|
||||
})
|
||||
15
src/shared/session-model-state.ts
Normal file
15
src/shared/session-model-state.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
export type SessionModel = { providerID: string; modelID: string }
|
||||
|
||||
const sessionModels = new Map<string, SessionModel>()
|
||||
|
||||
export function setSessionModel(sessionID: string, model: SessionModel): void {
|
||||
sessionModels.set(sessionID, model)
|
||||
}
|
||||
|
||||
export function getSessionModel(sessionID: string): SessionModel | undefined {
|
||||
return sessionModels.get(sessionID)
|
||||
}
|
||||
|
||||
export function clearSessionModel(sessionID: string): void {
|
||||
sessionModels.delete(sessionID)
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
import type { CallOmoAgentArgs } from "./types"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { subagentSessions } from "../../features/claude-code-session-state"
|
||||
import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state"
|
||||
import { log } from "../../shared"
|
||||
|
||||
export async function createOrGetSession(
|
||||
@@ -64,6 +64,7 @@ Original error: ${createResult.error}`)
|
||||
const sessionID = createResult.data.id
|
||||
log(`[call_omo_agent] Created session: ${sessionID}`)
|
||||
subagentSessions.add(sessionID)
|
||||
syncSubagentSessions.add(sessionID)
|
||||
return { sessionID, isNew: true }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import { log } from "../../shared"
|
||||
import { resolveSessionDirectory } from "../../shared"
|
||||
import { subagentSessions } from "../../features/claude-code-session-state"
|
||||
import { subagentSessions, syncSubagentSessions } from "../../features/claude-code-session-state"
|
||||
import type { CallOmoAgentArgs } from "./types"
|
||||
import type { ToolContextWithMetadata } from "./tool-context-with-metadata"
|
||||
|
||||
@@ -69,5 +69,6 @@ Original error: ${createResult.error}`,
|
||||
const sessionID = createResult.data.id
|
||||
log(`[call_omo_agent] Created session: ${sessionID}`)
|
||||
subagentSessions.add(sessionID)
|
||||
syncSubagentSessions.add(sessionID)
|
||||
return { ok: true, sessionID }
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
|
||||
import type { ExecutorContext, ParentContext } from "./executor-types"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { getTimingConfig } from "./timing"
|
||||
import { storeToolMetadata } from "../../features/tool-metadata-store"
|
||||
import { formatDetailedError } from "./error-formatting"
|
||||
@@ -12,7 +13,8 @@ export async function executeBackgroundTask(
|
||||
parentContext: ParentContext,
|
||||
agentToUse: string,
|
||||
categoryModel: { providerID: string; modelID: string; variant?: string } | undefined,
|
||||
systemContent: string | undefined
|
||||
systemContent: string | undefined,
|
||||
fallbackChain?: FallbackEntry[],
|
||||
): Promise<string> {
|
||||
const { manager } = executorCtx
|
||||
|
||||
@@ -27,6 +29,7 @@ export async function executeBackgroundTask(
|
||||
parentAgent: parentContext.agent,
|
||||
parentTools: getSessionTools(parentContext.sessionID),
|
||||
model: categoryModel,
|
||||
fallbackChain,
|
||||
skills: args.load_skills.length > 0 ? args.load_skills : undefined,
|
||||
skillContent: systemContent,
|
||||
category: args.category,
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import type { ModelFallbackInfo } from "../../features/task-toast-manager/types"
|
||||
import type { DelegateTaskArgs } from "./types"
|
||||
import type { ExecutorContext } from "./executor-types"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { mergeCategories } from "../../shared/merge-categories"
|
||||
import { SISYPHUS_JUNIOR_AGENT } from "./sisyphus-junior-agent"
|
||||
import { resolveCategoryConfig } from "./categories"
|
||||
@@ -16,6 +17,7 @@ export interface CategoryResolutionResult {
|
||||
modelInfo: ModelFallbackInfo | undefined
|
||||
actualModel: string | undefined
|
||||
isUnstableAgent: boolean
|
||||
fallbackChain?: FallbackEntry[] // For runtime retry on model errors
|
||||
error?: string
|
||||
}
|
||||
|
||||
@@ -177,5 +179,6 @@ Available categories: ${categoryNames.join(", ")}`,
|
||||
modelInfo,
|
||||
actualModel,
|
||||
isUnstableAgent,
|
||||
fallbackChain: requirement?.fallbackChain,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@ import { getAgentDisplayName, getAgentConfigKey } from "../../shared/agent-displ
|
||||
import { normalizeSDKResponse } from "../../shared"
|
||||
import { log } from "../../shared/logger"
|
||||
import { getAvailableModelsForDelegateTask } from "./available-models"
|
||||
import type { FallbackEntry } from "../../shared/model-requirements"
|
||||
import { resolveModelForDelegateTask } from "./model-selection"
|
||||
|
||||
export async function resolveSubagentExecution(
|
||||
@@ -15,7 +16,7 @@ export async function resolveSubagentExecution(
|
||||
executorCtx: ExecutorContext,
|
||||
parentAgent: string | undefined,
|
||||
categoryExamples: string
|
||||
): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; error?: string }> {
|
||||
): Promise<{ agentToUse: string; categoryModel: { providerID: string; modelID: string; variant?: string } | undefined; fallbackChain?: FallbackEntry[]; error?: string }> {
|
||||
const { client, agentOverrides } = executorCtx
|
||||
|
||||
if (!args.subagent_type?.trim()) {
|
||||
@@ -46,6 +47,7 @@ Create the work plan directly - that's your job as the planning agent.`,
|
||||
|
||||
let agentToUse = agentName
|
||||
let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
|
||||
let fallbackChain: FallbackEntry[] | undefined = undefined
|
||||
|
||||
try {
|
||||
const agentsResult = await client.app.agents()
|
||||
@@ -92,6 +94,7 @@ Create the work plan directly - that's your job as the planning agent.`,
|
||||
const agentOverride = agentOverrides?.[agentConfigKey as keyof typeof agentOverrides]
|
||||
?? (agentOverrides ? Object.entries(agentOverrides).find(([key]) => key.toLowerCase() === agentConfigKey)?.[1] : undefined)
|
||||
const agentRequirement = AGENT_MODEL_REQUIREMENTS[agentConfigKey]
|
||||
fallbackChain = agentRequirement?.fallbackChain
|
||||
|
||||
if (agentOverride?.model || agentRequirement || matchedAgent.model) {
|
||||
const availableModels = await getAvailableModelsForDelegateTask(client)
|
||||
@@ -135,5 +138,5 @@ Create the work plan directly - that's your job as the planning agent.`,
|
||||
}
|
||||
}
|
||||
|
||||
return { agentToUse, categoryModel }
|
||||
return { agentToUse, categoryModel, fallbackChain }
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
|
||||
import type { ExecutorContext, ParentContext } from "./executor-types"
|
||||
import { getTaskToastManager } from "../../features/task-toast-manager"
|
||||
import { storeToolMetadata } from "../../features/tool-metadata-store"
|
||||
import { subagentSessions } from "../../features/claude-code-session-state"
|
||||
import { subagentSessions, syncSubagentSessions, setSessionAgent } from "../../features/claude-code-session-state"
|
||||
import { log } from "../../shared/logger"
|
||||
import { formatDuration } from "./time-formatter"
|
||||
import { formatDetailedError } from "./error-formatting"
|
||||
@@ -40,6 +40,8 @@ export async function executeSyncTask(
|
||||
const sessionID = createSessionResult.sessionID
|
||||
syncSessionID = sessionID
|
||||
subagentSessions.add(sessionID)
|
||||
syncSubagentSessions.add(sessionID)
|
||||
setSessionAgent(sessionID, agentToUse)
|
||||
|
||||
if (onSyncSessionCreated) {
|
||||
log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID })
|
||||
@@ -59,6 +61,7 @@ export async function executeSyncTask(
|
||||
if (toastManager) {
|
||||
toastManager.addTask({
|
||||
id: taskId,
|
||||
sessionID,
|
||||
description: args.description,
|
||||
agent: agentToUse,
|
||||
isBackground: false,
|
||||
@@ -145,6 +148,7 @@ session_id: ${sessionID}
|
||||
} finally {
|
||||
if (syncSessionID) {
|
||||
subagentSessions.delete(syncSessionID)
|
||||
syncSubagentSessions.delete(syncSessionID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,6 +164,7 @@ Prompts MUST be in English.`
|
||||
let modelInfo: import("../../features/task-toast-manager/types").ModelFallbackInfo | undefined
|
||||
let actualModel: string | undefined
|
||||
let isUnstableAgent = false
|
||||
let fallbackChain: import("../../shared/model-requirements").FallbackEntry[] | undefined
|
||||
|
||||
if (args.category) {
|
||||
const resolution = await resolveCategoryExecution(args, options, inheritedModel, systemDefaultModel)
|
||||
@@ -176,6 +177,7 @@ Prompts MUST be in English.`
|
||||
modelInfo = resolution.modelInfo
|
||||
actualModel = resolution.actualModel
|
||||
isUnstableAgent = resolution.isUnstableAgent
|
||||
fallbackChain = resolution.fallbackChain
|
||||
|
||||
const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
|
||||
|
||||
@@ -206,6 +208,7 @@ Prompts MUST be in English.`
|
||||
}
|
||||
agentToUse = resolution.agentToUse
|
||||
categoryModel = resolution.categoryModel
|
||||
fallbackChain = resolution.fallbackChain
|
||||
}
|
||||
|
||||
const systemContent = buildSystemContent({
|
||||
@@ -217,7 +220,7 @@ Prompts MUST be in English.`
|
||||
})
|
||||
|
||||
if (runInBackground) {
|
||||
return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent)
|
||||
return executeBackgroundTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, fallbackChain)
|
||||
}
|
||||
|
||||
return executeSyncTask(args, ctx, options, parentContext, agentToUse, categoryModel, systemContent, modelInfo)
|
||||
|
||||
Reference in New Issue
Block a user