Compare commits

..

1 Commits

Author SHA1 Message Date
YeonGyu-Kim
fd7faff792 fix: expand error classifier patterns and auto-enable model_fallback (#2393) 2026-03-12 01:18:15 +09:00
36 changed files with 375 additions and 461 deletions

View File

@@ -181,7 +181,7 @@ When Sisyphus delegates to a subagent, it doesn't pick a model. It picks a **cat
| `quick` | Single-file changes, typos |
| `ultrabrain` | Hard logic, architecture decisions |
Agent says what kind of work. Harness picks the right model. `ultrabrain` now routes to GPT-5.4 xhigh by default. You touch nothing.
Agent says what kind of work. Harness picks the right model. You touch nothing.
### Claude Code Compatibility

View File

@@ -147,11 +147,11 @@ When agents delegate work, they don't pick a model name — they pick a **catego
| Category | When Used | Fallback Chain |
| -------------------- | -------------------------- | -------------------------------------------- |
| `visual-engineering` | Frontend, UI, CSS, design | Gemini 3.1 Pro → GLM 5 → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.4 → Gemini 3.1 Pro → Claude Opus |
| `ultrabrain` | Maximum reasoning needed | GPT-5.3 Codex → Gemini 3.1 Pro → Claude Opus |
| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
| `artistry` | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4 |
| `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → GPT-5-Nano |
| `unspecified-high` | General complex work | Claude Opus → GPT-5.4 (high) → GLM 5 → K2P5 |
| `unspecified-high` | General complex work | GPT-5.4 → Claude Opus → GLM 5 → K2P5 |
| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → Gemini Flash |
| `writing` | Text, docs, prose | Gemini Flash → Claude Sonnet |
@@ -190,7 +190,7 @@ See the [Orchestration System Guide](./orchestration.md) for how agents dispatch
"categories": {
"quick": { "model": "opencode/gpt-5-nano" },
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4-high" },
"visual-engineering": {
"model": "google/gemini-3.1-pro",
"variant": "high",

View File

@@ -296,12 +296,12 @@ task({ category: "quick", prompt: "..." }); // "Just get it done fast"
| Category | Model | When to Use |
| -------------------- | ---------------------- | ----------------------------------------------------------- |
| `visual-engineering` | Gemini 3.1 Pro | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | GPT-5.4 (xhigh) | Deep logical reasoning, complex architecture decisions |
| `ultrabrain` | GPT-5.3 Codex (xhigh) | Deep logical reasoning, complex architecture decisions |
| `artistry` | Gemini 3.1 Pro (high) | Highly creative or artistic tasks, novel ideas |
| `quick` | Claude Haiku 4.5 | Trivial tasks - single file changes, typo fixes |
| `deep` | GPT-5.3 Codex (medium) | Goal-oriented autonomous problem-solving, thorough research |
| `unspecified-low` | Claude Sonnet 4.6 | Tasks that don't fit other categories, low effort |
| `unspecified-high` | Claude Opus 4.6 (max) | Tasks that don't fit other categories, high effort |
| `unspecified-high` | GPT-5.4 (high) | Tasks that don't fit other categories, high effort |
| `writing` | Gemini 3 Flash | Documentation, prose, technical writing |
### Skills: Domain-Specific Instructions

View File

@@ -101,7 +101,7 @@ Use Hephaestus when you need deep architectural reasoning, complex debugging acr
- **Multi-model orchestration.** Pure Codex is single-model. OmO routes different tasks to different models automatically. GPT for deep reasoning. Gemini for frontend. Haiku for speed. The right brain for the right job.
- **Background agents.** Fire 5+ agents in parallel. Something Codex simply cannot do. While one agent writes code, another researches patterns, another checks documentation. Like a real dev team.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.4. `quick` gets Haiku. No manual juggling.
- **Category system.** Tasks are routed by intent, not model name. `visual-engineering` gets Gemini. `ultrabrain` gets GPT-5.3 Codex. `quick` gets Haiku. No manual juggling.
- **Accumulated wisdom.** Subagents learn from previous results. Conventions discovered in task 1 are passed to task 5. Mistakes made early aren't repeated. The system gets smarter as it works.
### Prometheus: The Strategic Planner
@@ -193,13 +193,13 @@ You can override specific agents or categories in your config:
},
// General high-effort work
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4", "variant": "high" },
// Quick tasks: use the cheapest models
"quick": { "model": "anthropic/claude-haiku-4-5" },
// Deep reasoning: GPT-5.4
"ultrabrain": { "model": "openai/gpt-5.4", "variant": "xhigh" },
// Deep reasoning: GPT-5.3-codex
"ultrabrain": { "model": "openai/gpt-5.3-codex", "variant": "xhigh" },
},
}
```

View File

@@ -100,7 +100,7 @@ Here's a practical starting configuration:
"unspecified-low": { "model": "anthropic/claude-sonnet-4-6" },
// unspecified-high — complex work
"unspecified-high": { "model": "anthropic/claude-opus-4-6", "variant": "max" },
"unspecified-high": { "model": "openai/gpt-5.4-high" },
// writing — docs/prose
"writing": { "model": "google/gemini-3-flash" },
@@ -225,12 +225,12 @@ Domain-specific model delegation used by the `task()` tool. When Sisyphus delega
| Category | Default Model | Description |
| -------------------- | ------------------------------- | ---------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` (high) | Frontend, UI/UX, design, animation |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture |
| `deep` | `openai/gpt-5.3-codex` (medium) | Autonomous problem-solving, thorough research |
| `artistry` | `google/gemini-3.1-pro` (high) | Creative/unconventional approaches |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks, typo fixes, single-file changes |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | General tasks, low effort |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | General tasks, high effort |
| `unspecified-high` | `openai/gpt-5.4` (high) | General tasks, high effort |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
> **Note**: Built-in defaults only apply if the category is present in your config. Otherwise the system default model is used.
@@ -283,12 +283,12 @@ Disable categories: `{ "disabled_categories": ["ultrabrain"] }`
| Category | Default Model | Provider Priority |
| ---------------------- | ------------------- | -------------------------------------------------------------- |
| **visual-engineering** | `gemini-3.1-pro` | `gemini-3.1-pro` → `glm-5` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.4` | `gpt-5.4` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **ultrabrain** | `gpt-5.3-codex` | `gpt-5.3-codex` → `gemini-3.1-pro` → `claude-opus-4-6` |
| **deep** | `gpt-5.3-codex` | `gpt-5.3-codex` → `claude-opus-4-6` → `gemini-3.1-pro` |
| **artistry** | `gemini-3.1-pro` | `gemini-3.1-pro` → `claude-opus-4-6` → `gpt-5.4` |
| **quick** | `claude-haiku-4-5` | `claude-haiku-4-5` → `gemini-3-flash` → `gpt-5-nano` |
| **unspecified-low** | `claude-sonnet-4-6` | `claude-sonnet-4-6` → `gpt-5.3-codex` → `gemini-3-flash` |
| **unspecified-high** | `claude-opus-4-6` | `claude-opus-4-6` → `gpt-5.4 (high)` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **unspecified-high** | `gpt-5.4` | `gpt-5.4` → `claude-opus-4-6` → `glm-5` → `k2p5` → `kimi-k2.5` |
| **writing** | `gemini-3-flash` | `gemini-3-flash` → `claude-sonnet-4-6` |
Run `bunx oh-my-opencode doctor --verbose` to see effective model resolution for your config.

View File

@@ -108,12 +108,12 @@ By combining these two concepts, you can generate optimal agents through `task`.
| Category | Default Model | Use Cases |
| -------------------- | ------------------------------- | --------------------------------------------------------------------------------------------------------------------------- |
| `visual-engineering` | `google/gemini-3.1-pro` | Frontend, UI/UX, design, styling, animation |
| `ultrabrain` | `openai/gpt-5.4` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
| `artistry` | `google/gemini-3.1-pro` (high) | Highly creative/artistic tasks, novel ideas |
| `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
| `unspecified-low` | `anthropic/claude-sonnet-4-6` | Tasks that don't fit other categories, low effort required |
| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
| `unspecified-high` | `openai/gpt-5.4` (high) | Tasks that don't fit other categories, high effort required |
| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |
### Usage
@@ -332,7 +332,7 @@ You can create powerful specialized agents by combining Categories and Skills.
- **Category**: `ultrabrain`
- **load_skills**: `[]` (pure reasoning)
- **Effect**: Leverages GPT-5.4 xhigh reasoning for in-depth system architecture analysis.
- **Effect**: Leverages GPT-5.3 Codex's logical reasoning for in-depth system architecture analysis.
#### The Maintainer (Quick Fixes)

View File

@@ -115,7 +115,6 @@ export async function createBuiltinAgents(
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv,
})

View File

@@ -7,7 +7,7 @@ import { AGENT_MODEL_REQUIREMENTS, isModelAvailable } from "../../shared"
import { buildAgent, isFactory } from "../agent-builder"
import { applyOverrides } from "./agent-overrides"
import { applyEnvironmentContext } from "./environment-context"
import { applyModelResolution, getFirstFallbackModel } from "./model-resolution"
import { applyModelResolution } from "./model-resolution"
export function collectPendingBuiltinAgents(input: {
agentSources: Record<BuiltinAgentName, import("../agent-builder").AgentSource>
@@ -21,7 +21,6 @@ export function collectPendingBuiltinAgents(input: {
browserProvider?: BrowserAutomationProvider
uiSelectedModel?: string
availableModels: Set<string>
isFirstRunNoCache: boolean
disabledSkills?: Set<string>
useTaskSystem?: boolean
disableOmoEnv?: boolean
@@ -38,7 +37,6 @@ export function collectPendingBuiltinAgents(input: {
browserProvider,
uiSelectedModel,
availableModels,
isFirstRunNoCache,
disabledSkills,
disableOmoEnv = false,
} = input
@@ -68,16 +66,13 @@ export function collectPendingBuiltinAgents(input: {
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
let resolution = applyModelResolution({
const resolution = applyModelResolution({
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
userModel: override?.model,
requirement,
availableModels,
systemDefaultModel,
})
if (!resolution && isFirstRunNoCache && !override?.model) {
resolution = getFirstFallbackModel(requirement)
}
if (!resolution) continue
const { model, variant: resolvedVariant } = resolution

View File

@@ -483,23 +483,17 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
cacheSpy.mockRestore?.()
})
test("oracle is created on first run when no cache and no systemDefaultModel", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(new Set())
test("agents NOT created when no cache and no systemDefaultModel (first run without defaults)", async () => {
// #given
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
try {
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
// #when
const agents = await createBuiltinAgents([], {}, undefined, undefined)
// #then
expect(agents.oracle).toBeDefined()
expect(agents.oracle.model).toBe("openai/gpt-5.4")
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
}
})
// #then
expect(agents.oracle).toBeUndefined()
cacheSpy.mockRestore?.()
})
test("sisyphus created via connected cache fallback when all providers available", async () => {
// #given

View File

@@ -8,7 +8,7 @@ export const BackgroundTaskConfigSchema = z.object({
maxDescendants: z.number().int().min(1).optional(),
/** Stale timeout in milliseconds - interrupt tasks with no activity for this duration (default: 180000 = 3 minutes, minimum: 60000 = 1 minute) */
staleTimeoutMs: z.number().min(60000).optional(),
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 1800000 = 30 minutes, minimum: 60000 = 1 minute) */
/** Timeout for tasks that never received any progress update, falling back to startedAt (default: 600000 = 10 minutes, minimum: 60000 = 1 minute) */
messageStalenessTimeoutMs: z.number().min(60000).optional(),
syncPollTimeoutMs: z.number().min(60000).optional(),
})

View File

@@ -4,7 +4,7 @@ import type { BackgroundTask, LaunchInput } from "./types"
export const TASK_TTL_MS = 30 * 60 * 1000
export const MIN_STABILITY_TIME_MS = 10 * 1000
export const DEFAULT_STALE_TIMEOUT_MS = 180_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 600_000
export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
export const MIN_IDLE_TIME_MS = 5000
export const POLLING_INTERVAL_MS = 3000

View File

@@ -1,60 +0,0 @@
declare const require: (name: string) => any
const { describe, expect, test, mock } = require("bun:test")
import { DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS } from "./constants"
import { checkAndInterruptStaleTasks } from "./task-poller"
import type { BackgroundTask } from "./types"
function createRunningTask(startedAt: Date): BackgroundTask {
return {
id: "task-1",
sessionID: "ses-1",
parentSessionID: "parent-ses-1",
parentMessageID: "msg-1",
description: "test",
prompt: "test",
agent: "explore",
status: "running",
startedAt,
progress: undefined,
}
}
describe("DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS", () => {
test("uses a 30 minute default", () => {
// #given
const expectedTimeout = 30 * 60 * 1000
// #when
const timeout = DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
// #then
expect(timeout).toBe(expectedTimeout)
})
test("does not interrupt a never-updated task after 15 minutes when config is omitted", async () => {
// #given
const task = createRunningTask(new Date(Date.now() - 15 * 60 * 1000))
const client = {
session: {
abort: mock(() => Promise.resolve()),
},
}
const concurrencyManager = {
release: mock(() => {}),
}
const notifyParentSession = mock(() => Promise.resolve())
// #when
await checkAndInterruptStaleTasks({
tasks: [task],
client: client as never,
config: undefined,
concurrencyManager: concurrencyManager as never,
notifyParentSession,
})
// #then
expect(task.status).toBe("running")
})
})

View File

@@ -117,13 +117,13 @@ describe("checkAndInterruptStaleTasks", () => {
})
it("should use DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS when messageStalenessTimeoutMs is not configured", async () => {
//#given — task started 35 minutes ago, no config for messageStalenessTimeoutMs
//#given — task started 15 minutes ago, no config for messageStalenessTimeoutMs
const task = createRunningTask({
startedAt: new Date(Date.now() - 35 * 60 * 1000),
startedAt: new Date(Date.now() - 15 * 60 * 1000),
progress: undefined,
})
//#when — default is 30 minutes (1_800_000ms)
//#when — default is 10 minutes (600_000ms)
await checkAndInterruptStaleTasks({
tasks: [task],
client: mockClient as never,

View File

@@ -1025,6 +1025,7 @@ Original task: Build something`
const transcriptPath = join(TEST_DIR, "transcript.jsonl")
const toolResultEntry = JSON.stringify({
type: "tool_result",
timestamp: new Date().toISOString(),
tool_name: "write",
tool_input: {},
tool_output: { output: "Task complete! <promise>DONE</promise>" },

View File

@@ -11,15 +11,6 @@ import {
migrateConfigFile,
} from "./shared";
const PARTIAL_STRING_ARRAY_KEYS = new Set([
"disabled_mcps",
"disabled_agents",
"disabled_skills",
"disabled_hooks",
"disabled_commands",
"disabled_tools",
]);
export function parseConfigPartially(
rawConfig: Record<string, unknown>
): OhMyOpenCodeConfig | null {
@@ -32,14 +23,6 @@ export function parseConfigPartially(
const invalidSections: string[] = [];
for (const key of Object.keys(rawConfig)) {
if (PARTIAL_STRING_ARRAY_KEYS.has(key)) {
const sectionValue = rawConfig[key];
if (Array.isArray(sectionValue) && sectionValue.every((value) => typeof value === "string")) {
partialConfig[key] = sectionValue;
}
continue;
}
const sectionResult = OhMyOpenCodeConfigSchema.safeParse({ [key]: rawConfig[key] });
if (sectionResult.success) {
const parsed = sectionResult.data as Record<string, unknown>;

View File

@@ -78,22 +78,6 @@ export async function applyAgentConfig(params: {
const useTaskSystem = params.pluginConfig.experimental?.task_system ?? false;
const disableOmoEnv = params.pluginConfig.experimental?.disable_omo_env ?? false;
const includeClaudeAgents = params.pluginConfig.claude_code?.agents ?? true;
const userAgents = includeClaudeAgents ? loadUserAgents() : {};
const projectAgents = includeClaudeAgents ? loadProjectAgents(params.ctx.directory) : {};
const rawPluginAgents = params.pluginComponents.agents;
const customAgentSummaries = [
...Object.entries(userAgents),
...Object.entries(projectAgents),
...Object.entries(rawPluginAgents).filter(([, config]) => config !== undefined),
].map(([name, config]) => ({
name,
description: typeof (config as Record<string, unknown>)?.description === "string"
? (config as Record<string, unknown>).description as string
: "",
}));
const builtinAgents = await createBuiltinAgents(
migratedDisabledAgents,
params.pluginConfig.agents,
@@ -102,7 +86,7 @@ export async function applyAgentConfig(params: {
params.pluginConfig.categories,
params.pluginConfig.git_master,
allDiscoveredSkills,
customAgentSummaries,
params.ctx.client,
browserProvider,
currentModel,
disabledSkills,
@@ -110,6 +94,11 @@ export async function applyAgentConfig(params: {
disableOmoEnv,
);
const includeClaudeAgents = params.pluginConfig.claude_code?.agents ?? true;
const userAgents = includeClaudeAgents ? loadUserAgents() : {};
const projectAgents = includeClaudeAgents ? loadProjectAgents(params.ctx.directory) : {};
const rawPluginAgents = params.pluginComponents.agents;
const pluginAgents = Object.fromEntries(
Object.entries(rawPluginAgents).map(([key, value]) => [
key,

View File

@@ -174,7 +174,6 @@ export function createEventHandler(args: {
await Promise.resolve(hooks.todoContinuationEnforcer?.handler?.(input));
await Promise.resolve(hooks.unstableAgentBabysitter?.event?.(input));
await Promise.resolve(hooks.contextWindowMonitor?.event?.(input));
await Promise.resolve(hooks.preemptiveCompaction?.event?.(input));
await Promise.resolve(hooks.directoryAgentsInjector?.event?.(input));
await Promise.resolve(hooks.directoryReadmeInjector?.event?.(input));
await Promise.resolve(hooks.rulesInjector?.event?.(input));

View File

@@ -0,0 +1,131 @@
import type { OhMyOpenCodeConfig, HookName } from "../../config"
import { createModelFallbackHook } from "../../hooks"
import { normalizeSDKResponse } from "../../shared"
import { resolveModelFallbackEnabled } from "./model-fallback-config"
type SafeHook = <THook>(hookName: HookName, factory: () => THook) => THook | null
type ModelFallbackSessionContext = {
directory: string
client: {
session: {
get: (input: { path: { id: string } }) => Promise<unknown>
update: (input: {
path: { id: string }
body: { title: string }
query: { directory: string }
}) => Promise<unknown>
}
tui: {
showToast: (input: {
body: {
title: string
message: string
variant: "success" | "error" | "info" | "warning"
duration: number
}
}) => Promise<unknown>
}
}
}
/**
 * Builds the optional `onApplied` callback that rewrites a session's title to
 * surface the fallback model currently in use, e.g.
 * `"My task [fallback: openai/gpt-5.2 high]"`.
 *
 * Returns `undefined` when the feature is disabled so callers can pass the
 * result straight through as an optional hook parameter.
 */
function createFallbackTitleUpdater(
  ctx: ModelFallbackSessionContext,
  enabled: boolean,
):
  | ((input: {
      sessionID: string
      providerID: string
      modelID: string
      variant?: string
    }) => Promise<void>)
  | undefined {
  // Feature is opt-in (experimental.model_fallback_title); do no work when off.
  if (!enabled) {
    return undefined
  }
  // Bounded per-session cache: remembers the session's original ("base") title
  // and the last fallback key applied, so repeated events for the same
  // provider/model/variant combination become no-ops.
  const fallbackTitleMaxEntries = 200
  const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
  return async (input) => {
    // Identity of the applied fallback; variant (when present) is part of the key.
    const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
    const existing = fallbackTitleState.get(input.sessionID) ?? {}
    // This exact fallback is already reflected in the title — nothing to do.
    if (existing.lastKey === key) {
      return
    }
    if (!existing.baseTitle) {
      // First touch for this session: fetch the current title and strip any
      // stale "[fallback: …]" suffix so suffixes never stack up across retries.
      const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
      const sessionInfo = sessionResp
        ? normalizeSDKResponse(sessionResp, null as { title?: string } | null, {
            preferResponseOnMissingData: true,
          })
        : null
      const rawTitle = sessionInfo?.title
      if (typeof rawTitle === "string" && rawTitle.length > 0) {
        existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
      } else {
        // No readable title (fetch failed or empty) — use a generic base.
        existing.baseTitle = "Session"
      }
    }
    const variantLabel = input.variant ? ` ${input.variant}` : ""
    const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
    // Best-effort: a failed title update must never break the fallback itself.
    await ctx.client.session
      .update({
        path: { id: input.sessionID },
        body: { title: newTitle },
        query: { directory: ctx.directory },
      })
      .catch(() => {})
    existing.lastKey = key
    fallbackTitleState.set(input.sessionID, existing)
    // Cap memory: evict the oldest-inserted entry once over the limit
    // (Map iteration order — insertion-order eviction, not strict LRU).
    if (fallbackTitleState.size > fallbackTitleMaxEntries) {
      const oldestKey = fallbackTitleState.keys().next().value
      if (oldestKey) {
        fallbackTitleState.delete(oldestKey)
      }
    }
  }
}
export function createConfiguredModelFallbackHook(args: {
ctx: ModelFallbackSessionContext
pluginConfig: OhMyOpenCodeConfig
isHookEnabled: (hookName: HookName) => boolean
safeHook: SafeHook
}): ReturnType<typeof createModelFallbackHook> | null {
const { ctx, pluginConfig, isHookEnabled, safeHook } = args
const isModelFallbackEnabled = resolveModelFallbackEnabled(pluginConfig)
if (!isModelFallbackEnabled || !isHookEnabled("model-fallback")) {
return null
}
const onApplied = createFallbackTitleUpdater(
ctx,
pluginConfig.experimental?.model_fallback_title ?? false,
)
return safeHook("model-fallback", () =>
createModelFallbackHook({
toast: async ({ title, message, variant, duration }) => {
await ctx.client.tui
.showToast({
body: {
title,
message,
variant: variant ?? "warning",
duration: duration ?? 5000,
},
})
.catch(() => {})
},
onApplied,
}),
)
}

View File

@@ -1,13 +1,10 @@
import type { OhMyOpenCodeConfig, HookName } from "../../config"
import type { ModelCacheState } from "../../plugin-state"
import type { PluginContext } from "../types"
import {
createContextWindowMonitorHook,
createSessionRecoveryHook,
createSessionNotification,
createThinkModeHook,
createModelFallbackHook,
createAnthropicContextWindowLimitRecoveryHook,
createAutoUpdateCheckerHook,
createAgentUsageReminderHook,
@@ -31,10 +28,10 @@ import {
detectExternalNotificationPlugin,
getNotificationConflictWarning,
log,
normalizeSDKResponse,
} from "../../shared"
import { safeCreateHook } from "../../shared/safe-create-hook"
import { sessionExists } from "../../tools"
import { createConfiguredModelFallbackHook } from "./create-model-fallback-session-hook"
export type SessionHooks = {
contextWindowMonitor: ReturnType<typeof createContextWindowMonitorHook> | null
@@ -42,7 +39,7 @@ export type SessionHooks = {
sessionRecovery: ReturnType<typeof createSessionRecoveryHook> | null
sessionNotification: ReturnType<typeof createSessionNotification> | null
thinkMode: ReturnType<typeof createThinkModeHook> | null
modelFallback: ReturnType<typeof createModelFallbackHook> | null
modelFallback: ReturnType<typeof createConfiguredModelFallbackHook>
anthropicContextWindowLimitRecovery: ReturnType<typeof createAnthropicContextWindowLimitRecoveryHook> | null
autoUpdateChecker: ReturnType<typeof createAutoUpdateCheckerHook> | null
agentUsageReminder: ReturnType<typeof createAgentUsageReminderHook> | null
@@ -63,7 +60,7 @@ export type SessionHooks = {
}
export function createSessionHooks(args: {
ctx: PluginContext
ctx: Parameters<typeof createContextWindowMonitorHook>[0]
pluginConfig: OhMyOpenCodeConfig
modelCacheState: ModelCacheState
isHookEnabled: (hookName: HookName) => boolean
@@ -105,73 +102,12 @@ export function createSessionHooks(args: {
? safeHook("think-mode", () => createThinkModeHook())
: null
const enableFallbackTitle = pluginConfig.experimental?.model_fallback_title ?? false
const fallbackTitleMaxEntries = 200
const fallbackTitleState = new Map<string, { baseTitle?: string; lastKey?: string }>()
const updateFallbackTitle = async (input: {
sessionID: string
providerID: string
modelID: string
variant?: string
}) => {
if (!enableFallbackTitle) return
const key = `${input.providerID}/${input.modelID}${input.variant ? `:${input.variant}` : ""}`
const existing = fallbackTitleState.get(input.sessionID) ?? {}
if (existing.lastKey === key) return
if (!existing.baseTitle) {
const sessionResp = await ctx.client.session.get({ path: { id: input.sessionID } }).catch(() => null)
const sessionInfo = sessionResp
? normalizeSDKResponse(sessionResp, null as { title?: string } | null, { preferResponseOnMissingData: true })
: null
const rawTitle = sessionInfo?.title
if (typeof rawTitle === "string" && rawTitle.length > 0) {
existing.baseTitle = rawTitle.replace(/\s*\[fallback:[^\]]+\]$/i, "").trim()
} else {
existing.baseTitle = "Session"
}
}
const variantLabel = input.variant ? ` ${input.variant}` : ""
const newTitle = `${existing.baseTitle} [fallback: ${input.providerID}/${input.modelID}${variantLabel}]`
await ctx.client.session
.update({
path: { id: input.sessionID },
body: { title: newTitle },
query: { directory: ctx.directory },
})
.catch(() => {})
existing.lastKey = key
fallbackTitleState.set(input.sessionID, existing)
if (fallbackTitleState.size > fallbackTitleMaxEntries) {
const oldestKey = fallbackTitleState.keys().next().value
if (oldestKey) fallbackTitleState.delete(oldestKey)
}
}
// Model fallback hook (configurable via model_fallback config + disabled_hooks)
// This handles automatic model switching when model errors occur
const isModelFallbackConfigEnabled = pluginConfig.model_fallback ?? true
const modelFallback = isModelFallbackConfigEnabled && isHookEnabled("model-fallback")
? safeHook("model-fallback", () =>
createModelFallbackHook({
toast: async ({ title, message, variant, duration }) => {
await ctx.client.tui
.showToast({
body: {
title,
message,
variant: variant ?? "warning",
duration: duration ?? 5000,
},
})
.catch(() => {})
},
onApplied: enableFallbackTitle ? updateFallbackTitle : undefined,
}))
: null
const modelFallback = createConfiguredModelFallbackHook({
ctx,
pluginConfig,
isHookEnabled,
safeHook,
})
const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery")
? safeHook("anthropic-context-window-limit-recovery", () =>

View File

@@ -0,0 +1,63 @@
declare const require: (name: string) => any
const { describe, expect, test } = require("bun:test")
import type { OhMyOpenCodeConfig } from "../../config"
import {
hasConfiguredModelFallbacks,
resolveModelFallbackEnabled,
} from "./model-fallback-config"
describe("model-fallback-config", () => {
  test("detects agent fallback_models configuration", () => {
    //#given — an agent carrying an explicit fallback chain
    const config: OhMyOpenCodeConfig = {
      agents: {
        sisyphus: {
          fallback_models: ["openai/gpt-5.2", "anthropic/claude-opus-4-6"],
        },
      },
    }

    //#when
    const detected = hasConfiguredModelFallbacks(config)

    //#then
    expect(detected).toBe(true)
  })

  test("auto-enables model fallback when category fallback_models are configured", () => {
    //#given — a category-level chain with no explicit model_fallback flag
    const config: OhMyOpenCodeConfig = {
      categories: {
        quick: {
          fallback_models: ["openai/gpt-5.2"],
        },
      },
    }

    //#when
    const enabled = resolveModelFallbackEnabled(config)

    //#then
    expect(enabled).toBe(true)
  })

  test("keeps model fallback disabled when explicitly turned off", () => {
    //#given — explicit opt-out must win over configured fallbacks
    const config: OhMyOpenCodeConfig = {
      model_fallback: false,
      agents: {
        sisyphus: {
          fallback_models: ["openai/gpt-5.2"],
        },
      },
    }

    //#when
    const enabled = resolveModelFallbackEnabled(config)

    //#then
    expect(enabled).toBe(false)
  })
})

View File

@@ -0,0 +1,33 @@
import type { OhMyOpenCodeConfig } from "../../config"
import { log, normalizeFallbackModels } from "../../shared"
type FallbackModelsConfig = {
fallback_models?: string | string[]
}
/**
 * True when the given agent/category config declares at least one usable
 * fallback model after normalization of the string-or-array form.
 */
function hasFallbackModels(config: FallbackModelsConfig | undefined): boolean {
  const models = normalizeFallbackModels(config?.fallback_models)
  return models != null && models.length > 0
}
/**
 * Scans every agent entry, then every category entry, for a non-empty
 * `fallback_models` list. Used to auto-enable the model-fallback hook when
 * the user configured fallback chains but left `model_fallback` unset.
 */
export function hasConfiguredModelFallbacks(pluginConfig: OhMyOpenCodeConfig): boolean {
  const sections = [
    Object.values<FallbackModelsConfig | undefined>(pluginConfig.agents ?? {}),
    Object.values<FallbackModelsConfig | undefined>(pluginConfig.categories ?? {}),
  ]
  for (const entries of sections) {
    for (const entry of entries) {
      if (hasFallbackModels(entry)) {
        return true
      }
    }
  }
  return false
}
export function resolveModelFallbackEnabled(pluginConfig: OhMyOpenCodeConfig): boolean {
const hasConfiguredFallbacks = hasConfiguredModelFallbacks(pluginConfig)
if (pluginConfig.model_fallback === false && hasConfiguredFallbacks) {
log(
"model_fallback is disabled while fallback_models are configured; set model_fallback=true to keep provider fallback retries enabled",
)
}
return pluginConfig.model_fallback ?? hasConfiguredFallbacks
}

View File

@@ -40,6 +40,28 @@ describe("model-error-classifier", () => {
expect(result).toBe(true)
})
test("treats FreeUsageLimitError names as retryable", () => {
//#given
const error = { name: "FreeUsageLimitError" }
//#when
const result = shouldRetryError(error)
//#then
expect(result).toBe(true)
})
test("treats free tier usage limit messages as retryable", () => {
//#given
const error = { message: "Free tier daily limit reached for this provider" }
//#when
const result = shouldRetryError(error)
//#then
expect(result).toBe(true)
})
test("selectFallbackProvider prefers first connected provider in preference order", () => {
//#given
readConnectedProvidersCacheMock.mockReturnValue(["anthropic", "nvidia"])

View File

@@ -6,13 +6,14 @@ import { readConnectedProvidersCache } from "./connected-providers-cache"
* These errors completely halt the action loop and should trigger fallback retry.
*/
const RETRYABLE_ERROR_NAMES = new Set([
"ProviderModelNotFoundError",
"RateLimitError",
"QuotaExceededError",
"InsufficientCreditsError",
"ModelUnavailableError",
"ProviderConnectionError",
"AuthenticationError",
"providermodelnotfounderror",
"ratelimiterror",
"quotaexceedederror",
"insufficientcreditserror",
"modelunavailableerror",
"providerconnectionerror",
"authenticationerror",
"freeusagelimiterror",
])
/**
@@ -20,24 +21,28 @@ const RETRYABLE_ERROR_NAMES = new Set([
* These errors are typically user-induced or fixable without switching models.
*/
const NON_RETRYABLE_ERROR_NAMES = new Set([
"MessageAbortedError",
"PermissionDeniedError",
"ContextLengthError",
"TimeoutError",
"ValidationError",
"SyntaxError",
"UserError",
"messageabortederror",
"permissiondeniederror",
"contextlengtherror",
"timeouterror",
"validationerror",
"syntaxerror",
"usererror",
])
/**
* Message patterns that indicate a retryable error even without a known error name.
*/
const RETRYABLE_MESSAGE_PATTERNS = [
const RETRYABLE_MESSAGE_PATTERNS: Array<string | RegExp> = [
"rate_limit",
"rate limit",
"quota",
"quota will reset after",
"usage limit has been reached",
/free\s+usage/i,
/free\s+tier/i,
/daily\s+limit/i,
/limit\s+reached/i,
"all credentials for model",
"cooling down",
"exhausted your capacity",
@@ -55,17 +60,9 @@ const RETRYABLE_MESSAGE_PATTERNS = [
"timeout",
"service unavailable",
"internal_server_error",
"free usage",
"usage exceeded",
"credit",
"balance",
"temporarily unavailable",
"try again",
"503",
"502",
"504",
"429",
"529",
]
const AUTO_RETRY_GATE_PATTERNS = [
@@ -85,6 +82,11 @@ function hasProviderAutoRetrySignal(message: string): boolean {
return AUTO_RETRY_GATE_PATTERNS.some((pattern) => message.includes(pattern))
}
function matchesRetryableMessagePattern(message: string): boolean {
return RETRYABLE_MESSAGE_PATTERNS.some((pattern) =>
typeof pattern === "string" ? message.includes(pattern) : pattern.test(message))
}
export interface ErrorInfo {
name?: string
message?: string
@@ -97,12 +99,14 @@ export interface ErrorInfo {
export function isRetryableModelError(error: ErrorInfo): boolean {
// If we have an error name, check against known lists
if (error.name) {
const normalizedErrorName = error.name.toLowerCase()
// Explicit non-retryable takes precedence
if (NON_RETRYABLE_ERROR_NAMES.has(error.name)) {
if (NON_RETRYABLE_ERROR_NAMES.has(normalizedErrorName)) {
return false
}
// Check if it's a known retryable error
if (RETRYABLE_ERROR_NAMES.has(error.name)) {
if (RETRYABLE_ERROR_NAMES.has(normalizedErrorName)) {
return true
}
}
@@ -112,7 +116,7 @@ export function isRetryableModelError(error: ErrorInfo): boolean {
if (hasProviderAutoRetrySignal(msg)) {
return true
}
return RETRYABLE_MESSAGE_PATTERNS.some((pattern) => msg.includes(pattern))
return matchesRetryableMessagePattern(msg)
}
/**

View File

@@ -239,19 +239,19 @@ describe("AGENT_MODEL_REQUIREMENTS", () => {
})
describe("CATEGORY_MODEL_REQUIREMENTS", () => {
test("ultrabrain has valid fallbackChain with gpt-5.4 as primary", () => {
test("ultrabrain has valid fallbackChain with gpt-5.3-codex as primary", () => {
// given - ultrabrain category requirement
const ultrabrain = CATEGORY_MODEL_REQUIREMENTS["ultrabrain"]
// when - accessing ultrabrain requirement
// then - fallbackChain exists with gpt-5.4 as first entry
// then - fallbackChain exists with gpt-5.3-codex as first entry
expect(ultrabrain).toBeDefined()
expect(ultrabrain.fallbackChain).toBeArray()
expect(ultrabrain.fallbackChain.length).toBeGreaterThan(0)
const primary = ultrabrain.fallbackChain[0]
expect(primary.variant).toBe("xhigh")
expect(primary.model).toBe("gpt-5.4")
expect(primary.model).toBe("gpt-5.3-codex")
expect(primary.providers[0]).toBe("openai")
})
@@ -326,25 +326,20 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
expect(primary.providers[0]).toBe("anthropic")
})
test("unspecified-high has claude-opus-4-6 as primary and gpt-5.4 as secondary", () => {
test("unspecified-high has gpt-5.4 as primary", () => {
// #given - unspecified-high category requirement
const unspecifiedHigh = CATEGORY_MODEL_REQUIREMENTS["unspecified-high"]
// #when - accessing unspecified-high requirement
// #then - claude-opus-4-6 is first and gpt-5.4 is second
// #then - gpt-5.4 is first
expect(unspecifiedHigh).toBeDefined()
expect(unspecifiedHigh.fallbackChain).toBeArray()
expect(unspecifiedHigh.fallbackChain.length).toBeGreaterThan(1)
const primary = unspecifiedHigh.fallbackChain[0]
expect(primary.model).toBe("claude-opus-4-6")
expect(primary.variant).toBe("max")
expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"])
const secondary = unspecifiedHigh.fallbackChain[1]
expect(secondary.model).toBe("gpt-5.4")
expect(secondary.variant).toBe("high")
expect(secondary.providers).toEqual(["openai", "github-copilot", "opencode"])
expect(primary.model).toBe("gpt-5.4")
expect(primary.variant).toBe("high")
expect(primary.providers).toEqual(["openai", "github-copilot", "opencode"])
})
test("artistry has valid fallbackChain with gemini-3.1-pro as primary", () => {

View File

@@ -205,7 +205,7 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
fallbackChain: [
{
providers: ["openai", "opencode"],
model: "gpt-5.4",
model: "gpt-5.3-codex",
variant: "xhigh",
},
{
@@ -288,16 +288,16 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
},
"unspecified-high": {
fallbackChain: [
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{
providers: ["openai", "github-copilot", "opencode"],
model: "gpt-5.4",
variant: "high",
},
{
providers: ["anthropic", "github-copilot", "opencode"],
model: "claude-opus-4-6",
variant: "max",
},
{ providers: ["zai-coding-plan", "opencode"], model: "glm-5" },
{ providers: ["kimi-for-coding"], model: "k2p5" },
{

View File

@@ -92,12 +92,12 @@
| Category | Model | Domain |
|----------|-------|--------|
| visual-engineering | gemini-3.1-pro high | Frontend, UI/UX |
| ultrabrain | gpt-5.4 xhigh | Hard logic |
| ultrabrain | gpt-5.3-codex xhigh | Hard logic |
| deep | gpt-5.3-codex medium | Autonomous problem-solving |
| artistry | gemini-3.1-pro high | Creative approaches |
| quick | claude-haiku-4-5 | Trivial tasks |
| unspecified-low | claude-sonnet-4-6 | Moderate effort |
| unspecified-high | claude-opus-4-6 max | High effort |
| unspecified-high | gpt-5.4 high | High effort |
| writing | kimi-k2p5 | Documentation |
## HOW TO ADD A TOOL

View File

@@ -1,68 +0,0 @@
/// <reference types="bun-types" />
import type { ToolContext } from "@opencode-ai/plugin/tool"
import { describe, expect, test } from "bun:test"
import type { BackgroundTask } from "../../features/background-agent"
import { clearPendingStore, consumeToolMetadata } from "../../features/tool-metadata-store"
import type { BackgroundOutputClient, BackgroundOutputManager } from "./clients"
import { createBackgroundOutput } from "./create-background-output"
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
type ToolContextWithCallID = ToolContext & {
callID: string
}
describe("createBackgroundOutput metadata", () => {
test("omits sessionId metadata when task session is not yet assigned", async () => {
// #given
clearPendingStore()
const task: BackgroundTask = {
id: "task-1",
sessionID: undefined,
parentSessionID: "main-1",
parentMessageID: "msg-1",
description: "background task",
prompt: "do work",
agent: "test-agent",
status: "running",
}
const manager: BackgroundOutputManager = {
getTask: id => (id === task.id ? task : undefined),
}
const client: BackgroundOutputClient = {
session: {
messages: async () => ({ data: [] }),
},
}
const tool = createBackgroundOutput(manager, client)
const context = {
sessionID: "test-session",
messageID: "test-message",
agent: "test-agent",
directory: projectDir,
worktree: projectDir,
abort: new AbortController().signal,
metadata: () => {},
ask: async () => {},
callID: "call-1",
} as ToolContextWithCallID
// #when
await tool.execute({ task_id: task.id }, context)
// #then
expect(consumeToolMetadata("test-session", "call-1")).toEqual({
title: "test-agent - background task",
metadata: {
agent: "test-agent",
category: undefined,
description: "background task",
task_id: "task-1",
},
})
clearPendingStore()
})
})

View File

@@ -75,7 +75,7 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client:
agent: task.agent,
category: task.category,
description: task.description,
...(task.sessionID ? { sessionId: task.sessionID } : {}),
sessionId: task.sessionID ?? "pending",
} as Record<string, unknown>,
}
ctx.metadata?.(meta)

View File

@@ -1,84 +0,0 @@
/// <reference types="bun-types" />
import type { PluginInput } from "@opencode-ai/plugin"
import type { ToolContext } from "@opencode-ai/plugin/tool"
import { describe, expect, mock, test } from "bun:test"
import type { BackgroundManager } from "../../features/background-agent"
import { clearPendingStore, consumeToolMetadata } from "../../features/tool-metadata-store"
import { createBackgroundTask } from "./create-background-task"
const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode"
type ToolContextWithCallID = ToolContext & {
callID: string
}
describe("createBackgroundTask metadata", () => {
test("omits sessionId metadata when session is not yet assigned", async () => {
// #given
clearPendingStore()
const manager = {
launch: mock(() => Promise.resolve({
id: "task-1",
sessionID: null,
description: "Test task",
agent: "test-agent",
status: "pending",
})),
getTask: mock(() => undefined),
} as unknown as BackgroundManager
const client = {
session: {
messages: mock(() => Promise.resolve({ data: [] })),
},
} as unknown as PluginInput["client"]
let capturedMetadata: { title?: string; metadata?: Record<string, unknown> } | undefined
const tool = createBackgroundTask(manager, client)
const originalDateNow = Date.now
let dateNowCallCount = 0
Date.now = () => {
dateNowCallCount += 1
return dateNowCallCount === 1 ? 0 : 30001
}
try {
// #when
const context: ToolContextWithCallID = {
sessionID: "test-session",
messageID: "test-message",
agent: "test-agent",
directory: projectDir,
worktree: projectDir,
abort: new AbortController().signal,
ask: async () => {},
callID: "call-1",
metadata: input => {
capturedMetadata = input
},
}
const output = await tool.execute(
{
description: "Test background task",
prompt: "Test prompt",
agent: "test-agent",
},
context
)
// #then
expect(output).toContain("Session ID: (not yet assigned)")
expect(output).not.toContain('Session ID: pending')
expect(capturedMetadata?.metadata).toEqual({})
expect(consumeToolMetadata("test-session", "call-1")).toEqual({
title: "Test background task",
metadata: {},
})
} finally {
Date.now = originalDateNow
clearPendingStore()
}
})
})

View File

@@ -94,9 +94,7 @@ export function createBackgroundTask(
const bgMeta = {
title: args.description,
metadata: {
...(sessionId ? { sessionId } : {}),
},
metadata: { sessionId: sessionId ?? "pending" },
}
await ctx.metadata?.(bgMeta)
@@ -107,7 +105,7 @@ export function createBackgroundTask(
return `Background task launched successfully.
Task ID: ${task.id}
Session ID: ${sessionId ?? "(not yet assigned)"}
Session ID: ${sessionId ?? "pending"}
Description: ${task.description}
Agent: ${task.agent}
Status: ${task.status}

View File

@@ -284,12 +284,12 @@ You are NOT an interactive assistant. You are an autonomous problem-solver.
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
"visual-engineering": { model: "google/gemini-3.1-pro", variant: "high" },
ultrabrain: { model: "openai/gpt-5.4", variant: "xhigh" },
ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" },
deep: { model: "openai/gpt-5.3-codex", variant: "medium" },
artistry: { model: "google/gemini-3.1-pro", variant: "high" },
quick: { model: "anthropic/claude-haiku-4-5" },
"unspecified-low": { model: "anthropic/claude-sonnet-4-6" },
"unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" },
"unspecified-high": { model: "openai/gpt-5.4-high" },
writing: { model: "kimi-for-coding/k2p5" },
}

View File

@@ -3,17 +3,6 @@ const { describe, expect, test } = require("bun:test")
import { __resetTimingConfig, __setTimingConfig, getDefaultSyncPollTimeoutMs } from "./timing"
describe("timing sync poll timeout defaults", () => {
test("default sync timeout is 30 minutes", () => {
// #given
__resetTimingConfig()
// #when
const timeout = getDefaultSyncPollTimeoutMs()
// #then
expect(timeout).toBe(30 * 60 * 1000)
})
test("default sync timeout accessor follows MAX_POLL_TIME_MS config", () => {
// #given
__resetTimingConfig()

View File

@@ -3,7 +3,7 @@ let MIN_STABILITY_TIME_MS = 10000
let STABILITY_POLLS_REQUIRED = 3
let WAIT_FOR_SESSION_INTERVAL_MS = 100
let WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const DEFAULT_POLL_TIMEOUT_MS = 30 * 60 * 1000
const DEFAULT_POLL_TIMEOUT_MS = 10 * 60 * 1000
let MAX_POLL_TIME_MS = DEFAULT_POLL_TIMEOUT_MS
let SESSION_CONTINUATION_STABILITY_MS = 5000

View File

@@ -83,7 +83,7 @@ describe("sisyphus-task", () => {
// when / #then
expect(category).toBeDefined()
expect(category.model).toBe("openai/gpt-5.4")
expect(category.model).toBe("openai/gpt-5.3-codex")
expect(category.variant).toBe("xhigh")
})
@@ -97,14 +97,14 @@ describe("sisyphus-task", () => {
expect(category.variant).toBe("medium")
})
test("unspecified-high category uses claude-opus-4-6 max as primary", () => {
test("unspecified-high category uses explicit high model", () => {
// given
const category = DEFAULT_CATEGORIES["unspecified-high"]
// when / #then
expect(category).toBeDefined()
expect(category.model).toBe("anthropic/claude-opus-4-6")
expect(category.variant).toBe("max")
expect(category.model).toBe("openai/gpt-5.4-high")
expect(category.variant).toBeUndefined()
})
})
@@ -1036,7 +1036,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - unspecified-high uses claude-opus-4-6 max in DEFAULT_CATEGORIES
// when - unspecified-high uses the explicit high model in DEFAULT_CATEGORIES
await tool.execute(
{
description: "Test unspecified-high default variant",
@@ -1048,11 +1048,10 @@ describe("sisyphus-task", () => {
toolContext
)
// then - claude-opus-4-6 should be passed with max variant
// then - the explicit high model should be passed without a separate variant
expect(launchInput.model).toEqual({
providerID: "anthropic",
modelID: "claude-opus-4-6",
variant: "max",
providerID: "openai",
modelID: "gpt-5.4-high",
})
}, { timeout: 20000 })
@@ -1097,7 +1096,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - unspecified-high uses claude-opus-4-6 max in DEFAULT_CATEGORIES
// when - unspecified-high uses the explicit high model in DEFAULT_CATEGORIES
await tool.execute(
{
description: "Test unspecified-high sync variant",
@@ -1109,12 +1108,12 @@ describe("sisyphus-task", () => {
toolContext
)
// then - claude-opus-4-6 should be passed with max variant
// then - the explicit high model should be passed without a separate variant
expect(promptBody.model).toEqual({
providerID: "anthropic",
modelID: "claude-opus-4-6",
providerID: "openai",
modelID: "gpt-5.4-high",
})
expect(promptBody.variant).toBe("max")
expect(promptBody.variant).toBeUndefined()
}, { timeout: 20000 })
})
@@ -2403,7 +2402,7 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - using ultrabrain category (default model is openai/gpt-5.4)
// when - using ultrabrain category (default model is openai/gpt-5.3-codex)
await tool.execute(
{
description: "Override precedence test",
@@ -2455,7 +2454,7 @@ describe("sisyphus-task", () => {
client: mockClient,
sisyphusJuniorModel: "anthropic/claude-sonnet-4-6",
userCategories: {
ultrabrain: { model: "openai/gpt-5.4" },
ultrabrain: { model: "openai/gpt-5.3-codex" },
},
connectedProvidersOverride: TEST_CONNECTED_PROVIDERS,
availableModelsOverride: createTestAvailableModels(),
@@ -2482,7 +2481,7 @@ describe("sisyphus-task", () => {
// then - explicit category model should win
expect(launchInput.model.providerID).toBe("openai")
expect(launchInput.model.modelID).toBe("gpt-5.4")
expect(launchInput.model.modelID).toBe("gpt-5.3-codex")
})
test("sisyphus-junior model override works with quick category (#1295)", async () => {
@@ -2947,7 +2946,7 @@ describe("sisyphus-task", () => {
// then - catalog model is used
expect(resolved).not.toBeNull()
expect(resolved!.config.model).toBe("openai/gpt-5.4")
expect(resolved!.config.model).toBe("openai/gpt-5.3-codex")
expect(resolved!.config.variant).toBe("xhigh")
})
@@ -2971,10 +2970,10 @@ describe("sisyphus-task", () => {
// when
const resolved = resolveCategoryConfig(categoryName, { inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// then - category's built-in model wins (ultrabrain uses gpt-5.4)
// then - category's built-in model wins (ultrabrain uses gpt-5.3-codex)
expect(resolved).not.toBeNull()
const actualModel = resolved!.config.model
expect(actualModel).toBe("openai/gpt-5.4")
expect(actualModel).toBe("openai/gpt-5.3-codex")
})
test("when user defines model - modelInfo should report user-defined regardless of inheritedModel", () => {
@@ -3028,12 +3027,12 @@ describe("sisyphus-task", () => {
const categoryName = "ultrabrain"
const inheritedModel = "anthropic/claude-opus-4-6"
// when category has a built-in model (gpt-5.4 for ultrabrain)
// when category has a built-in model (gpt-5.3-codex for ultrabrain)
const resolved = resolveCategoryConfig(categoryName, { inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// then category's built-in model should be used, NOT inheritedModel
expect(resolved).not.toBeNull()
expect(resolved!.model).toBe("openai/gpt-5.4")
expect(resolved!.model).toBe("openai/gpt-5.3-codex")
})
test("FIXED: systemDefaultModel is used when no userConfig.model and no inheritedModel", () => {

View File

@@ -123,11 +123,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini
})
if (args.run_in_background === undefined) {
if (args.category || args.subagent_type || args.session_id) {
args.run_in_background = false
} else {
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
if (typeof args.load_skills === "string") {
try {

View File

@@ -30,8 +30,8 @@ export function createHashlineEditTool(): ToolDefinition {
pos: tool.schema.string().optional().describe("Primary anchor in LINE#ID format"),
end: tool.schema.string().optional().describe("Range end anchor in LINE#ID format"),
lines: tool.schema
.union([tool.schema.string(), tool.schema.null()])
.describe("Replacement or inserted lines as newline-delimited string. null deletes with replace"),
.union([tool.schema.string(), tool.schema.array(tool.schema.string()), tool.schema.null()])
.describe("Replacement or inserted lines. null/[] deletes with replace"),
})
)
.describe("Array of edit operations to apply (empty when delete=true)"),