fix: convert executeSyncTask to async prompt + polling pattern

The Oracle agent (and every other sync subagent task) fails with a JSON Parse error in ACP environments because session.prompt() (blocking HTTP) returns empty or incomplete responses. Replace promptSyncWithModelSuggestionRetry with promptWithModelSuggestionRetry (async, fire-and-forget) and add a polling loop that waits for response stability, matching the proven pattern from executeUnstableAgentTask. Fixes #1681
@mrm007 has signed the CLA in code-yeongyu/oh-my-opencode#1680
2026-02-09 10:03:54 +09:00 · 2026-02-08 21:41:45 +00:00 · 2026-02-08 17:12:45 +00:00 · 2026-02-08 16:02:43 +00:00 · 2026-02-08 15:44:17 +00:00 · 2026-02-08 20:00:52 +09:00
289 changed files with 18085 additions and 9124 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Dependencies
-.sisyphus/
+.sisyphus/*
+!.sisyphus/rules/
 node_modules/

 # Build output
--- a/.opencode/command/remove-deadcode.md
+++ b/.opencode/command/remove-deadcode.md
@@ -41,27 +41,27 @@ Fire ALL simultaneously:

 ```
 // Agent 1: Find all exported symbols
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
  List each with: file path, line number, symbol name, export type (named/default).
  EXCLUDE: src/index.ts root exports, test files.
  Return as structured list.")

 // Agent 2: Find potentially unused files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find files in src/ that are NOT imported by any other file.
  Check import/require statements across the entire codebase.
  EXCLUDE: index.ts files, test files, entry points, config files, .md files.
  Return list of potentially orphaned files.")

 // Agent 3: Find unused imports within files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find unused imports across src/**/*.ts files.
  Look for import statements where the imported symbol is never referenced in the file body.
  Return: file path, line number, imported symbol name.")

 // Agent 4: Find functions/variables only used in their own declaration
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
  to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
 ```
--- a/.opencode/skills/github-issue-triage/SKILL.md
+++ b/.opencode/skills/github-issue-triage/SKILL.md
@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:

 | Aspect | Rule |
 |--------|------|
-| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
 | **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
 | **Result Handling** | `background_output()` to collect results as they complete |
 | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
  const issue = allIssues[i]
  const category = getCategory(i)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← CRITICAL: Each issue is independent background task
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
  
  console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← BACKGROUND TASK: Each issue runs independently
@@ -480,7 +480,7 @@ When invoked, immediately:
 4. Exhaustive pagination for issues
 5. Exhaustive pagination for PRs
 6. **LAUNCH**: For each issue:
-   - `delegate_task(run_in_background=true)` - 1 task per issue
+   - `task(run_in_background=true)` - 1 task per issue
   - Store taskId mapped to issue number
 7. **STREAM**: Poll `background_output()` for each task:
   - As each completes, immediately report result
--- a/.opencode/skills/github-pr-triage/SKILL.md
+++ b/.opencode/skills/github-pr-triage/SKILL.md
@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:

 | Aspect | Rule |
 |--------|------|
-| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 PR = Exactly 1 `task()` call |
 | **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
 | **Result Handling** | `background_output()` to collect results as they complete |
 | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
  const pr = allPRs[i]
  const category = getCategory(i)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← CRITICAL: Each PR is independent background task
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
  
  console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← BACKGROUND TASK: Each PR runs independently
@@ -474,7 +474,7 @@ When invoked, immediately:
 2. `gh repo view --json nameWithOwner -q .nameWithOwner`
 3. Exhaustive pagination for ALL open PRs
 4. **LAUNCH**: For each PR:
-   - `delegate_task(run_in_background=true)` - 1 task per PR
+   - `task(run_in_background=true)` - 1 task per PR
   - Store taskId mapped to PR number
 5. **STREAM**: Poll `background_output()` for each task:
   - As each completes, immediately report result
--- a/.sisyphus/rules/modular-code-enforcement.md
+++ b/.sisyphus/rules/modular-code-enforcement.md
@@ -0,0 +1,117 @@
+---
+globs: ["**/*.ts", "**/*.tsx"]
+alwaysApply: false
+description: "Enforces strict modular code architecture: SRP, no monolithic index.ts, 200 LOC hard limit"
+---
+
+<MANDATORY_ARCHITECTURE_RULE severity="BLOCKING" priority="HIGHEST">
+
+# Modular Code Architecture — Zero Tolerance Policy
+
+This rule is NON-NEGOTIABLE. Violations BLOCK all further work until resolved.
+
+## Rule 1: index.ts is an ENTRY POINT, NOT a dumping ground
+
+`index.ts` files MUST ONLY contain:
+- Re-exports (`export { ... } from "./module"`)
+- Factory function calls that compose modules
+- Top-level wiring/registration (hook registration, plugin setup)
+
+`index.ts` MUST NEVER contain:
+- Business logic implementation
+- Helper/utility functions
+- Type definitions beyond simple re-exports
+- Multiple unrelated responsibilities mixed together
+
+**If you find mixed logic in index.ts**: Extract each responsibility into its own dedicated file BEFORE making any other changes. This is not optional.
+
+## Rule 2: No Catch-All Files — utils.ts / service.ts are CODE SMELLS
+
+A single `utils.ts`, `helpers.ts`, `service.ts`, or `common.ts` is a **gravity well** — every unrelated function gets tossed in, and it grows into an untestable, unreviewable blob.
+
+**These file names are BANNED as top-level catch-alls.** Instead:
+
+| Anti-Pattern | Refactor To |
+|--------------|-------------|
+| `utils.ts` with `formatDate()`, `slugify()`, `retry()` | `date-formatter.ts`, `slugify.ts`, `retry.ts` |
+| `service.ts` handling auth + billing + notifications | `auth-service.ts`, `billing-service.ts`, `notification-service.ts` |
+| `helpers.ts` with 15 unrelated exports | One file per logical domain |
+
+**Design for reusability from the start.** Each module should be:
+- **Independently importable** — no consumer should need to pull in unrelated code
+- **Self-contained** — its dependencies are explicit, not buried in a shared grab-bag
+- **Nameable by purpose** — the filename alone tells you what it does
+
+If you catch yourself typing `utils.ts` or `service.ts`, STOP and name the file after what it actually does.
+
+## Rule 3: Single Responsibility Principle — ABSOLUTE
+
+Every `.ts` file MUST have exactly ONE clear, nameable responsibility.
+
+**Self-test**: If you cannot describe the file's purpose in ONE short phrase (e.g., "parses YAML frontmatter", "matches rules against file paths"), the file does too much. Split it.
+
+| Signal | Action |
+|--------|--------|
+| File has 2+ unrelated exported functions | **SPLIT NOW** — each into its own module |
+| File mixes I/O with pure logic | **SPLIT NOW** — separate side effects from computation |
+| File has both types and implementation | **SPLIT NOW** — types.ts + implementation.ts |
+| You need to scroll to understand the file | **SPLIT NOW** — it's too large |
+
+## Rule 4: 200 LOC Hard Limit — CODE SMELL DETECTOR
+
+Any `.ts`/`.tsx` file exceeding **200 lines of code** (excluding prompt strings, template literals containing prompts, and `.md` content) is an **immediate code smell**.
+
+**When you detect a file > 200 LOC**:
+1. **STOP** current work
+2. **Identify** the multiple responsibilities hiding in the file
+3. **Extract** each responsibility into a focused module
+4. **Verify** each resulting file is < 200 LOC and has a single purpose
+5. **Resume** original work
+
+Prompt-heavy files (agent definitions, skill definitions) where the bulk of content is template literal prompt text are EXEMPT from the LOC count — but their non-prompt logic must still be < 200 LOC.
+
+### How to Count LOC
+
+**Count these** (= actual logic):
+- Import statements
+- Variable/constant declarations
+- Function/class/interface/type definitions
+- Control flow (`if`, `for`, `while`, `switch`, `try/catch`)
+- Expressions, assignments, return statements
+- Closing braces `}` that belong to logic blocks
+
+**Exclude these** (= not logic):
+- Blank lines
+- Comment-only lines (`//`, `/* */`, `/** */`)
+- Lines inside template literals that are prompt/instruction text (e.g., the string body of `` const prompt = `...` ``)
+- Lines inside multi-line strings used as documentation/prompt content
+
+**Quick method**: Read the file → subtract blank lines, comment-only lines, and prompt string content → remaining count = LOC.
+
+**Example**:
+```typescript
+// 1  import { foo } from "./foo";          ← COUNT
+// 2                                         ← SKIP (blank)
+// 3  // Helper for bar                      ← SKIP (comment)
+// 4  export function bar(x: number) {       ← COUNT
+// 5    const prompt = `                     ← COUNT (declaration)
+// 6      You are an assistant.              ← SKIP (prompt text)
+// 7      Follow these rules:                ← SKIP (prompt text)
+// 8    `;                                   ← COUNT (closing)
+// 9    return process(prompt, x);           ← COUNT
+// 10 }                                      ← COUNT
+```
+→ LOC = **6** (lines 1, 4, 5, 8, 9, 10). Not 10.
+
+When in doubt, **round up** — err on the side of splitting.
+
+## How to Apply
+
+When reading, writing, or editing ANY `.ts`/`.tsx` file:
+
+1. **Check the file you're touching** — does it violate any rule above?
+2. **If YES** — refactor FIRST, then proceed with your task
+3. **If creating a new file** — ensure it has exactly one responsibility and stays under 200 LOC
+4. **If adding code to an existing file** — verify the addition doesn't push the file past 200 LOC or add a second responsibility. If it does, extract into a new module.
+
+</MANDATORY_ARCHITECTURE_RULE>
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,7 +1,7 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-02-03T16:10:30+09:00
-**Commit:** d7679e14
+**Generated:** 2026-02-08T16:45:00+09:00
+**Commit:** edee865f
 **Branch:** dev

 ---
@@ -120,40 +120,45 @@ This is an **international open-source project**. To ensure accessibility and ma

 ## OVERVIEW

-OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash). 34 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 11 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.
+OpenCode plugin: multi-model agent orchestration (Claude Opus 4.6, GPT-5.3 Codex, Gemini 3 Flash). 40+ lifecycle hooks, 25+ tools (LSP, AST-Grep, delegation), 11 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.

 ## STRUCTURE

 ```
 oh-my-opencode/
 ├── src/
-│   ├── agents/        # 11 AI agents - see src/agents/AGENTS.md
-│   ├── hooks/         # 34 lifecycle hooks - see src/hooks/AGENTS.md
-│   ├── tools/         # 20+ tools - see src/tools/AGENTS.md
-│   ├── features/      # Background agents, Claude Code compat - see src/features/AGENTS.md
-│   ├── shared/        # 66 cross-cutting utilities - see src/shared/AGENTS.md
-│   ├── cli/           # CLI installer, doctor - see src/cli/AGENTS.md
-│   ├── mcp/           # Built-in MCPs - see src/mcp/AGENTS.md
-│   ├── config/        # Zod schema, TypeScript types
-│   └── index.ts       # Main plugin entry (788 lines)
-├── script/            # build-schema.ts, build-binaries.ts
-├── packages/          # 11 platform-specific binaries
-└── dist/              # Build output (ESM + .d.ts)
+│   ├── agents/           # 11 AI agents - see src/agents/AGENTS.md
+│   ├── hooks/            # 40+ lifecycle hooks - see src/hooks/AGENTS.md
+│   ├── tools/            # 25+ tools - see src/tools/AGENTS.md
+│   ├── features/         # Background agents, skills, Claude Code compat - see src/features/AGENTS.md
+│   ├── shared/           # 66 cross-cutting utilities - see src/shared/AGENTS.md
+│   ├── cli/              # CLI installer, doctor - see src/cli/AGENTS.md
+│   ├── mcp/              # Built-in MCPs - see src/mcp/AGENTS.md
+│   ├── config/           # Zod schema (schema.ts 455 lines), TypeScript types
+│   ├── plugin-handlers/  # Plugin config loading (config-handler.ts 562 lines)
+│   ├── index.ts          # Main plugin entry (999 lines)
+│   ├── plugin-config.ts  # Config loading orchestration
+│   └── plugin-state.ts   # Model cache state
+├── script/               # build-schema.ts, build-binaries.ts, publish.ts
+├── packages/             # 11 platform-specific binaries
+└── dist/                 # Build output (ESM + .d.ts)
 ```

 ## WHERE TO LOOK

 | Task | Location | Notes |
 |------|----------|-------|
-| Add agent | `src/agents/` | Create .ts with factory, add to `agentSources` |
+| Add agent | `src/agents/` | Create .ts with factory, add to `agentSources` in utils.ts |
 | Add hook | `src/hooks/` | Create dir with `createXXXHook()`, register in index.ts |
 | Add tool | `src/tools/` | Dir with index/types/constants/tools.ts |
-| Add MCP | `src/mcp/` | Create config, add to index.ts |
+| Add MCP | `src/mcp/` | Create config, add to `createBuiltinMcps()` |
 | Add skill | `src/features/builtin-skills/` | Create dir with SKILL.md |
 | Add command | `src/features/builtin-commands/` | Add template + register in commands.ts |
 | Config schema | `src/config/schema.ts` | Zod schema, run `bun run build:schema` |
-| Background agents | `src/features/background-agent/` | manager.ts (1418 lines) |
-| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (757 lines) |
+| Plugin config | `src/plugin-handlers/config-handler.ts` | JSONC loading, merging, migration |
+| Background agents | `src/features/background-agent/` | manager.ts (1556 lines) |
+| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (770 lines) |
+| Delegation | `src/tools/delegate-task/` | Category routing (executor.ts 983 lines) |

 ## TDD (Test-Driven Development)

@@ -165,7 +170,7 @@ oh-my-opencode/
 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests - fix the code
-- Test file: `*.test.ts` alongside source (100 test files)
+- Test file: `*.test.ts` alongside source (163 test files)
 - BDD comments: `//#given`, `//#when`, `//#then`

 ## CONVENTIONS
@@ -175,7 +180,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern via index.ts
 - **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
-- **Testing**: BDD comments, 100 test files
+- **Testing**: BDD comments, 163 test files
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS
@@ -190,7 +195,7 @@ oh-my-opencode/
 | Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
 | Error Handling | Empty catch blocks |
 | Testing | Deleting failing tests, writing implementation before test |
-| Agent Calls | Sequential - use `delegate_task` parallel |
+| Agent Calls | Sequential - use `task` parallel |
 | Hook Logic | Heavy PreToolUse - slows every call |
 | Commits | Giant (3+ files), separate test from impl |
 | Temperature | >0.3 for code agents |
@@ -204,14 +209,17 @@ oh-my-opencode/

 | Agent | Model | Purpose |
 |-------|-------|---------|
-| Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator (fallback: kimi-k2.5 → glm-4.7 → gpt-5.2-codex → gemini-3-pro) |
-| Hephaestus | openai/gpt-5.2-codex | Autonomous deep worker, "The Legitimate Craftsman" (requires gpt-5.2-codex, no fallback) |
+| Sisyphus | anthropic/claude-opus-4-6 | Primary orchestrator (fallback: kimi-k2.5 → glm-4.7 → gpt-5.3-codex → gemini-3-pro) |
+| Hephaestus | openai/gpt-5.3-codex | Autonomous deep worker, "The Legitimate Craftsman" (requires gpt-5.3-codex, no fallback) |
 | Atlas | anthropic/claude-sonnet-4-5 | Master orchestrator (fallback: kimi-k2.5 → gpt-5.2) |
 | oracle | openai/gpt-5.2 | Consultation, debugging |
 | librarian | zai-coding-plan/glm-4.7 | Docs, GitHub search (fallback: glm-4.7-free) |
 | explore | xai/grok-code-fast-1 | Fast codebase grep (fallback: claude-haiku-4-5 → gpt-5-mini → gpt-5-nano) |
 | multimodal-looker | google/gemini-3-flash | PDF/image analysis |
-| Prometheus | anthropic/claude-opus-4-5 | Strategic planning (fallback: kimi-k2.5 → gpt-5.2) |
+| Prometheus | anthropic/claude-opus-4-6 | Strategic planning (fallback: kimi-k2.5 → gpt-5.2) |
+| Metis | anthropic/claude-opus-4-6 | Pre-planning analysis (temp 0.3, fallback: kimi-k2.5 → gpt-5.2) |
+| Momus | openai/gpt-5.2 | Plan validation (temp 0.1, fallback: claude-opus-4-6) |
+| Sisyphus-Junior | anthropic/claude-sonnet-4-5 | Category-spawned executor (temp 0.1) |

 ## COMMANDS

@@ -219,7 +227,7 @@ oh-my-opencode/
 bun run typecheck      # Type check
 bun run build          # ESM + declarations + schema
 bun run rebuild        # Clean + Build
-bun test               # 100 test files
+bun test               # 163 test files
 ```

 ## DEPLOYMENT
@@ -233,30 +241,41 @@ bun test               # 100 test files

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/features/builtin-skills/skills.ts` | 1729 | Skill definitions |
-| `src/features/background-agent/manager.ts` | 1418 | Task lifecycle, concurrency |
-| `src/agents/prometheus-prompt.ts` | 1283 | Planning agent prompt |
-| `src/tools/delegate-task/tools.ts` | 1135 | Category-based delegation |
-| `src/hooks/atlas/index.ts` | 757 | Orchestrator hook |
-| `src/index.ts` | 788 | Main plugin entry |
+| `src/features/background-agent/manager.ts` | 1642 | Task lifecycle, concurrency |
+| `src/features/builtin-skills/skills/git-master.ts` | 1107 | Git master skill definition |
+| `src/index.ts` | 999 | Main plugin entry |
+| `src/tools/delegate-task/executor.ts` | 969 | Category-based delegation executor |
+| `src/tools/lsp/client.ts` | 851 | LSP client operations |
+| `src/tools/background-task/tools.ts` | 757 | Background task tools |
+| `src/hooks/atlas/index.ts` | 697 | Orchestrator hook |
 | `src/cli/config-manager.ts` | 667 | JSONC config parsing |
+| `src/features/skill-mcp-manager/manager.ts` | 640 | MCP client lifecycle |
 | `src/features/builtin-commands/templates/refactor.ts` | 619 | Refactor command template |
+| `src/agents/hephaestus.ts` | 618 | Autonomous deep worker agent |
+| `src/agents/utils.ts` | 571 | Agent creation, model fallback resolution |
+| `src/plugin-handlers/config-handler.ts` | 562 | Plugin config loading |
+| `src/tools/delegate-task/constants.ts` | 552 | Delegation constants |
+| `src/cli/install.ts` | 542 | Interactive CLI installer |
+| `src/hooks/task-continuation-enforcer.ts` | 530 | Task completion enforcement |
+| `src/agents/sisyphus.ts` | 530 | Main orchestrator agent |

 ## MCP ARCHITECTURE

 Three-tier system:
-1. **Built-in**: websearch (Exa), context7 (docs), grep_app (GitHub)
+1. **Built-in**: websearch (Exa/Tavily), context7 (docs), grep_app (GitHub)
 2. **Claude Code compat**: .mcp.json with `${VAR}` expansion
 3. **Skill-embedded**: YAML frontmatter in skills

 ## CONFIG SYSTEM

-- **Zod validation**: `src/config/schema.ts`
+- **Zod validation**: `src/config/schema.ts` (455 lines)
 - **JSONC support**: Comments, trailing commas
 - **Multi-level**: Project (`.opencode/`) → User (`~/.config/opencode/`)
+- **Loading**: `src/plugin-handlers/config-handler.ts` → merge → validate

 ## NOTES

 - **OpenCode**: Requires >= 1.0.150
 - **Flaky tests**: ralph-loop (CI timeout), session-state (parallel pollution)
 - **Trusted deps**: @ast-grep/cli, @ast-grep/napi, @code-yeongyu/comment-checker
+- **No linter/formatter**: No ESLint, Prettier, or Biome configured
--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
--- a/bun.lock
+++ b/bun.lock
@@ -1,6 +1,6 @@
 {
  "lockfileVersion": 1,
-  "configVersion": 1,
+  "configVersion": 0,
  "workspaces": {
    "": {
      "name": "oh-my-opencode",
@@ -28,13 +28,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.2.2",
-        "oh-my-opencode-darwin-x64": "3.2.2",
-        "oh-my-opencode-linux-arm64": "3.2.2",
-        "oh-my-opencode-linux-arm64-musl": "3.2.2",
-        "oh-my-opencode-linux-x64": "3.2.2",
-        "oh-my-opencode-linux-x64-musl": "3.2.2",
-        "oh-my-opencode-windows-x64": "3.2.2",
+        "oh-my-opencode-darwin-arm64": "3.3.1",
+        "oh-my-opencode-darwin-x64": "3.3.1",
+        "oh-my-opencode-linux-arm64": "3.3.1",
+        "oh-my-opencode-linux-arm64-musl": "3.3.1",
+        "oh-my-opencode-linux-x64": "3.3.1",
+        "oh-my-opencode-linux-x64-musl": "3.3.1",
+        "oh-my-opencode-windows-x64": "3.3.1",
      },
    },
  },
@@ -44,41 +44,41 @@
    "@code-yeongyu/comment-checker",
  ],
  "packages": {
-    "@ast-grep/cli": ["@ast-grep/cli@0.40.5", "", { "dependencies": { "detect-libc": "2.1.2" }, "optionalDependencies": { "@ast-grep/cli-darwin-arm64": "0.40.5", "@ast-grep/cli-darwin-x64": "0.40.5", "@ast-grep/cli-linux-arm64-gnu": "0.40.5", "@ast-grep/cli-linux-x64-gnu": "0.40.5", "@ast-grep/cli-win32-arm64-msvc": "0.40.5", "@ast-grep/cli-win32-ia32-msvc": "0.40.5", "@ast-grep/cli-win32-x64-msvc": "0.40.5" }, "bin": { "sg": "sg", "ast-grep": "ast-grep" } }, "sha512-yVXL7Gz0WIHerQLf+MVaVSkhIhidtWReG5akNVr/JS9OVCVkSdz7gWm7H8jVv2M9OO1tauuG76K3UaRGBPu5lQ=="],
+    "@ast-grep/cli": ["@ast-grep/cli@0.40.0", "", { "dependencies": { "detect-libc": "2.1.2" }, "optionalDependencies": { "@ast-grep/cli-darwin-arm64": "0.40.0", "@ast-grep/cli-darwin-x64": "0.40.0", "@ast-grep/cli-linux-arm64-gnu": "0.40.0", "@ast-grep/cli-linux-x64-gnu": "0.40.0", "@ast-grep/cli-win32-arm64-msvc": "0.40.0", "@ast-grep/cli-win32-ia32-msvc": "0.40.0", "@ast-grep/cli-win32-x64-msvc": "0.40.0" }, "bin": { "sg": "sg", "ast-grep": "ast-grep" } }, "sha512-L8AkflsfI2ZP70yIdrwqvjR02ScCuRmM/qNGnJWUkOFck+e6gafNVJ4e4jjGQlEul+dNdBpx36+O2Op629t47A=="],

-    "@ast-grep/cli-darwin-arm64": ["@ast-grep/cli-darwin-arm64@0.40.5", "", { "os": "darwin", "cpu": "arm64" }, "sha512-T9CzwJ1GqQhnANdsu6c7iT1akpvTVMK+AZrxnhIPv33Ze5hrXUUkqan+j4wUAukRJDqU7u94EhXLSLD+5tcJ8g=="],
+    "@ast-grep/cli-darwin-arm64": ["@ast-grep/cli-darwin-arm64@0.40.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-UehY2MMUkdJbsriP7NKc6+uojrqPn7d1Cl0em+WAkee7Eij81VdyIjRsRxtZSLh440ZWQBHI3PALZ9RkOO8pKQ=="],

-    "@ast-grep/cli-darwin-x64": ["@ast-grep/cli-darwin-x64@0.40.5", "", { "os": "darwin", "cpu": "x64" }, "sha512-ez9b2zKvXU8f4ghhjlqYvbx6tWCKJTuVlNVqDDfjqwwhGeiTYfnzMlSVat4ElYRMd21gLtXZIMy055v2f21Ztg=="],
+    "@ast-grep/cli-darwin-x64": ["@ast-grep/cli-darwin-x64@0.40.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-RFDJ2ZxUbT0+grntNlOLJx7wa9/ciVCeaVtQpQy8WJJTvXvkY0etl8Qlh2TmO2x2yr+i0Z6aMJi4IG/Yx5ghTQ=="],

-    "@ast-grep/cli-linux-arm64-gnu": ["@ast-grep/cli-linux-arm64-gnu@0.40.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-VXa2L1IEYD66AMb0GuG7VlMMbPmEGoJUySWDcwSZo/D9neiry3MJ41LQR5oTG2HyhIPBsf9umrXnmuRq66BviA=="],
+    "@ast-grep/cli-linux-arm64-gnu": ["@ast-grep/cli-linux-arm64-gnu@0.40.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-4p55gnTQ1mMFCyqjtM7bH9SB9r16mkwXtUcJQGX1YgFG4WD+QG8rC4GwSuNNZcdlYaOQuTWrgUEQ9z5K06UXfg=="],

-    "@ast-grep/cli-linux-x64-gnu": ["@ast-grep/cli-linux-x64-gnu@0.40.5", "", { "os": "linux", "cpu": "x64" }, "sha512-GQC5162eIOWXR2eQQ6Knzg7/8Trp5E1ODJkaErf0IubdQrZBGqj5AAcQPcWgPbbnmktjIp0H4NraPpOJ9eJ22A=="],
+    "@ast-grep/cli-linux-x64-gnu": ["@ast-grep/cli-linux-x64-gnu@0.40.0", "", { "os": "linux", "cpu": "x64" }, "sha512-u2MXFceuwvrO+OQ6zFGoJ6wbATXn46HWwW79j4UPrXYJzVl97jRyjJOIQTJOzTflsk02fjP98DQkfvbXt2dl3Q=="],

-    "@ast-grep/cli-win32-arm64-msvc": ["@ast-grep/cli-win32-arm64-msvc@0.40.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-YiZdnQZsSlXQTMsZJop/Ux9MmUGfuRvC2x/UbFgrt5OBSYxND+yoiMc0WcA3WG+wU+tt4ZkB5HUea3r/IkOLYA=="],
+    "@ast-grep/cli-win32-arm64-msvc": ["@ast-grep/cli-win32-arm64-msvc@0.40.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-E/I1xpF/RQL2fo1CQsQfTxyDLnChsbZ+ERrQHKuF1FI4WrkaPOBibpqda60QgVmUcgOGZyZ/GRb3iKEVWPsQNQ=="],

-    "@ast-grep/cli-win32-ia32-msvc": ["@ast-grep/cli-win32-ia32-msvc@0.40.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-MHkCxCITVTr8sY9CcVqNKbfUzMa3Hc6IilGXad0Clnw2vNmPfWqSky+hU/UTerr5YHWwWfAVURH7ANZgirtx0Q=="],
+    "@ast-grep/cli-win32-ia32-msvc": ["@ast-grep/cli-win32-ia32-msvc@0.40.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-9h12OQu1BR0GxHEtT+Z4QkJk3LLWLiKwjBkjXUGlASHYDPTyLcs85KwDLeFHs4BwarF8TDdF+KySvB9WPGl/nQ=="],

-    "@ast-grep/cli-win32-x64-msvc": ["@ast-grep/cli-win32-x64-msvc@0.40.5", "", { "os": "win32", "cpu": "x64" }, "sha512-/MJ5un7yxlClaaxou9eYl+Kr2xr/yTtYtTq5aLBWjPWA6dmmJ1nAJgx5zKHVuplFXFBrFDQk3paEgAETMTGcrA=="],
+    "@ast-grep/cli-win32-x64-msvc": ["@ast-grep/cli-win32-x64-msvc@0.40.0", "", { "os": "win32", "cpu": "x64" }, "sha512-n2+3WynEWFHhXg6KDgjwWQ0UEtIvqUITFbKEk5cDkUYrzYhg/A6kj0qauPwRbVMoJms49vtsNpLkzzqyunio5g=="],

-    "@ast-grep/napi": ["@ast-grep/napi@0.40.5", "", { "optionalDependencies": { "@ast-grep/napi-darwin-arm64": "0.40.5", "@ast-grep/napi-darwin-x64": "0.40.5", "@ast-grep/napi-linux-arm64-gnu": "0.40.5", "@ast-grep/napi-linux-arm64-musl": "0.40.5", "@ast-grep/napi-linux-x64-gnu": "0.40.5", "@ast-grep/napi-linux-x64-musl": "0.40.5", "@ast-grep/napi-win32-arm64-msvc": "0.40.5", "@ast-grep/napi-win32-ia32-msvc": "0.40.5", "@ast-grep/napi-win32-x64-msvc": "0.40.5" } }, "sha512-hJA62OeBKUQT68DD2gDyhOqJxZxycqg8wLxbqjgqSzYttCMSDL9tiAQ9abgekBYNHudbJosm9sWOEbmCDfpX2A=="],
+    "@ast-grep/napi": ["@ast-grep/napi@0.40.0", "", { "optionalDependencies": { "@ast-grep/napi-darwin-arm64": "0.40.0", "@ast-grep/napi-darwin-x64": "0.40.0", "@ast-grep/napi-linux-arm64-gnu": "0.40.0", "@ast-grep/napi-linux-arm64-musl": "0.40.0", "@ast-grep/napi-linux-x64-gnu": "0.40.0", "@ast-grep/napi-linux-x64-musl": "0.40.0", "@ast-grep/napi-win32-arm64-msvc": "0.40.0", "@ast-grep/napi-win32-ia32-msvc": "0.40.0", "@ast-grep/napi-win32-x64-msvc": "0.40.0" } }, "sha512-tq6nO/8KwUF/mHuk1ECaAOSOlz2OB/PmygnvprJzyAHGRVzdcffblaOOWe90M9sGz5MAasXoF+PTcayQj9TKKA=="],

-    "@ast-grep/napi-darwin-arm64": ["@ast-grep/napi-darwin-arm64@0.40.5", "", { "os": "darwin", "cpu": "arm64" }, "sha512-2F072fGN0WTq7KI3okuEnkGJVEHLbi56Bw1H6NAMf7j2mJJeQWsRyGOMcyNnUXZDeNdvoMH0OB2a5wwUegY/nQ=="],
+    "@ast-grep/napi-darwin-arm64": ["@ast-grep/napi-darwin-arm64@0.40.0", "", { "os": "darwin", "cpu": "arm64" }, "sha512-ZMjl5yLhKjxdwbqEEdMizgQdWH2NrWsM6Px+JuGErgCDe6Aedq9yurEPV7veybGdLVJQhOah6htlSflXxjHnYA=="],

-    "@ast-grep/napi-darwin-x64": ["@ast-grep/napi-darwin-x64@0.40.5", "", { "os": "darwin", "cpu": "x64" }, "sha512-dJMidHZhhxuLBYNi6/FKI812jQ7wcFPSKkVPwviez2D+KvYagapUMAV/4dJ7FCORfguVk8Y0jpPAlYmWRT5nvA=="],
+    "@ast-grep/napi-darwin-x64": ["@ast-grep/napi-darwin-x64@0.40.0", "", { "os": "darwin", "cpu": "x64" }, "sha512-f9Ol5oQKNRMBkvDtzBK1WiNn2/3eejF2Pn9xwTj7PhXuSFseedOspPYllxQo0gbwUlw/DJqGFTce/jarhR/rBw=="],

-    "@ast-grep/napi-linux-arm64-gnu": ["@ast-grep/napi-linux-arm64-gnu@0.40.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-nBRCbyoS87uqkaw4Oyfe5VO+SRm2B+0g0T8ME69Qry9ShMf41a2bTdpcQx9e8scZPogq+CTwDHo3THyBV71l9w=="],
+    "@ast-grep/napi-linux-arm64-gnu": ["@ast-grep/napi-linux-arm64-gnu@0.40.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-+tO+VW5GDhT9jGkKOK+3b8+ohKjC98WTzn7wSskd/myyhK3oYL1WTKqCm07WSYBZOJvb3z+WaX+wOUrc4bvtyQ=="],

-    "@ast-grep/napi-linux-arm64-musl": ["@ast-grep/napi-linux-arm64-musl@0.40.5", "", { "os": "linux", "cpu": "arm64" }, "sha512-/qKsmds5FMoaEj6FdNzepbmLMtlFuBLdrAn9GIWCqOIcVcYvM1Nka8+mncfeXB/MFZKOrzQsQdPTWqrrQzXLrA=="],
+    "@ast-grep/napi-linux-arm64-musl": ["@ast-grep/napi-linux-arm64-musl@0.40.0", "", { "os": "linux", "cpu": "arm64" }, "sha512-MS9qalLRjUnF2PCzuTKTvCMVSORYHxxe3Qa0+SSaVULsXRBmuy5C/b1FeWwMFnwNnC0uie3VDet31Zujwi8q6A=="],

-    "@ast-grep/napi-linux-x64-gnu": ["@ast-grep/napi-linux-x64-gnu@0.40.5", "", { "os": "linux", "cpu": "x64" }, "sha512-DP4oDbq7f/1A2hRTFLhJfDFR6aI5mRWdEfKfHzRItmlKsR9WlcEl1qDJs/zX9R2EEtIDsSKRzuJNfJllY3/W8Q=="],
+    "@ast-grep/napi-linux-x64-gnu": ["@ast-grep/napi-linux-x64-gnu@0.40.0", "", { "os": "linux", "cpu": "x64" }, "sha512-BeHZVMNXhM3WV3XE2yghO0fRxhMOt8BTN972p5piYEQUvKeSHmS8oeGcs6Ahgx5znBclqqqq37ZfioYANiTqJA=="],

-    "@ast-grep/napi-linux-x64-musl": ["@ast-grep/napi-linux-x64-musl@0.40.5", "", { "os": "linux", "cpu": "x64" }, "sha512-BRZUvVBPUNpWPo6Ns8chXVzxHPY+k9gpsubGTHy92Q26ecZULd/dTkWWdnvfhRqttsSQ9Pe/XQdi5+hDQ6RYcg=="],
+    "@ast-grep/napi-linux-x64-musl": ["@ast-grep/napi-linux-x64-musl@0.40.0", "", { "os": "linux", "cpu": "x64" }, "sha512-rG1YujF7O+lszX8fd5u6qkFTuv4FwHXjWvt1CCvCxXwQLSY96LaCW88oVKg7WoEYQh54y++Fk57F+Wh9Gv9nVQ=="],

-    "@ast-grep/napi-win32-arm64-msvc": ["@ast-grep/napi-win32-arm64-msvc@0.40.5", "", { "os": "win32", "cpu": "arm64" }, "sha512-y95zSEwc7vhxmcrcH0GnK4ZHEBQrmrszRBNQovzaciF9GUqEcCACNLoBesn4V47IaOp4fYgD2/EhGRTIBFb2Ug=="],
+    "@ast-grep/napi-win32-arm64-msvc": ["@ast-grep/napi-win32-arm64-msvc@0.40.0", "", { "os": "win32", "cpu": "arm64" }, "sha512-9SqmnQqd4zTEUk6yx0TuW2ycZZs2+e569O/R0QnhSiQNpgwiJCYOe/yPS0BC9HkiaozQm6jjAcasWpFtz/dp+w=="],

-    "@ast-grep/napi-win32-ia32-msvc": ["@ast-grep/napi-win32-ia32-msvc@0.40.5", "", { "os": "win32", "cpu": "ia32" }, "sha512-K/u8De62iUnFCzVUs7FBdTZ2Jrgc5/DLHqjpup66KxZ7GIM9/HGME/O8aSoPkpcAeCD4TiTZ11C1i5p5H98hTg=="],
+    "@ast-grep/napi-win32-ia32-msvc": ["@ast-grep/napi-win32-ia32-msvc@0.40.0", "", { "os": "win32", "cpu": "ia32" }, "sha512-0JkdBZi5l9vZhGEO38A1way0LmLRDU5Vos6MXrLIOVkymmzDTDlCdY394J1LMmmsfwWcyJg6J7Yv2dw41MCxDQ=="],

-    "@ast-grep/napi-win32-x64-msvc": ["@ast-grep/napi-win32-x64-msvc@0.40.5", "", { "os": "win32", "cpu": "x64" }, "sha512-dqm5zg/o4Nh4VOQPEpMS23ot8HVd22gG0eg01t4CFcZeuzyuSgBlOL3N7xLbz3iH2sVkk7keuBwAzOIpTqziNQ=="],
+    "@ast-grep/napi-win32-x64-msvc": ["@ast-grep/napi-win32-x64-msvc@0.40.0", "", { "os": "win32", "cpu": "x64" }, "sha512-Hk2IwfPqMFGZt5SRxsoWmGLxBXxprow4LRp1eG6V8EEiJCNHxZ9ZiEaIc5bNvMDBjHVSnqZAXT22dROhrcSKQg=="],

    "@clack/core": ["@clack/core@0.5.0", "", { "dependencies": { "picocolors": "^1.0.0", "sisteransi": "^1.0.5" } }, "sha512-p3y0FIOwaYRUPRcMO7+dlmLh8PSRcrjuTndsiA0WAFbWES0mLZlrjVoBRZ9DzkPFJZG6KGkJmoEAY0ZcVWTkow=="],

@@ -86,17 +86,17 @@

    "@code-yeongyu/comment-checker": ["@code-yeongyu/comment-checker@0.6.1", "", { "os": [ "linux", "win32", "darwin", ], "cpu": [ "x64", "arm64", ], "bin": { "comment-checker": "bin/comment-checker" } }, "sha512-BBremX+Y5aW8sTzlhHrLsKParupYkPOVUYmq9STrlWvBvfAme6w5IWuZCLl6nHIQScRDdvGdrAjPycJC86EZFA=="],

-    "@hono/node-server": ["@hono/node-server@1.19.9", "", { "peerDependencies": { "hono": "^4" } }, "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw=="],
+    "@hono/node-server": ["@hono/node-server@1.19.7", "", { "peerDependencies": { "hono": "^4" } }, "sha512-vUcD0uauS7EU2caukW8z5lJKtoGMokxNbJtBiwHgpqxEXokaHCBkQUmCHhjFB1VUTWdqj25QoMkMKzgjq+uhrw=="],

-    "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.25.3", "", { "dependencies": { "@hono/node-server": "^1.19.9", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.0.1", "express-rate-limit": "^7.5.0", "jose": "^6.1.1", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.0" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ=="],
+    "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.25.1", "", { "dependencies": { "@hono/node-server": "^1.19.7", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.0.1", "express-rate-limit": "^7.5.0", "jose": "^6.1.1", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.0" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-yO28oVFFC7EBoiKdAn+VqRm+plcfv4v0xp6osG/VsCB0NlPZWi87ajbCZZ8f/RvOFLEu7//rSRmuZZ7lMoe3gQ=="],

-    "@opencode-ai/plugin": ["@opencode-ai/plugin@1.1.47", "", { "dependencies": { "@opencode-ai/sdk": "1.1.47", "zod": "4.1.8" } }, "sha512-gNMPz72altieDfLhUw3VAT1xbduKi3w3wZ57GLeS7qU9W474HdvdIiLBnt2Xq3U7Ko0/0tvK3nzCker6IIDqmQ=="],
+    "@opencode-ai/plugin": ["@opencode-ai/plugin@1.1.19", "", { "dependencies": { "@opencode-ai/sdk": "1.1.19", "zod": "4.1.8" } }, "sha512-Q6qBEjHb/dJMEw4BUqQxEswTMxCCHUpFMMb6jR8HTTs8X/28XRkKt5pHNPA82GU65IlSoPRph+zd8LReBDN53Q=="],

-    "@opencode-ai/sdk": ["@opencode-ai/sdk@1.1.47", "", {}, "sha512-s3PBHwk1sP6Zt/lJxIWSBWZ1TnrI1nFxSP97LCODUytouAQgbygZ1oDH7O2sGMBEuGdA8B1nNSPla0aRSN3IpA=="],
+    "@opencode-ai/sdk": ["@opencode-ai/sdk@1.1.19", "", {}, "sha512-XhZhFuvlLCqDpvNtUEjOsi/wvFj3YCXb1dySp+OONQRMuHlorNYnNa7P2A2ntKuhRdGT1Xt5na0nFzlUyNw+4A=="],

    "@types/js-yaml": ["@types/js-yaml@4.0.9", "", {}, "sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg=="],

-    "@types/node": ["@types/node@25.1.0", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-t7frlewr6+cbx+9Ohpl0NOTKXZNV9xHRmNOvql47BFJKcEG1CxtxlPEEe+gR9uhVWM4DwhnvTF110mIL4yP9RA=="],
+    "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="],

    "@types/picomatch": ["@types/picomatch@3.0.2", "", {}, "sha512-n0i8TD3UDB7paoMMxA3Y65vUncFJXjcUf7lQY7YyKGl6031FNjfsLs6pdLFCy2GNFxItPJG8GvvpbZc2skH7WA=="],

@@ -108,7 +108,7 @@

    "argparse": ["argparse@2.0.1", "", {}, "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="],

-    "body-parser": ["body-parser@2.2.2", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.1", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA=="],
+    "body-parser": ["body-parser@2.2.1", "", { "dependencies": { "bytes": "^3.1.2", "content-type": "^1.0.5", "debug": "^4.4.3", "http-errors": "^2.0.0", "iconv-lite": "^0.7.0", "on-finished": "^2.4.1", "qs": "^6.14.0", "raw-body": "^3.0.1", "type-is": "^2.0.1" } }, "sha512-nfDwkulwiZYQIGwxdy0RUmowMhKcFVcYXUU7m4QlKYim1rUtg83xm2yjZ40QjDuc291AJjjeSc9b++AWHSgSHw=="],

    "bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],

@@ -118,7 +118,7 @@

    "call-bound": ["call-bound@1.0.4", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "get-intrinsic": "^1.3.0" } }, "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg=="],

-    "commander": ["commander@14.0.3", "", {}, "sha512-H+y0Jo/T1RZ9qPP4Eh1pkcQcLRglraJaSLoyOtHxu6AapkjWVCy2Sit1QQ4x3Dng8qDlSsZEet7g5Pq06MvTgw=="],
+    "commander": ["commander@14.0.2", "", {}, "sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ=="],

    "content-disposition": ["content-disposition@1.0.1", "", {}, "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q=="],

@@ -128,7 +128,7 @@

    "cookie-signature": ["cookie-signature@1.2.2", "", {}, "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg=="],

-    "cors": ["cors@2.8.6", "", { "dependencies": { "object-assign": "^4", "vary": "^1" } }, "sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw=="],
+    "cors": ["cors@2.8.5", "", { "dependencies": { "object-assign": "^4", "vary": "^1" } }, "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g=="],

    "cross-spawn": ["cross-spawn@7.0.6", "", { "dependencies": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", "which": "^2.0.1" } }, "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA=="],

@@ -184,11 +184,11 @@

    "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="],

-    "hono": ["hono@4.11.7", "", {}, "sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw=="],
+    "hono": ["hono@4.10.8", "", {}, "sha512-DDT0A0r6wzhe8zCGoYOmMeuGu3dyTAE40HHjwUsWFTEy5WxK1x2WDSsBPlEXgPbRIFY6miDualuUDbasPogIww=="],

    "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="],

-    "iconv-lite": ["iconv-lite@0.7.2", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw=="],
+    "iconv-lite": ["iconv-lite@0.7.1", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-2Tth85cXwGFHfvRgZWszZSvdo+0Xsqmw8k8ZwxScfcBneNUraK+dxRxRm24nszx80Y0TVio8kKLt5sLE7ZCLlw=="],

    "inherits": ["inherits@2.0.4", "", {}, "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="],

@@ -226,19 +226,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.2.2", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-KyfoWcANfcvpfanrrX+Wc8vH8vr9mvr7dJMHBe2bkvuhdtHnLHOG18hQwLg6jk4HhdoZAeBEmkolOsK2k4XajA=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-R+o42Km6bsIaW6D3I8uu2HCF3BjIWqa/fg38W5y4hJEOw4mL0Q7uV4R+0vtrXRHo9crXTK9ag0fqVQUm+Y6iAQ=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.2.2", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ajZ1E36Ixwdz6rvSUKUI08M2xOaNIl1ZsdVjknZTrPRtct9xgS+BEFCoSCov9bnV/9DrZD3mlZtO/+FFDbseUg=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7VTbpR1vH3OEkoJxBKtYuxFPX8M3IbJKoeHWME9iK6FpT11W1ASsjyuhvzB1jcxSeqF8ddMnjitlG5ub6h5EVw=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.2.2", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ItJsYfigXcOa8/ejTjopC4qk5BCeYioMQ693kPTpeYHK3ByugTjJk8aamE7bHlVnmrdgWldz91QFzaP82yOAdg=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-BZ/r/CFlvbOxkdZZrRoT16xFOjibRZHuwQnaE4f0JvOzgK6/HWp3zJI1+2/aX/oK5GA6lZxNWRrJC/SKUi8LEg=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.2.2", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-/TvjYe/Kb//ZSHnJzgRj0QPKpS5Y2nermVTSaMTGS2btObXQyQWzuphDhsVRu60SVrNLbflHzfuTdqb3avDjyA=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-U90Wruf21h+CJbtcrS7MeTAc/5VOF6RI+5jr7qj/cCxjXNJtjhyJdz/maehArjtgf304+lYCM/Mh1i+G2D3YFQ=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.2.2", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Ka5j+tjuQkNnpESVzcTzW5tZMlBhOfP9F12+UaR72cIcwFpSoLMBp84rV6R0vXM0zUcrrN7mPeW66DvQ6A0XQQ=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-sYzohSNdwsAhivbXcbhPdF1qqQi2CCI7FSgbmvvfBOMyZ8HAgqOFqYW2r3GPdmtywzkjOTvCzTG56FZwEjx15w=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.2.2", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ISl0sTNShKCgPFO+rsDqEDsvVHQAMfOSAxO0KuWbHFKaH+KaRV4d3N/ihgxZ2M94CZjJLzZEuln+6kLZ93cvzQ=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aG5pZ4eWS0YSGUicOnjMkUPrIqQV4poYF+d9SIvrfvlaMcK6WlQn7jXzgNCwJsfGn5lyhSmjshZBEU+v79Ua3w=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.2.2", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-KeiJLQvJuZ+UYf/+eMsQXvCiHDRPk6tD15lL+qruLvU19va62JqMNvTuOv97732uF19iG0ZMiiVhqIMbSyVPqQ=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-FGH7cnzBqNwjSkzCDglMsVttaq+MsykAxa7ehaFK+0dnBZArvllS3W13a3dGaANHMZzfK0vz8hNDUdVi7Z63cA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

@@ -310,10 +310,8 @@

    "wrappy": ["wrappy@1.0.2", "", {}, "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="],

-    "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="],
+    "zod": ["zod@4.1.8", "", {}, "sha512-5R1P+WwQqmmMIEACyzSvo4JXHY5WiAFHRMg+zBZKgKS+Q1viRa0C1hmUKtHltoIFKtIdki3pRxkmpP74jnNYHQ=="],

    "zod-to-json-schema": ["zod-to-json-schema@3.25.1", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA=="],
-
-    "@opencode-ai/plugin/zod": ["zod@4.1.8", "", {}, "sha512-5R1P+WwQqmmMIEACyzSvo4JXHY5WiAFHRMg+zBZKgKS+Q1viRa0C1hmUKtHltoIFKtIdki3pRxkmpP74jnNYHQ=="],
  }
 }
--- a/docs/category-skill-guide.md
+++ b/docs/category-skill-guide.md
@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
 - **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
 - **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)

-By combining these two concepts, you can generate optimal agents through `delegate_task`.
+By combining these two concepts, you can generate optimal agents through the `task` tool.

 ---

@@ -22,20 +22,20 @@ A Category is an agent configuration preset optimized for specific domains.
 | Category | Default Model | Use Cases |
 |----------|---------------|-----------|
 | `visual-engineering` | `google/gemini-3-pro` | Frontend, UI/UX, design, styling, animation |
-| `ultrabrain` | `openai/gpt-5.2-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
-| `deep` | `openai/gpt-5.2-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
+| `ultrabrain` | `openai/gpt-5.3-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
+| `deep` | `openai/gpt-5.3-codex` (medium) | Goal-oriented autonomous problem-solving. Thorough research before action. For hairy problems requiring deep understanding. |
 | `artistry` | `google/gemini-3-pro` (max) | Highly creative/artistic tasks, novel ideas |
 | `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
 | `unspecified-low` | `anthropic/claude-sonnet-4-5` | Tasks that don't fit other categories, low effort required |
-| `unspecified-high` | `anthropic/claude-opus-4-5` (max) | Tasks that don't fit other categories, high effort required |
+| `unspecified-high` | `anthropic/claude-opus-4-6` (max) | Tasks that don't fit other categories, high effort required |
 | `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |

 ### Usage

-Specify the `category` parameter when invoking the `delegate_task` tool.
+Specify the `category` parameter when invoking the `task` tool.

 ```typescript
-delegate_task(
+task(
  category="visual-engineering",
  prompt="Add a responsive chart component to the dashboard page"
 )
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
 Add desired skill names to the `load_skills` array.

 ```typescript
-delegate_task(
+task(
  category="quick",
  load_skills=["git-master"],
  prompt="Commit current changes. Follow commit message style."
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.

 ---

-## 5. delegate_task Prompt Guide
+## 5. `task` Prompt Guide

 When delegating, **clear and specific** prompts are essential. Include these 7 elements:

@@ -158,8 +158,8 @@ You can fine-tune categories in `oh-my-opencode.json`.

 | Field | Type | Description |
 |-------|------|-------------|
-| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
-| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-5`) |
+| `description` | string | Human-readable description of the category's purpose. Shown in task prompt. |
+| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
 | `variant` | string | Model variant (e.g., `max`, `xhigh`) |
 | `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |
 | `top_p` | number | Nucleus sampling parameter (0.0 ~ 1.0) |
@@ -191,7 +191,7 @@ You can fine-tune categories in `oh-my-opencode.json`.

    // 3. Configure thinking model and restrict tools
    "deep-reasoning": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "thinking": {
        "type": "enabled",
        "budgetTokens": 32000
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
    "explore": { "model": "opencode/gpt-5-nano" }        // Free model for grep
  },
  
-  // Override category models (used by delegate_task)
+  // Override category models (used by task)
  "categories": {
    "quick": { "model": "opencode/gpt-5-nano" },         // Fast/cheap for trivial tasks
    "visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
 Oh My OpenCode includes built-in skills that provide additional capabilities:

 - **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
+- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.

 Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:

@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
 ### How It Works

 When `tmux.enabled` is `true` and you're inside a tmux session:
- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
+- Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
 - Each pane shows the subagent's real-time output
 - Panes are automatically closed when the subagent completes
 - Layout is automatically adjusted based on your configuration
@@ -693,7 +693,7 @@ Configure concurrency limits for background agent tasks. This controls how many
      "google": 10
    },
    "modelConcurrency": {
-      "anthropic/claude-opus-4-5": 2,
+      "anthropic/claude-opus-4-6": 2,
      "google/gemini-3-flash": 10
    }
  }
@@ -705,7 +705,7 @@ Configure concurrency limits for background agent tasks. This controls how many
 | `defaultConcurrency`  | -       | Default maximum concurrent background tasks for all providers/models                                                    |
 | `staleTimeoutMs`      | `180000` | Stale timeout in milliseconds - interrupt tasks with no activity for this duration (minimum: 60000 = 1 minute)             |
 | `providerConcurrency` | -       | Per-provider concurrency limits. Keys are provider names (e.g., `anthropic`, `openai`, `google`)                        |
-| `modelConcurrency`    | -       | Per-model concurrency limits. Keys are full model names (e.g., `anthropic/claude-opus-4-5`). Overrides provider limits. |
+| `modelConcurrency`    | -       | Per-model concurrency limits. Keys are full model names (e.g., `anthropic/claude-opus-4-6`). Overrides provider limits. |

 **Priority Order**: `modelConcurrency` > `providerConcurrency` > `defaultConcurrency`

@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many

 ## Categories

-Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
+Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.

 ### Built-in Categories

@@ -725,11 +725,11 @@ All 7 categories come with optimal model defaults, but **you must configure them
 | Category             | Built-in Default Model             | Description                                                          |
 | -------------------- | ---------------------------------- | -------------------------------------------------------------------- |
 | `visual-engineering` | `google/gemini-3-pro-preview`      | Frontend, UI/UX, design, styling, animation                          |
-| `ultrabrain`         | `openai/gpt-5.2-codex` (xhigh)     | Deep logical reasoning, complex architecture decisions               |
+| `ultrabrain`         | `openai/gpt-5.3-codex` (xhigh)     | Deep logical reasoning, complex architecture decisions               |
 | `artistry`           | `google/gemini-3-pro-preview` (max)| Highly creative/artistic tasks, novel ideas                          |
 | `quick`              | `anthropic/claude-haiku-4-5`       | Trivial tasks - single file changes, typo fixes, simple modifications|
 | `unspecified-low`    | `anthropic/claude-sonnet-4-5`      | Tasks that don't fit other categories, low effort required           |
-| `unspecified-high`   | `anthropic/claude-opus-4-5` (max)  | Tasks that don't fit other categories, high effort required          |
+| `unspecified-high`   | `anthropic/claude-opus-4-6` (max)  | Tasks that don't fit other categories, high effort required          |
 | `writing`            | `google/gemini-3-flash-preview`    | Documentation, prose, technical writing                              |

 ### ⚠️ Critical: Model Resolution Priority
@@ -768,7 +768,7 @@ All 7 categories come with optimal model defaults, but **you must configure them
      "model": "google/gemini-3-pro-preview"
    },
    "ultrabrain": { 
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh"
    },
    "artistry": { 
@@ -782,7 +782,7 @@ All 7 categories come with optimal model defaults, but **you must configure them
      "model": "anthropic/claude-sonnet-4-5"
    },
    "unspecified-high": { 
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max"
    },
    "writing": { 
@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
 ### Usage

 ```javascript
-// Via delegate_task tool
-delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component")
-delegate_task(category="ultrabrain", prompt="Design the payment processing flow")
+// Via task tool
+task(category="visual-engineering", prompt="Create a responsive dashboard component")
+task(category="ultrabrain", prompt="Design the payment processing flow")

 // Or target a specific agent directly (bypasses categories)
-delegate_task(agent="oracle", prompt="Review this architecture")
+task(agent="oracle", prompt="Review this architecture")
 ```

 ### Custom Categories
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`

 | Option             | Type    | Default | Description                                                                                         |
 | ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
-| `description`       | string  | -       | Human-readable description of the category's purpose. Shown in delegate_task prompt.                     |
+| `description`       | string  | -       | Human-readable description of the category's purpose. Shown in task prompt.                     |
 | `is_unstable_agent`| boolean | `false`  | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |

 ## Model Resolution System
@@ -870,9 +870,9 @@ At runtime, Oh My OpenCode uses a 3-step resolution process to determine which m
 │   │ anthropic → github-copilot → opencode → antigravity     │   │
 │   │     │            │              │            │          │   │
 │   │     ▼            ▼              ▼            ▼          │   │
-│   │ Try: anthropic/claude-opus-4-5                          │   │
-│   │ Try: github-copilot/claude-opus-4-5                     │   │
-│   │ Try: opencode/claude-opus-4-5                           │   │
+│   │ Try: anthropic/claude-opus-4-6                          │   │
+│   │ Try: github-copilot/claude-opus-4-6                     │   │
+│   │ Try: opencode/claude-opus-4-6                           │   │
 │   │ ...                                                     │   │
 │   │                                                         │   │
 │   │ Found in available models? → Return matched model       │   │
@@ -894,13 +894,13 @@ Each agent has a defined provider priority chain. The system tries providers in

 | Agent | Model (no prefix) | Provider Priority Chain |
 |-------|-------------------|-------------------------|
-| **Sisyphus** | `claude-opus-4-5` | anthropic → kimi-for-coding → zai-coding-plan → openai → google |
+| **Sisyphus** | `claude-opus-4-6` | anthropic → kimi-for-coding → zai-coding-plan → openai → google |
 | **oracle** | `gpt-5.2` | openai → google → anthropic |
 | **librarian** | `glm-4.7` | zai-coding-plan → opencode → anthropic |
 | **explore** | `claude-haiku-4-5` | anthropic → github-copilot → opencode |
 | **multimodal-looker** | `gemini-3-flash` | google → openai → zai-coding-plan → kimi-for-coding → anthropic → opencode |
-| **Prometheus (Planner)** | `claude-opus-4-5` | anthropic → kimi-for-coding → openai → google |
-| **Metis (Plan Consultant)** | `claude-opus-4-5` | anthropic → kimi-for-coding → openai → google |
+| **Prometheus (Planner)** | `claude-opus-4-6` | anthropic → kimi-for-coding → openai → google |
+| **Metis (Plan Consultant)** | `claude-opus-4-6` | anthropic → kimi-for-coding → openai → google |
 | **Momus (Plan Reviewer)** | `gpt-5.2` | openai → anthropic → google |
 | **Atlas** | `claude-sonnet-4-5` | anthropic → kimi-for-coding → openai → google |

@@ -911,12 +911,12 @@ Categories follow the same resolution logic:
 | Category | Model (no prefix) | Provider Priority Chain |
 |----------|-------------------|-------------------------|
 | **visual-engineering** | `gemini-3-pro` | google → anthropic → zai-coding-plan |
-| **ultrabrain** | `gpt-5.2-codex` | openai → google → anthropic |
-| **deep** | `gpt-5.2-codex` | openai → anthropic → google |
+| **ultrabrain** | `gpt-5.3-codex` | openai → google → anthropic |
+| **deep** | `gpt-5.3-codex` | openai → anthropic → google |
 | **artistry** | `gemini-3-pro` | google → anthropic → openai |
 | **quick** | `claude-haiku-4-5` | anthropic → google → opencode |
 | **unspecified-low** | `claude-sonnet-4-5` | anthropic → openai → google |
-| **unspecified-high** | `claude-opus-4-5` | anthropic → openai → google |
+| **unspecified-high** | `claude-opus-4-6` | anthropic → openai → google |
 | **writing** | `gemini-3-flash` | google → anthropic → zai-coding-plan → openai |

 ### Checking Your Configuration
@@ -949,7 +949,7 @@ Override any agent or category model in `oh-my-opencode.json`:
  },
  "categories": {
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-5"
+      "model": "anthropic/claude-opus-4-6"
    }
  }
 }
--- a/docs/features.md
+++ b/docs/features.md
@@ -10,8 +10,8 @@ Oh-My-OpenCode provides 11 specialized AI agents. Each has distinct expertise, o

 | Agent | Model | Purpose |
 |-------|-------|---------|
-| **Sisyphus** | `anthropic/claude-opus-4-5` | **The default orchestrator.** Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). Fallback: kimi-k2.5 → glm-4.7 → gpt-5.2-codex → gemini-3-pro. |
-| **Hephaestus** | `openai/gpt-5.2-codex` | **The Legitimate Craftsman.** Autonomous deep worker inspired by AmpCode's deep mode. Goal-oriented execution with thorough research before action. Explores codebase patterns, completes tasks end-to-end without premature stopping. Named after the Greek god of forge and craftsmanship. Requires gpt-5.2-codex (no fallback - only activates when this model is available). |
+| **Sisyphus** | `anthropic/claude-opus-4-6` | **The default orchestrator.** Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). Fallback: kimi-k2.5 → glm-4.7 → gpt-5.3-codex → gemini-3-pro. |
+| **Hephaestus** | `openai/gpt-5.3-codex` | **The Legitimate Craftsman.** Autonomous deep worker inspired by AmpCode's deep mode. Goal-oriented execution with thorough research before action. Explores codebase patterns, completes tasks end-to-end without premature stopping. Named after the Greek god of forge and craftsmanship. Requires gpt-5.3-codex (no fallback - only activates when this model is available). |
 | **oracle** | `openai/gpt-5.2` | Architecture decisions, code review, debugging. Read-only consultation - stellar logical reasoning and deep analysis. Inspired by AmpCode. |
 | **librarian** | `zai-coding-plan/glm-4.7` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Fallback: glm-4.7-free → claude-sonnet-4-5. |
 | **explore** | `anthropic/claude-haiku-4-5` | Fast codebase exploration and contextual grep. Fallback: gpt-5-mini → gpt-5-nano. |
@@ -21,9 +21,9 @@ Oh-My-OpenCode provides 11 specialized AI agents. Each has distinct expertise, o

 | Agent | Model | Purpose |
 |-------|-------|---------|
-| **Prometheus** | `anthropic/claude-opus-4-5` | Strategic planner with interview mode. Creates detailed work plans through iterative questioning. Fallback: kimi-k2.5 → gpt-5.2 → gemini-3-pro. |
-| **Metis** | `anthropic/claude-opus-4-5` | Plan consultant - pre-planning analysis. Identifies hidden intentions, ambiguities, and AI failure points. Fallback: kimi-k2.5 → gpt-5.2 → gemini-3-pro. |
-| **Momus** | `openai/gpt-5.2` | Plan reviewer - validates plans against clarity, verifiability, and completeness standards. Fallback: gpt-5.2 → claude-opus-4-5 → gemini-3-pro. |
+| **Prometheus** | `anthropic/claude-opus-4-6` | Strategic planner with interview mode. Creates detailed work plans through iterative questioning. Fallback: kimi-k2.5 → gpt-5.2 → gemini-3-pro. |
+| **Metis** | `anthropic/claude-opus-4-6` | Plan consultant - pre-planning analysis. Identifies hidden intentions, ambiguities, and AI failure points. Fallback: kimi-k2.5 → gpt-5.2 → gemini-3-pro. |
+| **Momus** | `openai/gpt-5.2` | Plan reviewer - validates plans against clarity, verifiability, and completeness standards. Fallback: gpt-5.2 → claude-opus-4-6 → gemini-3-pro. |

 ### Invoking Agents

@@ -54,7 +54,7 @@ Run agents in the background and continue working:

 ```
 # Launch in background
-delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)

 # Continue working...
 # System notifies on completion
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
 | Hook | Event | Description |
 |------|-------|-------------|
 | **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
-| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. |
+| **delegate-task-retry** | PostToolUse | Retries failed `task` tool calls. |

 #### Integration

@@ -454,7 +454,7 @@ Disable specific hooks in config:
 | Tool | Description |
 |------|-------------|
 | **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
-| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
+| **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
 | **background_output** | Retrieve background task results |
 | **background_cancel** | Cancel running background tasks |

--- a/docs/guide/installation.md
+++ b/docs/guide/installation.md
@@ -196,7 +196,7 @@ When GitHub Copilot is the best available provider, oh-my-opencode uses these mo

 | Agent         | Model                            |
 | ------------- | -------------------------------- |
-| **Sisyphus**  | `github-copilot/claude-opus-4.5` |
+| **Sisyphus**  | `github-copilot/claude-opus-4.6` |
 | **Oracle**    | `github-copilot/gpt-5.2`         |
 | **Explore**   | `opencode/gpt-5-nano`              |
 | **Librarian** | `zai-coding-plan/glm-4.7` (if Z.ai available) or fallback |
@@ -218,13 +218,13 @@ If Z.ai is the only provider available, all agents will use GLM models:

 #### OpenCode Zen

-OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/gpt-5-nano`, and `opencode/glm-4.7-free`.
+OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-6`, `opencode/gpt-5.2`, `opencode/gpt-5-nano`, and `opencode/glm-4.7-free`.

 When OpenCode Zen is the best available provider (no native or Copilot), these models are used:

 | Agent         | Model                            |
 | ------------- | -------------------------------- |
-| **Sisyphus**  | `opencode/claude-opus-4-5`       |
+| **Sisyphus**  | `opencode/claude-opus-4-6`       |
 | **Oracle**    | `opencode/gpt-5.2`               |
 | **Explore**   | `opencode/gpt-5-nano`             |
 | **Librarian** | `opencode/glm-4.7-free`          |
--- a/docs/guide/understanding-orchestration-system.md
+++ b/docs/guide/understanding-orchestration-system.md
@@ -50,11 +50,11 @@ flowchart TB
    User -->|"/start-work"| Orchestrator
    Plan -->|"Read"| Orchestrator
    
-    Orchestrator -->|"delegate_task(category)"| Junior
-    Orchestrator -->|"delegate_task(agent)"| Oracle
-    Orchestrator -->|"delegate_task(agent)"| Explore
-    Orchestrator -->|"delegate_task(agent)"| Librarian
-    Orchestrator -->|"delegate_task(agent)"| Frontend
+    Orchestrator -->|"task(category)"| Junior
+    Orchestrator -->|"task(agent)"| Oracle
+    Orchestrator -->|"task(agent)"| Explore
+    Orchestrator -->|"task(agent)"| Librarian
+    Orchestrator -->|"task(agent)"| Frontend
    
    Junior -->|"Results + Learnings"| Orchestrator
    Oracle -->|"Advice"| Orchestrator
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
 ```typescript
 // Orchestrator identifies parallelizable groups from plan
 // Group A: Tasks 2, 3, 4 (no file conflicts)
-delegate_task(category="ultrabrain", prompt="Task 2...")
-delegate_task(category="visual-engineering", prompt="Task 3...")
-delegate_task(category="general", prompt="Task 4...")
+task(category="ultrabrain", prompt="Task 2...")
+task(category="visual-engineering", prompt="Task 3...")
+task(category="general", prompt="Task 4...")
 // All run simultaneously
 ```

@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")

 Junior is the **workhorse** that actually writes code. Key characteristics:

- **Focused**: Cannot delegate (blocked from task/delegate_task tools)
+- **Focused**: Cannot delegate (blocked from task tool)
 - **Disciplined**: Obsessive todo tracking
 - **Verified**: Must pass lsp_diagnostics before completion
 - **Constrained**: Cannot modify plan files (READ-ONLY)
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.

 ---

-## The delegate_task Tool: Category + Skill System
+## The task Tool: Category + Skill System

 ### Why Categories are Revolutionary

@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.

 ```typescript
 // OLD: Model name creates distributional bias
-delegate_task(agent="gpt-5.2", prompt="...")  // Model knows its limitations
-delegate_task(agent="claude-opus-4.5", prompt="...")  // Different self-perception
+task(agent="gpt-5.2", prompt="...")  // Model knows its limitations
+task(agent="claude-opus-4.6", prompt="...")  // Different self-perception
 ```

 **The Solution: Semantic Categories:**

 ```typescript
 // NEW: Category describes INTENT, not implementation
-delegate_task(category="ultrabrain", prompt="...")     // "Think strategically"
-delegate_task(category="visual-engineering", prompt="...")  // "Design beautifully"
-delegate_task(category="quick", prompt="...")          // "Just get it done fast"
+task(category="ultrabrain", prompt="...")     // "Think strategically"
+task(category="visual-engineering", prompt="...")  // "Design beautifully"
+task(category="quick", prompt="...")          // "Just get it done fast"
 ```

 ### Built-in Categories
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:

 ```typescript
 // Category + Skill combination
-delegate_task(
+task(
  category="visual-engineering", 
  load_skills=["frontend-ui-ux"],  // Adds UI/UX expertise
  prompt="..."
 )

-delegate_task(
+task(
  category="general",
  load_skills=["playwright"],  // Adds browser automation expertise
  prompt="..."
@@ -365,7 +365,7 @@ sequenceDiagram
        
        Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
        
-        Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
+        Orchestrator->>Junior: task(category, load_skills, prompt)
        
        Junior->>Junior: Create todos, execute
        Junior->>Junior: Verify (lsp_diagnostics, tests)
--- a/docs/orchestration-guide.md
+++ b/docs/orchestration-guide.md
@@ -275,7 +275,7 @@ flowchart TD

 ### 🔮 Prometheus (The Planner)

- **Model**: `anthropic/claude-opus-4-5`
+- **Model**: `anthropic/claude-opus-4-6`
 - **Role**: Strategic planning, requirements interviews, work plan creation
 - **Constraint**: **READ-ONLY**. Can only create/modify markdown files within `.sisyphus/` directory.
 - **Characteristic**: Never writes code directly, focuses solely on "how to do it".
@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.

 2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.

-3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly.
+3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.

 4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.

--- a/issue-1501-analysis.md
+++ b/issue-1501-analysis.md
@@ -0,0 +1,357 @@
+# Issue #1501 분석 보고서: ULW Mode PLAN AGENT 무한루프
+
+## 📋 이슈 요약
+
+**증상:**
+- ULW (ultrawork) mode에서 PLAN AGENT가 무한루프에 빠짐
+- 분석/탐색 완료 후 plan만 계속 생성
+- 1분마다 매우 작은 토큰으로 요청 발생
+
+**예상 동작:**
+- 탐색 완료 후 solution document 생성
+
+---
+
+## 🔍 근본 원인 분석
+
+### 파일: `src/tools/delegate-task/constants.ts`
+
+#### 문제의 핵심
+
+`PLAN_AGENT_SYSTEM_PREPEND` (constants.ts 234-269행)에 구조적 결함이 있었습니다:
+
+1. **Interactive Mode 가정**
+   ```
+   2. After gathering context, ALWAYS present:
+      - Uncertainties: List of unclear points
+      - Clarifying Questions: Specific questions to resolve uncertainties
+   
+   3. ITERATE until ALL requirements are crystal clear:
+      - Do NOT proceed to planning until you have 100% clarity
+      - Ask the user to confirm your understanding
+   ```
+
+2. **종료 조건 없음**
+   - "100% clarity" 요구는 객관적 측정 불가능
+   - 사용자 확인 요청은 ULW mode에서 불가능
+   - 무한루프로 이어짐
+
+3. **ULW Mode 미감지**
+   - Subagent로 실행되는 경우를 구분하지 않음
+   - 항상 interactive mode로 동작 시도
+
+### 왜 무한루프가 발생했는가?
+
+```
+ULW Mode 시작
+  → Sisyphus가 Plan Agent 호출 (subagent)
+    → Plan Agent: "100% clarity 필요"
+      → Clarifying questions 생성
+        → 사용자 없음 (subagent)
+          → 다시 plan 생성 시도
+            → "여전히 unclear"
+              → 무한루프 반복
+```
+
+**핵심:** Plan Agent는 사용자와 대화하도록 설계되었지만, ULW mode에서는 사용자가 없는 subagent로 실행됨.
+
+---
+
+## ✅ 적용된 수정 방안
+
+### 수정 내용 (constants.ts)
+
+#### 1. SUBAGENT MODE DETECTION 섹션 추가
+
+```typescript
+SUBAGENT MODE DETECTION (CRITICAL):
+If you received a detailed prompt with gathered context from a parent orchestrator (e.g., Sisyphus):
+- You are running as a SUBAGENT
+- You CANNOT directly interact with the user
+- DO NOT ask clarifying questions - proceed with available information
+- Make reasonable assumptions for minor ambiguities
+- Generate the plan based on the provided context
+```
+
+#### 2. Context Gathering Protocol 수정
+
+```diff
+- 1. Launch background agents to gather context:
++ 1. Launch background agents to gather context (ONLY if not already provided):
+```
+
+**효과:** 이미 Sisyphus가 context를 수집한 경우 중복 방지
+
+#### 3. Clarifying Questions → Assumptions
+
+```diff
+- 2. After gathering context, ALWAYS present:
+-    - Uncertainties: List of unclear points
+-    - Clarifying Questions: Specific questions
++ 2. After gathering context, assess clarity:
++    - User Request Summary: Concise restatement
++    - Assumptions Made: List any assumptions for unclear points
+```
+
+**효과:** 질문 대신 가정 사항 문서화
+
+#### 4. 무한루프 방지 - 명확한 종료 조건
+
+```diff
+- 3. ITERATE until ALL requirements are crystal clear:
+-    - Do NOT proceed to planning until you have 100% clarity
+-    - Ask the user to confirm your understanding
+-    - Resolve every ambiguity before generating the work plan
++ 3. PROCEED TO PLAN GENERATION when:
++    - Core objective is understood (even if some details are ambiguous)
++    - You have gathered context via explore/librarian (or context was provided)
++    - You can make reasonable assumptions for remaining ambiguities
++    
++    DO NOT loop indefinitely waiting for perfect clarity.
++    DOCUMENT assumptions in the plan so they can be validated during execution.
+```
+
+**효과:**
+- "100% clarity" 요구 제거
+- 객관적인 진입 조건 제공
+- 무한루프 명시적 금지
+- Assumptions를 plan에 문서화하여 실행 중 검증 가능
+
+#### 5. 철학 변경
+
+```diff
+- REMEMBER: Vague requirements lead to failed implementations.
++ REMEMBER: A plan with documented assumptions is better than no plan.
+```
+
+**효과:** Perfectionism → Pragmatism
+
+---
+
+## 🎯 해결 메커니즘
+
+### Before (무한루프)
+
+```
+Plan Agent 시작
+  ↓
+Context gathering
+  ↓
+Requirements 명확한가?
+  ↓ NO
+Clarifying questions 생성
+  ↓
+사용자 응답 대기 (없음)
+  ↓
+다시 plan 시도
+  ↓
+(무한 반복)
+```
+
+### After (정상 종료)
+
+```
+Plan Agent 시작
+  ↓
+Subagent mode 감지?
+  ↓ YES
+Context 이미 있음? → YES
+  ↓
+Core objective 이해? → YES
+  ↓
+Reasonable assumptions 가능? → YES
+  ↓
+Plan 생성 (assumptions 문서화)
+  ↓
+완료 ✓
+```
+
+---
+
+## 📊 영향 분석
+
+### 해결되는 문제
+
+1. **ULW mode 무한루프** ✓
+2. **Sisyphus에서 Plan Agent 호출 시 블로킹** ✓
+3. **작은 토큰 반복 요청** ✓
+4. **1분마다 재시도** ✓
+
+### 부작용 없음
+
+- Interactive mode (사용자와 직접 대화)는 여전히 작동
+- Subagent mode일 때만 다르게 동작
+- Backward compatibility 유지
+
+### 추가 개선사항
+
+- Assumptions를 plan에 명시적으로 문서화
+- Execution 중 validation 가능
+- 더 pragmatic한 workflow
+
+---
+
+## 🧪 검증 방법
+
+### 테스트 시나리오
+
+1. **ULW mode에서 Plan Agent 호출**
+   ```bash
+   oh-my-opencode run "Complex task requiring planning. ulw"
+   ```
+   - 예상: Plan 생성 후 정상 종료
+   - 확인: 무한루프 없음
+
+2. **Interactive mode (변경 없어야 함)**
+   ```bash
+   oh-my-opencode run --agent prometheus "Design X"
+   ```
+   - 예상: Clarifying questions 여전히 가능
+   - 확인: 사용자와 대화 가능
+
+3. **Subagent context 제공 케이스**
+   - 예상: Context gathering skip
+   - 확인: 중복 탐색 없음
+
+---
+
+## 📝 수정된 파일
+
+```
+src/tools/delegate-task/constants.ts
+```
+
+### Diff Summary
+
+```diff
+@@ -234,22 +234,32 @@ export const PLAN_AGENT_SYSTEM_PREPEND = `<system>
++SUBAGENT MODE DETECTION (CRITICAL):
++[subagent 감지 및 처리 로직]
++
+ MANDATORY CONTEXT GATHERING PROTOCOL:
+-1. Launch background agents to gather context:
++1. Launch background agents (ONLY if not already provided):
+
+-2. After gathering context, ALWAYS present:
+-   - Uncertainties
+-   - Clarifying Questions
++2. After gathering context, assess clarity:
++   - Assumptions Made
+
+-3. ITERATE until ALL requirements are crystal clear:
+-   - Do NOT proceed until 100% clarity
+-   - Ask user to confirm
++3. PROCEED TO PLAN GENERATION when:
++   - Core objective understood
++   - Context gathered
++   - Reasonable assumptions possible
++   
++   DO NOT loop indefinitely.
++   DOCUMENT assumptions.
+```
+
+---
+
+## 🚀 권장 사항
+
+### Immediate Actions
+
+1. ✅ **수정 적용 완료** - constants.ts 업데이트됨
+2. ⏳ **테스트 수행** - ULW mode에서 동작 검증
+3. ⏳ **PR 생성** - code review 요청
+
+### Future Improvements
+
+1. **Subagent context 표준화**
+   - Subagent로 호출 시 명시적 플래그 전달
+   - `is_subagent: true` 파라미터 추가 고려
+
+2. **Assumptions validation workflow**
+   - Plan 실행 중 assumptions 검증 메커니즘
+   - Incorrect assumptions 감지 시 재계획
+
+3. **Timeout 메커니즘**
+   - Plan Agent가 X분 이상 걸리면 강제 종료
+   - Fallback plan 생성
+
+4. **Monitoring 추가**
+   - Plan Agent 실행 시간 측정
+   - Iteration 횟수 로깅
+   - 무한루프 조기 감지
+
+---
+
+## 📖 관련 코드 구조
+
+### Call Stack
+
+```
+Sisyphus (ULW mode)
+  ↓
+task(category="deep", ...)
+  ↓
+executor.ts: executeBackgroundContinuation()
+  ↓
+prompt-builder.ts: buildSystemContent()
+  ↓
+constants.ts: PLAN_AGENT_SYSTEM_PREPEND (문제 위치)
+  ↓
+Plan Agent 실행
+```
+
+### Key Functions
+
+1. **executor.ts:587** - `isPlanAgent()` 체크
+2. **prompt-builder.ts:11** - Plan Agent prepend 주입
+3. **constants.ts:234** - PLAN_AGENT_SYSTEM_PREPEND 정의
+
+---
+
+## 🎓 교훈
+
+### Design Lessons
+
+1. **Dual Mode Support**
+   - Interactive vs Autonomous mode 구분 필수
+   - Context 전달 방식 명확히
+
+2. **Avoid Perfectionism in Agents**
+   - "100% clarity" 같은 주관적 조건 지양
+   - 명확한 객관적 종료 조건 필요
+
+3. **Document Uncertainties**
+   - 불확실성을 숨기지 말고 문서화
+   - 실행 중 validation 가능하게
+
+4. **Infinite Loop Prevention**
+   - 모든 반복문에 명시적 종료 조건
+   - Timeout 또는 max iteration 설정
+
+---
+
+## 🔗 참고 자료
+
+- **Issue:** #1501 - [Bug]: ULW mode will 100% cause PLAN AGENT to get stuck
+- **Files Modified:** `src/tools/delegate-task/constants.ts`
+- **Related Concepts:** Ultrawork mode, Plan Agent, Subagent delegation
+- **Agent Architecture:** Sisyphus → Prometheus → Atlas workflow
+
+---
+
+## ✅ Conclusion
+
+**Root Cause:** Plan Agent가 interactive mode를 가정했으나 ULW mode에서는 subagent로 실행되어 사용자 상호작용 불가능. "100% clarity" 요구로 무한루프 발생.
+
+**Solution:** Subagent mode 감지 로직 추가, clarifying questions 제거, 명확한 종료 조건 제공, assumptions 문서화 방식 도입.
+
+**Result:** ULW mode에서 Plan Agent가 정상적으로 plan 생성 후 종료. 무한루프 해결.
+
+---
+
+**Status:** ✅ Fixed  
+**Tested:** ⏳ Pending  
+**Deployed:** ⏳ Pending  
+
+**Analyst:** Sisyphus (oh-my-opencode ultrawork mode)  
+**Date:** 2026-02-05  
+**Session:** fast-ember
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.2.3",
-    "oh-my-opencode-darwin-x64": "3.2.3",
-    "oh-my-opencode-linux-arm64": "3.2.3",
-    "oh-my-opencode-linux-arm64-musl": "3.2.3",
-    "oh-my-opencode-linux-x64": "3.2.3",
-    "oh-my-opencode-linux-x64-musl": "3.2.3",
-    "oh-my-opencode-windows-x64": "3.2.3"
+    "oh-my-opencode-darwin-arm64": "3.4.0",
+    "oh-my-opencode-darwin-x64": "3.4.0",
+    "oh-my-opencode-linux-arm64": "3.4.0",
+    "oh-my-opencode-linux-arm64-musl": "3.4.0",
+    "oh-my-opencode-linux-x64": "3.4.0",
+    "oh-my-opencode-linux-x64-musl": "3.4.0",
+    "oh-my-opencode-windows-x64": "3.4.0"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.2.3",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/script/build-schema.ts
+++ b/script/build-schema.ts
@@ -1,5 +1,6 @@
 #!/usr/bin/env bun
 import * as z from "zod"
+import { zodToJsonSchema } from "zod-to-json-schema"
 import { OhMyOpenCodeConfigSchema } from "../src/config/schema"

 const SCHEMA_OUTPUT_PATH = "assets/oh-my-opencode.schema.json"
@@ -7,9 +8,8 @@ const SCHEMA_OUTPUT_PATH = "assets/oh-my-opencode.schema.json"
 async function main() {
  console.log("Generating JSON Schema...")

-  const jsonSchema = z.toJSONSchema(OhMyOpenCodeConfigSchema, {
-    io: "input",
-    target: "draft-7",
+  const jsonSchema = zodToJsonSchema(OhMyOpenCodeConfigSchema, {
+    target: "draft7",
  })

  const finalSchema = {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -1183,6 +1183,86 @@
      "created_at": "2026-02-03T20:44:25Z",
      "repoId": 1108837393,
      "pullRequestNo": 1449
+    },
+    {
+      "name": "BowTiedSwan",
+      "id": 86532747,
+      "comment_id": 3742668781,
+      "created_at": "2026-01-13T08:05:00Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 741
+    },
+    {
+      "name": "Mang-Joo",
+      "id": 86056915,
+      "comment_id": 3855493558,
+      "created_at": "2026-02-05T18:41:49Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1526
+    },
+    {
+      "name": "shaunmorris",
+      "id": 579820,
+      "comment_id": 3858265174,
+      "created_at": "2026-02-06T06:23:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1541
+    },
+    {
+      "name": "itsnebulalol",
+      "id": 18669106,
+      "comment_id": 3864672624,
+      "created_at": "2026-02-07T15:10:54Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1622
+    },
+    {
+      "name": "mkusaka",
+      "id": 24956031,
+      "comment_id": 3864822328,
+      "created_at": "2026-02-07T16:54:36Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1629
+    },
+    {
+      "name": "quantmind-br",
+      "id": 170503374,
+      "comment_id": 3865064441,
+      "created_at": "2026-02-07T18:38:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1634
+    },
+    {
+      "name": "QiRaining",
+      "id": 13825001,
+      "comment_id": 3865979224,
+      "created_at": "2026-02-08T02:34:46Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1641
+    },
+    {
+      "name": "JunyeongChoi0",
+      "id": 99778164,
+      "comment_id": 3867461224,
+      "created_at": "2026-02-08T16:02:31Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1674
+    },
+    {
+      "name": "aliozdenisik",
+      "id": 106994209,
+      "comment_id": 3867619266,
+      "created_at": "2026-02-08T17:12:34Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1676
+    },
+    {
+      "name": "mrm007",
+      "id": 3297808,
+      "comment_id": 3868350953,
+      "created_at": "2026-02-08T21:41:35Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1680
    }
  ]
 }
--- a/sisyphus-prompt.md
+++ b/sisyphus-prompt.md
@@ -7,7 +7,7 @@

 | Field | Value |
 |-------|-------|
-| Model | `anthropic/claude-opus-4-5` |
+| Model | `anthropic/claude-opus-4-6` |
 | Max Tokens | `64000` |
 | Mode | `primary` |
 | Thinking | Budget: 32000 |
@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
 - "Working with unfamiliar npm/pip/cargo packages"
 ### Pre-Delegation Planning (MANDATORY)

-**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.**
+**BEFORE every `task` call, EXPLICITLY declare your reasoning.**

 #### Step 1: Identify Task Requirements

@@ -236,7 +236,7 @@ Ask yourself:
 **MANDATORY FORMAT:**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Category**: [selected-category-name]
 - **Why this category**: [how category description matches task domain]
 - **load_skills**: [list of selected skills]
@@ -246,14 +246,14 @@ I will use delegate_task with:
 - **Expected Outcome**: [what success looks like]
 ```

-**Then** make the delegate_task call.
+**Then** make the task call.

 #### Examples

 **CORRECT: Full Evaluation**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Category**: [category-name]
 - **Why this category**: Category description says "[quote description]" which matches this task's requirements
 - **load_skills**: ["skill-a", "skill-b"]
@@ -263,9 +263,11 @@ I will use delegate_task with:
  - skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
 - **Expected Outcome**: [concrete deliverable]

-delegate_task(
+task(
  category="[category-name]",
  load_skills=["skill-a", "skill-b"],
+  description="[short task description]",
+  run_in_background=false,
  prompt="..."
 )
 ```
@@ -273,14 +275,16 @@ delegate_task(
 **CORRECT: Agent-Specific (for exploration/consultation)**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Agent**: [agent-name]
 - **Reason**: This requires [agent's specialty] based on agent description
 - **load_skills**: [] (agents have built-in expertise)
 - **Expected Outcome**: [what agent should return]

-delegate_task(
+task(
  subagent_type="[agent-name]",
+  description="[short task description]",
+  run_in_background=false,
  load_skills=[],
  prompt="..."
 )
@@ -289,14 +293,15 @@ delegate_task(
 **CORRECT: Background Exploration**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Agent**: explore
 - **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
 - **load_skills**: []
 - **Expected Outcome**: List of files containing auth patterns

-delegate_task(
+task(
  subagent_type="explore",
+  description="Find auth implementations",
  run_in_background=true,
  load_skills=[],
  prompt="Find all authentication implementations in the codebase"
@@ -306,7 +311,7 @@ delegate_task(
 **WRONG: No Skill Evaluation**

 ```
-delegate_task(category="...", load_skills=[], prompt="...")  // Where's the justification?
+task(category="...", load_skills=[], prompt="...")  // Where's the justification?
 ```

 **WRONG: Vague Category Selection**
@@ -317,7 +322,7 @@ I'll use this category because it seems right.

 #### Enforcement

-**BLOCKING VIOLATION**: If you call `delegate_task` without:
+**BLOCKING VIOLATION**: If you call `task` without:
 1. Explaining WHY category was selected (based on description)
 2. Evaluating EACH available skill for relevance

@@ -329,15 +334,15 @@ I'll use this category because it seems right.
 ```typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
+task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
+task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
-result = delegate_task(...)  // Never wait synchronously for explore/librarian
+result = task(...)  // Never wait synchronously for explore/librarian
 ```

 ### Background Result Collection:
@@ -347,16 +352,16 @@ result = delegate_task(...)  // Never wait synchronously for explore/librarian
 4. BEFORE final answer: `background_cancel(all=true)`

 ### Resume Previous Agent (CRITICAL for efficiency):
-Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED.
+Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.

-**ALWAYS use resume when:**
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"`
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"`
- Multi-turn with same agent → resume instead of new task (saves tokens!)
+**ALWAYS use session_id when:**
+- Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
+- Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
+- Multi-turn with same agent → session_id instead of new task (saves tokens!)

 **Example:**
 ```
-delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.")
+task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
 ```

 ### Search Stop Conditions
@@ -377,7 +382,7 @@ STOP searching when:
 3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
 ### Category + Skills Delegation System

-**delegate_task() combines categories and skills for optimal task execution.**
+**task() combines categories and skills for optimal task execution.**

 #### Available Categories (Domain-Optimized Models)

@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
 ### Delegation Pattern

 ```typescript
-delegate_task(
+task(
  category="[selected-category]",
  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills
  prompt="..."
@@ -451,7 +456,7 @@ delegate_task(

 **ANTI-PATTERN (will produce poor results):**
 ```typescript
-delegate_task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
+task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
 ```
 ### Delegation Table:

--- a/src/AGENTS.md
+++ b/src/AGENTS.md
@@ -0,0 +1,128 @@
+# AGENTS KNOWLEDGE BASE
+
+## OVERVIEW
+
+Main plugin entry point and orchestration layer. 1000+ lines of plugin initialization, hook registration, tool composition, and lifecycle management.
+
+**Core Responsibilities:**
+- Plugin initialization and configuration loading
+- Orchestration of 40+ lifecycle hooks
+- Composition and filtering of 25+ tools
+- Background agent management
+- Session state coordination
+- MCP server lifecycle
+- Tmux integration
+- Claude Code compatibility layer
+
+## STRUCTURE
+```
+src/
+├── index.ts                          # Main plugin entry (1000 lines) - orchestration layer
+├── index.compaction-model-agnostic.static.test.ts  # Compaction hook tests
+├── agents/                           # 11 AI agents (16 files)
+├── cli/                              # CLI commands (9 files) 
+├── config/                           # Schema validation (3 files)
+├── features/                         # Background features (20+ files)
+├── hooks/                            # 40+ lifecycle hooks (14 files)
+├── mcp/                              # MCP server configs (7 files)
+├── plugin-handlers/                  # Config loading (3 files)
+├── shared/                           # Utilities (70 files)
+└── tools/                            # 25+ tools (15 files)
+```
+
+## KEY COMPONENTS
+
+**Plugin Initialization:**
+- `OhMyOpenCodePlugin()`: Main plugin factory (lines 124-841)
+- Configuration loading via `loadPluginConfig()`
+- Hook registration with safe creation patterns
+- Tool composition and disabled tool filtering
+
+**Lifecycle Management:**
+- 40+ hooks: session recovery, continuation enforcers, compaction, context injection
+- Background agent coordination via `BackgroundManager`
+- Tmux session management for multi-pane workflows
+- MCP server lifecycle via `SkillMcpManager`
+
+**Tool Ecosystem:**
+- 25+ tools: LSP, AST-grep, delegation, background tasks, skills
+- Tool filtering based on agent permissions and user config
+- Metadata restoration for tool outputs
+
+**Integration Points:**
+- Claude Code compatibility hooks and commands
+- OpenCode SDK client interactions
+- Session state persistence and recovery
+- Model variant resolution and application
+
+## HOOK REGISTRATION PATTERNS
+
+**Safe Hook Creation:**
+```typescript
+const hook = isHookEnabled("hook-name")
+  ? safeCreateHook("hook-name", () => createHookFactory(ctx), { enabled: safeHookEnabled })
+  : null;
+```
+
+**Hook Categories:**
+- **Session Management**: recovery, notification, compaction
+- **Continuation**: todo/task enforcers, stop guards
+- **Context**: injection, rules, directory content
+- **Tool Enhancement**: output truncation, error recovery, validation
+- **Agent Coordination**: usage reminders, babysitting, delegation
+
+## TOOL COMPOSITION
+
+**Core Tools:**
+```typescript
+const allTools: Record<string, ToolDefinition> = {
+  ...builtinTools,           // Basic file/session operations
+  ...createGrepTools(ctx),   // Content search
+  ...createAstGrepTools(ctx), // AST-aware refactoring
+  task: delegateTask,        // Agent delegation
+  skill: skillTool,          // Skill execution
+  // ... 20+ more tools
+};
+```
+
+**Tool Filtering:**
+- Agent permission-based restrictions
+- User-configured disabled tools
+- Dynamic tool availability based on session state
+
+## SESSION LIFECYCLE
+
+**Session Events:**
+- `session.created`: Initialize session state, tmux setup
+- `session.deleted`: Cleanup resources, clear caches
+- `message.updated`: Update agent assignments
+- `session.error`: Trigger recovery mechanisms
+
+**Continuation Flow:**
+1. User message triggers agent selection
+2. Model/variant resolution applied
+3. Tools execute with hook interception
+4. Continuation enforcers monitor completion
+5. Session compaction preserves context
+
+## CONFIGURATION INTEGRATION
+
+**Plugin Config Loading:**
+- Project + user config merging
+- Schema validation via Zod
+- Migration support for legacy configs
+- Dynamic feature enablement
+
+**Runtime Configuration:**
+- Hook enablement based on `disabled_hooks`
+- Tool filtering via `disabled_tools`
+- Agent overrides and category definitions
+- Experimental feature toggles
+
+## ANTI-PATTERNS
+
+- **Direct hook exports**: Avoid — create all hooks via factories for testability
+- **Global state pollution**: Avoid — keep state session-scoped
+- **Synchronous blocking**: Avoid — the architecture is async-first with background coordination
+- **Tight coupling**: Avoid — plugin components communicate via events, not direct calls
+- **Memory leaks**: Avoid — clean up properly on session deletion and plugin unload
--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -2,7 +2,7 @@

 ## OVERVIEW

-11 AI agents for multi-model orchestration. Each agent has factory function + metadata + fallback chains.
+32 files containing AI agents and utilities for multi-model orchestration. Each agent has factory function + metadata + fallback chains.

 **Primary Agents** (respect UI model selection):
 - Sisyphus, Atlas, Prometheus
@@ -13,36 +13,50 @@
 ## STRUCTURE
 ```
 agents/
-├── atlas.ts                    # Master Orchestrator (holds todo list)
-├── sisyphus.ts                 # Main prompt (SF Bay Area engineer identity)
-├── hephaestus.ts               # Autonomous Deep Worker (GPT 5.2 Codex, "The Legitimate Craftsman")
-├── sisyphus-junior.ts          # Delegated task executor (category-spawned)
+├── atlas/                      # Master Orchestrator (holds todo list)
+│   ├── index.ts
+│   ├── default.ts              # Claude-optimized prompt (390 lines)
+│   ├── gpt.ts                  # GPT-optimized prompt (330 lines)
+│   └── utils.ts
+├── prometheus/                 # Planning Agent (Interview/Consultant mode)
+│   ├── index.ts
+│   ├── plan-template.ts        # Work plan structure (423 lines)
+│   ├── interview-mode.ts       # Interview flow (335 lines)
+│   ├── plan-generation.ts
+│   ├── high-accuracy-mode.ts
+│   ├── identity-constraints.ts # Identity rules (301 lines)
+│   └── behavioral-summary.ts
+├── sisyphus-junior/            # Delegated task executor (category-spawned)
+│   ├── index.ts
+│   ├── default.ts
+│   └── gpt.ts
+├── sisyphus.ts                 # Main orchestrator prompt (530 lines)
+├── hephaestus.ts               # Autonomous deep worker (618 lines, GPT 5.3 Codex)
 ├── oracle.ts                   # Strategic advisor (GPT-5.2)
-├── librarian.ts                # Multi-repo research (GitHub CLI, Context7)
-├── explore.ts                  # Fast contextual grep (Grok Code Fast)
+├── librarian.ts                # Multi-repo research (328 lines)
+├── explore.ts                  # Fast contextual grep
 ├── multimodal-looker.ts        # Media analyzer (Gemini 3 Flash)
-├── prometheus-prompt.ts        # Planning (Interview/Consultant mode, 1283 lines)
-├── metis.ts                    # Pre-planning analysis (Gap detection)
-├── momus.ts                    # Plan reviewer (Ruthless fault-finding)
-├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation
+├── metis.ts                    # Pre-planning analysis (347 lines)
+├── momus.ts                    # Plan reviewer
+├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation (431 lines)
 ├── types.ts                    # AgentModelConfig, AgentPromptMetadata
-├── utils.ts                    # createBuiltinAgents(), resolveModelWithFallback()
+├── utils.ts                    # createBuiltinAgents(), resolveModelWithFallback() (485 lines)
 └── index.ts                    # builtinAgents export
 ```

 ## AGENT MODELS
 | Agent | Model | Temp | Purpose |
 |-------|-------|------|---------|
-| Sisyphus | anthropic/claude-opus-4-5 | 0.1 | Primary orchestrator (fallback: kimi-k2.5 → glm-4.7 → gpt-5.2-codex → gemini-3-pro) |
-| Hephaestus | openai/gpt-5.2-codex | 0.1 | Autonomous deep worker, "The Legitimate Craftsman" (requires gpt-5.2-codex, no fallback) |
+| Sisyphus | anthropic/claude-opus-4-6 | 0.1 | Primary orchestrator (fallback: kimi-k2.5 → glm-4.7 → gpt-5.3-codex → gemini-3-pro) |
+| Hephaestus | openai/gpt-5.3-codex | 0.1 | Autonomous deep worker, "The Legitimate Craftsman" (requires gpt-5.3-codex, no fallback) |
 | Atlas | anthropic/claude-sonnet-4-5 | 0.1 | Master orchestrator (fallback: kimi-k2.5 → gpt-5.2) |
 | oracle | openai/gpt-5.2 | 0.1 | Consultation, debugging |
 | librarian | zai-coding-plan/glm-4.7 | 0.1 | Docs, GitHub search (fallback: glm-4.7-free) |
 | explore | xai/grok-code-fast-1 | 0.1 | Fast contextual grep (fallback: claude-haiku-4-5 → gpt-5-mini → gpt-5-nano) |
 | multimodal-looker | google/gemini-3-flash | 0.1 | PDF/image analysis |
-| Prometheus | anthropic/claude-opus-4-5 | 0.1 | Strategic planning (fallback: kimi-k2.5 → gpt-5.2) |
-| Metis | anthropic/claude-opus-4-5 | 0.3 | Pre-planning analysis (fallback: kimi-k2.5 → gpt-5.2) |
-| Momus | openai/gpt-5.2 | 0.1 | Plan validation (fallback: claude-opus-4-5) |
+| Prometheus | anthropic/claude-opus-4-6 | 0.1 | Strategic planning (fallback: kimi-k2.5 → gpt-5.2) |
+| Metis | anthropic/claude-opus-4-6 | 0.3 | Pre-planning analysis (fallback: kimi-k2.5 → gpt-5.2) |
+| Momus | openai/gpt-5.2 | 0.1 | Plan validation (fallback: claude-opus-4-6) |
 | Sisyphus-Junior | anthropic/claude-sonnet-4-5 | 0.1 | Category-spawned executor |

 ## HOW TO ADD
@@ -54,20 +68,22 @@ agents/
 ## TOOL RESTRICTIONS
 | Agent | Denied Tools |
 |-------|-------------|
-| oracle | write, edit, task, delegate_task |
-| librarian | write, edit, task, delegate_task, call_omo_agent |
-| explore | write, edit, task, delegate_task, call_omo_agent |
+| oracle | write, edit, task |
+| librarian | write, edit, task, call_omo_agent |
+| explore | write, edit, task, call_omo_agent |
 | multimodal-looker | Allowlist: read only |
-| Sisyphus-Junior | task, delegate_task |
+| Sisyphus-Junior | task |
+| Atlas | task, call_omo_agent |

 ## PATTERNS
 - **Factory**: `createXXXAgent(model: string): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers.
- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`.
- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas.
+- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers
+- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`
+- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas
+- **Model-specific routing**: Atlas, Sisyphus-Junior have GPT vs Claude prompt variants

 ## ANTI-PATTERNS
- **Trust reports**: NEVER trust "I'm done" - verify outputs.
- **High temp**: Don't use >0.3 for code agents.
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration.
- **Prometheus writing code**: Planner only - never implements.
+- **Trust reports**: NEVER trust "I'm done" - verify outputs
+- **High temp**: Don't use >0.3 for code agents
+- **Sequential calls**: Use `task` with `run_in_background` for exploration
+- **Prometheus writing code**: Planner only - never implements
--- a/src/agents/atlas/default.ts
+++ b/src/agents/atlas/default.ts
@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
 </identity>

 <mission>
-Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
+Complete ALL tasks in a work plan via \`task()\` until fully done.
 One task per delegation. Parallel when independent. Verify everything.
 </mission>

 <delegation_system>
 ## How to Delegate

-Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
+Use \`task()\` with EITHER category OR agent (mutually exclusive):

 \`\`\`typescript
 // Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
-delegate_task(
+task(
  category="[category-name]",
  load_skills=["skill-1", "skill-2"],
  run_in_background=false,
@@ -38,7 +38,7 @@ delegate_task(
 )

 // Option B: Specialized Agent (for specific expert tasks)
-delegate_task(
+task(
  subagent_type="[agent-name]",
  load_skills=[],
  run_in_background=false,
@@ -58,7 +58,7 @@ delegate_task(

 ## 6-Section Prompt Structure (MANDATORY)

-Every \`delegate_task()\` prompt MUST include ALL 6 sections:
+Every \`task()\` prompt MUST include ALL 6 sections:

 \`\`\`markdown
 ## 1. TASK
@@ -149,7 +149,7 @@ Structure:
 ### 3.1 Check Parallelization
 If tasks can run in parallel:
 - Prepare prompts for ALL parallelizable tasks
- Invoke multiple \`delegate_task()\` in ONE message
+- Invoke multiple \`task()\` in ONE message
 - Wait for all to complete
 - Verify all, then continue

@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")

 Extract wisdom and include in prompt.

-### 3.3 Invoke delegate_task()
+### 3.3 Invoke task()

 \`\`\`typescript
-delegate_task(
+task(
  category="[category]",
  load_skills=["[relevant-skills]"],
  run_in_background=false,
@@ -210,7 +210,7 @@ delegate_task(

 **If verification fails**: Resume the SAME session with the ACTUAL error output:
 \`\`\`typescript
-delegate_task(
+task(
  session_id="ses_xyz789",  // ALWAYS use the session from the failed task
  load_skills=[...],
  prompt="Verification failed: {actual error}. Fix."
@@ -221,13 +221,13 @@ delegate_task(

 **CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**

-Every \`delegate_task()\` output includes a session_id. STORE IT.
+Every \`task()\` output includes a session_id. STORE IT.

 If task fails:
 1. Identify what went wrong
 2. **Resume the SAME session** - subagent has full context already:
    \`\`\`typescript
-    delegate_task(
+    task(
      session_id="ses_xyz789",  // Session from failed task
      load_skills=[...],
      prompt="FAILED: {error}. Fix by: {specific instruction}"
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:

 **For exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-delegate_task(subagent_type="explore", run_in_background=true, ...)
-delegate_task(subagent_type="librarian", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
+task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **For task execution**: NEVER background
 \`\`\`typescript
-delegate_task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
 \`\`\`typescript
 // Tasks 2, 3, 4 are independent - invoke together
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
 \`\`\`

 **Background management**:
--- a/src/agents/atlas/gpt.ts
+++ b/src/agents/atlas/gpt.ts
@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
 </identity>

 <mission>
-Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
+Complete ALL tasks in a work plan via \`task()\` until fully done.
 - One task per delegation
 - Parallel when independent
 - Verify everything
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
 <delegation_system>
 ## Delegation API

-Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
+Use \`task()\` with EITHER category OR agent (mutually exclusive):

 \`\`\`typescript
 // Category + Skills (spawns Sisyphus-Junior)
-delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
+task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")

 // Specialized Agent
-delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
+task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
 \`\`\`

 {CATEGORY_SECTION}
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,

 ## 6-Section Prompt Structure (MANDATORY)

-Every \`delegate_task()\` prompt MUST include ALL 6 sections:
+Every \`task()\` prompt MUST include ALL 6 sections:

 \`\`\`markdown
 ## 1. TASK
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
 ## Step 3: Execute Tasks

 ### 3.1 Parallelization Check
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message
+- Parallel tasks → invoke multiple \`task()\` in ONE message
 - Sequential → process one at a time

 ### 3.2 Pre-Delegation (MANDATORY)
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
 \`\`\`
 Extract wisdom → include in prompt.

-### 3.3 Invoke delegate_task()
+### 3.3 Invoke task()

 \`\`\`typescript
-delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
+task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
 \`\`\`

 ### 3.4 Verify (PROJECT-LEVEL QA)
@@ -201,7 +201,7 @@ Checklist:
 **CRITICAL: Use \`session_id\` for retries.**

 \`\`\`typescript
-delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
+task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
 \`\`\`

 - Maximum 3 retries per task
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
 <parallel_execution>
 **Exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-delegate_task(subagent_type="explore", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **Task execution**: NEVER background
 \`\`\`typescript
-delegate_task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
 \`\`\`typescript
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
 \`\`\`

 **Background management**:
--- a/src/agents/atlas/index.ts
+++ b/src/agents/atlas/index.ts
@@ -1,7 +1,7 @@
 /**
 * Atlas - Master Orchestrator Agent
 *
- * Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done.
+ * Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
 * You are the conductor of a symphony of specialized agents.
 *
 * Routing:
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {

  const baseConfig = {
    description:
-      "Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
+      "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
    mode: MODE,
    ...(ctx.model ? { model: ctx.model } : {}),
    temperature: 0.1,
--- a/src/agents/atlas/utils.ts
+++ b/src/agents/atlas/utils.ts
@@ -6,23 +6,24 @@
 */

 import type { CategoryConfig } from "../../config/schema"
-import type { AvailableAgent, AvailableSkill } from "../dynamic-agent-prompt-builder"
+import { formatCustomSkillsBlock, type AvailableAgent, type AvailableSkill } from "../dynamic-agent-prompt-builder"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../../tools/delegate-task/constants"
+import { truncateDescription } from "../../shared/truncate-description"

 export const getCategoryDescription = (name: string, userCategories?: Record<string, CategoryConfig>) =>
  userCategories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks"

 export function buildAgentSelectionSection(agents: AvailableAgent[]): string {
-  if (agents.length === 0) {
-    return `##### Option B: Use AGENT directly (for specialized experts)
+   if (agents.length === 0) {
+     return `##### Option B: Use AGENT directly (for specialized experts)

-No agents available.`
-  }
+ No agents available.`
+   }

-  const rows = agents.map((a) => {
-    const shortDesc = a.description.split(".")[0] || a.description
-    return `| \`${a.name}\` | ${shortDesc} |`
-  })
+   const rows = agents.map((a) => {
+     const shortDesc = truncateDescription(a.description)
+     return `| \`${a.name}\` | ${shortDesc} |`
+   })

  return `##### Option B: Use AGENT directly (for specialized experts)

@@ -47,7 +48,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
 ${categoryRows.join("\n")}

 \`\`\`typescript
-delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
+task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
 \`\`\``
 }

@@ -56,21 +57,48 @@ export function buildSkillsSection(skills: AvailableSkill[]): string {
    return ""
  }

-  const skillRows = skills.map((s) => {
-    const shortDesc = s.description.split(".")[0] || s.description
-    return `| \`${s.name}\` | ${shortDesc} |`
-  })
+  const builtinSkills = skills.filter((s) => s.location === "plugin")
+  const customSkills = skills.filter((s) => s.location !== "plugin")
+
+   const builtinRows = builtinSkills.map((s) => {
+     const shortDesc = truncateDescription(s.description)
+     return `| \`${s.name}\` | ${shortDesc} |`
+   })
+
+   const customRows = customSkills.map((s) => {
+     const shortDesc = truncateDescription(s.description)
+     const source = s.location === "project" ? "project" : "user"
+     return `| \`${s.name}\` | ${shortDesc} | ${source} |`
+   })
+
+  const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills, "**")
+
+  let skillsTable: string
+
+  if (customSkills.length > 0 && builtinSkills.length > 0) {
+    skillsTable = `**Built-in Skills:**
+
+| Skill | When to Use |
+|-------|-------------|
+${builtinRows.join("\n")}
+
+${customSkillBlock}`
+  } else if (customSkills.length > 0) {
+    skillsTable = customSkillBlock
+  } else {
+    skillsTable = `| Skill | When to Use |
+|-------|-------------|
+${builtinRows.join("\n")}`
+  }

  return `
 #### 3.2.2: Skill Selection (PREPEND TO PROMPT)

 **Skills are specialized instructions that guide subagent behavior. Consider them alongside category selection.**

-| Skill | When to Use |
-|-------|-------------|
-${skillRows.join("\n")}
+${skillsTable}

-**MANDATORY: Evaluate ALL skills for relevance to your task.**
+**MANDATORY: Evaluate ALL skills (built-in AND user-installed) for relevance to your task.**

 Read each skill's description and ask: "Does this skill's domain overlap with my task?"
 - If YES: INCLUDE in load_skills=[...]
@@ -78,7 +106,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my

 **Usage:**
 \`\`\`typescript
-delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
+task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
 \`\`\`

 **IMPORTANT:**
@@ -94,10 +122,10 @@ export function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: R
    `| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
  )

-  const agentRows = agents.map((a) => {
-    const shortDesc = a.description.split(".")[0] || a.description
-    return `| ${shortDesc} | \`agent="${a.name}"\` |`
-  })
+   const agentRows = agents.map((a) => {
+     const shortDesc = truncateDescription(a.description)
+     return `| ${shortDesc} | \`agent="${a.name}"\` |`
+   })

  return `##### Decision Matrix

--- a/src/agents/dynamic-agent-prompt-builder.test.ts
+++ b/src/agents/dynamic-agent-prompt-builder.test.ts
@@ -0,0 +1,205 @@
+/// <reference types="bun-types" />
+
+import { describe, it, expect } from "bun:test"
+import {
+  buildCategorySkillsDelegationGuide,
+  buildUltraworkSection,
+  formatCustomSkillsBlock,
+  type AvailableSkill,
+  type AvailableCategory,
+  type AvailableAgent,
+} from "./dynamic-agent-prompt-builder"
+
+describe("buildCategorySkillsDelegationGuide", () => {
+  const categories: AvailableCategory[] = [
+    { name: "visual-engineering", description: "Frontend, UI/UX" },
+    { name: "quick", description: "Trivial tasks" },
+  ]
+
+  const builtinSkills: AvailableSkill[] = [
+    { name: "playwright", description: "Browser automation via Playwright", location: "plugin" },
+    { name: "frontend-ui-ux", description: "Designer-turned-developer", location: "plugin" },
+  ]
+
+  const customUserSkills: AvailableSkill[] = [
+    { name: "react-19", description: "React 19 patterns and best practices", location: "user" },
+    { name: "tailwind-4", description: "Tailwind CSS v4 utilities", location: "user" },
+  ]
+
+  const customProjectSkills: AvailableSkill[] = [
+    { name: "our-design-system", description: "Internal design system components", location: "project" },
+  ]
+
+  it("should separate builtin and custom skills into distinct sections", () => {
+    //#given: mix of builtin and custom skills
+    const allSkills = [...builtinSkills, ...customUserSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: should have separate sections
+    expect(result).toContain("Built-in Skills")
+    expect(result).toContain("User-Installed Skills")
+    expect(result).toContain("HIGH PRIORITY")
+  })
+
+  it("should include custom skill names in CRITICAL warning", () => {
+    //#given: custom skills installed
+    const allSkills = [...builtinSkills, ...customUserSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: should mention custom skills by name in the warning
+    expect(result).toContain('"react-19"')
+    expect(result).toContain('"tailwind-4"')
+    expect(result).toContain("CRITICAL")
+  })
+
+  it("should show source column for custom skills (user vs project)", () => {
+    //#given: both user and project custom skills
+    const allSkills = [...builtinSkills, ...customUserSkills, ...customProjectSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: should show source for each custom skill
+    expect(result).toContain("| user |")
+    expect(result).toContain("| project |")
+  })
+
+  it("should not show custom skill section when only builtin skills exist", () => {
+    //#given: only builtin skills
+    const allSkills = [...builtinSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: should not contain custom skill emphasis
+    expect(result).not.toContain("User-Installed Skills")
+    expect(result).not.toContain("HIGH PRIORITY")
+    expect(result).toContain("Available Skills")
+  })
+
+  it("should handle only custom skills (no builtins)", () => {
+    //#given: only custom skills, no builtins
+    const allSkills = [...customUserSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: should show custom skills with emphasis, no builtin section
+    expect(result).toContain("User-Installed Skills")
+    expect(result).toContain("HIGH PRIORITY")
+    expect(result).not.toContain("Built-in Skills")
+  })
+
+  it("should include priority note for custom skills in evaluation step", () => {
+    //#given: custom skills present
+    const allSkills = [...builtinSkills, ...customUserSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: evaluation section should mention user-installed priority
+    expect(result).toContain("User-installed skills get PRIORITY")
+    expect(result).toContain("INCLUDE it rather than omit it")
+  })
+
+  it("should NOT include priority note when no custom skills", () => {
+    //#given: only builtin skills
+    const allSkills = [...builtinSkills]
+
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide(categories, allSkills)
+
+    //#then: no priority note for custom skills
+    expect(result).not.toContain("User-installed skills get PRIORITY")
+  })
+
+  it("should return empty string when no categories and no skills", () => {
+    //#given: no categories and no skills
+    //#when: building the delegation guide
+    const result = buildCategorySkillsDelegationGuide([], [])
+
+    //#then: should return empty string
+    expect(result).toBe("")
+  })
+})
+
+describe("buildUltraworkSection", () => {
+  const agents: AvailableAgent[] = []
+
+  it("should separate builtin and custom skills", () => {
+    //#given: mix of builtin and custom skills
+    const skills: AvailableSkill[] = [
+      { name: "playwright", description: "Browser automation", location: "plugin" },
+      { name: "react-19", description: "React 19 patterns", location: "user" },
+    ]
+
+    //#when: building ultrawork section
+    const result = buildUltraworkSection(agents, [], skills)
+
+    //#then: should have separate sections
+    expect(result).toContain("Built-in Skills")
+    expect(result).toContain("User-Installed Skills")
+    expect(result).toContain("HIGH PRIORITY")
+  })
+
+  it("should not separate when only builtin skills", () => {
+    //#given: only builtin skills
+    const skills: AvailableSkill[] = [
+      { name: "playwright", description: "Browser automation", location: "plugin" },
+    ]
+
+    //#when: building ultrawork section
+    const result = buildUltraworkSection(agents, [], skills)
+
+    //#then: should have single section
+    expect(result).toContain("Built-in Skills")
+    expect(result).not.toContain("User-Installed Skills")
+  })
+})
+
+describe("formatCustomSkillsBlock", () => {
+  const customSkills: AvailableSkill[] = [
+    { name: "react-19", description: "React 19 patterns", location: "user" },
+    { name: "tailwind-4", description: "Tailwind v4", location: "project" },
+  ]
+
+  const customRows = customSkills.map((s) => {
+    const source = s.location === "project" ? "project" : "user"
+    return `| \`${s.name}\` | ${s.description} | ${source} |`
+  })
+
+  it("should produce consistent output used by both builders", () => {
+    //#given: custom skills and rows
+    //#when: formatting with default header level
+    const result = formatCustomSkillsBlock(customRows, customSkills)
+
+    //#then: contains all expected elements
+    expect(result).toContain("User-Installed Skills (HIGH PRIORITY)")
+    expect(result).toContain("CRITICAL")
+    expect(result).toContain('"react-19"')
+    expect(result).toContain('"tailwind-4"')
+    expect(result).toContain("| user |")
+    expect(result).toContain("| project |")
+  })
+
+  it("should use #### header by default", () => {
+    //#given: default header level
+    const result = formatCustomSkillsBlock(customRows, customSkills)
+
+    //#then: uses markdown h4
+    expect(result).toContain("#### User-Installed Skills")
+  })
+
+  it("should use bold header when specified", () => {
+    //#given: bold header level (used by Atlas)
+    const result = formatCustomSkillsBlock(customRows, customSkills, "**")
+
+    //#then: uses bold instead of h4
+    expect(result).toContain("**User-Installed Skills (HIGH PRIORITY):**")
+    expect(result).not.toContain("#### User-Installed Skills")
+  })
+})
--- a/src/agents/dynamic-agent-prompt-builder.ts
+++ b/src/agents/dynamic-agent-prompt-builder.ts
@@ -1,7 +1,8 @@
-import type { AgentPromptMetadata, BuiltinAgentName } from "./types"
+import type { AgentPromptMetadata } from "./types"
+import { truncateDescription } from "../shared/truncate-description"

 export interface AvailableAgent {
-  name: BuiltinAgentName
+  name: string
  description: string
  metadata: AgentPromptMetadata
 }
@@ -20,6 +21,7 @@ export interface AvailableSkill {
 export interface AvailableCategory {
  name: string
  description: string
+  model?: string
 }

 export function categorizeTools(toolNames: string[]): AvailableTool[] {
@@ -166,6 +168,33 @@ export function buildDelegationTable(agents: AvailableAgent[]): string {
  return rows.join("\n")
 }

+/**
+ * Renders the "User-Installed Skills (HIGH PRIORITY)" block used across multiple agent prompts.
+ * Extracted to avoid duplication between buildCategorySkillsDelegationGuide, buildSkillsSection, etc.
+ */
+export function formatCustomSkillsBlock(
+  customRows: string[],
+  customSkills: AvailableSkill[],
+  headerLevel: "####" | "**" = "####"
+): string {
+  const customSkillNames = customSkills.map((s) => `"${s.name}"`).join(", ")
+  const header = headerLevel === "####"
+    ? `#### User-Installed Skills (HIGH PRIORITY)`
+    : `**User-Installed Skills (HIGH PRIORITY):**`
+
+  return `${header}
+
+**The user has installed these custom skills. They MUST be evaluated for EVERY delegation.**
+Subagents are STATELESS — they lose all custom knowledge unless you pass these skills via \`load_skills\`.
+
+| Skill | Expertise Domain | Source |
+|-------|------------------|--------|
+${customRows.join("\n")}
+
+> **CRITICAL**: Ignoring user-installed skills when they match the task domain is a failure.
+> The user installed ${customSkillNames} for a reason — USE THEM when the task overlaps with their domain.`
+}
+
 export function buildCategorySkillsDelegationGuide(categories: AvailableCategory[], skills: AvailableSkill[]): string {
  if (categories.length === 0 && skills.length === 0) return ""

@@ -174,14 +203,47 @@ export function buildCategorySkillsDelegationGuide(categories: AvailableCategory
    return `| \`${c.name}\` | ${desc} |`
  })

-  const skillRows = skills.map((s) => {
-    const desc = s.description.split(".")[0] || s.description
-    return `| \`${s.name}\` | ${desc} |`
-  })
+  const builtinSkills = skills.filter((s) => s.location === "plugin")
+  const customSkills = skills.filter((s) => s.location !== "plugin")
+
+   const builtinRows = builtinSkills.map((s) => {
+     const desc = truncateDescription(s.description)
+     return `| \`${s.name}\` | ${desc} |`
+   })
+
+   const customRows = customSkills.map((s) => {
+     const desc = truncateDescription(s.description)
+     const source = s.location === "project" ? "project" : "user"
+     return `| \`${s.name}\` | ${desc} | ${source} |`
+   })
+
+  const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills)
+
+  let skillsSection: string
+
+  if (customSkills.length > 0 && builtinSkills.length > 0) {
+    skillsSection = `#### Built-in Skills
+
+| Skill | Expertise Domain |
+|-------|------------------|
+${builtinRows.join("\n")}
+
+${customSkillBlock}`
+  } else if (customSkills.length > 0) {
+    skillsSection = customSkillBlock
+  } else {
+    skillsSection = `#### Available Skills (Domain Expertise Injection)
+
+Skills inject specialized instructions into the subagent. Read the description to understand when each skill applies.
+
+| Skill | Expertise Domain |
+|-------|------------------|
+${builtinRows.join("\n")}`
+  }

  return `### Category + Skills Delegation System

-**delegate_task() combines categories and skills for optimal task execution.**
+**task() combines categories and skills for optimal task execution.**

 #### Available Categories (Domain-Optimized Models)

@@ -191,13 +253,7 @@ Each category is configured with a model optimized for that domain. Read the des
 |----------|-------------------|
 ${categoryRows.join("\n")}

-#### Available Skills (Domain Expertise Injection)
-
-Skills inject specialized instructions into the subagent. Read the description to understand when each skill applies.
-
-| Skill | Expertise Domain |
-|-------|------------------|
-${skillRows.join("\n")}
+${skillsSection}

 ---

@@ -208,12 +264,15 @@ ${skillRows.join("\n")}
 - Match task requirements to category domain
 - Select the category whose domain BEST fits the task

-**STEP 2: Evaluate ALL Skills**
+**STEP 2: Evaluate ALL Skills (Built-in AND User-Installed)**
 For EVERY skill listed above, ask yourself:
 > "Does this skill's expertise domain overlap with my task?"

 - If YES → INCLUDE in \`load_skills=[...]\`
 - If NO → You MUST justify why (see below)
+${customSkills.length > 0 ? `
+> **User-installed skills get PRIORITY.** The user explicitly installed them for their workflow.
+> When in doubt about a user-installed skill, INCLUDE it rather than omit it.` : ""}

 **STEP 3: Justify Omissions**

@@ -238,16 +297,16 @@ SKILL EVALUATION for "[skill-name]":
 ### Delegation Pattern

 \`\`\`typescript
-delegate_task(
+task(
  category="[selected-category]",
-  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills
+  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills — ESPECIALLY user-installed ones
  prompt="..."
 )
 \`\`\`

 **ANTI-PATTERN (will produce poor results):**
 \`\`\`typescript
-delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...")  // Empty load_skills without justification
+task(category="...", load_skills=[], run_in_background=false, prompt="...")  // Empty load_skills without justification
 \`\`\``
 }

@@ -328,12 +387,26 @@ export function buildUltraworkSection(
  }

  if (skills.length > 0) {
-    lines.push("**Skills** (combine with categories - EVALUATE ALL for relevance):")
-    for (const skill of skills) {
-      const shortDesc = skill.description.split(".")[0] || skill.description
-      lines.push(`- \`${skill.name}\`: ${shortDesc}`)
+    const builtinSkills = skills.filter((s) => s.location === "plugin")
+    const customSkills = skills.filter((s) => s.location !== "plugin")
+
+    if (builtinSkills.length > 0) {
+      lines.push("**Built-in Skills** (combine with categories):")
+      for (const skill of builtinSkills) {
+        const shortDesc = skill.description.split(".")[0] || skill.description
+        lines.push(`- \`${skill.name}\`: ${shortDesc}`)
+      }
+      lines.push("")
+    }
+
+    if (customSkills.length > 0) {
+      lines.push("**User-Installed Skills** (HIGH PRIORITY - user installed these for their workflow):")
+      for (const skill of customSkills) {
+        const shortDesc = skill.description.split(".")[0] || skill.description
+        lines.push(`- \`${skill.name}\`: ${shortDesc}`)
+      }
+      lines.push("")
    }
-    lines.push("")
  }

  if (agents.length > 0) {
@@ -349,7 +422,7 @@ export function buildUltraworkSection(

    lines.push("**Agents** (for specialized consultation/exploration):")
    for (const agent of sortedAgents) {
-      const shortDesc = agent.description.split(".")[0] || agent.description
+      const shortDesc = agent.description.length > 120 ? agent.description.slice(0, 120) + "..." : agent.description
      const suffix = agent.name === "explore" || agent.name === "librarian" ? " (multiple)" : ""
      lines.push(`- \`${agent.name}${suffix}\`: ${shortDesc}`)
    }
--- a/src/agents/explore.ts
+++ b/src/agents/explore.ts
@@ -29,7 +29,7 @@ export function createExploreAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
+    "task",
    "call_omo_agent",
  ])

--- a/src/agents/hephaestus.ts
+++ b/src/agents/hephaestus.ts
@@ -142,6 +142,19 @@ You operate as a **Senior Staff Engineer** with deep expertise in:

 You do not guess. You verify. You do not stop early. You complete.

+## Core Principle (HIGHEST PRIORITY)
+
+**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.**
+
+When blocked:
+1. Try a different approach (there's always another way)
+2. Decompose the problem into smaller pieces
+3. Challenge your assumptions
+4. Explore how others solved similar problems
+
+Asking the user is the LAST resort after exhausting creative alternatives.
+Your job is to SOLVE problems, not report them.
+
 ## Hard Constraints (MUST READ FIRST - GPT 5.2 Constraint-First)

 ${hardBlocks}
@@ -214,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*

 **Delegation Check (MANDATORY before acting directly):**
 1. Is there a specialized agent that perfectly matches this request?
-2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with?
-   - MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\`
+2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
+   - MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
 3. Can I do it myself for the best result, FOR SURE?

 **Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
@@ -267,15 +280,15 @@ ${librarianSection}
 // CORRECT: Always background, always parallel
 // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
 // Continue immediately - collect results when needed

 // WRONG: Sequential or blocking - NEVER DO THIS
-result = delegate_task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
+result = task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
 \`\`\`

 **Rules:**
@@ -380,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 ### Session Continuity (MANDATORY)

-Every \`delegate_task()\` output includes a session_id. **USE IT.**
+Every \`task()\` output includes a session_id. **USE IT.**

 **ALWAYS continue when:**
 | Scenario | Action |
@@ -404,6 +417,13 @@ Only terminate your turn when you are SURE the problem is SOLVED.
 Autonomously resolve the query to the BEST of your ability.
 Do NOT guess. Do NOT ask unnecessary questions. Do NOT stop early.

+**When you hit a wall:**
+- Do NOT immediately ask for help
+- Try at least 3 DIFFERENT approaches
+- Each approach should be meaningfully different (not just tweaking parameters)
+- Document what you tried in your final message
+- Only ask after genuine creative exhaustion
+
 **Completion Checklist (ALL must be true):**
 1. User asked for X → X is FULLY implemented (not partial, not "basic version")
 2. X passes lsp_diagnostics (zero errors on ALL modified files)
@@ -459,9 +479,9 @@ Do NOT guess. Do NOT ask unnecessary questions. Do NOT stop early.
 - Each update must include concrete outcome ("Found X", "Updated Y")

 **Scope:**
-- Implement EXACTLY what user requests
-- No extra features, no embellishments
-- Simplest valid interpretation for ambiguous instructions
+- Implement what user requests
+- When blocked, autonomously try alternative approaches before asking
+- No unnecessary features, but solve blockers creatively
 </output_contract>

 ## Response Compaction (LONG CONTEXT HANDLING)
@@ -545,21 +565,27 @@ When working on long sessions or complex multi-file tasks:
 2. Re-verify after EVERY fix attempt
 3. Never shotgun debug

-### After 3 Consecutive Failures
+### After Failure (AUTONOMOUS RECOVERY)
+
+1. **Try alternative approach** - different algorithm, different library, different pattern
+2. **Decompose** - break into smaller, independently solvable steps
+3. **Challenge assumptions** - what if your initial interpretation was wrong?
+4. **Explore more** - fire explore/librarian agents for similar problems solved elsewhere
+
+### After 3 DIFFERENT Approaches Fail

 1. **STOP** all edits
 2. **REVERT** to last working state
-3. **DOCUMENT** what failed
+3. **DOCUMENT** what you tried (all 3 approaches)
 4. **CONSULT** Oracle with full context
-5. If unresolved, **ASK USER**
+5. If Oracle cannot help, **ASK USER** with clear explanation of attempts

 **Never**: Leave code broken, delete failing tests, continue hoping

 ## Soft Guidelines

 - Prefer existing libraries over new dependencies
-- Prefer small, focused changes over large refactors
-- When uncertain about scope, ask`
+- Prefer small, focused changes over large refactors`
 }

 export function createHephaestusAgent(
--- a/src/agents/librarian.ts
+++ b/src/agents/librarian.ts
@@ -26,7 +26,7 @@ export function createLibrarianAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
+    "task",
    "call_omo_agent",
  ])

--- a/src/agents/metis.ts
+++ b/src/agents/metis.ts
@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
  "write",
  "edit",
  "task",
-  "delegate_task",
 ])

 export function createMetisAgent(model: string): AgentConfig {
--- a/src/agents/momus.ts
+++ b/src/agents/momus.ts
@@ -193,7 +193,7 @@ export function createMomusAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
+    "task",
  ])

  const base = {
--- a/src/agents/oracle.ts
+++ b/src/agents/oracle.ts
@@ -147,7 +147,7 @@ export function createOracleAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
+    "task",
  ])

  const base = {
--- a/src/agents/prometheus/high-accuracy-mode.ts
+++ b/src/agents/prometheus/high-accuracy-mode.ts
@@ -15,8 +15,9 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
 \`\`\`typescript
 // After generating initial plan
 while (true) {
-  const result = delegate_task(
+  const result = task(
    subagent_type="momus",
+    load_skills=[],
    prompt=".sisyphus/plans/{name}.md",
    run_in_background=false
  )
--- a/src/agents/prometheus/interview-mode.ts
+++ b/src/agents/prometheus/interview-mode.ts
@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
 **Research First:**
 \`\`\`typescript
 // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
-delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
-delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
 \`\`\`typescript
 // Launch BEFORE asking user questions
 // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
-delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
-delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
 \`\`\`

 **Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js

 Run this check:
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
 \`\`\`

 #### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:

 **Research First:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
 \`\`\`

 **Oracle Consultation** (recommend when stakes are high):
 \`\`\`typescript
-delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
+task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false)
 \`\`\`

 **Interview Focus:**
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:

 **Parallel Investigation:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i

 **For Understanding Codebase:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
 \`\`\`

 **For External Knowledge:**
 \`\`\`typescript
-delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
 \`\`\`

 **For Implementation Examples:**
 \`\`\`typescript
-delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
 \`\`\`

 ## Interview Mode Anti-Patterns
--- a/src/agents/prometheus/plan-generation.ts
+++ b/src/agents/prometheus/plan-generation.ts
@@ -59,8 +59,9 @@ todoWrite([
 **BEFORE generating the plan**, summon Metis to catch what you might have missed:

 \`\`\`typescript
-delegate_task(
+task(
  subagent_type="metis",
+  load_skills=[],
  prompt=\`Review this planning session before I generate the work plan:

  **User's Goal**: {summarize what user wants}
--- a/src/agents/prometheus/plan-template.ts
+++ b/src/agents/prometheus/plan-template.ts
@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential

 | Wave | Tasks | Recommended Agents |
 |------|-------|-------------------|
-| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) |
+| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
 | 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
 | 3 | 4 | final integration task |

--- a/src/agents/sisyphus-junior/default.ts
+++ b/src/agents/sisyphus-junior/default.ts
@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
 <Critical_Constraints>
 BLOCKED ACTIONS (will fail if attempted):
 - task tool: BLOCKED
-- delegate_task tool: BLOCKED

 ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
 You work ALONE for implementation. No delegation of implementation tasks.
--- a/src/agents/sisyphus-junior/gpt.ts
+++ b/src/agents/sisyphus-junior/gpt.ts
@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
 | Tool | Status |
 |------|--------|
 | task | BLOCKED |
-| delegate_task | BLOCKED |

 ALLOWED:
 | Tool | Usage |
--- a/src/agents/sisyphus-junior/index.test.ts
+++ b/src/agents/sisyphus-junior/index.test.ts
@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
    })
  })

-  describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => {
-    test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => {
+  describe("tool safety (task blocked, call_omo_agent allowed)", () => {
+    test("task remains blocked, call_omo_agent is allowed via tools format", () => {
      // given
      const override = {
        tools: {
          task: true,
-          delegate_task: true,
          call_omo_agent: true,
          read: true,
        },
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      const permission = result.permission as Record<string, string> | undefined
      if (tools) {
        expect(tools.task).toBe(false)
-        expect(tools.delegate_task).toBe(false)
        // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
        expect(tools.call_omo_agent).toBe(true)
        expect(tools.read).toBe(true)
      }
      if (permission) {
        expect(permission.task).toBe("deny")
-        expect(permission.delegate_task).toBe("deny")
        // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
        expect(permission.call_omo_agent).toBe("allow")
      }
    })

-    test("task and delegate_task remain blocked when using permission format override", () => {
+    test("task remains blocked when using permission format override", () => {
      // given
      const override = {
        permission: {
          task: "allow",
-          delegate_task: "allow",
          call_omo_agent: "allow",
          read: "allow",
        },
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      // when
      const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])

-      // then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning
+      // then - task blocked, but call_omo_agent allowed for explore/librarian spawning
      const tools = result.tools as Record<string, boolean> | undefined
      const permission = result.permission as Record<string, string> | undefined
      if (tools) {
        expect(tools.task).toBe(false)
-        expect(tools.delegate_task).toBe(false)
        expect(tools.call_omo_agent).toBe(true)
      }
      if (permission) {
        expect(permission.task).toBe("deny")
-        expect(permission.delegate_task).toBe("deny")
        expect(permission.call_omo_agent).toBe("allow")
      }
    })
--- a/src/agents/sisyphus-junior/index.ts
+++ b/src/agents/sisyphus-junior/index.ts
@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"

 // Core tools that Sisyphus-Junior must NEVER have access to
 // Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
-const BLOCKED_TOOLS = ["task", "delegate_task"]
+const BLOCKED_TOOLS = ["task"]

 export const SISYPHUS_JUNIOR_DEFAULTS = {
  model: "anthropic/claude-sonnet-4-5",
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -214,8 +214,8 @@ ${keyTriggers}

 **Delegation Check (MANDATORY before acting directly):**
 1. Is there a specialized agent that perfectly matches this request?
-2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
-  - MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER.
+2. If not, is there a \`task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
+  - MUST FIND skills to use, for: \`task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS TASK PARAMETER.
 3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?

 **Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
@@ -277,15 +277,15 @@ ${librarianSection}
 // CORRECT: Always background, always parallel
 // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
-result = delegate_task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
+result = task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
 \`\`\`

 ### Background Result Collection:
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 ### Session Continuity (MANDATORY)

-Every \`delegate_task()\` output includes a session_id. **USE IT.**
+Every \`task()\` output includes a session_id. **USE IT.**

 **ALWAYS continue when:**
 | Scenario | Action |
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**

 \`\`\`typescript
 // WRONG: Starting fresh loses all context
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...")
+task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")

 // CORRECT: Resume preserves everything
-delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
+task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
 \`\`\`

 **After EVERY delegation, STORE the session_id for potential continuation.**
--- a/src/agents/utils.test.ts
+++ b/src/agents/utils.test.ts
@@ -6,14 +6,14 @@ import * as connectedProvidersCache from "../shared/connected-providers-cache"
 import * as modelAvailability from "../shared/model-availability"
 import * as shared from "../shared"

-const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5"
+const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-6"

 describe("createBuiltinAgents with model overrides", () => {
  test("Sisyphus with default model has thinking config when all models available", async () => {
    // #given
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
      new Set([
-        "anthropic/claude-opus-4-5",
+        "anthropic/claude-opus-4-6",
        "kimi-for-coding/k2p5",
        "opencode/kimi-k2.5-free",
        "zai-coding-plan/glm-4.7",
@@ -26,7 +26,7 @@ describe("createBuiltinAgents with model overrides", () => {
      const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})

      // #then
-      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-6")
      expect(agents.sisyphus.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
      expect(agents.sisyphus.reasoningEffort).toBeUndefined()
    } finally {
@@ -79,9 +79,75 @@ describe("createBuiltinAgents with model overrides", () => {
    }
  })

+  test("user config model takes priority over uiSelectedModel for sisyphus", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
+    )
+    const uiSelectedModel = "openai/gpt-5.2"
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        overrides,
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        undefined,
+        undefined,
+        uiSelectedModel
+      )
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("user config model takes priority over uiSelectedModel for atlas", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
+    )
+    const uiSelectedModel = "openai/gpt-5.2"
+    const overrides = {
+      atlas: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        overrides,
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        undefined,
+        undefined,
+        uiSelectedModel
+      )
+
+      // #then
+      expect(agents.atlas).toBeDefined()
+      expect(agents.atlas.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
  test("Sisyphus is created on first run when no availableModels or cache exist", async () => {
    // #given
-    const systemDefaultModel = "anthropic/claude-opus-4-5"
+    const systemDefaultModel = "anthropic/claude-opus-4-6"
    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(new Set())

@@ -91,7 +157,7 @@ describe("createBuiltinAgents with model overrides", () => {

      // #then
      expect(agents.sisyphus).toBeDefined()
-      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-6")
    } finally {
      cacheSpy.mockRestore()
      fetchSpy.mockRestore()
@@ -183,6 +249,222 @@ describe("createBuiltinAgents with model overrides", () => {
    expect(agents.sisyphus.prompt).toContain("frontend-ui-ux")
    expect(agents.sisyphus.prompt).toContain("git-master")
  })
+
+  test("includes custom agents in orchestrator prompts when provided via config", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set([
+        "anthropic/claude-opus-4-6",
+        "kimi-for-coding/k2p5",
+        "opencode/kimi-k2.5-free",
+        "zai-coding-plan/glm-4.7",
+        "opencode/glm-4.7-free",
+        "openai/gpt-5.2",
+      ])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "researcher",
+        description: "Research agent for deep analysis",
+        hidden: false,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).toContain("researcher")
+      expect(agents.hephaestus.prompt).toContain("researcher")
+      expect(agents.atlas.prompt).toContain("researcher")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes hidden custom agents from orchestrator prompts", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "hidden-agent",
+        description: "Should never show",
+        hidden: true,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("hidden-agent")
+      expect(agents.hephaestus.prompt).not.toContain("hidden-agent")
+      expect(agents.atlas.prompt).not.toContain("hidden-agent")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes disabled custom agents from orchestrator prompts", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "disabled-agent",
+        description: "Should never show",
+        disabled: true,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("disabled-agent")
+      expect(agents.hephaestus.prompt).not.toContain("disabled-agent")
+      expect(agents.atlas.prompt).not.toContain("disabled-agent")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes custom agents when disabledAgents contains their name (case-insensitive)", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const disabledAgents = ["ReSeArChEr"]
+    const customAgentSummaries = [
+      {
+        name: "researcher",
+        description: "Should never show",
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        disabledAgents,
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("researcher")
+      expect(agents.hephaestus.prompt).not.toContain("researcher")
+      expect(agents.atlas.prompt).not.toContain("researcher")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("deduplicates custom agents case-insensitively", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      { name: "Researcher", description: "First" },
+      { name: "researcher", description: "Second" },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      const matches = agents.sisyphus.prompt.match(/Custom agent: researcher/gi) ?? []
+      expect(matches.length).toBe(1)
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("sanitizes custom agent strings for markdown tables", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "table-agent",
+        description: "Line1\nAlpha | Beta",
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).toContain("Line1 Alpha \\| Beta")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
 })

 describe("createBuiltinAgents without systemDefaultModel", () => {
@@ -218,7 +500,7 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
    ])
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
      new Set([
-        "anthropic/claude-opus-4-5",
+        "anthropic/claude-opus-4-6",
        "kimi-for-coding/k2p5",
        "opencode/kimi-k2.5-free",
        "zai-coding-plan/glm-4.7",
@@ -232,7 +514,7 @@ describe("createBuiltinAgents without systemDefaultModel", () => {

      // #then
      expect(agents.sisyphus).toBeDefined()
-      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-6")
    } finally {
      cacheSpy.mockRestore()
      fetchSpy.mockRestore()
@@ -240,12 +522,13 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
  })
 })

-describe("createBuiltinAgents with requiresModel gating", () => {
-  test("hephaestus is not created when gpt-5.2-codex is unavailable", async () => {
-    // #given
+describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => {
+  test("hephaestus is not created when no required provider is connected", async () => {
+    // #given - only anthropic models available, not in hephaestus requiresProvider
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
-      new Set(["anthropic/claude-opus-4-5"])
+      new Set(["anthropic/claude-opus-4-6"])
    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"])

    try {
      // #when
@@ -255,13 +538,48 @@ describe("createBuiltinAgents with requiresModel gating", () => {
      expect(agents.hephaestus).toBeUndefined()
    } finally {
      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
    }
  })

-  test("hephaestus is created when gpt-5.2-codex is available", async () => {
-    // #given
+  test("hephaestus is created when openai provider is connected", async () => {
+    // #given - openai provider has models available
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
-      new Set(["openai/gpt-5.2-codex"])
+      new Set(["openai/gpt-5.3-codex"])
+    )
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.hephaestus).toBeDefined()
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("hephaestus is created when github-copilot provider is connected", async () => {
+    // #given - github-copilot provider has models available
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["github-copilot/gpt-5.3-codex"])
+    )
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.hephaestus).toBeDefined()
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("hephaestus is created when opencode provider is connected", async () => {
+    // #given - opencode provider has models available
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["opencode/gpt-5.3-codex"])
    )

    try {
@@ -286,20 +604,20 @@ describe("createBuiltinAgents with requiresModel gating", () => {

      // #then
      expect(agents.hephaestus).toBeDefined()
-      expect(agents.hephaestus.model).toBe("openai/gpt-5.2-codex")
+      expect(agents.hephaestus.model).toBe("openai/gpt-5.3-codex")
    } finally {
      cacheSpy.mockRestore()
      fetchSpy.mockRestore()
    }
  })

-  test("hephaestus is created when explicit config provided even if model unavailable", async () => {
+  test("hephaestus is created when explicit config provided even if provider unavailable", async () => {
    // #given
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
-      new Set(["anthropic/claude-opus-4-5"])
+      new Set(["anthropic/claude-opus-4-6"])
    )
    const overrides = {
-      hephaestus: { model: "anthropic/claude-opus-4-5" },
+      hephaestus: { model: "anthropic/claude-opus-4-6" },
    }

    try {
@@ -318,7 +636,7 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
  test("sisyphus is created when at least one fallback model is available", async () => {
    // #given
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
-      new Set(["anthropic/claude-opus-4-5"])
+      new Set(["anthropic/claude-opus-4-6"])
    )

    try {
@@ -343,7 +661,7 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {

      // #then
      expect(agents.sisyphus).toBeDefined()
-      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+      expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-6")
    } finally {
      cacheSpy.mockRestore()
      fetchSpy.mockRestore()
@@ -354,7 +672,7 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
    // #given
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(new Set())
    const overrides = {
-      sisyphus: { model: "anthropic/claude-opus-4-5" },
+      sisyphus: { model: "anthropic/claude-opus-4-6" },
    }

    try {
@@ -368,11 +686,12 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
    }
  })

-  test("sisyphus is not created when no fallback model is available (unrelated model only)", async () => {
+  test("sisyphus is not created when no fallback model is available and provider not connected", async () => {
    // #given - only openai/gpt-5.2 available, not in sisyphus fallback chain
    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
      new Set(["openai/gpt-5.2"])
    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue([])

    try {
      // #when
@@ -382,13 +701,66 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
      expect(agents.sisyphus).toBeUndefined()
    } finally {
      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
+    }
+  })
+
+  test("sisyphus uses user-configured plugin model even when not in cache or fallback chain", async () => {
+    // #given - user configures a model from a plugin provider (like antigravity)
+    // that is NOT in the availableModels cache and NOT in the fallback chain
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2"])
+    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
+      ["openai"]
+    )
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
+    }
+  })
+
+  test("sisyphus uses user-configured plugin model when availableModels is empty but cache exists", async () => {
+    // #given - connected providers cache exists but models cache is empty
+    // This reproduces the exact scenario where provider-models.json has models: {}
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set()
+    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
+      ["google", "openai", "opencode"]
+    )
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
    }
  })
 })

 describe("buildAgent with category and skills", () => {
  const { buildAgent } = require("./utils")
-  const TEST_MODEL = "anthropic/claude-opus-4-5"
+  const TEST_MODEL = "anthropic/claude-opus-4-6"

  beforeEach(() => {
    clearSkillCache()
@@ -534,7 +906,7 @@ describe("buildAgent with category and skills", () => {
    const agent = buildAgent(source["test-agent"], TEST_MODEL)

    // #then - category's built-in model and skills are applied
-    expect(agent.model).toBe("openai/gpt-5.2-codex")
+    expect(agent.model).toBe("openai/gpt-5.3-codex")
    expect(agent.variant).toBe("xhigh")
    expect(agent.prompt).toContain("Role: Designer-Turned-Developer")
    expect(agent.prompt).toContain("Task description")
@@ -647,9 +1019,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
    // #when
    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)

-    // #then - ultrabrain category: model=openai/gpt-5.2-codex, variant=xhigh
+    // #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
    expect(agents.oracle).toBeDefined()
-    expect(agents.oracle.model).toBe("openai/gpt-5.2-codex")
+    expect(agents.oracle.model).toBe("openai/gpt-5.3-codex")
    expect(agents.oracle.variant).toBe("xhigh")
  })

@@ -716,9 +1088,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
    // #when
    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)

-    // #then - ultrabrain category: model=openai/gpt-5.2-codex, variant=xhigh
+    // #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
    expect(agents.sisyphus).toBeDefined()
-    expect(agents.sisyphus.model).toBe("openai/gpt-5.2-codex")
+    expect(agents.sisyphus.model).toBe("openai/gpt-5.3-codex")
    expect(agents.sisyphus.variant).toBe("xhigh")
  })

@@ -731,9 +1103,9 @@ describe("override.category expansion in createBuiltinAgents", () => {
    // #when
    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)

-    // #then - ultrabrain category: model=openai/gpt-5.2-codex, variant=xhigh
+    // #then - ultrabrain category: model=openai/gpt-5.3-codex, variant=xhigh
    expect(agents.atlas).toBeDefined()
-    expect(agents.atlas.model).toBe("openai/gpt-5.2-codex")
+    expect(agents.atlas.model).toBe("openai/gpt-5.3-codex")
    expect(agents.atlas.variant).toBe("xhigh")
  })

@@ -835,4 +1207,29 @@ describe("Deadlock prevention - fetchAvailableModels must not receive client", (
     fetchSpy.mockRestore?.()
     cacheSpy.mockRestore?.()
   })
+  test("Hephaestus variant override respects user config over hardcoded default", async () => {
+    // #given - user provides variant in config
+    const overrides = {
+      hephaestus: { variant: "high" },
+    }
+
+    // #when
+    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+
+    // #then - user variant takes precedence over hardcoded "medium"
+    expect(agents.hephaestus).toBeDefined()
+    expect(agents.hephaestus.variant).toBe("high")
+  })
+
+  test("Hephaestus uses default variant when no user override provided", async () => {
+    // #given - no variant override in config
+    const overrides = {}
+
+    // #when
+    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+
+    // #then - default "medium" variant is applied
+    expect(agents.hephaestus).toBeDefined()
+    expect(agents.hephaestus.variant).toBe("medium")
+  })
 })
--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -11,7 +11,18 @@ import { createAtlasAgent, atlasPromptMetadata } from "./atlas"
 import { createMomusAgent, momusPromptMetadata } from "./momus"
 import { createHephaestusAgent } from "./hephaestus"
 import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-import { deepMerge, fetchAvailableModels, resolveModelPipeline, AGENT_MODEL_REQUIREMENTS, readConnectedProvidersCache, isModelAvailable, isAnyFallbackModelAvailable, migrateAgentConfig } from "../shared"
+import {
+  deepMerge,
+  fetchAvailableModels,
+  resolveModelPipeline,
+  AGENT_MODEL_REQUIREMENTS,
+  readConnectedProvidersCache,
+  isModelAvailable,
+  isAnyFallbackModelAvailable,
+  isAnyProviderConnected,
+  migrateAgentConfig,
+  truncateDescription,
+} from "../shared"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
 import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
 import { createBuiltinSkills } from "../features/builtin-skills"
@@ -52,6 +63,64 @@ function isFactory(source: AgentSource): source is AgentFactory {
  return typeof source === "function"
 }

+type RegisteredAgentSummary = {
+  name: string // agent name exactly as supplied in the input summary
+  description: string // description text; parseRegisteredAgentSummaries stores it already sanitized for table cells
+}
+
+function sanitizeMarkdownTableCell(value: string): string {
+  return value // flatten to a single line and escape pipes so the text cannot break a markdown table row
+    .replace(/\r?\n/g, " ")
+    .replace(/(\\*)\|/g, (match, slashes: string) => (slashes.length % 2 === 1 ? match : `${slashes}\\|`)) // a pipe after an odd run of backslashes is already escaped; leaving it alone keeps repeated sanitizing idempotent
+    .replace(/\s+/g, " ")
+    .trim()
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === "object" // typeof null is "object", so null is ruled out explicitly
+}
+
+function parseRegisteredAgentSummaries(input: unknown): RegisteredAgentSummary[] {
+  // Anything that is not an array carries no summaries.
+  if (!Array.isArray(input)) return []
+
+  const summaries: RegisteredAgentSummary[] = []
+  for (const entry of input) {
+    if (!isRecord(entry)) continue
+    // A usable entry needs a non-empty string name.
+    const candidateName = entry.name
+    if (typeof candidateName !== "string" || candidateName === "") continue
+
+    // Leave out agents explicitly flagged hidden, disabled, or not enabled.
+    const excluded = entry.hidden === true || entry.disabled === true || entry.enabled === false
+    if (excluded) continue
+
+    // A missing or non-string description becomes an empty cell.
+    const rawDescription = typeof entry.description === "string" ? entry.description : ""
+    summaries.push({
+      name: candidateName,
+      description: sanitizeMarkdownTableCell(rawDescription),
+    })
+  }
+
+  return summaries
+}
+
+function buildCustomAgentMetadata(agentName: string, description: string): AgentPromptMetadata {
+  // Truncate the description first, then make both strings safe for a markdown table cell.
+  const triggerText = sanitizeMarkdownTableCell(truncateDescription(description))
+  const domainLabel = `Custom agent: ${sanitizeMarkdownTableCell(agentName)}`
+  // An empty description falls back to generic guidance.
+  const trigger = triggerText || "Use when this agent's description matches the task"
+  return {
+    category: "specialist",
+    cost: "CHEAP",
+    triggers: [
+      { domain: domainLabel, trigger },
+    ],
+  }
+}
+
 export function buildAgent(
  source: AgentSource,
  model: string,
@@ -233,13 +302,13 @@ export async function createBuiltinAgents(
  categories?: CategoriesConfig,
  gitMasterConfig?: GitMasterConfig,
  discoveredSkills: LoadedSkill[] = [],
-  client?: any,
+  customAgentSummaries?: unknown,
  browserProvider?: BrowserAutomationProvider,
  uiSelectedModel?: string,
  disabledSkills?: Set<string>
 ): Promise<Record<string, AgentConfig>> {
  const connectedProviders = readConnectedProvidersCache()
-  // IMPORTANT: Do NOT pass client to fetchAvailableModels during plugin initialization.
+  // IMPORTANT: Do NOT call OpenCode client APIs during plugin initialization.
  // This function is called from config handler, and calling client API causes deadlock.
  // See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301
  const availableModels = await fetchAvailableModels(undefined, {
@@ -279,6 +348,10 @@ export async function createBuiltinAgents(

  const availableSkills: AvailableSkill[] = [...builtinAvailable, ...discoveredAvailable]

+  const registeredAgents = parseRegisteredAgentSummaries(customAgentSummaries)
+  const builtinAgentNames = new Set(Object.keys(agentSources).map((n) => n.toLowerCase()))
+  const disabledAgentNames = new Set(disabledAgents.map((n) => n.toLowerCase()))
+
  // Collect general agents first (for availableAgents), but don't add to result yet
  const pendingAgentConfigs: Map<string, AgentConfig> = new Map()

@@ -304,7 +377,7 @@ export async function createBuiltinAgents(
     const isPrimaryAgent = isFactory(source) && source.mode === "primary"

    const resolution = applyModelResolution({
-      uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
+      uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
      userModel: override?.model,
      requirement,
      availableModels,
@@ -335,14 +408,27 @@ export async function createBuiltinAgents(
    // Store for later - will be added after sisyphus and hephaestus
    pendingAgentConfigs.set(name, config)

-    const metadata = agentMetadata[agentName]
-    if (metadata) {
-      availableAgents.push({
-        name: agentName,
-        description: config.description ?? "",
-        metadata,
-      })
-    }
+     const metadata = agentMetadata[agentName]
+     if (metadata) {
+       availableAgents.push({
+         name: agentName,
+         description: config.description ?? "",
+         metadata,
+       })
+     }
+   }
+
+  for (const agent of registeredAgents) {
+    const lowerName = agent.name.toLowerCase()
+    if (builtinAgentNames.has(lowerName)) continue
+    if (disabledAgentNames.has(lowerName)) continue
+    if (availableAgents.some((a) => a.name.toLowerCase() === lowerName)) continue
+
+    availableAgents.push({
+      name: agent.name,
+      description: agent.description,
+      metadata: buildCustomAgentMetadata(agent.name, agent.description),
+    })
  }

   const sisyphusOverride = agentOverrides["sisyphus"]
@@ -356,7 +442,7 @@ export async function createBuiltinAgents(

   if (!disabledAgents.includes("sisyphus") && meetsSisyphusAnyModelRequirement) {
    let sisyphusResolution = applyModelResolution({
-      uiSelectedModel,
+      uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
      userModel: sisyphusOverride?.model,
      requirement: sisyphusRequirement,
      availableModels,
@@ -394,13 +480,13 @@ export async function createBuiltinAgents(
    const hephaestusRequirement = AGENT_MODEL_REQUIREMENTS["hephaestus"]
    const hasHephaestusExplicitConfig = hephaestusOverride !== undefined

-    const hasRequiredModel =
-      !hephaestusRequirement?.requiresModel ||
+    const hasRequiredProvider =
+      !hephaestusRequirement?.requiresProvider ||
      hasHephaestusExplicitConfig ||
      isFirstRunNoCache ||
-      (availableModels.size > 0 && isModelAvailable(hephaestusRequirement.requiresModel, availableModels))
+      isAnyProviderConnected(hephaestusRequirement.requiresProvider, availableModels)

-    if (hasRequiredModel) {
+    if (hasRequiredProvider) {
      let hephaestusResolution = applyModelResolution({
        userModel: hephaestusOverride?.model,
        requirement: hephaestusRequirement,
@@ -423,13 +509,13 @@ export async function createBuiltinAgents(
          availableCategories
        )

-        hephaestusConfig = { ...hephaestusConfig, variant: hephaestusResolvedVariant ?? "medium" }
-
+        if (!hephaestusOverride?.variant) {
+          hephaestusConfig = { ...hephaestusConfig, variant: hephaestusResolvedVariant ?? "medium" }
+        }
        const hepOverrideCategory = (hephaestusOverride as Record<string, unknown> | undefined)?.category as string | undefined
        if (hepOverrideCategory) {
          hephaestusConfig = applyCategoryOverride(hephaestusConfig, hepOverrideCategory, mergedCategories)
        }
-
        if (directory && hephaestusConfig.prompt) {
          const envContext = createEnvContext()
          hephaestusConfig = { ...hephaestusConfig, prompt: hephaestusConfig.prompt + envContext }
@@ -454,7 +540,7 @@ export async function createBuiltinAgents(
      const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]

      const atlasResolution = applyModelResolution({
-        uiSelectedModel,
+        uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
        userModel: orchestratorOverride?.model,
        requirement: atlasRequirement,
        availableModels,
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -2,25 +2,25 @@

 ## OVERVIEW

-CLI entry: `bunx oh-my-opencode`. 4 commands with Commander.js + @clack/prompts TUI.
+CLI entry: `bunx oh-my-opencode`. 70 CLI utilities and commands with Commander.js + @clack/prompts TUI.

-**Commands**: install (interactive setup), doctor (14 health checks), run (session launcher), get-local-version
+**Commands**: install (interactive setup), doctor (14 health checks), run (session launcher), get-local-version, mcp-oauth

 ## STRUCTURE

 ```
 cli/
-├── index.ts              # Commander.js entry (4 commands)
+├── index.ts              # Commander.js entry (5 commands)
 ├── install.ts            # Interactive TUI (542 lines)
 ├── config-manager.ts     # JSONC parsing (667 lines)
-├── types.ts              # InstallArgs, InstallConfig
 ├── model-fallback.ts     # Model fallback configuration
+├── types.ts              # InstallArgs, InstallConfig
 ├── doctor/
 │   ├── index.ts          # Doctor entry
 │   ├── runner.ts         # Check orchestration
 │   ├── formatter.ts      # Colored output
 │   ├── constants.ts      # Check IDs, symbols
-│   ├── types.ts          # CheckResult, CheckDefinition (114 lines)
+│   ├── types.ts          # CheckResult, CheckDefinition
 │   └── checks/           # 14 checks, 23 files
 │       ├── version.ts    # OpenCode + plugin version
 │       ├── config.ts     # JSONC validity, Zod
@@ -28,10 +28,11 @@ cli/
 │       ├── dependencies.ts # AST-Grep, Comment Checker
 │       ├── lsp.ts        # LSP connectivity
 │       ├── mcp.ts        # MCP validation
-│       ├── model-resolution.ts # Model resolution check
+│       ├── model-resolution.ts # Model resolution check (323 lines)
 │       └── gh.ts         # GitHub CLI
 ├── run/
-│   └── index.ts          # Session launcher
+│   ├── index.ts          # Session launcher
+│   └── events.ts         # CLI run events (325 lines)
 ├── mcp-oauth/
 │   └── index.ts          # MCP OAuth flow
 └── get-local-version/
@@ -46,6 +47,7 @@ cli/
 | `doctor` | 14 health checks for diagnostics |
 | `run` | Launch session with todo enforcement |
 | `get-local-version` | Version detection and update check |
+| `mcp-oauth` | MCP OAuth authentication flow |

 ## DOCTOR CATEGORIES (14 Checks)

--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
@@ -75,26 +75,26 @@ exports[`generateModelConfig single native provider uses Claude models when only
      "model": "anthropic/claude-sonnet-4-5",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "multimodal-looker": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "oracle": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -103,7 +103,7 @@ exports[`generateModelConfig single native provider uses Claude models when only
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-high": {
@@ -113,7 +113,7 @@ exports[`generateModelConfig single native provider uses Claude models when only
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "writing": {
@@ -137,26 +137,26 @@ exports[`generateModelConfig single native provider uses Claude models with isMa
      "model": "anthropic/claude-sonnet-4-5",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "multimodal-looker": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "oracle": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -165,18 +165,18 @@ exports[`generateModelConfig single native provider uses Claude models with isMa
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-high": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-low": {
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "writing": {
@@ -197,7 +197,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
      "model": "opencode/gpt-5-nano",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
@@ -225,22 +225,22 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
  },
  "categories": {
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "opencode/glm-4.7-free",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "unspecified-low": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "visual-engineering": {
@@ -264,7 +264,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "model": "opencode/gpt-5-nano",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
@@ -292,14 +292,14 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
  },
  "categories": {
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "opencode/glm-4.7-free",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -307,7 +307,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "variant": "high",
    },
    "unspecified-low": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "visual-engineering": {
@@ -451,14 +451,14 @@ exports[`generateModelConfig all native providers uses preferred models from fal
      "model": "anthropic/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "anthropic/claude-sonnet-4-5",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -473,11 +473,11 @@ exports[`generateModelConfig all native providers uses preferred models from fal
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -487,14 +487,14 @@ exports[`generateModelConfig all native providers uses preferred models from fal
      "variant": "high",
    },
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -524,14 +524,14 @@ exports[`generateModelConfig all native providers uses preferred models with isM
      "model": "anthropic/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "anthropic/claude-sonnet-4-5",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -546,11 +546,11 @@ exports[`generateModelConfig all native providers uses preferred models with isM
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -560,18 +560,18 @@ exports[`generateModelConfig all native providers uses preferred models with isM
      "variant": "high",
    },
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-low": {
@@ -598,14 +598,14 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "model": "opencode/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "opencode/glm-4.7-free",
    },
    "metis": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -620,11 +620,11 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "variant": "high",
    },
    "prometheus": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -634,14 +634,14 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "variant": "high",
    },
    "deep": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "opencode/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -671,14 +671,14 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "model": "opencode/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "opencode/glm-4.7-free",
    },
    "metis": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -693,11 +693,11 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "variant": "high",
    },
    "prometheus": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -707,18 +707,18 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "variant": "high",
    },
    "deep": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "opencode/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
-      "model": "opencode/claude-opus-4-5",
+      "model": "opencode/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-low": {
@@ -745,14 +745,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "model": "github-copilot/gpt-5-mini",
    },
    "hephaestus": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "metis": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "momus": {
@@ -767,11 +767,11 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "variant": "high",
    },
    "prometheus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
  },
@@ -781,14 +781,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "variant": "high",
    },
    "deep": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "github-copilot/claude-haiku-4.5",
    },
    "ultrabrain": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -818,14 +818,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "model": "github-copilot/gpt-5-mini",
    },
    "hephaestus": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "metis": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "momus": {
@@ -840,11 +840,11 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "variant": "high",
    },
    "prometheus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
  },
@@ -854,18 +854,18 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "variant": "high",
    },
    "deep": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "github-copilot/claude-haiku-4.5",
    },
    "ultrabrain": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "unspecified-low": {
@@ -1002,14 +1002,14 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "model": "anthropic/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "opencode/glm-4.7-free",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -1024,11 +1024,11 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -1038,14 +1038,14 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "variant": "high",
    },
    "deep": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "opencode/gpt-5.2-codex",
+      "model": "opencode/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -1075,14 +1075,14 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "model": "github-copilot/gpt-5-mini",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "metis": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "momus": {
@@ -1097,11 +1097,11 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "variant": "high",
    },
    "prometheus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
  },
@@ -1111,14 +1111,14 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "variant": "high",
    },
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "github-copilot/claude-haiku-4.5",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -1151,26 +1151,26 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "multimodal-looker": {
      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -1179,7 +1179,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-high": {
@@ -1189,7 +1189,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "writing": {
@@ -1213,11 +1213,11 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
      "model": "anthropic/claude-sonnet-4-5",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "multimodal-looker": {
@@ -1228,11 +1228,11 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -1275,14 +1275,14 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
      "model": "opencode/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "momus": {
@@ -1297,11 +1297,11 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
      "variant": "high",
    },
    "prometheus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "github-copilot/claude-opus-4.5",
+      "model": "github-copilot/claude-opus-4.6",
      "variant": "max",
    },
  },
@@ -1311,14 +1311,14 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
      "variant": "high",
    },
    "deep": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "github-copilot/claude-haiku-4.5",
    },
    "ultrabrain": {
-      "model": "github-copilot/gpt-5.2-codex",
+      "model": "github-copilot/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -1348,14 +1348,14 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
      "model": "anthropic/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -1370,11 +1370,11 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -1384,14 +1384,14 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
      "variant": "high",
    },
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
@@ -1421,14 +1421,14 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
      "model": "anthropic/claude-haiku-4-5",
    },
    "hephaestus": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "librarian": {
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "momus": {
@@ -1443,11 +1443,11 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
      "variant": "high",
    },
    "prometheus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "sisyphus": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
  },
@@ -1457,18 +1457,18 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
      "variant": "high",
    },
    "deep": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "medium",
    },
    "quick": {
      "model": "anthropic/claude-haiku-4-5",
    },
    "ultrabrain": {
-      "model": "openai/gpt-5.2-codex",
+      "model": "openai/gpt-5.3-codex",
      "variant": "xhigh",
    },
    "unspecified-high": {
-      "model": "anthropic/claude-opus-4-5",
+      "model": "anthropic/claude-opus-4-6",
      "variant": "max",
    },
    "unspecified-low": {
--- a/src/cli/config-manager.test.ts
+++ b/src/cli/config-manager.test.ts
@@ -259,7 +259,7 @@ describe("generateOmoConfig - model fallback system", () => {
    // #then Sisyphus uses Claude (OR logic - at least one provider available)
    expect(result.$schema).toBe("https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json")
    expect(result.agents).toBeDefined()
-    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-6")
  })

  test("generates native opus models when Claude max20 subscription", () => {
@@ -279,7 +279,7 @@ describe("generateOmoConfig - model fallback system", () => {
    const result = generateOmoConfig(config)

    // #then Sisyphus uses Claude (OR logic - at least one provider available)
-    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-6")
  })

  test("uses github-copilot sonnet fallback when only copilot available", () => {
@@ -298,8 +298,8 @@ describe("generateOmoConfig - model fallback system", () => {
    // #when generating config
    const result = generateOmoConfig(config)

-    // #then Sisyphus uses Copilot (OR logic - copilot is in claude-opus-4-5 providers)
-    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("github-copilot/claude-opus-4.5")
+    // #then Sisyphus uses Copilot (OR logic - copilot is in claude-opus-4-6 providers)
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("github-copilot/claude-opus-4.6")
  })

  test("uses ultimate fallback when no providers configured", () => {
@@ -342,7 +342,7 @@ describe("generateOmoConfig - model fallback system", () => {
    // #then librarian should use zai-coding-plan/glm-4.7
    expect((result.agents as Record<string, { model: string }>).librarian.model).toBe("zai-coding-plan/glm-4.7")
    // #then Sisyphus uses Claude (OR logic)
-    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-6")
  })

  test("uses native OpenAI models when only ChatGPT available", () => {
--- a/src/cli/doctor/checks/gh.test.ts
+++ b/src/cli/doctor/checks/gh.test.ts
@@ -29,7 +29,7 @@ describe("gh cli check", () => {

    it("returns gh cli info structure", async () => {
      const spawnSpy = spyOn(Bun, "spawn").mockImplementation((cmd) => {
-        if (Array.isArray(cmd) && cmd[0] === "which" && cmd[1] === "gh") {
+        if (Array.isArray(cmd) && (cmd[0] === "which" || cmd[0] === "where") && cmd[1] === "gh") {
          return createProc({ stdout: "/usr/bin/gh\n" })
        }

--- a/src/cli/doctor/checks/gh.ts
+++ b/src/cli/doctor/checks/gh.ts
@@ -13,7 +13,8 @@ export interface GhCliInfo {

 async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
  try {
-    const proc = Bun.spawn(["which", binary], { stdout: "pipe", stderr: "pipe" })
+    const whichCmd = process.platform === "win32" ? "where" : "which"
+    const proc = Bun.spawn([whichCmd, binary], { stdout: "pipe", stderr: "pipe" })
    const output = await new Response(proc.stdout).text()
    await proc.exited
    if (proc.exitCode === 0) {
--- a/src/cli/doctor/checks/model-resolution.test.ts
+++ b/src/cli/doctor/checks/model-resolution.test.ts
@@ -14,9 +14,8 @@ describe("model-resolution check", () => {
      // then: Should have agent entries
      const sisyphus = info.agents.find((a) => a.name === "sisyphus")
      expect(sisyphus).toBeDefined()
-      expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-5")
+      expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-6")
      expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("anthropic")
-      expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("github-copilot")
    })

    it("returns category requirements with provider chains", async () => {
@@ -43,7 +42,7 @@ describe("model-resolution check", () => {
      // given: User has override for oracle agent
      const mockConfig = {
        agents: {
-          oracle: { model: "anthropic/claude-opus-4-5" },
+          oracle: { model: "anthropic/claude-opus-4-6" },
        },
      }

@@ -52,8 +51,8 @@ describe("model-resolution check", () => {
      // then: Oracle should show the override
      const oracle = info.agents.find((a) => a.name === "oracle")
      expect(oracle).toBeDefined()
-      expect(oracle!.userOverride).toBe("anthropic/claude-opus-4-5")
-      expect(oracle!.effectiveResolution).toBe("User override: anthropic/claude-opus-4-5")
+      expect(oracle!.userOverride).toBe("anthropic/claude-opus-4-6")
+      expect(oracle!.effectiveResolution).toBe("User override: anthropic/claude-opus-4-6")
    })

    it("shows user override for category when configured", async () => {
@@ -90,6 +89,46 @@ describe("model-resolution check", () => {
      expect(sisyphus!.effectiveResolution).toContain("Provider fallback:")
      expect(sisyphus!.effectiveResolution).toContain("anthropic")
    })
+
+    it("captures user variant for agent when configured", async () => {
+      const { getModelResolutionInfoWithOverrides } = await import("./model-resolution")
+
+      //#given User has model with variant override for oracle agent
+      const mockConfig = {
+        agents: {
+          oracle: { model: "openai/gpt-5.2", variant: "xhigh" },
+        },
+      }
+
+      //#when getting resolution info with config
+      const info = getModelResolutionInfoWithOverrides(mockConfig)
+
+      //#then Oracle should have userVariant set
+      const oracle = info.agents.find((a) => a.name === "oracle")
+      expect(oracle).toBeDefined()
+      expect(oracle!.userOverride).toBe("openai/gpt-5.2")
+      expect(oracle!.userVariant).toBe("xhigh")
+    })
+
+    it("captures user variant for category when configured", async () => {
+      const { getModelResolutionInfoWithOverrides } = await import("./model-resolution")
+
+      //#given User has model with variant override for visual-engineering category
+      const mockConfig = {
+        categories: {
+          "visual-engineering": { model: "google/gemini-3-flash-preview", variant: "high" },
+        },
+      }
+
+      //#when getting resolution info with config
+      const info = getModelResolutionInfoWithOverrides(mockConfig)
+
+      //#then visual-engineering should have userVariant set
+      const visual = info.categories.find((c) => c.name === "visual-engineering")
+      expect(visual).toBeDefined()
+      expect(visual!.userOverride).toBe("google/gemini-3-flash-preview")
+      expect(visual!.userVariant).toBe("high")
+    })
  })

  describe("checkModelResolution", () => {
--- a/src/cli/doctor/checks/model-resolution.ts
+++ b/src/cli/doctor/checks/model-resolution.ts
@@ -51,6 +51,7 @@ export interface AgentResolutionInfo {
  name: string
  requirement: ModelRequirement
  userOverride?: string
+  userVariant?: string
  effectiveModel: string
  effectiveResolution: string
 }
@@ -59,6 +60,7 @@ export interface CategoryResolutionInfo {
  name: string
  requirement: ModelRequirement
  userOverride?: string
+  userVariant?: string
  effectiveModel: string
  effectiveResolution: string
 }
@@ -152,10 +154,12 @@ export function getModelResolutionInfoWithOverrides(config: OmoConfig): ModelRes
  const agents: AgentResolutionInfo[] = Object.entries(AGENT_MODEL_REQUIREMENTS).map(
    ([name, requirement]) => {
      const userOverride = config.agents?.[name]?.model
+      const userVariant = config.agents?.[name]?.variant
      return {
        name,
        requirement,
        userOverride,
+        userVariant,
        effectiveModel: getEffectiveModel(requirement, userOverride),
        effectiveResolution: buildEffectiveResolution(requirement, userOverride),
      }
@@ -165,10 +169,12 @@ export function getModelResolutionInfoWithOverrides(config: OmoConfig): ModelRes
  const categories: CategoryResolutionInfo[] = Object.entries(CATEGORY_MODEL_REQUIREMENTS).map(
    ([name, requirement]) => {
      const userOverride = config.categories?.[name]?.model
+      const userVariant = config.categories?.[name]?.variant
      return {
        name,
        requirement,
        userOverride,
+        userVariant,
        effectiveModel: getEffectiveModel(requirement, userOverride),
        effectiveResolution: buildEffectiveResolution(requirement, userOverride),
      }
--- a/src/cli/index.ts
+++ b/src/cli/index.ts
@@ -19,6 +19,7 @@ program
  .name("oh-my-opencode")
  .description("The ultimate OpenCode plugin - multi-model orchestration, LSP tools, and more")
  .version(VERSION, "-v, --version", "Show version number")
+  .enablePositionalOptions()

 program
  .command("install")
@@ -43,7 +44,7 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
  OpenAI        Native openai/ models (GPT-5.2 for Oracle)
  Gemini        Native google/ models (Gemini 3 Pro, Flash)
  Copilot       github-copilot/ models (fallback)
-  OpenCode Zen  opencode/ models (opencode/claude-opus-4-5, etc.)
+  OpenCode Zen  opencode/ models (opencode/claude-opus-4-6, etc.)
  Z.ai          zai-coding-plan/glm-4.7 (Librarian priority)
  Kimi          kimi-for-coding/k2p5 (Sisyphus/Prometheus fallback)
 `)
@@ -64,16 +65,28 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
  })

 program
-  .command("run <message>")
-  .description("Run opencode with todo/background task completion enforcement")
+   .command("run <message>")
+   .allowUnknownOption()
+   .passThroughOptions()
+   .description("Run opencode with todo/background task completion enforcement")
  .option("-a, --agent <name>", "Agent to use (default: from CLI/env/config, fallback: Sisyphus)")
  .option("-d, --directory <path>", "Working directory")
  .option("-t, --timeout <ms>", "Timeout in milliseconds (default: 30 minutes)", parseInt)
+  .option("-p, --port <port>", "Server port (attaches if port already in use)", parseInt)
+  .option("--attach <url>", "Attach to existing opencode server URL")
+  .option("--on-complete <command>", "Shell command to run after completion")
+  .option("--json", "Output structured JSON result to stdout")
+  .option("--session-id <id>", "Resume existing session instead of creating new one")
  .addHelpText("after", `
 Examples:
  $ bunx oh-my-opencode run "Fix the bug in index.ts"
  $ bunx oh-my-opencode run --agent Sisyphus "Implement feature X"
  $ bunx oh-my-opencode run --timeout 3600000 "Large refactoring task"
+  $ bunx oh-my-opencode run --port 4321 "Fix the bug"
+  $ bunx oh-my-opencode run --attach http://127.0.0.1:4321 "Fix the bug"
+  $ bunx oh-my-opencode run --json "Fix the bug" | jq .sessionId
+  $ bunx oh-my-opencode run --on-complete "notify-send Done" "Fix the bug"
+  $ bunx oh-my-opencode run --session-id ses_abc123 "Continue the work"

 Agent resolution order:
  1) --agent flag
@@ -89,11 +102,20 @@ Unlike 'opencode run', this command waits until:
  - All child sessions (background tasks) are idle
 `)
  .action(async (message: string, options) => {
+    if (options.port && options.attach) {
+      console.error("Error: --port and --attach are mutually exclusive")
+      process.exit(1)
+    }
    const runOptions: RunOptions = {
      message,
      agent: options.agent,
      directory: options.directory,
      timeout: options.timeout,
+      port: options.port,
+      attach: options.attach,
+      onComplete: options.onComplete,
+      json: options.json ?? false,
+      sessionId: options.sessionId,
    }
    const exitCode = await run(runOptions)
    process.exit(exitCode)
--- a/src/cli/install.ts
+++ b/src/cli/install.ts
@@ -243,7 +243,7 @@ async function runTuiMode(detected: DetectedConfig): Promise<InstallConfig | nul
    message: "Do you have access to OpenCode Zen (opencode/ models)?",
    options: [
      { value: "no" as const, label: "No", hint: "Will use other configured providers" },
-      { value: "yes" as const, label: "Yes", hint: "opencode/claude-opus-4-5, opencode/gpt-5.2, etc." },
+      { value: "yes" as const, label: "Yes", hint: "opencode/claude-opus-4-6, opencode/gpt-5.2, etc." },
    ],
    initialValue: initial.opencodeZen,
  })
--- a/src/cli/model-fallback.test.ts
+++ b/src/cli/model-fallback.test.ts
@@ -376,7 +376,7 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then
-      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-opus-4-5")
+      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-opus-4-6")
    })

    test("Sisyphus is created when multiple fallback providers are available", () => {
@@ -393,7 +393,7 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then
-      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-opus-4-5")
+      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-opus-4-6")
    })

    test("Sisyphus is omitted when no fallback provider is available (OpenAI not in chain)", () => {
@@ -409,7 +409,7 @@ describe("generateModelConfig", () => {
  })

  describe("Hephaestus agent special cases", () => {
-    test("Hephaestus is created when OpenAI is available (has gpt-5.2-codex)", () => {
+    test("Hephaestus is created when OpenAI is available (openai provider connected)", () => {
      // #given
      const config = createConfig({ hasOpenAI: true })

@@ -417,11 +417,11 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then
-      expect(result.agents?.hephaestus?.model).toBe("openai/gpt-5.2-codex")
+      expect(result.agents?.hephaestus?.model).toBe("openai/gpt-5.3-codex")
      expect(result.agents?.hephaestus?.variant).toBe("medium")
    })

-    test("Hephaestus is created when Copilot is available (has gpt-5.2-codex)", () => {
+    test("Hephaestus is created when Copilot is available (github-copilot provider connected)", () => {
      // #given
      const config = createConfig({ hasCopilot: true })

@@ -429,11 +429,11 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then
-      expect(result.agents?.hephaestus?.model).toBe("github-copilot/gpt-5.2-codex")
+      expect(result.agents?.hephaestus?.model).toBe("github-copilot/gpt-5.3-codex")
      expect(result.agents?.hephaestus?.variant).toBe("medium")
    })

-    test("Hephaestus is created when OpenCode Zen is available (has gpt-5.2-codex)", () => {
+    test("Hephaestus is created when OpenCode Zen is available (opencode provider connected)", () => {
      // #given
      const config = createConfig({ hasOpencodeZen: true })

@@ -441,11 +441,11 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then
-      expect(result.agents?.hephaestus?.model).toBe("opencode/gpt-5.2-codex")
+      expect(result.agents?.hephaestus?.model).toBe("opencode/gpt-5.3-codex")
      expect(result.agents?.hephaestus?.variant).toBe("medium")
    })

-    test("Hephaestus is omitted when only Claude is available (no gpt-5.2-codex)", () => {
+    test("Hephaestus is omitted when only Claude is available (no required provider connected)", () => {
      // #given
      const config = createConfig({ hasClaude: true })

@@ -456,7 +456,7 @@ describe("generateModelConfig", () => {
      expect(result.agents?.hephaestus).toBeUndefined()
    })

-    test("Hephaestus is omitted when only Gemini is available (no gpt-5.2-codex)", () => {
+    test("Hephaestus is omitted when only Gemini is available (no required provider connected)", () => {
      // #given
      const config = createConfig({ hasGemini: true })

@@ -467,7 +467,7 @@ describe("generateModelConfig", () => {
      expect(result.agents?.hephaestus).toBeUndefined()
    })

-    test("Hephaestus is omitted when only ZAI is available (no gpt-5.2-codex)", () => {
+    test("Hephaestus is omitted when only ZAI is available (no required provider connected)", () => {
      // #given
      const config = createConfig({ hasZaiCodingPlan: true })

--- a/src/cli/model-fallback.ts
+++ b/src/cli/model-fallback.ts
@@ -71,7 +71,7 @@ function isProviderAvailable(provider: string, avail: ProviderAvailability): boo
 function transformModelForProvider(provider: string, model: string): string {
  if (provider === "github-copilot") {
    return model
-      .replace("claude-opus-4-5", "claude-opus-4.5")
+      .replace("claude-opus-4-6", "claude-opus-4.6")
      .replace("claude-sonnet-4-5", "claude-sonnet-4.5")
      .replace("claude-haiku-4-5", "claude-haiku-4.5")
      .replace("claude-sonnet-4", "claude-sonnet-4")
@@ -122,6 +122,13 @@ function isRequiredModelAvailable(
  return matchingEntry.providers.some((provider) => isProviderAvailable(provider, avail))
 }

+/** True when at least one of `requiredProviders` is available according to `avail`. */
+function isRequiredProviderAvailable(requiredProviders: string[], avail: ProviderAvailability): boolean {
+  // De Morgan form of `some`: an empty list yields false.
+  const noneAvailable = requiredProviders.every((name) => !isProviderAvailable(name, avail))
+  return !noneAvailable
+}
+
 export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
  const avail = toProviderAvailability(config)
  const hasAnyProvider =
@@ -185,6 +192,9 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
    if (req.requiresModel && !isRequiredModelAvailable(req.requiresModel, req.fallbackChain, avail)) {
      continue
    }
+    if (req.requiresProvider && !isRequiredProviderAvailable(req.requiresProvider, avail)) {
+      continue
+    }

    const resolved = resolveModelFromChain(req.fallbackChain, avail)
    if (resolved) {
@@ -205,6 +215,9 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
    if (req.requiresModel && !isRequiredModelAvailable(req.requiresModel, req.fallbackChain, avail)) {
      continue
    }
+    if (req.requiresProvider && !isRequiredProviderAvailable(req.requiresProvider, avail)) {
+      continue
+    }

    const resolved = resolveModelFromChain(fallbackChain, avail)
    if (resolved) {
--- a/src/cli/run/agent-resolver.ts
+++ b/src/cli/run/agent-resolver.ts
@@ -0,0 +1,69 @@
+import pc from "picocolors"
+import type { RunOptions } from "./types"
+import type { OhMyOpenCodeConfig } from "../../config"
+
+const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const // core agents, in the order they are tried as fallbacks
+const DEFAULT_AGENT = "sisyphus" // used when no agent is configured or every core agent is disabled
+
+type EnvVars = Record<string, string | undefined> // same shape as process.env, which is the default env source
+
+/** Trims an agent name and maps core agents to their canonical lowercase form; blank input yields undefined. */
+const normalizeAgentName = (agent?: string): string | undefined => {
+  const candidate = agent?.trim()
+  if (!candidate) return undefined
+  const key = candidate.toLowerCase()
+  // Names that are not core agents keep their original casing.
+  return CORE_AGENT_ORDER.find((core) => core.toLowerCase() === key) ?? candidate
+}
+
+/** Reports whether `agent` is off via `sisyphus_agent.disabled` or a case-insensitive `disabled_agents` entry. */
+const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
+  const target = agent.toLowerCase()
+  const sisyphusOff = target === "sisyphus" && config.sisyphus_agent?.disabled === true
+  if (sisyphusOff) return true
+  const denyList = config.disabled_agents ?? []
+  // Entries in the deny list are compared without regard to case.
+  return denyList.some((entry) => entry.toLowerCase() === target)
+}
+
+/** Picks the first agent in CORE_AGENT_ORDER that is not disabled by `config`. */
+const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
+  const firstEnabled = CORE_AGENT_ORDER.find(
+    (candidate) => !isAgentDisabled(candidate, config)
+  )
+  // Every core agent is disabled: return the default agent regardless.
+  return firstEnabled ?? DEFAULT_AGENT
+}
+
+/**
+ * Resolves which agent the run command should use.
+ *
+ * Precedence: options.agent, then env.OPENCODE_DEFAULT_AGENT, then
+ * pluginConfig.default_run_agent, then the built-in default. If the chosen
+ * agent is disabled, a yellow warning is logged and a core fallback agent is
+ * returned instead.
+ */
+export const resolveRunAgent = (
+  options: RunOptions,
+  pluginConfig: OhMyOpenCodeConfig,
+  env: EnvVars = process.env
+): string => {
+  // Each source is normalized on its own; the first non-blank one wins.
+  const normalized =
+    normalizeAgentName(options.agent) ??
+    normalizeAgentName(env.OPENCODE_DEFAULT_AGENT) ??
+    normalizeAgentName(pluginConfig.default_run_agent) ??
+    DEFAULT_AGENT
+
+  if (!isAgentDisabled(normalized, pluginConfig)) {
+    return normalized
+  }
+
+  const fallback = pickFallbackAgent(pluginConfig)
+  // The fallback may itself be disabled when every core agent is turned off.
+  const warning = isAgentDisabled(fallback, pluginConfig)
+    ? `Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
+    : `Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
+  console.log(pc.yellow(warning))
+  return fallback
+}
--- a/src/cli/run/events.ts
+++ b/src/cli/run/events.ts
@@ -65,6 +65,8 @@ export interface EventState {
  currentTool: string | null
  /** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
  hasReceivedMeaningfulWork: boolean
+  /** Count of assistant messages for the main session */
+  messageCount: number
 }

 export function createEventState(): EventState {
@@ -76,6 +78,7 @@ export function createEventState(): EventState {
    lastPartText: "",
    currentTool: null,
    hasReceivedMeaningfulWork: false,
+    messageCount: 0,
  }
 }

@@ -266,6 +269,7 @@ function handleMessageUpdated(
  if (props?.info?.role !== "assistant") return

  state.hasReceivedMeaningfulWork = true
+  state.messageCount++
 }

 function handleToolExecute(
--- a/src/cli/run/index.ts
+++ b/src/cli/run/index.ts
@@ -1,2 +1,7 @@
 export { run } from "./runner"
-export type { RunOptions, RunContext } from "./types"
+export { resolveRunAgent } from "./agent-resolver"
+export { createServerConnection } from "./server-connection"
+export { resolveSession } from "./session-resolver"
+export { createJsonOutputManager } from "./json-output"
+export { executeOnCompleteHook } from "./on-complete-hook"
+export type { RunOptions, RunContext, RunResult, ServerConnection } from "./types"
--- a/src/cli/run/integration.test.ts
+++ b/src/cli/run/integration.test.ts
@@ -0,0 +1,294 @@
+import { describe, it, expect, mock, spyOn, beforeEach, afterEach } from "bun:test"
+import type { RunResult } from "./types"
+import { createJsonOutputManager } from "./json-output"
+import { resolveSession } from "./session-resolver"
+import { executeOnCompleteHook } from "./on-complete-hook"
+import type { OpencodeClient } from "./types"
+
+const mockServerClose = mock(() => {})
+const mockCreateOpencode = mock(() =>
+  Promise.resolve({
+    client: { session: {} },
+    server: { url: "http://127.0.0.1:9999", close: mockServerClose },
+  })
+)
+const mockCreateOpencodeClient = mock(() => ({ session: {} }))
+const mockIsPortAvailable = mock(() => Promise.resolve(true))
+const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 9999, wasAutoSelected: false }))
+
+mock.module("@opencode-ai/sdk", () => ({
+  createOpencode: mockCreateOpencode,
+  createOpencodeClient: mockCreateOpencodeClient,
+}))
+
+mock.module("../../shared/port-utils", () => ({
+  isPortAvailable: mockIsPortAvailable,
+  getAvailableServerPort: mockGetAvailableServerPort,
+  DEFAULT_SERVER_PORT: 4096,
+}))
+
+const { createServerConnection } = await import("./server-connection")
+
+interface MockWriteStream {
+  write: (chunk: string) => boolean
+  writes: string[]
+}
+
+/** Creates an in-memory write-stream stand-in that records each chunk given to `write`. */
+function createMockWriteStream(): MockWriteStream {
+  return {
+    writes: [],
+    write(this: MockWriteStream, chunk: string): boolean {
+      this.writes.push(chunk)
+      return true
+    },
+  }
+}
+
+/** Fake client: `session.get` resolves to `getResult` (or echoes the requested id); `session.create` yields "new-session-id". */
+const createMockClient = (
+  getResult?: { error?: unknown; data?: { id: string } }
+): OpencodeClient => {
+  const get = mock(async (opts: { path: { id: string } }) => getResult ?? { data: { id: opts.path.id } })
+  const create = mock(async () => ({ data: { id: "new-session-id" } }))
+
+  // Only `get` and `create` are stubbed, hence the double cast.
+  return { session: { get, create } } as unknown as OpencodeClient
+}
+
+describe("integration: --json mode", () => {
+  it("emits valid RunResult JSON to stdout", () => {
+    // given
+    const mockStdout = createMockWriteStream()
+    const mockStderr = createMockWriteStream()
+    const result: RunResult = {
+      sessionId: "test-session",
+      success: true,
+      durationMs: 1234,
+      messageCount: 42,
+      summary: "Test summary",
+    }
+    const manager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+
+    // when
+    manager.emitResult(result)
+
+    // then
+    expect(mockStdout.writes).toHaveLength(1)
+    const emitted = mockStdout.writes[0]!
+    expect(() => JSON.parse(emitted)).not.toThrow()
+    const parsed = JSON.parse(emitted) as RunResult
+    expect(parsed.sessionId).toBe("test-session")
+    expect(parsed.success).toBe(true)
+    expect(parsed.durationMs).toBe(1234)
+    expect(parsed.messageCount).toBe(42)
+    expect(parsed.summary).toBe("Test summary")
+  })
+
+  it("redirects stdout to stderr when active", () => {
+    // given
+    spyOn(console, "log").mockImplementation(() => {})
+    const mockStdout = createMockWriteStream()
+    const mockStderr = createMockWriteStream()
+    const manager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+    manager.redirectToStderr()
+
+    // when
+    mockStdout.write("should go to stderr")
+
+    // then
+    expect(mockStdout.writes).toHaveLength(0)
+    expect(mockStderr.writes).toEqual(["should go to stderr"])
+  })
+})
+
+describe("integration: --session-id", () => {
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+  })
+
+  it("resolves provided session ID without creating new session", async () => {
+    // given
+    const sessionId = "existing-session-id"
+    const mockClient = createMockClient({ data: { id: sessionId } })
+
+    // when
+    const result = await resolveSession({ client: mockClient, sessionId })
+
+    // then
+    expect(result).toBe(sessionId)
+    expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("throws when session does not exist", async () => {
+    // given
+    const sessionId = "non-existent-session-id"
+    const mockClient = createMockClient({ error: { message: "Session not found" } })
+
+    // when
+    const result = resolveSession({ client: mockClient, sessionId })
+
+    // then
+    await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
+    expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+})
+
+describe("integration: --on-complete", () => {
+  let spawnSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    spyOn(console, "error").mockImplementation(() => {})
+    spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
+      exited: Promise.resolve(0),
+      exitCode: 0,
+    } as unknown as ReturnType<typeof Bun.spawn>)
+  })
+
+  afterEach(() => {
+    spawnSpy.mockRestore()
+  })
+
+  it("passes all 4 env vars as strings to spawned process", async () => {
+    // given
+    spawnSpy.mockClear()
+
+    // when
+    await executeOnCompleteHook({
+      command: "echo test",
+      sessionId: "session-123",
+      exitCode: 0,
+      durationMs: 5000,
+      messageCount: 10,
+    })
+
+    // then
+    expect(spawnSpy).toHaveBeenCalledTimes(1)
+    const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(options?.env?.SESSION_ID).toBe("session-123")
+    expect(options?.env?.EXIT_CODE).toBe("0")
+    expect(options?.env?.DURATION_MS).toBe("5000")
+    expect(options?.env?.MESSAGE_COUNT).toBe("10")
+    expect(options?.env?.SESSION_ID).toBeTypeOf("string")
+    expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
+    expect(options?.env?.DURATION_MS).toBeTypeOf("string")
+    expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
+  })
+})
+
+describe("integration: option combinations", () => {
+  let mockStdout: MockWriteStream
+  let mockStderr: MockWriteStream
+  let spawnSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+    mockStdout = createMockWriteStream()
+    mockStderr = createMockWriteStream()
+    spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
+      exited: Promise.resolve(0),
+      exitCode: 0,
+    } as unknown as ReturnType<typeof Bun.spawn>)
+  })
+
+  afterEach(() => {
+    spawnSpy?.mockRestore?.()
+  })
+
+  it("json output and on-complete hook can both execute", async () => {
+    // given - json manager active + on-complete hook ready
+    const result: RunResult = {
+      sessionId: "session-123",
+      success: true,
+      durationMs: 5000,
+      messageCount: 10,
+      summary: "Test completed",
+    }
+    const jsonManager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+    jsonManager.redirectToStderr()
+    spawnSpy.mockClear()
+
+    // when - both are invoked sequentially (as runner would)
+    jsonManager.emitResult(result)
+    await executeOnCompleteHook({
+      command: "echo done",
+      sessionId: result.sessionId,
+      exitCode: result.success ? 0 : 1,
+      durationMs: result.durationMs,
+      messageCount: result.messageCount,
+    })
+
+    // then - json emits result AND on-complete hook runs
+    expect(mockStdout.writes).toHaveLength(1)
+    const emitted = mockStdout.writes[0]!
+    expect(() => JSON.parse(emitted)).not.toThrow()
+    expect(spawnSpy).toHaveBeenCalledTimes(1)
+    const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(args).toEqual(["sh", "-c", "echo done"])
+    const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(options?.env?.SESSION_ID).toBe("session-123")
+    expect(options?.env?.EXIT_CODE).toBe("0")
+    expect(options?.env?.DURATION_MS).toBe("5000")
+    expect(options?.env?.MESSAGE_COUNT).toBe("10")
+  })
+})
+
+describe("integration: server connection", () => {
+  let consoleSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    consoleSpy = spyOn(console, "log").mockImplementation(() => {})
+    mockCreateOpencode.mockClear()
+    mockCreateOpencodeClient.mockClear()
+    mockServerClose.mockClear()
+  })
+
+  afterEach(() => {
+    consoleSpy.mockRestore()
+  })
+
+  it("attach mode creates client with no-op cleanup", async () => {
+    // given
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+
+    // then
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+
+  it("port with available port starts server", async () => {
+    // given
+    const signal = new AbortController().signal
+    const port = 9999
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    expect(mockCreateOpencode).toHaveBeenCalled()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+})
--- a/src/cli/run/json-output.test.ts
+++ b/src/cli/run/json-output.test.ts
@@ -0,0 +1,170 @@
+import { describe, it, expect, beforeEach } from "bun:test"
+import type { RunResult } from "./types"
+import { createJsonOutputManager } from "./json-output"
+
+interface MockWriteStream {
+  write: (chunk: string) => boolean
+  writes: string[]
+}
+
+/** Builds a fake stream whose `write` appends each chunk to its own `writes` array. */
+function createMockWriteStream(): MockWriteStream {
+  const recorded: string[] = []
+  // Uses `this`, so it records on whichever stream object it is invoked on.
+  const write = function (this: MockWriteStream, chunk: string): boolean {
+    this.writes.push(chunk)
+    return true
+  }
+  return { writes: recorded, write }
+}
+
+describe("createJsonOutputManager", () => {
+  let mockStdout: MockWriteStream
+  let mockStderr: MockWriteStream
+
+  beforeEach(() => {
+    mockStdout = createMockWriteStream()
+    mockStderr = createMockWriteStream()
+  })
+
+  describe("redirectToStderr", () => {
+    it("causes stdout writes to go to stderr", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      mockStdout.write("test message")
+
+      // then
+      expect(mockStdout.writes).toHaveLength(0)
+      expect(mockStderr.writes).toEqual(["test message"])
+    })
+  })
+
+  describe("restore", () => {
+    it("reverses the redirect", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      manager.restore()
+      mockStdout.write("restored message")
+
+      // then
+      expect(mockStdout.writes).toEqual(["restored message"])
+      expect(mockStderr.writes).toHaveLength(0)
+    })
+  })
+
+  describe("emitResult", () => {
+    it("writes valid JSON to stdout", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 1234,
+        messageCount: 42,
+        summary: "Test summary",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      expect(mockStdout.writes).toHaveLength(1)
+      const emitted = mockStdout.writes[0]!
+      expect(() => JSON.parse(emitted)).not.toThrow()
+    })
+
+    it("output matches RunResult schema", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 1234,
+        messageCount: 42,
+        summary: "Test summary",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      const emitted = mockStdout.writes[0]!
+      const parsed = JSON.parse(emitted) as RunResult
+      expect(parsed).toEqual(result)
+      expect(parsed.sessionId).toBe("test-session")
+      expect(parsed.success).toBe(true)
+      expect(parsed.durationMs).toBe(1234)
+      expect(parsed.messageCount).toBe(42)
+      expect(parsed.summary).toBe("Test summary")
+    })
+
+    it("restores stdout even if redirect was active", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 100,
+        messageCount: 1,
+        summary: "Test",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      expect(mockStdout.writes).toHaveLength(1)
+      expect(mockStdout.writes[0]!).toBe(JSON.stringify(result) + "\n")
+
+      mockStdout.write("after emit")
+      expect(mockStdout.writes).toHaveLength(2)
+      expect(mockStderr.writes).toHaveLength(0)
+    })
+  })
+
+  describe("multiple redirects and restores", () => {
+    it("work correctly", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.redirectToStderr()
+      mockStdout.write("first redirect")
+
+      manager.redirectToStderr()
+      mockStdout.write("second redirect")
+
+      manager.restore()
+      mockStdout.write("after restore")
+
+      // then
+      expect(mockStdout.writes).toEqual(["after restore"])
+      expect(mockStderr.writes).toEqual(["first redirect", "second redirect"])
+    })
+  })
+})
--- a/src/cli/run/json-output.ts
+++ b/src/cli/run/json-output.ts
@@ -0,0 +1,52 @@
+import type { RunResult } from "./types"
+
+export interface JsonOutputManager {
+  redirectToStderr: () => void // route subsequent stdout writes to stderr
+  restore: () => void // reinstate the stdout writer captured at creation
+  emitResult: (result: RunResult) => void // restore stdout, then write the result as one JSON line
+}
+
+interface JsonOutputManagerOptions {
+  stdout?: NodeJS.WriteStream // defaults to process.stdout
+  stderr?: NodeJS.WriteStream // defaults to process.stderr
+}
+
+/**
+ * Builds a manager that can reroute writes on `stdout` to `stderr` and emit one JSON result line.
+ * The stdout writer is bound once at creation, so `restore`/`emitResult` always use that writer.
+ */
+export function createJsonOutputManager(
+  options: JsonOutputManagerOptions = {}
+): JsonOutputManager {
+  const stdout = options.stdout ?? process.stdout
+  const stderr = options.stderr ?? process.stderr
+  const originalWrite = stdout.write.bind(stdout)
+
+  // Stand-in for stdout.write that forwards each call shape to stderr.
+  const forwardToStderr = function (
+    chunk: Uint8Array | string,
+    encodingOrCallback?: BufferEncoding | ((error?: Error | null) => void),
+    callback?: (error?: Error | null) => void
+  ): boolean {
+    if (typeof encodingOrCallback === "function") {
+      return stderr.write(chunk, encodingOrCallback)
+    }
+    return encodingOrCallback === undefined
+      ? stderr.write(chunk)
+      : stderr.write(chunk, encodingOrCallback, callback)
+  } as NodeJS.WriteStream["write"]
+
+  const restore = (): void => {
+    stdout.write = originalWrite
+  }
+  return {
+    redirectToStderr: (): void => {
+      stdout.write = forwardToStderr
+    },
+    restore,
+    emitResult: (result: RunResult): void => {
+      restore()
+      originalWrite(JSON.stringify(result) + "\n")
+    },
+  }
+}
--- a/src/cli/run/on-complete-hook.test.ts
+++ b/src/cli/run/on-complete-hook.test.ts
@@ -0,0 +1,179 @@
+import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
+import { executeOnCompleteHook } from "./on-complete-hook"
+
+describe("executeOnCompleteHook", () => {
+  function createProc(exitCode: number) {
+    return {
+      exited: Promise.resolve(exitCode),
+      exitCode,
+    } as unknown as ReturnType<typeof Bun.spawn>
+  }
+
+  let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
+
+  beforeEach(() => {
+    consoleErrorSpy = spyOn(console, "error").mockImplementation(() => {})
+  })
+
+  afterEach(() => {
+    consoleErrorSpy.mockRestore()
+  })
+
+  it("executes command with correct env vars", async () => {
+    // given: Bun.spawn is stubbed with a process that exits with code 0
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "echo test",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then: the hook ran through `sh -c` with the run details in env and inherited stdio
+      expect(spawnSpy).toHaveBeenCalledTimes(1)
+      const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+
+      expect(args).toEqual(["sh", "-c", "echo test"])
+      expect(options?.env?.SESSION_ID).toBe("session-123")
+      expect(options?.env?.EXIT_CODE).toBe("0")
+      expect(options?.env?.DURATION_MS).toBe("5000")
+      expect(options?.env?.MESSAGE_COUNT).toBe("10")
+      expect(options?.stdout).toBe("inherit")
+      expect(options?.stderr).toBe("inherit")
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("env var values are strings", async () => {
+    // given: Bun.spawn is stubbed with a process that exits with code 0
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "echo test",
+        sessionId: "session-123",
+        exitCode: 1,
+        durationMs: 12345,
+        messageCount: 42,
+      })
+
+      // then: the numeric run details reach the hook as strings
+      const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+
+      expect(options?.env?.EXIT_CODE).toBe("1")
+      expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
+      expect(options?.env?.DURATION_MS).toBe("12345")
+      expect(options?.env?.DURATION_MS).toBeTypeOf("string")
+      expect(options?.env?.MESSAGE_COUNT).toBe("42")
+      expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("empty command string is no-op", async () => {
+    // given: a spawn spy that must stay untouched
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then: nothing is spawned
+      expect(spawnSpy).not.toHaveBeenCalled()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("whitespace-only command is no-op", async () => {
+    // given: a spawn spy that must stay untouched
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "   ",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then: nothing is spawned
+      expect(spawnSpy).not.toHaveBeenCalled()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("command failure logs warning but does not throw", async () => {
+    // given: the spawned process exits with code 1
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1))
+
+    try {
+      // when
+      await expect(
+        executeOnCompleteHook({
+          command: "false",
+          sessionId: "session-123",
+          exitCode: 0,
+          durationMs: 5000,
+          messageCount: 10,
+        })
+      ).resolves.toBeUndefined()
+
+      // then: a warning naming the exit code was written via console.error
+      expect(consoleErrorSpy).toHaveBeenCalled()
+      const warningCall = consoleErrorSpy.mock.calls.find(
+        (call) => typeof call[0] === "string" && call[0].includes("Warning: on-complete hook exited with code 1")
+      )
+      expect(warningCall).toBeDefined()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("spawn error logs warning but does not throw", async () => {
+    // given: Bun.spawn itself throws
+    const spawnError = new Error("Command not found")
+    const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => {
+      throw spawnError
+    })
+
+    try {
+      // when
+      await expect(
+        executeOnCompleteHook({
+          command: "nonexistent-command",
+          sessionId: "session-123",
+          exitCode: 0,
+          durationMs: 5000,
+          messageCount: 10,
+        })
+      ).resolves.toBeUndefined()
+
+      // then: at least one warning or error message was written via console.error
+      expect(consoleErrorSpy).toHaveBeenCalled()
+      const errorCalls = consoleErrorSpy.mock.calls.filter((call) => {
+        const firstArg = call[0]
+        return typeof firstArg === "string" && (firstArg.includes("Warning") || firstArg.toLowerCase().includes("error"))
+      })
+      expect(errorCalls.length).toBeGreaterThan(0)
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+})
--- a/src/cli/run/on-complete-hook.ts
+++ b/src/cli/run/on-complete-hook.ts
@@ -0,0 +1,42 @@
+import pc from "picocolors"
+
+/**
+ * Runs the user-supplied on-complete shell command once a run has finished.
+ *
+ * The command is trimmed and skipped when blank; otherwise it is executed via
+ * `sh -c` with SESSION_ID, EXIT_CODE, DURATION_MS and MESSAGE_COUNT added to
+ * the inherited environment. A non-zero exit or a spawn failure is reported
+ * through console.error as a warning and is never thrown to the caller.
+ */
+export async function executeOnCompleteHook(options: {
+  command: string
+  sessionId: string
+  exitCode: number
+  durationMs: number
+  messageCount: number
+}): Promise<void> {
+  const hookCommand = options.command.trim()
+  if (hookCommand.length === 0) return
+
+  console.error(pc.dim(`Running on-complete hook: ${hookCommand}`))
+
+  // Numeric run details are stringified before being exported to the hook.
+  const hookEnv = {
+    ...process.env,
+    SESSION_ID: options.sessionId,
+    EXIT_CODE: String(options.exitCode),
+    DURATION_MS: String(options.durationMs),
+    MESSAGE_COUNT: String(options.messageCount),
+  }
+
+  // Best-effort: every failure below ends in a warning, not an exception.
+  try {
+    const child = Bun.spawn(["sh", "-c", hookCommand], { env: hookEnv, stdout: "inherit", stderr: "inherit" })
+    const status = await child.exited
+    if (status === 0) return
+    console.error(pc.yellow(`Warning: on-complete hook exited with code ${status}`))
+  } catch (error) {
+    const reason = error instanceof Error ? error.message : String(error)
+    console.error(pc.yellow(`Warning: Failed to execute on-complete hook: ${reason}`))
+  }
+}
--- a/src/cli/run/runner.ts
+++ b/src/cli/run/runner.ts
@@ -1,100 +1,37 @@
-import { createOpencode } from "@opencode-ai/sdk"
 import pc from "picocolors"
 import type { RunOptions, RunContext } from "./types"
 import { checkCompletionConditions } from "./completion"
 import { createEventState, processEvents, serializeError } from "./events"
-import type { OhMyOpenCodeConfig } from "../../config"
 import { loadPluginConfig } from "../../plugin-config"
+import { createServerConnection } from "./server-connection"
+import { resolveSession } from "./session-resolver"
+import { createJsonOutputManager } from "./json-output"
+import { executeOnCompleteHook } from "./on-complete-hook"
+import { resolveRunAgent } from "./agent-resolver"
+
+export { resolveRunAgent }

 const POLL_INTERVAL_MS = 500
 const DEFAULT_TIMEOUT_MS = 0
-const SESSION_CREATE_MAX_RETRIES = 3
-const SESSION_CREATE_RETRY_DELAY_MS = 1000
-const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
-const DEFAULT_AGENT = "sisyphus"
-
-type EnvVars = Record<string, string | undefined>
-
-const normalizeAgentName = (agent?: string): string | undefined => {
-  if (!agent) return undefined
-  const trimmed = agent.trim()
-  if (!trimmed) return undefined
-  const lowered = trimmed.toLowerCase()
-  const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
-  return coreMatch ?? trimmed
-}
-
-const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
-  const lowered = agent.toLowerCase()
-  if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
-    return true
-  }
-  return (config.disabled_agents ?? []).some(
-    (disabled) => disabled.toLowerCase() === lowered
-  )
-}
-
-const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
-  for (const agent of CORE_AGENT_ORDER) {
-    if (!isAgentDisabled(agent, config)) {
-      return agent
-    }
-  }
-  return DEFAULT_AGENT
-}
-
-export const resolveRunAgent = (
-  options: RunOptions,
-  pluginConfig: OhMyOpenCodeConfig,
-  env: EnvVars = process.env
-): string => {
-  const cliAgent = normalizeAgentName(options.agent)
-  const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
-  const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
-  const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
-  const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
-
-  if (isAgentDisabled(normalized, pluginConfig)) {
-    const fallback = pickFallbackAgent(pluginConfig)
-    const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
-    if (fallbackDisabled) {
-      console.log(
-        pc.yellow(
-          `Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
-        )
-      )
-      return fallback
-    }
-    console.log(
-      pc.yellow(
-        `Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
-      )
-    )
-    return fallback
-  }
-
-  return normalized
-}

 export async function run(options: RunOptions): Promise<number> {
-  // Set CLI run mode environment variable before any config loading
-  // This signals to config-handler to deny Question tool (no TUI to answer)
  process.env.OPENCODE_CLI_RUN_MODE = "true"

+  const startTime = Date.now()
  const {
    message,
    directory = process.cwd(),
    timeout = DEFAULT_TIMEOUT_MS,
  } = options
+
+  const jsonManager = options.json ? createJsonOutputManager() : null
+  if (jsonManager) jsonManager.redirectToStderr()
+
  const pluginConfig = loadPluginConfig(directory, { command: "run" })
  const resolvedAgent = resolveRunAgent(options, pluginConfig)
-
-  console.log(pc.cyan("Starting opencode server..."))
-
  const abortController = new AbortController()
  let timeoutId: ReturnType<typeof setTimeout> | null = null

-  // timeout=0 means no timeout (run until completion)
  if (timeout > 0) {
    timeoutId = setTimeout(() => {
      console.log(pc.yellow("\nTimeout reached. Aborting..."))
@@ -103,23 +40,15 @@ export async function run(options: RunOptions): Promise<number> {
  }

  try {
-    // Support custom OpenCode server port via environment variable
-    // This allows Open Agent and other orchestrators to run multiple
-    // concurrent missions without port conflicts
-    const serverPort = process.env.OPENCODE_SERVER_PORT
-      ? parseInt(process.env.OPENCODE_SERVER_PORT, 10)
-      : undefined
-    const serverHostname = process.env.OPENCODE_SERVER_HOSTNAME || undefined
-
-    const { client, server } = await createOpencode({
+    const { client, cleanup: serverCleanup } = await createServerConnection({
+      port: options.port,
+      attach: options.attach,
      signal: abortController.signal,
-      ...(serverPort && !isNaN(serverPort) ? { port: serverPort } : {}),
-      ...(serverHostname ? { hostname: serverHostname } : {}),
    })

    const cleanup = () => {
      if (timeoutId) clearTimeout(timeoutId)
-      server.close()
+      serverCleanup()
    }

    process.on("SIGINT", () => {
@@ -129,61 +58,14 @@ export async function run(options: RunOptions): Promise<number> {
    })

    try {
-      // Retry session creation with exponential backoff
-      // Server might not be fully ready even after "listening" message
-      let sessionID: string | undefined
-      let lastError: unknown
-
-      for (let attempt = 1; attempt <= SESSION_CREATE_MAX_RETRIES; attempt++) {
-        const sessionRes = await client.session.create({
-          body: { title: "oh-my-opencode run" },
-        })
-
-        if (sessionRes.error) {
-          lastError = sessionRes.error
-          console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`))
-          console.error(pc.dim(`  Error: ${serializeError(sessionRes.error)}`))
-
-          if (attempt < SESSION_CREATE_MAX_RETRIES) {
-            const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
-            console.log(pc.dim(`  Retrying in ${delay}ms...`))
-            await new Promise((resolve) => setTimeout(resolve, delay))
-            continue
-          }
-        }
-
-        sessionID = sessionRes.data?.id
-        if (sessionID) {
-          break
-        }
-
-        // No error but also no session ID - unexpected response
-        lastError = new Error(`Unexpected response: ${JSON.stringify(sessionRes, null, 2)}`)
-        console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`))
-
-        if (attempt < SESSION_CREATE_MAX_RETRIES) {
-          const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
-          console.log(pc.dim(`  Retrying in ${delay}ms...`))
-          await new Promise((resolve) => setTimeout(resolve, delay))
-        }
-      }
-
-      if (!sessionID) {
-        console.error(pc.red("Failed to create session after all retries"))
-        console.error(pc.dim(`Last error: ${serializeError(lastError)}`))
-        cleanup()
-        return 1
-      }
+      const sessionID = await resolveSession({
+        client,
+        sessionId: options.sessionId,
+      })

      console.log(pc.dim(`Session: ${sessionID}`))

-      const ctx: RunContext = {
-        client,
-        sessionID,
-        directory,
-        abortController,
-      }
-
+      const ctx: RunContext = { client, sessionID, directory, abortController }
      const events = await client.event.subscribe()
      const eventState = createEventState()
      const eventProcessor = processEvents(ctx, events.stream, eventState)
@@ -199,47 +81,41 @@ export async function run(options: RunOptions): Promise<number> {
      })

      console.log(pc.dim("Waiting for completion...\n"))
-
-      while (!abortController.signal.aborted) {
-        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
-
-        if (!eventState.mainSessionIdle) {
-          continue
-        }
-
-        // Check if session errored - exit with failure if so
-        if (eventState.mainSessionError) {
-          console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
-          console.error(pc.yellow("Check if todos were completed before the error."))
-          cleanup()
-          process.exit(1)
-        }
-
-        // Guard against premature completion: don't check completion until the
-        // session has produced meaningful work (text output, tool call, or tool result).
-        // Without this, a session that goes busy->idle before the LLM responds
-        // would exit immediately because 0 todos + 0 children = "complete".
-        if (!eventState.hasReceivedMeaningfulWork) {
-          continue
-        }
-
-        const shouldExit = await checkCompletionConditions(ctx)
-        if (shouldExit) {
-          console.log(pc.green("\n\nAll tasks completed."))
-          cleanup()
-          process.exit(0)
-        }
-      }
+      const exitCode = await pollForCompletion(ctx, eventState, abortController)

      await eventProcessor.catch(() => {})
      cleanup()
-      return 130
+
+      const durationMs = Date.now() - startTime
+
+      if (options.onComplete) {
+        await executeOnCompleteHook({
+          command: options.onComplete,
+          sessionId: sessionID,
+          exitCode,
+          durationMs,
+          messageCount: eventState.messageCount,
+        })
+      }
+
+      if (jsonManager) {
+        jsonManager.emitResult({
+          sessionId: sessionID,
+          success: exitCode === 0,
+          durationMs,
+          messageCount: eventState.messageCount,
+          summary: eventState.lastPartText.slice(0, 200) || "Run completed",
+        })
+      }
+
+      return exitCode
    } catch (err) {
      cleanup()
      throw err
    }
  } catch (err) {
    if (timeoutId) clearTimeout(timeoutId)
+    if (jsonManager) jsonManager.restore()
    if (err instanceof Error && err.name === "AbortError") {
      return 130
    }
@@ -247,3 +123,31 @@ export async function run(options: RunOptions): Promise<number> {
    return 1
  }
 }
+
+/**
+ * Polls the event state every POLL_INTERVAL_MS until the run finishes or is aborted.
+ * Returns 0 once completion conditions hold, 1 if the idle main session
+ * reported an error, and 130 when the abort signal is set first.
+ */
+async function pollForCompletion(
+  ctx: RunContext,
+  eventState: ReturnType<typeof createEventState>,
+  abortController: AbortController
+): Promise<number> {
+  for (;;) {
+    if (abortController.signal.aborted) return 130
+    await new Promise((wake) => setTimeout(wake, POLL_INTERVAL_MS))
+    if (eventState.mainSessionIdle) {
+      if (eventState.mainSessionError) {
+        console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
+        console.error(pc.yellow("Check if todos were completed before the error."))
+        return 1
+      }
+      // A busy->idle flip before any meaningful work must not count as completion.
+      if (eventState.hasReceivedMeaningfulWork && (await checkCompletionConditions(ctx))) {
+        console.log(pc.green("\n\nAll tasks completed."))
+        return 0
+      }
+    }
+  }
+}
--- a/src/cli/run/server-connection.test.ts
+++ b/src/cli/run/server-connection.test.ts
@@ -0,0 +1,152 @@
+import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test"
+
+const originalConsole = globalThis.console // kept so afterEach can undo the console swap
+
+const mockServerClose = mock(() => {})
+const mockCreateOpencode = mock(() => // stands in for starting a server: resolves to a client plus a closable server
+  Promise.resolve({
+    client: { session: {} },
+    server: { url: "http://127.0.0.1:4096", close: mockServerClose },
+  })
+)
+const mockCreateOpencodeClient = mock(() => ({ session: {} })) // stands in for attaching to an existing server
+const mockIsPortAvailable = mock(() => Promise.resolve(true))
+const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 4096, wasAutoSelected: false }))
+const mockConsoleLog = mock(() => {})
+
+mock.module("@opencode-ai/sdk", () => ({
+  createOpencode: mockCreateOpencode,
+  createOpencodeClient: mockCreateOpencodeClient,
+}))
+
+mock.module("../../shared/port-utils", () => ({
+  isPortAvailable: mockIsPortAvailable,
+  getAvailableServerPort: mockGetAvailableServerPort,
+  DEFAULT_SERVER_PORT: 4096,
+}))
+
+const { createServerConnection } = await import("./server-connection") // loaded after the mock.module calls, presumably so it picks up the mocked dependencies
+
+describe("createServerConnection", () => {
+  beforeEach(() => {
+    mockCreateOpencode.mockClear()
+    mockCreateOpencodeClient.mockClear()
+    mockIsPortAvailable.mockClear()
+    mockGetAvailableServerPort.mockClear()
+    mockServerClose.mockClear()
+    mockConsoleLog.mockClear()
+    globalThis.console = { ...console, log: mockConsoleLog } as typeof console
+  })
+
+  afterEach(() => {
+    globalThis.console = originalConsole
+  })
+
+  it("attach mode returns client with no-op cleanup", async () => {
+    // given: an explicit URL to attach to
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+
+    // then: a client is built for that URL and cleanup closes nothing
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+
+  it("explicit port starts server when port is available", async () => {
+    // given: the requested port is reported as free
+    const signal = new AbortController().signal
+    const port = 8080
+    mockIsPortAvailable.mockResolvedValueOnce(true)
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then: a server is started on that port and cleanup closes it
+    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
+    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 8080, hostname: "127.0.0.1" })
+    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+
+  it("explicit port attaches when port is occupied", async () => {
+    // given: the requested port is reported as occupied
+    const signal = new AbortController().signal
+    const port = 8080
+    mockIsPortAvailable.mockResolvedValueOnce(false)
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then: no server is started; a client attaches to that port and cleanup closes nothing
+    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
+    expect(mockCreateOpencode).not.toHaveBeenCalled()
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: "http://127.0.0.1:8080" })
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+
+  it("auto mode uses getAvailableServerPort", async () => {
+    // given: neither port nor attach; port selection returns 4100
+    const signal = new AbortController().signal
+    mockGetAvailableServerPort.mockResolvedValueOnce({ port: 4100, wasAutoSelected: true })
+
+    // when
+    const result = await createServerConnection({ signal })
+
+    // then: the server is started on the selected port and cleanup closes it
+    expect(mockGetAvailableServerPort).toHaveBeenCalledWith(4096, "127.0.0.1")
+    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 4100, hostname: "127.0.0.1" })
+    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+
+  it("invalid port throws error", async () => {
+    // given
+    const signal = new AbortController().signal
+
+    // when & then: ports outside 1..65535 are rejected
+    await expect(createServerConnection({ port: 0, signal })).rejects.toThrow("Port must be between 1 and 65535")
+    await expect(createServerConnection({ port: -1, signal })).rejects.toThrow("Port must be between 1 and 65535")
+    await expect(createServerConnection({ port: 99999, signal })).rejects.toThrow("Port must be between 1 and 65535")
+  })
+
+  it("cleanup calls server.close for owned server", async () => {
+    // given: the requested port is reported as free
+    const signal = new AbortController().signal
+    mockIsPortAvailable.mockResolvedValueOnce(true)
+
+    // when
+    const result = await createServerConnection({ port: 8080, signal })
+    result.cleanup()
+
+    // then: the started server is closed exactly once
+    expect(mockServerClose).toHaveBeenCalledTimes(1)
+  })
+
+  it("cleanup is no-op for attached server", async () => {
+    // given: an explicit URL to attach to
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+    result.cleanup()
+
+    // then: nothing is closed
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+})
--- a/src/cli/run/server-connection.ts
+++ b/src/cli/run/server-connection.ts
@@ -0,0 +1,47 @@
+import { createOpencode, createOpencodeClient } from "@opencode-ai/sdk"
+import pc from "picocolors"
+import type { ServerConnection } from "./types"
+import { getAvailableServerPort, isPortAvailable, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
+
+/**
+ * Produces the client and cleanup callback for a run, in order of precedence:
+ * `attach` connects to that URL; `port` starts a server there when the port is
+ * free and otherwise attaches to it; with neither, a server is started on the
+ * port returned by getAvailableServerPort. Cleanup closes only servers started here.
+ * @throws Error when `port` is not an integer between 1 and 65535.
+ */
+export async function createServerConnection(options: {
+  port?: number
+  attach?: string
+  signal: AbortSignal
+}): Promise<ServerConnection> {
+  const { port, attach, signal } = options
+  const host = "127.0.0.1"
+
+  // Starts a server owned by this connection; its cleanup has to close it.
+  const startOwned = async (ownedPort: number): Promise<ServerConnection> => {
+    const { client, server } = await createOpencode({ signal, port: ownedPort, hostname: host })
+    console.log(pc.dim("Server listening at"), pc.cyan(server.url))
+    return { client, cleanup: () => server.close() }
+  }
+
+  if (attach !== undefined) {
+    console.log(pc.dim("Attaching to existing server at"), pc.cyan(attach))
+    return { client: createOpencodeClient({ baseUrl: attach }), cleanup: () => {} }
+  }
+
+  if (port !== undefined) {
+    // NaN and fractional ports pass a bare range comparison, so require an integer too.
+    if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error("Port must be between 1 and 65535")
+    if (await isPortAvailable(port, host)) {
+      console.log(pc.dim("Starting server on port"), pc.cyan(port.toString()))
+      return startOwned(port)
+    }
+    console.log(pc.dim("Port"), pc.cyan(port.toString()), pc.dim("is occupied, attaching to existing server"))
+    return { client: createOpencodeClient({ baseUrl: `http://${host}:${port}` }), cleanup: () => {} }
+  }
+
+  const { port: selectedPort, wasAutoSelected } = await getAvailableServerPort(DEFAULT_SERVER_PORT, host)
+  console.log(pc.dim(wasAutoSelected ? "Auto-selected port" : "Starting server on port"), pc.cyan(selectedPort.toString()))
+  return startOwned(selectedPort)
+}
--- a/src/cli/run/session-resolver.test.ts
+++ b/src/cli/run/session-resolver.test.ts
@@ -0,0 +1,158 @@
+/// <reference types="bun-types" />
+
+import { beforeEach, describe, expect, it, mock, spyOn } from "bun:test";
+import { resolveSession } from "./session-resolver";
+import type { OpencodeClient } from "./types";
+
+// Fake client: `get` resolves to getResult (or echoes the requested id) and
+// `create` replays createResults in call order, then a default new session.
+const createMockClient = (overrides: {
+  getResult?: { error?: unknown; data?: { id: string } }
+  createResults?: Array<{ error?: unknown; data?: { id: string } }>
+} = {}): OpencodeClient => {
+  const scriptedGet = overrides.getResult
+  const queued = overrides.createResults ?? []
+  let nextCreate = 0
+  const get = mock(async (opts: { path: { id: string } }) =>
+    scriptedGet ?? { data: { id: opts.path.id } }
+  )
+  const create = mock(async () => {
+    const scripted = queued[nextCreate]
+    nextCreate += 1
+    return scripted ?? { data: { id: "new-session-id" } }
+  })
+
+  return { session: { get, create } } as unknown as OpencodeClient
+}
+
+describe("resolveSession", () => {
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+  })
+
+  it("returns provided session ID when session exists", async () => {
+    // given: the lookup for the provided ID succeeds
+    const sessionId = "existing-session-id"
+    const mockClient = createMockClient({
+      getResult: { data: { id: sessionId } },
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient, sessionId })
+
+    // then: the same ID comes back and no session is created
+    expect(result).toBe(sessionId)
+    expect(mockClient.session.get).toHaveBeenCalledWith({
+      path: { id: sessionId },
+    })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("throws error when provided session ID not found", async () => {
+    // given: the lookup for the provided ID returns an error
+    const sessionId = "non-existent-session-id"
+    const mockClient = createMockClient({
+      getResult: { error: { message: "Session not found" } },
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient, sessionId })
+
+    // then: the call rejects and never falls back to creating a session
+    await Promise.resolve(
+      expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
+    )
+    expect(mockClient.session.get).toHaveBeenCalledWith({
+      path: { id: sessionId },
+    })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("creates new session when no session ID provided", async () => {
+    // given: session creation succeeds on the first call
+    const mockClient = createMockClient({
+      createResults: [{ data: { id: "new-session-id" } }],
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient })
+
+    // then: the new ID is returned and the session denies the "question" permission
+    expect(result).toBe("new-session-id")
+    expect(mockClient.session.create).toHaveBeenCalledWith({
+      body: {
+        title: "oh-my-opencode run",
+        permission: [
+          { permission: "question", action: "deny", pattern: "*" },
+        ],
+      },
+    })
+    expect(mockClient.session.get).not.toHaveBeenCalled()
+  })
+
+  it("retries session creation on failure", async () => {
+    // given: the first create call fails and the second succeeds
+    const mockClient = createMockClient({
+      createResults: [
+        { error: { message: "Network error" } },
+        { data: { id: "retried-session-id" } },
+      ],
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient })
+
+    // then: the ID from the second attempt is returned
+    expect(result).toBe("retried-session-id")
+    expect(mockClient.session.create).toHaveBeenCalledTimes(2)
+    expect(mockClient.session.create).toHaveBeenCalledWith({
+      body: {
+        title: "oh-my-opencode run",
+        permission: [
+          { permission: "question", action: "deny", pattern: "*" },
+        ],
+      },
+    })
+  })
+
+  it("throws after all retries exhausted", async () => {
+    // given: all three create calls return an error
+    const mockClient = createMockClient({
+      createResults: [
+        { error: { message: "Error 1" } },
+        { error: { message: "Error 2" } },
+        { error: { message: "Error 3" } },
+      ],
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient })
+
+    // then: the call rejects after exactly three attempts
+    await Promise.resolve(
+      expect(result).rejects.toThrow("Failed to create session after all retries")
+    )
+    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
+  })
+
+  it("session creation returns no ID", async () => {
+    // given: all three create calls return neither an error nor data
+    const mockClient = createMockClient({
+      createResults: [
+        { data: undefined },
+        { data: undefined },
+        { data: undefined },
+      ],
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient })
+
+    // then: a missing ID is retried like an error and finally rejects
+    await Promise.resolve(
+      expect(result).rejects.toThrow("Failed to create session after all retries")
+    )
+    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
+  })
+})
--- a/src/cli/run/session-resolver.ts
+++ b/src/cli/run/session-resolver.ts
@@ -0,0 +1,65 @@
+import pc from "picocolors"
+import type { OpencodeClient } from "./types"
+import { serializeError } from "./events"
+
+const SESSION_CREATE_MAX_RETRIES = 3
+const SESSION_CREATE_RETRY_DELAY_MS = 1000
+
+/**
+ * Returns the session ID the run should use.
+ *
+ * With `sessionId`, the session is looked up and the same ID is returned, or an
+ * error is thrown if the lookup fails. Without it, a new session is created,
+ * retrying up to SESSION_CREATE_MAX_RETRIES times with a linearly growing delay.
+ *
+ * @throws Error when the given session is not found or every create attempt fails.
+ */
+export async function resolveSession(options: {
+  client: OpencodeClient
+  sessionId?: string
+}): Promise<string> {
+  const { client, sessionId } = options
+
+  if (sessionId) {
+    // Reuse path: only verify that the session exists.
+    const lookup = await client.session.get({ path: { id: sessionId } })
+    const missing = Boolean(lookup.error) || !lookup.data
+    if (missing) throw new Error(`Session not found: ${sessionId}`)
+    return sessionId
+  }
+
+  let attempt = 0
+  while (attempt < SESSION_CREATE_MAX_RETRIES) {
+    attempt += 1
+    const created = await client.session.create({
+      body: {
+        title: "oh-my-opencode run",
+        // In CLI run mode there's no TUI to answer questions.
+        permission: [
+          { permission: "question", action: "deny" as const, pattern: "*" },
+        ],
+      } as any,
+    })
+
+    // An error response never yields an ID, even if data came back with it.
+    const newId = created.error ? undefined : created.data?.id
+    if (newId) return newId
+
+    const attemptLabel = `Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}`
+    if (created.error) {
+      console.error(pc.yellow(`${attemptLabel} failed:`))
+      console.error(pc.dim(`  Error: ${serializeError(created.error)}`))
+    } else {
+      console.error(pc.yellow(`${attemptLabel}: No session ID returned`))
+    }
+
+    // No point sleeping after the final attempt.
+    if (attempt < SESSION_CREATE_MAX_RETRIES) {
+      const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
+      console.log(pc.dim(`  Retrying in ${delay}ms...`))
+      await new Promise((resolve) => setTimeout(resolve, delay))
+    }
+  }
+
+  throw new Error("Failed to create session after all retries")
+}
--- a/src/cli/run/types.ts
+++ b/src/cli/run/types.ts
@@ -1,10 +1,29 @@
 import type { OpencodeClient } from "@opencode-ai/sdk"
+export type { OpencodeClient }

 export interface RunOptions {
  message: string
  agent?: string
  directory?: string
  timeout?: number
+  port?: number
+  attach?: string
+  onComplete?: string
+  json?: boolean
+  sessionId?: string
+}
+
+export interface ServerConnection { // An OpencodeClient bundled with a cleanup callback.
+  client: OpencodeClient
+  cleanup: () => void // takes no arguments, returns nothing; NOTE(review): presumably tears down whatever backs `client` — confirm at the call site
+}
+
+export interface RunResult {
+  sessionId: string
+  success: boolean
+  durationMs: number
+  messageCount: number
+  summary: string
 }

 export interface RunContext {
--- a/src/config/AGENTS.md
+++ b/src/config/AGENTS.md
@@ -0,0 +1,93 @@
+**Generated:** 2026-02-08T16:45:00+09:00
+**Commit:** f2b7b759
+**Branch:** dev
+
+## OVERVIEW
+
+Zod schema definitions for plugin configuration. 455+ lines of type-safe config validation with JSONC support, multi-level inheritance, and comprehensive agent/category overrides.
+
+## STRUCTURE
+```
+config/
+├── schema.ts              # Main Zod schema (455 lines) - agents, categories, experimental features
+├── schema.test.ts         # Schema validation tests (~735 lines)
+└── index.ts               # Barrel export
+```
+
+## SCHEMA COMPONENTS
+
+**Agent Configuration:**
+- `AgentOverrideConfigSchema`: Model, variant, temperature, permissions, tools
+- `AgentOverridesSchema`: Per-agent overrides (sisyphus, hephaestus, prometheus, etc.)
+- `AgentPermissionSchema`: Tool access control (edit, bash, webfetch, task, doom_loop, external_directory)
+
+**Category Configuration:**
+- `CategoryConfigSchema`: Model defaults, thinking budgets, tool restrictions
+- `CategoriesConfigSchema`: Named categories (visual-engineering, ultrabrain, deep, etc.)
+
+**Experimental Features:**
+- `ExperimentalConfigSchema`: Dynamic context pruning, task system, plugin timeouts
+- `DynamicContextPruningConfigSchema`: Intelligent context management
+
+**Built-in Enums:**
+- `AgentNameSchema`: sisyphus, hephaestus, prometheus, oracle, librarian, explore, multimodal-looker, metis, momus, atlas
+- `HookNameSchema`: 40+ hook names for lifecycle management
+- `BuiltinCommandNameSchema`: init-deep, ralph-loop, ulw-loop, cancel-ralph, refactor, start-work, stop-continuation
+- `BuiltinSkillNameSchema`: playwright, agent-browser, dev-browser, frontend-ui-ux, git-master
+
+## CONFIGURATION HIERARCHY
+
+1. **Project config** (`.opencode/oh-my-opencode.json`)
+2. **User config** (`~/.config/opencode/oh-my-opencode.json`)
+3. **Defaults** (hardcoded fallbacks)
+
+**Multi-level inheritance:** Project → User → Defaults
+
+## VALIDATION FEATURES
+
+- **JSONC support**: Comments and trailing commas
+- **Type safety**: Full TypeScript inference
+- **Migration support**: Legacy config compatibility
+- **Schema versioning**: $schema field for validation
+
+## KEY SCHEMAS
+
+| Schema | Purpose | Lines |
+|--------|---------|-------|
+| `OhMyOpenCodeConfigSchema` | Root config schema | 400+ |
+| `AgentOverrideConfigSchema` | Agent customization | 50+ |
+| `CategoryConfigSchema` | Task category defaults | 30+ |
+| `ExperimentalConfigSchema` | Beta features | 40+ |
+
+## USAGE PATTERNS
+
+**Agent Override:**
+```typescript
+agents: {
+  sisyphus: {
+    model: "anthropic/claude-opus-4-6",
+    variant: "max",
+    temperature: 0.1
+  }
+}
+```
+
+**Category Definition:**
+```typescript
+categories: {
+  "visual-engineering": {
+    model: "google/gemini-3-pro",
+    variant: "high"
+  }
+}
+```
+
+**Experimental Features:**
+```typescript
+experimental: {
+  dynamic_context_pruning: {
+    enabled: true,
+    notification: "detailed"
+  }
+}
+```
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -5,6 +5,8 @@ import {
  BrowserAutomationProviderSchema,
  BuiltinCategoryNameSchema,
  CategoryConfigSchema,
+  ExperimentalConfigSchema,
+  GitMasterConfigSchema,
  OhMyOpenCodeConfigSchema,
 } from "./schema"

@@ -606,3 +608,128 @@ describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
    expect(result.data?.browser_automation_engine).toBeUndefined()
  })
 })
+
+// ExperimentalConfigSchema flags: plugin_load_timeout_ms (number, values below
+// 1000 are rejected) and safe_hook_creation (boolean). Both may be omitted.
+// After asserting success, each test returns early so `data` is type-narrowed.
+describe("ExperimentalConfigSchema feature flags", () => {
+  test("accepts plugin_load_timeout_ms as number", () => {
+    //#given
+    const input = { plugin_load_timeout_ms: 5000 }
+
+    //#when
+    const parsed = ExperimentalConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.plugin_load_timeout_ms).toBe(5000)
+  })
+
+  test("rejects plugin_load_timeout_ms below 1000", () => {
+    //#given
+    const input = { plugin_load_timeout_ms: 500 }
+
+    //#when
+    const parsed = ExperimentalConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(false)
+  })
+
+  test("accepts safe_hook_creation as boolean", () => {
+    //#given
+    const input = { safe_hook_creation: false }
+
+    //#when
+    const parsed = ExperimentalConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.safe_hook_creation).toBe(false)
+  })
+
+  test("both fields are optional", () => {
+    //#given
+    const input = {}
+
+    //#when
+    const parsed = ExperimentalConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.plugin_load_timeout_ms).toBeUndefined()
+    expect(parsed.data.safe_hook_creation).toBeUndefined()
+  })
+})
+
+// commit_footer accepts a boolean or a string, defaults to true when omitted,
+// and rejects other types (a number is used as the negative case).
+// Each positive case asserts success first, then returns early on failure so
+// the parsed data is only read once TypeScript has narrowed the result.
+describe("GitMasterConfigSchema", () => {
+  test("accepts boolean true for commit_footer", () => {
+    //#given
+    const input = { commit_footer: true }
+
+    //#when
+    const parsed = GitMasterConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.commit_footer).toBe(true)
+  })
+
+  test("accepts boolean false for commit_footer", () => {
+    //#given
+    const input = { commit_footer: false }
+
+    //#when
+    const parsed = GitMasterConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.commit_footer).toBe(false)
+  })
+
+  test("accepts string value for commit_footer", () => {
+    //#given
+    const input = { commit_footer: "Custom footer text" }
+
+    //#when
+    const parsed = GitMasterConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.commit_footer).toBe("Custom footer text")
+  })
+
+  test("defaults commit_footer to true when not provided", () => {
+    //#given
+    const input = {}
+
+    //#when
+    const parsed = GitMasterConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(true)
+    if (!parsed.success) return
+    expect(parsed.data.commit_footer).toBe(true)
+  })
+
+  test("rejects number for commit_footer", () => {
+    //#given
+    const input = { commit_footer: 123 }
+
+    //#when
+    const parsed = GitMasterConfigSchema.safeParse(input)
+
+    //#then
+    expect(parsed.success).toBe(false)
+  })
+})
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
  edit: PermissionValue.optional(),
  bash: BashPermission.optional(),
  webfetch: PermissionValue.optional(),
+  task: PermissionValue.optional(),
  doom_loop: PermissionValue.optional(),
  external_directory: PermissionValue.optional(),
 })
@@ -32,6 +33,7 @@ export const BuiltinAgentNameSchema = z.enum([
 export const BuiltinSkillNameSchema = z.enum([
  "playwright",
  "agent-browser",
+  "dev-browser",
  "frontend-ui-ux",
  "git-master",
 ])
@@ -63,10 +65,12 @@ export const HookNameSchema = z.enum([
  "comment-checker",
  "grep-output-truncator",
  "tool-output-truncator",
+  "question-label-truncator",
  "directory-agents-injector",
  "directory-readme-injector",
  "empty-task-response-detector",
  "think-mode",
+  "subagent-question-blocker",
  "anthropic-context-window-limit-recovery",
  "preemptive-compaction",
  "rules-injector",
@@ -83,6 +87,7 @@ export const HookNameSchema = z.enum([
  "category-skill-reminder",

  "compaction-context-injector",
+  "compaction-todo-preserver",
  "claude-code-hooks",
  "auto-slash-command",
  "edit-error-recovery",
@@ -92,13 +97,22 @@ export const HookNameSchema = z.enum([
  "start-work",
  "atlas",
  "unstable-agent-babysitter",
+  "task-reminder",
+  "task-resume-info",
  "stop-continuation-guard",
  "tasks-todowrite-disabler",
+  "write-existing-file-guard",
+  "anthropic-effort",
 ])

 export const BuiltinCommandNameSchema = z.enum([
  "init-deep",
+  "ralph-loop",
+  "ulw-loop",
+  "cancel-ralph",
+  "refactor",
  "start-work",
+  "stop-continuation",
 ])

 export const AgentOverrideConfigSchema = z.object({
@@ -172,7 +186,7 @@ export const SisyphusAgentConfigSchema = z.object({
 })

 export const CategoryConfigSchema = z.object({
-  /** Human-readable description of the category's purpose. Shown in delegate_task prompt. */
+  /** Human-readable description of the category's purpose. Shown in task prompt. */
  description: z.string().optional(),
  model: z.string().optional(),
  variant: z.string().optional(),
@@ -255,6 +269,10 @@ export const ExperimentalConfigSchema = z.object({
  dynamic_context_pruning: DynamicContextPruningConfigSchema.optional(),
  /** Enable experimental task system for Todowrite disabler hook */
  task_system: z.boolean().optional(),
+  /** Timeout in ms for loadAllPluginComponents during config handler init (default: 10000, min: 1000) */
+  plugin_load_timeout_ms: z.number().min(1000).optional(),
+  /** Wrap hook creation in try/catch to prevent one failing hook from crashing the plugin (default: true at call site) */
+  safe_hook_creation: z.boolean().optional(),
 })

 export const SkillSourceSchema = z.union([
@@ -322,10 +340,10 @@ export const BabysittingConfigSchema = z.object({
 })

 export const GitMasterConfigSchema = z.object({
-  /** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
-  commit_footer: z.boolean().default(true),
-  /** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
-  include_co_authored_by: z.boolean().default(true),
+	/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true). Can be boolean or custom string. */
+	commit_footer: z.union([z.boolean(), z.string()]).default(true),
+	/** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
+	include_co_authored_by: z.boolean().default(true),
 })

 export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser", "dev-browser"])
@@ -368,8 +386,10 @@ export const TmuxConfigSchema = z.object({
 })

 export const SisyphusTasksConfigSchema = z.object({
-  /** Storage path for tasks (default: .sisyphus/tasks) */
-  storage_path: z.string().default(".sisyphus/tasks"),
+  /** Absolute or relative storage path override. When set, bypasses global config dir. */
+  storage_path: z.string().optional(),
+  /** Force task list ID (alternative to env ULTRAWORK_TASK_LIST_ID) */
+  task_list_id: z.string().optional(),
  /** Enable Claude Code path compatibility mode */
  claude_code_compat: z.boolean().default(false),
 })
@@ -407,6 +427,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
  websearch: WebsearchConfigSchema.optional(),
  tmux: TmuxConfigSchema.optional(),
  sisyphus: SisyphusConfigSchema.optional(),
+  /** Migration history to prevent re-applying migrations (e.g., model version upgrades) */
+  _migrations: z.array(z.string()).optional(),
 })

 export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -2,63 +2,29 @@

 ## OVERVIEW

-20 feature modules: background agents, skill MCPs, builtin skills/commands, Claude Code compatibility layer.
-
-**Feature Types**: Task orchestration, Skill definitions, Command templates, Claude Code loaders, Supporting utilities
+Background agents, skills, Claude Code compat, builtin commands, MCP managers, etc.

 ## STRUCTURE

-```
 features/
-├── background-agent/           # Task lifecycle (1418 lines)
-│   ├── manager.ts              # Launch → poll → complete
-│   └── concurrency.ts          # Per-provider limits
-├── builtin-skills/             # Core skills (1729 lines)
-│   └── skills.ts               # playwright, dev-browser, frontend-ui-ux, git-master, typescript-programmer
-├── builtin-commands/           # ralph-loop, refactor, ulw-loop, init-deep, start-work, cancel-ralph, stop-continuation
-├── claude-code-agent-loader/   # ~/.claude/agents/*.md
-├── claude-code-command-loader/ # ~/.claude/commands/*.md
-├── claude-code-mcp-loader/     # .mcp.json with ${VAR} expansion
-├── claude-code-plugin-loader/  # installed_plugins.json
-├── claude-code-session-state/  # Session persistence
-├── opencode-skill-loader/      # Skills from 6 directories
-├── context-injector/           # AGENTS.md/README.md injection
-├── boulder-state/              # Todo state persistence
-├── hook-message-injector/      # Message injection
-├── task-toast-manager/         # Background task notifications
-├── skill-mcp-manager/          # MCP client lifecycle (617 lines)
-├── tmux-subagent/              # Tmux session management
-├── mcp-oauth/                  # MCP OAuth handling
-├── sisyphus-swarm/             # Swarm coordination
-├── sisyphus-tasks/             # Task tracking
-└── claude-tasks/               # Task schema/storage - see AGENTS.md
-```
+├── background-agent/                      # Task lifecycle, concurrency (manager.ts 1642 lines)
+├── builtin-skills/                       # Skills like git-master (1107 lines)
+├── builtin-commands/                     # Commands like refactor (619 lines)
+├── skill-mcp-manager/                    # MCP client lifecycle (640 lines)
+├── claude-code-plugin-loader/            # Plugin loading
+├── claude-code-mcp-loader/               # MCP loading
+├── claude-code-session-state/            # Session state
+├── claude-code-command-loader/           # Command loading
+├── claude-code-agent-loader/             # Agent loading
+├── context-injector/                     # Context injection
+├── hook-message-injector/                # Message injection
+├── task-toast-manager/                   # Task toasts
+├── boulder-state/                        # State management
+├── tmux-subagent/                        # Tmux subagent
+├── mcp-oauth/                            # OAuth for MCP
+├── opencode-skill-loader/                # Skill loading
+├── tool-metadata-store/                  # Tool metadata

-## LOADER PRIORITY
+## HOW TO ADD

-| Type | Priority (highest first) |
-|------|--------------------------|
-| Commands | `.opencode/command/` > `~/.config/opencode/command/` > `.claude/commands/` |
-| Skills | `.opencode/skills/` > `~/.config/opencode/skills/` > `.claude/skills/` |
-| MCPs | `.claude/.mcp.json` > `.mcp.json` > `~/.claude/.mcp.json` |
-
-## BACKGROUND AGENT
-
- **Lifecycle**: `launch` → `poll` (2s) → `complete`
- **Stability**: 3 consecutive polls = idle
- **Concurrency**: Per-provider/model limits via `ConcurrencyManager`
- **Cleanup**: 30m TTL, 3m stale timeout
- **State**: Per-session Maps, cleaned on `session.deleted`
-
-## SKILL MCP
-
- **Lazy**: Clients created on first call
- **Transports**: stdio, http (SSE/Streamable)
- **Lifecycle**: 5m idle cleanup
-
-## ANTI-PATTERNS
-
- **Sequential delegation**: Use `delegate_task` parallel
- **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state
+Create dir with index.ts, types.ts, etc.
--- a/src/features/background-agent/concurrency.test.ts
+++ b/src/features/background-agent/concurrency.test.ts
@@ -94,7 +94,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => {

    // when
    const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-5")
-    const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-5")
+    const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6")
    const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro")

    // then
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -1,8 +1,9 @@
-import { describe, test, expect, beforeEach } from "bun:test"
-import { afterEach } from "bun:test"
+declare const require: (name: string) => any
+const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
 import { tmpdir } from "node:os"
 import type { PluginInput } from "@opencode-ai/plugin"
 import type { BackgroundTask, ResumeInput } from "./types"
+import { MIN_IDLE_TIME_MS } from "./constants"
 import { BackgroundManager } from "./manager"
 import { ConcurrencyManager } from "./concurrency"

@@ -170,6 +171,7 @@ function createBackgroundManager(): BackgroundManager {
  const client = {
    session: {
      prompt: async () => ({}),
+      promptAsync: async () => ({}),
      abort: async () => ({}),
    },
  }
@@ -783,7 +785,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    }
    const currentMessage: CurrentMessage = {
      agent: "sisyphus",
-      model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
+      model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
    }

    // when
@@ -791,7 +793,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>

    // then - uses currentMessage values, not task.parentModel/parentAgent
    expect(promptBody.agent).toBe("sisyphus")
-    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-5" })
+    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" })
  })

  test("should fallback to parentAgent when currentMessage.agent is undefined", async () => {
@@ -875,6 +877,94 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
  })
 })

+describe("BackgroundManager.notifyParentSession - aborted parent", () => { // covers MessageAbortedError raised by messages() and by the prompt call
+  test("should skip notification when parent session is aborted", async () => {
+    //#given a client whose messages() rejects with MessageAbortedError and whose prompt mocks record any call
+    let promptCalled = false
+    const promptMock = async () => {
+      promptCalled = true
+      return {}
+    }
+    const client = {
+      session: {
+        prompt: promptMock,
+        promptAsync: promptMock,
+        abort: async () => ({}),
+        messages: async () => {
+          const error = new Error("User aborted")
+          error.name = "MessageAbortedError"
+          throw error
+        },
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    const task: BackgroundTask = {
+      id: "task-aborted-parent",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task aborted parent",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+    }
+    getPendingByParent(manager).set("session-parent", new Set([task.id, "task-remaining"])) // a second id ("task-remaining") is registered for the same parent
+
+    //#when the private notifyParentSession is called directly (the cast only exposes it to the test)
+    await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
+      .notifyParentSession(task)
+
+    //#then neither prompt nor promptAsync was invoked
+    expect(promptCalled).toBe(false)
+
+    manager.shutdown()
+  })
+
+  test("should swallow aborted error from prompt", async () => {
+    //#given messages() resolves with an empty list, while both prompt mocks record the call and then throw MessageAbortedError
+    let promptCalled = false
+    const promptMock = async () => {
+      promptCalled = true
+      const error = new Error("User aborted")
+      error.name = "MessageAbortedError"
+      throw error
+    }
+    const client = {
+      session: {
+        prompt: promptMock,
+        promptAsync: promptMock,
+        abort: async () => ({}),
+        messages: async () => ({ data: [] }),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    const task: BackgroundTask = {
+      id: "task-aborted-prompt",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task aborted prompt",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+    }
+    getPendingByParent(manager).set("session-parent", new Set([task.id])) // this task is the parent's only pending entry
+
+    //#when the call is awaited without a try/catch, so a rejection here would fail the test
+    await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
+      .notifyParentSession(task)
+
+    //#then the prompt mock was reached (and the await above did not reject)
+    expect(promptCalled).toBe(true)
+
+    manager.shutdown()
+  })
+})
+
 function buildNotificationPromptBody(
  task: BackgroundTask,
  currentMessage: CurrentMessage | null
@@ -913,7 +1003,7 @@ describe("BackgroundManager.tryCompleteTask", () => {

  test("should release concurrency and clear key on completion", async () => {
    // given
-    const concurrencyKey = "anthropic/claude-opus-4-5"
+    const concurrencyKey = "anthropic/claude-opus-4-6"
    const concurrencyManager = getConcurrencyManager(manager)
    await concurrencyManager.acquire(concurrencyKey)

@@ -942,7 +1032,7 @@ describe("BackgroundManager.tryCompleteTask", () => {

  test("should prevent double completion and double release", async () => {
    // given
-    const concurrencyKey = "anthropic/claude-opus-4-5"
+    const concurrencyKey = "anthropic/claude-opus-4-6"
    const concurrencyManager = getConcurrencyManager(manager)
    await concurrencyManager.acquire(concurrencyKey)

@@ -969,19 +1059,20 @@ describe("BackgroundManager.tryCompleteTask", () => {
    expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
  })

-  test("should abort session on completion", async () => {
-    // #given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-        messages: async () => ({ data: [] }),
-      },
-    }
+   test("should abort session on completion", async () => {
+     // #given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+         messages: async () => ({ data: [] }),
+       },
+     }
    manager.shutdown()
    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)
@@ -1004,6 +1095,127 @@ describe("BackgroundManager.tryCompleteTask", () => {
    // #then
    expect(abortedSessionIDs).toEqual(["session-1"])
  })
+
+  test("should clean pendingByParent even when notifyParentSession throws", async () => {
+    // given notifyParentSession replaced on this manager instance by a stub that always rejects
+    ;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {
+      throw new Error("notify failed")
+    }
+
+    const task: BackgroundTask = {
+      id: "task-pending-cleanup",
+      sessionID: "session-pending-cleanup",
+      parentSessionID: "parent-pending-cleanup",
+      parentMessageID: "msg-1",
+      description: "pending cleanup task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(),
+    }
+    getTaskMap(manager).set(task.id, task)
+    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id])) // the task is the parent's only pending entry
+
+    // when (awaited without try/catch: a rejection escaping tryCompleteTask would fail the test)
+    await tryCompleteTaskForTest(manager, task)
+
+    // then the task is marked completed and the parent's pending entry has been removed
+    expect(task.status).toBe("completed")
+    expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
+  })
+
+  test("should avoid overlapping promptAsync calls when tasks complete concurrently", async () => {
+    // given a client whose messages() waits on a shared barrier and whose promptAsync throws "BUSY" if called while the first call is still blocked
+    type PromptAsyncBody = Record<string, unknown> & { noReply?: boolean }
+
+    let resolveMessages: ((value: { data: unknown[] }) => void) | undefined
+    const messagesBarrier = new Promise<{ data: unknown[] }>((resolve) => {
+      resolveMessages = resolve
+    })
+
+    const promptBodies: PromptAsyncBody[] = [] // body of every promptAsync call, including rejected ones
+    let promptInFlight = false
+    let rejectedCount = 0
+    let promptCallCount = 0 // counts accepted (non-overlapping) calls only
+
+    let releaseFirstPrompt: (() => void) | undefined
+    let resolveFirstStarted: (() => void) | undefined
+    const firstStarted = new Promise<void>((resolve) => {
+      resolveFirstStarted = resolve
+    })
+
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        abort: async () => ({}),
+        messages: async () => messagesBarrier, // every call waits on the same barrier promise
+        promptAsync: async (args: { path: { id: string }; body: PromptAsyncBody }) => {
+          promptBodies.push(args.body)
+
+          if (!promptInFlight) { // no call is currently blocked: accept this one
+            promptCallCount += 1
+            if (promptCallCount === 1) { // first accepted call stays pending until releaseFirstPrompt() runs
+              promptInFlight = true
+              resolveFirstStarted?.()
+              return await new Promise((resolve) => {
+                releaseFirstPrompt = () => {
+                  promptInFlight = false
+                  resolve({})
+                }
+              })
+            }
+
+            return {}
+          }
+
+          rejectedCount += 1 // a call arrived while the first one was still blocked
+          throw new Error("BUSY")
+        },
+      },
+    }
+
+    manager.shutdown()
+    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+
+    const parentSessionID = "parent-session"
+    const taskA = createMockTask({
+      id: "task-a",
+      sessionID: "session-a",
+      parentSessionID,
+    })
+    const taskB = createMockTask({
+      id: "task-b",
+      sessionID: "session-b",
+      parentSessionID,
+    })
+
+    getTaskMap(manager).set(taskA.id, taskA)
+    getTaskMap(manager).set(taskB.id, taskB)
+    getPendingByParent(manager).set(parentSessionID, new Set([taskA.id, taskB.id]))
+
+    // when both completions are started before the messages() barrier is released
+    const completionA = tryCompleteTaskForTest(manager, taskA)
+    const completionB = tryCompleteTaskForTest(manager, taskB)
+    resolveMessages?.({ data: [] })
+
+    await firstStarted
+
+    // Give the second completion a chance to attempt promptAsync while the first is in-flight.
+    // In the buggy implementation, this triggers an overlap and increments rejectedCount.
+    for (let i = 0; i < 20; i++) {
+      await Promise.resolve()
+      if (rejectedCount > 0) break
+      if (promptBodies.length >= 2) break
+    }
+
+    releaseFirstPrompt?.()
+    await Promise.all([completionA, completionB])
+
+    // then no call overlapped, exactly two prompts were sent, and at least one had noReply === false
+    expect(rejectedCount).toBe(0)
+    expect(promptBodies.length).toBe(2)
+    expect(promptBodies.some((b) => b.noReply === false)).toBe(true)
+  })
 })

 describe("BackgroundManager.trackTask", () => {
@@ -1026,7 +1238,7 @@ describe("BackgroundManager.trackTask", () => {
      sessionID: "session-1",
      parentSessionID: "parent-session",
      description: "external task",
-      agent: "delegate_task",
+      agent: "task",
      concurrencyKey: "external-key",
    }

@@ -1061,7 +1273,7 @@ describe("BackgroundManager.resume concurrency key", () => {
      sessionID: "session-1",
      parentSessionID: "parent-session",
      description: "external task",
-      agent: "delegate_task",
+      agent: "task",
      concurrencyKey: "external-key",
    })

@@ -1083,24 +1295,26 @@ describe("BackgroundManager.resume concurrency key", () => {
 })

 describe("BackgroundManager.resume model persistence", () => {
-  let manager: BackgroundManager
-  let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>
+   let manager: BackgroundManager
+   let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>

-  beforeEach(() => {
-    // given
-    promptCalls = []
-    const client = {
-      session: {
-        prompt: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
-          promptCalls.push(args)
-          return {}
-        },
-        abort: async () => ({}),
-      },
-    }
-    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
-    stubNotifyParentSession(manager)
-  })
+   beforeEach(() => {
+     // given
+     promptCalls = []
+     const promptMock = async (args: { path: { id: string }; body: Record<string, unknown> }) => {
+       promptCalls.push(args)
+       return {}
+     }
+     const client = {
+       session: {
+         prompt: promptMock,
+         promptAsync: promptMock,
+         abort: async () => ({}),
+       },
+     }
+     manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+     stubNotifyParentSession(manager)
+   })

  afterEach(() => {
    manager.shutdown()
@@ -1198,19 +1412,20 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
  let manager: BackgroundManager
  let mockClient: ReturnType<typeof createMockClient>

-  function createMockClient() {
-    return {
-      session: {
-        create: async () => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
-        get: async () => ({ data: { directory: "/test/dir" } }),
-        prompt: async () => ({}),
-        messages: async () => ({ data: [] }),
-        todo: async () => ({ data: [] }),
-        status: async () => ({ data: {} }),
-        abort: async () => ({}),
-      },
-    }
-  }
+    function createMockClient() {
+      return {
+        session: {
+          create: async (_args?: any) => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
+          get: async () => ({ data: { directory: "/test/dir" } }),
+          prompt: async () => ({}),
+          promptAsync: async () => ({}),
+          messages: async () => ({ data: [] }),
+         todo: async () => ({ data: [] }),
+         status: async () => ({ data: {} }),
+         abort: async () => ({}),
+       },
+     }
+   }

  beforeEach(() => {
    // given
@@ -1305,6 +1520,55 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
  })

  describe("task transitions pending→running when slot available", () => {
+    test("should inherit parent session permission rules (and force deny question)", async () => {
+      // given a parent session whose permission list allows "question" and denies "plan_enter"
+      const createCalls: any[] = [] // arguments of every session.create call
+      const parentPermission = [
+        { permission: "question", action: "allow" as const, pattern: "*" },
+        { permission: "plan_enter", action: "deny" as const, pattern: "*" },
+      ]
+
+      const customClient = {
+        session: {
+          create: async (args?: any) => {
+            createCalls.push(args)
+            return { data: { id: `ses_${crypto.randomUUID()}` } }
+          },
+          get: async () => ({ data: { directory: "/test/dir", permission: parentPermission } }),
+          prompt: async () => ({}),
+          promptAsync: async () => ({}),
+          messages: async () => ({ data: [] }),
+          todo: async () => ({ data: [] }),
+          status: async () => ({ data: {} }),
+          abort: async () => ({}),
+        },
+      }
+      manager.shutdown()
+      manager = new BackgroundManager({ client: customClient, directory: tmpdir() } as unknown as PluginInput, {
+        defaultConcurrency: 5,
+      })
+
+      const input = {
+        description: "Test task",
+        prompt: "Do something",
+        agent: "test-agent",
+        parentSessionID: "parent-session",
+        parentMessageID: "parent-message",
+      }
+
+      // when
+      await manager.launch(input)
+      await new Promise(resolve => setTimeout(resolve, 50)) // NOTE(review): presumably lets the launched task reach session.create — confirm
+
+      // then one session was created; its permission list keeps the parent's plan_enter rule and ends with a deny rule for "question" (the parent's allow entry is absent)
+      expect(createCalls).toHaveLength(1)
+      const permission = createCalls[0]?.body?.permission
+      expect(permission).toEqual([
+        { permission: "plan_enter", action: "deny", pattern: "*" },
+        { permission: "question", action: "deny", pattern: "*" },
+      ])
+    })
+
    test("should transition first task to running immediately", async () => {
      // given
      const config = { defaultConcurrency: 5 }
@@ -1573,7 +1837,7 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
        description: "Task 1",
        prompt: "Do something",
        agent: "test-agent",
-        model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
        parentSessionID: "parent-session",
        parentMessageID: "parent-message",
      }
@@ -1758,13 +2022,14 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
 })

 describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
-  test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })

    const task: BackgroundTask = {
@@ -1790,12 +2055,13 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("running")
  })

-  test("should NOT interrupt task with recent lastUpdate", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
+   test("should NOT interrupt task with recent lastUpdate", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })

@@ -1822,11 +2088,12 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("running")
  })

-  test("should interrupt task with stale lastUpdate (> 3min)", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
+   test("should interrupt task with stale lastUpdate (> 3min)", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
      },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
@@ -1858,10 +2125,11 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.completedAt).toBeDefined()
  })

-  test("should respect custom staleTimeoutMs config", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
+   test("should respect custom staleTimeoutMs config", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
        abort: async () => ({}),
      },
    }
@@ -1892,13 +2160,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.error).toContain("Stale timeout")
  })

-  test("should release concurrency before abort", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should release concurrency before abort", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
    stubNotifyParentSession(manager)

@@ -1927,13 +2196,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("cancelled")
  })

-  test("should handle multiple stale tasks in same poll cycle", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should handle multiple stale tasks in same poll cycle", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
    stubNotifyParentSession(manager)

@@ -1978,13 +2248,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task2.status).toBe("cancelled")
  })

-  test("should use default timeout when config not provided", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should use default timeout when config not provided", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)

@@ -2013,18 +2284,19 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
 })

 describe("BackgroundManager.shutdown session abort", () => {
-  test("should call session.abort for all running tasks during shutdown", () => {
-    // given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-      },
-    }
+   test("should call session.abort for all running tasks during shutdown", () => {
+     // given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)

    const task1: BackgroundTask = {
@@ -2062,18 +2334,19 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(abortedSessionIDs).toHaveLength(2)
  })

-  test("should not call session.abort for completed or cancelled tasks", () => {
-    // given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-      },
-    }
+   test("should not call session.abort for completed or cancelled tasks", () => {
+     // given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)

    const completedTask: BackgroundTask = {
@@ -2122,15 +2395,16 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(abortedSessionIDs).toHaveLength(0)
  })

-  test("should call onShutdown callback during shutdown", () => {
-    // given
-    let shutdownCalled = false
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should call onShutdown callback during shutdown", () => {
+     // given
+     let shutdownCalled = false
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager(
      { client, directory: tmpdir() } as unknown as PluginInput,
      undefined,
@@ -2148,14 +2422,15 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(shutdownCalled).toBe(true)
  })

-  test("should not throw when onShutdown callback throws", () => {
-    // given
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should not throw when onShutdown callback throws", () => {
+     // given
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager(
      { client, directory: tmpdir() } as unknown as PluginInput,
      undefined,
@@ -2171,6 +2446,69 @@ describe("BackgroundManager.shutdown session abort", () => {
  })
 })

+describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
+  test("should cancel descendant tasks when parent session is deleted", () => {
+    // given - two running children of the parent session, a pending grandchild under one child, and an unrelated task
+    const manager = createBackgroundManager()
+    const parentSessionID = "session-parent"
+    const childTask = createMockTask({
+      id: "task-child",
+      sessionID: "session-child",
+      parentSessionID,
+      status: "running",
+    })
+    const siblingTask = createMockTask({
+      id: "task-sibling",
+      sessionID: "session-sibling",
+      parentSessionID,
+      status: "running",
+    })
+    const grandchildTask = createMockTask({
+      id: "task-grandchild",
+      sessionID: "session-grandchild",
+      parentSessionID: "session-child",
+      status: "pending",
+      startedAt: undefined,
+      queuedAt: new Date(),
+    })
+    const unrelatedTask = createMockTask({
+      id: "task-unrelated",
+      sessionID: "session-unrelated",
+      parentSessionID: "other-parent",
+      status: "running",
+    })
+
+    const taskMap = getTaskMap(manager)
+    taskMap.set(childTask.id, childTask)
+    taskMap.set(siblingTask.id, siblingTask)
+    taskMap.set(grandchildTask.id, grandchildTask)
+    taskMap.set(unrelatedTask.id, unrelatedTask)
+
+    const pendingByParent = getPendingByParent(manager)
+    pendingByParent.set(parentSessionID, new Set([childTask.id, siblingTask.id]))
+    pendingByParent.set("session-child", new Set([grandchildTask.id]))
+
+    // when - a session.deleted event arrives for the parent session
+    manager.handleEvent({
+      type: "session.deleted",
+      properties: { info: { id: parentSessionID } },
+    })
+
+    // then - every descendant is untracked and cancelled, pending sets are gone, and the unrelated task is still tracked
+    expect(taskMap.has(childTask.id)).toBe(false)
+    expect(taskMap.has(siblingTask.id)).toBe(false)
+    expect(taskMap.has(grandchildTask.id)).toBe(false)
+    expect(taskMap.has(unrelatedTask.id)).toBe(true)
+    expect(childTask.status).toBe("cancelled")
+    expect(siblingTask.status).toBe("cancelled")
+    expect(grandchildTask.status).toBe("cancelled")
+    expect(pendingByParent.get(parentSessionID)).toBeUndefined()
+    expect(pendingByParent.get("session-child")).toBeUndefined()
+
+    manager.shutdown()
+  })
+})
+
 describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
  function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
    return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
@@ -2324,3 +2662,182 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
    expect(completionTimers.size).toBe(0)
  })
 })
+
+describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
+  test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
+    //#given - a running task started less than MIN_IDLE_TIME_MS ago
+    const sessionID = "session-early-idle"
+    const messagesCalls: string[] = []
+    const realDateNow = Date.now
+    const baseNow = realDateNow()
+
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+        messages: async (args: { path: { id: string } }) => {
+          messagesCalls.push(args.path.id)
+          return {
+            data: [
+              {
+                info: { role: "assistant" },
+                parts: [{ type: "text", text: "ok" }],
+              },
+            ],
+          }
+        },
+        todo: async () => ({ data: [] }),
+      },
+    }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const remainingMs = 100
+    const task: BackgroundTask = {
+      id: "task-early-idle",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "early idle task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(baseNow),
+    }
+
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - session.idle fires remainingMs before MIN_IDLE_TIME_MS has elapsed
+    try {
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
+      manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+
+      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
+
+      //#then - idle should be deferred (not dropped), and task should eventually complete
+      expect(task.status).toBe("running")
+      await new Promise((resolve) => setTimeout(resolve, remainingMs + 120))
+      expect(task.status).toBe("completed")
+      expect(messagesCalls).toEqual([sessionID])
+    } finally {
+      Date.now = realDateNow
+      manager.shutdown()
+    }
+  })
+
+  test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
+    //#given - a running task started more than MIN_IDLE_TIME_MS ago
+    const sessionID = "session-late-idle"
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+        messages: async () => ({
+          data: [
+            {
+              info: { role: "assistant" },
+              parts: [{ type: "text", text: "ok" }],
+            },
+          ],
+        }),
+        todo: async () => ({ data: [] }),
+      },
+    }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const task: BackgroundTask = {
+      id: "task-late-idle",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "late idle task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
+    }
+
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - session.idle fires after the minimum idle time has already elapsed
+    manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+
+    //#then - should be processed immediately
+    await new Promise((resolve) => setTimeout(resolve, 10))
+    expect(task.status).toBe("completed")
+
+    manager.shutdown()
+  })
+
+  test("should not process deferred idle if task already completed by other means", async () => {
+    //#given - a running task
+    const sessionID = "session-deferred-noop"
+    let messagesCallCount = 0
+    const realDateNow = Date.now
+    const baseNow = realDateNow()
+
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+        messages: async () => {
+          messagesCallCount += 1
+          return {
+            data: [
+              {
+                info: { role: "assistant" },
+                parts: [{ type: "text", text: "ok" }],
+              },
+            ],
+          }
+        },
+        todo: async () => ({ data: [] }),
+      },
+    }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const remainingMs = 120
+    const task: BackgroundTask = {
+      id: "task-deferred-noop",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "deferred noop task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(baseNow),
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - session.idle fires early, then task completes via another path before defer timer
+    try {
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
+      manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+      expect(messagesCallCount).toBe(0)
+
+      await tryCompleteTaskForTest(manager, task)
+      expect(task.status).toBe("completed")
+
+      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
+
+      //#then - deferred callback should be a no-op
+      await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
+      expect(task.status).toBe("completed")
+      expect(messagesCallCount).toBe(0)
+    } finally {
+      Date.now = realDateNow
+      manager.shutdown()
+    }
+  })
+})
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -88,6 +88,8 @@ export class BackgroundManager {
  private queuesByKey: Map<string, QueueItem[]> = new Map()
  private processingKeys: Set<string> = new Set()
  private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
+  private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
+  private notificationQueueByParent: Map<string, Promise<void>> = new Map()

  constructor(
    ctx: PluginInput,
@@ -234,13 +236,17 @@ export class BackgroundManager {
    const parentDirectory = parentSession?.data?.directory ?? this.directory
    log(`[background-agent] Parent dir: ${parentSession?.data?.directory}, using: ${parentDirectory}`)

+    const inheritedPermission = (parentSession as any)?.data?.permission
+    const permissionRules = Array.isArray(inheritedPermission)
+      ? inheritedPermission.filter((r: any) => r?.permission !== "question")
+      : []
+    permissionRules.push({ permission: "question", action: "deny" as const, pattern: "*" })
+
    const createResult = await this.client.session.create({
      body: {
        parentID: input.parentSessionID,
        title: `${input.description} (@${input.agent} subagent)`,
-        permission: [
-          { permission: "question", action: "deny" as const, pattern: "*" },
-        ],
+        permission: permissionRules,
      } as any,
      query: {
        directory: parentDirectory,
@@ -309,7 +315,7 @@ export class BackgroundManager {
      promptLength: input.prompt.length,
    })

-    // Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
+    // Fire-and-forget prompt via promptAsync (no response body needed)
    // Include model if caller provided one (e.g., from Sisyphus category configs)
    // IMPORTANT: variant must be a top-level field in the body, NOT nested inside model
    // OpenCode's PromptInput schema expects: { model: { providerID, modelID }, variant: "max" }
@@ -328,7 +334,6 @@ export class BackgroundManager {
        tools: {
          ...getAgentToolRestrictions(input.agent),
          task: false,
-          delegate_task: false,
          call_omo_agent: true,
          question: false,
        },
@@ -357,7 +362,8 @@ export class BackgroundManager {
        }).catch(() => {})

        this.markForNotification(existingTask)
-        this.notifyParentSession(existingTask).catch(err => {
+        this.cleanupPendingByParent(existingTask)
+        this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
          log("[background-agent] Failed to notify on error:", err)
        })
      }
@@ -410,7 +416,7 @@ export class BackgroundManager {
  }

  /**
-   * Track a task created elsewhere (e.g., from delegate_task) for notification tracking.
+   * Track a task created elsewhere (e.g., by the `task` tool) for notification tracking.
   * This allows tasks created by other tools to receive the same toast/prompt notifications.
   */
  async trackTask(input: {
@@ -458,7 +464,7 @@ export class BackgroundManager {
      return existingTask
    }

-    const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task"
+    const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"

    // Acquire concurrency slot if a key is provided
    if (input.concurrencyKey) {
@@ -472,7 +478,7 @@ export class BackgroundManager {
      parentMessageID: "",
      description: input.description,
      prompt: "",
-      agent: input.agent || "delegate_task",
+      agent: input.agent || "task",
      status: "running",
      startedAt: new Date(),
      progress: {
@@ -570,7 +576,7 @@ export class BackgroundManager {
      promptLength: input.prompt.length,
    })

-    // Use prompt() instead of promptAsync() to properly initialize agent loop
+    // Fire-and-forget prompt via promptAsync (no response body needed)
    // Include model if task has one (preserved from original launch with category config)
    // variant must be top-level in body, not nested inside model (OpenCode PromptInput schema)
    const resumeModel = existingTask.model
@@ -578,7 +584,7 @@ export class BackgroundManager {
      : undefined
    const resumeVariant = existingTask.model?.variant

-    this.client.session.prompt({
+    this.client.session.promptAsync({
      path: { id: existingTask.sessionID },
      body: {
        agent: existingTask.agent,
@@ -587,7 +593,6 @@ export class BackgroundManager {
        tools: {
          ...getAgentToolRestrictions(existingTask.agent),
          task: false,
-          delegate_task: false,
          call_omo_agent: true,
          question: false,
        },
@@ -614,7 +619,8 @@ export class BackgroundManager {
      }

      this.markForNotification(existingTask)
-      this.notifyParentSession(existingTask).catch(err => {
+      this.cleanupPendingByParent(existingTask)
+      this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
        log("[background-agent] Failed to notify on resume error:", err)
      })
    })
@@ -651,6 +657,13 @@ export class BackgroundManager {
      const task = this.findBySession(sessionID)
      if (!task) return

+      // Clear any pending idle deferral timer since the task is still active
+      const existingTimer = this.idleDeferralTimers.get(task.id)
+      if (existingTimer) {
+        clearTimeout(existingTimer)
+        this.idleDeferralTimers.delete(task.id)
+      }
+
      if (partInfo?.type === "tool" || partInfo?.tool) {
        if (!task.progress) {
          task.progress = {
@@ -677,7 +690,17 @@ export class BackgroundManager {
      // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
      const elapsedMs = Date.now() - startedAt.getTime()
      if (elapsedMs < MIN_IDLE_TIME_MS) {
-        log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
+        const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
+        if (!this.idleDeferralTimers.has(task.id)) {
+          log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
+          const timer = setTimeout(() => {
+            this.idleDeferralTimers.delete(task.id)
+            this.handleEvent({ type: "session.idle", properties: { sessionID } })
+          }, remainingMs)
+          this.idleDeferralTimers.set(task.id, timer)
+        } else {
+          log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
+        }
        return
      }

@@ -718,28 +741,47 @@ export class BackgroundManager {
      if (!info || typeof info.id !== "string") return
      const sessionID = info.id

-      const task = this.findBySession(sessionID)
-      if (!task) return
-
-      if (task.status === "running") {
-        task.status = "cancelled"
-        task.completedAt = new Date()
-        task.error = "Session deleted"
+      const tasksToCancel = new Map<string, BackgroundTask>()
+      const directTask = this.findBySession(sessionID)
+      if (directTask) {
+        tasksToCancel.set(directTask.id, directTask)
+      }
+      for (const descendant of this.getAllDescendantTasks(sessionID)) {
+        tasksToCancel.set(descendant.id, descendant)
      }

-       if (task.concurrencyKey) {
-         this.concurrencyManager.release(task.concurrencyKey)
-         task.concurrencyKey = undefined
-       }
-      const existingTimer = this.completionTimers.get(task.id)
-      if (existingTimer) {
-        clearTimeout(existingTimer)
-        this.completionTimers.delete(task.id)
+      if (tasksToCancel.size === 0) return
+
+      for (const task of tasksToCancel.values()) {
+        if (task.status === "running" || task.status === "pending") {
+          void this.cancelTask(task.id, {
+            source: "session.deleted",
+            reason: "Session deleted",
+            skipNotification: true,
+          }).catch(err => {
+            log("[background-agent] Failed to cancel task on session.deleted:", { taskId: task.id, error: err })
+          })
+        }
+
+        const existingTimer = this.completionTimers.get(task.id)
+        if (existingTimer) {
+          clearTimeout(existingTimer)
+          this.completionTimers.delete(task.id)
+        }
+
+        const idleTimer = this.idleDeferralTimers.get(task.id)
+        if (idleTimer) {
+          clearTimeout(idleTimer)
+          this.idleDeferralTimers.delete(task.id)
+        }
+
+        this.cleanupPendingByParent(task)
+        this.tasks.delete(task.id)
+        this.clearNotificationsForTask(task.id)
+        if (task.sessionID) {
+          subagentSessions.delete(task.sessionID)
+        }
      }
-      this.cleanupPendingByParent(task)
-      this.tasks.delete(task.id)
-      this.clearNotificationsForTask(task.id)
-      subagentSessions.delete(sessionID)
    }
  }

@@ -890,6 +932,12 @@ export class BackgroundManager {
      this.completionTimers.delete(task.id)
    }

+    const idleTimer = this.idleDeferralTimers.get(task.id)
+    if (idleTimer) {
+      clearTimeout(idleTimer)
+      this.idleDeferralTimers.delete(task.id)
+    }
+
    this.cleanupPendingByParent(task)

    if (abortSession && task.sessionID) {
@@ -906,7 +954,7 @@ export class BackgroundManager {
    this.markForNotification(task)

    try {
-      await this.notifyParentSession(task)
+      await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      log(`[background-agent] Task cancelled via ${source}:`, task.id)
    } catch (err) {
      log("[background-agent] Error in notifyParentSession for cancelled task:", { taskId: task.id, error: err })
@@ -1025,6 +1073,15 @@ export class BackgroundManager {

    this.markForNotification(task)

+    // Ensure pending tracking is cleaned up even if notification fails
+    this.cleanupPendingByParent(task)
+
+    const idleTimer = this.idleDeferralTimers.get(task.id)
+    if (idleTimer) {
+      clearTimeout(idleTimer)
+      this.idleDeferralTimers.delete(task.id)
+    }
+
    if (task.sessionID) {
      this.client.session.abort({
        path: { id: task.sessionID },
@@ -1032,7 +1089,7 @@ export class BackgroundManager {
    }

    try {
-      await this.notifyParentSession(task)
+      await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      log(`[background-agent] Task completed via ${source}:`, task.id)
    } catch (err) {
      log("[background-agent] Error in notifyParentSession:", { taskId: task.id, error: err })
@@ -1062,16 +1119,19 @@ export class BackgroundManager {

    // Update pending tracking and check if all tasks complete
    const pendingSet = this.pendingByParent.get(task.parentSessionID)
+    let allComplete = false
+    let remainingCount = 0
    if (pendingSet) {
      pendingSet.delete(task.id)
-      if (pendingSet.size === 0) {
+      remainingCount = pendingSet.size
+      allComplete = remainingCount === 0
+      if (allComplete) {
        this.pendingByParent.delete(task.parentSessionID)
      }
+    } else {
+      allComplete = true
    }

-    const allComplete = !pendingSet || pendingSet.size === 0
-    const remainingCount = pendingSet?.size ?? 0
-
    const statusText = task.status === "completed" ? "COMPLETED" : "CANCELLED"
    const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
    
@@ -1123,7 +1183,14 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          break
        }
      }
-    } catch {
+    } catch (error) {
+      if (this.isAbortedSessionError(error)) {
+        log("[background-agent] Parent session aborted, skipping notification:", {
+          taskId: task.id,
+          parentSessionID: task.parentSessionID,
+        })
+        return
+      }
      const messageDir = getMessageDir(task.parentSessionID)
      const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
      agent = currentMessage?.agent ?? task.parentAgent
@@ -1139,7 +1206,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    })

    try {
-      await this.client.session.prompt({
+      await this.client.session.promptAsync({
        path: { id: task.parentSessionID },
        body: {
          noReply: !allComplete,
@@ -1154,6 +1221,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
        noReply: !allComplete,
      })
    } catch (error) {
+      if (this.isAbortedSessionError(error)) {
+        log("[background-agent] Parent session aborted, skipping notification:", {
+          taskId: task.id,
+          parentSessionID: task.parentSessionID,
+        })
+        return
+      }
      log("[background-agent] Failed to send notification:", error)
    }

@@ -1192,6 +1266,28 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    return `${seconds}s`
  }

+  /** Whether the error's text mentions "aborted" (case-insensitive). */
+  private isAbortedSessionError(error: unknown): boolean {
+    return this.getErrorText(error).toLowerCase().includes("aborted")
+  }
+
+  /**
+   * Flattens an unknown thrown value into text for matching. Plain objects
+   * contribute both `name` and `message`, like Error instances, so an
+   * abort-style `name` is not hidden by an unrelated `message`.
+   */
+  private getErrorText(error: unknown): string {
+    if (!error) return ""
+    if (typeof error === "string") return error
+    if (error instanceof Error) return `${error.name}: ${error.message}`
+    if (typeof error === "object" && error !== null) {
+      const name = "name" in error ? error.name : undefined
+      const message = "message" in error ? error.message : undefined
+      return [name, message].filter((part): part is string => typeof part === "string").join(": ")
+    }
+    return ""
+  }
+
  private hasRunningTasks(): boolean {
    for (const task of this.tasks.values()) {
      if (task.status === "running") return true
@@ -1290,7 +1386,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      log(`[background-agent] Task ${task.id} interrupted: stale timeout`)

      try {
-        await this.notifyParentSession(task)
+        await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      } catch (err) {
        log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
      }
@@ -1475,16 +1571,46 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    }
    this.completionTimers.clear()

+    for (const timer of this.idleDeferralTimers.values()) {
+      clearTimeout(timer)
+    }
+    this.idleDeferralTimers.clear()
+
    this.concurrencyManager.clear()
    this.tasks.clear()
    this.notifications.clear()
    this.pendingByParent.clear()
+    this.notificationQueueByParent.clear()
    this.queuesByKey.clear()
    this.processingKeys.clear()
    this.unregisterProcessCleanup()
    log("[background-agent] Shutdown complete")

  }
+  /** Runs `operation` after any notification already queued for the same parent session; runs it immediately when no parent ID is given. */
+  private enqueueNotificationForParent(
+    parentSessionID: string | undefined,
+    operation: () => Promise<void>
+  ): Promise<void> {
+    if (!parentSessionID) {
+      return operation()
+    }
+    // Chain behind the latest queued notification for this parent (or start a fresh chain).
+    const previous = this.notificationQueueByParent.get(parentSessionID) ?? Promise.resolve()
+    const current = previous
+      .catch(() => {})
+      .then(operation)
+    // The empty catch above means an earlier failure does not prevent this operation from running.
+    this.notificationQueueByParent.set(parentSessionID, current)
+    // Once settled, drop the map entry unless a newer operation has replaced it.
+    void current.finally(() => {
+      if (this.notificationQueueByParent.get(parentSessionID) === current) {
+        this.notificationQueueByParent.delete(parentSessionID)
+      }
+    }).catch(() => {})
+    // Return the chained promise; it rejects if this operation rejects.
+    return current
+  }
 }

 function registerProcessSignal(
--- a/src/features/background-agent/result-handler.ts
+++ b/src/features/background-agent/result-handler.ts
@@ -240,7 +240,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
  })

  try {
-    await client.session.prompt({
+    await client.session.promptAsync({
      path: { id: task.parentSessionID },
      body: {
        noReply: !allComplete,
--- a/src/features/background-agent/spawner.test.ts
+++ b/src/features/background-agent/spawner.test.ts
@@ -0,0 +1,65 @@
+import { describe, test, expect } from "bun:test"
+
+import { createTask, startTask } from "./spawner"
+
+describe("background-agent spawner.startTask", () => {
+  test("should inherit parent session permission rules (and force deny question)", async () => {
+    //#given
+    const createCalls: any[] = []
+    const parentPermission = [
+      { permission: "question", action: "allow" as const, pattern: "*" },
+      { permission: "plan_enter", action: "deny" as const, pattern: "*" },
+    ]
+
+    const client = {
+      session: {
+        get: async () => ({ data: { directory: "/parent/dir", permission: parentPermission } }),
+        create: async (args?: any) => {
+          createCalls.push(args)
+          return { data: { id: "ses_child" } }
+        },
+        promptAsync: async () => ({}),
+      },
+    }
+
+    const task = createTask({
+      description: "Test task",
+      prompt: "Do work",
+      agent: "explore",
+      parentSessionID: "ses_parent",
+      parentMessageID: "msg_parent",
+    })
+
+    const item = {
+      task,
+      input: {
+        description: task.description,
+        prompt: task.prompt,
+        agent: task.agent,
+        parentSessionID: task.parentSessionID,
+        parentMessageID: task.parentMessageID,
+        parentModel: task.parentModel,
+        parentAgent: task.parentAgent,
+        model: task.model,
+      },
+    }
+
+    const ctx = {
+      client,
+      directory: "/fallback",
+      concurrencyManager: { release: () => {} },
+      tmuxEnabled: false,
+      onTaskError: () => {},
+    }
+
+    //#when
+    await startTask(item as any, ctx as any)
+
+    //#then
+    expect(createCalls).toHaveLength(1)
+    expect(createCalls[0]?.body?.permission).toEqual([
+      { permission: "plan_enter", action: "deny", pattern: "*" },
+      { permission: "question", action: "deny", pattern: "*" },
+    ])
+  })
+})
--- a/src/features/background-agent/spawner.ts
+++ b/src/features/background-agent/spawner.ts
@@ -58,13 +58,17 @@ export async function startTask(
  const parentDirectory = parentSession?.data?.directory ?? directory
  log(`[background-agent] Parent dir: ${parentSession?.data?.directory}, using: ${parentDirectory}`)

+  const inheritedPermission = (parentSession as any)?.data?.permission
+  const permissionRules = Array.isArray(inheritedPermission)
+    ? inheritedPermission.filter((r: any) => r?.permission !== "question")
+    : []
+  permissionRules.push({ permission: "question", action: "deny" as const, pattern: "*" })
+
  const createResult = await client.session.create({
    body: {
      parentID: input.parentSessionID,
      title: `Background: ${input.description}`,
-      permission: [
-        { permission: "question", action: "deny" as const, pattern: "*" },
-      ],
+      permission: permissionRules,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } as any,
    query: {
@@ -146,7 +150,6 @@ export async function startTask(
      tools: {
        ...getAgentToolRestrictions(input.agent),
        task: false,
-        delegate_task: false,
        call_omo_agent: true,
        question: false,
      },
@@ -222,7 +225,7 @@ export async function resumeTask(
    : undefined
  const resumeVariant = task.model?.variant

-  client.session.prompt({
+  client.session.promptAsync({
    path: { id: task.sessionID },
    body: {
      agent: task.agent,
@@ -231,7 +234,6 @@ export async function resumeTask(
      tools: {
        ...getAgentToolRestrictions(task.agent),
        task: false,
-        delegate_task: false,
        call_omo_agent: true,
        question: false,
      },
--- a/src/features/background-agent/spawner/background-session-creator.ts
+++ b/src/features/background-agent/spawner/background-session-creator.ts
@@ -0,0 +1,46 @@
+import type { OpencodeClient } from "../constants"
+import type { ConcurrencyManager } from "../concurrency"
+import type { LaunchInput } from "../types"
+import { log } from "../../../shared"
+
+/**
+ * Creates the child session for a background task; every failure path releases the concurrency slot before throwing.
+ */
+export async function createBackgroundSession(options: {
+  client: OpencodeClient
+  input: LaunchInput
+  parentDirectory: string
+  concurrencyManager: ConcurrencyManager
+  concurrencyKey: string
+}): Promise<string> {
+  const { client, input, parentDirectory, concurrencyManager, concurrencyKey } = options
+
+  const body = {
+    parentID: input.parentSessionID,
+    title: `Background: ${input.description}`,
+    permission: [{ permission: "question", action: "deny" as const, pattern: "*" }],
+  }
+
+  const createResult = await client.session
+    .create({ body, query: { directory: parentDirectory } })
+    .catch((error) => {
+      concurrencyManager.release(concurrencyKey)
+      throw error
+    })
+
+  if (createResult.error) {
+    concurrencyManager.release(concurrencyKey)
+    // Serialize non-string payloads so the message does not collapse to "[object Object]".
+    const detail = typeof createResult.error === "string" ? createResult.error : JSON.stringify(createResult.error)
+    throw new Error(`Failed to create background session: ${detail}`)
+  }
+
+  if (!createResult.data?.id) {
+    concurrencyManager.release(concurrencyKey)
+    throw new Error("Failed to create background session: API returned no session ID")
+  }
+
+  const sessionID = createResult.data.id
+  log("[background-agent] Background session created", { sessionID })
+  return sessionID
+}
--- a/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
+++ b/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
@@ -0,0 +1,7 @@
+import type { LaunchInput } from "../types"
+
+export function getConcurrencyKeyFromLaunchInput(input: LaunchInput): string {
+  // An explicit model keys concurrency by "providerID/modelID"; otherwise the agent name is the key.
+  const { model, agent } = input
+  return model ? `${model.providerID}/${model.modelID}` : agent
+}
--- a/src/features/background-agent/spawner/parent-directory-resolver.ts
+++ b/src/features/background-agent/spawner/parent-directory-resolver.ts
@@ -0,0 +1,21 @@
+import type { OpencodeClient } from "../constants"
+import { log } from "../../../shared"
+
+/** Returns the parent session's directory, or `defaultDirectory` when the session or its directory is unavailable. */
+export async function resolveParentDirectory(options: {
+  client: OpencodeClient
+  parentSessionID: string
+  defaultDirectory: string
+}): Promise<string> {
+  const { client, parentSessionID, defaultDirectory } = options
+  const lookup = client.session.get({ path: { id: parentSessionID } })
+  // A failed lookup is logged and treated as "no parent session".
+  const session = await lookup.catch((error) => {
+    log(`[background-agent] Failed to get parent session: ${error}`)
+    return null
+  })
+  const sessionDirectory = session?.data?.directory
+  const resolved = sessionDirectory ?? defaultDirectory
+  log(`[background-agent] Parent dir: ${sessionDirectory}, using: ${resolved}`)
+  return resolved
+}
--- a/src/features/background-agent/spawner/tmux-callback-invoker.ts
+++ b/src/features/background-agent/spawner/tmux-callback-invoker.ts
@@ -0,0 +1,39 @@
+import type { OnSubagentSessionCreated } from "../constants"
+import { TMUX_CALLBACK_DELAY_MS } from "../constants"
+import { log } from "../../../shared"
+import { isInsideTmux } from "../../../shared/tmux"
+
+/** Invokes the subagent-session callback when tmux integration applies, then waits TMUX_CALLBACK_DELAY_MS. */
+export async function maybeInvokeTmuxCallback(options: {
+  onSubagentSessionCreated?: OnSubagentSessionCreated
+  tmuxEnabled: boolean
+  sessionID: string
+  parentID: string
+  title: string
+}): Promise<void> {
+  const { onSubagentSessionCreated: notify, tmuxEnabled, sessionID, parentID, title } = options
+
+  log("[background-agent] tmux callback check", {
+    hasCallback: !!notify,
+    tmuxEnabled,
+    isInsideTmux: isInsideTmux(),
+    sessionID,
+    parentID,
+  })
+
+  // Requires a callback, tmuxEnabled, and isInsideTmux() to all be truthy.
+  if (!notify || !tmuxEnabled || !isInsideTmux()) {
+    log("[background-agent] SKIP tmux callback - conditions not met")
+    return
+  }
+
+  log("[background-agent] Invoking tmux callback NOW", { sessionID })
+  const created = { sessionID, parentID, title }
+  await notify(created).catch((error) => {
+    log("[background-agent] Failed to spawn tmux pane:", error)
+  })
+
+  // A rejected callback is only logged; the delay below still runs.
+  log("[background-agent] tmux callback completed, waiting")
+  await new Promise<void>((resolve) => setTimeout(resolve, TMUX_CALLBACK_DELAY_MS))
+}
--- a/src/features/boulder-state/storage.test.ts
+++ b/src/features/boulder-state/storage.test.ts
@@ -246,5 +246,33 @@ describe("boulder-state", () => {
      expect(state.plan_name).toBe("auth-refactor")
      expect(state.started_at).toBeDefined()
    })
+
+    test("should include agent field when provided", () => {
+      //#given - plan path, session id, and agent type
+      const planPath = "/path/to/feature.md"
+      const sessionId = "ses-xyz789"
+      const agent = "atlas"
+
+      //#when - createBoulderState is called with agent
+      const state = createBoulderState(planPath, sessionId, agent)
+
+      //#then - state should include the agent field
+      expect(state.agent).toBe("atlas")
+      expect(state.active_plan).toBe(planPath)
+      expect(state.session_ids).toEqual([sessionId])
+      expect(state.plan_name).toBe("feature")
+    })
+
+    test("should allow agent to be undefined", () => {
+      //#given - plan path and session id without agent
+      const planPath = "/path/to/legacy.md"
+      const sessionId = "ses-legacy"
+
+      //#when - createBoulderState is called without agent
+      const state = createBoulderState(planPath, sessionId)
+
+      //#then - state should not have agent field (backward compatible)
+      expect(state.agent).toBeUndefined()
+    })
  })
 })
--- a/src/features/boulder-state/storage.ts
+++ b/src/features/boulder-state/storage.ts
@@ -139,12 +139,14 @@ export function getPlanName(planPath: string): string {
 */
 export function createBoulderState(
  planPath: string,
-  sessionId: string
+  sessionId: string,
+  agent?: string
 ): BoulderState {
  return {
    active_plan: planPath,
    started_at: new Date().toISOString(),
    session_ids: [sessionId],
    plan_name: getPlanName(planPath),
+    ...(agent !== undefined ? { agent } : {}),
  }
 }
--- a/src/features/boulder-state/types.ts
+++ b/src/features/boulder-state/types.ts
@@ -14,6 +14,8 @@ export interface BoulderState {
  session_ids: string[]
  /** Plan name derived from filename */
  plan_name: string
+  /** Agent type to use when resuming (e.g., 'atlas') */
+  agent?: string
 }

 export interface PlanProgress {
--- a/src/features/builtin-commands/commands.test.ts
+++ b/src/features/builtin-commands/commands.test.ts
@@ -0,0 +1,138 @@
+import { describe, test, expect } from "bun:test"
+import { loadBuiltinCommands } from "./commands"
+import { HANDOFF_TEMPLATE } from "./templates/handoff"
+import type { BuiltinCommandName } from "./types"
+
+describe("loadBuiltinCommands", () => {
+  test("should include handoff command in loaded commands", () => {
+    //#given
+    const disabledCommands: BuiltinCommandName[] = []
+
+    //#when
+    const commands = loadBuiltinCommands(disabledCommands)
+
+    //#then
+    expect(commands.handoff).toBeDefined()
+    expect(commands.handoff.name).toBe("handoff")
+  })
+
+  test("should exclude handoff when disabled", () => {
+    //#given
+    const disabledCommands: BuiltinCommandName[] = ["handoff"]
+
+    //#when
+    const commands = loadBuiltinCommands(disabledCommands)
+
+    //#then
+    expect(commands.handoff).toBeUndefined()
+  })
+
+  test("should include handoff template content in command template", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.template).toContain(HANDOFF_TEMPLATE)
+  })
+
+  test("should include session context variables in handoff template", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.template).toContain("$SESSION_ID")
+    expect(commands.handoff.template).toContain("$TIMESTAMP")
+    expect(commands.handoff.template).toContain("$ARGUMENTS")
+  })
+
+  test("should have correct description for handoff", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.description).toContain("context summary")
+  })
+})
+
+describe("HANDOFF_TEMPLATE", () => {
+  test("should include session reading instruction", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("session_read")
+  })
+
+  test("should include compaction-style sections in output format", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("USER REQUESTS (AS-IS)")
+    expect(HANDOFF_TEMPLATE).toContain("EXPLICIT CONSTRAINTS")
+  })
+
+  test("should include programmatic context gathering instructions", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("todoread")
+    expect(HANDOFF_TEMPLATE).toContain("git diff")
+    expect(HANDOFF_TEMPLATE).toContain("git status")
+  })
+
+  test("should include context extraction format", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("WORK COMPLETED")
+    expect(HANDOFF_TEMPLATE).toContain("CURRENT STATE")
+    expect(HANDOFF_TEMPLATE).toContain("PENDING TASKS")
+    expect(HANDOFF_TEMPLATE).toContain("KEY FILES")
+    expect(HANDOFF_TEMPLATE).toContain("IMPORTANT DECISIONS")
+    expect(HANDOFF_TEMPLATE).toContain("CONTEXT FOR CONTINUATION")
+    expect(HANDOFF_TEMPLATE).toContain("GOAL")
+  })
+
+  test("should enforce first person perspective", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("first person perspective")
+  })
+
+  test("should limit key files to 10", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("Maximum 10 files")
+  })
+
+  test("should instruct plain text format without markdown", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("Plain text with bullets")
+    expect(HANDOFF_TEMPLATE).toContain("No markdown headers")
+  })
+
+  test("should include user instructions for new session", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("new session")
+    expect(HANDOFF_TEMPLATE).toContain("opencode")
+  })
+
+  test("should not contain emojis", () => {
+    //#given - the template string
+
+    //#when / #then - property escapes match pictographs and flag letters, not other scripts
+    const emojiRegex = /[\p{Extended_Pictographic}\p{Regional_Indicator}]/u
+    expect(emojiRegex.test(HANDOFF_TEMPLATE)).toBe(false)
+  })
+})
--- a/src/features/builtin-commands/commands.ts
+++ b/src/features/builtin-commands/commands.ts
@@ -5,6 +5,7 @@ import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-lo
 import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation"
 import { REFACTOR_TEMPLATE } from "./templates/refactor"
 import { START_WORK_TEMPLATE } from "./templates/start-work"
+import { HANDOFF_TEMPLATE } from "./templates/handoff"

 const BUILTIN_COMMAND_DEFINITIONS: Record<BuiltinCommandName, Omit<CommandDefinition, "name">> = {
  "init-deep": {
@@ -77,6 +78,22 @@ $ARGUMENTS
 ${STOP_CONTINUATION_TEMPLATE}
 </command-instruction>`,
  },
+  handoff: {
+    description: "(builtin) Create a detailed context summary for continuing work in a new session",
+    template: `<command-instruction>
+${HANDOFF_TEMPLATE}
+</command-instruction>
+
+<session-context>
+Session ID: $SESSION_ID
+Timestamp: $TIMESTAMP
+</session-context>
+
+<user-request>
+$ARGUMENTS
+</user-request>`,
+    argumentHint: "[goal]",
+  },
 }

 export function loadBuiltinCommands(
--- a/src/features/builtin-commands/templates/handoff.ts
+++ b/src/features/builtin-commands/templates/handoff.ts
@@ -0,0 +1,177 @@
+export const HANDOFF_TEMPLATE = `# Handoff Command
+
+## Purpose
+
+Use /handoff when:
+- The current session context is getting too long and quality is degrading
+- You want to start fresh while preserving essential context from this session
+- The context window is approaching capacity
+
+This creates a detailed context summary that can be used to continue work in a new session.
+
+---
+
+# PHASE 0: VALIDATE REQUEST
+
+Before proceeding, confirm:
+- [ ] There is meaningful work or context in this session to preserve
+- [ ] The user wants to create a handoff summary (not just asking about it)
+
+If the session is nearly empty or has no meaningful context, inform the user there is nothing substantial to hand off.
+
+---
+
+# PHASE 1: GATHER PROGRAMMATIC CONTEXT
+
+Execute these tools to gather concrete data:
+
+1. session_read({ session_id: "$SESSION_ID" }) — full session history
+2. todoread() — current task progress
+3. Bash({ command: "git diff --stat HEAD~10..HEAD" }) — recent file changes
+4. Bash({ command: "git status --porcelain" }) — uncommitted changes
+
+Suggested execution order:
+
+\`\`\`
+session_read({ session_id: "$SESSION_ID" })
+todoread()
+Bash({ command: "git diff --stat HEAD~10..HEAD" })
+Bash({ command: "git status --porcelain" })
+\`\`\`
+
+Analyze the gathered outputs to understand:
+- What the user asked for (exact wording)
+- What work was completed
+- What tasks remain incomplete (include todo state)
+- What decisions were made
+- What files were modified or discussed (include git diff/stat + status)
+- What patterns, constraints, or preferences were established
+
+---
+
+# PHASE 2: EXTRACT CONTEXT
+
+Write the context summary from first person perspective ("I did...", "I told you...").
+
+Focus on:
+- Capabilities and behavior, not file-by-file implementation details
+- What matters for continuing the work
+- Avoiding excessive implementation details (variable names, storage keys, constants) unless critical
+- USER REQUESTS (AS-IS) must be verbatim (do not paraphrase)
+- EXPLICIT CONSTRAINTS must be verbatim only (do not invent)
+
+Questions to consider when extracting:
+- What did I just do or implement?
+- What instructions did I already give which are still relevant (e.g. follow patterns in the codebase)?
+- What files did I tell you are important or that I am working on?
+- Did I provide a plan or spec that should be included?
+- What did I already tell you that is important (libraries, patterns, constraints, preferences)?
+- What important technical details did I discover (APIs, methods, patterns)?
+- What caveats, limitations, or open questions did I find?
+
+---
+
+# PHASE 3: FORMAT OUTPUT
+
+Generate a handoff summary using this exact format:
+
+\`\`\`
+HANDOFF CONTEXT
+===============
+
+USER REQUESTS (AS-IS)
+---------------------
+- [Exact verbatim user requests - NOT paraphrased]
+
+GOAL
+----
+[One sentence describing what should be done next]
+
+WORK COMPLETED
+--------------
+- [First person bullet points of what was done]
+- [Include specific file paths when relevant]
+- [Note key implementation decisions]
+
+CURRENT STATE
+-------------
+- [Current state of the codebase or task]
+- [Build/test status if applicable]
+- [Any environment or configuration state]
+
+PENDING TASKS
+-------------
+- [Tasks that were planned but not completed]
+- [Next logical steps to take]
+- [Any blockers or issues encountered]
+- [Include current todo state from todoread()]
+
+KEY FILES
+---------
+- [path/to/file1] - [brief role description]
+- [path/to/file2] - [brief role description]
+(Maximum 10 files, prioritized by importance)
+- (Include files from git diff/stat and git status)
+
+IMPORTANT DECISIONS
+-------------------
+- [Technical decisions that were made and why]
+- [Trade-offs that were considered]
+- [Patterns or conventions established]
+
+EXPLICIT CONSTRAINTS
+--------------------
+- [Verbatim constraints only - from user or existing AGENTS.md]
+- If none, write: None
+
+CONTEXT FOR CONTINUATION
+------------------------
+- [What the next session needs to know to continue]
+- [Warnings or gotchas to be aware of]
+- [References to documentation if relevant]
+\`\`\`
+
+Rules for the summary:
+- Plain text with bullets
+- No markdown headers with # (use the format above with dashes)
+- No bold, italic, or code fences within content
+- Use workspace-relative paths for files
+- Keep it focused - only include what matters for continuation
+- Pick an appropriate length based on complexity
+- USER REQUESTS (AS-IS) and EXPLICIT CONSTRAINTS must be verbatim only
+
+---
+
+# PHASE 4: PROVIDE INSTRUCTIONS
+
+After generating the summary, instruct the user:
+
+\`\`\`
+---
+
+TO CONTINUE IN A NEW SESSION:
+
+1. Press 'n' in OpenCode TUI to open a new session, or run 'opencode' in a new terminal
+2. Paste the HANDOFF CONTEXT above as your first message
+3. Add your request: "Continue from the handoff context above. [Your next task]"
+
+The new session will have all context needed to continue seamlessly.
+\`\`\`
+
+---
+
+# IMPORTANT CONSTRAINTS
+
+- DO NOT attempt to programmatically create new sessions (no API available to agents)
+- DO provide a self-contained summary that works without access to this session
+- DO include workspace-relative file paths
+- DO NOT include sensitive information (API keys, credentials, secrets)
+- DO NOT exceed 10 files in the KEY FILES section
+- DO keep the GOAL section to a single sentence or short paragraph
+
+---
+
+# EXECUTE NOW
+
+Begin by gathering programmatic context, then synthesize the handoff summary.
+`
--- a/src/features/builtin-commands/templates/init-deep.ts
+++ b/src/features/builtin-commands/templates/init-deep.ts
@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.

 \`\`\`
 // Fire all at once, collect results later
-delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
-delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization")
-delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
-delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
-delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
-delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
+task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
+task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
+task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
+task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
+task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
+task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
 \`\`\`

 <dynamic-agents>
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
 Example spawning:
 \`\`\`
 // 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
-delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
-delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
-delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories")
+task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
+task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
+task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
 // ... more based on calculation
 \`\`\`
 </dynamic-agents>
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [

 **Mark "generate" as in_progress.**

+<critical>
+**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
+NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
+</critical>
+
 ### Root AGENTS.md (Full Treatment)

 \`\`\`markdown
@@ -240,7 +245,7 @@ Launch writing tasks for each location:

 \`\`\`
 for loc in AGENTS_LOCATIONS (except root):
-  delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\`
+  task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
    Generate AGENTS.md for: \${loc.path}
    - Reason: \${loc.reason}
    - 30-80 lines max
--- a/src/features/builtin-commands/types.ts
+++ b/src/features/builtin-commands/types.ts
@@ -1,6 +1,6 @@
 import type { CommandDefinition } from "../claude-code-command-loader"

-export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation"
+export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" | "handoff"

 export interface BuiltinCommandConfig {
  disabled_commands?: BuiltinCommandName[]
--- a/src/features/builtin-skills/git-master/SKILL.md
+++ b/src/features/builtin-skills/git-master/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: git-master
-description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
+description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
 ---

 # Git Master Agent
--- a/src/features/builtin-skills/skills/git-master.ts
+++ b/src/features/builtin-skills/skills/git-master.ts
@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
 export const gitMasterSkill: BuiltinSkill = {
  name: "git-master",
  description:
-    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
+    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
  template: `# Git Master Agent

 You are a Git expert combining three specializations:
--- a/Show More
+++ b/Show More