fix: convert executeSyncTask to async prompt + polling pattern

The Oracle agent (and every other sync subagent task) fails with a JSON Parse error in ACP environments because session.prompt() (blocking HTTP) returns empty or incomplete responses. Replace promptSyncWithModelSuggestionRetry with promptWithModelSuggestionRetry (async, fire-and-forget) and add a polling loop that waits for response stability, matching the proven pattern from executeUnstableAgentTask. Fixes #1681
@mrm007 has signed the CLA in code-yeongyu/oh-my-opencode#1680
2026-02-09 10:03:54 +09:00 · 2026-02-08 21:41:45 +00:00 · 2026-02-08 17:12:45 +00:00 · 2026-02-08 16:02:43 +00:00 · 2026-02-08 15:44:17 +00:00 · 2026-02-08 20:00:52 +09:00
241 changed files with 13629 additions and 4709 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 # Dependencies
-.sisyphus/
+.sisyphus/*
+!.sisyphus/rules/
 node_modules/

 # Build output
--- a/.opencode/command/remove-deadcode.md
+++ b/.opencode/command/remove-deadcode.md
@@ -41,27 +41,27 @@ Fire ALL simultaneously:

 ```
 // Agent 1: Find all exported symbols
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
  List each with: file path, line number, symbol name, export type (named/default).
  EXCLUDE: src/index.ts root exports, test files.
  Return as structured list.")

 // Agent 2: Find potentially unused files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find files in src/ that are NOT imported by any other file.
  Check import/require statements across the entire codebase.
  EXCLUDE: index.ts files, test files, entry points, config files, .md files.
  Return list of potentially orphaned files.")

 // Agent 3: Find unused imports within files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find unused imports across src/**/*.ts files.
  Look for import statements where the imported symbol is never referenced in the file body.
  Return: file path, line number, imported symbol name.")

 // Agent 4: Find functions/variables only used in their own declaration
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
  prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
  to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
 ```
--- a/.opencode/skills/github-issue-triage/SKILL.md
+++ b/.opencode/skills/github-issue-triage/SKILL.md
@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:

 | Aspect | Rule |
 |--------|------|
-| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
 | **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
 | **Result Handling** | `background_output()` to collect results as they complete |
 | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
  const issue = allIssues[i]
  const category = getCategory(i)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← CRITICAL: Each issue is independent background task
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
  
  console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← BACKGROUND TASK: Each issue runs independently
@@ -480,7 +480,7 @@ When invoked, immediately:
 4. Exhaustive pagination for issues
 5. Exhaustive pagination for PRs
 6. **LAUNCH**: For each issue:
-   - `delegate_task(run_in_background=true)` - 1 task per issue
+   - `task(run_in_background=true)` - 1 task per issue
   - Store taskId mapped to issue number
 7. **STREAM**: Poll `background_output()` for each task:
   - As each completes, immediately report result
--- a/.opencode/skills/github-pr-triage/SKILL.md
+++ b/.opencode/skills/github-pr-triage/SKILL.md
@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:

 | Aspect | Rule |
 |--------|------|
-| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 PR = Exactly 1 `task()` call |
 | **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
 | **Result Handling** | `background_output()` to collect results as they complete |
 | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
  const pr = allPRs[i]
  const category = getCategory(i)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← CRITICAL: Each PR is independent background task
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
  
  console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
  
-  const taskId = await delegate_task(
+  const taskId = await task(
    category=category,
    load_skills=[],
    run_in_background=true,  // ← BACKGROUND TASK: Each PR runs independently
@@ -474,7 +474,7 @@ When invoked, immediately:
 2. `gh repo view --json nameWithOwner -q .nameWithOwner`
 3. Exhaustive pagination for ALL open PRs
 4. **LAUNCH**: For each PR:
-   - `delegate_task(run_in_background=true)` - 1 task per PR
+   - `task(run_in_background=true)` - 1 task per PR
   - Store taskId mapped to PR number
 5. **STREAM**: Poll `background_output()` for each task:
   - As each completes, immediately report result
--- a/.sisyphus/rules/modular-code-enforcement.md
+++ b/.sisyphus/rules/modular-code-enforcement.md
@@ -0,0 +1,117 @@
+---
+globs: ["**/*.ts", "**/*.tsx"]
+alwaysApply: false
+description: "Enforces strict modular code architecture: SRP, no monolithic index.ts, 200 LOC hard limit"
+---
+
+<MANDATORY_ARCHITECTURE_RULE severity="BLOCKING" priority="HIGHEST">
+
+# Modular Code Architecture — Zero Tolerance Policy
+
+This rule is NON-NEGOTIABLE. Violations BLOCK all further work until resolved.
+
+## Rule 1: index.ts is an ENTRY POINT, NOT a dumping ground
+
+`index.ts` files MUST ONLY contain:
+- Re-exports (`export { ... } from "./module"`)
+- Factory function calls that compose modules
+- Top-level wiring/registration (hook registration, plugin setup)
+
+`index.ts` MUST NEVER contain:
+- Business logic implementation
+- Helper/utility functions
+- Type definitions beyond simple re-exports
+- Multiple unrelated responsibilities mixed together
+
+**If you find mixed logic in index.ts**: Extract each responsibility into its own dedicated file BEFORE making any other changes. This is not optional.
+
+## Rule 2: No Catch-All Files — utils.ts / service.ts are CODE SMELLS
+
+A single `utils.ts`, `helpers.ts`, `service.ts`, or `common.ts` is a **gravity well** — every unrelated function gets tossed in, and it grows into an untestable, unreviewable blob.
+
+**These file names are BANNED as top-level catch-alls.** Instead:
+
+| Anti-Pattern | Refactor To |
+|--------------|-------------|
+| `utils.ts` with `formatDate()`, `slugify()`, `retry()` | `date-formatter.ts`, `slugify.ts`, `retry.ts` |
+| `service.ts` handling auth + billing + notifications | `auth-service.ts`, `billing-service.ts`, `notification-service.ts` |
+| `helpers.ts` with 15 unrelated exports | One file per logical domain |
+
+**Design for reusability from the start.** Each module should be:
+- **Independently importable** — no consumer should need to pull in unrelated code
+- **Self-contained** — its dependencies are explicit, not buried in a shared grab-bag
+- **Nameable by purpose** — the filename alone tells you what it does
+
+If you catch yourself typing `utils.ts` or `service.ts`, STOP and name the file after what it actually does.
+
+## Rule 3: Single Responsibility Principle — ABSOLUTE
+
+Every `.ts` file MUST have exactly ONE clear, nameable responsibility.
+
+**Self-test**: If you cannot describe the file's purpose in ONE short phrase (e.g., "parses YAML frontmatter", "matches rules against file paths"), the file does too much. Split it.
+
+| Signal | Action |
+|--------|--------|
+| File has 2+ unrelated exported functions | **SPLIT NOW** — each into its own module |
+| File mixes I/O with pure logic | **SPLIT NOW** — separate side effects from computation |
+| File has both types and implementation | **SPLIT NOW** — types.ts + implementation.ts |
+| You need to scroll to understand the file | **SPLIT NOW** — it's too large |
+
+## Rule 4: 200 LOC Hard Limit — CODE SMELL DETECTOR
+
+Any `.ts`/`.tsx` file exceeding **200 lines of code** (excluding prompt strings, template literals containing prompts, and `.md` content) is an **immediate code smell**.
+
+**When you detect a file > 200 LOC**:
+1. **STOP** current work
+2. **Identify** the multiple responsibilities hiding in the file
+3. **Extract** each responsibility into a focused module
+4. **Verify** each resulting file is < 200 LOC and has a single purpose
+5. **Resume** original work
+
+Prompt-heavy files (agent definitions, skill definitions) where the bulk of content is template literal prompt text are EXEMPT from the LOC count — but their non-prompt logic must still be < 200 LOC.
+
+### How to Count LOC
+
+**Count these** (= actual logic):
+- Import statements
+- Variable/constant declarations
+- Function/class/interface/type definitions
+- Control flow (`if`, `for`, `while`, `switch`, `try/catch`)
+- Expressions, assignments, return statements
+- Closing braces `}` that belong to logic blocks
+
+**Exclude these** (= not logic):
+- Blank lines
+- Comment-only lines (`//`, `/* */`, `/** */`)
+- Lines inside template literals that are prompt/instruction text (e.g., the string body of `` const prompt = `...` ``)
+- Lines inside multi-line strings used as documentation/prompt content
+
+**Quick method**: Read the file → subtract blank lines, comment-only lines, and prompt string content → remaining count = LOC.
+
+**Example**:
+```typescript
+// 1  import { foo } from "./foo";          ← COUNT
+// 2                                         ← SKIP (blank)
+// 3  // Helper for bar                      ← SKIP (comment)
+// 4  export function bar(x: number) {       ← COUNT
+// 5    const prompt = `                     ← COUNT (declaration)
+// 6      You are an assistant.              ← SKIP (prompt text)
+// 7      Follow these rules:                ← SKIP (prompt text)
+// 8    `;                                   ← COUNT (closing)
+// 9    return process(prompt, x);           ← COUNT
+// 10 }                                      ← COUNT
+```
+→ LOC = **6** (lines 1, 4, 5, 8, 9, 10). Not 10.
+
+When in doubt, **round up** — err on the side of splitting.
+
+## How to Apply
+
+When reading, writing, or editing ANY `.ts`/`.tsx` file:
+
+1. **Check the file you're touching** — does it violate any rule above?
+2. **If YES** — refactor FIRST, then proceed with your task
+3. **If creating a new file** — ensure it has exactly one responsibility and stays under 200 LOC
+4. **If adding code to an existing file** — verify the addition doesn't push the file past 200 LOC or add a second responsibility. If it does, extract into a new module.
+
+</MANDATORY_ARCHITECTURE_RULE>
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,7 +1,7 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-02-06T18:30:00+09:00
-**Commit:** c6c149e
+**Generated:** 2026-02-08T16:45:00+09:00
+**Commit:** edee865f
 **Branch:** dev

 ---
@@ -135,8 +135,8 @@ oh-my-opencode/
 │   ├── cli/              # CLI installer, doctor - see src/cli/AGENTS.md
 │   ├── mcp/              # Built-in MCPs - see src/mcp/AGENTS.md
 │   ├── config/           # Zod schema (schema.ts 455 lines), TypeScript types
-│   ├── plugin-handlers/  # Plugin config loading (config-handler.ts 501 lines)
-│   ├── index.ts          # Main plugin entry (924 lines)
+│   ├── plugin-handlers/  # Plugin config loading (config-handler.ts 562 lines)
+│   ├── index.ts          # Main plugin entry (999 lines)
 │   ├── plugin-config.ts  # Config loading orchestration
 │   └── plugin-state.ts   # Model cache state
 ├── script/               # build-schema.ts, build-binaries.ts, publish.ts
@@ -170,7 +170,7 @@ oh-my-opencode/
 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests - fix the code
- Test file: `*.test.ts` alongside source (100+ test files)
+- Test file: `*.test.ts` alongside source (163 test files)
 - BDD comments: `//#given`, `//#when`, `//#then`

 ## CONVENTIONS
@@ -180,7 +180,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern via index.ts
 - **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
- **Testing**: BDD comments, 100+ test files
+- **Testing**: BDD comments, 163 test files
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS
@@ -195,7 +195,7 @@ oh-my-opencode/
 | Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
 | Error Handling | Empty catch blocks |
 | Testing | Deleting failing tests, writing implementation before test |
-| Agent Calls | Sequential - use `delegate_task` parallel |
+| Agent Calls | Sequential - use `task` parallel |
 | Hook Logic | Heavy PreToolUse - slows every call |
 | Commits | Giant (3+ files), separate test from impl |
 | Temperature | >0.3 for code agents |
@@ -241,19 +241,22 @@ bun test               # 100+ test files

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/features/background-agent/manager.ts` | 1556 | Task lifecycle, concurrency |
+| `src/features/background-agent/manager.ts` | 1642 | Task lifecycle, concurrency |
 | `src/features/builtin-skills/skills/git-master.ts` | 1107 | Git master skill definition |
-| `src/tools/delegate-task/executor.ts` | 983 | Category-based delegation executor |
-| `src/index.ts` | 924 | Main plugin entry |
-| `src/tools/lsp/client.ts` | 803 | LSP client operations |
-| `src/hooks/atlas/index.ts` | 770 | Orchestrator hook |
-| `src/tools/background-task/tools.ts` | 734 | Background task tools |
+| `src/index.ts` | 999 | Main plugin entry |
+| `src/tools/delegate-task/executor.ts` | 969 | Category-based delegation executor |
+| `src/tools/lsp/client.ts` | 851 | LSP client operations |
+| `src/tools/background-task/tools.ts` | 757 | Background task tools |
+| `src/hooks/atlas/index.ts` | 697 | Orchestrator hook |
 | `src/cli/config-manager.ts` | 667 | JSONC config parsing |
 | `src/features/skill-mcp-manager/manager.ts` | 640 | MCP client lifecycle |
 | `src/features/builtin-commands/templates/refactor.ts` | 619 | Refactor command template |
 | `src/agents/hephaestus.ts` | 618 | Autonomous deep worker agent |
+| `src/agents/utils.ts` | 571 | Agent creation, model fallback resolution |
+| `src/plugin-handlers/config-handler.ts` | 562 | Plugin config loading |
 | `src/tools/delegate-task/constants.ts` | 552 | Delegation constants |
 | `src/cli/install.ts` | 542 | Interactive CLI installer |
+| `src/hooks/task-continuation-enforcer.ts` | 530 | Task completion enforcement |
 | `src/agents/sisyphus.ts` | 530 | Main orchestrator agent |

 ## MCP ARCHITECTURE
--- a/bun.lock
+++ b/bun.lock
@@ -28,13 +28,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.2.3",
-        "oh-my-opencode-darwin-x64": "3.2.3",
-        "oh-my-opencode-linux-arm64": "3.2.3",
-        "oh-my-opencode-linux-arm64-musl": "3.2.3",
-        "oh-my-opencode-linux-x64": "3.2.3",
-        "oh-my-opencode-linux-x64-musl": "3.2.3",
-        "oh-my-opencode-windows-x64": "3.2.3",
+        "oh-my-opencode-darwin-arm64": "3.3.1",
+        "oh-my-opencode-darwin-x64": "3.3.1",
+        "oh-my-opencode-linux-arm64": "3.3.1",
+        "oh-my-opencode-linux-arm64-musl": "3.3.1",
+        "oh-my-opencode-linux-x64": "3.3.1",
+        "oh-my-opencode-linux-x64-musl": "3.3.1",
+        "oh-my-opencode-windows-x64": "3.3.1",
      },
    },
  },
@@ -226,19 +226,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.2.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Doc9xQCj5Jmx3PzouBIfvDwmfWM94Y9Q9IngFqOjrVpfBef9V/WIH0PlhJU6ps4BKGey8Nf2afFq3UE06Z63Hg=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-R+o42Km6bsIaW6D3I8uu2HCF3BjIWqa/fg38W5y4hJEOw4mL0Q7uV4R+0vtrXRHo9crXTK9ag0fqVQUm+Y6iAQ=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.2.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-w7lO0Hn/AlLCHe33KPbje83Js2h5weDWVMuopEs6d3pi/1zkRDBEhCi63S4J0d0EKod9kEPQA6ojtdVJ4J39zQ=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7VTbpR1vH3OEkoJxBKtYuxFPX8M3IbJKoeHWME9iK6FpT11W1ASsjyuhvzB1jcxSeqF8ddMnjitlG5ub6h5EVw=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m1tS1jRLO2Svm5NuetK3BAgdAR8b2GkiIfMFoIYsLJTPmzIkXaigAYkFq+BXCs5JAbRmPmvjndz9cuCddnPADQ=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-BZ/r/CFlvbOxkdZZrRoT16xFOjibRZHuwQnaE4f0JvOzgK6/HWp3zJI1+2/aX/oK5GA6lZxNWRrJC/SKUi8LEg=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Q/0AGtOuUFGNGIX8F6iD5W8c2spbjrqVBPt0B7laQSwnScKs/BI+TvM6HRE37vhoWg+fzhAX3QYJ2H9Un9FYrg=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-U90Wruf21h+CJbtcrS7MeTAc/5VOF6RI+5jr7qj/cCxjXNJtjhyJdz/maehArjtgf304+lYCM/Mh1i+G2D3YFQ=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RIAyoj2XbT8vH++5fPUkdO+D1tfqxh+iWto7CqWr1TgbABbBJljGk91HJgS9xjnxyCQJEpFhTmO7NMHKJcZOWQ=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-sYzohSNdwsAhivbXcbhPdF1qqQi2CCI7FSgbmvvfBOMyZ8HAgqOFqYW2r3GPdmtywzkjOTvCzTG56FZwEjx15w=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-nnQK3y7R4DrBvqdqRGbujL2oAAQnVVb23JHUbJPQ6YxrRRGWpLOVGvK5c16ykSFEUPl8eZDmi1ON/R4opKLOUw=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aG5pZ4eWS0YSGUicOnjMkUPrIqQV4poYF+d9SIvrfvlaMcK6WlQn7jXzgNCwJsfGn5lyhSmjshZBEU+v79Ua3w=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.2.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-mt8E/TkpaCp04pvzwntT8x8TaqXDt3zCD5X2eA8ZZMrb5ofNr5HyG5G4SFXrUh+Ez3b/3YXpNWv6f6rnAlk1Dg=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-FGH7cnzBqNwjSkzCDglMsVttaq+MsykAxa7ehaFK+0dnBZArvllS3W13a3dGaANHMZzfK0vz8hNDUdVi7Z63cA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/docs/category-skill-guide.md
+++ b/docs/category-skill-guide.md
@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
 - **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
 - **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)

-By combining these two concepts, you can generate optimal agents through `delegate_task`.
+By combining these two concepts, you can generate optimal agents through `task`.

 ---

@@ -32,10 +32,10 @@ A Category is an agent configuration preset optimized for specific domains.

 ### Usage

-Specify the `category` parameter when invoking the `delegate_task` tool.
+Specify the `category` parameter when invoking the `task` tool.

 ```typescript
-delegate_task(
+task(
  category="visual-engineering",
  prompt="Add a responsive chart component to the dashboard page"
 )
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
 Add desired skill names to the `load_skills` array.

 ```typescript
-delegate_task(
+task(
  category="quick",
  load_skills=["git-master"],
  prompt="Commit current changes. Follow commit message style."
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.

 ---

-## 5. delegate_task Prompt Guide
+## 5. task Prompt Guide

 When delegating, **clear and specific** prompts are essential. Include these 7 elements:

@@ -158,7 +158,7 @@ You can fine-tune categories in `oh-my-opencode.json`.

 | Field | Type | Description |
 |-------|------|-------------|
-| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
+| `description` | string | Human-readable description of the category's purpose. Shown in the `task` tool prompt. |
 | `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
 | `variant` | string | Model variant (e.g., `max`, `xhigh`) |
 | `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
    "explore": { "model": "opencode/gpt-5-nano" }        // Free model for grep
  },
  
-  // Override category models (used by delegate_task)
+  // Override category models (used by task)
  "categories": {
    "quick": { "model": "opencode/gpt-5-nano" },         // Fast/cheap for trivial tasks
    "visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
 Oh My OpenCode includes built-in skills that provide additional capabilities:

 - **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
+- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.

 Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:

@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
 ### How It Works

 When `tmux.enabled` is `true` and you're inside a tmux session:
- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
+- Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
 - Each pane shows the subagent's real-time output
 - Panes are automatically closed when the subagent completes
 - Layout is automatically adjusted based on your configuration
@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many

 ## Categories

-Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
+Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.

 ### Built-in Categories

@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
 ### Usage

 ```javascript
-// Via delegate_task tool
-delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component")
-delegate_task(category="ultrabrain", prompt="Design the payment processing flow")
+// Via task tool
+task(category="visual-engineering", prompt="Create a responsive dashboard component")
+task(category="ultrabrain", prompt="Design the payment processing flow")

 // Or target a specific agent directly (bypasses categories)
-delegate_task(agent="oracle", prompt="Review this architecture")
+task(agent="oracle", prompt="Review this architecture")
 ```

 ### Custom Categories
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`

 | Option             | Type    | Default | Description                                                                                         |
 | ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
-| `description`       | string  | -       | Human-readable description of the category's purpose. Shown in delegate_task prompt.                     |
+| `description`       | string  | -       | Human-readable description of the category's purpose. Shown in the `task` tool prompt.                   |
 | `is_unstable_agent`| boolean | `false`  | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |

 ## Model Resolution System
--- a/docs/features.md
+++ b/docs/features.md
@@ -54,7 +54,7 @@ Run agents in the background and continue working:

 ```
 # Launch in background
-delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)

 # Continue working...
 # System notifies on completion
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
 | Hook | Event | Description |
 |------|-------|-------------|
 | **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
-| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. |
+| **delegate-task-retry** | PostToolUse | Retries failed `task` tool calls. |

 #### Integration

@@ -454,7 +454,7 @@ Disable specific hooks in config:
 | Tool | Description |
 |------|-------------|
 | **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
-| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
+| **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
 | **background_output** | Retrieve background task results |
 | **background_cancel** | Cancel running background tasks |

--- a/docs/guide/understanding-orchestration-system.md
+++ b/docs/guide/understanding-orchestration-system.md
@@ -50,11 +50,11 @@ flowchart TB
    User -->|"/start-work"| Orchestrator
    Plan -->|"Read"| Orchestrator
    
-    Orchestrator -->|"delegate_task(category)"| Junior
-    Orchestrator -->|"delegate_task(agent)"| Oracle
-    Orchestrator -->|"delegate_task(agent)"| Explore
-    Orchestrator -->|"delegate_task(agent)"| Librarian
-    Orchestrator -->|"delegate_task(agent)"| Frontend
+    Orchestrator -->|"task(category)"| Junior
+    Orchestrator -->|"task(agent)"| Oracle
+    Orchestrator -->|"task(agent)"| Explore
+    Orchestrator -->|"task(agent)"| Librarian
+    Orchestrator -->|"task(agent)"| Frontend
    
    Junior -->|"Results + Learnings"| Orchestrator
    Oracle -->|"Advice"| Orchestrator
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
 ```typescript
 // Orchestrator identifies parallelizable groups from plan
 // Group A: Tasks 2, 3, 4 (no file conflicts)
-delegate_task(category="ultrabrain", prompt="Task 2...")
-delegate_task(category="visual-engineering", prompt="Task 3...")
-delegate_task(category="general", prompt="Task 4...")
+task(category="ultrabrain", prompt="Task 2...")
+task(category="visual-engineering", prompt="Task 3...")
+task(category="general", prompt="Task 4...")
 // All run simultaneously
 ```

@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")

 Junior is the **workhorse** that actually writes code. Key characteristics:

- **Focused**: Cannot delegate (blocked from task/delegate_task tools)
+- **Focused**: Cannot delegate (blocked from the `task` tool)
 - **Disciplined**: Obsessive todo tracking
 - **Verified**: Must pass lsp_diagnostics before completion
 - **Constrained**: Cannot modify plan files (READ-ONLY)
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.

 ---

-## The delegate_task Tool: Category + Skill System
+## The task Tool: Category + Skill System

 ### Why Categories are Revolutionary

@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.

 ```typescript
 // OLD: Model name creates distributional bias
-delegate_task(agent="gpt-5.2", prompt="...")  // Model knows its limitations
-delegate_task(agent="claude-opus-4.6", prompt="...")  // Different self-perception
+task(agent="gpt-5.2", prompt="...")  // Model knows its limitations
+task(agent="claude-opus-4.6", prompt="...")  // Different self-perception
 ```

 **The Solution: Semantic Categories:**

 ```typescript
 // NEW: Category describes INTENT, not implementation
-delegate_task(category="ultrabrain", prompt="...")     // "Think strategically"
-delegate_task(category="visual-engineering", prompt="...")  // "Design beautifully"
-delegate_task(category="quick", prompt="...")          // "Just get it done fast"
+task(category="ultrabrain", prompt="...")     // "Think strategically"
+task(category="visual-engineering", prompt="...")  // "Design beautifully"
+task(category="quick", prompt="...")          // "Just get it done fast"
 ```

 ### Built-in Categories
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:

 ```typescript
 // Category + Skill combination
-delegate_task(
+task(
  category="visual-engineering", 
  load_skills=["frontend-ui-ux"],  // Adds UI/UX expertise
  prompt="..."
 )

-delegate_task(
+task(
  category="general",
  load_skills=["playwright"],  // Adds browser automation expertise
  prompt="..."
@@ -365,7 +365,7 @@ sequenceDiagram
        
        Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
        
-        Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
+        Orchestrator->>Junior: task(category, load_skills, prompt)
        
        Junior->>Junior: Create todos, execute
        Junior->>Junior: Verify (lsp_diagnostics, tests)
--- a/docs/orchestration-guide.md
+++ b/docs/orchestration-guide.md
@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.

 2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.

-3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly.
+3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.

 4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.

--- a/issue-1501-analysis.md
+++ b/issue-1501-analysis.md
@@ -288,7 +288,7 @@ src/tools/delegate-task/constants.ts
 ```
 Sisyphus (ULW mode)
  ↓
-delegate_task(category="deep", ...)
+task(category="deep", ...)
  ↓
 executor.ts: executeBackgroundContinuation()
  ↓
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.2.4",
-    "oh-my-opencode-darwin-x64": "3.2.4",
-    "oh-my-opencode-linux-arm64": "3.2.4",
-    "oh-my-opencode-linux-arm64-musl": "3.2.4",
-    "oh-my-opencode-linux-x64": "3.2.4",
-    "oh-my-opencode-linux-x64-musl": "3.2.4",
-    "oh-my-opencode-windows-x64": "3.2.4"
+    "oh-my-opencode-darwin-arm64": "3.4.0",
+    "oh-my-opencode-darwin-x64": "3.4.0",
+    "oh-my-opencode-linux-arm64": "3.4.0",
+    "oh-my-opencode-linux-arm64-musl": "3.4.0",
+    "oh-my-opencode-linux-x64": "3.4.0",
+    "oh-my-opencode-linux-x64-musl": "3.4.0",
+    "oh-my-opencode-windows-x64": "3.4.0"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.2.4",
+  "version": "3.4.0",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -1207,6 +1207,62 @@
      "created_at": "2026-02-06T06:23:24Z",
      "repoId": 1108837393,
      "pullRequestNo": 1541
+    },
+    {
+      "name": "itsnebulalol",
+      "id": 18669106,
+      "comment_id": 3864672624,
+      "created_at": "2026-02-07T15:10:54Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1622
+    },
+    {
+      "name": "mkusaka",
+      "id": 24956031,
+      "comment_id": 3864822328,
+      "created_at": "2026-02-07T16:54:36Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1629
+    },
+    {
+      "name": "quantmind-br",
+      "id": 170503374,
+      "comment_id": 3865064441,
+      "created_at": "2026-02-07T18:38:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1634
+    },
+    {
+      "name": "QiRaining",
+      "id": 13825001,
+      "comment_id": 3865979224,
+      "created_at": "2026-02-08T02:34:46Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1641
+    },
+    {
+      "name": "JunyeongChoi0",
+      "id": 99778164,
+      "comment_id": 3867461224,
+      "created_at": "2026-02-08T16:02:31Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1674
+    },
+    {
+      "name": "aliozdenisik",
+      "id": 106994209,
+      "comment_id": 3867619266,
+      "created_at": "2026-02-08T17:12:34Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1676
+    },
+    {
+      "name": "mrm007",
+      "id": 3297808,
+      "comment_id": 3868350953,
+      "created_at": "2026-02-08T21:41:35Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1680
    }
  ]
 }
--- a/sisyphus-prompt.md
+++ b/sisyphus-prompt.md
@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
 - "Working with unfamiliar npm/pip/cargo packages"
 ### Pre-Delegation Planning (MANDATORY)

-**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.**
+**BEFORE every `task` call, EXPLICITLY declare your reasoning.**

 #### Step 1: Identify Task Requirements

@@ -236,7 +236,7 @@ Ask yourself:
 **MANDATORY FORMAT:**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Category**: [selected-category-name]
 - **Why this category**: [how category description matches task domain]
 - **load_skills**: [list of selected skills]
@@ -246,14 +246,14 @@ I will use delegate_task with:
 - **Expected Outcome**: [what success looks like]
 ```

-**Then** make the delegate_task call.
+**Then** make the task call.

 #### Examples

 **CORRECT: Full Evaluation**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Category**: [category-name]
 - **Why this category**: Category description says "[quote description]" which matches this task's requirements
 - **load_skills**: ["skill-a", "skill-b"]
@@ -263,9 +263,11 @@ I will use delegate_task with:
  - skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
 - **Expected Outcome**: [concrete deliverable]

-delegate_task(
+task(
  category="[category-name]",
  load_skills=["skill-a", "skill-b"],
+  description="[short task description]",
+  run_in_background=false,
  prompt="..."
 )
 ```
@@ -273,14 +275,16 @@ delegate_task(
 **CORRECT: Agent-Specific (for exploration/consultation)**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Agent**: [agent-name]
 - **Reason**: This requires [agent's specialty] based on agent description
 - **load_skills**: [] (agents have built-in expertise)
 - **Expected Outcome**: [what agent should return]

-delegate_task(
+task(
  subagent_type="[agent-name]",
+  description="[short task description]",
+  run_in_background=false,
  load_skills=[],
  prompt="..."
 )
@@ -289,14 +293,15 @@ delegate_task(
 **CORRECT: Background Exploration**

 ```
-I will use delegate_task with:
+I will use task with:
 - **Agent**: explore
 - **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
 - **load_skills**: []
 - **Expected Outcome**: List of files containing auth patterns

-delegate_task(
+task(
  subagent_type="explore",
+  description="Find auth implementations",
  run_in_background=true,
  load_skills=[],
  prompt="Find all authentication implementations in the codebase"
@@ -306,7 +311,7 @@ delegate_task(
 **WRONG: No Skill Evaluation**

 ```
-delegate_task(category="...", load_skills=[], prompt="...")  // Where's the justification?
+task(category="...", load_skills=[], prompt="...")  // Where's the justification?
 ```

 **WRONG: Vague Category Selection**
@@ -317,7 +322,7 @@ I'll use this category because it seems right.

 #### Enforcement

-**BLOCKING VIOLATION**: If you call `delegate_task` without:
+**BLOCKING VIOLATION**: If you call `task` without:
 1. Explaining WHY category was selected (based on description)
 2. Evaluating EACH available skill for relevance

@@ -329,15 +334,15 @@ I'll use this category because it seems right.
 ```typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
+task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
+task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
-result = delegate_task(...)  // Never wait synchronously for explore/librarian
+result = task(...)  // Never wait synchronously for explore/librarian
 ```

 ### Background Result Collection:
@@ -347,16 +352,16 @@ result = delegate_task(...)  // Never wait synchronously for explore/librarian
 4. BEFORE final answer: `background_cancel(all=true)`

 ### Resume Previous Agent (CRITICAL for efficiency):
-Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED.
+Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.

-**ALWAYS use resume when:**
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"`
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"`
- Multi-turn with same agent → resume instead of new task (saves tokens!)
+**ALWAYS use session_id when:**
+- Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
+- Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
+- Multi-turn with same agent → session_id instead of new task (saves tokens!)

 **Example:**
 ```
-delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.")
+task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
 ```

 ### Search Stop Conditions
@@ -377,7 +382,7 @@ STOP searching when:
 3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
 ### Category + Skills Delegation System

-**delegate_task() combines categories and skills for optimal task execution.**
+**task() combines categories and skills for optimal task execution.**

 #### Available Categories (Domain-Optimized Models)

@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
 ### Delegation Pattern

 ```typescript
-delegate_task(
+task(
  category="[selected-category]",
  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills
  prompt="..."
@@ -451,7 +456,7 @@ delegate_task(

 **ANTI-PATTERN (will produce poor results):**
 ```typescript
-delegate_task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
+task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
 ```
 ### Delegation Table:

--- a/src/AGENTS.md
+++ b/src/AGENTS.md
@@ -0,0 +1,128 @@
+# AGENTS KNOWLEDGE BASE
+
+## OVERVIEW
+
+Main plugin entry point and orchestration layer. 1000+ lines of plugin initialization, hook registration, tool composition, and lifecycle management.
+
+**Core Responsibilities:**
+- Plugin initialization and configuration loading
+- 40+ lifecycle hooks orchestration  
+- 25+ tools composition and filtering
+- Background agent management
+- Session state coordination
+- MCP server lifecycle
+- Tmux integration
+- Claude Code compatibility layer
+
+## STRUCTURE
+```
+src/
+├── index.ts                          # Main plugin entry (1000 lines) - orchestration layer
+├── index.compaction-model-agnostic.static.test.ts  # Compaction hook tests
+├── agents/                           # 11 AI agents (16 files)
+├── cli/                              # CLI commands (9 files) 
+├── config/                           # Schema validation (3 files)
+├── features/                         # Background features (20+ files)
+├── hooks/                            # 40+ lifecycle hooks (14 files)
+├── mcp/                              # MCP server configs (7 files)
+├── plugin-handlers/                  # Config loading (3 files)
+├── shared/                           # Utilities (70 files)
+└── tools/                            # 25+ tools (15 files)
+```
+
+## KEY COMPONENTS
+
+**Plugin Initialization:**
+- `OhMyOpenCodePlugin()`: Main plugin factory (lines 124-841)
+- Configuration loading via `loadPluginConfig()`
+- Hook registration with safe creation patterns
+- Tool composition and disabled tool filtering
+
+**Lifecycle Management:**
+- 40+ hooks: session recovery, continuation enforcers, compaction, context injection
+- Background agent coordination via `BackgroundManager`
+- Tmux session management for multi-pane workflows
+- MCP server lifecycle via `SkillMcpManager`
+
+**Tool Ecosystem:**
+- 25+ tools: LSP, AST-grep, delegation, background tasks, skills
+- Tool filtering based on agent permissions and user config
+- Metadata restoration for tool outputs
+
+**Integration Points:**
+- Claude Code compatibility hooks and commands
+- OpenCode SDK client interactions
+- Session state persistence and recovery
+- Model variant resolution and application
+
+## HOOK REGISTRATION PATTERNS
+
+**Safe Hook Creation:**
+```typescript
+const hook = isHookEnabled("hook-name")
+  ? safeCreateHook("hook-name", () => createHookFactory(ctx), { enabled: safeHookEnabled })
+  : null;
+```
+
+**Hook Categories:**
+- **Session Management**: recovery, notification, compaction
+- **Continuation**: todo/task enforcers, stop guards
+- **Context**: injection, rules, directory content
+- **Tool Enhancement**: output truncation, error recovery, validation
+- **Agent Coordination**: usage reminders, babysitting, delegation
+
+## TOOL COMPOSITION
+
+**Core Tools:**
+```typescript
+const allTools: Record<string, ToolDefinition> = {
+  ...builtinTools,           // Basic file/session operations
+  ...createGrepTools(ctx),   // Content search
+  ...createAstGrepTools(ctx), // AST-aware refactoring
+  task: delegateTask,        // Agent delegation
+  skill: skillTool,          // Skill execution
+  // ... 20+ more tools
+};
+```
+
+**Tool Filtering:**
+- Agent permission-based restrictions
+- User-configured disabled tools
+- Dynamic tool availability based on session state
+
+## SESSION LIFECYCLE
+
+**Session Events:**
+- `session.created`: Initialize session state, tmux setup
+- `session.deleted`: Cleanup resources, clear caches
+- `message.updated`: Update agent assignments
+- `session.error`: Trigger recovery mechanisms
+
+**Continuation Flow:**
+1. User message triggers agent selection
+2. Model/variant resolution applied
+3. Tools execute with hook interception
+4. Continuation enforcers monitor completion
+5. Session compaction preserves context
+
+## CONFIGURATION INTEGRATION
+
+**Plugin Config Loading:**
+- Project + user config merging
+- Schema validation via Zod
+- Migration support for legacy configs
+- Dynamic feature enablement
+
+**Runtime Configuration:**
+- Hook enablement based on `disabled_hooks`
+- Tool filtering via `disabled_tools`
+- Agent overrides and category definitions
+- Experimental feature toggles
+
+## ANTI-PATTERNS
+
+- **Direct hook exports**: Don't export hook instances directly; create every hook via a factory for testability
+- **Global state pollution**: Don't keep state in globals; keep it session-scoped
+- **Synchronous blocking**: Don't block synchronously; the architecture is async-first with background coordination
+- **Tight coupling**: Don't call between plugin components directly; communicate via events
+- **Memory leaks**: Always clean up resources on session deletion and plugin unload
--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -2,7 +2,7 @@

 ## OVERVIEW

-11 AI agents for multi-model orchestration. Each agent has factory function + metadata + fallback chains.
+32 files containing AI agents and utilities for multi-model orchestration. Each agent has factory function + metadata + fallback chains.

 **Primary Agents** (respect UI model selection):
 - Sisyphus, Atlas, Prometheus
@@ -68,11 +68,11 @@ agents/
 ## TOOL RESTRICTIONS
 | Agent | Denied Tools |
 |-------|-------------|
-| oracle | write, edit, task, delegate_task |
-| librarian | write, edit, task, delegate_task, call_omo_agent |
-| explore | write, edit, task, delegate_task, call_omo_agent |
+| oracle | write, edit, task |
+| librarian | write, edit, task, call_omo_agent |
+| explore | write, edit, task, call_omo_agent |
 | multimodal-looker | Allowlist: read only |
-| Sisyphus-Junior | task, delegate_task |
+| Sisyphus-Junior | task |
 | Atlas | task, call_omo_agent |

 ## PATTERNS
@@ -85,5 +85,5 @@ agents/
 ## ANTI-PATTERNS
 - **Trust reports**: NEVER trust "I'm done" - verify outputs
 - **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration
+- **Sequential calls**: Use `task` with `run_in_background` for exploration
 - **Prometheus writing code**: Planner only - never implements
--- a/src/agents/atlas/default.ts
+++ b/src/agents/atlas/default.ts
@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
 </identity>

 <mission>
-Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
+Complete ALL tasks in a work plan via \`task()\` until fully done.
 One task per delegation. Parallel when independent. Verify everything.
 </mission>

 <delegation_system>
 ## How to Delegate

-Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
+Use \`task()\` with EITHER category OR agent (mutually exclusive):

 \`\`\`typescript
 // Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
-delegate_task(
+task(
  category="[category-name]",
  load_skills=["skill-1", "skill-2"],
  run_in_background=false,
@@ -38,7 +38,7 @@ delegate_task(
 )

 // Option B: Specialized Agent (for specific expert tasks)
-delegate_task(
+task(
  subagent_type="[agent-name]",
  load_skills=[],
  run_in_background=false,
@@ -58,7 +58,7 @@ delegate_task(

 ## 6-Section Prompt Structure (MANDATORY)

-Every \`delegate_task()\` prompt MUST include ALL 6 sections:
+Every \`task()\` prompt MUST include ALL 6 sections:

 \`\`\`markdown
 ## 1. TASK
@@ -149,7 +149,7 @@ Structure:
 ### 3.1 Check Parallelization
 If tasks can run in parallel:
 - Prepare prompts for ALL parallelizable tasks
- Invoke multiple \`delegate_task()\` in ONE message
+- Invoke multiple \`task()\` in ONE message
 - Wait for all to complete
 - Verify all, then continue

@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")

 Extract wisdom and include in prompt.

-### 3.3 Invoke delegate_task()
+### 3.3 Invoke task()

 \`\`\`typescript
-delegate_task(
+task(
  category="[category]",
  load_skills=["[relevant-skills]"],
  run_in_background=false,
@@ -210,7 +210,7 @@ delegate_task(

 **If verification fails**: Resume the SAME session with the ACTUAL error output:
 \`\`\`typescript
-delegate_task(
+task(
  session_id="ses_xyz789",  // ALWAYS use the session from the failed task
  load_skills=[...],
  prompt="Verification failed: {actual error}. Fix."
@@ -221,13 +221,13 @@ delegate_task(

 **CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**

-Every \`delegate_task()\` output includes a session_id. STORE IT.
+Every \`task()\` output includes a session_id. STORE IT.

 If task fails:
 1. Identify what went wrong
 2. **Resume the SAME session** - subagent has full context already:
    \`\`\`typescript
-    delegate_task(
+    task(
      session_id="ses_xyz789",  // Session from failed task
      load_skills=[...],
      prompt="FAILED: {error}. Fix by: {specific instruction}"
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:

 **For exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-delegate_task(subagent_type="explore", run_in_background=true, ...)
-delegate_task(subagent_type="librarian", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
+task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **For task execution**: NEVER background
 \`\`\`typescript
-delegate_task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
 \`\`\`typescript
 // Tasks 2, 3, 4 are independent - invoke together
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
 \`\`\`

 **Background management**:
--- a/src/agents/atlas/gpt.ts
+++ b/src/agents/atlas/gpt.ts
@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
 </identity>

 <mission>
-Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
+Complete ALL tasks in a work plan via \`task()\` until fully done.
 - One task per delegation
 - Parallel when independent
 - Verify everything
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
 <delegation_system>
 ## Delegation API

-Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
+Use \`task()\` with EITHER category OR agent (mutually exclusive):

 \`\`\`typescript
 // Category + Skills (spawns Sisyphus-Junior)
-delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
+task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")

 // Specialized Agent
-delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
+task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
 \`\`\`

 {CATEGORY_SECTION}
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,

 ## 6-Section Prompt Structure (MANDATORY)

-Every \`delegate_task()\` prompt MUST include ALL 6 sections:
+Every \`task()\` prompt MUST include ALL 6 sections:

 \`\`\`markdown
 ## 1. TASK
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
 ## Step 3: Execute Tasks

 ### 3.1 Parallelization Check
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message
+- Parallel tasks → invoke multiple \`task()\` in ONE message
 - Sequential → process one at a time

 ### 3.2 Pre-Delegation (MANDATORY)
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
 \`\`\`
 Extract wisdom → include in prompt.

-### 3.3 Invoke delegate_task()
+### 3.3 Invoke task()

 \`\`\`typescript
-delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
+task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
 \`\`\`

 ### 3.4 Verify (PROJECT-LEVEL QA)
@@ -201,7 +201,7 @@ Checklist:
 **CRITICAL: Use \`session_id\` for retries.**

 \`\`\`typescript
-delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
+task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
 \`\`\`

 - Maximum 3 retries per task
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
 <parallel_execution>
 **Exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-delegate_task(subagent_type="explore", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **Task execution**: NEVER background
 \`\`\`typescript
-delegate_task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
 \`\`\`typescript
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
 \`\`\`

 **Background management**:
--- a/src/agents/atlas/index.ts
+++ b/src/agents/atlas/index.ts
@@ -1,7 +1,7 @@
 /**
 * Atlas - Master Orchestrator Agent
 *
- * Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done.
+ * Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
 * You are the conductor of a symphony of specialized agents.
 *
 * Routing:
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {

  const baseConfig = {
    description:
-      "Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
+      "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
    mode: MODE,
    ...(ctx.model ? { model: ctx.model } : {}),
    temperature: 0.1,
--- a/src/agents/atlas/utils.ts
+++ b/src/agents/atlas/utils.ts
@@ -8,21 +8,22 @@
 import type { CategoryConfig } from "../../config/schema"
 import { formatCustomSkillsBlock, type AvailableAgent, type AvailableSkill } from "../dynamic-agent-prompt-builder"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../../tools/delegate-task/constants"
+import { truncateDescription } from "../../shared/truncate-description"

 export const getCategoryDescription = (name: string, userCategories?: Record<string, CategoryConfig>) =>
  userCategories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks"

 export function buildAgentSelectionSection(agents: AvailableAgent[]): string {
-  if (agents.length === 0) {
-    return `##### Option B: Use AGENT directly (for specialized experts)
+   if (agents.length === 0) {
+     return `##### Option B: Use AGENT directly (for specialized experts)

-No agents available.`
-  }
+ No agents available.`
+   }

-  const rows = agents.map((a) => {
-    const shortDesc = a.description.split(".")[0] || a.description
-    return `| \`${a.name}\` | ${shortDesc} |`
-  })
+   const rows = agents.map((a) => {
+     const shortDesc = truncateDescription(a.description)
+     return `| \`${a.name}\` | ${shortDesc} |`
+   })

  return `##### Option B: Use AGENT directly (for specialized experts)

@@ -47,7 +48,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
 ${categoryRows.join("\n")}

 \`\`\`typescript
-delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
+task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
 \`\`\``
 }

@@ -59,16 +60,16 @@ export function buildSkillsSection(skills: AvailableSkill[]): string {
  const builtinSkills = skills.filter((s) => s.location === "plugin")
  const customSkills = skills.filter((s) => s.location !== "plugin")

-  const builtinRows = builtinSkills.map((s) => {
-    const shortDesc = s.description.split(".")[0] || s.description
-    return `| \`${s.name}\` | ${shortDesc} |`
-  })
+   const builtinRows = builtinSkills.map((s) => {
+     const shortDesc = truncateDescription(s.description)
+     return `| \`${s.name}\` | ${shortDesc} |`
+   })

-  const customRows = customSkills.map((s) => {
-    const shortDesc = s.description.split(".")[0] || s.description
-    const source = s.location === "project" ? "project" : "user"
-    return `| \`${s.name}\` | ${shortDesc} | ${source} |`
-  })
+   const customRows = customSkills.map((s) => {
+     const shortDesc = truncateDescription(s.description)
+     const source = s.location === "project" ? "project" : "user"
+     return `| \`${s.name}\` | ${shortDesc} | ${source} |`
+   })

  const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills, "**")

@@ -105,7 +106,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my

 **Usage:**
 \`\`\`typescript
-delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
+task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
 \`\`\`

 **IMPORTANT:**
@@ -121,10 +122,10 @@ export function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: R
    `| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
  )

-  const agentRows = agents.map((a) => {
-    const shortDesc = a.description.split(".")[0] || a.description
-    return `| ${shortDesc} | \`agent="${a.name}"\` |`
-  })
+   const agentRows = agents.map((a) => {
+     const shortDesc = truncateDescription(a.description)
+     return `| ${shortDesc} | \`agent="${a.name}"\` |`
+   })

  return `##### Decision Matrix

--- a/src/agents/dynamic-agent-prompt-builder.ts
+++ b/src/agents/dynamic-agent-prompt-builder.ts
@@ -1,7 +1,8 @@
-import type { AgentPromptMetadata, BuiltinAgentName } from "./types"
+import type { AgentPromptMetadata } from "./types"
+import { truncateDescription } from "../shared/truncate-description"

 export interface AvailableAgent {
-  name: BuiltinAgentName
+  name: string
  description: string
  metadata: AgentPromptMetadata
 }
@@ -205,16 +206,16 @@ export function buildCategorySkillsDelegationGuide(categories: AvailableCategory
  const builtinSkills = skills.filter((s) => s.location === "plugin")
  const customSkills = skills.filter((s) => s.location !== "plugin")

-  const builtinRows = builtinSkills.map((s) => {
-    const desc = s.description.split(".")[0] || s.description
-    return `| \`${s.name}\` | ${desc} |`
-  })
+   const builtinRows = builtinSkills.map((s) => {
+     const desc = truncateDescription(s.description)
+     return `| \`${s.name}\` | ${desc} |`
+   })

-  const customRows = customSkills.map((s) => {
-    const desc = s.description.split(".")[0] || s.description
-    const source = s.location === "project" ? "project" : "user"
-    return `| \`${s.name}\` | ${desc} | ${source} |`
-  })
+   const customRows = customSkills.map((s) => {
+     const desc = truncateDescription(s.description)
+     const source = s.location === "project" ? "project" : "user"
+     return `| \`${s.name}\` | ${desc} | ${source} |`
+   })

  const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills)

@@ -242,7 +243,7 @@ ${builtinRows.join("\n")}`

  return `### Category + Skills Delegation System

-**delegate_task() combines categories and skills for optimal task execution.**
+**task() combines categories and skills for optimal task execution.**

 #### Available Categories (Domain-Optimized Models)

@@ -296,7 +297,7 @@ SKILL EVALUATION for "[skill-name]":
 ### Delegation Pattern

 \`\`\`typescript
-delegate_task(
+task(
  category="[selected-category]",
  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills — ESPECIALLY user-installed ones
  prompt="..."
@@ -305,7 +306,7 @@ delegate_task(

 **ANTI-PATTERN (will produce poor results):**
 \`\`\`typescript
-delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...")  // Empty load_skills without justification
+task(category="...", load_skills=[], run_in_background=false, prompt="...")  // Empty load_skills without justification
 \`\`\``
 }

@@ -421,7 +422,7 @@ export function buildUltraworkSection(

    lines.push("**Agents** (for specialized consultation/exploration):")
    for (const agent of sortedAgents) {
-      const shortDesc = agent.description.split(".")[0] || agent.description
+      const shortDesc = agent.description.length > 120 ? agent.description.slice(0, 120) + "..." : agent.description
      const suffix = agent.name === "explore" || agent.name === "librarian" ? " (multiple)" : ""
      lines.push(`- \`${agent.name}${suffix}\`: ${shortDesc}`)
    }
--- a/src/agents/explore.ts
+++ b/src/agents/explore.ts
@@ -29,7 +29,6 @@ export function createExploreAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
    "call_omo_agent",
  ])

--- a/src/agents/hephaestus.ts
+++ b/src/agents/hephaestus.ts
@@ -227,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*

 **Delegation Check (MANDATORY before acting directly):**
 1. Is there a specialized agent that perfectly matches this request?
-2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with?
-   - MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\`
+2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
+   - MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
 3. Can I do it myself for the best result, FOR SURE?

 **Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
@@ -280,15 +280,15 @@ ${librarianSection}
 // CORRECT: Always background, always parallel
 // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
 // Continue immediately - collect results when needed

 // WRONG: Sequential or blocking - NEVER DO THIS
-result = delegate_task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
+result = task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
 \`\`\`

 **Rules:**
@@ -393,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 ### Session Continuity (MANDATORY)

-Every \`delegate_task()\` output includes a session_id. **USE IT.**
+Every \`task()\` output includes a session_id. **USE IT.**

 **ALWAYS continue when:**
 | Scenario | Action |
--- a/src/agents/librarian.ts
+++ b/src/agents/librarian.ts
@@ -26,7 +26,6 @@ export function createLibrarianAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
    "call_omo_agent",
  ])

--- a/src/agents/metis.ts
+++ b/src/agents/metis.ts
@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
  "write",
  "edit",
  "task",
-  "delegate_task",
 ])

 export function createMetisAgent(model: string): AgentConfig {
--- a/src/agents/momus.ts
+++ b/src/agents/momus.ts
@@ -193,7 +193,6 @@ export function createMomusAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
  ])

  const base = {
--- a/src/agents/oracle.ts
+++ b/src/agents/oracle.ts
@@ -147,7 +147,6 @@ export function createOracleAgent(model: string): AgentConfig {
    "write",
    "edit",
    "task",
-    "delegate_task",
  ])

  const base = {
--- a/src/agents/prometheus/high-accuracy-mode.ts
+++ b/src/agents/prometheus/high-accuracy-mode.ts
@@ -15,8 +15,9 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
 \`\`\`typescript
 // After generating initial plan
 while (true) {
-  const result = delegate_task(
+  const result = task(
    subagent_type="momus",
+    load_skills=[],
    prompt=".sisyphus/plans/{name}.md",
    run_in_background=false
  )
--- a/src/agents/prometheus/interview-mode.ts
+++ b/src/agents/prometheus/interview-mode.ts
@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
 **Research First:**
 \`\`\`typescript
 // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
-delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
-delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
 \`\`\`typescript
 // Launch BEFORE asking user questions
 // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
-delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
-delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
 \`\`\`

 **Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js

 Run this check:
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
 \`\`\`

 #### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:

 **Research First:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
 \`\`\`

 **Oracle Consultation** (recommend when stakes are high):
 \`\`\`typescript
-delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
+task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false)
 \`\`\`

 **Interview Focus:**
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:

 **Parallel Investigation:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
-delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i

 **For Understanding Codebase:**
 \`\`\`typescript
-delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
 \`\`\`

 **For External Knowledge:**
 \`\`\`typescript
-delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
 \`\`\`

 **For Implementation Examples:**
 \`\`\`typescript
-delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
 \`\`\`

 ## Interview Mode Anti-Patterns
--- a/src/agents/prometheus/plan-generation.ts
+++ b/src/agents/prometheus/plan-generation.ts
@@ -59,8 +59,9 @@ todoWrite([
 **BEFORE generating the plan**, summon Metis to catch what you might have missed:

 \`\`\`typescript
-delegate_task(
+task(
  subagent_type="metis",
+  load_skills=[],
  prompt=\`Review this planning session before I generate the work plan:

  **User's Goal**: {summarize what user wants}
--- a/src/agents/prometheus/plan-template.ts
+++ b/src/agents/prometheus/plan-template.ts
@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential

 | Wave | Tasks | Recommended Agents |
 |------|-------|-------------------|
-| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) |
+| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
 | 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
 | 3 | 4 | final integration task |

--- a/src/agents/sisyphus-junior/default.ts
+++ b/src/agents/sisyphus-junior/default.ts
@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
 <Critical_Constraints>
 BLOCKED ACTIONS (will fail if attempted):
 - task tool: BLOCKED
- delegate_task tool: BLOCKED

 ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
 You work ALONE for implementation. No delegation of implementation tasks.
--- a/src/agents/sisyphus-junior/gpt.ts
+++ b/src/agents/sisyphus-junior/gpt.ts
@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
 | Tool | Status |
 |------|--------|
 | task | BLOCKED |
-| delegate_task | BLOCKED |

 ALLOWED:
 | Tool | Usage |
--- a/src/agents/sisyphus-junior/index.test.ts
+++ b/src/agents/sisyphus-junior/index.test.ts
@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
    })
  })

-  describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => {
-    test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => {
+  describe("tool safety (task blocked, call_omo_agent allowed)", () => {
+    test("task remains blocked, call_omo_agent is allowed via tools format", () => {
      // given
      const override = {
        tools: {
          task: true,
-          delegate_task: true,
          call_omo_agent: true,
          read: true,
        },
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      const permission = result.permission as Record<string, string> | undefined
      if (tools) {
        expect(tools.task).toBe(false)
-        expect(tools.delegate_task).toBe(false)
        // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
        expect(tools.call_omo_agent).toBe(true)
        expect(tools.read).toBe(true)
      }
      if (permission) {
        expect(permission.task).toBe("deny")
-        expect(permission.delegate_task).toBe("deny")
        // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
        expect(permission.call_omo_agent).toBe("allow")
      }
    })

-    test("task and delegate_task remain blocked when using permission format override", () => {
+    test("task remains blocked when using permission format override", () => {
      // given
      const override = {
        permission: {
          task: "allow",
-          delegate_task: "allow",
          call_omo_agent: "allow",
          read: "allow",
        },
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
      // when
      const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])

-      // then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning
+      // then - task blocked, but call_omo_agent allowed for explore/librarian spawning
      const tools = result.tools as Record<string, boolean> | undefined
      const permission = result.permission as Record<string, string> | undefined
      if (tools) {
        expect(tools.task).toBe(false)
-        expect(tools.delegate_task).toBe(false)
        expect(tools.call_omo_agent).toBe(true)
      }
      if (permission) {
        expect(permission.task).toBe("deny")
-        expect(permission.delegate_task).toBe("deny")
        expect(permission.call_omo_agent).toBe("allow")
      }
    })
--- a/src/agents/sisyphus-junior/index.ts
+++ b/src/agents/sisyphus-junior/index.ts
@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"

 // Core tools that Sisyphus-Junior must NEVER have access to
 // Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
-const BLOCKED_TOOLS = ["task", "delegate_task"]
+const BLOCKED_TOOLS = ["task"]

 export const SISYPHUS_JUNIOR_DEFAULTS = {
  model: "anthropic/claude-sonnet-4-5",
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -214,8 +214,8 @@ ${keyTriggers}

 **Delegation Check (MANDATORY before acting directly):**
 1. Is there a specialized agent that perfectly matches this request?
-2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
-  - MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER.
+2. If not, is there a \`task\` category that best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
+  - MUST FIND skills to use, for: \`task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS TASK PARAMETER.
 3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?

 **Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
@@ -277,15 +277,15 @@ ${librarianSection}
 // CORRECT: Always background, always parallel
 // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
-delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
+task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
-delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
+task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
-result = delegate_task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
+result = task(..., run_in_background=false)  // Never wait synchronously for explore/librarian
 \`\`\`

 ### Background Result Collection:
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 ### Session Continuity (MANDATORY)

-Every \`delegate_task()\` output includes a session_id. **USE IT.**
+Every \`task()\` output includes a session_id. **USE IT.**

 **ALWAYS continue when:**
 | Scenario | Action |
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**

 \`\`\`typescript
 // WRONG: Starting fresh loses all context
-delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...")
+task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")

 // CORRECT: Resume preserves everything
-delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
+task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
 \`\`\`

 **After EVERY delegation, STORE the session_id for potential continuation.**
--- a/src/agents/utils.test.ts
+++ b/src/agents/utils.test.ts
@@ -79,6 +79,72 @@ describe("createBuiltinAgents with model overrides", () => {
    }
  })

+  test("user config model takes priority over uiSelectedModel for sisyphus", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
+    )
+    const uiSelectedModel = "openai/gpt-5.2"
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        overrides,
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        undefined,
+        undefined,
+        uiSelectedModel
+      )
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("user config model takes priority over uiSelectedModel for atlas", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
+    )
+    const uiSelectedModel = "openai/gpt-5.2"
+    const overrides = {
+      atlas: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        overrides,
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        undefined,
+        undefined,
+        uiSelectedModel
+      )
+
+      // #then
+      expect(agents.atlas).toBeDefined()
+      expect(agents.atlas.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
  test("Sisyphus is created on first run when no availableModels or cache exist", async () => {
    // #given
    const systemDefaultModel = "anthropic/claude-opus-4-6"
@@ -183,6 +249,222 @@ describe("createBuiltinAgents with model overrides", () => {
    expect(agents.sisyphus.prompt).toContain("frontend-ui-ux")
    expect(agents.sisyphus.prompt).toContain("git-master")
  })
+
+  test("includes custom agents in orchestrator prompts when provided via config", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set([
+        "anthropic/claude-opus-4-6",
+        "kimi-for-coding/k2p5",
+        "opencode/kimi-k2.5-free",
+        "zai-coding-plan/glm-4.7",
+        "opencode/glm-4.7-free",
+        "openai/gpt-5.2",
+      ])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "researcher",
+        description: "Research agent for deep analysis",
+        hidden: false,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).toContain("researcher")
+      expect(agents.hephaestus.prompt).toContain("researcher")
+      expect(agents.atlas.prompt).toContain("researcher")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes hidden custom agents from orchestrator prompts", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "hidden-agent",
+        description: "Should never show",
+        hidden: true,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("hidden-agent")
+      expect(agents.hephaestus.prompt).not.toContain("hidden-agent")
+      expect(agents.atlas.prompt).not.toContain("hidden-agent")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes disabled custom agents from orchestrator prompts", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "disabled-agent",
+        description: "Should never show",
+        disabled: true,
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("disabled-agent")
+      expect(agents.hephaestus.prompt).not.toContain("disabled-agent")
+      expect(agents.atlas.prompt).not.toContain("disabled-agent")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("excludes custom agents when disabledAgents contains their name (case-insensitive)", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const disabledAgents = ["ReSeArChEr"]
+    const customAgentSummaries = [
+      {
+        name: "researcher",
+        description: "Should never show",
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        disabledAgents,
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).not.toContain("researcher")
+      expect(agents.hephaestus.prompt).not.toContain("researcher")
+      expect(agents.atlas.prompt).not.toContain("researcher")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("deduplicates custom agents case-insensitively", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      { name: "Researcher", description: "First" },
+      { name: "researcher", description: "Second" },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      const matches = agents.sisyphus.prompt.match(/Custom agent: researcher/gi) ?? []
+      expect(matches.length).toBe(1)
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
+
+  test("sanitizes custom agent strings for markdown tables", async () => {
+    // #given
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["anthropic/claude-opus-4-6", "openai/gpt-5.2"])
+    )
+
+    const customAgentSummaries = [
+      {
+        name: "table-agent",
+        description: "Line1\nAlpha | Beta",
+      },
+    ]
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents(
+        [],
+        {},
+        undefined,
+        TEST_DEFAULT_MODEL,
+        undefined,
+        undefined,
+        [],
+        customAgentSummaries
+      )
+
+      // #then
+      expect(agents.sisyphus.prompt).toContain("Line1 Alpha \\| Beta")
+    } finally {
+      fetchSpy.mockRestore()
+    }
+  })
 })

 describe("createBuiltinAgents without systemDefaultModel", () => {
@@ -422,6 +704,58 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
      cacheSpy.mockRestore()
    }
  })
+
+  test("sisyphus uses user-configured plugin model even when not in cache or fallback chain", async () => {
+    // #given - user configures a model from a plugin provider (like antigravity)
+    // that is NOT in the availableModels cache and NOT in the fallback chain
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set(["openai/gpt-5.2"])
+    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
+      ["openai"]
+    )
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
+    }
+  })
+
+  test("sisyphus uses user-configured plugin model when availableModels is empty but cache exists", async () => {
+    // #given - connected providers cache exists but models cache is empty
+    // This reproduces the exact scenario where provider-models.json has models: {}
+    const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
+      new Set()
+    )
+    const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
+      ["google", "openai", "opencode"]
+    )
+    const overrides = {
+      sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
+    }
+
+    try {
+      // #when
+      const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
+
+      // #then
+      expect(agents.sisyphus).toBeDefined()
+      expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
+    } finally {
+      fetchSpy.mockRestore()
+      cacheSpy.mockRestore()
+    }
+  })
 })

 describe("buildAgent with category and skills", () => {
@@ -873,4 +1207,29 @@ describe("Deadlock prevention - fetchAvailableModels must not receive client", (
     fetchSpy.mockRestore?.()
     cacheSpy.mockRestore?.()
   })
+  test("Hephaestus variant override respects user config over hardcoded default", async () => {
+    // #given - user provides variant in config
+    const overrides = {
+      hephaestus: { variant: "high" },
+    }
+
+    // #when
+    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+
+    // #then - user variant takes precedence over hardcoded "medium"
+    expect(agents.hephaestus).toBeDefined()
+    expect(agents.hephaestus.variant).toBe("high")
+  })
+
+  test("Hephaestus uses default variant when no user override provided", async () => {
+    // #given - no variant override in config
+    const overrides = {}
+
+    // #when
+    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+
+    // #then - default "medium" variant is applied
+    expect(agents.hephaestus).toBeDefined()
+    expect(agents.hephaestus.variant).toBe("medium")
+  })
 })
--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -11,7 +11,18 @@ import { createAtlasAgent, atlasPromptMetadata } from "./atlas"
 import { createMomusAgent, momusPromptMetadata } from "./momus"
 import { createHephaestusAgent } from "./hephaestus"
 import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-import { deepMerge, fetchAvailableModels, resolveModelPipeline, AGENT_MODEL_REQUIREMENTS, readConnectedProvidersCache, isModelAvailable, isAnyFallbackModelAvailable, isAnyProviderConnected, migrateAgentConfig } from "../shared"
+import {
+  deepMerge,
+  fetchAvailableModels,
+  resolveModelPipeline,
+  AGENT_MODEL_REQUIREMENTS,
+  readConnectedProvidersCache,
+  isModelAvailable,
+  isAnyFallbackModelAvailable,
+  isAnyProviderConnected,
+  migrateAgentConfig,
+  truncateDescription,
+} from "../shared"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
 import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
 import { createBuiltinSkills } from "../features/builtin-skills"
@@ -52,6 +63,64 @@ function isFactory(source: AgentSource): source is AgentFactory {
  return typeof source === "function"
 }

+type RegisteredAgentSummary = {
+  name: string
+  description: string
+}
+
+// Collapses all whitespace (newlines included) to single spaces and escapes pipes; re-sanitizing is a no-op.
+function sanitizeMarkdownTableCell(value: string): string {
+  return value
+    .replace(/\\?\|/g, "\\|") // an already-escaped pipe is matched whole, so it is never double-escaped
+    .replace(/\s+/g, " ")
+    .trim()
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null
+}
+
+function parseRegisteredAgentSummaries(input: unknown): RegisteredAgentSummary[] {
+  if (!Array.isArray(input)) return []
+
+  const result: RegisteredAgentSummary[] = []
+  for (const item of input) {
+    if (!isRecord(item)) continue
+
+    const name = typeof item.name === "string" ? item.name : undefined
+    if (!name) continue
+
+    const hidden = item.hidden
+    if (hidden === true) continue
+
+    const disabled = item.disabled
+    if (disabled === true) continue
+
+    const enabled = item.enabled
+    if (enabled === false) continue
+
+    const description = typeof item.description === "string" ? item.description : ""
+    result.push({ name, description: sanitizeMarkdownTableCell(description) })
+  }
+
+  return result
+}
+
+function buildCustomAgentMetadata(agentName: string, description: string): AgentPromptMetadata {
+  const shortDescription = sanitizeMarkdownTableCell(truncateDescription(description))
+  const safeAgentName = sanitizeMarkdownTableCell(agentName)
+  return {
+    category: "specialist",
+    cost: "CHEAP",
+    triggers: [
+      {
+        domain: `Custom agent: ${safeAgentName}`,
+        trigger: shortDescription || "Use when this agent's description matches the task",
+      },
+    ],
+  }
+}
+
 export function buildAgent(
  source: AgentSource,
  model: string,
@@ -233,13 +302,13 @@ export async function createBuiltinAgents(
  categories?: CategoriesConfig,
  gitMasterConfig?: GitMasterConfig,
  discoveredSkills: LoadedSkill[] = [],
-  client?: any,
+  customAgentSummaries?: unknown,
  browserProvider?: BrowserAutomationProvider,
  uiSelectedModel?: string,
  disabledSkills?: Set<string>
 ): Promise<Record<string, AgentConfig>> {
  const connectedProviders = readConnectedProvidersCache()
-  // IMPORTANT: Do NOT pass client to fetchAvailableModels during plugin initialization.
+  // IMPORTANT: Do NOT call OpenCode client APIs during plugin initialization.
  // This function is called from config handler, and calling client API causes deadlock.
  // See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301
  const availableModels = await fetchAvailableModels(undefined, {
@@ -279,6 +348,10 @@ export async function createBuiltinAgents(

  const availableSkills: AvailableSkill[] = [...builtinAvailable, ...discoveredAvailable]

+  const registeredAgents = parseRegisteredAgentSummaries(customAgentSummaries)
+  const builtinAgentNames = new Set(Object.keys(agentSources).map((n) => n.toLowerCase()))
+  const disabledAgentNames = new Set(disabledAgents.map((n) => n.toLowerCase()))
+
  // Collect general agents first (for availableAgents), but don't add to result yet
  const pendingAgentConfigs: Map<string, AgentConfig> = new Map()

@@ -304,7 +377,7 @@ export async function createBuiltinAgents(
     const isPrimaryAgent = isFactory(source) && source.mode === "primary"

    const resolution = applyModelResolution({
-      uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
+      uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
      userModel: override?.model,
      requirement,
      availableModels,
@@ -335,14 +408,27 @@ export async function createBuiltinAgents(
    // Store for later - will be added after sisyphus and hephaestus
    pendingAgentConfigs.set(name, config)

-    const metadata = agentMetadata[agentName]
-    if (metadata) {
-      availableAgents.push({
-        name: agentName,
-        description: config.description ?? "",
-        metadata,
-      })
-    }
+    const metadata = agentMetadata[agentName]
+    if (metadata) {
+      availableAgents.push({
+        name: agentName,
+        description: config.description ?? "",
+        metadata,
+      })
+    }
+  }
+
+  for (const agent of registeredAgents) {
+    const lowerName = agent.name.toLowerCase()
+    if (builtinAgentNames.has(lowerName)) continue
+    if (disabledAgentNames.has(lowerName)) continue
+    if (availableAgents.some((a) => a.name.toLowerCase() === lowerName)) continue
+
+    availableAgents.push({
+      name: agent.name,
+      description: agent.description,
+      metadata: buildCustomAgentMetadata(agent.name, agent.description),
+    })
  }

   const sisyphusOverride = agentOverrides["sisyphus"]
@@ -356,7 +442,7 @@ export async function createBuiltinAgents(

   if (!disabledAgents.includes("sisyphus") && meetsSisyphusAnyModelRequirement) {
    let sisyphusResolution = applyModelResolution({
-      uiSelectedModel,
+      uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
      userModel: sisyphusOverride?.model,
      requirement: sisyphusRequirement,
      availableModels,
@@ -423,13 +509,13 @@ export async function createBuiltinAgents(
          availableCategories
        )

-        hephaestusConfig = { ...hephaestusConfig, variant: hephaestusResolvedVariant ?? "medium" }
-
+        if (!hephaestusOverride?.variant) {
+          hephaestusConfig = { ...hephaestusConfig, variant: hephaestusResolvedVariant ?? "medium" }
+        }
        const hepOverrideCategory = (hephaestusOverride as Record<string, unknown> | undefined)?.category as string | undefined
        if (hepOverrideCategory) {
          hephaestusConfig = applyCategoryOverride(hephaestusConfig, hepOverrideCategory, mergedCategories)
        }
-
        if (directory && hephaestusConfig.prompt) {
          const envContext = createEnvContext()
          hephaestusConfig = { ...hephaestusConfig, prompt: hephaestusConfig.prompt + envContext }
@@ -454,7 +540,7 @@ export async function createBuiltinAgents(
      const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]

      const atlasResolution = applyModelResolution({
-        uiSelectedModel,
+        uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
        userModel: orchestratorOverride?.model,
        requirement: atlasRequirement,
        availableModels,
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -2,7 +2,7 @@

 ## OVERVIEW

-CLI entry: `bunx oh-my-opencode`. 5 commands with Commander.js + @clack/prompts TUI.
+CLI entry: `bunx oh-my-opencode`. 70 CLI utilities and commands with Commander.js + @clack/prompts TUI.

 **Commands**: install (interactive setup), doctor (14 health checks), run (session launcher), get-local-version, mcp-oauth

--- a/src/cli/doctor/checks/gh.test.ts
+++ b/src/cli/doctor/checks/gh.test.ts
@@ -29,7 +29,7 @@ describe("gh cli check", () => {

    it("returns gh cli info structure", async () => {
      const spawnSpy = spyOn(Bun, "spawn").mockImplementation((cmd) => {
-        if (Array.isArray(cmd) && cmd[0] === "which" && cmd[1] === "gh") {
+        if (Array.isArray(cmd) && (cmd[0] === "which" || cmd[0] === "where") && cmd[1] === "gh") {
          return createProc({ stdout: "/usr/bin/gh\n" })
        }

--- a/src/cli/doctor/checks/gh.ts
+++ b/src/cli/doctor/checks/gh.ts
@@ -13,7 +13,8 @@ export interface GhCliInfo {

 async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
  try {
-    const proc = Bun.spawn(["which", binary], { stdout: "pipe", stderr: "pipe" })
+    const whichCmd = process.platform === "win32" ? "where" : "which"
+    const proc = Bun.spawn([whichCmd, binary], { stdout: "pipe", stderr: "pipe" })
    const output = await new Response(proc.stdout).text()
    await proc.exited
    if (proc.exitCode === 0) {
--- a/src/cli/index.ts
+++ b/src/cli/index.ts
@@ -19,6 +19,7 @@ program
  .name("oh-my-opencode")
  .description("The ultimate OpenCode plugin - multi-model orchestration, LSP tools, and more")
  .version(VERSION, "-v, --version", "Show version number")
+  .enablePositionalOptions()

 program
  .command("install")
@@ -64,16 +65,28 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
  })

 program
-  .command("run <message>")
-  .description("Run opencode with todo/background task completion enforcement")
+  .command("run <message>")
+  .allowUnknownOption()
+  .passThroughOptions()
+  .description("Run opencode with todo/background task completion enforcement")
  .option("-a, --agent <name>", "Agent to use (default: from CLI/env/config, fallback: Sisyphus)")
  .option("-d, --directory <path>", "Working directory")
  .option("-t, --timeout <ms>", "Timeout in milliseconds (default: 30 minutes)", parseInt)
+  .option("-p, --port <port>", "Server port (attaches if port already in use)", parseInt)
+  .option("--attach <url>", "Attach to existing opencode server URL")
+  .option("--on-complete <command>", "Shell command to run after completion")
+  .option("--json", "Output structured JSON result to stdout")
+  .option("--session-id <id>", "Resume existing session instead of creating new one")
  .addHelpText("after", `
 Examples:
  $ bunx oh-my-opencode run "Fix the bug in index.ts"
  $ bunx oh-my-opencode run --agent Sisyphus "Implement feature X"
  $ bunx oh-my-opencode run --timeout 3600000 "Large refactoring task"
+  $ bunx oh-my-opencode run --port 4321 "Fix the bug"
+  $ bunx oh-my-opencode run --attach http://127.0.0.1:4321 "Fix the bug"
+  $ bunx oh-my-opencode run --json "Fix the bug" | jq .sessionId
+  $ bunx oh-my-opencode run --on-complete "notify-send Done" "Fix the bug"
+  $ bunx oh-my-opencode run --session-id ses_abc123 "Continue the work"

 Agent resolution order:
  1) --agent flag
@@ -89,11 +102,20 @@ Unlike 'opencode run', this command waits until:
  - All child sessions (background tasks) are idle
 `)
  .action(async (message: string, options) => {
+    if (options.port && options.attach) {
+      console.error("Error: --port and --attach are mutually exclusive")
+      process.exit(1)
+    }
    const runOptions: RunOptions = {
      message,
      agent: options.agent,
      directory: options.directory,
      timeout: options.timeout,
+      port: options.port,
+      attach: options.attach,
+      onComplete: options.onComplete,
+      json: options.json ?? false,
+      sessionId: options.sessionId,
    }
    const exitCode = await run(runOptions)
    process.exit(exitCode)
--- a/src/cli/run/agent-resolver.ts
+++ b/src/cli/run/agent-resolver.ts
@@ -0,0 +1,69 @@
+import pc from "picocolors"
+import type { RunOptions } from "./types"
+import type { OhMyOpenCodeConfig } from "../../config"
+
+// Core agents, listed in the order they are tried when a fallback is needed.
+const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
+const DEFAULT_AGENT = "sisyphus"
+
+type EnvVars = Record<string, string | undefined>
+
+/**
+ * Trims an agent name and maps a case-insensitive match of a core agent to
+ * its canonical spelling. Returns undefined for missing or blank input.
+ */
+function normalizeAgentName(agent?: string): string | undefined {
+  const trimmed = agent?.trim()
+  if (!trimmed) return undefined
+  const lowered = trimmed.toLowerCase()
+  for (const coreName of CORE_AGENT_ORDER) {
+    if (coreName.toLowerCase() === lowered) return coreName
+  }
+  return trimmed
+}
+
+/**
+ * Reports whether the config disables the agent, either through
+ * `sisyphus_agent.disabled` (sisyphus only) or a case-insensitive match in
+ * `disabled_agents`.
+ */
+function isAgentDisabled(agent: string, config: OhMyOpenCodeConfig): boolean {
+  const target = agent.toLowerCase()
+  const sisyphusSwitchedOff = target === "sisyphus" && config.sisyphus_agent?.disabled === true
+  if (sisyphusSwitchedOff) return true
+  const disabledList = config.disabled_agents ?? []
+  return disabledList.some((entry) => entry.toLowerCase() === target)
+}
+
+/** Returns the first core agent that is not disabled, or the default agent if all are. */
+function pickFallbackAgent(config: OhMyOpenCodeConfig): string {
+  const firstEnabled = CORE_AGENT_ORDER.find((agent) => !isAgentDisabled(agent, config))
+  return firstEnabled ?? DEFAULT_AGENT
+}
+
+/**
+ * Picks the agent for a run: `options.agent` first, then the
+ * OPENCODE_DEFAULT_AGENT environment variable, then `default_run_agent` from
+ * the plugin config, then the default agent. When the choice is disabled, a
+ * warning is logged and the result of the core-agent fallback is returned.
+ */
+export const resolveRunAgent = (
+  options: RunOptions,
+  pluginConfig: OhMyOpenCodeConfig,
+  env: EnvVars = process.env
+): string => {
+  const candidates = [options.agent, env.OPENCODE_DEFAULT_AGENT, pluginConfig.default_run_agent]
+  const requested = candidates.map((name) => normalizeAgentName(name)).find((name) => name !== undefined)
+  const normalized = normalizeAgentName(requested ?? DEFAULT_AGENT) ?? DEFAULT_AGENT
+
+  if (!isAgentDisabled(normalized, pluginConfig)) {
+    return normalized
+  }
+
+  const fallback = pickFallbackAgent(pluginConfig)
+  const warning = isAgentDisabled(fallback, pluginConfig)
+    ? `Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
+    : `Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
+  console.log(pc.yellow(warning))
+  return fallback
+}
--- a/src/cli/run/events.ts
+++ b/src/cli/run/events.ts
@@ -65,6 +65,8 @@ export interface EventState {
  currentTool: string | null
  /** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
  hasReceivedMeaningfulWork: boolean
+  /** Count of assistant messages for the main session */
+  messageCount: number
 }

 export function createEventState(): EventState {
@@ -76,6 +78,7 @@ export function createEventState(): EventState {
    lastPartText: "",
    currentTool: null,
    hasReceivedMeaningfulWork: false,
+    messageCount: 0,
  }
 }

@@ -266,6 +269,7 @@ function handleMessageUpdated(
  if (props?.info?.role !== "assistant") return

  state.hasReceivedMeaningfulWork = true
+  state.messageCount++
 }

 function handleToolExecute(
--- a/src/cli/run/index.ts
+++ b/src/cli/run/index.ts
@@ -1,2 +1,7 @@
 export { run } from "./runner"
-export type { RunOptions, RunContext } from "./types"
+export { resolveRunAgent } from "./agent-resolver"
+export { createServerConnection } from "./server-connection"
+export { resolveSession } from "./session-resolver"
+export { createJsonOutputManager } from "./json-output"
+export { executeOnCompleteHook } from "./on-complete-hook"
+export type { RunOptions, RunContext, RunResult, ServerConnection } from "./types"
--- a/src/cli/run/integration.test.ts
+++ b/src/cli/run/integration.test.ts
@@ -0,0 +1,294 @@
+import { describe, it, expect, mock, spyOn, beforeEach, afterEach } from "bun:test"
+import type { RunResult } from "./types"
+import { createJsonOutputManager } from "./json-output"
+import { resolveSession } from "./session-resolver"
+import { executeOnCompleteHook } from "./on-complete-hook"
+import type { OpencodeClient } from "./types"
+
+const mockServerClose = mock(() => {})
+const mockCreateOpencode = mock(() =>
+  Promise.resolve({
+    client: { session: {} },
+    server: { url: "http://127.0.0.1:9999", close: mockServerClose },
+  })
+)
+const mockCreateOpencodeClient = mock(() => ({ session: {} }))
+const mockIsPortAvailable = mock(() => Promise.resolve(true))
+const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 9999, wasAutoSelected: false }))
+
+mock.module("@opencode-ai/sdk", () => ({
+  createOpencode: mockCreateOpencode,
+  createOpencodeClient: mockCreateOpencodeClient,
+}))
+
+mock.module("../../shared/port-utils", () => ({
+  isPortAvailable: mockIsPortAvailable,
+  getAvailableServerPort: mockGetAvailableServerPort,
+  DEFAULT_SERVER_PORT: 4096,
+}))
+
+const { createServerConnection } = await import("./server-connection")
+
+interface MockWriteStream {
+  write: (chunk: string) => boolean
+  writes: string[]
+}
+
+function createMockWriteStream(): MockWriteStream {
+  const writes: string[] = []
+  return {
+    writes,
+    write: function (this: MockWriteStream, chunk: string): boolean {
+      this.writes.push(chunk)
+      return true
+    },
+  }
+}
+
+const createMockClient = (
+  getResult?: { error?: unknown; data?: { id: string } }
+): OpencodeClient => ({
+  session: {
+    get: mock((opts: { path: { id: string } }) =>
+      Promise.resolve(getResult ?? { data: { id: opts.path.id } })
+    ),
+    create: mock(() => Promise.resolve({ data: { id: "new-session-id" } })),
+  },
+} as unknown as OpencodeClient)
+
+describe("integration: --json mode", () => {
+  it("emits valid RunResult JSON to stdout", () => {
+    // given
+    const mockStdout = createMockWriteStream()
+    const mockStderr = createMockWriteStream()
+    const result: RunResult = {
+      sessionId: "test-session",
+      success: true,
+      durationMs: 1234,
+      messageCount: 42,
+      summary: "Test summary",
+    }
+    const manager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+
+    // when
+    manager.emitResult(result)
+
+    // then
+    expect(mockStdout.writes).toHaveLength(1)
+    const emitted = mockStdout.writes[0]!
+    expect(() => JSON.parse(emitted)).not.toThrow()
+    const parsed = JSON.parse(emitted) as RunResult
+    expect(parsed.sessionId).toBe("test-session")
+    expect(parsed.success).toBe(true)
+    expect(parsed.durationMs).toBe(1234)
+    expect(parsed.messageCount).toBe(42)
+    expect(parsed.summary).toBe("Test summary")
+  })
+
+  it("redirects stdout to stderr when active", () => {
+    // given
+    spyOn(console, "log").mockImplementation(() => {})
+    const mockStdout = createMockWriteStream()
+    const mockStderr = createMockWriteStream()
+    const manager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+    manager.redirectToStderr()
+
+    // when
+    mockStdout.write("should go to stderr")
+
+    // then
+    expect(mockStdout.writes).toHaveLength(0)
+    expect(mockStderr.writes).toEqual(["should go to stderr"])
+  })
+})
+
+describe("integration: --session-id", () => {
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+  })
+
+  it("resolves provided session ID without creating new session", async () => {
+    // given
+    const sessionId = "existing-session-id"
+    const mockClient = createMockClient({ data: { id: sessionId } })
+
+    // when
+    const result = await resolveSession({ client: mockClient, sessionId })
+
+    // then
+    expect(result).toBe(sessionId)
+    expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("throws when session does not exist", async () => {
+    // given
+    const sessionId = "non-existent-session-id"
+    const mockClient = createMockClient({ error: { message: "Session not found" } })
+
+    // when
+    const result = resolveSession({ client: mockClient, sessionId })
+
+    // then
+    await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
+    expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+})
+
+describe("integration: --on-complete", () => {
+  let spawnSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    spyOn(console, "error").mockImplementation(() => {})
+    spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
+      exited: Promise.resolve(0),
+      exitCode: 0,
+    } as unknown as ReturnType<typeof Bun.spawn>)
+  })
+
+  afterEach(() => {
+    spawnSpy.mockRestore()
+  })
+
+  it("passes all 4 env vars as strings to spawned process", async () => {
+    // given
+    spawnSpy.mockClear()
+
+    // when
+    await executeOnCompleteHook({
+      command: "echo test",
+      sessionId: "session-123",
+      exitCode: 0,
+      durationMs: 5000,
+      messageCount: 10,
+    })
+
+    // then
+    expect(spawnSpy).toHaveBeenCalledTimes(1)
+    const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(options?.env?.SESSION_ID).toBe("session-123")
+    expect(options?.env?.EXIT_CODE).toBe("0")
+    expect(options?.env?.DURATION_MS).toBe("5000")
+    expect(options?.env?.MESSAGE_COUNT).toBe("10")
+    expect(options?.env?.SESSION_ID).toBeTypeOf("string")
+    expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
+    expect(options?.env?.DURATION_MS).toBeTypeOf("string")
+    expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
+  })
+})
+
+describe("integration: option combinations", () => {
+  let mockStdout: MockWriteStream
+  let mockStderr: MockWriteStream
+  let spawnSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+    mockStdout = createMockWriteStream()
+    mockStderr = createMockWriteStream()
+    spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
+      exited: Promise.resolve(0),
+      exitCode: 0,
+    } as unknown as ReturnType<typeof Bun.spawn>)
+  })
+
+  afterEach(() => {
+    spawnSpy?.mockRestore?.()
+  })
+
+  it("json output and on-complete hook can both execute", async () => {
+    // given - json manager active + on-complete hook ready
+    const result: RunResult = {
+      sessionId: "session-123",
+      success: true,
+      durationMs: 5000,
+      messageCount: 10,
+      summary: "Test completed",
+    }
+    const jsonManager = createJsonOutputManager({
+      stdout: mockStdout as unknown as NodeJS.WriteStream,
+      stderr: mockStderr as unknown as NodeJS.WriteStream,
+    })
+    jsonManager.redirectToStderr()
+    spawnSpy.mockClear()
+
+    // when - both are invoked sequentially (as runner would)
+    jsonManager.emitResult(result)
+    await executeOnCompleteHook({
+      command: "echo done",
+      sessionId: result.sessionId,
+      exitCode: result.success ? 0 : 1,
+      durationMs: result.durationMs,
+      messageCount: result.messageCount,
+    })
+
+    // then - json emits result AND on-complete hook runs
+    expect(mockStdout.writes).toHaveLength(1)
+    const emitted = mockStdout.writes[0]!
+    expect(() => JSON.parse(emitted)).not.toThrow()
+    expect(spawnSpy).toHaveBeenCalledTimes(1)
+    const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(args).toEqual(["sh", "-c", "echo done"])
+    const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+    expect(options?.env?.SESSION_ID).toBe("session-123")
+    expect(options?.env?.EXIT_CODE).toBe("0")
+    expect(options?.env?.DURATION_MS).toBe("5000")
+    expect(options?.env?.MESSAGE_COUNT).toBe("10")
+  })
+})
+
+describe("integration: server connection", () => {
+  let consoleSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    consoleSpy = spyOn(console, "log").mockImplementation(() => {})
+    mockCreateOpencode.mockClear()
+    mockCreateOpencodeClient.mockClear()
+    mockServerClose.mockClear()
+  })
+
+  afterEach(() => {
+    consoleSpy.mockRestore()
+  })
+
+  it("attach mode creates client with no-op cleanup", async () => {
+    // given
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+
+    // then
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+
+  it("port with available port starts server", async () => {
+    // given
+    const signal = new AbortController().signal
+    const port = 9999
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    expect(mockCreateOpencode).toHaveBeenCalled()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+})
--- a/src/cli/run/json-output.test.ts
+++ b/src/cli/run/json-output.test.ts
@@ -0,0 +1,170 @@
+import { describe, it, expect, beforeEach } from "bun:test"
+import type { RunResult } from "./types"
+import { createJsonOutputManager } from "./json-output"
+
+interface MockWriteStream { // In-memory stand-in for a write stream used by these tests.
+  write: (chunk: string) => boolean
+  writes: string[] // every chunk passed to write, in call order
+}
+
+function createMockWriteStream(): MockWriteStream { // Returns a fresh mock stream whose write() records chunks and reports success.
+  const stream: MockWriteStream = {
+    writes: [],
+    write: function (this: MockWriteStream, chunk: string): boolean {
+      this.writes.push(chunk) // record through `this`, i.e. on whichever object write is invoked on
+      return true
+    },
+  }
+  return stream
+}
+
+describe("createJsonOutputManager", () => { // Exercises redirectToStderr / restore / emitResult against in-memory mock streams.
+  let mockStdout: MockWriteStream
+  let mockStderr: MockWriteStream
+
+  beforeEach(() => {
+    mockStdout = createMockWriteStream() // fresh streams per test so recorded writes never leak between tests
+    mockStderr = createMockWriteStream()
+  })
+
+  describe("redirectToStderr", () => {
+    it("causes stdout writes to go to stderr", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      mockStdout.write("test message")
+
+      // then
+      expect(mockStdout.writes).toHaveLength(0)
+      expect(mockStderr.writes).toEqual(["test message"])
+    })
+  })
+
+  describe("restore", () => {
+    it("reverses the redirect", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      manager.restore()
+      mockStdout.write("restored message")
+
+      // then
+      expect(mockStdout.writes).toEqual(["restored message"])
+      expect(mockStderr.writes).toHaveLength(0)
+    })
+  })
+
+  describe("emitResult", () => {
+    it("writes valid JSON to stdout", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 1234,
+        messageCount: 42,
+        summary: "Test summary",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      expect(mockStdout.writes).toHaveLength(1)
+      const emitted = mockStdout.writes[0]!
+      expect(() => JSON.parse(emitted)).not.toThrow()
+    })
+
+    it("output matches RunResult schema", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 1234,
+        messageCount: 42,
+        summary: "Test summary",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      const emitted = mockStdout.writes[0]!
+      const parsed = JSON.parse(emitted) as RunResult
+      expect(parsed).toEqual(result) // the emitted JSON round-trips to the original object
+      expect(parsed.sessionId).toBe("test-session")
+      expect(parsed.success).toBe(true)
+      expect(parsed.durationMs).toBe(1234)
+      expect(parsed.messageCount).toBe(42)
+      expect(parsed.summary).toBe("Test summary")
+    })
+
+    it("restores stdout even if redirect was active", () => {
+      // given
+      const result: RunResult = {
+        sessionId: "test-session",
+        success: true,
+        durationMs: 100,
+        messageCount: 1,
+        summary: "Test",
+      }
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+      manager.redirectToStderr()
+
+      // when
+      manager.emitResult(result)
+
+      // then
+      expect(mockStdout.writes).toHaveLength(1)
+      expect(mockStdout.writes[0]!).toBe(JSON.stringify(result) + "\n") // payload is the JSON text plus a trailing newline
+
+      mockStdout.write("after emit") // a later write must land on stdout: the redirect is no longer active
+      expect(mockStdout.writes).toHaveLength(2)
+      expect(mockStderr.writes).toHaveLength(0)
+    })
+  })
+
+  describe("multiple redirects and restores", () => {
+    it("work correctly", () => {
+      // given
+      const manager = createJsonOutputManager({
+        stdout: mockStdout as unknown as NodeJS.WriteStream,
+        stderr: mockStderr as unknown as NodeJS.WriteStream,
+      })
+
+      // when
+      manager.redirectToStderr()
+      mockStdout.write("first redirect")
+
+      manager.redirectToStderr() // redirect again while a redirect is already active
+      mockStdout.write("second redirect")
+
+      manager.restore() // a single restore is expected to undo both redirects
+      mockStdout.write("after restore")
+
+      // then
+      expect(mockStdout.writes).toEqual(["after restore"])
+      expect(mockStderr.writes).toEqual(["first redirect", "second redirect"])
+    })
+  })
+})
--- a/src/cli/run/json-output.ts
+++ b/src/cli/run/json-output.ts
@@ -0,0 +1,52 @@
+import type { RunResult } from "./types"
+
+export interface JsonOutputManager { // Handle returned by createJsonOutputManager.
+  redirectToStderr: () => void // patch stdout.write so writes are forwarded to stderr
+  restore: () => void // reinstate the stdout.write captured when the manager was created
+  emitResult: (result: RunResult) => void // restore, then write the result as one JSON line on stdout
+}
+
+interface JsonOutputManagerOptions { // Optional stream overrides for createJsonOutputManager.
+  stdout?: NodeJS.WriteStream // falls back to process.stdout when omitted
+  stderr?: NodeJS.WriteStream // falls back to process.stderr when omitted
+}
+
+export function createJsonOutputManager( // Builds a manager that can divert stdout writes to stderr and later emit a single JSON result line on stdout.
+  options: JsonOutputManagerOptions = {}
+): JsonOutputManager {
+  const stdout = options.stdout ?? process.stdout // streams are injectable; default to the process streams
+  const stderr = options.stderr ?? process.stderr
+
+  const originalWrite = stdout.write.bind(stdout) // captured once, before any patching, so restore() always reinstates this writer
+
+  function redirectToStderr(): void {
+    stdout.write = function ( // replace stdout.write with a forwarder to stderr.write
+      chunk: Uint8Array | string,
+      encodingOrCallback?: BufferEncoding | ((error?: Error | null) => void),
+      callback?: (error?: Error | null) => void
+    ): boolean {
+      if (typeof encodingOrCallback === "function") { // write(chunk, callback) form
+        return stderr.write(chunk, encodingOrCallback)
+      }
+      if (encodingOrCallback !== undefined) { // write(chunk, encoding, callback?) form
+        return stderr.write(chunk, encodingOrCallback, callback)
+      }
+      return stderr.write(chunk) // write(chunk) form
+    } as NodeJS.WriteStream["write"]
+  }
+
+  function restore(): void {
+    stdout.write = originalWrite // safe to call whether or not a redirect is active
+  }
+
+  function emitResult(result: RunResult): void {
+    restore() // undo any active redirect before emitting
+    originalWrite(JSON.stringify(result) + "\n") // one JSON document followed by a newline
+  }
+
+  return {
+    redirectToStderr,
+    restore,
+    emitResult,
+  }
+}
--- a/src/cli/run/on-complete-hook.test.ts
+++ b/src/cli/run/on-complete-hook.test.ts
@@ -0,0 +1,179 @@
+import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
+import { executeOnCompleteHook } from "./on-complete-hook"
+
+describe("executeOnCompleteHook", () => { // Verifies spawn arguments, env vars, no-op cases, and non-throwing failure handling with Bun.spawn stubbed out.
+  function createProc(exitCode: number) { // minimal stand-in for a spawned process: only `exited` and `exitCode` are provided
+    return {
+      exited: Promise.resolve(exitCode),
+      exitCode,
+    } as unknown as ReturnType<typeof Bun.spawn>
+  }
+
+  let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
+
+  beforeEach(() => {
+    consoleErrorSpy = spyOn(console, "error").mockImplementation(() => {}) // silence and capture console.error output
+  })
+
+  afterEach(() => {
+    consoleErrorSpy.mockRestore()
+  })
+
+  it("executes command with correct env vars", async () => {
+    // given
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) // stub Bun.spawn so no real process is started
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "echo test",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then
+      expect(spawnSpy).toHaveBeenCalledTimes(1)
+      const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+
+      expect(args).toEqual(["sh", "-c", "echo test"])
+      expect(options?.env?.SESSION_ID).toBe("session-123")
+      expect(options?.env?.EXIT_CODE).toBe("0")
+      expect(options?.env?.DURATION_MS).toBe("5000")
+      expect(options?.env?.MESSAGE_COUNT).toBe("10")
+      expect(options?.stdout).toBe("inherit")
+      expect(options?.stderr).toBe("inherit")
+    } finally {
+      spawnSpy.mockRestore() // always restore Bun.spawn, even when an assertion above fails
+    }
+  })
+
+  it("env var values are strings", async () => {
+    // given
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "echo test",
+        sessionId: "session-123",
+        exitCode: 1,
+        durationMs: 12345,
+        messageCount: 42,
+      })
+
+      // then
+      const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
+
+      expect(options?.env?.EXIT_CODE).toBe("1")
+      expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
+      expect(options?.env?.DURATION_MS).toBe("12345")
+      expect(options?.env?.DURATION_MS).toBeTypeOf("string")
+      expect(options?.env?.MESSAGE_COUNT).toBe("42")
+      expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("empty command string is no-op", async () => {
+    // given
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then
+      expect(spawnSpy).not.toHaveBeenCalled()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("whitespace-only command is no-op", async () => {
+    // given
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
+
+    try {
+      // when
+      await executeOnCompleteHook({
+        command: "   ",
+        sessionId: "session-123",
+        exitCode: 0,
+        durationMs: 5000,
+        messageCount: 10,
+      })
+
+      // then
+      expect(spawnSpy).not.toHaveBeenCalled()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("command failure logs warning but does not throw", async () => {
+    // given
+    const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1)) // the hook process itself exits with code 1
+
+    try {
+      // when
+      await expect(
+        executeOnCompleteHook({
+          command: "false",
+          sessionId: "session-123",
+          exitCode: 0, // exit code of the run being reported, not of the hook process
+          durationMs: 5000,
+          messageCount: 10,
+        })
+      ).resolves.toBeUndefined()
+
+      // then
+      expect(consoleErrorSpy).toHaveBeenCalled()
+      const warningCall = consoleErrorSpy.mock.calls.find(
+        (call) => typeof call[0] === "string" && call[0].includes("Warning: on-complete hook exited with code 1")
+      )
+      expect(warningCall).toBeDefined()
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+
+  it("spawn error logs warning but does not throw", async () => {
+    // given
+    const spawnError = new Error("Command not found")
+    const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => { // make the spawn call itself throw synchronously
+      throw spawnError
+    })
+
+    try {
+      // when
+      await expect(
+        executeOnCompleteHook({
+          command: "nonexistent-command",
+          sessionId: "session-123",
+          exitCode: 0,
+          durationMs: 5000,
+          messageCount: 10,
+        })
+      ).resolves.toBeUndefined()
+
+      // then
+      expect(consoleErrorSpy).toHaveBeenCalled()
+      const errorCalls = consoleErrorSpy.mock.calls.filter((call) => { // accept any logged line mentioning "Warning" or "error"
+        const firstArg = call[0]
+        return typeof firstArg === "string" && (firstArg.includes("Warning") || firstArg.toLowerCase().includes("error"))
+      })
+      expect(errorCalls.length).toBeGreaterThan(0)
+    } finally {
+      spawnSpy.mockRestore()
+    }
+  })
+})
--- a/src/cli/run/on-complete-hook.ts
+++ b/src/cli/run/on-complete-hook.ts
@@ -0,0 +1,42 @@
+import pc from "picocolors"
+
+export async function executeOnCompleteHook(options: { // Runs the on-complete shell command with run metadata in its environment; failures are logged as warnings, not thrown.
+  command: string
+  sessionId: string
+  exitCode: number
+  durationMs: number
+  messageCount: number
+}): Promise<void> {
+  const { command, sessionId, exitCode, durationMs, messageCount } = options
+
+  const trimmedCommand = command.trim()
+  if (!trimmedCommand) { // empty or whitespace-only command: nothing to run
+    return
+  }
+
+  console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`)) // progress message goes through console.error, not console.log
+
+  try {
+    const proc = Bun.spawn(["sh", "-c", trimmedCommand], { // the command string is handed to `sh -c` as a single argument
+      env: { // current environment plus the run metadata, all values as strings
+        ...process.env,
+        SESSION_ID: sessionId,
+        EXIT_CODE: String(exitCode),
+        DURATION_MS: String(durationMs),
+        MESSAGE_COUNT: String(messageCount),
+      },
+      stdout: "inherit", // NOTE(review): presumably the child writes to the real stdout descriptor, bypassing any patched process.stdout.write — confirm this is intended for JSON output mode
+      stderr: "inherit",
+    })
+
+    const hookExitCode = await proc.exited // wait for the hook process to finish
+
+    if (hookExitCode !== 0) { // a failing hook only produces a warning
+      console.error(
+        pc.yellow(`Warning: on-complete hook exited with code ${hookExitCode}`)
+      )
+    }
+  } catch (error) { // covers both a throwing spawn call and a rejected `exited` promise
+    console.error(pc.yellow(`Warning: Failed to execute on-complete hook: ${error instanceof Error ? error.message : String(error)}`))
+  }
+}
--- a/src/cli/run/runner.ts
+++ b/src/cli/run/runner.ts
@@ -1,101 +1,37 @@
-import { createOpencode } from "@opencode-ai/sdk"
 import pc from "picocolors"
 import type { RunOptions, RunContext } from "./types"
 import { checkCompletionConditions } from "./completion"
 import { createEventState, processEvents, serializeError } from "./events"
-import type { OhMyOpenCodeConfig } from "../../config"
 import { loadPluginConfig } from "../../plugin-config"
-import { getAvailableServerPort, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
+import { createServerConnection } from "./server-connection"
+import { resolveSession } from "./session-resolver"
+import { createJsonOutputManager } from "./json-output"
+import { executeOnCompleteHook } from "./on-complete-hook"
+import { resolveRunAgent } from "./agent-resolver"
+
+export { resolveRunAgent }

 const POLL_INTERVAL_MS = 500
 const DEFAULT_TIMEOUT_MS = 0
-const SESSION_CREATE_MAX_RETRIES = 3
-const SESSION_CREATE_RETRY_DELAY_MS = 1000
-const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
-const DEFAULT_AGENT = "sisyphus"
-
-type EnvVars = Record<string, string | undefined>
-
-const normalizeAgentName = (agent?: string): string | undefined => {
-  if (!agent) return undefined
-  const trimmed = agent.trim()
-  if (!trimmed) return undefined
-  const lowered = trimmed.toLowerCase()
-  const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
-  return coreMatch ?? trimmed
-}
-
-const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
-  const lowered = agent.toLowerCase()
-  if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
-    return true
-  }
-  return (config.disabled_agents ?? []).some(
-    (disabled) => disabled.toLowerCase() === lowered
-  )
-}
-
-const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
-  for (const agent of CORE_AGENT_ORDER) {
-    if (!isAgentDisabled(agent, config)) {
-      return agent
-    }
-  }
-  return DEFAULT_AGENT
-}
-
-export const resolveRunAgent = (
-  options: RunOptions,
-  pluginConfig: OhMyOpenCodeConfig,
-  env: EnvVars = process.env
-): string => {
-  const cliAgent = normalizeAgentName(options.agent)
-  const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
-  const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
-  const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
-  const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
-
-  if (isAgentDisabled(normalized, pluginConfig)) {
-    const fallback = pickFallbackAgent(pluginConfig)
-    const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
-    if (fallbackDisabled) {
-      console.log(
-        pc.yellow(
-          `Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
-        )
-      )
-      return fallback
-    }
-    console.log(
-      pc.yellow(
-        `Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
-      )
-    )
-    return fallback
-  }
-
-  return normalized
-}

 export async function run(options: RunOptions): Promise<number> {
-  // Set CLI run mode environment variable before any config loading
-  // This signals to config-handler to deny Question tool (no TUI to answer)
  process.env.OPENCODE_CLI_RUN_MODE = "true"

+  const startTime = Date.now()
  const {
    message,
    directory = process.cwd(),
    timeout = DEFAULT_TIMEOUT_MS,
  } = options
+
+  const jsonManager = options.json ? createJsonOutputManager() : null
+  if (jsonManager) jsonManager.redirectToStderr()
+
  const pluginConfig = loadPluginConfig(directory, { command: "run" })
  const resolvedAgent = resolveRunAgent(options, pluginConfig)
-
-  console.log(pc.cyan("Starting opencode server (auto port selection enabled)..."))
-
  const abortController = new AbortController()
  let timeoutId: ReturnType<typeof setTimeout> | null = null

-  // timeout=0 means no timeout (run until completion)
  if (timeout > 0) {
    timeoutId = setTimeout(() => {
      console.log(pc.yellow("\nTimeout reached. Aborting..."))
@@ -104,29 +40,15 @@ export async function run(options: RunOptions): Promise<number> {
  }

  try {
-    const envPort = process.env.OPENCODE_SERVER_PORT
-      ? parseInt(process.env.OPENCODE_SERVER_PORT, 10)
-      : undefined
-    const serverHostname = process.env.OPENCODE_SERVER_HOSTNAME || "127.0.0.1"
-    const preferredPort = envPort && !isNaN(envPort) ? envPort : DEFAULT_SERVER_PORT
-
-    const { port: serverPort, wasAutoSelected } = await getAvailableServerPort(preferredPort, serverHostname)
-
-    if (wasAutoSelected) {
-      console.log(pc.yellow(`Port ${preferredPort} is busy, using port ${serverPort} instead`))
-    } else {
-      console.log(pc.dim(`Using port ${serverPort}`))
-    }
-
-    const { client, server } = await createOpencode({
+    const { client, cleanup: serverCleanup } = await createServerConnection({
+      port: options.port,
+      attach: options.attach,
      signal: abortController.signal,
-      port: serverPort,
-      hostname: serverHostname,
    })

    const cleanup = () => {
      if (timeoutId) clearTimeout(timeoutId)
-      server.close()
+      serverCleanup()
    }

    process.on("SIGINT", () => {
@@ -136,61 +58,14 @@ export async function run(options: RunOptions): Promise<number> {
    })

    try {
-      // Retry session creation with exponential backoff
-      // Server might not be fully ready even after "listening" message
-      let sessionID: string | undefined
-      let lastError: unknown
-
-      for (let attempt = 1; attempt <= SESSION_CREATE_MAX_RETRIES; attempt++) {
-        const sessionRes = await client.session.create({
-          body: { title: "oh-my-opencode run" },
-        })
-
-        if (sessionRes.error) {
-          lastError = sessionRes.error
-          console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`))
-          console.error(pc.dim(`  Error: ${serializeError(sessionRes.error)}`))
-
-          if (attempt < SESSION_CREATE_MAX_RETRIES) {
-            const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
-            console.log(pc.dim(`  Retrying in ${delay}ms...`))
-            await new Promise((resolve) => setTimeout(resolve, delay))
-            continue
-          }
-        }
-
-        sessionID = sessionRes.data?.id
-        if (sessionID) {
-          break
-        }
-
-        // No error but also no session ID - unexpected response
-        lastError = new Error(`Unexpected response: ${JSON.stringify(sessionRes, null, 2)}`)
-        console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`))
-
-        if (attempt < SESSION_CREATE_MAX_RETRIES) {
-          const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
-          console.log(pc.dim(`  Retrying in ${delay}ms...`))
-          await new Promise((resolve) => setTimeout(resolve, delay))
-        }
-      }
-
-      if (!sessionID) {
-        console.error(pc.red("Failed to create session after all retries"))
-        console.error(pc.dim(`Last error: ${serializeError(lastError)}`))
-        cleanup()
-        return 1
-      }
+      const sessionID = await resolveSession({
+        client,
+        sessionId: options.sessionId,
+      })

      console.log(pc.dim(`Session: ${sessionID}`))

-      const ctx: RunContext = {
-        client,
-        sessionID,
-        directory,
-        abortController,
-      }
-
+      const ctx: RunContext = { client, sessionID, directory, abortController }
      const events = await client.event.subscribe()
      const eventState = createEventState()
      const eventProcessor = processEvents(ctx, events.stream, eventState)
@@ -206,47 +81,41 @@ export async function run(options: RunOptions): Promise<number> {
      })

      console.log(pc.dim("Waiting for completion...\n"))
-
-      while (!abortController.signal.aborted) {
-        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
-
-        if (!eventState.mainSessionIdle) {
-          continue
-        }
-
-        // Check if session errored - exit with failure if so
-        if (eventState.mainSessionError) {
-          console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
-          console.error(pc.yellow("Check if todos were completed before the error."))
-          cleanup()
-          process.exit(1)
-        }
-
-        // Guard against premature completion: don't check completion until the
-        // session has produced meaningful work (text output, tool call, or tool result).
-        // Without this, a session that goes busy->idle before the LLM responds
-        // would exit immediately because 0 todos + 0 children = "complete".
-        if (!eventState.hasReceivedMeaningfulWork) {
-          continue
-        }
-
-        const shouldExit = await checkCompletionConditions(ctx)
-        if (shouldExit) {
-          console.log(pc.green("\n\nAll tasks completed."))
-          cleanup()
-          process.exit(0)
-        }
-      }
+      const exitCode = await pollForCompletion(ctx, eventState, abortController)

      await eventProcessor.catch(() => {})
      cleanup()
-      return 130
+
+      const durationMs = Date.now() - startTime
+
+      if (options.onComplete) {
+        await executeOnCompleteHook({
+          command: options.onComplete,
+          sessionId: sessionID,
+          exitCode,
+          durationMs,
+          messageCount: eventState.messageCount,
+        })
+      }
+
+      if (jsonManager) {
+        jsonManager.emitResult({
+          sessionId: sessionID,
+          success: exitCode === 0,
+          durationMs,
+          messageCount: eventState.messageCount,
+          summary: eventState.lastPartText.slice(0, 200) || "Run completed",
+        })
+      }
+
+      return exitCode
    } catch (err) {
      cleanup()
      throw err
    }
  } catch (err) {
    if (timeoutId) clearTimeout(timeoutId)
+    if (jsonManager) jsonManager.restore()
    if (err instanceof Error && err.name === "AbortError") {
      return 130
    }
@@ -254,3 +123,31 @@ export async function run(options: RunOptions): Promise<number> {
    return 1
  }
 }
+
+async function pollForCompletion( // Polls event state every POLL_INTERVAL_MS and resolves to an exit code: 0 completed, 1 session error, 130 aborted.
+  ctx: RunContext,
+  eventState: ReturnType<typeof createEventState>,
+  abortController: AbortController
+): Promise<number> {
+  while (!abortController.signal.aborted) {
+    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) // sleep one poll interval before each check
+
+    if (!eventState.mainSessionIdle) continue // main session is not idle yet: keep waiting
+
+    if (eventState.mainSessionError) { // idle with an error recorded: report failure to the caller
+      console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
+      console.error(pc.yellow("Check if todos were completed before the error."))
+      return 1
+    }
+
+    if (!eventState.hasReceivedMeaningfulWork) continue // guard against premature completion before the session has produced any meaningful work
+
+    const shouldExit = await checkCompletionConditions(ctx)
+    if (shouldExit) {
+      console.log(pc.green("\n\nAll tasks completed."))
+      return 0
+    }
+  }
+
+  return 130 // loop ended because the abort signal fired
+}
--- a/src/cli/run/server-connection.test.ts
+++ b/src/cli/run/server-connection.test.ts
@@ -0,0 +1,152 @@
+import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test"
+
+const originalConsole = globalThis.console // saved so afterEach can put the real console back
+
+const mockServerClose = mock(() => {}) // shared close() spy for every server the createOpencode mock returns
+const mockCreateOpencode = mock(() => // replaces the SDK's createOpencode: resolves to a client plus a server object
+  Promise.resolve({
+    client: { session: {} },
+    server: { url: "http://127.0.0.1:4096", close: mockServerClose },
+  })
+)
+const mockCreateOpencodeClient = mock(() => ({ session: {} })) // replaces the SDK's createOpencodeClient
+const mockIsPortAvailable = mock(() => Promise.resolve(true)) // default: the requested port is free
+const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 4096, wasAutoSelected: false }))
+const mockConsoleLog = mock(() => {})
+
+mock.module("@opencode-ai/sdk", () => ({
+  createOpencode: mockCreateOpencode,
+  createOpencodeClient: mockCreateOpencodeClient,
+}))
+
+mock.module("../../shared/port-utils", () => ({
+  isPortAvailable: mockIsPortAvailable,
+  getAvailableServerPort: mockGetAvailableServerPort,
+  DEFAULT_SERVER_PORT: 4096,
+}))
+
+const { createServerConnection } = await import("./server-connection") // dynamic import placed after the mock.module calls, presumably so the module under test resolves the mocked dependencies — verify against Bun's mock.module semantics
+
+describe("createServerConnection", () => { // Covers attach mode, explicit port (free and occupied), auto port selection, port validation, and cleanup ownership.
+  beforeEach(() => {
+    mockCreateOpencode.mockClear()
+    mockCreateOpencodeClient.mockClear()
+    mockIsPortAvailable.mockClear()
+    mockGetAvailableServerPort.mockClear()
+    mockServerClose.mockClear()
+    mockConsoleLog.mockClear()
+    globalThis.console = { ...console, log: mockConsoleLog } as typeof console // swap in a console whose log is the mock; other members are copied via spread
+  })
+
+  afterEach(() => {
+    globalThis.console = originalConsole
+  })
+
+  it("attach mode returns client with no-op cleanup", async () => {
+    // given
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+
+    // then
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+
+  it("explicit port starts server when port is available", async () => {
+    // given
+    const signal = new AbortController().signal
+    const port = 8080
+    mockIsPortAvailable.mockResolvedValueOnce(true)
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then
+    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
+    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 8080, hostname: "127.0.0.1" })
+    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+
+  it("explicit port attaches when port is occupied", async () => {
+    // given
+    const signal = new AbortController().signal
+    const port = 8080
+    mockIsPortAvailable.mockResolvedValueOnce(false) // simulate something already listening on the port
+
+    // when
+    const result = await createServerConnection({ port, signal })
+
+    // then
+    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
+    expect(mockCreateOpencode).not.toHaveBeenCalled()
+    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: "http://127.0.0.1:8080" })
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).not.toHaveBeenCalled() // an attached server is not closed by cleanup
+  })
+
+  it("auto mode uses getAvailableServerPort", async () => {
+    // given
+    const signal = new AbortController().signal
+    mockGetAvailableServerPort.mockResolvedValueOnce({ port: 4100, wasAutoSelected: true })
+
+    // when
+    const result = await createServerConnection({ signal })
+
+    // then
+    expect(mockGetAvailableServerPort).toHaveBeenCalledWith(4096, "127.0.0.1") // 4096 is the mocked DEFAULT_SERVER_PORT
+    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 4100, hostname: "127.0.0.1" })
+    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
+    expect(result.client).toBeDefined()
+    expect(result.cleanup).toBeDefined()
+    result.cleanup()
+    expect(mockServerClose).toHaveBeenCalled()
+  })
+
+  it("invalid port throws error", async () => {
+    // given
+    const signal = new AbortController().signal
+
+    // when & then
+    await expect(createServerConnection({ port: 0, signal })).rejects.toThrow("Port must be between 1 and 65535")
+    await expect(createServerConnection({ port: -1, signal })).rejects.toThrow("Port must be between 1 and 65535")
+    await expect(createServerConnection({ port: 99999, signal })).rejects.toThrow("Port must be between 1 and 65535")
+  })
+
+  it("cleanup calls server.close for owned server", async () => {
+    // given
+    const signal = new AbortController().signal
+    mockIsPortAvailable.mockResolvedValueOnce(true)
+
+    // when
+    const result = await createServerConnection({ port: 8080, signal })
+    result.cleanup()
+
+    // then
+    expect(mockServerClose).toHaveBeenCalledTimes(1)
+  })
+
+  it("cleanup is no-op for attached server", async () => {
+    // given
+    const signal = new AbortController().signal
+    const attachUrl = "http://localhost:8080"
+
+    // when
+    const result = await createServerConnection({ attach: attachUrl, signal })
+    result.cleanup()
+
+    // then
+    expect(mockServerClose).not.toHaveBeenCalled()
+  })
+})
--- a/src/cli/run/server-connection.ts
+++ b/src/cli/run/server-connection.ts
@@ -0,0 +1,47 @@
+import { createOpencode, createOpencodeClient } from "@opencode-ai/sdk"
+import pc from "picocolors"
+import type { ServerConnection } from "./types"
+import { getAvailableServerPort, isPortAvailable, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
+
+export async function createServerConnection(options: { // Resolves an opencode client via attach URL, explicit port, or auto-selected port; `cleanup` closes only a server started here.
+  port?: number
+  attach?: string
+  signal: AbortSignal
+}): Promise<ServerConnection> {
+  const { port, attach, signal } = options
+
+  if (attach !== undefined) { // attach is checked first, so it wins when both attach and port are given
+    console.log(pc.dim("Attaching to existing server at"), pc.cyan(attach))
+    const client = createOpencodeClient({ baseUrl: attach })
+    return { client, cleanup: () => {} } // nothing was started here, so there is nothing to close
+  }
+
+  if (port !== undefined) {
+    if (!Number.isInteger(port) || port < 1 || port > 65535) { // isInteger also rejects NaN and fractional ports, which slip past the range comparisons alone
+      throw new Error("Port must be between 1 and 65535")
+    }
+
+    const available = await isPortAvailable(port, "127.0.0.1")
+
+    if (available) { // port is free: start a server on it
+      console.log(pc.dim("Starting server on port"), pc.cyan(port.toString()))
+      const { client, server } = await createOpencode({ signal, port, hostname: "127.0.0.1" })
+      console.log(pc.dim("Server listening at"), pc.cyan(server.url))
+      return { client, cleanup: () => server.close() }
+    }
+
+    console.log(pc.dim("Port"), pc.cyan(port.toString()), pc.dim("is occupied, attaching to existing server")) // occupied port is treated as an already-running server
+    const client = createOpencodeClient({ baseUrl: `http://127.0.0.1:${port}` })
+    return { client, cleanup: () => {} }
+  }
+
+  const { port: selectedPort, wasAutoSelected } = await getAvailableServerPort(DEFAULT_SERVER_PORT, "127.0.0.1") // neither attach nor port given: start from the default port
+  if (wasAutoSelected) {
+    console.log(pc.dim("Auto-selected port"), pc.cyan(selectedPort.toString()))
+  } else {
+    console.log(pc.dim("Starting server on port"), pc.cyan(selectedPort.toString()))
+  }
+  const { client, server } = await createOpencode({ signal, port: selectedPort, hostname: "127.0.0.1" })
+  console.log(pc.dim("Server listening at"), pc.cyan(server.url))
+  return { client, cleanup: () => server.close() }
+}
--- a/src/cli/run/session-resolver.test.ts
+++ b/src/cli/run/session-resolver.test.ts
@@ -0,0 +1,158 @@
+/// <reference types="bun-types" />
+
+import { beforeEach, describe, expect, it, mock, spyOn } from "bun:test";
+import { resolveSession } from "./session-resolver";
+import type { OpencodeClient } from "./types";
+
+const createMockClient = (overrides: {
+  getResult?: { error?: unknown; data?: { id: string } }
+  createResults?: Array<{ error?: unknown; data?: { id: string } }>
+} = {}): OpencodeClient => {
+  // Test double exposing only session.get and session.create.
+  // get: resolves to getResult, or echoes the requested ID back when none is given.
+  // create: replays createResults in call order, then falls back to "new-session-id".
+  const { getResult, createResults: scripted = [] } = overrides
+  let served = 0
+
+  const get = mock(async (opts: { path: { id: string } }) =>
+    getResult ?? { data: { id: opts.path.id } }
+  )
+  const create = mock(async () => {
+    const next = scripted[served] ?? { data: { id: "new-session-id" } }
+    served += 1
+    return next
+  })
+  return { session: { get, create } } as unknown as OpencodeClient
+}
+
+describe("resolveSession", () => {
+  beforeEach(() => {
+    spyOn(console, "log").mockImplementation(() => {})
+    spyOn(console, "error").mockImplementation(() => {})
+  })
+
+  it("returns provided session ID when session exists", async () => {
+    // given
+    const sessionId = "existing-session-id"
+    const mockClient = createMockClient({
+      getResult: { data: { id: sessionId } },
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient, sessionId })
+
+    // then
+    expect(result).toBe(sessionId)
+    expect(mockClient.session.get).toHaveBeenCalledWith({
+      path: { id: sessionId },
+    })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("throws error when provided session ID not found", async () => {
+    // given
+    const sessionId = "non-existent-session-id"
+    const mockClient = createMockClient({
+      getResult: { error: { message: "Session not found" } },
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient, sessionId })
+
+    // then
+    // A failed lookup must surface as an error; it must never fall back to
+    // creating a fresh session (checked by the create assertion below).
+    await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
+    expect(mockClient.session.get).toHaveBeenCalledWith({
+      path: { id: sessionId },
+    })
+    expect(mockClient.session.create).not.toHaveBeenCalled()
+  })
+
+  it("creates new session when no session ID provided", async () => {
+    // given
+    const mockClient = createMockClient({
+      createResults: [{ data: { id: "new-session-id" } }],
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient })
+
+    // then
+    expect(result).toBe("new-session-id")
+    expect(mockClient.session.create).toHaveBeenCalledWith({
+      body: {
+        title: "oh-my-opencode run",
+        permission: [
+          { permission: "question", action: "deny", pattern: "*" },
+        ],
+      },
+    })
+    expect(mockClient.session.get).not.toHaveBeenCalled()
+  })
+
+  it("retries session creation on failure", async () => {
+    // given
+    const mockClient = createMockClient({
+      createResults: [
+        { error: { message: "Network error" } },
+        { data: { id: "retried-session-id" } },
+      ],
+    })
+
+    // when
+    const result = await resolveSession({ client: mockClient })
+
+    // then
+    expect(result).toBe("retried-session-id")
+    expect(mockClient.session.create).toHaveBeenCalledTimes(2)
+    expect(mockClient.session.create).toHaveBeenCalledWith({
+      body: {
+        title: "oh-my-opencode run",
+        permission: [
+          { permission: "question", action: "deny", pattern: "*" },
+        ],
+      },
+    })
+  })
+
+  it("throws after all retries exhausted", async () => {
+    // given
+    const mockClient = createMockClient({
+      createResults: [
+        { error: { message: "Error 1" } },
+        { error: { message: "Error 2" } },
+        { error: { message: "Error 3" } },
+      ],
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient })
+
+    // then
+    // NOTE: resolveSession sleeps between attempts, so this test really waits
+    // through the backoff (1000ms + 2000ms with the current constants).
+    await expect(result).rejects.toThrow("Failed to create session after all retries")
+    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
+  })
+
+  it("session creation returns no ID", async () => {
+    // given
+    const mockClient = createMockClient({
+      createResults: [
+        { data: undefined },
+        { data: undefined },
+        { data: undefined },
+      ],
+    })
+
+    // when
+    const result = resolveSession({ client: mockClient })
+
+    // then
+    // A response with neither an error nor data.id counts as a failed attempt
+    // and is retried the same way as an explicit error.
+    await expect(result).rejects.toThrow("Failed to create session after all retries")
+    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
+  })
+})
--- a/src/cli/run/session-resolver.ts
+++ b/src/cli/run/session-resolver.ts
@@ -0,0 +1,65 @@
+import pc from "picocolors"
+import type { OpencodeClient } from "./types"
+import { serializeError } from "./events"
+
+const SESSION_CREATE_MAX_RETRIES = 3
+const SESSION_CREATE_RETRY_DELAY_MS = 1000
+
+/**
+ * Resolves the session to run in. A given `sessionId` is only verified and
+ * returned (throws when the lookup fails). Otherwise a new session is created,
+ * retrying with a linearly growing delay, and the call throws once
+ * SESSION_CREATE_MAX_RETRIES attempts have failed.
+ */
+export async function resolveSession(options: {
+  client: OpencodeClient
+  sessionId?: string
+}): Promise<string> {
+  const { client, sessionId } = options
+
+  if (sessionId) {
+    const lookup = await client.session.get({ path: { id: sessionId } })
+    if (!lookup.error && lookup.data) {
+      return sessionId
+    }
+    throw new Error(`Session not found: ${sessionId}`)
+  }
+
+  let attempt = 0
+  while (attempt < SESSION_CREATE_MAX_RETRIES) {
+    attempt += 1
+    const created = await client.session.create({
+      body: {
+        title: "oh-my-opencode run",
+        // CLI run mode has no TUI, so nobody could answer a question prompt.
+        permission: [
+          { permission: "question", action: "deny" as const, pattern: "*" },
+        ],
+      } as any,
+    })
+
+    if (created.error) {
+      console.error(
+        pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`)
+      )
+      console.error(pc.dim(`  Error: ${serializeError(created.error)}`))
+    } else if (created.data?.id) {
+      return created.data.id
+    } else {
+      console.error(
+        pc.yellow(
+          `Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`
+        )
+      )
+    }
+
+    // Both failure kinds share one backoff; there is no sleep after the final attempt.
+    if (attempt < SESSION_CREATE_MAX_RETRIES) {
+      const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
+      console.log(pc.dim(`  Retrying in ${delay}ms...`))
+      await new Promise((resolve) => setTimeout(resolve, delay))
+    }
+  }
+
+  throw new Error("Failed to create session after all retries")
+}
--- a/src/cli/run/types.ts
+++ b/src/cli/run/types.ts
@@ -1,10 +1,29 @@
 import type { OpencodeClient } from "@opencode-ai/sdk"
+export type { OpencodeClient }

 export interface RunOptions {
  message: string
  agent?: string
  directory?: string
  timeout?: number
+  port?: number
+  attach?: string
+  onComplete?: string
+  json?: boolean
+  sessionId?: string
+}
+
+export interface ServerConnection {
+  client: OpencodeClient
+  cleanup: () => void
+}
+
+export interface RunResult {
+  sessionId: string
+  success: boolean
+  durationMs: number
+  messageCount: number
+  summary: string
 }

 export interface RunContext {
--- a/src/config/AGENTS.md
+++ b/src/config/AGENTS.md
@@ -0,0 +1,93 @@
+**Generated:** 2026-02-08T16:45:00+09:00
+**Commit:** f2b7b759
+**Branch:** dev
+
+## OVERVIEW
+
+Zod schema definitions for plugin configuration. 455+ lines of type-safe config validation with JSONC support, multi-level inheritance, and comprehensive agent/category overrides.
+
+## STRUCTURE
+```
+config/
+├── schema.ts              # Main Zod schema (455 lines) - agents, categories, experimental features
+├── schema.test.ts         # Schema validation tests (735 lines)
+└── index.ts               # Barrel export
+```
+
+## SCHEMA COMPONENTS
+
+**Agent Configuration:**
+- `AgentOverrideConfigSchema`: Model, variant, temperature, permissions, tools
+- `AgentOverridesSchema`: Per-agent overrides (sisyphus, hephaestus, prometheus, etc.)
+- `AgentPermissionSchema`: Tool access control (edit, bash, webfetch, task, doom_loop, external_directory)
+
+**Category Configuration:**
+- `CategoryConfigSchema`: Model defaults, thinking budgets, tool restrictions
+- `CategoriesConfigSchema`: Named categories (visual-engineering, ultrabrain, deep, etc.)
+
+**Experimental Features:**
+- `ExperimentalConfigSchema`: Dynamic context pruning, task system, plugin load timeout, safe hook creation
+- `DynamicContextPruningConfigSchema`: Intelligent context management
+
+**Built-in Enums:**
+- `AgentNameSchema`: sisyphus, hephaestus, prometheus, oracle, librarian, explore, multimodal-looker, metis, momus, atlas
+- `HookNameSchema`: 100+ hook names for lifecycle management
+- `BuiltinCommandNameSchema`: init-deep, ralph-loop, refactor, start-work
+- `BuiltinSkillNameSchema`: playwright, agent-browser, git-master
+
+## CONFIGURATION HIERARCHY
+
+1. **Project config** (`.opencode/oh-my-opencode.json`)
+2. **User config** (`~/.config/opencode/oh-my-opencode.json`)
+3. **Defaults** (hardcoded fallbacks)
+
+**Multi-level inheritance:** Project → User → Defaults
+
+## VALIDATION FEATURES
+
+- **JSONC support**: Comments and trailing commas
+- **Type safety**: Full TypeScript inference
+- **Migration support**: Legacy config compatibility
+- **Schema versioning**: $schema field for validation
+
+## KEY SCHEMAS
+
+| Schema | Purpose | Lines |
+|--------|---------|-------|
+| `OhMyOpenCodeConfigSchema` | Root config schema | 400+ |
+| `AgentOverrideConfigSchema` | Agent customization | 50+ |
+| `CategoryConfigSchema` | Task category defaults | 30+ |
+| `ExperimentalConfigSchema` | Beta features | 40+ |
+
+## USAGE PATTERNS
+
+**Agent Override:**
+```typescript
+agents: {
+  sisyphus: {
+    model: "anthropic/claude-opus-4-6",
+    variant: "max",
+    temperature: 0.1
+  }
+}
+```
+
+**Category Definition:**
+```typescript
+categories: {
+  "visual-engineering": {
+    model: "google/gemini-3-pro",
+    variant: "high"
+  }
+}
+```
+
+**Experimental Features:**
+```typescript
+experimental: {
+  dynamic_context_pruning: {
+    enabled: true,
+    notification: "detailed"
+  }
+}
+```
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -5,6 +5,8 @@ import {
  BrowserAutomationProviderSchema,
  BuiltinCategoryNameSchema,
  CategoryConfigSchema,
+  ExperimentalConfigSchema,
+  GitMasterConfigSchema,
  OhMyOpenCodeConfigSchema,
 } from "./schema"

@@ -606,3 +608,128 @@ describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
    expect(result.data?.browser_automation_engine).toBeUndefined()
  })
 })
+
+describe("ExperimentalConfigSchema feature flags", () => {
+  test("accepts plugin_load_timeout_ms as number", () => {
+    //#given
+    const config = { plugin_load_timeout_ms: 5000 }
+
+    //#when
+    const result = ExperimentalConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.plugin_load_timeout_ms).toBe(5000)
+    }
+  })
+
+  test("rejects plugin_load_timeout_ms below 1000", () => {
+    //#given
+    const config = { plugin_load_timeout_ms: 500 }
+
+    //#when
+    const result = ExperimentalConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+
+  test("accepts safe_hook_creation as boolean", () => {
+    //#given
+    const config = { safe_hook_creation: false }
+
+    //#when
+    const result = ExperimentalConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.safe_hook_creation).toBe(false)
+    }
+  })
+
+  test("both fields are optional", () => {
+    //#given
+    const config = {}
+
+    //#when
+    const result = ExperimentalConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.plugin_load_timeout_ms).toBeUndefined()
+      expect(result.data.safe_hook_creation).toBeUndefined()
+    }
+  })
+})
+
+describe("GitMasterConfigSchema", () => {
+  test("accepts boolean true for commit_footer", () => {
+    //#given
+    const config = { commit_footer: true }
+
+    //#when
+    const result = GitMasterConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.commit_footer).toBe(true)
+    }
+  })
+
+  test("accepts boolean false for commit_footer", () => {
+    //#given
+    const config = { commit_footer: false }
+
+    //#when
+    const result = GitMasterConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.commit_footer).toBe(false)
+    }
+  })
+
+  test("accepts string value for commit_footer", () => {
+    //#given
+    const config = { commit_footer: "Custom footer text" }
+
+    //#when
+    const result = GitMasterConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.commit_footer).toBe("Custom footer text")
+    }
+  })
+
+  test("defaults commit_footer to true when not provided", () => {
+    //#given
+    const config = {}
+
+    //#when
+    const result = GitMasterConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.commit_footer).toBe(true)
+    }
+  })
+
+  test("rejects number for commit_footer", () => {
+    //#given
+    const config = { commit_footer: 123 }
+
+    //#when
+    const result = GitMasterConfigSchema.safeParse(config)
+
+    //#then
+    expect(result.success).toBe(false)
+  })
+})
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
  edit: PermissionValue.optional(),
  bash: BashPermission.optional(),
  webfetch: PermissionValue.optional(),
+  task: PermissionValue.optional(),
  doom_loop: PermissionValue.optional(),
  external_directory: PermissionValue.optional(),
 })
@@ -86,6 +87,7 @@ export const HookNameSchema = z.enum([
  "category-skill-reminder",

  "compaction-context-injector",
+  "compaction-todo-preserver",
  "claude-code-hooks",
  "auto-slash-command",
  "edit-error-recovery",
@@ -100,6 +102,7 @@ export const HookNameSchema = z.enum([
  "stop-continuation-guard",
  "tasks-todowrite-disabler",
  "write-existing-file-guard",
+  "anthropic-effort",
 ])

 export const BuiltinCommandNameSchema = z.enum([
@@ -183,7 +186,7 @@ export const SisyphusAgentConfigSchema = z.object({
 })

 export const CategoryConfigSchema = z.object({
-  /** Human-readable description of the category's purpose. Shown in delegate_task prompt. */
+  /** Human-readable description of the category's purpose. Shown in task prompt. */
  description: z.string().optional(),
  model: z.string().optional(),
  variant: z.string().optional(),
@@ -266,6 +269,10 @@ export const ExperimentalConfigSchema = z.object({
  dynamic_context_pruning: DynamicContextPruningConfigSchema.optional(),
  /** Enable experimental task system for Todowrite disabler hook */
  task_system: z.boolean().optional(),
+  /** Timeout in ms for loadAllPluginComponents during config handler init (default: 10000, min: 1000) */
+  plugin_load_timeout_ms: z.number().min(1000).optional(),
+  /** Wrap hook creation in try/catch to prevent one failing hook from crashing the plugin (default: true at call site) */
+  safe_hook_creation: z.boolean().optional(),
 })

 export const SkillSourceSchema = z.union([
@@ -333,10 +340,10 @@ export const BabysittingConfigSchema = z.object({
 })

 export const GitMasterConfigSchema = z.object({
-  /** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
-  commit_footer: z.boolean().default(true),
+  /** Add "Ultraworked with Sisyphus" footer to commit messages (default: true). Can be boolean or custom string. */
+  commit_footer: z.union([z.boolean(), z.string()]).default(true),
   /** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
   include_co_authored_by: z.boolean().default(true),
 })

 export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser", "dev-browser"])
@@ -420,6 +427,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
  websearch: WebsearchConfigSchema.optional(),
  tmux: TmuxConfigSchema.optional(),
  sisyphus: SisyphusConfigSchema.optional(),
+  /** Migration history to prevent re-applying migrations (e.g., model version upgrades) */
+  _migrations: z.array(z.string()).optional(),
 })

 export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -2,61 +2,29 @@

 ## OVERVIEW

-17 feature modules: background agents, skill MCPs, builtin skills/commands, Claude Code compatibility layer, task management.
-
-**Feature Types**: Task orchestration, Skill definitions, Command templates, Claude Code loaders, Supporting utilities
+Background agents, skills, Claude Code compat, builtin commands, MCP managers, etc.

 ## STRUCTURE

-```
 features/
-├── background-agent/           # Task lifecycle (1556 lines)
-│   ├── manager.ts              # Launch → poll → complete
-│   └── concurrency.ts          # Per-provider limits
-├── builtin-skills/             # Core skills
-│   └── skills/                 # playwright, agent-browser, frontend-ui-ux, git-master, dev-browser
-├── builtin-commands/           # ralph-loop, refactor, ulw-loop, init-deep, start-work, cancel-ralph, stop-continuation
-├── claude-code-agent-loader/   # ~/.claude/agents/*.md
-├── claude-code-command-loader/ # ~/.claude/commands/*.md
-├── claude-code-mcp-loader/     # .mcp.json with ${VAR} expansion
-├── claude-code-plugin-loader/  # installed_plugins.json (486 lines)
-├── claude-code-session-state/  # Session persistence
-├── opencode-skill-loader/      # Skills from 6 directories (loader.ts 311 lines)
-├── context-injector/           # AGENTS.md/README.md injection
-├── boulder-state/              # Todo state persistence
-├── hook-message-injector/      # Message injection
-├── task-toast-manager/         # Background task notifications
-├── skill-mcp-manager/          # MCP client lifecycle (640 lines)
-├── tmux-subagent/              # Tmux session management (472 lines)
-├── mcp-oauth/                  # MCP OAuth handling
-└── claude-tasks/               # Task schema/storage - see AGENTS.md
-```
+├── background-agent/                      # Task lifecycle, concurrency (manager.ts 1642 lines)
+├── builtin-skills/                       # Skills like git-master (1107 lines)
+├── builtin-commands/                     # Commands like refactor (619 lines)
+├── skill-mcp-manager/                    # MCP client lifecycle (640 lines)
+├── claude-code-plugin-loader/            # Plugin loading
+├── claude-code-mcp-loader/               # MCP loading
+├── claude-code-session-state/            # Session state
+├── claude-code-command-loader/           # Command loading
+├── claude-code-agent-loader/             # Agent loading
+├── context-injector/                     # Context injection
+├── hook-message-injector/                # Message injection
+├── task-toast-manager/                   # Task toasts
+├── boulder-state/                        # State management
+├── tmux-subagent/                        # Tmux subagent
+├── mcp-oauth/                            # OAuth for MCP
+├── opencode-skill-loader/                # Skill loading
+├── tool-metadata-store/                  # Tool metadata

-## LOADER PRIORITY
+## HOW TO ADD

-| Type | Priority (highest first) |
-|------|--------------------------|
-| Commands | `.opencode/command/` > `~/.config/opencode/command/` > `.claude/commands/` |
-| Skills | `.opencode/skills/` > `~/.config/opencode/skills/` > `.claude/skills/` |
-| MCPs | `.claude/.mcp.json` > `.mcp.json` > `~/.claude/.mcp.json` |
-
-## BACKGROUND AGENT
-
- **Lifecycle**: `launch` → `poll` (2s) → `complete`
- **Stability**: 3 consecutive polls = idle
- **Concurrency**: Per-provider/model limits via `ConcurrencyManager`
- **Cleanup**: 30m TTL, 3m stale timeout
- **State**: Per-session Maps, cleaned on `session.deleted`
-
-## SKILL MCP
-
- **Lazy**: Clients created on first call
- **Transports**: stdio, http (SSE/Streamable)
- **Lifecycle**: 5m idle cleanup
-
-## ANTI-PATTERNS
-
- **Sequential delegation**: Use `delegate_task` parallel
- **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state
+Create a directory under `features/` containing `index.ts`, `types.ts`, etc.
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -1,8 +1,9 @@
-import { describe, test, expect, beforeEach } from "bun:test"
-import { afterEach } from "bun:test"
+declare const require: (name: string) => any
+const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
 import { tmpdir } from "node:os"
 import type { PluginInput } from "@opencode-ai/plugin"
 import type { BackgroundTask, ResumeInput } from "./types"
+import { MIN_IDLE_TIME_MS } from "./constants"
 import { BackgroundManager } from "./manager"
 import { ConcurrencyManager } from "./concurrency"

@@ -170,6 +171,7 @@ function createBackgroundManager(): BackgroundManager {
  const client = {
    session: {
      prompt: async () => ({}),
+      promptAsync: async () => ({}),
      abort: async () => ({}),
    },
  }
@@ -879,12 +881,14 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
  test("should skip notification when parent session is aborted", async () => {
    //#given
    let promptCalled = false
+    const promptMock = async () => {
+      promptCalled = true
+      return {}
+    }
    const client = {
      session: {
-        prompt: async () => {
-          promptCalled = true
-          return {}
-        },
+        prompt: promptMock,
+        promptAsync: promptMock,
        abort: async () => ({}),
        messages: async () => {
          const error = new Error("User aborted")
@@ -921,14 +925,16 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
  test("should swallow aborted error from prompt", async () => {
    //#given
    let promptCalled = false
+    const promptMock = async () => {
+      promptCalled = true
+      const error = new Error("User aborted")
+      error.name = "MessageAbortedError"
+      throw error
+    }
    const client = {
      session: {
-        prompt: async () => {
-          promptCalled = true
-          const error = new Error("User aborted")
-          error.name = "MessageAbortedError"
-          throw error
-        },
+        prompt: promptMock,
+        promptAsync: promptMock,
        abort: async () => ({}),
        messages: async () => ({ data: [] }),
      },
@@ -1053,19 +1059,20 @@ describe("BackgroundManager.tryCompleteTask", () => {
    expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
  })

-  test("should abort session on completion", async () => {
-    // #given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-        messages: async () => ({ data: [] }),
-      },
-    }
+   test("should abort session on completion", async () => {
+     // #given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+         messages: async () => ({ data: [] }),
+       },
+     }
    manager.shutdown()
    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)
@@ -1088,6 +1095,127 @@ describe("BackgroundManager.tryCompleteTask", () => {
    // #then
    expect(abortedSessionIDs).toEqual(["session-1"])
  })
+
+  test("should clean pendingByParent even when notifyParentSession throws", async () => {
+    // given
+    ;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {
+      throw new Error("notify failed")
+    }
+
+    const task: BackgroundTask = {
+      id: "task-pending-cleanup",
+      sessionID: "session-pending-cleanup",
+      parentSessionID: "parent-pending-cleanup",
+      parentMessageID: "msg-1",
+      description: "pending cleanup task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(),
+    }
+    getTaskMap(manager).set(task.id, task)
+    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
+
+    // when
+    await tryCompleteTaskForTest(manager, task)
+
+    // then
+    expect(task.status).toBe("completed")
+    expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
+  })
+
+  test("should avoid overlapping promptAsync calls when tasks complete concurrently", async () => {
+    // given
+    type PromptAsyncBody = Record<string, unknown> & { noReply?: boolean }
+
+    let resolveMessages: ((value: { data: unknown[] }) => void) | undefined
+    const messagesBarrier = new Promise<{ data: unknown[] }>((resolve) => {
+      resolveMessages = resolve
+    })
+
+    const promptBodies: PromptAsyncBody[] = []
+    let promptInFlight = false
+    let rejectedCount = 0
+    let promptCallCount = 0
+
+    let releaseFirstPrompt: (() => void) | undefined
+    let resolveFirstStarted: (() => void) | undefined
+    const firstStarted = new Promise<void>((resolve) => {
+      resolveFirstStarted = resolve
+    })
+
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        abort: async () => ({}),
+        messages: async () => messagesBarrier,
+        promptAsync: async (args: { path: { id: string }; body: PromptAsyncBody }) => {
+          promptBodies.push(args.body)
+
+          if (!promptInFlight) {
+            promptCallCount += 1
+            if (promptCallCount === 1) {
+              promptInFlight = true
+              resolveFirstStarted?.()
+              return await new Promise((resolve) => {
+                releaseFirstPrompt = () => {
+                  promptInFlight = false
+                  resolve({})
+                }
+              })
+            }
+
+            return {}
+          }
+
+          rejectedCount += 1
+          throw new Error("BUSY")
+        },
+      },
+    }
+
+    manager.shutdown()
+    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+
+    const parentSessionID = "parent-session"
+    const taskA = createMockTask({
+      id: "task-a",
+      sessionID: "session-a",
+      parentSessionID,
+    })
+    const taskB = createMockTask({
+      id: "task-b",
+      sessionID: "session-b",
+      parentSessionID,
+    })
+
+    getTaskMap(manager).set(taskA.id, taskA)
+    getTaskMap(manager).set(taskB.id, taskB)
+    getPendingByParent(manager).set(parentSessionID, new Set([taskA.id, taskB.id]))
+
+    // when
+    const completionA = tryCompleteTaskForTest(manager, taskA)
+    const completionB = tryCompleteTaskForTest(manager, taskB)
+    resolveMessages?.({ data: [] })
+
+    await firstStarted
+
+    // Give the second completion a chance to attempt promptAsync while the first is in-flight.
+    // In the buggy implementation, this triggers an overlap and increments rejectedCount.
+    for (let i = 0; i < 20; i++) {
+      await Promise.resolve()
+      if (rejectedCount > 0) break
+      if (promptBodies.length >= 2) break
+    }
+
+    releaseFirstPrompt?.()
+    await Promise.all([completionA, completionB])
+
+    // then
+    expect(rejectedCount).toBe(0)
+    expect(promptBodies.length).toBe(2)
+    expect(promptBodies.some((b) => b.noReply === false)).toBe(true)
+  })
 })

 describe("BackgroundManager.trackTask", () => {
@@ -1110,7 +1238,7 @@ describe("BackgroundManager.trackTask", () => {
      sessionID: "session-1",
      parentSessionID: "parent-session",
      description: "external task",
-      agent: "delegate_task",
+      agent: "task",
      concurrencyKey: "external-key",
    }

@@ -1145,7 +1273,7 @@ describe("BackgroundManager.resume concurrency key", () => {
      sessionID: "session-1",
      parentSessionID: "parent-session",
      description: "external task",
-      agent: "delegate_task",
+      agent: "task",
      concurrencyKey: "external-key",
    })

@@ -1167,24 +1295,26 @@ describe("BackgroundManager.resume concurrency key", () => {
 })

 describe("BackgroundManager.resume model persistence", () => {
-  let manager: BackgroundManager
-  let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>
+   let manager: BackgroundManager
+   let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>

-  beforeEach(() => {
-    // given
-    promptCalls = []
-    const client = {
-      session: {
-        prompt: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
-          promptCalls.push(args)
-          return {}
-        },
-        abort: async () => ({}),
-      },
-    }
-    manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
-    stubNotifyParentSession(manager)
-  })
+   beforeEach(() => {
+     // given
+     promptCalls = []
+     const promptMock = async (args: { path: { id: string }; body: Record<string, unknown> }) => {
+       promptCalls.push(args)
+       return {}
+     }
+     const client = {
+       session: {
+         prompt: promptMock,
+         promptAsync: promptMock,
+         abort: async () => ({}),
+       },
+     }
+     manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+     stubNotifyParentSession(manager)
+   })

  afterEach(() => {
    manager.shutdown()
@@ -1282,19 +1412,20 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
  let manager: BackgroundManager
  let mockClient: ReturnType<typeof createMockClient>

-  function createMockClient() {
-    return {
-      session: {
-        create: async () => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
-        get: async () => ({ data: { directory: "/test/dir" } }),
-        prompt: async () => ({}),
-        messages: async () => ({ data: [] }),
-        todo: async () => ({ data: [] }),
-        status: async () => ({ data: {} }),
-        abort: async () => ({}),
-      },
-    }
-  }
+    function createMockClient() {
+      return {
+        session: {
+          create: async (_args?: any) => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
+          get: async () => ({ data: { directory: "/test/dir" } }),
+          prompt: async () => ({}),
+          promptAsync: async () => ({}),
+          messages: async () => ({ data: [] }),
+         todo: async () => ({ data: [] }),
+         status: async () => ({ data: {} }),
+         abort: async () => ({}),
+       },
+     }
+   }

  beforeEach(() => {
    // given
@@ -1389,6 +1520,55 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
  })

  describe("task transitions pending→running when slot available", () => {
+    test("should inherit parent session permission rules (and force deny question)", async () => {
+      // given
+      const createCalls: any[] = []
+      const parentPermission = [
+        { permission: "question", action: "allow" as const, pattern: "*" },
+        { permission: "plan_enter", action: "deny" as const, pattern: "*" },
+      ]
+
+      const customClient = {
+        session: {
+          create: async (args?: any) => {
+            createCalls.push(args)
+            return { data: { id: `ses_${crypto.randomUUID()}` } }
+          },
+          get: async () => ({ data: { directory: "/test/dir", permission: parentPermission } }),
+          prompt: async () => ({}),
+          promptAsync: async () => ({}),
+          messages: async () => ({ data: [] }),
+          todo: async () => ({ data: [] }),
+          status: async () => ({ data: {} }),
+          abort: async () => ({}),
+        },
+      }
+      manager.shutdown()
+      manager = new BackgroundManager({ client: customClient, directory: tmpdir() } as unknown as PluginInput, {
+        defaultConcurrency: 5,
+      })
+
+      const input = {
+        description: "Test task",
+        prompt: "Do something",
+        agent: "test-agent",
+        parentSessionID: "parent-session",
+        parentMessageID: "parent-message",
+      }
+
+      // when
+      await manager.launch(input)
+      await new Promise(resolve => setTimeout(resolve, 50))
+
+      // then
+      expect(createCalls).toHaveLength(1)
+      const permission = createCalls[0]?.body?.permission
+      expect(permission).toEqual([
+        { permission: "plan_enter", action: "deny", pattern: "*" },
+        { permission: "question", action: "deny", pattern: "*" },
+      ])
+    })
+
    test("should transition first task to running immediately", async () => {
      // given
      const config = { defaultConcurrency: 5 }
@@ -1842,13 +2022,14 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
 })

 describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
-  test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })

    const task: BackgroundTask = {
@@ -1874,12 +2055,13 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("running")
  })

-  test("should NOT interrupt task with recent lastUpdate", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
+   test("should NOT interrupt task with recent lastUpdate", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })

@@ -1906,11 +2088,12 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("running")
  })

-  test("should interrupt task with stale lastUpdate (> 3min)", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
+   test("should interrupt task with stale lastUpdate (> 3min)", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
      },
    }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
@@ -1942,10 +2125,11 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.completedAt).toBeDefined()
  })

-  test("should respect custom staleTimeoutMs config", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
+   test("should respect custom staleTimeoutMs config", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
        abort: async () => ({}),
      },
    }
@@ -1976,13 +2160,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.error).toContain("Stale timeout")
  })

-  test("should release concurrency before abort", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should release concurrency before abort", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
    stubNotifyParentSession(manager)

@@ -2011,13 +2196,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("cancelled")
  })

-  test("should handle multiple stale tasks in same poll cycle", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should handle multiple stale tasks in same poll cycle", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
    stubNotifyParentSession(manager)

@@ -2062,13 +2248,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
    expect(task2.status).toBe("cancelled")
  })

-  test("should use default timeout when config not provided", async () => {
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should use default timeout when config not provided", async () => {
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)

@@ -2097,18 +2284,19 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
 })

 describe("BackgroundManager.shutdown session abort", () => {
-  test("should call session.abort for all running tasks during shutdown", () => {
-    // given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-      },
-    }
+   test("should call session.abort for all running tasks during shutdown", () => {
+     // given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)

    const task1: BackgroundTask = {
@@ -2146,18 +2334,19 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(abortedSessionIDs).toHaveLength(2)
  })

-  test("should not call session.abort for completed or cancelled tasks", () => {
-    // given
-    const abortedSessionIDs: string[] = []
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async (args: { path: { id: string } }) => {
-          abortedSessionIDs.push(args.path.id)
-          return {}
-        },
-      },
-    }
+   test("should not call session.abort for completed or cancelled tasks", () => {
+     // given
+     const abortedSessionIDs: string[] = []
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async (args: { path: { id: string } }) => {
+           abortedSessionIDs.push(args.path.id)
+           return {}
+         },
+       },
+     }
    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)

    const completedTask: BackgroundTask = {
@@ -2206,15 +2395,16 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(abortedSessionIDs).toHaveLength(0)
  })

-  test("should call onShutdown callback during shutdown", () => {
-    // given
-    let shutdownCalled = false
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should call onShutdown callback during shutdown", () => {
+     // given
+     let shutdownCalled = false
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager(
      { client, directory: tmpdir() } as unknown as PluginInput,
      undefined,
@@ -2232,14 +2422,15 @@ describe("BackgroundManager.shutdown session abort", () => {
    expect(shutdownCalled).toBe(true)
  })

-  test("should not throw when onShutdown callback throws", () => {
-    // given
-    const client = {
-      session: {
-        prompt: async () => ({}),
-        abort: async () => ({}),
-      },
-    }
+   test("should not throw when onShutdown callback throws", () => {
+     // given
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+       },
+     }
    const manager = new BackgroundManager(
      { client, directory: tmpdir() } as unknown as PluginInput,
      undefined,
@@ -2255,6 +2446,69 @@ describe("BackgroundManager.shutdown session abort", () => {
  })
 })

+describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
+  test("should cancel descendant tasks when parent session is deleted", () => {
+    // given
+    const manager = createBackgroundManager()
+    const parentSessionID = "session-parent"
+    const childTask = createMockTask({
+      id: "task-child",
+      sessionID: "session-child",
+      parentSessionID,
+      status: "running",
+    })
+    const siblingTask = createMockTask({
+      id: "task-sibling",
+      sessionID: "session-sibling",
+      parentSessionID,
+      status: "running",
+    })
+    const grandchildTask = createMockTask({
+      id: "task-grandchild",
+      sessionID: "session-grandchild",
+      parentSessionID: "session-child",
+      status: "pending",
+      startedAt: undefined,
+      queuedAt: new Date(),
+    })
+    const unrelatedTask = createMockTask({
+      id: "task-unrelated",
+      sessionID: "session-unrelated",
+      parentSessionID: "other-parent",
+      status: "running",
+    })
+
+    const taskMap = getTaskMap(manager)
+    taskMap.set(childTask.id, childTask)
+    taskMap.set(siblingTask.id, siblingTask)
+    taskMap.set(grandchildTask.id, grandchildTask)
+    taskMap.set(unrelatedTask.id, unrelatedTask)
+
+    const pendingByParent = getPendingByParent(manager)
+    pendingByParent.set(parentSessionID, new Set([childTask.id, siblingTask.id]))
+    pendingByParent.set("session-child", new Set([grandchildTask.id]))
+
+    // when
+    manager.handleEvent({
+      type: "session.deleted",
+      properties: { info: { id: parentSessionID } },
+    })
+
+    // then
+    expect(taskMap.has(childTask.id)).toBe(false)
+    expect(taskMap.has(siblingTask.id)).toBe(false)
+    expect(taskMap.has(grandchildTask.id)).toBe(false)
+    expect(taskMap.has(unrelatedTask.id)).toBe(true)
+    expect(childTask.status).toBe("cancelled")
+    expect(siblingTask.status).toBe("cancelled")
+    expect(grandchildTask.status).toBe("cancelled")
+    expect(pendingByParent.get(parentSessionID)).toBeUndefined()
+    expect(pendingByParent.get("session-child")).toBeUndefined()
+
+    manager.shutdown()
+  })
+})
+
 describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
  function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
    return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
@@ -2408,3 +2662,182 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
    expect(completionTimers.size).toBe(0)
  })
 })
+
+describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
+  test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
+    //#given - a running task that is still remainingMs short of MIN_IDLE_TIME_MS when idle fires
+    const sessionID = "session-early-idle"
+    const messagesCalls: string[] = []
+    const realDateNow = Date.now
+    const baseNow = realDateNow()
+
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+        messages: async (args: { path: { id: string } }) => {
+          messagesCalls.push(args.path.id)
+          return {
+            data: [
+              {
+                info: { role: "assistant" },
+                parts: [{ type: "text", text: "ok" }],
+              },
+            ],
+          }
+        },
+        todo: async () => ({ data: [] }),
+      },
+    }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const remainingMs = 100 // the manager defers idle handling by MIN_IDLE_TIME_MS - elapsed, i.e. this long
+    const task: BackgroundTask = {
+      id: "task-early-idle",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "early idle task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(baseNow),
+    }
+
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - session.idle fires remainingMs before the minimum idle time has elapsed
+    try {
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
+      manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+
+      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
+
+      //#then - idle should be deferred (not dropped), and task should eventually complete
+      expect(task.status).toBe("running")
+      await new Promise((resolve) => setTimeout(resolve, remainingMs + 120)) // outlast the real deferral timer
+      expect(task.status).toBe("completed")
+      expect(messagesCalls).toEqual([sessionID])
+    } finally {
+      Date.now = realDateNow
+      manager.shutdown()
+    }
+  })
+
+  test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
+     //#given - a running task started more than MIN_IDLE_TIME_MS ago
+     const sessionID = "session-late-idle"
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+         messages: async () => ({
+           data: [
+             {
+               info: { role: "assistant" },
+               parts: [{ type: "text", text: "ok" }],
+             },
+           ],
+         }),
+         todo: async () => ({ data: [] }),
+       },
+     }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const task: BackgroundTask = {
+      id: "task-late-idle",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "late idle task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
+    }
+
+    getTaskMap(manager).set(task.id, task)
+
+    //#when
+    manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+
+    //#then - should be processed immediately
+    await new Promise((resolve) => setTimeout(resolve, 10))
+    expect(task.status).toBe("completed")
+
+    manager.shutdown()
+  })
+
+  test("should not process deferred idle if task already completed by other means", async () => {
+    //#given - a running task
+    const sessionID = "session-deferred-noop"
+    let messagesCallCount = 0
+    const realDateNow = Date.now
+    const baseNow = realDateNow()
+
+     const client = {
+       session: {
+         prompt: async () => ({}),
+         promptAsync: async () => ({}),
+         abort: async () => ({}),
+         messages: async () => {
+           messagesCallCount += 1
+           return {
+             data: [
+               {
+                 info: { role: "assistant" },
+                 parts: [{ type: "text", text: "ok" }],
+               },
+             ],
+           }
+        },
+        todo: async () => ({ data: [] }),
+      },
+    }
+
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    stubNotifyParentSession(manager)
+
+    const remainingMs = 120
+    const task: BackgroundTask = {
+      id: "task-deferred-noop",
+      sessionID,
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "deferred noop task",
+      prompt: "test",
+      agent: "explore",
+      status: "running",
+      startedAt: new Date(baseNow),
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - session.idle fires early, then the task completes via another path before the deferral timer fires
+    try {
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
+      manager.handleEvent({ type: "session.idle", properties: { sessionID } })
+      expect(messagesCallCount).toBe(0)
+
+      await tryCompleteTaskForTest(manager, task)
+      expect(task.status).toBe("completed")
+
+      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
+      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
+
+      //#then - deferred callback should be a no-op
+      await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
+      expect(task.status).toBe("completed")
+      expect(messagesCallCount).toBe(0)
+    } finally {
+      Date.now = realDateNow
+      manager.shutdown()
+    }
+  })
+})
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -88,6 +88,8 @@ export class BackgroundManager {
  private queuesByKey: Map<string, QueueItem[]> = new Map()
  private processingKeys: Set<string> = new Set()
  private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
+  private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
+  private notificationQueueByParent: Map<string, Promise<void>> = new Map()

  constructor(
    ctx: PluginInput,
@@ -234,13 +236,17 @@ export class BackgroundManager {
    const parentDirectory = parentSession?.data?.directory ?? this.directory
    log(`[background-agent] Parent dir: ${parentSession?.data?.directory}, using: ${parentDirectory}`)

+    const inheritedPermission = (parentSession as any)?.data?.permission
+    const permissionRules = Array.isArray(inheritedPermission)
+      ? inheritedPermission.filter((r: any) => r?.permission !== "question")
+      : []
+    permissionRules.push({ permission: "question", action: "deny" as const, pattern: "*" })
+
    const createResult = await this.client.session.create({
      body: {
        parentID: input.parentSessionID,
        title: `${input.description} (@${input.agent} subagent)`,
-        permission: [
-          { permission: "question", action: "deny" as const, pattern: "*" },
-        ],
+        permission: permissionRules,
      } as any,
      query: {
        directory: parentDirectory,
@@ -309,7 +315,7 @@ export class BackgroundManager {
      promptLength: input.prompt.length,
    })

-    // Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
+    // Fire-and-forget prompt via promptAsync (no response body needed)
    // Include model if caller provided one (e.g., from Sisyphus category configs)
    // IMPORTANT: variant must be a top-level field in the body, NOT nested inside model
    // OpenCode's PromptInput schema expects: { model: { providerID, modelID }, variant: "max" }
@@ -328,7 +334,6 @@ export class BackgroundManager {
        tools: {
          ...getAgentToolRestrictions(input.agent),
          task: false,
-          delegate_task: false,
          call_omo_agent: true,
          question: false,
        },
@@ -357,7 +362,8 @@ export class BackgroundManager {
        }).catch(() => {})

        this.markForNotification(existingTask)
-        this.notifyParentSession(existingTask).catch(err => {
+        this.cleanupPendingByParent(existingTask)
+        this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
          log("[background-agent] Failed to notify on error:", err)
        })
      }
@@ -410,7 +416,7 @@ export class BackgroundManager {
  }

  /**
-   * Track a task created elsewhere (e.g., from delegate_task) for notification tracking.
+   * Track a task created elsewhere (e.g., by the `task` tool) for notification tracking.
   * This allows tasks created by other tools to receive the same toast/prompt notifications.
   */
  async trackTask(input: {
@@ -458,7 +464,7 @@ export class BackgroundManager {
      return existingTask
    }

-    const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task"
+    const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"

    // Acquire concurrency slot if a key is provided
    if (input.concurrencyKey) {
@@ -472,7 +478,7 @@ export class BackgroundManager {
      parentMessageID: "",
      description: input.description,
      prompt: "",
-      agent: input.agent || "delegate_task",
+      agent: input.agent || "task",
      status: "running",
      startedAt: new Date(),
      progress: {
@@ -570,7 +576,7 @@ export class BackgroundManager {
      promptLength: input.prompt.length,
    })

-    // Use prompt() instead of promptAsync() to properly initialize agent loop
+    // Fire-and-forget prompt via promptAsync (no response body needed)
    // Include model if task has one (preserved from original launch with category config)
    // variant must be top-level in body, not nested inside model (OpenCode PromptInput schema)
    const resumeModel = existingTask.model
@@ -578,7 +584,7 @@ export class BackgroundManager {
      : undefined
    const resumeVariant = existingTask.model?.variant

-    this.client.session.prompt({
+    this.client.session.promptAsync({
      path: { id: existingTask.sessionID },
      body: {
        agent: existingTask.agent,
@@ -587,7 +593,6 @@ export class BackgroundManager {
        tools: {
          ...getAgentToolRestrictions(existingTask.agent),
          task: false,
-          delegate_task: false,
          call_omo_agent: true,
          question: false,
        },
@@ -614,7 +619,8 @@ export class BackgroundManager {
      }

      this.markForNotification(existingTask)
-      this.notifyParentSession(existingTask).catch(err => {
+      this.cleanupPendingByParent(existingTask)
+      this.enqueueNotificationForParent(existingTask.parentSessionID, () => this.notifyParentSession(existingTask)).catch(err => {
        log("[background-agent] Failed to notify on resume error:", err)
      })
    })
@@ -651,6 +657,13 @@ export class BackgroundManager {
      const task = this.findBySession(sessionID)
      if (!task) return

+      // Clear any pending idle deferral timer since the task is still active
+      const existingTimer = this.idleDeferralTimers.get(task.id)
+      if (existingTimer) {
+        clearTimeout(existingTimer)
+        this.idleDeferralTimers.delete(task.id)
+      }
+
      if (partInfo?.type === "tool" || partInfo?.tool) {
        if (!task.progress) {
          task.progress = {
@@ -677,7 +690,17 @@ export class BackgroundManager {
      // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
      const elapsedMs = Date.now() - startedAt.getTime()
      if (elapsedMs < MIN_IDLE_TIME_MS) {
-        log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
+        const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
+        if (!this.idleDeferralTimers.has(task.id)) {
+          log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
+          const timer = setTimeout(() => {
+            this.idleDeferralTimers.delete(task.id)
+            this.handleEvent({ type: "session.idle", properties: { sessionID } })
+          }, remainingMs)
+          this.idleDeferralTimers.set(task.id, timer)
+        } else {
+          log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
+        }
        return
      }

@@ -718,28 +741,47 @@ export class BackgroundManager {
      if (!info || typeof info.id !== "string") return
      const sessionID = info.id

-      const task = this.findBySession(sessionID)
-      if (!task) return
-
-      if (task.status === "running") {
-        task.status = "cancelled"
-        task.completedAt = new Date()
-        task.error = "Session deleted"
+      const tasksToCancel = new Map<string, BackgroundTask>()
+      const directTask = this.findBySession(sessionID)
+      if (directTask) {
+        tasksToCancel.set(directTask.id, directTask)
+      }
+      for (const descendant of this.getAllDescendantTasks(sessionID)) {
+        tasksToCancel.set(descendant.id, descendant)
      }

-       if (task.concurrencyKey) {
-         this.concurrencyManager.release(task.concurrencyKey)
-         task.concurrencyKey = undefined
-       }
-      const existingTimer = this.completionTimers.get(task.id)
-      if (existingTimer) {
-        clearTimeout(existingTimer)
-        this.completionTimers.delete(task.id)
+      if (tasksToCancel.size === 0) return
+
+      for (const task of tasksToCancel.values()) {
+        if (task.status === "running" || task.status === "pending") {
+          void this.cancelTask(task.id, {
+            source: "session.deleted",
+            reason: "Session deleted",
+            skipNotification: true,
+          }).catch(err => {
+            log("[background-agent] Failed to cancel task on session.deleted:", { taskId: task.id, error: err })
+          })
+        }
+
+        const existingTimer = this.completionTimers.get(task.id)
+        if (existingTimer) {
+          clearTimeout(existingTimer)
+          this.completionTimers.delete(task.id)
+        }
+
+        const idleTimer = this.idleDeferralTimers.get(task.id)
+        if (idleTimer) {
+          clearTimeout(idleTimer)
+          this.idleDeferralTimers.delete(task.id)
+        }
+
+        this.cleanupPendingByParent(task)
+        this.tasks.delete(task.id)
+        this.clearNotificationsForTask(task.id)
+        if (task.sessionID) {
+          subagentSessions.delete(task.sessionID)
+        }
      }
-      this.cleanupPendingByParent(task)
-      this.tasks.delete(task.id)
-      this.clearNotificationsForTask(task.id)
-      subagentSessions.delete(sessionID)
    }
  }

@@ -890,6 +932,12 @@ export class BackgroundManager {
      this.completionTimers.delete(task.id)
    }

+    const idleTimer = this.idleDeferralTimers.get(task.id)
+    if (idleTimer) {
+      clearTimeout(idleTimer)
+      this.idleDeferralTimers.delete(task.id)
+    }
+
    this.cleanupPendingByParent(task)

    if (abortSession && task.sessionID) {
@@ -906,7 +954,7 @@ export class BackgroundManager {
    this.markForNotification(task)

    try {
-      await this.notifyParentSession(task)
+      await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      log(`[background-agent] Task cancelled via ${source}:`, task.id)
    } catch (err) {
      log("[background-agent] Error in notifyParentSession for cancelled task:", { taskId: task.id, error: err })
@@ -1025,6 +1073,15 @@ export class BackgroundManager {

    this.markForNotification(task)

+    // Ensure pending tracking is cleaned up even if notification fails
+    this.cleanupPendingByParent(task)
+
+    const idleTimer = this.idleDeferralTimers.get(task.id)
+    if (idleTimer) {
+      clearTimeout(idleTimer)
+      this.idleDeferralTimers.delete(task.id)
+    }
+
    if (task.sessionID) {
      this.client.session.abort({
        path: { id: task.sessionID },
@@ -1032,7 +1089,7 @@ export class BackgroundManager {
    }

    try {
-      await this.notifyParentSession(task)
+      await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      log(`[background-agent] Task completed via ${source}:`, task.id)
    } catch (err) {
      log("[background-agent] Error in notifyParentSession:", { taskId: task.id, error: err })
@@ -1062,16 +1119,19 @@ export class BackgroundManager {

    // Update pending tracking and check if all tasks complete
    const pendingSet = this.pendingByParent.get(task.parentSessionID)
+    let allComplete = false
+    let remainingCount = 0
    if (pendingSet) {
      pendingSet.delete(task.id)
-      if (pendingSet.size === 0) {
+      remainingCount = pendingSet.size
+      allComplete = remainingCount === 0
+      if (allComplete) {
        this.pendingByParent.delete(task.parentSessionID)
      }
+    } else {
+      allComplete = true
    }

-    const allComplete = !pendingSet || pendingSet.size === 0
-    const remainingCount = pendingSet?.size ?? 0
-
    const statusText = task.status === "completed" ? "COMPLETED" : "CANCELLED"
    const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
    
@@ -1146,7 +1206,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    })

    try {
-      await this.client.session.prompt({
+      await this.client.session.promptAsync({
        path: { id: task.parentSessionID },
        body: {
          noReply: !allComplete,
@@ -1326,7 +1386,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      log(`[background-agent] Task ${task.id} interrupted: stale timeout`)

      try {
-        await this.notifyParentSession(task)
+        await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
      } catch (err) {
        log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
      }
@@ -1511,16 +1571,46 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    }
    this.completionTimers.clear()

+    for (const timer of this.idleDeferralTimers.values()) {
+      clearTimeout(timer)
+    }
+    this.idleDeferralTimers.clear()
+
    this.concurrencyManager.clear()
    this.tasks.clear()
    this.notifications.clear()
    this.pendingByParent.clear()
+    this.notificationQueueByParent.clear()
    this.queuesByKey.clear()
    this.processingKeys.clear()
    this.unregisterProcessCleanup()
    log("[background-agent] Shutdown complete")

  }
+
+  private enqueueNotificationForParent(
+    parentSessionID: string | undefined,
+    operation: () => Promise<void>
+  ): Promise<void> {
+    if (!parentSessionID) { // nothing to order against: run the operation right away
+      return operation()
+    }
+    // Per-parent serialization: start this operation only after the one queued before it settles.
+    const previous = this.notificationQueueByParent.get(parentSessionID) ?? Promise.resolve()
+    const current = previous
+      .catch(() => {}) // a failed predecessor must not prevent this operation from running
+      .then(operation)
+    // Record the new tail so the next call for this parent chains behind it.
+    this.notificationQueueByParent.set(parentSessionID, current)
+    // After settling, remove the map entry unless a later call has already replaced it.
+    void current.finally(() => {
+      if (this.notificationQueueByParent.get(parentSessionID) === current) {
+        this.notificationQueueByParent.delete(parentSessionID)
+      }
+    }).catch(() => {}) // silences only this cleanup chain; `current` still rejects for the caller
+    // Resolves or rejects with the outcome of `operation`.
+    return current
+  }
 }

 function registerProcessSignal(
--- a/src/features/background-agent/result-handler.ts
+++ b/src/features/background-agent/result-handler.ts
@@ -240,7 +240,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
  })

  try {
-    await client.session.prompt({
+    await client.session.promptAsync({
      path: { id: task.parentSessionID },
      body: {
        noReply: !allComplete,
--- a/src/features/background-agent/spawner.test.ts
+++ b/src/features/background-agent/spawner.test.ts
@@ -0,0 +1,65 @@
+import { describe, test, expect } from "bun:test"
+
+import { createTask, startTask } from "./spawner"
+
+describe("background-agent spawner.startTask", () => {
+  test("should inherit parent session permission rules (and force deny question)", async () => {
+    //#given
+    const createCalls: any[] = []
+    const parentPermission = [
+      { permission: "question", action: "allow" as const, pattern: "*" },
+      { permission: "plan_enter", action: "deny" as const, pattern: "*" },
+    ]
+    // Stub client: session.get returns the parent's directory and rules; session.create records its arguments.
+    const client = {
+      session: {
+        get: async () => ({ data: { directory: "/parent/dir", permission: parentPermission } }),
+        create: async (args?: any) => {
+          createCalls.push(args)
+          return { data: { id: "ses_child" } }
+        },
+        promptAsync: async () => ({}),
+      },
+    }
+
+    const task = createTask({
+      description: "Test task",
+      prompt: "Do work",
+      agent: "explore",
+      parentSessionID: "ses_parent",
+      parentMessageID: "msg_parent",
+    })
+
+    const item = {
+      task,
+      input: {
+        description: task.description,
+        prompt: task.prompt,
+        agent: task.agent,
+        parentSessionID: task.parentSessionID,
+        parentMessageID: task.parentMessageID,
+        parentModel: task.parentModel,
+        parentAgent: task.parentAgent,
+        model: task.model,
+      },
+    }
+
+    const ctx = {
+      client,
+      directory: "/fallback",
+      concurrencyManager: { release: () => {} },
+      tmuxEnabled: false,
+      onTaskError: () => {},
+    }
+
+    //#when
+    await startTask(item as any, ctx as any)
+    // Expected: the parent's "question" rule is dropped, its other rules are kept, and a "question" deny rule comes last.
+    //#then
+    expect(createCalls).toHaveLength(1)
+    expect(createCalls[0]?.body?.permission).toEqual([
+      { permission: "plan_enter", action: "deny", pattern: "*" },
+      { permission: "question", action: "deny", pattern: "*" },
+    ])
+  })
+})
--- a/src/features/background-agent/spawner.ts
+++ b/src/features/background-agent/spawner.ts
@@ -58,13 +58,17 @@ export async function startTask(
  const parentDirectory = parentSession?.data?.directory ?? directory
  log(`[background-agent] Parent dir: ${parentSession?.data?.directory}, using: ${parentDirectory}`)

+  const inheritedPermission = (parentSession as any)?.data?.permission
+  const permissionRules = Array.isArray(inheritedPermission)
+    ? inheritedPermission.filter((r: any) => r?.permission !== "question")
+    : []
+  permissionRules.push({ permission: "question", action: "deny" as const, pattern: "*" })
+
  const createResult = await client.session.create({
    body: {
      parentID: input.parentSessionID,
      title: `Background: ${input.description}`,
-      permission: [
-        { permission: "question", action: "deny" as const, pattern: "*" },
-      ],
+      permission: permissionRules,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    } as any,
    query: {
@@ -146,7 +150,6 @@ export async function startTask(
      tools: {
        ...getAgentToolRestrictions(input.agent),
        task: false,
-        delegate_task: false,
        call_omo_agent: true,
        question: false,
      },
@@ -222,7 +225,7 @@ export async function resumeTask(
    : undefined
  const resumeVariant = task.model?.variant

-  client.session.prompt({
+  client.session.promptAsync({
    path: { id: task.sessionID },
    body: {
      agent: task.agent,
@@ -231,7 +234,6 @@ export async function resumeTask(
      tools: {
        ...getAgentToolRestrictions(task.agent),
        task: false,
-        delegate_task: false,
        call_omo_agent: true,
        question: false,
      },
--- a/src/features/background-agent/spawner/background-session-creator.ts
+++ b/src/features/background-agent/spawner/background-session-creator.ts
@@ -0,0 +1,46 @@
+import type { OpencodeClient } from "../constants"
+import type { ConcurrencyManager } from "../concurrency"
+import type { LaunchInput } from "../types"
+import { log } from "../../../shared"
+
+/**
+ * Creates the child session for a background task and returns its session ID.
+ * Every failure path releases `concurrencyKey` on the manager before the error propagates.
+ */
+export async function createBackgroundSession(options: {
+  client: OpencodeClient
+  input: LaunchInput
+  parentDirectory: string
+  concurrencyManager: ConcurrencyManager
+  concurrencyKey: string
+}): Promise<string> {
+  const { client, input, parentDirectory, concurrencyManager, concurrencyKey } = options
+  const body = {
+    parentID: input.parentSessionID,
+    title: `Background: ${input.description}`,
+    permission: [{ permission: "question", action: "deny" as const, pattern: "*" }],
+  }
+  const createResult = await client.session
+    .create({ body, query: { directory: parentDirectory } })
+    .catch((error) => {
+      concurrencyManager.release(concurrencyKey)
+      throw error
+    })
+
+  if (createResult.error) {
+    concurrencyManager.release(concurrencyKey)
+    // A plain-object error would interpolate as "[object Object]"; serialize it instead.
+    const { error } = createResult
+    const isPlainObject = typeof error === "object" && !(error instanceof Error)
+    const detail = isPlainObject ? JSON.stringify(error) : String(error)
+    throw new Error(`Failed to create background session: ${detail}`)
+  }
+
+  const sessionID = createResult.data?.id
+  if (!sessionID) {
+    concurrencyManager.release(concurrencyKey)
+    throw new Error("Failed to create background session: API returned no session ID")
+  }
+  log("[background-agent] Background session created", { sessionID })
+  return sessionID
+}
--- a/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
+++ b/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
@@ -0,0 +1,7 @@
+import type { LaunchInput } from "../types"
+
+export function getConcurrencyKeyFromLaunchInput(input: LaunchInput): string {
+  // An explicit model is keyed as "providerID/modelID"; otherwise the agent name is the key.
+  const { model, agent } = input
+  return model ? `${model.providerID}/${model.modelID}` : agent
+}
--- a/src/features/background-agent/spawner/parent-directory-resolver.ts
+++ b/src/features/background-agent/spawner/parent-directory-resolver.ts
@@ -0,0 +1,21 @@
+import type { OpencodeClient } from "../constants"
+import { log } from "../../../shared"
+
+export async function resolveParentDirectory(options: {
+  client: OpencodeClient
+  parentSessionID: string
+  defaultDirectory: string
+}): Promise<string> {
+  // Best-effort lookup: a rejected fetch is logged and handled like a missing parent session.
+  const session = await options.client.session
+    .get({ path: { id: options.parentSessionID } })
+    .catch((cause) => {
+      log(`[background-agent] Failed to get parent session: ${cause}`)
+      return null
+    })
+  // Use the caller's default whenever the session reports no directory (null or undefined).
+  const reportedDirectory = session?.data?.directory
+  const resolvedDirectory = reportedDirectory ?? options.defaultDirectory
+  log(`[background-agent] Parent dir: ${reportedDirectory}, using: ${resolvedDirectory}`)
+  return resolvedDirectory
+}
--- a/src/features/background-agent/spawner/tmux-callback-invoker.ts
+++ b/src/features/background-agent/spawner/tmux-callback-invoker.ts
@@ -0,0 +1,39 @@
+import type { OnSubagentSessionCreated } from "../constants"
+import { TMUX_CALLBACK_DELAY_MS } from "../constants"
+import { log } from "../../../shared"
+import { isInsideTmux } from "../../../shared/tmux"
+
+export async function maybeInvokeTmuxCallback(options: {
+  onSubagentSessionCreated?: OnSubagentSessionCreated
+  tmuxEnabled: boolean
+  sessionID: string
+  parentID: string
+  title: string
+}): Promise<void> {
+  const { onSubagentSessionCreated, tmuxEnabled, sessionID, parentID, title } = options
+  // Notifies the optional tmux hook about a new subagent session, then waits TMUX_CALLBACK_DELAY_MS.
+  log("[background-agent] tmux callback check", {
+    hasCallback: !!onSubagentSessionCreated,
+    tmuxEnabled,
+    isInsideTmux: isInsideTmux(),
+    sessionID,
+    parentID,
+  })
+  // The hook runs only when it is provided, tmux is enabled, and isInsideTmux() reports true.
+  if (!onSubagentSessionCreated || !tmuxEnabled || !isInsideTmux()) {
+    log("[background-agent] SKIP tmux callback - conditions not met")
+    return
+  }
+  // A rejected hook is logged and not rethrown, so the delay below still runs.
+  log("[background-agent] Invoking tmux callback NOW", { sessionID })
+  await onSubagentSessionCreated({
+    sessionID,
+    parentID,
+    title,
+  }).catch((error) => {
+    log("[background-agent] Failed to spawn tmux pane:", error)
+  })
+  // Hold the caller for the fixed delay before returning.
+  log("[background-agent] tmux callback completed, waiting")
+  await new Promise<void>((resolve) => setTimeout(resolve, TMUX_CALLBACK_DELAY_MS))
+}
--- a/src/features/builtin-commands/commands.test.ts
+++ b/src/features/builtin-commands/commands.test.ts
@@ -0,0 +1,138 @@
+import { describe, test, expect } from "bun:test"
+import { loadBuiltinCommands } from "./commands"
+import { HANDOFF_TEMPLATE } from "./templates/handoff"
+import type { BuiltinCommandName } from "./types"
+
+describe("loadBuiltinCommands", () => {
+  test("should include handoff command in loaded commands", () => {
+    //#given
+    const disabledCommands: BuiltinCommandName[] = []
+
+    //#when
+    const commands = loadBuiltinCommands(disabledCommands)
+
+    //#then
+    expect(commands.handoff).toBeDefined()
+    expect(commands.handoff.name).toBe("handoff")
+  })
+
+  test("should exclude handoff when disabled", () => {
+    //#given
+    const disabledCommands: BuiltinCommandName[] = ["handoff"]
+
+    //#when
+    const commands = loadBuiltinCommands(disabledCommands)
+
+    //#then
+    expect(commands.handoff).toBeUndefined()
+  })
+
+  test("should include handoff template content in command template", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.template).toContain(HANDOFF_TEMPLATE)
+  })
+
+  test("should include session context variables in handoff template", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.template).toContain("$SESSION_ID")
+    expect(commands.handoff.template).toContain("$TIMESTAMP")
+    expect(commands.handoff.template).toContain("$ARGUMENTS")
+  })
+
+  test("should have correct description for handoff", () => {
+    //#given - no disabled commands
+
+    //#when
+    const commands = loadBuiltinCommands()
+
+    //#then
+    expect(commands.handoff.description).toContain("context summary")
+  })
+})
+
+describe("HANDOFF_TEMPLATE", () => {
+  test("should include session reading instruction", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("session_read")
+  })
+
+  test("should include compaction-style sections in output format", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("USER REQUESTS (AS-IS)")
+    expect(HANDOFF_TEMPLATE).toContain("EXPLICIT CONSTRAINTS")
+  })
+
+  test("should include programmatic context gathering instructions", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("todoread")
+    expect(HANDOFF_TEMPLATE).toContain("git diff")
+    expect(HANDOFF_TEMPLATE).toContain("git status")
+  })
+
+  test("should include context extraction format", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("WORK COMPLETED")
+    expect(HANDOFF_TEMPLATE).toContain("CURRENT STATE")
+    expect(HANDOFF_TEMPLATE).toContain("PENDING TASKS")
+    expect(HANDOFF_TEMPLATE).toContain("KEY FILES")
+    expect(HANDOFF_TEMPLATE).toContain("IMPORTANT DECISIONS")
+    expect(HANDOFF_TEMPLATE).toContain("CONTEXT FOR CONTINUATION")
+    expect(HANDOFF_TEMPLATE).toContain("GOAL")
+  })
+
+  test("should enforce first person perspective", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("first person perspective")
+  })
+
+  test("should limit key files to 10", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("Maximum 10 files")
+  })
+
+  test("should instruct plain text format without markdown", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("Plain text with bullets")
+    expect(HANDOFF_TEMPLATE).toContain("No markdown headers")
+  })
+
+  test("should include user instructions for new session", () => {
+    //#given - the template string
+
+    //#when / #then
+    expect(HANDOFF_TEMPLATE).toContain("new session")
+    expect(HANDOFF_TEMPLATE).toContain("opencode")
+  })
+
+  test("should not contain emojis", () => {
+    //#given - the template string
+    // U+24C2 and U+1F170-U+1F251 are listed separately: one range across them also matches CJK and most non-emoji text.
+    //#when / #then
+    const emojiRegex = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2702}-\u{27B0}\u{24C2}\u{1F170}-\u{1F251}\u{2B00}-\u{2BFF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}]/u
+    expect(emojiRegex.test(HANDOFF_TEMPLATE)).toBe(false)
+  })
+})
--- a/src/features/builtin-commands/commands.ts
+++ b/src/features/builtin-commands/commands.ts
@@ -5,6 +5,7 @@ import { RALPH_LOOP_TEMPLATE, CANCEL_RALPH_TEMPLATE } from "./templates/ralph-lo
 import { STOP_CONTINUATION_TEMPLATE } from "./templates/stop-continuation"
 import { REFACTOR_TEMPLATE } from "./templates/refactor"
 import { START_WORK_TEMPLATE } from "./templates/start-work"
+import { HANDOFF_TEMPLATE } from "./templates/handoff"

 const BUILTIN_COMMAND_DEFINITIONS: Record<BuiltinCommandName, Omit<CommandDefinition, "name">> = {
  "init-deep": {
@@ -77,6 +78,22 @@ $ARGUMENTS
 ${STOP_CONTINUATION_TEMPLATE}
 </command-instruction>`,
  },
+  handoff: {
+    description: "(builtin) Create a detailed context summary for continuing work in a new session",
+    template: `<command-instruction>
+${HANDOFF_TEMPLATE}
+</command-instruction>
+
+<session-context>
+Session ID: $SESSION_ID
+Timestamp: $TIMESTAMP
+</session-context>
+
+<user-request>
+$ARGUMENTS
+</user-request>`,
+    argumentHint: "[goal]",
+  },
 }

 export function loadBuiltinCommands(
--- a/src/features/builtin-commands/templates/handoff.ts
+++ b/src/features/builtin-commands/templates/handoff.ts
@@ -0,0 +1,177 @@
+export const HANDOFF_TEMPLATE = `# Handoff Command
+
+## Purpose
+
+Use /handoff when:
+- The current session context is getting too long and quality is degrading
+- You want to start fresh while preserving essential context from this session
+- The context window is approaching capacity
+
+This creates a detailed context summary that can be used to continue work in a new session.
+
+---
+
+# PHASE 0: VALIDATE REQUEST
+
+Before proceeding, confirm:
+- [ ] There is meaningful work or context in this session to preserve
+- [ ] The user wants to create a handoff summary (not just asking about it)
+
+If the session is nearly empty or has no meaningful context, inform the user there is nothing substantial to hand off.
+
+---
+
+# PHASE 1: GATHER PROGRAMMATIC CONTEXT
+
+Execute these tools to gather concrete data:
+
+1. session_read({ session_id: "$SESSION_ID" }) — full session history
+2. todoread() — current task progress
+3. Bash({ command: "git diff --stat HEAD~10..HEAD" }) — recent file changes
+4. Bash({ command: "git status --porcelain" }) — uncommitted changes
+
+Suggested execution order:
+
+\`\`\`
+session_read({ session_id: "$SESSION_ID" })
+todoread()
+Bash({ command: "git diff --stat HEAD~10..HEAD" })
+Bash({ command: "git status --porcelain" })
+\`\`\`
+
+Analyze the gathered outputs to understand:
+- What the user asked for (exact wording)
+- What work was completed
+- What tasks remain incomplete (include todo state)
+- What decisions were made
+- What files were modified or discussed (include git diff/stat + status)
+- What patterns, constraints, or preferences were established
+
+---
+
+# PHASE 2: EXTRACT CONTEXT
+
+Write the context summary from first person perspective ("I did...", "I told you...").
+
+Focus on:
+- Capabilities and behavior, not file-by-file implementation details
+- What matters for continuing the work
+- Avoiding excessive implementation details (variable names, storage keys, constants) unless critical
+- USER REQUESTS (AS-IS) must be verbatim (do not paraphrase)
+- EXPLICIT CONSTRAINTS must be verbatim only (do not invent)
+
+Questions to consider when extracting:
+- What did I just do or implement?
+- What instructions did I already give which are still relevant (e.g. follow patterns in the codebase)?
+- What files did I tell you are important or that I am working on?
+- Did I provide a plan or spec that should be included?
+- What did I already tell you that is important (libraries, patterns, constraints, preferences)?
+- What important technical details did I discover (APIs, methods, patterns)?
+- What caveats, limitations, or open questions did I find?
+
+---
+
+# PHASE 3: FORMAT OUTPUT
+
+Generate a handoff summary using this exact format:
+
+\`\`\`
+HANDOFF CONTEXT
+===============
+
+USER REQUESTS (AS-IS)
+---------------------
+- [Exact verbatim user requests - NOT paraphrased]
+
+GOAL
+----
+[One sentence describing what should be done next]
+
+WORK COMPLETED
+--------------
+- [First person bullet points of what was done]
+- [Include specific file paths when relevant]
+- [Note key implementation decisions]
+
+CURRENT STATE
+-------------
+- [Current state of the codebase or task]
+- [Build/test status if applicable]
+- [Any environment or configuration state]
+
+PENDING TASKS
+-------------
+- [Tasks that were planned but not completed]
+- [Next logical steps to take]
+- [Any blockers or issues encountered]
+- [Include current todo state from todoread()]
+
+KEY FILES
+---------
+- [path/to/file1] - [brief role description]
+- [path/to/file2] - [brief role description]
+(Maximum 10 files, prioritized by importance)
+- (Include files from git diff/stat and git status)
+
+IMPORTANT DECISIONS
+-------------------
+- [Technical decisions that were made and why]
+- [Trade-offs that were considered]
+- [Patterns or conventions established]
+
+EXPLICIT CONSTRAINTS
+--------------------
+- [Verbatim constraints only - from user or existing AGENTS.md]
+- If none, write: None
+
+CONTEXT FOR CONTINUATION
+------------------------
+- [What the next session needs to know to continue]
+- [Warnings or gotchas to be aware of]
+- [References to documentation if relevant]
+\`\`\`
+
+Rules for the summary:
+- Plain text with bullets
+- No markdown headers with # (use the format above with dashes)
+- No bold, italic, or code fences within content
+- Use workspace-relative paths for files
+- Keep it focused - only include what matters for continuation
+- Pick an appropriate length based on complexity
+- USER REQUESTS (AS-IS) and EXPLICIT CONSTRAINTS must be verbatim only
+
+---
+
+# PHASE 4: PROVIDE INSTRUCTIONS
+
+After generating the summary, instruct the user:
+
+\`\`\`
+---
+
+TO CONTINUE IN A NEW SESSION:
+
+1. Press 'n' in OpenCode TUI to open a new session, or run 'opencode' in a new terminal
+2. Paste the HANDOFF CONTEXT above as your first message
+3. Add your request: "Continue from the handoff context above. [Your next task]"
+
+The new session will have all context needed to continue seamlessly.
+\`\`\`
+
+---
+
+# IMPORTANT CONSTRAINTS
+
+- DO NOT attempt to programmatically create new sessions (no API available to agents)
+- DO provide a self-contained summary that works without access to this session
+- DO include workspace-relative file paths
+- DO NOT include sensitive information (API keys, credentials, secrets)
+- DO NOT exceed 10 files in the KEY FILES section
+- DO keep the GOAL section to a single sentence or short paragraph
+
+---
+
+# EXECUTE NOW
+
+Begin by gathering programmatic context, then synthesize the handoff summary.
+`
--- a/src/features/builtin-commands/templates/init-deep.ts
+++ b/src/features/builtin-commands/templates/init-deep.ts
@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.

 \`\`\`
 // Fire all at once, collect results later
-delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
-delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization")
-delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
-delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
-delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
-delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
+task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
+task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
+task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
+task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
+task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
+task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
 \`\`\`

 <dynamic-agents>
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
 Example spawning:
 \`\`\`
 // 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
-delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
-delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
-delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories")
+task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
+task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
+task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
 // ... more based on calculation
 \`\`\`
 </dynamic-agents>
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [

 **Mark "generate" as in_progress.**

+<critical>
+**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
+NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
+</critical>
+
 ### Root AGENTS.md (Full Treatment)

 \`\`\`markdown
@@ -240,7 +245,7 @@ Launch writing tasks for each location:

 \`\`\`
 for loc in AGENTS_LOCATIONS (except root):
-  delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\`
+  task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
    Generate AGENTS.md for: \${loc.path}
    - Reason: \${loc.reason}
    - 30-80 lines max
--- a/src/features/builtin-commands/types.ts
+++ b/src/features/builtin-commands/types.ts
@@ -1,6 +1,6 @@
 import type { CommandDefinition } from "../claude-code-command-loader"

-export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation"
+export type BuiltinCommandName = "init-deep" | "ralph-loop" | "cancel-ralph" | "ulw-loop" | "refactor" | "start-work" | "stop-continuation" | "handoff"

 export interface BuiltinCommandConfig {
  disabled_commands?: BuiltinCommandName[]
--- a/src/features/builtin-skills/git-master/SKILL.md
+++ b/src/features/builtin-skills/git-master/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: git-master
-description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
+description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
 ---

 # Git Master Agent
--- a/src/features/builtin-skills/skills/git-master.ts
+++ b/src/features/builtin-skills/skills/git-master.ts
@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
 export const gitMasterSkill: BuiltinSkill = {
  name: "git-master",
  description:
-    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
+    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
  template: `# Git Master Agent

 You are a Git expert combining three specializations:
--- a/src/features/claude-code-mcp-loader/loader.test.ts
+++ b/src/features/claude-code-mcp-loader/loader.test.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect, beforeEach, afterEach } from "bun:test"
+import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test"
 import { mkdirSync, writeFileSync, rmSync } from "fs"
 import { join } from "path"
 import { tmpdir } from "os"
@@ -8,6 +8,17 @@ const TEST_DIR = join(tmpdir(), "mcp-loader-test-" + Date.now())
 describe("getSystemMcpServerNames", () => {
  beforeEach(() => {
    mkdirSync(TEST_DIR, { recursive: true })
+
+    // Isolate tests from real user environment (e.g., ~/.claude.json).
+    // loader.ts reads user-level config via os.homedir() + getClaudeConfigDir().
+    mock.module("os", () => ({
+      homedir: () => TEST_DIR,
+      tmpdir,
+    }))
+
+    mock.module("../../shared", () => ({
+      getClaudeConfigDir: () => join(TEST_DIR, ".claude"),
+    }))
  })

  afterEach(() => {
@@ -126,37 +137,123 @@ describe("getSystemMcpServerNames", () => {
    }
  })

-  it("merges server names from multiple .mcp.json files", async () => {
-    // given
-    mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
-    
-    const projectMcp = {
-      mcpServers: {
-        playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
-      },
-    }
-    const localMcp = {
-      mcpServers: {
-        memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
-      },
-    }
-    
-    writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
-    writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))
+   it("merges server names from multiple .mcp.json files", async () => {
+     // given
+     mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
+     
+     const projectMcp = {
+       mcpServers: {
+         playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
+       },
+     }
+     const localMcp = {
+       mcpServers: {
+         memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
+       },
+     }
+     
+     writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
+     writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))

-    const originalCwd = process.cwd()
-    process.chdir(TEST_DIR)
+     const originalCwd = process.cwd()
+     process.chdir(TEST_DIR)

-    try {
-      // when
-      const { getSystemMcpServerNames } = await import("./loader")
-      const names = getSystemMcpServerNames()
+     try {
+       // when
+       const { getSystemMcpServerNames } = await import("./loader")
+       const names = getSystemMcpServerNames()

-      // then
-      expect(names.has("playwright")).toBe(true)
-      expect(names.has("memory")).toBe(true)
-    } finally {
-      process.chdir(originalCwd)
-    }
-  })
+       // then
+       expect(names.has("playwright")).toBe(true)
+       expect(names.has("memory")).toBe(true)
+     } finally {
+       process.chdir(originalCwd)
+     }
+   })
+
+    it("reads user-level MCP config from ~/.claude.json", async () => {
+      // given
+      const userConfigPath = join(TEST_DIR, ".claude.json")
+      const userMcpConfig = {
+        mcpServers: {
+          "user-server": {
+            command: "npx",
+            args: ["user-mcp-server"],
+          },
+        },
+      }
+
+      const originalCwd = process.cwd()
+      process.chdir(TEST_DIR)
+
+      try {
+        mock.module("os", () => ({
+          homedir: () => TEST_DIR,
+          tmpdir,
+        }))
+
+        writeFileSync(userConfigPath, JSON.stringify(userMcpConfig))
+
+        const { getSystemMcpServerNames } = await import("./loader")
+        const names = getSystemMcpServerNames()
+
+        expect(names.has("user-server")).toBe(true)
+      } finally {
+        process.chdir(originalCwd)
+        rmSync(userConfigPath, { force: true })
+      }
+    })
+
+    it("reads both ~/.claude.json and ~/.claude/.mcp.json for user scope", async () => {
+      // given: simulate both user-level config files
+      const userClaudeJson = join(TEST_DIR, ".claude.json")
+      const claudeDir = join(TEST_DIR, ".claude")
+      const claudeDirMcpJson = join(claudeDir, ".mcp.json")
+
+      mkdirSync(claudeDir, { recursive: true })
+
+      // ~/.claude.json has server-a
+      writeFileSync(userClaudeJson, JSON.stringify({
+        mcpServers: {
+          "server-from-claude-json": {
+            command: "npx",
+            args: ["server-a"],
+          },
+        },
+      }))
+
+      // ~/.claude/.mcp.json has server-b (CLI-managed)
+      writeFileSync(claudeDirMcpJson, JSON.stringify({
+        mcpServers: {
+          "server-from-mcp-json": {
+            command: "npx",
+            args: ["server-b"],
+          },
+        },
+      }))
+
+      const originalCwd = process.cwd()
+      process.chdir(TEST_DIR)
+
+      try {
+        mock.module("os", () => ({
+          homedir: () => TEST_DIR,
+          tmpdir,
+        }))
+
+        // Also mock getClaudeConfigDir to point to our test .claude dir
+        mock.module("../../shared", () => ({
+          getClaudeConfigDir: () => claudeDir,
+        }))
+
+        const { getSystemMcpServerNames } = await import("./loader")
+        const names = getSystemMcpServerNames()
+
+        // Both sources should be merged
+        expect(names.has("server-from-claude-json")).toBe(true)
+        expect(names.has("server-from-mcp-json")).toBe(true)
+      } finally {
+        process.chdir(originalCwd)
+      }
+    })
 })
--- a/src/features/claude-code-mcp-loader/loader.ts
+++ b/src/features/claude-code-mcp-loader/loader.ts
@@ -1,5 +1,6 @@
 import { existsSync, readFileSync } from "fs"
 import { join } from "path"
+import { homedir } from "os"
 import { getClaudeConfigDir } from "../../shared"
 import type {
  ClaudeCodeMcpConfig,
@@ -20,6 +21,7 @@ function getMcpConfigPaths(): McpConfigPath[] {
  const cwd = process.cwd()

  return [
+    { path: join(homedir(), ".claude.json"), scope: "user" },
    { path: join(claudeConfigDir, ".mcp.json"), scope: "user" },
    { path: join(cwd, ".mcp.json"), scope: "project" },
    { path: join(cwd, ".claude", ".mcp.json"), scope: "local" },
--- a/src/features/claude-tasks/index.ts
+++ b/src/features/claude-tasks/index.ts
@@ -1,2 +1,3 @@
 export * from "./types"
 export * from "./storage"
+export * from "./session-storage"
--- a/src/features/claude-tasks/session-storage.test.ts
+++ b/src/features/claude-tasks/session-storage.test.ts
@@ -0,0 +1,204 @@
+import { describe, test, expect, beforeEach, afterEach } from "bun:test"
+import { existsSync, mkdirSync, rmSync, writeFileSync, readdirSync } from "fs"
+import { join } from "path"
+import type { OhMyOpenCodeConfig } from "../../config/schema"
+import {
+  getSessionTaskDir,
+  listSessionTaskFiles,
+  listAllSessionDirs,
+  findTaskAcrossSessions,
+} from "./session-storage"
+
+const TEST_DIR = ".test-session-storage"
+const TEST_DIR_ABS = join(process.cwd(), TEST_DIR)
+
+/** Builds a minimal config whose task storage points at `storagePath`. */
+function makeConfig(storagePath: string): Partial<OhMyOpenCodeConfig> {
+  const tasks = { storage_path: storagePath, claude_code_compat: false }
+  return { sisyphus: { tasks } }
+}
+
+describe("getSessionTaskDir", () => {
+  test("returns session-scoped subdirectory under base task dir", () => {
+    //#given
+    const config = makeConfig("/tmp/tasks")
+    const sessionID = "ses_abc123"
+
+    //#when
+    const result = getSessionTaskDir(config, sessionID)
+
+    //#then
+    // Build the expectation with join() so the path separator matches the
+    // host platform instead of assuming POSIX "/".
+    expect(result).toBe(join("/tmp/tasks", sessionID))
+  })
+
+  test("uses relative storage path joined with cwd", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    const sessionID = "ses_xyz"
+
+    //#when
+    const result = getSessionTaskDir(config, sessionID)
+
+    //#then
+    expect(result).toBe(join(TEST_DIR_ABS, "ses_xyz"))
+  })
+})
+
+describe("listSessionTaskFiles", () => {
+  // Every test starts from, and leaves behind, an empty slate on disk.
+  const removeTestDir = () => {
+    if (existsSync(TEST_DIR_ABS)) {
+      rmSync(TEST_DIR_ABS, { recursive: true, force: true })
+    }
+  }
+
+  beforeEach(removeTestDir)
+  afterEach(removeTestDir)
+
+  test("returns empty array when session directory does not exist", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+
+    //#when
+    const taskIds = listSessionTaskFiles(config, "nonexistent-session")
+
+    //#then
+    expect(taskIds).toEqual([])
+  })
+
+  test("lists only T-*.json files in the session directory", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    const sessionDir = join(TEST_DIR_ABS, "ses_001")
+    mkdirSync(sessionDir, { recursive: true })
+    const fixtures: Array<[string, string]> = [
+      ["T-aaa.json", "{}"],
+      ["T-bbb.json", "{}"],
+      ["other.txt", "nope"],
+    ]
+    for (const [fileName, contents] of fixtures) {
+      writeFileSync(join(sessionDir, fileName), contents, "utf-8")
+    }
+
+    //#when
+    const taskIds = listSessionTaskFiles(config, "ses_001")
+
+    //#then
+    expect(taskIds).toHaveLength(2)
+    expect(taskIds).toContain("T-aaa")
+    expect(taskIds).toContain("T-bbb")
+  })
+
+  test("does not list tasks from other sessions", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    const firstDir = join(TEST_DIR_ABS, "ses_001")
+    const secondDir = join(TEST_DIR_ABS, "ses_002")
+    mkdirSync(firstDir, { recursive: true })
+    mkdirSync(secondDir, { recursive: true })
+    writeFileSync(join(firstDir, "T-from-s1.json"), "{}", "utf-8")
+    writeFileSync(join(secondDir, "T-from-s2.json"), "{}", "utf-8")
+
+    //#when
+    const taskIds = listSessionTaskFiles(config, "ses_001")
+
+    //#then
+    expect(taskIds).toEqual(["T-from-s1"])
+  })
+})
+
+describe("listAllSessionDirs", () => {
+  // Shared by both hooks: wipe the scratch directory if a previous run left it.
+  const removeTestDir = () => {
+    if (existsSync(TEST_DIR_ABS)) {
+      rmSync(TEST_DIR_ABS, { recursive: true, force: true })
+    }
+  }
+
+  beforeEach(removeTestDir)
+  afterEach(removeTestDir)
+
+  test("returns empty array when base directory does not exist", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+
+    //#when
+    const sessionDirs = listAllSessionDirs(config)
+
+    //#then
+    expect(sessionDirs).toEqual([])
+  })
+
+  test("returns only directory entries (not files)", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    mkdirSync(TEST_DIR_ABS, { recursive: true })
+    for (const sessionName of ["ses_001", "ses_002"]) {
+      mkdirSync(join(TEST_DIR_ABS, sessionName), { recursive: true })
+    }
+    // Plain files sitting next to the session directories must be ignored.
+    for (const fileName of [".lock", "T-legacy.json"]) {
+      writeFileSync(join(TEST_DIR_ABS, fileName), "{}", "utf-8")
+    }
+
+    //#when
+    const sessionDirs = listAllSessionDirs(config)
+
+    //#then
+    expect(sessionDirs).toHaveLength(2)
+    expect(sessionDirs).toContain("ses_001")
+    expect(sessionDirs).toContain("ses_002")
+  })
+})
+
+describe("findTaskAcrossSessions", () => {
+  // Shared by both hooks: wipe the scratch directory if it exists.
+  const removeTestDir = () => {
+    if (existsSync(TEST_DIR_ABS)) {
+      rmSync(TEST_DIR_ABS, { recursive: true, force: true })
+    }
+  }
+
+  beforeEach(removeTestDir)
+  afterEach(removeTestDir)
+
+  test("returns null when task does not exist in any session", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    mkdirSync(join(TEST_DIR_ABS, "ses_001"), { recursive: true })
+
+    //#when
+    const location = findTaskAcrossSessions(config, "T-nonexistent")
+
+    //#then
+    expect(location).toBeNull()
+  })
+
+  test("finds task in the correct session directory", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+    const owningDir = join(TEST_DIR_ABS, "ses_002")
+    mkdirSync(join(TEST_DIR_ABS, "ses_001"), { recursive: true })
+    mkdirSync(owningDir, { recursive: true })
+    writeFileSync(join(owningDir, "T-target.json"), '{"id":"T-target"}', "utf-8")
+
+    //#when
+    const location = findTaskAcrossSessions(config, "T-target")
+
+    //#then
+    expect(location).not.toBeNull()
+    expect(location!.sessionID).toBe("ses_002")
+    expect(location!.path).toBe(join(owningDir, "T-target.json"))
+  })
+
+  test("returns null when base directory does not exist", () => {
+    //#given
+    const config = makeConfig(TEST_DIR)
+
+    //#when
+    const location = findTaskAcrossSessions(config, "T-any")
+
+    //#then
+    expect(location).toBeNull()
+  })
+})
--- a/src/features/claude-tasks/session-storage.ts
+++ b/src/features/claude-tasks/session-storage.ts
@@ -0,0 +1,52 @@
+import { join } from "path"
+import { existsSync, readdirSync, statSync } from "fs"
+import { getTaskDir } from "./storage"
+import type { OhMyOpenCodeConfig } from "../../config/schema"
+
+/**
+ * Resolves the directory holding task files for one session: the base task
+ * directory from `getTaskDir(config)` with `sessionID` appended.
+ */
+export function getSessionTaskDir(
+  config: Partial<OhMyOpenCodeConfig>,
+  sessionID: string,
+): string {
+  const baseTaskDir = getTaskDir(config)
+  return join(baseTaskDir, sessionID)
+}
+
+/**
+ * Lists the task ids stored for a session.
+ *
+ * A task file is any entry of the session directory named `T-*.json`; the
+ * returned id is the file name without its `.json` extension.
+ *
+ * @returns The task ids, or an empty array when the session directory does not exist.
+ */
+export function listSessionTaskFiles(
+  config: Partial<OhMyOpenCodeConfig>,
+  sessionID: string,
+): string[] {
+  const dir = getSessionTaskDir(config, sessionID)
+  if (!existsSync(dir)) return []
+  return readdirSync(dir)
+    .filter((f) => f.endsWith(".json") && f.startsWith("T-"))
+    // Strip only the trailing extension: String.replace would remove the
+    // first ".json" anywhere in the name and corrupt ids that contain it.
+    .map((f) => f.slice(0, -".json".length))
+}
+
+/**
+ * Lists the names of all session directories under the base task directory.
+ *
+ * Only entries that stat as directories are returned; plain files are skipped.
+ *
+ * @returns Directory names (not full paths), or an empty array when the base
+ *          directory does not exist.
+ */
+export function listAllSessionDirs(
+  config: Partial<OhMyOpenCodeConfig>,
+): string[] {
+  const baseDir = getTaskDir(config)
+  if (!existsSync(baseDir)) return []
+  return readdirSync(baseDir).filter((entry) => {
+    try {
+      return statSync(join(baseDir, entry)).isDirectory()
+    } catch {
+      // The entry vanished between readdir and stat, or is a dangling
+      // symlink. Either way it is not a usable session directory, and one
+      // bad entry must not abort the whole listing.
+      return false
+    }
+  })
+}
+
+export interface TaskLocation { // result of findTaskAcrossSessions when a task file is found
+  path: string // path of the task's JSON file inside its session directory
+  sessionID: string // name of the session directory that contains the file
+}
+
+/**
+ * Searches every session directory for `<taskId>.json`.
+ *
+ * Sessions are probed in the order `listAllSessionDirs` returns them and the
+ * first match wins.
+ *
+ * @returns The file path and owning session, or `null` when no session has the task.
+ */
+export function findTaskAcrossSessions(
+  config: Partial<OhMyOpenCodeConfig>,
+  taskId: string,
+): TaskLocation | null {
+  const fileName = `${taskId}.json`
+  const candidatePath = (sessionID: string) =>
+    join(getSessionTaskDir(config, sessionID), fileName)
+
+  const owner = listAllSessionDirs(config).find((sessionID) =>
+    existsSync(candidatePath(sessionID)),
+  )
+  if (owner === undefined) return null
+
+  return { path: candidatePath(owner), sessionID: owner }
+}
--- a/src/features/claude-tasks/storage.test.ts
+++ b/src/features/claude-tasks/storage.test.ts
@@ -20,6 +20,7 @@ const TEST_DIR_ABS = join(process.cwd(), TEST_DIR)

 describe("getTaskDir", () => {
  const originalTaskListId = process.env.ULTRAWORK_TASK_LIST_ID
+  const originalClaudeTaskListId = process.env.CLAUDE_CODE_TASK_LIST_ID

  beforeEach(() => {
    if (originalTaskListId === undefined) {
@@ -27,6 +28,12 @@ describe("getTaskDir", () => {
    } else {
      process.env.ULTRAWORK_TASK_LIST_ID = originalTaskListId
    }
+
+    if (originalClaudeTaskListId === undefined) {
+      delete process.env.CLAUDE_CODE_TASK_LIST_ID
+    } else {
+      process.env.CLAUDE_CODE_TASK_LIST_ID = originalClaudeTaskListId
+    }
  })

  afterEach(() => {
@@ -35,6 +42,12 @@ describe("getTaskDir", () => {
    } else {
      process.env.ULTRAWORK_TASK_LIST_ID = originalTaskListId
    }
+
+    if (originalClaudeTaskListId === undefined) {
+      delete process.env.CLAUDE_CODE_TASK_LIST_ID
+    } else {
+      process.env.CLAUDE_CODE_TASK_LIST_ID = originalClaudeTaskListId
+    }
  })

  test("returns global config path for default config", () => {
@@ -62,6 +75,19 @@ describe("getTaskDir", () => {
    expect(result).toBe(join(configDir, "tasks", "custom-list-id"))
  })

+  test("respects CLAUDE_CODE_TASK_LIST_ID env var when ULTRAWORK_TASK_LIST_ID not set", () => {
+    //#given
+    delete process.env.ULTRAWORK_TASK_LIST_ID
+    process.env.CLAUDE_CODE_TASK_LIST_ID = "claude list/id"
+    // The expected directory name is the sanitized form of the env value.
+    const expectedDir = join(
+      getOpenCodeConfigDir({ binary: "opencode" }),
+      "tasks",
+      "claude-list-id",
+    )
+
+    //#when
+    const taskDir = getTaskDir()
+
+    //#then
+    expect(taskDir).toBe(expectedDir)
+  })
+
  test("falls back to sanitized cwd basename when env var not set", () => {
    //#given
    delete process.env.ULTRAWORK_TASK_LIST_ID
@@ -114,6 +140,7 @@ describe("getTaskDir", () => {

 describe("resolveTaskListId", () => {
  const originalTaskListId = process.env.ULTRAWORK_TASK_LIST_ID
+  const originalClaudeTaskListId = process.env.CLAUDE_CODE_TASK_LIST_ID

  beforeEach(() => {
    if (originalTaskListId === undefined) {
@@ -121,6 +148,12 @@ describe("resolveTaskListId", () => {
    } else {
      process.env.ULTRAWORK_TASK_LIST_ID = originalTaskListId
    }
+
+    if (originalClaudeTaskListId === undefined) {
+      delete process.env.CLAUDE_CODE_TASK_LIST_ID
+    } else {
+      process.env.CLAUDE_CODE_TASK_LIST_ID = originalClaudeTaskListId
+    }
  })

  afterEach(() => {
@@ -129,6 +162,12 @@ describe("resolveTaskListId", () => {
    } else {
      process.env.ULTRAWORK_TASK_LIST_ID = originalTaskListId
    }
+
+    if (originalClaudeTaskListId === undefined) {
+      delete process.env.CLAUDE_CODE_TASK_LIST_ID
+    } else {
+      process.env.CLAUDE_CODE_TASK_LIST_ID = originalClaudeTaskListId
+    }
  })

  test("returns env var when set", () => {
@@ -142,6 +181,30 @@ describe("resolveTaskListId", () => {
    expect(result).toBe("custom-list")
  })

+  test("returns CLAUDE_CODE_TASK_LIST_ID when ULTRAWORK_TASK_LIST_ID not set", () => {
+    //#given
+    // Clear the higher-priority variable so the Claude one is consulted.
+    delete process.env.ULTRAWORK_TASK_LIST_ID
+    process.env.CLAUDE_CODE_TASK_LIST_ID = "claude-list"
+
+    //#when
+    const listId = resolveTaskListId()
+
+    //#then
+    expect(listId).toBe("claude-list")
+  })
+
+  test("sanitizes CLAUDE_CODE_TASK_LIST_ID special characters", () => {
+    //#given
+    // A space and a slash in the env value must not survive into the id.
+    delete process.env.ULTRAWORK_TASK_LIST_ID
+    process.env.CLAUDE_CODE_TASK_LIST_ID = "claude list/id"
+
+    //#when
+    const listId = resolveTaskListId()
+
+    //#then
+    expect(listId).toBe("claude-list-id")
+  })
+
  test("sanitizes special characters", () => {
    //#given
    process.env.ULTRAWORK_TASK_LIST_ID = "custom list/id"
--- a/src/features/claude-tasks/storage.ts
+++ b/src/features/claude-tasks/storage.ts
@@ -26,6 +26,9 @@ export function resolveTaskListId(config: Partial<OhMyOpenCodeConfig> = {}): str
  const envId = process.env.ULTRAWORK_TASK_LIST_ID?.trim()
  if (envId) return sanitizePathSegment(envId)

+  const claudeEnvId = process.env.CLAUDE_CODE_TASK_LIST_ID?.trim()
+  if (claudeEnvId) return sanitizePathSegment(claudeEnvId)
+
  const configId = config.sisyphus?.tasks?.task_list_id?.trim()
  if (configId) return sanitizePathSegment(configId)

--- a/src/features/opencode-skill-loader/skill-content.test.ts
+++ b/src/features/opencode-skill-loader/skill-content.test.ts
@@ -314,6 +314,44 @@ describe("resolveMultipleSkillsAsync", () => {
 		expect(gitMasterContent).toContain("Co-authored-by: Sisyphus")
 	})

+	it("should inject custom string footer when commit_footer is a string", async () => {
+		// given: git-master configured with a team-specific footer string
+		const customFooter = "Custom footer from my team"
+
+		// when: resolving git-master with that footer config
+		const { resolved } = await resolveMultipleSkillsAsync(["git-master"], {
+			gitMasterConfig: {
+				commit_footer: customFooter,
+				include_co_authored_by: false,
+			},
+		})
+
+		// then: the custom text replaces the default Sisyphus footer
+		const gitMasterContent = resolved.get("git-master")
+		expect(gitMasterContent).toContain(customFooter)
+		expect(gitMasterContent).not.toContain("Ultraworked with [Sisyphus]")
+	})
+
+	it("should use default Sisyphus footer when commit_footer is boolean true", async () => {
+		// given: git-master configured with the boolean form of commit_footer
+		const gitMasterConfig = {
+			commit_footer: true,
+			include_co_authored_by: false,
+		}
+
+		// when: resolving git-master with that config
+		const { resolved } = await resolveMultipleSkillsAsync(["git-master"], { gitMasterConfig })
+
+		// then: the built-in Sisyphus footer text is injected
+		const gitMasterContent = resolved.get("git-master")
+		expect(gitMasterContent).toContain("Ultraworked with [Sisyphus]")
+	})
+
 	it("should handle empty array", async () => {
 		// given: empty skill names
 		const skillNames: string[] = []
@@ -389,3 +427,33 @@ describe("resolveMultipleSkills with browserProvider", () => {
 		expect(result.notFound).toContain("agent-browser")
 	})
 })
+
+describe("resolveMultipleSkillsAsync with browserProvider filtering", () => {
+	// NOTE(review): both cases only assert that the requested skills resolve;
+	// neither directly asserts that the other provider's skill is excluded.
+	it("should exclude discovered agent-browser when browserProvider is playwright", async () => {
+		// given: playwright is the selected browserProvider (default)
+		const requested = ["playwright", "git-master"]
+
+		// when: resolving multiple skills
+		const { resolved, notFound } = await resolveMultipleSkillsAsync(requested, {
+			browserProvider: "playwright" as const,
+		})
+
+		// then: every requested skill resolved and playwright is not reported missing
+		for (const skillName of requested) {
+			expect(resolved.has(skillName)).toBe(true)
+		}
+		expect(notFound).not.toContain("playwright")
+	})
+
+	it("should exclude discovered playwright when browserProvider is agent-browser", async () => {
+		// given: agent-browser is the selected browserProvider
+		const requested = ["agent-browser", "git-master"]
+
+		// when: resolving multiple skills
+		const { resolved, notFound } = await resolveMultipleSkillsAsync(requested, {
+			browserProvider: "agent-browser" as const,
+		})
+
+		// then: every requested skill resolved and agent-browser is not reported missing
+		for (const skillName of requested) {
+			expect(resolved.has(skillName)).toBe(true)
+		}
+		expect(notFound).not.toContain("agent-browser")
+	})
+})
--- a/src/features/opencode-skill-loader/skill-content.ts
+++ b/src/features/opencode-skill-loader/skill-content.ts
@@ -55,10 +55,23 @@ async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSki
 		mcpConfig: skill.mcpConfig,
 	}))

-	const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
+	// Provider-gated skill names that should be filtered based on browserProvider
+	const providerGatedSkillNames = new Set(["agent-browser", "playwright"])
+	const browserProvider = options?.browserProvider ?? "playwright"
+
+	// Filter discovered skills to exclude provider-gated names that don't match the selected provider
+	const filteredDiscoveredSkills = discoveredSkills.filter((skill) => {
+		if (!providerGatedSkillNames.has(skill.name)) {
+			return true
+		}
+		// For provider-gated skills, only include if it matches the selected provider
+		return skill.name === browserProvider
+	})
+
+	const discoveredNames = new Set(filteredDiscoveredSkills.map((s) => s.name))
 	const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))

-	let allSkills = [...discoveredSkills, ...uniqueBuiltins]
+	let allSkills = [...filteredDiscoveredSkills, ...uniqueBuiltins]

 	// Filter discovered skills by disabledSkills (builtin skills are already filtered by createBuiltinSkills)
 	if (hasDisabledSkills) {
@@ -97,9 +110,10 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
 	sections.push(``)

 	if (commitFooter) {
+		const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
 		sections.push(`1. **Footer in commit body:**`)
 		sections.push("```")
-		sections.push(`Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)`)
+		sections.push(footerText)
 		sections.push("```")
 		sections.push(``)
 	}
@@ -113,14 +127,16 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
 	}

 	if (commitFooter && includeCoAuthoredBy) {
+		const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
 		sections.push(`**Example (both enabled):**`)
 		sections.push("```bash")
-		sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
+		sections.push(`git commit -m "{Commit Message}" -m "${footerText}" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
 		sections.push("```")
 	} else if (commitFooter) {
+		const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
 		sections.push(`**Example:**`)
 		sections.push("```bash")
-		sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"`)
+		sections.push(`git commit -m "{Commit Message}" -m "${footerText}"`)
 		sections.push("```")
 	} else if (includeCoAuthoredBy) {
 		sections.push(`**Example:**`)
--- a/src/features/tmux-subagent/manager-cleanup.ts
+++ b/src/features/tmux-subagent/manager-cleanup.ts
@@ -0,0 +1,43 @@
+import type { TmuxConfig } from "../../config/schema"
+import type { TrackedSession } from "./types"
+import { log } from "../../shared"
+import { queryWindowState } from "./pane-state-querier"
+import { executeAction } from "./action-executor"
+import { TmuxPollingManager } from "./polling-manager"
+
+/**
+ * Tears down tmux subagent tracking: stops the poller, closes the pane of
+ * every tracked session, and empties the session map.
+ */
+export class ManagerCleanup {
+  constructor(
+    private sessions: Map<string, TrackedSession>,
+    private sourcePaneId: string | undefined,
+    private pollingManager: TmuxPollingManager,
+    private tmuxConfig: TmuxConfig,
+    private serverUrl: string
+  ) {}
+
+  /** Stops polling, then closes and forgets all tracked sessions (if any). */
+  async cleanup(): Promise<void> {
+    this.pollingManager.stopPolling()
+
+    if (this.sessions.size > 0) {
+      await this.closeTrackedPanes()
+    }
+
+    log("[tmux-session-manager] cleanup complete")
+  }
+
+  /**
+   * Issues a close action for every tracked pane in parallel. Failures are
+   * logged per pane and never abort the other closes. The session map is
+   * cleared even when the window state could not be queried.
+   */
+  private async closeTrackedPanes(): Promise<void> {
+    log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
+
+    const windowState = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+
+    if (windowState) {
+      const pendingCloses: Promise<unknown>[] = []
+      // Snapshot the values so the iteration is unaffected by map changes.
+      for (const tracked of [...this.sessions.values()]) {
+        const closing = executeAction(
+          { type: "close", paneId: tracked.paneId, sessionId: tracked.sessionId },
+          { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState }
+        ).catch((err) =>
+          log("[tmux-session-manager] cleanup error for pane", {
+            paneId: tracked.paneId,
+            error: String(err),
+          }),
+        )
+        pendingCloses.push(closing)
+      }
+      await Promise.all(pendingCloses)
+    }
+
+    this.sessions.clear()
+  }
+}
--- a/src/features/tmux-subagent/manager.ts
+++ b/src/features/tmux-subagent/manager.ts
@@ -13,7 +13,7 @@ import { log } from "../../shared"
 import { queryWindowState } from "./pane-state-querier"
 import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
 import { executeActions, executeAction } from "./action-executor"
-
+import { TmuxPollingManager } from "./polling-manager"
 type OpencodeClient = PluginInput["client"]

 interface SessionCreatedEvent {
@@ -57,9 +57,8 @@ export class TmuxSessionManager {
  private sourcePaneId: string | undefined
  private sessions = new Map<string, TrackedSession>()
  private pendingSessions = new Set<string>()
-  private pollInterval?: ReturnType<typeof setInterval>
  private deps: TmuxUtilDeps
-
+  private pollingManager: TmuxPollingManager
  constructor(ctx: PluginInput, tmuxConfig: TmuxConfig, deps: TmuxUtilDeps = defaultTmuxDeps) {
    this.client = ctx.client
    this.tmuxConfig = tmuxConfig
@@ -67,7 +66,11 @@ export class TmuxSessionManager {
    const defaultPort = process.env.OPENCODE_PORT ?? "4096"
    this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
    this.sourcePaneId = deps.getCurrentPaneId()
-
+    this.pollingManager = new TmuxPollingManager(
+      this.client,
+      this.sessions,
+      this.closeSessionById.bind(this)
+    )
    log("[tmux-session-manager] initialized", {
      configEnabled: this.tmuxConfig.enabled,
      tmuxConfig: this.tmuxConfig,
@@ -75,7 +78,6 @@ export class TmuxSessionManager {
      sourcePaneId: this.sourcePaneId,
    })
  }
-
  private isEnabled(): boolean {
    return this.tmuxConfig.enabled && this.deps.isInsideTmux()
  }
@@ -125,6 +127,12 @@ export class TmuxSessionManager {
    return false
  }

+  // NOTE: Thin delegate kept on the manager so tests can reach polling via `as any`.
+  // The actual logic is TmuxPollingManager.pollSessions, which is private there, hence the cast.
+  private async pollSessions(): Promise<void> {
+    await (this.pollingManager as any).pollSessions()
+  }
+
  async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
    const enabled = this.isEnabled()
    log("[tmux-session-manager] onSessionCreated called", {
@@ -239,7 +247,7 @@ export class TmuxSessionManager {
          paneId: result.spawnedPaneId,
          sessionReady,
        })
-        this.startPolling()
+        this.pollingManager.startPolling()
      } else {
        log("[tmux-session-manager] spawn failed", {
          success: result.success,
@@ -278,140 +286,10 @@ export class TmuxSessionManager {
    this.sessions.delete(event.sessionID)

    if (this.sessions.size === 0) {
-      this.stopPolling()
+      this.pollingManager.stopPolling()
    }
  }

-  private startPolling(): void {
-    if (this.pollInterval) return
-
-    this.pollInterval = setInterval(
-      () => this.pollSessions(),
-      POLL_INTERVAL_BACKGROUND_MS,
-    )
-    log("[tmux-session-manager] polling started")
-  }
-
-  private stopPolling(): void {
-    if (this.pollInterval) {
-      clearInterval(this.pollInterval)
-      this.pollInterval = undefined
-      log("[tmux-session-manager] polling stopped")
-    }
-  }
-
-  private async pollSessions(): Promise<void> {
-    if (this.sessions.size === 0) {
-      this.stopPolling()
-      return
-    }
-
-    try {
-      const statusResult = await this.client.session.status({ path: undefined })
-      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
-
-      log("[tmux-session-manager] pollSessions", {
-        trackedSessions: Array.from(this.sessions.keys()),
-        allStatusKeys: Object.keys(allStatuses),
-      })
-
-      const now = Date.now()
-      const sessionsToClose: string[] = []
-
-      for (const [sessionId, tracked] of this.sessions.entries()) {
-        const status = allStatuses[sessionId]
-        const isIdle = status?.type === "idle"
-
-        if (status) {
-          tracked.lastSeenAt = new Date(now)
-        }
-
-        const missingSince = !status ? now - tracked.lastSeenAt.getTime() : 0
-        const missingTooLong = missingSince >= SESSION_MISSING_GRACE_MS
-        const isTimedOut = now - tracked.createdAt.getTime() > SESSION_TIMEOUT_MS
-        const elapsedMs = now - tracked.createdAt.getTime()
-
-        // Stability detection: Don't close immediately on idle
-        // Wait for STABLE_POLLS_REQUIRED consecutive polls with same message count
-        let shouldCloseViaStability = false
-
-        if (isIdle && elapsedMs >= MIN_STABILITY_TIME_MS) {
-          // Fetch message count to detect if agent is still producing output
-          try {
-            const messagesResult = await this.client.session.messages({ 
-              path: { id: sessionId } 
-            })
-            const currentMsgCount = Array.isArray(messagesResult.data) 
-              ? messagesResult.data.length 
-              : 0
-
-            if (tracked.lastMessageCount === currentMsgCount) {
-              // Message count unchanged - increment stable polls
-              tracked.stableIdlePolls = (tracked.stableIdlePolls ?? 0) + 1
-              
-              if (tracked.stableIdlePolls >= STABLE_POLLS_REQUIRED) {
-                // Double-check status before closing
-                const recheckResult = await this.client.session.status({ path: undefined })
-                const recheckStatuses = (recheckResult.data ?? {}) as Record<string, { type: string }>
-                const recheckStatus = recheckStatuses[sessionId]
-                
-                if (recheckStatus?.type === "idle") {
-                  shouldCloseViaStability = true
-                } else {
-                  // Status changed - reset stability counter
-                  tracked.stableIdlePolls = 0
-                  log("[tmux-session-manager] stability reached but session not idle on recheck, resetting", {
-                    sessionId,
-                    recheckStatus: recheckStatus?.type,
-                  })
-                }
-              }
-            } else {
-              // New messages - agent is still working, reset stability counter
-              tracked.stableIdlePolls = 0
-            }
-            
-            tracked.lastMessageCount = currentMsgCount
-          } catch (msgErr) {
-            log("[tmux-session-manager] failed to fetch messages for stability check", {
-              sessionId,
-              error: String(msgErr),
-            })
-            // On error, don't close - be conservative
-          }
-        } else if (!isIdle) {
-          // Not idle - reset stability counter
-          tracked.stableIdlePolls = 0
-        }
-
-        log("[tmux-session-manager] session check", {
-          sessionId,
-          statusType: status?.type,
-          isIdle,
-          elapsedMs,
-          stableIdlePolls: tracked.stableIdlePolls,
-          lastMessageCount: tracked.lastMessageCount,
-          missingSince,
-          missingTooLong,
-          isTimedOut,
-          shouldCloseViaStability,
-        })
-
-        // Close if: stability detection confirmed OR missing too long OR timed out
-        // Note: We no longer close immediately on idle - stability detection handles that
-        if (shouldCloseViaStability || missingTooLong || isTimedOut) {
-          sessionsToClose.push(sessionId)
-        }
-      }
-
-      for (const sessionId of sessionsToClose) {
-        log("[tmux-session-manager] closing session due to poll", { sessionId })
-        await this.closeSessionById(sessionId)
-      }
-    } catch (err) {
-      log("[tmux-session-manager] poll error", { error: String(err) })
-    }
-  }

  private async closeSessionById(sessionId: string): Promise<void> {
    const tracked = this.sessions.get(sessionId)
@@ -433,7 +311,7 @@ export class TmuxSessionManager {
    this.sessions.delete(sessionId)

    if (this.sessions.size === 0) {
-      this.stopPolling()
+      this.pollingManager.stopPolling()
    }
  }

@@ -444,7 +322,7 @@ export class TmuxSessionManager {
  }

  async cleanup(): Promise<void> {
-    this.stopPolling()
+    this.pollingManager.stopPolling()

    if (this.sessions.size > 0) {
      log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
--- a/src/features/tmux-subagent/polling-manager.ts
+++ b/src/features/tmux-subagent/polling-manager.ts
@@ -0,0 +1,139 @@
+import type { OpencodeClient } from "../../tools/delegate-task/types"
+import { POLL_INTERVAL_BACKGROUND_MS } from "../../shared/tmux"
+import type { TrackedSession } from "./types"
+import { SESSION_MISSING_GRACE_MS } from "../../shared/tmux"
+import { log } from "../../shared"
+
+const SESSION_TIMEOUT_MS = 10 * 60 * 1000 // 10 min: sessions older than this are closed by the poller
+const MIN_STABILITY_TIME_MS = 10 * 1000 // 10 s: minimum session age before idle stability checks begin
+const STABLE_POLLS_REQUIRED = 3 // idle polls with an unchanged message count needed before closing
+
+/**
+ * Periodically polls session status for the tracked tmux subagent sessions
+ * and asks the owner (via `closeSessionById`) to close sessions that have
+ * finished, gone missing for too long, or exceeded the session timeout.
+ *
+ * The `sessions` map is shared with the owner; this class only reads it and
+ * updates bookkeeping fields on the tracked entries.
+ */
+export class TmuxPollingManager {
+  private pollInterval?: ReturnType<typeof setInterval>
+  // True while a poll is awaiting the server; prevents overlapping polls.
+  private pollInFlight = false
+
+  constructor(
+    private client: OpencodeClient,
+    private sessions: Map<string, TrackedSession>,
+    private closeSessionById: (sessionId: string) => Promise<void>
+  ) {}
+
+  /** Starts the poll timer. Does nothing when polling is already running. */
+  startPolling(): void {
+    if (this.pollInterval) return
+
+    this.pollInterval = setInterval(
+      // pollSessions handles its own errors, so the promise is deliberately not awaited.
+      () => void this.pollSessions(),
+      POLL_INTERVAL_BACKGROUND_MS,
+    )
+    log("[tmux-session-manager] polling started")
+  }
+
+  /** Stops the poll timer. Does nothing when polling is not running. */
+  stopPolling(): void {
+    if (this.pollInterval) {
+      clearInterval(this.pollInterval)
+      this.pollInterval = undefined
+      log("[tmux-session-manager] polling stopped")
+    }
+  }
+
+  /**
+   * Runs one poll cycle over all tracked sessions.
+   *
+   * A session is queued for closing when any of these holds:
+   * - it has been idle with an unchanged message count for
+   *   STABLE_POLLS_REQUIRED polls and is still idle on a status recheck;
+   * - it has been absent from the status response for at least
+   *   SESSION_MISSING_GRACE_MS;
+   * - it is older than SESSION_TIMEOUT_MS.
+   *
+   * Errors are logged, never thrown.
+   */
+  private async pollSessions(): Promise<void> {
+    if (this.sessions.size === 0) {
+      this.stopPolling()
+      return
+    }
+
+    // A slow server response can outlast the timer interval. Skip this tick
+    // rather than run two polls concurrently, which would double-count
+    // stableIdlePolls for the same session.
+    if (this.pollInFlight) return
+    this.pollInFlight = true
+
+    try {
+      const statusResult = await this.client.session.status({ path: undefined })
+      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+
+      log("[tmux-session-manager] pollSessions", {
+        trackedSessions: Array.from(this.sessions.keys()),
+        allStatusKeys: Object.keys(allStatuses),
+      })
+
+      const now = Date.now()
+      const sessionsToClose: string[] = []
+
+      for (const [sessionId, tracked] of this.sessions.entries()) {
+        const status = allStatuses[sessionId]
+        const isIdle = status?.type === "idle"
+
+        if (status) {
+          tracked.lastSeenAt = new Date(now)
+        }
+
+        const missingSince = !status ? now - tracked.lastSeenAt.getTime() : 0
+        const missingTooLong = missingSince >= SESSION_MISSING_GRACE_MS
+        const elapsedMs = now - tracked.createdAt.getTime()
+        const isTimedOut = elapsedMs > SESSION_TIMEOUT_MS
+
+        // Stability detection: do not close immediately on idle. Wait for
+        // consecutive polls in which the message count stays the same.
+        let shouldCloseViaStability = false
+
+        if (isIdle && elapsedMs >= MIN_STABILITY_TIME_MS) {
+          try {
+            const messagesResult = await this.client.session.messages({
+              path: { id: sessionId }
+            })
+            const currentMsgCount = Array.isArray(messagesResult.data)
+              ? messagesResult.data.length
+              : 0
+
+            if (tracked.lastMessageCount === currentMsgCount) {
+              tracked.stableIdlePolls = (tracked.stableIdlePolls ?? 0) + 1
+
+              if (tracked.stableIdlePolls >= STABLE_POLLS_REQUIRED) {
+                // Double-check the status before closing.
+                const recheckResult = await this.client.session.status({ path: undefined })
+                const recheckStatuses = (recheckResult.data ?? {}) as Record<string, { type: string }>
+                const recheckStatus = recheckStatuses[sessionId]
+
+                if (recheckStatus?.type === "idle") {
+                  shouldCloseViaStability = true
+                } else {
+                  // Status changed in the meantime: start counting again.
+                  tracked.stableIdlePolls = 0
+                  log("[tmux-session-manager] stability reached but session not idle on recheck, resetting", {
+                    sessionId,
+                    recheckStatus: recheckStatus?.type,
+                  })
+                }
+              }
+            } else {
+              // Message count changed since the last poll: reset the counter.
+              tracked.stableIdlePolls = 0
+            }
+
+            tracked.lastMessageCount = currentMsgCount
+          } catch (msgErr) {
+            // Be conservative: a failed message fetch never closes a session.
+            log("[tmux-session-manager] failed to fetch messages for stability check", {
+              sessionId,
+              error: String(msgErr),
+            })
+          }
+        } else if (!isIdle) {
+          tracked.stableIdlePolls = 0
+        }
+
+        log("[tmux-session-manager] session check", {
+          sessionId,
+          statusType: status?.type,
+          isIdle,
+          elapsedMs,
+          stableIdlePolls: tracked.stableIdlePolls,
+          lastMessageCount: tracked.lastMessageCount,
+          missingSince,
+          missingTooLong,
+          isTimedOut,
+          shouldCloseViaStability,
+        })
+
+        if (shouldCloseViaStability || missingTooLong || isTimedOut) {
+          sessionsToClose.push(sessionId)
+        }
+      }
+
+      // Close after the loop so the map is not modified while iterating it.
+      for (const sessionId of sessionsToClose) {
+        log("[tmux-session-manager] closing session due to poll", { sessionId })
+        await this.closeSessionById(sessionId)
+      }
+    } catch (err) {
+      log("[tmux-session-manager] poll error", { error: String(err) })
+    } finally {
+      this.pollInFlight = false
+    }
+  }
+}
--- a/src/features/tmux-subagent/session-cleaner.ts
+++ b/src/features/tmux-subagent/session-cleaner.ts
@@ -0,0 +1,80 @@
+import type { TmuxConfig } from "../../config/schema"
+import type { TrackedSession } from "./types"
+import type { SessionMapping } from "./decision-engine"
+import { log } from "../../shared"
+import { queryWindowState } from "./pane-state-querier"
+import { decideCloseAction } from "./decision-engine"
+import { executeAction } from "./action-executor"
+import { TmuxPollingManager } from "./polling-manager"
+
+/**
+ * Tmux environment probes injected into `SessionCleaner` through its constructor.
+ */
+export interface TmuxUtilDeps {
+  // Combined with `tmuxConfig.enabled` in `SessionCleaner.isEnabled()`.
+  // NOTE(review): by its name this reports whether the process runs inside tmux — confirm against the implementation.
+  isInsideTmux: () => boolean
+  // Not called anywhere in this file.
+  // NOTE(review): presumably yields the id of the pane hosting the process — confirm against the implementation.
+  getCurrentPaneId: () => string | undefined
+}
+
+/**
+ * Closes tmux panes that belong to tracked sessions and removes those sessions
+ * from the tracking map.
+ *
+ * The `sessions` map is supplied by the caller and mutated in place.
+ */
+export class SessionCleaner {
+  constructor(
+    private tmuxConfig: TmuxConfig,
+    private deps: TmuxUtilDeps,
+    private sessions: Map<string, TrackedSession>,
+    private sourcePaneId: string | undefined,
+    private getSessionMappings: () => SessionMapping[],
+    private pollingManager: TmuxPollingManager,
+    private serverUrl: string
+  ) {}
+
+  /** True when the feature is enabled in config and the injected probe reports tmux. */
+  private isEnabled(): boolean {
+    return this.tmuxConfig.enabled && this.deps.isInsideTmux()
+  }
+
+  /**
+   * Removes a session from the tracking map and stops polling once the map is empty.
+   *
+   * Every removal path goes through here so that no exit can leave polling
+   * running with nothing left to watch.
+   */
+  private untrack(sessionId: string): void {
+    this.sessions.delete(sessionId)
+
+    if (this.sessions.size === 0) {
+      this.pollingManager.stopPolling()
+    }
+  }
+
+  /**
+   * Handles deletion of a session: asks the decision engine for a close action,
+   * executes it when one is returned, then stops tracking the session.
+   *
+   * Does nothing when the feature is disabled, when there is no source pane, or
+   * when the session is not tracked. If the window state cannot be queried, no
+   * tmux action is attempted but the session is still untracked.
+   *
+   * @param event - carries the id of the deleted session in `sessionID`
+   */
+  async onSessionDeleted(event: { sessionID: string }): Promise<void> {
+    if (!this.isEnabled()) return
+    if (!this.sourcePaneId) return
+    if (!this.sessions.has(event.sessionID)) return
+
+    log("[tmux-session-manager] onSessionDeleted", { sessionId: event.sessionID })
+
+    const state = await queryWindowState(this.sourcePaneId)
+    if (state) {
+      const closeAction = decideCloseAction(state, event.sessionID, this.getSessionMappings())
+      if (closeAction) {
+        await executeAction(closeAction, { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state })
+      }
+    }
+
+    this.untrack(event.sessionID)
+  }
+
+  /**
+   * Closes the pane of a tracked session and stops tracking it.
+   *
+   * Unlike `onSessionDeleted`, this does not check `isEnabled()` and issues the
+   * close action directly instead of consulting the decision engine. When there
+   * is no source pane, or the window state is unavailable, the pane close is
+   * skipped but the session is still untracked.
+   *
+   * @param sessionId - id of the session whose pane should be closed
+   */
+  async closeSessionById(sessionId: string): Promise<void> {
+    const tracked = this.sessions.get(sessionId)
+    if (!tracked) return
+
+    log("[tmux-session-manager] closing session pane", {
+      sessionId,
+      paneId: tracked.paneId,
+    })
+
+    const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+    if (state) {
+      await executeAction(
+        { type: "close", paneId: tracked.paneId, sessionId },
+        { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+      )
+    }
+
+    this.untrack(sessionId)
+  }
+}
--- a/src/features/tmux-subagent/session-spawner.ts
+++ b/src/features/tmux-subagent/session-spawner.ts
@@ -0,0 +1,166 @@
+import type { TmuxConfig } from "../../config/schema"
+import type { TrackedSession, CapacityConfig } from "./types"
+import { log } from "../../shared"
+import { queryWindowState } from "./pane-state-querier"
+import { decideSpawnActions, type SessionMapping } from "./decision-engine"
+import { executeActions } from "./action-executor"
+import { TmuxPollingManager } from "./polling-manager"
+
+/**
+ * Shape of the event passed to `SessionSpawner.onSessionCreated`.
+ *
+ * The handler only acts when `type` is "session.created" and `properties.info`
+ * carries both `id` and `parentID`; a missing `title` falls back to "Subagent".
+ */
+interface SessionCreatedEvent {
+  type: string
+  properties?: { info?: { id?: string; parentID?: string; title?: string } }
+}
+
+/**
+ * Tmux environment probes injected into `SessionSpawner` through its constructor.
+ */
+export interface TmuxUtilDeps {
+  // Combined with `tmuxConfig.enabled` in `SessionSpawner.isEnabled()` and logged by `onSessionCreated`.
+  // NOTE(review): by its name this reports whether the process runs inside tmux — confirm against the implementation.
+  isInsideTmux: () => boolean
+  // Not called anywhere in this file.
+  // NOTE(review): presumably yields the id of the pane hosting the process — confirm against the implementation.
+  getCurrentPaneId: () => string | undefined
+}
+
+/**
+ * Reacts to session-created events by running the spawn decision for the new
+ * child session, executing the resulting tmux actions, and recording the
+ * spawned pane in the caller-supplied `sessions` map.
+ */
+export class SessionSpawner {
+  constructor(
+    private tmuxConfig: TmuxConfig,
+    private deps: TmuxUtilDeps,
+    private sessions: Map<string, TrackedSession>,
+    private pendingSessions: Set<string>,
+    private sourcePaneId: string | undefined,
+    private getCapacityConfig: () => CapacityConfig,
+    private getSessionMappings: () => SessionMapping[],
+    private waitForSessionReady: (sessionId: string) => Promise<boolean>,
+    private pollingManager: TmuxPollingManager,
+    private serverUrl: string
+  ) {}
+
+  /** True when the feature is enabled in config and the injected probe reports tmux. */
+  private isEnabled(): boolean {
+    return this.tmuxConfig.enabled && this.deps.isInsideTmux()
+  }
+
+  /**
+   * Entry point for session events.
+   *
+   * Ignores the event unless the feature is enabled, the event type is
+   * "session.created", the info carries both `id` and `parentID`, the session
+   * is neither tracked nor pending, and a source pane id is known. While the
+   * spawn is in flight the session id is held in `pendingSessions`; it is
+   * always released afterwards, whether the spawn succeeds or throws.
+   *
+   * @param event - the session event to inspect
+   */
+  async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
+    const featureActive = this.isEnabled()
+    const info = event.properties?.info
+
+    log("[tmux-session-manager] onSessionCreated called", {
+      enabled: featureActive,
+      tmuxConfigEnabled: this.tmuxConfig.enabled,
+      isInsideTmux: this.deps.isInsideTmux(),
+      eventType: event.type,
+      infoId: info?.id,
+      infoParentID: info?.parentID,
+    })
+
+    if (!featureActive || event.type !== "session.created") return
+    // Only child sessions (those with a parent) are candidates for a pane.
+    if (!(info?.id && info?.parentID)) return
+
+    const sessionId = info.id
+    const title = info.title ?? "Subagent"
+
+    const alreadyKnown = this.sessions.has(sessionId) || this.pendingSessions.has(sessionId)
+    if (alreadyKnown) {
+      log("[tmux-session-manager] session already tracked or pending", { sessionId })
+      return
+    }
+
+    const sourcePaneId = this.sourcePaneId
+    if (!sourcePaneId) {
+      log("[tmux-session-manager] no source pane id")
+      return
+    }
+
+    this.pendingSessions.add(sessionId)
+    try {
+      await this.spawnPaneFor(sessionId, title, sourcePaneId)
+    } finally {
+      this.pendingSessions.delete(sessionId)
+    }
+  }
+
+  /**
+   * Queries the window, decides and executes the spawn actions, prunes sessions
+   * whose panes were closed or replaced along the way, and tracks the new pane.
+   *
+   * @param sessionId - id of the newly created session
+   * @param title - description stored on the tracked entry
+   * @param sourcePaneId - pane whose window state is queried
+   */
+  private async spawnPaneFor(sessionId: string, title: string, sourcePaneId: string): Promise<void> {
+    const windowState = await queryWindowState(sourcePaneId)
+    if (!windowState) {
+      log("[tmux-session-manager] failed to query window state")
+      return
+    }
+
+    log("[tmux-session-manager] window state queried", {
+      windowWidth: windowState.windowWidth,
+      mainPane: windowState.mainPane?.paneId,
+      agentPaneCount: windowState.agentPanes.length,
+      agentPanes: windowState.agentPanes.map((pane) => pane.paneId),
+    })
+
+    const capacity = this.getCapacityConfig()
+    const mappings = this.getSessionMappings()
+    const decision = decideSpawnActions(windowState, sessionId, title, capacity, mappings)
+
+    log("[tmux-session-manager] spawn decision", {
+      canSpawn: decision.canSpawn,
+      reason: decision.reason,
+      actionCount: decision.actions.length,
+      actions: decision.actions.map((step) => {
+        switch (step.type) {
+          case "close":
+            return { type: "close", paneId: step.paneId }
+          case "replace":
+            return { type: "replace", paneId: step.paneId, newSessionId: step.newSessionId }
+          default:
+            return { type: "spawn", sessionId: step.sessionId }
+        }
+      }),
+    })
+
+    if (!decision.canSpawn) {
+      log("[tmux-session-manager] cannot spawn", { reason: decision.reason })
+      return
+    }
+
+    const outcome = await executeActions(
+      decision.actions,
+      { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState }
+    )
+
+    // Forget sessions whose panes were successfully closed or replaced.
+    for (const { action, result: actionResult } of outcome.results) {
+      if (!actionResult.success) continue
+
+      if (action.type === "close") {
+        this.sessions.delete(action.sessionId)
+        log("[tmux-session-manager] removed closed session from cache", {
+          sessionId: action.sessionId,
+        })
+      } else if (action.type === "replace") {
+        this.sessions.delete(action.oldSessionId)
+        log("[tmux-session-manager] removed replaced session from cache", {
+          oldSessionId: action.oldSessionId,
+          newSessionId: action.newSessionId,
+        })
+      }
+    }
+
+    const paneId = outcome.spawnedPaneId
+    if (!outcome.success || !paneId) {
+      log("[tmux-session-manager] spawn failed", {
+        success: outcome.success,
+        results: outcome.results.map((entry) => ({
+          type: entry.action.type,
+          success: entry.result.success,
+          error: entry.result.error,
+        })),
+      })
+      return
+    }
+
+    // A session that never reports ready is still tracked; the miss is only logged.
+    const sessionReady = await this.waitForSessionReady(sessionId)
+    if (!sessionReady) {
+      log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
+        sessionId,
+        paneId,
+      })
+    }
+
+    const timestamp = Date.now()
+    this.sessions.set(sessionId, {
+      sessionId,
+      paneId,
+      description: title,
+      createdAt: new Date(timestamp),
+      lastSeenAt: new Date(timestamp),
+    })
+    log("[tmux-session-manager] pane spawned and tracked", {
+      sessionId,
+      paneId,
+      sessionReady,
+    })
+    this.pollingManager.startPolling()
+  }
+}
--- a/src/features/tool-metadata-store/index.test.ts
+++ b/src/features/tool-metadata-store/index.test.ts
@@ -0,0 +1,111 @@
+import { describe, test, expect, beforeEach } from "bun:test"
+import {
+  storeToolMetadata,
+  consumeToolMetadata,
+  getPendingStoreSize,
+  clearPendingStore,
+} from "./index"
+
+// Spec for the pending tool-metadata store: storing, one-shot consumption,
+// isolation between (session, call) keys, and overwrite on a repeated key.
+describe("tool-metadata-store", () => {
+  // Each test starts from an empty store so the size assertions are absolute.
+  beforeEach(() => {
+    clearPendingStore()
+  })
+
+  describe("storeToolMetadata", () => {
+    test("#given metadata with title and metadata, #when stored, #then store size increases", () => {
+      //#given
+      const entry = {
+        title: "Test Task",
+        metadata: { sessionId: "ses_child", agent: "oracle" },
+      }
+
+      //#when
+      storeToolMetadata("ses_abc123", "call_001", entry)
+
+      //#then
+      const pendingCount = getPendingStoreSize()
+      expect(pendingCount).toBe(1)
+    })
+  })
+
+  describe("consumeToolMetadata", () => {
+    test("#given stored metadata, #when consumed, #then returns the stored data", () => {
+      //#given
+      const entry = {
+        title: "My Task",
+        metadata: { sessionId: "ses_sub", run_in_background: true },
+      }
+      storeToolMetadata("ses_abc123", "call_001", entry)
+
+      //#when
+      const consumed = consumeToolMetadata("ses_abc123", "call_001")
+
+      //#then
+      expect(consumed).toEqual(entry)
+    })
+
+    test("#given stored metadata, #when consumed twice, #then second call returns undefined", () => {
+      //#given
+      storeToolMetadata("ses_abc123", "call_001", { title: "Task" })
+
+      //#when
+      consumeToolMetadata("ses_abc123", "call_001")
+      const repeated = consumeToolMetadata("ses_abc123", "call_001")
+
+      //#then
+      expect(repeated).toBeUndefined()
+      expect(getPendingStoreSize()).toBe(0)
+    })
+
+    test("#given no stored metadata, #when consumed, #then returns undefined", () => {
+      //#given
+      // Nothing has been stored under this key.
+
+      //#when
+      const missing = consumeToolMetadata("ses_nonexistent", "call_999")
+
+      //#then
+      expect(missing).toBeUndefined()
+    })
+  })
+
+  describe("isolation", () => {
+    test("#given multiple entries, #when consuming one, #then others remain", () => {
+      //#given
+      const seeded = [
+        { sessionID: "ses_1", callID: "call_a", title: "Task A" },
+        { sessionID: "ses_1", callID: "call_b", title: "Task B" },
+        { sessionID: "ses_2", callID: "call_a", title: "Task C" },
+      ]
+      for (const { sessionID, callID, title } of seeded) {
+        storeToolMetadata(sessionID, callID, { title })
+      }
+
+      //#when
+      const first = consumeToolMetadata("ses_1", "call_a")
+
+      //#then
+      expect(first?.title).toBe("Task A")
+      expect(getPendingStoreSize()).toBe(2)
+
+      const sameSessionOtherCall = consumeToolMetadata("ses_1", "call_b")
+      expect(sameSessionOtherCall?.title).toBe("Task B")
+
+      const otherSessionSameCall = consumeToolMetadata("ses_2", "call_a")
+      expect(otherSessionSameCall?.title).toBe("Task C")
+
+      expect(getPendingStoreSize()).toBe(0)
+    })
+  })
+
+  describe("overwrite", () => {
+    test("#given existing entry, #when stored again with same key, #then overwrites", () => {
+      //#given
+      storeToolMetadata("ses_1", "call_a", { title: "Old" })
+
+      //#when
+      storeToolMetadata("ses_1", "call_a", { title: "New", metadata: { updated: true } })
+
+      //#then
+      const latest = consumeToolMetadata("ses_1", "call_a")
+      expect(latest?.title).toBe("New")
+      expect(latest?.metadata).toEqual({ updated: true })
+    })
+  })
+})
--- a/Show More
+++ b/Show More