Compare commits


100 Commits

Author SHA1 Message Date
github-actions[bot]
ff94aa3033 release: v3.3.1 2026-02-07 17:48:30 +00:00
YeonGyu-Kim
d0c4085ae1 release: v3.3.1 2026-02-08 02:45:38 +09:00
YeonGyu-Kim
56f9de4652 Merge pull request #1632 from code-yeongyu/fix/look-at-sync-prompt
fix(look-at): use synchronous prompt to fix race condition (#1620 regression)
2026-02-08 02:45:06 +09:00
YeonGyu-Kim
b2661be833 test: fix ralph-loop tests by adding promptAsync to mock
The ralph-loop hook calls promptAsync in the implementation, but the
test mock only defined prompt(). Added promptAsync with identical
behavior to make tests pass.

- All 38 ralph-loop tests now pass
- Total test suite: 2361 pass, 3 fail (unrelated to this change)
2026-02-08 02:41:29 +09:00
YeonGyu-Kim
3d4ed912d7 fix(look-at): use synchronous prompt to fix race condition (#1620 regression)
PR #1620 migrated all prompt calls from session.prompt (blocking) to
session.promptAsync (fire-and-forget HTTP 204). This broke look_at which
needs the multimodal-looker response to be available immediately after
the prompt call returns.

Fix: add promptSyncWithModelSuggestionRetry() that uses session.prompt
(blocking) with model suggestion retry support. look_at now uses this
sync variant while all other callers keep using promptAsync.

- Add promptSyncWithModelSuggestionRetry to model-suggestion-retry.ts
- Switch look_at from promptWithModelSuggestionRetry to sync variant
- Add comprehensive tests for the new sync function
- No changes to other callers (delegate-task, background-agent)
2026-02-08 02:36:27 +09:00
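For illustration, a minimal TypeScript sketch of the sync variant this commit describes; the `Session` shape and the `suggestModel` callback signature are assumptions, not the plugin's actual API:

```typescript
interface Session {
  // blocking: resolves with the assistant response body
  prompt(input: { text: string; model?: string }): Promise<{ text: string }>
  // fire-and-forget: the server answers HTTP 204 with no body
  promptAsync(input: { text: string; model?: string }): Promise<void>
}

async function promptSyncWithModelSuggestionRetry(
  session: Session,
  input: { text: string; model?: string },
  suggestModel: (failedModel?: string) => string | undefined,
): Promise<{ text: string }> {
  try {
    // look_at must read the multimodal-looker response right after this
    // call returns, so the blocking prompt() is required here.
    return await session.prompt(input)
  } catch (err) {
    const retry = suggestModel(input.model)
    if (!retry) throw err
    // one retry with the suggested model, still blocking
    return await session.prompt({ ...input, model: retry })
  }
}
```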
github-actions[bot]
9a338b16f1 @mkusaka has signed the CLA in code-yeongyu/oh-my-opencode#1629 2026-02-07 16:54:49 +00:00
github-actions[bot]
471bc6e52d @itsnebulalol has signed the CLA in code-yeongyu/oh-my-opencode#1622 2026-02-07 15:11:05 +00:00
github-actions[bot]
825a5e70f7 release: v3.3.0 2026-02-07 14:47:32 +00:00
YeonGyu-Kim
18c161a9cd Merge pull request #1620 from potb/acp-json-error
fix: switch session.prompt() to promptAsync() — delegate broken in ACP
2026-02-07 22:52:39 +09:00
Peïo Thibault
414cecd7df test: add promptAsync mocks to all test files for promptAsync migration 2026-02-07 14:41:46 +01:00
YeonGyu-Kim
2b541b8725 Merge pull request #1621 from code-yeongyu/fix/814-mcp-config-both-paths
fix(mcp-loader): read both ~/.claude.json and ~/.claude/.mcp.json for user MCP config
2026-02-07 22:33:13 +09:00
YeonGyu-Kim
ac6e7d00f2 fix(mcp-loader): also read ~/.claude/.mcp.json for CLI-managed user MCP config
PR #1616 replaced ~/.claude/.mcp.json with ~/.claude.json but both paths
should be read:
- ~/.claude.json: user/local scope MCP settings (mcpServers field)
- ~/.claude/.mcp.json: CLI-managed MCP servers (claude mcp add)

Fixes #814
2026-02-07 22:29:51 +09:00
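A hedged sketch of the two-path read described above, assuming plain JSON files with an `mcpServers` field (the field name comes from the commit message; everything else is illustrative):

```typescript
import { readFileSync } from "node:fs"
import { homedir } from "node:os"
import { join } from "node:path"

type McpServers = Record<string, unknown>

function readMcpServers(path: string): McpServers {
  try {
    return JSON.parse(readFileSync(path, "utf8")).mcpServers ?? {}
  } catch {
    return {} // a missing or malformed file contributes nothing
  }
}

// Merge both sources; the CLI-managed file is read in addition to, not
// instead of, the user/local scope settings.
function loadUserMcpServers(): McpServers {
  return {
    ...readMcpServers(join(homedir(), ".claude.json")),          // user/local scope (mcpServers field)
    ...readMcpServers(join(homedir(), ".claude", ".mcp.json")),  // CLI-managed (claude mcp add)
  }
}
```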
Peïo Thibault
fa77be0daf chore: remove testing guide from branch 2026-02-07 14:14:06 +01:00
Peïo Thibault
13da4ef4aa docs: add comprehensive local testing guide for acp-json-error branch 2026-02-07 14:07:55 +01:00
Peïo Thibault
6451b212f8 test(todo-continuation): add promptAsync mocks for migrated hook 2026-02-07 13:51:28 +01:00
Peïo Thibault
fad7354b13 fix(look-at): remove isJsonParseError band-aid (root cause fixed) 2026-02-07 13:46:03 +01:00
Peïo Thibault
55dc64849f fix(tools): switch session.prompt to promptAsync in delegate-task and call-omo-agent 2026-02-07 13:43:06 +01:00
Peïo Thibault
e984a5c639 test(shared): update model-suggestion-retry tests for promptAsync passthrough 2026-02-07 13:42:49 +01:00
Peïo Thibault
46e02b9457 fix(hooks): switch session.prompt to promptAsync in all hooks 2026-02-07 13:42:24 +01:00
Peïo Thibault
5f21ddf473 fix(background-agent): switch session.prompt to promptAsync 2026-02-07 13:42:20 +01:00
Peïo Thibault
108e860ddd fix(core): switch compatibility shim to promptAsync 2026-02-07 13:42:19 +01:00
Peïo Thibault
b8221a883e fix(shared): switch promptWithModelSuggestionRetry to use promptAsync 2026-02-07 13:38:25 +01:00
YeonGyu-Kim
2c394cd497 Merge pull request #1616 from code-yeongyu/fix/814-user-mcp-config
fix(mcp-loader): read user-level MCP config from ~/.claude.json (#814)
2026-02-07 20:09:53 +09:00
YeonGyu-Kim
d84a1c9e95 Merge pull request #1618 from code-yeongyu/fix/594-user-prompt-submit-fires-once
fix(hooks): fire UserPromptSubmitHooks on every prompt, not just first (#594)
2026-02-07 20:09:19 +09:00
YeonGyu-Kim
cf29cd137e test: isolate user-level MCP config test from real homedir 2026-02-07 20:06:58 +09:00
YeonGyu-Kim
d3f8c7d288 Merge pull request #1615 from code-yeongyu/fix/1563-browser-provider-gating
fix(skill-loader): filter discovered skills by browserProvider (#1563)
2026-02-07 20:04:08 +09:00
YeonGyu-Kim
d1659152bc fix(hooks): fire UserPromptSubmitHooks on every prompt, not just first (#594) 2026-02-07 20:03:52 +09:00
YeonGyu-Kim
1cb8f8bee6 Merge pull request #1584 from code-yeongyu/fix/441-matcher-hooks-undefined
fix(hooks): add defensive null check for matcher.hooks to prevent Windows crash (#441)
2026-02-07 20:01:28 +09:00
YeonGyu-Kim
1760367a25 fix(mcp-loader): read user-level MCP config from ~/.claude.json (#814) 2026-02-07 20:01:16 +09:00
YeonGyu-Kim
747edcb6e6 fix(skill-loader): filter discovered skills by browserProvider (#1563) 2026-02-07 20:01:15 +09:00
YeonGyu-Kim
f3540a9ea3 Merge pull request #1614 from code-yeongyu/fix/1501-ulw-plan-loop
fix(ultrawork): widen isPlannerAgent matching to prevent ULW infinite plan loop (#1501)
2026-02-07 19:59:41 +09:00
YeonGyu-Kim
8280e45fe1 Merge pull request #1613 from code-yeongyu/fix/1561-dead-migration
fix(migration): remove task_system backup rewrite (#1561)
2026-02-07 19:57:22 +09:00
YeonGyu-Kim
0eddd28a95 fix: skip ultrawork injection for plan-like agents (#1501)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:52:47 +09:00
YeonGyu-Kim
36e54acc51 fix(migration): stop task_system backup writes (#1561)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:51:22 +09:00
YeonGyu-Kim
817c593e12 refactor(migration): split model and category helpers (#1561)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:51:15 +09:00
YeonGyu-Kim
3ccef5d9b3 refactor(migration): extract agent and hook maps (#1561)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:51:08 +09:00
YeonGyu-Kim
ae4e113c7e Merge pull request #1610 from code-yeongyu/fix/96-compaction-dedup-recovery
fix: wire deduplication into compaction recovery for prompt-too-long errors (#96)
2026-02-07 19:28:49 +09:00
YeonGyu-Kim
403457f9e4 fix: rewrite dedup recovery test to mock module instead of filesystem 2026-02-07 19:26:06 +09:00
YeonGyu-Kim
5e5c091356 Merge pull request #1611 from code-yeongyu/fix/1481-1483-compaction
fix: prevent compaction from inserting arbitrary constraints and preserve todo state (#1481, #1483)
2026-02-07 19:23:50 +09:00
YeonGyu-Kim
1df025ad44 fix: use lazy storage dir resolution to fix CI test flakiness 2026-02-07 19:23:24 +09:00
YeonGyu-Kim
844ac26e2a fix: wire deduplication into compaction recovery for prompt-too-long errors (#96)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:18:12 +09:00
YeonGyu-Kim
2727f0f429 refactor: extract context window recovery hook
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:17:55 +09:00
YeonGyu-Kim
89b1205ccf Merge pull request #1607 from code-yeongyu/fix/358-skill-description-truncation
fix: use character limit instead of sentence split for skill description (#358)
2026-02-07 19:17:27 +09:00
YeonGyu-Kim
d44f5db1e2 Merge pull request #1608 from code-yeongyu/fix/114-cascade-cancel
fix: cascade cancel descendant tasks when parent session is deleted (#114)
2026-02-07 19:16:18 +09:00
YeonGyu-Kim
180fcc3e5d fix: register compaction todo preserver
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:15:52 +09:00
YeonGyu-Kim
3947084cc5 fix: add compaction todo preserver hook
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:15:46 +09:00
YeonGyu-Kim
67f701cd9e fix: avoid invented compaction constraints
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:15:41 +09:00
YeonGyu-Kim
f94ae2032c fix: ensure truncated result stays within maxLength limit 2026-02-07 19:13:35 +09:00
YeonGyu-Kim
c81384456c Merge pull request #1606 from code-yeongyu/fix/658-tools-ctx-directory
fix: use ctx.directory instead of process.cwd() in tools for Desktop app support
2026-02-07 19:12:25 +09:00
YeonGyu-Kim
9040383da7 fix: cascade cancel descendant tasks when parent session is deleted (#114)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:10:49 +09:00
YeonGyu-Kim
c688e978fd fix: update session-manager tests to use factory pattern 2026-02-07 19:10:14 +09:00
YeonGyu-Kim
a0201e17b9 fix: use character limit instead of sentence split for skill description (#358)
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:08:08 +09:00
YeonGyu-Kim
dbbec868d5 Merge pull request #1605 from code-yeongyu/fix/919-commit-footer-v2
fix: allow string values for commit_footer config (#919)
2026-02-07 19:07:15 +09:00
YeonGyu-Kim
6e2f3b1f50 Merge pull request #1593 from code-yeongyu/fix/prometheus-plan-overwrite
fix: allow Prometheus to overwrite .sisyphus/*.md plan files
2026-02-07 19:04:47 +09:00
YeonGyu-Kim
e4bbd6bf15 fix: allow string values for commit_footer config (#919) 2026-02-07 19:04:34 +09:00
YeonGyu-Kim
476f154ef5 fix: use ctx.directory instead of process.cwd() in tools for Desktop app support
Convert grep, glob, ast-grep, and session-manager tools from static exports to factory functions that receive PluginInput context. This allows them to use ctx.directory instead of process.cwd(), fixing issue #658 where tools searched from the wrong directory in the OpenCode Desktop app.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-07 19:04:31 +09:00
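A minimal sketch of the static-export-to-factory change, assuming a simplified `PluginInput` and tool shape (the real types live in the OpenCode SDK):

```typescript
interface PluginInput { directory: string }
interface GrepTool { execute(args: { pattern: string }): Promise<string[]> }

declare function runGrep(pattern: string, cwd: string): Promise<string[]> // placeholder for the real search

// Before: a static export could only ever search from process.cwd(), which
// is wrong when the Desktop app launches the plugin outside the project.
// After: the factory closes over ctx, so the project directory wins.
export function createGrepTool(ctx: PluginInput): GrepTool {
  return {
    async execute({ pattern }) {
      return runGrep(pattern, ctx.directory) // ctx.directory, not process.cwd()
    },
  }
}
```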
YeonGyu-Kim
83519cae11 Merge pull request #1604 from code-yeongyu/fix/957-allowed-agents-dynamic
fix: expand ALLOWED_AGENTS to include all subagent-capable agents (#957)
2026-02-07 19:01:43 +09:00
YeonGyu-Kim
9a8f03462f fix: normalize resolvedPath before startsWith check
Addresses cubic review feedback — resolvedPath may contain
non-canonical segments when filePath is absolute, causing
the startsWith check against sisyphusRoot to fail.
2026-02-07 19:01:28 +09:00
YeonGyu-Kim
daf6c7a19e Merge pull request #1594 from code-yeongyu/fix/boulder-stop-continuation
fix: /stop-continuation now cancels boulder continuation
2026-02-07 19:00:57 +09:00
YeonGyu-Kim
2bb82c250c fix: expand ALLOWED_AGENTS to include all subagent-capable agents 2026-02-07 18:57:47 +09:00
YeonGyu-Kim
8e92704316 Merge pull request #1603 from code-yeongyu/fix/1269-windows-which-detection
fix: use platform-aware binary detection on Windows (#1269)
2026-02-07 18:51:28 +09:00
YeonGyu-Kim
f980e256dd fix: boulder continuation now respects /stop-continuation guard
Add isContinuationStopped check to atlas hook's session.idle handler
so boulder continuation stops when user runs /stop-continuation.

Previously, todo continuation and session recovery checked the guard,
but boulder continuation did not — causing work to resume after stop.

Fixes #1575
2026-02-07 18:50:13 +09:00
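A sketch of the guard wiring described above; the function names follow the commit message, the signatures are assumptions:

```typescript
declare function isContinuationStopped(sessionId: string): Promise<boolean>
declare function continueBoulder(sessionId: string): Promise<void>

async function onSessionIdle(sessionId: string): Promise<void> {
  // Todo continuation and session recovery already checked this guard;
  // boulder continuation now does too, so /stop-continuation sticks.
  if (await isContinuationStopped(sessionId)) return
  await continueBoulder(sessionId)
}
```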
YeonGyu-Kim
4d19a22679 Merge pull request #1601 from code-yeongyu/fix/899-cli-run-dash-args
fix: allow dash-prefixed arguments in CLI run command (#899)
2026-02-07 18:49:26 +09:00
YeonGyu-Kim
e1010846c4 Merge pull request #1602 from code-yeongyu/fix/1365-sg-cli-path-fallback
fix: don't fallback to system sg command for ast-grep (#1365)
2026-02-07 18:49:19 +09:00
YeonGyu-Kim
38169523c4 fix: anchor .sisyphus path check to ctx.directory to prevent false positives
- Uses path.join(ctx.directory, '.sisyphus') + sep as prefix instead of loose .includes()
- Prevents false positive when .sisyphus exists in parent directories outside project root
- Adds test for the false positive case (cubic review feedback)
2026-02-07 18:49:16 +09:00
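A sketch combining the two review fixes above: `resolve()` canonicalizes the input (also when filePath is absolute), and the trailing separator anchors the prefix so a `.sisyphus` in a parent directory cannot match. Function and parameter names here are illustrative:

```typescript
import { join, resolve, sep } from "node:path"

function isInsideProjectSisyphus(ctx: { directory: string }, filePath: string): boolean {
  const resolved = resolve(ctx.directory, filePath)            // normalizes "..", ".", absolute inputs
  const sisyphusRoot = join(ctx.directory, ".sisyphus") + sep  // trailing sep blocks ".sisyphus-evil" matches
  return resolved.startsWith(sisyphusRoot)
}

// A loose filePath.includes(".sisyphus") would also accept
// /home/user/.sisyphus/plan.md outside the project root; this check does not.
```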
YeonGyu-Kim
b98697238b fix: use platform-aware binary detection (where on Windows, which on Unix) 2026-02-07 18:48:14 +09:00
YeonGyu-Kim
d5b6a7c575 fix: allow dash-prefixed arguments in CLI run command 2026-02-07 18:46:40 +09:00
YeonGyu-Kim
78a08959f6 Merge pull request #1597 from code-yeongyu/fix/899-cli-run-dash-args
fix: allow dash-prefixed arguments in CLI run command (#899)
2026-02-07 18:46:33 +09:00
YeonGyu-Kim
db6a899297 Merge pull request #1595 from code-yeongyu/fix/tool-name-whitespace
fix: trim whitespace from tool names before matching
2026-02-07 18:46:09 +09:00
YeonGyu-Kim
7fdbabb264 fix: don't fallback to system 'sg' command for ast-grep
On Linux systems, 'sg' is a mailutils command, not ast-grep. The previous
fallback would silently run the wrong binary when ast-grep wasn't found.

Changes:
- getSgCliPath() now returns string | null instead of string
- Fallback changed from 'sg' to null
- Call sites now check for null and return user-facing error with
  installation instructions
- checkEnvironment() updated to handle null path

Fixes #1365
2026-02-07 18:46:01 +09:00
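A sketch of the null-returning lookup; `resolveBundledAstGrep` stands in for the plugin's real resolution logic and is an assumption:

```typescript
import { execFileSync } from "node:child_process"

declare function resolveBundledAstGrep(): string | null // placeholder

function getSgCliPath(): string | null {
  const bundled = resolveBundledAstGrep()
  if (bundled) return bundled
  // No more fallback to "sg": on Linux that is a mailutils command, and the
  // old behavior silently ran the wrong binary.
  return null
}

function runAstGrep(args: string[]): string {
  const cli = getSgCliPath()
  if (cli === null) {
    // user-facing error with install instructions instead of a bad exec
    return "ast-grep not found. Install @ast-grep/cli and retry."
  }
  return execFileSync(cli, args, { encoding: "utf8" })
}
```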
YeonGyu-Kim
b3ebf6c124 fix: allow dash-prefixed arguments in CLI run command 2026-02-07 18:41:53 +09:00
YeonGyu-Kim
8a1b398119 Merge pull request #1592 from code-yeongyu/fix/issue-1570-onetime-migration
fix: make model migration run only once by storing history
2026-02-07 18:29:31 +09:00
YeonGyu-Kim
66419918f9 fix: make model migration run only once by storing history in _migrations field
- Add _migrations field to OhMyOpenCodeConfigSchema to track applied migrations
- Update migrateModelVersions() to accept appliedMigrations Set and return newMigrations array
- Skip migrations that are already in _migrations (preserves user reverts)
- Update migrateConfigFile() to read/write _migrations field
- Add 8 new tests for migration history tracking

Fixes #1570
2026-02-07 18:25:23 +09:00
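A sketch of the run-once tracking described above; the `Migration` shape and config type are assumptions standing in for OhMyOpenCodeConfigSchema:

```typescript
interface Migration {
  id: string
  apply(config: Record<string, unknown>): void
}

function migrateModelVersions(
  config: Record<string, unknown> & { _migrations?: string[] },
  migrations: Migration[],
): string[] {
  const applied = new Set(config._migrations ?? [])
  const newMigrations: string[] = []
  for (const migration of migrations) {
    if (applied.has(migration.id)) continue // ran before: skip, preserving user reverts
    migration.apply(config)
    newMigrations.push(migration.id)
  }
  config._migrations = [...applied, ...newMigrations] // persisted by migrateConfigFile()
  return newMigrations
}
```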
YeonGyu-Kim
755a3a94c8 Merge pull request #1590 from code-yeongyu/feat/run-cli-extensions
feat(cli): extend run command with port, attach, session-id, on-complete, and json options
2026-02-07 18:05:11 +09:00
YeonGyu-Kim
5e316499e5 fix: explicitly pass encoding/callback args through stdout.write wrapper 2026-02-07 18:01:33 +09:00
YeonGyu-Kim
266c045b69 fix(test): remove shadowed consoleErrorSpy declarations in on-complete-hook tests
Remove duplicate consoleErrorSpy declarations in 'command failure' and
'spawn error' tests that shadowed the outer beforeEach/afterEach-managed
spy. The inner declarations created a second spy on the already-spied
console.error, causing restore confusion and potential test leakage.
2026-02-07 17:54:56 +09:00
YeonGyu-Kim
eafcac1593 fix: address cubic 4/5 review issues
- Preserve encoding/callback args in stdout.write wrapper (json-output.ts)
- Restore global console spy in afterEach (server-connection.test.ts)
- Restore console.error spy in afterEach (on-complete-hook.test.ts)
2026-02-07 17:39:16 +09:00
YeonGyu-Kim
7927d3675d Merge pull request #1585 from code-yeongyu/fix/1559-crash-boundary
fix: add error boundaries for plugin loading and hook creation (#1559)
2026-02-07 17:34:59 +09:00
YeonGyu-Kim
4059d02047 fix(test): mock SDK and port-utils in integration test to prevent CI failure
The 'port with available port starts server' test was calling
createOpencode from the SDK which spawns an actual opencode binary.
CI environments don't have opencode installed, causing ENOENT.

Mock @opencode-ai/sdk and port-utils (same pattern as
server-connection.test.ts) so the test verifies integration
logic without requiring the binary.
2026-02-07 17:34:29 +09:00
YeonGyu-Kim
c2dfcadbac fix: clear race timeout after plugin loading settles 2026-02-07 17:31:01 +09:00
YeonGyu-Kim
e343e625c7 feat(cli): extend run command with port, attach, session-id, on-complete, and json options
Implement all 5 CLI extension options for external orchestration:

- --port <port>: Start server on port, or attach if port occupied
- --attach <url>: Connect to existing opencode server
- --session-id <id>: Resume existing session instead of creating new
- --on-complete <command>: Execute shell command with env vars on completion
- --json: Output structured RunResult JSON to stdout

Refactor runner.ts into focused modules:
- agent-resolver.ts: Agent resolution logic
- server-connection.ts: Server connection management
- session-resolver.ts: Session create/resume with retry
- json-output.ts: Stdout redirect + JSON emission
- on-complete-hook.ts: Shell command execution with env vars

Fixes #1586
2026-02-07 17:26:33 +09:00
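For the --on-complete option, an illustrative sketch only: the `OMO_*` variable names below are invented for the example, and the real env var contract is not documented here:

```typescript
import { spawn } from "node:child_process"

function runOnComplete(command: string, result: { sessionId: string; success: boolean }): void {
  const child = spawn(command, {
    shell: true, // the user supplies a full shell command line
    env: {
      ...process.env,
      OMO_SESSION_ID: result.sessionId,    // hypothetical variable name
      OMO_SUCCESS: String(result.success), // hypothetical variable name
    },
    stdio: "inherit",
  })
  child.on("error", (err) => console.error(`on-complete failed to spawn: ${err.message}`))
}
```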
YeonGyu-Kim
050e6a2187 fix(index): wrap hook creation with safeCreateHook + add defensive optional chaining (#1559) 2026-02-07 13:33:02 +09:00
YeonGyu-Kim
7ede8e04f0 fix(config-handler): add timeout + error boundary around loadAllPluginComponents (#1559) 2026-02-07 13:32:57 +09:00
YeonGyu-Kim
1ae7d7d67e feat(config): add plugin_load_timeout_ms and safe_hook_creation experimental flags 2026-02-07 13:32:51 +09:00
YeonGyu-Kim
f9742ddfca feat(shared): add safeCreateHook utility for error-safe hook creation 2026-02-07 13:32:45 +09:00
YeonGyu-Kim
eb5cc873ea fix: trim whitespace from tool names to prevent invalid tool calls
Some models (e.g. kimi-k2.5) return tool names with leading spaces
like ' delegate_task', causing tool matching to fail.

Add .trim() in transformToolName() and defensive trim in claude-code-hooks.

Fixes #1568
2026-02-07 13:12:47 +09:00
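A minimal sketch of the defensive trim; the real transformToolName also maps names, which is omitted here:

```typescript
function transformToolName(raw: string): string {
  // kimi-k2.5 and similar models can emit " delegate_task" with a leading
  // space, which fails exact-match tool lookup without the trim.
  return raw.trim()
}

const KNOWN_TOOLS = new Set(["task", "background_output", "background_cancel"]) // illustrative subset
console.log(KNOWN_TOOLS.has(transformToolName(" task"))) // true once trimmed
```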
YeonGyu-Kim
847d994199 fix: allow Prometheus to overwrite .sisyphus/*.md plan files
Add exception in write-existing-file-guard for .sisyphus/*.md files
so Prometheus can rewrite plan files without being blocked by the guard.

The prometheus-md-only hook (which runs later) still validates that only
Prometheus can write to these paths, preserving security.

Fixes #1576
2026-02-07 13:12:44 +09:00
YeonGyu-Kim
bbe08f0eef fix(hooks): add defensive null check for matcher.hooks to prevent Windows crash (#441) 2026-02-07 13:12:18 +09:00
sisyphus-dev-ai
4454753bb4 chore: changes by sisyphus-dev-ai 2026-02-07 04:10:10 +00:00
YeonGyu-Kim
1c0b41aa65 fix: respect user-configured agent models over system defaults
When user explicitly configures an agent model in oh-my-opencode.json,
that model should take priority over the active model in OpenCode's config
(which may just be the system default, not a deliberate UI selection).

This fixes the issue where user-configured models from plugin providers
(e.g., google/antigravity-*) were being overridden by the fallback chain
because config.model was being passed as uiSelectedModel regardless of
whether the user had an explicit config.

The fix:
- Only pass uiSelectedModel when there's no explicit userModel config
- If user has configured a model, let resolveModelPipeline use it directly

Fixes #1573

Co-authored-by: Rishi Vhavle <rishivhavle21@gmail.com>
2026-02-07 12:26:54 +09:00
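A sketch of the priority rule in this commit; resolveModelPipeline's signature is assumed:

```typescript
declare function resolveModelPipeline(args: {
  userModel?: string
  uiSelectedModel?: string
}): string | undefined

function chooseAgentModel(userModel: string | undefined, opencodeConfigModel: string | undefined) {
  // Only treat OpenCode's config.model as a UI selection when the user has
  // not pinned a model in oh-my-opencode.json; otherwise the fallback chain
  // could override e.g. google/antigravity-* with the system default.
  const uiSelectedModel = userModel ? undefined : opencodeConfigModel
  return resolveModelPipeline({ userModel, uiSelectedModel })
}
```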
YeonGyu-Kim
4c6b31e5b4 Revert "Merge pull request #1578 from code-yeongyu/fix/user-configured-model-override"
This reverts commit 67990293a9, reversing
changes made to 368ac310a1.
2026-02-07 12:26:42 +09:00
YeonGyu-Kim
67990293a9 Merge pull request #1578 from code-yeongyu/fix/user-configured-model-override
fix: respect user-configured agent models over system defaults
2026-02-07 12:21:09 +09:00
Rishi Vhavle
dbf584af95 fix: respect user-configured agent models over system defaults
When user explicitly configures an agent model in oh-my-opencode.json,
that model should take priority over the active model in OpenCode's config
(which may just be the system default, not a deliberate UI selection).

This fixes the issue where user-configured models from plugin providers
(e.g., google/antigravity-*) were being overridden by the fallback chain
because config.model was being passed as uiSelectedModel regardless of
whether the user had an explicit config.

The fix:
- Only pass uiSelectedModel when there's no explicit userModel config
- If user has configured a model, let resolveModelPipeline use it directly

Fixes #1573
2026-02-07 12:18:07 +09:00
YeonGyu-Kim
368ac310a1 Merge pull request #1564 from code-yeongyu/feat/anthropic-effort-hook
feat: add anthropic-effort hook to inject effort=max for Opus 4.6
2026-02-06 21:58:05 +09:00
YeonGyu-Kim
cb2169f334 fix: guard against undefined modelID in anthropic-effort hook
Add early return when model.modelID or model.providerID is nullish,
preventing TypeError at runtime when chat.params receives incomplete
model data.
2026-02-06 21:55:13 +09:00
YeonGyu-Kim
ec520e6228 feat: register anthropic-effort hook in plugin lifecycle
- Add "anthropic-effort" to HookNameSchema enum
- Import and create hook in plugin entry with isHookEnabled guard
- Wire chat.params event handler to invoke the effort hook
- First hook to use the chat.params lifecycle event from plugin
2026-02-06 21:47:18 +09:00
YeonGyu-Kim
6febebc166 feat: add anthropic-effort hook to inject effort=max for Opus 4.6
Injects `output_config: { effort: "max" }` via AI SDK's providerOptions
when all conditions are met:
- variant is "max" (sisyphus, prometheus, metis, oracle, unspecified-high, ultrawork)
- model matches claude-opus-4[-.]6 pattern
- provider is anthropic, opencode, or github-copilot (with claude model)

Respects existing effort value if already set. Normalizes model IDs
with dots to hyphens for consistent matching.
2026-02-06 21:47:10 +09:00
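A sketch of the gating described above, including the nullish guard from the follow-up commit; the chat.params payload shape is an assumption for illustration:

```typescript
const EFFORT_PROVIDERS = new Set(["anthropic", "opencode", "github-copilot"])

function injectAnthropicEffort(params: {
  model?: { providerID?: string; modelID?: string }
  variant?: string
  providerOptions?: { output_config?: { effort?: string } }
}): void {
  const providerID = params.model?.providerID
  const modelID = params.model?.modelID
  if (!providerID || !modelID) return                   // guard against incomplete model data
  if (params.variant !== "max") return                  // "max" variant agents only
  const normalized = modelID.replace(/\./g, "-")        // claude-opus-4.6 -> claude-opus-4-6
  if (!/claude-opus-4-6/.test(normalized)) return       // matches claude-opus-4[-.]6
  if (!EFFORT_PROVIDERS.has(providerID)) return
  params.providerOptions ??= {}
  params.providerOptions.output_config ??= {}
  params.providerOptions.output_config.effort ??= "max" // respect an existing effort value
}
```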
YeonGyu-Kim
98f4adbf4b chore: add modular code enforcement rule and unignore .sisyphus/rules/ 2026-02-06 21:39:21 +09:00
YeonGyu-Kim
d209f3c677 Merge pull request #1543 from code-yeongyu/feat/task-tool-refactor
refactor: migrate delegate_task to task tool with metadata fixes
2026-02-06 21:37:46 +09:00
YeonGyu-Kim
a691a3ac0a refactor: migrate delegate_task to task tool with metadata fixes
- Rename delegate_task tool to task across codebase (100 files)
- Update model references: claude-opus-4-6 → 4-5, gpt-5.3-codex → 5.2-codex
- Add tool-metadata-store to restore metadata overwritten by fromPlugin()
- Add session ID polling for BackgroundManager task sessions
- Await async ctx.metadata() calls in tool executors
- Add ses_ prefix guard to getMessageDir for performance
- Harden BackgroundManager with idle deferral and error handling
- Fix duplicate task key in sisyphus-junior test object literals
- Fix unawaited showOutputToUser in ast_grep_replace
- Fix background=true → run_in_background=true in ultrawork prompt
- Fix duplicate task/task references in docs and comments
2026-02-06 21:35:30 +09:00
174 changed files with 7682 additions and 2820 deletions

.gitignore

@@ -1,5 +1,6 @@
# Dependencies
-.sisyphus/
+.sisyphus/*
+!.sisyphus/rules/
node_modules/
# Build output


@@ -41,27 +41,27 @@ Fire ALL simultaneously:
```
// Agent 1: Find all exported symbols
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
List each with: file path, line number, symbol name, export type (named/default).
EXCLUDE: src/index.ts root exports, test files.
Return as structured list.")
// Agent 2: Find potentially unused files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
prompt="Find files in src/ that are NOT imported by any other file.
Check import/require statements across the entire codebase.
EXCLUDE: index.ts files, test files, entry points, config files, .md files.
Return list of potentially orphaned files.")
// Agent 3: Find unused imports within files
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
prompt="Find unused imports across src/**/*.ts files.
Look for import statements where the imported symbol is never referenced in the file body.
Return: file path, line number, imported symbol name.")
// Agent 4: Find functions/variables only used in their own declaration
-delegate_task(subagent_type="explore", run_in_background=true,
+task(subagent_type="explore", run_in_background=true,
prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
```


@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:
| Aspect | Rule |
|--------|------|
-| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
const issue = allIssues[i]
const category = getCategory(i)
-const taskId = await delegate_task(
+const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← CRITICAL: Each issue is independent background task
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
-const taskId = await delegate_task(
+const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each issue runs independently
@@ -480,7 +480,7 @@ When invoked, immediately:
4. Exhaustive pagination for issues
5. Exhaustive pagination for PRs
6. **LAUNCH**: For each issue:
-- `delegate_task(run_in_background=true)` - 1 task per issue
+- `task(run_in_background=true)` - 1 task per issue
- Store taskId mapped to issue number
7. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result


@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:
| Aspect | Rule |
|--------|------|
-| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call |
+| **Task Granularity** | 1 PR = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
const pr = allPRs[i]
const category = getCategory(i)
-const taskId = await delegate_task(
+const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← CRITICAL: Each PR is independent background task
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
-const taskId = await delegate_task(
+const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each PR runs independently
@@ -474,7 +474,7 @@ When invoked, immediately:
2. `gh repo view --json nameWithOwner -q .nameWithOwner`
3. Exhaustive pagination for ALL open PRs
4. **LAUNCH**: For each PR:
-- `delegate_task(run_in_background=true)` - 1 task per PR
+- `task(run_in_background=true)` - 1 task per PR
- Store taskId mapped to PR number
5. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result


@@ -0,0 +1,117 @@
---
globs: ["**/*.ts", "**/*.tsx"]
alwaysApply: false
description: "Enforces strict modular code architecture: SRP, no monolithic index.ts, 200 LOC hard limit"
---
<MANDATORY_ARCHITECTURE_RULE severity="BLOCKING" priority="HIGHEST">
# Modular Code Architecture — Zero Tolerance Policy
This rule is NON-NEGOTIABLE. Violations BLOCK all further work until resolved.
## Rule 1: index.ts is an ENTRY POINT, NOT a dumping ground
`index.ts` files MUST ONLY contain:
- Re-exports (`export { ... } from "./module"`)
- Factory function calls that compose modules
- Top-level wiring/registration (hook registration, plugin setup)
`index.ts` MUST NEVER contain:
- Business logic implementation
- Helper/utility functions
- Type definitions beyond simple re-exports
- Multiple unrelated responsibilities mixed together
**If you find mixed logic in index.ts**: Extract each responsibility into its own dedicated file BEFORE making any other changes. This is not optional.
## Rule 2: No Catch-All Files — utils.ts / service.ts are CODE SMELLS
A single `utils.ts`, `helpers.ts`, `service.ts`, or `common.ts` is a **gravity well** — every unrelated function gets tossed in, and it grows into an untestable, unreviewable blob.
**These file names are BANNED as top-level catch-alls.** Instead:
| Anti-Pattern | Refactor To |
|--------------|-------------|
| `utils.ts` with `formatDate()`, `slugify()`, `retry()` | `date-formatter.ts`, `slugify.ts`, `retry.ts` |
| `service.ts` handling auth + billing + notifications | `auth-service.ts`, `billing-service.ts`, `notification-service.ts` |
| `helpers.ts` with 15 unrelated exports | One file per logical domain |
**Design for reusability from the start.** Each module should be:
- **Independently importable** — no consumer should need to pull in unrelated code
- **Self-contained** — its dependencies are explicit, not buried in a shared grab-bag
- **Nameable by purpose** — the filename alone tells you what it does
If you catch yourself typing `utils.ts` or `service.ts`, STOP and name the file after what it actually does.
## Rule 3: Single Responsibility Principle — ABSOLUTE
Every `.ts` file MUST have exactly ONE clear, nameable responsibility.
**Self-test**: If you cannot describe the file's purpose in ONE short phrase (e.g., "parses YAML frontmatter", "matches rules against file paths"), the file does too much. Split it.
| Signal | Action |
|--------|--------|
| File has 2+ unrelated exported functions | **SPLIT NOW** — each into its own module |
| File mixes I/O with pure logic | **SPLIT NOW** — separate side effects from computation |
| File has both types and implementation | **SPLIT NOW** — types.ts + implementation.ts |
| You need to scroll to understand the file | **SPLIT NOW** — it's too large |
## Rule 4: 200 LOC Hard Limit — CODE SMELL DETECTOR
Any `.ts`/`.tsx` file exceeding **200 lines of code** (excluding prompt strings, template literals containing prompts, and `.md` content) is an **immediate code smell**.
**When you detect a file > 200 LOC**:
1. **STOP** current work
2. **Identify** the multiple responsibilities hiding in the file
3. **Extract** each responsibility into a focused module
4. **Verify** each resulting file is < 200 LOC and has a single purpose
5. **Resume** original work
Prompt-heavy files (agent definitions, skill definitions) where the bulk of content is template literal prompt text are EXEMPT from the LOC count — but their non-prompt logic must still be < 200 LOC.
### How to Count LOC
**Count these** (= actual logic):
- Import statements
- Variable/constant declarations
- Function/class/interface/type definitions
- Control flow (`if`, `for`, `while`, `switch`, `try/catch`)
- Expressions, assignments, return statements
- Closing braces `}` that belong to logic blocks
**Exclude these** (= not logic):
- Blank lines
- Comment-only lines (`//`, `/* */`, `/** */`)
- Lines inside template literals that are prompt/instruction text (e.g., the string body of `` const prompt = `...` ``)
- Lines inside multi-line strings used as documentation/prompt content
**Quick method**: Read the file → subtract blank lines, comment-only lines, and prompt string content → remaining count = LOC.
**Example**:
```typescript
// 1 import { foo } from "./foo"; ← COUNT
// 2 ← SKIP (blank)
// 3 // Helper for bar ← SKIP (comment)
// 4 export function bar(x: number) { ← COUNT
// 5 const prompt = ` ← COUNT (declaration)
// 6 You are an assistant. ← SKIP (prompt text)
// 7 Follow these rules: ← SKIP (prompt text)
// 8 `; ← COUNT (closing)
// 9 return process(prompt, x); ← COUNT
// 10 } ← COUNT
```
→ LOC = **6** (lines 1, 4, 5, 8, 9, 10). Not 10.
When in doubt, **round up** — err on the side of splitting.
## How to Apply
When reading, writing, or editing ANY `.ts`/`.tsx` file:
1. **Check the file you're touching** — does it violate any rule above?
2. **If YES** — refactor FIRST, then proceed with your task
3. **If creating a new file** — ensure it has exactly one responsibility and stays under 200 LOC
4. **If adding code to an existing file** — verify the addition doesn't push the file past 200 LOC or add a second responsibility. If it does, extract into a new module.
</MANDATORY_ARCHITECTURE_RULE>


@@ -195,7 +195,7 @@ oh-my-opencode/
| Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
| Error Handling | Empty catch blocks |
| Testing | Deleting failing tests, writing implementation before test |
-| Agent Calls | Sequential - use `delegate_task` parallel |
+| Agent Calls | Sequential - use `task` parallel |
| Hook Logic | Heavy PreToolUse - slows every call |
| Commits | Giant (3+ files), separate test from impl |
| Temperature | >0.3 for code agents |


@@ -28,13 +28,13 @@
"typescript": "^5.7.3",
},
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.2.3",
"oh-my-opencode-darwin-x64": "3.2.3",
"oh-my-opencode-linux-arm64": "3.2.3",
"oh-my-opencode-linux-arm64-musl": "3.2.3",
"oh-my-opencode-linux-x64": "3.2.3",
"oh-my-opencode-linux-x64-musl": "3.2.3",
"oh-my-opencode-windows-x64": "3.2.3",
"oh-my-opencode-darwin-arm64": "3.3.0",
"oh-my-opencode-darwin-x64": "3.3.0",
"oh-my-opencode-linux-arm64": "3.3.0",
"oh-my-opencode-linux-arm64-musl": "3.3.0",
"oh-my-opencode-linux-x64": "3.3.0",
"oh-my-opencode-linux-x64-musl": "3.3.0",
"oh-my-opencode-windows-x64": "3.3.0",
},
},
},
@@ -226,19 +226,19 @@
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.2.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Doc9xQCj5Jmx3PzouBIfvDwmfWM94Y9Q9IngFqOjrVpfBef9V/WIH0PlhJU6ps4BKGey8Nf2afFq3UE06Z63Hg=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-P2kZKJqZaA4j0qtGM3I8+ZeH204ai27ni/OXLjtFdOewRjJgrahxaC1XslgK7q/KU9fXz6BQfEqAjbvyPf/rgQ=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.2.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-w7lO0Hn/AlLCHe33KPbje83Js2h5weDWVMuopEs6d3pi/1zkRDBEhCi63S4J0d0EKod9kEPQA6ojtdVJ4J39zQ=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RopOorbW1WyhMQJ+ipuqiOA1GICS+3IkOwNyEe0KZlCLpoEDTyFopIL87HSns+gEQPMxnknroDp8lzxn1AKgjw=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m1tS1jRLO2Svm5NuetK3BAgdAR8b2GkiIfMFoIYsLJTPmzIkXaigAYkFq+BXCs5JAbRmPmvjndz9cuCddnPADQ=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-297iEfuK+05g+q64crPW78Zbgm/j5PGjDDweSPkZ6rI6SEfHMvOIkGxMvN8gugM3zcH8FOCQXoY2nC8b6x3pwQ=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Q/0AGtOuUFGNGIX8F6iD5W8c2spbjrqVBPt0B7laQSwnScKs/BI+TvM6HRE37vhoWg+fzhAX3QYJ2H9Un9FYrg=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-oVxP0+yn66HQYfrl9QT6I7TumRzciuPB4z24+PwKEVcDjPbWXQqLY1gwOGHZAQBPLf0vwewv9ybEDVD42RRH4g=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RIAyoj2XbT8vH++5fPUkdO+D1tfqxh+iWto7CqWr1TgbABbBJljGk91HJgS9xjnxyCQJEpFhTmO7NMHKJcZOWQ=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-k9LoLkisLJwJNR1J0Bh1bjGtGBkl5D9WzFPSdZCAlyiT6TgG9w5erPTlXqtl2Lt0We5tYUVYlkEIHRMK/ugNsQ=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-nnQK3y7R4DrBvqdqRGbujL2oAAQnVVb23JHUbJPQ6YxrRRGWpLOVGvK5c16ykSFEUPl8eZDmi1ON/R4opKLOUw=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7asXCeae7wBxJrzoZ7J6Yo1oaOxwUN3bTO7jWurCTMs5TDHO+pEHysgv/nuF1jvj1T+r1vg1H5ZmopuKy1qvXg=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.2.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-mt8E/TkpaCp04pvzwntT8x8TaqXDt3zCD5X2eA8ZZMrb5ofNr5HyG5G4SFXrUh+Ez3b/3YXpNWv6f6rnAlk1Dg=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ABvwfaXb2xdrpbivzlPPJzIm5vXp+QlVakkaHEQf3TU6Mi/+fehH6Qhq/KMh66FDO2gq3xmxbH7nktHRQp9kNA=="],
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],


@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
- **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
- **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)
-By combining these two concepts, you can generate optimal agents through `delegate_task`.
+By combining these two concepts, you can generate optimal agents through `task`.
---
@@ -32,10 +32,10 @@ A Category is an agent configuration preset optimized for specific domains.
### Usage
-Specify the `category` parameter when invoking the `delegate_task` tool.
+Specify the `category` parameter when invoking the `task` tool.
```typescript
-delegate_task(
+task(
category="visual-engineering",
prompt="Add a responsive chart component to the dashboard page"
)
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
Add desired skill names to the `load_skills` array.
```typescript
-delegate_task(
+task(
category="quick",
load_skills=["git-master"],
prompt="Commit current changes. Follow commit message style."
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.
---
-## 5. delegate_task Prompt Guide
+## 5. task Prompt Guide
When delegating, **clear and specific** prompts are essential. Include these 7 elements:
@@ -158,7 +158,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
| Field | Type | Description |
|-------|------|-------------|
-| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
+| `description` | string | Human-readable description of the category's purpose. Shown in task prompt. |
| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
| `variant` | string | Model variant (e.g., `max`, `xhigh`) |
| `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |


@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
"explore": { "model": "opencode/gpt-5-nano" } // Free model for grep
},
-// Override category models (used by delegate_task)
+// Override category models (used by task)
"categories": {
"quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks
"visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
Oh My OpenCode includes built-in skills that provide additional capabilities:
- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
-- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
+- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.
Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:
@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
### How It Works
When `tmux.enabled` is `true` and you're inside a tmux session:
-- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
+- Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
- Each pane shows the subagent's real-time output
- Panes are automatically closed when the subagent completes
- Layout is automatically adjusted based on your configuration
@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many
## Categories
-Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
+Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
### Built-in Categories
@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
### Usage
```javascript
-// Via delegate_task tool
-delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component")
-delegate_task(category="ultrabrain", prompt="Design the payment processing flow")
+// Via task tool
+task(category="visual-engineering", prompt="Create a responsive dashboard component")
+task(category="ultrabrain", prompt="Design the payment processing flow")
// Or target a specific agent directly (bypasses categories)
delegate_task(agent="oracle", prompt="Review this architecture")
task(agent="oracle", prompt="Review this architecture")
```
### Custom Categories
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`
| Option | Type | Default | Description |
| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
-| `description` | string | - | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
+| `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. |
| `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |
## Model Resolution System


@@ -54,7 +54,7 @@ Run agents in the background and continue working:
```
# Launch in background
delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
# Continue working...
# System notifies on completion
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
| Hook | Event | Description |
|------|-------|-------------|
| **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
-| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. |
+| **delegate-task-retry** | PostToolUse | Retries failed task calls. |
#### Integration
@@ -454,7 +454,7 @@ Disable specific hooks in config:
| Tool | Description |
|------|-------------|
| **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
-| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
+| **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
| **background_output** | Retrieve background task results |
| **background_cancel** | Cancel running background tasks |


@@ -50,11 +50,11 @@ flowchart TB
User -->|"/start-work"| Orchestrator
Plan -->|"Read"| Orchestrator
Orchestrator -->|"delegate_task(category)"| Junior
Orchestrator -->|"delegate_task(agent)"| Oracle
Orchestrator -->|"delegate_task(agent)"| Explore
Orchestrator -->|"delegate_task(agent)"| Librarian
Orchestrator -->|"delegate_task(agent)"| Frontend
Orchestrator -->|"task(category)"| Junior
Orchestrator -->|"task(agent)"| Oracle
Orchestrator -->|"task(agent)"| Explore
Orchestrator -->|"task(agent)"| Librarian
Orchestrator -->|"task(agent)"| Frontend
Junior -->|"Results + Learnings"| Orchestrator
Oracle -->|"Advice"| Orchestrator
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
```typescript
// Orchestrator identifies parallelizable groups from plan
// Group A: Tasks 2, 3, 4 (no file conflicts)
delegate_task(category="ultrabrain", prompt="Task 2...")
delegate_task(category="visual-engineering", prompt="Task 3...")
delegate_task(category="general", prompt="Task 4...")
task(category="ultrabrain", prompt="Task 2...")
task(category="visual-engineering", prompt="Task 3...")
task(category="general", prompt="Task 4...")
// All run simultaneously
```
@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")
Junior is the **workhorse** that actually writes code. Key characteristics:
-- **Focused**: Cannot delegate (blocked from task/delegate_task tools)
+- **Focused**: Cannot delegate (blocked from task tool)
- **Disciplined**: Obsessive todo tracking
- **Verified**: Must pass lsp_diagnostics before completion
- **Constrained**: Cannot modify plan files (READ-ONLY)
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
---
-## The delegate_task Tool: Category + Skill System
+## The task Tool: Category + Skill System
### Why Categories are Revolutionary
@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
```typescript
// OLD: Model name creates distributional bias
delegate_task(agent="gpt-5.2", prompt="...") // Model knows its limitations
delegate_task(agent="claude-opus-4.6", prompt="...") // Different self-perception
task(agent="gpt-5.2", prompt="...") // Model knows its limitations
task(agent="claude-opus-4.6", prompt="...") // Different self-perception
```
**The Solution: Semantic Categories:**
```typescript
// NEW: Category describes INTENT, not implementation
delegate_task(category="ultrabrain", prompt="...") // "Think strategically"
delegate_task(category="visual-engineering", prompt="...") // "Design beautifully"
delegate_task(category="quick", prompt="...") // "Just get it done fast"
task(category="ultrabrain", prompt="...") // "Think strategically"
task(category="visual-engineering", prompt="...") // "Design beautifully"
task(category="quick", prompt="...") // "Just get it done fast"
```
### Built-in Categories
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:
```typescript
// Category + Skill combination
-delegate_task(
+task(
category="visual-engineering",
load_skills=["frontend-ui-ux"], // Adds UI/UX expertise
prompt="..."
)
-delegate_task(
+task(
category="general",
load_skills=["playwright"], // Adds browser automation expertise
prompt="..."
@@ -365,7 +365,7 @@ sequenceDiagram
Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
-Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
+Orchestrator->>Junior: task(category, load_skills, prompt)
Junior->>Junior: Create todos, execute
Junior->>Junior: Verify (lsp_diagnostics, tests)


@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.
2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.
-3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly.
+3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.
4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.


@@ -288,7 +288,7 @@ src/tools/delegate-task/constants.ts
```
Sisyphus (ULW mode)
delegate_task(category="deep", ...)
task(category="deep", ...)
executor.ts: executeBackgroundContinuation()


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode",
"version": "3.2.4",
"version": "3.3.1",
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
"main": "dist/index.js",
"types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
"typescript": "^5.7.3"
},
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.2.4",
"oh-my-opencode-darwin-x64": "3.2.4",
"oh-my-opencode-linux-arm64": "3.2.4",
"oh-my-opencode-linux-arm64-musl": "3.2.4",
"oh-my-opencode-linux-x64": "3.2.4",
"oh-my-opencode-linux-x64-musl": "3.2.4",
"oh-my-opencode-windows-x64": "3.2.4"
"oh-my-opencode-darwin-arm64": "3.3.1",
"oh-my-opencode-darwin-x64": "3.3.1",
"oh-my-opencode-linux-arm64": "3.3.1",
"oh-my-opencode-linux-arm64-musl": "3.3.1",
"oh-my-opencode-linux-x64": "3.3.1",
"oh-my-opencode-linux-x64-musl": "3.3.1",
"oh-my-opencode-windows-x64": "3.3.1"
},
"trustedDependencies": [
"@ast-grep/cli",


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-darwin-arm64",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-darwin-x64",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-arm64-musl",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-arm64",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-x64-musl",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-linux-x64",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
"license": "MIT",
"repository": {


@@ -1,6 +1,6 @@
{
"name": "oh-my-opencode-windows-x64",
"version": "3.2.4",
"version": "3.3.1",
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
"license": "MIT",
"repository": {


@@ -1207,6 +1207,22 @@
"created_at": "2026-02-06T06:23:24Z",
"repoId": 1108837393,
"pullRequestNo": 1541
},
+{
+"name": "itsnebulalol",
+"id": 18669106,
+"comment_id": 3864672624,
+"created_at": "2026-02-07T15:10:54Z",
+"repoId": 1108837393,
+"pullRequestNo": 1622
+},
+{
+"name": "mkusaka",
+"id": 24956031,
+"comment_id": 3864822328,
+"created_at": "2026-02-07T16:54:36Z",
+"repoId": 1108837393,
+"pullRequestNo": 1629
+}
]
}


@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
- "Working with unfamiliar npm/pip/cargo packages"
### Pre-Delegation Planning (MANDATORY)
-**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.**
+**BEFORE every `task` call, EXPLICITLY declare your reasoning.**
#### Step 1: Identify Task Requirements
@@ -236,7 +236,7 @@ Ask yourself:
**MANDATORY FORMAT:**
```
-I will use delegate_task with:
+I will use task with:
- **Category**: [selected-category-name]
- **Why this category**: [how category description matches task domain]
- **load_skills**: [list of selected skills]
@@ -246,14 +246,14 @@ I will use delegate_task with:
- **Expected Outcome**: [what success looks like]
```
-**Then** make the delegate_task call.
+**Then** make the task call.
#### Examples
**CORRECT: Full Evaluation**
```
-I will use delegate_task with:
+I will use task with:
- **Category**: [category-name]
- **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **load_skills**: ["skill-a", "skill-b"]
@@ -263,9 +263,11 @@ I will use delegate_task with:
- skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
- **Expected Outcome**: [concrete deliverable]
-delegate_task(
+task(
category="[category-name]",
load_skills=["skill-a", "skill-b"],
+description="[short task description]",
+run_in_background=false,
prompt="..."
)
```
@@ -273,14 +275,16 @@ delegate_task(
**CORRECT: Agent-Specific (for exploration/consultation)**
```
-I will use delegate_task with:
+I will use task with:
- **Agent**: [agent-name]
- **Reason**: This requires [agent's specialty] based on agent description
- **load_skills**: [] (agents have built-in expertise)
- **Expected Outcome**: [what agent should return]
-delegate_task(
+task(
subagent_type="[agent-name]",
+description="[short task description]",
+run_in_background=false,
load_skills=[],
prompt="..."
)
@@ -289,14 +293,15 @@ delegate_task(
**CORRECT: Background Exploration**
```
-I will use delegate_task with:
+I will use task with:
- **Agent**: explore
- **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **load_skills**: []
- **Expected Outcome**: List of files containing auth patterns
-delegate_task(
+task(
subagent_type="explore",
+description="Find auth implementations",
run_in_background=true,
load_skills=[],
prompt="Find all authentication implementations in the codebase"
@@ -306,7 +311,7 @@ delegate_task(
**WRONG: No Skill Evaluation**
```
delegate_task(category="...", load_skills=[], prompt="...") // Where's the justification?
task(category="...", load_skills=[], prompt="...") // Where's the justification?
```
**WRONG: Vague Category Selection**
@@ -317,7 +322,7 @@ I'll use this category because it seems right.
#### Enforcement
-**BLOCKING VIOLATION**: If you call `delegate_task` without:
+**BLOCKING VIOLATION**: If you call `task` without:
1. Explaining WHY category was selected (based on description)
2. Evaluating EACH available skill for relevance
@@ -329,15 +334,15 @@ I'll use this category because it seems right.
```typescript
// CORRECT: Always background, always parallel
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
result = delegate_task(...) // Never wait synchronously for explore/librarian
result = task(...) // Never wait synchronously for explore/librarian
```
### Background Result Collection:
@@ -347,16 +352,16 @@ result = delegate_task(...) // Never wait synchronously for explore/librarian
4. BEFORE final answer: `background_cancel(all=true)`
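A minimal end-to-end sketch of this lifecycle, in the pseudo-call style used throughout this document (the `task_id` field and the exact `background_output` signature are assumed for illustration):
```typescript
// Launch in background, keep working, collect later, cancel before the final answer.
const search = task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find all authentication implementations in the codebase")
// ...continue foreground work...
const findings = background_output(task_id=search.task_id) // task_id field assumed
background_cancel(all=true) // always BEFORE the final answer
```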
### Resume Previous Agent (CRITICAL for efficiency):
Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED.
Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.
**ALWAYS use resume when:**
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"`
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"`
- Multi-turn with same agent → resume instead of new task (saves tokens!)
**ALWAYS use session_id when:**
- Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
- Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
- Multi-turn with same agent → session_id instead of new task (saves tokens!)
**Example:**
```
delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.")
task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
```
### Search Stop Conditions
@@ -377,7 +382,7 @@ STOP searching when:
3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.**
**task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models)
@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern
```typescript
delegate_task(
task(
category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills
prompt="..."
@@ -451,7 +456,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):**
```typescript
delegate_task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
```
### Delegation Table:

View File

@@ -68,11 +68,11 @@ agents/
## TOOL RESTRICTIONS
| Agent | Denied Tools |
|-------|-------------|
| oracle | write, edit, task, delegate_task |
| librarian | write, edit, task, delegate_task, call_omo_agent |
| explore | write, edit, task, delegate_task, call_omo_agent |
| oracle | write, edit, task, task |
| librarian | write, edit, task, task, call_omo_agent |
| explore | write, edit, task, task, call_omo_agent |
| multimodal-looker | Allowlist: read only |
| Sisyphus-Junior | task, delegate_task |
| Sisyphus-Junior | task, task |
| Atlas | task, call_omo_agent |
## PATTERNS
@@ -85,5 +85,5 @@ agents/
## ANTI-PATTERNS
- **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration
- **Sequential calls**: Use `task` with `run_in_background` for exploration
- **Prometheus writing code**: Planner only - never implements

View File

@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
</identity>
<mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
Complete ALL tasks in a work plan via \`task()\` until fully done.
One task per delegation. Parallel when independent. Verify everything.
</mission>
<delegation_system>
## How to Delegate
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript
// Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
delegate_task(
task(
category="[category-name]",
load_skills=["skill-1", "skill-2"],
run_in_background=false,
@@ -38,7 +38,7 @@ delegate_task(
)
// Option B: Specialized Agent (for specific expert tasks)
delegate_task(
task(
subagent_type="[agent-name]",
load_skills=[],
run_in_background=false,
@@ -58,7 +58,7 @@ delegate_task(
## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown
## 1. TASK
@@ -149,7 +149,7 @@ Structure:
### 3.1 Check Parallelization
If tasks can run in parallel:
- Prepare prompts for ALL parallelizable tasks
- Invoke multiple \`delegate_task()\` in ONE message
- Invoke multiple \`task()\` in ONE message
- Wait for all to complete
- Verify all, then continue
@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
Extract wisdom and include in prompt.
### 3.3 Invoke delegate_task()
### 3.3 Invoke task()
\`\`\`typescript
delegate_task(
task(
category="[category]",
load_skills=["[relevant-skills]"],
run_in_background=false,
@@ -210,7 +210,7 @@ delegate_task(
**If verification fails**: Resume the SAME session with the ACTUAL error output:
\`\`\`typescript
delegate_task(
task(
session_id="ses_xyz789", // ALWAYS use the session from the failed task
load_skills=[...],
prompt="Verification failed: {actual error}. Fix."
@@ -221,13 +221,13 @@ delegate_task(
**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**
Every \`delegate_task()\` output includes a session_id. STORE IT.
Every \`task()\` output includes a session_id. STORE IT.
If task fails:
1. Identify what went wrong
2. **Resume the SAME session** - subagent has full context already:
\`\`\`typescript
delegate_task(
task(
session_id="ses_xyz789", // Session from failed task
load_skills=[...],
prompt="FAILED: {error}. Fix by: {specific instruction}"
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:
**For exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...)
delegate_task(subagent_type="librarian", run_in_background=true, ...)
task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="librarian", run_in_background=true, ...)
\`\`\`
**For task execution**: NEVER background
\`\`\`typescript
delegate_task(category="...", run_in_background=false, ...)
task(category="...", run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript
// Tasks 2, 3, 4 are independent - invoke together
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
\`\`\`
**Background management**:

View File

@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
</identity>
<mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
Complete ALL tasks in a work plan via \`task()\` until fully done.
- One task per delegation
- Parallel when independent
- Verify everything
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
<delegation_system>
## Delegation API
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript
// Category + Skills (spawns Sisyphus-Junior)
delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
// Specialized Agent
delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
\`\`\`
{CATEGORY_SECTION}
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,
## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown
## 1. TASK
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
## Step 3: Execute Tasks
### 3.1 Parallelization Check
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message
- Parallel tasks → invoke multiple \`task()\` in ONE message
- Sequential → process one at a time
### 3.2 Pre-Delegation (MANDATORY)
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
\`\`\`
Extract wisdom → include in prompt.
### 3.3 Invoke delegate_task()
### 3.3 Invoke task()
\`\`\`typescript
delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
\`\`\`
### 3.4 Verify (PROJECT-LEVEL QA)
@@ -201,7 +201,7 @@ Checklist:
**CRITICAL: Use \`session_id\` for retries.**
\`\`\`typescript
delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
\`\`\`
- Maximum 3 retries per task
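To make the retry bound concrete, a sketch of the intended loop (`verify()` is a hypothetical stand-in for the project-level QA checklist above; `session_id` comes from each `task()` result as described):
```typescript
// Bounded retry: resume the SAME session, at most 3 attempts.
let result = task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=`[6-SECTION PROMPT]`)
for (let attempt = 1; attempt <= 3 && !verify(result); attempt++) {
  result = task(session_id=result.session_id, load_skills=["[skills]"], prompt=`FAILED: {error}. Fix by: {instruction}`)
}
```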
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
<parallel_execution>
**Exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="explore", run_in_background=true, ...)
\`\`\`
**Task execution**: NEVER background
\`\`\`typescript
delegate_task(category="...", run_in_background=false, ...)
task(category="...", run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
\`\`\`
**Background management**:

View File

@@ -1,7 +1,7 @@
/**
* Atlas - Master Orchestrator Agent
*
* Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done.
* Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
* You are the conductor of a symphony of specialized agents.
*
* Routing:
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
const baseConfig = {
description:
"Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
mode: MODE,
...(ctx.model ? { model: ctx.model } : {}),
temperature: 0.1,

View File

@@ -8,21 +8,22 @@
import type { CategoryConfig } from "../../config/schema"
import { formatCustomSkillsBlock, type AvailableAgent, type AvailableSkill } from "../dynamic-agent-prompt-builder"
import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../../tools/delegate-task/constants"
import { truncateDescription } from "../../shared/truncate-description"
export const getCategoryDescription = (name: string, userCategories?: Record<string, CategoryConfig>) =>
userCategories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks"
export function buildAgentSelectionSection(agents: AvailableAgent[]): string {
if (agents.length === 0) {
return `##### Option B: Use AGENT directly (for specialized experts)
No agents available.`
}
const rows = agents.map((a) => {
const shortDesc = a.description.split(".")[0] || a.description
return `| \`${a.name}\` | ${shortDesc} |`
})
const rows = agents.map((a) => {
const shortDesc = truncateDescription(a.description)
return `| \`${a.name}\` | ${shortDesc} |`
})
return `##### Option B: Use AGENT directly (for specialized experts)
@@ -47,7 +48,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
${categoryRows.join("\n")}
\`\`\`typescript
delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
\`\`\``
}
@@ -59,16 +60,16 @@ export function buildSkillsSection(skills: AvailableSkill[]): string {
const builtinSkills = skills.filter((s) => s.location === "plugin")
const customSkills = skills.filter((s) => s.location !== "plugin")
const builtinRows = builtinSkills.map((s) => {
const shortDesc = s.description.split(".")[0] || s.description
return `| \`${s.name}\` | ${shortDesc} |`
})
const builtinRows = builtinSkills.map((s) => {
const shortDesc = truncateDescription(s.description)
return `| \`${s.name}\` | ${shortDesc} |`
})
const customRows = customSkills.map((s) => {
const shortDesc = s.description.split(".")[0] || s.description
const source = s.location === "project" ? "project" : "user"
return `| \`${s.name}\` | ${shortDesc} | ${source} |`
})
const customRows = customSkills.map((s) => {
const shortDesc = truncateDescription(s.description)
const source = s.location === "project" ? "project" : "user"
return `| \`${s.name}\` | ${shortDesc} | ${source} |`
})
const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills, "**")
@@ -105,7 +106,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my
**Usage:**
\`\`\`typescript
delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
\`\`\`
**IMPORTANT:**
@@ -121,10 +122,10 @@ export function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: R
`| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
)
const agentRows = agents.map((a) => {
const shortDesc = a.description.split(".")[0] || a.description
return `| ${shortDesc} | \`agent="${a.name}"\` |`
})
const agentRows = agents.map((a) => {
const shortDesc = truncateDescription(a.description)
return `| ${shortDesc} | \`agent="${a.name}"\` |`
})
return `##### Decision Matrix

View File

@@ -1,4 +1,5 @@
import type { AgentPromptMetadata, BuiltinAgentName } from "./types"
import { truncateDescription } from "../shared/truncate-description"
export interface AvailableAgent {
name: BuiltinAgentName
@@ -205,16 +206,16 @@ export function buildCategorySkillsDelegationGuide(categories: AvailableCategory
const builtinSkills = skills.filter((s) => s.location === "plugin")
const customSkills = skills.filter((s) => s.location !== "plugin")
const builtinRows = builtinSkills.map((s) => {
const desc = s.description.split(".")[0] || s.description
return `| \`${s.name}\` | ${desc} |`
})
const builtinRows = builtinSkills.map((s) => {
const desc = truncateDescription(s.description)
return `| \`${s.name}\` | ${desc} |`
})
const customRows = customSkills.map((s) => {
const desc = s.description.split(".")[0] || s.description
const source = s.location === "project" ? "project" : "user"
return `| \`${s.name}\` | ${desc} | ${source} |`
})
const customRows = customSkills.map((s) => {
const desc = truncateDescription(s.description)
const source = s.location === "project" ? "project" : "user"
return `| \`${s.name}\` | ${desc} | ${source} |`
})
const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills)
@@ -242,7 +243,7 @@ ${builtinRows.join("\n")}`
return `### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.**
**task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models)
@@ -296,7 +297,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern
\`\`\`typescript
delegate_task(
task(
category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills — ESPECIALLY user-installed ones
prompt="..."
@@ -305,7 +306,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):**
\`\`\`typescript
delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
\`\`\``
}
@@ -421,7 +422,7 @@ export function buildUltraworkSection(
lines.push("**Agents** (for specialized consultation/exploration):")
for (const agent of sortedAgents) {
const shortDesc = agent.description.split(".")[0] || agent.description
const shortDesc = agent.description.length > 120 ? agent.description.slice(0, 120) + "..." : agent.description
const suffix = agent.name === "explore" || agent.name === "librarian" ? " (multiple)" : ""
lines.push(`- \`${agent.name}${suffix}\`: ${shortDesc}`)
}
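The shared helper imported above (`shared/truncate-description`) is not shown in this diff; a plausible sketch, assuming it mirrors the inline 120-character cap kept in `buildUltraworkSection`:
```typescript
// shared/truncate-description.ts (sketch - actual implementation not shown in this diff)
export function truncateDescription(description: string, maxLength = 120): string {
  if (description.length <= maxLength) return description
  return description.slice(0, maxLength) + "..."
}
```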

View File

@@ -29,7 +29,7 @@ export function createExploreAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
"call_omo_agent",
])

View File

@@ -227,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*
**Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\`
2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
3. Can I do it myself for the best result, FOR SURE?
**Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
@@ -280,15 +280,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue immediately - collect results when needed
// WRONG: Sequential or blocking - NEVER DO THIS
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\`
**Rules:**
@@ -393,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.**
Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:**
| Scenario | Action |

View File

@@ -26,7 +26,7 @@ export function createLibrarianAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
"call_omo_agent",
])

View File

@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
"write",
"edit",
"task",
"delegate_task",
])
export function createMetisAgent(model: string): AgentConfig {

View File

@@ -193,7 +193,7 @@ export function createMomusAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
])
const base = {

View File

@@ -147,7 +147,7 @@ export function createOracleAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
])
const base = {

View File

@@ -15,7 +15,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
\`\`\`typescript
// After generating initial plan
while (true) {
const result = delegate_task(
const result = task(
subagent_type="momus",
prompt=".sisyphus/plans/{name}.md",
run_in_background=false

View File

@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
**Research First:**
\`\`\`typescript
// Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
\`\`\`typescript
// Launch BEFORE asking user questions
// Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
\`\`\`
**Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js
Run this check:
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
\`\`\`
#### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:
**Research First:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
\`\`\`
**Oracle Consultation** (recommend when stakes are high):
\`\`\`typescript
delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
\`\`\`
**Interview Focus:**
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:
**Parallel Investigation:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i
**For Understanding Codebase:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
\`\`\`
**For External Knowledge:**
\`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
\`\`\`
**For Implementation Examples:**
\`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
\`\`\`
## Interview Mode Anti-Patterns

View File

@@ -59,7 +59,7 @@ todoWrite([
**BEFORE generating the plan**, summon Metis to catch what you might have missed:
\`\`\`typescript
delegate_task(
task(
subagent_type="metis",
prompt=\`Review this planning session before I generate the work plan:

View File

@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential
| Wave | Tasks | Recommended Agents |
|------|-------|-------------------|
| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) |
| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
| 3 | 4 | final integration task |
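Read as a dispatch schedule, the table above translates to roughly the following (prompts elided, category placeholders as in the rest of this document):
```typescript
// Wave 1: tasks 1 and 5 are independent - invoke both in ONE message
task(category="...", load_skills=[...], run_in_background=false, prompt="Task 1...")
task(category="...", load_skills=[...], run_in_background=false, prompt="Task 5...")
// Wave 2 (tasks 2, 3, 6) dispatches in parallel only after Wave 1 verifies clean;
// Wave 3 (task 4) runs last as the final integration step.
```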

View File

@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
<Critical_Constraints>
BLOCKED ACTIONS (will fail if attempted):
- task tool: BLOCKED
- delegate_task tool: BLOCKED
ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
You work ALONE for implementation. No delegation of implementation tasks.

View File

@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
| Tool | Status |
|------|--------|
| task | BLOCKED |
| delegate_task | BLOCKED |
ALLOWED:
| Tool | Usage |

View File

@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
})
})
describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => {
test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => {
describe("tool safety (task blocked, call_omo_agent allowed)", () => {
test("task remains blocked, call_omo_agent is allowed via tools format", () => {
// given
const override = {
tools: {
task: true,
delegate_task: true,
call_omo_agent: true,
read: true,
},
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const permission = result.permission as Record<string, string> | undefined
if (tools) {
expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(tools.call_omo_agent).toBe(true)
expect(tools.read).toBe(true)
}
if (permission) {
expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(permission.call_omo_agent).toBe("allow")
}
})
test("task and delegate_task remain blocked when using permission format override", () => {
test("task remains blocked when using permission format override", () => {
// given
const override = {
permission: {
task: "allow",
delegate_task: "allow",
call_omo_agent: "allow",
read: "allow",
},
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// when
const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])
// then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning
// then - task blocked, but call_omo_agent allowed for explore/librarian spawning
const tools = result.tools as Record<string, boolean> | undefined
const permission = result.permission as Record<string, string> | undefined
if (tools) {
expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
expect(tools.call_omo_agent).toBe(true)
}
if (permission) {
expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
expect(permission.call_omo_agent).toBe("allow")
}
})

View File

@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"
// Core tools that Sisyphus-Junior must NEVER have access to
// Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
const BLOCKED_TOOLS = ["task", "delegate_task"]
const BLOCKED_TOOLS = ["task"]
export const SISYPHUS_JUNIOR_DEFAULTS = {
model: "anthropic/claude-sonnet-4-5",

View File

@@ -214,8 +214,8 @@ ${keyTriggers}
**Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
- MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER.
2. If not, is there a \`task\` category that best describes this task? (visual-engineering, ultrabrain, quick, etc.) What skills are available to equip the agent with?
- MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`. SKILLS MUST BE PASSED AS A TASK PARAMETER.
3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?
**Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
@@ -277,15 +277,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\`
### Background Result Collection:
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.**
Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:**
| Scenario | Action |
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
\`\`\`typescript
// WRONG: Starting fresh loses all context
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...")
task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")
// CORRECT: Resume preserves everything
delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
\`\`\`
**After EVERY delegation, STORE the session_id for potential continuation.**

View File

@@ -79,6 +79,72 @@ describe("createBuiltinAgents with model overrides", () => {
}
})
test("user config model takes priority over uiSelectedModel for sisyphus", async () => {
// #given
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
)
const uiSelectedModel = "openai/gpt-5.2"
const overrides = {
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
}
try {
// #when
const agents = await createBuiltinAgents(
[],
overrides,
undefined,
TEST_DEFAULT_MODEL,
undefined,
undefined,
[],
undefined,
undefined,
uiSelectedModel
)
// #then
expect(agents.sisyphus).toBeDefined()
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
} finally {
fetchSpy.mockRestore()
}
})
test("user config model takes priority over uiSelectedModel for atlas", async () => {
// #given
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
)
const uiSelectedModel = "openai/gpt-5.2"
const overrides = {
atlas: { model: "google/antigravity-claude-opus-4-5-thinking" },
}
try {
// #when
const agents = await createBuiltinAgents(
[],
overrides,
undefined,
TEST_DEFAULT_MODEL,
undefined,
undefined,
[],
undefined,
undefined,
uiSelectedModel
)
// #then
expect(agents.atlas).toBeDefined()
expect(agents.atlas.model).toBe("google/antigravity-claude-opus-4-5-thinking")
} finally {
fetchSpy.mockRestore()
}
})
test("Sisyphus is created on first run when no availableModels or cache exist", async () => {
// #given
const systemDefaultModel = "anthropic/claude-opus-4-6"
@@ -422,6 +488,58 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
cacheSpy.mockRestore()
}
})
test("sisyphus uses user-configured plugin model even when not in cache or fallback chain", async () => {
// #given - user configures a model from a plugin provider (like antigravity)
// that is NOT in the availableModels cache and NOT in the fallback chain
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set(["openai/gpt-5.2"])
)
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
["openai"]
)
const overrides = {
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
}
try {
// #when
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
// #then
expect(agents.sisyphus).toBeDefined()
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
}
})
test("sisyphus uses user-configured plugin model when availableModels is empty but cache exists", async () => {
// #given - connected providers cache exists but models cache is empty
// This reproduces the exact scenario where provider-models.json has models: {}
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
new Set()
)
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
["google", "openai", "opencode"]
)
const overrides = {
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
}
try {
// #when
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
// #then
expect(agents.sisyphus).toBeDefined()
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
} finally {
fetchSpy.mockRestore()
cacheSpy.mockRestore()
}
})
})
describe("buildAgent with category and skills", () => {

View File

@@ -304,7 +304,7 @@ export async function createBuiltinAgents(
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
const resolution = applyModelResolution({
uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
userModel: override?.model,
requirement,
availableModels,
@@ -356,7 +356,7 @@ export async function createBuiltinAgents(
if (!disabledAgents.includes("sisyphus") && meetsSisyphusAnyModelRequirement) {
let sisyphusResolution = applyModelResolution({
uiSelectedModel,
uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
userModel: sisyphusOverride?.model,
requirement: sisyphusRequirement,
availableModels,
@@ -454,7 +454,7 @@ export async function createBuiltinAgents(
const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
const atlasResolution = applyModelResolution({
uiSelectedModel,
uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
userModel: orchestratorOverride?.model,
requirement: atlasRequirement,
availableModels,
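All three hunks above apply the same rule: an explicitly user-configured model suppresses the UI-selected one. In isolation the precedence looks like this (the helper below is illustrative, not from the codebase; model names come from the tests above):
```typescript
// Effective precedence after this change: user config > UI selection > fallback
function effectiveModel(userModel: string | undefined, uiSelectedModel: string | undefined, fallback: string): string {
  return userModel ?? uiSelectedModel ?? fallback
}
effectiveModel("google/antigravity-claude-opus-4-5-thinking", "openai/gpt-5.2", "anthropic/claude-sonnet-4-5")
// => "google/antigravity-claude-opus-4-5-thinking" (user config wins, matching the new tests)
```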

View File

@@ -29,7 +29,7 @@ describe("gh cli check", () => {
it("returns gh cli info structure", async () => {
const spawnSpy = spyOn(Bun, "spawn").mockImplementation((cmd) => {
if (Array.isArray(cmd) && cmd[0] === "which" && cmd[1] === "gh") {
if (Array.isArray(cmd) && (cmd[0] === "which" || cmd[0] === "where") && cmd[1] === "gh") {
return createProc({ stdout: "/usr/bin/gh\n" })
}

View File

@@ -13,7 +13,8 @@ export interface GhCliInfo {
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
try {
const proc = Bun.spawn(["which", binary], { stdout: "pipe", stderr: "pipe" })
const whichCmd = process.platform === "win32" ? "where" : "which"
const proc = Bun.spawn([whichCmd, binary], { stdout: "pipe", stderr: "pipe" })
const output = await new Response(proc.stdout).text()
await proc.exited
if (proc.exitCode === 0) {
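Assembled from the hunk above, the whole helper would look roughly like this (the tail is truncated in the diff, so the success/failure returns are reconstructed and therefore assumed):
```typescript
// Sketch: cross-platform binary lookup ("where" on Windows, "which" elsewhere)
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
  try {
    const whichCmd = process.platform === "win32" ? "where" : "which"
    const proc = Bun.spawn([whichCmd, binary], { stdout: "pipe", stderr: "pipe" })
    const output = await new Response(proc.stdout).text()
    await proc.exited
    if (proc.exitCode === 0) {
      // first line of `which`/`where` output is the resolved path (assumed)
      return { exists: true, path: output.trim().split("\n")[0] ?? null }
    }
    return { exists: false, path: null }
  } catch {
    return { exists: false, path: null }
  }
}
```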

View File

@@ -64,16 +64,28 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
})
program
.command("run <message>")
.description("Run opencode with todo/background task completion enforcement")
.command("run <message>")
.allowUnknownOption()
.passThroughOptions()
.description("Run opencode with todo/background task completion enforcement")
.option("-a, --agent <name>", "Agent to use (default: from CLI/env/config, fallback: Sisyphus)")
.option("-d, --directory <path>", "Working directory")
.option("-t, --timeout <ms>", "Timeout in milliseconds (default: 30 minutes)", parseInt)
.option("-p, --port <port>", "Server port (attaches if port already in use)", parseInt)
.option("--attach <url>", "Attach to existing opencode server URL")
.option("--on-complete <command>", "Shell command to run after completion")
.option("--json", "Output structured JSON result to stdout")
.option("--session-id <id>", "Resume existing session instead of creating new one")
.addHelpText("after", `
Examples:
$ bunx oh-my-opencode run "Fix the bug in index.ts"
$ bunx oh-my-opencode run --agent Sisyphus "Implement feature X"
$ bunx oh-my-opencode run --timeout 3600000 "Large refactoring task"
$ bunx oh-my-opencode run --port 4321 "Fix the bug"
$ bunx oh-my-opencode run --attach http://127.0.0.1:4321 "Fix the bug"
$ bunx oh-my-opencode run --json "Fix the bug" | jq .sessionId
$ bunx oh-my-opencode run --on-complete "notify-send Done" "Fix the bug"
$ bunx oh-my-opencode run --session-id ses_abc123 "Continue the work"
Agent resolution order:
1) --agent flag
@@ -89,11 +101,20 @@ Unlike 'opencode run', this command waits until:
- All child sessions (background tasks) are idle
`)
.action(async (message: string, options) => {
if (options.port && options.attach) {
console.error("Error: --port and --attach are mutually exclusive")
process.exit(1)
}
const runOptions: RunOptions = {
message,
agent: options.agent,
directory: options.directory,
timeout: options.timeout,
port: options.port,
attach: options.attach,
onComplete: options.onComplete,
json: options.json ?? false,
sessionId: options.sessionId,
}
const exitCode = await run(runOptions)
process.exit(exitCode)

View File

@@ -0,0 +1,69 @@
import pc from "picocolors"
import type { RunOptions } from "./types"
import type { OhMyOpenCodeConfig } from "../../config"
const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
const DEFAULT_AGENT = "sisyphus"
type EnvVars = Record<string, string | undefined>
const normalizeAgentName = (agent?: string): string | undefined => {
if (!agent) return undefined
const trimmed = agent.trim()
if (!trimmed) return undefined
const lowered = trimmed.toLowerCase()
const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
return coreMatch ?? trimmed
}
const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
const lowered = agent.toLowerCase()
if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
return true
}
return (config.disabled_agents ?? []).some(
(disabled) => disabled.toLowerCase() === lowered
)
}
const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
for (const agent of CORE_AGENT_ORDER) {
if (!isAgentDisabled(agent, config)) {
return agent
}
}
return DEFAULT_AGENT
}
export const resolveRunAgent = (
options: RunOptions,
pluginConfig: OhMyOpenCodeConfig,
env: EnvVars = process.env
): string => {
const cliAgent = normalizeAgentName(options.agent)
const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
if (isAgentDisabled(normalized, pluginConfig)) {
const fallback = pickFallbackAgent(pluginConfig)
const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
if (fallbackDisabled) {
console.log(
pc.yellow(
`Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
)
)
return fallback
}
console.log(
pc.yellow(
`Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
)
)
return fallback
}
return normalized
}
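A usage sketch of the resolver above (config and env values are invented for illustration; types simplified):
```typescript
const agent = resolveRunAgent(
  { message: "Fix the bug", agent: "Atlas" } as RunOptions,
  { disabled_agents: ["atlas"] } as OhMyOpenCodeConfig, // atlas disabled in config
  { OPENCODE_DEFAULT_AGENT: "prometheus" }              // env only consulted when no --agent flag
)
// => "sisyphus": the --agent flag wins the resolution order, but "atlas" is
// disabled, so the resolver falls back to the first enabled core agent.
```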

View File

@@ -65,6 +65,8 @@ export interface EventState {
currentTool: string | null
/** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
hasReceivedMeaningfulWork: boolean
/** Count of assistant messages for the main session */
messageCount: number
}
export function createEventState(): EventState {
@@ -76,6 +78,7 @@ export function createEventState(): EventState {
lastPartText: "",
currentTool: null,
hasReceivedMeaningfulWork: false,
messageCount: 0,
}
}
@@ -266,6 +269,7 @@ function handleMessageUpdated(
if (props?.info?.role !== "assistant") return
state.hasReceivedMeaningfulWork = true
state.messageCount++
}
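The counter's consumer is not in this hunk; presumably the runner copies it onto the `RunResult` emitted in `--json` mode. A minimal illustration, with the wiring assumed (field names come from this diff and the tests below):
```typescript
// Assumed wiring in the runner: event state -> RunResult
const result: RunResult = {
  sessionId,
  success: exitCode === 0,
  durationMs: Date.now() - startedAt,
  messageCount: state.messageCount, // incremented once per assistant message
  summary,
}
```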
function handleToolExecute(

View File

@@ -1,2 +1,7 @@
export { run } from "./runner"
export type { RunOptions, RunContext } from "./types"
export { resolveRunAgent } from "./agent-resolver"
export { createServerConnection } from "./server-connection"
export { resolveSession } from "./session-resolver"
export { createJsonOutputManager } from "./json-output"
export { executeOnCompleteHook } from "./on-complete-hook"
export type { RunOptions, RunContext, RunResult, ServerConnection } from "./types"

View File

@@ -0,0 +1,294 @@
import { describe, it, expect, mock, spyOn, beforeEach, afterEach } from "bun:test"
import type { RunResult } from "./types"
import { createJsonOutputManager } from "./json-output"
import { resolveSession } from "./session-resolver"
import { executeOnCompleteHook } from "./on-complete-hook"
import type { OpencodeClient } from "./types"
const mockServerClose = mock(() => {})
const mockCreateOpencode = mock(() =>
Promise.resolve({
client: { session: {} },
server: { url: "http://127.0.0.1:9999", close: mockServerClose },
})
)
const mockCreateOpencodeClient = mock(() => ({ session: {} }))
const mockIsPortAvailable = mock(() => Promise.resolve(true))
const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 9999, wasAutoSelected: false }))
mock.module("@opencode-ai/sdk", () => ({
createOpencode: mockCreateOpencode,
createOpencodeClient: mockCreateOpencodeClient,
}))
mock.module("../../shared/port-utils", () => ({
isPortAvailable: mockIsPortAvailable,
getAvailableServerPort: mockGetAvailableServerPort,
DEFAULT_SERVER_PORT: 4096,
}))
const { createServerConnection } = await import("./server-connection")
interface MockWriteStream {
write: (chunk: string) => boolean
writes: string[]
}
function createMockWriteStream(): MockWriteStream {
const writes: string[] = []
return {
writes,
write: function (this: MockWriteStream, chunk: string): boolean {
this.writes.push(chunk)
return true
},
}
}
const createMockClient = (
getResult?: { error?: unknown; data?: { id: string } }
): OpencodeClient => ({
session: {
get: mock((opts: { path: { id: string } }) =>
Promise.resolve(getResult ?? { data: { id: opts.path.id } })
),
create: mock(() => Promise.resolve({ data: { id: "new-session-id" } })),
},
} as unknown as OpencodeClient)
describe("integration: --json mode", () => {
it("emits valid RunResult JSON to stdout", () => {
// given
const mockStdout = createMockWriteStream()
const mockStderr = createMockWriteStream()
const result: RunResult = {
sessionId: "test-session",
success: true,
durationMs: 1234,
messageCount: 42,
summary: "Test summary",
}
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
// when
manager.emitResult(result)
// then
expect(mockStdout.writes).toHaveLength(1)
const emitted = mockStdout.writes[0]!
expect(() => JSON.parse(emitted)).not.toThrow()
const parsed = JSON.parse(emitted) as RunResult
expect(parsed.sessionId).toBe("test-session")
expect(parsed.success).toBe(true)
expect(parsed.durationMs).toBe(1234)
expect(parsed.messageCount).toBe(42)
expect(parsed.summary).toBe("Test summary")
})
it("redirects stdout to stderr when active", () => {
// given
spyOn(console, "log").mockImplementation(() => {})
const mockStdout = createMockWriteStream()
const mockStderr = createMockWriteStream()
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
manager.redirectToStderr()
// when
mockStdout.write("should go to stderr")
// then
expect(mockStdout.writes).toHaveLength(0)
expect(mockStderr.writes).toEqual(["should go to stderr"])
})
})
describe("integration: --session-id", () => {
beforeEach(() => {
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
})
it("resolves provided session ID without creating new session", async () => {
// given
const sessionId = "existing-session-id"
const mockClient = createMockClient({ data: { id: sessionId } })
// when
const result = await resolveSession({ client: mockClient, sessionId })
// then
expect(result).toBe(sessionId)
expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
expect(mockClient.session.create).not.toHaveBeenCalled()
})
it("throws when session does not exist", async () => {
// given
const sessionId = "non-existent-session-id"
const mockClient = createMockClient({ error: { message: "Session not found" } })
// when
const result = resolveSession({ client: mockClient, sessionId })
// then
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
expect(mockClient.session.create).not.toHaveBeenCalled()
})
})
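The two tests above pin down the resolver's contract; an implementation consistent with them, with the shape assumed rather than copied from the source:
```typescript
// Sketch: validate an explicit --session-id, otherwise create a fresh session
export async function resolveSession(opts: { client: OpencodeClient; sessionId?: string }): Promise<string> {
  if (opts.sessionId) {
    const existing = await opts.client.session.get({ path: { id: opts.sessionId } })
    if (existing.error || !existing.data) throw new Error(`Session not found: ${opts.sessionId}`)
    return existing.data.id
  }
  const created = await opts.client.session.create()
  return created.data!.id
}
```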
describe("integration: --on-complete", () => {
let spawnSpy: ReturnType<typeof spyOn>
beforeEach(() => {
spyOn(console, "error").mockImplementation(() => {})
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
exited: Promise.resolve(0),
exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>)
})
afterEach(() => {
spawnSpy.mockRestore()
})
it("passes all 4 env vars as strings to spawned process", async () => {
// given
spawnSpy.mockClear()
// when
await executeOnCompleteHook({
command: "echo test",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
// then
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")
expect(options?.env?.MESSAGE_COUNT).toBe("10")
expect(options?.env?.SESSION_ID).toBeTypeOf("string")
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
expect(options?.env?.DURATION_MS).toBeTypeOf("string")
expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
})
})
describe("integration: option combinations", () => {
let mockStdout: MockWriteStream
let mockStderr: MockWriteStream
let spawnSpy: ReturnType<typeof spyOn>
beforeEach(() => {
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
mockStdout = createMockWriteStream()
mockStderr = createMockWriteStream()
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
exited: Promise.resolve(0),
exitCode: 0,
} as unknown as ReturnType<typeof Bun.spawn>)
})
afterEach(() => {
spawnSpy?.mockRestore?.()
})
it("json output and on-complete hook can both execute", async () => {
// given - json manager active + on-complete hook ready
const result: RunResult = {
sessionId: "session-123",
success: true,
durationMs: 5000,
messageCount: 10,
summary: "Test completed",
}
const jsonManager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
jsonManager.redirectToStderr()
spawnSpy.mockClear()
// when - both are invoked sequentially (as runner would)
jsonManager.emitResult(result)
await executeOnCompleteHook({
command: "echo done",
sessionId: result.sessionId,
exitCode: result.success ? 0 : 1,
durationMs: result.durationMs,
messageCount: result.messageCount,
})
// then - json emits result AND on-complete hook runs
expect(mockStdout.writes).toHaveLength(1)
const emitted = mockStdout.writes[0]!
expect(() => JSON.parse(emitted)).not.toThrow()
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
expect(args).toEqual(["sh", "-c", "echo done"])
expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")
expect(options?.env?.MESSAGE_COUNT).toBe("10")
})
})
describe("integration: server connection", () => {
let consoleSpy: ReturnType<typeof spyOn>
beforeEach(() => {
consoleSpy = spyOn(console, "log").mockImplementation(() => {})
mockCreateOpencode.mockClear()
mockCreateOpencodeClient.mockClear()
mockServerClose.mockClear()
})
afterEach(() => {
consoleSpy.mockRestore()
})
it("attach mode creates client with no-op cleanup", async () => {
// given
const signal = new AbortController().signal
const attachUrl = "http://localhost:8080"
// when
const result = await createServerConnection({ attach: attachUrl, signal })
// then
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
result.cleanup()
expect(mockServerClose).not.toHaveBeenCalled()
})
it("port with available port starts server", async () => {
// given
const signal = new AbortController().signal
const port = 9999
// when
const result = await createServerConnection({ port, signal })
// then
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
expect(mockCreateOpencode).toHaveBeenCalled()
result.cleanup()
expect(mockServerClose).toHaveBeenCalled()
})
})

View File

@@ -0,0 +1,170 @@
import { describe, it, expect, beforeEach } from "bun:test"
import type { RunResult } from "./types"
import { createJsonOutputManager } from "./json-output"
interface MockWriteStream {
write: (chunk: string) => boolean
writes: string[]
}
function createMockWriteStream(): MockWriteStream {
const stream: MockWriteStream = {
writes: [],
write: function (this: MockWriteStream, chunk: string): boolean {
this.writes.push(chunk)
return true
},
}
return stream
}
describe("createJsonOutputManager", () => {
let mockStdout: MockWriteStream
let mockStderr: MockWriteStream
beforeEach(() => {
mockStdout = createMockWriteStream()
mockStderr = createMockWriteStream()
})
describe("redirectToStderr", () => {
it("causes stdout writes to go to stderr", () => {
// given
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
manager.redirectToStderr()
// when
mockStdout.write("test message")
// then
expect(mockStdout.writes).toHaveLength(0)
expect(mockStderr.writes).toEqual(["test message"])
})
})
describe("restore", () => {
it("reverses the redirect", () => {
// given
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
manager.redirectToStderr()
// when
manager.restore()
mockStdout.write("restored message")
// then
expect(mockStdout.writes).toEqual(["restored message"])
expect(mockStderr.writes).toHaveLength(0)
})
})
describe("emitResult", () => {
it("writes valid JSON to stdout", () => {
// given
const result: RunResult = {
sessionId: "test-session",
success: true,
durationMs: 1234,
messageCount: 42,
summary: "Test summary",
}
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
// when
manager.emitResult(result)
// then
expect(mockStdout.writes).toHaveLength(1)
const emitted = mockStdout.writes[0]!
expect(() => JSON.parse(emitted)).not.toThrow()
})
it("output matches RunResult schema", () => {
// given
const result: RunResult = {
sessionId: "test-session",
success: true,
durationMs: 1234,
messageCount: 42,
summary: "Test summary",
}
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
// when
manager.emitResult(result)
// then
const emitted = mockStdout.writes[0]!
const parsed = JSON.parse(emitted) as RunResult
expect(parsed).toEqual(result)
expect(parsed.sessionId).toBe("test-session")
expect(parsed.success).toBe(true)
expect(parsed.durationMs).toBe(1234)
expect(parsed.messageCount).toBe(42)
expect(parsed.summary).toBe("Test summary")
})
it("restores stdout even if redirect was active", () => {
// given
const result: RunResult = {
sessionId: "test-session",
success: true,
durationMs: 100,
messageCount: 1,
summary: "Test",
}
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
manager.redirectToStderr()
// when
manager.emitResult(result)
// then
expect(mockStdout.writes).toHaveLength(1)
expect(mockStdout.writes[0]!).toBe(JSON.stringify(result) + "\n")
mockStdout.write("after emit")
expect(mockStdout.writes).toHaveLength(2)
expect(mockStderr.writes).toHaveLength(0)
})
})
describe("multiple redirects and restores", () => {
it("work correctly", () => {
// given
const manager = createJsonOutputManager({
stdout: mockStdout as unknown as NodeJS.WriteStream,
stderr: mockStderr as unknown as NodeJS.WriteStream,
})
// when
manager.redirectToStderr()
mockStdout.write("first redirect")
manager.redirectToStderr()
mockStdout.write("second redirect")
manager.restore()
mockStdout.write("after restore")
// then
expect(mockStdout.writes).toEqual(["after restore"])
expect(mockStderr.writes).toEqual(["first redirect", "second redirect"])
})
})
})

View File

@@ -0,0 +1,52 @@
import type { RunResult } from "./types"
export interface JsonOutputManager {
redirectToStderr: () => void
restore: () => void
emitResult: (result: RunResult) => void
}
interface JsonOutputManagerOptions {
stdout?: NodeJS.WriteStream
stderr?: NodeJS.WriteStream
}
export function createJsonOutputManager(
options: JsonOutputManagerOptions = {}
): JsonOutputManager {
const stdout = options.stdout ?? process.stdout
const stderr = options.stderr ?? process.stderr
const originalWrite = stdout.write.bind(stdout)
function redirectToStderr(): void {
stdout.write = function (
chunk: Uint8Array | string,
encodingOrCallback?: BufferEncoding | ((error?: Error | null) => void),
callback?: (error?: Error | null) => void
): boolean {
if (typeof encodingOrCallback === "function") {
return stderr.write(chunk, encodingOrCallback)
}
if (encodingOrCallback !== undefined) {
return stderr.write(chunk, encodingOrCallback, callback)
}
return stderr.write(chunk)
} as NodeJS.WriteStream["write"]
}
function restore(): void {
stdout.write = originalWrite
}
function emitResult(result: RunResult): void {
restore()
originalWrite(JSON.stringify(result) + "\n")
}
return {
redirectToStderr,
restore,
emitResult,
}
}
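A minimal usage sketch of the manager above (the RunResult values are illustrative):

const manager = createJsonOutputManager()
manager.redirectToStderr() // progress output now lands on stderr, keeping stdout clean
console.log("working...") // routed to stderr while the redirect is active
manager.emitResult({
  sessionId: "ses_example",
  success: true,
  durationMs: 1234,
  messageCount: 3,
  summary: "done",
}) // restores stdout first, then writes exactly one JSON line to it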

View File

@@ -0,0 +1,179 @@
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
import { executeOnCompleteHook } from "./on-complete-hook"
describe("executeOnCompleteHook", () => {
function createProc(exitCode: number) {
return {
exited: Promise.resolve(exitCode),
exitCode,
} as unknown as ReturnType<typeof Bun.spawn>
}
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
beforeEach(() => {
consoleErrorSpy = spyOn(console, "error").mockImplementation(() => {})
})
afterEach(() => {
consoleErrorSpy.mockRestore()
})
it("executes command with correct env vars", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
try {
// when
await executeOnCompleteHook({
command: "echo test",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
// then
expect(spawnSpy).toHaveBeenCalledTimes(1)
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
expect(args).toEqual(["sh", "-c", "echo test"])
expect(options?.env?.SESSION_ID).toBe("session-123")
expect(options?.env?.EXIT_CODE).toBe("0")
expect(options?.env?.DURATION_MS).toBe("5000")
expect(options?.env?.MESSAGE_COUNT).toBe("10")
expect(options?.stdout).toBe("inherit")
expect(options?.stderr).toBe("inherit")
} finally {
spawnSpy.mockRestore()
}
})
it("env var values are strings", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
try {
// when
await executeOnCompleteHook({
command: "echo test",
sessionId: "session-123",
exitCode: 1,
durationMs: 12345,
messageCount: 42,
})
// then
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
expect(options?.env?.EXIT_CODE).toBe("1")
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
expect(options?.env?.DURATION_MS).toBe("12345")
expect(options?.env?.DURATION_MS).toBeTypeOf("string")
expect(options?.env?.MESSAGE_COUNT).toBe("42")
expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
} finally {
spawnSpy.mockRestore()
}
})
it("empty command string is no-op", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
try {
// when
await executeOnCompleteHook({
command: "",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
// then
expect(spawnSpy).not.toHaveBeenCalled()
} finally {
spawnSpy.mockRestore()
}
})
it("whitespace-only command is no-op", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
try {
// when
await executeOnCompleteHook({
command: " ",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
// then
expect(spawnSpy).not.toHaveBeenCalled()
} finally {
spawnSpy.mockRestore()
}
})
it("command failure logs warning but does not throw", async () => {
// given
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1))
try {
// when
await expect(
executeOnCompleteHook({
command: "false",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
).resolves.toBeUndefined()
// then
expect(consoleErrorSpy).toHaveBeenCalled()
const warningCall = consoleErrorSpy.mock.calls.find(
(call) => typeof call[0] === "string" && call[0].includes("Warning: on-complete hook exited with code 1")
)
expect(warningCall).toBeDefined()
} finally {
spawnSpy.mockRestore()
}
})
it("spawn error logs warning but does not throw", async () => {
// given
const spawnError = new Error("Command not found")
const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => {
throw spawnError
})
try {
// when
await expect(
executeOnCompleteHook({
command: "nonexistent-command",
sessionId: "session-123",
exitCode: 0,
durationMs: 5000,
messageCount: 10,
})
).resolves.toBeUndefined()
// then
expect(consoleErrorSpy).toHaveBeenCalled()
const errorCalls = consoleErrorSpy.mock.calls.filter((call) => {
const firstArg = call[0]
return typeof firstArg === "string" && (firstArg.includes("Warning") || firstArg.toLowerCase().includes("error"))
})
expect(errorCalls.length).toBeGreaterThan(0)
} finally {
spawnSpy.mockRestore()
}
})
})

View File

@@ -0,0 +1,42 @@
import pc from "picocolors"
export async function executeOnCompleteHook(options: {
command: string
sessionId: string
exitCode: number
durationMs: number
messageCount: number
}): Promise<void> {
const { command, sessionId, exitCode, durationMs, messageCount } = options
const trimmedCommand = command.trim()
if (!trimmedCommand) {
return
}
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
try {
const proc = Bun.spawn(["sh", "-c", trimmedCommand], {
env: {
...process.env,
SESSION_ID: sessionId,
EXIT_CODE: String(exitCode),
DURATION_MS: String(durationMs),
MESSAGE_COUNT: String(messageCount),
},
stdout: "inherit",
stderr: "inherit",
})
const hookExitCode = await proc.exited
if (hookExitCode !== 0) {
console.error(
pc.yellow(`Warning: on-complete hook exited with code ${hookExitCode}`)
)
}
} catch (error) {
console.error(pc.yellow(`Warning: Failed to execute on-complete hook: ${error instanceof Error ? error.message : String(error)}`))
}
}
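As a sketch, the spawned command sees the four values as ordinary environment strings (the command is illustrative):

await executeOnCompleteHook({
  command: 'echo "session $SESSION_ID exited $EXIT_CODE after ${DURATION_MS}ms, $MESSAGE_COUNT messages"',
  sessionId: "ses_example",
  exitCode: 0,
  durationMs: 5000,
  messageCount: 10,
})
// A non-zero hook exit only logs a warning; the surrounding run is unaffected.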

View File

@@ -1,101 +1,37 @@
import pc from "picocolors"
import type { RunOptions, RunContext } from "./types"
import { checkCompletionConditions } from "./completion"
import { createEventState, processEvents, serializeError } from "./events"
import type { OhMyOpenCodeConfig } from "../../config"
import { loadPluginConfig } from "../../plugin-config"
import { createServerConnection } from "./server-connection"
import { resolveSession } from "./session-resolver"
import { createJsonOutputManager } from "./json-output"
import { executeOnCompleteHook } from "./on-complete-hook"
import { resolveRunAgent } from "./agent-resolver"
export { resolveRunAgent }
const POLL_INTERVAL_MS = 500
const DEFAULT_TIMEOUT_MS = 0
export async function run(options: RunOptions): Promise<number> {
// Set CLI run mode environment variable before any config loading
// This signals to config-handler to deny Question tool (no TUI to answer)
process.env.OPENCODE_CLI_RUN_MODE = "true"
const startTime = Date.now()
const {
message,
directory = process.cwd(),
timeout = DEFAULT_TIMEOUT_MS,
} = options
const jsonManager = options.json ? createJsonOutputManager() : null
if (jsonManager) jsonManager.redirectToStderr()
const pluginConfig = loadPluginConfig(directory, { command: "run" })
const resolvedAgent = resolveRunAgent(options, pluginConfig)
console.log(pc.cyan("Starting opencode server (auto port selection enabled)..."))
const abortController = new AbortController()
let timeoutId: ReturnType<typeof setTimeout> | null = null
// timeout=0 means no timeout (run until completion)
if (timeout > 0) {
timeoutId = setTimeout(() => {
console.log(pc.yellow("\nTimeout reached. Aborting..."))
@@ -104,29 +40,15 @@ export async function run(options: RunOptions): Promise<number> {
}
try {
const { client, cleanup: serverCleanup } = await createServerConnection({
port: options.port,
attach: options.attach,
signal: abortController.signal,
})
const cleanup = () => {
if (timeoutId) clearTimeout(timeoutId)
serverCleanup()
}
process.on("SIGINT", () => {
@@ -136,61 +58,14 @@ export async function run(options: RunOptions): Promise<number> {
})
try {
const sessionID = await resolveSession({
client,
sessionId: options.sessionId,
})
console.log(pc.dim(`Session: ${sessionID}`))
const ctx: RunContext = { client, sessionID, directory, abortController }
const events = await client.event.subscribe()
const eventState = createEventState()
const eventProcessor = processEvents(ctx, events.stream, eventState)
@@ -206,47 +81,41 @@ export async function run(options: RunOptions): Promise<number> {
})
console.log(pc.dim("Waiting for completion...\n"))
const exitCode = await pollForCompletion(ctx, eventState, abortController)
await eventProcessor.catch(() => {})
cleanup()
const durationMs = Date.now() - startTime
if (options.onComplete) {
await executeOnCompleteHook({
command: options.onComplete,
sessionId: sessionID,
exitCode,
durationMs,
messageCount: eventState.messageCount,
})
}
if (jsonManager) {
jsonManager.emitResult({
sessionId: sessionID,
success: exitCode === 0,
durationMs,
messageCount: eventState.messageCount,
summary: eventState.lastPartText.slice(0, 200) || "Run completed",
})
}
return exitCode
} catch (err) {
cleanup()
throw err
}
} catch (err) {
if (timeoutId) clearTimeout(timeoutId)
if (jsonManager) jsonManager.restore()
if (err instanceof Error && err.name === "AbortError") {
return 130
}
@@ -254,3 +123,31 @@ export async function run(options: RunOptions): Promise<number> {
return 1
}
}
async function pollForCompletion(
ctx: RunContext,
eventState: ReturnType<typeof createEventState>,
abortController: AbortController
): Promise<number> {
while (!abortController.signal.aborted) {
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
if (!eventState.mainSessionIdle) continue
if (eventState.mainSessionError) {
console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
console.error(pc.yellow("Check if todos were completed before the error."))
return 1
}
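// Guard against premature completion: a session can go busy->idle before the
// LLM responds; until it has produced meaningful work (text, tool call, or
// tool result), "0 todos + 0 children" would falsely read as complete.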
if (!eventState.hasReceivedMeaningfulWork) continue
const shouldExit = await checkCompletionConditions(ctx)
if (shouldExit) {
console.log(pc.green("\n\nAll tasks completed."))
return 0
}
}
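// Aborted (timeout or SIGINT): 130 matches the shell convention of 128 + signal 2.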
return 130
}
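For orientation, a caller would drive run() roughly like this (message and flags are illustrative):

const exitCode = await run({
  message: "fix the failing test",
  json: true, // emit a RunResult JSON line on stdout at the end
  onComplete: "echo done", // optional shell hook; receives SESSION_ID etc.
})
process.exit(exitCode)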

View File

@@ -0,0 +1,152 @@
import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test"
const originalConsole = globalThis.console
const mockServerClose = mock(() => {})
const mockCreateOpencode = mock(() =>
Promise.resolve({
client: { session: {} },
server: { url: "http://127.0.0.1:4096", close: mockServerClose },
})
)
const mockCreateOpencodeClient = mock(() => ({ session: {} }))
const mockIsPortAvailable = mock(() => Promise.resolve(true))
const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 4096, wasAutoSelected: false }))
const mockConsoleLog = mock(() => {})
mock.module("@opencode-ai/sdk", () => ({
createOpencode: mockCreateOpencode,
createOpencodeClient: mockCreateOpencodeClient,
}))
mock.module("../../shared/port-utils", () => ({
isPortAvailable: mockIsPortAvailable,
getAvailableServerPort: mockGetAvailableServerPort,
DEFAULT_SERVER_PORT: 4096,
}))
const { createServerConnection } = await import("./server-connection")
describe("createServerConnection", () => {
beforeEach(() => {
mockCreateOpencode.mockClear()
mockCreateOpencodeClient.mockClear()
mockIsPortAvailable.mockClear()
mockGetAvailableServerPort.mockClear()
mockServerClose.mockClear()
mockConsoleLog.mockClear()
globalThis.console = { ...console, log: mockConsoleLog } as typeof console
})
afterEach(() => {
globalThis.console = originalConsole
})
it("attach mode returns client with no-op cleanup", async () => {
// given
const signal = new AbortController().signal
const attachUrl = "http://localhost:8080"
// when
const result = await createServerConnection({ attach: attachUrl, signal })
// then
expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
result.cleanup()
expect(mockServerClose).not.toHaveBeenCalled()
})
it("explicit port starts server when port is available", async () => {
// given
const signal = new AbortController().signal
const port = 8080
mockIsPortAvailable.mockResolvedValueOnce(true)
// when
const result = await createServerConnection({ port, signal })
// then
expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 8080, hostname: "127.0.0.1" })
expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
result.cleanup()
expect(mockServerClose).toHaveBeenCalled()
})
it("explicit port attaches when port is occupied", async () => {
// given
const signal = new AbortController().signal
const port = 8080
mockIsPortAvailable.mockResolvedValueOnce(false)
// when
const result = await createServerConnection({ port, signal })
// then
expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
expect(mockCreateOpencode).not.toHaveBeenCalled()
expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: "http://127.0.0.1:8080" })
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
result.cleanup()
expect(mockServerClose).not.toHaveBeenCalled()
})
it("auto mode uses getAvailableServerPort", async () => {
// given
const signal = new AbortController().signal
mockGetAvailableServerPort.mockResolvedValueOnce({ port: 4100, wasAutoSelected: true })
// when
const result = await createServerConnection({ signal })
// then
expect(mockGetAvailableServerPort).toHaveBeenCalledWith(4096, "127.0.0.1")
expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 4100, hostname: "127.0.0.1" })
expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
result.cleanup()
expect(mockServerClose).toHaveBeenCalled()
})
it("invalid port throws error", async () => {
// given
const signal = new AbortController().signal
// when & then
await expect(createServerConnection({ port: 0, signal })).rejects.toThrow("Port must be between 1 and 65535")
await expect(createServerConnection({ port: -1, signal })).rejects.toThrow("Port must be between 1 and 65535")
await expect(createServerConnection({ port: 99999, signal })).rejects.toThrow("Port must be between 1 and 65535")
})
it("cleanup calls server.close for owned server", async () => {
// given
const signal = new AbortController().signal
mockIsPortAvailable.mockResolvedValueOnce(true)
// when
const result = await createServerConnection({ port: 8080, signal })
result.cleanup()
// then
expect(mockServerClose).toHaveBeenCalledTimes(1)
})
it("cleanup is no-op for attached server", async () => {
// given
const signal = new AbortController().signal
const attachUrl = "http://localhost:8080"
// when
const result = await createServerConnection({ attach: attachUrl, signal })
result.cleanup()
// then
expect(mockServerClose).not.toHaveBeenCalled()
})
})

View File

@@ -0,0 +1,47 @@
import { createOpencode, createOpencodeClient } from "@opencode-ai/sdk"
import pc from "picocolors"
import type { ServerConnection } from "./types"
import { getAvailableServerPort, isPortAvailable, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
export async function createServerConnection(options: {
port?: number
attach?: string
signal: AbortSignal
}): Promise<ServerConnection> {
const { port, attach, signal } = options
if (attach !== undefined) {
console.log(pc.dim("Attaching to existing server at"), pc.cyan(attach))
const client = createOpencodeClient({ baseUrl: attach })
return { client, cleanup: () => {} }
}
if (port !== undefined) {
if (port < 1 || port > 65535) {
throw new Error("Port must be between 1 and 65535")
}
const available = await isPortAvailable(port, "127.0.0.1")
if (available) {
console.log(pc.dim("Starting server on port"), pc.cyan(port.toString()))
const { client, server } = await createOpencode({ signal, port, hostname: "127.0.0.1" })
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
return { client, cleanup: () => server.close() }
}
console.log(pc.dim("Port"), pc.cyan(port.toString()), pc.dim("is occupied, attaching to existing server"))
const client = createOpencodeClient({ baseUrl: `http://127.0.0.1:${port}` })
return { client, cleanup: () => {} }
}
const { port: selectedPort, wasAutoSelected } = await getAvailableServerPort(DEFAULT_SERVER_PORT, "127.0.0.1")
if (wasAutoSelected) {
console.log(pc.dim("Auto-selected port"), pc.cyan(selectedPort.toString()))
} else {
console.log(pc.dim("Starting server on port"), pc.cyan(selectedPort.toString()))
}
const { client, server } = await createOpencode({ signal, port: selectedPort, hostname: "127.0.0.1" })
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
return { client, cleanup: () => server.close() }
}
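The three connection modes, sketched (URL and ports are examples):

const signal = new AbortController().signal
// Attach to an already-running server; cleanup() is a no-op:
const attached = await createServerConnection({ attach: "http://127.0.0.1:4096", signal })
// Pin a port: starts a server if the port is free, otherwise attaches to it:
const pinned = await createServerConnection({ port: 8080, signal })
// Auto mode: probe from DEFAULT_SERVER_PORT and start on the first free port:
const auto = await createServerConnection({ signal })
auto.cleanup() // closes an owned server; no-op for attached connections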

View File

@@ -0,0 +1,140 @@
import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
import { resolveSession } from "./session-resolver"
import type { OpencodeClient } from "./types"
const createMockClient = (overrides: {
getResult?: { error?: unknown; data?: { id: string } }
createResults?: Array<{ error?: unknown; data?: { id: string } }>
} = {}): OpencodeClient => {
const { getResult, createResults = [] } = overrides
let createCallIndex = 0
return {
session: {
get: mock((opts: { path: { id: string } }) =>
Promise.resolve(getResult ?? { data: { id: opts.path.id } })
),
create: mock(() => {
const result =
createResults[createCallIndex] ?? { data: { id: "new-session-id" } }
createCallIndex++
return Promise.resolve(result)
}),
},
} as unknown as OpencodeClient
}
describe("resolveSession", () => {
beforeEach(() => {
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
})
it("returns provided session ID when session exists", async () => {
// given
const sessionId = "existing-session-id"
const mockClient = createMockClient({
getResult: { data: { id: sessionId } },
})
// when
const result = await resolveSession({ client: mockClient, sessionId })
// then
expect(result).toBe(sessionId)
expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId },
})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
it("throws error when provided session ID not found", async () => {
// given
const sessionId = "non-existent-session-id"
const mockClient = createMockClient({
getResult: { error: { message: "Session not found" } },
})
// when
const result = resolveSession({ client: mockClient, sessionId })
// then
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId },
})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
it("creates new session when no session ID provided", async () => {
// given
const mockClient = createMockClient({
createResults: [{ data: { id: "new-session-id" } }],
})
// when
const result = await resolveSession({ client: mockClient })
// then
expect(result).toBe("new-session-id")
expect(mockClient.session.create).toHaveBeenCalledWith({
body: { title: "oh-my-opencode run" },
})
expect(mockClient.session.get).not.toHaveBeenCalled()
})
it("retries session creation on failure", async () => {
// given
const mockClient = createMockClient({
createResults: [
{ error: { message: "Network error" } },
{ data: { id: "retried-session-id" } },
],
})
// when
const result = await resolveSession({ client: mockClient })
// then
expect(result).toBe("retried-session-id")
expect(mockClient.session.create).toHaveBeenCalledTimes(2)
expect(mockClient.session.create).toHaveBeenCalledWith({
body: { title: "oh-my-opencode run" },
})
})
it("throws after all retries exhausted", async () => {
// given
const mockClient = createMockClient({
createResults: [
{ error: { message: "Error 1" } },
{ error: { message: "Error 2" } },
{ error: { message: "Error 3" } },
],
})
// when
const result = resolveSession({ client: mockClient })
// then
await expect(result).rejects.toThrow("Failed to create session after all retries")
expect(mockClient.session.create).toHaveBeenCalledTimes(3)
})
it("session creation returns no ID", async () => {
// given
const mockClient = createMockClient({
createResults: [
{ data: undefined },
{ data: undefined },
{ data: undefined },
],
})
// when
const result = resolveSession({ client: mockClient })
// then
await expect(result).rejects.toThrow("Failed to create session after all retries")
expect(mockClient.session.create).toHaveBeenCalledTimes(3)
})
})

View File

@@ -0,0 +1,64 @@
import pc from "picocolors"
import type { OpencodeClient } from "./types"
import { serializeError } from "./events"
const SESSION_CREATE_MAX_RETRIES = 3
const SESSION_CREATE_RETRY_DELAY_MS = 1000
export async function resolveSession(options: {
client: OpencodeClient
sessionId?: string
}): Promise<string> {
const { client, sessionId } = options
if (sessionId) {
const res = await client.session.get({ path: { id: sessionId } })
if (res.error || !res.data) {
throw new Error(`Session not found: ${sessionId}`)
}
return sessionId
}
let lastError: unknown
for (let attempt = 1; attempt <= SESSION_CREATE_MAX_RETRIES; attempt++) {
const res = await client.session.create({
body: { title: "oh-my-opencode run" },
})
if (res.error) {
lastError = res.error
console.error(
pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`)
)
console.error(pc.dim(` Error: ${serializeError(res.error)}`))
if (attempt < SESSION_CREATE_MAX_RETRIES) {
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
console.log(pc.dim(` Retrying in ${delay}ms...`))
await new Promise((resolve) => setTimeout(resolve, delay))
}
continue
}
if (res.data?.id) {
return res.data.id
}
lastError = new Error(
`Unexpected response: ${JSON.stringify(res, null, 2)}`
)
console.error(
pc.yellow(
`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`
)
)
if (attempt < SESSION_CREATE_MAX_RETRIES) {
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
console.log(pc.dim(` Retrying in ${delay}ms...`))
await new Promise((resolve) => setTimeout(resolve, delay))
}
}
throw new Error("Failed to create session after all retries")
}
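Sketched usage (the session ID is made up; client is assumed to be an OpencodeClient in scope):

// Reuse an existing session; throws "Session not found: ..." if it is gone:
const existing = await resolveSession({ client, sessionId: "ses_abc123" })
// Or create a fresh one, retrying up to 3 times with growing delays (1s, then 2s):
const fresh = await resolveSession({ client })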

View File

@@ -1,10 +1,29 @@
import type { OpencodeClient } from "@opencode-ai/sdk"
export type { OpencodeClient }
export interface RunOptions {
message: string
agent?: string
directory?: string
timeout?: number
port?: number
attach?: string
onComplete?: string
json?: boolean
sessionId?: string
}
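Illustrative only — a RunOptions value exercising the new flags (values are made up):

const opts: RunOptions = {
  message: "summarize recent commits",
  sessionId: "ses_existing", // reuse an existing session instead of creating one
  json: true, // machine-readable RunResult on stdout
  onComplete: "echo finished", // shell hook run after completion
}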
export interface ServerConnection {
client: OpencodeClient
cleanup: () => void
}
export interface RunResult {
sessionId: string
success: boolean
durationMs: number
messageCount: number
summary: string
}
export interface RunContext {

View File

@@ -5,6 +5,8 @@ import {
BrowserAutomationProviderSchema,
BuiltinCategoryNameSchema,
CategoryConfigSchema,
ExperimentalConfigSchema,
GitMasterConfigSchema,
OhMyOpenCodeConfigSchema,
} from "./schema"
@@ -606,3 +608,128 @@ describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
expect(result.data?.browser_automation_engine).toBeUndefined()
})
})
describe("ExperimentalConfigSchema feature flags", () => {
test("accepts plugin_load_timeout_ms as number", () => {
//#given
const config = { plugin_load_timeout_ms: 5000 }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.plugin_load_timeout_ms).toBe(5000)
}
})
test("rejects plugin_load_timeout_ms below 1000", () => {
//#given
const config = { plugin_load_timeout_ms: 500 }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
test("accepts safe_hook_creation as boolean", () => {
//#given
const config = { safe_hook_creation: false }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.safe_hook_creation).toBe(false)
}
})
test("both fields are optional", () => {
//#given
const config = {}
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.plugin_load_timeout_ms).toBeUndefined()
expect(result.data.safe_hook_creation).toBeUndefined()
}
})
})
describe("GitMasterConfigSchema", () => {
test("accepts boolean true for commit_footer", () => {
//#given
const config = { commit_footer: true }
//#when
const result = GitMasterConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.commit_footer).toBe(true)
}
})
test("accepts boolean false for commit_footer", () => {
//#given
const config = { commit_footer: false }
//#when
const result = GitMasterConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.commit_footer).toBe(false)
}
})
test("accepts string value for commit_footer", () => {
//#given
const config = { commit_footer: "Custom footer text" }
//#when
const result = GitMasterConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.commit_footer).toBe("Custom footer text")
}
})
test("defaults commit_footer to true when not provided", () => {
//#given
const config = {}
//#when
const result = GitMasterConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.commit_footer).toBe(true)
}
})
test("rejects number for commit_footer", () => {
//#given
const config = { commit_footer: 123 }
//#when
const result = GitMasterConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
})

View File

@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
edit: PermissionValue.optional(),
bash: BashPermission.optional(),
webfetch: PermissionValue.optional(),
task: PermissionValue.optional(),
doom_loop: PermissionValue.optional(),
external_directory: PermissionValue.optional(),
})
@@ -86,6 +87,7 @@ export const HookNameSchema = z.enum([
"category-skill-reminder",
"compaction-context-injector",
"compaction-todo-preserver",
"claude-code-hooks",
"auto-slash-command",
"edit-error-recovery",
@@ -100,6 +102,7 @@ export const HookNameSchema = z.enum([
"stop-continuation-guard",
"tasks-todowrite-disabler",
"write-existing-file-guard",
"anthropic-effort",
])
export const BuiltinCommandNameSchema = z.enum([
@@ -183,7 +186,7 @@ export const SisyphusAgentConfigSchema = z.object({
})
export const CategoryConfigSchema = z.object({
/** Human-readable description of the category's purpose. Shown in task prompt. */
description: z.string().optional(),
model: z.string().optional(),
variant: z.string().optional(),
@@ -266,6 +269,10 @@ export const ExperimentalConfigSchema = z.object({
dynamic_context_pruning: DynamicContextPruningConfigSchema.optional(),
/** Enable experimental task system for Todowrite disabler hook */
task_system: z.boolean().optional(),
/** Timeout in ms for loadAllPluginComponents during config handler init (default: 10000, min: 1000) */
plugin_load_timeout_ms: z.number().min(1000).optional(),
/** Wrap hook creation in try/catch to prevent one failing hook from crashing the plugin (default: true at call site) */
safe_hook_creation: z.boolean().optional(),
})
export const SkillSourceSchema = z.union([
@@ -333,10 +340,10 @@ export const BabysittingConfigSchema = z.object({
})
export const GitMasterConfigSchema = z.object({
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
commit_footer: z.boolean().default(true),
/** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
include_co_authored_by: z.boolean().default(true),
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true). Can be boolean or custom string. */
commit_footer: z.union([z.boolean(), z.string()]).default(true),
/** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
include_co_authored_by: z.boolean().default(true),
})
export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser", "dev-browser"])
@@ -420,6 +427,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
websearch: WebsearchConfigSchema.optional(),
tmux: TmuxConfigSchema.optional(),
sisyphus: SisyphusConfigSchema.optional(),
/** Migration history to prevent re-applying migrations (e.g., model version upgrades) */
_migrations: z.array(z.string()).optional(),
})
export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>

View File

@@ -56,7 +56,7 @@ features/
## ANTI-PATTERNS
- **Sequential delegation**: Use `task` parallel
- **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state

View File

@@ -1,8 +1,9 @@
declare const require: (name: string) => any
const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundTask, ResumeInput } from "./types"
import { MIN_IDLE_TIME_MS } from "./constants"
import { BackgroundManager } from "./manager"
import { ConcurrencyManager } from "./concurrency"
@@ -170,6 +171,7 @@ function createBackgroundManager(): BackgroundManager {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
@@ -879,12 +881,14 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
test("should skip notification when parent session is aborted", async () => {
//#given
let promptCalled = false
const promptMock = async () => {
promptCalled = true
return {}
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
messages: async () => {
const error = new Error("User aborted")
@@ -921,14 +925,16 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
test("should swallow aborted error from prompt", async () => {
//#given
let promptCalled = false
const promptMock = async () => {
promptCalled = true
const error = new Error("User aborted")
error.name = "MessageAbortedError"
throw error
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
messages: async () => ({ data: [] }),
},
@@ -1053,19 +1059,20 @@ describe("BackgroundManager.tryCompleteTask", () => {
expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
})
test("should abort session on completion", async () => {
// #given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
messages: async () => ({ data: [] }),
},
}
test("should abort session on completion", async () => {
// #given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
messages: async () => ({ data: [] }),
},
}
manager.shutdown()
manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
@@ -1088,6 +1095,34 @@ describe("BackgroundManager.tryCompleteTask", () => {
// #then
expect(abortedSessionIDs).toEqual(["session-1"])
})
test("should clean pendingByParent even when notifyParentSession throws", async () => {
// given
;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {
throw new Error("notify failed")
}
const task: BackgroundTask = {
id: "task-pending-cleanup",
sessionID: "session-pending-cleanup",
parentSessionID: "parent-pending-cleanup",
parentMessageID: "msg-1",
description: "pending cleanup task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(),
}
getTaskMap(manager).set(task.id, task)
getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
// when
await tryCompleteTaskForTest(manager, task)
// then
expect(task.status).toBe("completed")
expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
})
})
describe("BackgroundManager.trackTask", () => {
@@ -1110,7 +1145,7 @@ describe("BackgroundManager.trackTask", () => {
sessionID: "session-1",
parentSessionID: "parent-session",
description: "external task",
agent: "delegate_task",
agent: "task",
concurrencyKey: "external-key",
}
@@ -1145,7 +1180,7 @@ describe("BackgroundManager.resume concurrency key", () => {
sessionID: "session-1",
parentSessionID: "parent-session",
description: "external task",
agent: "delegate_task",
agent: "task",
concurrencyKey: "external-key",
})
@@ -1167,24 +1202,26 @@ describe("BackgroundManager.resume concurrency key", () => {
})
describe("BackgroundManager.resume model persistence", () => {
let manager: BackgroundManager
let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>
beforeEach(() => {
// given
promptCalls = []
const promptMock = async (args: { path: { id: string }; body: Record<string, unknown> }) => {
promptCalls.push(args)
return {}
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
},
}
manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
})
afterEach(() => {
manager.shutdown()
@@ -1282,19 +1319,20 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
let manager: BackgroundManager
let mockClient: ReturnType<typeof createMockClient>
function createMockClient() {
return {
session: {
create: async () => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
get: async () => ({ data: { directory: "/test/dir" } }),
prompt: async () => ({}),
promptAsync: async () => ({}),
messages: async () => ({ data: [] }),
todo: async () => ({ data: [] }),
status: async () => ({ data: {} }),
abort: async () => ({}),
},
}
}
beforeEach(() => {
// given
@@ -1842,13 +1880,14 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
})
describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
const task: BackgroundTask = {
@@ -1874,12 +1913,13 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task.status).toBe("running")
})
test("should NOT interrupt task with recent lastUpdate", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
test("should NOT interrupt task with recent lastUpdate", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
@@ -1906,11 +1946,12 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task.status).toBe("running")
})
test("should interrupt task with stale lastUpdate (> 3min)", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
test("should interrupt task with stale lastUpdate (> 3min)", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
@@ -1942,10 +1983,11 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task.completedAt).toBeDefined()
})
test("should respect custom staleTimeoutMs config", async () => {
const client = {
session: {
prompt: async () => ({}),
test("should respect custom staleTimeoutMs config", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
@@ -1976,13 +2018,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task.error).toContain("Stale timeout")
})
test("should release concurrency before abort", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should release concurrency before abort", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
stubNotifyParentSession(manager)
@@ -2011,13 +2054,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task.status).toBe("cancelled")
})
test("should handle multiple stale tasks in same poll cycle", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should handle multiple stale tasks in same poll cycle", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
stubNotifyParentSession(manager)
@@ -2062,13 +2106,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
expect(task2.status).toBe("cancelled")
})
test("should use default timeout when config not provided", async () => {
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should use default timeout when config not provided", async () => {
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
@@ -2097,18 +2142,19 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
})
describe("BackgroundManager.shutdown session abort", () => {
test("should call session.abort for all running tasks during shutdown", () => {
// given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
},
}
test("should call session.abort for all running tasks during shutdown", () => {
// given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task1: BackgroundTask = {
@@ -2146,18 +2192,19 @@ describe("BackgroundManager.shutdown session abort", () => {
expect(abortedSessionIDs).toHaveLength(2)
})
test("should not call session.abort for completed or cancelled tasks", () => {
// given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
},
}
test("should not call session.abort for completed or cancelled tasks", () => {
// given
const abortedSessionIDs: string[] = []
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async (args: { path: { id: string } }) => {
abortedSessionIDs.push(args.path.id)
return {}
},
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const completedTask: BackgroundTask = {
@@ -2206,15 +2253,16 @@ describe("BackgroundManager.shutdown session abort", () => {
expect(abortedSessionIDs).toHaveLength(0)
})
test("should call onShutdown callback during shutdown", () => {
// given
let shutdownCalled = false
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should call onShutdown callback during shutdown", () => {
// given
let shutdownCalled = false
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager(
{ client, directory: tmpdir() } as unknown as PluginInput,
undefined,
@@ -2232,14 +2280,15 @@ describe("BackgroundManager.shutdown session abort", () => {
expect(shutdownCalled).toBe(true)
})
test("should not throw when onShutdown callback throws", () => {
// given
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
},
}
test("should not throw when onShutdown callback throws", () => {
// given
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
},
}
const manager = new BackgroundManager(
{ client, directory: tmpdir() } as unknown as PluginInput,
undefined,
@@ -2255,6 +2304,69 @@ describe("BackgroundManager.shutdown session abort", () => {
})
})
describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
test("should cancel descendant tasks when parent session is deleted", () => {
// given
const manager = createBackgroundManager()
const parentSessionID = "session-parent"
const childTask = createMockTask({
id: "task-child",
sessionID: "session-child",
parentSessionID,
status: "running",
})
const siblingTask = createMockTask({
id: "task-sibling",
sessionID: "session-sibling",
parentSessionID,
status: "running",
})
const grandchildTask = createMockTask({
id: "task-grandchild",
sessionID: "session-grandchild",
parentSessionID: "session-child",
status: "pending",
startedAt: undefined,
queuedAt: new Date(),
})
const unrelatedTask = createMockTask({
id: "task-unrelated",
sessionID: "session-unrelated",
parentSessionID: "other-parent",
status: "running",
})
const taskMap = getTaskMap(manager)
taskMap.set(childTask.id, childTask)
taskMap.set(siblingTask.id, siblingTask)
taskMap.set(grandchildTask.id, grandchildTask)
taskMap.set(unrelatedTask.id, unrelatedTask)
const pendingByParent = getPendingByParent(manager)
pendingByParent.set(parentSessionID, new Set([childTask.id, siblingTask.id]))
pendingByParent.set("session-child", new Set([grandchildTask.id]))
// when
manager.handleEvent({
type: "session.deleted",
properties: { info: { id: parentSessionID } },
})
// then
expect(taskMap.has(childTask.id)).toBe(false)
expect(taskMap.has(siblingTask.id)).toBe(false)
expect(taskMap.has(grandchildTask.id)).toBe(false)
expect(taskMap.has(unrelatedTask.id)).toBe(true)
expect(childTask.status).toBe("cancelled")
expect(siblingTask.status).toBe("cancelled")
expect(grandchildTask.status).toBe("cancelled")
expect(pendingByParent.get(parentSessionID)).toBeUndefined()
expect(pendingByParent.get("session-child")).toBeUndefined()
manager.shutdown()
})
})
describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
@@ -2408,3 +2520,182 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
expect(completionTimers.size).toBe(0)
})
})
describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
//#given - a running task started less than MIN_IDLE_TIME_MS ago
const sessionID = "session-early-idle"
const messagesCalls: string[] = []
const realDateNow = Date.now
const baseNow = realDateNow()
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
messages: async (args: { path: { id: string } }) => {
messagesCalls.push(args.path.id)
return {
data: [
{
info: { role: "assistant" },
parts: [{ type: "text", text: "ok" }],
},
],
}
},
todo: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
const task: BackgroundTask = {
id: "task-early-idle",
sessionID,
parentSessionID: "parent-session",
parentMessageID: "msg-1",
description: "early idle task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(baseNow),
}
getTaskMap(manager).set(task.id, task)
//#when - session.idle fires
try {
Date.now = () => baseNow + (MIN_IDLE_TIME_MS - 100)
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
// Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
//#then - idle should be deferred (not dropped), and task should eventually complete
expect(task.status).toBe("running")
await new Promise((resolve) => setTimeout(resolve, 220))
expect(task.status).toBe("completed")
expect(messagesCalls).toEqual([sessionID])
} finally {
Date.now = realDateNow
manager.shutdown()
}
})
test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
//#given - a running task started more than MIN_IDLE_TIME_MS ago
const sessionID = "session-late-idle"
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
messages: async () => ({
data: [
{
info: { role: "assistant" },
parts: [{ type: "text", text: "ok" }],
},
],
}),
todo: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
const task: BackgroundTask = {
id: "task-late-idle",
sessionID,
parentSessionID: "parent-session",
parentMessageID: "msg-1",
description: "late idle task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
}
getTaskMap(manager).set(task.id, task)
//#when
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
//#then - should be processed immediately
await new Promise((resolve) => setTimeout(resolve, 10))
expect(task.status).toBe("completed")
manager.shutdown()
})
test("should not process deferred idle if task already completed by other means", async () => {
//#given - a running task
const sessionID = "session-deferred-noop"
let messagesCallCount = 0
const realDateNow = Date.now
const baseNow = realDateNow()
const client = {
session: {
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
messages: async () => {
messagesCallCount += 1
return {
data: [
{
info: { role: "assistant" },
parts: [{ type: "text", text: "ok" }],
},
],
}
},
todo: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
const remainingMs = 120
const task: BackgroundTask = {
id: "task-deferred-noop",
sessionID,
parentSessionID: "parent-session",
parentMessageID: "msg-1",
description: "deferred noop task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(baseNow),
}
getTaskMap(manager).set(task.id, task)
//#when - session.idle fires early, then task completes via another path before defer timer
try {
Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
expect(messagesCallCount).toBe(0)
await tryCompleteTaskForTest(manager, task)
expect(task.status).toBe("completed")
// Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
//#then - deferred callback should be a no-op
await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
expect(task.status).toBe("completed")
expect(messagesCallCount).toBe(0)
} finally {
Date.now = realDateNow
manager.shutdown()
}
})
})

View File

@@ -88,6 +88,7 @@ export class BackgroundManager {
private queuesByKey: Map<string, QueueItem[]> = new Map()
private processingKeys: Set<string> = new Set()
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
constructor(
ctx: PluginInput,
@@ -309,7 +310,7 @@ export class BackgroundManager {
promptLength: input.prompt.length,
})
// Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
// Fire-and-forget prompt via promptAsync (no response body needed)
// Include model if caller provided one (e.g., from Sisyphus category configs)
// IMPORTANT: variant must be a top-level field in the body, NOT nested inside model
// OpenCode's PromptInput schema expects: { model: { providerID, modelID }, variant: "max" }
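As a concrete illustration of the schema note above, a minimal sketch of the body shape (the values are placeholders, not taken from the diff):

```ts
// Hypothetical body matching the PromptInput shape described in the comments:
// `variant` is a sibling of `model`, never nested inside it.
const body = {
  agent: "explore",
  model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
  variant: "max", // top-level; placing this inside `model` would not match the schema
  parts: [{ type: "text", text: "..." }],
}
// client.session.promptAsync({ path: { id: sessionID }, body })
```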
@@ -328,7 +329,6 @@ export class BackgroundManager {
tools: {
...getAgentToolRestrictions(input.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -357,6 +357,7 @@ export class BackgroundManager {
}).catch(() => {})
this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on error:", err)
})
@@ -410,7 +411,7 @@ export class BackgroundManager {
}
/**
* Track a task created elsewhere (e.g., from delegate_task) for notification tracking.
* Track a task created elsewhere (e.g., from task) for notification tracking.
* This allows tasks created by other tools to receive the same toast/prompt notifications.
*/
async trackTask(input: {
@@ -458,7 +459,7 @@ export class BackgroundManager {
return existingTask
}
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task"
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"
// Acquire concurrency slot if a key is provided
if (input.concurrencyKey) {
@@ -472,7 +473,7 @@ export class BackgroundManager {
parentMessageID: "",
description: input.description,
prompt: "",
agent: input.agent || "delegate_task",
agent: input.agent || "task",
status: "running",
startedAt: new Date(),
progress: {
@@ -570,7 +571,7 @@ export class BackgroundManager {
promptLength: input.prompt.length,
})
// Use prompt() instead of promptAsync() to properly initialize agent loop
// Fire-and-forget prompt via promptAsync (no response body needed)
// Include model if task has one (preserved from original launch with category config)
// variant must be top-level in body, not nested inside model (OpenCode PromptInput schema)
const resumeModel = existingTask.model
@@ -578,7 +579,7 @@ export class BackgroundManager {
: undefined
const resumeVariant = existingTask.model?.variant
this.client.session.prompt({
this.client.session.promptAsync({
path: { id: existingTask.sessionID },
body: {
agent: existingTask.agent,
@@ -587,7 +588,6 @@ export class BackgroundManager {
tools: {
...getAgentToolRestrictions(existingTask.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -614,6 +614,7 @@ export class BackgroundManager {
}
this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on resume error:", err)
})
@@ -651,6 +652,13 @@ export class BackgroundManager {
const task = this.findBySession(sessionID)
if (!task) return
// Clear any pending idle deferral timer since the task is still active
const existingTimer = this.idleDeferralTimers.get(task.id)
if (existingTimer) {
clearTimeout(existingTimer)
this.idleDeferralTimers.delete(task.id)
}
if (partInfo?.type === "tool" || partInfo?.tool) {
if (!task.progress) {
task.progress = {
@@ -677,7 +685,17 @@ export class BackgroundManager {
// Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
const elapsedMs = Date.now() - startedAt.getTime()
if (elapsedMs < MIN_IDLE_TIME_MS) {
log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
if (!this.idleDeferralTimers.has(task.id)) {
log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
const timer = setTimeout(() => {
this.idleDeferralTimers.delete(task.id)
this.handleEvent({ type: "session.idle", properties: { sessionID } })
}, remainingMs)
this.idleDeferralTimers.set(task.id, timer)
} else {
log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
}
return
}
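Stripped of the surrounding manager state, the logic above is a small defer-and-retry pattern: re-dispatch the same event once the minimum idle window has elapsed, and track the timer so it can be cancelled. A standalone sketch under assumed names:

```ts
// Simplified sketch of the idle-deferral pattern; helper names are assumptions.
const MIN_IDLE_TIME_MS = 5_000 // per the 5-second edge guard above
const idleDeferralTimers = new Map<string, ReturnType<typeof setTimeout>>()

function onSessionIdle(taskId: string, elapsedMs: number, redispatch: () => void): void {
  if (elapsedMs >= MIN_IDLE_TIME_MS) {
    redispatch() // enough time has passed; process the idle event now
    return
  }
  if (idleDeferralTimers.has(taskId)) return // already deferred once
  const timer = setTimeout(() => {
    idleDeferralTimers.delete(taskId)
    redispatch() // re-dispatch; the elapsed check runs again on retry
  }, MIN_IDLE_TIME_MS - elapsedMs)
  idleDeferralTimers.set(taskId, timer)
}
```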
@@ -718,28 +736,47 @@ export class BackgroundManager {
if (!info || typeof info.id !== "string") return
const sessionID = info.id
const task = this.findBySession(sessionID)
if (!task) return
if (task.status === "running") {
task.status = "cancelled"
task.completedAt = new Date()
task.error = "Session deleted"
const tasksToCancel = new Map<string, BackgroundTask>()
const directTask = this.findBySession(sessionID)
if (directTask) {
tasksToCancel.set(directTask.id, directTask)
}
for (const descendant of this.getAllDescendantTasks(sessionID)) {
tasksToCancel.set(descendant.id, descendant)
}
if (task.concurrencyKey) {
this.concurrencyManager.release(task.concurrencyKey)
task.concurrencyKey = undefined
}
const existingTimer = this.completionTimers.get(task.id)
if (existingTimer) {
clearTimeout(existingTimer)
this.completionTimers.delete(task.id)
if (tasksToCancel.size === 0) return
for (const task of tasksToCancel.values()) {
if (task.status === "running" || task.status === "pending") {
void this.cancelTask(task.id, {
source: "session.deleted",
reason: "Session deleted",
skipNotification: true,
}).catch(err => {
log("[background-agent] Failed to cancel task on session.deleted:", { taskId: task.id, error: err })
})
}
const existingTimer = this.completionTimers.get(task.id)
if (existingTimer) {
clearTimeout(existingTimer)
this.completionTimers.delete(task.id)
}
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task)
this.tasks.delete(task.id)
this.clearNotificationsForTask(task.id)
if (task.sessionID) {
subagentSessions.delete(task.sessionID)
}
}
this.cleanupPendingByParent(task)
this.tasks.delete(task.id)
this.clearNotificationsForTask(task.id)
subagentSessions.delete(sessionID)
}
}
@@ -890,6 +927,12 @@ export class BackgroundManager {
this.completionTimers.delete(task.id)
}
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task)
if (abortSession && task.sessionID) {
@@ -1025,6 +1068,15 @@ export class BackgroundManager {
this.markForNotification(task)
// Ensure pending tracking is cleaned up even if notification fails
this.cleanupPendingByParent(task)
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
if (task.sessionID) {
this.client.session.abort({
path: { id: task.sessionID },
@@ -1146,7 +1198,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
})
try {
await this.client.session.prompt({
await this.client.session.promptAsync({
path: { id: task.parentSessionID },
body: {
noReply: !allComplete,
@@ -1511,6 +1563,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
}
this.completionTimers.clear()
for (const timer of this.idleDeferralTimers.values()) {
clearTimeout(timer)
}
this.idleDeferralTimers.clear()
this.concurrencyManager.clear()
this.tasks.clear()
this.notifications.clear()

View File

@@ -240,7 +240,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
})
try {
await client.session.prompt({
await client.session.promptAsync({
path: { id: task.parentSessionID },
body: {
noReply: !allComplete,

View File

@@ -146,7 +146,6 @@ export async function startTask(
tools: {
...getAgentToolRestrictions(input.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -222,7 +221,7 @@ export async function resumeTask(
: undefined
const resumeVariant = task.model?.variant
client.session.prompt({
client.session.promptAsync({
path: { id: task.sessionID },
body: {
agent: task.agent,
@@ -231,7 +230,6 @@ export async function resumeTask(
tools: {
...getAgentToolRestrictions(task.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},

View File

@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.
\`\`\`
// Fire all at once, collect results later
delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization")
delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
\`\`\`
<dynamic-agents>
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
Example spawning:
\`\`\`
// 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories")
task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
// ... more based on calculation
\`\`\`
</dynamic-agents>
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [
**Mark "generate" as in_progress.**
<critical>
**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
</critical>
### Root AGENTS.md (Full Treatment)
\`\`\`markdown
@@ -240,7 +245,7 @@ Launch writing tasks for each location:
\`\`\`
for loc in AGENTS_LOCATIONS (except root):
delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\`
task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
Generate AGENTS.md for: \${loc.path}
- Reason: \${loc.reason}
- 30-80 lines max

View File

@@ -1,6 +1,6 @@
---
name: git-master
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
---
# Git Master Agent

View File

@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
export const gitMasterSkill: BuiltinSkill = {
name: "git-master",
description:
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
template: `# Git Master Agent
You are a Git expert combining three specializations:

View File

@@ -1,4 +1,4 @@
import { describe, it, expect, beforeEach, afterEach } from "bun:test"
import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test"
import { mkdirSync, writeFileSync, rmSync } from "fs"
import { join } from "path"
import { tmpdir } from "os"
@@ -126,37 +126,123 @@ describe("getSystemMcpServerNames", () => {
}
})
it("merges server names from multiple .mcp.json files", async () => {
// given
mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
const projectMcp = {
mcpServers: {
playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
},
}
const localMcp = {
mcpServers: {
memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
},
}
writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))
it("merges server names from multiple .mcp.json files", async () => {
// given
mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
const projectMcp = {
mcpServers: {
playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
},
}
const localMcp = {
mcpServers: {
memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
},
}
writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))
const originalCwd = process.cwd()
process.chdir(TEST_DIR)
const originalCwd = process.cwd()
process.chdir(TEST_DIR)
try {
// when
const { getSystemMcpServerNames } = await import("./loader")
const names = getSystemMcpServerNames()
try {
// when
const { getSystemMcpServerNames } = await import("./loader")
const names = getSystemMcpServerNames()
// then
expect(names.has("playwright")).toBe(true)
expect(names.has("memory")).toBe(true)
} finally {
process.chdir(originalCwd)
}
})
// then
expect(names.has("playwright")).toBe(true)
expect(names.has("memory")).toBe(true)
} finally {
process.chdir(originalCwd)
}
})
it("reads user-level MCP config from ~/.claude.json", async () => {
// given
const userConfigPath = join(TEST_DIR, ".claude.json")
const userMcpConfig = {
mcpServers: {
"user-server": {
command: "npx",
args: ["user-mcp-server"],
},
},
}
const originalCwd = process.cwd()
process.chdir(TEST_DIR)
try {
mock.module("os", () => ({
homedir: () => TEST_DIR,
tmpdir,
}))
writeFileSync(userConfigPath, JSON.stringify(userMcpConfig))
const { getSystemMcpServerNames } = await import("./loader")
const names = getSystemMcpServerNames()
expect(names.has("user-server")).toBe(true)
} finally {
process.chdir(originalCwd)
rmSync(userConfigPath, { force: true })
}
})
it("reads both ~/.claude.json and ~/.claude/.mcp.json for user scope", async () => {
// given: simulate both user-level config files
const userClaudeJson = join(TEST_DIR, ".claude.json")
const claudeDir = join(TEST_DIR, ".claude")
const claudeDirMcpJson = join(claudeDir, ".mcp.json")
mkdirSync(claudeDir, { recursive: true })
// ~/.claude.json has server-a
writeFileSync(userClaudeJson, JSON.stringify({
mcpServers: {
"server-from-claude-json": {
command: "npx",
args: ["server-a"],
},
},
}))
// ~/.claude/.mcp.json has server-b (CLI-managed)
writeFileSync(claudeDirMcpJson, JSON.stringify({
mcpServers: {
"server-from-mcp-json": {
command: "npx",
args: ["server-b"],
},
},
}))
const originalCwd = process.cwd()
process.chdir(TEST_DIR)
try {
mock.module("os", () => ({
homedir: () => TEST_DIR,
tmpdir,
}))
// Also mock getClaudeConfigDir to point to our test .claude dir
mock.module("../../shared", () => ({
getClaudeConfigDir: () => claudeDir,
}))
const { getSystemMcpServerNames } = await import("./loader")
const names = getSystemMcpServerNames()
// Both sources should be merged
expect(names.has("server-from-claude-json")).toBe(true)
expect(names.has("server-from-mcp-json")).toBe(true)
} finally {
process.chdir(originalCwd)
}
})
})

View File

@@ -1,5 +1,6 @@
import { existsSync, readFileSync } from "fs"
import { join } from "path"
import { homedir } from "os"
import { getClaudeConfigDir } from "../../shared"
import type {
ClaudeCodeMcpConfig,
@@ -20,6 +21,7 @@ function getMcpConfigPaths(): McpConfigPath[] {
const cwd = process.cwd()
return [
{ path: join(homedir(), ".claude.json"), scope: "user" },
{ path: join(claudeConfigDir, ".mcp.json"), scope: "user" },
{ path: join(cwd, ".mcp.json"), scope: "project" },
{ path: join(cwd, ".claude", ".mcp.json"), scope: "local" },

View File

@@ -314,6 +314,44 @@ describe("resolveMultipleSkillsAsync", () => {
expect(gitMasterContent).toContain("Co-authored-by: Sisyphus")
})
it("should inject custom string footer when commit_footer is a string", async () => {
// given: git-master skill with custom string footer
const skillNames = ["git-master"]
const customFooter = "Custom footer from my team"
const options = {
gitMasterConfig: {
commit_footer: customFooter,
include_co_authored_by: false,
},
}
// when: resolving with custom footer config
const result = await resolveMultipleSkillsAsync(skillNames, options)
// then: custom footer is injected instead of default
const gitMasterContent = result.resolved.get("git-master")
expect(gitMasterContent).toContain(customFooter)
expect(gitMasterContent).not.toContain("Ultraworked with [Sisyphus]")
})
it("should use default Sisyphus footer when commit_footer is boolean true", async () => {
// given: git-master skill with boolean true footer
const skillNames = ["git-master"]
const options = {
gitMasterConfig: {
commit_footer: true,
include_co_authored_by: false,
},
}
// when: resolving with boolean true footer config
const result = await resolveMultipleSkillsAsync(skillNames, options)
// then: default Sisyphus footer is injected
const gitMasterContent = result.resolved.get("git-master")
expect(gitMasterContent).toContain("Ultraworked with [Sisyphus]")
})
it("should handle empty array", async () => {
// given: empty skill names
const skillNames: string[] = []
@@ -389,3 +427,33 @@ describe("resolveMultipleSkills with browserProvider", () => {
expect(result.notFound).toContain("agent-browser")
})
})
describe("resolveMultipleSkillsAsync with browserProvider filtering", () => {
it("should exclude discovered agent-browser when browserProvider is playwright", async () => {
// given: playwright is the selected browserProvider (default)
const skillNames = ["playwright", "git-master"]
const options = { browserProvider: "playwright" as const }
// when: resolving multiple skills
const result = await resolveMultipleSkillsAsync(skillNames, options)
// then: playwright resolved, agent-browser would be excluded if discovered
expect(result.resolved.has("playwright")).toBe(true)
expect(result.resolved.has("git-master")).toBe(true)
expect(result.notFound).not.toContain("playwright")
})
it("should exclude discovered playwright when browserProvider is agent-browser", async () => {
// given: agent-browser is the selected browserProvider
const skillNames = ["agent-browser", "git-master"]
const options = { browserProvider: "agent-browser" as const }
// when: resolving multiple skills
const result = await resolveMultipleSkillsAsync(skillNames, options)
// then: agent-browser resolved, playwright would be excluded if discovered
expect(result.resolved.has("agent-browser")).toBe(true)
expect(result.resolved.has("git-master")).toBe(true)
expect(result.notFound).not.toContain("agent-browser")
})
})

View File

@@ -55,10 +55,23 @@ async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSki
mcpConfig: skill.mcpConfig,
}))
const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
// Provider-gated skill names that should be filtered based on browserProvider
const providerGatedSkillNames = new Set(["agent-browser", "playwright"])
const browserProvider = options?.browserProvider ?? "playwright"
// Filter discovered skills to exclude provider-gated names that don't match the selected provider
const filteredDiscoveredSkills = discoveredSkills.filter((skill) => {
if (!providerGatedSkillNames.has(skill.name)) {
return true
}
// For provider-gated skills, only include if it matches the selected provider
return skill.name === browserProvider
})
const discoveredNames = new Set(filteredDiscoveredSkills.map((s) => s.name))
const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))
let allSkills = [...discoveredSkills, ...uniqueBuiltins]
let allSkills = [...filteredDiscoveredSkills, ...uniqueBuiltins]
// Filter discovered skills by disabledSkills (builtin skills are already filtered by createBuiltinSkills)
if (hasDisabledSkills) {
@@ -97,9 +110,10 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
sections.push(``)
if (commitFooter) {
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
sections.push(`1. **Footer in commit body:**`)
sections.push("```")
sections.push(`Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)`)
sections.push(footerText)
sections.push("```")
sections.push(``)
}
@@ -113,14 +127,16 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
}
if (commitFooter && includeCoAuthoredBy) {
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
sections.push(`**Example (both enabled):**`)
sections.push("```bash")
sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
sections.push(`git commit -m "{Commit Message}" -m "${footerText}" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
sections.push("```")
} else if (commitFooter) {
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
sections.push(`**Example:**`)
sections.push("```bash")
sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"`)
sections.push(`git commit -m "{Commit Message}" -m "${footerText}"`)
sections.push("```")
} else if (includeCoAuthoredBy) {
sections.push(`**Example:**`)
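Tying this to the tests earlier in the diff, a hedged usage sketch; `gitMasterTemplate` is an assumed stand-in for the skill template:

```ts
// Sketch: a string commit_footer now replaces the default Sisyphus footer
// in the rendered example (per the resolveMultipleSkillsAsync tests above).
const rendered = injectGitMasterConfig(gitMasterTemplate, {
  commit_footer: "Custom footer from my team",
  include_co_authored_by: false,
})
// rendered is expected to contain:
//   git commit -m "{Commit Message}" -m "Custom footer from my team"
```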

View File

@@ -0,0 +1,111 @@
import { describe, test, expect, beforeEach } from "bun:test"
import {
storeToolMetadata,
consumeToolMetadata,
getPendingStoreSize,
clearPendingStore,
} from "./index"
describe("tool-metadata-store", () => {
beforeEach(() => {
clearPendingStore()
})
describe("storeToolMetadata", () => {
test("#given metadata with title and metadata, #when stored, #then store size increases", () => {
//#given
const sessionID = "ses_abc123"
const callID = "call_001"
const data = {
title: "Test Task",
metadata: { sessionId: "ses_child", agent: "oracle" },
}
//#when
storeToolMetadata(sessionID, callID, data)
//#then
expect(getPendingStoreSize()).toBe(1)
})
})
describe("consumeToolMetadata", () => {
test("#given stored metadata, #when consumed, #then returns the stored data", () => {
//#given
const sessionID = "ses_abc123"
const callID = "call_001"
const data = {
title: "My Task",
metadata: { sessionId: "ses_sub", run_in_background: true },
}
storeToolMetadata(sessionID, callID, data)
//#when
const result = consumeToolMetadata(sessionID, callID)
//#then
expect(result).toEqual(data)
})
test("#given stored metadata, #when consumed twice, #then second call returns undefined", () => {
//#given
const sessionID = "ses_abc123"
const callID = "call_001"
storeToolMetadata(sessionID, callID, { title: "Task" })
//#when
consumeToolMetadata(sessionID, callID)
const second = consumeToolMetadata(sessionID, callID)
//#then
expect(second).toBeUndefined()
expect(getPendingStoreSize()).toBe(0)
})
test("#given no stored metadata, #when consumed, #then returns undefined", () => {
//#given
const sessionID = "ses_nonexistent"
const callID = "call_999"
//#when
const result = consumeToolMetadata(sessionID, callID)
//#then
expect(result).toBeUndefined()
})
})
describe("isolation", () => {
test("#given multiple entries, #when consuming one, #then others remain", () => {
//#given
storeToolMetadata("ses_1", "call_a", { title: "Task A" })
storeToolMetadata("ses_1", "call_b", { title: "Task B" })
storeToolMetadata("ses_2", "call_a", { title: "Task C" })
//#when
const resultA = consumeToolMetadata("ses_1", "call_a")
//#then
expect(resultA?.title).toBe("Task A")
expect(getPendingStoreSize()).toBe(2)
expect(consumeToolMetadata("ses_1", "call_b")?.title).toBe("Task B")
expect(consumeToolMetadata("ses_2", "call_a")?.title).toBe("Task C")
expect(getPendingStoreSize()).toBe(0)
})
})
describe("overwrite", () => {
test("#given existing entry, #when stored again with same key, #then overwrites", () => {
//#given
storeToolMetadata("ses_1", "call_a", { title: "Old" })
//#when
storeToolMetadata("ses_1", "call_a", { title: "New", metadata: { updated: true } })
//#then
const result = consumeToolMetadata("ses_1", "call_a")
expect(result?.title).toBe("New")
expect(result?.metadata).toEqual({ updated: true })
})
})
})

View File

@@ -0,0 +1,84 @@
/**
* Pending tool metadata store.
*
* OpenCode's `fromPlugin()` wrapper always replaces the metadata returned by
* plugin tools with `{ truncated, outputPath }`, discarding any sessionId,
* title, or custom metadata set during `execute()`.
*
* This store captures metadata written via `ctx.metadata()` inside execute(),
* then the `tool.execute.after` hook consumes it and merges it back into the
* result *before* the processor writes the final part to the session store.
*
* Flow:
* execute() → storeToolMetadata(sessionID, callID, data)
* fromPlugin() → overwrites metadata with { truncated }
* tool.execute.after → consumeToolMetadata(sessionID, callID) → merges back
* processor → Session.updatePart(status:"completed", metadata: result.metadata)
*/
export interface PendingToolMetadata {
title?: string
metadata?: Record<string, unknown>
}
const pendingStore = new Map<string, PendingToolMetadata & { storedAt: number }>()
const STALE_TIMEOUT_MS = 15 * 60 * 1000
function makeKey(sessionID: string, callID: string): string {
return `${sessionID}:${callID}`
}
function cleanupStaleEntries(): void {
const now = Date.now()
for (const [key, entry] of pendingStore) {
if (now - entry.storedAt > STALE_TIMEOUT_MS) {
pendingStore.delete(key)
}
}
}
/**
* Store metadata to be restored after fromPlugin() overwrites it.
* Called from tool execute() functions alongside ctx.metadata().
*/
export function storeToolMetadata(
sessionID: string,
callID: string,
data: PendingToolMetadata,
): void {
cleanupStaleEntries()
pendingStore.set(makeKey(sessionID, callID), { ...data, storedAt: Date.now() })
}
/**
* Consume stored metadata (one-time read, removes from store).
* Called from tool.execute.after hook.
*/
export function consumeToolMetadata(
sessionID: string,
callID: string,
): PendingToolMetadata | undefined {
const key = makeKey(sessionID, callID)
const stored = pendingStore.get(key)
if (stored) {
pendingStore.delete(key)
const { storedAt: _, ...data } = stored
return data
}
return undefined
}
/**
* Get current store size (for testing/debugging).
*/
export function getPendingStoreSize(): number {
return pendingStore.size
}
/**
* Clear all pending metadata (for testing).
*/
export function clearPendingStore(): void {
pendingStore.clear()
}
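Putting the flow from the header comment together, a minimal usage sketch; the hook wiring and result shape are assumptions, not the plugin API:

```ts
import { storeToolMetadata, consumeToolMetadata } from "./index"

// Assumed result shape for illustration only.
interface ToolResult {
  title?: string
  metadata?: Record<string, unknown>
}

// 1. Inside a tool's execute(): persist metadata before fromPlugin() discards it.
function onExecute(sessionID: string, callID: string, childSessionID: string): void {
  storeToolMetadata(sessionID, callID, {
    title: "Background explore task",
    metadata: { sessionId: childSessionID, agent: "explore" },
  })
}

// 2. Inside the tool.execute.after hook: merge it back (one-time read).
function onAfterExecute(sessionID: string, callID: string, result: ToolResult): void {
  const pending = consumeToolMetadata(sessionID, callID)
  if (!pending) return
  if (pending.title) result.title = pending.title
  result.metadata = { ...result.metadata, ...pending.metadata }
}
```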

View File

@@ -24,7 +24,7 @@ export const TARGET_TOOLS = new Set([
export const AGENT_TOOLS = new Set([
"task",
"call_omo_agent",
"delegate_task",
"task",
]);
export const REMINDER_MESSAGE = `
@@ -32,13 +32,13 @@ export const REMINDER_MESSAGE = `
You called a search/fetch tool directly without leveraging specialized agents.
RECOMMENDED: Use delegate_task with explore/librarian agents for better results:
RECOMMENDED: Use task with explore/librarian agents for better results:
\`\`\`
// Parallel exploration - fire multiple agents simultaneously
delegate_task(agent="explore", prompt="Find all files matching pattern X")
delegate_task(agent="explore", prompt="Search for implementation of Y")
delegate_task(agent="librarian", prompt="Lookup documentation for Z")
task(agent="explore", prompt="Find all files matching pattern X")
task(agent="explore", prompt="Search for implementation of Y")
task(agent="librarian", prompt="Lookup documentation for Z")
// Then continue your work while they run in background
// System will notify you when each completes
@@ -50,5 +50,5 @@ WHY:
- Specialized agents have domain expertise
- Reduces context window usage in main session
ALWAYS prefer: Multiple parallel delegate_task calls > Direct tool calls
ALWAYS prefer: Multiple parallel task calls > Direct tool calls
`;

View File

@@ -0,0 +1,71 @@
import type { ParsedTokenLimitError } from "./types"
import type { ExperimentalConfig } from "../../config"
import type { DeduplicationConfig } from "./pruning-deduplication"
import type { PruningState } from "./pruning-types"
import { executeDeduplication } from "./pruning-deduplication"
import { truncateToolOutputsByCallId } from "./pruning-tool-output-truncation"
import { log } from "../../shared/logger"
function createPruningState(): PruningState {
return {
toolIdsToPrune: new Set<string>(),
currentTurn: 0,
fileOperations: new Map(),
toolSignatures: new Map(),
erroredTools: new Map(),
}
}
function isPromptTooLongError(parsed: ParsedTokenLimitError): boolean {
return !parsed.errorType.toLowerCase().includes("non-empty content")
}
function getDeduplicationPlan(
experimental?: ExperimentalConfig,
): { config: DeduplicationConfig; protectedTools: Set<string> } | null {
const pruningConfig = experimental?.dynamic_context_pruning
if (!pruningConfig?.enabled) return null
const deduplicationEnabled = pruningConfig.strategies?.deduplication?.enabled
if (deduplicationEnabled === false) return null
const protectedTools = new Set(pruningConfig.protected_tools ?? [])
return {
config: {
enabled: true,
protectedTools: pruningConfig.protected_tools ?? [],
},
protectedTools,
}
}
export async function attemptDeduplicationRecovery(
sessionID: string,
parsed: ParsedTokenLimitError,
experimental: ExperimentalConfig | undefined,
): Promise<void> {
if (!isPromptTooLongError(parsed)) return
const plan = getDeduplicationPlan(experimental)
if (!plan) return
const pruningState = createPruningState()
const prunedCount = executeDeduplication(
sessionID,
pruningState,
plan.config,
plan.protectedTools,
)
const { truncatedCount } = truncateToolOutputsByCallId(
sessionID,
pruningState.toolIdsToPrune,
)
if (prunedCount > 0 || truncatedCount > 0) {
log("[auto-compact] deduplication recovery applied", {
sessionID,
prunedCount,
truncatedCount,
})
}
}

View File

@@ -1,151 +1,5 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { AutoCompactState, ParsedTokenLimitError } from "./types"
import type { ExperimentalConfig } from "../../config"
import { parseAnthropicTokenLimitError } from "./parser"
import { executeCompact, getLastAssistant } from "./executor"
import { log } from "../../shared/logger"
export interface AnthropicContextWindowLimitRecoveryOptions {
experimental?: ExperimentalConfig
}
function createRecoveryState(): AutoCompactState {
return {
pendingCompact: new Set<string>(),
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
retryStateBySession: new Map(),
truncateStateBySession: new Map(),
emptyContentAttemptBySession: new Map(),
compactionInProgress: new Set<string>(),
}
}
export function createAnthropicContextWindowLimitRecoveryHook(ctx: PluginInput, options?: AnthropicContextWindowLimitRecoveryOptions) {
const autoCompactState = createRecoveryState()
const experimental = options?.experimental
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
const props = event.properties as Record<string, unknown> | undefined
if (event.type === "session.deleted") {
const sessionInfo = props?.info as { id?: string } | undefined
if (sessionInfo?.id) {
autoCompactState.pendingCompact.delete(sessionInfo.id)
autoCompactState.errorDataBySession.delete(sessionInfo.id)
autoCompactState.retryStateBySession.delete(sessionInfo.id)
autoCompactState.truncateStateBySession.delete(sessionInfo.id)
autoCompactState.emptyContentAttemptBySession.delete(sessionInfo.id)
autoCompactState.compactionInProgress.delete(sessionInfo.id)
}
return
}
if (event.type === "session.error") {
const sessionID = props?.sessionID as string | undefined
log("[auto-compact] session.error received", { sessionID, error: props?.error })
if (!sessionID) return
const parsed = parseAnthropicTokenLimitError(props?.error)
log("[auto-compact] parsed result", { parsed, hasError: !!props?.error })
if (parsed) {
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, parsed)
if (autoCompactState.compactionInProgress.has(sessionID)) {
return
}
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
const providerID = parsed.providerID ?? (lastAssistant?.providerID as string | undefined)
const modelID = parsed.modelID ?? (lastAssistant?.modelID as string | undefined)
await ctx.client.tui
.showToast({
body: {
title: "Context Limit Hit",
message: "Truncating large tool outputs and recovering...",
variant: "warning" as const,
duration: 3000,
},
})
.catch(() => {})
setTimeout(() => {
executeCompact(
sessionID,
{ providerID, modelID },
autoCompactState,
ctx.client,
ctx.directory,
experimental
)
}, 300)
}
return
}
if (event.type === "message.updated") {
const info = props?.info as Record<string, unknown> | undefined
const sessionID = info?.sessionID as string | undefined
if (sessionID && info?.role === "assistant" && info.error) {
log("[auto-compact] message.updated with error", { sessionID, error: info.error })
const parsed = parseAnthropicTokenLimitError(info.error)
log("[auto-compact] message.updated parsed result", { parsed })
if (parsed) {
parsed.providerID = info.providerID as string | undefined
parsed.modelID = info.modelID as string | undefined
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, parsed)
}
}
return
}
if (event.type === "session.idle") {
const sessionID = props?.sessionID as string | undefined
if (!sessionID) return
if (!autoCompactState.pendingCompact.has(sessionID)) return
const errorData = autoCompactState.errorDataBySession.get(sessionID)
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
if (lastAssistant?.summary === true) {
autoCompactState.pendingCompact.delete(sessionID)
return
}
const providerID = errorData?.providerID ?? (lastAssistant?.providerID as string | undefined)
const modelID = errorData?.modelID ?? (lastAssistant?.modelID as string | undefined)
await ctx.client.tui
.showToast({
body: {
title: "Auto Compact",
message: "Token limit exceeded. Attempting recovery...",
variant: "warning" as const,
duration: 3000,
},
})
.catch(() => {})
await executeCompact(
sessionID,
{ providerID, modelID },
autoCompactState,
ctx.client,
ctx.directory,
experimental
)
}
}
return {
event: eventHandler,
}
}
export { createAnthropicContextWindowLimitRecoveryHook } from "./recovery-hook"
export type { AnthropicContextWindowLimitRecoveryOptions } from "./recovery-hook"
export type { AutoCompactState, ParsedTokenLimitError, TruncateState } from "./types"
export { parseAnthropicTokenLimitError } from "./parser"
export { executeCompact, getLastAssistant } from "./executor"

View File

@@ -0,0 +1,97 @@
import { existsSync, readdirSync, readFileSync } from "node:fs"
import { join } from "node:path"
import { getOpenCodeStorageDir } from "../../shared/data-path"
import { truncateToolResult } from "./storage"
import { log } from "../../shared/logger"
interface StoredToolPart {
type?: string
callID?: string
truncated?: boolean
state?: {
output?: string
}
}
function getMessageStorage(): string {
return join(getOpenCodeStorageDir(), "message")
}
function getPartStorage(): string {
return join(getOpenCodeStorageDir(), "part")
}
function getMessageDir(sessionID: string): string | null {
const messageStorage = getMessageStorage()
if (!existsSync(messageStorage)) return null
const directPath = join(messageStorage, sessionID)
if (existsSync(directPath)) return directPath
for (const dir of readdirSync(messageStorage)) {
const sessionPath = join(messageStorage, dir, sessionID)
if (existsSync(sessionPath)) return sessionPath
}
return null
}
function getMessageIds(sessionID: string): string[] {
const messageDir = getMessageDir(sessionID)
if (!messageDir) return []
const messageIds: string[] = []
for (const file of readdirSync(messageDir)) {
if (!file.endsWith(".json")) continue
messageIds.push(file.replace(".json", ""))
}
return messageIds
}
export function truncateToolOutputsByCallId(
sessionID: string,
callIds: Set<string>,
): { truncatedCount: number } {
if (callIds.size === 0) return { truncatedCount: 0 }
const messageIds = getMessageIds(sessionID)
if (messageIds.length === 0) return { truncatedCount: 0 }
let truncatedCount = 0
for (const messageID of messageIds) {
const partDir = join(getPartStorage(), messageID)
if (!existsSync(partDir)) continue
for (const file of readdirSync(partDir)) {
if (!file.endsWith(".json")) continue
const partPath = join(partDir, file)
try {
const content = readFileSync(partPath, "utf-8")
const part = JSON.parse(content) as StoredToolPart
if (part.type !== "tool" || !part.callID) continue
if (!callIds.has(part.callID)) continue
if (!part.state?.output || part.truncated) continue
const result = truncateToolResult(partPath)
if (result.success) {
truncatedCount++
}
} catch {
continue
}
}
}
if (truncatedCount > 0) {
log("[auto-compact] pruned duplicate tool outputs", {
sessionID,
truncatedCount,
})
}
return { truncatedCount }
}

View File

@@ -0,0 +1,122 @@
import { describe, test, expect, mock, beforeEach } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import type { ExperimentalConfig } from "../../config"
const attemptDeduplicationRecoveryMock = mock(async () => {})
mock.module("./deduplication-recovery", () => ({
attemptDeduplicationRecovery: attemptDeduplicationRecoveryMock,
}))
function createImmediateTimeouts(): () => void {
const originalSetTimeout = globalThis.setTimeout
const originalClearTimeout = globalThis.clearTimeout
globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => {
callback(...args)
return 0 as unknown as ReturnType<typeof setTimeout>
}) as typeof setTimeout
globalThis.clearTimeout = ((_: ReturnType<typeof setTimeout>) => {}) as typeof clearTimeout
return () => {
globalThis.setTimeout = originalSetTimeout
globalThis.clearTimeout = originalClearTimeout
}
}
describe("createAnthropicContextWindowLimitRecoveryHook", () => {
beforeEach(() => {
attemptDeduplicationRecoveryMock.mockClear()
})
test("calls deduplication recovery when compaction is already in progress", async () => {
//#given
const restoreTimeouts = createImmediateTimeouts()
const experimental = {
dynamic_context_pruning: {
enabled: true,
strategies: {
deduplication: { enabled: true },
},
},
} satisfies ExperimentalConfig
let resolveSummarize: (() => void) | null = null
const summarizePromise = new Promise<void>((resolve) => {
resolveSummarize = resolve
})
const mockClient = {
session: {
messages: mock(() => Promise.resolve({ data: [] })),
summarize: mock(() => summarizePromise),
revert: mock(() => Promise.resolve()),
prompt_async: mock(() => Promise.resolve()),
},
tui: {
showToast: mock(() => Promise.resolve()),
},
}
try {
const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
const ctx = { client: mockClient, directory: "/tmp" } as PluginInput
const hook = createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental })
// first error triggers compaction (setTimeout runs immediately due to mock)
await hook.event({
event: {
type: "session.error",
properties: { sessionID: "session-96", error: "prompt is too long" },
},
})
//#when - second error while compaction is in progress
await hook.event({
event: {
type: "session.error",
properties: { sessionID: "session-96", error: "prompt is too long" },
},
})
//#then - deduplication recovery was called for the second error
expect(attemptDeduplicationRecoveryMock).toHaveBeenCalledTimes(1)
expect(attemptDeduplicationRecoveryMock.mock.calls[0]![0]).toBe("session-96")
} finally {
if (resolveSummarize) resolveSummarize()
restoreTimeouts()
}
})
test("does not call deduplication when compaction is not in progress", async () => {
//#given
const mockClient = {
session: {
messages: mock(() => Promise.resolve({ data: [] })),
summarize: mock(() => Promise.resolve()),
revert: mock(() => Promise.resolve()),
prompt_async: mock(() => Promise.resolve()),
},
tui: {
showToast: mock(() => Promise.resolve()),
},
}
const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
const ctx = { client: mockClient, directory: "/tmp" } as PluginInput
const hook = createAnthropicContextWindowLimitRecoveryHook(ctx)
//#when - single error (no compaction in progress)
await hook.event({
event: {
type: "session.error",
properties: { sessionID: "session-no-dedup", error: "some other error" },
},
})
//#then
expect(attemptDeduplicationRecoveryMock).not.toHaveBeenCalled()
})
})

View File

@@ -0,0 +1,153 @@
import type { PluginInput } from "@opencode-ai/plugin"
import type { AutoCompactState, ParsedTokenLimitError } from "./types"
import type { ExperimentalConfig } from "../../config"
import { parseAnthropicTokenLimitError } from "./parser"
import { executeCompact, getLastAssistant } from "./executor"
import { attemptDeduplicationRecovery } from "./deduplication-recovery"
import { log } from "../../shared/logger"
export interface AnthropicContextWindowLimitRecoveryOptions {
experimental?: ExperimentalConfig
}
function createRecoveryState(): AutoCompactState {
return {
pendingCompact: new Set<string>(),
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
retryStateBySession: new Map(),
truncateStateBySession: new Map(),
emptyContentAttemptBySession: new Map(),
compactionInProgress: new Set<string>(),
}
}
export function createAnthropicContextWindowLimitRecoveryHook(
ctx: PluginInput,
options?: AnthropicContextWindowLimitRecoveryOptions,
) {
const autoCompactState = createRecoveryState()
const experimental = options?.experimental
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
const props = event.properties as Record<string, unknown> | undefined
if (event.type === "session.deleted") {
const sessionInfo = props?.info as { id?: string } | undefined
if (sessionInfo?.id) {
autoCompactState.pendingCompact.delete(sessionInfo.id)
autoCompactState.errorDataBySession.delete(sessionInfo.id)
autoCompactState.retryStateBySession.delete(sessionInfo.id)
autoCompactState.truncateStateBySession.delete(sessionInfo.id)
autoCompactState.emptyContentAttemptBySession.delete(sessionInfo.id)
autoCompactState.compactionInProgress.delete(sessionInfo.id)
}
return
}
if (event.type === "session.error") {
const sessionID = props?.sessionID as string | undefined
log("[auto-compact] session.error received", { sessionID, error: props?.error })
if (!sessionID) return
const parsed = parseAnthropicTokenLimitError(props?.error)
log("[auto-compact] parsed result", { parsed, hasError: !!props?.error })
if (parsed) {
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, parsed)
if (autoCompactState.compactionInProgress.has(sessionID)) {
await attemptDeduplicationRecovery(sessionID, parsed, experimental)
return
}
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
const providerID = parsed.providerID ?? (lastAssistant?.providerID as string | undefined)
const modelID = parsed.modelID ?? (lastAssistant?.modelID as string | undefined)
await ctx.client.tui
.showToast({
body: {
title: "Context Limit Hit",
message: "Truncating large tool outputs and recovering...",
variant: "warning" as const,
duration: 3000,
},
})
.catch(() => {})
setTimeout(() => {
executeCompact(
sessionID,
{ providerID, modelID },
autoCompactState,
ctx.client,
ctx.directory,
experimental,
)
}, 300)
}
return
}
if (event.type === "message.updated") {
const info = props?.info as Record<string, unknown> | undefined
const sessionID = info?.sessionID as string | undefined
if (sessionID && info?.role === "assistant" && info.error) {
log("[auto-compact] message.updated with error", { sessionID, error: info.error })
const parsed = parseAnthropicTokenLimitError(info.error)
log("[auto-compact] message.updated parsed result", { parsed })
if (parsed) {
parsed.providerID = info.providerID as string | undefined
parsed.modelID = info.modelID as string | undefined
autoCompactState.pendingCompact.add(sessionID)
autoCompactState.errorDataBySession.set(sessionID, parsed)
}
}
return
}
if (event.type === "session.idle") {
const sessionID = props?.sessionID as string | undefined
if (!sessionID) return
if (!autoCompactState.pendingCompact.has(sessionID)) return
const errorData = autoCompactState.errorDataBySession.get(sessionID)
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
if (lastAssistant?.summary === true) {
autoCompactState.pendingCompact.delete(sessionID)
return
}
const providerID = errorData?.providerID ?? (lastAssistant?.providerID as string | undefined)
const modelID = errorData?.modelID ?? (lastAssistant?.modelID as string | undefined)
await ctx.client.tui
.showToast({
body: {
title: "Auto Compact",
message: "Token limit exceeded. Attempting recovery...",
variant: "warning" as const,
duration: 3000,
},
})
.catch(() => {})
await executeCompact(
sessionID,
{ providerID, modelID },
autoCompactState,
ctx.client,
ctx.directory,
experimental,
)
}
}
return {
event: eventHandler,
}
}

View File

@@ -0,0 +1,215 @@
import { describe, expect, it } from "bun:test"
import { createAnthropicEffortHook } from "./index"
interface ChatParamsInput {
sessionID: string
agent: { name?: string }
model: { providerID: string; modelID: string; id?: string; api?: { npm?: string } }
provider: { id: string }
message: { variant?: string }
}
interface ChatParamsOutput {
temperature?: number
topP?: number
topK?: number
options: Record<string, unknown>
}
function createMockParams(overrides: {
providerID?: string
modelID?: string
variant?: string
agentName?: string
existingOptions?: Record<string, unknown>
}): { input: ChatParamsInput; output: ChatParamsOutput } {
const providerID = overrides.providerID ?? "anthropic"
const modelID = overrides.modelID ?? "claude-opus-4-6"
const variant = "variant" in overrides ? overrides.variant : "max"
const agentName = overrides.agentName ?? "sisyphus"
const existingOptions = overrides.existingOptions ?? {}
return {
input: {
sessionID: "test-session",
agent: { name: agentName },
model: { providerID, modelID },
provider: { id: providerID },
message: { variant },
},
output: {
temperature: 0.1,
options: { ...existingOptions },
},
}
}
describe("createAnthropicEffortHook", () => {
describe("opus 4-6 with variant max", () => {
it("should inject effort max for anthropic opus-4-6 with variant max", async () => {
//#given anthropic opus-4-6 model with variant max
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should be injected into options
expect(output.options.effort).toBe("max")
})
it("should inject effort max for github-copilot claude-opus-4-6", async () => {
//#given github-copilot provider with claude-opus-4-6
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
providerID: "github-copilot",
modelID: "claude-opus-4-6",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should be injected (github-copilot resolves to anthropic)
expect(output.options.effort).toBe("max")
})
it("should inject effort max for opencode provider with claude-opus-4-6", async () => {
//#given opencode provider with claude-opus-4-6
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
providerID: "opencode",
modelID: "claude-opus-4-6",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should be injected
expect(output.options.effort).toBe("max")
})
it("should handle normalized model ID with dots (opus-4.6)", async () => {
//#given model ID with dots instead of hyphens
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
modelID: "claude-opus-4.6",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then should normalize and inject effort
expect(output.options.effort).toBe("max")
})
})
describe("conditions NOT met - should skip", () => {
it("should NOT inject effort when variant is not max", async () => {
//#given opus-4-6 with variant high (not max)
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({ variant: "high" })
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should NOT be injected
expect(output.options.effort).toBeUndefined()
})
it("should NOT inject effort when variant is undefined", async () => {
//#given opus-4-6 with no variant
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({ variant: undefined })
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should NOT be injected
expect(output.options.effort).toBeUndefined()
})
it("should NOT inject effort for non-opus model", async () => {
//#given claude-sonnet-4-5 (not opus)
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
modelID: "claude-sonnet-4-5",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should NOT be injected
expect(output.options.effort).toBeUndefined()
})
it("should NOT inject effort for non-anthropic provider with non-claude model", async () => {
//#given openai provider with gpt model
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
providerID: "openai",
modelID: "gpt-5.2",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should NOT be injected
expect(output.options.effort).toBeUndefined()
})
it("should NOT throw when model.modelID is undefined", async () => {
//#given model with undefined modelID (runtime edge case)
const hook = createAnthropicEffortHook()
const input = {
sessionID: "test-session",
agent: { name: "sisyphus" },
model: { providerID: "anthropic", modelID: undefined as unknown as string },
provider: { id: "anthropic" },
message: { variant: "max" as const },
}
const output = { temperature: 0.1, options: {} }
//#when chat.params hook is called with undefined modelID
await hook["chat.params"](input, output)
//#then should gracefully skip without throwing
expect(output.options.effort).toBeUndefined()
})
})
describe("preserves existing options", () => {
it("should NOT overwrite existing effort if already set", async () => {
//#given options already have effort set
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
existingOptions: { effort: "high" },
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then existing effort should be preserved
expect(output.options.effort).toBe("high")
})
it("should preserve other existing options when injecting effort", async () => {
//#given options with existing thinking config
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
existingOptions: {
thinking: { type: "enabled", budgetTokens: 31999 },
},
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should be added without affecting thinking
expect(output.options.effort).toBe("max")
expect(output.options.thinking).toEqual({
type: "enabled",
budgetTokens: 31999,
})
})
})
})

View File

@@ -0,0 +1,56 @@
import { log } from "../../shared"
const OPUS_4_6_PATTERN = /claude-opus-4[-.]6/i
function normalizeModelID(modelID: string): string {
return modelID.replace(/\.(\d+)/g, "-$1")
}
function isClaudeProvider(providerID: string, modelID: string): boolean {
if (["anthropic", "opencode"].includes(providerID)) return true
if (providerID === "github-copilot" && modelID.toLowerCase().includes("claude")) return true
return false
}
function isOpus46(modelID: string): boolean {
const normalized = normalizeModelID(modelID)
return OPUS_4_6_PATTERN.test(normalized)
}
interface ChatParamsInput {
sessionID: string
agent: { name?: string }
model: { providerID: string; modelID: string }
provider: { id: string }
message: { variant?: string }
}
interface ChatParamsOutput {
temperature?: number
topP?: number
topK?: number
options: Record<string, unknown>
}
export function createAnthropicEffortHook() {
return {
"chat.params": async (
input: ChatParamsInput,
output: ChatParamsOutput
): Promise<void> => {
const { model, message } = input
if (!model?.modelID || !model?.providerID) return
if (message.variant !== "max") return
if (!isClaudeProvider(model.providerID, model.modelID)) return
if (!isOpus46(model.modelID)) return
if (output.options.effort !== undefined) return
output.options.effort = "max"
log("anthropic-effort: injected effort=max", {
sessionID: input.sessionID,
provider: model.providerID,
model: model.modelID,
})
},
}
}
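
A usage sketch of the hook above, driven the same way the tests do. The input and output shapes mirror the interfaces defined in this file; the session and agent names are placeholders:

// Drive the chat.params hook directly and observe the injected effort.
const hook = createAnthropicEffortHook()

const output = { temperature: 0.1, options: {} as Record<string, unknown> }
await hook["chat.params"](
  {
    sessionID: "demo-session",
    agent: { name: "sisyphus" },
    model: { providerID: "anthropic", modelID: "claude-opus-4.6" },
    provider: { id: "anthropic" },
    message: { variant: "max" },
  },
  output,
)

// normalizeModelID turns "4.6" into "4-6", so OPUS_4_6_PATTERN matches
// and effort is injected.
console.log(output.options.effort) // "max"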

View File

@@ -34,6 +34,7 @@ describe("atlas hook", () => {
client: {
session: {
prompt: promptMock,
+promptAsync: promptMock,
},
},
_promptMock: promptMock,
@@ -86,7 +87,7 @@ describe("atlas hook", () => {
// when - calling with undefined output
const result = await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID: "session-123" },
{ tool: "task", sessionID: "session-123" },
undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> }
)
@@ -94,8 +95,8 @@ describe("atlas hook", () => {
expect(result).toBeUndefined()
})
test("should ignore non-delegate_task tools", async () => {
// given - hook and non-delegate_task tool
test("should ignore non-task tools", async () => {
// given - hook and non-task tool
const hook = createAtlasHook(createMockPluginInput())
const output = {
title: "Test Tool",
@@ -138,7 +139,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -162,14 +163,14 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
// then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("MANDATORY:")
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -199,7 +200,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -208,7 +209,7 @@ describe("atlas hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan")
expect(output.output).toContain("LIE")
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -238,7 +239,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -275,7 +276,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -311,7 +312,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -348,7 +349,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -385,12 +386,12 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
// then - should include session_id instructions and verification
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
expect(output.output).toContain("[x]")
expect(output.output).toContain("MANDATORY:")
@@ -425,8 +426,8 @@ describe("atlas hook", () => {
// then
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("task")
expect(output.output).toContain("task")
})
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
@@ -755,40 +756,71 @@ describe("atlas hook", () => {
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
test("should skip when background tasks are running", async () => {
// given - boulder state with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
test("should skip when background tasks are running", async () => {
// given - boulder state with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const mockBackgroundManager = {
getTasksByParentSession: () => [{ status: "running" }],
}
const mockBackgroundManager = {
getTasksByParentSession: () => [{ status: "running" }],
}
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput, {
directory: TEST_DIR,
backgroundManager: mockBackgroundManager as any,
})
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput, {
directory: TEST_DIR,
backgroundManager: mockBackgroundManager as any,
})
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then - should not call prompt
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
// then - should not call prompt
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
test("should skip when continuation is stopped via isContinuationStopped", async () => {
// given - boulder state with incomplete plan
const planPath = join(TEST_DIR, "test-plan.md")
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
const state: BoulderState = {
active_plan: planPath,
started_at: "2026-01-02T10:00:00Z",
session_ids: [MAIN_SESSION_ID],
plan_name: "test-plan",
}
writeBoulderState(TEST_DIR, state)
const mockInput = createMockPluginInput()
const hook = createAtlasHook(mockInput, {
directory: TEST_DIR,
isContinuationStopped: (sessionID: string) => sessionID === MAIN_SESSION_ID,
})
// when
await hook.handler({
event: {
type: "session.idle",
properties: { sessionID: MAIN_SESSION_ID },
},
})
// then - should not call prompt because continuation is stopped
expect(mockInput._promptMock).not.toHaveBeenCalled()
})
test("should clear abort state on message.updated", async () => {
// given - boulder with incomplete plan

View File

@@ -44,7 +44,7 @@ You just performed direct file modifications outside \`.sisyphus/\`.
**You are an ORCHESTRATOR, not an IMPLEMENTER.**
As an orchestrator, you should:
-- **DELEGATE** implementation work to subagents via \`delegate_task\`
+- **DELEGATE** implementation work to subagents via \`task\`
- **VERIFY** the work done by subagents
- **COORDINATE** multiple tasks and ensure completion
@@ -54,7 +54,7 @@ You should NOT:
- Implement features yourself
**If you need to make changes:**
-1. Use \`delegate_task\` to delegate to an appropriate subagent
+1. Use \`task\` to delegate to an appropriate subagent
2. Provide clear instructions in the prompt
3. Verify the subagent's work after completion
@@ -128,7 +128,7 @@ You (Atlas) are attempting to directly modify a file outside \`.sisyphus/\`.
**THIS IS FORBIDDEN** (except for VERIFICATION purposes)
As an ORCHESTRATOR, you MUST:
-1. **DELEGATE** all implementation work via \`delegate_task\`
+1. **DELEGATE** all implementation work via \`task\`
2. **VERIFY** the work done by subagents (reading files is OK)
3. **COORDINATE** - you orchestrate, you don't implement
@@ -146,11 +146,11 @@ As an ORCHESTRATOR, you MUST:
**IF THIS IS FOR VERIFICATION:**
Proceed if you are verifying subagent work by making a small fix.
-But for any substantial changes, USE \`delegate_task\`.
+But for any substantial changes, USE \`task\`.
**CORRECT APPROACH:**
\`\`\`
-delegate_task(
+task(
category="...",
prompt="[specific single task with clear acceptance criteria]"
)
@@ -193,7 +193,7 @@ function buildVerificationReminder(sessionId: string): string {
**If ANY verification fails, use this immediately:**
\`\`\`
-delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
+task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\``
}
@@ -399,6 +399,7 @@ const CONTINUATION_COOLDOWN_MS = 5000
export interface AtlasHookOptions {
directory: string
backgroundManager?: BackgroundManager
+isContinuationStopped?: (sessionID: string) => boolean
}
function isAbortError(error: unknown): boolean {
@@ -483,7 +484,7 @@ export function createAtlasHook(
: undefined
}
-await ctx.client.session.prompt({
+await ctx.client.session.promptAsync({
path: { id: sessionID },
body: {
agent: agent ?? "atlas",
@@ -573,6 +574,11 @@ export function createAtlasHook(
return
}
+if (options?.isContinuationStopped?.(sessionID)) {
+log(`[${HOOK_NAME}] Skipped: continuation stopped for session`, { sessionID })
+return
+}
const requiredAgent = (boulderState.agent ?? "atlas").toLowerCase()
const lastAgent = getLastAgentFromSession(sessionID)
if (!lastAgent || lastAgent !== requiredAgent) {
@@ -688,12 +694,12 @@ export function createAtlasHook(
return
}
-// Check delegate_task - inject single-task directive
-if (input.tool === "delegate_task") {
+// Check task - inject single-task directive
+if (input.tool === "task") {
const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
-log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
+log(`[${HOOK_NAME}] Injected single-task directive to task`, {
sessionID: input.sessionID,
})
}
@@ -732,7 +738,7 @@ export function createAtlasHook(
return
}
if (input.tool !== "delegate_task") {
if (input.tool !== "task") {
return
}
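
A sketch of how a host might wire the new isContinuationStopped option. The stoppedSessions set and markStopped helper are illustrative only; the tests above pass the callback in exactly this shape:

// Hypothetical wiring: the caller tracks stopped sessions itself and
// the hook consults the callback on session.idle before re-prompting.
const stoppedSessions = new Set<string>()

const atlasOptions = {
  directory: "/path/to/project",
  // Returning true makes the hook skip the continuation prompt.
  isContinuationStopped: (sessionID: string) => stoppedSessions.has(sessionID),
}

// e.g. invoked when the user aborts a boulder run:
function markStopped(sessionID: string): void {
  stoppedSessions.add(sessionID)
}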

View File

@@ -50,7 +50,7 @@ describe("category-skill-reminder hook", () => {
// then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("task")
clearSessionAgent(sessionID)
})
@@ -130,16 +130,16 @@ describe("category-skill-reminder hook", () => {
})
describe("delegation tool tracking", () => {
test("should NOT inject reminder if delegate_task is used", async () => {
// given - sisyphus agent that uses delegate_task
test("should NOT inject reminder if task is used", async () => {
// given - sisyphus agent that uses task
const hook = createHook()
const sessionID = "delegation-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
-// when - delegate_task is used, then more tool calls
-await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
+// when - task is used, then more tool calls
+await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
@@ -329,15 +329,15 @@ describe("category-skill-reminder hook", () => {
})
test("should handle delegation tool names case-insensitively", async () => {
-// given - sisyphus agent using DELEGATE_TASK in uppercase
+// given - sisyphus agent using TASK in uppercase
const hook = createHook()
const sessionID = "case-delegate-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
-// when - DELEGATE_TASK in uppercase is used
-await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
+// when - TASK in uppercase is used
+await hook["tool.execute.after"]({ tool: "TASK", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)

View File

@@ -30,9 +30,8 @@ const DELEGATABLE_WORK_TOOLS = new Set([
* Tools that indicate the agent is already using delegation properly.
*/
const DELEGATION_TOOLS = new Set([
-"delegate_task",
+"task",
"call_omo_agent",
])
function formatSkillNames(skills: AvailableSkill[], limit: number): string {
@@ -63,7 +62,7 @@ function buildReminderMessage(availableSkills: AvailableSkill[]): string {
"> User-installed skills OVERRIDE built-in defaults. ALWAYS prefer YOUR SKILLS when domain matches.",
"",
"```typescript",
`delegate_task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
`task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
"```",
"",
]
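
The tests above assert case-insensitive matching, so the membership check presumably lowercases the tool name before consulting DELEGATION_TOOLS. A minimal sketch of that check; the real hook's per-session tracking is more involved:

// Assumed shape of the case-insensitive delegation check implied by
// the "handles delegation tool names case-insensitively" test.
const DELEGATION_TOOL_NAMES = new Set(["task", "call_omo_agent"])

function isDelegationTool(tool: string): boolean {
  return DELEGATION_TOOL_NAMES.has(tool.toLowerCase())
}

console.log(isDelegationTool("TASK")) // true
console.log(isDelegationTool("edit")) // false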

View File

@@ -20,7 +20,7 @@ interface RawClaudeHooksConfig {
function normalizeHookMatcher(raw: RawHookMatcher): HookMatcher {
return {
matcher: raw.matcher ?? raw.pattern ?? "*",
-hooks: raw.hooks,
+hooks: Array.isArray(raw.hooks) ? raw.hooks : [],
}
}
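
A sketch of why the Array.isArray guard matters, using assumed minimal shapes for the matcher types: a hand-edited settings file can carry a missing or non-array hooks value, which is now coerced to an empty list instead of flowing through to the executor loops:

// Assumed minimal types; the plugin's real RawHookMatcher is richer.
interface HookEntry { type: string; command?: string }
interface RawMatcherInput { matcher?: string; pattern?: string; hooks?: unknown }
interface NormalizedMatcher { matcher: string; hooks: HookEntry[] }

function normalizeMatcher(raw: RawMatcherInput): NormalizedMatcher {
  return {
    matcher: raw.matcher ?? raw.pattern ?? "*",
    // Anything that is not an array becomes an empty hook list.
    hooks: Array.isArray(raw.hooks) ? (raw.hooks as HookEntry[]) : [],
  }
}

console.log(normalizeMatcher({ matcher: "Bash" }))            // { matcher: "Bash", hooks: [] }
console.log(normalizeMatcher({ hooks: { type: "command" } })) // { matcher: "*", hooks: [] }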

View File

@@ -175,7 +175,7 @@ export function createClaudeCodeHooksHook(
input: { tool: string; sessionID: string; callID: string },
output: { args: Record<string, unknown> }
): Promise<void> => {
if (input.tool === "todowrite" && typeof output.args.todos === "string") {
if (input.tool.trim() === "todowrite" && typeof output.args.todos === "string") {
let parsed: unknown
try {
parsed = JSON.parse(output.args.todos)
@@ -257,7 +257,7 @@ export function createClaudeCodeHooksHook(
const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {}
// Use metadata if available and non-empty, otherwise wrap output.output in a structured object
-// This ensures plugin tools (call_omo_agent, delegate_task, task) that return strings
+// This ensures plugin tools (call_omo_agent, task) that return strings
// get their results properly recorded in transcripts instead of empty {}
const metadata = output.metadata as Record<string, unknown> | undefined
const hasMetadata = metadata && typeof metadata === "object" && Object.keys(metadata).length > 0

View File

@@ -91,11 +91,12 @@ export async function executePostToolUseHooks(
const startTime = Date.now()
for (const matcher of matchers) {
+if (!matcher.hooks || matcher.hooks.length === 0) continue
for (const hook of matcher.hooks) {
if (hook.type !== "command") continue
if (isHookCommandDisabled("PostToolUse", hook.command, extendedConfig ?? null)) {
log("PostToolUse hook command skipped (disabled by config)", { command: hook.command, toolName: ctx.toolName })
continue
}

View File

@@ -47,11 +47,12 @@ export async function executePreCompactHooks(
let firstHookName: string | undefined
const collectedContext: string[] = []
for (const matcher of matchers) {
+if (!matcher.hooks || matcher.hooks.length === 0) continue
for (const hook of matcher.hooks) {
if (hook.type !== "command") continue
if (isHookCommandDisabled("PreCompact", hook.command, extendedConfig ?? null)) {
log("PreCompact hook command skipped (disabled by config)", { command: hook.command })
continue
}

View File

@@ -74,11 +74,12 @@ export async function executePreToolUseHooks(
let firstHookName: string | undefined
const inputLines = buildInputLines(ctx.toolInput)
for (const matcher of matchers) {
+if (!matcher.hooks || matcher.hooks.length === 0) continue
for (const hook of matcher.hooks) {
if (hook.type !== "command") continue
if (isHookCommandDisabled("PreToolUse", hook.command, extendedConfig ?? null)) {
log("PreToolUse hook command skipped (disabled by config)", { command: hook.command, toolName: ctx.toolName })
continue
}

View File

@@ -65,11 +65,12 @@ export async function executeStopHooks(
hook_source: "opencode-plugin",
}
for (const matcher of matchers) {
+if (!matcher.hooks || matcher.hooks.length === 0) continue
for (const hook of matcher.hooks) {
if (hook.type !== "command") continue
if (isHookCommandDisabled("Stop", hook.command, extendedConfig ?? null)) {
log("Stop hook command skipped (disabled by config)", { command: hook.command })
continue
}
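
All four executors gain the same guard. Distilled into a self-contained sketch, with types that are assumed simplifications of the plugin's config shapes:

// The shared defensive pattern: skip matchers whose hooks array is
// missing or empty before iterating over their command hooks.
interface CommandHook { type: string; command: string }
interface Matcher { matcher: string; hooks?: CommandHook[] }

function collectRunnableCommands(matchers: Matcher[]): string[] {
  const commands: string[] = []
  for (const matcher of matchers) {
    if (!matcher.hooks || matcher.hooks.length === 0) continue // new guard
    for (const hook of matcher.hooks) {
      if (hook.type !== "command") continue
      commands.push(hook.command)
    }
  }
  return commands
}

// A matcher from a malformed config no longer throws:
console.log(collectRunnableCommands([
  { matcher: "*" },
  { matcher: "Bash", hooks: [{ type: "command", command: "echo ok" }] },
])) // ["echo ok"]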

View File

@@ -0,0 +1,107 @@
import { describe, it, expect } from "bun:test"
import {
executeUserPromptSubmitHooks,
type UserPromptSubmitContext,
} from "./user-prompt-submit"
describe("executeUserPromptSubmitHooks", () => {
it("returns early when no config provided", async () => {
// given
const ctx: UserPromptSubmitContext = {
sessionId: "test-session",
prompt: "test prompt",
parts: [{ type: "text", text: "test prompt" }],
cwd: "/tmp",
}
// when
const result = await executeUserPromptSubmitHooks(ctx, null)
// then
expect(result.block).toBe(false)
expect(result.messages).toEqual([])
})
it("returns early when hook tags present in user input", async () => {
// given
const ctx: UserPromptSubmitContext = {
sessionId: "test-session",
prompt: "<user-prompt-submit-hook>previous output</user-prompt-submit-hook>",
parts: [
{
type: "text",
text: "<user-prompt-submit-hook>previous output</user-prompt-submit-hook>",
},
],
cwd: "/tmp",
}
// when
const result = await executeUserPromptSubmitHooks(ctx, null)
// then
expect(result.block).toBe(false)
expect(result.messages).toEqual([])
})
it("does not return early when hook tags in prompt but not in user input", async () => {
// given - simulates case where hook output was injected into session context
// but current user input does not contain tags
const ctx: UserPromptSubmitContext = {
sessionId: "test-session",
prompt:
"<user-prompt-submit-hook>previous output</user-prompt-submit-hook>\n\nuser message",
parts: [{ type: "text", text: "user message" }],
cwd: "/tmp",
}
// when
const result = await executeUserPromptSubmitHooks(ctx, null)
// then - should not return early, should continue to config check
expect(result.block).toBe(false)
expect(result.messages).toEqual([])
})
it("should fire on first prompt", async () => {
// given
const ctx: UserPromptSubmitContext = {
sessionId: "test-session-1",
prompt: "first prompt",
parts: [{ type: "text", text: "first prompt" }],
cwd: "/tmp",
}
// when
const result = await executeUserPromptSubmitHooks(ctx, null)
// then
expect(result.block).toBe(false)
expect(result.messages).toEqual([])
})
it("should fire on second prompt in same session", async () => {
// given
const ctx1: UserPromptSubmitContext = {
sessionId: "test-session-2",
prompt: "first prompt",
parts: [{ type: "text", text: "first prompt" }],
cwd: "/tmp",
}
const ctx2: UserPromptSubmitContext = {
sessionId: "test-session-2",
prompt: "second prompt",
parts: [{ type: "text", text: "second prompt" }],
cwd: "/tmp",
}
// when
const result1 = await executeUserPromptSubmitHooks(ctx1, null)
const result2 = await executeUserPromptSubmitHooks(ctx2, null)
// then
expect(result1.block).toBe(false)
expect(result2.block).toBe(false)
})
})

View File

@@ -44,9 +44,16 @@ export async function executeUserPromptSubmitHooks(
return { block: false, modifiedParts, messages }
}
+// Check if hook tags are in the current user input only (not in injected context)
+// by checking only the text parts that were provided in this message
+const userInputText = ctx.parts
+.filter((p) => p.type === "text" && p.text)
+.map((p) => p.text ?? "")
+.join("\n")
if (
-ctx.prompt.includes(USER_PROMPT_SUBMIT_TAG_OPEN) &&
-ctx.prompt.includes(USER_PROMPT_SUBMIT_TAG_CLOSE)
+userInputText.includes(USER_PROMPT_SUBMIT_TAG_OPEN) &&
+userInputText.includes(USER_PROMPT_SUBMIT_TAG_CLOSE)
) {
return { block: false, modifiedParts, messages }
}
@@ -70,9 +77,10 @@ export async function executeUserPromptSubmitHooks(
hook_source: "opencode-plugin",
}
for (const matcher of matchers) {
+if (!matcher.hooks || matcher.hooks.length === 0) continue
for (const hook of matcher.hooks) {
if (hook.type !== "command") continue
if (isHookCommandDisabled("UserPromptSubmit", hook.command, extendedConfig ?? null)) {
log("UserPromptSubmit hook command skipped (disabled by config)", { command: hook.command })

View File

@@ -56,4 +56,17 @@ describe("createCompactionContextInjector", () => {
expect(prompt).toContain("Files already verified")
})
})
it("restricts constraints to explicit verbatim statements", async () => {
//#given
const injector = createCompactionContextInjector()
//#when
const prompt = injector()
//#then
expect(prompt).toContain("Explicit Constraints (Verbatim Only)")
expect(prompt).toContain("Do NOT invent")
expect(prompt).toContain("Quote constraints verbatim")
})
})

Some files were not shown because too many files have changed in this diff.