release: v3.0.0-beta.3

release: v2.14.1
fix(test): extend timeout for resume sync test
2026-01-11 02:40:31 +00:00 · 2026-01-11 02:23:00 +00:00 · 2026-01-11 11:20:00 +09:00 · 2026-01-11 11:14:15 +09:00 · 2026-01-11 11:11:34 +09:00 · 2026-01-11 11:07:46 +09:00
40 changed files with 1512 additions and 249 deletions
--- a/.github/assets/orchestrator-sisyphus.png
+++ b/.github/assets/orchestrator-sisyphus.png
--- a/.github/assets/sisyphuslabs.png
+++ b/.github/assets/sisyphuslabs.png
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,5 +1,5 @@
 name: publish
-run-name: "${{ format('release {0} ({1})', inputs.bump, inputs.tag || 'latest') }}"
+run-name: "${{ format('release {0}', inputs.bump) }}"

 on:
  workflow_dispatch:
@@ -16,15 +16,6 @@ on:
        description: "Override version (optional)"
        required: false
        type: string
-      tag:
-        description: "npm dist-tag (latest, beta, next)"
-        required: false
-        type: choice
-        default: "latest"
-        options:
-          - latest
-          - beta
-          - next

 concurrency: ${{ github.workflow }}-${{ github.ref }}

@@ -135,7 +126,6 @@ jobs:
        env:
          BUMP: ${{ inputs.bump }}
          VERSION: ${{ inputs.version }}
-          NPM_TAG: ${{ inputs.tag || 'latest' }}
          CI: true
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_CONFIG_PROVENANCE: true
@@ -146,7 +136,6 @@ jobs:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Merge to master
-        if: inputs.tag == 'latest' || inputs.tag == ''
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,7 +1,7 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-01-02T22:41:22+09:00
-**Commit:** d0694e5
+**Generated:** 2026-01-09T15:38:00+09:00
+**Commit:** 0581793
 **Branch:** dev

 ## OVERVIEW
@@ -22,7 +22,7 @@ oh-my-opencode/
 │   ├── cli/           # CLI installer, doctor - see src/cli/AGENTS.md
 │   ├── mcp/           # MCP configs: context7, grep_app
 │   ├── config/        # Zod schema, TypeScript types
-│   └── index.ts       # Main plugin entry (464 lines)
+│   └── index.ts       # Main plugin entry (548 lines)
 ├── script/            # build-schema.ts, publish.ts, generate-changelog.ts
 ├── assets/            # JSON schema
 └── dist/              # Build output (ESM + .d.ts)
@@ -50,6 +50,7 @@ oh-my-opencode/
 | Shared utilities | `src/shared/` | Cross-cutting utilities |
 | Slash commands | `src/hooks/auto-slash-command/` | Auto-detect and execute `/command` patterns |
 | Ralph Loop | `src/hooks/ralph-loop/` | Self-referential dev loop until completion |
+| Orchestrator | `src/hooks/sisyphus-orchestrator/` | Main orchestration hook (660 lines) |

 ## TDD (Test-Driven Development)

@@ -64,15 +65,16 @@ oh-my-opencode/

 | Phase | Action | Verification |
 |-------|--------|--------------|
-| **RED** | Write test describing expected behavior | `bun test` → FAIL (expected) |
-| **GREEN** | Implement minimum code to pass | `bun test` → PASS |
-| **REFACTOR** | Improve code quality, remove duplication | `bun test` → PASS (must stay green) |
+| **RED** | Write test describing expected behavior | `bun test` -> FAIL (expected) |
+| **GREEN** | Implement minimum code to pass | `bun test` -> PASS |
+| **REFACTOR** | Improve code quality, remove duplication | `bun test` -> PASS (must stay green) |

 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests to "pass" - fix the code
 - One test at a time - don't batch
 - Test file naming: `*.test.ts` alongside source
+- BDD comments: `#given`, `#when`, `#then` (equivalent to Arrange-Act-Assert)

 ## CONVENTIONS

@@ -81,7 +83,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern in index.ts; explicit named exports for tools/hooks
 - **Naming**: kebab-case directories, createXXXHook/createXXXTool factories
- **Testing**: BDD comments `#given`, `#when`, `#then` (same as AAA); TDD workflow (RED-GREEN-REFACTOR)
+- **Testing**: BDD comments `#given/#when/#then`, TDD workflow (RED-GREEN-REFACTOR)
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS (THIS PROJECT)
@@ -99,6 +101,11 @@ oh-my-opencode/
 - **Sequential agent calls**: Use `sisyphus_task` for parallel execution
 - **Heavy PreToolUse logic**: Slows every tool call
 - **Self-planning for complex tasks**: Spawn planning agent (Prometheus) instead
+- **Trust agent self-reports**: ALWAYS verify results independently
+- **Skip TODO creation**: Multi-step tasks MUST have todos first
+- **Batch completions**: Mark TODOs complete immediately, don't group
+- **Giant commits**: Changes touching 3+ files MUST be split into at least 2 commits
+- **Separate test from impl**: ALWAYS commit a test together with its implementation

 ## UNIQUE STYLES

@@ -114,10 +121,10 @@ oh-my-opencode/
 ## AGENT MODELS

 | Agent | Default Model | Purpose |
-|-------|-------|---------|
+|-------|---------------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator |
 | oracle | openai/gpt-5.2 | Read-only consultation. High-IQ debugging, architecture |
-| librarian | anthropic/claude-sonnet-4-5 | Multi-repo analysis, docs |
+| librarian | opencode/glm-4.7-free | Multi-repo analysis, docs |
 | explore | opencode/grok-code | Fast codebase exploration |
 | frontend-ui-ux-engineer | google/gemini-3-pro-preview | UI generation |
 | document-writer | google/gemini-3-pro-preview | Technical docs |
@@ -130,7 +137,7 @@ bun run typecheck      # Type check
 bun run build          # ESM + declarations + schema
 bun run rebuild        # Clean + Build
 bun run build:schema   # Schema only
-bun test               # Run tests
+bun test               # Run tests (76 test files, 2559+ BDD assertions)
 ```

 ## DEPLOYMENT
@@ -153,18 +160,23 @@ bun test               # Run tests

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/index.ts` | 464 | Main plugin, all hook/tool init |
-| `src/cli/config-manager.ts` | 669 | JSONC parsing, env detection |
-| `src/auth/antigravity/fetch.ts` | 621 | Token refresh, URL rewriting |
-| `src/tools/lsp/client.ts` | 611 | LSP protocol, JSON-RPC |
-| `src/auth/antigravity/response.ts` | 598 | Response transformation, streaming |
-| `src/auth/antigravity/thinking.ts` | 571 | Thinking block extraction/transformation |
-| `src/hooks/anthropic-context-window-limit-recovery/executor.ts` | 564 | Multi-stage recovery |
-| `src/agents/sisyphus.ts` | 504 | Orchestrator prompt |
+| `src/agents/orchestrator-sisyphus.ts` | 1484 | Orchestrator agent, complex delegation |
+| `src/features/builtin-skills/skills.ts` | 1230 | Skill definitions (frontend-ui-ux, playwright) |
+| `src/agents/prometheus-prompt.ts` | 982 | Planning agent system prompt |
+| `src/auth/antigravity/fetch.ts` | 798 | Token refresh, URL rewriting |
+| `src/auth/antigravity/thinking.ts` | 755 | Thinking block extraction |
+| `src/cli/config-manager.ts` | 725 | JSONC parsing, env detection |
+| `src/hooks/sisyphus-orchestrator/index.ts` | 660 | Orchestrator hook impl |
+| `src/agents/sisyphus.ts` | 641 | Main Sisyphus prompt |
+| `src/tools/lsp/client.ts` | 612 | LSP protocol, JSON-RPC |
+| `src/features/background-agent/manager.ts` | 608 | Task lifecycle |
+| `src/auth/antigravity/response.ts` | 599 | Response transformation, streaming |
+| `src/hooks/anthropic-context-window-limit-recovery/executor.ts` | 556 | Multi-stage recovery |
+| `src/index.ts` | 548 | Main plugin, all hook/tool init |

 ## NOTES

- **Testing**: Bun native test (`bun test`), BDD-style `#given/#when/#then`, 360+ tests
+- **Testing**: Bun native test (`bun test`), BDD-style `#given/#when/#then`, 76 test files
 - **OpenCode**: Requires >= 1.0.150
 - **Multi-lang docs**: README.md (EN), README.ko.md (KO), README.ja.md (JA), README.zh-cn.md (ZH-CN)
 - **Config**: `~/.config/opencode/oh-my-opencode.json` (user) or `.opencode/oh-my-opencode.json` (project)
--- a/README.ja.md
+++ b/README.ja.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *「私はエージェントが生成したコードと人間が書いたコードを区別できない、しかしはるかに多くのことを達成できる世界を作り、ソフトウェア革命を起こすことを目指しています。私はこの旅に個人的な時間、情熱、そして資金を注ぎ込んできましたし、これからもそうし続けます。」*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **Sisyphusの完全製品化バージョンを構築中です。フロンティアエージェントの未来を定義します。<br />[こちら](https://sisyphuslabs.ai)からウェイトリストに参加してください。**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **オーケストレーターが来ます。今週中に。[Xで通知を受け取る](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **オーケストレーターがベータ版で利用可能になりました。`oh-my-opencode@3.0.0-beta.1`を使用してインストールしてください。**
 >
 > 一緒に歩みましょう！
 >
@@ -319,9 +322,9 @@ opencode auth login
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -458,7 +461,7 @@ oh-my-opencode を削除するには：

 - **Sisyphus** (`anthropic/claude-opus-4-5`): **デフォルトエージェントです。** OpenCode のための強力な AI オーケストレーターです。専門のサブエージェントを活用して、複雑なタスクを計画、委任、実行します。バックグラウンドタスクへの委任と Todo ベースのワークフローを重視します。最大の推論能力を発揮するため、Claude Opus 4.5 と拡張思考 (32k token budget) を使用します。
 - **oracle** (`openai/gpt-5.2`): アーキテクチャ、コードレビュー、戦略立案のための専門アドバイザー。GPT-5.2 の卓越した論理的推論と深い分析能力を活用します。AmpCode からインスピレーションを得ました。
- **librarian** (`anthropic/claude-sonnet-4-5` または `google/gemini-3-flash`): マルチリポジトリ分析、ドキュメント検索、実装例の調査を担当。Antigravity 認証が設定されている場合は Gemini 3 Flash を使用し、それ以外は Claude Sonnet 4.5 を使用して、深いコードベース理解と GitHub リサーチ、根拠に基づいた回答を提供します。AmpCode からインスピレーションを得ました。
+- **librarian** (`opencode/glm-4.7-free`): マルチリポジトリ分析、ドキュメント検索、実装例の調査を担当。GLM-4.7 Free を使用して、深いコードベース理解と GitHub リサーチ、根拠に基づいた回答を提供します。AmpCode からインスピレーションを得ました。
 - **explore** (`opencode/grok-code`、`google/gemini-3-flash`、または `anthropic/claude-haiku-4-5`): 高速なコードベース探索、ファイルパターンマッチング。Antigravity 認証が設定されている場合は Gemini 3 Flash を使用し、Claude max20 が利用可能な場合は Haiku を使用し、それ以外は Grok を使います。Claude Code からインスピレーションを得ました。
 - **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`): 開発者に転身したデザイナーという設定です。素晴らしい UI を作ります。美しく独創的な UI コードを生成することに長けた Gemini を使用します。
 - **document-writer** (`google/gemini-3-pro-preview`): テクニカルライティングの専門家という設定です。Gemini は文筆家であり、流れるような文章を書きます。
@@ -771,9 +774,9 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
--- a/README.md
+++ b/README.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *"I aim to spark a software revolution by creating a world where agent-generated code is indistinguishable from human code, yet capable of achieving vastly more. I have poured my personal time, passion, and funds into this journey, and I will continue to do so."*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **We're building a fully productized version of Sisyphus to define the future of frontier agents. <br />Join the waitlist [here](https://sisyphuslabs.ai).**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **The Orchestrator is coming. This Week. [Get notified on X](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **The Orchestrator is now available in beta. Use `oh-my-opencode@3.0.0-beta.1` to install it.**
 >
 > Be with us!
 >
@@ -358,9 +361,9 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -496,9 +499,9 @@ To remove oh-my-opencode:

 - **Sisyphus** (`anthropic/claude-opus-4-5`): **The default agent.** A powerful AI orchestrator for OpenCode. Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Emphasizes background task delegation and todo-driven workflow. Uses Claude Opus 4.5 with extended thinking (32k budget) for maximum reasoning capability.
 - **oracle** (`openai/gpt-5.2`): Architecture, code review, strategy. Uses GPT-5.2 for its stellar logical reasoning and deep analysis. Inspired by AmpCode.
- **librarian** (`anthropic/claude-sonnet-4-5` or `google/gemini-3-flash`): Multi-repo analysis, doc lookup, implementation examples. Uses Gemini 3 Flash when Antigravity auth is configured, otherwise Claude Sonnet 4.5 for deep codebase understanding and GitHub research with evidence-based answers. Inspired by AmpCode.
+- **librarian** (`opencode/glm-4.7-free`): Multi-repo analysis, doc lookup, implementation examples. Uses GLM-4.7 Free for deep codebase understanding and GitHub research with evidence-based answers. Inspired by AmpCode.
 - **explore** (`opencode/grok-code`, `google/gemini-3-flash`, or `anthropic/claude-haiku-4-5`): Fast codebase exploration and pattern matching. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise Grok. Inspired by Claude Code.
- **frontend-ui-ux-engineer** (`google/gemini-3-pro-high`): A designer turned developer. Builds gorgeous UIs. Gemini excels at creative, beautiful UI code.
+- **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`): A designer turned developer. Builds gorgeous UIs. Gemini excels at creative, beautiful UI code.
 - **document-writer** (`google/gemini-3-flash`): Technical writing expert. Gemini is a wordsmith—writes prose that flows.
 - **multimodal-looker** (`google/gemini-3-flash`): Visual content specialist. Analyzes PDFs, images, diagrams to extract information.

@@ -827,9 +830,9 @@ When using `opencode-antigravity-auth`, disable the built-in auth and override a
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -1089,7 +1092,7 @@ Add custom categories in `oh-my-opencode.json`:
      "prompt_append": "Focus on data analysis, ML pipelines, and statistical methods."
    },
    "visual": {
-      "model": "google/gemini-3-pro-high",
+      "model": "google/gemini-3-pro-preview",
      "prompt_append": "Use shadcn/ui components and Tailwind CSS."
    }
  }
--- a/README.zh-cn.md
+++ b/README.zh-cn.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *"我致力于引发一场软件革命，创造一个AI生成的代码与人类代码无法区分、却能实现更多的世界。我已经在这段旅程中投入了个人时间、热情和资金，并将继续这样做。"*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **我们正在构建Sisyphus的完全产品化版本，定义前沿代理的未来。<br />[点击此处](https://sisyphuslabs.ai)加入候补名单。**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **编排器即将到来。就在本周。[在X上获取通知](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **编排器现已推出测试版。使用`oh-my-opencode@3.0.0-beta.1`来安装。**
 >
 > 与我们同行！
 >
@@ -324,9 +327,9 @@ opencode auth login
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -463,7 +466,7 @@ gh repo star code-yeongyu/oh-my-opencode

 - **Sisyphus** (`anthropic/claude-opus-4-5`)：**默认 Agent。** OpenCode 专属的强力 AI 编排器。指挥专业子 Agent 搞定复杂任务。主打后台任务委派和 Todo 驱动。用 Claude Opus 4.5 加上扩展思考（32k token 预算），智商拉满。
 - **oracle** (`openai/gpt-5.2`)：架构师、代码审查员、战略家。GPT-5.2 的逻辑推理和深度分析能力不是盖的。致敬 AmpCode。
- **librarian** (`anthropic/claude-sonnet-4-5` 或 `google/gemini-3-flash`)：多仓库分析、查文档、找示例。配置 Antigravity 认证时使用 Gemini 3 Flash，否则使用 Claude Sonnet 4.5 深入理解代码库，GitHub 调研，给出的答案都有据可查。致敬 AmpCode。
+- **librarian** (`opencode/glm-4.7-free`)：多仓库分析、查文档、找示例。使用 GLM-4.7 Free 深入理解代码库，GitHub 调研，给出的答案都有据可查。致敬 AmpCode。
 - **explore** (`opencode/grok-code`、`google/gemini-3-flash` 或 `anthropic/claude-haiku-4-5`)：极速代码库扫描、模式匹配。配置 Antigravity 认证时使用 Gemini 3 Flash，Claude max20 可用时使用 Haiku，否则用 Grok。致敬 Claude Code。
 - **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`)：设计师出身的程序员。UI 做得那是真漂亮。Gemini 写这种创意美观的代码是一绝。
 - **document-writer** (`google/gemini-3-pro-preview`)：技术写作专家。Gemini 文笔好，写出来的东西读着顺畅。
@@ -772,9 +775,9 @@ Agent 爽了，你自然也爽。但我还想直接让你爽。
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -27,7 +27,9 @@
          "frontend-ui-ux-engineer",
          "document-writer",
          "multimodal-looker",
-          "Metis (Plan Consultant)"
+          "Metis (Plan Consultant)",
+          "Momus (Plan Reviewer)",
+          "orchestrator-sisyphus"
        ]
      }
    },
@@ -832,6 +834,129 @@
            }
          }
        },
+        "Momus (Plan Reviewer)": {
+          "type": "object",
+          "properties": {
+            "model": {
+              "type": "string"
+            },
+            "category": {
+              "type": "string"
+            },
+            "skills": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1
+            },
+            "prompt": {
+              "type": "string"
+            },
+            "prompt_append": {
+              "type": "string"
+            },
+            "tools": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {
+                "type": "boolean"
+              }
+            },
+            "disable": {
+              "type": "boolean"
+            },
+            "description": {
+              "type": "string"
+            },
+            "mode": {
+              "type": "string",
+              "enum": [
+                "subagent",
+                "primary",
+                "all"
+              ]
+            },
+            "color": {
+              "type": "string",
+              "pattern": "^#[0-9A-Fa-f]{6}$"
+            },
+            "permission": {
+              "type": "object",
+              "properties": {
+                "edit": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "bash": {
+                  "anyOf": [
+                    {
+                      "type": "string",
+                      "enum": [
+                        "ask",
+                        "allow",
+                        "deny"
+                      ]
+                    },
+                    {
+                      "type": "object",
+                      "propertyNames": {
+                        "type": "string"
+                      },
+                      "additionalProperties": {
+                        "type": "string",
+                        "enum": [
+                          "ask",
+                          "allow",
+                          "deny"
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "webfetch": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "doom_loop": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "external_directory": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                }
+              }
+            }
+          }
+        },
        "oracle": {
          "type": "object",
          "properties": {
@@ -1569,6 +1694,129 @@
              }
            }
          }
+        },
+        "orchestrator-sisyphus": {
+          "type": "object",
+          "properties": {
+            "model": {
+              "type": "string"
+            },
+            "category": {
+              "type": "string"
+            },
+            "skills": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1
+            },
+            "prompt": {
+              "type": "string"
+            },
+            "prompt_append": {
+              "type": "string"
+            },
+            "tools": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {
+                "type": "boolean"
+              }
+            },
+            "disable": {
+              "type": "boolean"
+            },
+            "description": {
+              "type": "string"
+            },
+            "mode": {
+              "type": "string",
+              "enum": [
+                "subagent",
+                "primary",
+                "all"
+              ]
+            },
+            "color": {
+              "type": "string",
+              "pattern": "^#[0-9A-Fa-f]{6}$"
+            },
+            "permission": {
+              "type": "object",
+              "properties": {
+                "edit": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "bash": {
+                  "anyOf": [
+                    {
+                      "type": "string",
+                      "enum": [
+                        "ask",
+                        "allow",
+                        "deny"
+                      ]
+                    },
+                    {
+                      "type": "object",
+                      "propertyNames": {
+                        "type": "string"
+                      },
+                      "additionalProperties": {
+                        "type": "string",
+                        "enum": [
+                          "ask",
+                          "allow",
+                          "deny"
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "webfetch": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "doom_loop": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "external_directory": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                }
+              }
+            }
+          }
        }
      }
    },
--- a/docs/orchestration-guide.md
+++ b/docs/orchestration-guide.md
@@ -1,5 +1,26 @@
 # Oh-My-OpenCode Orchestration Guide

+## TL;DR - When to Use What
+
+| Complexity | Approach | When to Use |
+|------------|----------|-------------|
+| **Simple** | Just prompt | Simple tasks, quick fixes, single-file changes |
+| **Complex + Lazy** | Just type `ulw` or `ultrawork` | Complex tasks where explaining context is tedious. Agent figures it out. |
+| **Complex + Precise** | `@plan` → `/start-work` | Precise, multi-step work requiring true orchestration. Prometheus plans, Sisyphus executes. |
+
+**Decision Flow:**
+```
+Is it a quick fix or simple task?
+  ├─ YES → Just prompt normally
+  └─ NO  → Is explaining the full context tedious?
+             ├─ YES → Type "ulw" and let the agent figure it out
+             └─ NO  → Do you need precise, verifiable execution?
+                        ├─ YES → Use @plan for Prometheus planning, then /start-work
+                        └─ NO  → Just use "ulw"
+```
+
+---
+
 This document provides a comprehensive guide to the orchestration system that implements Oh-My-OpenCode's core philosophy: **"Separation of Planning and Execution"**.

 ## 1. Overview
@@ -24,7 +45,7 @@ graph TD
        Metis --> Prometheus
        Prometheus --> Momus[Momus<br>Reviewer]
        Momus --> Prometheus
-        Prometheus --> PlanFile[/.sisyphus/plans/*.md]
+        Prometheus --> PlanFile["/.sisyphus/plans/{name}.md"]
    end
    
    PlanFile --> StartWork[/start-work]
@@ -93,9 +114,9 @@ When the user enters `/start-work`, the execution phase begins.

 ## 5. Commands and Usage

-### `/plan [request]`
+### `@plan [request]`
 Invokes Prometheus to start a planning session.
- Example: `/plan "I want to refactor the authentication system to NextAuth"`
+- Example: `@plan "I want to refactor the authentication system to NextAuth"`

 ### `/start-work`
 Executes the generated plan.
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.0.0-beta.1",
+  "version": "3.0.0-beta.3",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
--- a/script/publish.ts
+++ b/script/publish.ts
@@ -5,7 +5,6 @@ import { $ } from "bun"
 const PACKAGE_NAME = "oh-my-opencode"
 const bump = process.env.BUMP as "major" | "minor" | "patch" | undefined
 const versionOverride = process.env.VERSION
-const npmTag = process.env.NPM_TAG || "latest"

 console.log("=== Publishing oh-my-opencode ===\n")

@@ -107,12 +106,22 @@ async function getContributors(previous: string): Promise<string[]> {
  return notes
 }

-async function buildAndPublish(): Promise<void> {
-  console.log(`\nPublishing to npm with tag: ${npmTag}...`)
+function getDistTag(version: string): string | null {
+  if (!version.includes("-")) return null
+  const prerelease = version.split("-")[1]
+  const tag = prerelease?.split(".")[0]
+  return tag || "next"
+}
+
+async function buildAndPublish(version: string): Promise<void> {
+  console.log("\nPublishing to npm...")
+  const distTag = getDistTag(version)
+  const tagArgs = distTag ? ["--tag", distTag] : []
+  
  if (process.env.CI) {
-    await $`npm publish --access public --provenance --ignore-scripts --tag ${npmTag}`
+    await $`npm publish --access public --provenance --ignore-scripts ${tagArgs}`
  } else {
-    await $`npm publish --access public --ignore-scripts --tag ${npmTag}`
+    await $`npm publish --access public --ignore-scripts ${tagArgs}`
  }
 }

@@ -174,7 +183,7 @@ async function main() {
  const contributors = await getContributors(previous)
  const notes = [...changelog, ...contributors]

-  await buildAndPublish()
+  await buildAndPublish(newVersion)
  await gitTagAndRelease(newVersion, notes)

  console.log(`\n=== Successfully published ${PACKAGE_NAME}@${newVersion} ===`)
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -311,6 +311,94 @@
      "created_at": "2026-01-08T10:02:26Z",
      "repoId": 1108837393,
      "pullRequestNo": 592
+    },
+    {
+      "name": "xLillium",
+      "id": 16964936,
+      "comment_id": 3725604869,
+      "created_at": "2026-01-08T20:18:27Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 603
+    },
+    {
+      "name": "SJY0917032",
+      "id": 88534701,
+      "comment_id": 3728199745,
+      "created_at": "2026-01-09T10:01:19Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 625
+    },
+    {
+      "name": "kdcokenny",
+      "id": 99611484,
+      "comment_id": 3728801075,
+      "created_at": "2026-01-09T12:54:05Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 629
+    },
+    {
+      "name": "ElwinLiu",
+      "id": 87802244,
+      "comment_id": 3731812585,
+      "created_at": "2026-01-10T04:32:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 645
+    },
+    {
+      "name": "Luodian",
+      "id": 15847405,
+      "comment_id": 3731833107,
+      "created_at": "2026-01-10T05:01:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 634
+    },
+    {
+      "name": "imarshallwidjaja",
+      "id": 60992624,
+      "comment_id": 3732124681,
+      "created_at": "2026-01-10T07:58:43Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 648
+    },
+    {
+      "name": "GollyJer",
+      "id": 689204,
+      "comment_id": 3732253764,
+      "created_at": "2026-01-10T09:33:21Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 649
+    },
+    {
+      "name": "kargnas",
+      "id": 1438533,
+      "comment_id": 3732344143,
+      "created_at": "2026-01-10T10:25:25Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 653
+    },
+    {
+      "name": "ashir6892",
+      "id": 52703606,
+      "comment_id": 3733435826,
+      "created_at": "2026-01-10T19:50:07Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 675
+    },
+    {
+      "name": "arthur404dev",
+      "id": 59490008,
+      "comment_id": 3733697071,
+      "created_at": "2026-01-10T23:51:44Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 676
+    },
+    {
+      "name": "KNN-07",
+      "id": 55886589,
+      "comment_id": 3733788592,
+      "created_at": "2026-01-11T01:11:38Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 679
    }
  ]
 }
--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -8,13 +8,18 @@ AI agent definitions for multi-model orchestration. 7 specialized agents: Sisyph

 ```
 agents/
-├── sisyphus.ts              # Primary orchestrator (Claude Opus 4.5)
+├── orchestrator-sisyphus.ts # Orchestrator agent (1484 lines) - complex delegation
+├── sisyphus.ts              # Main Sisyphus prompt (641 lines)
+├── sisyphus-junior.ts       # Junior variant for delegated tasks
 ├── oracle.ts                # Strategic advisor (GPT-5.2)
 ├── librarian.ts             # Multi-repo research (Claude Sonnet 4.5)
 ├── explore.ts               # Fast codebase grep (Grok Code)
 ├── frontend-ui-ux-engineer.ts  # UI generation (Gemini 3 Pro)
-├── document-writer.ts       # Technical docs (Gemini 3 Flash)
+├── document-writer.ts       # Technical docs (Gemini 3 Pro)
 ├── multimodal-looker.ts     # PDF/image analysis (Gemini 3 Flash)
+├── prometheus-prompt.ts     # Planning agent prompt (982 lines)
+├── metis.ts                 # Plan Consultant agent (404 lines)
+├── momus.ts                 # Plan Reviewer agent (404 lines)
 ├── build-prompt.ts          # Shared build agent prompt
 ├── plan-prompt.ts           # Shared plan agent prompt
 ├── types.ts                 # AgentModelConfig interface
@@ -28,7 +33,7 @@ agents/
 |-------|---------------|----------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | - | Primary orchestrator with extended thinking |
 | oracle | openai/gpt-5.2 | - | Read-only consultation. High-IQ debugging, architecture |
-| librarian | anthropic/claude-sonnet-4-5 | google/gemini-3-flash | Docs, OSS research, GitHub examples |
+| librarian | opencode/glm-4.7-free | - | Docs, OSS research, GitHub examples |
 | explore | opencode/grok-code | google/gemini-3-flash, anthropic/claude-haiku-4-5 | Fast contextual grep |
 | frontend-ui-ux-engineer | google/gemini-3-pro-preview | - | UI/UX code generation |
 | document-writer | google/gemini-3-pro-preview | - | Technical writing |
--- a/src/agents/librarian.ts
+++ b/src/agents/librarian.ts
@@ -1,6 +1,5 @@
 import type { AgentConfig } from "@opencode-ai/sdk"
 import type { AgentPromptMetadata } from "./types"
-import { createAgentToolRestrictions } from "../shared/permission-compat"

 const DEFAULT_MODEL = "opencode/glm-4.7-free"

@@ -22,26 +21,18 @@ export const LIBRARIAN_PROMPT_METADATA: AgentPromptMetadata = {
 }

 export function createLibrarianAgent(model: string = DEFAULT_MODEL): AgentConfig {
-  const restrictions = createAgentToolRestrictions([
-    "write",
-    "edit",
-    "task",
-    "sisyphus_task",
-    "call_omo_agent",
-  ])
-
  return {
    description:
      "Specialized codebase understanding agent for multi-repository analysis, searching remote codebases, retrieving official documentation, and finding implementation examples using GitHub CLI, Context7, and Web Search. MUST BE USED when users ask to look up code in remote repositories, explain library internals, or find usage examples in open source.",
    mode: "subagent" as const,
    model,
    temperature: 0.1,
-    ...restrictions,
+    tools: { write: false, edit: false, background_task: false },
    prompt: `# THE LIBRARIAN

 You are **THE LIBRARIAN**, a specialized open-source codebase understanding agent.

-Your job: Answer questions about open-source libraries. Provide **EVIDENCE** with **GitHub permalinks** when the question requires verification, implementation details, or current/version-specific information. For well-known APIs and stable concepts, answer directly from knowledge.
+Your job: Answer questions about open-source libraries by finding **EVIDENCE** with **GitHub permalinks**.

 ## CRITICAL: DATE AWARENESS

@@ -53,20 +44,64 @@ Your job: Answer questions about open-source libraries. Provide **EVIDENCE** wit

 ---

-## PHASE 0: ASSESS BEFORE SEARCHING
+## PHASE 0: REQUEST CLASSIFICATION (MANDATORY FIRST STEP)

-**First**: Can you answer confidently from training knowledge? If yes, answer directly.
-
-**Search when**: version-specific info, implementation internals, recent changes, unfamiliar libraries, user explicitly requests source/examples.
-
-**If search needed**, classify into:
+Classify EVERY request into one of these categories before taking action:

 | Type | Trigger Examples | Tools |
 |------|------------------|-------|
-| **TYPE A: CONCEPTUAL** | "How do I use X?", "Best practice for Y?" | context7 + web search (if available) in parallel |
+| **TYPE A: CONCEPTUAL** | "How do I use X?", "Best practice for Y?" | Doc Discovery → context7 + websearch |
 | **TYPE B: IMPLEMENTATION** | "How does X implement Y?", "Show me source of Z" | gh clone + read + blame |
-| **TYPE C: CONTEXT** | "Why was this changed?", "What's the history?", "Related issues/PRs?" | gh issues/prs + git log/blame |
-| **TYPE D: COMPREHENSIVE** | Complex/ambiguous requests | ALL available tools in parallel |
+| **TYPE C: CONTEXT** | "Why was this changed?", "History of X?" | gh issues/prs + git log/blame |
+| **TYPE D: COMPREHENSIVE** | Complex/ambiguous requests | Doc Discovery → ALL tools |
+
+---
+
+## PHASE 0.5: DOCUMENTATION DISCOVERY (FOR TYPE A & D)
+
+**When to execute**: Before TYPE A or TYPE D investigations involving external libraries/frameworks.
+
+### Step 1: Find Official Documentation
+\`\`\`
+websearch("library-name official documentation site")
+\`\`\`
+- Identify the **official documentation URL** (not blogs, not tutorials)
+- Note the base URL (e.g., \`https://docs.example.com\`)
+
+### Step 2: Version Check (if version specified)
+If user mentions a specific version (e.g., "React 18", "Next.js 14", "v2.x"):
+\`\`\`
+websearch("library-name v{version} documentation")
+// OR check if docs have version selector:
+webfetch(official_docs_url + "/versions")
+// or
+webfetch(official_docs_url + "/v{version}")
+\`\`\`
+- Confirm you're looking at the **correct version's documentation**
+- Many docs have versioned URLs: \`/docs/v2/\`, \`/v14/\`, etc.
+
+### Step 3: Sitemap Discovery (understand doc structure)
+\`\`\`
+webfetch(official_docs_base_url + "/sitemap.xml")
+// Fallback options:
+webfetch(official_docs_base_url + "/sitemap-0.xml")
+webfetch(official_docs_base_url + "/docs/sitemap.xml")
+\`\`\`
+- Parse sitemap to understand documentation structure
+- Identify relevant sections for the user's question
+- This prevents random searching—you now know WHERE to look
+
+### Step 4: Targeted Investigation
+With sitemap knowledge, fetch the SPECIFIC documentation pages relevant to the query:
+\`\`\`
+webfetch(specific_doc_page_from_sitemap)
+context7_query-docs(libraryId: id, query: "specific topic")
+\`\`\`
+
+**Skip Doc Discovery when**:
+- TYPE B (implementation) - you're cloning repos anyway
+- TYPE C (context/history) - you're looking at issues/PRs
+- Library has no official docs (rare OSS projects)

 ---

@@ -75,15 +110,15 @@ Your job: Answer questions about open-source libraries. Provide **EVIDENCE** wit
 ### TYPE A: CONCEPTUAL QUESTION
 **Trigger**: "How do I...", "What is...", "Best practice for...", rough/general questions

-**If searching**, use tools as needed:
+**Execute Documentation Discovery FIRST (Phase 0.5)**, then:
 \`\`\`
 Tool 1: context7_resolve-library-id("library-name")
-        → then context7_get-library-docs(id, topic: "specific-topic")
-Tool 2: grep_app_searchGitHub(query: "usage pattern", language: ["TypeScript"])
-Tool 3 (optional): If web search is available, search "library-name topic 2025"
+        → then context7_query-docs(libraryId: id, query: "specific-topic")
+Tool 2: webfetch(relevant_pages_from_sitemap)  // Targeted, not random
+Tool 3: grep_app_searchGitHub(query: "usage pattern", language: ["TypeScript"])
 \`\`\`

-**Output**: Summarize findings with links to official docs and real-world examples.
+**Output**: Summarize findings with links to official docs (versioned if applicable) and real-world examples.

 ---

@@ -94,20 +129,20 @@ Tool 3 (optional): If web search is available, search "library-name topic 2025"
 \`\`\`
 Step 1: Clone to temp directory
        gh repo clone owner/repo \${TMPDIR:-/tmp}/repo-name -- --depth 1
-        
+
 Step 2: Get commit SHA for permalinks
        cd \${TMPDIR:-/tmp}/repo-name && git rev-parse HEAD
-        
+
 Step 3: Find the implementation
        - grep/ast_grep_search for function/class
        - read the specific file
        - git blame for context if needed
-        
+
 Step 4: Construct permalink
        https://github.com/owner/repo/blob/<sha>/path/to/file#L10-L20
 \`\`\`

-**For faster results, parallelize**:
+**Parallel acceleration (4+ calls)**:
 \`\`\`
 Tool 1: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1
 Tool 2: grep_app_searchGitHub(query: "function_name", repo: "owner/repo")
@@ -120,7 +155,7 @@ Tool 4: context7_get-library-docs(id, topic: "relevant-api")
 ### TYPE C: CONTEXT & HISTORY
 **Trigger**: "Why was this changed?", "What's the history?", "Related issues/PRs?"

-**Tools to use**:
+**Execute in parallel (4+ calls)**:
 \`\`\`
 Tool 1: gh search issues "keyword" --repo owner/repo --state all --limit 10
 Tool 2: gh search prs "keyword" --repo owner/repo --state merged --limit 10
@@ -142,22 +177,21 @@ gh api repos/owner/repo/pulls/<number>/files
 ### TYPE D: COMPREHENSIVE RESEARCH
 **Trigger**: Complex questions, ambiguous requests, "deep dive into..."

-**Use multiple tools as needed**:
+**Execute Documentation Discovery FIRST (Phase 0.5)**, then execute in parallel (6+ calls):
 \`\`\`
-// Documentation
-Tool 1: context7_resolve-library-id → context7_get-library-docs
+// Documentation (informed by sitemap discovery)
+Tool 1: context7_resolve-library-id → context7_query-docs
+Tool 2: webfetch(targeted_doc_pages_from_sitemap)

 // Code Search
-Tool 2: grep_app_searchGitHub(query: "pattern1", language: [...])
-Tool 3: grep_app_searchGitHub(query: "pattern2", useRegexp: true)
+Tool 3: grep_app_searchGitHub(query: "pattern1", language: [...])
+Tool 4: grep_app_searchGitHub(query: "pattern2", useRegexp: true)

 // Source Analysis
-Tool 4: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1
+Tool 5: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1

 // Context
-Tool 5: gh search issues "topic" --repo owner/repo
-
-// Optional: If web search is available, search for recent updates
+Tool 6: gh search issues "topic" --repo owner/repo
 \`\`\`

 ---
@@ -202,7 +236,11 @@ https://github.com/tanstack/query/blob/abc123def/packages/react-query/src/useQue

 | Purpose | Tool | Command/Usage |
 |---------|------|---------------|
-| **Official Docs** | context7 | \`context7_resolve-library-id\` → \`context7_get-library-docs\` |
+| **Official Docs** | context7 | \`context7_resolve-library-id\` → \`context7_query-docs\` |
+| **Find Docs URL** | websearch_exa | \`websearch_exa_web_search_exa("library official documentation")\` |
+| **Sitemap Discovery** | webfetch | \`webfetch(docs_url + "/sitemap.xml")\` to understand doc structure |
+| **Read Doc Page** | webfetch | \`webfetch(specific_doc_page)\` for targeted documentation |
+| **Latest Info** | websearch_exa | \`websearch_exa_web_search_exa("query 2025")\` |
 | **Fast Code Search** | grep_app | \`grep_app_searchGitHub(query, language, useRegexp)\` |
 | **Deep Code Search** | gh CLI | \`gh search code "query" --repo owner/repo\` |
 | **Clone Repo** | gh CLI | \`gh repo clone owner/repo \${TMPDIR:-/tmp}/name -- --depth 1\` |
@@ -210,8 +248,6 @@ https://github.com/tanstack/query/blob/abc123def/packages/react-query/src/useQue
 | **View Issue/PR** | gh CLI | \`gh issue/pr view <num> --repo owner/repo --comments\` |
 | **Release Info** | gh CLI | \`gh api repos/owner/repo/releases/latest\` |
 | **Git History** | git | \`git log\`, \`git blame\`, \`git show\` |
-| **Read URL** | webfetch | \`webfetch(url)\` for blog posts, SO threads |
-| **Web Search** | (if available) | Use any available web search tool for latest info |

 ### Temp Directory

@@ -228,16 +264,18 @@ Use OS-appropriate temp directory:

 ---

-## PARALLEL EXECUTION GUIDANCE
+## PARALLEL EXECUTION REQUIREMENTS

-When searching is needed, scale effort to question complexity:
-
-| Request Type | Suggested Calls |
+| Request Type | Suggested Calls | Doc Discovery Required |
-|--------------|----------------|
+|--------------|----------------|------------------------|
-| TYPE A (Conceptual) | 1-2 |
-| TYPE B (Implementation) | 2-3 |
-| TYPE C (Context) | 2-3 |
-| TYPE D (Comprehensive) | 3-5 |
+| TYPE A (Conceptual) | 1-2 | YES (Phase 0.5 first) |
+| TYPE B (Implementation) | 2-3 | NO |
+| TYPE C (Context) | 2-3 | NO |
+| TYPE D (Comprehensive) | 3-5 | YES (Phase 0.5 first) |
+
+
+**Doc Discovery is SEQUENTIAL** (websearch → version check → sitemap → investigate).
+**Main phase is PARALLEL** once you know where to look.

 **Always vary queries** when using grep_app:
 \`\`\`
@@ -261,6 +299,8 @@ grep_app_searchGitHub(query: "useQuery")
 | grep_app no results | Broaden query, try concept instead of exact name |
 | gh API rate limit | Use cloned repo in temp directory |
 | Repo not found | Search for forks or mirrors |
+| Sitemap not found | Try \`/sitemap-0.xml\`, \`/sitemap_index.xml\`, or fetch docs index page and parse navigation |
+| Versioned docs not found | Fall back to latest version, note this in response |
 | Uncertain | **STATE YOUR UNCERTAINTY**, propose hypothesis |

 ---
@@ -268,7 +308,7 @@ grep_app_searchGitHub(query: "useQuery")
 ## COMMUNICATION RULES

 1. **NO TOOL NAMES**: Say "I'll search the codebase" not "I'll use grep_app"
-2. **NO PREAMBLE**: Answer directly, skip "I'll help you with..." 
+2. **NO PREAMBLE**: Answer directly, skip "I'll help you with..."
 3. **ALWAYS CITE**: Every code claim needs a permalink
 4. **USE MARKDOWN**: Code blocks with language identifiers
 5. **BE CONCISE**: Facts > opinions, evidence > speculation
--- a/src/agents/metis.ts
+++ b/src/agents/metis.ts
@@ -278,16 +278,22 @@ const metisRestrictions = createAgentToolRestrictions([
  "sisyphus_task",
 ])

-export const metisAgent: AgentConfig = {
-  description:
-    "Pre-planning consultant that analyzes requests to identify hidden intentions, ambiguities, and AI failure points.",
-  mode: "subagent" as const,
-  model: "anthropic/claude-opus-4-5",
-  temperature: 0.3,
-  ...metisRestrictions,
-  prompt: METIS_SYSTEM_PROMPT,
-  thinking: { type: "enabled", budgetTokens: 32000 },
-} as AgentConfig
+const DEFAULT_MODEL = "anthropic/claude-opus-4-5"
+
+export function createMetisAgent(model: string = DEFAULT_MODEL): AgentConfig {
+  return {
+    description:
+      "Pre-planning consultant that analyzes requests to identify hidden intentions, ambiguities, and AI failure points.",
+    mode: "subagent" as const,
+    model,
+    temperature: 0.3,
+    ...metisRestrictions,
+    prompt: METIS_SYSTEM_PROMPT,
+    thinking: { type: "enabled", budgetTokens: 32000 },
+  } as AgentConfig
+}
+
+export const metisAgent: AgentConfig = createMetisAgent()

 export const metisPromptMetadata: AgentPromptMetadata = {
  category: "advisor",
--- a/src/agents/orchestrator-sisyphus.ts
+++ b/src/agents/orchestrator-sisyphus.ts
@@ -13,6 +13,7 @@ import { createAgentToolRestrictions } from "../shared/permission-compat"
 */

 export interface OrchestratorContext {
+  model?: string
  availableAgents?: AvailableAgent[]
  availableSkills?: AvailableSkill[]
  userCategories?: Record<string, CategoryConfig>
@@ -131,7 +132,6 @@ ${rows.join("\n")}
 }

 export const ORCHESTRATOR_SISYPHUS_SYSTEM_PROMPT = `You are "Sisyphus" - Powerful AI Agent with orchestration capabilities from OhMyOpenCode.
-Named by [YeonGyu Kim](https://github.com/code-yeongyu).

 **Why Sisyphus?**: Humans roll their boulder every day. So do you. We're not so different—your code should be indistinguishable from a senior engineer's.

@@ -1432,6 +1432,8 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string {
    .replace("{SKILLS_SECTION}", skillsSection)
 }

+const DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
+
 export function createOrchestratorSisyphusAgent(ctx?: OrchestratorContext): AgentConfig {
  const restrictions = createAgentToolRestrictions([
    "task",
@@ -1442,7 +1444,7 @@ export function createOrchestratorSisyphusAgent(ctx?: OrchestratorContext): Agen
    description:
      "Orchestrates work via sisyphus_task() to complete ALL tasks in a todo list until fully done",
    mode: "primary" as const,
-    model: "anthropic/claude-sonnet-4-5",
+    model: ctx?.model ?? DEFAULT_MODEL,
    temperature: 0.1,
    prompt: buildDynamicOrchestratorPrompt(ctx),
    thinking: { type: "enabled", budgetTokens: 32000 },
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -18,7 +18,6 @@ const DEFAULT_MODEL = "anthropic/claude-opus-4-5"

 const SISYPHUS_ROLE_SECTION = `<Role>
 You are "Sisyphus" - Powerful AI Agent with orchestration capabilities from OhMyOpenCode.
-Named by [YeonGyu Kim](https://github.com/code-yeongyu).

 **Why Sisyphus?**: Humans roll their boulder every day. So do you. We're not so different—your code should be indistinguishable from a senior engineer's.

--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -7,9 +7,9 @@ import { createExploreAgent, EXPLORE_PROMPT_METADATA } from "./explore"
 import { createFrontendUiUxEngineerAgent, FRONTEND_PROMPT_METADATA } from "./frontend-ui-ux-engineer"
 import { createDocumentWriterAgent, DOCUMENT_WRITER_PROMPT_METADATA } from "./document-writer"
 import { createMultimodalLookerAgent, MULTIMODAL_LOOKER_PROMPT_METADATA } from "./multimodal-looker"
-import { metisAgent } from "./metis"
+import { createMetisAgent } from "./metis"
 import { createOrchestratorSisyphusAgent, orchestratorSisyphusAgent } from "./orchestrator-sisyphus"
-import { momusAgent } from "./momus"
+import { createMomusAgent } from "./momus"
 import type { AvailableAgent } from "./sisyphus-prompt-builder"
 import { deepMerge } from "../shared"
 import { DEFAULT_CATEGORIES } from "../tools/sisyphus-task/constants"
@@ -25,8 +25,8 @@ const agentSources: Record<BuiltinAgentName, AgentSource> = {
  "frontend-ui-ux-engineer": createFrontendUiUxEngineerAgent,
  "document-writer": createDocumentWriterAgent,
  "multimodal-looker": createMultimodalLookerAgent,
-  "Metis (Plan Consultant)": metisAgent,
-  "Momus (Plan Reviewer)": momusAgent,
+  "Metis (Plan Consultant)": createMetisAgent,
+  "Momus (Plan Reviewer)": createMomusAgent,
  "orchestrator-sisyphus": orchestratorSisyphusAgent,
 }

@@ -176,7 +176,11 @@ export function createBuiltinAgents(

  if (!disabledAgents.includes("orchestrator-sisyphus")) {
    const orchestratorOverride = agentOverrides["orchestrator-sisyphus"]
-    let orchestratorConfig = createOrchestratorSisyphusAgent({ availableAgents })
+    const orchestratorModel = orchestratorOverride?.model
+    let orchestratorConfig = createOrchestratorSisyphusAgent({
+      model: orchestratorModel,
+      availableAgents,
+    })

    if (orchestratorOverride) {
      orchestratorConfig = mergeAgentConfig(orchestratorConfig, orchestratorOverride)
--- a/src/auth/AGENTS.md
+++ b/src/auth/AGENTS.md
@@ -9,16 +9,20 @@ Google Antigravity OAuth for Gemini models. Token management, fetch interception
 ```
 auth/
 └── antigravity/
-    ├── plugin.ts         # Main export, hooks registration
+    ├── plugin.ts         # Main export, hooks registration (554 lines)
    ├── oauth.ts          # OAuth flow, token acquisition
    ├── token.ts          # Token storage, refresh logic
-    ├── fetch.ts          # Fetch interceptor (621 lines)
-    ├── response.ts       # Response transformation (598 lines)
-    ├── thinking.ts       # Thinking block extraction (571 lines)
+    ├── fetch.ts          # Fetch interceptor (798 lines)
+    ├── response.ts       # Response transformation (599 lines)
+    ├── thinking.ts       # Thinking block extraction (755 lines)
    ├── thought-signature-store.ts  # Signature caching
    ├── message-converter.ts        # Format conversion
+    ├── accounts.ts       # Multi-account management
+    ├── browser.ts        # Browser automation for OAuth
+    ├── cli.ts            # CLI interaction
    ├── request.ts        # Request building
    ├── project.ts        # Project ID management
+    ├── storage.ts        # Token persistence
    ├── tools.ts          # OAuth tool registration
    ├── constants.ts      # API endpoints, model mappings
    └── types.ts
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -9,16 +9,20 @@ CLI for oh-my-opencode: interactive installer, health diagnostics (doctor), runt
 ```
 cli/
 ├── index.ts              # Commander.js entry, subcommand routing
-├── install.ts            # Interactive TUI installer (477 lines)
-├── config-manager.ts     # JSONC parsing, env detection (669 lines)
+├── install.ts            # Interactive TUI installer (436 lines)
+├── config-manager.ts     # JSONC parsing, env detection (725 lines)
 ├── types.ts              # CLI-specific types
+├── commands/             # CLI subcommands
 ├── doctor/               # Health check system
 │   ├── index.ts          # Doctor command entry
+│   ├── runner.ts         # Health check orchestration
 │   ├── constants.ts      # Check categories
 │   ├── types.ts          # Check result interfaces
-│   └── checks/           # 17+ individual checks
+│   └── checks/           # 17+ individual checks (auth, config, dependencies, gh, lsp, mcp, opencode, plugin, version)
 ├── get-local-version/    # Version detection
 └── run/                  # OpenCode session launcher
+    ├── completion.ts     # Completion logic
+    └── events.ts         # Event handling
 ```

 ## CLI COMMANDS
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -25,6 +25,8 @@ export const BuiltinAgentNameSchema = z.enum([
  "document-writer",
  "multimodal-looker",
  "Metis (Plan Consultant)",
+  "Momus (Plan Reviewer)",
+  "orchestrator-sisyphus",
 ])

 export const BuiltinSkillNameSchema = z.enum([
@@ -40,12 +42,14 @@ export const OverridableAgentNameSchema = z.enum([
  "OpenCode-Builder",
  "Prometheus (Planner)",
  "Metis (Plan Consultant)",
+  "Momus (Plan Reviewer)",
  "oracle",
  "librarian",
  "explore",
  "frontend-ui-ux-engineer",
  "document-writer",
  "multimodal-looker",
+  "orchestrator-sisyphus",
 ])

 export const AgentNameSchema = BuiltinAgentNameSchema
@@ -118,12 +122,14 @@ export const AgentOverridesSchema = z.object({
  "OpenCode-Builder": AgentOverrideConfigSchema.optional(),
  "Prometheus (Planner)": AgentOverrideConfigSchema.optional(),
  "Metis (Plan Consultant)": AgentOverrideConfigSchema.optional(),
+  "Momus (Plan Reviewer)": AgentOverrideConfigSchema.optional(),
  oracle: AgentOverrideConfigSchema.optional(),
  librarian: AgentOverrideConfigSchema.optional(),
  explore: AgentOverrideConfigSchema.optional(),
  "frontend-ui-ux-engineer": AgentOverrideConfigSchema.optional(),
  "document-writer": AgentOverrideConfigSchema.optional(),
  "multimodal-looker": AgentOverrideConfigSchema.optional(),
+  "orchestrator-sisyphus": AgentOverrideConfigSchema.optional(),
 })

 export const ClaudeCodeConfigSchema = z.object({
@@ -290,6 +296,7 @@ export const GitMasterConfigSchema = z.object({
  /** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
  include_co_authored_by: z.boolean().default(true),
 })
+
 export const OhMyOpenCodeConfigSchema = z.object({
  $schema: z.string().optional(),
  disabled_mcps: z.array(AnyMcpNameSchema).optional(),
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -8,17 +8,23 @@ Claude Code compatibility layer + core feature modules. Commands, skills, agents

 ```
 features/
-├── background-agent/           # Task lifecycle, notifications (460 lines)
+├── background-agent/           # Task lifecycle, notifications (608 lines)
+├── boulder-state/              # Boulder state persistence
 ├── builtin-commands/           # Built-in slash commands
-├── builtin-skills/             # Built-in skills (playwright)
+│   └── templates/              # start-work, refactor, init-deep, ralph-loop
+├── builtin-skills/             # Built-in skills
+│   ├── git-master/             # Atomic commits, rebase, history search
+│   └── frontend-ui-ux/         # Designer-turned-developer skill
 ├── claude-code-agent-loader/   # ~/.claude/agents/*.md
 ├── claude-code-command-loader/ # ~/.claude/commands/*.md
 ├── claude-code-mcp-loader/     # .mcp.json files
 │   └── env-expander.ts         # ${VAR} expansion
-├── claude-code-plugin-loader/  # installed_plugins.json (484 lines)
+├── claude-code-plugin-loader/  # installed_plugins.json (486 lines)
 ├── claude-code-session-state/  # Session state persistence
+├── context-injector/           # Context collection and injection
 ├── opencode-skill-loader/      # Skills from OpenCode + Claude paths
 ├── skill-mcp-manager/          # MCP servers in skill YAML
+├── task-toast-manager/         # Task toast notifications
 └── hook-message-injector/      # Inject messages into conversation
 ```

--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -674,3 +674,95 @@ describe("LaunchInput.skillContent", () => {
    expect(input.skillContent).toBe("You are a playwright expert")
  })
 })
+
+describe("BackgroundManager.notifyParentSession - agent context preservation", () => {
+  test("should not pass agent field when parentAgent is undefined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-no-agent",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task without agent context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: undefined,
+      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect("agent" in promptBody).toBe(false)
+    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus" })
+  })
+
+  test("should include agent field when parentAgent is defined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-with-agent",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task with agent context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: "Sisyphus",
+      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect(promptBody.agent).toBe("Sisyphus")
+  })
+
+  test("should not pass model field when parentModel is undefined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-no-model",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task without model context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: "Sisyphus",
+      parentModel: undefined,
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect("model" in promptBody).toBe(false)
+    expect(promptBody.agent).toBe("Sisyphus")
+  })
+})
+
+function buildNotificationPromptBody(task: BackgroundTask): Record<string, unknown> {
+  const body: Record<string, unknown> = {
+    parts: [{ type: "text", text: `[BACKGROUND TASK COMPLETED] Task "${task.description}" finished.` }],
+  }
+
+  if (task.parentAgent !== undefined) {
+    body.agent = task.parentAgent
+  }
+
+  if (task.parentModel?.providerID && task.parentModel?.modelID) {
+    body.model = { providerID: task.parentModel.providerID, modelID: task.parentModel.modelID }
+  }
+
+  return body
+}
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -13,6 +13,7 @@ import { subagentSessions } from "../claude-code-session-state"
 import { getTaskToastManager } from "../task-toast-manager"

 const TASK_TTL_MS = 30 * 60 * 1000
+const MIN_STABILITY_TIME_MS = 10 * 1000  // Must run at least 10s before stability detection kicks in

 type OpencodeClient = PluginInput["client"]

@@ -43,6 +44,7 @@ interface Todo {
 export class BackgroundManager {
  private tasks: Map<string, BackgroundTask>
  private notifications: Map<string, BackgroundTask[]>
+  private pendingByParent: Map<string, Set<string>>  // Track pending tasks per parent for batching
  private client: OpencodeClient
  private directory: string
  private pollingInterval?: ReturnType<typeof setInterval>
@@ -51,12 +53,20 @@ export class BackgroundManager {
  constructor(ctx: PluginInput, config?: BackgroundTaskConfig) {
    this.tasks = new Map()
    this.notifications = new Map()
+    this.pendingByParent = new Map()
    this.client = ctx.client
    this.directory = ctx.directory
    this.concurrencyManager = new ConcurrencyManager(config)
  }

  async launch(input: LaunchInput): Promise<BackgroundTask> {
+    log("[background-agent] launch() called with:", {
+      agent: input.agent,
+      model: input.model,
+      description: input.description,
+      parentSessionID: input.parentSessionID,
+    })
+
    if (!input.agent || input.agent.trim() === "") {
      throw new Error("Agent parameter is required")
    }
@@ -106,6 +116,11 @@ export class BackgroundManager {
    this.tasks.set(task.id, task)
    this.startPolling()

+    // Track for batched notifications
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(task.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    log("[background-agent] Launching task:", { taskId: task.id, sessionID, agent: input.agent })

    const toastManager = getTaskToastManager()
@@ -119,10 +134,21 @@ export class BackgroundManager {
      })
    }

-    this.client.session.promptAsync({
+    log("[background-agent] Calling prompt (fire-and-forget) for launch with:", {
+      sessionID,
+      agent: input.agent,
+      model: input.model,
+      hasSkillContent: !!input.skillContent,
+      promptLength: input.prompt.length,
+    })
+
+    // Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
+    // Include model if caller provided one (e.g., from Sisyphus category configs)
+    this.client.session.prompt({
      path: { id: sessionID },
      body: {
        agent: input.agent,
+        ...(input.model ? { model: input.model } : {}),
        system: input.skillContent,
        tools: {
          task: false,
@@ -146,7 +172,9 @@ export class BackgroundManager {
          this.concurrencyManager.release(existingTask.concurrencyKey)
        }
        this.markForNotification(existingTask)
-        this.notifyParentSession(existingTask)
+        this.notifyParentSession(existingTask).catch(err => {
+          log("[background-agent] Failed to notify on error:", err)
+        })
      }
    })

@@ -199,6 +227,7 @@ export class BackgroundManager {
    parentSessionID: string
    description: string
    agent?: string
+    parentAgent?: string
  }): BackgroundTask {
    const task: BackgroundTask = {
      id: input.taskId,
@@ -214,12 +243,18 @@ export class BackgroundManager {
        toolCalls: 0,
        lastUpdate: new Date(),
      },
+      parentAgent: input.parentAgent,
    }

    this.tasks.set(task.id, task)
    subagentSessions.add(input.sessionID)
    this.startPolling()

+    // Track for batched notifications (external tasks need tracking too)
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(task.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    log("[background-agent] Registered external task:", { taskId: task.id, sessionID: input.sessionID })

    return task
@@ -247,6 +282,11 @@ export class BackgroundManager {
    this.startPolling()
    subagentSessions.add(existingTask.sessionID)

+    // Track for batched notifications (P2 fix: resumed tasks need tracking too)
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(existingTask.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    const toastManager = getTaskToastManager()
    if (toastManager) {
      toastManager.addTask({
@@ -259,7 +299,15 @@ export class BackgroundManager {

    log("[background-agent] Resuming task:", { taskId: existingTask.id, sessionID: existingTask.sessionID })

-    this.client.session.promptAsync({
+    log("[background-agent] Resuming task - calling prompt (fire-and-forget) with:", {
+      sessionID: existingTask.sessionID,
+      agent: existingTask.agent,
+      promptLength: input.prompt.length,
+    })
+
+    // Note: Don't pass model in body - use agent's configured model instead
+    // Use prompt() instead of promptAsync() to properly initialize agent loop
+    this.client.session.prompt({
      path: { id: existingTask.sessionID },
      body: {
        agent: existingTask.agent,
@@ -270,13 +318,15 @@ export class BackgroundManager {
        parts: [{ type: "text", text: input.prompt }],
      },
    }).catch((error) => {
-      log("[background-agent] resume promptAsync error:", error)
+      log("[background-agent] resume prompt error:", error)
      existingTask.status = "error"
      const errorMessage = error instanceof Error ? error.message : String(error)
      existingTask.error = errorMessage
      existingTask.completedAt = new Date()
      this.markForNotification(existingTask)
-      this.notifyParentSession(existingTask)
+      this.notifyParentSession(existingTask).catch(err => {
+        log("[background-agent] Failed to notify on resume error:", err)
+      })
    })

    return existingTask
@@ -331,7 +381,22 @@ export class BackgroundManager {
      const task = this.findBySession(sessionID)
      if (!task || task.status !== "running") return

-      this.checkSessionTodos(sessionID).then((hasIncompleteTodos) => {
+      // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
+      const elapsedMs = Date.now() - task.startedAt.getTime()
+      const MIN_IDLE_TIME_MS = 5000
+      if (elapsedMs < MIN_IDLE_TIME_MS) {
+        log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
+        return
+      }
+
+      // Edge guard: Verify session has actual assistant output before completing
+      this.validateSessionHasOutput(sessionID).then(async (hasValidOutput) => {
+        if (!hasValidOutput) {
+          log("[background-agent] Session.idle but no valid output yet, waiting:", task.id)
+          return
+        }
+
+        const hasIncompleteTodos = await this.checkSessionTodos(sessionID)
        if (hasIncompleteTodos) {
          log("[background-agent] Task has incomplete todos, waiting for todo-continuation:", task.id)
          return
@@ -340,8 +405,10 @@ export class BackgroundManager {
        task.status = "completed"
        task.completedAt = new Date()
        this.markForNotification(task)
-        this.notifyParentSession(task)
+        await this.notifyParentSession(task)
        log("[background-agent] Task completed via session.idle event:", task.id)
+      }).catch(err => {
+        log("[background-agent] Error in session.idle handler:", err)
      })
    }

@@ -382,6 +449,66 @@ export class BackgroundManager {
    this.notifications.delete(sessionID)
  }

+  /**
+   * Checks that a session holds real assistant/tool output before it is marked complete,
+   * so an early session.idle is not mistaken for completion. Fails open (true) on API errors.
+   */
+  private async validateSessionHasOutput(sessionID: string): Promise<boolean> {
+    try {
+      const response = await this.client.session.messages({
+        path: { id: sessionID },
+      })
+
+      const messages = response.data ?? []
+
+      // Check for at least one assistant or tool message
+      const hasAssistantOrToolMessage = messages.some(
+        (m: { info?: { role?: string } }) =>
+          m.info?.role === "assistant" || m.info?.role === "tool"
+      )
+
+      if (!hasAssistantOrToolMessage) {
+        log("[background-agent] No assistant/tool messages found in session:", sessionID)
+        return false
+      }
+
+      // Additionally check that at least one message has content (not just empty)
+      // OpenCode API uses different part types than Anthropic's API:
+      // - "reasoning" with .text property (thinking/reasoning content)
+      // - "tool" with .state.output property (tool call results)
+      // - "text" with .text property (final text output)
+      // - "step-start"/"step-finish" (metadata, no content)
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const hasContent = messages.some((m: any) => {
+        if (m.info?.role !== "assistant" && m.info?.role !== "tool") return false
+        const parts = m.parts ?? []
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        return parts.some((p: any) =>
+          // Text content (final output)
+          (p.type === "text" && p.text && p.text.trim().length > 0) ||
+          // Reasoning content (thinking blocks)
+          (p.type === "reasoning" && p.text && p.text.trim().length > 0) ||
+          // Tool calls (indicates work was done)
+          p.type === "tool" ||
+          // Tool results (output from executed tools) - important for tool-only tasks
+          (p.type === "tool_result" && p.content &&
+            (typeof p.content === "string" ? p.content.trim().length > 0 : p.content.length > 0))
+        )
+      })
+
+      if (!hasContent) {
+        log("[background-agent] Messages exist but no content found in session:", sessionID)
+        return false
+      }
+
+      return true
+    } catch (error) {
+      log("[background-agent] Error validating session output:", error)
+      // On error, allow completion to proceed (don't block indefinitely)
+      return true
+    }
+  }
+
  private clearNotificationsForTask(taskId: string): void {
    for (const [sessionID, tasks] of this.notifications.entries()) {
      const filtered = tasks.filter((t) => t.id !== taskId)
@@ -409,17 +536,33 @@ export class BackgroundManager {
    }
  }

-  cleanup(): void {
+cleanup(): void {
    this.stopPolling()
    this.tasks.clear()
    this.notifications.clear()
+    this.pendingByParent.clear()
  }

-  private notifyParentSession(task: BackgroundTask): void {
+  /**
+   * Lists every tracked task whose status is "running" (used by the compaction hook)
+   */
+  getRunningTasks(): BackgroundTask[] {
+    return [...this.tasks.values()].filter((task) => task.status === "running")
+  }
+
+  /**
+   * Get all tasks still in memory that are no longer running - any status other than "running", not only "completed" (for compaction hook)
+   */
+  getCompletedTasks(): BackgroundTask[] {
+    return Array.from(this.tasks.values()).filter(t => t.status !== "running")
+  }
+
+private async notifyParentSession(task: BackgroundTask): Promise<void> {
    const duration = this.formatDuration(task.startedAt, task.completedAt)

    log("[background-agent] notifyParentSession called for task:", task.id)

+    // Show toast notification
    const toastManager = getTaskToastManager()
    if (toastManager) {
      toastManager.showCompletionToast({
@@ -429,41 +572,83 @@ export class BackgroundManager {
      })
    }

-    const message = `[BACKGROUND TASK COMPLETED] Task "${task.description}" finished in ${duration}. Use background_output with task_id="${task.id}" to get results.`
+    // Update pending tracking and check if all tasks complete
+    const pendingSet = this.pendingByParent.get(task.parentSessionID)
+    if (pendingSet) {
+      pendingSet.delete(task.id)
+      if (pendingSet.size === 0) {
+        this.pendingByParent.delete(task.parentSessionID)
+      }
+    }

-    log("[background-agent] Sending notification to parent session:", { parentSessionID: task.parentSessionID })
+    const allComplete = !pendingSet || pendingSet.size === 0
+    const remainingCount = pendingSet?.size ?? 0

+    // Build notification message
+    const statusText = task.status === "error" ? "FAILED" : "COMPLETED"
+    const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
+    
+    let notification: string
+    if (allComplete) {
+      // All tasks complete - build summary
+      const completedTasks = Array.from(this.tasks.values())
+        .filter(t => t.parentSessionID === task.parentSessionID && t.status !== "running")
+        .map(t => `- \`${t.id}\`: ${t.description}`)
+        .join("\n")
+
+      notification = `<system-reminder>
+[ALL BACKGROUND TASKS COMPLETE]
+
+**Completed:**
+${completedTasks || `- \`${task.id}\`: ${task.description}`}
+
+Use \`background_output(task_id="<id>")\` to retrieve each result.
+</system-reminder>`
+    } else {
+      // Individual completion - silent notification
+      notification = `<system-reminder>
+[BACKGROUND TASK ${statusText}]
+**ID:** \`${task.id}\`
+**Description:** ${task.description}
+**Duration:** ${duration}${errorInfo}
+
+**${remainingCount} task${remainingCount === 1 ? "" : "s"} still in progress.** You WILL be notified when ALL complete.
+Do NOT poll - continue productive work.
+
+Use \`background_output(task_id="${task.id}")\` to retrieve this result when ready.
+</system-reminder>`
+    }
+
+    // Inject notification via session.prompt with noReply
+    try {
+      await this.client.session.prompt({
+        path: { id: task.parentSessionID },
+        body: {
+          noReply: !allComplete,  // Silent unless all complete
+          agent: task.parentAgent,
+          parts: [{ type: "text", text: notification }],
+        },
+      })
+      log("[background-agent] Sent notification to parent session:", {
+        taskId: task.id,
+        allComplete,
+        noReply: !allComplete,
+      })
+    } catch (error) {
+      log("[background-agent] Failed to send notification:", error)
+    }
+
+    // Cleanup after retention period
    const taskId = task.id
-    setTimeout(async () => {
+    setTimeout(() => {
      if (task.concurrencyKey) {
        this.concurrencyManager.release(task.concurrencyKey)
+        task.concurrencyKey = undefined
      }
-
-      try {
-        // Use only parentModel/parentAgent - don't fallback to prevMessage
-        // This prevents accidentally changing parent session's model/agent
-        const modelField = task.parentModel?.providerID && task.parentModel?.modelID
-          ? { providerID: task.parentModel.providerID, modelID: task.parentModel.modelID }
-          : undefined
-
-        await this.client.session.prompt({
-          path: { id: task.parentSessionID },
-          body: {
-            agent: task.parentAgent,
-            model: modelField,
-            parts: [{ type: "text", text: message }],
-          },
-          query: { directory: this.directory },
-        })
-        log("[background-agent] Successfully sent prompt to parent session:", { parentSessionID: task.parentSessionID })
-      } catch (error) {
-        log("[background-agent] prompt failed:", String(error))
-      } finally {
-        this.clearNotificationsForTask(taskId)
-        this.tasks.delete(taskId)
-        log("[background-agent] Removed completed task from memory:", taskId)
-      }
-    }, 200)
+      this.clearNotificationsForTask(taskId)
+      this.tasks.delete(taskId)
+      log("[background-agent] Removed completed task from memory:", taskId)
+    }, 5 * 60 * 1000)
  }

  private formatDuration(start: Date, end?: Date): string {
@@ -532,15 +717,18 @@ export class BackgroundManager {
    for (const task of this.tasks.values()) {
      if (task.status !== "running") continue

-      try {
+try {
        const sessionStatus = allStatuses[task.sessionID]
        
-        if (!sessionStatus) {
-          log("[background-agent] Session not found in status:", task.sessionID)
-          continue
-        }
+        // Don't skip if session not in status - fall through to message-based detection
+        if (sessionStatus?.type === "idle") {
+          // Edge guard: Validate session has actual output before completing
+          const hasValidOutput = await this.validateSessionHasOutput(task.sessionID)
+          if (!hasValidOutput) {
+            log("[background-agent] Polling idle but no valid output yet, waiting:", task.id)
+            continue
+          }

-        if (sessionStatus.type === "idle") {
          const hasIncompleteTodos = await this.checkSessionTodos(task.sessionID)
          if (hasIncompleteTodos) {
            log("[background-agent] Task has incomplete todos via polling, waiting:", task.id)
@@ -550,7 +738,7 @@ export class BackgroundManager {
          task.status = "completed"
          task.completedAt = new Date()
          this.markForNotification(task)
-          this.notifyParentSession(task)
+          await this.notifyParentSession(task)
          log("[background-agent] Task completed via polling:", task.id)
          continue
        }
@@ -591,10 +779,41 @@ export class BackgroundManager {
          task.progress.toolCalls = toolCalls
          task.progress.lastTool = lastTool
          task.progress.lastUpdate = new Date()
-          if (lastMessage) {
+if (lastMessage) {
            task.progress.lastMessage = lastMessage
            task.progress.lastMessageAt = new Date()
          }
+
+          // Stability detection: complete when message count unchanged for 3 polls
+          const currentMsgCount = messages.length
+          const elapsedMs = Date.now() - task.startedAt.getTime()
+
+          if (elapsedMs >= MIN_STABILITY_TIME_MS) {
+            if (task.lastMsgCount === currentMsgCount) {
+              task.stablePolls = (task.stablePolls ?? 0) + 1
+              if (task.stablePolls >= 3) {
+                // Edge guard: Validate session has actual output before completing
+                const hasValidOutput = await this.validateSessionHasOutput(task.sessionID)
+                if (!hasValidOutput) {
+                  log("[background-agent] Stability reached but no valid output, waiting:", task.id)
+                  continue
+                }
+
+                const hasIncompleteTodos = await this.checkSessionTodos(task.sessionID)
+                if (!hasIncompleteTodos) {
+                  task.status = "completed"
+                  task.completedAt = new Date()
+                  this.markForNotification(task)
+                  await this.notifyParentSession(task)
+                  log("[background-agent] Task completed via stability detection:", task.id)
+                  continue
+                }
+              }
+            } else {
+              task.stablePolls = 0
+            }
+          }
+          task.lastMsgCount = currentMsgCount
        }
      } catch (error) {
        log("[background-agent] Poll error for task:", { taskId: task.id, error })
--- a/src/features/background-agent/types.ts
+++ b/src/features/background-agent/types.ts
@@ -32,6 +32,10 @@ export interface BackgroundTask {
  concurrencyKey?: string
  /** Parent session's agent name for notification */
  parentAgent?: string
+  /** Last message count for stability detection */
+  lastMsgCount?: number
+  /** Number of consecutive polls with stable message count */
+  stablePolls?: number
 }

 export interface LaunchInput {
--- a/src/hooks/AGENTS.md
+++ b/src/hooks/AGENTS.md
@@ -2,35 +2,42 @@

 ## OVERVIEW

-22 lifecycle hooks intercepting/modifying agent behavior. Context injection, error recovery, output control, notifications.
+22+ lifecycle hooks intercepting/modifying agent behavior. Context injection, error recovery, output control, notifications.

 ## STRUCTURE

 ```
 hooks/
-├── anthropic-context-window-limit-recovery/  # Auto-compact at token limit (554 lines)
+├── anthropic-context-window-limit-recovery/  # Auto-compact at token limit (556 lines)
 ├── auto-slash-command/         # Detect and execute /command patterns
 ├── auto-update-checker/        # Version notifications, startup toast
 ├── background-notification/    # OS notify on task complete
-├── claude-code-hooks/          # settings.json PreToolUse/PostToolUse/etc
+├── claude-code-hooks/          # settings.json PreToolUse/PostToolUse/etc (408 lines)
 ├── comment-checker/            # Prevent excessive AI comments
-│   └── filters/                # docstring, directive, bdd, etc
+│   ├── filters/                # docstring, directive, bdd, shebang
+│   └── output/                 # XML builder, formatter
 ├── compaction-context-injector/ # Preserve context during compaction
 ├── directory-agents-injector/  # Auto-inject AGENTS.md
 ├── directory-readme-injector/  # Auto-inject README.md
+├── edit-error-recovery/        # Recover from edit failures
 ├── empty-message-sanitizer/    # Sanitize empty messages
 ├── interactive-bash-session/   # Tmux session management
 ├── keyword-detector/           # ultrawork/search keyword activation
 ├── non-interactive-env/        # CI/headless handling
 ├── preemptive-compaction/      # Pre-emptive at 85% usage
+├── prometheus-md-only/         # Restrict prometheus to read-only
 ├── ralph-loop/                 # Self-referential dev loop
 ├── rules-injector/             # Conditional rules from .claude/rules/
-├── session-recovery/           # Recover from errors (430 lines)
+├── session-recovery/           # Recover from errors (432 lines)
+├── sisyphus-orchestrator/      # Main orchestration hook (660 lines)
+├── start-work/                 # Initialize Sisyphus work session
+├── task-resume-info/           # Track task resume state
 ├── think-mode/                 # Auto-detect thinking triggers
+├── thinking-block-validator/   # Validate thinking block format
 ├── agent-usage-reminder/       # Remind to use specialists
 ├── context-window-monitor.ts   # Monitor usage (standalone)
 ├── session-notification.ts     # OS notify on idle
-├── todo-continuation-enforcer.ts # Force TODO completion
+├── todo-continuation-enforcer.ts # Force TODO completion (413 lines)
 └── tool-output-truncator.ts    # Truncate verbose outputs
 ```

--- a/src/hooks/agent-usage-reminder/constants.ts
+++ b/src/hooks/agent-usage-reminder/constants.ts
@@ -15,6 +15,8 @@ export const TARGET_TOOLS = new Set([
  "safe_glob",
  "webfetch",
  "context7_resolve-library-id",
+  "context7_query-docs",
+  "websearch_web_search_exa",
  "context7_get-library-docs",
  "grep_app_searchgithub",
 ]);
--- a/src/hooks/auto-update-checker/index.test.ts
+++ b/src/hooks/auto-update-checker/index.test.ts
@@ -0,0 +1,153 @@
+import { describe, test, expect } from "bun:test"
+import { isPrereleaseVersion, isDistTag, isPrereleaseOrDistTag } from "./index"
+
+describe("auto-update-checker", () => {
+  describe("isPrereleaseVersion", () => {
+    test("returns true for beta versions", () => {
+      // #given a beta version
+      const version = "3.0.0-beta.1"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for alpha versions", () => {
+      // #given an alpha version
+      const version = "1.0.0-alpha"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for rc versions", () => {
+      // #given an rc version
+      const version = "2.0.0-rc.1"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for stable versions", () => {
+      // #given a stable version
+      const version = "2.14.0"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+  })
+
+  describe("isDistTag", () => {
+    test("returns true for beta dist-tag", () => {
+      // #given beta dist-tag
+      const version = "beta"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for next dist-tag", () => {
+      // #given next dist-tag
+      const version = "next"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for canary dist-tag", () => {
+      // #given canary dist-tag
+      const version = "canary"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for semver versions", () => {
+      // #given a semver version
+      const version = "2.14.0"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+
+    test("returns true for latest (filtered out before this check)", () => {
+      // #given latest tag
+      const version = "latest"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true (but latest is filtered before this check)
+      expect(result).toBe(true)
+    })
+  })
+
+  describe("isPrereleaseOrDistTag", () => {
+    test("returns false for null", () => {
+      // #given null version
+      const version = null
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+
+    test("returns true for prerelease version", () => {
+      // #given prerelease version
+      const version = "3.0.0-beta.1"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for dist-tag", () => {
+      // #given dist-tag
+      const version = "beta"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for stable version", () => {
+      // #given stable version
+      const version = "2.14.0"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+  })
+})
--- a/src/hooks/auto-update-checker/index.ts
+++ b/src/hooks/auto-update-checker/index.ts
@@ -9,6 +9,20 @@ import type { AutoUpdateCheckerOptions } from "./types"

 const SISYPHUS_SPINNER = ["·", "•", "●", "○", "◌", "◦", " "]

+export function isPrereleaseVersion(version: string): boolean {
+  return /^[^+]*-/.test(version) // prerelease hyphen must precede any "+build" metadata, which may itself contain "-"
+}
+
+export function isDistTag(version: string): boolean {
+  // Anything not starting with a digit (e.g. "beta", "next") is treated as a dist-tag name
+  return !/^\d/.test(version)
+}
+
+export function isPrereleaseOrDistTag(pinnedVersion: string | null): boolean {
+  // A null or empty pin is neither; otherwise either classification is enough
+  return !!pinnedVersion && (isPrereleaseVersion(pinnedVersion) || isDistTag(pinnedVersion))
+}
+
 export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdateCheckerOptions = {}) {
  const { showStartupToast = true, isSisyphusEnabled = false, autoUpdate = true } = options

@@ -63,7 +77,7 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat
 }

 async function runBackgroundUpdateCheck(
-  ctx: PluginInput, 
+  ctx: PluginInput,
  autoUpdate: boolean,
  getToastMessage: (isUpdate: boolean, latestVersion?: string) => string
 ): Promise<void> {
@@ -99,7 +113,18 @@ async function runBackgroundUpdateCheck(
    return
  }

+  // Check if current version is a prerelease - don't auto-downgrade prerelease to stable
+  if (isPrereleaseVersion(currentVersion)) {
+    log(`[auto-update-checker] Skipping auto-update for prerelease version: ${currentVersion}`)
+    return
+  }
+
  if (pluginInfo.isPinned) {
+    if (isPrereleaseOrDistTag(pluginInfo.pinnedVersion)) {
+      log(`[auto-update-checker] Skipping auto-update for prerelease/dist-tag: ${pluginInfo.pinnedVersion}`)
+      return
+    }
+
    const updated = updatePinnedVersion(pluginInfo.configPath, pluginInfo.entry, latestVersion)
    if (!updated) {
      await showUpdateAvailableToast(ctx, latestVersion, getToastMessage)
@@ -112,7 +137,7 @@ async function runBackgroundUpdateCheck(
  invalidatePackage(PACKAGE_NAME)

  const installSuccess = await runBunInstallSafe()
-  
+
  if (installSuccess) {
    await showAutoUpdatedToast(ctx, currentVersion, latestVersion)
    log(`[auto-update-checker] Update installed: ${currentVersion} → ${latestVersion}`)
@@ -180,7 +205,7 @@ async function showSpinnerToast(ctx: PluginInput, version: string, message: stri
 }

 async function showUpdateAvailableToast(
-  ctx: PluginInput, 
+  ctx: PluginInput,
  latestVersion: string,
  getToastMessage: (isUpdate: boolean, latestVersion?: string) => string
 ): Promise<void> {
--- a/src/hooks/background-compaction/index.ts
+++ b/src/hooks/background-compaction/index.ts
@@ -0,0 +1,85 @@
+import type { BackgroundManager } from "../../features/background-agent"
+
+interface CompactingInput {
+  sessionID: string
+}
+
+interface CompactingOutput {
+  context: string[]
+  prompt?: string
+}
+
+/**
+ * Background agent compaction hook - preserves task state during context compaction.
+ *
+ * When OpenCode compacts session context to save tokens, this hook injects
+ * information about running and recently finished background tasks so the
+ * agent doesn't lose awareness of delegated work.
+ */
+export function createBackgroundCompactionHook(manager: BackgroundManager) {
+  return {
+    "experimental.session.compacting": async (
+      input: CompactingInput,
+      output: CompactingOutput
+    ): Promise<void> => {
+      const { sessionID } = input
+
+      // Running tasks whose parent is the session being compacted
+      const running = manager.getRunningTasks()
+        .filter(t => t.parentSessionID === sessionID)
+        .map(t => ({
+          id: t.id,
+          agent: t.agent,
+          description: t.description,
+          startedAt: t.startedAt,
+        }))
+
+      // Non-running tasks of this session that the manager still holds (any final status, not only "completed")
+      const completed = manager.getCompletedTasks()
+        .filter(t => t.parentSessionID === sessionID)
+        .slice(-10) // Keep at most the last 10 of them
+        .map(t => ({
+          id: t.id,
+          agent: t.agent,
+          description: t.description,
+          status: t.status,
+        }))
+
+      // Early exit if nothing to preserve
+      if (running.length === 0 && completed.length === 0) return
+
+      const sections: string[] = ["<background-tasks>"]
+
+      // Running tasks section
+      if (running.length > 0) {
+        sections.push("## Running Background Tasks")
+        sections.push("")
+        for (const t of running) {
+          const elapsed = Math.floor((Date.now() - t.startedAt.getTime()) / 1000)
+          sections.push(`- **\`${t.id}\`** (${t.agent}): ${t.description} [${elapsed}s elapsed]`)
+        }
+        sections.push("")
+        sections.push("> **Note:** You WILL be notified when tasks complete.")
+        sections.push("> Do NOT poll - continue productive work.")
+        sections.push("")
+      }
+
+      // Finished tasks section: one marker per status ("completed", "error", anything else)
+      if (completed.length > 0) {
+        sections.push("## Recently Completed Tasks")
+        sections.push("")
+        for (const t of completed) {
+          const statusEmoji = t.status === "completed" ? "✅" : t.status === "error" ? "❌" : "⏱️"
+          sections.push(`- ${statusEmoji} **\`${t.id}\`**: ${t.description}`)
+        }
+        sections.push("")
+      }
+
+      sections.push("## Retrieval")
+      sections.push('Use `background_output(task_id="<id>")` to retrieve task results.')
+      sections.push("</background-tasks>")
+
+      output.context.push(sections.join("\n"))
+    }
+  }
+}
--- a/src/hooks/background-notification/index.ts
+++ b/src/hooks/background-notification/index.ts
@@ -9,6 +9,12 @@ interface EventInput {
  event: Event
 }

+/**
+ * Background notification hook - handles event routing to BackgroundManager.
+ * 
+ * Notifications are now delivered directly via session.prompt({ noReply }) 
+ * from the manager, so this hook only needs to handle event routing.
+ */
 export function createBackgroundNotificationHook(manager: BackgroundManager) {
  const eventHandler = async ({ event }: EventInput) => {
    manager.handleEvent(event)
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -14,6 +14,7 @@ export { createThinkModeHook } from "./think-mode";
 export { createClaudeCodeHooksHook } from "./claude-code-hooks";
 export { createRulesInjectorHook } from "./rules-injector";
 export { createBackgroundNotificationHook } from "./background-notification"
+export { createBackgroundCompactionHook } from "./background-compaction"
 export { createAutoUpdateCheckerHook } from "./auto-update-checker";

 export { createAgentUsageReminderHook } from "./agent-usage-reminder";
--- a/src/hooks/ralph-loop/index.test.ts
+++ b/src/hooks/ralph-loop/index.test.ts
@@ -591,6 +591,73 @@ describe("ralph-loop", () => {
      expect(hook.getState()).toBeNull()
    })

+    test("should allow starting new loop while previous loop is active (different session)", async () => {
+      // #given - active loop in session A
+      const hook = createRalphLoopHook(createMockPluginInput())
+      hook.startLoop("session-A", "First task", { maxIterations: 10 })
+      expect(hook.getState()?.session_id).toBe("session-A")
+      expect(hook.getState()?.prompt).toBe("First task")
+
+      // #when - start new loop in session B (without completing A)
+      hook.startLoop("session-B", "Second task", { maxIterations: 20 })
+
+      // #then - state should be overwritten with session B's loop
+      expect(hook.getState()?.session_id).toBe("session-B")
+      expect(hook.getState()?.prompt).toBe("Second task")
+      expect(hook.getState()?.max_iterations).toBe(20)
+      expect(hook.getState()?.iteration).toBe(1)
+
+      // #when - session B goes idle
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-B" } },
+      })
+
+      // #then - continuation should be injected for session B
+      expect(promptCalls.length).toBe(1)
+      expect(promptCalls[0].sessionID).toBe("session-B")
+      expect(promptCalls[0].text).toContain("Second task")
+      expect(promptCalls[0].text).toContain("2/20")
+
+      // #then - iteration incremented
+      expect(hook.getState()?.iteration).toBe(2)
+    })
+
+    test("should allow starting new loop in same session (restart)", async () => {
+      // #given - active loop in session A at iteration 5
+      const hook = createRalphLoopHook(createMockPluginInput())
+      hook.startLoop("session-A", "First task", { maxIterations: 10 })
+      
+      // Simulate some iterations
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-A" } },
+      })
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-A" } },
+      })
+      expect(hook.getState()?.iteration).toBe(3)
+      expect(promptCalls.length).toBe(2)
+
+      // #when - start NEW loop in same session (restart)
+      hook.startLoop("session-A", "Restarted task", { maxIterations: 50 })
+
+      // #then - state should be reset to iteration 1 with new prompt
+      expect(hook.getState()?.session_id).toBe("session-A")
+      expect(hook.getState()?.prompt).toBe("Restarted task")
+      expect(hook.getState()?.max_iterations).toBe(50)
+      expect(hook.getState()?.iteration).toBe(1)
+
+      // #when - session goes idle
+      promptCalls = [] // Reset to check new continuation
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-A" } },
+      })
+
+      // #then - continuation should use new task
+      expect(promptCalls.length).toBe(1)
+      expect(promptCalls[0].text).toContain("Restarted task")
+      expect(promptCalls[0].text).toContain("2/50")
+    })
+
    test("should check transcript BEFORE API to optimize performance", async () => {
      // #given - transcript has completion promise
      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
--- a/src/tools/AGENTS.md
+++ b/src/tools/AGENTS.md
@@ -19,9 +19,10 @@ tools/
 ├── interactive-bash/   # Tmux session management
 ├── look-at/            # Multimodal analysis (PDF, images)
 ├── lsp/                # 11 LSP tools
-│   ├── client.ts       # LSP connection lifecycle
+│   ├── client.ts       # LSP connection lifecycle (612 lines)
+│   ├── utils.ts        # LSP utilities (461 lines)
 │   ├── config.ts       # Server configurations
-│   ├── tools.ts        # Tool implementations
+│   ├── tools.ts        # Tool implementations (405 lines)
 │   └── types.ts
 ├── session-manager/    # OpenCode session file management
 │   ├── constants.ts    # Storage paths, descriptions
@@ -29,6 +30,7 @@ tools/
 │   ├── storage.ts      # File I/O operations
 │   ├── utils.ts        # Formatting, filtering
 │   └── tools.ts        # Tool implementations
+├── sisyphus-task/      # Category-based task delegation (493 lines)
 ├── skill/              # Skill loading and execution
 ├── skill-mcp/          # Skill-embedded MCP invocation
 ├── slashcommand/       # Slash command execution
--- a/src/tools/background-task/tools.ts
+++ b/src/tools/background-task/tools.ts
@@ -74,7 +74,7 @@ export function createBackgroundTask(manager: BackgroundManager): ToolDefinition
          parentSessionID: ctx.sessionID,
          parentMessageID: ctx.messageID,
          parentModel,
-          parentAgent: prevMessage?.agent,
+          parentAgent: ctx.agent ?? prevMessage?.agent,
        })

        ctx.metadata?.({
@@ -176,8 +176,13 @@ async function formatTaskResult(task: BackgroundTask, client: OpencodeClient): P
  // Handle both SDK response structures: direct array or wrapped in .data
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const messages = ((messagesResult as any).data ?? messagesResult) as Array<{
-    info?: { role?: string }
-    parts?: Array<{ type?: string; text?: string }>
+    info?: { role?: string; time?: string }
+    parts?: Array<{ 
+      type?: string
+      text?: string
+      content?: string | Array<{ type: string; text?: string }>
+      name?: string
+    }>
  }>

  if (!Array.isArray(messages) || messages.length === 0) {
@@ -193,11 +198,13 @@ Session ID: ${task.sessionID}
 (No messages found)`
  }

-  const assistantMessages = messages.filter(
-    (m) => m.info?.role === "assistant"
+  // Include both assistant messages AND tool messages
+  // Tool results (grep, glob, bash output) come from role "tool"
+  const relevantMessages = messages.filter(
+    (m) => m.info?.role === "assistant" || m.info?.role === "tool"
  )

-  if (assistantMessages.length === 0) {
+  if (relevantMessages.length === 0) {
    return `Task Result

 Task ID: ${task.id}
@@ -207,17 +214,46 @@ Session ID: ${task.sessionID}

 ---

-(No assistant response found)`
+(No assistant or tool response found)`
  }

-  const lastMessage = assistantMessages[assistantMessages.length - 1]
-  const textParts = lastMessage?.parts?.filter(
-    (p) => p.type === "text"
-  ) ?? []
-  const textContent = textParts
-    .map((p) => p.text ?? "")
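+  // Sort by time ascending (oldest first). NOTE(review): call-omo-agent reads info.time.created as a number; if time is an object here too, String() gives "[object Object]" for every message and this sort is a no-op - confirm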
+  const sortedMessages = [...relevantMessages].sort((a, b) => {
+    const timeA = String((a as { info?: { time?: string } }).info?.time ?? "")
+    const timeB = String((b as { info?: { time?: string } }).info?.time ?? "")
+    return timeA.localeCompare(timeB)
+  })
+  
+  // Extract content from ALL messages, not just the last one
+  // Tool results may be in earlier messages while the final message is empty
+  const extractedContent: string[] = []
+  
+  for (const message of sortedMessages) {
+    for (const part of message.parts ?? []) {
+      // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+      if ((part.type === "text" || part.type === "reasoning") && part.text) {
+        extractedContent.push(part.text)
+      } else if (part.type === "tool_result") {
+        // Tool results contain the actual output from tool calls
+        const toolResult = part as { content?: string | Array<{ type: string; text?: string }> }
+        if (typeof toolResult.content === "string" && toolResult.content) {
+          extractedContent.push(toolResult.content)
+        } else if (Array.isArray(toolResult.content)) {
+          // Handle array of content blocks
+          for (const block of toolResult.content) {
+            // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+            if ((block.type === "text" || block.type === "reasoning") && block.text) {
+              extractedContent.push(block.text)
+            }
+          }
+        }
+      }
+    }
+  }
+  
+  const textContent = extractedContent
    .filter((text) => text.length > 0)
-    .join("\n")
+    .join("\n\n")

  const duration = formatDuration(task.startedAt, task.completedAt)

--- a/src/tools/call-omo-agent/tools.ts
+++ b/src/tools/call-omo-agent/tools.ts
@@ -170,23 +170,59 @@ async function executeSync(
  const messages = messagesResult.data
  log(`[call_omo_agent] Got ${messages.length} messages`)

+  // Include both assistant messages AND tool messages
+  // Tool results (grep, glob, bash output) come from role "tool"
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const lastAssistantMessage = messages
-    .filter((m: any) => m.info.role === "assistant")
-    .sort((a: any, b: any) => (b.info.time?.created || 0) - (a.info.time?.created || 0))[0]
+  const relevantMessages = messages.filter(
+    (m: any) => m.info?.role === "assistant" || m.info?.role === "tool"
+  )

-  if (!lastAssistantMessage) {
-    log(`[call_omo_agent] No assistant message found`)
+  if (relevantMessages.length === 0) {
+    log(`[call_omo_agent] No assistant or tool messages found`)
    log(`[call_omo_agent] All messages:`, JSON.stringify(messages, null, 2))
-    return `Error: No assistant response found\n\n<task_metadata>\nsession_id: ${sessionID}\n</task_metadata>`
+    return `Error: No assistant or tool response found\n\n<task_metadata>\nsession_id: ${sessionID}\n</task_metadata>`
  }

-  log(`[call_omo_agent] Found assistant message with ${lastAssistantMessage.parts.length} parts`)
+  log(`[call_omo_agent] Found ${relevantMessages.length} relevant messages`)

+  // Sort by time ascending (oldest first) to process messages in order
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const textParts = lastAssistantMessage.parts.filter((p: any) => p.type === "text")
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const responseText = textParts.map((p: any) => p.text).join("\n")
+  const sortedMessages = [...relevantMessages].sort((a: any, b: any) => {
+    const timeA = a.info?.time?.created ?? 0
+    const timeB = b.info?.time?.created ?? 0
+    return timeA - timeB
+  })
+
+  // Extract content from ALL messages, not just the last one
+  // Tool results may be in earlier messages while the final message is empty
+  const extractedContent: string[] = []
+
+  for (const message of sortedMessages) {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    for (const part of (message as any).parts ?? []) {
+      // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+      if ((part.type === "text" || part.type === "reasoning") && part.text) {
+        extractedContent.push(part.text)
+      } else if (part.type === "tool_result") {
+        // Tool results contain the actual output from tool calls
+        const toolResult = part as { content?: string | Array<{ type: string; text?: string }> }
+        if (typeof toolResult.content === "string" && toolResult.content) {
+          extractedContent.push(toolResult.content)
+        } else if (Array.isArray(toolResult.content)) {
+          // Handle array of content blocks
+          for (const block of toolResult.content) {
+            if ((block.type === "text" || block.type === "reasoning") && block.text) {
+              extractedContent.push(block.text)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  const responseText = extractedContent
+    .filter((text) => text.length > 0)
+    .join("\n\n")

  log(`[call_omo_agent] Got response, length: ${responseText.length}`)

--- a/src/tools/sisyphus-task/tools.test.ts
+++ b/src/tools/sisyphus-task/tools.test.ts
@@ -259,6 +259,7 @@ describe("sisyphus-task", () => {

  describe("resume with background parameter", () => {
  test("resume with background=false should wait for result and return content", async () => {
+    // Note: This test needs extended timeout because the implementation has MIN_STABILITY_TIME_MS = 5000
    // #given
    const { createSisyphusTask } = require("./tools")
    
@@ -319,7 +320,7 @@ describe("sisyphus-task", () => {
    // #then - should contain actual result, not just "Background task resumed"
    expect(result).toContain("This is the resumed task result")
    expect(result).not.toContain("Background task resumed")
-  })
+  }, { timeout: 10000 })

  test("resume with background=true should return immediately without waiting", async () => {
    // #given
--- a/src/tools/sisyphus-task/tools.ts
+++ b/src/tools/sisyphus-task/tools.ts
@@ -221,6 +221,33 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
          return `❌ Failed to send resume prompt: ${errorMessage}\n\nSession ID: ${args.resume}`
        }

+        // Wait for message stability after prompt completes
+        const POLL_INTERVAL_MS = 500
+        const MIN_STABILITY_TIME_MS = 5000
+        const STABILITY_POLLS_REQUIRED = 3
+        const pollStart = Date.now()
+        let lastMsgCount = 0
+        let stablePolls = 0
+
+        while (Date.now() - pollStart < 60000) {
+          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
+          
+          const elapsed = Date.now() - pollStart
+          if (elapsed < MIN_STABILITY_TIME_MS) continue
+
+          const messagesCheck = await client.session.messages({ path: { id: args.resume } })
+          const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
+          const currentMsgCount = msgs.length
+
+          if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
+            stablePolls++
+            if (stablePolls >= STABILITY_POLLS_REQUIRED) break
+          } else {
+            stablePolls = 0
+            lastMsgCount = currentMsgCount
+          }
+        }
+
        const messagesResult = await client.session.messages({
          path: { id: args.resume },
        })
@@ -250,7 +277,8 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
          return `❌ No assistant response found.\n\nSession ID: ${args.resume}`
        }

-        const textParts = lastMessage?.parts?.filter((p) => p.type === "text") ?? []
+        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
+        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")

        const duration = formatDuration(startTime)
@@ -390,13 +418,13 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          metadata: { sessionId: sessionID, category: args.category, sync: true },
        })

-        // Use promptAsync to avoid changing main session's active state
+        // Use fire-and-forget prompt() - awaiting causes JSON parse errors with thinking models
+        // Note: Don't pass model in body - use agent's configured model instead
        let promptError: Error | undefined
-        await client.session.promptAsync({
+        client.session.prompt({
          path: { id: sessionID },
          body: {
            agent: agentToUse,
-            model: categoryModel,
            system: systemContent,
            tools: {
              task: false,
@@ -408,6 +436,9 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          promptError = error instanceof Error ? error : new Error(String(error))
        })

+        // Brief delay so an immediate prompt() rejection can set promptError before the check below
+        await new Promise(resolve => setTimeout(resolve, 100))
+
        if (promptError) {
          if (toastManager && taskId !== undefined) {
            toastManager.removeTask(taskId)
@@ -419,21 +450,63 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          return `❌ Failed to send prompt: ${errorMessage}\n\nSession ID: ${sessionID}`
        }

-        // Poll for session completion
+        // Poll for session completion with stability detection
+        // The session may show as "idle" before messages appear, so we also check message stability
        const POLL_INTERVAL_MS = 500
        const MAX_POLL_TIME_MS = 10 * 60 * 1000
+        const MIN_STABILITY_TIME_MS = 10000  // Minimum 10s before accepting completion
+        const STABILITY_POLLS_REQUIRED = 3
        const pollStart = Date.now()
+        let lastMsgCount = 0
+        let stablePolls = 0

        while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))

+          // Check for async errors that may have occurred after the initial 100ms delay
+          // TypeScript doesn't understand async mutation, so we cast to check
+          const asyncError = promptError as Error | undefined
+          if (asyncError) {
+            if (toastManager && taskId !== undefined) {
+              toastManager.removeTask(taskId)
+            }
+            const errorMessage = asyncError.message
+            if (errorMessage.includes("agent.name") || errorMessage.includes("undefined")) {
+              return `❌ Agent "${agentToUse}" not found. Make sure the agent is registered in your opencode.json or provided by a plugin.\n\nSession ID: ${sessionID}`
+            }
+            return `❌ Failed to send prompt: ${errorMessage}\n\nSession ID: ${sessionID}`
+          }
+
          const statusResult = await client.session.status()
          const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
          const sessionStatus = allStatuses[sessionID]

-          // Break if session is idle OR no longer in status (completed and removed)
-          if (!sessionStatus || sessionStatus.type === "idle") {
-            break
+          // If session is actively running, reset stability
+          if (sessionStatus && sessionStatus.type !== "idle") {
+            stablePolls = 0
+            lastMsgCount = 0
+            continue
+          }
+
+          // Session is idle or not in status - check message stability
+          const elapsed = Date.now() - pollStart
+          if (elapsed < MIN_STABILITY_TIME_MS) {
+            continue  // Don't accept completion too early
+          }
+
+          // Get current message count
+          const messagesCheck = await client.session.messages({ path: { id: sessionID } })
+          const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
+          const currentMsgCount = msgs.length
+
+          if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
+            stablePolls++
+            if (stablePolls >= STABILITY_POLLS_REQUIRED) {
+              break  // Messages stable for 3 polls - task complete
+            }
+          } else {
+            stablePolls = 0
+            lastMsgCount = currentMsgCount
          }
        }

@@ -459,7 +532,8 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          return `❌ No assistant response found.\n\nSession ID: ${sessionID}`
        }
        
-        const textParts = lastMessage?.parts?.filter((p) => p.type === "text") ?? []
+        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
+        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")

        const duration = formatDuration(startTime)
--- a/src/tools/skill/tools.ts
+++ b/src/tools/skill/tools.ts
@@ -194,4 +194,4 @@ export function createSkillTool(options: SkillLoadOptions = {}): ToolDefinition
  })
 }

-export const skill = createSkillTool()
+export const skill: ToolDefinition = createSkillTool()
--- a/src/tools/slashcommand/tools.ts
+++ b/src/tools/slashcommand/tools.ts
@@ -249,4 +249,4 @@ export function createSlashcommandTool(options: SlashcommandToolOptions = {}): T
 }

 // Default instance for backward compatibility (lazy loading)
-export const slashcommand = createSlashcommandTool()
+export const slashcommand: ToolDefinition = createSlashcommandTool()