release: v3.0.0-beta.4

fix(sisyphus-orchestrator): add cross-platform path validation for Windows support
Add isSisyphusPath() helper function that handles both forward slashes (Unix) and backslashes (Windows) using regex pattern /\.sisyphus[/\\]/. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-11 05:46:20 +00:00 · 2026-01-11 14:42:53 +09:00 · 2026-01-11 14:33:17 +09:00 · 2026-01-11 05:03:55 +00:00 · 2026-01-11 12:33:20 +09:00 · 2026-01-11 12:23:13 +09:00
55 changed files with 2147 additions and 379 deletions
--- a/.github/assets/orchestrator-sisyphus.png
+++ b/.github/assets/orchestrator-sisyphus.png
--- a/.github/assets/sisyphuslabs.png
+++ b/.github/assets/sisyphuslabs.png
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -1,5 +1,5 @@
 name: publish
-run-name: "${{ format('release {0} ({1})', inputs.bump, inputs.tag || 'latest') }}"
+run-name: "${{ format('release {0}', inputs.bump) }}"

 on:
  workflow_dispatch:
@@ -16,15 +16,6 @@ on:
        description: "Override version (optional)"
        required: false
        type: string
-      tag:
-        description: "npm dist-tag (latest, beta, next)"
-        required: false
-        type: choice
-        default: "latest"
-        options:
-          - latest
-          - beta
-          - next

 concurrency: ${{ github.workflow }}-${{ github.ref }}

@@ -135,7 +126,6 @@ jobs:
        env:
          BUMP: ${{ inputs.bump }}
          VERSION: ${{ inputs.version }}
-          NPM_TAG: ${{ inputs.tag || 'latest' }}
          CI: true
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_CONFIG_PROVENANCE: true
@@ -146,7 +136,6 @@ jobs:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Merge to master
-        if: inputs.tag == 'latest' || inputs.tag == ''
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,7 +1,7 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-01-02T22:41:22+09:00
-**Commit:** d0694e5
+**Generated:** 2026-01-09T15:38:00+09:00
+**Commit:** 0581793
 **Branch:** dev

 ## OVERVIEW
@@ -22,7 +22,7 @@ oh-my-opencode/
 │   ├── cli/           # CLI installer, doctor - see src/cli/AGENTS.md
 │   ├── mcp/           # MCP configs: context7, grep_app
 │   ├── config/        # Zod schema, TypeScript types
-│   └── index.ts       # Main plugin entry (464 lines)
+│   └── index.ts       # Main plugin entry (548 lines)
 ├── script/            # build-schema.ts, publish.ts, generate-changelog.ts
 ├── assets/            # JSON schema
 └── dist/              # Build output (ESM + .d.ts)
@@ -50,6 +50,7 @@ oh-my-opencode/
 | Shared utilities | `src/shared/` | Cross-cutting utilities |
 | Slash commands | `src/hooks/auto-slash-command/` | Auto-detect and execute `/command` patterns |
 | Ralph Loop | `src/hooks/ralph-loop/` | Self-referential dev loop until completion |
+| Orchestrator | `src/hooks/sisyphus-orchestrator/` | Main orchestration hook (660 lines) |

 ## TDD (Test-Driven Development)

@@ -64,15 +65,16 @@ oh-my-opencode/

 | Phase | Action | Verification |
 |-------|--------|--------------|
-| **RED** | Write test describing expected behavior | `bun test` → FAIL (expected) |
-| **GREEN** | Implement minimum code to pass | `bun test` → PASS |
-| **REFACTOR** | Improve code quality, remove duplication | `bun test` → PASS (must stay green) |
+| **RED** | Write test describing expected behavior | `bun test` -> FAIL (expected) |
+| **GREEN** | Implement minimum code to pass | `bun test` -> PASS |
+| **REFACTOR** | Improve code quality, remove duplication | `bun test` -> PASS (must stay green) |

 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests to "pass" - fix the code
 - One test at a time - don't batch
 - Test file naming: `*.test.ts` alongside source
+- BDD comments: `#given`, `#when`, `#then` (equivalent to Arrange-Act-Assert, AAA)

 ## CONVENTIONS

@@ -81,7 +83,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern in index.ts; explicit named exports for tools/hooks
 - **Naming**: kebab-case directories, createXXXHook/createXXXTool factories
- **Testing**: BDD comments `#given`, `#when`, `#then` (same as AAA); TDD workflow (RED-GREEN-REFACTOR)
+- **Testing**: BDD comments `#given/#when/#then`, TDD workflow (RED-GREEN-REFACTOR)
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS (THIS PROJECT)
@@ -99,6 +101,11 @@ oh-my-opencode/
 - **Sequential agent calls**: Use `sisyphus_task` for parallel execution
 - **Heavy PreToolUse logic**: Slows every tool call
 - **Self-planning for complex tasks**: Spawn planning agent (Prometheus) instead
+- **Trust agent self-reports**: ALWAYS verify results independently
+- **Skip TODO creation**: Multi-step tasks MUST have todos first
+- **Batch completions**: Mark TODOs complete immediately, don't group
+- **Giant commits**: 3+ files = 2+ commits minimum
+- **Separate test from impl**: Test and implementation always go in the same commit

 ## UNIQUE STYLES

@@ -114,10 +121,10 @@ oh-my-opencode/
 ## AGENT MODELS

 | Agent | Default Model | Purpose |
-|-------|-------|---------|
+|-------|---------------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator |
 | oracle | openai/gpt-5.2 | Read-only consultation. High-IQ debugging, architecture |
-| librarian | anthropic/claude-sonnet-4-5 | Multi-repo analysis, docs |
+| librarian | opencode/glm-4.7-free | Multi-repo analysis, docs |
 | explore | opencode/grok-code | Fast codebase exploration |
 | frontend-ui-ux-engineer | google/gemini-3-pro-preview | UI generation |
 | document-writer | google/gemini-3-pro-preview | Technical docs |
@@ -130,7 +137,7 @@ bun run typecheck      # Type check
 bun run build          # ESM + declarations + schema
 bun run rebuild        # Clean + Build
 bun run build:schema   # Schema only
-bun test               # Run tests
+bun test               # Run tests (76 test files, 2559+ BDD assertions)
 ```

 ## DEPLOYMENT
@@ -153,18 +160,23 @@ bun test               # Run tests

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/index.ts` | 464 | Main plugin, all hook/tool init |
-| `src/cli/config-manager.ts` | 669 | JSONC parsing, env detection |
-| `src/auth/antigravity/fetch.ts` | 621 | Token refresh, URL rewriting |
-| `src/tools/lsp/client.ts` | 611 | LSP protocol, JSON-RPC |
-| `src/auth/antigravity/response.ts` | 598 | Response transformation, streaming |
-| `src/auth/antigravity/thinking.ts` | 571 | Thinking block extraction/transformation |
-| `src/hooks/anthropic-context-window-limit-recovery/executor.ts` | 564 | Multi-stage recovery |
-| `src/agents/sisyphus.ts` | 504 | Orchestrator prompt |
+| `src/agents/orchestrator-sisyphus.ts` | 1484 | Orchestrator agent, complex delegation |
+| `src/features/builtin-skills/skills.ts` | 1230 | Skill definitions (frontend-ui-ux, playwright) |
+| `src/agents/prometheus-prompt.ts` | 982 | Planning agent system prompt |
+| `src/auth/antigravity/fetch.ts` | 798 | Token refresh, URL rewriting |
+| `src/auth/antigravity/thinking.ts` | 755 | Thinking block extraction |
+| `src/cli/config-manager.ts` | 725 | JSONC parsing, env detection |
+| `src/hooks/sisyphus-orchestrator/index.ts` | 660 | Orchestrator hook impl |
+| `src/agents/sisyphus.ts` | 641 | Main Sisyphus prompt |
+| `src/tools/lsp/client.ts` | 612 | LSP protocol, JSON-RPC |
+| `src/features/background-agent/manager.ts` | 608 | Task lifecycle |
+| `src/auth/antigravity/response.ts` | 599 | Response transformation, streaming |
+| `src/hooks/anthropic-context-window-limit-recovery/executor.ts` | 556 | Multi-stage recovery |
+| `src/index.ts` | 548 | Main plugin, all hook/tool init |

 ## NOTES

- **Testing**: Bun native test (`bun test`), BDD-style `#given/#when/#then`, 360+ tests
+- **Testing**: Bun native test (`bun test`), BDD-style `#given/#when/#then`, 76 test files
 - **OpenCode**: Requires >= 1.0.150
 - **Multi-lang docs**: README.md (EN), README.ko.md (KO), README.ja.md (JA), README.zh-cn.md (ZH-CN)
 - **Config**: `~/.config/opencode/oh-my-opencode.json` (user) or `.opencode/oh-my-opencode.json` (project)
--- a/README.ja.md
+++ b/README.ja.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *「私はエージェントが生成したコードと人間が書いたコードを区別できない、しかしはるかに多くのことを達成できる世界を作り、ソフトウェア革命を起こすことを目指しています。私はこの旅に個人的な時間、情熱、そして資金を注ぎ込んできましたし、これからもそうし続けます。」*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **Sisyphusの完全製品化バージョンを構築中です。フロンティアエージェントの未来を定義します。<br />[こちら](https://sisyphuslabs.ai)からウェイトリストに参加してください。**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **オーケストレーターが来ます。今週中に。[Xで通知を受け取る](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **オーケストレーターがベータ版で利用可能になりました。`oh-my-opencode@3.0.0-beta.1`を使用してインストールしてください。**
 >
 > 一緒に歩みましょう！
 >
@@ -25,7 +28,7 @@

 > `oh-my-opencode` をインストールして、ドーピングしたかのようにコーディングしましょう。バックグラウンドでエージェントを走らせ、oracle、librarian、frontend engineer のような専門エージェントを呼び出してください。丹精込めて作られた LSP/AST ツール、厳選された MCP、そして完全な Claude Code 互換レイヤーを、たった一行で手に入れましょう。

-**今すぐ始めましょう。ChatGPT、Claude、Gemini のサブスクリプションで使えます。**
+**注意: librarianには高価なモデルを使用しないでください。これはあなたにとって役に立たないだけでなく、LLMプロバイダーにも負担をかけます。代わりにClaude Haiku、Gemini Flash、GLM 4.7、MiniMaxなどのモデルを使用してください。**

 <div align="center">

@@ -73,6 +76,7 @@
 - [Oh My OpenCode](#oh-my-opencode)
  - [この Readme は読まなくていいです](#この-readme-は読まなくていいです)
    - [エージェントの時代ですから](#エージェントの時代ですから)
+    - [🪄 魔法の言葉：`ultrawork`](#-魔法の言葉ultrawork)
    - [読みたい方のために：シジフォスに会う](#読みたい方のためにシジフォスに会う)
      - [インストールするだけで。](#インストールするだけで)
  - [インストール](#インストール)
@@ -85,10 +89,15 @@
    - [ステップ 4: 認証情報の設定](#ステップ-4-認証情報の設定)
      - [4.1 Anthropic (Claude)](#41-anthropic-claude)
      - [4.2 Google Gemini (Antigravity OAuth)](#42-google-gemini-antigravity-oauth)
+      - [4.2.1 モデル設定](#421-モデル設定)
+      - [4.2.2 oh-my-opencode エージェントモデルのオーバーライド](#422-oh-my-opencode-エージェントモデルのオーバーライド)
      - [4.3 OpenAI (ChatGPT Plus/Pro)](#43-openai-chatgpt-pluspro)
+        - [モデル設定](#モデル設定)
    - [⚠️ 注意](#️-注意)
    - [セットアップの確認](#セットアップの確認)
    - [ユーザーに「おめでとうございます！🎉」と伝える](#ユーザーにおめでとうございますと伝える)
+    - [無料広告](#無料広告)
+    - [スターをお願いする ⭐](#スターをお願いする-)
    - [複雑すぎますか？](#複雑すぎますか)
  - [アンインストール](#アンインストール)
  - [機能](#機能)
@@ -96,7 +105,8 @@
    - [バックグラウンドエージェント: 本当のチームのように働く](#バックグラウンドエージェント-本当のチームのように働く)
    - [ツール: 同僚にはもっと良い道具を](#ツール-同僚にはもっと良い道具を)
      - [なぜあなただけ IDE を使っているのですか？](#なぜあなただけ-ide-を使っているのですか)
-      - [Context is all you need.](#context-is-all-you-need)
+      - [セッション管理](#セッション管理)
+      - [Context Is All You Need](#context-is-all-you-need)
      - [マルチモーダルを活用し、トークンは節約する](#マルチモーダルを活用しトークンは節約する)
      - [止まらないエージェントループ](#止まらないエージェントループ)
    - [Claude Code 互換性: さらば Claude Code、ようこそ OpenCode](#claude-code-互換性-さらば-claude-codeようこそ-opencode)
@@ -106,16 +116,20 @@
      - [互換性トグル](#互換性トグル)
    - [エージェントのためだけでなく、あなたのために](#エージェントのためだけでなくあなたのために)
  - [設定](#設定)
+    - [JSONC のサポート](#jsonc-のサポート)
    - [Google Auth](#google-auth)
    - [Agents](#agents)
      - [Permission オプション](#permission-オプション)
    - [Sisyphus Agent](#sisyphus-agent)
+    - [Background Tasks](#background-tasks)
    - [Hooks](#hooks)
    - [MCPs](#mcps)
    - [LSP](#lsp)
    - [Experimental](#experimental)
  - [作者のノート](#作者のノート)
  - [注意](#注意)
+  - [こちらの企業の専門家にご愛用いただいています](#こちらの企業の専門家にご愛用いただいています)
+  - [スポンサー](#スポンサー)

 # Oh My OpenCode

@@ -319,9 +333,9 @@ opencode auth login
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -458,7 +472,7 @@ oh-my-opencode を削除するには：

 - **Sisyphus** (`anthropic/claude-opus-4-5`): **デフォルトエージェントです。** OpenCode のための強力な AI オーケストレーターです。専門のサブエージェントを活用して、複雑なタスクを計画、委任、実行します。バックグラウンドタスクへの委任と Todo ベースのワークフローを重視します。最大の推論能力を発揮するため、Claude Opus 4.5 と拡張思考 (32k token budget) を使用します。
 - **oracle** (`openai/gpt-5.2`): アーキテクチャ、コードレビュー、戦略立案のための専門アドバイザー。GPT-5.2 の卓越した論理的推論と深い分析能力を活用します。AmpCode からインスピレーションを得ました。
- **librarian** (`anthropic/claude-sonnet-4-5` または `google/gemini-3-flash`): マルチリポジトリ分析、ドキュメント検索、実装例の調査を担当。Antigravity 認証が設定されている場合は Gemini 3 Flash を使用し、それ以外は Claude Sonnet 4.5 を使用して、深いコードベース理解と GitHub リサーチ、根拠に基づいた回答を提供します。AmpCode からインスピレーションを得ました。
+- **librarian** (`opencode/glm-4.7-free`): マルチリポジトリ分析、ドキュメント検索、実装例の調査を担当。GLM-4.7 Free を使用して、深いコードベース理解と GitHub リサーチ、根拠に基づいた回答を提供します。AmpCode からインスピレーションを得ました。
 - **explore** (`opencode/grok-code`、`google/gemini-3-flash`、または `anthropic/claude-haiku-4-5`): 高速なコードベース探索、ファイルパターンマッチング。Antigravity 認証が設定されている場合は Gemini 3 Flash を使用し、Claude max20 が利用可能な場合は Haiku を使用し、それ以外は Grok を使います。Claude Code からインスピレーションを得ました。
 - **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`): 開発者に転身したデザイナーという設定です。素晴らしい UI を作ります。美しく独創的な UI コードを生成することに長けた Gemini を使用します。
 - **document-writer** (`google/gemini-3-pro-preview`): テクニカルライティングの専門家という設定です。Gemini は文筆家であり、流れるような文章を書きます。
@@ -718,10 +732,10 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 1. `.opencode/oh-my-opencode.json` (プロジェクト)
 2. ユーザー設定（プラットフォーム別）：

-| プラットフォーム | ユーザー設定パス |
-|------------------|------------------|
-| **Windows** | `~/.config/opencode/oh-my-opencode.json` (推奨) または `%APPDATA%\opencode\oh-my-opencode.json` (fallback) |
-| **macOS/Linux** | `~/.config/opencode/oh-my-opencode.json` |
+| プラットフォーム | ユーザー設定パス                                                                                           |
+| ---------------- | ---------------------------------------------------------------------------------------------------------- |
+| **Windows**      | `~/.config/opencode/oh-my-opencode.json` (推奨) または `%APPDATA%\opencode\oh-my-opencode.json` (fallback) |
+| **macOS/Linux**  | `~/.config/opencode/oh-my-opencode.json`                                                                   |

 スキーマ自動補完がサポートされています：

@@ -745,10 +759,10 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 ```jsonc
 {
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
-  
+
  // Antigravity OAuth 経由で Google Gemini を有効にする
  "google_auth": false,
-  
+
  /* エージェントのオーバーライド - 特定のタスクに合わせてモデルをカスタマイズ */
  "agents": {
    "oracle": {
@@ -771,9 +785,9 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -838,13 +852,13 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 }
 ```

-| Permission | 説明 | 値 |
-|------------|------|----|
-| `edit` | ファイル編集権限 | `ask` / `allow` / `deny` |
-| `bash` | Bash コマンド実行権限 | `ask` / `allow` / `deny` またはコマンド別: `{ "git": "allow", "rm": "deny" }` |
-| `webfetch` | ウェブアクセス権限 | `ask` / `allow` / `deny` |
-| `doom_loop` | 無限ループ検知のオーバーライド許可 | `ask` / `allow` / `deny` |
-| `external_directory` | プロジェクトルート外へのファイルアクセス | `ask` / `allow` / `deny` |
+| Permission           | 説明                                     | 値                                                                            |
+| -------------------- | ---------------------------------------- | ----------------------------------------------------------------------------- |
+| `edit`               | ファイル編集権限                         | `ask` / `allow` / `deny`                                                      |
+| `bash`               | Bash コマンド実行権限                    | `ask` / `allow` / `deny` またはコマンド別: `{ "git": "allow", "rm": "deny" }` |
+| `webfetch`           | ウェブアクセス権限                       | `ask` / `allow` / `deny`                                                      |
+| `doom_loop`          | 無限ループ検知のオーバーライド許可       | `ask` / `allow` / `deny`                                                      |
+| `external_directory` | プロジェクトルート外へのファイルアクセス | `ask` / `allow` / `deny`                                                      |

 または `~/.config/opencode/oh-my-opencode.json` か `.opencode/oh-my-opencode.json` の `disabled_agents` を使用して無効化できます：

@@ -922,12 +936,12 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 }
 ```

-| オプション                  | デフォルト | 説明                                                                                                                                                         |
-| --------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `disabled`                  | `false` | `true` の場合、すべての Sisyphus オーケストレーションを無効化し、元の build/plan をプライマリとして復元します。                                                                       |
-| `default_builder_enabled`   | `false` | `true` の場合、OpenCode-Builder エージェントを有効化します（OpenCode build と同じ、SDK 制限により名前変更）。デフォルトでは無効です。                                                   |
-| `planner_enabled`           | `true`  | `true` の場合、Prometheus (Planner) エージェントを有効化します（work-planner 方法論を含む）。デフォルトで有効です。                                                                   |
-| `replace_plan`              | `true`  | `true` の場合、デフォルトのプランエージェントをサブエージェントモードに降格させます。`false` に設定すると、Prometheus (Planner) とデフォルトのプランの両方を利用できます。                             |
+| オプション                | デフォルト | 説明                                                                                                                                                                       |
+| ------------------------- | ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `disabled`                | `false`    | `true` の場合、すべての Sisyphus オーケストレーションを無効化し、元の build/plan をプライマリとして復元します。                                                            |
+| `default_builder_enabled` | `false`    | `true` の場合、OpenCode-Builder エージェントを有効化します（OpenCode build と同じ、SDK 制限により名前変更）。デフォルトでは無効です。                                      |
+| `planner_enabled`         | `true`     | `true` の場合、Prometheus (Planner) エージェントを有効化します（work-planner 方法論を含む）。デフォルトで有効です。                                                        |
+| `replace_plan`            | `true`     | `true` の場合、デフォルトのプランエージェントをサブエージェントモードに降格させます。`false` に設定すると、Prometheus (Planner) とデフォルトのプランの両方を利用できます。 |

 ### Background Tasks

@@ -950,10 +964,10 @@ Oh My OpenCode は以下の場所からフックを読み込んで実行しま
 }
 ```

-| オプション            | デフォルト | 説明                                                                                                           |
-| --------------------- | ---------- | -------------------------------------------------------------------------------------------------------------- |
-| `defaultConcurrency`  | -          | すべてのプロバイダー/モデルに対するデフォルトの最大同時バックグラウンドタスク数                                 |
-| `providerConcurrency` | -          | プロバイダーごとの同時実行制限。キーはプロバイダー名（例：`anthropic`、`openai`、`google`）                     |
+| オプション            | デフォルト | 説明                                                                                                                  |
+| --------------------- | ---------- | --------------------------------------------------------------------------------------------------------------------- |
+| `defaultConcurrency`  | -          | すべてのプロバイダー/モデルに対するデフォルトの最大同時バックグラウンドタスク数                                       |
+| `providerConcurrency` | -          | プロバイダーごとの同時実行制限。キーはプロバイダー名（例：`anthropic`、`openai`、`google`）                           |
 | `modelConcurrency`    | -          | モデルごとの同時実行制限。キーは完全なモデル名（例：`anthropic/claude-opus-4-5`）。プロバイダー制限より優先されます。 |

 **優先順位**: `modelConcurrency` > `providerConcurrency` > `defaultConcurrency`
@@ -1032,13 +1046,13 @@ OpenCode でサポートされるすべての LSP 構成およびカスタム設
 }
 ```

-| オプション                        | デフォルト | 説明                                                                                                                                                                   |
-| --------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `preemptive_compaction_threshold` | `0.85`     | プリエンプティブコンパクションをトリガーする閾値（0.5-0.95）。`preemptive-compaction` フックはデフォルトで有効です。このオプションで閾値をカスタマイズできます。                 |
+| オプション                        | デフォルト | 説明                                                                                                                                                                               |
+| --------------------------------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `preemptive_compaction_threshold` | `0.85`     | プリエンプティブコンパクションをトリガーする閾値（0.5-0.95）。`preemptive-compaction` フックはデフォルトで有効です。このオプションで閾値をカスタマイズできます。                   |
 | `truncate_all_tool_outputs`       | `false`    | ホワイトリストのツール（Grep、Glob、LSP、AST-grep）だけでなく、すべてのツール出力を切り詰めます。Tool output truncator はデフォルトで有効です - `disabled_hooks`で無効化できます。 |
-| `aggressive_truncation`           | `false`    | トークン制限を超えた場合、ツール出力を積極的に切り詰めて制限内に収めます。デフォルトの切り詰めより積極的です。不十分な場合は要約/復元にフォールバックします。                 |
-| `auto_resume`                     | `false`    | thinking block エラーや thinking disabled violation からの回復成功後、自動的にセッションを再開します。最後のユーザーメッセージを抽出して続行します。                        |
-| `dcp_for_compaction`              | `false`    | コンパクション用DCP（動的コンテキスト整理）を有効化 - トークン制限超過時に最初に実行されます。コンパクション前に重複したツール呼び出しと古いツール出力を整理します。                |
+| `aggressive_truncation`           | `false`    | トークン制限を超えた場合、ツール出力を積極的に切り詰めて制限内に収めます。デフォルトの切り詰めより積極的です。不十分な場合は要約/復元にフォールバックします。                      |
+| `auto_resume`                     | `false`    | thinking block エラーや thinking disabled violation からの回復成功後、自動的にセッションを再開します。最後のユーザーメッセージを抽出して続行します。                               |
+| `dcp_for_compaction`              | `false`    | コンパクション用DCP（動的コンテキスト整理）を有効化 - トークン制限超過時に最初に実行されます。コンパクション前に重複したツール呼び出しと古いツール出力を整理します。               |

 **警告**：これらの機能は実験的であり、予期しない動作を引き起こす可能性があります。影響を理解した場合にのみ有効にしてください。

--- a/README.md
+++ b/README.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *"I aim to spark a software revolution by creating a world where agent-generated code is indistinguishable from human code, yet capable of achieving vastly more. I have poured my personal time, passion, and funds into this journey, and I will continue to do so."*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **We're building a fully productized version of Sisyphus to define the future of frontier agents. <br />Join the waitlist [here](https://sisyphuslabs.ai).**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **The Orchestrator is coming. This Week. [Get notified on X](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **The Orchestrator is now available in beta. Use `oh-my-opencode@3.0.0-beta.1` to install it.**
 >
 > Be with us!
 >
@@ -26,10 +29,7 @@
 > This is coding on steroids—`oh-my-opencode` in action. Run background agents, call specialized agents like oracle, librarian, and frontend engineer. Use crafted LSP/AST tools, curated MCPs, and a full Claude Code compatibility layer.


-No stupid token consumption massive subagents here. No bloat tools here.
-
-**Certified, Verified, Tested, Actually Useful Harness in Production, after $24,000 worth of tokens spent.**
-**START WITH YOUR ChatGPT, Claude, Gemini SUBSCRIPTIONS. WE ALL COVER THEM.**
+**Notice: Do not use expensive models for librarian. Doing so is not only unhelpful to you but also burdens LLM providers. Use models like Claude Haiku, Gemini Flash, GLM 4.7, or MiniMax instead.**


 <div align="center">
@@ -125,6 +125,7 @@ No stupid token consumption massive subagents here. No bloat tools here.
    - [Agents](#agents)
      - [Permission Options](#permission-options)
    - [Built-in Skills](#built-in-skills)
+    - [Git Master](#git-master)
    - [Sisyphus Agent](#sisyphus-agent)
    - [Background Tasks](#background-tasks)
    - [Categories](#categories)
@@ -132,6 +133,7 @@ No stupid token consumption massive subagents here. No bloat tools here.
    - [MCPs](#mcps)
    - [LSP](#lsp)
    - [Experimental](#experimental)
+    - [Environment Variables](#environment-variables)
  - [Author's Note](#authors-note)
  - [Warnings](#warnings)
  - [Loved by professionals at](#loved-by-professionals-at)
@@ -358,9 +360,9 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -496,9 +498,9 @@ To remove oh-my-opencode:

 - **Sisyphus** (`anthropic/claude-opus-4-5`): **The default agent.** A powerful AI orchestrator for OpenCode. Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Emphasizes background task delegation and todo-driven workflow. Uses Claude Opus 4.5 with extended thinking (32k budget) for maximum reasoning capability.
 - **oracle** (`openai/gpt-5.2`): Architecture, code review, strategy. Uses GPT-5.2 for its stellar logical reasoning and deep analysis. Inspired by AmpCode.
- **librarian** (`anthropic/claude-sonnet-4-5` or `google/gemini-3-flash`): Multi-repo analysis, doc lookup, implementation examples. Uses Gemini 3 Flash when Antigravity auth is configured, otherwise Claude Sonnet 4.5 for deep codebase understanding and GitHub research with evidence-based answers. Inspired by AmpCode.
+- **librarian** (`opencode/glm-4.7-free`): Multi-repo analysis, doc lookup, implementation examples. Uses GLM-4.7 Free for deep codebase understanding and GitHub research with evidence-based answers. Inspired by AmpCode.
 - **explore** (`opencode/grok-code`, `google/gemini-3-flash`, or `anthropic/claude-haiku-4-5`): Fast codebase exploration and pattern matching. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise Grok. Inspired by Claude Code.
- **frontend-ui-ux-engineer** (`google/gemini-3-pro-high`): A designer turned developer. Builds gorgeous UIs. Gemini excels at creative, beautiful UI code.
+- **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`): A designer turned developer. Builds gorgeous UIs. Gemini excels at creative, beautiful UI code.
 - **document-writer** (`google/gemini-3-flash`): Technical writing expert. Gemini is a wordsmith—writes prose that flows.
 - **multimodal-looker** (`google/gemini-3-flash`): Visual content specialist. Analyzes PDFs, images, diagrams to extract information.

@@ -827,9 +829,9 @@ When using `opencode-antigravity-auth`, disable the built-in auth and override a
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -942,10 +944,10 @@ Configure git-master skill behavior:
 }
 ```

-| Option | Default | Description |
-| ------ | ------- | ----------- |
-| `commit_footer` | `true` | Adds "Ultraworked with Sisyphus" footer to commit messages. |
-| `include_co_authored_by` | `true` | Adds `Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>` trailer to commits. |
+| Option                   | Default | Description                                                                      |
+| ------------------------ | ------- | -------------------------------------------------------------------------------- |
+| `commit_footer`          | `true`  | Adds "Ultraworked with Sisyphus" footer to commit messages.                      |
+| `include_co_authored_by` | `true`  | Adds `Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>` trailer to commits. |

 ### Sisyphus Agent

@@ -1013,12 +1015,12 @@ You can also customize Sisyphus agents like other agents:
 }
 ```

-| Option                      | Default | Description                                                                                                                                         |
-| --------------------------- | ------- | --------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `disabled`                  | `false` | When `true`, disables all Sisyphus orchestration and restores original build/plan as primary.                                                       |
-| `default_builder_enabled`   | `false` | When `true`, enables OpenCode-Builder agent (same as OpenCode build, renamed due to SDK limitations). Disabled by default.                         |
-| `planner_enabled`           | `true`  | When `true`, enables Prometheus (Planner) agent with work-planner methodology. Enabled by default.                                                 |
-| `replace_plan`              | `true`  | When `true`, demotes default plan agent to subagent mode. Set to `false` to keep both Prometheus (Planner) and default plan available.             |
+| Option                    | Default | Description                                                                                                                            |
+| ------------------------- | ------- | -------------------------------------------------------------------------------------------------------------------------------------- |
+| `disabled`                | `false` | When `true`, disables all Sisyphus orchestration and restores original build/plan as primary.                                          |
+| `default_builder_enabled` | `false` | When `true`, enables OpenCode-Builder agent (same as OpenCode build, renamed due to SDK limitations). Disabled by default.             |
+| `planner_enabled`         | `true`  | When `true`, enables Prometheus (Planner) agent with work-planner methodology. Enabled by default.                                     |
+| `replace_plan`            | `true`  | When `true`, demotes default plan agent to subagent mode. Set to `false` to keep both Prometheus (Planner) and default plan available. |

 ### Background Tasks

@@ -1060,10 +1062,10 @@ Categories enable domain-specific task delegation via the `sisyphus_task` tool.

 **Default Categories:**

-| Category | Model | Description |
-|----------|-------|-------------|
-| `visual` | `google/gemini-3-pro-preview` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7). |
-| `business-logic` | `openai/gpt-5.2` | Backend logic, architecture, strategic reasoning. Low creativity (temp 0.1). |
+| Category         | Model                         | Description                                                                  |
+| ---------------- | ----------------------------- | ---------------------------------------------------------------------------- |
+| `visual`         | `google/gemini-3-pro-preview` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7).           |
+| `business-logic` | `openai/gpt-5.2`              | Backend logic, architecture, strategic reasoning. Low creativity (temp 0.1). |

 **Usage:**

@@ -1089,7 +1091,7 @@ Add custom categories in `oh-my-opencode.json`:
      "prompt_append": "Focus on data analysis, ML pipelines, and statistical methods."
    },
    "visual": {
-      "model": "google/gemini-3-pro-high",
+      "model": "google/gemini-3-pro-preview",
      "prompt_append": "Use shadcn/ui components and Tailwind CSS."
    }
  }
@@ -1178,6 +1180,12 @@ Opt-in experimental features that may change or be removed in future versions. U

 **Warning**: These features are experimental and may cause unexpected behavior. Enable only if you understand the implications.

+### Environment Variables
+
+| Variable              | Description                                                                                                                                     |
+| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
+| `OPENCODE_CONFIG_DIR` | Override the OpenCode configuration directory. Useful for profile isolation with tools like [OCX](https://github.com/kdcokenny/ocx) ghost mode. |
+

 ## Author's Note

--- a/README.zh-cn.md
+++ b/README.zh-cn.md
@@ -1,9 +1,12 @@
 > [!NOTE]
 >
-> *"我致力于引发一场软件革命，创造一个AI生成的代码与人类代码无法区分、却能实现更多的世界。我已经在这段旅程中投入了个人时间、热情和资金，并将继续这样做。"*
+> [![Sisyphus Labs — Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
+> > **我们正在构建Sisyphus的完全产品化版本，定义前沿代理的未来。<br />[点击此处](https://sisyphuslabs.ai)加入候补名单。**
+
+> [!TIP]
 >
-> [![The Orchestrator is coming](./.github/assets/orchestrator-sisyphus.png)](https://x.com/justsisyphus/status/2006250634354548963)
-> > **编排器即将到来。就在本周。[在X上获取通知](https://x.com/justsisyphus/status/2006250634354548963)**
+> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-sisyphus.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.1)
+> > **编排器现已推出测试版。使用`oh-my-opencode@3.0.0-beta.1`来安装。**
 >
 > 与我们同行！
 >
@@ -25,9 +28,7 @@
 > 装上 `oh-my-opencode`，编程体验直接起飞。后台跑着一堆 Agent，随时呼叫 Oracle、Librarian、Frontend Engineer 这些专家。精心打磨的 LSP/AST 工具、精选 MCP、完美的 Claude Code 兼容层——一行配置，全套带走。

 这里没有为了显摆而疯狂烧 Token 的臃肿 Subagent。没有垃圾工具。
-
-**这是烧了 24,000 美元 Token 换来的、真正经过生产环境验证、测试、靠谱的 Harness。**
-**拿着你的 ChatGPT、Claude、Gemini 订阅直接就能用。我们全包圆了。**
+**注意：请勿为 librarian 使用昂贵的模型。这不仅对你没有帮助，还会给 LLM 提供商带来负担。请使用 Claude Haiku、Gemini Flash、GLM 4.7 或 MiniMax 等模型。**

 <div align="center">

@@ -75,6 +76,7 @@
 - [Oh My OpenCode](#oh-my-opencode)
  - [太长不看？(TL;DR)](#太长不看tldr)
    - [现在是 Agent 的时代](#现在是-agent-的时代)
+    - [🪄 魔法口令：`ultrawork`](#-魔法口令ultrawork)
    - [如果你真的想读读看：认识西西弗斯](#如果你真的想读读看认识西西弗斯)
      - [闭眼装就行](#闭眼装就行)
  - [安装](#安装)
@@ -87,10 +89,15 @@
    - [步骤 4：搞定认证](#步骤-4搞定认证)
      - [4.1 Anthropic (Claude)](#41-anthropic-claude)
      - [4.2 Google Gemini (Antigravity OAuth)](#42-google-gemini-antigravity-oauth)
+        - [模型配置](#模型配置)
+        - [oh-my-opencode Agent 模型覆盖](#oh-my-opencode-agent-模型覆盖)
      - [4.3 OpenAI (ChatGPT Plus/Pro)](#43-openai-chatgpt-pluspro)
+        - [模型配置](#模型配置-1)
    - [⚠️ 注意](#️-注意)
    - [检查作业](#检查作业)
    - [跟用户说"恭喜！🎉"](#跟用户说恭喜)
+    - [免费广告](#免费广告)
+    - [求个 Star ⭐](#求个-star-)
    - [太麻烦了？](#太麻烦了)
  - [卸载](#卸载)
  - [功能](#功能)
@@ -98,6 +105,7 @@
    - [后台 Agent：像真正的团队一样干活](#后台-agent像真正的团队一样干活)
    - [工具：给队友配点好的](#工具给队友配点好的)
      - [凭什么只有你能用 IDE？](#凭什么只有你能用-ide)
+      - [会话管理 (Session Management)](#会话管理-session-management)
      - [上下文就是一切 (Context is all you need)](#上下文就是一切-context-is-all-you-need)
      - [多模态全开，Token 省着用](#多模态全开token-省着用)
      - [根本停不下来的 Agent Loop](#根本停不下来的-agent-loop)
@@ -108,16 +116,20 @@
      - [兼容性开关](#兼容性开关)
    - [不只是为了 Agent，也是为了你](#不只是为了-agent也是为了你)
  - [配置](#配置)
+    - [JSONC 支持](#jsonc-支持)
    - [Google Auth](#google-auth)
    - [Agents](#agents)
      - [权限选项](#权限选项)
    - [Sisyphus Agent](#sisyphus-agent)
+    - [Background Tasks（后台任务）](#background-tasks后台任务)
    - [Hooks](#hooks)
    - [MCPs](#mcps)
    - [LSP](#lsp)
    - [Experimental](#experimental)
  - [作者的话](#作者的话)
  - [注意事项](#注意事项)
+  - [以下企业的专业人士都在用](#以下企业的专业人士都在用)
+  - [赞助者](#赞助者)

 # Oh My OpenCode

@@ -324,9 +336,9 @@ opencode auth login
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -463,7 +475,7 @@ gh repo star code-yeongyu/oh-my-opencode

 - **Sisyphus** (`anthropic/claude-opus-4-5`)：**默认 Agent。** OpenCode 专属的强力 AI 编排器。指挥专业子 Agent 搞定复杂任务。主打后台任务委派和 Todo 驱动。用 Claude Opus 4.5 加上扩展思考（32k token 预算），智商拉满。
 - **oracle** (`openai/gpt-5.2`)：架构师、代码审查员、战略家。GPT-5.2 的逻辑推理和深度分析能力不是盖的。致敬 AmpCode。
- **librarian** (`anthropic/claude-sonnet-4-5` 或 `google/gemini-3-flash`)：多仓库分析、查文档、找示例。配置 Antigravity 认证时使用 Gemini 3 Flash，否则使用 Claude Sonnet 4.5 深入理解代码库，GitHub 调研，给出的答案都有据可查。致敬 AmpCode。
+- **librarian** (`opencode/glm-4.7-free`)：多仓库分析、查文档、找示例。使用 GLM-4.7 Free 深入理解代码库，GitHub 调研，给出的答案都有据可查。致敬 AmpCode。
 - **explore** (`opencode/grok-code`、`google/gemini-3-flash` 或 `anthropic/claude-haiku-4-5`)：极速代码库扫描、模式匹配。配置 Antigravity 认证时使用 Gemini 3 Flash，Claude max20 可用时使用 Haiku，否则用 Grok。致敬 Claude Code。
 - **frontend-ui-ux-engineer** (`google/gemini-3-pro-preview`)：设计师出身的程序员。UI 做得那是真漂亮。Gemini 写这种创意美观的代码是一绝。
 - **document-writer** (`google/gemini-3-pro-preview`)：技术写作专家。Gemini 文笔好，写出来的东西读着顺畅。
@@ -719,10 +731,10 @@ Agent 爽了，你自然也爽。但我还想直接让你爽。
 1. `.opencode/oh-my-opencode.json`（项目级）
 2. 用户配置（按平台）：

-| 平台 | 用户配置路径 |
-|----------|------------------|
-| **Windows** | `~/.config/opencode/oh-my-opencode.json` (首选) 或 `%APPDATA%\opencode\oh-my-opencode.json` (备选) |
-| **macOS/Linux** | `~/.config/opencode/oh-my-opencode.json` |
+| 平台            | 用户配置路径                                                                                       |
+| --------------- | -------------------------------------------------------------------------------------------------- |
+| **Windows**     | `~/.config/opencode/oh-my-opencode.json` (首选) 或 `%APPDATA%\opencode\oh-my-opencode.json` (备选) |
+| **macOS/Linux** | `~/.config/opencode/oh-my-opencode.json`                                                           |

 支持 Schema 自动补全：

@@ -746,10 +758,10 @@ Agent 爽了，你自然也爽。但我还想直接让你爽。
 ```jsonc
 {
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
-  
+
  // 通过 Antigravity OAuth 启用 Google Gemini
  "google_auth": false,
-  
+
  /* Agent 覆盖 - 为特定任务自定义模型 */
  "agents": {
    "oracle": {
@@ -772,9 +784,9 @@ Agent 爽了，你自然也爽。但我还想直接让你爽。
 {
  "google_auth": false,
  "agents": {
-    "frontend-ui-ux-engineer": { "model": "google/gemini-3-pro-high" },
-    "document-writer": { "model": "google/gemini-3-flash" },
-    "multimodal-looker": { "model": "google/gemini-3-flash" }
+    "frontend-ui-ux-engineer": { "model": "google/antigravity-gemini-3-pro-high" },
+    "document-writer": { "model": "google/antigravity-gemini-3-flash" },
+    "multimodal-looker": { "model": "google/antigravity-gemini-3-flash" }
  }
 }
 ```
@@ -839,13 +851,13 @@ Agent 爽了，你自然也爽。但我还想直接让你爽。
 }
 ```

-| Permission           | 说明                     | 值                                                                   |
-| -------------------- | ------------------------ | -------------------------------------------------------------------- |
-| `edit`               | 改文件                   | `ask` / `allow` / `deny`                                             |
-| `bash`               | 跑 Bash 命令             | `ask` / `allow` / `deny` 或按命令：`{ "git": "allow", "rm": "deny" }` |
-| `webfetch`           | 上网                     | `ask` / `allow` / `deny`                                             |
-| `doom_loop`          | 覆盖无限循环检测         | `ask` / `allow` / `deny`                                             |
-| `external_directory` | 访问根目录外面的文件     | `ask` / `allow` / `deny`                                             |
+| Permission           | 说明                 | 值                                                                    |
+| -------------------- | -------------------- | --------------------------------------------------------------------- |
+| `edit`               | 改文件               | `ask` / `allow` / `deny`                                              |
+| `bash`               | 跑 Bash 命令         | `ask` / `allow` / `deny` 或按命令：`{ "git": "allow", "rm": "deny" }` |
+| `webfetch`           | 上网                 | `ask` / `allow` / `deny`                                              |
+| `doom_loop`          | 覆盖无限循环检测     | `ask` / `allow` / `deny`                                              |
+| `external_directory` | 访问根目录外面的文件 | `ask` / `allow` / `deny`                                              |

 或者在 `~/.config/opencode/oh-my-opencode.json` 或 `.opencode/oh-my-opencode.json` 的 `disabled_agents` 里直接禁了：

@@ -923,12 +935,12 @@ Sisyphus Agent 也能自定义：
 }
 ```

-| 选项                        | 默认值   | 说明                                                                                                                                              |
-| --------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `disabled`                  | `false` | 设为 `true` 就禁用所有 Sisyphus 编排，恢复原来的 build/plan。                                                                                              |
-| `default_builder_enabled`   | `false` | 设为 `true` 就启用 OpenCode-Builder Agent（与 OpenCode build 相同，因 SDK 限制仅改名）。默认禁用。                                                           |
-| `planner_enabled`           | `true`  | 设为 `true` 就启用 Prometheus (Planner) Agent（含 work-planner 方法论）。默认启用。                                                                         |
-| `replace_plan`              | `true`  | 设为 `true` 就把默认计划 Agent 降级为子 Agent 模式。设为 `false` 可以同时保留 Prometheus (Planner) 和默认计划。                                                      |
+| 选项                      | 默认值  | 说明                                                                                                            |
+| ------------------------- | ------- | --------------------------------------------------------------------------------------------------------------- |
+| `disabled`                | `false` | 设为 `true` 就禁用所有 Sisyphus 编排，恢复原来的 build/plan。                                                   |
+| `default_builder_enabled` | `false` | 设为 `true` 就启用 OpenCode-Builder Agent（与 OpenCode build 相同，因 SDK 限制仅改名）。默认禁用。              |
+| `planner_enabled`         | `true`  | 设为 `true` 就启用 Prometheus (Planner) Agent（含 work-planner 方法论）。默认启用。                             |
+| `replace_plan`            | `true`  | 设为 `true` 就把默认计划 Agent 降级为子 Agent 模式。设为 `false` 可以同时保留 Prometheus (Planner) 和默认计划。 |

 ### Background Tasks（后台任务）

@@ -951,11 +963,11 @@ Sisyphus Agent 也能自定义：
 }
 ```

-| 选项                  | 默认值 | 说明                                                                                                           |
-| --------------------- | ------ | -------------------------------------------------------------------------------------------------------------- |
-| `defaultConcurrency`  | -      | 所有提供商/模型的默认最大并发后台任务数                                                                        |
-| `providerConcurrency` | -      | 按提供商设置并发限制。键是提供商名称（例如：`anthropic`、`openai`、`google`）                                  |
-| `modelConcurrency`    | -      | 按模型设置并发限制。键是完整的模型名称（例如：`anthropic/claude-opus-4-5`）。会覆盖提供商级别的限制。          |
+| 选项                  | 默认值 | 说明                                                                                                  |
+| --------------------- | ------ | ----------------------------------------------------------------------------------------------------- |
+| `defaultConcurrency`  | -      | 所有提供商/模型的默认最大并发后台任务数                                                               |
+| `providerConcurrency` | -      | 按提供商设置并发限制。键是提供商名称（例如：`anthropic`、`openai`、`google`）                         |
+| `modelConcurrency`    | -      | 按模型设置并发限制。键是完整的模型名称（例如：`anthropic/claude-opus-4-5`）。会覆盖提供商级别的限制。 |

 **优先级顺序**: `modelConcurrency` > `providerConcurrency` > `defaultConcurrency`

@@ -1033,13 +1045,13 @@ Oh My OpenCode 送你重构工具（重命名、代码操作）。
 }
 ```

-| 选项                              | 默认值  | 说明                                                                                                                                           |
-| --------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| `preemptive_compaction_threshold` | `0.85`  | 触发预防性压缩的阈值比例（0.5-0.95）。`preemptive-compaction` 钩子默认启用；此选项用于自定义阈值。                                                     |
-| `truncate_all_tool_outputs`       | `false` | 截断所有工具输出，而不仅仅是白名单工具（Grep、Glob、LSP、AST-grep）。Tool output truncator 默认启用 - 使用 `disabled_hooks` 禁用。                    |
-| `aggressive_truncation`           | `false` | 超出 token 限制时，激进地截断工具输出以适应限制。比默认截断更激进。不够的话会回退到摘要/恢复。                                                     |
-| `auto_resume`                     | `false` | 从 thinking block 错误或 thinking disabled violation 成功恢复后，自动恢复会话。提取最后一条用户消息继续执行。                                     |
-| `dcp_for_compaction`              | `false` | 启用压缩用 DCP（动态上下文剪枝）- 在超出 token 限制时首先执行。在压缩前清理重复的工具调用和旧的工具输出。                                            |
+| 选项                              | 默认值  | 说明                                                                                                                               |
+| --------------------------------- | ------- | ---------------------------------------------------------------------------------------------------------------------------------- |
+| `preemptive_compaction_threshold` | `0.85`  | 触发预防性压缩的阈值比例（0.5-0.95）。`preemptive-compaction` 钩子默认启用；此选项用于自定义阈值。                                 |
+| `truncate_all_tool_outputs`       | `false` | 截断所有工具输出，而不仅仅是白名单工具（Grep、Glob、LSP、AST-grep）。Tool output truncator 默认启用 - 使用 `disabled_hooks` 禁用。 |
+| `aggressive_truncation`           | `false` | 超出 token 限制时，激进地截断工具输出以适应限制。比默认截断更激进。不够的话会回退到摘要/恢复。                                     |
+| `auto_resume`                     | `false` | 从 thinking block 错误或 thinking disabled violation 成功恢复后，自动恢复会话。提取最后一条用户消息继续执行。                      |
+| `dcp_for_compaction`              | `false` | 启用压缩用 DCP（动态上下文剪枝）- 在超出 token 限制时首先执行。在压缩前清理重复的工具调用和旧的工具输出。                          |

 **警告**：这些功能是实验性的，可能会导致意外行为。只有在理解其影响的情况下才启用。

--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -27,7 +27,9 @@
          "frontend-ui-ux-engineer",
          "document-writer",
          "multimodal-looker",
-          "Metis (Plan Consultant)"
+          "Metis (Plan Consultant)",
+          "Momus (Plan Reviewer)",
+          "orchestrator-sisyphus"
        ]
      }
    },
@@ -832,6 +834,129 @@
            }
          }
        },
+        "Momus (Plan Reviewer)": {
+          "type": "object",
+          "properties": {
+            "model": {
+              "type": "string"
+            },
+            "category": {
+              "type": "string"
+            },
+            "skills": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1
+            },
+            "prompt": {
+              "type": "string"
+            },
+            "prompt_append": {
+              "type": "string"
+            },
+            "tools": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {
+                "type": "boolean"
+              }
+            },
+            "disable": {
+              "type": "boolean"
+            },
+            "description": {
+              "type": "string"
+            },
+            "mode": {
+              "type": "string",
+              "enum": [
+                "subagent",
+                "primary",
+                "all"
+              ]
+            },
+            "color": {
+              "type": "string",
+              "pattern": "^#[0-9A-Fa-f]{6}$"
+            },
+            "permission": {
+              "type": "object",
+              "properties": {
+                "edit": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "bash": {
+                  "anyOf": [
+                    {
+                      "type": "string",
+                      "enum": [
+                        "ask",
+                        "allow",
+                        "deny"
+                      ]
+                    },
+                    {
+                      "type": "object",
+                      "propertyNames": {
+                        "type": "string"
+                      },
+                      "additionalProperties": {
+                        "type": "string",
+                        "enum": [
+                          "ask",
+                          "allow",
+                          "deny"
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "webfetch": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "doom_loop": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "external_directory": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                }
+              }
+            }
+          }
+        },
        "oracle": {
          "type": "object",
          "properties": {
@@ -1569,6 +1694,129 @@
              }
            }
          }
+        },
+        "orchestrator-sisyphus": {
+          "type": "object",
+          "properties": {
+            "model": {
+              "type": "string"
+            },
+            "category": {
+              "type": "string"
+            },
+            "skills": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            "temperature": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 2
+            },
+            "top_p": {
+              "type": "number",
+              "minimum": 0,
+              "maximum": 1
+            },
+            "prompt": {
+              "type": "string"
+            },
+            "prompt_append": {
+              "type": "string"
+            },
+            "tools": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {
+                "type": "boolean"
+              }
+            },
+            "disable": {
+              "type": "boolean"
+            },
+            "description": {
+              "type": "string"
+            },
+            "mode": {
+              "type": "string",
+              "enum": [
+                "subagent",
+                "primary",
+                "all"
+              ]
+            },
+            "color": {
+              "type": "string",
+              "pattern": "^#[0-9A-Fa-f]{6}$"
+            },
+            "permission": {
+              "type": "object",
+              "properties": {
+                "edit": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "bash": {
+                  "anyOf": [
+                    {
+                      "type": "string",
+                      "enum": [
+                        "ask",
+                        "allow",
+                        "deny"
+                      ]
+                    },
+                    {
+                      "type": "object",
+                      "propertyNames": {
+                        "type": "string"
+                      },
+                      "additionalProperties": {
+                        "type": "string",
+                        "enum": [
+                          "ask",
+                          "allow",
+                          "deny"
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "webfetch": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "doom_loop": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                },
+                "external_directory": {
+                  "type": "string",
+                  "enum": [
+                    "ask",
+                    "allow",
+                    "deny"
+                  ]
+                }
+              }
+            }
+          }
        }
      }
    },
--- a/docs/orchestration-guide.md
+++ b/docs/orchestration-guide.md
@@ -1,5 +1,26 @@
 # Oh-My-OpenCode Orchestration Guide

+## TL;DR - When to Use What
+
+| Complexity | Approach | When to Use |
+|------------|----------|-------------|
+| **Simple** | Just prompt | Simple tasks, quick fixes, single-file changes |
+| **Complex + Lazy** | Just type `ulw` or `ultrawork` | Complex tasks where explaining context is tedious. Agent figures it out. |
+| **Complex + Precise** | `@plan` → `/start-work` | Precise, multi-step work requiring true orchestration. Prometheus plans, Sisyphus executes. |
+
+**Decision Flow:**
+```
+Is it a quick fix or simple task?
+  └─ YES → Just prompt normally
+  └─ NO  → Is explaining the full context tedious?
+             └─ YES → Type "ulw" and let the agent figure it out
+             └─ NO  → Do you need precise, verifiable execution?
+                        └─ YES → Use @plan for Prometheus planning, then /start-work
+                        └─ NO  → Just use "ulw"
+```
+
+---
+
 This document provides a comprehensive guide to the orchestration system that implements Oh-My-OpenCode's core philosophy: **"Separation of Planning and Execution"**.

 ## 1. Overview
@@ -16,7 +37,7 @@ Oh-My-OpenCode solves this by clearly separating two roles:
 ## 2. Overall Architecture

 ```mermaid
-graph TD
+flowchart TD
    User[User Request] --> Prometheus
    
    subgraph Planning Phase
@@ -24,10 +45,10 @@ graph TD
        Metis --> Prometheus
        Prometheus --> Momus[Momus<br>Reviewer]
        Momus --> Prometheus
-        Prometheus --> PlanFile[/.sisyphus/plans/*.md]
+        Prometheus --> PlanFile["/.sisyphus/plans/{name}.md"]
    end
    
-    PlanFile --> StartWork[/start-work]
+    PlanFile --> StartWork[//start-work/]
    StartWork --> BoulderState[boulder.json]
    
    subgraph Execution Phase
@@ -93,9 +114,9 @@ When the user enters `/start-work`, the execution phase begins.

 ## 5. Commands and Usage

-### `/plan [request]`
+### `@plan [request]`
 Invokes Prometheus to start a planning session.
- Example: `/plan "I want to refactor the authentication system to NextAuth"`
+- Example: `@plan "I want to refactor the authentication system to NextAuth"`

 ### `/start-work`
 Executes the generated plan.
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.0.0-beta.1",
+  "version": "3.0.0-beta.4",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
--- a/script/publish.ts
+++ b/script/publish.ts
@@ -5,7 +5,6 @@ import { $ } from "bun"
 const PACKAGE_NAME = "oh-my-opencode"
 const bump = process.env.BUMP as "major" | "minor" | "patch" | undefined
 const versionOverride = process.env.VERSION
-const npmTag = process.env.NPM_TAG || "latest"

 console.log("=== Publishing oh-my-opencode ===\n")

@@ -107,12 +106,22 @@ async function getContributors(previous: string): Promise<string[]> {
  return notes
 }

-async function buildAndPublish(): Promise<void> {
-  console.log(`\nPublishing to npm with tag: ${npmTag}...`)
+function getDistTag(version: string): string | null {
+  if (!version.includes("-")) return null
+  const prerelease = version.split("-")[1]
+  const tag = prerelease?.split(".")[0]
+  return tag || "next"
+}
+
+async function buildAndPublish(version: string): Promise<void> {
+  console.log("\nPublishing to npm...")
+  const distTag = getDistTag(version)
+  const tagArgs = distTag ? ["--tag", distTag] : []
+  
  if (process.env.CI) {
-    await $`npm publish --access public --provenance --ignore-scripts --tag ${npmTag}`
+    await $`npm publish --access public --provenance --ignore-scripts ${tagArgs}`
  } else {
-    await $`npm publish --access public --ignore-scripts --tag ${npmTag}`
+    await $`npm publish --access public --ignore-scripts ${tagArgs}`
  }
 }

@@ -174,7 +183,7 @@ async function main() {
  const contributors = await getContributors(previous)
  const notes = [...changelog, ...contributors]

-  await buildAndPublish()
+  await buildAndPublish(newVersion)
  await gitTagAndRelease(newVersion, notes)

  console.log(`\n=== Successfully published ${PACKAGE_NAME}@${newVersion} ===`)
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -311,6 +311,102 @@
      "created_at": "2026-01-08T10:02:26Z",
      "repoId": 1108837393,
      "pullRequestNo": 592
+    },
+    {
+      "name": "xLillium",
+      "id": 16964936,
+      "comment_id": 3725604869,
+      "created_at": "2026-01-08T20:18:27Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 603
+    },
+    {
+      "name": "SJY0917032",
+      "id": 88534701,
+      "comment_id": 3728199745,
+      "created_at": "2026-01-09T10:01:19Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 625
+    },
+    {
+      "name": "kdcokenny",
+      "id": 99611484,
+      "comment_id": 3728801075,
+      "created_at": "2026-01-09T12:54:05Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 629
+    },
+    {
+      "name": "ElwinLiu",
+      "id": 87802244,
+      "comment_id": 3731812585,
+      "created_at": "2026-01-10T04:32:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 645
+    },
+    {
+      "name": "Luodian",
+      "id": 15847405,
+      "comment_id": 3731833107,
+      "created_at": "2026-01-10T05:01:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 634
+    },
+    {
+      "name": "imarshallwidjaja",
+      "id": 60992624,
+      "comment_id": 3732124681,
+      "created_at": "2026-01-10T07:58:43Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 648
+    },
+    {
+      "name": "GollyJer",
+      "id": 689204,
+      "comment_id": 3732253764,
+      "created_at": "2026-01-10T09:33:21Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 649
+    },
+    {
+      "name": "kargnas",
+      "id": 1438533,
+      "comment_id": 3732344143,
+      "created_at": "2026-01-10T10:25:25Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 653
+    },
+    {
+      "name": "ashir6892",
+      "id": 52703606,
+      "comment_id": 3733435826,
+      "created_at": "2026-01-10T19:50:07Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 675
+    },
+    {
+      "name": "arthur404dev",
+      "id": 59490008,
+      "comment_id": 3733697071,
+      "created_at": "2026-01-10T23:51:44Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 676
+    },
+    {
+      "name": "KNN-07",
+      "id": 55886589,
+      "comment_id": 3733788592,
+      "created_at": "2026-01-11T01:11:38Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 679
+    },
+    {
+      "name": "aw338WoWmUI",
+      "id": 121638634,
+      "comment_id": 3734013343,
+      "created_at": "2026-01-11T04:56:38Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 681
    }
  ]
 }
--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -8,13 +8,18 @@ AI agent definitions for multi-model orchestration. 7 specialized agents: Sisyph

 ```
 agents/
-├── sisyphus.ts              # Primary orchestrator (Claude Opus 4.5)
+├── orchestrator-sisyphus.ts # Orchestrator agent (1484 lines) - complex delegation
+├── sisyphus.ts              # Main Sisyphus prompt (641 lines)
+├── sisyphus-junior.ts       # Junior variant for delegated tasks
 ├── oracle.ts                # Strategic advisor (GPT-5.2)
 ├── librarian.ts             # Multi-repo research (Claude Sonnet 4.5)
 ├── explore.ts               # Fast codebase grep (Grok Code)
 ├── frontend-ui-ux-engineer.ts  # UI generation (Gemini 3 Pro)
-├── document-writer.ts       # Technical docs (Gemini 3 Flash)
+├── document-writer.ts       # Technical docs (Gemini 3 Pro)
 ├── multimodal-looker.ts     # PDF/image analysis (Gemini 3 Flash)
+├── prometheus-prompt.ts     # Planning agent prompt (982 lines)
+├── metis.ts                 # Plan Consultant agent (404 lines)
+├── momus.ts                 # Plan Reviewer agent (404 lines)
 ├── build-prompt.ts          # Shared build agent prompt
 ├── plan-prompt.ts           # Shared plan agent prompt
 ├── types.ts                 # AgentModelConfig interface
@@ -28,7 +33,7 @@ agents/
 |-------|---------------|----------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | - | Primary orchestrator with extended thinking |
 | oracle | openai/gpt-5.2 | - | Read-only consultation. High-IQ debugging, architecture |
-| librarian | anthropic/claude-sonnet-4-5 | google/gemini-3-flash | Docs, OSS research, GitHub examples |
+| librarian | opencode/glm-4.7-free | - | Docs, OSS research, GitHub examples |
 | explore | opencode/grok-code | google/gemini-3-flash, anthropic/claude-haiku-4-5 | Fast contextual grep |
 | frontend-ui-ux-engineer | google/gemini-3-pro-preview | - | UI/UX code generation |
 | document-writer | google/gemini-3-pro-preview | - | Technical writing |
--- a/src/agents/librarian.ts
+++ b/src/agents/librarian.ts
@@ -1,6 +1,5 @@
 import type { AgentConfig } from "@opencode-ai/sdk"
 import type { AgentPromptMetadata } from "./types"
-import { createAgentToolRestrictions } from "../shared/permission-compat"

 const DEFAULT_MODEL = "opencode/glm-4.7-free"

@@ -22,26 +21,18 @@ export const LIBRARIAN_PROMPT_METADATA: AgentPromptMetadata = {
 }

 export function createLibrarianAgent(model: string = DEFAULT_MODEL): AgentConfig {
-  const restrictions = createAgentToolRestrictions([
-    "write",
-    "edit",
-    "task",
-    "sisyphus_task",
-    "call_omo_agent",
-  ])
-
  return {
    description:
      "Specialized codebase understanding agent for multi-repository analysis, searching remote codebases, retrieving official documentation, and finding implementation examples using GitHub CLI, Context7, and Web Search. MUST BE USED when users ask to look up code in remote repositories, explain library internals, or find usage examples in open source.",
    mode: "subagent" as const,
    model,
    temperature: 0.1,
-    ...restrictions,
+    tools: { write: false, edit: false, background_task: false },
    prompt: `# THE LIBRARIAN

 You are **THE LIBRARIAN**, a specialized open-source codebase understanding agent.

-Your job: Answer questions about open-source libraries. Provide **EVIDENCE** with **GitHub permalinks** when the question requires verification, implementation details, or current/version-specific information. For well-known APIs and stable concepts, answer directly from knowledge.
+Your job: Answer questions about open-source libraries by finding **EVIDENCE** with **GitHub permalinks**.

 ## CRITICAL: DATE AWARENESS

@@ -53,20 +44,64 @@ Your job: Answer questions about open-source libraries. Provide **EVIDENCE** wit

 ---

-## PHASE 0: ASSESS BEFORE SEARCHING
+## PHASE 0: REQUEST CLASSIFICATION (MANDATORY FIRST STEP)

-**First**: Can you answer confidently from training knowledge? If yes, answer directly.
-
-**Search when**: version-specific info, implementation internals, recent changes, unfamiliar libraries, user explicitly requests source/examples.
-
-**If search needed**, classify into:
+Classify EVERY request into one of these categories before taking action:

 | Type | Trigger Examples | Tools |
 |------|------------------|-------|
-| **TYPE A: CONCEPTUAL** | "How do I use X?", "Best practice for Y?" | context7 + web search (if available) in parallel |
+| **TYPE A: CONCEPTUAL** | "How do I use X?", "Best practice for Y?" | Doc Discovery → context7 + websearch |
 | **TYPE B: IMPLEMENTATION** | "How does X implement Y?", "Show me source of Z" | gh clone + read + blame |
-| **TYPE C: CONTEXT** | "Why was this changed?", "What's the history?", "Related issues/PRs?" | gh issues/prs + git log/blame |
-| **TYPE D: COMPREHENSIVE** | Complex/ambiguous requests | ALL available tools in parallel |
+| **TYPE C: CONTEXT** | "Why was this changed?", "History of X?" | gh issues/prs + git log/blame |
+| **TYPE D: COMPREHENSIVE** | Complex/ambiguous requests | Doc Discovery → ALL tools |
+
+---
+
+## PHASE 0.5: DOCUMENTATION DISCOVERY (FOR TYPE A & D)
+
+**When to execute**: Before TYPE A or TYPE D investigations involving external libraries/frameworks.
+
+### Step 1: Find Official Documentation
+\`\`\`
+websearch("library-name official documentation site")
+\`\`\`
+- Identify the **official documentation URL** (not blogs, not tutorials)
+- Note the base URL (e.g., \`https://docs.example.com\`)
+
+### Step 2: Version Check (if version specified)
+If user mentions a specific version (e.g., "React 18", "Next.js 14", "v2.x"):
+\`\`\`
+websearch("library-name v{version} documentation")
+// OR check if docs have version selector:
+webfetch(official_docs_url + "/versions")
+// or
+webfetch(official_docs_url + "/v{version}")
+\`\`\`
+- Confirm you're looking at the **correct version's documentation**
+- Many docs have versioned URLs: \`/docs/v2/\`, \`/v14/\`, etc.
+
+### Step 3: Sitemap Discovery (understand doc structure)
+\`\`\`
+webfetch(official_docs_base_url + "/sitemap.xml")
+// Fallback options:
+webfetch(official_docs_base_url + "/sitemap-0.xml")
+webfetch(official_docs_base_url + "/docs/sitemap.xml")
+\`\`\`
+- Parse sitemap to understand documentation structure
+- Identify relevant sections for the user's question
+- This prevents random searching—you now know WHERE to look
+
+### Step 4: Targeted Investigation
+With sitemap knowledge, fetch the SPECIFIC documentation pages relevant to the query:
+\`\`\`
+webfetch(specific_doc_page_from_sitemap)
+context7_query-docs(libraryId: id, query: "specific topic")
+\`\`\`
+
+**Skip Doc Discovery when**:
+- TYPE B (implementation) - you're cloning repos anyway
+- TYPE C (context/history) - you're looking at issues/PRs
+- Library has no official docs (rare OSS projects)

 ---

@@ -75,15 +110,15 @@ Your job: Answer questions about open-source libraries. Provide **EVIDENCE** wit
 ### TYPE A: CONCEPTUAL QUESTION
 **Trigger**: "How do I...", "What is...", "Best practice for...", rough/general questions

-**If searching**, use tools as needed:
+**Execute Documentation Discovery FIRST (Phase 0.5)**, then:
 \`\`\`
 Tool 1: context7_resolve-library-id("library-name")
-        → then context7_get-library-docs(id, topic: "specific-topic")
-Tool 2: grep_app_searchGitHub(query: "usage pattern", language: ["TypeScript"])
-Tool 3 (optional): If web search is available, search "library-name topic 2025"
+        → then context7_query-docs(libraryId: id, query: "specific-topic")
+Tool 2: webfetch(relevant_pages_from_sitemap)  // Targeted, not random
+Tool 3: grep_app_searchGitHub(query: "usage pattern", language: ["TypeScript"])
 \`\`\`

-**Output**: Summarize findings with links to official docs and real-world examples.
+**Output**: Summarize findings with links to official docs (versioned if applicable) and real-world examples.

 ---

@@ -94,20 +129,20 @@ Tool 3 (optional): If web search is available, search "library-name topic 2025"
 \`\`\`
 Step 1: Clone to temp directory
        gh repo clone owner/repo \${TMPDIR:-/tmp}/repo-name -- --depth 1
-        
+
 Step 2: Get commit SHA for permalinks
        cd \${TMPDIR:-/tmp}/repo-name && git rev-parse HEAD
-        
+
 Step 3: Find the implementation
        - grep/ast_grep_search for function/class
        - read the specific file
        - git blame for context if needed
-        
+
 Step 4: Construct permalink
        https://github.com/owner/repo/blob/<sha>/path/to/file#L10-L20
 \`\`\`

-**For faster results, parallelize**:
+**Parallel acceleration (4+ calls)**:
 \`\`\`
 Tool 1: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1
 Tool 2: grep_app_searchGitHub(query: "function_name", repo: "owner/repo")
@@ -120,7 +155,7 @@ Tool 4: context7_get-library-docs(id, topic: "relevant-api")
 ### TYPE C: CONTEXT & HISTORY
 **Trigger**: "Why was this changed?", "What's the history?", "Related issues/PRs?"

-**Tools to use**:
+**Execute in parallel (4+ calls)**:
 \`\`\`
 Tool 1: gh search issues "keyword" --repo owner/repo --state all --limit 10
 Tool 2: gh search prs "keyword" --repo owner/repo --state merged --limit 10
@@ -142,22 +177,21 @@ gh api repos/owner/repo/pulls/<number>/files
 ### TYPE D: COMPREHENSIVE RESEARCH
 **Trigger**: Complex questions, ambiguous requests, "deep dive into..."

-**Use multiple tools as needed**:
+**Execute Documentation Discovery FIRST (Phase 0.5)**, then execute in parallel (6+ calls):
 \`\`\`
-// Documentation
-Tool 1: context7_resolve-library-id → context7_get-library-docs
+// Documentation (informed by sitemap discovery)
+Tool 1: context7_resolve-library-id → context7_query-docs
+Tool 2: webfetch(targeted_doc_pages_from_sitemap)

 // Code Search
-Tool 2: grep_app_searchGitHub(query: "pattern1", language: [...])
-Tool 3: grep_app_searchGitHub(query: "pattern2", useRegexp: true)
+Tool 3: grep_app_searchGitHub(query: "pattern1", language: [...])
+Tool 4: grep_app_searchGitHub(query: "pattern2", useRegexp: true)

 // Source Analysis
-Tool 4: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1
+Tool 5: gh repo clone owner/repo \${TMPDIR:-/tmp}/repo -- --depth 1

 // Context
-Tool 5: gh search issues "topic" --repo owner/repo
-
-// Optional: If web search is available, search for recent updates
+Tool 6: gh search issues "topic" --repo owner/repo
 \`\`\`

 ---
@@ -202,7 +236,11 @@ https://github.com/tanstack/query/blob/abc123def/packages/react-query/src/useQue

 | Purpose | Tool | Command/Usage |
 |---------|------|---------------|
-| **Official Docs** | context7 | \`context7_resolve-library-id\` → \`context7_get-library-docs\` |
+| **Official Docs** | context7 | \`context7_resolve-library-id\` → \`context7_query-docs\` |
+| **Find Docs URL** | websearch_exa | \`websearch_exa_web_search_exa("library official documentation")\` |
+| **Sitemap Discovery** | webfetch | \`webfetch(docs_url + "/sitemap.xml")\` to understand doc structure |
+| **Read Doc Page** | webfetch | \`webfetch(specific_doc_page)\` for targeted documentation |
+| **Latest Info** | websearch_exa | \`websearch_exa_web_search_exa("query 2025")\` |
 | **Fast Code Search** | grep_app | \`grep_app_searchGitHub(query, language, useRegexp)\` |
 | **Deep Code Search** | gh CLI | \`gh search code "query" --repo owner/repo\` |
 | **Clone Repo** | gh CLI | \`gh repo clone owner/repo \${TMPDIR:-/tmp}/name -- --depth 1\` |
@@ -210,8 +248,6 @@ https://github.com/tanstack/query/blob/abc123def/packages/react-query/src/useQue
 | **View Issue/PR** | gh CLI | \`gh issue/pr view <num> --repo owner/repo --comments\` |
 | **Release Info** | gh CLI | \`gh api repos/owner/repo/releases/latest\` |
 | **Git History** | git | \`git log\`, \`git blame\`, \`git show\` |
-| **Read URL** | webfetch | \`webfetch(url)\` for blog posts, SO threads |
-| **Web Search** | (if available) | Use any available web search tool for latest info |

 ### Temp Directory

@@ -228,16 +264,18 @@ Use OS-appropriate temp directory:

 ---

-## PARALLEL EXECUTION GUIDANCE
+## PARALLEL EXECUTION REQUIREMENTS

-When searching is needed, scale effort to question complexity:
-
-| Request Type | Suggested Calls |
+| Request Type | Suggested Calls | Doc Discovery Required |
-|--------------|----------------|
+|--------------|-----------------|------------------------|
-| TYPE A (Conceptual) | 1-2 |
-| TYPE B (Implementation) | 2-3 |
-| TYPE C (Context) | 2-3 |
-| TYPE D (Comprehensive) | 3-5 |
+| TYPE A (Conceptual) | 1-2 | YES (Phase 0.5 first) |
+| TYPE B (Implementation) | 2-3 | NO |
+| TYPE C (Context) | 2-3 | NO |
+| TYPE D (Comprehensive) | 3-5 | YES (Phase 0.5 first) |
+
+
+**Doc Discovery is SEQUENTIAL** (websearch → version check → sitemap → investigate).
+**Main phase is PARALLEL** once you know where to look.

 **Always vary queries** when using grep_app:
 \`\`\`
@@ -261,6 +299,8 @@ grep_app_searchGitHub(query: "useQuery")
 | grep_app no results | Broaden query, try concept instead of exact name |
 | gh API rate limit | Use cloned repo in temp directory |
 | Repo not found | Search for forks or mirrors |
+| Sitemap not found | Try \`/sitemap-0.xml\`, \`/sitemap_index.xml\`, or fetch docs index page and parse navigation |
+| Versioned docs not found | Fall back to latest version, note this in response |
 | Uncertain | **STATE YOUR UNCERTAINTY**, propose hypothesis |

 ---
@@ -268,7 +308,7 @@ grep_app_searchGitHub(query: "useQuery")
 ## COMMUNICATION RULES

 1. **NO TOOL NAMES**: Say "I'll search the codebase" not "I'll use grep_app"
-2. **NO PREAMBLE**: Answer directly, skip "I'll help you with..." 
+2. **NO PREAMBLE**: Answer directly, skip "I'll help you with..."
 3. **ALWAYS CITE**: Every code claim needs a permalink
 4. **USE MARKDOWN**: Code blocks with language identifiers
 5. **BE CONCISE**: Facts > opinions, evidence > speculation
--- a/src/agents/metis.ts
+++ b/src/agents/metis.ts
@@ -278,16 +278,22 @@ const metisRestrictions = createAgentToolRestrictions([
  "sisyphus_task",
 ])

-export const metisAgent: AgentConfig = {
-  description:
-    "Pre-planning consultant that analyzes requests to identify hidden intentions, ambiguities, and AI failure points.",
-  mode: "subagent" as const,
-  model: "anthropic/claude-opus-4-5",
-  temperature: 0.3,
-  ...metisRestrictions,
-  prompt: METIS_SYSTEM_PROMPT,
-  thinking: { type: "enabled", budgetTokens: 32000 },
-} as AgentConfig
+const DEFAULT_MODEL = "anthropic/claude-opus-4-5"
+
+export function createMetisAgent(model: string = DEFAULT_MODEL): AgentConfig {
+  return {
+    description:
+      "Pre-planning consultant that analyzes requests to identify hidden intentions, ambiguities, and AI failure points.",
+    mode: "subagent" as const,
+    model,
+    temperature: 0.3,
+    ...metisRestrictions,
+    prompt: METIS_SYSTEM_PROMPT,
+    thinking: { type: "enabled", budgetTokens: 32000 },
+  } as AgentConfig
+}
+
+export const metisAgent: AgentConfig = createMetisAgent()

 export const metisPromptMetadata: AgentPromptMetadata = {
  category: "advisor",
--- a/src/agents/orchestrator-sisyphus.ts
+++ b/src/agents/orchestrator-sisyphus.ts
@@ -13,6 +13,7 @@ import { createAgentToolRestrictions } from "../shared/permission-compat"
 */

 export interface OrchestratorContext {
+  model?: string
  availableAgents?: AvailableAgent[]
  availableSkills?: AvailableSkill[]
  userCategories?: Record<string, CategoryConfig>
@@ -131,7 +132,6 @@ ${rows.join("\n")}
 }

 export const ORCHESTRATOR_SISYPHUS_SYSTEM_PROMPT = `You are "Sisyphus" - Powerful AI Agent with orchestration capabilities from OhMyOpenCode.
-Named by [YeonGyu Kim](https://github.com/code-yeongyu).

 **Why Sisyphus?**: Humans roll their boulder every day. So do you. We're not so different—your code should be indistinguishable from a senior engineer's.

@@ -1432,6 +1432,8 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string {
    .replace("{SKILLS_SECTION}", skillsSection)
 }

+const DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
+
 export function createOrchestratorSisyphusAgent(ctx?: OrchestratorContext): AgentConfig {
  const restrictions = createAgentToolRestrictions([
    "task",
@@ -1442,7 +1444,7 @@ export function createOrchestratorSisyphusAgent(ctx?: OrchestratorContext): Agen
    description:
      "Orchestrates work via sisyphus_task() to complete ALL tasks in a todo list until fully done",
    mode: "primary" as const,
-    model: "anthropic/claude-sonnet-4-5",
+    model: ctx?.model ?? DEFAULT_MODEL,
    temperature: 0.1,
    prompt: buildDynamicOrchestratorPrompt(ctx),
    thinking: { type: "enabled", budgetTokens: 32000 },
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -18,7 +18,6 @@ const DEFAULT_MODEL = "anthropic/claude-opus-4-5"

 const SISYPHUS_ROLE_SECTION = `<Role>
 You are "Sisyphus" - Powerful AI Agent with orchestration capabilities from OhMyOpenCode.
-Named by [YeonGyu Kim](https://github.com/code-yeongyu).

 **Why Sisyphus?**: Humans roll their boulder every day. So do you. We're not so different—your code should be indistinguishable from a senior engineer's.

--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -7,9 +7,9 @@ import { createExploreAgent, EXPLORE_PROMPT_METADATA } from "./explore"
 import { createFrontendUiUxEngineerAgent, FRONTEND_PROMPT_METADATA } from "./frontend-ui-ux-engineer"
 import { createDocumentWriterAgent, DOCUMENT_WRITER_PROMPT_METADATA } from "./document-writer"
 import { createMultimodalLookerAgent, MULTIMODAL_LOOKER_PROMPT_METADATA } from "./multimodal-looker"
-import { metisAgent } from "./metis"
+import { createMetisAgent } from "./metis"
 import { createOrchestratorSisyphusAgent, orchestratorSisyphusAgent } from "./orchestrator-sisyphus"
-import { momusAgent } from "./momus"
+import { createMomusAgent } from "./momus"
 import type { AvailableAgent } from "./sisyphus-prompt-builder"
 import { deepMerge } from "../shared"
 import { DEFAULT_CATEGORIES } from "../tools/sisyphus-task/constants"
@@ -25,8 +25,8 @@ const agentSources: Record<BuiltinAgentName, AgentSource> = {
  "frontend-ui-ux-engineer": createFrontendUiUxEngineerAgent,
  "document-writer": createDocumentWriterAgent,
  "multimodal-looker": createMultimodalLookerAgent,
-  "Metis (Plan Consultant)": metisAgent,
-  "Momus (Plan Reviewer)": momusAgent,
+  "Metis (Plan Consultant)": createMetisAgent,
+  "Momus (Plan Reviewer)": createMomusAgent,
  "orchestrator-sisyphus": orchestratorSisyphusAgent,
 }

@@ -176,7 +176,11 @@ export function createBuiltinAgents(

  if (!disabledAgents.includes("orchestrator-sisyphus")) {
    const orchestratorOverride = agentOverrides["orchestrator-sisyphus"]
-    let orchestratorConfig = createOrchestratorSisyphusAgent({ availableAgents })
+    const orchestratorModel = orchestratorOverride?.model
+    let orchestratorConfig = createOrchestratorSisyphusAgent({
+      model: orchestratorModel,
+      availableAgents,
+    })

    if (orchestratorOverride) {
      orchestratorConfig = mergeAgentConfig(orchestratorConfig, orchestratorOverride)
--- a/src/auth/AGENTS.md
+++ b/src/auth/AGENTS.md
@@ -9,16 +9,20 @@ Google Antigravity OAuth for Gemini models. Token management, fetch interception
 ```
 auth/
 └── antigravity/
-    ├── plugin.ts         # Main export, hooks registration
+    ├── plugin.ts         # Main export, hooks registration (554 lines)
    ├── oauth.ts          # OAuth flow, token acquisition
    ├── token.ts          # Token storage, refresh logic
-    ├── fetch.ts          # Fetch interceptor (621 lines)
-    ├── response.ts       # Response transformation (598 lines)
-    ├── thinking.ts       # Thinking block extraction (571 lines)
+    ├── fetch.ts          # Fetch interceptor (798 lines)
+    ├── response.ts       # Response transformation (599 lines)
+    ├── thinking.ts       # Thinking block extraction (755 lines)
    ├── thought-signature-store.ts  # Signature caching
    ├── message-converter.ts        # Format conversion
+    ├── accounts.ts       # Multi-account management
+    ├── browser.ts        # Browser automation for OAuth
+    ├── cli.ts            # CLI interaction
    ├── request.ts        # Request building
    ├── project.ts        # Project ID management
+    ├── storage.ts        # Token persistence
    ├── tools.ts          # OAuth tool registration
    ├── constants.ts      # API endpoints, model mappings
    └── types.ts
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -9,16 +9,20 @@ CLI for oh-my-opencode: interactive installer, health diagnostics (doctor), runt
 ```
 cli/
 ├── index.ts              # Commander.js entry, subcommand routing
-├── install.ts            # Interactive TUI installer (477 lines)
-├── config-manager.ts     # JSONC parsing, env detection (669 lines)
+├── install.ts            # Interactive TUI installer (436 lines)
+├── config-manager.ts     # JSONC parsing, env detection (725 lines)
 ├── types.ts              # CLI-specific types
+├── commands/             # CLI subcommands
 ├── doctor/               # Health check system
 │   ├── index.ts          # Doctor command entry
+│   ├── runner.ts         # Health check orchestration
 │   ├── constants.ts      # Check categories
 │   ├── types.ts          # Check result interfaces
-│   └── checks/           # 17+ individual checks
+│   └── checks/           # 17+ individual checks (auth, config, dependencies, gh, lsp, mcp, opencode, plugin, version)
 ├── get-local-version/    # Version detection
 └── run/                  # OpenCode session launcher
+    ├── completion.ts     # Completion logic
+    └── events.ts         # Event handling
 ```

 ## CLI COMMANDS
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -25,6 +25,8 @@ export const BuiltinAgentNameSchema = z.enum([
  "document-writer",
  "multimodal-looker",
  "Metis (Plan Consultant)",
+  "Momus (Plan Reviewer)",
+  "orchestrator-sisyphus",
 ])

 export const BuiltinSkillNameSchema = z.enum([
@@ -40,12 +42,14 @@ export const OverridableAgentNameSchema = z.enum([
  "OpenCode-Builder",
  "Prometheus (Planner)",
  "Metis (Plan Consultant)",
+  "Momus (Plan Reviewer)",
  "oracle",
  "librarian",
  "explore",
  "frontend-ui-ux-engineer",
  "document-writer",
  "multimodal-looker",
+  "orchestrator-sisyphus",
 ])

 export const AgentNameSchema = BuiltinAgentNameSchema
@@ -118,12 +122,14 @@ export const AgentOverridesSchema = z.object({
  "OpenCode-Builder": AgentOverrideConfigSchema.optional(),
  "Prometheus (Planner)": AgentOverrideConfigSchema.optional(),
  "Metis (Plan Consultant)": AgentOverrideConfigSchema.optional(),
+  "Momus (Plan Reviewer)": AgentOverrideConfigSchema.optional(),
  oracle: AgentOverrideConfigSchema.optional(),
  librarian: AgentOverrideConfigSchema.optional(),
  explore: AgentOverrideConfigSchema.optional(),
  "frontend-ui-ux-engineer": AgentOverrideConfigSchema.optional(),
  "document-writer": AgentOverrideConfigSchema.optional(),
  "multimodal-looker": AgentOverrideConfigSchema.optional(),
+  "orchestrator-sisyphus": AgentOverrideConfigSchema.optional(),
 })

 export const ClaudeCodeConfigSchema = z.object({
@@ -290,6 +296,7 @@ export const GitMasterConfigSchema = z.object({
  /** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
  include_co_authored_by: z.boolean().default(true),
 })
+
 export const OhMyOpenCodeConfigSchema = z.object({
  $schema: z.string().optional(),
  disabled_mcps: z.array(AnyMcpNameSchema).optional(),
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -8,17 +8,23 @@ Claude Code compatibility layer + core feature modules. Commands, skills, agents

 ```
 features/
-├── background-agent/           # Task lifecycle, notifications (460 lines)
+├── background-agent/           # Task lifecycle, notifications (608 lines)
+├── boulder-state/              # Boulder state persistence
 ├── builtin-commands/           # Built-in slash commands
-├── builtin-skills/             # Built-in skills (playwright)
+│   └── templates/              # start-work, refactor, init-deep, ralph-loop
+├── builtin-skills/             # Built-in skills
+│   ├── git-master/             # Atomic commits, rebase, history search
+│   └── frontend-ui-ux/         # Designer-turned-developer skill
 ├── claude-code-agent-loader/   # ~/.claude/agents/*.md
 ├── claude-code-command-loader/ # ~/.claude/commands/*.md
 ├── claude-code-mcp-loader/     # .mcp.json files
 │   └── env-expander.ts         # ${VAR} expansion
-├── claude-code-plugin-loader/  # installed_plugins.json (484 lines)
+├── claude-code-plugin-loader/  # installed_plugins.json (486 lines)
 ├── claude-code-session-state/  # Session state persistence
+├── context-injector/           # Context collection and injection
 ├── opencode-skill-loader/      # Skills from OpenCode + Claude paths
 ├── skill-mcp-manager/          # MCP servers in skill YAML
+├── task-toast-manager/         # Task toast notifications
 └── hook-message-injector/      # Inject messages into conversation
 ```

--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -674,3 +674,95 @@ describe("LaunchInput.skillContent", () => {
    expect(input.skillContent).toBe("You are a playwright expert")
  })
 })
+
+describe("BackgroundManager.notifyParentSession - agent context preservation", () => {
+  test("should not pass agent field when parentAgent is undefined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-no-agent",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task without agent context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: undefined,
+      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect("agent" in promptBody).toBe(false)
+    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus" })
+  })
+
+  test("should include agent field when parentAgent is defined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-with-agent",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task with agent context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: "Sisyphus",
+      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect(promptBody.agent).toBe("Sisyphus")
+  })
+
+  test("should not pass model field when parentModel is undefined", async () => {
+    // #given
+    const task: BackgroundTask = {
+      id: "task-no-model",
+      sessionID: "session-child",
+      parentSessionID: "session-parent",
+      parentMessageID: "msg-parent",
+      description: "task without model context",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      parentAgent: "Sisyphus",
+      parentModel: undefined,
+    }
+
+    // #when
+    const promptBody = buildNotificationPromptBody(task)
+
+    // #then
+    expect("model" in promptBody).toBe(false)
+    expect(promptBody.agent).toBe("Sisyphus")
+  })
+})
+
+function buildNotificationPromptBody(task: BackgroundTask): Record<string, unknown> {
+  const body: Record<string, unknown> = {
+    parts: [{ type: "text", text: `[BACKGROUND TASK COMPLETED] Task "${task.description}" finished.` }],
+  }
+
+  if (task.parentAgent !== undefined) {
+    body.agent = task.parentAgent
+  }
+
+  if (task.parentModel?.providerID && task.parentModel?.modelID) {
+    body.model = { providerID: task.parentModel.providerID, modelID: task.parentModel.modelID }
+  }
+
+  return body
+}
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -13,6 +13,7 @@ import { subagentSessions } from "../claude-code-session-state"
 import { getTaskToastManager } from "../task-toast-manager"

 const TASK_TTL_MS = 30 * 60 * 1000
+const MIN_STABILITY_TIME_MS = 10 * 1000  // Must run at least 10s before stability detection kicks in

 type OpencodeClient = PluginInput["client"]

@@ -43,6 +44,7 @@ interface Todo {
 export class BackgroundManager {
  private tasks: Map<string, BackgroundTask>
  private notifications: Map<string, BackgroundTask[]>
+  private pendingByParent: Map<string, Set<string>>  // Track pending tasks per parent for batching
  private client: OpencodeClient
  private directory: string
  private pollingInterval?: ReturnType<typeof setInterval>
@@ -51,12 +53,20 @@ export class BackgroundManager {
  constructor(ctx: PluginInput, config?: BackgroundTaskConfig) {
    this.tasks = new Map()
    this.notifications = new Map()
+    this.pendingByParent = new Map()
    this.client = ctx.client
    this.directory = ctx.directory
    this.concurrencyManager = new ConcurrencyManager(config)
  }

  async launch(input: LaunchInput): Promise<BackgroundTask> {
+    log("[background-agent] launch() called with:", {
+      agent: input.agent,
+      model: input.model,
+      description: input.description,
+      parentSessionID: input.parentSessionID,
+    })
+
    if (!input.agent || input.agent.trim() === "") {
      throw new Error("Agent parameter is required")
    }
@@ -106,6 +116,11 @@ export class BackgroundManager {
    this.tasks.set(task.id, task)
    this.startPolling()

+    // Track for batched notifications
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(task.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    log("[background-agent] Launching task:", { taskId: task.id, sessionID, agent: input.agent })

    const toastManager = getTaskToastManager()
@@ -119,10 +134,21 @@ export class BackgroundManager {
      })
    }

-    this.client.session.promptAsync({
+    log("[background-agent] Calling prompt (fire-and-forget) for launch with:", {
+      sessionID,
+      agent: input.agent,
+      model: input.model,
+      hasSkillContent: !!input.skillContent,
+      promptLength: input.prompt.length,
+    })
+
+    // Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
+    // Include model if caller provided one (e.g., from Sisyphus category configs)
+    this.client.session.prompt({
      path: { id: sessionID },
      body: {
        agent: input.agent,
+        ...(input.model ? { model: input.model } : {}),
        system: input.skillContent,
        tools: {
          task: false,
@@ -146,7 +172,9 @@ export class BackgroundManager {
          this.concurrencyManager.release(existingTask.concurrencyKey)
        }
        this.markForNotification(existingTask)
-        this.notifyParentSession(existingTask)
+        this.notifyParentSession(existingTask).catch(err => {
+          log("[background-agent] Failed to notify on error:", err)
+        })
      }
    })

@@ -199,6 +227,7 @@ export class BackgroundManager {
    parentSessionID: string
    description: string
    agent?: string
+    parentAgent?: string
  }): BackgroundTask {
    const task: BackgroundTask = {
      id: input.taskId,
@@ -214,12 +243,18 @@ export class BackgroundManager {
        toolCalls: 0,
        lastUpdate: new Date(),
      },
+      parentAgent: input.parentAgent,
    }

    this.tasks.set(task.id, task)
    subagentSessions.add(input.sessionID)
    this.startPolling()

+    // Track for batched notifications (external tasks need tracking too)
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(task.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    log("[background-agent] Registered external task:", { taskId: task.id, sessionID: input.sessionID })

    return task
@@ -247,6 +282,11 @@ export class BackgroundManager {
    this.startPolling()
    subagentSessions.add(existingTask.sessionID)

+    // Track for batched notifications (P2 fix: resumed tasks need tracking too)
+    const pending = this.pendingByParent.get(input.parentSessionID) ?? new Set()
+    pending.add(existingTask.id)
+    this.pendingByParent.set(input.parentSessionID, pending)
+
    const toastManager = getTaskToastManager()
    if (toastManager) {
      toastManager.addTask({
@@ -259,7 +299,15 @@ export class BackgroundManager {

    log("[background-agent] Resuming task:", { taskId: existingTask.id, sessionID: existingTask.sessionID })

-    this.client.session.promptAsync({
+    log("[background-agent] Resuming task - calling prompt (fire-and-forget) with:", {
+      sessionID: existingTask.sessionID,
+      agent: existingTask.agent,
+      promptLength: input.prompt.length,
+    })
+
+    // Note: Don't pass model in body - use agent's configured model instead
+    // Use prompt() instead of promptAsync() to properly initialize agent loop
+    this.client.session.prompt({
      path: { id: existingTask.sessionID },
      body: {
        agent: existingTask.agent,
@@ -270,13 +318,15 @@ export class BackgroundManager {
        parts: [{ type: "text", text: input.prompt }],
      },
    }).catch((error) => {
-      log("[background-agent] resume promptAsync error:", error)
+      log("[background-agent] resume prompt error:", error)
      existingTask.status = "error"
      const errorMessage = error instanceof Error ? error.message : String(error)
      existingTask.error = errorMessage
      existingTask.completedAt = new Date()
      this.markForNotification(existingTask)
-      this.notifyParentSession(existingTask)
+      this.notifyParentSession(existingTask).catch(err => {
+        log("[background-agent] Failed to notify on resume error:", err)
+      })
    })

    return existingTask
@@ -331,7 +381,22 @@ export class BackgroundManager {
      const task = this.findBySession(sessionID)
      if (!task || task.status !== "running") return

-      this.checkSessionTodos(sessionID).then((hasIncompleteTodos) => {
+      // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
+      const elapsedMs = Date.now() - task.startedAt.getTime()
+      const MIN_IDLE_TIME_MS = 5000
+      if (elapsedMs < MIN_IDLE_TIME_MS) {
+        log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
+        return
+      }
+
+      // Edge guard: Verify session has actual assistant output before completing
+      this.validateSessionHasOutput(sessionID).then(async (hasValidOutput) => {
+        if (!hasValidOutput) {
+          log("[background-agent] Session.idle but no valid output yet, waiting:", task.id)
+          return
+        }
+
+        const hasIncompleteTodos = await this.checkSessionTodos(sessionID)
        if (hasIncompleteTodos) {
          log("[background-agent] Task has incomplete todos, waiting for todo-continuation:", task.id)
          return
@@ -340,8 +405,10 @@ export class BackgroundManager {
        task.status = "completed"
        task.completedAt = new Date()
        this.markForNotification(task)
-        this.notifyParentSession(task)
+        await this.notifyParentSession(task)
        log("[background-agent] Task completed via session.idle event:", task.id)
+      }).catch(err => {
+        log("[background-agent] Error in session.idle handler:", err)
      })
    }

@@ -382,6 +449,66 @@ export class BackgroundManager {
    this.notifications.delete(sessionID)
  }

+  /**
+   * Guards task completion: reports whether the session already holds real
+   * assistant/tool output, so that a session.idle arriving before the agent
+   * has responded does not finish the task prematurely.
+   *
+   * @param sessionID - Session whose messages are fetched and inspected.
+   * @returns false when no assistant/tool message exists, or when none of them
+   *   has a contentful part; true otherwise. Also true when the lookup throws,
+   *   so that a failing request cannot block completion indefinitely.
+   */
+  private async validateSessionHasOutput(sessionID: string): Promise<boolean> {
+    // OpenCode API uses different part types than Anthropic's API:
+    // - "text" / "reasoning": count only when .text is non-blank
+    // - "tool": a tool call always counts (indicates work was done)
+    // - "tool_result": counts when .content is non-empty (tool-only tasks)
+    // - anything else, e.g. "step-start"/"step-finish": metadata, never counts
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const partHasContent = (part: any): boolean => {
+      switch (part.type) {
+        case "text":
+        case "reasoning":
+          return Boolean(part.text && part.text.trim().length > 0)
+        case "tool":
+          return true
+        case "tool_result": {
+          const payload = part.content
+          if (!payload) return false
+          return typeof payload === "string" ? payload.trim().length > 0 : payload.length > 0
+        }
+        default:
+          return false
+      }
+    }
+
+    try {
+      const { data } = await this.client.session.messages({ path: { id: sessionID } })
+
+      // Only messages authored by the assistant or by a tool are relevant.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const agentMessages = (data ?? []).filter((message: any) => {
+        const role = message.info?.role
+        return role === "assistant" || role === "tool"
+      })
+      if (agentMessages.length === 0) {
+        log("[background-agent] No assistant/tool messages found in session:", sessionID)
+        return false
+      }
+
+      // At least one of those messages must carry a part with real content.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const anyContent = agentMessages.some((message: any) => (message.parts ?? []).some(partHasContent))
+      if (!anyContent) {
+        log("[background-agent] Messages exist but no content found in session:", sessionID)
+        return false
+      }
+
+      return true
+    } catch (error) {
+      log("[background-agent] Error validating session output:", error)
+      // Deliberately permissive: never block completion on a failed lookup.
+      return true
+    }
+  }
+
  private clearNotificationsForTask(taskId: string): void {
    for (const [sessionID, tasks] of this.notifications.entries()) {
      const filtered = tasks.filter((t) => t.id !== taskId)
@@ -409,17 +536,33 @@ export class BackgroundManager {
    }
  }

-  cleanup(): void {
+cleanup(): void {
    this.stopPolling()
    this.tasks.clear()
    this.notifications.clear()
+    this.pendingByParent.clear()
  }

-  private notifyParentSession(task: BackgroundTask): void {
+  /**
+   * Snapshot of every tracked task whose status is "running" (for compaction hook).
+   */
+  getRunningTasks(): BackgroundTask[] {
+    const tracked = [...this.tasks.values()]
+    return tracked.filter((task) => task.status === "running")
+  }
+
+  /**
+   * Snapshot of every tracked task that is no longer "running", whatever its
+   * final status, and is still held in memory (for compaction hook).
+   */
+  getCompletedTasks(): BackgroundTask[] {
+    const tracked = [...this.tasks.values()]
+    return tracked.filter((task) => task.status !== "running")
+  }
+
+private async notifyParentSession(task: BackgroundTask): Promise<void> {
    const duration = this.formatDuration(task.startedAt, task.completedAt)

    log("[background-agent] notifyParentSession called for task:", task.id)

+    // Show toast notification
    const toastManager = getTaskToastManager()
    if (toastManager) {
      toastManager.showCompletionToast({
@@ -429,41 +572,83 @@ export class BackgroundManager {
      })
    }

-    const message = `[BACKGROUND TASK COMPLETED] Task "${task.description}" finished in ${duration}. Use background_output with task_id="${task.id}" to get results.`
+    // Update pending tracking and check if all tasks complete
+    const pendingSet = this.pendingByParent.get(task.parentSessionID)
+    if (pendingSet) {
+      pendingSet.delete(task.id)
+      if (pendingSet.size === 0) {
+        this.pendingByParent.delete(task.parentSessionID)
+      }
+    }

-    log("[background-agent] Sending notification to parent session:", { parentSessionID: task.parentSessionID })
+    const allComplete = !pendingSet || pendingSet.size === 0
+    const remainingCount = pendingSet?.size ?? 0

+    // Build notification message
+    const statusText = task.status === "error" ? "FAILED" : "COMPLETED"
+    const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
+    
+    let notification: string
+    if (allComplete) {
+      // All tasks complete - build summary
+      const completedTasks = Array.from(this.tasks.values())
+        .filter(t => t.parentSessionID === task.parentSessionID && t.status !== "running")
+        .map(t => `- \`${t.id}\`: ${t.description}`)
+        .join("\n")
+
+      notification = `<system-reminder>
+[ALL BACKGROUND TASKS COMPLETE]
+
+**Completed:**
+${completedTasks || `- \`${task.id}\`: ${task.description}`}
+
+Use \`background_output(task_id="<id>")\` to retrieve each result.
+</system-reminder>`
+    } else {
+      // Individual completion - silent notification
+      notification = `<system-reminder>
+[BACKGROUND TASK ${statusText}]
+**ID:** \`${task.id}\`
+**Description:** ${task.description}
+**Duration:** ${duration}${errorInfo}
+
+**${remainingCount} task${remainingCount === 1 ? "" : "s"} still in progress.** You WILL be notified when ALL complete.
+Do NOT poll - continue productive work.
+
+Use \`background_output(task_id="${task.id}")\` to retrieve this result when ready.
+</system-reminder>`
+    }
+
+    // Inject notification via session.prompt with noReply
+    try {
+      await this.client.session.prompt({
+        path: { id: task.parentSessionID },
+        body: {
+          noReply: !allComplete,  // Silent unless all complete
+          agent: task.parentAgent,
+          parts: [{ type: "text", text: notification }],
+        },
+      })
+      log("[background-agent] Sent notification to parent session:", {
+        taskId: task.id,
+        allComplete,
+        noReply: !allComplete,
+      })
+    } catch (error) {
+      log("[background-agent] Failed to send notification:", error)
+    }
+
+    // Cleanup after retention period
    const taskId = task.id
-    setTimeout(async () => {
+    setTimeout(() => {
      if (task.concurrencyKey) {
        this.concurrencyManager.release(task.concurrencyKey)
+        task.concurrencyKey = undefined
      }
-
-      try {
-        // Use only parentModel/parentAgent - don't fallback to prevMessage
-        // This prevents accidentally changing parent session's model/agent
-        const modelField = task.parentModel?.providerID && task.parentModel?.modelID
-          ? { providerID: task.parentModel.providerID, modelID: task.parentModel.modelID }
-          : undefined
-
-        await this.client.session.prompt({
-          path: { id: task.parentSessionID },
-          body: {
-            agent: task.parentAgent,
-            model: modelField,
-            parts: [{ type: "text", text: message }],
-          },
-          query: { directory: this.directory },
-        })
-        log("[background-agent] Successfully sent prompt to parent session:", { parentSessionID: task.parentSessionID })
-      } catch (error) {
-        log("[background-agent] prompt failed:", String(error))
-      } finally {
-        this.clearNotificationsForTask(taskId)
-        this.tasks.delete(taskId)
-        log("[background-agent] Removed completed task from memory:", taskId)
-      }
-    }, 200)
+      this.clearNotificationsForTask(taskId)
+      this.tasks.delete(taskId)
+      log("[background-agent] Removed completed task from memory:", taskId)
+    }, 5 * 60 * 1000)
  }

  private formatDuration(start: Date, end?: Date): string {
@@ -532,15 +717,18 @@ export class BackgroundManager {
    for (const task of this.tasks.values()) {
      if (task.status !== "running") continue

-      try {
+try {
        const sessionStatus = allStatuses[task.sessionID]
        
-        if (!sessionStatus) {
-          log("[background-agent] Session not found in status:", task.sessionID)
-          continue
-        }
+        // Don't skip if session not in status - fall through to message-based detection
+        if (sessionStatus?.type === "idle") {
+          // Edge guard: Validate session has actual output before completing
+          const hasValidOutput = await this.validateSessionHasOutput(task.sessionID)
+          if (!hasValidOutput) {
+            log("[background-agent] Polling idle but no valid output yet, waiting:", task.id)
+            continue
+          }

-        if (sessionStatus.type === "idle") {
          const hasIncompleteTodos = await this.checkSessionTodos(task.sessionID)
          if (hasIncompleteTodos) {
            log("[background-agent] Task has incomplete todos via polling, waiting:", task.id)
@@ -550,7 +738,7 @@ export class BackgroundManager {
          task.status = "completed"
          task.completedAt = new Date()
          this.markForNotification(task)
-          this.notifyParentSession(task)
+          await this.notifyParentSession(task)
          log("[background-agent] Task completed via polling:", task.id)
          continue
        }
@@ -591,10 +779,41 @@ export class BackgroundManager {
          task.progress.toolCalls = toolCalls
          task.progress.lastTool = lastTool
          task.progress.lastUpdate = new Date()
-          if (lastMessage) {
+if (lastMessage) {
            task.progress.lastMessage = lastMessage
            task.progress.lastMessageAt = new Date()
          }
+
+          // Stability detection: complete when message count unchanged for 3 polls
+          const currentMsgCount = messages.length
+          const elapsedMs = Date.now() - task.startedAt.getTime()
+
+          if (elapsedMs >= MIN_STABILITY_TIME_MS) {
+            if (task.lastMsgCount === currentMsgCount) {
+              task.stablePolls = (task.stablePolls ?? 0) + 1
+              if (task.stablePolls >= 3) {
+                // Edge guard: Validate session has actual output before completing
+                const hasValidOutput = await this.validateSessionHasOutput(task.sessionID)
+                if (!hasValidOutput) {
+                  log("[background-agent] Stability reached but no valid output, waiting:", task.id)
+                  continue
+                }
+
+                const hasIncompleteTodos = await this.checkSessionTodos(task.sessionID)
+                if (!hasIncompleteTodos) {
+                  task.status = "completed"
+                  task.completedAt = new Date()
+                  this.markForNotification(task)
+                  await this.notifyParentSession(task)
+                  log("[background-agent] Task completed via stability detection:", task.id)
+                  continue
+                }
+              }
+            } else {
+              task.stablePolls = 0
+            }
+          }
+          task.lastMsgCount = currentMsgCount
        }
      } catch (error) {
        log("[background-agent] Poll error for task:", { taskId: task.id, error })
--- a/src/features/background-agent/types.ts
+++ b/src/features/background-agent/types.ts
@@ -32,6 +32,10 @@ export interface BackgroundTask {
  concurrencyKey?: string
  /** Parent session's agent name for notification */
  parentAgent?: string
+  /** Last message count for stability detection */
+  lastMsgCount?: number
+  /** Number of consecutive polls with stable message count */
+  stablePolls?: number
 }

 export interface LaunchInput {
--- a/src/features/context-injector/injector.test.ts
+++ b/src/features/context-injector/injector.test.ts
@@ -133,7 +133,7 @@ describe("createContextInjectorHook", () => {
  })

  describe("chat.message handler", () => {
-    it("is a no-op (context injection moved to messages transform)", async () => {
+    it("injects pending context into output parts", async () => {
      // #given
      const hook = createContextInjectorHook(collector)
      const sessionID = "ses_hook1"
@@ -152,8 +152,9 @@ describe("createContextInjectorHook", () => {
      await hook["chat.message"](input, output)

      // #then
-      expect(output.parts[0].text).toBe("User message")
-      expect(collector.hasPending(sessionID)).toBe(true)
+      expect(output.parts[0].text).toContain("Hook context")
+      expect(output.parts[0].text).toContain("User message")
+      expect(collector.hasPending(sessionID)).toBe(false)
    })

    it("does nothing when no pending context", async () => {
--- a/src/features/context-injector/injector.ts
+++ b/src/features/context-injector/injector.ts
@@ -52,10 +52,16 @@ interface ChatMessageOutput {
 export function createContextInjectorHook(collector: ContextCollector) {
  return {
    "chat.message": async (
-      _input: ChatMessageInput,
-      _output: ChatMessageOutput
+      input: ChatMessageInput,
+      output: ChatMessageOutput
    ): Promise<void> => {
-      void collector
+      const result = injectPendingContext(collector, input.sessionID, output.parts)
+      if (result.injected) {
+        log("[context-injector] Injected pending context via chat.message", {
+          sessionID: input.sessionID,
+          contextLength: result.contextLength,
+        })
+      }
    },
  }
 }
--- a/src/hooks/AGENTS.md
+++ b/src/hooks/AGENTS.md
@@ -2,35 +2,42 @@

 ## OVERVIEW

-22 lifecycle hooks intercepting/modifying agent behavior. Context injection, error recovery, output control, notifications.
+22+ lifecycle hooks intercepting/modifying agent behavior. Context injection, error recovery, output control, notifications.

 ## STRUCTURE

 ```
 hooks/
-├── anthropic-context-window-limit-recovery/  # Auto-compact at token limit (554 lines)
+├── anthropic-context-window-limit-recovery/  # Auto-compact at token limit (556 lines)
 ├── auto-slash-command/         # Detect and execute /command patterns
 ├── auto-update-checker/        # Version notifications, startup toast
 ├── background-notification/    # OS notify on task complete
-├── claude-code-hooks/          # settings.json PreToolUse/PostToolUse/etc
+├── claude-code-hooks/          # settings.json PreToolUse/PostToolUse/etc (408 lines)
 ├── comment-checker/            # Prevent excessive AI comments
-│   └── filters/                # docstring, directive, bdd, etc
+│   ├── filters/                # docstring, directive, bdd, shebang
+│   └── output/                 # XML builder, formatter
 ├── compaction-context-injector/ # Preserve context during compaction
 ├── directory-agents-injector/  # Auto-inject AGENTS.md
 ├── directory-readme-injector/  # Auto-inject README.md
+├── edit-error-recovery/        # Recover from edit failures
 ├── empty-message-sanitizer/    # Sanitize empty messages
 ├── interactive-bash-session/   # Tmux session management
 ├── keyword-detector/           # ultrawork/search keyword activation
 ├── non-interactive-env/        # CI/headless handling
 ├── preemptive-compaction/      # Pre-emptive at 85% usage
+├── prometheus-md-only/         # Restrict prometheus to read-only
 ├── ralph-loop/                 # Self-referential dev loop
 ├── rules-injector/             # Conditional rules from .claude/rules/
-├── session-recovery/           # Recover from errors (430 lines)
+├── session-recovery/           # Recover from errors (432 lines)
+├── sisyphus-orchestrator/      # Main orchestration hook (660 lines)
+├── start-work/                 # Initialize Sisyphus work session
+├── task-resume-info/           # Track task resume state
 ├── think-mode/                 # Auto-detect thinking triggers
+├── thinking-block-validator/   # Validate thinking block format
 ├── agent-usage-reminder/       # Remind to use specialists
 ├── context-window-monitor.ts   # Monitor usage (standalone)
 ├── session-notification.ts     # OS notify on idle
-├── todo-continuation-enforcer.ts # Force TODO completion
+├── todo-continuation-enforcer.ts # Force TODO completion (413 lines)
 └── tool-output-truncator.ts    # Truncate verbose outputs
 ```

--- a/src/hooks/agent-usage-reminder/constants.ts
+++ b/src/hooks/agent-usage-reminder/constants.ts
@@ -15,6 +15,8 @@ export const TARGET_TOOLS = new Set([
  "safe_glob",
  "webfetch",
  "context7_resolve-library-id",
+  "context7_query-docs",
+  "websearch_web_search_exa",
  "context7_get-library-docs",
  "grep_app_searchgithub",
 ]);
--- a/src/hooks/auto-update-checker/index.test.ts
+++ b/src/hooks/auto-update-checker/index.test.ts
@@ -0,0 +1,153 @@
+import { describe, test, expect } from "bun:test"
+import { isPrereleaseVersion, isDistTag, isPrereleaseOrDistTag } from "./index"
+
+describe("auto-update-checker", () => {
+  describe("isPrereleaseVersion", () => {
+    test("returns true for beta versions", () => {
+      // #given a beta version
+      const version = "3.0.0-beta.1"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for alpha versions", () => {
+      // #given an alpha version
+      const version = "1.0.0-alpha"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for rc versions", () => {
+      // #given an rc version
+      const version = "2.0.0-rc.1"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for stable versions", () => {
+      // #given a stable version
+      const version = "2.14.0"
+
+      // #when checking if prerelease
+      const result = isPrereleaseVersion(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+  })
+
+  describe("isDistTag", () => {
+    test("returns true for beta dist-tag", () => {
+      // #given beta dist-tag
+      const version = "beta"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for next dist-tag", () => {
+      // #given next dist-tag
+      const version = "next"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for canary dist-tag", () => {
+      // #given canary dist-tag
+      const version = "canary"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for semver versions", () => {
+      // #given a semver version
+      const version = "2.14.0"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+
+    test("returns true for latest (filtered before this check)", () => {
+      // #given latest tag
+      const version = "latest"
+
+      // #when checking if dist-tag
+      const result = isDistTag(version)
+
+      // #then returns true (but latest is filtered before this check)
+      expect(result).toBe(true)
+    })
+  })
+
+  describe("isPrereleaseOrDistTag", () => {
+    test("returns false for null", () => {
+      // #given null version
+      const version = null
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+
+    test("returns true for prerelease version", () => {
+      // #given prerelease version
+      const version = "3.0.0-beta.1"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns true for dist-tag", () => {
+      // #given dist-tag
+      const version = "beta"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns true
+      expect(result).toBe(true)
+    })
+
+    test("returns false for stable version", () => {
+      // #given stable version
+      const version = "2.14.0"
+
+      // #when checking
+      const result = isPrereleaseOrDistTag(version)
+
+      // #then returns false
+      expect(result).toBe(false)
+    })
+  })
+})
--- a/src/hooks/auto-update-checker/index.ts
+++ b/src/hooks/auto-update-checker/index.ts
@@ -9,6 +9,20 @@ import type { AutoUpdateCheckerOptions } from "./types"

 const SISYPHUS_SPINNER = ["·", "•", "●", "○", "◌", "◦", " "]

+/**
+ * Reports whether a version string carries a SemVer prerelease suffix.
+ *
+ * A prerelease is marked by a hyphen after the core version ("3.0.0-beta.1").
+ * Build metadata (everything from the first "+") may itself contain hyphens
+ * and is ignored, so "1.2.3+build-7" is still treated as a stable version.
+ *
+ * @param version - Version string to inspect, e.g. "2.14.0" or "3.0.0-beta.1".
+ * @returns true when the part before any "+" contains a hyphen.
+ */
+export function isPrereleaseVersion(version: string): boolean {
+  const buildStart = version.indexOf("+")
+  const withoutBuild = buildStart === -1 ? version : version.slice(0, buildStart)
+  return withoutBuild.includes("-")
+}
+
+/**
+ * Reports whether a version specifier looks like an npm dist-tag rather than
+ * a concrete version: anything that does not begin with a decimal digit
+ * qualifies (so "beta", "next" and "latest" all return true).
+ *
+ * @param version - Version specifier to classify.
+ * @returns true when the first character is not a digit.
+ */
+export function isDistTag(version: string): boolean {
+  return version.search(/^\d/) === -1
+}
+
+/**
+ * Reports whether a pinned version is either a prerelease version or a dist-tag.
+ *
+ * @param pinnedVersion - Pinned version specifier, or null when nothing is pinned.
+ * @returns false for null or an empty string; otherwise true when either
+ *   isPrereleaseVersion or isDistTag accepts the value.
+ */
+export function isPrereleaseOrDistTag(pinnedVersion: string | null): boolean {
+  return pinnedVersion
+    ? isPrereleaseVersion(pinnedVersion) || isDistTag(pinnedVersion)
+    : false
+}
+
 export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdateCheckerOptions = {}) {
  const { showStartupToast = true, isSisyphusEnabled = false, autoUpdate = true } = options

@@ -63,7 +77,7 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat
 }

 async function runBackgroundUpdateCheck(
-  ctx: PluginInput, 
+  ctx: PluginInput,
  autoUpdate: boolean,
  getToastMessage: (isUpdate: boolean, latestVersion?: string) => string
 ): Promise<void> {
@@ -99,7 +113,18 @@ async function runBackgroundUpdateCheck(
    return
  }

+  // Check if current version is a prerelease - don't auto-downgrade prerelease to stable
+  if (isPrereleaseVersion(currentVersion)) {
+    log(`[auto-update-checker] Skipping auto-update for prerelease version: ${currentVersion}`)
+    return
+  }
+
  if (pluginInfo.isPinned) {
+    if (isPrereleaseOrDistTag(pluginInfo.pinnedVersion)) {
+      log(`[auto-update-checker] Skipping auto-update for prerelease/dist-tag: ${pluginInfo.pinnedVersion}`)
+      return
+    }
+
    const updated = updatePinnedVersion(pluginInfo.configPath, pluginInfo.entry, latestVersion)
    if (!updated) {
      await showUpdateAvailableToast(ctx, latestVersion, getToastMessage)
@@ -112,7 +137,7 @@ async function runBackgroundUpdateCheck(
  invalidatePackage(PACKAGE_NAME)

  const installSuccess = await runBunInstallSafe()
-  
+
  if (installSuccess) {
    await showAutoUpdatedToast(ctx, currentVersion, latestVersion)
    log(`[auto-update-checker] Update installed: ${currentVersion} → ${latestVersion}`)
@@ -180,7 +205,7 @@ async function showSpinnerToast(ctx: PluginInput, version: string, message: stri
 }

 async function showUpdateAvailableToast(
-  ctx: PluginInput, 
+  ctx: PluginInput,
  latestVersion: string,
  getToastMessage: (isUpdate: boolean, latestVersion?: string) => string
 ): Promise<void> {
--- a/src/hooks/background-compaction/index.ts
+++ b/src/hooks/background-compaction/index.ts
@@ -0,0 +1,85 @@
+import type { BackgroundManager } from "../../features/background-agent"
+
+/** Input handed to the "experimental.session.compacting" handler defined in this file. */
+interface CompactingInput {
+  /** Session being compacted; compared against each task's parentSessionID. */
+  sessionID: string
+}
+
+/** Output object that the "experimental.session.compacting" handler in this file mutates. */
+interface CompactingOutput {
+  /** Context strings; the handler appends one "<background-tasks>" block here. */
+  context: string[]
+  /** Not read or written by the handler in this file. */
+  prompt?: string
+}
+
+/**
+ * Builds the compaction hook for background agents.
+ *
+ * When a session's context is compacted, the handler appends a
+ * "<background-tasks>" summary to `output.context` listing the tasks that
+ * session launched: the ones still running, and up to ten that are no longer
+ * running but are still held by the manager. Nothing is appended when the
+ * session has no such tasks, so the agent keeps awareness of delegated work
+ * only when there is something to report.
+ *
+ * @param manager - Source of the running / no-longer-running task lists.
+ * @returns An object with a single "experimental.session.compacting" handler.
+ */
+export function createBackgroundCompactionHook(manager: BackgroundManager) {
+  return {
+    "experimental.session.compacting": async (
+      input: CompactingInput,
+      output: CompactingOutput
+    ): Promise<void> => {
+      // Tasks launched by the session that is being compacted
+      const runningTasks = manager
+        .getRunningTasks()
+        .filter((task) => task.parentSessionID === input.sessionID)
+
+      // Only the last 10 no-longer-running tasks are reported
+      const finishedTasks = manager
+        .getCompletedTasks()
+        .filter((task) => task.parentSessionID === input.sessionID)
+        .slice(-10)
+
+      // Nothing to preserve for this session
+      if (runningTasks.length + finishedTasks.length === 0) return
+
+      const lines: string[] = ["<background-tasks>"]
+
+      if (runningTasks.length > 0) {
+        const runningLines = runningTasks.map((task) => {
+          const elapsed = Math.floor((Date.now() - task.startedAt.getTime()) / 1000)
+          return `- **\`${task.id}\`** (${task.agent}): ${task.description} [${elapsed}s elapsed]`
+        })
+        lines.push(
+          "## Running Background Tasks",
+          "",
+          ...runningLines,
+          "",
+          "> **Note:** You WILL be notified when tasks complete.",
+          "> Do NOT poll - continue productive work.",
+          ""
+        )
+      }
+
+      if (finishedTasks.length > 0) {
+        lines.push("## Recently Completed Tasks", "")
+        for (const task of finishedTasks) {
+          // Any status other than completed/error gets the neutral marker
+          let statusEmoji = "⏱️"
+          if (task.status === "completed") {
+            statusEmoji = "✅"
+          } else if (task.status === "error") {
+            statusEmoji = "❌"
+          }
+          lines.push(`- ${statusEmoji} **\`${task.id}\`**: ${task.description}`)
+        }
+        lines.push("")
+      }
+
+      lines.push(
+        "## Retrieval",
+        'Use `background_output(task_id="<id>")` to retrieve task results.',
+        "</background-tasks>"
+      )
+
+      output.context.push(lines.join("\n"))
+    }
+  }
+}
--- a/src/hooks/background-notification/index.ts
+++ b/src/hooks/background-notification/index.ts
@@ -9,6 +9,12 @@ interface EventInput {
  event: Event
 }

+/**
+ * Background notification hook - handles event routing to BackgroundManager.
+ * 
+ * Notifications are now delivered directly via session.prompt({ noReply }) 
+ * from the manager, so this hook only needs to handle event routing.
+ */
 export function createBackgroundNotificationHook(manager: BackgroundManager) {
  const eventHandler = async ({ event }: EventInput) => {
    manager.handleEvent(event)
--- a/src/hooks/claude-code-hooks/index.ts
+++ b/src/hooks/claude-code-hooks/index.ts
@@ -27,7 +27,6 @@ import { cacheToolInput, getToolInput } from "./tool-input-cache"
 import { recordToolUse, recordToolResult, getTranscriptPath, recordUserMessage } from "./transcript"
 import type { PluginConfig } from "./types"
 import { log, isHookDisabled } from "../../shared"
-import { detectKeywordsWithType, removeCodeBlocks } from "../keyword-detector"
 import type { ContextCollector } from "../../features/context-injector"

 const sessionFirstMessageProcessed = new Set<string>()
@@ -142,25 +141,9 @@ export function createClaudeCodeHooksHook(
          return
        }

-        const keywordMessages: string[] = []
-        if (!config.keywordDetectorDisabled) {
-          const detectedKeywords = detectKeywordsWithType(removeCodeBlocks(prompt), input.agent)
-          keywordMessages.push(...detectedKeywords.map((k) => k.message))
-
-          if (keywordMessages.length > 0) {
-            log("[claude-code-hooks] Detected keywords", {
-              sessionID: input.sessionID,
-              keywordCount: keywordMessages.length,
-              types: detectedKeywords.map((k) => k.type),
-            })
-          }
-        }
-
-        const allMessages = [...keywordMessages, ...result.messages]
-
-        if (allMessages.length > 0) {
-          const hookContent = allMessages.join("\n\n")
-          log(`[claude-code-hooks] Injecting ${allMessages.length} messages (${keywordMessages.length} keyword + ${result.messages.length} hook)`, { sessionID: input.sessionID, contentLength: hookContent.length, isFirstMessage })
+        if (result.messages.length > 0) {
+          const hookContent = result.messages.join("\n\n")
+          log(`[claude-code-hooks] Injecting ${result.messages.length} hook messages`, { sessionID: input.sessionID, contentLength: hookContent.length, isFirstMessage })

          if (isFirstMessage) {
            const idx = output.parts.findIndex((p) => p.type === "text" && p.text)
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -14,6 +14,7 @@ export { createThinkModeHook } from "./think-mode";
 export { createClaudeCodeHooksHook } from "./claude-code-hooks";
 export { createRulesInjectorHook } from "./rules-injector";
 export { createBackgroundNotificationHook } from "./background-notification"
+export { createBackgroundCompactionHook } from "./background-compaction"
 export { createAutoUpdateCheckerHook } from "./auto-update-checker";

 export { createAgentUsageReminderHook } from "./agent-usage-reminder";
--- a/src/hooks/keyword-detector/index.test.ts
+++ b/src/hooks/keyword-detector/index.test.ts
@@ -1,7 +1,95 @@
 import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
 import { createKeywordDetectorHook } from "./index"
 import { setMainSession } from "../../features/claude-code-session-state"
+import { ContextCollector } from "../../features/context-injector"
 import * as sharedModule from "../../shared"
+import * as sessionState from "../../features/claude-code-session-state"
+
+describe("keyword-detector registers to ContextCollector", () => {
+  // Arguments of every shared `log` call captured while a test runs.
+  let recordedLogs: Array<{ msg: string; data?: unknown }>
+  let logSpy: ReturnType<typeof spyOn>
+  // Only assigned by tests that pin the main session; restored in afterEach when set.
+  let getMainSessionSpy: ReturnType<typeof spyOn>
+
+  /** Minimal plugin input: the TUI toast API is the only stubbed member, as a no-op. */
+  const createMockPluginInput = () =>
+    ({ client: { tui: { showToast: async () => {} } } }) as any
+
+  /** Builds the `chat.message` output argument holding a single text part. */
+  const outputWithText = (text: string) => ({
+    message: {} as Record<string, unknown>,
+    parts: [{ type: "text", text }],
+  })
+
+  beforeEach(() => {
+    recordedLogs = []
+    logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
+      recordedLogs.push({ msg, data })
+    })
+  })
+
+  afterEach(() => {
+    logSpy?.mockRestore()
+    getMainSessionSpy?.mockRestore()
+  })
+
+  test("should register ultrawork keyword to ContextCollector", async () => {
+    // #given - a fresh ContextCollector and keyword-detector hook
+    const sessionID = "test-session-123"
+    const collector = new ContextCollector()
+    const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
+
+    // #when - keyword detection runs
+    await hook["chat.message"]({ sessionID }, outputWithText("ultrawork do something"))
+
+    // #then - ultrawork context should be registered in collector
+    expect(collector.hasPending(sessionID)).toBe(true)
+    const { entries } = collector.getPending(sessionID)
+    expect(entries.length).toBeGreaterThan(0)
+    expect(entries[0].source).toBe("keyword-detector")
+    expect(entries[0].id).toBe("keyword-ultrawork")
+  })
+
+  test("should register search keyword to ContextCollector", async () => {
+    // #given - getMainSessionID is mocked to return our session (isolates the test from global state)
+    const sessionID = "search-test-session"
+    const collector = new ContextCollector()
+    getMainSessionSpy = spyOn(sessionState, "getMainSessionID").mockReturnValue(sessionID)
+    const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
+
+    // #when - keyword detection runs
+    await hook["chat.message"]({ sessionID }, outputWithText("search for the bug"))
+
+    // #then - search context should be registered in collector
+    expect(collector.hasPending(sessionID)).toBe(true)
+    const { entries } = collector.getPending(sessionID)
+    expect(entries.some((entry) => entry.id === "keyword-search")).toBe(true)
+  })
+
+  test("should NOT register to collector when no keywords detected", async () => {
+    // #given - a message that contains no keywords
+    const sessionID = "test-session"
+    const collector = new ContextCollector()
+    const hook = createKeywordDetectorHook(createMockPluginInput(), collector)
+
+    // #when - keyword detection runs
+    await hook["chat.message"]({ sessionID }, outputWithText("just a normal message"))
+
+    // #then - nothing should be registered
+    expect(collector.hasPending(sessionID)).toBe(false)
+  })
+})

 describe("keyword-detector session filtering", () => {
  let logCalls: Array<{ msg: string; data?: unknown }>
--- a/src/hooks/keyword-detector/index.ts
+++ b/src/hooks/keyword-detector/index.ts
@@ -2,12 +2,13 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import { detectKeywordsWithType, extractPromptText, removeCodeBlocks } from "./detector"
 import { log } from "../../shared"
 import { getMainSessionID } from "../../features/claude-code-session-state"
+import type { ContextCollector } from "../../features/context-injector"

 export * from "./detector"
 export * from "./constants"
 export * from "./types"

-export function createKeywordDetectorHook(ctx: PluginInput) {
+export function createKeywordDetectorHook(ctx: PluginInput, collector?: ContextCollector) {
  return {
    "chat.message": async (
      input: {
@@ -28,8 +29,6 @@ export function createKeywordDetectorHook(ctx: PluginInput) {
        return
      }

-      // Only ultrawork keywords work in non-main sessions
-      // Other keywords (search, analyze, etc.) only work in main sessions
      const mainSessionID = getMainSessionID()
      const isNonMainSession = mainSessionID && input.sessionID !== mainSessionID

@@ -64,6 +63,17 @@ export function createKeywordDetectorHook(ctx: PluginInput) {
          )
      }

+      if (collector) {
+        for (const keyword of detectedKeywords) {
+          collector.register(input.sessionID, {
+            id: `keyword-${keyword.type}`,
+            source: "keyword-detector",
+            content: keyword.message,
+            priority: keyword.type === "ultrawork" ? "critical" : "high",
+          })
+        }
+      }
+
      log(`[keyword-detector] Detected ${detectedKeywords.length} keywords`, {
        sessionID: input.sessionID,
        types: detectedKeywords.map((k) => k.type),
--- a/src/hooks/prometheus-md-only/constants.ts
+++ b/src/hooks/prometheus-md-only/constants.ts
@@ -4,7 +4,7 @@ export const PROMETHEUS_AGENTS = ["Prometheus (Planner)"]

 export const ALLOWED_EXTENSIONS = [".md"]

-export const ALLOWED_PATH_PREFIX = ".sisyphus/"
+export const ALLOWED_PATH_PREFIX = ".sisyphus"

 export const BLOCKED_TOOLS = ["Write", "Edit", "write", "edit"]

--- a/src/hooks/prometheus-md-only/index.test.ts
+++ b/src/hooks/prometheus-md-only/index.test.ts
@@ -70,7 +70,7 @@ describe("prometheus-md-only", () => {
        callID: "call-1",
      }
      const output = {
-        args: { filePath: "/project/.sisyphus/plans/work-plan.md" },
+        args: { filePath: "/tmp/test/.sisyphus/plans/work-plan.md" },
      }

      // #when / #then
@@ -295,4 +295,136 @@ describe("prometheus-md-only", () => {
      ).resolves.toBeUndefined()
    })
  })
+
+  describe("cross-platform path validation", () => {
+    // Error text expected whenever the hook refuses a write.
+    const BLOCKED_MESSAGE = "can only write/edit .md files inside .sisyphus/"
+
+    // One row per scenario: test title, path handed to the Write tool, and whether
+    // the hook must let the call through (true) or reject it (false).
+    const scenarios: Array<{ title: string; filePath: string; allowed: boolean }> = [
+      {
+        title: "should allow Windows-style backslash paths under .sisyphus/",
+        filePath: ".sisyphus\\plans\\work-plan.md",
+        allowed: true,
+      },
+      {
+        title: "should allow mixed separator paths under .sisyphus/",
+        filePath: ".sisyphus\\plans/work-plan.MD",
+        allowed: true,
+      },
+      {
+        title: "should allow uppercase .MD extension",
+        filePath: ".sisyphus/plans/work-plan.MD",
+        allowed: true,
+      },
+      {
+        title: "should block paths outside workspace root even if containing .sisyphus",
+        filePath: "/other/project/.sisyphus/plans/x.md",
+        allowed: false,
+      },
+      {
+        title: "should block nested .sisyphus directories",
+        filePath: "src/.sisyphus/plans/x.md",
+        allowed: false,
+      },
+      {
+        title: "should block path traversal attempts",
+        filePath: ".sisyphus/../secrets.md",
+        allowed: false,
+      },
+      {
+        title: "should allow case-insensitive .SISYPHUS directory",
+        filePath: ".SISYPHUS/plans/work-plan.md",
+        allowed: true,
+      },
+    ]
+
+    beforeEach(() => {
+      setupMessageStorage(TEST_SESSION_ID, "Prometheus (Planner)")
+    })
+
+    for (const { title, filePath, allowed } of scenarios) {
+      test(title, async () => {
+        // #given
+        const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+        const input = { tool: "Write", sessionID: TEST_SESSION_ID, callID: "call-1" }
+        const output = { args: { filePath } }
+
+        // #when
+        const attempt = hook["tool.execute.before"](input, output)
+
+        // #then
+        if (allowed) {
+          await expect(attempt).resolves.toBeUndefined()
+        } else {
+          await expect(attempt).rejects.toThrow(BLOCKED_MESSAGE)
+        }
+      })
+    }
+  })
 })
--- a/src/hooks/prometheus-md-only/index.ts
+++ b/src/hooks/prometheus-md-only/index.ts
@@ -1,16 +1,48 @@
 import type { PluginInput } from "@opencode-ai/plugin"
 import { existsSync, readdirSync } from "node:fs"
-import { join } from "node:path"
+import { join, resolve, relative, isAbsolute } from "node:path"
 import { HOOK_NAME, PROMETHEUS_AGENTS, ALLOWED_EXTENSIONS, ALLOWED_PATH_PREFIX, BLOCKED_TOOLS, PLANNING_CONSULT_WARNING } from "./constants"
 import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { log } from "../../shared/logger"

 export * from "./constants"

-function isAllowedFile(filePath: string): boolean {
-  const hasAllowedExtension = ALLOWED_EXTENSIONS.some(ext => filePath.endsWith(ext))
-  const isInAllowedPath = filePath.includes(ALLOWED_PATH_PREFIX)
-  return hasAllowedExtension && isInAllowedPath
+/**
+ * Cross-platform path validator for Prometheus file writes.
+ *
+ * Uses path.resolve/relative instead of string matching, and normalizes "\" to "/"
+ * BEFORE resolving: on POSIX "\" is an ordinary filename character, so without this
+ * ".sisyphus\..\secrets.md" would pass the first-segment check while naming a file
+ * that is not inside the allowed directory.
+ *
+ * @param filePath - Path from the tool call; relative paths resolve against workspaceRoot.
+ * @param workspaceRoot - Directory that must directly contain the allowed directory.
+ * @returns true only when the path stays inside workspaceRoot, its first segment equals
+ *   ALLOWED_PATH_PREFIX (case-insensitive) and it ends with one of ALLOWED_EXTENSIONS
+ *   (case-insensitive).
+ */
+function isAllowedFile(filePath: string, workspaceRoot: string): boolean {
+  // 1. Normalize separators, then resolve to an absolute path
+  const resolved = resolve(workspaceRoot, filePath.replace(/\\/g, "/"))
+
+  // 2. Get relative path from workspace root
+  const rel = relative(workspaceRoot, resolved)
+
+  // 3. Reject if escapes root (starts with ".." or is absolute)
+  if (rel.startsWith("..") || isAbsolute(rel)) {
+    return false
+  }
+
+  // 4. First segment must match ALLOWED_PATH_PREFIX (case-insensitive)
+  // Guard: rel is "" when filePath resolves to workspaceRoot itself — reject
+  const segments = rel.split(/[/\\]/)
+  if (!segments[0] || segments[0].toLowerCase() !== ALLOWED_PATH_PREFIX.toLowerCase()) {
+    return false
+  }
+
+  // 5. Extension must match one of ALLOWED_EXTENSIONS (case-insensitive)
+  const lowered = resolved.toLowerCase()
+  return ALLOWED_EXTENSIONS.some((ext) => lowered.endsWith(ext.toLowerCase()))
 }

 function getMessageDir(sessionID: string): string | null {
@@ -35,7 +67,7 @@ function getAgentFromSession(sessionID: string): string | undefined {
  return findNearestMessageWithFields(messageDir)?.agent
 }

-export function createPrometheusMdOnlyHook(_ctx: PluginInput) {
+export function createPrometheusMdOnlyHook(ctx: PluginInput) {
  return {
    "tool.execute.before": async (
      input: { tool: string; sessionID: string; callID: string },
@@ -72,7 +104,7 @@ export function createPrometheusMdOnlyHook(_ctx: PluginInput) {
        return
      }

-      if (!isAllowedFile(filePath)) {
+      if (!isAllowedFile(filePath, ctx.directory)) {
        log(`[${HOOK_NAME}] Blocked: Prometheus can only write to .sisyphus/*.md`, {
          sessionID: input.sessionID,
          tool: toolName,
--- a/src/hooks/ralph-loop/index.test.ts
+++ b/src/hooks/ralph-loop/index.test.ts
@@ -591,6 +591,73 @@ describe("ralph-loop", () => {
      expect(hook.getState()).toBeNull()
    })

+    test("should allow starting new loop while previous loop is active (different session)", async () => {
+      // #given - a loop is already active for session A
+      const ralph = createRalphLoopHook(createMockPluginInput())
+      ralph.startLoop("session-A", "First task", { maxIterations: 10 })
+      const initial = ralph.getState()
+      expect(initial?.session_id).toBe("session-A")
+      expect(initial?.prompt).toBe("First task")
+
+      // #when - a second loop starts for session B while A is still unfinished
+      ralph.startLoop("session-B", "Second task", { maxIterations: 20 })
+
+      // #then - the stored state now describes session B's loop
+      const replaced = ralph.getState()
+      expect(replaced?.session_id).toBe("session-B")
+      expect(replaced?.prompt).toBe("Second task")
+      expect(replaced?.max_iterations).toBe(20)
+      expect(replaced?.iteration).toBe(1)
+
+      // #when - session B goes idle
+      await ralph.event({
+        event: { type: "session.idle", properties: { sessionID: "session-B" } },
+      })
+
+      // #then - exactly one continuation was injected, addressed to session B
+      expect(promptCalls.length).toBe(1)
+      const [continuation] = promptCalls
+      expect(continuation.sessionID).toBe("session-B")
+      expect(continuation.text).toContain("Second task")
+      expect(continuation.text).toContain("2/20")
+
+      // #then - iteration incremented
+      expect(ralph.getState()?.iteration).toBe(2)
+    })
+
+    test("should allow starting new loop in same session (restart)", async () => {
+      // #given - an active loop in session A that has already been continued twice
+      const ralph = createRalphLoopHook(createMockPluginInput())
+      const sessionAIdle = () =>
+        ralph.event({
+          event: { type: "session.idle", properties: { sessionID: "session-A" } },
+        })
+      ralph.startLoop("session-A", "First task", { maxIterations: 10 })
+
+      // Two idle events stand in for two completed iterations
+      for (let round = 0; round < 2; round++) {
+        await sessionAIdle()
+      }
+      expect(ralph.getState()?.iteration).toBe(3)
+      expect(promptCalls.length).toBe(2)
+
+      // #when - a NEW loop is started in the same session (restart)
+      ralph.startLoop("session-A", "Restarted task", { maxIterations: 50 })
+
+      // #then - state is back at iteration 1 and carries the new prompt and limit
+      const restarted = ralph.getState()
+      expect(restarted?.session_id).toBe("session-A")
+      expect(restarted?.prompt).toBe("Restarted task")
+      expect(restarted?.max_iterations).toBe(50)
+      expect(restarted?.iteration).toBe(1)
+
+      // #when - session goes idle
+      promptCalls = [] // Reset so only the new continuation is inspected
+      await sessionAIdle()
+
+      // #then - continuation should use new task
+      expect(promptCalls.length).toBe(1)
+      const [continuation] = promptCalls
+      expect(continuation.text).toContain("Restarted task")
+      expect(continuation.text).toContain("2/50")
+    })
+
    test("should check transcript BEFORE API to optimize performance", async () => {
      // #given - transcript has completion promise
      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
--- a/src/hooks/sisyphus-orchestrator/index.test.ts
+++ b/src/hooks/sisyphus-orchestrator/index.test.ts
@@ -506,6 +506,90 @@ describe("sisyphus-orchestrator hook", () => {
        // #then
        expect(output.output).toBe(originalOutput)
      })
+
+      describe("cross-platform path validation (Windows support)", () => {
+        // Tool output text before the hook has had a chance to append anything.
+        const WRITE_RESULT = "File written successfully"
+
+        /**
+         * Runs `tool.execute.after` for a Write call in the orchestrator session whose
+         * metadata reports `filePath`, and returns the output object handed to the hook.
+         */
+        const runWriteAfter = async (filePath: string) => {
+          const hook = createSisyphusOrchestratorHook(createMockPluginInput())
+          const output = { title: "Write", output: WRITE_RESULT, metadata: { filePath } }
+          await hook["tool.execute.after"]({ tool: "Write", sessionID: ORCHESTRATOR_SESSION }, output)
+          return output
+        }
+
+        test("should NOT append reminder when orchestrator writes inside .sisyphus\\ (Windows backslash)", async () => {
+          // #given / #when
+          const output = await runWriteAfter(".sisyphus\\plans\\work-plan.md")
+
+          // #then
+          expect(output.output).toBe(WRITE_RESULT)
+          expect(output.output).not.toContain("DELEGATION REQUIRED")
+        })
+
+        test("should NOT append reminder when orchestrator writes inside .sisyphus with mixed separators", async () => {
+          // #given / #when
+          const output = await runWriteAfter(".sisyphus\\plans/work-plan.md")
+
+          // #then
+          expect(output.output).toBe(WRITE_RESULT)
+          expect(output.output).not.toContain("DELEGATION REQUIRED")
+        })
+
+        test("should NOT append reminder for absolute Windows path inside .sisyphus\\", async () => {
+          // #given / #when
+          const output = await runWriteAfter("C:\\Users\\test\\project\\.sisyphus\\plans\\x.md")
+
+          // #then
+          expect(output.output).toBe(WRITE_RESULT)
+          expect(output.output).not.toContain("DELEGATION REQUIRED")
+        })
+
+        test("should append reminder for Windows path outside .sisyphus\\", async () => {
+          // #given / #when
+          const output = await runWriteAfter("C:\\Users\\test\\project\\src\\code.ts")
+
+          // #then
+          expect(output.output).toContain("DELEGATION REQUIRED")
+        })
+      })
    })
  })

--- a/src/hooks/sisyphus-orchestrator/index.ts
+++ b/src/hooks/sisyphus-orchestrator/index.ts
@@ -14,7 +14,14 @@ import type { BackgroundManager } from "../../features/background-agent"

 export const HOOK_NAME = "sisyphus-orchestrator"

-const ALLOWED_PATH_PREFIX = ".sisyphus/"
+/**
+ * Returns true when `filePath` has a path segment named exactly `.sisyphus` followed by a
+ * `/` or `\` separator; anchoring on string start or a separator stops `foo.sisyphus/` matching.
+ */
+function isSisyphusPath(filePath: string): boolean {
+  return /(?:^|[/\\])\.sisyphus[/\\]/.test(filePath)
+}
+
 const WRITE_EDIT_TOOLS = ["Write", "Edit", "write", "edit"]

 const DIRECT_WORK_REMINDER = `
@@ -549,7 +556,7 @@ export function createSisyphusOrchestratorHook(
      // Check Write/Edit tools for orchestrator - inject strong warning
      if (WRITE_EDIT_TOOLS.includes(input.tool)) {
        const filePath = (output.args.filePath ?? output.args.path ?? output.args.file) as string | undefined
-        if (filePath && !filePath.includes(ALLOWED_PATH_PREFIX)) {
+        if (filePath && !isSisyphusPath(filePath)) {
          // Store filePath for use in tool.execute.after
          if (input.callID) {
            pendingFilePaths.set(input.callID, filePath)
@@ -593,7 +600,7 @@ export function createSisyphusOrchestratorHook(
        if (!filePath) {
          filePath = output.metadata?.filePath as string | undefined
        }
-        if (filePath && !filePath.includes(ALLOWED_PATH_PREFIX)) {
+        if (filePath && !isSisyphusPath(filePath)) {
          output.output = (output.output || "") + DIRECT_WORK_REMINDER
          log(`[${HOOK_NAME}] Direct work reminder appended`, {
            sessionID: input.sessionID,
--- a/src/index.ts
+++ b/src/index.ts
@@ -63,6 +63,7 @@ import {
  createSisyphusTask,
  interactive_bash,
  startTmuxCheck,
+  lspManager,
 } from "./tools";
 import { BackgroundManager } from "./features/background-agent";
 import { SkillMcpManager } from "./features/skill-mcp-manager";
@@ -164,7 +165,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
      })
    : null;
  const keywordDetector = isHookEnabled("keyword-detector")
-    ? createKeywordDetectorHook(ctx)
+    ? createKeywordDetectorHook(ctx, contextCollector)
    : null;
  const contextInjector = createContextInjectorHook(contextCollector);
  const contextInjectorMessagesTransform =
@@ -312,8 +313,8 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
    },

    "chat.message": async (input, output) => {
-      await claudeCodeHooks["chat.message"]?.(input, output);
      await keywordDetector?.["chat.message"]?.(input, output);
+      await claudeCodeHooks["chat.message"]?.(input, output);
      await contextInjector["chat.message"]?.(input, output);
      await autoSlashCommand?.["chat.message"]?.(input, output);
      await startWork?.["chat.message"]?.(input, output);
@@ -427,6 +428,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
        }
        if (sessionInfo?.id) {
          await skillMcpManager.disconnectSession(sessionInfo.id);
+          await lspManager.cleanupTempDirectoryClients();
        }
      }

--- a/src/shared/opencode-config-dir.test.ts
+++ b/src/shared/opencode-config-dir.test.ts
@@ -1,6 +1,6 @@
 import { describe, test, expect, beforeEach, afterEach } from "bun:test"
 import { homedir } from "node:os"
-import { join } from "node:path"
+import { join, resolve } from "node:path"
 import {
  getOpenCodeConfigDir,
  getOpenCodeConfigPaths,
@@ -20,6 +20,7 @@ describe("opencode-config-dir", () => {
      APPDATA: process.env.APPDATA,
      XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
      XDG_DATA_HOME: process.env.XDG_DATA_HOME,
+      OPENCODE_CONFIG_DIR: process.env.OPENCODE_CONFIG_DIR,
    }
  })

@@ -34,6 +35,84 @@ describe("opencode-config-dir", () => {
    }
  })

+  describe("OPENCODE_CONFIG_DIR environment variable", () => {
+    // Expected custom paths go through resolve(): node:path follows the host OS rather than
+    // the stubbed process.platform, so a literal "/custom/..." would not match on a Windows host.
+    test("returns OPENCODE_CONFIG_DIR when env var is set", () => {
+      // #given OPENCODE_CONFIG_DIR is set to a custom path
+      process.env.OPENCODE_CONFIG_DIR = "/custom/opencode/path"
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then returns the custom path (resolved for the host platform)
+      expect(result).toBe(resolve("/custom/opencode/path"))
+    })
+
+    test("falls back to default when env var is not set", () => {
+      // #given OPENCODE_CONFIG_DIR is not set, platform is Linux
+      delete process.env.OPENCODE_CONFIG_DIR
+      delete process.env.XDG_CONFIG_HOME
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then returns default ~/.config/opencode
+      expect(result).toBe(join(homedir(), ".config", "opencode"))
+    })
+
+    test("falls back to default when env var is empty string", () => {
+      // #given OPENCODE_CONFIG_DIR is set to empty string
+      process.env.OPENCODE_CONFIG_DIR = ""
+      delete process.env.XDG_CONFIG_HOME
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then returns default ~/.config/opencode
+      expect(result).toBe(join(homedir(), ".config", "opencode"))
+    })
+
+    test("falls back to default when env var is whitespace only", () => {
+      // #given OPENCODE_CONFIG_DIR is set to whitespace only
+      process.env.OPENCODE_CONFIG_DIR = "   "
+      delete process.env.XDG_CONFIG_HOME
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then returns default ~/.config/opencode
+      expect(result).toBe(join(homedir(), ".config", "opencode"))
+    })
+
+    test("resolves relative path to absolute path", () => {
+      // #given OPENCODE_CONFIG_DIR is set to a relative path
+      process.env.OPENCODE_CONFIG_DIR = "./my-opencode-config"
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then returns resolved absolute path
+      expect(result).toBe(resolve("./my-opencode-config"))
+    })
+
+    test("OPENCODE_CONFIG_DIR takes priority over XDG_CONFIG_HOME", () => {
+      // #given both OPENCODE_CONFIG_DIR and XDG_CONFIG_HOME are set
+      process.env.OPENCODE_CONFIG_DIR = "/custom/opencode/path"
+      process.env.XDG_CONFIG_HOME = "/xdg/config"
+      Object.defineProperty(process, "platform", { value: "linux" })
+
+      // #when getOpenCodeConfigDir is called with binary="opencode"
+      const result = getOpenCodeConfigDir({ binary: "opencode", version: "1.0.200" })
+
+      // #then OPENCODE_CONFIG_DIR takes priority (resolved for the host platform)
+      expect(result).toBe(resolve("/custom/opencode/path"))
+    })
+  })
+
  describe("isDevBuild", () => {
    test("returns false for null version", () => {
      expect(isDevBuild(null)).toBe(false)
@@ -213,6 +292,7 @@ describe("opencode-config-dir", () => {
      // #given no config files exist
      Object.defineProperty(process, "platform", { value: "linux" })
      delete process.env.XDG_CONFIG_HOME
+      delete process.env.OPENCODE_CONFIG_DIR

      // #when detectExistingConfigDir is called
      const result = detectExistingConfigDir("opencode", "1.0.200")
@@ -220,5 +300,19 @@ describe("opencode-config-dir", () => {
      // #then result is either null or a valid string path
      expect(result === null || typeof result === "string").toBe(true)
    })
+
+    test("includes OPENCODE_CONFIG_DIR in search locations when set", () => {
+      // #given platform is linux, XDG_CONFIG_HOME is unset, OPENCODE_CONFIG_DIR is a custom path
+      Object.defineProperty(process, "platform", { value: "linux" })
+      delete process.env.XDG_CONFIG_HOME
+      process.env.OPENCODE_CONFIG_DIR = "/custom/opencode/path"
+
+      // #when detectExistingConfigDir is called
+      const detected = detectExistingConfigDir("opencode", "1.0.200")
+
+      // #then the call completed without throwing and produced null or a string path
+      // NOTE(review): the declared return type is already string | null, so this check cannot
+      // fail; it does not by itself show that the env directory is among the searched locations.
+      const isNullOrString = detected === null || typeof detected === "string"
+      expect(isNullOrString).toBe(true)
+    })
  })
 })
--- a/src/shared/opencode-config-dir.ts
+++ b/src/shared/opencode-config-dir.ts
@@ -1,6 +1,6 @@
 import { existsSync } from "node:fs"
 import { homedir } from "node:os"
-import { join } from "node:path"
+import { join, resolve } from "node:path"

 export type OpenCodeBinaryType = "opencode" | "opencode-desktop"

@@ -47,6 +47,11 @@ function getTauriConfigDir(identifier: string): string {
 }

 function getCliConfigDir(): string {
+  const envConfigDir = process.env.OPENCODE_CONFIG_DIR?.trim()
+  if (envConfigDir) {
+    return resolve(envConfigDir)
+  }
+
  if (process.platform === "win32") {
    const crossPlatformDir = join(homedir(), ".config", "opencode")
    const crossPlatformConfig = join(crossPlatformDir, "opencode.json")
@@ -108,6 +113,11 @@ export function getOpenCodeConfigPaths(options: OpenCodeConfigDirOptions): OpenC
 export function detectExistingConfigDir(binary: OpenCodeBinaryType, version?: string | null): string | null {
  const locations: string[] = []

+  const envConfigDir = process.env.OPENCODE_CONFIG_DIR?.trim()
+  if (envConfigDir) {
+    locations.push(resolve(envConfigDir))
+  }
+
  if (binary === "opencode-desktop") {
    const identifier = isDevBuild(version) ? TAURI_APP_IDENTIFIER_DEV : TAURI_APP_IDENTIFIER
    locations.push(getTauriConfigDir(identifier))
--- a/src/tools/AGENTS.md
+++ b/src/tools/AGENTS.md
@@ -19,9 +19,10 @@ tools/
 ├── interactive-bash/   # Tmux session management
 ├── look-at/            # Multimodal analysis (PDF, images)
 ├── lsp/                # 11 LSP tools
-│   ├── client.ts       # LSP connection lifecycle
+│   ├── client.ts       # LSP connection lifecycle (612 lines)
+│   ├── utils.ts        # LSP utilities (461 lines)
 │   ├── config.ts       # Server configurations
-│   ├── tools.ts        # Tool implementations
+│   ├── tools.ts        # Tool implementations (405 lines)
 │   └── types.ts
 ├── session-manager/    # OpenCode session file management
 │   ├── constants.ts    # Storage paths, descriptions
@@ -29,6 +30,7 @@ tools/
 │   ├── storage.ts      # File I/O operations
 │   ├── utils.ts        # Formatting, filtering
 │   └── tools.ts        # Tool implementations
+├── sisyphus-task/      # Category-based task delegation (493 lines)
 ├── skill/              # Skill loading and execution
 ├── skill-mcp/          # Skill-embedded MCP invocation
 ├── slashcommand/       # Slash command execution
--- a/src/tools/background-task/tools.ts
+++ b/src/tools/background-task/tools.ts
@@ -74,7 +74,7 @@ export function createBackgroundTask(manager: BackgroundManager): ToolDefinition
          parentSessionID: ctx.sessionID,
          parentMessageID: ctx.messageID,
          parentModel,
-          parentAgent: prevMessage?.agent,
+          parentAgent: ctx.agent ?? prevMessage?.agent,
        })

        ctx.metadata?.({
@@ -176,8 +176,13 @@ async function formatTaskResult(task: BackgroundTask, client: OpencodeClient): P
  // Handle both SDK response structures: direct array or wrapped in .data
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const messages = ((messagesResult as any).data ?? messagesResult) as Array<{
-    info?: { role?: string }
-    parts?: Array<{ type?: string; text?: string }>
+    info?: { role?: string; time?: string }
+    parts?: Array<{ 
+      type?: string
+      text?: string
+      content?: string | Array<{ type: string; text?: string }>
+      name?: string
+    }>
  }>

  if (!Array.isArray(messages) || messages.length === 0) {
@@ -193,11 +198,13 @@ Session ID: ${task.sessionID}
 (No messages found)`
  }

-  const assistantMessages = messages.filter(
-    (m) => m.info?.role === "assistant"
+  // Include both assistant messages AND tool messages
+  // Tool results (grep, glob, bash output) come from role "tool"
+  const relevantMessages = messages.filter(
+    (m) => m.info?.role === "assistant" || m.info?.role === "tool"
  )

-  if (assistantMessages.length === 0) {
+  if (relevantMessages.length === 0) {
    return `Task Result

 Task ID: ${task.id}
@@ -207,17 +214,46 @@ Session ID: ${task.sessionID}

 ---

-(No assistant response found)`
+(No assistant or tool response found)`
  }

-  const lastMessage = assistantMessages[assistantMessages.length - 1]
-  const textParts = lastMessage?.parts?.filter(
-    (p) => p.type === "text"
-  ) ?? []
-  const textContent = textParts
-    .map((p) => p.text ?? "")
+  // Sort oldest-first; info.time may be { created: epochMs }, a number, or a date string (String(obj) cannot order objects).
+  const createdAt = (m: { info?: { time?: unknown } }): number => {
+    const t = m.info?.time
+    if (typeof t === "object" && t !== null) return Number((t as { created?: unknown }).created) || 0
+    return typeof t === "number" ? t : Date.parse(String(t ?? "")) || 0
+  }
+  const sortedMessages = [...relevantMessages].sort((a, b) => createdAt(a) - createdAt(b))
+  // Extract content from ALL messages, not just the last one
+  // Tool results may be in earlier messages while the final message is empty
+  const extractedContent: string[] = []
+  
+  for (const message of sortedMessages) {
+    for (const part of message.parts ?? []) {
+      // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+      if ((part.type === "text" || part.type === "reasoning") && part.text) {
+        extractedContent.push(part.text)
+      } else if (part.type === "tool_result") {
+        // Tool results contain the actual output from tool calls
+        const toolResult = part as { content?: string | Array<{ type: string; text?: string }> }
+        if (typeof toolResult.content === "string" && toolResult.content) {
+          extractedContent.push(toolResult.content)
+        } else if (Array.isArray(toolResult.content)) {
+          // Handle array of content blocks
+          for (const block of toolResult.content) {
+            // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+            if ((block.type === "text" || block.type === "reasoning") && block.text) {
+              extractedContent.push(block.text)
+            }
+          }
+        }
+      }
+    }
+  }
+  
+  const textContent = extractedContent
    .filter((text) => text.length > 0)
-    .join("\n")
+    .join("\n\n")

  const duration = formatDuration(task.startedAt, task.completedAt)

--- a/src/tools/call-omo-agent/tools.ts
+++ b/src/tools/call-omo-agent/tools.ts
@@ -170,23 +170,59 @@ async function executeSync(
  const messages = messagesResult.data
  log(`[call_omo_agent] Got ${messages.length} messages`)

+  // Include both assistant messages AND tool messages
+  // Tool results (grep, glob, bash output) come from role "tool"
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const lastAssistantMessage = messages
-    .filter((m: any) => m.info.role === "assistant")
-    .sort((a: any, b: any) => (b.info.time?.created || 0) - (a.info.time?.created || 0))[0]
+  const relevantMessages = messages.filter(
+    (m: any) => m.info?.role === "assistant" || m.info?.role === "tool"
+  )

-  if (!lastAssistantMessage) {
-    log(`[call_omo_agent] No assistant message found`)
+  if (relevantMessages.length === 0) {
+    log(`[call_omo_agent] No assistant or tool messages found`)
    log(`[call_omo_agent] All messages:`, JSON.stringify(messages, null, 2))
-    return `Error: No assistant response found\n\n<task_metadata>\nsession_id: ${sessionID}\n</task_metadata>`
+    return `Error: No assistant or tool response found\n\n<task_metadata>\nsession_id: ${sessionID}\n</task_metadata>`
  }

-  log(`[call_omo_agent] Found assistant message with ${lastAssistantMessage.parts.length} parts`)
+  log(`[call_omo_agent] Found ${relevantMessages.length} relevant messages`)

+  // Sort by time ascending (oldest first) to process messages in order
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const textParts = lastAssistantMessage.parts.filter((p: any) => p.type === "text")
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const responseText = textParts.map((p: any) => p.text).join("\n")
+  const sortedMessages = [...relevantMessages].sort((a: any, b: any) => {
+    const timeA = a.info?.time?.created ?? 0
+    const timeB = b.info?.time?.created ?? 0
+    return timeA - timeB
+  })
+
+  // Extract content from ALL messages, not just the last one
+  // Tool results may be in earlier messages while the final message is empty
+  const extractedContent: string[] = []
+
+  for (const message of sortedMessages) {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    for (const part of (message as any).parts ?? []) {
+      // Handle both "text" and "reasoning" parts (thinking models use "reasoning")
+      if ((part.type === "text" || part.type === "reasoning") && part.text) {
+        extractedContent.push(part.text)
+      } else if (part.type === "tool_result") {
+        // Tool results contain the actual output from tool calls
+        const toolResult = part as { content?: string | Array<{ type: string; text?: string }> }
+        if (typeof toolResult.content === "string" && toolResult.content) {
+          extractedContent.push(toolResult.content)
+        } else if (Array.isArray(toolResult.content)) {
+          // Handle array of content blocks
+          for (const block of toolResult.content) {
+            if ((block.type === "text" || block.type === "reasoning") && block.text) {
+              extractedContent.push(block.text)
+            }
+          }
+        }
+      }
+    }
+  }
+
+  const responseText = extractedContent
+    .filter((text) => text.length > 0)
+    .join("\n\n")

  log(`[call_omo_agent] Got response, length: ${responseText.length}`)

--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -10,8 +10,11 @@ import {
  lsp_rename,
  lsp_code_actions,
  lsp_code_action_resolve,
+  lspManager,
 } from "./lsp"

+export { lspManager }
+
 import {
  ast_grep_search,
  ast_grep_replace,
--- a/src/tools/lsp/client.ts
+++ b/src/tools/lsp/client.ts
@@ -182,6 +182,26 @@ class LSPServerManager {
      this.cleanupInterval = null
    }
  }
+
+  /** Stops and evicts idle clients keyed under "/tmp/" or "/var/folders/"; stop() failures are ignored. */
+  async cleanupTempDirectoryClients(): Promise<void> {
+    const isTempKey = (key: string): boolean => key.startsWith("/tmp/") || key.startsWith("/var/folders/")
+    // Snapshot candidate keys first; each stop() below is awaited, so the map can change in between.
+    const candidateKeys = [...this.clients.entries()]
+      .filter(([key, managed]) => isTempKey(key) && managed.refCount === 0)
+      .map(([key]) => key)
+    for (const key of candidateKeys) {
+      const managed = this.clients.get(key)
+      // Re-check idleness: the client may have been re-acquired while an earlier stop() was awaited.
+      if (!managed || managed.refCount !== 0) continue
+      this.clients.delete(key)
+      try {
+        await managed.client.stop()
+      } catch {
+        // Best-effort cleanup: a failed stop() must not prevent evicting the remaining clients.
+      }
+    }
+  }
 }

 export const lspManager = LSPServerManager.getInstance()
--- a/src/tools/sisyphus-task/tools.test.ts
+++ b/src/tools/sisyphus-task/tools.test.ts
@@ -259,6 +259,7 @@ describe("sisyphus-task", () => {

  describe("resume with background parameter", () => {
  test("resume with background=false should wait for result and return content", async () => {
+    // Note: This test needs extended timeout because the implementation has MIN_STABILITY_TIME_MS = 5000
    // #given
    const { createSisyphusTask } = require("./tools")
    
@@ -319,7 +320,7 @@ describe("sisyphus-task", () => {
    // #then - should contain actual result, not just "Background task resumed"
    expect(result).toContain("This is the resumed task result")
    expect(result).not.toContain("Background task resumed")
-  })
+  }, { timeout: 10000 })

  test("resume with background=true should return immediately without waiting", async () => {
    // #given
--- a/src/tools/sisyphus-task/tools.ts
+++ b/src/tools/sisyphus-task/tools.ts
@@ -221,6 +221,33 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
          return `❌ Failed to send resume prompt: ${errorMessage}\n\nSession ID: ${args.resume}`
        }

+        // Wait for message stability after prompt completes
+        const POLL_INTERVAL_MS = 500
+        const MIN_STABILITY_TIME_MS = 5000
+        const STABILITY_POLLS_REQUIRED = 3
+        const pollStart = Date.now()
+        let lastMsgCount = 0
+        let stablePolls = 0
+
+        while (Date.now() - pollStart < 60000) {
+          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
+          
+          const elapsed = Date.now() - pollStart
+          if (elapsed < MIN_STABILITY_TIME_MS) continue
+
+          const messagesCheck = await client.session.messages({ path: { id: args.resume } })
+          const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
+          const currentMsgCount = msgs.length
+
+          if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
+            stablePolls++
+            if (stablePolls >= STABILITY_POLLS_REQUIRED) break
+          } else {
+            stablePolls = 0
+            lastMsgCount = currentMsgCount
+          }
+        }
+
        const messagesResult = await client.session.messages({
          path: { id: args.resume },
        })
@@ -250,7 +277,8 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
          return `❌ No assistant response found.\n\nSession ID: ${args.resume}`
        }

-        const textParts = lastMessage?.parts?.filter((p) => p.type === "text") ?? []
+        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
+        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")

        const duration = formatDuration(startTime)
@@ -390,13 +418,13 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          metadata: { sessionId: sessionID, category: args.category, sync: true },
        })

-        // Use promptAsync to avoid changing main session's active state
+        // Use fire-and-forget prompt() - awaiting causes JSON parse errors with thinking models
+        // Note: Don't pass model in body - use agent's configured model instead
        let promptError: Error | undefined
-        await client.session.promptAsync({
+        client.session.prompt({
          path: { id: sessionID },
          body: {
            agent: agentToUse,
-            model: categoryModel,
            system: systemContent,
            tools: {
              task: false,
@@ -408,6 +436,9 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          promptError = error instanceof Error ? error : new Error(String(error))
        })

+        // Small delay to let the prompt start
+        await new Promise(resolve => setTimeout(resolve, 100))
+
        if (promptError) {
          if (toastManager && taskId !== undefined) {
            toastManager.removeTask(taskId)
@@ -419,21 +450,63 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          return `❌ Failed to send prompt: ${errorMessage}\n\nSession ID: ${sessionID}`
        }

-        // Poll for session completion
+        // Poll for session completion with stability detection
+        // The session may show as "idle" before messages appear, so we also check message stability
        const POLL_INTERVAL_MS = 500
        const MAX_POLL_TIME_MS = 10 * 60 * 1000
+        const MIN_STABILITY_TIME_MS = 10000  // Minimum 10s before accepting completion
+        const STABILITY_POLLS_REQUIRED = 3
        const pollStart = Date.now()
+        let lastMsgCount = 0
+        let stablePolls = 0

        while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))

+          // Check for async errors that may have occurred after the initial 100ms delay
+          // TypeScript doesn't understand async mutation, so we cast to check
+          const asyncError = promptError as Error | undefined
+          if (asyncError) {
+            if (toastManager && taskId !== undefined) {
+              toastManager.removeTask(taskId)
+            }
+            const errorMessage = asyncError.message
+            if (errorMessage.includes("agent.name") || errorMessage.includes("undefined")) {
+              return `❌ Agent "${agentToUse}" not found. Make sure the agent is registered in your opencode.json or provided by a plugin.\n\nSession ID: ${sessionID}`
+            }
+            return `❌ Failed to send prompt: ${errorMessage}\n\nSession ID: ${sessionID}`
+          }
+
          const statusResult = await client.session.status()
          const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
          const sessionStatus = allStatuses[sessionID]

-          // Break if session is idle OR no longer in status (completed and removed)
-          if (!sessionStatus || sessionStatus.type === "idle") {
-            break
+          // If session is actively running, reset stability
+          if (sessionStatus && sessionStatus.type !== "idle") {
+            stablePolls = 0
+            lastMsgCount = 0
+            continue
+          }
+
+          // Session is idle or not in status - check message stability
+          const elapsed = Date.now() - pollStart
+          if (elapsed < MIN_STABILITY_TIME_MS) {
+            continue  // Don't accept completion too early
+          }
+
+          // Get current message count
+          const messagesCheck = await client.session.messages({ path: { id: sessionID } })
+          const msgs = ((messagesCheck as { data?: unknown }).data ?? messagesCheck) as Array<unknown>
+          const currentMsgCount = msgs.length
+
+          if (currentMsgCount > 0 && currentMsgCount === lastMsgCount) {
+            stablePolls++
+            if (stablePolls >= STABILITY_POLLS_REQUIRED) {
+              break  // Messages stable for 3 polls - task complete
+            }
+          } else {
+            stablePolls = 0
+            lastMsgCount = currentMsgCount
          }
        }

@@ -459,7 +532,8 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          return `❌ No assistant response found.\n\nSession ID: ${sessionID}`
        }
        
-        const textParts = lastMessage?.parts?.filter((p) => p.type === "text") ?? []
+        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
+        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")

        const duration = formatDuration(startTime)
--- a/src/tools/skill/tools.ts
+++ b/src/tools/skill/tools.ts
@@ -194,4 +194,4 @@ export function createSkillTool(options: SkillLoadOptions = {}): ToolDefinition
  })
 }

-export const skill = createSkillTool()
+export const skill: ToolDefinition = createSkillTool()
--- a/src/tools/slashcommand/tools.ts
+++ b/src/tools/slashcommand/tools.ts
@@ -249,4 +249,4 @@ export function createSlashcommandTool(options: SlashcommandToolOptions = {}): T
 }

 // Default instance for backward compatibility (lazy loading)
-export const slashcommand = createSlashcommandTool()
+export const slashcommand: ToolDefinition = createSlashcommandTool()