release: v3.1.2

release: v3.1.1
Revert "feat(librarian): conditionally enable thinking based on model type"
2026-01-27 01:07:09 +00:00 · 2026-01-26 23:48:28 +00:00 · 2026-01-27 08:39:45 +09:00 · 2026-01-27 08:39:45 +09:00 · 2026-01-27 08:39:45 +09:00 · 2026-01-26 23:20:52 +00:00
157 changed files with 12263 additions and 3223 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,13 +4,32 @@ on:
  push:
    branches: [master, dev]
  pull_request:
-    branches: [dev]
+    branches: [master, dev]

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

 jobs:
+  # Block PRs targeting master branch
+  block-master-pr:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    steps:
+      - name: Check PR target branch
+        run: |
+          if [ "${{ github.base_ref }}" = "master" ]; then
+            echo "::error::PRs to master branch are not allowed. Please target the 'dev' branch instead."
+            echo ""
+            echo "PULL REQUESTS TO MASTER ARE BLOCKED"
+            echo ""
+            echo "All PRs must target the 'dev' branch."
+            echo "Please close this PR and create a new one targeting 'dev'."
+            exit 1
+          else
+            echo "PR targets '${{ github.base_ref }}' branch - OK"
+          fi
+
  test:
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/publish-platform.yml
+++ b/.github/workflows/publish-platform.yml
@@ -29,7 +29,12 @@ permissions:

 jobs:
  publish-platform:
-    runs-on: ubuntu-latest
+    # Use windows-latest for Windows to avoid cross-compilation segfault (oven-sh/bun#18416)
+    # Fixes: #873, #844
+    runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }}
+    defaults:
+      run:
+        shell: bash
    strategy:
      fail-fast: false
      max-parallel: 2
--- a/.opencode/command/publish.md
+++ b/.opencode/command/publish.md
@@ -35,6 +35,8 @@ You are the release manager for oh-my-opencode. Execute the FULL publish workflo
  { "id": "draft-release-notes", "content": "Draft enhanced release notes content", "status": "pending", "priority": "high" },
  { "id": "update-release-notes", "content": "Update GitHub release with enhanced notes", "status": "pending", "priority": "high" },
  { "id": "verify-npm", "content": "Verify npm package published successfully", "status": "pending", "priority": "high" },
+  { "id": "wait-platform-workflow", "content": "Wait for publish-platform workflow completion", "status": "pending", "priority": "high" },
+  { "id": "verify-platform-binaries", "content": "Verify all 7 platform binary packages published", "status": "pending", "priority": "high" },
  { "id": "final-confirmation", "content": "Final confirmation to user with links", "status": "pending", "priority": "low" }
 ]
 ```
@@ -219,12 +221,64 @@ Compare with expected version. If not matching after 2 minutes, warn user about

 ---

+## STEP 8.5: WAIT FOR PLATFORM WORKFLOW COMPLETION
+
+The main publish workflow triggers a separate `publish-platform` workflow for platform-specific binaries.
+
+1. Find the publish-platform workflow run triggered by the main workflow:
+```bash
+gh run list --workflow=publish-platform --limit=1 --json databaseId,status,conclusion --jq '.[0]'
+```
+
+2. Poll workflow status every 30 seconds until completion:
+```bash
+gh run view {platform_run_id} --json status,conclusion --jq '{status: .status, conclusion: .conclusion}'
+```
+
+**IMPORTANT: Use polling loop, NOT sleep commands.**
+
+If conclusion is `failure`, show error logs:
+```bash
+gh run view {platform_run_id} --log-failed
+```
+
+---
+
+## STEP 8.6: VERIFY PLATFORM BINARY PACKAGES
+
+After publish-platform workflow completes, verify all 7 platform packages are published:
+
+```bash
+PLATFORMS="darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64"
+for PLATFORM in $PLATFORMS; do
+  npm view "oh-my-opencode-${PLATFORM}" version
+done
+```
+
+All 7 packages should show the same version as the main package (`${NEW_VERSION}`).
+
+**Expected packages:**
+| Package | Description |
+|---------|-------------|
+| `oh-my-opencode-darwin-arm64` | macOS Apple Silicon |
+| `oh-my-opencode-darwin-x64` | macOS Intel |
+| `oh-my-opencode-linux-x64` | Linux x64 (glibc) |
+| `oh-my-opencode-linux-arm64` | Linux ARM64 (glibc) |
+| `oh-my-opencode-linux-x64-musl` | Linux x64 (musl/Alpine) |
+| `oh-my-opencode-linux-arm64-musl` | Linux ARM64 (musl/Alpine) |
+| `oh-my-opencode-windows-x64` | Windows x64 |
+
+If any platform package version doesn't match, warn the user and suggest checking the publish-platform workflow logs.
+
+---
+
 ## STEP 9: FINAL CONFIRMATION

 Report success to user with:
 - New version number
 - GitHub release URL: https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v{version}
 - npm package URL: https://www.npmjs.com/package/oh-my-opencode
+- Platform packages status: List all 7 platform packages with their versions

 ---

@@ -234,6 +288,8 @@ Report success to user with:
 - **Release not found**: Wait and retry, may be propagation delay
 - **npm not updated**: npm can take 1-5 minutes to propagate, inform user
 - **Permission denied**: User may need to re-authenticate with `gh auth login`
+- **Platform workflow fails**: Show logs from publish-platform workflow, check which platform failed
+- **Platform package missing**: Some platforms may fail due to cross-compilation issues, suggest re-running publish-platform workflow manually

 ## LANGUAGE

--- a/.opencode/command/remove-deadcode.md
+++ b/.opencode/command/remove-deadcode.md
@@ -0,0 +1,342 @@
+---
+description: Remove unused code from this project with ultrawork mode, LSP-verified safety, atomic commits
+---
+
+<command-instruction>
+You are a dead code removal specialist. Execute the FULL dead code removal workflow using ultrawork mode.
+
+Your core weapon: **LSP FindReferences**. If a symbol has ZERO external references, it's dead. Remove it.
+
+## CRITICAL RULES
+
+1. **LSP is law.** Never guess. Always verify with `LspFindReferences` before removing ANYTHING.
+2. **One removal = one commit.** Every dead code removal gets its own atomic commit.
+3. **Test after every removal.** Run `bun test` after each. If it fails, REVERT and skip.
+4. **Leaf-first order.** Remove deepest unused symbols first, then work up the dependency chain. Removing a leaf may expose new dead code upstream.
+5. **Never remove entry points.** `src/index.ts`, `src/cli/index.ts`, test files, config files, and files in `packages/` are off-limits unless explicitly targeted.
+
+---
+
+## STEP 0: REGISTER TODO LIST (MANDATORY FIRST ACTION)
+
+```
+TodoWrite([
+  {"id": "scan", "content": "PHASE 1: Scan codebase for dead code candidates using LSP + explore agents", "status": "pending", "priority": "high"},
+  {"id": "verify", "content": "PHASE 2: Verify each candidate with LspFindReferences - zero false positives", "status": "pending", "priority": "high"},
+  {"id": "plan", "content": "PHASE 3: Plan removal order (leaf-first dependency order)", "status": "pending", "priority": "high"},
+  {"id": "remove", "content": "PHASE 4: Remove dead code one-by-one (remove -> test -> commit loop)", "status": "pending", "priority": "high"},
+  {"id": "final", "content": "PHASE 5: Final verification - full test suite + build + typecheck", "status": "pending", "priority": "high"}
+])
+```
+
+---
+
+## PHASE 1: SCAN FOR DEAD CODE CANDIDATES
+
+**Mark scan as in_progress.**
+
+### 1.1: Launch Parallel Explore Agents (ALL BACKGROUND)
+
+Fire ALL simultaneously:
+
+```
+// Agent 1: Find all exported symbols
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
+  List each with: file path, line number, symbol name, export type (named/default).
+  EXCLUDE: src/index.ts root exports, test files.
+  Return as structured list.")
+
+// Agent 2: Find potentially unused files
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find files in src/ that are NOT imported by any other file.
+  Check import/require statements across the entire codebase.
+  EXCLUDE: index.ts files, test files, entry points, config files, .md files.
+  Return list of potentially orphaned files.")
+
+// Agent 3: Find unused imports within files
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find unused imports across src/**/*.ts files.
+  Look for import statements where the imported symbol is never referenced in the file body.
+  Return: file path, line number, imported symbol name.")
+
+// Agent 4: Find functions/variables only used in their own declaration
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
+  to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
+```
+
+### 1.2: Direct AST-Grep Scans (WHILE AGENTS RUN)
+
+```typescript
+// Find single-symbol named imports (candidates to check for actual usage)
+ast_grep_search(pattern="import { $NAME } from '$PATH'", lang="typescript", paths=["src/"])
+
+// Find empty export objects
+ast_grep_search(pattern="export {}", lang="typescript", paths=["src/"])
+```
+
+### 1.3: Collect All Results
+
+Collect background agent results. Compile into a master candidate list:
+
+```
+## DEAD CODE CANDIDATES
+
+| # | File | Line | Symbol | Type | Confidence |
+|---|------|------|--------|------|------------|
+| 1 | src/foo.ts | 42 | unusedFunc | function | HIGH |
+| 2 | src/bar.ts | 10 | OldType | type | MEDIUM |
+```
+
+**Mark scan as completed.**
+
+---
+
+## PHASE 2: VERIFY WITH LSP (ZERO FALSE POSITIVES)
+
+**Mark verify as in_progress.**
+
+For EVERY candidate from Phase 1, run this verification:
+
+### 2.1: The LSP Verification Protocol
+
+For each candidate symbol:
+
+```typescript
+// Step 1: Find the symbol's exact position
+LspDocumentSymbols(filePath)  // Get line/character of the symbol
+
+// Step 2: Find ALL references across the ENTIRE workspace
+LspFindReferences(filePath, line, character, includeDeclaration=false)
+// includeDeclaration=false → only counts USAGES, not the definition itself
+
+// Step 3: Evaluate
+// 0 references → CONFIRMED DEAD CODE
+// 1+ references → NOT dead, remove from candidate list
+```
+
+### 2.2: False Positive Guards
+
+**NEVER mark as dead code if:**
+- Symbol is in `src/index.ts` (package entry point)
+- Symbol is re-exported through any `index.ts` barrel file (check whether the barrel re-exports it)
+- Symbol is referenced in test files (tests are valid consumers)
+- Symbol has `@public` or `@api` JSDoc tags
+- Symbol is in a file listed in `package.json` exports
+- Symbol is a hook factory (`createXXXHook`) registered in `src/index.ts`
+- Symbol is a tool factory (`createXXXTool`) registered in tool loading
+- Symbol is an agent definition registered in `agentSources`
+- File is a command template, skill definition, or MCP config
+
+### 2.3: Build Confirmed Dead Code List
+
+After verification, produce:
+
+```
+## CONFIRMED DEAD CODE (LSP-verified, 0 external references)
+
+| # | File | Line | Symbol | Type | Safe to Remove |
+|---|------|------|--------|------|----------------|
+| 1 | src/foo.ts | 42 | unusedFunc | function | YES |
+```
+
+**If ZERO confirmed dead code found: Report "No dead code found" and STOP.**
+
+**Mark verify as completed.**
+
+---
+
+## PHASE 3: PLAN REMOVAL ORDER
+
+**Mark plan as in_progress.**
+
+### 3.1: Dependency Analysis
+
+For each confirmed dead symbol:
+1. Check if removing it would expose other dead code
+2. Check if other dead symbols depend on this one
+3. Build removal dependency graph
+
+### 3.2: Order by Leaf-First
+
+```
+Removal Order:
+1. [Leaf symbols - no other dead code depends on them]
+2. [Intermediate symbols - depended on only by already-removed dead code]
+3. [Dead files - entire files with no live exports]
+```
+
+### 3.3: Register Granular Todos
+
+Create one todo per removal:
+
+```
+TodoWrite([
+  {"id": "remove-1", "content": "Remove unusedFunc from src/foo.ts:42", "status": "pending", "priority": "high"},
+  {"id": "remove-2", "content": "Remove OldType from src/bar.ts:10", "status": "pending", "priority": "high"},
+  // ... one per confirmed dead symbol
+])
+```
+
+**Mark plan as completed.**
+
+---
+
+## PHASE 4: ITERATIVE REMOVAL LOOP
+
+**Mark remove as in_progress.**
+
+For EACH dead code item, execute this exact loop:
+
+### 4.1: Pre-Removal Check
+
+```typescript
+// Re-verify it's still dead (previous removals may have changed things)
+LspFindReferences(filePath, line, character, includeDeclaration=false)
+// If references > 0 now → SKIP (the symbol is in use after all; it is not dead code)
+```
+
+### 4.2: Remove the Dead Code
+
+Use appropriate tool:
+
+**For unused imports:**
+```typescript
+Edit(filePath, oldString="import { deadSymbol } from '...';\n", newString="")
+// Or if it's one of many imports, remove just the symbol from the import list
+```
+
+**For unused functions/classes/types:**
+```typescript
+// Read the full symbol extent first
+Read(filePath, offset=startLine, limit=endLine-startLine+1)
+// Then remove it
+Edit(filePath, oldString="[full symbol text]", newString="")
+```
+
+**For dead files:**
+```bash
+# Only after confirming ZERO imports point to this file
+rm "path/to/dead-file.ts"
+```
+
+**After removal, also clean up:**
+- Remove any imports that were ONLY used by the removed code
+- Remove any now-empty import statements
+- Fix any trailing whitespace / double blank lines left behind
+
+### 4.3: Post-Removal Verification
+
+```typescript
+// 1. LSP diagnostics on changed file
+LspDiagnostics(filePath, severity="error")
+// Must be clean (or only pre-existing errors)
+
+// 2. Run tests
+bash("bun test")
+// Must pass
+
+// 3. Typecheck
+bash("bun run typecheck")
+// Must pass
+```
+
+### 4.4: Handle Failures
+
+If ANY verification fails:
+1. **REVERT** the change immediately (`git checkout -- [file]`)
+2. Mark this removal todo as `cancelled` with note: "Removal caused [error]. Skipped."
+3. Proceed to next item
+
+### 4.5: Commit
+
+```bash
+git add [changed-files]
+git commit -m "refactor: remove unused [symbolType] [symbolName] from [filePath]"
+```
+
+Mark this removal todo as `completed`.
+
+### 4.6: Re-scan After Removal
+
+After removing a symbol, check if its removal exposed NEW dead code:
+- Were there imports that only existed to serve the removed symbol?
+- Are there other symbols in the same file now unreferenced?
+
+If new dead code is found, add it to the removal queue.
+
+**Repeat 4.1-4.6 for every item. Mark remove as completed when done.**
+
+---
+
+## PHASE 5: FINAL VERIFICATION
+
+**Mark final as in_progress.**
+
+### 5.1: Full Test Suite
+```bash
+bun test
+```
+
+### 5.2: Full Typecheck
+```bash
+bun run typecheck
+```
+
+### 5.3: Full Build
+```bash
+bun run build
+```
+
+### 5.4: Summary Report
+
+```markdown
+## Dead Code Removal Complete
+
+### Removed
+| # | Symbol | File | Type | Commit |
+|---|--------|------|------|--------|
+| 1 | unusedFunc | src/foo.ts | function | abc1234 |
+
+### Skipped (caused failures)
+| # | Symbol | File | Reason |
+|---|--------|------|--------|
+| 1 | riskyFunc | src/bar.ts | Test failure: [details] |
+
+### Verification
+- Tests: PASSED (X/Y passing)
+- Typecheck: CLEAN
+- Build: SUCCESS
+- Total dead code removed: N symbols across M files
+- Total commits: K atomic commits
+```
+
+**Mark final as completed.**
+
+---
+
+## SCOPE CONTROL
+
+**Use $ARGUMENTS to set the scan scope:**
+- File path: Only scan that file
+- Directory: Only scan that directory
+- Symbol name: Only check that specific symbol
+- "all" or empty: Full project scan (default)
+
+## ABORT CONDITIONS
+
+**STOP and report to user if:**
+- 3 consecutive removals cause test failures
+- Build breaks and cannot be fixed by reverting
+- More than 50 candidates found (ask user to narrow scope)
+
+## LANGUAGE
+
+Use English for commit messages and technical output.
+
+</command-instruction>
+
+<user-request>
+$ARGUMENTS
+</user-request>
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,12 +1,24 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-01-23T02:09:00+09:00
-**Commit:** 0e18efc7
+**Generated:** 2026-01-26T14:50:00+09:00
+**Commit:** 9d66b807
 **Branch:** dev

+---
+
+## **IMPORTANT: PULL REQUEST TARGET BRANCH**
+
+> **ALL PULL REQUESTS MUST TARGET THE `dev` BRANCH.**
+>
+> **DO NOT CREATE PULL REQUESTS TARGETING `master` BRANCH.**
+>
+> PRs to `master` will be automatically rejected by CI.
+
+---
+
 ## OVERVIEW

-OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3, Grok, GLM-4.7). 31 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.
+OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash, Grok Code). 32 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.

 ## STRUCTURE

@@ -14,14 +26,14 @@ OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemi
 oh-my-opencode/
 ├── src/
 │   ├── agents/        # 10 AI agents - see src/agents/AGENTS.md
-│   ├── hooks/         # 31 lifecycle hooks - see src/hooks/AGENTS.md
+│   ├── hooks/         # 32 lifecycle hooks - see src/hooks/AGENTS.md
 │   ├── tools/         # 20+ tools - see src/tools/AGENTS.md
 │   ├── features/      # Background agents, Claude Code compat - see src/features/AGENTS.md
-│   ├── shared/        # 50 cross-cutting utilities - see src/shared/AGENTS.md
+│   ├── shared/        # 55 cross-cutting utilities - see src/shared/AGENTS.md
 │   ├── cli/           # CLI installer, doctor - see src/cli/AGENTS.md
 │   ├── mcp/           # Built-in MCPs - see src/mcp/AGENTS.md
 │   ├── config/        # Zod schema, TypeScript types
-│   └── index.ts       # Main plugin entry (590 lines)
+│   └── index.ts       # Main plugin entry (672 lines)
 ├── script/            # build-schema.ts, build-binaries.ts
 ├── packages/          # 7 platform-specific binaries
 └── dist/              # Build output (ESM + .d.ts)
@@ -36,9 +48,10 @@ oh-my-opencode/
 | Add tool | `src/tools/` | Dir with index/types/constants/tools.ts |
 | Add MCP | `src/mcp/` | Create config, add to index.ts |
 | Add skill | `src/features/builtin-skills/` | Create dir with SKILL.md |
+| Add command | `src/features/builtin-commands/` | Add template + register in commands.ts |
 | Config schema | `src/config/schema.ts` | Zod schema, run `bun run build:schema` |
-| Background agents | `src/features/background-agent/` | manager.ts (1335 lines) |
-| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (771 lines) |
+| Background agents | `src/features/background-agent/` | manager.ts (1377 lines) |
+| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (752 lines) |

 ## TDD (Test-Driven Development)

@@ -50,8 +63,8 @@ oh-my-opencode/
 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests - fix the code
- Test file: `*.test.ts` alongside source
- BDD comments: `#given`, `#when`, `#then`
+- Test file: `*.test.ts` alongside source (100 test files)
+- BDD comments: `//#given`, `//#when`, `//#then`

 ## CONVENTIONS

@@ -60,7 +73,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern via index.ts
 - **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
- **Testing**: BDD comments, 90 test files
+- **Testing**: BDD comments, 100 test files
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS
@@ -88,8 +101,8 @@ oh-my-opencode/
 | Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator |
 | Atlas | anthropic/claude-opus-4-5 | Master orchestrator |
 | oracle | openai/gpt-5.2 | Consultation, debugging |
-| librarian | opencode/glm-4.7-free | Docs, GitHub search |
-| explore | opencode/grok-code | Fast codebase grep |
+| librarian | opencode/big-pickle | Docs, GitHub search |
+| explore | opencode/gpt-5-nano | Fast codebase grep |
 | multimodal-looker | google/gemini-3-flash | PDF/image analysis |
 | Prometheus | anthropic/claude-opus-4-5 | Strategic planning |

@@ -99,7 +112,7 @@ oh-my-opencode/
 bun run typecheck      # Type check
 bun run build          # ESM + declarations + schema
 bun run rebuild        # Clean + Build
-bun test               # 90 test files
+bun test               # 100 test files
 ```

 ## DEPLOYMENT
@@ -113,12 +126,14 @@ bun test               # 90 test files

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/agents/atlas.ts` | 1383 | Orchestrator, 7-section delegation |
-| `src/features/background-agent/manager.ts` | 1335 | Task lifecycle, concurrency |
-| `src/features/builtin-skills/skills.ts` | 1203 | Skill definitions |
+| `src/features/builtin-skills/skills.ts` | 1729 | Skill definitions |
+| `src/features/background-agent/manager.ts` | 1377 | Task lifecycle, concurrency |
 | `src/agents/prometheus-prompt.ts` | 1196 | Planning agent |
-| `src/tools/delegate-task/tools.ts` | 1038 | Category-based delegation |
-| `src/hooks/atlas/index.ts` | 771 | Orchestrator hook |
+| `src/tools/delegate-task/tools.ts` | 1070 | Category-based delegation |
+| `src/hooks/atlas/index.ts` | 752 | Orchestrator hook |
+| `src/cli/config-manager.ts` | 664 | JSONC config parsing |
+| `src/index.ts` | 672 | Main plugin entry |
+| `src/features/builtin-commands/templates/refactor.ts` | 619 | Refactor command template |

 ## MCP ARCHITECTURE

--- a/README.ja.md
+++ b/README.ja.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **オーケストレーターがベータ版で利用可能になりました。`oh-my-opencode@3.0.0-beta.10`を使用してインストールしてください。**
+> [![Oh My OpenCode 3.0が正式リリースされました！](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0が正式リリースされました！`oh-my-opencode@latest`を使用してインストールしてください。**
 >
 > 一緒に歩みましょう！
 >
@@ -73,7 +73,9 @@
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)

-[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 </div>

--- a/README.ko.md
+++ b/README.ko.md
@@ -16,8 +16,8 @@
 >
 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **오케스트레이터가 베타 버전으로 사용 가능합니다. 설치하려면 `oh-my-opencode@3.0.0-beta.10`을 사용하세요.**
+> [![Oh My OpenCode 3.0이 정식 출시되었습니다!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0이 정식 출시되었습니다! `oh-my-opencode@latest`를 사용하여 설치하세요.**
 >
 > 함께해요!
 >
@@ -73,10 +73,11 @@
 [![GitHub Stars](https://img.shields.io/github/stars/code-yeongyu/oh-my-opencode?color=ffcb47&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/stargazers)
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)
-[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 [English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)

+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
+
 </div>

 <!-- </CENTERED SECTION FOR GITHUB DISPLAY> -->
--- a/README.md
+++ b/README.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **The Orchestrator is now available in beta. Use `oh-my-opencode@3.0.0-beta.10` to install it.**
+> [![Oh My OpenCode 3.0 is now stable!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0 is now stable! Use `oh-my-opencode@latest` to install it.**
 >
 > Be with us!
 >
--- a/README.zh-cn.md
+++ b/README.zh-cn.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![Orchestrator 现已进入测试阶段。](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **Orchestrator 现已进入测试阶段。使用 `oh-my-opencode@3.0.0-beta.10` 安装。**
+> [![Oh My OpenCode 3.0 正式发布！](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0 正式发布！使用 `oh-my-opencode@latest` 安装。**
 >
 > 加入我们！
 >
@@ -74,7 +74,9 @@
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![许可证](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)

-[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 </div>

--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -20,14 +20,15 @@
      "items": {
        "type": "string",
        "enum": [
-          "Sisyphus",
+          "sisyphus",
+          "prometheus",
          "oracle",
          "librarian",
          "explore",
          "multimodal-looker",
-          "Metis (Plan Consultant)",
-          "Momus (Plan Reviewer)",
-          "Atlas"
+          "metis",
+          "momus",
+          "atlas"
        ]
      }
    },
@@ -37,6 +38,7 @@
        "type": "string",
        "enum": [
          "playwright",
+          "agent-browser",
          "frontend-ui-ux",
          "git-master"
        ]
@@ -69,12 +71,14 @@
          "interactive-bash-session",
          "thinking-block-validator",
          "ralph-loop",
+          "category-skill-reminder",
          "compaction-context-injector",
          "claude-code-hooks",
          "auto-slash-command",
          "edit-error-recovery",
          "delegate-task-retry",
          "prometheus-md-only",
+          "sisyphus-junior-notepad",
          "start-work",
          "atlas"
        ]
@@ -216,6 +220,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -342,10 +391,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Sisyphus": {
+        "sisyphus": {
          "type": "object",
          "properties": {
            "model": {
@@ -468,10 +562,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Sisyphus-Junior": {
+        "sisyphus-junior": {
          "type": "object",
          "properties": {
            "model": {
@@ -594,6 +733,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -720,10 +904,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Prometheus (Planner)": {
+        "prometheus": {
          "type": "object",
          "properties": {
            "model": {
@@ -846,10 +1075,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Metis (Plan Consultant)": {
+        "metis": {
          "type": "object",
          "properties": {
            "model": {
@@ -972,10 +1246,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Momus (Plan Reviewer)": {
+        "momus": {
          "type": "object",
          "properties": {
            "model": {
@@ -1098,6 +1417,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -1224,6 +1588,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -1350,6 +1759,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -1476,6 +1930,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
@@ -1602,10 +2101,55 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        },
-        "Atlas": {
+        "atlas": {
          "type": "object",
          "properties": {
            "model": {
@@ -1728,6 +2272,51 @@
                  ]
                }
              }
+            },
+            "maxTokens": {
+              "type": "number"
+            },
+            "thinking": {
+              "type": "object",
+              "properties": {
+                "type": {
+                  "type": "string",
+                  "enum": [
+                    "enabled",
+                    "disabled"
+                  ]
+                },
+                "budgetTokens": {
+                  "type": "number"
+                }
+              },
+              "required": [
+                "type"
+              ]
+            },
+            "reasoningEffort": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high",
+                "xhigh"
+              ]
+            },
+            "textVerbosity": {
+              "type": "string",
+              "enum": [
+                "low",
+                "medium",
+                "high"
+              ]
+            },
+            "providerOptions": {
+              "type": "object",
+              "propertyNames": {
+                "type": "string"
+              },
+              "additionalProperties": {}
            }
          }
        }
@@ -1786,7 +2375,8 @@
            "enum": [
              "low",
              "medium",
-              "high"
+              "high",
+              "xhigh"
            ]
          },
          "textVerbosity": {
@@ -2169,6 +2759,55 @@
          "type": "boolean"
        }
      }
+    },
+    "browser_automation_engine": {
+      "type": "object",
+      "properties": {
+        "provider": {
+          "default": "playwright",
+          "type": "string",
+          "enum": [
+            "playwright",
+            "agent-browser"
+          ]
+        }
+      }
+    },
+    "tmux": {
+      "type": "object",
+      "properties": {
+        "enabled": {
+          "default": false,
+          "type": "boolean"
+        },
+        "layout": {
+          "default": "main-vertical",
+          "type": "string",
+          "enum": [
+            "main-horizontal",
+            "main-vertical",
+            "tiled",
+            "even-horizontal",
+            "even-vertical"
+          ]
+        },
+        "main_pane_size": {
+          "default": 60,
+          "type": "number",
+          "minimum": 20,
+          "maximum": 80
+        },
+        "main_pane_min_width": {
+          "default": 120,
+          "type": "number",
+          "minimum": 40
+        },
+        "agent_pane_min_width": {
+          "default": 40,
+          "type": "number",
+          "minimum": 20
+        }
+      }
    }
  }
 }
--- a/bun.lock
+++ b/bun.lock
@@ -27,13 +27,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.0.0-beta.11",
-        "oh-my-opencode-darwin-x64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-arm64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-arm64-musl": "3.0.0-beta.11",
-        "oh-my-opencode-linux-x64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-x64-musl": "3.0.0-beta.11",
-        "oh-my-opencode-windows-x64": "3.0.0-beta.11",
+        "oh-my-opencode-darwin-arm64": "3.1.0",
+        "oh-my-opencode-darwin-x64": "3.1.0",
+        "oh-my-opencode-linux-arm64": "3.1.0",
+        "oh-my-opencode-linux-arm64-musl": "3.1.0",
+        "oh-my-opencode-linux-x64": "3.1.0",
+        "oh-my-opencode-linux-x64-musl": "3.1.0",
+        "oh-my-opencode-windows-x64": "3.1.0",
      },
    },
  },
@@ -225,19 +225,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7cFv2bbz9HTY7sshgVTu+IhvYf7CT0czDYqHEB+dYfEqFU6TaoSMimq6uHqcWegUUR1T7PNmc0dyjYVw69FeVA=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.1.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-8j7XI+n1bz7xIg35Zpjqp1AqoIoFWuVZdYyI9vTAZ0b6ta/mIlNOWPLAbFyEHfKelA9g3Xa+4sYnKPSxU5dQoA=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-rGAbDdUySWITIdm2yiuNFB9lFYaSXT8LMtg97LTlOO5vZbI3M+obIS3QlIkBtAhgOTIPB7Ni+T0W44OmJpHoYA=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.1.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Kd/3KpnF07cw+qBAyLwA0y8tp3S0X8b8HWH55WGlVp6m4gvQ432kKgDum/jat1vqP/3J8hm4P/sly5ibY5gMqw=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-F9dqwWwGAdqeSkE7Tre5DmHQXwDpU2Z8Jk0lwTJMLj+kMqYFDVPjLPo4iVUdwPpxpmm0pR84u/oonG/2+84/zw=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.1.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-qy/QohHGM6eSQjHVEgibsDauUvlAgYPw5xrQqa9cVLo1hL4KMIhb+i4wGAxCK2p84rG2bfC2m8+IfZUxhhwcTg=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-H+zOtHkHd+TmdPj64M1A0zLOk7OHIK4C8yqfLFhfizOIBffT1yOhAs6EpK3EqPhfPLu54ADgcQcu8W96VP24UA=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.1.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-HIO7zj3M5QAYOfgvFM7Djeuen9kdZD4RA51wzXcXiPj1FPAuBNAW9N7lTEGYBSgObgwX+vXnC3HwLSF7nqkw8w=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-IG+KODTJ8rs6cEJ2wN6Zpr6YtvCS5OpYP6jBdGJltmUpjQdMhdMsaY3ysZk+9Vxpx2KC3xj5KLHV1USg3uBTeg=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.1.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-zcKaibnEhvbReiTsqbg+dog/Z3pnBx4v6R3AR5nVhGBO27hRSAXgA/fviYyE5bWD591WB7Pqwduf0t854ilKjw=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-irV+AuWrHqNm7VT7HO56qgymR0+vEfJbtB3vCq68kprH2V4NQmGp2MNKIYPnUCYL7NEK3H2NX+h06YFZJ/8ELQ=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.1.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-xmtHEyAhY93Djg5qEauvMqSF0x3tf8pzOGdKB6CuZmhCG69fZXk/dEwPrO0vKbOeGMV/T4K6HAg1+8Ue1N1ZaQ=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.0.0-beta.11", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-exZ/NEwGBlxyWszN7dvOfzbYX0cuhBZXftqAAFOlVP26elDHdo+AmSmLR/4cJyzpR9nCWz4xvl/RYF84bY6OEA=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.1.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-pDgHd0mGWWVsiO0fT8C7bi6CziOXU38g+k2dWlGm1YXCMzyrrWZZCF7oIp+EzJB02saSCF/oJ2f1/uj/VPeLMA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/docs/category-skill-guide.md
+++ b/docs/category-skill-guide.md
@@ -21,13 +21,13 @@ A Category is an agent configuration preset optimized for specific domains.

 | Category | Default Model | Use Cases |
 |----------|---------------|-----------|
-| `visual-engineering` | `google/gemini-3-pro-preview` | Frontend, UI/UX, design, styling, animation |
+| `visual-engineering` | `google/gemini-3-pro` | Frontend, UI/UX, design, styling, animation |
 | `ultrabrain` | `openai/gpt-5.2-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
-| `artistry` | `google/gemini-3-pro-preview` (max) | Highly creative/artistic tasks, novel ideas |
+| `artistry` | `google/gemini-3-pro` (max) | Highly creative/artistic tasks, novel ideas |
 | `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
 | `unspecified-low` | `anthropic/claude-sonnet-4-5` | Tasks that don't fit other categories, low effort required |
 | `unspecified-high` | `anthropic/claude-opus-4-5` (max) | Tasks that don't fit other categories, high effort required |
-| `writing` | `google/gemini-3-flash-preview` | Documentation, prose, technical writing |
+| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |

 ### Usage

@@ -70,12 +70,12 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to

 ### Usage

-Add desired skill names to the `skills` array.
+Add desired skill names to the `load_skills` array.

 ```typescript
 delegate_task(
  category="quick",
-  skills=["git-master"],
+  load_skills=["git-master"],
  prompt="Commit current changes. Follow commit message style."
 )
 ```
@@ -110,17 +110,17 @@ You can create powerful specialized agents by combining Categories and Skills.

 ### 🎨 The Designer (UI Implementation)
 - **Category**: `visual-engineering`
- **Skills**: `["frontend-ui-ux", "playwright"]`
+- **load_skills**: `["frontend-ui-ux", "playwright"]`
 - **Effect**: Implements aesthetic UI and verifies rendering results directly in browser.

 ### 🏗️ The Architect (Design Review)
 - **Category**: `ultrabrain`
- **Skills**: `[]` (pure reasoning)
+- **load_skills**: `[]` (pure reasoning)
 - **Effect**: Leverages GPT-5.2's logical reasoning for in-depth system architecture analysis.

 ### ⚡ The Maintainer (Quick Fixes)
 - **Category**: `quick`
- **Skills**: `["git-master"]`
+- **load_skills**: `["git-master"]`
 - **Effect**: Uses cost-effective models to quickly fix code and generate clean commits.

 ---
@@ -131,7 +131,7 @@ When delegating, **clear and specific** prompts are essential. Include these 7 e

 1. **TASK**: What needs to be done? (single objective)
 2. **EXPECTED OUTCOME**: What is the deliverable?
-3. **REQUIRED SKILLS**: Which skills should be used?
+3. **REQUIRED SKILLS**: Which skills should be loaded via `load_skills`?
 4. **REQUIRED TOOLS**: Which tools must be used? (whitelist)
 5. **MUST DO**: What must be done (constraints)
 6. **MUST NOT DO**: What must never be done
@@ -177,7 +177,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
  "categories": {
    // 1. Define new custom category
    "korean-writer": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
      "temperature": 0.5,
      "prompt_append": "You are a Korean technical writer. Maintain a friendly and clear tone."
    },
--- a/docs/cli-guide.md
+++ b/docs/cli-guide.md
@@ -175,7 +175,7 @@ Configuration files support **JSONC (JSON with Comments)** format. You can use c
  /* Category customization */
  "categories": {
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
  },
 }
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -2,6 +2,39 @@

 Highly opinionated, but adjustable to taste.

+## Quick Start
+
+**Most users don't need to configure anything manually.** Run the interactive installer:
+
+```bash
+bunx oh-my-opencode install
+```
+
+It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates an optimal config automatically.
+
+**Want to customize?** Here are the common patterns:
+
+```jsonc
+{
+  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
+  
+  // Override specific agent models
+  "agents": {
+    "oracle": { "model": "openai/gpt-5.2" },           // Use GPT for debugging
+    "librarian": { "model": "zai-coding-plan/glm-4.7" }, // Cheap model for research
+    "explore": { "model": "opencode/gpt-5-nano" }        // Free model for grep
+  },
+  
+  // Override category models (used by delegate_task)
+  "categories": {
+    "quick": { "model": "opencode/gpt-5-nano" },         // Fast/cheap for trivial tasks
+    "visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
+  }
+}
+```
+
+**Find available models:** Run `opencode models` to see all models in your environment.
+
 ## Config File Locations

 Config file locations (priority order):
@@ -42,7 +75,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`
      "model": "openai/gpt-5.2"  // GPT for strategic reasoning
    },
    "explore": {
-      "model": "opencode/grok-code"  // Free & fast for exploration
+      "model": "opencode/gpt-5-nano"  // Free & fast for exploration
    },
  },
 }
@@ -50,7 +83,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`

 ## Google Auth

-**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin. It provides multi-account load balancing, more models (including Claude via Antigravity), and active maintenance. See [Installation > Google Gemini](../README.md#google-gemini-antigravity-oauth).
+**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin (`@latest`). It provides multi-account load balancing, variant-based thinking levels, a dual quota system (Antigravity + Gemini CLI), and active maintenance. See [Installation > Google Gemini](guide/installation.md#google-gemini-antigravity-oauth).

 ## Agents

@@ -126,8 +159,8 @@ Available agents: `oracle`, `librarian`, `explore`, `multimodal-looker`

 Oh My OpenCode includes built-in skills that provide additional capabilities:

- **playwright**: Browser automation with Playwright MCP. Use for web scraping, testing, screenshots, and browser interactions.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', skills=['git-master'], ...)` to save context.
+- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
+- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.

 Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:

@@ -137,7 +170,231 @@ Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-openc
 }
 ```

-Available built-in skills: `playwright`, `git-master`
+Available built-in skills: `playwright`, `agent-browser`, `git-master`
+
+## Browser Automation
+
+Choose between two browser automation providers:
+
+| Provider | Interface | Features | Installation |
+|----------|-----------|----------|--------------|
+| **playwright** (default) | MCP tools | Playwright MCP server with structured tool calls | Auto-installed via npx |
+| **agent-browser** | Bash CLI | Vercel's CLI with session management, parallel browsers | Requires `bun add -g agent-browser` |
+
+**Switch providers** via `browser_automation_engine` in `oh-my-opencode.json`:
+
+```json
+{
+  "browser_automation_engine": {
+    "provider": "agent-browser"
+  }
+}
+```
+
+### Playwright (Default)
+
+Uses the official Playwright MCP server (`@playwright/mcp`). Browser automation happens through structured MCP tool calls.
+
+### agent-browser
+
+Uses [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser). Key advantages:
+- **Session management**: Run multiple isolated browser instances with `--session` flag
+- **Persistent profiles**: Keep browser state across restarts with `--profile`
+- **Snapshot-based workflow**: Get element refs via `snapshot -i`, interact with `@e1`, `@e2`, etc.
+- **CLI-first**: All commands via Bash - great for scripting
+
+**Installation required**:
+```bash
+bun add -g agent-browser
+agent-browser install  # Download Chromium
+```
+
+**Example workflow**:
+```bash
+agent-browser open https://example.com
+agent-browser snapshot -i  # Get interactive elements with refs
+agent-browser fill @e1 "user@example.com"
+agent-browser click @e2
+agent-browser screenshot result.png
+agent-browser close
+```
+
+## Tmux Integration
+
+Run background subagents in separate tmux panes for **visual multi-agent execution**. See your agents working in parallel, each in their own terminal pane.
+
+**Enable tmux integration** via `tmux` in `oh-my-opencode.json`:
+
+```json
+{
+  "tmux": {
+    "enabled": true,
+    "layout": "main-vertical",
+    "main_pane_size": 60,
+    "main_pane_min_width": 120,
+    "agent_pane_min_width": 40
+  }
+}
+```
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `enabled` | `false` | Enable tmux subagent pane spawning. Only works when running inside an existing tmux session. |
+| `layout` | `main-vertical` | Tmux layout for agent panes. See [Layout Options](#layout-options) below. |
+| `main_pane_size` | `60` | Main pane size as percentage (20-80). |
+| `main_pane_min_width` | `120` | Minimum width for main pane in columns. |
+| `agent_pane_min_width` | `40` | Minimum width for each agent pane in columns. |
+
+### Layout Options
+
+| Layout | Description |
+|--------|-------------|
+| `main-vertical` | Main pane left, agent panes stacked on right (default) |
+| `main-horizontal` | Main pane top, agent panes stacked bottom |
+| `tiled` | All panes in equal-sized grid |
+| `even-horizontal` | All panes in horizontal row |
+| `even-vertical` | All panes in vertical stack |
+
+### Requirements
+
+1. **Must run inside tmux**: The feature only activates when OpenCode is already running inside a tmux session
+2. **Tmux installed**: Requires tmux to be available in PATH
+3. **Server mode**: OpenCode must run with the `--port` flag to enable subagent pane spawning
+
+### How It Works
+
+When `tmux.enabled` is `true` and you're inside a tmux session:
+- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
+- Each pane shows the subagent's real-time output
+- Panes are automatically closed when the subagent completes
+- Layout is automatically adjusted based on your configuration
+
+### Running OpenCode with Tmux Subagent Support
+
+To enable tmux subagent panes, OpenCode must run in **server mode** with the `--port` flag. This starts an HTTP server that subagent panes connect to via `opencode attach`.
+
+**Basic setup**:
+```bash
+# Start tmux session
+tmux new -s dev
+
+# Run OpenCode with server mode (port 4096)
+opencode --port 4096
+
+# Now background agents will appear in separate panes
+```
+
+**Recommended: Shell Function**
+
+For convenience, create a shell function that automatically handles tmux sessions and port allocation. Here's an example for Fish shell:
+
+```fish
+# ~/.config/fish/config.fish
+function oc
+    set base_name (basename (pwd))
+    set path_hash (echo (pwd) | md5 | cut -c1-4)
+    set session_name "$base_name-$path_hash"
+    
+    # Find available port starting from 4096
+    function __oc_find_port
+        set port 4096
+        while test $port -lt 5096
+            if not lsof -i :$port >/dev/null 2>&1
+                echo $port
+                return 0
+            end
+            set port (math $port + 1)
+        end
+        echo 4096
+    end
+    
+    set oc_port (__oc_find_port)
+    set -x OPENCODE_PORT $oc_port
+    
+    if set -q TMUX
+        # Already inside tmux - just run with port
+        opencode --port $oc_port $argv
+    else
+        # Create tmux session and run opencode
+        set oc_cmd "OPENCODE_PORT=$oc_port opencode --port $oc_port $argv; exec fish"
+        if tmux has-session -t "$session_name" 2>/dev/null
+            tmux new-window -t "$session_name" -c (pwd) "$oc_cmd"
+            tmux attach-session -t "$session_name"
+        else
+            tmux new-session -s "$session_name" -c (pwd) "$oc_cmd"
+        end
+    end
+    
+    functions -e __oc_find_port
+end
+```
+
+**Bash/Zsh equivalent**:
+
+```bash
+# ~/.bashrc or ~/.zshrc
+oc() {
+    local base_name=$(basename "$PWD")
+    local path_hash=$(echo "$PWD" | md5sum | cut -c1-4)
+    local session_name="${base_name}-${path_hash}"
+    
+    # Find available port
+    local port=4096
+    while [ $port -lt 5096 ]; do
+        if ! lsof -i :$port >/dev/null 2>&1; then
+            break
+        fi
+        port=$((port + 1))
+    done
+    
+    export OPENCODE_PORT=$port
+    
+    if [ -n "$TMUX" ]; then
+        opencode --port $port "$@"
+    else
+        local oc_cmd="OPENCODE_PORT=$port opencode --port $port $*; exec $SHELL"
+        if tmux has-session -t "$session_name" 2>/dev/null; then
+            tmux new-window -t "$session_name" -c "$PWD" "$oc_cmd"
+            tmux attach-session -t "$session_name"
+        else
+            tmux new-session -s "$session_name" -c "$PWD" "$oc_cmd"
+        fi
+    fi
+}
+```
+
+**How subagent panes work**:
+
+1. Main OpenCode starts HTTP server on specified port (e.g., `http://localhost:4096`)
+2. When a background agent spawns, Oh My OpenCode creates a new tmux pane
+3. The pane runs: `opencode attach http://localhost:4096 --session <session-id>`
+4. Each subagent pane shows real-time streaming output
+5. Panes are automatically closed when the subagent completes
+
+**Environment variables**:
+
+| Variable | Description |
+|----------|-------------|
+| `OPENCODE_PORT` | Default port for the HTTP server (used if `--port` not specified) |
+
+### Server Mode Reference
+
+OpenCode's server mode exposes an HTTP API for programmatic interaction:
+
+```bash
+# Standalone server (no TUI)
+opencode serve --port 4096
+
+# TUI with server (recommended for tmux integration)
+opencode --port 4096
+```
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--port` | `4096` | Port for HTTP server |
+| `--hostname` | `127.0.0.1` | Hostname to listen on |
+
+For more details, see the [OpenCode Server documentation](https://opencode.ai/docs/server/).

 ## Git Master

@@ -272,7 +529,7 @@ Categories enable domain-specific task delegation via the `delegate_task` tool.

 | Category         | Model                         | Description                                                                  |
 | ---------------- | ----------------------------- | ---------------------------------------------------------------------------- |
-| `visual`         | `google/gemini-3-pro-preview` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7).           |
+| `visual`         | `google/gemini-3-pro` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7).           |
 | `business-logic` | `openai/gpt-5.2`              | Backend logic, architecture, strategic reasoning. Low creativity (temp 0.1). |

 **Usage:**
@@ -299,7 +556,7 @@ Add custom categories in `oh-my-opencode.json`:
      "prompt_append": "Focus on data analysis, ML pipelines, and statistical methods."
    },
    "visual": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "prompt_append": "Use shadcn/ui components and Tailwind CSS."
    }
  }
@@ -370,9 +627,9 @@ Each agent has a defined provider priority chain. The system tries providers in
 |-------|-------------------|-------------------------|
 | **Sisyphus** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **oracle** | `gpt-5.2` | openai → anthropic → google → github-copilot → opencode |
-| **librarian** | `glm-4.7-free` | opencode → github-copilot → anthropic |
-| **explore** | `grok-code` | opencode → anthropic → github-copilot |
-| **multimodal-looker** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
+| **librarian** | `big-pickle` | opencode → github-copilot → anthropic |
+| **explore** | `gpt-5-nano` | anthropic → opencode |
+| **multimodal-looker** | `gemini-3-flash` | google → openai → zai-coding-plan → anthropic → opencode |
 | **Prometheus (Planner)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **Metis (Plan Consultant)** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **Momus (Plan Reviewer)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
@@ -384,13 +641,13 @@ Categories follow the same resolution logic:

 | Category | Model (no prefix) | Provider Priority Chain |
 |----------|-------------------|-------------------------|
-| **visual-engineering** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
+| **visual-engineering** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
 | **ultrabrain** | `gpt-5.2-codex` | openai → anthropic → google → github-copilot → opencode |
-| **artistry** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
+| **artistry** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
 | **quick** | `claude-haiku-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **unspecified-low** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **unspecified-high** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
-| **writing** | `gemini-3-flash-preview` | google → openai → anthropic → github-copilot → opencode |
+| **writing** | `gemini-3-flash` | google → openai → anthropic → github-copilot → opencode |

 ### Checking Your Configuration

--- a/docs/features.md
+++ b/docs/features.md
@@ -12,8 +12,8 @@ Oh-My-OpenCode provides 10 specialized AI agents. Each has distinct expertise, o
 |-------|-------|---------|
 | **Sisyphus** | `anthropic/claude-opus-4-5` | **The default orchestrator.** Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). |
 | **oracle** | `openai/gpt-5.2` | Architecture decisions, code review, debugging. Read-only consultation - stellar logical reasoning and deep analysis. Inspired by AmpCode. |
-| **librarian** | `opencode/glm-4.7-free` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
-| **explore** | `opencode/grok-code` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise Grok. Inspired by Claude Code. |
+| **librarian** | `opencode/big-pickle` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
+| **explore** | `opencode/gpt-5-nano` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise GPT-5 Nano. Inspired by Claude Code. |
 | **multimodal-looker** | `google/gemini-3-flash` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Saves tokens by having another agent process media. |

 ### Planning Agents
@@ -62,6 +62,27 @@ delegate_task(agent="explore", background=true, prompt="Find auth implementation
 background_output(task_id="bg_abc123")
 ```

+#### Visual Multi-Agent with Tmux
+
+Set `tmux.enabled` to `true` to see background agents in separate tmux panes:
+
+```json
+{
+  "tmux": {
+    "enabled": true,
+    "layout": "main-vertical"
+  }
+}
+```
+
+When running inside tmux:
+- Background agents spawn in new panes
+- Watch multiple agents work in real-time
+- Each pane shows agent output live
+- Auto-cleanup when agents complete
+
+See [Tmux Integration](configurations.md#tmux-integration) for full configuration options.
+
 Customize agent models, prompts, and permissions in `oh-my-opencode.json`. See [Configuration](configurations.md#agents).

 ---
@@ -78,11 +99,15 @@ Skills provide specialized workflows with embedded MCP servers and detailed inst
 | **frontend-ui-ux** | UI/UX tasks, styling | Designer-turned-developer persona. Crafts stunning UI/UX even without design mockups. Emphasizes bold aesthetic direction, distinctive typography, cohesive color palettes. |
 | **git-master** | commit, rebase, squash, blame | MUST USE for ANY git operations. Atomic commits with automatic splitting, rebase/squash workflows, history search (blame, bisect, log -S). |

-### Skill: playwright
+### Skill: Browser Automation (playwright / agent-browser)

 **Trigger**: Any browser-related request

-Provides browser automation via Playwright MCP server:
+Oh-My-OpenCode provides two browser automation providers, configurable via `browser_automation_engine.provider`:
+
+#### Option 1: Playwright MCP (Default)
+
+The default provider uses Playwright MCP server:

 ```yaml
 mcp:
@@ -91,18 +116,41 @@ mcp:
    args: ["@playwright/mcp@latest"]
 ```

-**Capabilities**:
+**Usage**:
+```
+/playwright Navigate to example.com and take a screenshot
+```
+
+#### Option 2: Agent Browser CLI (Vercel)
+
+Alternative provider using [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser):
+
+```json
+{
+  "browser_automation_engine": {
+    "provider": "agent-browser"
+  }
+}
+```
+
+**Requires installation**:
+```bash
+bun add -g agent-browser
+```
+
+**Usage**:
+```
+Use agent-browser to navigate to example.com and extract the main heading
+```
+
+#### Capabilities (Both Providers)
+
 - Navigate and interact with web pages
 - Take screenshots and PDFs
 - Fill forms and click elements
 - Wait for network requests
 - Scrape content

-**Usage**:
-```
-/playwright Navigate to example.com and take a screenshot
-```
-
 ### Skill: frontend-ui-ux

 **Trigger**: UI design tasks, visual changes
@@ -418,6 +466,29 @@ Disable specific hooks in config:
 | **session_search** | Full-text search across session messages |
 | **session_info** | Get session metadata and statistics |

+### Interactive Terminal Tools
+
+| Tool | Description |
+|------|-------------|
+| **interactive_bash** | Tmux-based terminal for TUI apps (vim, htop, pudb). Pass tmux subcommands directly without prefix. |
+
+**Usage Examples**:
+```bash
+# Create a new session
+interactive_bash(tmux_command="new-session -d -s dev-app")
+
+# Send keystrokes to a session
+interactive_bash(tmux_command="send-keys -t dev-app 'vim main.py' Enter")
+
+# Capture pane output
+interactive_bash(tmux_command="capture-pane -p -t dev-app")
+```
+
+**Key Points**:
+- Commands are tmux subcommands (no `tmux` prefix)
+- Use for interactive apps that need persistent sessions
+- One-shot commands should use the regular `Bash` tool with `&`
+
 ---

 ## MCPs: Built-in Servers
--- a/docs/guide/installation.md
+++ b/docs/guide/installation.md
@@ -132,7 +132,7 @@ First, add the opencode-antigravity-auth plugin:
 {
  "plugin": [
    "oh-my-opencode",
-    "opencode-antigravity-auth@1.2.8"
+    "opencode-antigravity-auth@latest"
  ]
 }
 ```
@@ -140,7 +140,7 @@ First, add the opencode-antigravity-auth plugin:
 ##### Model Configuration

 You'll also need full model settings in `opencode.json`.
-Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy provider/models config from the README, and merge carefully to avoid breaking the user's existing setup.
+Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries.

 ##### oh-my-opencode Agent Model Override

@@ -154,7 +154,17 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
 }
 ```

-**Available model names**: `google/antigravity-gemini-3-pro-high`, `google/antigravity-gemini-3-pro-low`, `google/antigravity-gemini-3-flash`, `google/antigravity-claude-sonnet-4-5`, `google/antigravity-claude-sonnet-4-5-thinking-low`, `google/antigravity-claude-sonnet-4-5-thinking-medium`, `google/antigravity-claude-sonnet-4-5-thinking-high`, `google/antigravity-claude-opus-4-5-thinking-low`, `google/antigravity-claude-opus-4-5-thinking-medium`, `google/antigravity-claude-opus-4-5-thinking-high`, `google/gemini-3-pro-preview`, `google/gemini-3-flash-preview`, `google/gemini-2.5-pro`, `google/gemini-2.5-flash`
+**Available models (Antigravity quota)**:
+- `google/antigravity-gemini-3-pro` — variants: `low`, `high`
+- `google/antigravity-gemini-3-flash` — variants: `minimal`, `low`, `medium`, `high`
+- `google/antigravity-claude-sonnet-4-5` — no variants
+- `google/antigravity-claude-sonnet-4-5-thinking` — variants: `low`, `max`
+- `google/antigravity-claude-opus-4-5-thinking` — variants: `low`, `max`
+
+**Available models (Gemini CLI quota)**:
+- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3-pro-preview`
+
+> **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work but variants are recommended. Use `--variant=high` with the base model name instead.

 Then authenticate:

@@ -183,7 +193,7 @@ When GitHub Copilot is the best available provider, oh-my-opencode uses these mo
 | ------------- | -------------------------------- |
 | **Sisyphus**  | `github-copilot/claude-opus-4.5` |
 | **Oracle**    | `github-copilot/gpt-5.2`         |
-| **Explore**   | `github-copilot/grok-code-fast-1`|
+| **Explore**   | `opencode/gpt-5-nano`              |
 | **Librarian** | `zai-coding-plan/glm-4.7` (if Z.ai available) or fallback |

 GitHub Copilot acts as a proxy provider, routing requests to underlying models based on your subscription.
@@ -203,7 +213,7 @@ If Z.ai is the only provider available, all agents will use GLM models:

 #### OpenCode Zen

-OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/grok-code`, and `opencode/glm-4.7-free`.
+OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/gpt-5-nano`, and `opencode/big-pickle`.

 When OpenCode Zen is the best available provider (no native or Copilot), these models are used:

@@ -211,8 +221,8 @@ When OpenCode Zen is the best available provider (no native or Copilot), these m
 | ------------- | -------------------------------- |
 | **Sisyphus**  | `opencode/claude-opus-4-5`       |
 | **Oracle**    | `opencode/gpt-5.2`               |
-| **Explore**   | `opencode/grok-code`             |
-| **Librarian** | `opencode/glm-4.7-free`          |
+| **Explore**   | `opencode/gpt-5-nano`             |
+| **Librarian** | `opencode/big-pickle`          |

 ##### Setup

--- a/docs/guide/overview.md
+++ b/docs/guide/overview.md
@@ -54,7 +54,7 @@ For complex or critical tasks, press **Tab** to switch to Prometheus (Planner) m

 2. **Plan generation** - Based on the interview, Prometheus generates a detailed work plan with tasks, acceptance criteria, and guardrails. Optionally reviewed by Momus (plan reviewer) for high-accuracy validation.

-3. **Run `/start-work`** - The Orchestrator-Sisyphus takes over:
+3. **Run `/start-work`** - Atlas takes over:
   - Distributes tasks to specialized sub-agents
   - Verifies each task completion independently
   - Accumulates learnings across tasks
@@ -84,7 +84,78 @@ The orchestrator is designed to execute work plans created by Prometheus. Using
 4. Run /start-work → Orchestrator executes
 ```

-**Prometheus and Orchestrator-Sisyphus are a pair. Always use them together.**
+**Prometheus and Atlas are a pair. Always use them together.**
+
+---
+
+## Model Configuration
+
+Oh My OpenCode automatically configures models based on your available providers. You don't need to manually specify every model.
+
+### How Models Are Determined
+
+**1. At Installation Time (Interactive Installer)**
+
+When you run `bunx oh-my-opencode install`, the installer asks which providers you have:
+- Claude Pro/Max subscription?
+- OpenAI/ChatGPT Plus?
+- Google Gemini?
+- GitHub Copilot?
+- OpenCode Zen?
+- Z.ai Coding Plan?
+
+Based on your answers, it generates `~/.config/opencode/oh-my-opencode.json` with optimal model assignments for each agent and category.
+
+**2. At Runtime (Fallback Chain)**
+
+Each agent has a **provider priority chain**. The system tries providers in order until it finds an available model:
+
+```
+Example: multimodal-looker
+google → openai → zai-coding-plan → anthropic → opencode
+   ↓        ↓           ↓              ↓           ↓
+gemini   gpt-5.2     glm-4.6v       haiku     gpt-5-nano
+```
+
+If you have Gemini, it uses `google/gemini-3-flash`. No Gemini but have Claude? Uses `anthropic/claude-haiku-4-5`. And so on.
+
+### Example Configuration
+
+Here's a real-world config for a user with **Claude, OpenAI, Gemini, and Z.ai** all available:
+
+```jsonc
+{
+  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
+  "agents": {
+    // Override specific agents only - rest use fallback chain
+    "atlas": { "model": "anthropic/claude-sonnet-4-5", "variant": "max" },
+    "librarian": { "model": "zai-coding-plan/glm-4.7" },
+    "explore": { "model": "opencode/gpt-5-nano" },
+    "multimodal-looker": { "model": "zai-coding-plan/glm-4.6v" }
+  },
+  "categories": {
+    // Override categories for cost optimization
+    "quick": { "model": "opencode/gpt-5-nano" },
+    "unspecified-low": { "model": "zai-coding-plan/glm-4.7" }
+  },
+  "experimental": {
+    "aggressive_truncation": true
+  }
+}
+```
+
+**Key points:**
+- You only need to override what you want to change
+- Unspecified agents/categories use the automatic fallback chain
+- Mix providers freely (Claude for main work, Z.ai for cheap tasks, etc.)
+
+### Finding Available Models
+
+Run `opencode models` to see all available models in your environment. Model names follow the format `provider/model-name`.
+
+### Learn More
+
+For detailed configuration options including per-agent settings, category customization, and more, see the [Configuration Guide](../configurations.md).

 ---

--- a/docs/guide/understanding-orchestration-system.md
+++ b/docs/guide/understanding-orchestration-system.md
@@ -1,6 +1,6 @@
 # Understanding the Orchestration System

-Oh My OpenCode's orchestration system transforms a simple AI agent into a coordinated development team. This document explains how the Prometheus → Orchestrator → Junior workflow creates high-quality, reliable code output.
+Oh My OpenCode's orchestration system transforms a simple AI agent into a coordinated development team. This document explains how the Prometheus → Atlas → Junior workflow creates high-quality, reliable code output.

 ---

@@ -29,7 +29,7 @@ flowchart TB
    end
    
    subgraph Execution["Execution Layer (Orchestrator)"]
-        Orchestrator["⚡ Orchestrator-Sisyphus<br/>(Conductor)<br/>Claude Opus 4.5"]
+        Orchestrator["⚡ Atlas<br/>(Conductor)<br/>Claude Opus 4.5"]
    end
    
    subgraph Workers["Worker Layer (Specialized Agents)"]
@@ -152,7 +152,7 @@ If REJECTED, Prometheus fixes issues and resubmits. **No maximum retry limit.**

 ---

-## Layer 2: Execution (Orchestrator-Sisyphus)
+## Layer 2: Execution (Atlas)

 ### The Conductor Mindset

@@ -160,7 +160,7 @@ The Orchestrator is like an orchestra conductor: **it doesn't play instruments,

 ```mermaid
 flowchart LR
-    subgraph Orchestrator["Orchestrator-Sisyphus"]
+    subgraph Orchestrator["Atlas"]
        Read["1. Read Plan"]
        Analyze["2. Analyze Tasks"]
        Wisdom["3. Accumulate Wisdom"]
@@ -326,13 +326,13 @@ Skills prepend specialized instructions to subagent prompts:
 // Category + Skill combination
 delegate_task(
  category="visual-engineering", 
-  skills=["frontend-ui-ux"],  // Adds UI/UX expertise
+  load_skills=["frontend-ui-ux"],  // Adds UI/UX expertise
  prompt="..."
 )

 delegate_task(
  category="general",
-  skills=["playwright"],  // Adds browser automation expertise
+  load_skills=["playwright"],  // Adds browser automation expertise
  prompt="..."
 )
 ```
@@ -341,8 +341,8 @@ delegate_task(

 | Before | After |
 |--------|-------|
-| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + skills=["frontend-ui-ux"]` |
-| One-size-fits-all | `category="visual-engineering" + skills=["unity-master"]` |
+| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + load_skills=["frontend-ui-ux"]` |
+| One-size-fits-all | `category="visual-engineering" + load_skills=["unity-master"]` |
 | Model bias | Category-based: model abstraction eliminates bias |

 ---
@@ -352,7 +352,7 @@ delegate_task(
 ```mermaid
 sequenceDiagram
    participant User
-    participant Orchestrator as Orchestrator-Sisyphus
+    participant Orchestrator as Atlas
    participant Junior as Sisyphus-Junior
    participant Notepad as .sisyphus/notepads/
    
@@ -365,7 +365,7 @@ sequenceDiagram
        
        Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
        
-        Orchestrator->>Junior: delegate_task(category, skills, prompt)
+        Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
        
        Junior->>Junior: Create todos, execute
        Junior->>Junior: Verify (lsp_diagnostics, tests)
@@ -392,7 +392,7 @@ sequenceDiagram
 ### 1. Separation of Concerns

 - **Planning** (Prometheus): High reasoning, interview, strategic thinking
- **Orchestration** (Sisyphus): Coordination, verification, wisdom accumulation
+- **Orchestration** (Atlas): Coordination, verification, wisdom accumulation
 - **Execution** (Junior): Focused implementation, no distractions

 ### 2. Explicit Over Implicit
--- a/docs/orchestration-guide.md
+++ b/docs/orchestration-guide.md
@@ -6,9 +6,10 @@
 |------------|----------|-------------|
 | **Simple** | Just prompt | Simple tasks, quick fixes, single-file changes |
 | **Complex + Lazy** | Just type `ulw` or `ultrawork` | Complex tasks where explaining context is tedious. Agent figures it out. |
-| **Complex + Precise** | `@plan` → `/start-work` | Precise, multi-step work requiring true orchestration. Prometheus plans, Sisyphus executes. |
+| **Complex + Precise** | `@plan` → `/start-work` | Precise, multi-step work requiring true orchestration. Prometheus plans, Atlas executes. |

 **Decision Flow:**
+
 ```
 Is it a quick fix or simple task?
  └─ YES → Just prompt normally
@@ -30,7 +31,7 @@ Traditional AI agents often mix planning and execution, leading to context pollu
 Oh-My-OpenCode solves this by clearly separating two roles:

 1. **Prometheus (Planner)**: A pure strategist who never writes code. Establishes perfect plans through interviews and analysis.
-2. **Sisyphus (Executor)**: An orchestrator who executes plans. Delegates work to specialized agents and never stops until completion.
+2. **Atlas (Executor)**: An orchestrator who executes plans. Delegates work to specialized agents and never stops until completion.

 ---

@@ -52,10 +53,10 @@ flowchart TD
    StartWork --> BoulderState[boulder.json]
    
    subgraph Execution Phase
-        BoulderState --> Sisyphus[Sisyphus<br>Orchestrator]
-        Sisyphus --> Oracle[Oracle]
-        Sisyphus --> Frontend[Frontend<br>Engineer]
-        Sisyphus --> Explore[Explore]
+        BoulderState --> Atlas[Atlas<br>Orchestrator]
+        Atlas --> Oracle[Oracle]
+        Atlas --> Frontend[Frontend<br>Engineer]
+        Atlas --> Explore[Explore]
    end
 ```

@@ -64,22 +65,26 @@ flowchart TD
 ## 3. Key Components

 ### 🔮 Prometheus (The Planner)
+
 - **Model**: `anthropic/claude-opus-4-5`
 - **Role**: Strategic planning, requirements interviews, work plan creation
 - **Constraint**: **READ-ONLY**. Can only create/modify markdown files within `.sisyphus/` directory.
 - **Characteristic**: Never writes code directly, focuses solely on "how to do it".

-### 🦉 Metis (The Consultant)
+### 🦉 Metis (The Plan Consultant)
+
 - **Role**: Pre-analysis and gap detection
 - **Function**: Identifies hidden user intent, prevents AI over-engineering, eliminates ambiguity.
 - **Workflow**: Metis consultation is mandatory before plan creation.

-### ⚖️ Momus (The Reviewer)
+### ⚖️ Momus (The Plan Reviewer)
+
 - **Role**: High-precision plan validation (High Accuracy Mode)
 - **Function**: Rejects and demands revisions until the plan is perfect.
 - **Trigger**: Activated when user requests "high accuracy".

-### 🪨 Sisyphus (The Orchestrator)
+### ⚡ Atlas (The Plan Executor)
+
 - **Model**: `anthropic/claude-opus-4-5` (Extended Thinking 32k)
 - **Role**: Execution and delegation
 - **Characteristic**: Doesn't do everything directly, actively delegates to specialized agents (Frontend, Librarian, etc.).
@@ -89,6 +94,7 @@ flowchart TD
 ## 4. Workflow

 ### Phase 1: Interview and Planning (Interview Mode)
+
 Prometheus starts in **interview mode** by default. Instead of immediately creating a plan, it collects sufficient context.

 1. **Intent Identification**: Classifies whether the user's request is Refactoring or New Feature.
@@ -96,6 +102,7 @@ Prometheus starts in **interview mode** by default. Instead of immediately creat
 3. **Draft Creation**: Continuously records discussion content in `.sisyphus/drafts/`.

 ### Phase 2: Plan Generation
+
 When the user requests "Make it a plan", plan generation begins.

 1. **Metis Consultation**: Confirms any missed requirements or risk factors.
@@ -103,10 +110,11 @@ When the user requests "Make it a plan", plan generation begins.
 3. **Handoff**: Once plan creation is complete, guides user to use `/start-work` command.

 ### Phase 3: Execution
+
 When the user enters `/start-work`, the execution phase begins.

 1. **State Management**: Creates `boulder.json` file to track current plan and session ID.
-2. **Task Execution**: Sisyphus reads the plan and processes TODOs one by one.
+2. **Task Execution**: Atlas reads the plan and processes TODOs one by one.
 3. **Delegation**: UI work is delegated to Frontend agent, complex logic to Oracle.
 4. **Continuity**: Even if the session is interrupted, work continues in the next session through `boulder.json`.

@@ -115,11 +123,15 @@ When the user enters `/start-work`, the execution phase begins.
 ## 5. Commands and Usage

 ### `@plan [request]`
+
 Invokes Prometheus to start a planning session.
+
 - Example: `@plan "I want to refactor the authentication system to NextAuth"`

 ### `/start-work`
+
 Executes the generated plan.
+
 - Function: Finds plan in `.sisyphus/plans/` and enters execution mode.
 - If there's interrupted work, automatically resumes from where it left off.

@@ -132,7 +144,7 @@ You can control related features in `oh-my-opencode.json`.
 ```jsonc
 {
  "sisyphus_agent": {
-    "disabled": false,           // Enable Sisyphus orchestration (default: false)
+    "disabled": false,           // Enable Atlas orchestration (default: false)
    "planner_enabled": true,     // Enable Prometheus (default: true)
    "replace_plan": true         // Replace default plan agent with Prometheus (default: true)
  },
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -73,13 +73,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.0.0-beta.13",
-    "oh-my-opencode-darwin-x64": "3.0.0-beta.13",
-    "oh-my-opencode-linux-arm64": "3.0.0-beta.13",
-    "oh-my-opencode-linux-arm64-musl": "3.0.0-beta.13",
-    "oh-my-opencode-linux-x64": "3.0.0-beta.13",
-    "oh-my-opencode-linux-x64-musl": "3.0.0-beta.13",
-    "oh-my-opencode-windows-x64": "3.0.0-beta.13"
+    "oh-my-opencode-darwin-arm64": "3.1.2",
+    "oh-my-opencode-darwin-x64": "3.1.2",
+    "oh-my-opencode-linux-arm64": "3.1.2",
+    "oh-my-opencode-linux-arm64-musl": "3.1.2",
+    "oh-my-opencode-linux-x64": "3.1.2",
+    "oh-my-opencode-linux-x64-musl": "3.1.2",
+    "oh-my-opencode-windows-x64": "3.1.2"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.0.0-beta.13",
+  "version": "3.1.2",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -711,6 +711,174 @@
      "created_at": "2026-01-22T12:39:26Z",
      "repoId": 1108837393,
      "pullRequestNo": 989
+    },
+    {
+      "name": "l3aro",
+      "id": 25253808,
+      "comment_id": 3786383804,
+      "created_at": "2026-01-22T19:52:42Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 999
+    },
+    {
+      "name": "Ssoon-m",
+      "id": 89559826,
+      "comment_id": 3788539617,
+      "created_at": "2026-01-23T06:31:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1014
+    },
+    {
+      "name": "veetase",
+      "id": 2784250,
+      "comment_id": 3789028002,
+      "created_at": "2026-01-23T08:27:02Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 985
+    },
+    {
+      "name": "RouHim",
+      "id": 3582050,
+      "comment_id": 3791988227,
+      "created_at": "2026-01-23T19:32:01Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1031
+    },
+    {
+      "name": "gongxh0901",
+      "id": 15622561,
+      "comment_id": 3793478620,
+      "created_at": "2026-01-24T02:15:02Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1037
+    },
+    {
+      "name": "gongxh0901",
+      "id": 15622561,
+      "comment_id": 3793521632,
+      "created_at": "2026-01-24T02:23:34Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1037
+    },
+    {
+      "name": "AndersHsueh",
+      "id": 121805544,
+      "comment_id": 3793787614,
+      "created_at": "2026-01-24T04:41:46Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1042
+    },
+    {
+      "name": "AamiRobin",
+      "id": 22963668,
+      "comment_id": 3794632200,
+      "created_at": "2026-01-24T13:28:22Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1067
+    },
+    {
+      "name": "ThanhNguyxn",
+      "id": 74597207,
+      "comment_id": 3795232176,
+      "created_at": "2026-01-24T17:41:53Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1075
+    },
+    {
+      "name": "sadnow",
+      "id": 87896100,
+      "comment_id": 3795495342,
+      "created_at": "2026-01-24T20:49:29Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1080
+    },
+    {
+      "name": "jsl9208",
+      "id": 4048787,
+      "comment_id": 3795582626,
+      "created_at": "2026-01-24T21:41:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1082
+    },
+    {
+      "name": "potb",
+      "id": 10779093,
+      "comment_id": 3795856573,
+      "created_at": "2026-01-25T02:38:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1083
+    },
+    {
+      "name": "kvokka",
+      "id": 15954013,
+      "comment_id": 3795884358,
+      "created_at": "2026-01-25T03:13:52Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1084
+    },
+    {
+      "name": "misyuari",
+      "id": 12197761,
+      "comment_id": 3798225767,
+      "created_at": "2026-01-26T07:31:02Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1132
+    },
+    {
+      "name": "boguan",
+      "id": 3226538,
+      "comment_id": 3798448537,
+      "created_at": "2026-01-26T08:40:37Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1137
+    },
+    {
+      "name": "boguan",
+      "id": 3226538,
+      "comment_id": 3798471978,
+      "created_at": "2026-01-26T08:46:03Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1137
+    },
+    {
+      "name": "Jeremy-Kr",
+      "id": 110771206,
+      "comment_id": 3799211732,
+      "created_at": "2026-01-26T11:59:13Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1141
+    },
+    {
+      "name": "orientpine",
+      "id": 32758428,
+      "comment_id": 3799897021,
+      "created_at": "2026-01-26T14:30:33Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1145
+    },
+    {
+      "name": "craftaholic",
+      "id": 63741110,
+      "comment_id": 3797014417,
+      "created_at": "2026-01-25T17:52:34Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1110
+    },
+    {
+      "name": "acamq",
+      "id": 179265037,
+      "comment_id": 3801038978,
+      "created_at": "2026-01-26T18:20:17Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1151
+    },
+    {
+      "name": "itsmylife44",
+      "id": 34112129,
+      "comment_id": 3802225779,
+      "created_at": "2026-01-26T23:20:30Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1157
    }
  ]
 }
--- a/sisyphus-prompt.md
+++ b/sisyphus-prompt.md
@@ -239,7 +239,7 @@ Ask yourself:
 I will use delegate_task with:
 - **Category**: [selected-category-name]
 - **Why this category**: [how category description matches task domain]
- **Skills**: [list of selected skills]
+- **load_skills**: [list of selected skills]
 - **Skill evaluation**:
  - [skill-1]: INCLUDED because [reason based on skill description]
  - [skill-2]: OMITTED because [reason why skill domain doesn't apply]
@@ -256,7 +256,7 @@ I will use delegate_task with:
 I will use delegate_task with:
 - **Category**: [category-name]
 - **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **Skills**: ["skill-a", "skill-b"]
+- **load_skills**: ["skill-a", "skill-b"]
 - **Skill evaluation**:
  - skill-a: INCLUDED - description says "[quote]" which applies to this task
  - skill-b: INCLUDED - description says "[quote]" which is needed here
@@ -265,7 +265,7 @@ I will use delegate_task with:

 delegate_task(
  category="[category-name]",
-  skills=["skill-a", "skill-b"],
+  load_skills=["skill-a", "skill-b"],
  prompt="..."
 )
 ```
@@ -276,12 +276,12 @@ delegate_task(
 I will use delegate_task with:
 - **Agent**: [agent-name]
 - **Reason**: This requires [agent's specialty] based on agent description
- **Skills**: [] (agents have built-in expertise)
+- **load_skills**: [] (agents have built-in expertise)
 - **Expected Outcome**: [what agent should return]

 delegate_task(
  subagent_type="[agent-name]",
-  skills=[],
+  load_skills=[],
  prompt="..."
 )
 ```
@@ -292,13 +292,13 @@ delegate_task(
 I will use delegate_task with:
 - **Agent**: explore
 - **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **Skills**: []
+- **load_skills**: []
 - **Expected Outcome**: List of files containing auth patterns

 delegate_task(
  subagent_type="explore",
  run_in_background=true,
-  skills=[],
+  load_skills=[],
  prompt="Find all authentication implementations in the codebase"
 )
 ```
@@ -306,7 +306,7 @@ delegate_task(
 **WRONG: No Skill Evaluation**

 ```
-delegate_task(category="...", skills=[], prompt="...")  // Where's the justification?
+delegate_task(category="...", load_skills=[], prompt="...")  // Where's the justification?
 ```

 **WRONG: Vague Category Selection**
@@ -329,11 +329,11 @@ I'll use this category because it seems right.
 ```typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
@@ -416,7 +416,7 @@ Skills inject specialized instructions into the subagent. Read the description t
 For EVERY skill listed above, ask yourself:
 > "Does this skill's expertise domain overlap with my task?"

- If YES → INCLUDE in `skills=[...]`
+- If YES → INCLUDE in `load_skills=[...]`
 - If NO → You MUST justify why (see below)

 **STEP 3: Justify Omissions**
@@ -444,14 +444,14 @@ SKILL EVALUATION for "[skill-name]":
 ```typescript
 delegate_task(
  category="[selected-category]",
-  skills=["skill-1", "skill-2"],  // Include ALL relevant skills
+  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills
  prompt="..."
 )
 ```

 **ANTI-PATTERN (will produce poor results):**
 ```typescript
-delegate_task(category="...", skills=[], prompt="...")  // Empty skills without justification
+delegate_task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
 ```
 ### Delegation Table:

@@ -724,7 +724,7 @@ If the user's approach seems problematic:
 | **Error Handling** | Empty catch blocks `catch(e) {}` |
 | **Testing** | Deleting failing tests to "pass" |
 | **Search** | Firing agents for single-line typos or obvious syntax errors |
-| **Delegation** | Using `skills=[]` without justifying why no skills apply |
+| **Delegation** | Using `load_skills=[]` without justifying why no skills apply |
 | **Debugging** | Shotgun debugging, random changes |
 ## Soft Guidelines

--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -1,38 +1,35 @@
 # AGENTS KNOWLEDGE BASE

 ## OVERVIEW
-
 10 AI agents for multi-model orchestration. Sisyphus (primary), Atlas (orchestrator), oracle, librarian, explore, multimodal-looker, Prometheus, Metis, Momus, Sisyphus-Junior.

 ## STRUCTURE
-
 ```
 agents/
-├── atlas.ts                    # Master Orchestrator (1383 lines)
-├── sisyphus.ts                 # Main prompt (615 lines)
-├── sisyphus-junior.ts          # Delegated task executor
-├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation
+├── atlas.ts                    # Master Orchestrator (holds todo list)
+├── sisyphus.ts                 # Main prompt (SF Bay Area engineer identity)
+├── sisyphus-junior.ts          # Delegated task executor (category-spawned)
 ├── oracle.ts                   # Strategic advisor (GPT-5.2)
-├── librarian.ts                # Multi-repo research (GLM-4.7-free)
-├── explore.ts                  # Fast grep (Grok Code)
+├── librarian.ts                # Multi-repo research (GitHub CLI, Context7)
+├── explore.ts                  # Fast contextual grep (GPT-5 Nano)
 ├── multimodal-looker.ts        # Media analyzer (Gemini 3 Flash)
-├── prometheus-prompt.ts        # Planning (1196 lines)
-├── metis.ts                    # Plan consultant
-├── momus.ts                    # Plan reviewer
+├── prometheus-prompt.ts        # Planning (Interview/Consultant mode, 1196 lines)
+├── metis.ts                    # Pre-planning analysis (Gap detection)
+├── momus.ts                    # Plan reviewer (Ruthless fault-finding)
+├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation
 ├── types.ts                    # AgentModelConfig, AgentPromptMetadata
 ├── utils.ts                    # createBuiltinAgents(), resolveModelWithFallback()
 └── index.ts                    # builtinAgents export
 ```

 ## AGENT MODELS
-
 | Agent | Model | Temp | Purpose |
 |-------|-------|------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | 0.1 | Primary orchestrator |
 | Atlas | anthropic/claude-opus-4-5 | 0.1 | Master orchestrator |
 | oracle | openai/gpt-5.2 | 0.1 | Consultation, debugging |
-| librarian | opencode/glm-4.7-free | 0.1 | Docs, GitHub search |
-| explore | opencode/grok-code | 0.1 | Fast contextual grep |
+| librarian | opencode/big-pickle | 0.1 | Docs, GitHub search |
+| explore | opencode/gpt-5-nano | 0.1 | Fast contextual grep |
 | multimodal-looker | google/gemini-3-flash | 0.1 | PDF/image analysis |
 | Prometheus | anthropic/claude-opus-4-5 | 0.1 | Strategic planning |
 | Metis | anthropic/claude-sonnet-4-5 | 0.3 | Pre-planning analysis |
@@ -40,14 +37,12 @@ agents/
 | Sisyphus-Junior | anthropic/claude-sonnet-4-5 | 0.1 | Category-spawned executor |

 ## HOW TO ADD
-
-1. Create `src/agents/my-agent.ts` exporting factory + metadata
-2. Add to `agentSources` in `src/agents/utils.ts`
-3. Update `AgentNameSchema` in `src/config/schema.ts`
-4. Register in `src/index.ts` initialization
+1. Create `src/agents/my-agent.ts` exporting factory + metadata.
+2. Add to `agentSources` in `src/agents/utils.ts`.
+3. Update `AgentNameSchema` in `src/config/schema.ts`.
+4. Register in `src/index.ts` initialization.

 ## TOOL RESTRICTIONS
-
 | Agent | Denied Tools |
 |-------|-------------|
 | oracle | write, edit, task, delegate_task |
@@ -57,14 +52,13 @@ agents/
 | Sisyphus-Junior | task, delegate_task |

 ## PATTERNS
-
- **Factory**: `createXXXAgent(model?: string): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers
- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`
- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas
+- **Factory**: `createXXXAgent(model: string): AgentConfig`
+- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers.
+- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`.
+- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas.

 ## ANTI-PATTERNS
-
- **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background`
+- **Trust reports**: NEVER trust "I'm done" - verify outputs.
+- **High temp**: Don't use >0.3 for code agents.
+- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration.
+- **Prometheus writing code**: Planner only - never implements.
--- a/src/agents/atlas.ts
+++ b/src/agents/atlas.ts
--- a/src/agents/prometheus-prompt.ts
+++ b/src/agents/prometheus-prompt.ts
@@ -319,8 +319,8 @@ Or should I just note down this single fix?"

 **Research First:**
 \`\`\`typescript
-delegate_task(agent="explore", prompt="Find all usages of [target] using lsp_find_references pattern...", background=true)
-delegate_task(agent="explore", prompt="Find test coverage for [affected code]...", background=true)
+delegate_task(subagent_type="explore", prompt="Find all usages of [target] using lsp_find_references pattern...", run_in_background=true)
+delegate_task(subagent_type="explore", prompt="Find test coverage for [affected code]...", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -343,9 +343,9 @@ delegate_task(agent="explore", prompt="Find test coverage for [affected code]...
 **Pre-Interview Research (MANDATORY):**
 \`\`\`typescript
 // Launch BEFORE asking user questions
-delegate_task(agent="explore", prompt="Find similar implementations in codebase...", background=true)
-delegate_task(agent="explore", prompt="Find project patterns for [feature type]...", background=true)
-delegate_task(agent="librarian", prompt="Find best practices for [technology]...", background=true)
+delegate_task(subagent_type="explore", prompt="Find similar implementations in codebase...", run_in_background=true)
+delegate_task(subagent_type="explore", prompt="Find project patterns for [feature type]...", run_in_background=true)
+delegate_task(subagent_type="librarian", prompt="Find best practices for [technology]...", run_in_background=true)
 \`\`\`

 **Interview Focus** (AFTER research):
@@ -384,7 +384,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js

 Run this check:
 \`\`\`typescript
-delegate_task(agent="explore", prompt="Find test infrastructure: package.json test scripts, test config files (jest.config, vitest.config, pytest.ini, etc.), existing test files (*.test.*, *.spec.*, test_*). Report: 1) Does test infra exist? 2) What framework? 3) Example test file patterns.", background=true)
+delegate_task(subagent_type="explore", prompt="Find test infrastructure: package.json test scripts, test config files (jest.config, vitest.config, pytest.ini, etc.), existing test files (*.test.*, *.spec.*, test_*). Report: 1) Does test infra exist? 2) What framework? 3) Example test file patterns.", run_in_background=true)
 \`\`\`

 #### Step 2: Ask the Test Question (MANDATORY)
@@ -473,13 +473,13 @@ Add to draft immediately:

 **Research First:**
 \`\`\`typescript
-delegate_task(agent="explore", prompt="Find current system architecture and patterns...", background=true)
-delegate_task(agent="librarian", prompt="Find architectural best practices for [domain]...", background=true)
+delegate_task(subagent_type="explore", prompt="Find current system architecture and patterns...", run_in_background=true)
+delegate_task(subagent_type="librarian", prompt="Find architectural best practices for [domain]...", run_in_background=true)
 \`\`\`

 **Oracle Consultation** (recommend when stakes are high):
 \`\`\`typescript
-delegate_task(agent="oracle", prompt="Architecture consultation needed: [context]...", background=false)
+delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
 \`\`\`

 **Interview Focus:**
@@ -496,9 +496,9 @@ delegate_task(agent="oracle", prompt="Architecture consultation needed: [context

 **Parallel Investigation:**
 \`\`\`typescript
-delegate_task(agent="explore", prompt="Find how X is currently handled...", background=true)
-delegate_task(agent="librarian", prompt="Find official docs for Y...", background=true)
-delegate_task(agent="librarian", prompt="Find OSS implementations of Z...", background=true)
+delegate_task(subagent_type="explore", prompt="Find how X is currently handled...", run_in_background=true)
+delegate_task(subagent_type="librarian", prompt="Find official docs for Y...", run_in_background=true)
+delegate_task(subagent_type="librarian", prompt="Find OSS implementations of Z...", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -524,17 +524,17 @@ delegate_task(agent="librarian", prompt="Find OSS implementations of Z...", back

 **For Understanding Codebase:**
 \`\`\`typescript
-delegate_task(agent="explore", prompt="Find all files related to [topic]. Show patterns, conventions, and structure.", background=true)
+delegate_task(subagent_type="explore", prompt="Find all files related to [topic]. Show patterns, conventions, and structure.", run_in_background=true)
 \`\`\`

 **For External Knowledge:**
 \`\`\`typescript
-delegate_task(agent="librarian", prompt="Find official documentation for [library]. Focus on [specific feature] and best practices.", background=true)
+delegate_task(subagent_type="librarian", prompt="Find official documentation for [library]. Focus on [specific feature] and best practices.", run_in_background=true)
 \`\`\`

 **For Implementation Examples:**
 \`\`\`typescript
-delegate_task(agent="librarian", prompt="Find open source implementations of [feature]. Look for production-quality examples.", background=true)
+delegate_task(subagent_type="librarian", prompt="Find open source implementations of [feature]. Look for production-quality examples.", run_in_background=true)
 \`\`\`

 ## Interview Mode Anti-Patterns
@@ -631,7 +631,7 @@ todoWrite([

 \`\`\`typescript
 delegate_task(
-  agent="Metis (Plan Consultant)",
+  subagent_type="metis",
  prompt=\`Review this planning session before I generate the work plan:

  **User's Goal**: {summarize what user wants}
@@ -652,7 +652,7 @@ delegate_task(
  4. Assumptions I'm making that need validation
  5. Missing acceptance criteria
  6. Edge cases not addressed\`,
-  background=false
+  run_in_background=false
 )
 \`\`\`

@@ -797,9 +797,9 @@ Question({
 // After generating initial plan
 while (true) {
  const result = delegate_task(
-    agent="Momus (Plan Reviewer)",
+    subagent_type="momus",
    prompt=".sisyphus/plans/{name}.md",
-    background=false
+    run_in_background=false
  )

  if (result.verdict === "OKAY") {
--- a/src/agents/sisyphus-junior.ts
+++ b/src/agents/sisyphus-junior.ts
@@ -20,32 +20,6 @@ ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
 You work ALONE for implementation. No delegation of implementation tasks.
 </Critical_Constraints>

-<Work_Context>
-## Notepad Location (for recording learnings)
-NOTEPAD PATH: .sisyphus/notepads/{plan-name}/
- learnings.md: Record patterns, conventions, successful approaches
- issues.md: Record problems, blockers, gotchas encountered
- decisions.md: Record architectural choices and rationales
- problems.md: Record unresolved issues, technical debt
-
-You SHOULD append findings to notepad files after completing work.
-IMPORTANT: Always APPEND to notepad files - never overwrite or use Edit tool.
-
-## Plan Location (READ ONLY)
-PLAN PATH: .sisyphus/plans/{plan-name}.md
-
-CRITICAL RULE: NEVER MODIFY THE PLAN FILE
-
-The plan file (.sisyphus/plans/*.md) is SACRED and READ-ONLY.
- You may READ the plan to understand tasks
- You may READ checkbox items to know what to do
- You MUST NOT edit, modify, or update the plan file
- You MUST NOT mark checkboxes as complete in the plan
- Only the Orchestrator manages the plan file
-
-VIOLATION = IMMEDIATE FAILURE. The Orchestrator tracks plan state.
-</Work_Context>
-
 <Todo_Discipline>
 TODO OBSESSION (NON-NEGOTIABLE):
 - 2+ steps → todowrite FIRST, atomic breakdown
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -144,11 +144,11 @@ ${librarianSection}
 \`\`\`typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
@@ -205,6 +205,34 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 **Vague prompts = rejected. Be exhaustive.**

+### Session Continuity (MANDATORY)
+
+Every \`delegate_task()\` output includes a session_id. **USE IT.**
+
+**ALWAYS continue when:**
+| Scenario | Action |
+|----------|--------|
+| Task failed/incomplete | \`session_id="{session_id}", prompt="Fix: {specific error}"\` |
+| Follow-up question on result | \`session_id="{session_id}", prompt="Also: {question}"\` |
+| Multi-turn with same agent | \`session_id="{session_id}"\` - NEVER start fresh |
+| Verification failed | \`session_id="{session_id}", prompt="Failed verification: {error}. Fix."\` |
+
+**Why session_id is CRITICAL:**
+- Subagent has FULL conversation context preserved
+- No repeated file reads, exploration, or setup
+- Saves 70%+ tokens on follow-ups
+- Subagent knows what it already tried/learned
+
+\`\`\`typescript
+// WRONG: Starting fresh loses all context
+delegate_task(category="quick", prompt="Fix the type error in auth.ts...")
+
+// CORRECT: Resume preserves everything
+delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
+\`\`\`
+
+**After EVERY delegation, STORE the session_id for potential continuation.**
+
 ### Code Changes:
 - Match existing patterns (if codebase is disciplined)
 - Propose approach first (if codebase is chaotic)
--- a/src/agents/types.ts
+++ b/src/agents/types.ts
@@ -57,14 +57,14 @@ export function isGptModel(model: string): boolean {
 }

 export type BuiltinAgentName =
-  | "Sisyphus"
+  | "sisyphus"
  | "oracle"
  | "librarian"
  | "explore"
  | "multimodal-looker"
-  | "Metis (Plan Consultant)"
-  | "Momus (Plan Reviewer)"
-  | "Atlas"
+  | "metis"
+  | "momus"
+  | "atlas"

 export type OverridableAgentName =
  | "build"
--- a/src/agents/utils.test.ts
+++ b/src/agents/utils.test.ts
@@ -1,6 +1,7 @@
-import { describe, test, expect } from "bun:test"
+import { describe, test, expect, beforeEach } from "bun:test"
 import { createBuiltinAgents } from "./utils"
 import type { AgentConfig } from "@opencode-ai/sdk"
+import { clearSkillCache } from "../features/opencode-skill-loader/skill-content"

 const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5"

@@ -12,46 +13,46 @@ describe("createBuiltinAgents with model overrides", () => {
    const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL)

    // #then
-    expect(agents.Sisyphus.model).toBe("anthropic/claude-opus-4-5")
-    expect(agents.Sisyphus.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
-    expect(agents.Sisyphus.reasoningEffort).toBeUndefined()
+    expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect(agents.sisyphus.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
+    expect(agents.sisyphus.reasoningEffort).toBeUndefined()
  })

  test("Sisyphus with GPT model override has reasoningEffort, no thinking", async () => {
    // #given
    const overrides = {
-      Sisyphus: { model: "github-copilot/gpt-5.2" },
+      sisyphus: { model: "github-copilot/gpt-5.2" },
    }

    // #when
    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)

    // #then
-    expect(agents.Sisyphus.model).toBe("github-copilot/gpt-5.2")
-    expect(agents.Sisyphus.reasoningEffort).toBe("medium")
-    expect(agents.Sisyphus.thinking).toBeUndefined()
+    expect(agents.sisyphus.model).toBe("github-copilot/gpt-5.2")
+    expect(agents.sisyphus.reasoningEffort).toBe("medium")
+    expect(agents.sisyphus.thinking).toBeUndefined()
  })

-  test("Sisyphus uses first fallbackChain entry when no availableModels provided", async () => {
+  test("Sisyphus uses system default when no availableModels provided", async () => {
    // #given
-    const systemDefaultModel = "openai/gpt-5.2"
+    const systemDefaultModel = "anthropic/claude-opus-4-5"

    // #when
    const agents = await createBuiltinAgents([], {}, undefined, systemDefaultModel)

-    // #then - Sisyphus first fallbackChain entry is anthropic/claude-opus-4-5
-    expect(agents.Sisyphus.model).toBe("anthropic/claude-opus-4-5")
-    expect(agents.Sisyphus.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
-    expect(agents.Sisyphus.reasoningEffort).toBeUndefined()
+    // #then - falls back to system default when no available model matches
+    expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect(agents.sisyphus.thinking).toEqual({ type: "enabled", budgetTokens: 32000 })
+    expect(agents.sisyphus.reasoningEffort).toBeUndefined()
  })

-  test("Oracle uses first fallbackChain entry when no availableModels provided", async () => {
-    // #given - Oracle's first fallbackChain entry is openai/gpt-5.2
+  test("Oracle uses first fallback entry when no availableModels provided (no cache scenario)", async () => {
+    // #given - no available models simulates CI without model cache

    // #when
    const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL)

-    // #then - Oracle first fallbackChain entry is openai/gpt-5.2
+    // #then - uses first fallback entry (openai/gpt-5.2) instead of system default
    expect(agents.oracle.model).toBe("openai/gpt-5.2")
    expect(agents.oracle.reasoningEffort).toBe("medium")
    expect(agents.oracle.textVerbosity).toBe("high")
@@ -90,18 +91,42 @@ describe("createBuiltinAgents with model overrides", () => {
    expect(agents.oracle.textVerbosity).toBeUndefined()
  })

-  test("non-model overrides are still applied after factory rebuild", async () => {
-    // #given
-    const overrides = {
-      Sisyphus: { model: "github-copilot/gpt-5.2", temperature: 0.5 },
-    }
+   test("non-model overrides are still applied after factory rebuild", async () => {
+     // #given
+     const overrides = {
+       sisyphus: { model: "github-copilot/gpt-5.2", temperature: 0.5 },
+     }
+
+     // #when
+     const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+
+     // #then
+     expect(agents.sisyphus.model).toBe("github-copilot/gpt-5.2")
+     expect(agents.sisyphus.temperature).toBe(0.5)
+   })
+})
+
+describe("createBuiltinAgents without systemDefaultModel", () => {
+  test("creates agents successfully without systemDefaultModel", async () => {
+    // #given - no systemDefaultModel provided

    // #when
-    const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL)
+    const agents = await createBuiltinAgents([], {}, undefined, undefined)

-    // #then
-    expect(agents.Sisyphus.model).toBe("github-copilot/gpt-5.2")
-    expect(agents.Sisyphus.temperature).toBe(0.5)
+    // #then - agents should still be created using fallback chain
+    expect(agents.oracle).toBeDefined()
+    expect(agents.oracle.model).toBe("openai/gpt-5.2")
+  })
+
+  test("sisyphus uses fallback chain when systemDefaultModel undefined", async () => {
+    // #given - no systemDefaultModel
+
+    // #when
+    const agents = await createBuiltinAgents([], {}, undefined, undefined)
+
+    // #then - sisyphus should use its fallback chain
+    expect(agents.sisyphus).toBeDefined()
+    expect(agents.sisyphus.model).toBe("anthropic/claude-opus-4-5")
  })
 })

@@ -109,6 +134,10 @@ describe("buildAgent with category and skills", () => {
  const { buildAgent } = require("./utils")
  const TEST_MODEL = "anthropic/claude-opus-4-5"

+  beforeEach(() => {
+    clearSkillCache()
+  })
+
  test("agent with category inherits category settings", () => {
    // #given - agent factory that sets category but no model
    const source = {
@@ -123,7 +152,7 @@ describe("buildAgent with category and skills", () => {
    const agent = buildAgent(source["test-agent"], TEST_MODEL)

    // #then - category's built-in model is applied
-    expect(agent.model).toBe("google/gemini-3-pro-preview")
+    expect(agent.model).toBe("google/gemini-3-pro")
  })

  test("agent with category and existing model keeps existing model", () => {
@@ -308,4 +337,42 @@ describe("buildAgent with category and skills", () => {
    // #then
    expect(agent.prompt).toBe("Base prompt")
  })
+
+  test("agent with agent-browser skill resolves when browserProvider is set", () => {
+    // #given
+    const source = {
+      "test-agent": () =>
+        ({
+          description: "Test agent",
+          skills: ["agent-browser"],
+          prompt: "Base prompt",
+        }) as AgentConfig,
+    }
+
+    // #when - browserProvider is "agent-browser"
+    const agent = buildAgent(source["test-agent"], TEST_MODEL, undefined, undefined, "agent-browser")
+
+    // #then - agent-browser skill content should be in prompt
+    expect(agent.prompt).toContain("agent-browser")
+    expect(agent.prompt).toContain("Base prompt")
+  })
+
+  test("agent with agent-browser skill NOT resolved when browserProvider not set", () => {
+    // #given
+    const source = {
+      "test-agent": () =>
+        ({
+          description: "Test agent",
+          skills: ["agent-browser"],
+          prompt: "Base prompt",
+        }) as AgentConfig,
+    }
+
+    // #when - no browserProvider (defaults to playwright)
+    const agent = buildAgent(source["test-agent"], TEST_MODEL)
+
+    // #then - agent-browser skill not found, only base prompt remains
+    expect(agent.prompt).toBe("Base prompt")
+    expect(agent.prompt).not.toContain("agent-browser open")
+  })
 })
--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -10,25 +10,26 @@ import { createMetisAgent } from "./metis"
 import { createAtlasAgent } from "./atlas"
 import { createMomusAgent } from "./momus"
 import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive } from "../shared"
+import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive, readConnectedProvidersCache } from "../shared"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
 import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
 import { createBuiltinSkills } from "../features/builtin-skills"
 import type { LoadedSkill, SkillScope } from "../features/opencode-skill-loader/types"
+import type { BrowserAutomationProvider } from "../config/schema"

 type AgentSource = AgentFactory | AgentConfig

 const agentSources: Record<BuiltinAgentName, AgentSource> = {
-  Sisyphus: createSisyphusAgent,
+  sisyphus: createSisyphusAgent,
  oracle: createOracleAgent,
  librarian: createLibrarianAgent,
  explore: createExploreAgent,
  "multimodal-looker": createMultimodalLookerAgent,
-  "Metis (Plan Consultant)": createMetisAgent,
-  "Momus (Plan Reviewer)": createMomusAgent,
+  metis: createMetisAgent,
+  momus: createMomusAgent,
  // Note: Atlas is handled specially in createBuiltinAgents()
  // because it needs OrchestratorContext, not just a model string
-  Atlas: createAtlasAgent as unknown as AgentFactory,
+  atlas: createAtlasAgent as unknown as AgentFactory,
 }

 /**
@@ -50,7 +51,8 @@ export function buildAgent(
  source: AgentSource,
  model: string,
  categories?: CategoriesConfig,
-  gitMasterConfig?: GitMasterConfig
+  gitMasterConfig?: GitMasterConfig,
+  browserProvider?: BrowserAutomationProvider
 ): AgentConfig {
  const base = isFactory(source) ? source(model) : source
  const categoryConfigs: Record<string, CategoryConfig> = categories
@@ -74,7 +76,7 @@ export function buildAgent(
  }

  if (agentWithCategory.skills?.length) {
-    const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig })
+    const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig, browserProvider })
    if (resolved.size > 0) {
      const skillContent = Array.from(resolved.values()).join("\n\n")
      base.prompt = skillContent + (base.prompt ? "\n\n" + base.prompt : "")
@@ -139,21 +141,20 @@ function mapScopeToLocation(scope: SkillScope): AvailableSkill["location"] {
 }

 export async function createBuiltinAgents(
-  disabledAgents: BuiltinAgentName[] = [],
+  disabledAgents: string[] = [],
  agentOverrides: AgentOverrides = {},
  directory?: string,
  systemDefaultModel?: string,
  categories?: CategoriesConfig,
  gitMasterConfig?: GitMasterConfig,
  discoveredSkills: LoadedSkill[] = [],
-  client?: any
+  client?: any,
+  browserProvider?: BrowserAutomationProvider
 ): Promise<Record<string, AgentConfig>> {
-  if (!systemDefaultModel) {
-    throw new Error("createBuiltinAgents requires systemDefaultModel")
-  }
-
-  // Fetch available models at plugin init
-  const availableModels = client ? await fetchAvailableModels(client) : new Set<string>()
+  const connectedProviders = readConnectedProvidersCache()
+  const availableModels = client 
+    ? await fetchAvailableModels(client, { connectedProviders: connectedProviders ?? undefined }) 
+    : new Set<string>()

  const result: Record<string, AgentConfig> = {}
  const availableAgents: AvailableAgent[] = []
@@ -167,7 +168,7 @@ export async function createBuiltinAgents(
    description: categories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks",
  }))

-  const builtinSkills = createBuiltinSkills()
+  const builtinSkills = createBuiltinSkills({ browserProvider })
  const builtinSkillNames = new Set(builtinSkills.map(s => s.name))

  const builtinAvailable: AvailableSkill[] = builtinSkills.map((skill) => ({
@@ -186,31 +187,32 @@ export async function createBuiltinAgents(

  const availableSkills: AvailableSkill[] = [...builtinAvailable, ...discoveredAvailable]

-  for (const [name, source] of Object.entries(agentSources)) {
-    const agentName = name as BuiltinAgentName
+   for (const [name, source] of Object.entries(agentSources)) {
+     const agentName = name as BuiltinAgentName

-    if (agentName === "Sisyphus") continue
-    if (agentName === "Atlas") continue
-    if (includesCaseInsensitive(disabledAgents, agentName)) continue
+     if (agentName === "sisyphus") continue
+     if (agentName === "atlas") continue
+     if (includesCaseInsensitive(disabledAgents, agentName)) continue

    const override = findCaseInsensitive(agentOverrides, agentName)
    const requirement = AGENT_MODEL_REQUIREMENTS[agentName]
    
-    // Use resolver to determine model
-    const { model } = resolveModelWithFallback({
+    const resolution = resolveModelWithFallback({
      userModel: override?.model,
      fallbackChain: requirement?.fallbackChain,
      availableModels,
      systemDefaultModel,
    })
+    if (!resolution) continue
+    const { model, variant: resolvedVariant } = resolution

-    let config = buildAgent(source, model, mergedCategories, gitMasterConfig)
+    let config = buildAgent(source, model, mergedCategories, gitMasterConfig, browserProvider)
    
-    // Apply variant from override or requirement
+    // Apply variant from override or resolved fallback chain
    if (override?.variant) {
      config = { ...config, variant: override.variant }
-    } else if (requirement?.variant) {
-      config = { ...config, variant: requirement.variant }
+    } else if (resolvedVariant) {
+      config = { ...config, variant: resolvedVariant }
    }

    if (agentName === "librarian" && directory && config.prompt) {
@@ -234,77 +236,81 @@ export async function createBuiltinAgents(
    }
  }

-  if (!disabledAgents.includes("Sisyphus")) {
-    const sisyphusOverride = agentOverrides["Sisyphus"]
-    const sisyphusRequirement = AGENT_MODEL_REQUIREMENTS["Sisyphus"]
+   if (!disabledAgents.includes("sisyphus")) {
+     const sisyphusOverride = agentOverrides["sisyphus"]
+     const sisyphusRequirement = AGENT_MODEL_REQUIREMENTS["sisyphus"]
    
-    // Use resolver to determine model
-    const { model: sisyphusModel } = resolveModelWithFallback({
+    const sisyphusResolution = resolveModelWithFallback({
      userModel: sisyphusOverride?.model,
      fallbackChain: sisyphusRequirement?.fallbackChain,
      availableModels,
      systemDefaultModel,
    })

-    let sisyphusConfig = createSisyphusAgent(
-      sisyphusModel,
-      availableAgents,
-      undefined,
-      availableSkills,
-      availableCategories
-    )
+    if (sisyphusResolution) {
+      const { model: sisyphusModel, variant: sisyphusResolvedVariant } = sisyphusResolution
+
+      let sisyphusConfig = createSisyphusAgent(
+        sisyphusModel,
+        availableAgents,
+        undefined,
+        availableSkills,
+        availableCategories
+      )
+      
+      if (sisyphusOverride?.variant) {
+        sisyphusConfig = { ...sisyphusConfig, variant: sisyphusOverride.variant }
+      } else if (sisyphusResolvedVariant) {
+        sisyphusConfig = { ...sisyphusConfig, variant: sisyphusResolvedVariant }
+      }
+
+      if (directory && sisyphusConfig.prompt) {
+        const envContext = createEnvContext()
+        sisyphusConfig = { ...sisyphusConfig, prompt: sisyphusConfig.prompt + envContext }
+      }
+
+      if (sisyphusOverride) {
+        sisyphusConfig = mergeAgentConfig(sisyphusConfig, sisyphusOverride)
+      }
+
+      result["sisyphus"] = sisyphusConfig
+    }
+   }
+
+   if (!disabledAgents.includes("atlas")) {
+     const orchestratorOverride = agentOverrides["atlas"]
+     const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
    
-    // Apply variant from override or requirement
-    if (sisyphusOverride?.variant) {
-      sisyphusConfig = { ...sisyphusConfig, variant: sisyphusOverride.variant }
-    } else if (sisyphusRequirement?.variant) {
-      sisyphusConfig = { ...sisyphusConfig, variant: sisyphusRequirement.variant }
-    }
-
-    if (directory && sisyphusConfig.prompt) {
-      const envContext = createEnvContext()
-      sisyphusConfig = { ...sisyphusConfig, prompt: sisyphusConfig.prompt + envContext }
-    }
-
-    if (sisyphusOverride) {
-      sisyphusConfig = mergeAgentConfig(sisyphusConfig, sisyphusOverride)
-    }
-
-    result["Sisyphus"] = sisyphusConfig
-  }
-
-  if (!disabledAgents.includes("Atlas")) {
-    const orchestratorOverride = agentOverrides["Atlas"]
-    const atlasRequirement = AGENT_MODEL_REQUIREMENTS["Atlas"]
-    
-    // Use resolver to determine model
-    const { model: atlasModel } = resolveModelWithFallback({
+    const atlasResolution = resolveModelWithFallback({
      userModel: orchestratorOverride?.model,
      fallbackChain: atlasRequirement?.fallbackChain,
      availableModels,
      systemDefaultModel,
    })
    
-    let orchestratorConfig = createAtlasAgent({
-      model: atlasModel,
-      availableAgents,
-      availableSkills,
-      userCategories: categories,
-    })
-    
-    // Apply variant from override or requirement
-    if (orchestratorOverride?.variant) {
-      orchestratorConfig = { ...orchestratorConfig, variant: orchestratorOverride.variant }
-    } else if (atlasRequirement?.variant) {
-      orchestratorConfig = { ...orchestratorConfig, variant: atlasRequirement.variant }
+    if (atlasResolution) {
+      const { model: atlasModel, variant: atlasResolvedVariant } = atlasResolution
+
+      let orchestratorConfig = createAtlasAgent({
+        model: atlasModel,
+        availableAgents,
+        availableSkills,
+        userCategories: categories,
+      })
+      
+      if (orchestratorOverride?.variant) {
+        orchestratorConfig = { ...orchestratorConfig, variant: orchestratorOverride.variant }
+      } else if (atlasResolvedVariant) {
+        orchestratorConfig = { ...orchestratorConfig, variant: atlasResolvedVariant }
+      }
+
+      if (orchestratorOverride) {
+        orchestratorConfig = mergeAgentConfig(orchestratorConfig, orchestratorOverride)
+      }
+
+      result["atlas"] = orchestratorConfig
    }
+   }

-    if (orchestratorOverride) {
-      orchestratorConfig = mergeAgentConfig(orchestratorConfig, orchestratorOverride)
-    }
-
-    result["Atlas"] = orchestratorConfig
-  }
-
-  return result
-}
+   return result
+ }
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -8,16 +8,17 @@ CLI entry: `bunx oh-my-opencode`. Interactive installer, doctor diagnostics. Com

 ```
 cli/
-├── index.ts              # Commander.js entry
+├── index.ts              # Commander.js entry (4 commands)
 ├── install.ts            # Interactive TUI (520 lines)
-├── config-manager.ts     # JSONC parsing (641 lines)
+├── config-manager.ts     # JSONC parsing (664 lines)
 ├── types.ts              # InstallArgs, InstallConfig
+├── model-fallback.ts     # Model fallback configuration
 ├── doctor/
 │   ├── index.ts          # Doctor entry
 │   ├── runner.ts         # Check orchestration
 │   ├── formatter.ts      # Colored output
 │   ├── constants.ts      # Check IDs, symbols
-│   ├── types.ts          # CheckResult, CheckDefinition
+│   ├── types.ts          # CheckResult, CheckDefinition (114 lines)
 │   └── checks/           # 14 checks, 21 files
 │       ├── version.ts    # OpenCode + plugin version
 │       ├── config.ts     # JSONC validity, Zod
@@ -25,6 +26,7 @@ cli/
 │       ├── dependencies.ts # AST-Grep, Comment Checker
 │       ├── lsp.ts        # LSP connectivity
 │       ├── mcp.ts        # MCP validation
+│       ├── model-resolution.ts # Model resolution check
 │       └── gh.ts         # GitHub CLI
 ├── run/
 │   └── index.ts          # Session launcher
@@ -36,36 +38,37 @@ cli/

 | Command | Purpose |
 |---------|---------|
-| `install` | Interactive setup |
-| `doctor` | 14 health checks |
-| `run` | Launch session |
-| `get-local-version` | Version check |
+| `install` | Interactive setup with provider selection |
+| `doctor` | 14 health checks for diagnostics |
+| `run` | Launch session with todo enforcement |
+| `get-local-version` | Version detection and update check |

-## DOCTOR CATEGORIES
+## DOCTOR CATEGORIES (14 Checks)

 | Category | Checks |
 |----------|--------|
 | installation | opencode, plugin |
-| configuration | config validity, Zod |
+| configuration | config validity, Zod, model-resolution |
 | authentication | anthropic, openai, google |
-| dependencies | ast-grep, comment-checker |
+| dependencies | ast-grep, comment-checker, gh-cli |
 | tools | LSP, MCP |
 | updates | version comparison |

 ## HOW TO ADD CHECK

 1. Create `src/cli/doctor/checks/my-check.ts`
-2. Export from `checks/index.ts`
-3. Add to `getAllCheckDefinitions()`
+2. Export `getXXXCheckDefinition()` factory returning `CheckDefinition`
+3. Add to `getAllCheckDefinitions()` in `checks/index.ts`

 ## TUI FRAMEWORK

- **@clack/prompts**: `select()`, `spinner()`, `intro()`
- **picocolors**: Terminal colors
- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn)
+- **@clack/prompts**: `select()`, `spinner()`, `intro()`, `outro()`
+- **picocolors**: Terminal colors for status and headers
+- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn), ℹ (info)

 ## ANTI-PATTERNS

- **Blocking in non-TTY**: Check `process.stdout.isTTY`
- **Direct JSON.parse**: Use `parseJsonc()`
- **Silent failures**: Return warn/fail in doctor
+- **Blocking in non-TTY**: Always check `process.stdout.isTTY`
+- **Direct JSON.parse**: Use `parseJsonc()` from shared utils
+- **Silent failures**: Return `warn` or `fail` in doctor instead of throwing
+- **Hardcoded paths**: Use `getOpenCodeConfigPaths()` from `config-manager.ts`
--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
--- a/src/cli/config-manager.test.ts
+++ b/src/cli/config-manager.test.ts
@@ -170,7 +170,7 @@ describe("fetchNpmDistTags", () => {
 })

 describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
-  test("Gemini models include full spec (limit + modalities)", () => {
+  test("all models include full spec (limit + modalities + Antigravity label)", () => {
    const google = (ANTIGRAVITY_PROVIDER_CONFIG as any).google
    expect(google).toBeTruthy()

@@ -178,9 +178,11 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
    expect(models).toBeTruthy()

    const required = [
-      "antigravity-gemini-3-pro-high",
-      "antigravity-gemini-3-pro-low",
+      "antigravity-gemini-3-pro",
      "antigravity-gemini-3-flash",
+      "antigravity-claude-sonnet-4-5",
+      "antigravity-claude-sonnet-4-5-thinking",
+      "antigravity-claude-opus-4-5-thinking",
    ]

    for (const key of required) {
@@ -198,6 +200,43 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
      expect(Array.isArray(model.modalities.output)).toBe(true)
    }
  })
+
+  test("Gemini models have variant definitions", () => {
+    // #given the antigravity provider config
+    const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
+
+    // #when checking Gemini Pro variants
+    const pro = models["antigravity-gemini-3-pro"]
+    // #then should have low and high variants
+    expect(pro.variants).toBeTruthy()
+    expect(pro.variants.low).toBeTruthy()
+    expect(pro.variants.high).toBeTruthy()
+
+    // #when checking Gemini Flash variants
+    const flash = models["antigravity-gemini-3-flash"]
+    // #then should have minimal, low, medium, high variants
+    expect(flash.variants).toBeTruthy()
+    expect(flash.variants.minimal).toBeTruthy()
+    expect(flash.variants.low).toBeTruthy()
+    expect(flash.variants.medium).toBeTruthy()
+    expect(flash.variants.high).toBeTruthy()
+  })
+
+  test("Claude thinking models have variant definitions", () => {
+    // #given the antigravity provider config
+    const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
+
+    // #when checking Claude thinking variants
+    const sonnetThinking = models["antigravity-claude-sonnet-4-5-thinking"]
+    const opusThinking = models["antigravity-claude-opus-4-5-thinking"]
+
+    // #then both should have low and max variants
+    for (const model of [sonnetThinking, opusThinking]) {
+      expect(model.variants).toBeTruthy()
+      expect(model.variants.low).toBeTruthy()
+      expect(model.variants.max).toBeTruthy()
+    }
+  })
 })

 describe("generateOmoConfig - model fallback system", () => {
@@ -219,7 +258,7 @@ describe("generateOmoConfig - model fallback system", () => {
    // #then should use native anthropic sonnet (cost-efficient for standard plan)
    expect(result.$schema).toBe("https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json")
    expect(result.agents).toBeDefined()
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("anthropic/claude-sonnet-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-sonnet-4-5")
  })

  test("generates native opus models when Claude max20 subscription", () => {
@@ -238,7 +277,7 @@ describe("generateOmoConfig - model fallback system", () => {
    const result = generateOmoConfig(config)

    // #then should use native anthropic opus (max power for max20 plan)
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-5")
  })

  test("uses github-copilot sonnet fallback when only copilot available", () => {
@@ -257,7 +296,7 @@ describe("generateOmoConfig - model fallback system", () => {
    const result = generateOmoConfig(config)

    // #then should use github-copilot sonnet models (copilot fallback)
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("github-copilot/claude-sonnet-4.5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("github-copilot/claude-sonnet-4.5")
  })

  test("uses ultimate fallback when no providers configured", () => {
@@ -277,7 +316,7 @@ describe("generateOmoConfig - model fallback system", () => {

    // #then should use ultimate fallback for all agents
    expect(result.$schema).toBe("https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json")
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("opencode/glm-4.7-free")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("opencode/big-pickle")
  })

  test("uses zai-coding-plan/glm-4.7 for librarian when Z.ai available", () => {
@@ -298,7 +337,7 @@ describe("generateOmoConfig - model fallback system", () => {
    // #then librarian should use zai-coding-plan/glm-4.7
    expect((result.agents as Record<string, { model: string }>).librarian.model).toBe("zai-coding-plan/glm-4.7")
    // #then other agents should use native opus (max20 plan)
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("anthropic/claude-opus-4-5")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("anthropic/claude-opus-4-5")
  })

  test("uses native OpenAI models when only ChatGPT available", () => {
@@ -317,7 +356,7 @@ describe("generateOmoConfig - model fallback system", () => {
    const result = generateOmoConfig(config)

    // #then Sisyphus should use native OpenAI (fallback within native tier)
-    expect((result.agents as Record<string, { model: string }>).Sisyphus.model).toBe("openai/gpt-5.2")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("openai/gpt-5.2")
    // #then Oracle should use native OpenAI (first fallback entry)
    expect((result.agents as Record<string, { model: string }>).oracle.model).toBe("openai/gpt-5.2")
    // #then multimodal-looker should use native OpenAI (fallback within native tier)
@@ -343,7 +382,7 @@ describe("generateOmoConfig - model fallback system", () => {
    expect((result.agents as Record<string, { model: string }>).explore.model).toBe("anthropic/claude-haiku-4-5")
  })

-  test("uses grok-code for explore when not max20", () => {
+  test("uses haiku for explore regardless of max20 flag", () => {
    // #given user has Claude but not max20
    const config: InstallConfig = {
      hasClaude: true,
@@ -358,7 +397,7 @@ describe("generateOmoConfig - model fallback system", () => {
    // #when generating config
    const result = generateOmoConfig(config)

-    // #then explore should use grok-code (preserve Claude quota)
-    expect((result.agents as Record<string, { model: string }>).explore.model).toBe("opencode/grok-code")
+    // #then explore should use haiku (isMax20 doesn't affect explore anymore)
+    expect((result.agents as Record<string, { model: string }>).explore.model).toBe("anthropic/claude-haiku-4-5")
  })
 })
--- a/src/cli/config-manager.ts
+++ b/src/cli/config-manager.ts
@@ -497,38 +497,61 @@ export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
 *
 * IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
 *
- * The opencode-antigravity-auth plugin supports two naming conventions:
- * - `antigravity-gemini-3-pro-high` (RECOMMENDED, explicit Antigravity quota routing)
- * - `gemini-3-pro-high` (LEGACY, backward compatible but may break in future)
+ * Since opencode-antigravity-auth v1.3.0, models use a variant system:
+ * - `antigravity-gemini-3-pro` with variants: low, high
+ * - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
 *
- * Legacy names rely on Gemini CLI using `-preview` suffix for disambiguation.
- * If Google removes `-preview`, legacy names may route to wrong quota.
+ * Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work
+ * but variants are the recommended approach.
 *
- * @see https://github.com/NoeFabris/opencode-antigravity-auth#migration-guide-v127
+ * @see https://github.com/NoeFabris/opencode-antigravity-auth#models
 */
 export const ANTIGRAVITY_PROVIDER_CONFIG = {
  google: {
    name: "Google",
    models: {
-      "antigravity-gemini-3-pro-high": {
-        name: "Gemini 3 Pro High (Antigravity)",
-        thinking: true,
-        attachment: true,
-        limit: { context: 1048576, output: 65535 },
-        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
-      },
-      "antigravity-gemini-3-pro-low": {
-        name: "Gemini 3 Pro Low (Antigravity)",
-        thinking: true,
-        attachment: true,
+      "antigravity-gemini-3-pro": {
+        name: "Gemini 3 Pro (Antigravity)",
        limit: { context: 1048576, output: 65535 },
        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingLevel: "low" },
+          high: { thinkingLevel: "high" },
+        },
      },
      "antigravity-gemini-3-flash": {
        name: "Gemini 3 Flash (Antigravity)",
-        attachment: true,
        limit: { context: 1048576, output: 65536 },
        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          minimal: { thinkingLevel: "minimal" },
+          low: { thinkingLevel: "low" },
+          medium: { thinkingLevel: "medium" },
+          high: { thinkingLevel: "high" },
+        },
+      },
+      "antigravity-claude-sonnet-4-5": {
+        name: "Claude Sonnet 4.5 (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+      },
+      "antigravity-claude-sonnet-4-5-thinking": {
+        name: "Claude Sonnet 4.5 Thinking (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingConfig: { thinkingBudget: 8192 } },
+          max: { thinkingConfig: { thinkingBudget: 32768 } },
+        },
+      },
+      "antigravity-claude-opus-4-5-thinking": {
+        name: "Claude Opus 4.5 Thinking (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingConfig: { thinkingBudget: 8192 } },
+          max: { thinkingConfig: { thinkingBudget: 32768 } },
+        },
      },
    },
  },
--- a/src/cli/doctor/checks/dependencies.test.ts
+++ b/src/cli/doctor/checks/dependencies.test.ts
@@ -16,10 +16,10 @@ describe("dependencies check", () => {
  })

  describe("checkAstGrepNapi", () => {
-    it("returns dependency info", () => {
+    it("returns dependency info", async () => {
      // #given
      // #when checking ast-grep napi
-      const info = deps.checkAstGrepNapi()
+      const info = await deps.checkAstGrepNapi()

      // #then should return valid info
      expect(info.name).toBe("AST-Grep NAPI")
@@ -95,7 +95,7 @@ describe("dependencies check", () => {

    it("returns pass when installed", async () => {
      // #given napi installed
-      checkSpy = spyOn(deps, "checkAstGrepNapi").mockReturnValue({
+      checkSpy = spyOn(deps, "checkAstGrepNapi").mockResolvedValue({
        name: "AST-Grep NAPI",
        required: false,
        installed: true,
--- a/src/cli/doctor/checks/dependencies.ts
+++ b/src/cli/doctor/checks/dependencies.ts
@@ -56,9 +56,10 @@ export async function checkAstGrepCli(): Promise<DependencyInfo> {
  }
 }

-export function checkAstGrepNapi(): DependencyInfo {
+export async function checkAstGrepNapi(): Promise<DependencyInfo> {
+  // Try dynamic import first (works in bunx temporary environments)
  try {
-    require.resolve("@ast-grep/napi")
+    await import("@ast-grep/napi")
    return {
      name: "AST-Grep NAPI",
      required: false,
@@ -67,6 +68,28 @@ export function checkAstGrepNapi(): DependencyInfo {
      path: null,
    }
  } catch {
+    // Fallback: check common installation paths
+    const { existsSync } = await import("fs")
+    const { join } = await import("path")
+    const { homedir } = await import("os")
+
+    const pathsToCheck = [
+      join(homedir(), ".config", "opencode", "node_modules", "@ast-grep", "napi"),
+      join(process.cwd(), "node_modules", "@ast-grep", "napi"),
+    ]
+
+    for (const napiPath of pathsToCheck) {
+      if (existsSync(napiPath)) {
+        return {
+          name: "AST-Grep NAPI",
+          required: false,
+          installed: true,
+          version: null,
+          path: napiPath,
+        }
+      }
+    }
+
    return {
      name: "AST-Grep NAPI",
      required: false,
@@ -127,7 +150,7 @@ export async function checkDependencyAstGrepCli(): Promise<CheckResult> {
 }

 export async function checkDependencyAstGrepNapi(): Promise<CheckResult> {
-  const info = checkAstGrepNapi()
+  const info = await checkAstGrepNapi()
  return dependencyToCheckResult(info, CHECK_NAMES[CHECK_IDS.DEP_AST_GREP_NAPI])
 }

--- a/src/cli/doctor/checks/model-resolution.test.ts
+++ b/src/cli/doctor/checks/model-resolution.test.ts
@@ -12,7 +12,7 @@ describe("model-resolution check", () => {
      const info = getModelResolutionInfo()

      // #then: Should have agent entries
-      const sisyphus = info.agents.find((a) => a.name === "Sisyphus")
+      const sisyphus = info.agents.find((a) => a.name === "sisyphus")
      expect(sisyphus).toBeDefined()
      expect(sisyphus!.requirement.fallbackChain[0]?.model).toBe("claude-opus-4-5")
      expect(sisyphus!.requirement.fallbackChain[0]?.providers).toContain("anthropic")
@@ -27,7 +27,7 @@ describe("model-resolution check", () => {
      // #then: Should have category entries
      const visual = info.categories.find((c) => c.name === "visual-engineering")
      expect(visual).toBeDefined()
-      expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro-preview")
+      expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
      expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
    })
  })
@@ -84,7 +84,7 @@ describe("model-resolution check", () => {
      const info = getModelResolutionInfoWithOverrides(mockConfig)

      // #then: Should show provider fallback chain
-      const sisyphus = info.agents.find((a) => a.name === "Sisyphus")
+      const sisyphus = info.agents.find((a) => a.name === "sisyphus")
      expect(sisyphus).toBeDefined()
      expect(sisyphus!.userOverride).toBeUndefined()
      expect(sisyphus!.effectiveResolution).toContain("Provider fallback:")
@@ -97,13 +97,14 @@ describe("model-resolution check", () => {
    // #when: Running the model resolution check
    // #then: Returns pass with details showing resolution flow

-    it("returns pass status with agent and category counts", async () => {
+    it("returns pass or warn status with agent and category counts", async () => {
      const { checkModelResolution } = await import("./model-resolution")

      const result = await checkModelResolution()

-      // #then: Should pass and show counts
-      expect(result.status).toBe("pass")
+      // #then: Should pass (with cache) or warn (no cache) and show counts
+      // In CI without model cache, status is "warn"; locally with cache, status is "pass"
+      expect(["pass", "warn"]).toContain(result.status)
      expect(result.message).toMatch(/\d+ agents?, \d+ categories?/)
    })

@@ -115,8 +116,9 @@ describe("model-resolution check", () => {
      // #then: Details should contain agent/category resolution info
      expect(result.details).toBeDefined()
      expect(result.details!.length).toBeGreaterThan(0)
-      // Should have Current Models header and sections
-      expect(result.details!.some((d) => d.includes("Current Models"))).toBe(true)
+      // Should have Available Models and Configured Models headers
+      expect(result.details!.some((d) => d.includes("Available Models"))).toBe(true)
+      expect(result.details!.some((d) => d.includes("Configured Models"))).toBe(true)
      expect(result.details!.some((d) => d.includes("Agents:"))).toBe(true)
      expect(result.details!.some((d) => d.includes("Categories:"))).toBe(true)
      // Should have legend
--- a/src/cli/doctor/checks/model-resolution.ts
+++ b/src/cli/doctor/checks/model-resolution.ts
@@ -1,4 +1,4 @@
-import { readFileSync } from "node:fs"
+import { readFileSync, existsSync } from "node:fs"
 import type { CheckResult, CheckDefinition } from "../types"
 import { CHECK_IDS, CHECK_NAMES } from "../constants"
 import { parseJsonc, detectConfigFile } from "../../../shared"
@@ -10,6 +10,38 @@ import {
 import { homedir } from "node:os"
 import { join } from "node:path"

+function getOpenCodeCacheDir(): string {
+  const xdgCache = process.env.XDG_CACHE_HOME
+  if (xdgCache) return join(xdgCache, "opencode")
+  return join(homedir(), ".cache", "opencode")
+}
+
+function loadAvailableModels(): { providers: string[]; modelCount: number; cacheExists: boolean } {
+  const cacheFile = join(getOpenCodeCacheDir(), "models.json")
+  
+  if (!existsSync(cacheFile)) {
+    return { providers: [], modelCount: 0, cacheExists: false }
+  }
+
+  try {
+    const content = readFileSync(cacheFile, "utf-8")
+    const data = JSON.parse(content) as Record<string, { models?: Record<string, unknown> }>
+    
+    const providers = Object.keys(data)
+    let modelCount = 0
+    for (const providerId of providers) {
+      const models = data[providerId]?.models
+      if (models && typeof models === "object") {
+        modelCount += Object.keys(models).length
+      }
+    }
+    
+    return { providers, modelCount, cacheExists: true }
+  } catch {
+    return { providers: [], modelCount: 0, cacheExists: false }
+  }
+}
+
 const PACKAGE_NAME = "oh-my-opencode"
 const USER_CONFIG_DIR = join(homedir(), ".config", "opencode")
 const USER_CONFIG_BASE = join(USER_CONFIG_DIR, PACKAGE_NAME)
@@ -155,10 +187,30 @@ function getEffectiveVariant(requirement: ModelRequirement): string | undefined
  return firstEntry?.variant ?? requirement.variant
 }

-function buildDetailsArray(info: ModelResolutionInfo): string[] {
+interface AvailableModelsInfo {
+  providers: string[]
+  modelCount: number
+  cacheExists: boolean
+}
+
+function buildDetailsArray(info: ModelResolutionInfo, available: AvailableModelsInfo): string[] {
  const details: string[] = []

-  details.push("═══ Current Models ═══")
+  details.push("═══ Available Models (from cache) ═══")
+  details.push("")
+  if (available.cacheExists) {
+    details.push(`  Providers in cache: ${available.providers.length}`)
+    details.push(`  Sample: ${available.providers.slice(0, 6).join(", ")}${available.providers.length > 6 ? "..." : ""}`)
+    details.push(`  Total models: ${available.modelCount}`)
+    details.push(`  Cache: ~/.cache/opencode/models.json`)
+    details.push(`  ℹ Runtime: only connected providers used`)
+    details.push(`  Refresh: opencode models --refresh`)
+  } else {
+    details.push("  ⚠ Cache not found. Run 'opencode' to populate.")
+  }
+  details.push("")
+
+  details.push("═══ Configured Models ═══")
  details.push("")
  details.push("Agents:")
  for (const agent of info.agents) {
@@ -182,6 +234,7 @@ function buildDetailsArray(info: ModelResolutionInfo): string[] {
 export async function checkModelResolution(): Promise<CheckResult> {
  const config = loadConfig() ?? {}
  const info = getModelResolutionInfoWithOverrides(config)
+  const available = loadAvailableModels()

  const agentCount = info.agents.length
  const categoryCount = info.categories.length
@@ -190,12 +243,13 @@ export async function checkModelResolution(): Promise<CheckResult> {
  const totalOverrides = agentOverrides + categoryOverrides

  const overrideNote = totalOverrides > 0 ? ` (${totalOverrides} override${totalOverrides > 1 ? "s" : ""})` : ""
+  const cacheNote = available.cacheExists ? `, ${available.modelCount} available` : ", cache not found"

  return {
    name: CHECK_NAMES[CHECK_IDS.MODEL_RESOLUTION],
-    status: "pass",
-    message: `${agentCount} agents, ${categoryCount} categories${overrideNote}`,
-    details: buildDetailsArray(info),
+    status: available.cacheExists ? "pass" : "warn",
+    message: `${agentCount} agents, ${categoryCount} categories${overrideNote}${cacheNote}`,
+    details: buildDetailsArray(info, available),
  }
 }

--- a/src/cli/doctor/checks/plugin.ts
+++ b/src/cli/doctor/checks/plugin.ts
@@ -22,6 +22,9 @@ function findPluginEntry(plugins: string[]): { entry: string; isPinned: boolean;
      const version = isPinned ? plugin.split("@")[1] : null
      return { entry: plugin, isPinned, version }
    }
+    if (plugin.startsWith("file://") && plugin.includes(PACKAGE_NAME)) {
+      return { entry: plugin, isPinned: false, version: "local-dev" }
+    }
  }
  return null
 }
--- a/src/cli/index.test.ts
+++ b/src/cli/index.test.ts
@@ -0,0 +1,17 @@
+import { describe, it, expect } from "bun:test"
+import packageJson from "../../package.json" with { type: "json" }
+
+describe("CLI version", () => {
+  it("reads version from package.json as valid semver", () => {
+    //#given
+    const semverRegex = /^\d+\.\d+\.\d+(-[\w.]+)?$/
+
+    //#when
+    const version = packageJson.version
+
+    //#then
+    expect(version).toMatch(semverRegex)
+    expect(typeof version).toBe("string")
+    expect(version.length).toBeGreaterThan(0)
+  })
+})
--- a/src/cli/install.ts
+++ b/src/cli/install.ts
@@ -44,7 +44,7 @@ function formatConfigSummary(config: InstallConfig): string {
  lines.push(formatProvider("Gemini", config.hasGemini))
  lines.push(formatProvider("GitHub Copilot", config.hasCopilot, "fallback"))
  lines.push(formatProvider("OpenCode Zen", config.hasOpencodeZen, "opencode/ models"))
-  lines.push(formatProvider("Z.ai Coding Plan", config.hasZaiCodingPlan, "Librarian: glm-4.7"))
+  lines.push(formatProvider("Z.ai Coding Plan", config.hasZaiCodingPlan, "Librarian/Multimodal"))

  lines.push("")
  lines.push(color.dim("─".repeat(40)))
@@ -178,7 +178,7 @@ async function runTuiMode(detected: DetectedConfig): Promise<InstallConfig | nul
  const claude = await p.select({
    message: "Do you have a Claude Pro/Max subscription?",
    options: [
-      { value: "no" as const, label: "No", hint: "Will use opencode/glm-4.7-free as fallback" },
+      { value: "no" as const, label: "No", hint: "Will use opencode/big-pickle as fallback" },
      { value: "yes" as const, label: "Yes (standard)", hint: "Claude Opus 4.5 for orchestration" },
      { value: "max20" as const, label: "Yes (max20 mode)", hint: "Full power with Claude Sonnet 4.5 for Librarian" },
    ],
@@ -250,7 +250,7 @@ async function runTuiMode(detected: DetectedConfig): Promise<InstallConfig | nul
    message: "Do you have a Z.ai Coding Plan subscription?",
    options: [
      { value: "no" as const, label: "No", hint: "Will use other configured providers" },
-      { value: "yes" as const, label: "Yes", hint: "zai-coding-plan/glm-4.7 for Librarian" },
+      { value: "yes" as const, label: "Yes", hint: "Fallback for Librarian and Multimodal Looker" },
    ],
    initialValue: initial.zaiCodingPlan,
  })
@@ -363,7 +363,7 @@ async function runNonTuiInstall(args: InstallArgs): Promise<number> {
  }

  if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
-    printWarning("No model providers configured. Using opencode/glm-4.7-free as fallback.")
+    printWarning("No model providers configured. Using opencode/big-pickle as fallback.")
  }

  console.log(`${SYMBOLS.star} ${color.bold(color.green(isUpdate ? "Configuration updated!" : "Installation complete!"))}`)
@@ -480,7 +480,7 @@ export async function install(args: InstallArgs): Promise<number> {
  }

  if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
-    p.log.warn("No model providers configured. Using opencode/glm-4.7-free as fallback.")
+    p.log.warn("No model providers configured. Using opencode/big-pickle as fallback.")
  }

  p.note(formatConfigSummary(config), isUpdate ? "Updated Configuration" : "Installation Complete")
--- a/src/cli/model-fallback.test.ts
+++ b/src/cli/model-fallback.test.ts
@@ -310,19 +310,19 @@ describe("generateModelConfig", () => {
  })

  describe("explore agent special cases", () => {
-    test("explore uses Gemini flash when Gemini available", () => {
-      // #given Gemini is available
+    test("explore uses gpt-5-nano when only Gemini available (no Claude)", () => {
+      // #given only Gemini is available (no Claude)
      const config = createConfig({ hasGemini: true })

      // #when generateModelConfig is called
      const result = generateModelConfig(config)

-      // #then explore should use gemini-3-flash-preview
-      expect(result.agents?.explore?.model).toBe("google/gemini-3-flash-preview")
+      // #then explore should use gpt-5-nano (Claude haiku not available)
+      expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
    })

-    test("explore uses Claude haiku when Claude + isMax20 but no Gemini", () => {
-      // #given Claude is available with Max 20 plan but no Gemini
+    test("explore uses Claude haiku when Claude available", () => {
+      // #given Claude is available
      const config = createConfig({ hasClaude: true, isMax20: true })

      // #when generateModelConfig is called
@@ -332,26 +332,37 @@ describe("generateModelConfig", () => {
      expect(result.agents?.explore?.model).toBe("anthropic/claude-haiku-4-5")
    })

-    test("explore uses grok-code when Claude without isMax20 and no Gemini", () => {
-      // #given Claude is available without Max 20 plan and no Gemini
+    test("explore uses Claude haiku regardless of isMax20 flag", () => {
+      // #given Claude is available without Max 20 plan
      const config = createConfig({ hasClaude: true, isMax20: false })

      // #when generateModelConfig is called
      const result = generateModelConfig(config)

-      // #then explore should use grok-code
-      expect(result.agents?.explore?.model).toBe("opencode/grok-code")
+      // #then explore should use claude-haiku-4-5 (isMax20 doesn't affect explore)
+      expect(result.agents?.explore?.model).toBe("anthropic/claude-haiku-4-5")
    })

-    test("explore uses grok-code when only OpenAI available", () => {
+    test("explore uses gpt-5-nano when only OpenAI available", () => {
      // #given only OpenAI is available
      const config = createConfig({ hasOpenAI: true })

      // #when generateModelConfig is called
      const result = generateModelConfig(config)

-      // #then explore should use grok-code (fallback)
-      expect(result.agents?.explore?.model).toBe("opencode/grok-code")
+      // #then explore should use gpt-5-nano (fallback)
+      expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
+    })
+
+    test("explore uses gpt-5-mini when only Copilot available", () => {
+      // #given only Copilot is available
+      const config = createConfig({ hasCopilot: true })
+
+      // #when generateModelConfig is called
+      const result = generateModelConfig(config)
+
+      // #then explore should use gpt-5-mini (Copilot fallback)
+      expect(result.agents?.explore?.model).toBe("github-copilot/gpt-5-mini")
    })
  })

@@ -364,7 +375,7 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then Sisyphus should use opus (sisyphus-high)
-      expect(result.agents?.Sisyphus?.model).toBe("anthropic/claude-opus-4-5")
+      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-opus-4-5")
    })

    test("Sisyphus uses sisyphus-low capability when isMax20 is false", () => {
@@ -375,7 +386,7 @@ describe("generateModelConfig", () => {
      const result = generateModelConfig(config)

      // #then Sisyphus should use sonnet (sisyphus-low)
-      expect(result.agents?.Sisyphus?.model).toBe("anthropic/claude-sonnet-4-5")
+      expect(result.agents?.sisyphus?.model).toBe("anthropic/claude-sonnet-4-5")
    })
  })

--- a/src/cli/model-fallback.ts
+++ b/src/cli/model-fallback.ts
@@ -36,7 +36,7 @@ export interface GeneratedOmoConfig {

 const ZAI_MODEL = "zai-coding-plan/glm-4.7"

-const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
+const ULTIMATE_FALLBACK = "opencode/big-pickle"
 const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"

 function toProviderAvailability(config: InstallConfig): ProviderAvailability {
@@ -97,13 +97,13 @@ function resolveModelFromChain(
 function getSisyphusFallbackChain(isMaxPlan: boolean): FallbackEntry[] {
  // Sisyphus uses opus when isMaxPlan, sonnet otherwise
  if (isMaxPlan) {
-    return AGENT_MODEL_REQUIREMENTS.Sisyphus.fallbackChain
+    return AGENT_MODEL_REQUIREMENTS.sisyphus.fallbackChain
  }
  // For non-max plan, use sonnet instead of opus
  return [
    { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
    { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
-    { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro-preview" },
+    { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
  ]
 }

@@ -139,21 +139,23 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
      continue
    }

-    // Special case: explore has custom Gemini → Claude → Grok logic
+    // Special case: explore uses Claude haiku (native Anthropic, then OpenCode Zen) → GitHub Copilot gpt-5-mini → OpenCode gpt-5-nano
    if (role === "explore") {
-      if (avail.native.gemini) {
-        agents[role] = { model: "google/gemini-3-flash-preview" }
-      } else if (avail.native.claude && avail.isMaxPlan) {
+      if (avail.native.claude) {
        agents[role] = { model: "anthropic/claude-haiku-4-5" }
+      } else if (avail.opencodeZen) {
+        agents[role] = { model: "opencode/claude-haiku-4-5" }
+      } else if (avail.copilot) {
+        agents[role] = { model: "github-copilot/gpt-5-mini" }
      } else {
-        agents[role] = { model: "opencode/grok-code" }
+        agents[role] = { model: "opencode/gpt-5-nano" }
      }
      continue
    }

    // Special case: Sisyphus uses different fallbackChain based on isMaxPlan
    const fallbackChain =
-      role === "Sisyphus" ? getSisyphusFallbackChain(avail.isMaxPlan) : req.fallbackChain
+      role === "sisyphus" ? getSisyphusFallbackChain(avail.isMaxPlan) : req.fallbackChain

    const resolved = resolveModelFromChain(fallbackChain, avail)
    if (resolved) {
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -9,6 +9,8 @@ export {
  SisyphusAgentConfigSchema,
  ExperimentalConfigSchema,
  RalphLoopConfigSchema,
+  TmuxConfigSchema,
+  TmuxLayoutSchema,
 } from "./schema"

 export type {
@@ -23,4 +25,6 @@ export type {
  ExperimentalConfig,
  DynamicContextPruningConfig,
  RalphLoopConfig,
+  TmuxConfig,
+  TmuxLayout,
 } from "./schema"
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -1,5 +1,12 @@
 import { describe, expect, test } from "bun:test"
-import { AgentOverrideConfigSchema, BuiltinCategoryNameSchema, CategoryConfigSchema, OhMyOpenCodeConfigSchema } from "./schema"
+import {
+  AgentOverrideConfigSchema,
+  BrowserAutomationConfigSchema,
+  BrowserAutomationProviderSchema,
+  BuiltinCategoryNameSchema,
+  CategoryConfigSchema,
+  OhMyOpenCodeConfigSchema,
+} from "./schema"

 describe("disabled_mcps schema", () => {
  test("should accept built-in MCP names", () => {
@@ -345,6 +352,20 @@ describe("CategoryConfigSchema", () => {
    }
  })

+  test("accepts reasoningEffort as optional string with xhigh", () => {
+    // #given
+    const config = { reasoningEffort: "xhigh" }
+
+    // #when
+    const result = CategoryConfigSchema.safeParse(config)
+
+    // #then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.reasoningEffort).toBe("xhigh")
+    }
+  })
+
  test("rejects non-string variant", () => {
    // #given
    const config = { model: "openai/gpt-5.2", variant: 123 }
@@ -375,7 +396,7 @@ describe("Sisyphus-Junior agent override", () => {
    // #given
    const config = {
      agents: {
-        "Sisyphus-Junior": {
+        "sisyphus-junior": {
          model: "openai/gpt-5.2",
          temperature: 0.2,
        },
@@ -388,18 +409,18 @@ describe("Sisyphus-Junior agent override", () => {
    // #then
    expect(result.success).toBe(true)
    if (result.success) {
-      expect(result.data.agents?.["Sisyphus-Junior"]).toBeDefined()
-      expect(result.data.agents?.["Sisyphus-Junior"]?.model).toBe("openai/gpt-5.2")
-      expect(result.data.agents?.["Sisyphus-Junior"]?.temperature).toBe(0.2)
+      expect(result.data.agents?.["sisyphus-junior"]).toBeDefined()
+      expect(result.data.agents?.["sisyphus-junior"]?.model).toBe("openai/gpt-5.2")
+      expect(result.data.agents?.["sisyphus-junior"]?.temperature).toBe(0.2)
    }
  })

-  test("schema accepts Sisyphus-Junior with prompt_append", () => {
+  test("schema accepts sisyphus-junior with prompt_append", () => {
    // #given
    const config = {
      agents: {
-        "Sisyphus-Junior": {
-          prompt_append: "Additional instructions for Sisyphus-Junior",
+        "sisyphus-junior": {
+          prompt_append: "Additional instructions for sisyphus-junior",
        },
      },
    }
@@ -410,17 +431,17 @@ describe("Sisyphus-Junior agent override", () => {
    // #then
    expect(result.success).toBe(true)
    if (result.success) {
-      expect(result.data.agents?.["Sisyphus-Junior"]?.prompt_append).toBe(
-        "Additional instructions for Sisyphus-Junior"
+      expect(result.data.agents?.["sisyphus-junior"]?.prompt_append).toBe(
+        "Additional instructions for sisyphus-junior"
      )
    }
  })

-  test("schema accepts Sisyphus-Junior with tools override", () => {
+  test("schema accepts sisyphus-junior with tools override", () => {
    // #given
    const config = {
      agents: {
-        "Sisyphus-Junior": {
+        "sisyphus-junior": {
          tools: {
            read: true,
            write: false,
@@ -435,10 +456,153 @@ describe("Sisyphus-Junior agent override", () => {
    // #then
    expect(result.success).toBe(true)
    if (result.success) {
-      expect(result.data.agents?.["Sisyphus-Junior"]?.tools).toEqual({
+      expect(result.data.agents?.["sisyphus-junior"]?.tools).toEqual({
        read: true,
        write: false,
      })
    }
  })
+
+  test("schema accepts lowercase agent names (sisyphus, atlas, prometheus)", () => {
+    // #given
+    const config = {
+      agents: {
+        sisyphus: {
+          temperature: 0.1,
+        },
+        atlas: {
+          temperature: 0.2,
+        },
+        prometheus: {
+          temperature: 0.3,
+        },
+      },
+    }
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(config)
+
+    // #then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.agents?.sisyphus?.temperature).toBe(0.1)
+      expect(result.data.agents?.atlas?.temperature).toBe(0.2)
+      expect(result.data.agents?.prometheus?.temperature).toBe(0.3)
+    }
+  })
+
+  test("schema accepts lowercase metis and momus agent names", () => {
+    // #given
+    const config = {
+      agents: {
+        metis: {
+          category: "ultrabrain",
+        },
+        momus: {
+          category: "quick",
+        },
+      },
+    }
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(config)
+
+    // #then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.agents?.metis?.category).toBe("ultrabrain")
+      expect(result.data.agents?.momus?.category).toBe("quick")
+    }
+  })
+})
+
+describe("BrowserAutomationProviderSchema", () => {
+  test("accepts 'playwright' as valid provider", () => {
+    // #given
+    const input = "playwright"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data).toBe("playwright")
+  })
+
+  test("accepts 'agent-browser' as valid provider", () => {
+    // #given
+    const input = "agent-browser"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data).toBe("agent-browser")
+  })
+
+  test("rejects invalid provider", () => {
+    // #given
+    const input = "invalid-provider"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(false)
+  })
+})
+
+describe("BrowserAutomationConfigSchema", () => {
+  test("defaults provider to 'playwright' when not specified", () => {
+    // #given
+    const input = {}
+
+    // #when
+    const result = BrowserAutomationConfigSchema.parse(input)
+
+    // #then
+    expect(result.provider).toBe("playwright")
+  })
+
+  test("accepts agent-browser provider", () => {
+    // #given
+    const input = { provider: "agent-browser" }
+
+    // #when
+    const result = BrowserAutomationConfigSchema.parse(input)
+
+    // #then
+    expect(result.provider).toBe("agent-browser")
+  })
+})
+
+describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
+  test("accepts browser_automation_engine config", () => {
+    // #given
+    const input = {
+      browser_automation_engine: {
+        provider: "agent-browser",
+      },
+    }
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data?.browser_automation_engine?.provider).toBe("agent-browser")
+  })
+
+  test("accepts config without browser_automation_engine", () => {
+    // #given
+    const input = {}
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data?.browser_automation_engine).toBeUndefined()
+  })
 })
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -17,18 +17,20 @@ const AgentPermissionSchema = z.object({
 })

 export const BuiltinAgentNameSchema = z.enum([
-  "Sisyphus",
+  "sisyphus",
+  "prometheus",
  "oracle",
  "librarian",
  "explore",
  "multimodal-looker",
-  "Metis (Plan Consultant)",
-  "Momus (Plan Reviewer)",
-  "Atlas",
+  "metis",
+  "momus",
+  "atlas",
 ])

 export const BuiltinSkillNameSchema = z.enum([
  "playwright",
+  "agent-browser",
  "frontend-ui-ux",
  "git-master",
 ])
@@ -36,17 +38,17 @@ export const BuiltinSkillNameSchema = z.enum([
 export const OverridableAgentNameSchema = z.enum([
  "build",
  "plan",
-  "Sisyphus",
-  "Sisyphus-Junior",
+  "sisyphus",
+  "sisyphus-junior",
  "OpenCode-Builder",
-  "Prometheus (Planner)",
-  "Metis (Plan Consultant)",
-  "Momus (Plan Reviewer)",
+  "prometheus",
+  "metis",
+  "momus",
  "oracle",
  "librarian",
  "explore",
  "multimodal-looker",
-  "Atlas",
+  "atlas",
 ])

 export const AgentNameSchema = BuiltinAgentNameSchema
@@ -75,6 +77,7 @@ export const HookNameSchema = z.enum([

  "thinking-block-validator",
  "ralph-loop",
+  "category-skill-reminder",

  "compaction-context-injector",
  "claude-code-hooks",
@@ -82,6 +85,7 @@ export const HookNameSchema = z.enum([
  "edit-error-recovery",
  "delegate-task-retry",
  "prometheus-md-only",
+  "sisyphus-junior-notepad",
  "start-work",
  "atlas",
 ])
@@ -112,22 +116,35 @@ export const AgentOverrideConfigSchema = z.object({
    .regex(/^#[0-9A-Fa-f]{6}$/)
    .optional(),
  permission: AgentPermissionSchema.optional(),
+  /** Maximum tokens for response. Passed directly to OpenCode SDK. */
+  maxTokens: z.number().optional(),
+  /** Extended thinking configuration (Anthropic). Overrides category and default settings. */
+  thinking: z.object({
+    type: z.enum(["enabled", "disabled"]),
+    budgetTokens: z.number().optional(),
+  }).optional(),
+  /** Reasoning effort level (OpenAI). Overrides category and default settings. */
+  reasoningEffort: z.enum(["low", "medium", "high", "xhigh"]).optional(),
+  /** Text verbosity level. */
+  textVerbosity: z.enum(["low", "medium", "high"]).optional(),
+  /** Provider-specific options. Passed directly to OpenCode SDK. */
+  providerOptions: z.record(z.string(), z.unknown()).optional(),
 })

 export const AgentOverridesSchema = z.object({
  build: AgentOverrideConfigSchema.optional(),
  plan: AgentOverrideConfigSchema.optional(),
-  Sisyphus: AgentOverrideConfigSchema.optional(),
-  "Sisyphus-Junior": AgentOverrideConfigSchema.optional(),
+  sisyphus: AgentOverrideConfigSchema.optional(),
+  "sisyphus-junior": AgentOverrideConfigSchema.optional(),
  "OpenCode-Builder": AgentOverrideConfigSchema.optional(),
-  "Prometheus (Planner)": AgentOverrideConfigSchema.optional(),
-  "Metis (Plan Consultant)": AgentOverrideConfigSchema.optional(),
-  "Momus (Plan Reviewer)": AgentOverrideConfigSchema.optional(),
+  prometheus: AgentOverrideConfigSchema.optional(),
+  metis: AgentOverrideConfigSchema.optional(),
+  momus: AgentOverrideConfigSchema.optional(),
  oracle: AgentOverrideConfigSchema.optional(),
  librarian: AgentOverrideConfigSchema.optional(),
  explore: AgentOverrideConfigSchema.optional(),
  "multimodal-looker": AgentOverrideConfigSchema.optional(),
-  Atlas: AgentOverrideConfigSchema.optional(),
+  atlas: AgentOverrideConfigSchema.optional(),
 })

 export const ClaudeCodeConfigSchema = z.object({
@@ -159,7 +176,7 @@ export const CategoryConfigSchema = z.object({
    type: z.enum(["enabled", "disabled"]),
    budgetTokens: z.number().optional(),
  }).optional(),
-  reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
+  reasoningEffort: z.enum(["low", "medium", "high", "xhigh"]).optional(),
  textVerbosity: z.enum(["low", "medium", "high"]).optional(),
  tools: z.record(z.string(), z.boolean()).optional(),
  prompt_append: z.string().optional(),
@@ -296,6 +313,32 @@ export const GitMasterConfigSchema = z.object({
  include_co_authored_by: z.boolean().default(true),
 })

+export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser"])
+
+export const BrowserAutomationConfigSchema = z.object({
+  /**
+   * Browser automation provider to use for the "playwright" skill.
+   * - "playwright": Uses Playwright MCP server (@playwright/mcp) - default
+   * - "agent-browser": Uses Vercel's agent-browser CLI (requires: bun add -g agent-browser)
+   */
+  provider: BrowserAutomationProviderSchema.default("playwright"),
+})
+
+export const TmuxLayoutSchema = z.enum([
+  'main-horizontal',  // main pane top, agent panes bottom stack
+  'main-vertical',    // main pane left, agent panes right stack (default)
+  'tiled',            // all panes same size grid
+  'even-horizontal',  // all panes horizontal row
+  'even-vertical',    // all panes vertical stack
+])
+
+export const TmuxConfigSchema = z.object({
+  enabled: z.boolean().default(false),
+  layout: TmuxLayoutSchema.default('main-vertical'),
+  main_pane_size: z.number().min(20).max(80).default(60),
+  main_pane_min_width: z.number().min(40).default(120),
+  agent_pane_min_width: z.number().min(20).default(40),
+})
 export const OhMyOpenCodeConfigSchema = z.object({
  $schema: z.string().optional(),
  disabled_mcps: z.array(AnyMcpNameSchema).optional(),
@@ -315,6 +358,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
  background_task: BackgroundTaskConfigSchema.optional(),
  notification: NotificationConfigSchema.optional(),
  git_master: GitMasterConfigSchema.optional(),
+  browser_automation_engine: BrowserAutomationConfigSchema.optional(),
+  tmux: TmuxConfigSchema.optional(),
 })

 export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
@@ -337,5 +382,9 @@ export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
 export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
 export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>
 export type GitMasterConfig = z.infer<typeof GitMasterConfigSchema>
+export type BrowserAutomationProvider = z.infer<typeof BrowserAutomationProviderSchema>
+export type BrowserAutomationConfig = z.infer<typeof BrowserAutomationConfigSchema>
+export type TmuxConfig = z.infer<typeof TmuxConfigSchema>
+export type TmuxLayout = z.infer<typeof TmuxLayoutSchema>

 export { AnyMcpNameSchema, type AnyMcpName, McpNameSchema, type McpName } from "../mcp/types"
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -2,31 +2,31 @@

 ## OVERVIEW

-Core feature modules + Claude Code compatibility layer. Background agents, skill MCP, builtin skills/commands, 5 loaders.
+Core feature modules + Claude Code compatibility layer. Orchestrates background agents, skill MCPs, builtin skills/commands, and 16 feature modules.

 ## STRUCTURE

 ```
 features/
-├── background-agent/           # Task lifecycle (1335 lines)
+├── background-agent/           # Task lifecycle (1377 lines)
 │   ├── manager.ts              # Launch → poll → complete
-│   ├── concurrency.ts          # Per-provider limits
-│   └── types.ts                # BackgroundTask, LaunchInput
-├── skill-mcp-manager/          # MCP client lifecycle
-│   ├── manager.ts              # Lazy loading, cleanup
-│   └── types.ts                # SkillMcpConfig
-├── builtin-skills/             # Playwright, git-master, frontend-ui-ux
-│   └── skills.ts               # 1203 lines
-├── builtin-commands/           # ralph-loop, refactor, init-deep
+│   └── concurrency.ts          # Per-provider limits
+├── builtin-skills/             # Core skills (1729 lines)
+│   └── skills.ts               # agent-browser, dev-browser, frontend-ui-ux, git-master, typescript-programmer
+├── builtin-commands/           # ralph-loop, refactor, ulw-loop, init-deep, start-work, cancel-ralph
 ├── claude-code-agent-loader/   # ~/.claude/agents/*.md
 ├── claude-code-command-loader/ # ~/.claude/commands/*.md
-├── claude-code-mcp-loader/     # .mcp.json
+├── claude-code-mcp-loader/     # .mcp.json with ${VAR} expansion
 ├── claude-code-plugin-loader/  # installed_plugins.json
 ├── claude-code-session-state/  # Session persistence
 ├── opencode-skill-loader/      # Skills from 6 directories
 ├── context-injector/           # AGENTS.md/README.md injection
 ├── boulder-state/              # Todo state persistence
-└── hook-message-injector/      # Message injection
+├── hook-message-injector/      # Message injection
+├── task-toast-manager/         # Background task notifications
+├── skill-mcp-manager/          # MCP client lifecycle (520 lines)
+├── tmux-subagent/              # Tmux session management
+└── ... (16 modules total)
 ```

 ## LOADER PRIORITY
@@ -41,8 +41,9 @@ features/

 - **Lifecycle**: `launch` → `poll` (2s) → `complete`
 - **Stability**: 3 consecutive polls = idle
-- **Concurrency**: Per-provider/model limits
+- **Concurrency**: Per-provider/model limits via `ConcurrencyManager`
 - **Cleanup**: 30m TTL, 3m stale timeout
+- **State**: Per-session Maps, cleaned on `session.deleted`

 ## SKILL MCP

@@ -55,3 +56,4 @@ features/
 - **Sequential delegation**: Use `delegate_task` parallel
 - **Trust self-reports**: ALWAYS verify
 - **Main thread blocks**: No heavy I/O in loader init
+- **Direct state mutation**: Use managers for boulder/session state
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -776,7 +776,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      parentModel: { providerID: "old", modelID: "old-model" },
    }
    const currentMessage: CurrentMessage = {
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
    }

@@ -784,7 +784,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, currentMessage)

    // #then - uses currentMessage values, not task.parentModel/parentAgent
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-5" })
  })

@@ -827,11 +827,11 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      status: "completed",
      startedAt: new Date(),
      completedAt: new Date(),
-      parentAgent: "Sisyphus",
+      parentAgent: "sisyphus",
      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
    }
    const currentMessage: CurrentMessage = {
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "anthropic" },
    }

@@ -839,7 +839,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, currentMessage)

    // #then - model not passed due to incomplete data
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect("model" in promptBody).toBe(false)
  })

@@ -856,7 +856,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      status: "completed",
      startedAt: new Date(),
      completedAt: new Date(),
-      parentAgent: "Sisyphus",
+      parentAgent: "sisyphus",
      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
    }

@@ -864,7 +864,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, null)

    // #then - falls back to task.parentAgent, no model
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect("model" in promptBody).toBe(false)
  })
 })
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -7,7 +7,8 @@ import type {
 } from "./types"
 import { log, getAgentToolRestrictions } from "../../shared"
 import { ConcurrencyManager } from "./concurrency"
-import type { BackgroundTaskConfig } from "../../config/schema"
+import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
+import { isInsideTmux } from "../../shared/tmux"

 import { subagentSessions } from "../claude-code-session-state"
 import { getTaskToastManager } from "../task-toast-manager"
@@ -54,6 +55,14 @@ interface QueueItem {
  input: LaunchInput
 }

+export interface SubagentSessionCreatedEvent {
+  sessionID: string
+  parentID: string
+  title: string
+}
+
+export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>
+
 export class BackgroundManager {
  private static cleanupManagers = new Set<BackgroundManager>()
  private static cleanupRegistered = false
@@ -68,12 +77,20 @@ export class BackgroundManager {
  private concurrencyManager: ConcurrencyManager
  private shutdownTriggered = false
  private config?: BackgroundTaskConfig
-
+  private tmuxEnabled: boolean
+  private onSubagentSessionCreated?: OnSubagentSessionCreated

  private queuesByKey: Map<string, QueueItem[]> = new Map()
  private processingKeys: Set<string> = new Set()

-  constructor(ctx: PluginInput, config?: BackgroundTaskConfig) {
+  constructor(
+    ctx: PluginInput,
+    config?: BackgroundTaskConfig,
+    options?: {
+      tmuxConfig?: TmuxConfig
+      onSubagentSessionCreated?: OnSubagentSessionCreated
+    }
+  ) {
    this.tasks = new Map()
    this.notifications = new Map()
    this.pendingByParent = new Map()
@@ -81,6 +98,8 @@ export class BackgroundManager {
    this.directory = ctx.directory
    this.concurrencyManager = new ConcurrencyManager(config)
    this.config = config
+    this.tmuxEnabled = options?.tmuxConfig?.enabled ?? false
+    this.onSubagentSessionCreated = options?.onSubagentSessionCreated
    this.registerProcessCleanup()
  }

@@ -222,6 +241,29 @@ export class BackgroundManager {
    const sessionID = createResult.data.id
    subagentSessions.add(sessionID)

+    log("[background-agent] tmux callback check", {
+      hasCallback: !!this.onSubagentSessionCreated,
+      tmuxEnabled: this.tmuxEnabled,
+      isInsideTmux: isInsideTmux(),
+      sessionID,
+      parentID: input.parentSessionID,
+    })
+
+    if (this.onSubagentSessionCreated && this.tmuxEnabled && isInsideTmux()) {
+      log("[background-agent] Invoking tmux callback NOW", { sessionID })
+      await this.onSubagentSessionCreated({
+        sessionID,
+        parentID: input.parentSessionID,
+        title: input.description,
+      }).catch((err) => {
+        log("[background-agent] Failed to spawn tmux pane:", err)
+      })
+      log("[background-agent] tmux callback completed, waiting 200ms")
+      await new Promise(r => setTimeout(r, 200))
+    } else {
+      log("[background-agent] SKIP tmux callback - conditions not met")
+    }
+
    // Update task to running state
    task.status = "running"
    task.startedAt = new Date()
@@ -263,6 +305,7 @@ export class BackgroundManager {
          task: false,
          delegate_task: false,
          call_omo_agent: true,
+          question: false,
        },
        parts: [{ type: "text", text: input.prompt }],
      },
@@ -509,6 +552,7 @@ export class BackgroundManager {
          task: false,
          delegate_task: false,
          call_omo_agent: true,
+          question: false,
        },
        parts: [{ type: "text", text: input.prompt }],
      },
--- a/src/features/builtin-commands/commands.ts
+++ b/src/features/builtin-commands/commands.ts
@@ -55,7 +55,7 @@ ${REFACTOR_TEMPLATE}
  },
  "start-work": {
    description: "(builtin) Start Sisyphus work session from Prometheus plan",
-    agent: "Atlas",
+    agent: "atlas",
    template: `<command-instruction>
 ${START_WORK_TEMPLATE}
 </command-instruction>
@@ -81,7 +81,7 @@ export function loadBuiltinCommands(
  for (const [name, definition] of Object.entries(BUILTIN_COMMAND_DEFINITIONS)) {
    if (!disabled.has(name as BuiltinCommandName)) {
      const { argumentHint: _argumentHint, ...openCodeCompatible } = definition
-      commands[name] = openCodeCompatible as CommandDefinition
+      commands[name] = { ...openCodeCompatible, name } as CommandDefinition
    }
  }

--- a/src/features/builtin-commands/templates/ralph-loop.ts
+++ b/src/features/builtin-commands/templates/ralph-loop.ts
@@ -17,7 +17,7 @@ export const RALPH_LOOP_TEMPLATE = `You are starting a Ralph Loop - a self-refer

 ## Exit Conditions

-1. **Completion**: Output \`<promise>DONE</promise>\` (or custom promise text) when fully complete
+1. **Completion**: Output your completion promise tag when fully complete
 2. **Max Iterations**: Loop stops automatically at limit
 3. **Cancel**: User runs \`/cancel-ralph\` command

--- a/src/features/builtin-commands/templates/start-work.ts
+++ b/src/features/builtin-commands/templates/start-work.ts
@@ -25,7 +25,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
   }
   \`\`\`

-5. **Read the plan file** and start executing tasks according to Orchestrator Sisyphus workflow
+5. **Read the plan file** and start executing tasks according to atlas workflow

 ## OUTPUT FORMAT

@@ -69,4 +69,4 @@ Reading plan and beginning execution...
 - The session_id is injected by the hook - use it directly
 - Always update boulder.json BEFORE starting work
 - Read the FULL plan file before delegating any tasks
- Follow Orchestrator Sisyphus delegation protocols (7-section format)`
+- Follow atlas delegation protocols (7-section format)`
--- a/src/features/builtin-skills/agent-browser/SKILL.md
+++ b/src/features/builtin-skills/agent-browser/SKILL.md
@@ -0,0 +1,336 @@
+---
+name: agent-browser
+description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
+---
+
+# Browser Automation with agent-browser
+
+## Quick start
+
+```bash
+agent-browser open <url>        # Navigate to page
+agent-browser snapshot -i       # Get interactive elements with refs
+agent-browser click @e1         # Click element by ref
+agent-browser fill @e2 "text"   # Fill input by ref
+agent-browser close             # Close browser
+```
+
+## Core workflow
+
+1. Navigate: `agent-browser open <url>`
+2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
+3. Interact using refs from the snapshot
+4. Re-snapshot after navigation or significant DOM changes
+
+## Commands
+
+### Navigation
+```bash
+agent-browser open <url>      # Navigate to URL
+agent-browser back            # Go back
+agent-browser forward         # Go forward
+agent-browser reload          # Reload page
+agent-browser close           # Close browser
+```
+
+### Snapshot (page analysis)
+```bash
+agent-browser snapshot            # Full accessibility tree
+agent-browser snapshot -i         # Interactive elements only (recommended)
+agent-browser snapshot -c         # Compact output
+agent-browser snapshot -d 3       # Limit depth to 3
+agent-browser snapshot -s "#main" # Scope to CSS selector
+```
+
+### Interactions (use @refs from snapshot)
+```bash
+agent-browser click @e1           # Click
+agent-browser dblclick @e1        # Double-click
+agent-browser focus @e1           # Focus element
+agent-browser fill @e2 "text"     # Clear and type
+agent-browser type @e2 "text"     # Type without clearing
+agent-browser press Enter         # Press key
+agent-browser press Control+a     # Key combination
+agent-browser keydown Shift       # Hold key down
+agent-browser keyup Shift         # Release key
+agent-browser hover @e1           # Hover
+agent-browser check @e1           # Check checkbox
+agent-browser uncheck @e1         # Uncheck checkbox
+agent-browser select @e1 "value"  # Select dropdown
+agent-browser scroll down 500     # Scroll page
+agent-browser scrollintoview @e1  # Scroll element into view
+agent-browser drag @e1 @e2        # Drag and drop
+agent-browser upload @e1 file.pdf # Upload files
+```
+
+### Get information
+```bash
+agent-browser get text @e1        # Get element text
+agent-browser get html @e1        # Get innerHTML
+agent-browser get value @e1       # Get input value
+agent-browser get attr @e1 href   # Get attribute
+agent-browser get title           # Get page title
+agent-browser get url             # Get current URL
+agent-browser get count ".item"   # Count matching elements
+agent-browser get box @e1         # Get bounding box
+```
+
+### Check state
+```bash
+agent-browser is visible @e1      # Check if visible
+agent-browser is enabled @e1      # Check if enabled
+agent-browser is checked @e1      # Check if checked
+```
+
+### Screenshots & PDF
+```bash
+agent-browser screenshot          # Screenshot to stdout
+agent-browser screenshot path.png # Save to file
+agent-browser screenshot --full   # Full page
+agent-browser pdf output.pdf      # Save as PDF
+```
+
+### Video recording
+```bash
+agent-browser record start ./demo.webm    # Start recording (uses current URL + state)
+agent-browser click @e1                   # Perform actions
+agent-browser record stop                 # Stop and save video
+agent-browser record restart ./take2.webm # Stop current + start new recording
+```
+Recording creates a fresh context but preserves cookies/storage from your session.
+
+### Wait
+```bash
+agent-browser wait @e1                     # Wait for element
+agent-browser wait 2000                    # Wait milliseconds
+agent-browser wait --text "Success"        # Wait for text
+agent-browser wait --url "**/dashboard"    # Wait for URL pattern
+agent-browser wait --load networkidle      # Wait for network idle
+agent-browser wait --fn "window.ready"     # Wait for JS condition
+```
+
+### Mouse control
+```bash
+agent-browser mouse move 100 200      # Move mouse
+agent-browser mouse down left         # Press button
+agent-browser mouse up left           # Release button
+agent-browser mouse wheel 100         # Scroll wheel
+```
+
+### Semantic locators (alternative to refs)
+```bash
+agent-browser find role button click --name "Submit"
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find first ".item" click
+agent-browser find nth 2 "a" text
+```
+
+### Browser settings
+```bash
+agent-browser set viewport 1920 1080      # Set viewport size
+agent-browser set device "iPhone 14"      # Emulate device
+agent-browser set geo 37.7749 -122.4194   # Set geolocation
+agent-browser set offline on              # Toggle offline mode
+agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
+agent-browser set credentials user pass   # HTTP basic auth
+agent-browser set media dark              # Emulate color scheme
+```
+
+### Cookies & Storage
+```bash
+agent-browser cookies                     # Get all cookies
+agent-browser cookies set name value      # Set cookie
+agent-browser cookies clear               # Clear cookies
+agent-browser storage local               # Get all localStorage
+agent-browser storage local key           # Get specific key
+agent-browser storage local set k v       # Set value
+agent-browser storage local clear         # Clear all
+agent-browser storage session             # Get all sessionStorage
+agent-browser storage session key         # Get specific key
+agent-browser storage session set k v     # Set value
+agent-browser storage session clear       # Clear all
+```
+
+### Network
+```bash
+agent-browser network route <url>              # Intercept requests
+agent-browser network route <url> --abort      # Block requests
+agent-browser network route <url> --body '{}'  # Mock response
+agent-browser network unroute [url]            # Remove routes
+agent-browser network requests                 # View tracked requests
+agent-browser network requests --filter api    # Filter requests
+```
+
+### Tabs & Windows
+```bash
+agent-browser tab                 # List tabs
+agent-browser tab new [url]       # New tab
+agent-browser tab 2               # Switch to tab
+agent-browser tab close           # Close tab
+agent-browser window new          # New window
+```
+
+### Frames
+```bash
+agent-browser frame "#iframe"     # Switch to iframe
+agent-browser frame main          # Back to main frame
+```
+
+### Dialogs
+```bash
+agent-browser dialog accept [text]  # Accept dialog
+agent-browser dialog dismiss        # Dismiss dialog
+```
+
+### JavaScript
+```bash
+agent-browser eval "document.title"   # Run JavaScript
+```
+
+## Global Options
+
+| Option | Description |
+|--------|-------------|
+| `--session <name>` | Isolated browser session (`AGENT_BROWSER_SESSION` env) |
+| `--profile <path>` | Persistent browser profile (`AGENT_BROWSER_PROFILE` env) |
+| `--headers <json>` | HTTP headers scoped to URL's origin |
+| `--executable-path <path>` | Custom browser binary (`AGENT_BROWSER_EXECUTABLE_PATH` env) |
+| `--args <args>` | Browser launch args (`AGENT_BROWSER_ARGS` env) |
+| `--user-agent <ua>` | Custom User-Agent (`AGENT_BROWSER_USER_AGENT` env) |
+| `--proxy <url>` | Proxy server (`AGENT_BROWSER_PROXY` env) |
+| `--proxy-bypass <hosts>` | Hosts to bypass proxy (`AGENT_BROWSER_PROXY_BYPASS` env) |
+| `-p, --provider <name>` | Cloud browser provider (`AGENT_BROWSER_PROVIDER` env) |
+| `--json` | Machine-readable JSON output |
+| `--headed` | Show browser window (not headless) |
+| `--cdp <port\|wss://url>` | Connect via Chrome DevTools Protocol |
+| `--debug` | Debug output |
+
+## Example: Form submission
+
+```bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i  # Check result
+```
+
+## Example: Authentication with saved state
+
+```bash
+# Login once
+agent-browser open https://app.example.com/login
+agent-browser snapshot -i
+agent-browser fill @e1 "username"
+agent-browser fill @e2 "password"
+agent-browser click @e3
+agent-browser wait --url "**/dashboard"
+agent-browser state save auth.json
+
+# Later sessions: load saved state
+agent-browser state load auth.json
+agent-browser open https://app.example.com/dashboard
+```
+
+### Header-based Auth (Skip login flows)
+```bash
+# Headers scoped to api.example.com only
+agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
+# Navigate to another domain - headers NOT sent (safe)
+agent-browser open other-site.com
+# Global headers (all domains)
+agent-browser set headers '{"X-Custom-Header": "value"}'
+```
+
+## Sessions & Persistent Profiles
+
+### Sessions (parallel browsers)
+```bash
+agent-browser --session test1 open site-a.com
+agent-browser --session test2 open site-b.com
+agent-browser session list
+```
+
+### Persistent Profiles
+Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
+```bash
+agent-browser --profile ~/.myapp-profile open myapp.com
+# Or via env var
+AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
+```
+- Use different profile paths for different projects
+- Login once → restart browser → still logged in
+- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
+
+## JSON output (for parsing)
+
+Add `--json` for machine-readable output:
+```bash
+agent-browser snapshot -i --json
+agent-browser get text @e1 --json
+```
+
+## Debugging
+
+```bash
+agent-browser open example.com --headed              # Show browser window
+agent-browser console                                # View console messages
+agent-browser errors                                 # View page errors
+agent-browser record start ./debug.webm              # Record from current page
+agent-browser record stop                            # Save recording
+agent-browser connect 9222                           # Local CDP port
+agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot  # Remote via WebSocket
+agent-browser console --clear                        # Clear console
+agent-browser errors --clear                         # Clear errors
+agent-browser highlight @e1                          # Highlight element
+agent-browser trace start                            # Start recording trace
+agent-browser trace stop trace.zip                   # Stop and save trace
+```
+
+---
+
+## Installation
+
+### Step 1: Install agent-browser CLI
+
+```bash
+bun add -g agent-browser
+```
+
+### Step 2: Install Playwright browsers
+
+**IMPORTANT**: `agent-browser install` may fail on some platforms (e.g., darwin-arm64) with a "No binary found" error. In that case, install Playwright browsers directly:
+
+```bash
+# Create a temp project and install playwright
+cd /tmp && bun init -y && bun add playwright
+
+# Install Chromium browser
+bun playwright install chromium
+```
+
+This downloads Chrome for Testing to the Playwright browser cache (`~/Library/Caches/ms-playwright/` on macOS, `~/.cache/ms-playwright/` on Linux).
+
+### Verify installation
+
+```bash
+agent-browser open https://example.com --headed
+```
+
+If the browser opens successfully, installation is complete.
+
+### Troubleshooting
+
+| Error | Solution |
+|-------|----------|
+| `No binary found for darwin-arm64` | Run `bun playwright install chromium` in a project with playwright dependency |
+| `Executable doesn't exist at .../chromium-XXXX` | Re-run `bun playwright install chromium` |
+| Browser doesn't open | Ensure `--headed` flag is used for visible browser |
+
+---
+Run `agent-browser --help` for all commands. Repo: https://github.com/vercel-labs/agent-browser
--- a/src/features/builtin-skills/dev-browser/SKILL.md
+++ b/src/features/builtin-skills/dev-browser/SKILL.md
@@ -0,0 +1,213 @@
+---
+name: dev-browser
+description: Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include "go to [url]", "click on", "fill out the form", "take a screenshot", "scrape", "automate", "test the website", "log into", or any browser interaction request.
+---
+
+# Dev Browser Skill
+
+Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
+
+## Choosing Your Approach
+
+- **Local/source-available sites**: Read the source code first to write selectors directly
+- **Unknown page layouts**: Use `getAISnapshot()` to discover elements and `selectSnapshotRef()` to interact with them
+- **Visual feedback**: Take screenshots to see what the user sees
+
+## Setup
+
+> **Installation**: See [references/installation.md](references/installation.md) for detailed setup instructions including Windows support.
+
+Two modes available. Ask the user if unclear which to use.
+
+### Standalone Mode (Default)
+
+Launches a new Chromium browser for fresh automation sessions.
+
+```bash
+./skills/dev-browser/server.sh &
+```
+
+Add `--headless` flag if user requests it. **Wait for the `Ready` message before running scripts.**
+
+### Extension Mode
+
+Connects to user's existing Chrome browser. Use this when:
+
+- The user is already logged into sites and wants you to work inside an authenticated session that isn't local dev.
+- The user asks you to use the extension
+
+**Important**: The core flow is still the same. You create named pages inside of their browser.
+
+**Start the relay server:**
+
+```bash
+cd skills/dev-browser && npm i && npm run start-extension &
+```
+
+Wait for `Waiting for extension to connect...` followed by `Extension connected` in the console; this confirms that a client has connected and the browser is ready to be controlled.
+**Workflow:**
+
+1. Scripts call `client.page("name")` just like the normal mode to create new pages / connect to existing ones.
+2. Automation runs on the user's actual browser session
+
+If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
+
+## Writing Scripts
+
+> **Run all scripts from `skills/dev-browser/` directory.** The `@/` import alias requires this directory's config.
+
+Execute scripts inline using heredocs:
+
+```bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+// Create page with custom viewport size (optional)
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+EOF
+```
+
+**Write to `tmp/` files only when** the script needs reuse, is complex, or user explicitly requests it.
+
+### Key Principles
+
+1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
+2. **Evaluate state**: Log/return state at the end to decide next steps
+3. **Descriptive page names**: Use `"checkout"`, `"login"`, not `"main"`
+4. **Disconnect to exit**: `await client.disconnect()` - pages persist on server
+5. **Plain JS in evaluate**: `page.evaluate()` runs in browser - no TypeScript syntax
+
+## Workflow Loop
+
+Follow this pattern for complex tasks:
+
+1. **Write a script** to perform one action
+2. **Run it** and observe the output
+3. **Evaluate** - did it work? What's the current state?
+4. **Decide** - is the task complete or do we need another script?
+5. **Repeat** until task is done
+
+### No TypeScript in Browser Context
+
+Code passed to `page.evaluate()` runs in the browser, which doesn't understand TypeScript:
+
+```typescript
+// ✅ Correct: plain JavaScript
+const text = await page.evaluate(() => {
+  return document.body.innerText;
+});
+
+// ❌ Wrong: TypeScript syntax will fail at runtime
+const text = await page.evaluate(() => {
+  const el: HTMLElement = document.body; // Type annotation breaks in browser!
+  return el.innerText;
+});
+```
+
+## Scraping Data
+
+For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide covering request capture, schema discovery, and paginated API replay.
+
+## Client API
+
+```typescript
+const client = await connect();
+
+// Get or create named page (viewport only applies to new pages)
+const page = await client.page("name");
+const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
+
+const pages = await client.list(); // List all page names
+await client.close("name"); // Close a page
+await client.disconnect(); // Disconnect (pages persist)
+
+// ARIA Snapshot methods
+const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
+const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
+```
+
+The `page` object is a standard Playwright Page.
+
+## Waiting
+
+```typescript
+import { waitForPageLoad } from "@/client.js";
+
+await waitForPageLoad(page); // After navigation
+await page.waitForSelector(".results"); // For specific elements
+await page.waitForURL("**/success"); // For specific URL
+```
+
+## Inspecting Page State
+
+### Screenshots
+
+```typescript
+await page.screenshot({ path: "tmp/screenshot.png" });
+await page.screenshot({ path: "tmp/full.png", fullPage: true });
+```
+
+### ARIA Snapshot (Element Discovery)
+
+Use `getAISnapshot()` to discover page elements. Returns YAML-formatted accessibility tree:
+
+```yaml
+- banner:
+  - link "Hacker News" [ref=e1]
+  - navigation:
+    - link "new" [ref=e2]
+- main:
+  - list:
+    - listitem:
+      - link "Article Title" [ref=e8]
+      - link "328 comments" [ref=e9]
+- contentinfo:
+  - textbox [ref=e10]
+    - /placeholder: "Search"
+```
+
+**Interpreting refs:**
+
+- `[ref=eN]` - Element reference for interaction (visible, clickable elements only)
+- `[checked]`, `[disabled]`, `[expanded]` - Element states
+- `[level=N]` - Heading level
+- `/url:`, `/placeholder:` - Element properties
+
+**Interacting with refs:**
+
+```typescript
+const snapshot = await client.getAISnapshot("hackernews");
+console.log(snapshot); // Find the ref you need
+
+const element = await client.selectSnapshotRef("hackernews", "e2");
+await element.click();
+```
+
+## Error Recovery
+
+Page state persists after failures. Debug with:
+
+```bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("hackernews");
+
+await page.screenshot({ path: "tmp/debug.png" });
+console.log({
+  url: page.url(),
+  title: await page.title(),
+  bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
+});
+
+await client.disconnect();
+EOF
+```
--- a/src/features/builtin-skills/dev-browser/references/installation.md
+++ b/src/features/builtin-skills/dev-browser/references/installation.md
@@ -0,0 +1,193 @@
+# Dev Browser Installation Guide
+
+This guide covers installation for all platforms: macOS, Linux, and Windows.
+
+## Prerequisites
+
+- [Node.js](https://nodejs.org) v18 or later with npm
+- Git (for cloning the skill)
+
+## Installation
+
+### Step 1: Clone the Skill
+
+```bash
+# Clone dev-browser to a temporary location
+git clone https://github.com/sawyerhood/dev-browser /tmp/dev-browser-skill
+
+# Copy to skills directory (adjust path as needed)
+# For oh-my-opencode: already bundled
+# For manual installation:
+mkdir -p ~/.config/opencode/skills
+cp -r /tmp/dev-browser-skill/skills/dev-browser ~/.config/opencode/skills/dev-browser
+
+# Cleanup
+rm -rf /tmp/dev-browser-skill
+```
+
+**Windows (PowerShell):**
+```powershell
+# Clone dev-browser to temp location
+git clone https://github.com/sawyerhood/dev-browser $env:TEMP\dev-browser-skill
+
+# Copy to skills directory
+New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.config\opencode\skills"
+Copy-Item -Recurse "$env:TEMP\dev-browser-skill\skills\dev-browser" "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+
+# Cleanup
+Remove-Item -Recurse -Force "$env:TEMP\dev-browser-skill"
+```
+
+### Step 2: Install Dependencies
+
+```bash
+cd ~/.config/opencode/skills/dev-browser
+npm install
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+npm install
+```
+
+### Step 3: Start the Server
+
+#### Standalone Mode (New Browser Instance)
+
+**macOS/Linux:**
+```bash
+cd ~/.config/opencode/skills/dev-browser
+./server.sh &
+# Or for headless:
+./server.sh --headless &
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js"
+# Or for headless:
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js", "--headless"
+```
+
+**Windows (CMD):**
+```cmd
+cd %USERPROFILE%\.config\opencode\skills\dev-browser
+start /B node server.js
+```
+
+Wait for the `Ready` message before running scripts.
+
+#### Extension Mode (Use Existing Chrome)
+
+**macOS/Linux:**
+```bash
+cd ~/.config/opencode/skills/dev-browser
+npm run start-extension &
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
+```
+
+Wait for `Extension connected` message.
+
+## Chrome Extension Setup (Optional)
+
+The Chrome extension allows controlling your existing Chrome browser with all your logged-in sessions.
+
+### Installation
+
+1. Download `extension.zip` from [latest release](https://github.com/sawyerhood/dev-browser/releases/latest)
+2. Extract to a permanent location:
+   - **macOS/Linux:** `~/.dev-browser-extension`
+   - **Windows:** `%USERPROFILE%\.dev-browser-extension`
+3. Open Chrome → `chrome://extensions`
+4. Enable "Developer mode" (toggle in top right)
+5. Click "Load unpacked" → select the extracted folder
+
+### Usage
+
+1. Click the Dev Browser extension icon in Chrome toolbar
+2. Toggle to "Active"
+3. Start the extension relay server (see above)
+4. Use dev-browser scripts - they'll control your existing Chrome
+
+## Troubleshooting
+
+### Server Won't Start
+
+**Check Node.js version:**
+```bash
+node --version  # Should be v18+
+```
+
+**Check port availability:**
+```bash
+# macOS/Linux
+lsof -i :3000
+
+# Windows
+netstat -ano | findstr :3000
+```
+
+### Playwright Installation Issues
+
+If Chromium fails to install:
+```bash
+npx playwright install chromium
+```
+
+### Windows-Specific Issues
+
+**Execution Policy:**
+If PowerShell scripts are blocked:
+```powershell
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+```
+
+**Path Issues:**
+Use forward slashes or backslashes in quoted paths (PowerShell does not require escaping backslashes):
+```powershell
+# Good
+cd "$env:USERPROFILE/.config/opencode/skills/dev-browser"
+# Also good
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+```
+
+### Extension Not Connecting
+
+1. Ensure extension is "Active" (click icon to toggle)
+2. Check relay server is running (`npm run start-extension`)
+3. Look for `Extension connected` message in console
+4. Try reloading the extension in `chrome://extensions`
+
+## Permissions
+
+To skip permission prompts in Claude Code, add to `~/.claude/settings.json`:
+
+```json
+{
+  "permissions": {
+    "allow": ["Skill(dev-browser:dev-browser)", "Bash(npx tsx:*)"]
+  }
+}
+```
+
+## Updating
+
+```bash
+cd ~/.config/opencode/skills/dev-browser
+git pull
+npm install
+```
+
+**Windows:**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+git pull
+npm install
+```
--- a/src/features/builtin-skills/dev-browser/references/scraping.md
+++ b/src/features/builtin-skills/dev-browser/references/scraping.md
@@ -0,0 +1,155 @@
+# Data Scraping Guide
+
+For large datasets (followers, posts, search results), **intercept and replay network requests** rather than scrolling and parsing the DOM. This is faster, more reliable, and handles pagination automatically.
+
+## Why Not Scroll?
+
+Scrolling is slow, unreliable, and wastes time. APIs return structured data with pagination built in. Always prefer API replay.
+
+## Start Small, Then Scale
+
+**Don't try to automate everything at once.** Work incrementally:
+
+1. **Capture one request** - verify you're intercepting the right endpoint
+2. **Inspect one response** - understand the schema before writing extraction code
+3. **Extract a few items** - make sure your parsing logic works
+4. **Then scale up** - add pagination loop only after the basics work
+
+This prevents wasting time debugging a complex script when the issue is a simple path like `data.user.timeline` vs `data.user.result.timeline`.
+
+## Step-by-Step Workflow
+
+### 1. Capture Request Details
+
+First, intercept a request to understand URL structure and required headers:
+
+```typescript
+import { connect, waitForPageLoad } from "@/client.js";
+import * as fs from "node:fs";
+
+const client = await connect();
+const page = await client.page("site");
+
+let capturedRequest = null;
+page.on("request", (request) => {
+  const url = request.url();
+  // Look for API endpoints (adjust pattern for your target site)
+  if (url.includes("/api/") || url.includes("/graphql/")) {
+    capturedRequest = {
+      url: url,
+      headers: request.headers(),
+      method: request.method(),
+    };
+    fs.writeFileSync("tmp/request-details.json", JSON.stringify(capturedRequest, null, 2));
+    console.log("Captured request:", url.substring(0, 80) + "...");
+  }
+});
+
+await page.goto("https://example.com/profile");
+await waitForPageLoad(page);
+await page.waitForTimeout(3000);
+
+await client.disconnect();
+```
+
+### 2. Capture Response to Understand Schema
+
+Save a raw response to inspect the data structure:
+
+```typescript
+page.on("response", async (response) => {
+  const url = response.url();
+  if (url.includes("UserTweets") || url.includes("/api/data")) {
+    const json = await response.json();
+    fs.writeFileSync("tmp/api-response.json", JSON.stringify(json, null, 2));
+    console.log("Captured response");
+  }
+});
+```
+
+Then analyze the structure to find:
+
+- Where the data array lives (e.g., `data.user.result.timeline.instructions[].entries`)
+- Where pagination cursors are (e.g., `cursor-bottom` entries)
+- What fields you need to extract
+
+### 3. Replay API with Pagination
+
+Once you understand the schema, replay requests directly:
+
+```typescript
+import { connect } from "@/client.js";
+import * as fs from "node:fs";
+
+const client = await connect();
+const page = await client.page("site");
+
+const results = new Map(); // Use Map for deduplication
+const headers = JSON.parse(fs.readFileSync("tmp/request-details.json", "utf8")).headers;
+const baseUrl = "https://example.com/api/data";
+
+let cursor = null;
+let hasMore = true;
+
+while (hasMore) {
+  // Build URL with pagination cursor
+  const params = { count: 20 };
+  if (cursor) params.cursor = cursor;
+  const url = `${baseUrl}?params=${encodeURIComponent(JSON.stringify(params))}`;
+
+  // Execute fetch in browser context (has auth cookies/headers)
+  const response = await page.evaluate(
+    async ({ url, headers }) => {
+      const res = await fetch(url, { headers });
+      return res.json();
+    },
+    { url, headers }
+  );
+
+  // Extract data and cursor (adjust paths for your API)
+  const entries = response?.data?.entries || [];
+  for (const entry of entries) {
+    if (entry.type === "cursor-bottom") {
+      cursor = entry.value;
+    } else if (entry.id && !results.has(entry.id)) {
+      results.set(entry.id, {
+        id: entry.id,
+        text: entry.content,
+        timestamp: entry.created_at,
+      });
+    }
+  }
+
+  console.log(`Fetched page, total: ${results.size}`);
+
+  // Check stop conditions
+  if (!cursor || entries.length === 0) hasMore = false;
+
+  // Rate limiting - be respectful
+  await new Promise((r) => setTimeout(r, 500));
+}
+
+// Export results
+const data = Array.from(results.values());
+fs.writeFileSync("tmp/results.json", JSON.stringify(data, null, 2));
+console.log(`Saved ${data.length} items`);
+
+await client.disconnect();
+```
+
+## Key Patterns
+
+| Pattern                 | Description                                            |
+| ----------------------- | ------------------------------------------------------ |
+| `page.on('request')`    | Capture outgoing request URL + headers                 |
+| `page.on('response')`   | Capture response data to understand schema             |
+| `page.evaluate(fetch)`  | Replay requests in browser context (inherits auth)     |
+| `Map` for deduplication | APIs often return overlapping data across pages        |
+| Cursor-based pagination | Look for `cursor`, `next_token`, `offset` in responses |
+
+## Tips
+
+- **Extension mode**: `page.context().cookies()` doesn't work - capture auth headers from intercepted requests instead
+- **Rate limiting**: Add 500ms+ delays between requests to avoid blocks
+- **Stop conditions**: Check for empty results, missing cursor, or reaching a date/ID threshold
+- **GraphQL APIs**: URL params often include `variables` and `features` JSON objects - capture and reuse them
--- a/src/features/builtin-skills/git-master/SKILL.md
+++ b/src/features/builtin-skills/git-master/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: git-master
-description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
+description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
 ---

 # Git Master Agent
--- a/src/features/builtin-skills/index.ts
+++ b/src/features/builtin-skills/index.ts
@@ -1,2 +1,2 @@
 export * from "./types"
-export { createBuiltinSkills } from "./skills"
+export { createBuiltinSkills, type CreateBuiltinSkillsOptions } from "./skills"
--- a/src/features/builtin-skills/skills.test.ts
+++ b/src/features/builtin-skills/skills.test.ts
@@ -0,0 +1,89 @@
+import { describe, test, expect } from "bun:test"
+import { createBuiltinSkills } from "./skills"
+
+describe("createBuiltinSkills", () => {
+	test("returns playwright skill by default", () => {
+		// #given - no options (default)
+
+		// #when
+		const skills = createBuiltinSkills()
+
+		// #then
+		const browserSkill = skills.find((s) => s.name === "playwright")
+		expect(browserSkill).toBeDefined()
+		expect(browserSkill!.description).toContain("browser")
+		expect(browserSkill!.mcpConfig).toHaveProperty("playwright")
+	})
+
+	test("returns playwright skill when browserProvider is 'playwright'", () => {
+		// #given
+		const options = { browserProvider: "playwright" as const }
+
+		// #when
+		const skills = createBuiltinSkills(options)
+
+		// #then
+		const playwrightSkill = skills.find((s) => s.name === "playwright")
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+		expect(playwrightSkill).toBeDefined()
+		expect(agentBrowserSkill).toBeUndefined()
+	})
+
+	test("returns agent-browser skill when browserProvider is 'agent-browser'", () => {
+		// #given
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when
+		const skills = createBuiltinSkills(options)
+
+		// #then
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+		const playwrightSkill = skills.find((s) => s.name === "playwright")
+		expect(agentBrowserSkill).toBeDefined()
+		expect(agentBrowserSkill!.description).toContain("browser")
+		expect(agentBrowserSkill!.allowedTools).toContain("Bash(agent-browser:*)")
+		expect(agentBrowserSkill!.template).toContain("agent-browser")
+		expect(playwrightSkill).toBeUndefined()
+	})
+
+	test("agent-browser skill template is inlined (not loaded from file)", () => {
+		// #given
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when
+		const skills = createBuiltinSkills(options)
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+
+		// #then - template should contain substantial content (inlined, not fallback)
+		expect(agentBrowserSkill!.template).toContain("## Quick start")
+		expect(agentBrowserSkill!.template).toContain("## Commands")
+		expect(agentBrowserSkill!.template).toContain("agent-browser open")
+		expect(agentBrowserSkill!.template).toContain("agent-browser snapshot")
+	})
+
+	test("always includes frontend-ui-ux and git-master skills", () => {
+		// #given - both provider options
+
+		// #when
+		const defaultSkills = createBuiltinSkills()
+		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
+
+		// #then
+		for (const skills of [defaultSkills, agentBrowserSkills]) {
+			expect(skills.find((s) => s.name === "frontend-ui-ux")).toBeDefined()
+			expect(skills.find((s) => s.name === "git-master")).toBeDefined()
+		}
+	})
+
+	test("returns exactly 4 skills regardless of provider", () => {
+		// #given
+
+		// #when
+		const defaultSkills = createBuiltinSkills()
+		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
+
+		// #then
+		expect(defaultSkills).toHaveLength(4)
+		expect(agentBrowserSkills).toHaveLength(4)
+	})
+})
--- a/src/features/builtin-skills/skills.ts
+++ b/src/features/builtin-skills/skills.ts
@@ -1,4 +1,5 @@
 import type { BuiltinSkill } from "./types"
+import type { BrowserAutomationProvider } from "../../config/schema"

 const playwrightSkill: BuiltinSkill = {
  name: "playwright",
@@ -14,6 +15,303 @@ This skill provides browser automation capabilities via the Playwright MCP serve
  },
 }

+const agentBrowserSkill: BuiltinSkill = {
+  name: "agent-browser",
+  description: "MUST USE for any browser-related tasks. Browser automation via agent-browser CLI - verification, browsing, information gathering, web scraping, testing, screenshots, and all browser interactions.",
+  template: `# Browser Automation with agent-browser
+
+## Quick start
+
+\`\`\`bash
+agent-browser open <url>        # Navigate to page
+agent-browser snapshot -i       # Get interactive elements with refs
+agent-browser click @e1         # Click element by ref
+agent-browser fill @e2 "text"   # Fill input by ref
+agent-browser close             # Close browser
+\`\`\`
+
+## Core workflow
+
+1. Navigate: \`agent-browser open <url>\`
+2. Snapshot: \`agent-browser snapshot -i\` (returns elements with refs like \`@e1\`, \`@e2\`)
+3. Interact using refs from the snapshot
+4. Re-snapshot after navigation or significant DOM changes
+
+## Commands
+
+### Navigation
+\`\`\`bash
+agent-browser open <url>      # Navigate to URL
+agent-browser back            # Go back
+agent-browser forward         # Go forward
+agent-browser reload          # Reload page
+agent-browser close           # Close browser
+\`\`\`
+
+### Snapshot (page analysis)
+\`\`\`bash
+agent-browser snapshot            # Full accessibility tree
+agent-browser snapshot -i         # Interactive elements only (recommended)
+agent-browser snapshot -c         # Compact output
+agent-browser snapshot -d 3       # Limit depth to 3
+agent-browser snapshot -s "#main" # Scope to CSS selector
+\`\`\`
+
+### Interactions (use @refs from snapshot)
+\`\`\`bash
+agent-browser click @e1           # Click
+agent-browser dblclick @e1        # Double-click
+agent-browser focus @e1           # Focus element
+agent-browser fill @e2 "text"     # Clear and type
+agent-browser type @e2 "text"     # Type without clearing
+agent-browser press Enter         # Press key
+agent-browser press Control+a     # Key combination
+agent-browser keydown Shift       # Hold key down
+agent-browser keyup Shift         # Release key
+agent-browser hover @e1           # Hover
+agent-browser check @e1           # Check checkbox
+agent-browser uncheck @e1         # Uncheck checkbox
+agent-browser select @e1 "value"  # Select dropdown
+agent-browser scroll down 500     # Scroll page
+agent-browser scrollintoview @e1  # Scroll element into view
+agent-browser drag @e1 @e2        # Drag and drop
+agent-browser upload @e1 file.pdf # Upload files
+\`\`\`
+
+### Get information
+\`\`\`bash
+agent-browser get text @e1        # Get element text
+agent-browser get html @e1        # Get innerHTML
+agent-browser get value @e1       # Get input value
+agent-browser get attr @e1 href   # Get attribute
+agent-browser get title           # Get page title
+agent-browser get url             # Get current URL
+agent-browser get count ".item"   # Count matching elements
+agent-browser get box @e1         # Get bounding box
+\`\`\`
+
+### Check state
+\`\`\`bash
+agent-browser is visible @e1      # Check if visible
+agent-browser is enabled @e1      # Check if enabled
+agent-browser is checked @e1      # Check if checked
+\`\`\`
+
+### Screenshots & PDF
+\`\`\`bash
+agent-browser screenshot          # Screenshot to stdout
+agent-browser screenshot path.png # Save to file
+agent-browser screenshot --full   # Full page
+agent-browser pdf output.pdf      # Save as PDF
+\`\`\`
+
+### Video recording
+\`\`\`bash
+agent-browser record start ./demo.webm    # Start recording (uses current URL + state)
+agent-browser click @e1                   # Perform actions
+agent-browser record stop                 # Stop and save video
+agent-browser record restart ./take2.webm # Stop current + start new recording
+\`\`\`
+Recording creates a fresh context but preserves cookies/storage from your session.
+
+### Wait
+\`\`\`bash
+agent-browser wait @e1                     # Wait for element
+agent-browser wait 2000                    # Wait milliseconds
+agent-browser wait --text "Success"        # Wait for text
+agent-browser wait --url "**/dashboard"    # Wait for URL pattern
+agent-browser wait --load networkidle      # Wait for network idle
+agent-browser wait --fn "window.ready"     # Wait for JS condition
+\`\`\`
+
+### Mouse control
+\`\`\`bash
+agent-browser mouse move 100 200      # Move mouse
+agent-browser mouse down left         # Press button
+agent-browser mouse up left           # Release button
+agent-browser mouse wheel 100         # Scroll wheel
+\`\`\`
+
+### Semantic locators (alternative to refs)
+\`\`\`bash
+agent-browser find role button click --name "Submit"
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find first ".item" click
+agent-browser find nth 2 "a" text
+\`\`\`
+
+### Browser settings
+\`\`\`bash
+agent-browser set viewport 1920 1080      # Set viewport size
+agent-browser set device "iPhone 14"      # Emulate device
+agent-browser set geo 37.7749 -122.4194   # Set geolocation
+agent-browser set offline on              # Toggle offline mode
+agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
+agent-browser set credentials user pass   # HTTP basic auth
+agent-browser set media dark              # Emulate color scheme
+\`\`\`
+
+### Cookies & Storage
+\`\`\`bash
+agent-browser cookies                     # Get all cookies
+agent-browser cookies set name value      # Set cookie
+agent-browser cookies clear               # Clear cookies
+agent-browser storage local               # Get all localStorage
+agent-browser storage local key           # Get specific key
+agent-browser storage local set k v       # Set value
+agent-browser storage local clear         # Clear all
+agent-browser storage session             # Get all sessionStorage
+agent-browser storage session key         # Get specific key
+agent-browser storage session set k v     # Set value
+agent-browser storage session clear       # Clear all
+\`\`\`
+
+### Network
+\`\`\`bash
+agent-browser network route <url>              # Intercept requests
+agent-browser network route <url> --abort      # Block requests
+agent-browser network route <url> --body '{}'  # Mock response
+agent-browser network unroute [url]            # Remove routes
+agent-browser network requests                 # View tracked requests
+agent-browser network requests --filter api    # Filter requests
+\`\`\`
+
+### Tabs & Windows
+\`\`\`bash
+agent-browser tab                 # List tabs
+agent-browser tab new [url]       # New tab
+agent-browser tab 2               # Switch to tab
+agent-browser tab close           # Close tab
+agent-browser window new          # New window
+\`\`\`
+
+### Frames
+\`\`\`bash
+agent-browser frame "#iframe"     # Switch to iframe
+agent-browser frame main          # Back to main frame
+\`\`\`
+
+### Dialogs
+\`\`\`bash
+agent-browser dialog accept [text]  # Accept dialog
+agent-browser dialog dismiss        # Dismiss dialog
+\`\`\`
+
+### JavaScript
+\`\`\`bash
+agent-browser eval "document.title"   # Run JavaScript
+\`\`\`
+
+## Global Options
+
+| Option | Description |
+|--------|-------------|
+| \`--session <name>\` | Isolated browser session (\`AGENT_BROWSER_SESSION\` env) |
+| \`--profile <path>\` | Persistent browser profile (\`AGENT_BROWSER_PROFILE\` env) |
+| \`--headers <json>\` | HTTP headers scoped to URL's origin |
+| \`--executable-path <path>\` | Custom browser binary (\`AGENT_BROWSER_EXECUTABLE_PATH\` env) |
+| \`--args <args>\` | Browser launch args (\`AGENT_BROWSER_ARGS\` env) |
+| \`--user-agent <ua>\` | Custom User-Agent (\`AGENT_BROWSER_USER_AGENT\` env) |
+| \`--proxy <url>\` | Proxy server (\`AGENT_BROWSER_PROXY\` env) |
+| \`--proxy-bypass <hosts>\` | Hosts to bypass proxy (\`AGENT_BROWSER_PROXY_BYPASS\` env) |
+| \`-p, --provider <name>\` | Cloud browser provider (\`AGENT_BROWSER_PROVIDER\` env) |
+| \`--json\` | Machine-readable JSON output |
+| \`--headed\` | Show browser window (not headless) |
+| \`--cdp <port\\|wss://url>\` | Connect via Chrome DevTools Protocol |
+| \`--debug\` | Debug output |
+
+## Example: Form submission
+
+\`\`\`bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i  # Check result
+\`\`\`
+
+## Example: Authentication with saved state
+
+\`\`\`bash
+# Login once
+agent-browser open https://app.example.com/login
+agent-browser snapshot -i
+agent-browser fill @e1 "username"
+agent-browser fill @e2 "password"
+agent-browser click @e3
+agent-browser wait --url "**/dashboard"
+agent-browser state save auth.json
+
+# Later sessions: load saved state
+agent-browser state load auth.json
+agent-browser open https://app.example.com/dashboard
+\`\`\`
+
+### Header-based Auth (Skip login flows)
+\`\`\`bash
+# Headers scoped to api.example.com only
+agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
+# Navigate to another domain - headers NOT sent (safe)
+agent-browser open other-site.com
+# Global headers (all domains)
+agent-browser set headers '{"X-Custom-Header": "value"}'
+\`\`\`
+
+## Sessions & Persistent Profiles
+
+### Sessions (parallel browsers)
+\`\`\`bash
+agent-browser --session test1 open site-a.com
+agent-browser --session test2 open site-b.com
+agent-browser session list
+\`\`\`
+
+### Persistent Profiles
+Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
+\`\`\`bash
+agent-browser --profile ~/.myapp-profile open myapp.com
+# Or via env var
+AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
+\`\`\`
+- Use different profile paths for different projects
+- Login once → restart browser → still logged in
+- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
+
+## JSON output (for parsing)
+
+Add \`--json\` for machine-readable output:
+\`\`\`bash
+agent-browser snapshot -i --json
+agent-browser get text @e1 --json
+\`\`\`
+
+## Debugging
+
+\`\`\`bash
+agent-browser open example.com --headed              # Show browser window
+agent-browser console                                # View console messages
+agent-browser errors                                 # View page errors
+agent-browser record start ./debug.webm              # Record from current page
+agent-browser record stop                            # Save recording
+agent-browser connect 9222                           # Local CDP port
+agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot  # Remote via WebSocket
+agent-browser console --clear                        # Clear console
+agent-browser errors --clear                         # Clear errors
+agent-browser highlight @e1                          # Highlight element
+agent-browser trace start                            # Start recording trace
+agent-browser trace stop trace.zip                   # Stop and save trace
+\`\`\`
+
+---
+Install: \`bun add -g agent-browser && agent-browser install\`. Run \`agent-browser --help\` for all commands. Repo: https://github.com/vercel-labs/agent-browser`,
+  allowedTools: ["Bash(agent-browser:*)"],
+}
+
 const frontendUiUxSkill: BuiltinSkill = {
  name: "frontend-ui-ux",
  description: "Designer-turned-developer who crafts stunning UI/UX even without design mockups",
@@ -95,7 +393,7 @@ Interpret creatively and make unexpected choices that feel genuinely designed fo
 const gitMasterSkill: BuiltinSkill = {
  name: "git-master",
  description:
-    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
+    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
  template: `# Git Master Agent

 You are a Git expert combining three specializations:
@@ -1198,6 +1496,234 @@ POTENTIAL ACTIONS:
 - Bisect without proper good/bad boundaries -> Wasted time`,
 }

-export function createBuiltinSkills(): BuiltinSkill[] {
-  return [playwrightSkill, frontendUiUxSkill, gitMasterSkill]
+const devBrowserSkill: BuiltinSkill = {
+  name: "dev-browser",
+  description:
+    "Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include 'go to [url]', 'click on', 'fill out the form', 'take a screenshot', 'scrape', 'automate', 'test the website', 'log into', or any browser interaction request.",
+  template: `# Dev Browser Skill
+
+Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
+
+## Choosing Your Approach
+
+- **Local/source-available sites**: Read the source code first to write selectors directly
+- **Unknown page layouts**: Use \`getAISnapshot()\` to discover elements and \`selectSnapshotRef()\` to interact with them
+- **Visual feedback**: Take screenshots to see what the user sees
+
+## Setup
+
+**IMPORTANT**: Before using this skill, ensure the server is running. See [references/installation.md](references/installation.md) for platform-specific setup instructions (macOS, Linux, Windows).
+
+Two modes available. Ask the user if unclear which to use.
+
+### Standalone Mode (Default)
+
+Launches a new Chromium browser for fresh automation sessions.
+
+**macOS/Linux:**
+\`\`\`bash
+./skills/dev-browser/server.sh &
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "skills/dev-browser/server.js"
+\`\`\`
+
+Add \`--headless\` flag if user requests it. **Wait for the \`Ready\` message before running scripts.**
+
+### Extension Mode
+
+Connects to user's existing Chrome browser. Use this when:
+
+- The user is already logged into sites and wants you to do things behind an authed experience that isn't local dev.
+- The user asks you to use the extension
+
+**Important**: The core flow is still the same. You create named pages inside of their browser.
+
+**Start the relay server:**
+
+**macOS/Linux:**
+\`\`\`bash
+cd skills/dev-browser && npm i && npm run start-extension &
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+cd skills/dev-browser; npm i; Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
+\`\`\`
+
+Wait for \`Waiting for extension to connect...\` followed by \`Extension connected\` in the console.
+
+If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
+
+## Writing Scripts
+
+> **Run all scripts from \`skills/dev-browser/\` directory.** The \`@/\` import alias requires this directory's config.
+
+Execute scripts inline using heredocs:
+
+**macOS/Linux:**
+\`\`\`bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+EOF
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+cd skills/dev-browser
+@"
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+"@ | npx tsx --input-type=module
+\`\`\`
+
+### Key Principles
+
+1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
+2. **Evaluate state**: Log/return state at the end to decide next steps
+3. **Descriptive page names**: Use \`"checkout"\`, \`"login"\`, not \`"main"\`
+4. **Disconnect to exit**: \`await client.disconnect()\` - pages persist on server
+5. **Plain JS in evaluate**: \`page.evaluate()\` runs in browser - no TypeScript syntax
+
+## Workflow Loop
+
+1. **Write a script** to perform one action
+2. **Run it** and observe the output
+3. **Evaluate** - did it work? What's the current state?
+4. **Decide** - is the task complete or do we need another script?
+5. **Repeat** until task is done
+
+### No TypeScript in Browser Context
+
+Code passed to \`page.evaluate()\` runs in the browser, which doesn't understand TypeScript:
+
+\`\`\`typescript
+// Correct: plain JavaScript
+const text = await page.evaluate(() => {
+  return document.body.innerText;
+});
+
+// Wrong: TypeScript syntax will fail at runtime
+const text = await page.evaluate(() => {
+  const el: HTMLElement = document.body; // Type annotation breaks in browser!
+  return el.innerText;
+});
+\`\`\`
+
+## Scraping Data
+
+For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide.
+
+## Client API
+
+\`\`\`typescript
+const client = await connect();
+
+// Get or create named page
+const page = await client.page("name");
+const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
+
+const pages = await client.list(); // List all page names
+await client.close("name"); // Close a page
+await client.disconnect(); // Disconnect (pages persist)
+
+// ARIA Snapshot methods
+const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
+const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
+\`\`\`
+
+## Waiting
+
+\`\`\`typescript
+import { waitForPageLoad } from "@/client.js";
+
+await waitForPageLoad(page); // After navigation
+await page.waitForSelector(".results"); // For specific elements
+await page.waitForURL("**/success"); // For specific URL
+\`\`\`
+
+## Screenshots
+
+\`\`\`typescript
+await page.screenshot({ path: "tmp/screenshot.png" });
+await page.screenshot({ path: "tmp/full.png", fullPage: true });
+\`\`\`
+
+## ARIA Snapshot (Element Discovery)
+
+Use \`getAISnapshot()\` to discover page elements. Returns YAML-formatted accessibility tree:
+
+\`\`\`yaml
+- banner:
+  - link "Hacker News" [ref=e1]
+  - navigation:
+    - link "new" [ref=e2]
+- main:
+  - list:
+    - listitem:
+      - link "Article Title" [ref=e8]
+\`\`\`
+
+**Interacting with refs:**
+
+\`\`\`typescript
+const snapshot = await client.getAISnapshot("hackernews");
+console.log(snapshot); // Find the ref you need
+
+const element = await client.selectSnapshotRef("hackernews", "e2");
+await element.click();
+\`\`\`
+
+## Error Recovery
+
+Page state persists after failures. Debug with:
+
+\`\`\`bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("hackernews");
+
+await page.screenshot({ path: "tmp/debug.png" });
+console.log({
+  url: page.url(),
+  title: await page.title(),
+  bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
+});
+
+await client.disconnect();
+EOF
+\`\`\``,
+}
+
+export interface CreateBuiltinSkillsOptions {
+  browserProvider?: BrowserAutomationProvider
+}
+
+export function createBuiltinSkills(options: CreateBuiltinSkillsOptions = {}): BuiltinSkill[] {
+  const { browserProvider = "playwright" } = options
+  // Exactly one browser skill is returned: agent-browser when selected, otherwise playwright.
+  const browserSkill = browserProvider === "agent-browser" ? agentBrowserSkill : playwrightSkill
+  // dev-browser is always included, independent of the selected browserProvider.
+  return [browserSkill, frontendUiUxSkill, gitMasterSkill, devBrowserSkill]
 }
--- a/src/features/claude-code-mcp-loader/loader.ts
+++ b/src/features/claude-code-mcp-loader/loader.ts
@@ -77,7 +77,13 @@ export async function loadMcpConfigs(): Promise<McpLoadResult> {

    for (const [name, serverConfig] of Object.entries(config.mcpServers)) {
      if (serverConfig.disabled) {
-        log(`Skipping disabled MCP server "${name}"`, { path })
+        log(`Disabling MCP server "${name}"`, { path })
+        delete servers[name]
+        const existingIndex = loadedServers.findIndex((s) => s.name === name)
+        if (existingIndex !== -1) {
+          loadedServers.splice(existingIndex, 1)
+          log(`Removed previously loaded MCP server "${name}"`, { path })
+        }
        continue
      }

--- a/src/features/claude-code-session-state/state.test.ts
+++ b/src/features/claude-code-session-state/state.test.ts
@@ -37,7 +37,7 @@ describe("claude-code-session-state", () => {
      setSessionAgent(sessionID, "Prometheus (Planner)")

      // #when - try to overwrite
-      setSessionAgent(sessionID, "Sisyphus")
+      setSessionAgent(sessionID, "sisyphus")

      // #then - first agent preserved
      expect(getSessionAgent(sessionID)).toBe("Prometheus (Planner)")
@@ -58,10 +58,10 @@ describe("claude-code-session-state", () => {
      setSessionAgent(sessionID, "Prometheus (Planner)")

      // #when - force update
-      updateSessionAgent(sessionID, "Sisyphus")
+      updateSessionAgent(sessionID, "sisyphus")

      // #then
-      expect(getSessionAgent(sessionID)).toBe("Sisyphus")
+      expect(getSessionAgent(sessionID)).toBe("sisyphus")
    })
  })

@@ -123,4 +123,40 @@ describe("claude-code-session-state", () => {
      expect(getSessionAgent(sessionID)).toBeUndefined()
    })
  })
+
+  describe("issue #893: custom agent switch reset", () => {
+    test("should preserve custom agent when default agent is sent on subsequent messages", () => {
+      // #given - user switches to custom agent "MyCustomAgent"
+      const sessionID = "test-session-custom"
+      const customAgent = "MyCustomAgent"
+      const defaultAgent = "sisyphus"
+
+      // User switches to custom agent (via UI)
+      setSessionAgent(sessionID, customAgent)
+      expect(getSessionAgent(sessionID)).toBe(customAgent)
+
+      // #when - first message after switch sends default agent
+      // This simulates the bug: input.agent = "sisyphus" on first message
+      // Using setSessionAgent (first-write wins) should preserve custom agent
+      setSessionAgent(sessionID, defaultAgent)
+
+      // #then - custom agent should be preserved, NOT overwritten
+      expect(getSessionAgent(sessionID)).toBe(customAgent)
+    })
+
+    test("should allow explicit agent update via updateSessionAgent", () => {
+      // #given - custom agent is set
+      const sessionID = "test-session-explicit"
+      const customAgent = "MyCustomAgent"
+      const newAgent = "AnotherAgent"
+
+      setSessionAgent(sessionID, customAgent)
+
+      // #when - explicit update (user intentionally switches)
+      updateSessionAgent(sessionID, newAgent)
+
+      // #then - should be updated
+      expect(getSessionAgent(sessionID)).toBe(newAgent)
+    })
+  })
 })
--- a/src/features/context-injector/injector.test.ts
+++ b/src/features/context-injector/injector.test.ts
@@ -21,7 +21,7 @@ describe("createContextInjectorMessagesTransformHook", () => {
      sessionID,
      role,
      time: { created: Date.now() },
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "test", modelID: "test" },
      path: { cwd: "/", root: "/" },
    },
--- a/src/features/opencode-skill-loader/skill-content.test.ts
+++ b/src/features/opencode-skill-loader/skill-content.test.ts
@@ -265,3 +265,66 @@ describe("resolveMultipleSkillsAsync", () => {
 		expect(result.notFound).toEqual([])
 	})
 })
+
+describe("resolveSkillContent with browserProvider", () => {
+	it("should resolve agent-browser skill when browserProvider is 'agent-browser'", () => {
+		// #given: browserProvider set to agent-browser
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when: resolving content for 'agent-browser'
+		const result = resolveSkillContent("agent-browser", options)
+
+		// #then: returns agent-browser template
+		expect(result).not.toBeNull()
+		expect(result).toContain("agent-browser")
+	})
+
+	it("should return null for agent-browser when browserProvider is default", () => {
+		// #given: no browserProvider (defaults to playwright)
+
+		// #when: resolving content for 'agent-browser'
+		const result = resolveSkillContent("agent-browser")
+
+		// #then: returns null because agent-browser is not in default builtin skills
+		expect(result).toBeNull()
+	})
+
+	it("should return null for playwright when browserProvider is agent-browser", () => {
+		// #given: browserProvider set to agent-browser
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when: resolving content for 'playwright'
+		const result = resolveSkillContent("playwright", options)
+
+		// #then: returns null because playwright is replaced by agent-browser
+		expect(result).toBeNull()
+	})
+})
+
+describe("resolveMultipleSkills with browserProvider", () => {
+	it("should resolve agent-browser when browserProvider is set", () => {
+		// #given: agent-browser and git-master requested with browserProvider
+		const skillNames = ["agent-browser", "git-master"]
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when: resolving multiple skills
+		const result = resolveMultipleSkills(skillNames, options)
+
+		// #then: both resolved
+		expect(result.resolved.has("agent-browser")).toBe(true)
+		expect(result.resolved.has("git-master")).toBe(true)
+		expect(result.notFound).toHaveLength(0)
+	})
+
+	it("should not resolve agent-browser without browserProvider option", () => {
+		// #given: agent-browser requested without browserProvider
+		const skillNames = ["agent-browser"]
+
+		// #when: resolving multiple skills
+		const result = resolveMultipleSkills(skillNames)
+
+		// #then: agent-browser not found
+		expect(result.resolved.has("agent-browser")).toBe(false)
+		expect(result.notFound).toContain("agent-browser")
+	})
+})
--- a/src/features/opencode-skill-loader/skill-content.ts
+++ b/src/features/opencode-skill-loader/skill-content.ts
@@ -3,24 +3,27 @@ import { discoverSkills } from "./loader"
 import type { LoadedSkill } from "./types"
 import { parseFrontmatter } from "../../shared/frontmatter"
 import { readFileSync } from "node:fs"
-import type { GitMasterConfig } from "../../config/schema"
+import type { GitMasterConfig, BrowserAutomationProvider } from "../../config/schema"

 export interface SkillResolutionOptions {
 	gitMasterConfig?: GitMasterConfig
+	browserProvider?: BrowserAutomationProvider
 }

-let cachedSkills: LoadedSkill[] | null = null
+const cachedSkillsByProvider = new Map<string, LoadedSkill[]>()

 function clearSkillCache(): void {
-	cachedSkills = null
+	cachedSkillsByProvider.clear()
 }

-async function getAllSkills(): Promise<LoadedSkill[]> {
-	if (cachedSkills) return cachedSkills
+async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSkill[]> {
+	const cacheKey = options?.browserProvider ?? "playwright"
+	const cached = cachedSkillsByProvider.get(cacheKey)
+	if (cached) return cached

 	const [discoveredSkills, builtinSkillDefs] = await Promise.all([
 		discoverSkills({ includeClaudeCodePaths: true }),
-		Promise.resolve(createBuiltinSkills()),
+		Promise.resolve(createBuiltinSkills({ browserProvider: options?.browserProvider })),
 	])

 	const builtinSkillsAsLoaded: LoadedSkill[] = builtinSkillDefs.map((skill) => ({
@@ -44,8 +47,9 @@ async function getAllSkills(): Promise<LoadedSkill[]> {
 	const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
 	const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))

-	cachedSkills = [...discoveredSkills, ...uniqueBuiltins]
-	return cachedSkills
+	const allSkills = [...discoveredSkills, ...uniqueBuiltins]
+	cachedSkillsByProvider.set(cacheKey, allSkills)
+	return allSkills
 }

 async function extractSkillTemplate(skill: LoadedSkill): Promise<string> {
@@ -118,7 +122,7 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
 }

 export function resolveSkillContent(skillName: string, options?: SkillResolutionOptions): string | null {
-	const skills = createBuiltinSkills()
+	const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
 	const skill = skills.find((s) => s.name === skillName)
 	if (!skill) return null

@@ -133,7 +137,7 @@ export function resolveMultipleSkills(skillNames: string[], options?: SkillResol
 	resolved: Map<string, string>
 	notFound: string[]
 } {
-	const skills = createBuiltinSkills()
+	const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
 	const skillMap = new Map(skills.map((s) => [s.name, s.template]))

 	const resolved = new Map<string, string>()
@@ -159,7 +163,7 @@ export async function resolveSkillContentAsync(
 	skillName: string,
 	options?: SkillResolutionOptions
 ): Promise<string | null> {
-	const allSkills = await getAllSkills()
+	const allSkills = await getAllSkills(options)
 	const skill = allSkills.find((s) => s.name === skillName)
 	if (!skill) return null

@@ -179,7 +183,7 @@ export async function resolveMultipleSkillsAsync(
 	resolved: Map<string, string>
 	notFound: string[]
 }> {
-	const allSkills = await getAllSkills()
+	const allSkills = await getAllSkills(options)
 	const skillMap = new Map<string, LoadedSkill>()
 	for (const skill of allSkills) {
 		skillMap.set(skill.name, skill)
--- a/src/features/task-toast-manager/manager.test.ts
+++ b/src/features/task-toast-manager/manager.test.ts
@@ -30,7 +30,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Test task",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: true,
        skills: ["playwright", "git-master"],
      }
@@ -127,7 +127,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Full info task",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: true,
        skills: ["frontend-ui-ux"],
      }
@@ -149,9 +149,9 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Task with category default model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
-        modelInfo: { model: "google/gemini-3-pro-preview", type: "category-default" as const },
+        modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const },
      }

      // #when - addTask is called
@@ -169,7 +169,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1b",
        description: "Task with system default model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "anthropic/claude-sonnet-4-5", type: "system-default" as const },
      }
@@ -190,7 +190,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_2",
        description: "Task with inherited model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "cliproxy/claude-opus-4-5", type: "inherited" as const },
      }
@@ -211,7 +211,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_3",
        description: "Task with user model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "my-provider/my-model", type: "user-defined" as const },
      }
--- a/src/features/tmux-subagent/action-executor.ts
+++ b/src/features/tmux-subagent/action-executor.ts
@@ -0,0 +1,97 @@
+import type { TmuxConfig } from "../../config/schema"
+import type { PaneAction, WindowState } from "./types"
+import { spawnTmuxPane, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane } from "../../shared/tmux"
+import { log } from "../../shared"
+
+export interface ActionResult { // outcome of executing a single PaneAction
+  success: boolean // whether the underlying tmux helper reported success
+  paneId?: string // pane id returned by the spawn or replace helper
+  error?: string // failure detail, if any; read by executeActions when it logs a failed action
+}
+
+export interface ExecuteActionsResult { // aggregate outcome returned by executeActions
+  success: boolean // false as soon as one action fails
+  spawnedPaneId?: string // paneId of the last spawn/replace action; only set when every action succeeded
+  results: Array<{ action: PaneAction; result: ActionResult }> // one entry per executed action, in execution order
+}
+
+export interface ExecuteContext { // inputs shared by every action executed in one batch
+  config: TmuxConfig // forwarded to spawnTmuxPane / replaceTmuxPane
+  serverUrl: string // forwarded to spawnTmuxPane / replaceTmuxPane
+  windowState: WindowState // used to re-apply the main pane width after a close or spawn
+}
+
+async function enforceMainPane(windowState: WindowState): Promise<void> {
+  const main = windowState.mainPane // nothing to resize when the window has no main pane
+  if (main) await enforceMainPaneWidth(main.paneId, windowState.windowWidth)
+}
+
+/**
+ * Executes one pane action (close, replace, or spawn) via the shared tmux helpers.
+ * After a successful close or spawn the main pane width is re-enforced.
+ * On failure `error` carries a short description so callers can log it.
+ */
+export async function executeAction(
+  action: PaneAction,
+  ctx: ExecuteContext
+): Promise<ActionResult> {
+  if (action.type === "close") {
+    const success = await closeTmuxPane(action.paneId)
+    if (!success) {
+      return { success, error: `failed to close pane ${action.paneId}` }
+    }
+    await enforceMainPane(ctx.windowState)
+    return { success }
+  }
+
+  if (action.type === "replace") {
+    const { paneId, newSessionId, description } = action
+    const result = await replaceTmuxPane(paneId, newSessionId, description, ctx.config, ctx.serverUrl)
+    if (!result.success) {
+      return { success: result.success, paneId: result.paneId, error: `failed to replace pane ${paneId}` }
+    }
+    return { success: result.success, paneId: result.paneId }
+  }
+
+  // Any action that is neither close nor replace is executed as a spawn.
+  const result = await spawnTmuxPane(
+    action.sessionId,
+    action.description,
+    ctx.config,
+    ctx.serverUrl,
+    action.targetPaneId,
+    action.splitDirection
+  )
+
+  if (!result.success) {
+    return { success: result.success, paneId: result.paneId, error: `failed to spawn pane for session ${action.sessionId}` }
+  }
+
+  await enforceMainPane(ctx.windowState)
+  return { success: result.success, paneId: result.paneId }
+}
+
+export async function executeActions(
+  actions: PaneAction[],
+  ctx: ExecuteContext
+): Promise<ExecuteActionsResult> {
+  // Actions run strictly in order; the first failure aborts the remainder.
+  const outcomes: ExecuteActionsResult["results"] = []
+  let lastCreatedPaneId: string | undefined
+  for (const action of actions) {
+    const { type } = action
+    log("[action-executor] executing", { type })
+    const result = await executeAction(action, ctx)
+    outcomes.push({ action, result })
+
+    if (!result.success) {
+      log("[action-executor] action failed", { type, error: result.error })
+      return { success: false, results: outcomes }
+    }
+    // Remember the pane produced by the most recent spawn/replace action.
+    const createsPane = type === "spawn" || type === "replace"
+    if (createsPane && result.paneId) lastCreatedPaneId = result.paneId
+  }
+
+  return { success: true, spawnedPaneId: lastCreatedPaneId, results: outcomes }
+}
--- a/src/features/tmux-subagent/decision-engine.test.ts
+++ b/src/features/tmux-subagent/decision-engine.test.ts
@@ -0,0 +1,354 @@
+import { describe, it, expect } from "bun:test"
+import { 
+  decideSpawnActions, 
+  calculateCapacity, 
+  canSplitPane, 
+  canSplitPaneAnyDirection,
+  getBestSplitDirection,
+  type SessionMapping 
+} from "./decision-engine"
+import type { WindowState, CapacityConfig, TmuxPaneInfo } from "./types"
+import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
+
+const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + 1
+const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + 1
+
+describe("canSplitPane", () => {
+  const createPane = (width: number, height: number): TmuxPaneInfo => ({
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns true for horizontal split when width >= 2*MIN+1", () => {
+    //#given - pane with exactly the minimum splittable width (MIN_SPLIT_WIDTH = 2*MIN_PANE_WIDTH+1)
+    const pane = createPane(MIN_SPLIT_WIDTH, 20)
+
+    //#when
+    const result = canSplitPane(pane, "-h")
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns false for horizontal split when width < 2*MIN+1", () => {
+    //#given - pane just below minimum splittable width
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, 20)
+
+    //#when
+    const result = canSplitPane(pane, "-h")
+
+    //#then
+    expect(result).toBe(false)
+  })
+
+  it("returns true for vertical split when height >= 2*MIN+1", () => {
+    //#given - pane with exactly minimum splittable height (23)
+    const pane = createPane(50, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const result = canSplitPane(pane, "-v")
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns false for vertical split when height < 2*MIN+1", () => {
+    //#given - pane just below minimum splittable height
+    const pane = createPane(50, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const result = canSplitPane(pane, "-v")
+
+    //#then
+    expect(result).toBe(false)
+  })
+})
+
+describe("canSplitPaneAnyDirection", () => {
+  const paneOfSize = (width: number, height: number): TmuxPaneInfo => ({
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns true when can split horizontally but not vertically", () => {
+    //#given - wide enough, one row too short
+    const wideOnly = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const splittable = canSplitPaneAnyDirection(wideOnly)
+
+    //#then
+    expect(splittable).toBe(true)
+  })
+
+  it("returns true when can split vertically but not horizontally", () => {
+    //#given - tall enough, one column too narrow
+    const tallOnly = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const splittable = canSplitPaneAnyDirection(tallOnly)
+
+    //#then
+    expect(splittable).toBe(true)
+  })
+
+  it("returns false when cannot split in any direction", () => {
+    //#given - one short of the threshold in both dimensions
+    const tooSmall = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const splittable = canSplitPaneAnyDirection(tooSmall)
+
+    //#then
+    expect(splittable).toBe(false)
+  })
+})
+
+describe("getBestSplitDirection", () => {
+  const paneOfSize = (width: number, height: number): TmuxPaneInfo => ({
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns -h when only horizontal split possible", () => {
+    //#given - wide enough, one row too short
+    const wideOnly = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const direction = getBestSplitDirection(wideOnly)
+
+    //#then
+    expect(direction).toBe("-h")
+  })
+
+  it("returns -v when only vertical split possible", () => {
+    //#given - tall enough, one column too narrow
+    const tallOnly = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const direction = getBestSplitDirection(tallOnly)
+
+    //#then
+    expect(direction).toBe("-v")
+  })
+
+  it("returns null when no split possible", () => {
+    //#given - one short of the threshold in both dimensions
+    const tooSmall = paneOfSize(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const direction = getBestSplitDirection(tooSmall)
+
+    //#then
+    expect(direction).toBe(null)
+  })
+
+  it("returns -h when width >= height and both splits possible", () => {
+    //#given - both thresholds met, width is the larger dimension
+    const landscape = paneOfSize(MIN_SPLIT_WIDTH + 10, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const direction = getBestSplitDirection(landscape)
+
+    //#then
+    expect(direction).toBe("-h")
+  })
+
+  it("returns -v when height > width and both splits possible", () => {
+    //#given - both thresholds met, height strictly exceeds width
+    const portrait = paneOfSize(MIN_SPLIT_WIDTH, MIN_SPLIT_WIDTH + 10)
+
+    //#when
+    const direction = getBestSplitDirection(portrait)
+
+    //#then
+    expect(direction).toBe("-v")
+  })
+})
+
+describe("decideSpawnActions", () => {
+  const defaultConfig: CapacityConfig = {
+    mainPaneMinWidth: 120,
+    agentPaneWidth: 40,
+  }
+
+  const createWindowState = (
+    windowWidth: number,
+    windowHeight: number,
+    agentPanes: Array<{ paneId: string; width: number; height: number; left: number; top: number }> = []
+  ): WindowState => ({
+    windowWidth,
+    windowHeight,
+    mainPane: { paneId: "%0", width: Math.floor(windowWidth / 2), height: windowHeight, left: 0, top: 0, title: "main", isActive: true },
+    agentPanes: agentPanes.map((p, i) => ({
+      ...p,
+      title: `agent-${i}`,
+      isActive: false,
+    })),
+  })
+
+  describe("minimum size enforcement", () => {
+    it("returns canSpawn=false when window too small", () => {
+      //#given - window smaller than minimum pane size
+      const state = createWindowState(50, 5)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(false)
+      expect(result.reason).toContain("too small")
+    })
+
+    it("returns canSpawn=true when main pane can be split", () => {
+      //#given - no agent panes, so the split check uses the full window width (220 >= MIN_SPLIT_WIDTH)
+      const state = createWindowState(220, 44)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+
+    it("closes oldest pane when existing panes are too small to split", () => {
+      //#given - existing pane is below minimum splittable size
+      const state = createWindowState(220, 30, [
+        { paneId: "%1", width: 50, height: 15, left: 110, top: 0 },
+      ])
+      const mappings: SessionMapping[] = [
+        { sessionId: "old-ses", paneId: "%1", createdAt: new Date("2024-01-01") },
+      ]
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, mappings)
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(2)
+      expect(result.actions[0].type).toBe("close")
+      expect(result.actions[1].type).toBe("spawn")
+    })
+
+    it("can spawn when existing pane is large enough to split", () => {
+      //#given - existing pane is above minimum splittable size
+      const state = createWindowState(320, 50, [
+        { paneId: "%1", width: MIN_SPLIT_WIDTH + 10, height: MIN_SPLIT_HEIGHT + 10, left: 160, top: 0 },
+      ])
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+  })
+
+  describe("basic spawn decisions", () => {
+    it("returns canSpawn=true when capacity allows new pane", () => {
+      //#given - 220x44 window with no agent panes; window width 220 >= MIN_SPLIT_WIDTH
+      const state = createWindowState(220, 44)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+
+    it("spawns with splitDirection", () => {
+      //#given
+      const state = createWindowState(212, 44, [
+        { paneId: "%1", width: MIN_SPLIT_WIDTH, height: MIN_SPLIT_HEIGHT, left: 106, top: 0 },
+      ])
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions[0].type).toBe("spawn")
+      if (result.actions[0].type === "spawn") {
+        expect(result.actions[0].sessionId).toBe("ses1")
+        expect(result.actions[0].splitDirection).toBeDefined()
+      }
+    })
+
+    it("returns canSpawn=false when no main pane", () => {
+      //#given
+      const state: WindowState = { windowWidth: 212, windowHeight: 44, mainPane: null, agentPanes: [] }
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(false)
+      expect(result.reason).toBe("no main pane found")
+    })
+  })
+})
+
+describe("calculateCapacity", () => {
+  it("calculates 2D grid capacity (cols x rows)", () => {
+    //#given - 212x44 window (user's actual screen)
+    //#when
+    const capacity = calculateCapacity(212, 44)
+
+    //#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
+    expect(capacity.cols).toBe(2)
+    expect(capacity.rows).toBe(3)
+    expect(capacity.total).toBe(6)
+  })
+
+  it("returns 0 cols when agent area too narrow", () => {
+    //#given - window too narrow for even 1 agent pane
+    //#when
+    const capacity = calculateCapacity(100, 44)
+
+    //#then - availableWidth=50, cols=(50+1)/(52+1)=0
+    expect(capacity.cols).toBe(0)
+    expect(capacity.total).toBe(0)
+  })
+
+  it("returns 0 rows when window too short", () => {
+    //#given - window too short
+    //#when
+    const capacity = calculateCapacity(212, 10)
+
+    //#then - rows=(10+1)/(11+1)=0
+    expect(capacity.rows).toBe(0)
+    expect(capacity.total).toBe(0)
+  })
+
+  it("scales with larger screens but caps at MAX_GRID_SIZE=4", () => {
+    //#given - larger 4K-like screen (400x100)
+    //#when
+    const capacity = calculateCapacity(400, 100)
+
+    //#then - availableWidth=200, cols=(200+1)/(52+1)=3 (below the cap), rows=(100+1)/(11+1)=8 capped at 4 (MAX_GRID_SIZE)
+    expect(capacity.cols).toBe(3)
+    expect(capacity.rows).toBe(4)
+    expect(capacity.total).toBe(12)
+  })
+})
--- a/src/features/tmux-subagent/decision-engine.ts
+++ b/src/features/tmux-subagent/decision-engine.ts
@@ -0,0 +1,386 @@
+import type { WindowState, PaneAction, SpawnDecision, CapacityConfig, TmuxPaneInfo, SplitDirection } from "./types"
+import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
+
+export interface SessionMapping { // associates a session id with a tmux pane id
+  sessionId: string
+  paneId: string // compared against TmuxPaneInfo.paneId when looking up a pane's session
+  createdAt: Date // used to order panes by age (earliest = oldest)
+}
+
+export interface GridCapacity { // result of calculateCapacity
+  cols: number // minimum-width panes that fit side by side in the agent area (capped)
+  rows: number // minimum-height panes that fit stacked in the window (capped)
+  total: number // cols * rows
+}
+
+export interface GridSlot { // a cell of the agent-pane grid
+  row: number // zero-based row index
+  col: number // zero-based column index
+}
+
+export interface GridPlan { // grid layout produced by computeGridPlan
+  cols: number
+  rows: number
+  slotWidth: number // agent-area width divided by cols (floored); 0 for the degenerate plan
+  slotHeight: number // window height divided by rows (floored); 0 for the degenerate plan
+}
+
+export interface SpawnTarget { // which pane to split, and how, to create a new agent pane
+  targetPaneId: string // id of the existing pane that will be split
+  splitDirection: SplitDirection // "-h" is gated on pane width, "-v" on pane height (see canSplitPane)
+}
+
+const MAIN_PANE_RATIO = 0.5 // fraction of the window width treated as the main pane; the rest is the agent area
+const MAX_COLS = 2 // upper bound on columns returned by getColumnCount
+const MAX_ROWS = 3 // panes per column assumed by getColumnCount
+const MAX_GRID_SIZE = 4 // cap applied to both cols and rows in calculateCapacity
+const DIVIDER_SIZE = 1 // size reserved for the divider between two adjacent panes
+const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + DIVIDER_SIZE // narrowest pane that can hold two min-width panes plus a divider
+const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + DIVIDER_SIZE // shortest pane that can hold two min-height panes plus a divider
+
+export function getColumnCount(paneCount: number): number {
+  const wanted = paneCount <= 0 ? 1 : Math.ceil(paneCount / MAX_ROWS) // one column per MAX_ROWS panes
+  return Math.min(MAX_COLS, Math.max(1, wanted)) // clamp to [1, MAX_COLS]
+}
+
+export function getColumnWidth(agentAreaWidth: number, paneCount: number): number {
+  const columnCount = getColumnCount(paneCount)
+  const usableWidth = agentAreaWidth - (columnCount - 1) * DIVIDER_SIZE // one divider between each pair of columns
+  return Math.floor(usableWidth / columnCount)
+}
+
+export function isSplittableAtCount(agentAreaWidth: number, paneCount: number): boolean {
+  // A column is splittable when it is wide enough for two minimum-width panes and a divider.
+  return getColumnWidth(agentAreaWidth, paneCount) >= MIN_SPLIT_WIDTH
+}
+
+export function findMinimalEvictions(agentAreaWidth: number, currentCount: number): number | null {
+  let evicted = 1 // try evicting 1, 2, ... panes; the first count that leaves a splittable layout wins
+  while (evicted <= currentCount) {
+    if (isSplittableAtCount(agentAreaWidth, currentCount - evicted)) return evicted
+    evicted += 1
+  }
+  return null
+}
+
+export function canSplitPane(pane: TmuxPaneInfo, direction: SplitDirection): boolean {
+  // "-h" is checked against the pane's width; any other direction against its height.
+  const horizontal = direction === "-h"
+  const extent = horizontal ? pane.width : pane.height
+  return extent >= (horizontal ? MIN_SPLIT_WIDTH : MIN_SPLIT_HEIGHT)
+}
+
+export function canSplitPaneAnyDirection(pane: TmuxPaneInfo): boolean {
+  return canSplitPane(pane, "-h") || canSplitPane(pane, "-v") // wide enough or tall enough
+}
+
+export function getBestSplitDirection(pane: TmuxPaneInfo): SplitDirection | null {
+  const { width, height } = pane
+  const fitsH = width >= MIN_SPLIT_WIDTH
+  const fitsV = height >= MIN_SPLIT_HEIGHT
+  // Both fit: split along the longer side (ties go horizontal).
+  if (fitsH && fitsV) return width >= height ? "-h" : "-v"
+  if (fitsH) return "-h"
+  return fitsV ? "-v" : null
+}
+
+export function calculateCapacity(windowWidth: number, windowHeight: number): GridCapacity {
+  // Only the non-main share of the width is available to agent panes.
+  const agentWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
+  // Count `min + divider` units that fit (crediting one extra divider), clamped to [0, MAX_GRID_SIZE].
+  const fit = (extent: number, min: number): number =>
+    Math.min(MAX_GRID_SIZE, Math.max(0, Math.floor((extent + DIVIDER_SIZE) / (min + DIVIDER_SIZE))))
+  const cols = fit(agentWidth, MIN_PANE_WIDTH)
+  const rows = fit(windowHeight, MIN_PANE_HEIGHT)
+  return { cols, rows, total: cols * rows }
+}
+
+export function computeGridPlan(
+  windowWidth: number,
+  windowHeight: number,
+  paneCount: number
+): GridPlan {
+  const { cols: colLimit, rows: rowLimit } = calculateCapacity(windowWidth, windowHeight)
+
+  // Nothing to lay out, or no room at all: return a degenerate 1x1 plan with zero-sized slots.
+  if (colLimit === 0 || rowLimit === 0 || paneCount === 0) {
+    return { cols: 1, rows: 1, slotWidth: 0, slotHeight: 0 }
+  }
+
+  // Pick the grid with the fewest cells that still holds paneCount panes.
+  // Stays at 1x1 when even the largest allowed grid is too small.
+  const chosen = { cols: 1, rows: 1, area: Infinity }
+  for (let r = 1; r <= rowLimit; r++) {
+    for (let c = 1; c <= colLimit; c++) {
+      const cells = c * r
+      if (!(cells >= paneCount)) continue
+      const fewerCells = cells < chosen.area
+      const sameCellsFewerRows = cells === chosen.area && r < chosen.rows
+      if (fewerCells || sameCellsFewerRows) {
+        chosen.cols = c
+        chosen.rows = r
+        chosen.area = cells
+      }
+    }
+  }
+
+  const agentWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
+  const slotWidth = Math.floor(agentWidth / chosen.cols)
+  const slotHeight = Math.floor(windowHeight / chosen.rows)
+
+  return { cols: chosen.cols, rows: chosen.rows, slotWidth, slotHeight }
+}
+
+export function mapPaneToSlot(
+  pane: TmuxPaneInfo,
+  plan: GridPlan,
+  mainPaneWidth: number
+): GridSlot {
+  // Position of the pane relative to the agent area, which starts right of the main pane.
+  const offsetX = Math.max(0, pane.left - mainPaneWidth)
+  const offsetY = pane.top
+
+  // Bucket an offset into a slot index, clamped to the last slot; zero-sized slots map to 0.
+  const toIndex = (offset: number, slotSize: number, slotCount: number): number =>
+    slotSize > 0 ? Math.min(slotCount - 1, Math.floor(offset / slotSize)) : 0
+
+  return {
+    row: toIndex(offsetY, plan.slotHeight, plan.rows),
+    col: toIndex(offsetX, plan.slotWidth, plan.cols),
+  }
+}
+
+function buildOccupancy(
+  agentPanes: TmuxPaneInfo[],
+  plan: GridPlan,
+  mainPaneWidth: number
+): Map<string, TmuxPaneInfo> {
+  // Keyed by "row:col"; when several panes land in one slot the last one wins.
+  const bySlot = new Map<string, TmuxPaneInfo>()
+  agentPanes.forEach((agentPane) => {
+    const { row, col } = mapPaneToSlot(agentPane, plan, mainPaneWidth)
+    bySlot.set(`${row}:${col}`, agentPane)
+  })
+  return bySlot
+}
+
+function findFirstEmptySlot(
+  occupancy: Map<string, TmuxPaneInfo>,
+  plan: GridPlan
+): GridSlot {
+  // Scan slots row by row, left to right, using the same "row:col" keys as the occupancy map.
+  const slotCount = plan.rows * plan.cols
+  for (let index = 0; index < slotCount; index++) {
+    const row = Math.floor(index / plan.cols)
+    const col = index % plan.cols
+    if (!occupancy.has(`${row}:${col}`)) return { row, col }
+  }
+  // Every slot is taken: fall back to the bottom-right slot.
+  return { row: plan.rows - 1, col: plan.cols - 1 }
+}
+
+function findSplittableTarget( // picks an existing pane to split, and the direction, for one more agent pane
+  state: WindowState,
+  preferredDirection?: SplitDirection // NOTE(review): never read in this function
+): SpawnTarget | null {
+  if (!state.mainPane) return null
+
+  const existingCount = state.agentPanes.length
+
+  if (existingCount === 0) { // no agent panes yet: the only candidate is the main pane
+    const virtualMainPane: TmuxPaneInfo = {
+      ...state.mainPane,
+      width: state.windowWidth, // judge splittability by the whole window width, not the pane's current width
+    }
+    if (canSplitPane(virtualMainPane, "-h")) {
+      return { targetPaneId: state.mainPane.paneId, splitDirection: "-h" }
+    }
+    return null
+  }
+
+  const plan = computeGridPlan(state.windowWidth, state.windowHeight, existingCount + 1) // plan for one more pane
+  const mainPaneWidth = Math.floor(state.windowWidth * MAIN_PANE_RATIO)
+  const occupancy = buildOccupancy(state.agentPanes, plan, mainPaneWidth)
+  const targetSlot = findFirstEmptySlot(occupancy, plan)
+
+  const leftKey = `${targetSlot.row}:${targetSlot.col - 1}` // first choice: split the pane left of the empty slot
+  const leftPane = occupancy.get(leftKey)
+  if (leftPane && canSplitPane(leftPane, "-h")) {
+    return { targetPaneId: leftPane.paneId, splitDirection: "-h" }
+  }
+
+  const aboveKey = `${targetSlot.row - 1}:${targetSlot.col}` // second choice: split the pane above the empty slot
+  const abovePane = occupancy.get(aboveKey)
+  if (abovePane && canSplitPane(abovePane, "-v")) {
+    return { targetPaneId: abovePane.paneId, splitDirection: "-v" }
+  }
+
+  const splittablePanes = state.agentPanes // last resort: any splittable agent pane, largest area first
+    .map(p => ({ pane: p, direction: getBestSplitDirection(p) }))
+    .filter(({ direction }) => direction !== null)
+    .sort((a, b) => (b.pane.width * b.pane.height) - (a.pane.width * a.pane.height))
+
+  if (splittablePanes.length > 0) {
+    const best = splittablePanes[0]
+    return { targetPaneId: best.pane.paneId, splitDirection: best.direction! } // non-null: nulls were filtered out above
+  }
+
+  return null
+}
+
+export function findSpawnTarget(state: WindowState): SpawnTarget | null { // exported wrapper around findSplittableTarget
+  return findSplittableTarget(state) // called without a preferred direction
+}
+
+function findOldestSession(mappings: SessionMapping[]): SessionMapping | null { // NOTE(review): not exported and not called anywhere in this file
+  if (mappings.length === 0) return null
+  return mappings.reduce((oldest, current) => // keeps the mapping with the earliest createdAt; ties keep the earlier element
+    current.createdAt < oldest.createdAt ? current : oldest
+  )
+}
+
+function findOldestAgentPane(
+  agentPanes: TmuxPaneInfo[],
+  sessionMappings: SessionMapping[]
+): TmuxPaneInfo | null {
+  if (agentPanes.length === 0) return null
+
+  // Creation time per pane id; a later mapping for the same pane overrides an earlier one.
+  const createdAtByPane = new Map<string, Date>(
+    sessionMappings.map((m): [string, Date] => [m.paneId, m.createdAt])
+  )
+
+  // Prefer the pane with the earliest known creation time (first such pane on ties).
+  let oldestTracked: { pane: TmuxPaneInfo; createdAt: Date } | null = null
+  for (const pane of agentPanes) {
+    const createdAt = createdAtByPane.get(pane.paneId)
+    if (createdAt === undefined) continue
+    if (!oldestTracked || createdAt.getTime() < oldestTracked.createdAt.getTime()) {
+      oldestTracked = { pane, createdAt }
+    }
+  }
+  if (oldestTracked) return oldestTracked.pane
+
+  // No pane has a mapping: fall back to the top-most, then left-most pane.
+  return agentPanes.reduce((best, p) =>
+    p.top < best.top || (p.top === best.top && p.left < best.left) ? p : best
+  )
+}
+
+export function decideSpawnActions( // decides which pane actions (if any) make room for a new session's pane
+  state: WindowState,
+  sessionId: string,
+  description: string,
+  _config: CapacityConfig, // not read anywhere in this function
+  sessionMappings: SessionMapping[]
+): SpawnDecision {
+  if (!state.mainPane) {
+    return { canSpawn: false, actions: [], reason: "no main pane found" }
+  }
+
+  const agentAreaWidth = Math.floor(state.windowWidth * (1 - MAIN_PANE_RATIO)) // width left over after the main pane's share
+  const currentCount = state.agentPanes.length
+
+  if (agentAreaWidth < MIN_PANE_WIDTH) { // agent area cannot hold even one minimum-width pane
+    return {
+      canSpawn: false,
+      actions: [],
+      reason: `window too small for agent panes: ${state.windowWidth}x${state.windowHeight}`,
+    }
+  }
+
+  const oldestPane = findOldestAgentPane(state.agentPanes, sessionMappings) // eviction candidate for the fallbacks below
+  const oldestMapping = oldestPane 
+    ? sessionMappings.find(m => m.paneId === oldestPane.paneId)
+    : null
+
+  if (currentCount === 0) { // case 1: no agent panes yet, so split the main pane
+    const virtualMainPane: TmuxPaneInfo = { ...state.mainPane, width: state.windowWidth } // judged by full window width
+    if (canSplitPane(virtualMainPane, "-h")) {
+      return {
+        canSpawn: true,
+        actions: [{
+          type: "spawn",
+          sessionId,
+          description,
+          targetPaneId: state.mainPane.paneId,
+          splitDirection: "-h"
+        }]
+      }
+    }
+    return { canSpawn: false, actions: [], reason: "mainPane too small to split" }
+  }
+
+  if (isSplittableAtCount(agentAreaWidth, currentCount)) { // case 2: columns are wide enough, try to split an existing pane
+    const spawnTarget = findSplittableTarget(state)
+    if (spawnTarget) {
+      return {
+        canSpawn: true,
+        actions: [{
+          type: "spawn",
+          sessionId,
+          description,
+          targetPaneId: spawnTarget.targetPaneId,
+          splitDirection: spawnTarget.splitDirection
+        }]
+      }
+    }
+  } // falls through to eviction when no splittable target was found
+
+  const minEvictions = findMinimalEvictions(agentAreaWidth, currentCount)
+
+  if (minEvictions === 1 && oldestPane) { // case 3: closing a single pane is enough, so close the oldest and spawn
+    return {
+      canSpawn: true,
+      actions: [
+        {
+          type: "close",
+          paneId: oldestPane.paneId,
+          sessionId: oldestMapping?.sessionId || "" // empty string when the pane has no session mapping
+        },
+        {
+          type: "spawn",
+          sessionId,
+          description,
+          targetPaneId: state.mainPane.paneId,
+          splitDirection: "-h"
+        }
+      ],
+      reason: "closed 1 pane to make room for split"
+    }
+  }
+
+  if (oldestPane) { // case 4: otherwise reuse the oldest pane for the new session
+    return {
+      canSpawn: true,
+      actions: [{
+        type: "replace",
+        paneId: oldestPane.paneId,
+        oldSessionId: oldestMapping?.sessionId || "", // empty string when the pane has no session mapping
+        newSessionId: sessionId,
+        description
+      }],
+      reason: "replaced oldest pane (no split possible)"
+    }
+  }
+
+  return {
+    canSpawn: false,
+    actions: [],
+    reason: "no pane available to replace"
+  }
+}
+
+export function decideCloseAction(
+  state: WindowState,
+  sessionId: string,
+  sessionMappings: SessionMapping[]
+): PaneAction | null {
+  // Only close when the session is tracked and its pane is still among the agent panes.
+  const tracked = sessionMappings.find((m) => m.sessionId === sessionId)
+  if (!tracked) return null
+  for (const agentPane of state.agentPanes) {
+    if (agentPane.paneId === tracked.paneId) return { type: "close", paneId: tracked.paneId, sessionId }
+  }
+  return null
+}
--- a/src/features/tmux-subagent/index.ts
+++ b/src/features/tmux-subagent/index.ts
@@ -0,0 +1,5 @@
+export * from "./manager"
+export * from "./types"
+export * from "./pane-state-querier"
+export * from "./decision-engine"
+export * from "./action-executor"
--- a/src/features/tmux-subagent/manager.test.ts
+++ b/src/features/tmux-subagent/manager.test.ts
@@ -0,0 +1,690 @@
+import { describe, test, expect, mock, beforeEach } from 'bun:test'
+import type { TmuxConfig } from '../../config/schema'
+import type { WindowState, PaneAction } from './types'
+import type { ActionResult, ExecuteContext } from './action-executor'
+
+// Aggregate result returned by the mocked `executeActions`.
+type ExecuteActionsResult = {
+  success: boolean
+  spawnedPaneId?: string
+  results: Array<{ action: PaneAction; result: ActionResult }>
+}
+
+// Call signatures of the functions that the mocks below stand in for.
+type QueryWindowStateFn = (paneId: string) => Promise<WindowState | null>
+type PaneExistsFn = (paneId: string) => Promise<boolean>
+type ExecuteActionsFn = (
+  actions: PaneAction[],
+  ctx: ExecuteContext
+) => Promise<ExecuteActionsResult>
+type ExecuteActionFn = (action: PaneAction, ctx: ExecuteContext) => Promise<ActionResult>
+
+// Default window: 212x44 with main pane '%0' and no agent panes.
+const mockQueryWindowState = mock<QueryWindowStateFn>(async () => {
+  const mainPane = { paneId: '%0', width: 106, height: 44, left: 0, top: 0, title: 'main', isActive: true }
+  return { windowWidth: 212, windowHeight: 44, mainPane, agentPanes: [] }
+})
+const mockPaneExists = mock<PaneExistsFn>(async () => true)
+// Default: the batch succeeds and reports '%mock' as the spawned pane.
+const mockExecuteActions = mock<ExecuteActionsFn>(async () => {
+  return { success: true, spawnedPaneId: '%mock', results: [] }
+})
+const mockExecuteAction = mock<ExecuteActionFn>(async () => {
+  return { success: true }
+})
+const mockIsInsideTmux = mock<() => boolean>(() => true)
+const mockGetCurrentPaneId = mock<() => string | undefined>(() => '%0')
+
+// Test double: the agent pane with the largest `left`, or null when there are none.
+function pickRightmostPane(state: WindowState) {
+  if (state.agentPanes.length === 0) return null
+  const [first, ...rest] = state.agentPanes
+  let chosen = first
+  for (const pane of rest) {
+    if (pane.left > chosen.left) chosen = pane
+  }
+  return chosen
+}
+
+// Test double: treats the agent pane with the smallest `left` as the oldest one.
+function pickLeftmostPane(state: WindowState) {
+  if (state.agentPanes.length === 0) return null
+  const [first, ...rest] = state.agentPanes
+  let chosen = first
+  for (const pane of rest) {
+    if (pane.left < chosen.left) chosen = pane
+  }
+  return chosen
+}
+
+// Replace the tmux-facing modules so the tests never shell out to tmux.
+mock.module('./pane-state-querier', () => {
+  return {
+    queryWindowState: mockQueryWindowState,
+    paneExists: mockPaneExists,
+    getRightmostAgentPane: pickRightmostPane,
+    getOldestAgentPane: pickLeftmostPane,
+  }
+})
+
+mock.module('./action-executor', () => {
+  return {
+    executeActions: mockExecuteActions,
+    executeAction: mockExecuteAction,
+  }
+})
+
+// Timing constants are supplied by this mock; the session-ready timeout is 500 ms.
+mock.module('../../shared/tmux', () => {
+  return {
+    isInsideTmux: mockIsInsideTmux,
+    getCurrentPaneId: mockGetCurrentPaneId,
+    POLL_INTERVAL_BACKGROUND_MS: 2000,
+    SESSION_TIMEOUT_MS: 600000,
+    SESSION_MISSING_GRACE_MS: 6000,
+    SESSION_READY_POLL_INTERVAL_MS: 100,
+    SESSION_READY_TIMEOUT_MS: 500,
+  }
+})
+
+// Session ids that the mocked status endpoint reports as running.
+const trackedSessions = new Set<string>()
+
+/**
+ * Builds a minimal plugin context for the manager under test.
+ *
+ * `client.session.status` resolves to `overrides.sessionStatusResult` when it
+ * is provided; otherwise it reports every id in `trackedSessions` with
+ * `{ type: 'running' }`.
+ */
+function createMockContext(overrides?: {
+  sessionStatusResult?: { data?: Record<string, { type: string }> }
+}) {
+  const status = mock(async () => {
+    const canned = overrides?.sessionStatusResult
+    if (canned) return canned
+
+    const running = Array.from(
+      trackedSessions,
+      (sessionId): [string, { type: string }] => [sessionId, { type: 'running' }]
+    )
+    return { data: Object.fromEntries(running) as Record<string, { type: string }> }
+  })
+
+  return {
+    serverUrl: new URL('http://localhost:4096'),
+    client: { session: { status } },
+  } as any
+}
+
+/**
+ * Builds a `session.created` event whose `properties.info` carries the given
+ * id, parent id and title. Pass `undefined` as `parentID` for a root session.
+ */
+function createSessionCreatedEvent(
+  id: string,
+  parentID: string | undefined,
+  title: string
+) {
+  const info = { id, parentID, title }
+  return { type: 'session.created', properties: { info } }
+}
+
+/**
+ * Returns a fresh 220x44 window state with main pane '%0' and no agent panes.
+ * Any field present in `overrides` replaces the default.
+ */
+function createWindowState(overrides?: Partial<WindowState>): WindowState {
+  const defaults: WindowState = {
+    windowWidth: 220,
+    windowHeight: 44,
+    mainPane: { paneId: '%0', width: 110, height: 44, left: 0, top: 0, title: 'main', isActive: true },
+    agentPanes: [],
+  }
+  return { ...defaults, ...overrides }
+}
+
+describe('TmuxSessionManager', () => {
+  beforeEach(() => {
+    // Drop recorded calls on every mock and forget sessions from earlier tests.
+    const allMocks = [
+      mockQueryWindowState,
+      mockPaneExists,
+      mockExecuteActions,
+      mockExecuteAction,
+      mockIsInsideTmux,
+      mockGetCurrentPaneId,
+    ]
+    for (const fn of allMocks) {
+      fn.mockClear()
+    }
+    trackedSessions.clear()
+
+    // Defaults: an empty window, and an executor that registers each spawned
+    // session so the mocked status endpoint reports it.
+    mockQueryWindowState.mockImplementation(async () => createWindowState())
+    mockExecuteActions.mockImplementation(async (actions) => {
+      actions.forEach((action) => {
+        if (action.type === 'spawn') {
+          trackedSessions.add(action.sessionId)
+        }
+      })
+      return { success: true, spawnedPaneId: '%mock', results: [] }
+    })
+  })
+
+  describe('constructor', () => {
+    // Config shared by the tests in this group; each test sets `enabled` itself.
+    const makeConfig = (enabled: boolean): TmuxConfig => ({
+      enabled,
+      layout: 'main-vertical',
+      main_pane_size: 60,
+      main_pane_min_width: 80,
+      agent_pane_min_width: 40,
+    })
+
+    // Child-session event used to observe whether the manager acts or stays idle.
+    const childSessionEvent = () =>
+      createSessionCreatedEvent('ses_child', 'ses_parent', 'Background: Test Task')
+
+    test('enabled when config.enabled=true and isInsideTmux=true', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const manager = new TmuxSessionManager(createMockContext(), makeConfig(true))
+
+      //#when
+      await manager.onSessionCreated(childSessionEvent())
+
+      //#then - an enabled manager queries tmux for the window state
+      expect(manager).toBeDefined()
+      expect(mockQueryWindowState).toHaveBeenCalledTimes(1)
+
+      // Stop the poller a successful spawn may have started.
+      await manager.cleanup()
+    })
+
+    test('disabled when config.enabled=true but isInsideTmux=false', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(false)
+      const { TmuxSessionManager } = await import('./manager')
+      const manager = new TmuxSessionManager(createMockContext(), makeConfig(true))
+
+      //#when
+      await manager.onSessionCreated(childSessionEvent())
+
+      //#then - a disabled manager never touches tmux
+      expect(manager).toBeDefined()
+      expect(mockQueryWindowState).toHaveBeenCalledTimes(0)
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('disabled when config.enabled=false', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const manager = new TmuxSessionManager(createMockContext(), makeConfig(false))
+
+      //#when
+      await manager.onSessionCreated(childSessionEvent())
+
+      //#then - a disabled manager never touches tmux
+      expect(manager).toBeDefined()
+      expect(mockQueryWindowState).toHaveBeenCalledTimes(0)
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+  })
+
+  describe('onSessionCreated', () => {
+    // Config shared by the tests in this group; `overrides` replaces single fields.
+    const makeConfig = (overrides: Partial<TmuxConfig> = {}): TmuxConfig => ({
+      enabled: true,
+      layout: 'main-vertical',
+      main_pane_size: 60,
+      main_pane_min_width: 80,
+      agent_pane_min_width: 40,
+      ...overrides,
+    })
+
+    // Imports the manager (after the module mocks are registered) and builds one.
+    const createManager = async (config: TmuxConfig) => {
+      const { TmuxSessionManager } = await import('./manager')
+      return new TmuxSessionManager(createMockContext(), config)
+    }
+
+    test('first agent spawns from source pane via decision engine', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      mockQueryWindowState.mockImplementation(async () => createWindowState())
+      const manager = await createManager(makeConfig())
+
+      //#when
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_child', 'ses_parent', 'Background: Test Task')
+      )
+
+      //#then
+      expect(mockQueryWindowState).toHaveBeenCalledTimes(1)
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+
+      const firstCall = mockExecuteActions.mock.calls[0]
+      expect(firstCall).toBeDefined()
+      const actions = firstCall![0]
+      expect(actions).toHaveLength(1)
+      expect(actions[0]).toMatchObject({
+        type: 'spawn',
+        sessionId: 'ses_child',
+        description: 'Background: Test Task',
+        targetPaneId: '%0',
+        splitDirection: '-h',
+      })
+    })
+
+    test('second agent spawns with correct split direction', async () => {
+      //#given - the first query sees an empty window, later ones see one agent pane
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let queries = 0
+      mockQueryWindowState.mockImplementation(async () => {
+        queries += 1
+        if (queries === 1) return createWindowState()
+
+        const firstAgentPane = {
+          paneId: '%1',
+          width: 40,
+          height: 44,
+          left: 100,
+          top: 0,
+          title: 'omo-subagent-Task 1',
+          isActive: false,
+        }
+        return createWindowState({ agentPanes: [firstAgentPane] })
+      })
+
+      const manager = await createManager(makeConfig())
+
+      //#when - first agent
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
+      )
+      mockExecuteActions.mockClear()
+
+      //#when - second agent
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
+      )
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+      const secondCall = mockExecuteActions.mock.calls[0]
+      expect(secondCall).toBeDefined()
+      const actions = secondCall![0]
+      expect(actions).toHaveLength(1)
+      expect(actions[0].type).toBe('spawn')
+    })
+
+    test('does NOT spawn pane when session has no parentID', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const manager = await createManager(makeConfig())
+
+      //#when
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_root', undefined, 'Root Session')
+      )
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('does NOT spawn pane when disabled', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const manager = await createManager(makeConfig({ enabled: false }))
+
+      //#when
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_child', 'ses_parent', 'Background: Test Task')
+      )
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('does NOT spawn pane for non session.created event type', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const manager = await createManager(makeConfig())
+      const info = { id: 'ses_child', parentID: 'ses_parent', title: 'Task' }
+      const deletedEvent = { type: 'session.deleted', properties: { info } }
+
+      //#when
+      await manager.onSessionCreated(deletedEvent)
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('replaces oldest agent when unsplittable (small window)', async () => {
+      //#given - small window where split is not possible
+      mockIsInsideTmux.mockReturnValue(true)
+      mockQueryWindowState.mockImplementation(async () => {
+        const existingPane = {
+          paneId: '%1',
+          width: 40,
+          height: 11,
+          left: 80,
+          top: 0,
+          title: 'omo-subagent-Task 1',
+          isActive: false,
+        }
+        return createWindowState({
+          windowWidth: 160,
+          windowHeight: 11,
+          agentPanes: [existingPane],
+        })
+      })
+
+      const manager = await createManager(makeConfig({ main_pane_min_width: 120 }))
+
+      //#when
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_new', 'ses_parent', 'New Task')
+      )
+
+      //#then - with small window, replace action is used instead of close+spawn
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+      const firstCall = mockExecuteActions.mock.calls[0]
+      expect(firstCall).toBeDefined()
+      const actions = firstCall![0]
+      expect(actions).toHaveLength(1)
+      expect(actions[0].type).toBe('replace')
+    })
+  })
+
+  describe('onSessionDeleted', () => {
+    // Enabled config shared by both tests in this group.
+    const makeConfig = (): TmuxConfig => ({
+      enabled: true,
+      layout: 'main-vertical',
+      main_pane_size: 60,
+      main_pane_min_width: 80,
+      agent_pane_min_width: 40,
+    })
+
+    // Imports the manager (after the module mocks are registered) and builds one.
+    const createManager = async () => {
+      const { TmuxSessionManager } = await import('./manager')
+      return new TmuxSessionManager(createMockContext(), makeConfig())
+    }
+
+    test('closes pane when tracked session is deleted', async () => {
+      //#given - the first query sees an empty window, later ones see the spawned pane
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let queries = 0
+      mockQueryWindowState.mockImplementation(async () => {
+        queries += 1
+        if (queries === 1) return createWindowState()
+
+        const spawnedPane = {
+          paneId: '%mock',
+          width: 40,
+          height: 44,
+          left: 100,
+          top: 0,
+          title: 'omo-subagent-Task',
+          isActive: false,
+        }
+        return createWindowState({ agentPanes: [spawnedPane] })
+      })
+
+      const manager = await createManager()
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_child', 'ses_parent', 'Background: Test Task')
+      )
+      mockExecuteAction.mockClear()
+
+      //#when
+      await manager.onSessionDeleted({ sessionID: 'ses_child' })
+
+      //#then
+      expect(mockExecuteAction).toHaveBeenCalledTimes(1)
+      const closeCall = mockExecuteAction.mock.calls[0]
+      expect(closeCall).toBeDefined()
+      const expectedAction = { type: 'close', paneId: '%mock', sessionId: 'ses_child' }
+      expect(closeCall![0]).toEqual(expectedAction)
+    })
+
+    test('does nothing when untracked session is deleted', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const manager = await createManager()
+
+      //#when
+      await manager.onSessionDeleted({ sessionID: 'ses_unknown' })
+
+      //#then
+      expect(mockExecuteAction).toHaveBeenCalledTimes(0)
+    })
+  })
+
+  describe('cleanup', () => {
+    test('closes all tracked panes', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let callCount = 0
+      mockExecuteActions.mockImplementation(async (actions) => {
+        callCount++
+        // Register spawned sessions so the mocked status endpoint reports them.
+        // Without this the manager waits out the whole session-ready timeout
+        // for every spawn before tracking the pane.
+        for (const action of actions) {
+          if (action.type === 'spawn') {
+            trackedSessions.add(action.sessionId)
+          }
+        }
+        return {
+          success: true,
+          spawnedPaneId: `%${callCount}`,
+          results: [],
+        }
+      })
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
+      )
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
+      )
+
+      mockExecuteAction.mockClear()
+
+      //#when
+      await manager.cleanup()
+
+      //#then - one close per tracked session
+      expect(mockExecuteAction).toHaveBeenCalledTimes(2)
+    })
+  })
+})
+
+describe('DecisionEngine', () => {
+  describe('calculateCapacity', () => {
+    test('calculates correct 2D grid capacity', async () => {
+      //#given
+      const engine = await import('./decision-engine')
+
+      //#when
+      const { cols, rows, total } = engine.calculateCapacity(212, 44)
+
+      //#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
+      expect(cols).toBe(2)
+      expect(rows).toBe(3)
+      expect(total).toBe(6)
+    })
+
+    test('returns 0 cols when agent area too narrow', async () => {
+      //#given
+      const engine = await import('./decision-engine')
+
+      //#when
+      const { cols, total } = engine.calculateCapacity(100, 44)
+
+      //#then - availableWidth=50, cols=50/53=0
+      expect(cols).toBe(0)
+      expect(total).toBe(0)
+    })
+  })
+
+  describe('decideSpawnActions', () => {
+    // Main pane '%0' at the top-left corner with the given size.
+    const mainPaneOf = (width: number, height: number) => ({
+      paneId: '%0',
+      width,
+      height,
+      left: 0,
+      top: 0,
+      title: 'main',
+      isActive: true,
+    })
+
+    // Capacity config passed to every decision in this group.
+    const capacity = () => ({ mainPaneMinWidth: 120, agentPaneWidth: 40 })
+
+    test('returns spawn action with splitDirection when under capacity', async () => {
+      //#given
+      const { decideSpawnActions } = await import('./decision-engine')
+      const state: WindowState = {
+        windowWidth: 212,
+        windowHeight: 44,
+        mainPane: mainPaneOf(106, 44),
+        agentPanes: [],
+      }
+
+      //#when
+      const decision = decideSpawnActions(state, 'ses_1', 'Test Task', capacity(), [])
+
+      //#then
+      expect(decision.canSpawn).toBe(true)
+      expect(decision.actions).toHaveLength(1)
+      expect(decision.actions[0]).toMatchObject({
+        type: 'spawn',
+        sessionId: 'ses_1',
+        description: 'Test Task',
+        targetPaneId: '%0',
+        splitDirection: '-h',
+      })
+    })
+
+    test('returns replace when split not possible', async () => {
+      //#given - small window where split is never possible
+      const { decideSpawnActions } = await import('./decision-engine')
+      const oldAgentPane = {
+        paneId: '%1',
+        width: 80,
+        height: 11,
+        left: 80,
+        top: 0,
+        title: 'omo-subagent-Old',
+        isActive: false,
+      }
+      const state: WindowState = {
+        windowWidth: 160,
+        windowHeight: 11,
+        mainPane: mainPaneOf(80, 11),
+        agentPanes: [oldAgentPane],
+      }
+      const sessionMappings = [
+        { sessionId: 'ses_old', paneId: '%1', createdAt: new Date('2024-01-01') },
+      ]
+
+      //#when
+      const decision = decideSpawnActions(state, 'ses_new', 'New Task', capacity(), sessionMappings)
+
+      //#then - agent area (80) < MIN_SPLIT_WIDTH (105), so replace is used
+      expect(decision.canSpawn).toBe(true)
+      expect(decision.actions).toHaveLength(1)
+      expect(decision.actions[0].type).toBe('replace')
+    })
+
+    test('returns canSpawn=false when window too small', async () => {
+      //#given
+      const { decideSpawnActions } = await import('./decision-engine')
+      const state: WindowState = {
+        windowWidth: 60,
+        windowHeight: 5,
+        mainPane: mainPaneOf(30, 5),
+        agentPanes: [],
+      }
+
+      //#when
+      const decision = decideSpawnActions(state, 'ses_1', 'Test Task', capacity(), [])
+
+      //#then
+      expect(decision.canSpawn).toBe(false)
+      expect(decision.reason).toContain('too small')
+    })
+  })
+})
--- a/src/features/tmux-subagent/manager.ts
+++ b/src/features/tmux-subagent/manager.ts
@@ -0,0 +1,396 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import type { TmuxConfig } from "../../config/schema"
+import type { TrackedSession, CapacityConfig } from "./types"
+import {
+  isInsideTmux,
+  getCurrentPaneId,
+  POLL_INTERVAL_BACKGROUND_MS,
+  SESSION_MISSING_GRACE_MS,
+  SESSION_READY_POLL_INTERVAL_MS,
+  SESSION_READY_TIMEOUT_MS,
+} from "../../shared/tmux"
+import { log } from "../../shared"
+import { queryWindowState } from "./pane-state-querier"
+import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
+import { executeActions, executeAction } from "./action-executor"
+
+// Client type taken from the plugin input.
+type OpencodeClient = PluginInput["client"]
+
+// Event shape read by `onSessionCreated`; every nested field is optional.
+interface SessionCreatedEvent {
+  type: string
+  properties?: {
+    info?: {
+      id?: string
+      parentID?: string
+      title?: string
+    }
+  }
+}
+
+// Ten minutes, in milliseconds.
+const SESSION_TIMEOUT_MS = 1000 * 60 * 10
+
+/**
+ * State-first Tmux Session Manager
+ *
+ * Architecture:
+ * 1. QUERY: Get actual tmux pane state (source of truth)
+ * 2. DECIDE: Pure function determines actions based on state
+ * 3. EXECUTE: Execute actions with verification
+ * 4. UPDATE: Update internal cache only after tmux confirms success
+ *
+ * The internal `sessions` Map is just a cache for sessionId<->paneId mapping.
+ * The REAL source of truth is always queried from tmux.
+ *
+ * Every public entry point is a no-op unless the config enables the feature
+ * and the process runs inside tmux (see `isEnabled`).
+ */
+export class TmuxSessionManager {
+  private client: OpencodeClient
+  private tmuxConfig: TmuxConfig
+  private serverUrl: string
+  private sourcePaneId: string | undefined
+  // Cache of sessions that currently own a pane, keyed by session id.
+  private sessions = new Map<string, TrackedSession>()
+  // Sessions whose spawn is still in progress; guards against duplicate events.
+  private pendingSessions = new Set<string>()
+  private pollInterval?: ReturnType<typeof setInterval>
+  // True while a poll is running, so a slow poll is not overlapped by the next tick.
+  private pollInFlight = false
+
+  /**
+   * @param ctx Plugin input; supplies the client and, when present, the server URL.
+   * @param tmuxConfig Tmux feature configuration.
+   */
+  constructor(ctx: PluginInput, tmuxConfig: TmuxConfig) {
+    this.client = ctx.client
+    this.tmuxConfig = tmuxConfig
+    // Fall back to localhost on OPENCODE_PORT (default 4096) when no URL is given.
+    const defaultPort = process.env.OPENCODE_PORT ?? "4096"
+    this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
+    this.sourcePaneId = getCurrentPaneId()
+
+    log("[tmux-session-manager] initialized", {
+      configEnabled: this.tmuxConfig.enabled,
+      tmuxConfig: this.tmuxConfig,
+      serverUrl: this.serverUrl,
+      sourcePaneId: this.sourcePaneId,
+    })
+  }
+
+  /** True when the config enables the feature and we are running inside tmux. */
+  private isEnabled(): boolean {
+    return this.tmuxConfig.enabled && isInsideTmux()
+  }
+
+  /** Maps the tmux config onto the capacity settings used by the decision engine. */
+  private getCapacityConfig(): CapacityConfig {
+    return {
+      mainPaneMinWidth: this.tmuxConfig.main_pane_min_width,
+      agentPaneWidth: this.tmuxConfig.agent_pane_min_width,
+    }
+  }
+
+  /** Snapshot of the cached sessionId<->paneId mappings for the decision engine. */
+  private getSessionMappings(): SessionMapping[] {
+    return Array.from(this.sessions.values()).map((s) => ({
+      sessionId: s.sessionId,
+      paneId: s.paneId,
+      createdAt: s.createdAt,
+    }))
+  }
+
+  /**
+   * Polls the session status endpoint until `sessionId` appears in it.
+   *
+   * @returns true once the session is listed, false after SESSION_READY_TIMEOUT_MS.
+   */
+  private async waitForSessionReady(sessionId: string): Promise<boolean> {
+    const startTime = Date.now()
+
+    while (Date.now() - startTime < SESSION_READY_TIMEOUT_MS) {
+      try {
+        const statusResult = await this.client.session.status({ path: undefined })
+        const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+
+        if (allStatuses[sessionId]) {
+          log("[tmux-session-manager] session ready", {
+            sessionId,
+            status: allStatuses[sessionId].type,
+            waitedMs: Date.now() - startTime,
+          })
+          return true
+        }
+      } catch (err) {
+        // A failed status check is logged and retried until the timeout elapses.
+        log("[tmux-session-manager] session status check error", { error: String(err) })
+      }
+
+      await new Promise((resolve) => setTimeout(resolve, SESSION_READY_POLL_INTERVAL_MS))
+    }
+
+    log("[tmux-session-manager] session ready timeout", {
+      sessionId,
+      timeoutMs: SESSION_READY_TIMEOUT_MS,
+    })
+    return false
+  }
+
+  /**
+   * Handles a session event: for a `session.created` event of a child session
+   * (one with a `parentID`) it queries tmux, asks the decision engine what to
+   * do, executes the resulting actions and caches the spawned pane.
+   *
+   * Events of other types, root sessions, and sessions already tracked or
+   * pending are ignored.
+   */
+  async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
+    const enabled = this.isEnabled()
+    log("[tmux-session-manager] onSessionCreated called", {
+      enabled,
+      tmuxConfigEnabled: this.tmuxConfig.enabled,
+      isInsideTmux: isInsideTmux(),
+      eventType: event.type,
+      infoId: event.properties?.info?.id,
+      infoParentID: event.properties?.info?.parentID,
+    })
+
+    if (!enabled) return
+    if (event.type !== "session.created") return
+
+    const info = event.properties?.info
+    if (!info?.id || !info?.parentID) return
+
+    const sessionId = info.id
+    const title = info.title ?? "Subagent"
+
+    if (this.sessions.has(sessionId) || this.pendingSessions.has(sessionId)) {
+      log("[tmux-session-manager] session already tracked or pending", { sessionId })
+      return
+    }
+
+    if (!this.sourcePaneId) {
+      log("[tmux-session-manager] no source pane id")
+      return
+    }
+
+    this.pendingSessions.add(sessionId)
+
+    try {
+      const state = await queryWindowState(this.sourcePaneId)
+      if (!state) {
+        log("[tmux-session-manager] failed to query window state")
+        return
+      }
+
+      log("[tmux-session-manager] window state queried", {
+        windowWidth: state.windowWidth,
+        mainPane: state.mainPane?.paneId,
+        agentPaneCount: state.agentPanes.length,
+        agentPanes: state.agentPanes.map((p) => p.paneId),
+      })
+
+      const decision = decideSpawnActions(
+        state,
+        sessionId,
+        title,
+        this.getCapacityConfig(),
+        this.getSessionMappings()
+      )
+
+      log("[tmux-session-manager] spawn decision", {
+        canSpawn: decision.canSpawn,
+        reason: decision.reason,
+        actionCount: decision.actions.length,
+        actions: decision.actions.map((a) => {
+          if (a.type === "close") return { type: "close", paneId: a.paneId }
+          if (a.type === "replace") return { type: "replace", paneId: a.paneId, newSessionId: a.newSessionId }
+          return { type: "spawn", sessionId: a.sessionId }
+        }),
+      })
+
+      if (!decision.canSpawn) {
+        log("[tmux-session-manager] cannot spawn", { reason: decision.reason })
+        return
+      }
+
+      const result = await executeActions(
+        decision.actions,
+        { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+      )
+
+      // Drop cache entries for sessions whose pane was closed or replaced.
+      for (const { action, result: actionResult } of result.results) {
+        if (action.type === "close" && actionResult.success) {
+          this.sessions.delete(action.sessionId)
+          log("[tmux-session-manager] removed closed session from cache", {
+            sessionId: action.sessionId,
+          })
+        }
+        if (action.type === "replace" && actionResult.success) {
+          this.sessions.delete(action.oldSessionId)
+          log("[tmux-session-manager] removed replaced session from cache", {
+            oldSessionId: action.oldSessionId,
+            newSessionId: action.newSessionId,
+          })
+        }
+      }
+
+      if (result.success && result.spawnedPaneId) {
+        const sessionReady = await this.waitForSessionReady(sessionId)
+
+        if (!sessionReady) {
+          // The pane exists either way, so it is tracked even without confirmation.
+          log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
+            sessionId,
+            paneId: result.spawnedPaneId,
+          })
+        }
+
+        const now = Date.now()
+        this.sessions.set(sessionId, {
+          sessionId,
+          paneId: result.spawnedPaneId,
+          description: title,
+          createdAt: new Date(now),
+          lastSeenAt: new Date(now),
+        })
+        log("[tmux-session-manager] pane spawned and tracked", {
+          sessionId,
+          paneId: result.spawnedPaneId,
+          sessionReady,
+        })
+        this.startPolling()
+      } else {
+        log("[tmux-session-manager] spawn failed", {
+          success: result.success,
+          results: result.results.map((r) => ({
+            type: r.action.type,
+            success: r.result.success,
+            error: r.result.error,
+          })),
+        })
+      }
+    } finally {
+      this.pendingSessions.delete(sessionId)
+    }
+  }
+
+  /**
+   * Closes the pane of a tracked session that was deleted and forgets it.
+   *
+   * The cache entry is removed even when querying tmux or closing the pane
+   * fails; such failures are logged instead of propagating.
+   */
+  async onSessionDeleted(event: { sessionID: string }): Promise<void> {
+    if (!this.isEnabled()) return
+    if (!this.sourcePaneId) return
+
+    const tracked = this.sessions.get(event.sessionID)
+    if (!tracked) return
+
+    log("[tmux-session-manager] onSessionDeleted", { sessionId: event.sessionID })
+
+    try {
+      const state = await queryWindowState(this.sourcePaneId)
+      if (state) {
+        const closeAction = decideCloseAction(state, event.sessionID, this.getSessionMappings())
+        if (closeAction) {
+          await executeAction(closeAction, { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state })
+        }
+      }
+    } catch (err) {
+      log("[tmux-session-manager] failed to close pane for deleted session", {
+        sessionId: event.sessionID,
+        paneId: tracked.paneId,
+        error: String(err),
+      })
+    } finally {
+      // The session is gone either way; never leave a stale cache entry behind.
+      this.sessions.delete(event.sessionID)
+
+      if (this.sessions.size === 0) {
+        this.stopPolling()
+      }
+    }
+  }
+
+  /** Starts the background status poll; does nothing when it is already running. */
+  private startPolling(): void {
+    if (this.pollInterval) return
+
+    this.pollInterval = setInterval(
+      () => this.pollSessions(),
+      POLL_INTERVAL_BACKGROUND_MS,
+    )
+    log("[tmux-session-manager] polling started")
+  }
+
+  /** Stops the background status poll if it is running. */
+  private stopPolling(): void {
+    if (this.pollInterval) {
+      clearInterval(this.pollInterval)
+      this.pollInterval = undefined
+      log("[tmux-session-manager] polling stopped")
+    }
+  }
+
+  /**
+   * One poll tick: closes every tracked session that is idle, has been missing
+   * from the status response for SESSION_MISSING_GRACE_MS or longer, or is
+   * older than SESSION_TIMEOUT_MS.
+   *
+   * A tick is skipped while the previous one is still running.
+   */
+  private async pollSessions(): Promise<void> {
+    if (this.sessions.size === 0) {
+      this.stopPolling()
+      return
+    }
+
+    // setInterval does not await the previous tick; skip instead of overlapping.
+    if (this.pollInFlight) return
+    this.pollInFlight = true
+
+    try {
+      const statusResult = await this.client.session.status({ path: undefined })
+      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+
+      log("[tmux-session-manager] pollSessions", {
+        trackedSessions: Array.from(this.sessions.keys()),
+        allStatusKeys: Object.keys(allStatuses),
+      })
+
+      const now = Date.now()
+      const sessionsToClose: string[] = []
+
+      for (const [sessionId, tracked] of this.sessions.entries()) {
+        const status = allStatuses[sessionId]
+        const isIdle = status?.type === "idle"
+
+        if (status) {
+          tracked.lastSeenAt = new Date(now)
+        }
+
+        // Time since the session was last listed; 0 while it is still listed.
+        const missingSince = !status ? now - tracked.lastSeenAt.getTime() : 0
+        const missingTooLong = missingSince >= SESSION_MISSING_GRACE_MS
+        const isTimedOut = now - tracked.createdAt.getTime() > SESSION_TIMEOUT_MS
+
+        log("[tmux-session-manager] session check", {
+          sessionId,
+          statusType: status?.type,
+          isIdle,
+          missingSince,
+          missingTooLong,
+          isTimedOut,
+          shouldClose: isIdle || missingTooLong || isTimedOut,
+        })
+
+        if (isIdle || missingTooLong || isTimedOut) {
+          sessionsToClose.push(sessionId)
+        }
+      }
+
+      // Close after the scan so the map is not modified while iterating it.
+      for (const sessionId of sessionsToClose) {
+        log("[tmux-session-manager] closing session due to poll", { sessionId })
+        await this.closeSessionById(sessionId)
+      }
+    } catch (err) {
+      log("[tmux-session-manager] poll error", { error: String(err) })
+    } finally {
+      this.pollInFlight = false
+    }
+  }
+
+  /**
+   * Closes the pane of a tracked session and removes it from the cache.
+   *
+   * Failures while querying tmux or closing the pane are logged; the cache
+   * entry is removed regardless.
+   */
+  private async closeSessionById(sessionId: string): Promise<void> {
+    const tracked = this.sessions.get(sessionId)
+    if (!tracked) return
+
+    log("[tmux-session-manager] closing session pane", {
+      sessionId,
+      paneId: tracked.paneId,
+    })
+
+    try {
+      const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+      if (state) {
+        await executeAction(
+          { type: "close", paneId: tracked.paneId, sessionId },
+          { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+        )
+      }
+    } catch (err) {
+      log("[tmux-session-manager] failed to close session pane", {
+        sessionId,
+        paneId: tracked.paneId,
+        error: String(err),
+      })
+    } finally {
+      this.sessions.delete(sessionId)
+
+      if (this.sessions.size === 0) {
+        this.stopPolling()
+      }
+    }
+  }
+
+  /** Returns an event handler that forwards every event to `onSessionCreated`. */
+  createEventHandler(): (input: { event: { type: string; properties?: unknown } }) => Promise<void> {
+    return async (input) => {
+      await this.onSessionCreated(input.event as SessionCreatedEvent)
+    }
+  }
+
+  /**
+   * Stops polling and closes the panes of all tracked sessions in parallel.
+   * Individual close failures are logged; the cache is cleared afterwards.
+   */
+  async cleanup(): Promise<void> {
+    this.stopPolling()
+
+    if (this.sessions.size > 0) {
+      log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
+      const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+
+      if (state) {
+        const closePromises = Array.from(this.sessions.values()).map((s) =>
+          executeAction(
+            { type: "close", paneId: s.paneId, sessionId: s.sessionId },
+            { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+          ).catch((err) =>
+            log("[tmux-session-manager] cleanup error for pane", {
+              paneId: s.paneId,
+              error: String(err),
+            }),
+          ),
+        )
+        await Promise.all(closePromises)
+      }
+      this.sessions.clear()
+    }
+
+    log("[tmux-session-manager] cleanup complete")
+  }
+}
--- a/src/features/tmux-subagent/pane-state-querier.ts
+++ b/src/features/tmux-subagent/pane-state-querier.ts
@@ -0,0 +1,73 @@
+import { spawn } from "bun"
+import type { WindowState, TmuxPaneInfo } from "./types"
+import { getTmuxPath } from "../../tools/interactive-bash/utils"
+import { log } from "../../shared"
+
+/** Number of comma-separated fields that precede `#{pane_title}` in the list-panes format. */
+const PANE_FIELDS_BEFORE_TITLE = 5
+/** Number of comma-separated fields that follow `#{pane_title}` in the list-panes format. */
+const PANE_FIELDS_AFTER_TITLE = 3
+
+/**
+ * Parses one output line of the `list-panes` call issued by `queryWindowState`.
+ *
+ * `#{pane_title}` is free-form text and may itself contain commas, so the
+ * fields before it are taken from the front of the line and the fields after
+ * it from the back; whatever remains in between is the title.
+ *
+ * @param line a single non-empty line of `list-panes` output
+ * @returns `null` when the line has too few fields; otherwise the window size
+ *          carried on the line (possibly `NaN`) and the pane, which is `null`
+ *          when any of its geometry fields is not numeric
+ */
+function parsePaneLine(
+  line: string
+): { pane: TmuxPaneInfo | null; windowWidth: number; windowHeight: number } | null {
+  const fields = line.split(",")
+  if (fields.length < PANE_FIELDS_BEFORE_TITLE + 1 + PANE_FIELDS_AFTER_TITLE) return null
+
+  const [paneId, widthStr, heightStr, leftStr, topStr] = fields
+  const [activeStr, windowWidthStr, windowHeightStr] = fields.slice(-PANE_FIELDS_AFTER_TITLE)
+  const title = fields.slice(PANE_FIELDS_BEFORE_TITLE, -PANE_FIELDS_AFTER_TITLE).join(",")
+
+  const width = parseInt(widthStr, 10)
+  const height = parseInt(heightStr, 10)
+  const left = parseInt(leftStr, 10)
+  const top = parseInt(topStr, 10)
+  const hasGeometry = !isNaN(width) && !isNaN(left) && !isNaN(height) && !isNaN(top)
+
+  return {
+    pane: hasGeometry ? { paneId, width, height, left, top, title, isActive: activeStr === "1" } : null,
+    windowWidth: parseInt(windowWidthStr, 10),
+    windowHeight: parseInt(windowHeightStr, 10),
+  }
+}
+
+/**
+ * Queries tmux for the layout of the window that contains `sourcePaneId`.
+ *
+ * Runs `tmux list-panes -t <sourcePaneId>` with a comma-separated format and
+ * splits the resulting panes into the main pane (the one whose id equals
+ * `sourcePaneId`) and all remaining panes, ordered by left edge and then top
+ * edge.
+ *
+ * @param sourcePaneId id of the pane used both as the `-t` target and as the main pane
+ * @returns the window state, or `null` when tmux is unavailable, the command
+ *          fails, it prints no panes, or the source pane is not among them
+ */
+export async function queryWindowState(sourcePaneId: string): Promise<WindowState | null> {
+  const tmux = await getTmuxPath()
+  if (!tmux) return null
+
+  const proc = spawn(
+    [
+      tmux,
+      "list-panes",
+      "-t",
+      sourcePaneId,
+      "-F",
+      "#{pane_id},#{pane_width},#{pane_height},#{pane_left},#{pane_top},#{pane_title},#{pane_active},#{window_width},#{window_height}",
+    ],
+    { stdout: "pipe", stderr: "pipe" }
+  )
+
+  // Drain both pipes while waiting for the exit code: awaiting the exit first
+  // can stall if the child blocks on a full, unread pipe.
+  const [exitCode, stdout, stderr] = await Promise.all([
+    proc.exited,
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+  ])
+
+  if (exitCode !== 0) {
+    log("[pane-state-querier] list-panes failed", { exitCode, stderr: stderr.trim() })
+    return null
+  }
+
+  const lines = stdout.trim().split("\n").filter(Boolean)
+  if (lines.length === 0) return null
+
+  let windowWidth = 0
+  let windowHeight = 0
+  const panes: TmuxPaneInfo[] = []
+
+  for (const line of lines) {
+    const parsed = parsePaneLine(line)
+    if (!parsed) continue
+
+    // Every line repeats the window size; never let a malformed line replace a valid value with NaN.
+    if (!isNaN(parsed.windowWidth)) windowWidth = parsed.windowWidth
+    if (!isNaN(parsed.windowHeight)) windowHeight = parsed.windowHeight
+    if (parsed.pane) panes.push(parsed.pane)
+  }
+
+  panes.sort((a, b) => a.left - b.left || a.top - b.top)
+
+  const mainPane = panes.find((p) => p.paneId === sourcePaneId)
+  if (!mainPane) {
+    log("[pane-state-querier] CRITICAL: sourcePaneId not found in panes", {
+      sourcePaneId,
+      availablePanes: panes.map((p) => p.paneId),
+    })
+    return null
+  }
+
+  const agentPanes = panes.filter((p) => p.paneId !== mainPane.paneId)
+
+  log("[pane-state-querier] window state", {
+    windowWidth,
+    windowHeight,
+    mainPane: mainPane.paneId,
+    agentPaneCount: agentPanes.length,
+  })
+
+  return { windowWidth, windowHeight, mainPane, agentPanes }
+}
--- a/src/features/tmux-subagent/types.ts
+++ b/src/features/tmux-subagent/types.ts
@@ -0,0 +1,45 @@
+/**
+ * Record describing one session together with the tmux pane it is shown in.
+ *
+ * NOTE(review): presumably the value type of the session manager's session
+ * map — the map's declaration is not visible here; confirm.
+ */
+export interface TrackedSession {
+  /** Identifier of the session. */
+  sessionId: string
+  /** Identifier of the tmux pane associated with the session. */
+  paneId: string
+  /** NOTE(review): presumably a human-readable label for the session — confirm against the code that sets it. */
+  description: string
+  /** NOTE(review): presumably when tracking of the session started — confirm. */
+  createdAt: Date
+  /** NOTE(review): presumably when the session was last observed (e.g. by polling) — confirm. */
+  lastSeenAt: Date
+}
+
+// NOTE(review): presumably the smallest pane size (in terminal columns / rows) considered
+// usable when deciding whether a pane can be split — the consumers are not visible here; confirm.
+export const MIN_PANE_WIDTH = 52
+export const MIN_PANE_HEIGHT = 11
+
+/**
+ * Geometry and metadata of a single tmux pane, as parsed from `tmux list-panes` output.
+ */
+export interface TmuxPaneInfo {
+  /** tmux `#{pane_id}`. */
+  paneId: string
+  /** tmux `#{pane_width}`. */
+  width: number
+  /** tmux `#{pane_height}`. */
+  height: number
+  /** tmux `#{pane_left}`. */
+  left: number
+  /** tmux `#{pane_top}`. */
+  top: number
+  /** tmux `#{pane_title}`. */
+  title: string
+  /** True when tmux reports `#{pane_active}` as "1". */
+  isActive: boolean
+}
+
+/**
+ * Snapshot of one tmux window: its size plus its panes, split into the main
+ * pane and all others.
+ */
+export interface WindowState {
+  /** tmux `#{window_width}`. */
+  windowWidth: number
+  /** tmux `#{window_height}`. */
+  windowHeight: number
+  /**
+   * The pane whose id equals the queried source pane. The type allows `null`,
+   * although `queryWindowState` returns `null` for the whole state instead
+   * when that pane is not found.
+   */
+  mainPane: TmuxPaneInfo | null
+  /** Every pane of the window other than the main pane. */
+  agentPanes: TmuxPaneInfo[]
+}
+
+// NOTE(review): presumably the `tmux split-window` direction flags (`-h` / `-v`) — confirm against the code that runs the split.
+export type SplitDirection = "-h" | "-v"
+
+/**
+ * A single pane operation, discriminated by `type`:
+ * - "close": targets `paneId` for the given `sessionId`.
+ * - "spawn": carries a target pane, a split direction and the new session's id and description.
+ * - "replace": carries a pane id together with the old and new session ids and a description.
+ *
+ * NOTE(review): how "spawn" and "replace" are carried out is not visible here — confirm against the action executor.
+ */
+export type PaneAction =
+  | { type: "close"; paneId: string; sessionId: string }
+  | { type: "spawn"; sessionId: string; description: string; targetPaneId: string; splitDirection: SplitDirection }
+  | { type: "replace"; paneId: string; oldSessionId: string; newSessionId: string; description: string }
+
+/**
+ * Result of deciding whether a new pane can be spawned.
+ *
+ * NOTE(review): the producer of this type is not visible here; the field notes below are assumptions to confirm.
+ */
+export interface SpawnDecision {
+  /** Whether spawning is possible. */
+  canSpawn: boolean
+  /** Pane actions attached to the decision — presumably the steps to execute when `canSpawn` is true. */
+  actions: PaneAction[]
+  /** Optional explanation — presumably set when spawning is refused. */
+  reason?: string
+}
+
+/**
+ * Width settings used for pane capacity calculations.
+ *
+ * NOTE(review): consumers are not visible here — presumably both values are in terminal columns; confirm.
+ */
+export interface CapacityConfig {
+  /** Minimum width to keep for the main pane. */
+  mainPaneMinWidth: number
+  /** Width assigned to each agent pane. */
+  agentPaneWidth: number
+}
--- a/src/hooks/AGENTS.md
+++ b/src/hooks/AGENTS.md
@@ -1,16 +1,14 @@
 # HOOKS KNOWLEDGE BASE

 ## OVERVIEW
-
-31 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.
+32 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.

 ## STRUCTURE
-
 ```
 hooks/
-├── atlas/                      # Main orchestration (771 lines)
-├── anthropic-context-window-limit-recovery/  # Auto-summarize
-├── todo-continuation-enforcer.ts # Force TODO completion
+├── atlas/                      # Main orchestration (752 lines)
+├── anthropic-context-window-limit-recovery/ # Auto-summarize
+├── todo-continuation-enforcer.ts # Force TODO completion (16k lines)
 ├── ralph-loop/                 # Self-referential dev loop
 ├── claude-code-hooks/          # settings.json compat layer - see AGENTS.md
 ├── comment-checker/            # Prevents AI slop
@@ -28,44 +26,61 @@ hooks/
 ├── prometheus-md-only/         # Planner read-only mode
 ├── agent-usage-reminder/       # Specialized agent hints
 ├── auto-update-checker/        # Plugin update check
-└── tool-output-truncator.ts    # Prevents context bloat
+├── tool-output-truncator.ts    # Prevents context bloat
+├── compaction-context-injector/ # Injects context on compaction
+├── delegate-task-retry/        # Retries failed delegations
+├── interactive-bash-session/   # Tmux session management
+├── non-interactive-env/        # Non-TTY environment handling
+├── start-work/                 # Sisyphus work session starter
+├── task-resume-info/           # Resume info for cancelled tasks
+├── question-label-truncator/   # Auto-truncates question labels
+├── category-skill-reminder/    # Reminds of category skills
+├── empty-task-response-detector.ts # Detects empty responses
+├── sisyphus-junior-notepad/    # Sisyphus Junior notepad
+└── index.ts                    # Hook aggregation + registration
 ```

 ## HOOK EVENTS
-
 | Event | Timing | Can Block | Use Case |
 |-------|--------|-----------|----------|
-| PreToolUse | Before tool | Yes | Validate/modify inputs |
-| PostToolUse | After tool | No | Append warnings, truncate |
-| UserPromptSubmit | On prompt | Yes | Keyword detection |
-| Stop | Session idle | No | Auto-continue |
-| onSummarize | Compaction | No | Preserve state |
+| UserPromptSubmit | `chat.message` | Yes | Keyword detection, slash commands |
+| PreToolUse | `tool.execute.before` | Yes | Validate/modify inputs, inject context |
+| PostToolUse | `tool.execute.after` | No | Truncate output, error recovery |
+| Stop | `event` (session.stop) | No | Auto-continue, notifications |
+| onSummarize | Compaction | No | Preserve state, inject summary context |

 ## EXECUTION ORDER
-
-**chat.message**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork → ralphLoop
-
-**tool.execute.before**: claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → rulesInjector
-
-**tool.execute.after**: editErrorRecovery → delegateTaskRetry → commentChecker → toolOutputTruncator → claudeCodeHooks
+- **UserPromptSubmit**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork
+- **PreToolUse**: questionLabelTruncator → claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → prometheusMdOnly → sisyphusJuniorNotepad → atlasHook
+- **PostToolUse**: claudeCodeHooks → toolOutputTruncator → contextWindowMonitor → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → emptyTaskResponseDetector → agentUsageReminder → interactiveBashSession → editErrorRecovery → delegateTaskRetry → atlasHook → taskResumeInfo

 ## HOW TO ADD
-
 1. Create `src/hooks/name/` with `index.ts` exporting `createMyHook(ctx)`
 2. Add hook name to `HookNameSchema` in `src/config/schema.ts`
-3. Register in `src/index.ts`:
-   ```typescript
-   const myHook = isHookEnabled("my-hook") ? createMyHook(ctx) : null
-   ```
+3. Register in `src/index.ts` and add to relevant lifecycle methods

-## PATTERNS
+## HOOK PATTERNS

- **Session-scoped state**: `Map<sessionID, Set<string>>`
- **Conditional execution**: Check `input.tool` before processing
- **Output modification**: `output.output += "\n${REMINDER}"`
+**Simple Single-Event**:
+```typescript
+export function createToolOutputTruncatorHook(ctx) {
+  return { "tool.execute.after": async (input, output) => { ... } }
+}
+```
+
+**Multi-Event with State**:
+```typescript
+export function createThinkModeHook() {
+  const state = new Map<string, ThinkModeState>()
+  return {
+    "chat.params": async (output, sessionID) => { ... },
+    "event": async ({ event }) => { /* cleanup */ }
+  }
+}
+```

 ## ANTI-PATTERNS
-
 - **Blocking non-critical**: Use PostToolUse warnings instead
- **Heavy computation**: Keep PreToolUse light
- **Redundant injection**: Track injected files
+- **Heavy computation**: Keep PreToolUse light to avoid latency
+- **Redundant injection**: Track injected files to avoid context bloat
+- **Direct state mutation**: Append with `output.output +=` instead of replacing `output.output` outright
--- a/src/hooks/atlas/index.test.ts
+++ b/src/hooks/atlas/index.test.ts
@@ -123,7 +123,7 @@ describe("atlas hook", () => {
     test("should append standalone verification when no boulder state but caller is Atlas", async () => {
       // #given - no boulder state, but caller is Atlas
       const sessionID = "session-no-boulder-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const hook = createAtlasHook(createMockPluginInput())
      const output = {
@@ -141,7 +141,7 @@ describe("atlas hook", () => {
      // #then - standalone verification reminder appended
      expect(output.output).toContain("Task completed successfully")
      expect(output.output).toContain("MANDATORY:")
-      expect(output.output).toContain("delegate_task(resume=")
+      expect(output.output).toContain("delegate_task(session_id=")
      
      cleanupMessageStorage(sessionID)
    })
@@ -149,7 +149,7 @@ describe("atlas hook", () => {
     test("should transform output when caller is Atlas with boulder state", async () => {
       // #given - Atlas caller with boulder state
       const sessionID = "session-transform-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
@@ -180,7 +180,7 @@ describe("atlas hook", () => {
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("test-plan")
      expect(output.output).toContain("LIE")
-      expect(output.output).toContain("delegate_task(resume=")
+      expect(output.output).toContain("delegate_task(session_id=")
      
      cleanupMessageStorage(sessionID)
    })
@@ -188,7 +188,7 @@ describe("atlas hook", () => {
     test("should still transform when plan is complete (shows progress)", async () => {
       // #given - boulder state with complete plan, Atlas caller
       const sessionID = "session-complete-plan-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "complete-plan.md")
      writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")
@@ -225,7 +225,7 @@ describe("atlas hook", () => {
     test("should append session ID to boulder state if not present", async () => {
       // #given - boulder state without session-append-test, Atlas caller
       const sessionID = "session-append-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -261,7 +261,7 @@ describe("atlas hook", () => {
     test("should not duplicate existing session ID", async () => {
       // #given - boulder state already has session-dup-test, Atlas caller
       const sessionID = "session-dup-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -298,7 +298,7 @@ describe("atlas hook", () => {
     test("should include boulder.json path and notepad path in transformed output", async () => {
       // #given - boulder state, Atlas caller
       const sessionID = "session-path-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "my-feature.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3")
@@ -332,10 +332,10 @@ describe("atlas hook", () => {
      cleanupMessageStorage(sessionID)
    })

-     test("should include resume and checkbox instructions in reminder", async () => {
+     test("should include session_id and checkbox instructions in reminder", async () => {
       // #given - boulder state, Atlas caller
       const sessionID = "session-resume-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -361,8 +361,8 @@ describe("atlas hook", () => {
        output
      )

-      // #then - should include resume instructions and verification
-      expect(output.output).toContain("delegate_task(resume=")
+      // #then - should include session_id instructions and verification
+      expect(output.output).toContain("delegate_task(session_id=")
      expect(output.output).toContain("[x]")
      expect(output.output).toContain("MANDATORY:")
      
@@ -373,7 +373,7 @@ describe("atlas hook", () => {
      const ORCHESTRATOR_SESSION = "orchestrator-write-test"

       beforeEach(() => {
-         setupMessageStorage(ORCHESTRATOR_SESSION, "Atlas")
+         setupMessageStorage(ORCHESTRATOR_SESSION, "atlas")
       })

      afterEach(() => {
@@ -444,7 +444,7 @@ describe("atlas hook", () => {
      test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
        // #given
        const nonOrchestratorSession = "non-orchestrator-session"
-        setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
+        setupMessageStorage(nonOrchestratorSession, "sisyphus-junior")
        
        const hook = createAtlasHook(createMockPluginInput())
        const originalOutput = "File written successfully"
@@ -601,7 +601,7 @@ describe("atlas hook", () => {
         getMainSessionID: () => MAIN_SESSION_ID,
         subagentSessions: new Set<string>(),
       }))
-       setupMessageStorage(MAIN_SESSION_ID, "Atlas")
+       setupMessageStorage(MAIN_SESSION_ID, "atlas")
     })

    afterEach(() => {
@@ -845,7 +845,7 @@ describe("atlas hook", () => {

       // #given - last agent is NOT Atlas
       cleanupMessageStorage(MAIN_SESSION_ID)
-       setupMessageStorage(MAIN_SESSION_ID, "Sisyphus")
+       setupMessageStorage(MAIN_SESSION_ID, "sisyphus")

       const mockInput = createMockPluginInput()
       const hook = createAtlasHook(mockInput)
--- a/src/hooks/atlas/index.ts
+++ b/src/hooks/atlas/index.ts
@@ -11,6 +11,7 @@ import { getMainSessionID, subagentSessions } from "../../features/claude-code-s
 import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { log } from "../../shared/logger"
 import { createSystemDirective, SYSTEM_DIRECTIVE_PREFIX, SystemDirectiveTypes } from "../../shared/system-directive"
+import { isCallerOrchestrator, getMessageDir } from "../../shared/session-utils"
 import type { BackgroundManager } from "../../features/background-agent"

 export const HOOK_NAME = "atlas"
@@ -179,13 +180,13 @@ If you were NOT given **exactly ONE atomic task**, you MUST:
 `

 function buildVerificationReminder(sessionId: string): string {
-  return `${VERIFICATION_REMINDER}
+   return `${VERIFICATION_REMINDER}

 ---

 **If ANY verification fails, use this immediately:**
 \`\`\`
-delegate_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
+delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
 \`\`\``
 }

@@ -274,6 +275,7 @@ function getGitDiffStats(directory: string): GitFileStat[] {
      cwd: directory,
      encoding: "utf-8",
      timeout: 5000,
+      stdio: ["pipe", "pipe", "pipe"],
    }).trim()

    if (!output) return []
@@ -282,6 +284,7 @@ function getGitDiffStats(directory: string): GitFileStat[] {
      cwd: directory,
      encoding: "utf-8",
      timeout: 5000,
+      stdio: ["pipe", "pipe", "pipe"],
    }).trim()

    const statusMap = new Map<string, "modified" | "added" | "deleted">()
@@ -378,28 +381,6 @@ interface ToolExecuteAfterOutput {
  metadata: Record<string, unknown>
 }

-function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
-
-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-
-  return null
-}
-
-function isCallerOrchestrator(sessionID?: string): boolean {
-   if (!sessionID) return false
-   const messageDir = getMessageDir(sessionID)
-   if (!messageDir) return false
-   const nearest = findNearestMessageWithFields(messageDir)
-   return nearest?.agent === "Atlas"
- }
-
 interface SessionState {
  lastEventWasAbortError?: boolean
  lastContinuationInjectedAt?: number
@@ -496,7 +477,7 @@ export function createAtlasHook(
       await ctx.client.session.prompt({
         path: { id: sessionID },
         body: {
-            agent: "Atlas",
+            agent: "atlas",
           ...(model !== undefined ? { model } : {}),
           parts: [{ type: "text", text: prompt }],
         },
@@ -670,7 +651,7 @@ export function createAtlasHook(
      if (input.tool === "delegate_task") {
        const prompt = output.args.prompt as string | undefined
        if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
-          output.args.prompt = prompt + `\n<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>`
+          output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
          log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
            sessionID: input.sessionID,
          })
@@ -709,8 +690,8 @@ export function createAtlasHook(
        return
      }

-      const outputStr = output.output && typeof output.output === "string" ? output.output : ""
-      const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task resumed")
+       const outputStr = output.output && typeof output.output === "string" ? output.output : ""
+       const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
      
      if (isBackgroundLaunch) {
        return
--- a/src/hooks/auto-update-checker/index.ts
+++ b/src/hooks/auto-update-checker/index.ts
@@ -5,6 +5,8 @@ import { PACKAGE_NAME } from "./constants"
 import { log } from "../../shared/logger"
 import { getConfigLoadErrors, clearConfigLoadErrors } from "../../shared/config-errors"
 import { runBunInstall } from "../../cli/config-manager"
+import { isModelCacheAvailable } from "../../shared/model-availability"
+import { hasConnectedProvidersCache, updateConnectedProvidersCache } from "../../shared/connected-providers-cache"
 import type { AutoUpdateCheckerOptions } from "./types"

 const SISYPHUS_SPINNER = ["·", "•", "●", "○", "◌", "◦", " "]
@@ -75,6 +77,8 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat
        const displayVersion = localDevVersion ?? cachedVersion

        await showConfigErrorsIfAny(ctx)
+        await showModelCacheWarningIfNeeded(ctx)
+        await updateAndShowConnectedProvidersCacheStatus(ctx)

        if (localDevVersion) {
          if (showStartupToast) {
@@ -167,6 +171,46 @@ async function runBunInstallSafe(): Promise<boolean> {
  }
 }

+/**
+ * Shows a warning toast when the model cache is not available.
+ *
+ * Does nothing when `isModelCacheAvailable()` reports a cache. A rejected
+ * toast call is ignored, and the log entry is written afterwards either way.
+ */
+async function showModelCacheWarningIfNeeded(ctx: PluginInput): Promise<void> {
+  const cacheMissing = !isModelCacheAvailable()
+  if (cacheMissing) {
+    const body = {
+      title: "Model Cache Not Found",
+      message: "Run 'opencode models --refresh' or restart OpenCode to populate the models cache for optimal agent model selection.",
+      variant: "warning" as const,
+      duration: 10000,
+    }
+
+    // Best effort: a failing toast must not break startup.
+    await ctx.client.tui.showToast({ body }).catch(() => {})
+
+    log("[auto-update-checker] Model cache warning shown")
+  }
+}
+
+/**
+ * Starts a refresh of the connected-providers cache and, when no cache
+ * existed beforehand, shows an informational toast.
+ *
+ * The refresh is started without being awaited and its rejection is ignored.
+ * A rejected toast call is ignored as well.
+ */
+async function updateAndShowConnectedProvidersCacheStatus(ctx: PluginInput): Promise<void> {
+  // Must be read before the refresh starts, otherwise a first run could not be detected.
+  const cacheExisted = hasConnectedProvidersCache()
+
+  // Not awaited: the refresh continues in the background.
+  updateConnectedProvidersCache(ctx.client).catch(() => {})
+
+  if (cacheExisted) {
+    log("[auto-update-checker] Connected providers cache exists, updating in background")
+    return
+  }
+
+  const body = {
+    title: "Connected Providers Cache",
+    message: "Building provider cache for first time. Restart OpenCode for full model filtering.",
+    variant: "info" as const,
+    duration: 8000,
+  }
+  await ctx.client.tui.showToast({ body }).catch(() => {})
+
+  log("[auto-update-checker] Connected providers cache toast shown (first run)")
+}
+
 async function showConfigErrorsIfAny(ctx: PluginInput): Promise<void> {
  const errors = getConfigLoadErrors()
  if (errors.length === 0) return
--- a/src/hooks/background-compaction/index.ts
+++ b/src/hooks/background-compaction/index.ts
@@ -1,87 +0,0 @@
-import type { BackgroundManager } from "../../features/background-agent"
-
-interface CompactingInput {
-  sessionID: string
-}
-
-interface CompactingOutput {
-  context: string[]
-  prompt?: string
-}
-
-/**
- * Background agent compaction hook - preserves task state during context compaction.
- * 
- * When OpenCode compacts session context to save tokens, this hook injects
- * information about running and recently completed background tasks so the
- * agent doesn't lose awareness of delegated work.
- */
-export function createBackgroundCompactionHook(manager: BackgroundManager) {
-  return {
-    "experimental.session.compacting": async (
-      input: CompactingInput,
-      output: CompactingOutput
-    ): Promise<void> => {
-      const { sessionID } = input
-
-      // Get running tasks for this session
-      const running = manager.getRunningTasks()
-        .filter(t => t.parentSessionID === sessionID)
-        .map(t => ({
-          id: t.id,
-          agent: t.agent,
-          description: t.description,
-          startedAt: t.startedAt,
-        }))
-
-      // Get recently completed tasks (still in memory within 5-min retention)
-      const completed = manager.getCompletedTasks()
-        .filter(t => t.parentSessionID === sessionID)
-        .slice(-10) // Last 10 completed
-        .map(t => ({
-          id: t.id,
-          agent: t.agent,
-          description: t.description,
-          status: t.status,
-        }))
-
-      // Early exit if nothing to preserve
-      if (running.length === 0 && completed.length === 0) return
-
-      const sections: string[] = ["<background-tasks>"]
-
-      // Running tasks section
-      if (running.length > 0) {
-        sections.push("## Running Background Tasks")
-        sections.push("")
-        for (const t of running) {
-          const elapsed = t.startedAt 
-            ? Math.floor((Date.now() - t.startedAt.getTime()) / 1000)
-            : 0
-          sections.push(`- **\`${t.id}\`** (${t.agent}): ${t.description} [${elapsed}s elapsed]`)
-        }
-        sections.push("")
-        sections.push("> **Note:** You WILL be notified when tasks complete.")
-        sections.push("> Do NOT poll - continue productive work.")
-        sections.push("")
-      }
-
-      // Completed tasks section
-      if (completed.length > 0) {
-        sections.push("## Recently Completed Tasks")
-        sections.push("")
-        for (const t of completed) {
-          const statusLabel = t.status === "completed" ? "[DONE]" : t.status === "error" ? "[ERROR]" : "[PENDING]"
-          sections.push(`- ${statusLabel} **\`${t.id}\`**: ${t.description}`)
-        }
-        sections.push("")
-      }
-
-      sections.push("## Retrieval")
-      sections.push('Use `background_output(task_id="<id>")` to retrieve task results.')
-      sections.push("</background-tasks>")
-
-      output.context.push(sections.join("\n"))
-    }
-  }
-}
--- a/src/hooks/category-skill-reminder/index.test.ts
+++ b/src/hooks/category-skill-reminder/index.test.ts
@@ -0,0 +1,346 @@
+import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
+import { createCategorySkillReminderHook } from "./index"
+import { updateSessionAgent, clearSessionAgent, _resetForTesting } from "../../features/claude-code-session-state"
+import * as sharedModule from "../../shared"
+
+describe("category-skill-reminder hook", () => {
+  // Captured arguments of every shared `log` call made during a test.
+  let logCalls: Array<{ msg: string; data?: unknown }>
+  let logSpy: ReturnType<typeof spyOn>
+
+  // Call _resetForTesting() and swap `log` for a recording spy before each test.
+  beforeEach(() => {
+    _resetForTesting()
+    logCalls = []
+    const record = (msg: string, data?: unknown) => {
+      logCalls.push({ msg, data })
+    }
+    logSpy = spyOn(sharedModule, "log").mockImplementation(record)
+  })
+
+  // Restore the original `log` after each test.
+  afterEach(() => {
+    logSpy?.mockRestore()
+  })
+
+  // Minimal PluginInput stub exposing only client.tui.showToast (a no-op).
+  function createMockPluginInput() {
+    const showToast = async () => {}
+    return { client: { tui: { showToast } } } as any
+  }
+
+  describe("target agent detection", () => {
+    test("should inject reminder for sisyphus agent after 3 tool calls", async () => {
+      // #given - sisyphus agent session with multiple tool calls
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "sisyphus-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "file content", metadata: {} }
+
+      // #when - 3 edit tool calls are made
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+      expect(output.output).toContain("delegate_task")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should inject reminder for atlas agent", async () => {
+      // #given - atlas agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "atlas-session"
+      updateSessionAgent(sessionID, "Atlas")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "bash", sessionID, callID }, output)
+      }
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should inject reminder for sisyphus-junior agent", async () => {
+      // #given - sisyphus-junior agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "junior-session"
+      updateSessionAgent(sessionID, "sisyphus-junior")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "write", sessionID, callID }, output)
+      }
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder for non-target agents", async () => {
+      // #given - librarian agent session (not a target)
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "librarian-session"
+      updateSessionAgent(sessionID, "librarian")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should detect agent from input.agent when session state is empty", async () => {
+      // #given - no session state, agent provided in input
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "input-agent-session"
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls with agent in input
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID, agent: "Sisyphus" }, output)
+      }
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+    })
+  })
+
+  describe("delegation tool tracking", () => {
+    test("should NOT inject reminder if delegate_task is used", async () => {
+      // #given - sisyphus agent that uses delegate_task
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "delegation-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - delegate_task is used, then more tool calls
+      await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
+      for (const callID of ["2", "3", "4"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should NOT be injected (delegation was used)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder if call_omo_agent is used", async () => {
+      // #given - sisyphus agent that uses call_omo_agent
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "omo-agent-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - call_omo_agent is used first
+      await hook["tool.execute.after"]({ tool: "call_omo_agent", sessionID, callID: "1" }, output)
+      for (const callID of ["2", "3", "4"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder if task tool is used", async () => {
+      // #given - sisyphus agent that uses task tool
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "task-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - task tool is used
+      await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
+      for (const callID of ["2", "3", "4"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("tool call counting", () => {
+    test("should NOT inject reminder before 3 tool calls", async () => {
+      // #given - sisyphus agent with only 2 tool calls
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "few-calls-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - only 2 tool calls are made
+      for (const callID of ["1", "2"])
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+
+      // #then - reminder should NOT be injected yet
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should only inject reminder once per session", async () => {
+      // #given - sisyphus agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "once-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} }
+      const output2 = { title: "", output: "result2", metadata: {} }
+
+      // #when - 6 tool calls are made (should trigger at 3, not again at 6)
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output1)
+      }
+      for (const callID of ["4", "5", "6"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output2)
+      }
+
+      // #then - reminder should be in output1 but not output2
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+      expect(output2.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should only count delegatable work tools", async () => {
+      // #given - sisyphus agent with mixed tool calls
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "mixed-tools-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - non-delegatable tools are called (should not count)
+      for (const [i, tool] of ["lsp_goto_definition", "lsp_find_references", "lsp_symbols"].entries()) {
+        await hook["tool.execute.after"]({ tool, sessionID, callID: String(i + 1) }, output)
+      }
+
+      // #then - reminder should NOT be injected (LSP tools don't count)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("event handling", () => {
+    test("should reset state on session.deleted event", async () => {
+      // #given - sisyphus agent with reminder already shown
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "delete-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} }
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output1)
+      }
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+
+      // #when - session is deleted and new session starts
+      await hook.event({ event: { type: "session.deleted", properties: { info: { id: sessionID } } } })
+
+      const output2 = { title: "", output: "result2", metadata: {} }
+      for (const callID of ["4", "5", "6"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output2)
+      }
+
+      // #then - reminder should be shown again (state was reset)
+      expect(output2.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should reset state on session.compacted event", async () => {
+      // #given - sisyphus agent with reminder already shown
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "compact-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} }
+      for (const callID of ["1", "2", "3"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output1)
+      }
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+
+      // #when - session is compacted
+      await hook.event({ event: { type: "session.compacted", properties: { sessionID } } })
+
+      const output2 = { title: "", output: "result2", metadata: {} }
+      for (const callID of ["4", "5", "6"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output2)
+      }
+
+      // #then - reminder should be shown again (state was reset)
+      expect(output2.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("case insensitivity", () => {
+    test("should handle tool names case-insensitively", async () => {
+      // #given - sisyphus agent with mixed case tool names
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "case-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - tool calls with different cases
+      for (const [i, tool] of ["EDIT", "Edit", "edit"].entries()) {
+        await hook["tool.execute.after"]({ tool, sessionID, callID: String(i + 1) }, output)
+      }
+
+      // #then - reminder should be injected (all counted)
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should handle delegation tool names case-insensitively", async () => {
+      // #given - sisyphus agent using DELEGATE_TASK in uppercase
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "case-delegate-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - DELEGATE_TASK in uppercase is used
+      await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
+      for (const callID of ["2", "3", "4"]) {
+        await hook["tool.execute.after"]({ tool: "edit", sessionID, callID }, output)
+      }
+
+      // #then - reminder should NOT be injected (delegation was detected)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+})
--- a/src/hooks/category-skill-reminder/index.ts
+++ b/src/hooks/category-skill-reminder/index.ts
@@ -0,0 +1,165 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import { getSessionAgent } from "../../features/claude-code-session-state"
+import { log } from "../../shared"
+
+/**
+ * Lowercase names of the orchestrator agents that should receive category+skill reminders.
+ * isTargetAgent() lowercases before lookup and also accepts names containing "sisyphus" or "atlas".
+ */
+const TARGET_AGENTS = new Set([
+  "sisyphus",
+  "sisyphus-junior",
+  "atlas",
+])
+
+/**
+ * Lowercase names of tools whose use counts as work that could potentially be delegated.
+ * Only calls to these tools increment the per-session call count that triggers the reminder.
+ */
+const DELEGATABLE_WORK_TOOLS = new Set([
+  "edit",
+  "write",
+  "bash",
+  "read",
+  "grep",
+  "glob",
+])
+
+/**
+ * Lowercase names of delegation tools; once one is used, no reminder is added for that tracked session state.
+ */
+const DELEGATION_TOOLS = new Set([
+  "delegate_task",
+  "call_omo_agent",
+  "task",
+])
+
+const REMINDER_MESSAGE = `
+[Category+Skill Reminder]
+
+You are an orchestrator agent. Consider whether this work should be delegated:
+
+**DELEGATE when:**
+- UI/Frontend work → category: "visual-engineering", skills: ["frontend-ui-ux"]
+- Complex logic/architecture → category: "ultrabrain"
+- Quick/trivial tasks → category: "quick"
+- Git operations → skills: ["git-master"]
+- Browser automation → skills: ["playwright"] or ["agent-browser"]
+
+**DO IT YOURSELF when:**
+- Gathering context/exploring codebase
+- Simple edits that are part of a larger task you're coordinating
+- Tasks requiring your full context understanding
+
+Example delegation:
+\`\`\`
+delegate_task(
+  category="visual-engineering",
+  load_skills=["frontend-ui-ux"],
+  description="Implement responsive navbar with animations",
+  run_in_background=true
+)
+\`\`\`
+` // Appended verbatim to a tool's output by toolExecuteAfter.
+
+interface ToolExecuteInput {
+  tool: string // tool name; lowercased before set lookups
+  sessionID: string // key for the per-session tracking state
+  callID: string // not read by this hook
+  agent?: string // fallback agent name when getSessionAgent() returns nothing
+}
+
+interface ToolExecuteOutput {
+  title: string // not read by this hook
+  output: string // tool output text; REMINDER_MESSAGE is appended to it
+  metadata: unknown // not read by this hook
+}
+
+interface SessionState {
+  delegationUsed: boolean // set once any DELEGATION_TOOLS call is seen
+  reminderShown: boolean // set after REMINDER_MESSAGE has been appended
+  toolCallCount: number // number of DELEGATABLE_WORK_TOOLS calls seen
+}
+
+/**
+ * Creates the category+skill reminder hook.
+ *
+ * For target agents, counts delegatable work-tool calls per session and appends
+ * REMINDER_MESSAGE to the tool output on the first counted call at or past 3, provided
+ * no delegation tool was used and the reminder was not yet shown for the tracked state.
+ * Tracked state is dropped on "session.deleted" and "session.compacted" events.
+ *
+ * @param _ctx Plugin input; not used by this hook.
+ * @returns Handlers keyed by "tool.execute.after" and "event".
+ */
+export function createCategorySkillReminderHook(_ctx: PluginInput) {
+  const sessionStates = new Map<string, SessionState>()
+
+  // Returns the session's tracking state, creating a zeroed entry on first access.
+  function getOrCreateState(sessionID: string): SessionState {
+    let state = sessionStates.get(sessionID)
+    if (state === undefined) {
+      state = { delegationUsed: false, reminderShown: false, toolCallCount: 0 }
+      sessionStates.set(sessionID, state)
+    }
+    return state
+  }
+
+  // The recorded session agent wins over input.agent; matching is case-insensitive.
+  function isTargetAgent(sessionID: string, inputAgent?: string): boolean {
+    const agentLower = (getSessionAgent(sessionID) ?? inputAgent)?.toLowerCase()
+    if (!agentLower) return false
+    if (TARGET_AGENTS.has(agentLower)) return true
+    return ["sisyphus", "atlas"].some((name) => agentLower.includes(name))
+  }
+
+  // Runs after each tool call: records delegation, counts work tools, appends the reminder.
+  const toolExecuteAfter = async (
+    input: ToolExecuteInput,
+    output: ToolExecuteOutput,
+  ) => {
+    const { tool, sessionID } = input
+    const toolLower = tool.toLowerCase()
+    if (!isTargetAgent(sessionID, input.agent)) return
+
+    const state = getOrCreateState(sessionID)
+    if (DELEGATION_TOOLS.has(toolLower)) {
+      state.delegationUsed = true
+      log("[category-skill-reminder] Delegation tool used", { sessionID, tool })
+      return
+    }
+    if (!DELEGATABLE_WORK_TOOLS.has(toolLower)) return
+
+    // Counting continues even after the reminder was shown or delegation was used.
+    state.toolCallCount += 1
+    if (state.delegationUsed || state.reminderShown || state.toolCallCount < 3) return
+
+    output.output += REMINDER_MESSAGE
+    state.reminderShown = true
+    const { toolCallCount } = state
+    log("[category-skill-reminder] Reminder injected", { sessionID, toolCallCount })
+  }
+
+  // Drops tracked state when a session is deleted or compacted.
+  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
+    const props = event.properties as Record<string, unknown> | undefined
+    const infoID = () => (props?.info as { id?: string } | undefined)?.id
+
+    switch (event.type) {
+      case "session.deleted": {
+        // Deleted sessions are looked up via properties.info.id only.
+        const deletedID = infoID()
+        if (deletedID) sessionStates.delete(deletedID)
+        break
+      }
+      case "session.compacted": {
+        // Compaction prefers properties.sessionID, falling back to properties.info.id.
+        const compactedID = (props?.sessionID ?? infoID()) as string | undefined
+        if (compactedID) sessionStates.delete(compactedID)
+        break
+      }
+    }
+  }
+
+  return { "tool.execute.after": toolExecuteAfter, event: eventHandler }
+}
--- a/src/hooks/claude-code-hooks/AGENTS.md
+++ b/src/hooks/claude-code-hooks/AGENTS.md
@@ -1,51 +1,48 @@
 # CLAUDE CODE HOOKS COMPATIBILITY

 ## OVERVIEW
-
-Full Claude Code settings.json hook compatibility. 5 lifecycle events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, PreCompact.
+Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode events to execute external scripts/commands defined in Claude Code configuration.

 ## STRUCTURE
-
 ```
 claude-code-hooks/
 ├── index.ts              # Main factory (401 lines)
 ├── config.ts             # Loads ~/.claude/settings.json
-├── config-loader.ts      # Extended config
+├── config-loader.ts      # Extended config (disabledHooks)
 ├── pre-tool-use.ts       # PreToolUse executor
 ├── post-tool-use.ts      # PostToolUse executor
 ├── user-prompt-submit.ts # UserPromptSubmit executor
-├── stop.ts               # Stop hook executor
+├── stop.ts               # Stop hook executor (with active state tracking)
 ├── pre-compact.ts        # PreCompact executor
 ├── transcript.ts         # Tool use recording
-├── tool-input-cache.ts   # Pre→post caching
-├── types.ts              # Hook types
-└── todo.ts               # Todo JSON fix
+├── tool-input-cache.ts   # Pre→post input caching
+└── types.ts              # Hook & IO type definitions
 ```

 ## HOOK LIFECYCLE
-
-| Event | When | Can Block | Context |
-|-------|------|-----------|---------|
-| PreToolUse | Before tool | Yes | sessionId, toolName, toolInput |
-| PostToolUse | After tool | Warn | + toolOutput, transcriptPath |
-| UserPromptSubmit | On message | Yes | sessionId, prompt, parts |
-| Stop | Session idle | inject | sessionId, parentSessionId |
-| PreCompact | Before summarize | No | sessionId |
+| Event | Timing | Can Block | Context Provided |
+|-------|--------|-----------|------------------|
+| PreToolUse | Before tool exec | Yes | sessionId, toolName, toolInput, cwd |
+| PostToolUse | After tool exec | Warn | + toolOutput, transcriptPath |
+| UserPromptSubmit | On message send | Yes | sessionId, prompt, parts, cwd |
+| Stop | Session idle/end | Inject | sessionId, parentSessionId, cwd |
+| PreCompact | Before summarize | No | sessionId, cwd |

 ## CONFIG SOURCES
-
 Priority (highest first):
-1. `.claude/settings.json` (project)
-2. `~/.claude/settings.json` (user)
+1. `.claude/settings.json` (Project-local)
+2. `~/.claude/settings.json` (Global user)

 ## HOOK EXECUTION
-
-1. Hooks loaded from settings.json
-2. Matchers filter by tool name
-3. Commands via subprocess with `$SESSION_ID`, `$TOOL_NAME`
-4. Exit codes: 0=pass, 1=warn, 2=block
+- **Matchers**: Hooks filter by tool name or event type via regex/glob.
+- **Commands**: Executed via subprocess with env vars (`$SESSION_ID`, `$TOOL_NAME`).
+- **Exit Codes**:
+  - `0`: Pass (Success)
+  - `1`: Warn (Continue with system message)
+  - `2`: Block (Abort operation/prompt)

 ## ANTI-PATTERNS
-
- **Heavy PreToolUse**: Runs before EVERY tool call
- **Blocking non-critical**: Use PostToolUse warnings
+- **Heavy PreToolUse**: Runs before EVERY tool; keep logic light to avoid latency.
+- **Blocking non-critical**: Prefer PostToolUse warnings for non-fatal issues.
+- **Direct state mutation**: Use `updatedInput` in PreToolUse instead of side effects.
+- **Ignoring Exit Codes**: Ensure scripts return `2` to properly block sensitive tools.
--- a/src/hooks/compaction-context-injector/index.ts
+++ b/src/hooks/compaction-context-injector/index.ts
@@ -33,7 +33,13 @@ When summarizing this session, you MUST include the following sections in your s
 - Pending items from the original request
 - Follow-up tasks identified during the work

-## 5. MUST NOT Do (Critical Constraints)
+## 5. Active Working Context (For Seamless Continuation)
+- **Files**: Paths of files currently being edited or frequently referenced
+- **Code in Progress**: Key code snippets, function signatures, or data structures under active development
+- **External References**: Documentation URLs, library APIs, or external resources being consulted
+- **State & Variables**: Important variable names, configuration values, or runtime state relevant to ongoing work
+
+## 6. MUST NOT Do (Critical Constraints)
 - Things that were explicitly forbidden
 - Approaches that failed and should not be retried
 - User's explicit restrictions or preferences
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -22,11 +22,14 @@ export { createNonInteractiveEnvHook } from "./non-interactive-env";
 export { createInteractiveBashSessionHook } from "./interactive-bash-session";

 export { createThinkingBlockValidatorHook } from "./thinking-block-validator";
+export { createCategorySkillReminderHook } from "./category-skill-reminder";
 export { createRalphLoopHook, type RalphLoopHook } from "./ralph-loop";
 export { createAutoSlashCommandHook } from "./auto-slash-command";
 export { createEditErrorRecoveryHook } from "./edit-error-recovery";
 export { createPrometheusMdOnlyHook } from "./prometheus-md-only";
+export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad";
 export { createTaskResumeInfoHook } from "./task-resume-info";
 export { createStartWorkHook } from "./start-work";
 export { createAtlasHook } from "./atlas";
 export { createDelegateTaskRetryHook } from "./delegate-task-retry";
+export { createQuestionLabelTruncatorHook } from "./question-label-truncator";
--- a/src/hooks/keyword-detector/constants.ts
+++ b/src/hooks/keyword-detector/constants.ts
@@ -166,34 +166,110 @@ delegate_task(agent="oracle", prompt="Review my approach: [describe plan]")
 YOU MUST LEVERAGE ALL AVAILABLE AGENTS / **CATEGORY + SKILLS** TO THEIR FULLEST POTENTIAL.
 TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.

-## AGENTS / **CATEGORY + SKILLS** UTILIZATION PRINCIPLES (by capability, not by name)
- **Codebase Exploration**: Spawn exploration agents using BACKGROUND TASKS for file patterns, internal implementations, project structure
- **Documentation & References**: Use librarian-type agents via BACKGROUND TASKS for API references, examples, external library docs
- **Planning & Strategy**: NEVER plan yourself - ALWAYS spawn a dedicated planning agent for work breakdown
-  - MUST USE PLAN AGENT. MUST USE PLAN AGENT. MUST USE PLAN AGENT.
-  - ALWAYS ASK PLAN AGENT TO WHAT CATEGORY + SKILLS / AGENTS TO LEVERAGE.
-  - IF IMPLEMENT TASK, MUST ADD TODO NOW: "CONSULT WITH PLAN AGENT WITH CATEGORY + SKILLS"
- **High-IQ Reasoning**: Leverage specialized agents for architecture decisions, code review, strategic planning
- **SPECIAL TASKS COVERED WITH CATEGORY + LOAD_SKILLS**: Delegate to specialized agents with category+skills for design and implementation, as following guide:
-  - CATEGORY + SKILL GUIDE
-    - MUST PASS \`load_skills\` FOR REQUIRED_SKILLS. MUST USE \`load_skills\` FOR REQUIRED_SKILLS.
-    - Simple project setup -> delegate_task(category="unspecified-low", load_skills=[{project-setup-skill}])
-    - Super Complex Server Workflow Implementation -> delegate_task(category="ultrabrain", load_skills=["terraform-master"], ...)
-    - Web Frontend Component Writing -> delegate_task(category="visual-engineering", load_skills=["frontend-ui-ux", "playwright"], ...)
+## MANDATORY: PLAN AGENT INVOCATION (NON-NEGOTIABLE)

-## EXECUTION RULES
- **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
- **PARALLEL**: Fire independent agent calls simultaneously via delegate_task(background=true) - NEVER wait sequentially.
- **BACKGROUND FIRST**: Use delegate_task for exploration/research agents (10+ concurrent if needed).
- **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
- **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.
-  - **CATEGORY + LOAD_SKILLS**
+**YOU MUST ALWAYS INVOKE THE PLAN AGENT FOR ANY NON-TRIVIAL TASK.**

-## WORKFLOW
-1. Analyze the request and identify required capabilities
-2. Spawn exploration/librarian agents via delegate_task(background=true) in PARALLEL (10+ if needed)
-3. Always Use Plan agent with gathered context to create detailed work breakdown
-4. Execute with continuous verification against original requirements
+| Condition | Action |
+|-----------|--------|
+| Task has 2+ steps | MUST call Plan agent |
+| Task scope unclear | MUST call Plan agent |
+| Implementation required | MUST call Plan agent |
+| Architecture decision needed | MUST call Plan agent |
+
+\`\`\`
+delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
+\`\`\`
+
+**WHY THIS IS MANDATORY:**
+- Plan agent analyzes dependencies and parallel execution opportunities
+- Plan agent recommends CATEGORY + SKILLS for each task
+- Plan agent ensures nothing is missed
+- YOU are an orchestrator, NOT an implementer
+
+**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
+
+---
+
+## AGENTS / **CATEGORY + SKILLS** UTILIZATION PRINCIPLES
+
+**DEFAULT BEHAVIOR: DELEGATE. DO NOT WORK YOURSELF.**
+
+| Task Type | Action | Why |
+|-----------|--------|-----|
+| Codebase exploration | delegate_task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
+| Documentation lookup | delegate_task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
+| Planning | delegate_task(subagent_type="plan") | Structured work breakdown |
+| Architecture/Debugging | delegate_task(subagent_type="oracle") | High-IQ reasoning |
+| Implementation | delegate_task(category="...", load_skills=[...]) | Domain-optimized models |
+
+**CATEGORY + SKILL DELEGATION:**
+\`\`\`
+// Frontend work
+delegate_task(category="visual-engineering", load_skills=["frontend-ui-ux"])
+
+// Complex logic
+delegate_task(category="ultrabrain", load_skills=["typescript-programmer"])
+
+// Quick fixes
+delegate_task(category="quick", load_skills=["git-master"])
+\`\`\`
+
+**YOU SHOULD ONLY DO IT YOURSELF WHEN:**
+- Task is trivially simple (1-2 lines, obvious change)
+- You have ALL context already loaded
+- Delegation overhead exceeds task complexity
+
+**OTHERWISE: DELEGATE. ALWAYS.**
+
+---
+
+## EXECUTION RULES (PARALLELIZATION MANDATORY)
+
+| Rule | Implementation |
+|------|----------------|
+| **PARALLEL FIRST** | Fire ALL independent agents simultaneously via delegate_task(run_in_background=true) |
+| **NEVER SEQUENTIAL** | If tasks A and B are independent, launch BOTH at once |
+| **10+ CONCURRENT** | Use 10+ background agents if needed for comprehensive exploration |
+| **COLLECT LATER** | Launch agents -> continue work -> background_output when needed |
+
+**ANTI-PATTERN (BLOCKING):**
+\`\`\`
+// WRONG: Sequential, slow
+result1 = delegate_task(..., run_in_background=false)  // waits
+result2 = delegate_task(..., run_in_background=false)  // waits again
+\`\`\`
+
+**CORRECT PATTERN:**
+\`\`\`
+// RIGHT: Parallel, fast
+delegate_task(..., run_in_background=true)  // task_id_1
+delegate_task(..., run_in_background=true)  // task_id_2
+delegate_task(..., run_in_background=true)  // task_id_3
+// Continue working, collect with background_output when needed
+\`\`\`
+
+---
+
+## WORKFLOW (MANDATORY SEQUENCE)
+
+1. **GATHER CONTEXT** (parallel background agents):
+   \`\`\`
+   delegate_task(subagent_type="explore", run_in_background=true, prompt="...")
+   delegate_task(subagent_type="librarian", run_in_background=true, prompt="...")
+   \`\`\`
+
+2. **INVOKE PLAN AGENT** (MANDATORY for non-trivial tasks):
+   \`\`\`
+   delegate_task(subagent_type="plan", prompt="<context + request>")
+   \`\`\`
+
+3. **EXECUTE VIA DELEGATION** (category + skills):
+   \`\`\`
+   delegate_task(category="...", load_skills=[...], prompt="<task from plan>")
+   \`\`\`
+
+4. **VERIFY** against original requirements

 ## VERIFICATION GUARANTEE (NON-NEGOTIABLE)

@@ -266,9 +342,9 @@ Write these criteria explicitly. Share with user if scope is non-trivial.

 THE USER ASKED FOR X. DELIVER EXACTLY X. NOT A SUBSET. NOT A DEMO. NOT A STARTING POINT.

-1. EXPLORES + LIBRARIANS
-2. GATHER -> PLAN AGENT SPAWN
-3. WORK BY DELEGATING TO ANOTHER AGENTS
+1. EXPLORES + LIBRARIANS (background)
+2. GATHER -> delegate_task(subagent_type="plan", prompt="<context + request>")
+3. WORK BY DELEGATING TO CATEGORY + SKILLS AGENTS

 NOW.

--- a/src/hooks/keyword-detector/index.test.ts
+++ b/src/hooks/keyword-detector/index.test.ts
@@ -419,7 +419,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
    }

    // #when - ultrawork keyword detected with Sisyphus agent
-    await hook["chat.message"]({ sessionID, agent: "Sisyphus" }, output)
+    await hook["chat.message"]({ sessionID, agent: "sisyphus" }, output)

    // #then - should use normal ultrawork message with agent utilization instructions
    const textPart = output.parts.find(p => p.type === "text")
@@ -471,7 +471,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
      message: {} as Record<string, unknown>,
      parts: [{ type: "text", text: "ultrawork implement" }],
    }
-    await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "Sisyphus" }, sisyphusOutput)
+    await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "sisyphus" }, sisyphusOutput)

    // #then - each session should have the correct message type
    const prometheusTextPart = prometheusOutput.parts.find(p => p.type === "text")
@@ -492,7 +492,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
    const sessionID = "same-session-agent-switch"

    // Simulate: session state was updated to sisyphus (by index.ts updateSessionAgent)
-    updateSessionAgent(sessionID, "Sisyphus")
+    updateSessionAgent(sessionID, "sisyphus")

    const output = {
      message: {} as Record<string, unknown>,
--- a/src/hooks/non-interactive-env/index.test.ts
+++ b/src/hooks/non-interactive-env/index.test.ts
@@ -178,7 +178,11 @@ describe("non-interactive-env hook", () => {
    })
  })

-  describe("cross-platform shell support", () => {
+  describe("bash tool always uses unix shell syntax", () => {
+    // The bash tool always runs in a Unix-like shell (bash/sh), even on Windows
+    // (via Git Bash, WSL, etc.), so we should always use unix export syntax.
+    // This fixes GitHub issues #983 and #889.
+
    test("#given macOS platform #when git command executes #then uses unix export syntax", async () => {
      delete process.env.PSModulePath
      process.env.SHELL = "/bin/zsh"
@@ -221,7 +225,9 @@ describe("non-interactive-env hook", () => {
      expect(cmd).toContain("; git commit")
    })

-    test("#given Windows with PowerShell #when git command executes #then uses powershell $env syntax", async () => {
+    test("#given Windows with PowerShell env #when bash tool git command executes #then still uses unix export syntax", async () => {
+      // Even when PSModulePath is set (indicating PowerShell environment),
+      // the bash tool runs in a Unix-like shell, so we use export syntax
      process.env.PSModulePath = "C:\\Program Files\\PowerShell\\Modules"
      Object.defineProperty(process, "platform", { value: "win32" })

@@ -236,13 +242,16 @@ describe("non-interactive-env hook", () => {
      )

      const cmd = output.args.command as string
-      expect(cmd).toContain("$env:")
+      // Should use unix export syntax, NOT PowerShell $env: syntax
+      expect(cmd).toStartWith("export ")
      expect(cmd).toContain("; git status")
-      expect(cmd).not.toStartWith("export ")
+      expect(cmd).not.toContain("$env:")
      expect(cmd).not.toContain("set ")
    })

-    test("#given Windows without PowerShell #when git command executes #then uses cmd set syntax", async () => {
+    test("#given Windows without SHELL env #when bash tool git command executes #then still uses unix export syntax", async () => {
+      // Even when detectShellType() would return "cmd" (no SHELL, no PSModulePath, win32),
+      // the bash tool runs in a Unix-like shell, so we use export syntax
      delete process.env.PSModulePath
      delete process.env.SHELL
      Object.defineProperty(process, "platform", { value: "win32" })
@@ -258,14 +267,18 @@ describe("non-interactive-env hook", () => {
      )

      const cmd = output.args.command as string
-      expect(cmd).toContain("set ")
-      expect(cmd).toContain("&&")
-      expect(cmd).not.toStartWith("export ")
+      // Should use unix export syntax, NOT cmd.exe set syntax
+      expect(cmd).toStartWith("export ")
+      expect(cmd).toContain("; git log")
+      expect(cmd).not.toContain("set ")
+      expect(cmd).not.toContain("&&")
      expect(cmd).not.toContain("$env:")
    })

-    test("#given PowerShell #when values contain quotes #then escapes correctly", async () => {
-      process.env.PSModulePath = "C:\\Program Files\\PowerShell\\Modules"
+    test("#given Windows Git Bash environment #when git command executes #then uses unix export syntax", async () => {
+      // Simulating Git Bash on Windows: SHELL might be set to /usr/bin/bash
+      delete process.env.PSModulePath
+      process.env.SHELL = "/usr/bin/bash"
      Object.defineProperty(process, "platform", { value: "win32" })

      const hook = createNonInteractiveEnvHook(mockCtx)
@@ -279,32 +292,16 @@ describe("non-interactive-env hook", () => {
      )

      const cmd = output.args.command as string
-      expect(cmd).toMatch(/\$env:\w+='[^']*'/)
+      expect(cmd).toStartWith("export ")
+      expect(cmd).toContain("; git status")
    })

-    test("#given cmd.exe #when values contain spaces #then escapes correctly", async () => {
+    test("#given any platform #when chained git commands via bash tool #then uses unix export syntax", async () => {
+      // Even on Windows, chained commands should use unix syntax
      delete process.env.PSModulePath
      delete process.env.SHELL
      Object.defineProperty(process, "platform", { value: "win32" })

-      const hook = createNonInteractiveEnvHook(mockCtx)
-      const output: { args: Record<string, unknown>; message?: string } = {
-        args: { command: "git status" },
-      }
-
-      await hook["tool.execute.before"](
-        { tool: "bash", sessionID: "test", callID: "1" },
-        output
-      )
-
-      const cmd = output.args.command as string
-      expect(cmd).toMatch(/set \w+="[^"]*"/)
-    })
-
-    test("#given PowerShell #when chained git commands #then env vars apply to all commands", async () => {
-      process.env.PSModulePath = "C:\\Program Files\\PowerShell\\Modules"
-      Object.defineProperty(process, "platform", { value: "win32" })
-
      const hook = createNonInteractiveEnvHook(mockCtx)
      const output: { args: Record<string, unknown>; message?: string } = {
        args: { command: "git add file && git commit -m 'test'" },
@@ -316,7 +313,7 @@ describe("non-interactive-env hook", () => {
      )

      const cmd = output.args.command as string
-      expect(cmd).toContain("$env:")
+      expect(cmd).toStartWith("export ")
      expect(cmd).toContain("; git add file && git commit")
    })
  })
--- a/src/hooks/non-interactive-env/index.ts
+++ b/src/hooks/non-interactive-env/index.ts
@@ -1,7 +1,8 @@
 import type { PluginInput } from "@opencode-ai/plugin"
+import type { ShellType } from "../../shared"
 import { HOOK_NAME, NON_INTERACTIVE_ENV, SHELL_COMMAND_PATTERNS } from "./constants"
 import { isNonInteractive } from "./detector"
-import { log, detectShellType, buildEnvPrefix } from "../../shared"
+import { log, buildEnvPrefix } from "../../shared"

 export * from "./constants"
 export * from "./detector"
@@ -50,7 +51,10 @@ export function createNonInteractiveEnvHook(_ctx: PluginInput) {
        return
      }

-      const shellType = detectShellType()
+      // The bash tool always runs in a Unix-like shell (bash/sh), even on Windows
+      // (via Git Bash, WSL, etc.), so we always use unix export syntax.
+      // This fixes GitHub issues #983 and #889.
+      const shellType: ShellType = "unix"
      const envPrefix = buildEnvPrefix(NON_INTERACTIVE_ENV, shellType)
      output.args.command = `${envPrefix} ${command}`

--- a/src/hooks/prometheus-md-only/constants.ts
+++ b/src/hooks/prometheus-md-only/constants.ts
@@ -1,8 +1,9 @@
 import { createSystemDirective, SystemDirectiveTypes } from "../../shared/system-directive"
+import { getAgentDisplayName } from "../../shared/agent-display-names"

 export const HOOK_NAME = "prometheus-md-only"

-export const PROMETHEUS_AGENTS = ["Prometheus (Planner)"]
+export const PROMETHEUS_AGENTS = ["prometheus"]

 export const ALLOWED_EXTENSIONS = [".md"]

@@ -16,7 +17,7 @@ export const PLANNING_CONSULT_WARNING = `

 ${createSystemDirective(SystemDirectiveTypes.PROMETHEUS_READ_ONLY)}

-You are being invoked by Prometheus (Planner), a READ-ONLY planning agent.
+You are being invoked by ${getAgentDisplayName("prometheus")}, a READ-ONLY planning agent.

 **CRITICAL CONSTRAINTS:**
 - DO NOT modify any files (no Write, Edit, or any file mutations)
--- a/src/hooks/prometheus-md-only/index.test.ts
+++ b/src/hooks/prometheus-md-only/index.test.ts
@@ -41,10 +41,10 @@ describe("prometheus-md-only", () => {
    }
  })

-  describe("with Prometheus agent in message storage", () => {
-    beforeEach(() => {
-      setupMessageStorage(TEST_SESSION_ID, "Prometheus (Planner)")
-    })
+   describe("with Prometheus agent in message storage", () => {
+     beforeEach(() => {
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+     })

    test("should block Prometheus from writing non-.md files", async () => {
      // #given
@@ -277,7 +277,7 @@ describe("prometheus-md-only", () => {

  describe("with non-Prometheus agent in message storage", () => {
    beforeEach(() => {
-      setupMessageStorage(TEST_SESSION_ID, "Sisyphus")
+      setupMessageStorage(TEST_SESSION_ID, "sisyphus")
    })

    test("should not affect non-Prometheus agents", async () => {
@@ -345,185 +345,195 @@ describe("prometheus-md-only", () => {
      setupMessageStorage(TEST_SESSION_ID, "Prometheus (Planner)")
    })

-    test("should allow Windows-style backslash paths under .sisyphus/", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: ".sisyphus\\plans\\work-plan.md" },
-      }
+     test("should allow Windows-style backslash paths under .sisyphus/", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: ".sisyphus\\plans\\work-plan.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should allow mixed separator paths under .sisyphus/", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: ".sisyphus\\plans/work-plan.MD" },
-      }
+     test("should allow mixed separator paths under .sisyphus/", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: ".sisyphus\\plans/work-plan.MD" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should allow uppercase .MD extension", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: ".sisyphus/plans/work-plan.MD" },
-      }
+     test("should allow uppercase .MD extension", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: ".sisyphus/plans/work-plan.MD" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should block paths outside workspace root even if containing .sisyphus", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: "/other/project/.sisyphus/plans/x.md" },
-      }
+     test("should block paths outside workspace root even if containing .sisyphus", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: "/other/project/.sisyphus/plans/x.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).rejects.toThrow("can only write/edit .md files inside .sisyphus/")
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).rejects.toThrow("can only write/edit .md files inside .sisyphus/")
+     })

-    test("should allow nested .sisyphus directories (ctx.directory may be parent)", async () => {
-      // #given - when ctx.directory is parent of actual project, path includes project name
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: "src/.sisyphus/plans/x.md" },
-      }
+     test("should allow nested .sisyphus directories (ctx.directory may be parent)", async () => {
+       // #given - when ctx.directory is parent of actual project, path includes project name
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: "src/.sisyphus/plans/x.md" },
+       }

-      // #when / #then - should allow because .sisyphus is in path
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then - should allow because .sisyphus is in path
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should block path traversal attempts", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: ".sisyphus/../secrets.md" },
-      }
+     test("should block path traversal attempts", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: ".sisyphus/../secrets.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).rejects.toThrow("can only write/edit .md files inside .sisyphus/")
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).rejects.toThrow("can only write/edit .md files inside .sisyphus/")
+     })

-    test("should allow case-insensitive .SISYPHUS directory", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: ".SISYPHUS/plans/work-plan.md" },
-      }
+     test("should allow case-insensitive .SISYPHUS directory", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: ".SISYPHUS/plans/work-plan.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should allow nested project path with .sisyphus (Windows real-world case)", async () => {
-      // #given - simulates when ctx.directory is parent of actual project
-      // User reported: xauusd-dxy-plan\.sisyphus\drafts\supabase-email-templates.md
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: "xauusd-dxy-plan\\.sisyphus\\drafts\\supabase-email-templates.md" },
-      }
+     test("should allow nested project path with .sisyphus (Windows real-world case)", async () => {
+       // #given - simulates when ctx.directory is parent of actual project
+       // User reported: xauusd-dxy-plan\.sisyphus\drafts\supabase-email-templates.md
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: "xauusd-dxy-plan\\.sisyphus\\drafts\\supabase-email-templates.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should allow nested project path with mixed separators", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: "my-project/.sisyphus\\plans/task.md" },
-      }
+     test("should allow nested project path with mixed separators", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: "my-project/.sisyphus\\plans/task.md" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).resolves.toBeUndefined()
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).resolves.toBeUndefined()
+     })

-    test("should block nested project path without .sisyphus", async () => {
-      // #given
-      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
-      const input = {
-        tool: "Write",
-        sessionID: TEST_SESSION_ID,
-        callID: "call-1",
-      }
-      const output = {
-        args: { filePath: "my-project\\src\\code.ts" },
-      }
+     test("should block nested project path without .sisyphus", async () => {
+       // #given
+       setupMessageStorage(TEST_SESSION_ID, "prometheus")
+       const hook = createPrometheusMdOnlyHook(createMockPluginInput())
+       const input = {
+         tool: "Write",
+         sessionID: TEST_SESSION_ID,
+         callID: "call-1",
+       }
+       const output = {
+         args: { filePath: "my-project\\src\\code.ts" },
+       }

-      // #when / #then
-      await expect(
-        hook["tool.execute.before"](input, output)
-      ).rejects.toThrow("can only write/edit .md files")
-    })
+       // #when / #then
+       await expect(
+         hook["tool.execute.before"](input, output)
+       ).rejects.toThrow("can only write/edit .md files")
+     })
  })
 })
--- a/src/hooks/prometheus-md-only/index.ts
+++ b/src/hooks/prometheus-md-only/index.ts
@@ -6,6 +6,7 @@ import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAG
 import { getSessionAgent } from "../../features/claude-code-session-state"
 import { log } from "../../shared/logger"
 import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
+import { getAgentDisplayName } from "../../shared/agent-display-names"

 export * from "./constants"

@@ -88,10 +89,10 @@ export function createPrometheusMdOnlyHook(ctx: PluginInput) {
      const toolName = input.tool

      // Inject read-only warning for task tools called by Prometheus
-      if (TASK_TOOLS.includes(toolName)) {
-        const prompt = output.args.prompt as string | undefined
-        if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
-          output.args.prompt = prompt + PLANNING_CONSULT_WARNING
+       if (TASK_TOOLS.includes(toolName)) {
+         const prompt = output.args.prompt as string | undefined
+         if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
+           output.args.prompt = PLANNING_CONSULT_WARNING + prompt
          log(`[${HOOK_NAME}] Injected read-only planning warning to ${toolName}`, {
            sessionID: input.sessionID,
            tool: toolName,
@@ -110,20 +111,20 @@ export function createPrometheusMdOnlyHook(ctx: PluginInput) {
        return
      }

-      if (!isAllowedFile(filePath, ctx.directory)) {
-        log(`[${HOOK_NAME}] Blocked: Prometheus can only write to .sisyphus/*.md`, {
-          sessionID: input.sessionID,
-          tool: toolName,
-          filePath,
-          agent: agentName,
-        })
-        throw new Error(
-          `[${HOOK_NAME}] Prometheus (Planner) can only write/edit .md files inside .sisyphus/ directory. ` +
-          `Attempted to modify: ${filePath}. ` +
-          `Prometheus is a READ-ONLY planner. Use /start-work to execute the plan. ` +
-          `APOLOGIZE TO THE USER, REMIND OF YOUR PLAN WRITING PROCESSES, TELL USER WHAT YOU WILL GOING TO DO AS THE PROCESS, WRITE THE PLAN`
-        )
-      }
+       if (!isAllowedFile(filePath, ctx.directory)) {
+         log(`[${HOOK_NAME}] Blocked: Prometheus can only write to .sisyphus/*.md`, {
+           sessionID: input.sessionID,
+           tool: toolName,
+           filePath,
+           agent: agentName,
+         })
+         throw new Error(
+           `[${HOOK_NAME}] ${getAgentDisplayName("prometheus")} can only write/edit .md files inside .sisyphus/ directory. ` +
+           `Attempted to modify: ${filePath}. ` +
+           `${getAgentDisplayName("prometheus")} is a READ-ONLY planner. Use /start-work to execute the plan. ` +
+           `APOLOGIZE TO THE USER, REMIND OF YOUR PLAN WRITING PROCESSES, TELL USER WHAT YOU WILL GOING TO DO AS THE PROCESS, WRITE THE PLAN`
+         )
+       }

      const normalizedPath = filePath.toLowerCase().replace(/\\/g, "/")
      if (normalizedPath.includes(".sisyphus/plans/") || normalizedPath.includes(".sisyphus\\plans\\")) {
--- a/src/hooks/question-label-truncator/index.test.ts
+++ b/src/hooks/question-label-truncator/index.test.ts
@@ -0,0 +1,136 @@
+import { describe, it, expect } from "bun:test";
+import { createQuestionLabelTruncatorHook } from "./index";
+
+describe("createQuestionLabelTruncatorHook", () => {
+  const hook = createQuestionLabelTruncatorHook();
+
+  describe("tool.execute.before", () => {
+    it("truncates labels exceeding 30 characters with ellipsis", async () => {
+      // #given
+      const longLabel = "This is a very long label that exceeds thirty characters";
+      const input = { tool: "AskUserQuestion" };
+      const output = {
+        args: {
+          questions: [
+            {
+              question: "Choose an option",
+              options: [
+                { label: longLabel, description: "A long option" },
+              ],
+            },
+          ],
+        },
+      };
+
+      // #when
+      await hook["tool.execute.before"]?.(input as any, output as any);
+
+      // #then
+      const truncatedLabel = (output.args as any).questions[0].options[0].label;
+      expect(truncatedLabel.length).toBeLessThanOrEqual(30);
+      expect(truncatedLabel).toBe("This is a very long label t...");
+      expect(truncatedLabel.endsWith("...")).toBe(true);
+    });
+
+    it("preserves labels within 30 characters", async () => {
+      // #given
+      const shortLabel = "Short label";
+      const input = { tool: "AskUserQuestion" };
+      const output = {
+        args: {
+          questions: [
+            {
+              question: "Choose an option",
+              options: [
+                { label: shortLabel, description: "A short option" },
+              ],
+            },
+          ],
+        },
+      };
+
+      // #when
+      await hook["tool.execute.before"]?.(input as any, output as any);
+
+      // #then
+      const resultLabel = (output.args as any).questions[0].options[0].label;
+      expect(resultLabel).toBe(shortLabel);
+    });
+
+    it("handles exactly 30 character labels without truncation", async () => {
+      // #given
+      const exactLabel = "Exactly thirty chars here!!!!!"; // 30 chars
+      expect(exactLabel.length).toBe(30);
+      const input = { tool: "ask_user_question" };
+      const output = {
+        args: {
+          questions: [
+            {
+              question: "Choose",
+              options: [{ label: exactLabel }],
+            },
+          ],
+        },
+      };
+
+      // #when
+      await hook["tool.execute.before"]?.(input as any, output as any);
+
+      // #then
+      const resultLabel = (output.args as any).questions[0].options[0].label;
+      expect(resultLabel).toBe(exactLabel);
+    });
+
+    it("ignores non-AskUserQuestion tools", async () => {
+      // #given
+      const input = { tool: "Bash" };
+      const output = {
+        args: { command: "echo hello" },
+      };
+      const originalArgs = { ...output.args };
+
+      // #when
+      await hook["tool.execute.before"]?.(input as any, output as any);
+
+      // #then
+      expect(output.args).toEqual(originalArgs);
+    });
+
+    it("handles multiple questions with multiple options", async () => {
+      // #given
+      const input = { tool: "AskUserQuestion" };
+      const output = {
+        args: {
+          questions: [
+            {
+              question: "Q1",
+              options: [
+                { label: "Very long label number one that needs truncation" },
+                { label: "Short" },
+              ],
+            },
+            {
+              question: "Q2",
+              options: [
+                { label: "Another extremely long label for testing purposes" },
+              ],
+            },
+          ],
+        },
+      };
+
+      // #when
+      await hook["tool.execute.before"]?.(input as any, output as any);
+
+      // #then
+      const q1opts = (output.args as any).questions[0].options;
+      const q2opts = (output.args as any).questions[1].options;
+      
+      expect(q1opts[0].label).toBe("Very long label number one ...");
+      expect(q1opts[0].label.length).toBeLessThanOrEqual(30);
+      expect(q1opts[1].label).toBe("Short");
+      expect(q2opts[0].label).toBe("Another extremely long labe...");
+      expect(q2opts[0].label.length).toBeLessThanOrEqual(30);
+    });
+  });
+});
--- a/src/hooks/question-label-truncator/index.ts
+++ b/src/hooks/question-label-truncator/index.ts
@@ -0,0 +1,61 @@
+// Default upper bound on an option label's length; used as the default
+// `maxLength` of truncateLabel below.
+const MAX_LABEL_LENGTH = 30;
+
+// One selectable choice inside a question. Only `label` is read and rewritten
+// by this module; `description` is carried through untouched.
+interface QuestionOption {
+  label: string;
+  description?: string;
+}
+
+// One question with its list of options. Apart from `options`, every field is
+// copied through unchanged by truncateQuestionLabels.
+// NOTE(review): presumably mirrors the question tool's input schema - confirm
+// against the tool definition.
+interface Question {
+  question: string;
+  header?: string;
+  options: QuestionOption[];
+  multiSelect?: boolean;
+}
+
+// Shape this module expects `output.args` to have for a question tool call.
+interface AskUserQuestionArgs {
+  questions: Question[];
+}
+
+/**
+ * Shorten `label` so that it is at most `maxLength` UTF-16 code units long.
+ *
+ * Labels that already fit are returned unchanged. Longer labels are cut and
+ * suffixed with "..." so the result never exceeds `maxLength`.
+ *
+ * @param label - Text to shorten.
+ * @param maxLength - Upper bound on the returned length (defaults to MAX_LABEL_LENGTH).
+ * @returns The original label, or a truncated copy ending in "..." when there
+ *          is room for the ellipsis.
+ */
+function truncateLabel(label: string, maxLength: number = MAX_LABEL_LENGTH): string {
+  if (label.length <= maxLength) {
+    return label;
+  }
+
+  const ellipsis = "...";
+  // When the limit is shorter than the ellipsis itself, appending it would
+  // overshoot the limit, so fall back to a plain hard cut.
+  if (maxLength < ellipsis.length) {
+    return label.substring(0, Math.max(0, maxLength));
+  }
+
+  let head = label.substring(0, maxLength - ellipsis.length);
+  // If the cut landed between the two halves of a surrogate pair, drop the
+  // dangling high surrogate so the result stays well-formed text.
+  const lastUnit = head.charCodeAt(head.length - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    head = head.slice(0, -1);
+  }
+  return head + ellipsis;
+}
+
+/**
+ * Return a copy of `args` in which every option label has been passed through
+ * truncateLabel.
+ *
+ * The input is not mutated: the top-level object, each question and each
+ * rewritten option are shallow-copied. If `args.questions` is missing or not
+ * an array, `args` is returned as-is. A question without `options` receives an
+ * empty `options` array.
+ *
+ * @param args - Arguments of a question tool call.
+ * @returns Arguments with truncated labels.
+ */
+function truncateQuestionLabels(args: AskUserQuestionArgs): AskUserQuestionArgs {
+  if (!args.questions || !Array.isArray(args.questions)) {
+    return args;
+  }
+
+  // `args` originates from an unchecked cast of tool input, so an option may
+  // lack a string label at runtime. Leave such options untouched instead of
+  // letting truncateLabel throw on `undefined.length`.
+  const shortenOption = (option: QuestionOption): QuestionOption =>
+    typeof option?.label === "string"
+      ? { ...option, label: truncateLabel(option.label) }
+      : option;
+
+  return {
+    ...args,
+    questions: args.questions.map((question) => ({
+      ...question,
+      options: question.options?.map(shortenOption) ?? [],
+    })),
+  };
+}
+
+/**
+ * Create the hook that shortens option labels on question tool calls.
+ *
+ * The returned object exposes a single "tool.execute.before" handler. The
+ * handler compares the lower-cased tool name with "askuserquestion" and
+ * "ask_user_question"; for a match whose args contain `questions`, the
+ * truncated arguments are merged back into `output.args` in place.
+ * Any other tool call is left untouched.
+ */
+export function createQuestionLabelTruncatorHook() {
+  const beforeExecute = async (
+    input: { tool: string },
+    output: { args: Record<string, unknown> }
+  ): Promise<void> => {
+    const normalizedTool = input.tool?.toLowerCase();
+
+    // Guard: only question tools are of interest.
+    if (normalizedTool !== "askuserquestion" && normalizedTool !== "ask_user_question") {
+      return;
+    }
+
+    const questionArgs = output.args as unknown as AskUserQuestionArgs | undefined;
+
+    // Guard: nothing to do without a questions payload.
+    if (!questionArgs?.questions) {
+      return;
+    }
+
+    // Write the result onto the existing args object rather than replacing it.
+    Object.assign(output.args, truncateQuestionLabels(questionArgs));
+  };
+
+  return {
+    "tool.execute.before": beforeExecute,
+  };
+}
--- a/Show More
+++ b/Show More