release: v3.1.0

feat(tmux-subagent): add replace action to prevent mass eviction
- Add column-based splittable calculation (getColumnCount, getColumnWidth)
- New decision tree: splittable → split, k=1 eviction → close+spawn, else → replace
- Add 'replace' action type using tmux respawn-pane (preserves layout)
- Replace oldest pane in-place instead of closing all panes when unsplittable
- Prevents scenario where all agent panes get closed leaving only 1
2026-01-26 06:46:47 +00:00 · 2026-01-26 15:25:11 +09:00 · 2026-01-26 15:11:16 +09:00 · 2026-01-26 14:56:55 +09:00 · 2026-01-26 14:23:05 +09:00 · 2026-01-26 12:10:30 +09:00
139 changed files with 8215 additions and 1098 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,13 +4,32 @@ on:
  push:
    branches: [master, dev]
  pull_request:
-    branches: [dev]
+    branches: [master, dev]

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

 jobs:
+  # Block PRs targeting master branch
+  block-master-pr:
+    runs-on: ubuntu-latest
+    if: github.event_name == 'pull_request'
+    steps:
+      - name: Check PR target branch
+        run: |
+          if [ "${{ github.base_ref }}" = "master" ]; then
+            echo "::error::PRs to master branch are not allowed. Please target the 'dev' branch instead."
+            echo ""
+            echo "PULL REQUESTS TO MASTER ARE BLOCKED"
+            echo ""
+            echo "All PRs must target the 'dev' branch."
+            echo "Please close this PR and create a new one targeting 'dev'."
+            exit 1
+          else
+            echo "PR targets '${{ github.base_ref }}' branch - OK"
+          fi
+
  test:
    runs-on: ubuntu-latest
    steps:
--- a/.opencode/command/remove-deadcode.md
+++ b/.opencode/command/remove-deadcode.md
@@ -0,0 +1,342 @@
+---
+description: Remove unused code from this project with ultrawork mode, LSP-verified safety, atomic commits
+---
+
+<command-instruction>
+You are a dead code removal specialist. Execute the FULL dead code removal workflow using ultrawork mode.
+
+Your core weapon: **LSP FindReferences**. If a symbol has ZERO external references, it's dead. Remove it.
+
+## CRITICAL RULES
+
+1. **LSP is law.** Never guess. Always verify with `LspFindReferences` before removing ANYTHING.
+2. **One removal = one commit.** Every dead code removal gets its own atomic commit.
+3. **Test after every removal.** Run `bun test` after each. If it fails, REVERT and skip.
+4. **Leaf-first order.** Remove deepest unused symbols first, then work up the dependency chain. Removing a leaf may expose new dead code upstream.
+5. **Never remove entry points.** `src/index.ts`, `src/cli/index.ts`, test files, config files, and files in `packages/` are off-limits unless explicitly targeted.
+
+---
+
+## STEP 0: REGISTER TODO LIST (MANDATORY FIRST ACTION)
+
+```
+TodoWrite([
+  {"id": "scan", "content": "PHASE 1: Scan codebase for dead code candidates using LSP + explore agents", "status": "pending", "priority": "high"},
+  {"id": "verify", "content": "PHASE 2: Verify each candidate with LspFindReferences - zero false positives", "status": "pending", "priority": "high"},
+  {"id": "plan", "content": "PHASE 3: Plan removal order (leaf-first dependency order)", "status": "pending", "priority": "high"},
+  {"id": "remove", "content": "PHASE 4: Remove dead code one-by-one (remove -> test -> commit loop)", "status": "pending", "priority": "high"},
+  {"id": "final", "content": "PHASE 5: Final verification - full test suite + build + typecheck", "status": "pending", "priority": "high"}
+])
+```
+
+---
+
+## PHASE 1: SCAN FOR DEAD CODE CANDIDATES
+
+**Mark scan as in_progress.**
+
+### 1.1: Launch Parallel Explore Agents (ALL BACKGROUND)
+
+Fire ALL simultaneously:
+
+```
+// Agent 1: Find all exported symbols
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
+  List each with: file path, line number, symbol name, export type (named/default).
+  EXCLUDE: src/index.ts root exports, test files.
+  Return as structured list.")
+
+// Agent 2: Find potentially unused files
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find files in src/ that are NOT imported by any other file.
+  Check import/require statements across the entire codebase.
+  EXCLUDE: index.ts files, test files, entry points, config files, .md files.
+  Return list of potentially orphaned files.")
+
+// Agent 3: Find unused imports within files
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find unused imports across src/**/*.ts files.
+  Look for import statements where the imported symbol is never referenced in the file body.
+  Return: file path, line number, imported symbol name.")
+
+// Agent 4: Find functions/variables only used in their own declaration
+delegate_task(subagent_type="explore", run_in_background=true,
+  prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
+  to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
+```
+
+### 1.2: Direct AST-Grep Scans (WHILE AGENTS RUN)
+
+```typescript
+// List single-symbol named imports (candidates to check for unused imports)
+ast_grep_search(pattern="import { $NAME } from '$PATH'", lang="typescript", paths=["src/"])
+
+// Find empty export objects
+ast_grep_search(pattern="export {}", lang="typescript", paths=["src/"])
+```
+
+### 1.3: Collect All Results
+
+Collect background agent results. Compile into a master candidate list:
+
+```
+## DEAD CODE CANDIDATES
+
+| # | File | Line | Symbol | Type | Confidence |
+|---|------|------|--------|------|------------|
+| 1 | src/foo.ts | 42 | unusedFunc | function | HIGH |
+| 2 | src/bar.ts | 10 | OldType | type | MEDIUM |
+```
+
+**Mark scan as completed.**
+
+---
+
+## PHASE 2: VERIFY WITH LSP (ZERO FALSE POSITIVES)
+
+**Mark verify as in_progress.**
+
+For EVERY candidate from Phase 1, run this verification:
+
+### 2.1: The LSP Verification Protocol
+
+For each candidate symbol:
+
+```typescript
+// Step 1: Find the symbol's exact position
+LspDocumentSymbols(filePath)  // Get line/character of the symbol
+
+// Step 2: Find ALL references across the ENTIRE workspace
+LspFindReferences(filePath, line, character, includeDeclaration=false)
+// includeDeclaration=false → only counts USAGES, not the definition itself
+
+// Step 3: Evaluate
+// 0 references → CONFIRMED DEAD CODE
+// 1+ references → NOT dead, remove from candidate list
+```
+
+### 2.2: False Positive Guards
+
+**NEVER mark as dead code if:**
+- Symbol is in `src/index.ts` (package entry point)
+- Symbol is re-exported by any `index.ts` barrel file (check the barrel's `export` statements for the symbol)
+- Symbol is referenced in test files (tests are valid consumers)
+- Symbol has `@public` or `@api` JSDoc tags
+- Symbol is in a file listed in `package.json` exports
+- Symbol is a hook factory (`createXXXHook`) registered in `src/index.ts`
+- Symbol is a tool factory (`createXXXTool`) registered in tool loading
+- Symbol is an agent definition registered in `agentSources`
+- File is a command template, skill definition, or MCP config
+
+### 2.3: Build Confirmed Dead Code List
+
+After verification, produce:
+
+```
+## CONFIRMED DEAD CODE (LSP-verified, 0 external references)
+
+| # | File | Line | Symbol | Type | Safe to Remove |
+|---|------|------|--------|------|----------------|
+| 1 | src/foo.ts | 42 | unusedFunc | function | YES |
+```
+
+**If ZERO confirmed dead code found: Report "No dead code found" and STOP.**
+
+**Mark verify as completed.**
+
+---
+
+## PHASE 3: PLAN REMOVAL ORDER
+
+**Mark plan as in_progress.**
+
+### 3.1: Dependency Analysis
+
+For each confirmed dead symbol:
+1. Check if removing it would expose other dead code
+2. Check if other dead symbols depend on this one
+3. Build removal dependency graph
+
+### 3.2: Order by Leaf-First
+
+```
+Removal Order:
+1. [Leaf symbols - no other dead code depends on them]
+2. [Intermediate symbols - depended on only by already-removed dead code]
+3. [Dead files - entire files with no live exports]
+```
+
+### 3.3: Register Granular Todos
+
+Create one todo per removal:
+
+```
+TodoWrite([
+  {"id": "remove-1", "content": "Remove unusedFunc from src/foo.ts:42", "status": "pending", "priority": "high"},
+  {"id": "remove-2", "content": "Remove OldType from src/bar.ts:10", "status": "pending", "priority": "high"},
+  // ... one per confirmed dead symbol
+])
+```
+
+**Mark plan as completed.**
+
+---
+
+## PHASE 4: ITERATIVE REMOVAL LOOP
+
+**Mark remove as in_progress.**
+
+For EACH dead code item, execute this exact loop:
+
+### 4.1: Pre-Removal Check
+
+```typescript
+// Re-verify it's still dead (earlier removals shift line numbers - re-resolve the position with LspDocumentSymbols first)
+LspFindReferences(filePath, line, character, includeDeclaration=false)
+// If references > 0 now → SKIP (the symbol is live; the earlier result was stale or pointed at a shifted position)
+```
+
+### 4.2: Remove the Dead Code
+
+Use appropriate tool:
+
+**For unused imports:**
+```typescript
+Edit(filePath, oldString="import { deadSymbol } from '...';\n", newString="")
+// Or if it's one of many imports, remove just the symbol from the import list
+```
+
+**For unused functions/classes/types:**
+```typescript
+// Read the full symbol extent first
+Read(filePath, offset=startLine, limit=endLine-startLine+1)
+// Then remove it
+Edit(filePath, oldString="[full symbol text]", newString="")
+```
+
+**For dead files:**
+```bash
+# Only after confirming ZERO imports point to this file
+rm "path/to/dead-file.ts"
+```
+
+**After removal, also clean up:**
+- Remove any imports that were ONLY used by the removed code
+- Remove any now-empty import statements
+- Fix any trailing whitespace / double blank lines left behind
+
+### 4.3: Post-Removal Verification
+
+```typescript
+// 1. LSP diagnostics on changed file
+LspDiagnostics(filePath, severity="error")
+// Must be clean (or only pre-existing errors)
+
+// 2. Run tests
+bash("bun test")
+// Must pass
+
+// 3. Typecheck
+bash("bun run typecheck")
+// Must pass
+```
+
+### 4.4: Handle Failures
+
+If ANY verification fails:
+1. **REVERT** the change immediately (`git checkout -- [changed-files]`)
+2. Mark this removal todo as `cancelled` with note: "Removal caused [error]. Skipped."
+3. Proceed to next item
+
+### 4.5: Commit
+
+```bash
+git add [changed-files]
+git commit -m "refactor: remove unused [symbolType] [symbolName] from [filePath]"
+```
+
+Mark this removal todo as `completed`.
+
+### 4.6: Re-scan After Removal
+
+After removing a symbol, check if its removal exposed NEW dead code:
+- Were there imports that only existed to serve the removed symbol?
+- Are there other symbols in the same file now unreferenced?
+
+If new dead code is found, add it to the removal queue.
+
+**Repeat 4.1-4.6 for every item. Mark remove as completed when done.**
+
+---
+
+## PHASE 5: FINAL VERIFICATION
+
+**Mark final as in_progress.**
+
+### 5.1: Full Test Suite
+```bash
+bun test
+```
+
+### 5.2: Full Typecheck
+```bash
+bun run typecheck
+```
+
+### 5.3: Full Build
+```bash
+bun run build
+```
+
+### 5.4: Summary Report
+
+```markdown
+## Dead Code Removal Complete
+
+### Removed
+| # | Symbol | File | Type | Commit |
+|---|--------|------|------|--------|
+| 1 | unusedFunc | src/foo.ts | function | abc1234 |
+
+### Skipped (caused failures)
+| # | Symbol | File | Reason |
+|---|--------|------|--------|
+| 1 | riskyFunc | src/bar.ts | Test failure: [details] |
+
+### Verification
+- Tests: PASSED (X/Y passing)
+- Typecheck: CLEAN
+- Build: SUCCESS
+- Total dead code removed: N symbols across M files
+- Total commits: K atomic commits
+```
+
+**Mark final as completed.**
+
+---
+
+## SCOPE CONTROL
+
+**Use $ARGUMENTS to set the scan scope:**
+- File path: Only scan that file
+- Directory: Only scan that directory
+- Symbol name: Only check that specific symbol
+- "all" or empty: Full project scan (default)
+
+## ABORT CONDITIONS
+
+**STOP and report to user if:**
+- 3 consecutive removals cause test failures
+- Build breaks and cannot be fixed by reverting
+- More than 50 candidates found (ask user to narrow scope)
+
+## LANGUAGE
+
+Use English for commit messages and technical output.
+
+</command-instruction>
+
+<user-request>
+$ARGUMENTS
+</user-request>
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,12 +1,24 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-01-23T15:59:00+09:00
-**Commit:** 599fad0e
+**Generated:** 2026-01-26T14:50:00+09:00
+**Commit:** 9d66b807
 **Branch:** dev

+---
+
+## **IMPORTANT: PULL REQUEST TARGET BRANCH**
+
+> **ALL PULL REQUESTS MUST TARGET THE `dev` BRANCH.**
+>
+> **DO NOT CREATE PULL REQUESTS TARGETING `master` BRANCH.**
+>
+> PRs to `master` will be automatically rejected by CI.
+
+---
+
 ## OVERVIEW

-OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash, Grok Code, GLM-4.7). 31 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.
+OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemini 3 Flash, Grok Code). 32 lifecycle hooks, 20+ tools (LSP, AST-Grep, delegation), 10 specialized agents, full Claude Code compatibility. "oh-my-zsh" for OpenCode.

 ## STRUCTURE

@@ -14,14 +26,14 @@ OpenCode plugin: multi-model agent orchestration (Claude Opus 4.5, GPT-5.2, Gemi
 oh-my-opencode/
 ├── src/
 │   ├── agents/        # 10 AI agents - see src/agents/AGENTS.md
-│   ├── hooks/         # 31 lifecycle hooks - see src/hooks/AGENTS.md
+│   ├── hooks/         # 32 lifecycle hooks - see src/hooks/AGENTS.md
 │   ├── tools/         # 20+ tools - see src/tools/AGENTS.md
 │   ├── features/      # Background agents, Claude Code compat - see src/features/AGENTS.md
-│   ├── shared/        # 50 cross-cutting utilities - see src/shared/AGENTS.md
+│   ├── shared/        # 55 cross-cutting utilities - see src/shared/AGENTS.md
 │   ├── cli/           # CLI installer, doctor - see src/cli/AGENTS.md
 │   ├── mcp/           # Built-in MCPs - see src/mcp/AGENTS.md
 │   ├── config/        # Zod schema, TypeScript types
-│   └── index.ts       # Main plugin entry (593 lines)
+│   └── index.ts       # Main plugin entry (672 lines)
 ├── script/            # build-schema.ts, build-binaries.ts
 ├── packages/          # 7 platform-specific binaries
 └── dist/              # Build output (ESM + .d.ts)
@@ -36,9 +48,10 @@ oh-my-opencode/
 | Add tool | `src/tools/` | Dir with index/types/constants/tools.ts |
 | Add MCP | `src/mcp/` | Create config, add to index.ts |
 | Add skill | `src/features/builtin-skills/` | Create dir with SKILL.md |
+| Add command | `src/features/builtin-commands/` | Add template + register in commands.ts |
 | Config schema | `src/config/schema.ts` | Zod schema, run `bun run build:schema` |
-| Background agents | `src/features/background-agent/` | manager.ts (1335 lines) |
-| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (773 lines) |
+| Background agents | `src/features/background-agent/` | manager.ts (1377 lines) |
+| Orchestrator | `src/hooks/atlas/` | Main orchestration hook (752 lines) |

 ## TDD (Test-Driven Development)

@@ -50,8 +63,8 @@ oh-my-opencode/
 **Rules:**
 - NEVER write implementation before test
 - NEVER delete failing tests - fix the code
- Test file: `*.test.ts` alongside source
- BDD comments: `#given`, `#when`, `#then`
+- Test file: `*.test.ts` alongside source (100 test files)
+- BDD comments: `//#given`, `//#when`, `//#then`

 ## CONVENTIONS

@@ -60,7 +73,7 @@ oh-my-opencode/
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern via index.ts
 - **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
- **Testing**: BDD comments, 90 test files
+- **Testing**: BDD comments, 100 test files
 - **Temperature**: 0.1 for code agents, max 0.3

 ## ANTI-PATTERNS
@@ -88,9 +101,9 @@ oh-my-opencode/
 | Sisyphus | anthropic/claude-opus-4-5 | Primary orchestrator |
 | Atlas | anthropic/claude-opus-4-5 | Master orchestrator |
 | oracle | openai/gpt-5.2 | Consultation, debugging |
-| librarian | opencode/glm-4.7-free | Docs, GitHub search |
-| explore | opencode/grok-code | Fast codebase grep |
-| multimodal-looker | google/gemini-3-flash-preview | PDF/image analysis |
+| librarian | opencode/big-pickle | Docs, GitHub search |
+| explore | opencode/gpt-5-nano | Fast codebase grep |
+| multimodal-looker | google/gemini-3-flash | PDF/image analysis |
 | Prometheus | anthropic/claude-opus-4-5 | Strategic planning |

 ## COMMANDS
@@ -99,7 +112,7 @@ oh-my-opencode/
 bun run typecheck      # Type check
 bun run build          # ESM + declarations + schema
 bun run rebuild        # Clean + Build
-bun test               # 90 test files
+bun test               # 100 test files
 ```

 ## DEPLOYMENT
@@ -113,12 +126,14 @@ bun test               # 90 test files

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/features/background-agent/manager.ts` | 1335 | Task lifecycle, concurrency |
-| `src/features/builtin-skills/skills.ts` | 1203 | Skill definitions |
+| `src/features/builtin-skills/skills.ts` | 1729 | Skill definitions |
+| `src/features/background-agent/manager.ts` | 1377 | Task lifecycle, concurrency |
 | `src/agents/prometheus-prompt.ts` | 1196 | Planning agent |
-| `src/tools/delegate-task/tools.ts` | 1039 | Category-based delegation |
-| `src/hooks/atlas/index.ts` | 773 | Orchestrator hook |
-| `src/cli/config-manager.ts` | 641 | JSONC config parsing |
+| `src/tools/delegate-task/tools.ts` | 1070 | Category-based delegation |
+| `src/hooks/atlas/index.ts` | 752 | Orchestrator hook |
+| `src/cli/config-manager.ts` | 664 | JSONC config parsing |
+| `src/index.ts` | 672 | Main plugin entry |
+| `src/features/builtin-commands/templates/refactor.ts` | 619 | Refactor command template |

 ## MCP ARCHITECTURE

--- a/README.ja.md
+++ b/README.ja.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **オーケストレーターがベータ版で利用可能になりました。`oh-my-opencode@3.0.0-beta.10`を使用してインストールしてください。**
+> [![Oh My OpenCode 3.0が正式リリースされました！](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0が正式リリースされました！`oh-my-opencode@latest`を使用してインストールしてください。**
 >
 > 一緒に歩みましょう！
 >
@@ -73,7 +73,9 @@
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)

-[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 </div>

--- a/README.ko.md
+++ b/README.ko.md
@@ -16,8 +16,8 @@
 >
 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **오케스트레이터가 베타 버전으로 사용 가능합니다. 설치하려면 `oh-my-opencode@3.0.0-beta.10`을 사용하세요.**
+> [![Oh My OpenCode 3.0이 정식 출시되었습니다!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0이 정식 출시되었습니다! `oh-my-opencode@latest`를 사용하여 설치하세요.**
 >
 > 함께해요!
 >
@@ -73,10 +73,11 @@
 [![GitHub Stars](https://img.shields.io/github/stars/code-yeongyu/oh-my-opencode?color=ffcb47&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/stargazers)
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![License](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)
-[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 [English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)

+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)
+
 </div>

 <!-- </CENTERED SECTION FOR GITHUB DISPLAY> -->
--- a/README.md
+++ b/README.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![The Orchestrator is now available in beta.](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **The Orchestrator is now available in beta. Use `oh-my-opencode@3.0.0-beta.10` to install it.**
+> [![Oh My OpenCode 3.0 is now stable!](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0 is now stable! Use `oh-my-opencode@latest` to install it.**
 >
 > Be with us!
 >
--- a/README.zh-cn.md
+++ b/README.zh-cn.md
@@ -16,8 +16,8 @@

 > [!TIP]
 >
-> [![Orchestrator 现已进入测试阶段。](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0-beta.10)
-> > **Orchestrator 现已进入测试阶段。使用 `oh-my-opencode@3.0.0-beta.10` 安装。**
+> [![Oh My OpenCode 3.0 正式发布！](./.github/assets/orchestrator-atlas.png?v=3)](https://github.com/code-yeongyu/oh-my-opencode/releases/tag/v3.0.0)
+> > **Oh My OpenCode 3.0 正式发布！使用 `oh-my-opencode@latest` 安装。**
 >
 > 加入我们！
 >
@@ -74,7 +74,9 @@
 [![GitHub Issues](https://img.shields.io/github/issues/code-yeongyu/oh-my-opencode?color=ff80eb&labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/issues)
 [![许可证](https://img.shields.io/badge/license-SUL--1.0-white?labelColor=black&style=flat-square)](https://github.com/code-yeongyu/oh-my-opencode/blob/master/LICENSE.md)

-[English](README.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+[English](README.md) | [한국어](README.ko.md) | [日本語](README.ja.md) | [简体中文](README.zh-cn.md)
+
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/code-yeongyu/oh-my-opencode)

 </div>

--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -38,6 +38,7 @@
        "type": "string",
        "enum": [
          "playwright",
+          "agent-browser",
          "frontend-ui-ux",
          "git-master"
        ]
@@ -70,12 +71,14 @@
          "interactive-bash-session",
          "thinking-block-validator",
          "ralph-loop",
+          "category-skill-reminder",
          "compaction-context-injector",
          "claude-code-hooks",
          "auto-slash-command",
          "edit-error-recovery",
          "delegate-task-retry",
          "prometheus-md-only",
+          "sisyphus-junior-notepad",
          "start-work",
          "atlas"
        ]
@@ -1787,7 +1790,8 @@
            "enum": [
              "low",
              "medium",
-              "high"
+              "high",
+              "xhigh"
            ]
          },
          "textVerbosity": {
@@ -2170,6 +2174,55 @@
          "type": "boolean"
        }
      }
+    },
+    "browser_automation_engine": {
+      "type": "object",
+      "properties": {
+        "provider": {
+          "default": "playwright",
+          "type": "string",
+          "enum": [
+            "playwright",
+            "agent-browser"
+          ]
+        }
+      }
+    },
+    "tmux": {
+      "type": "object",
+      "properties": {
+        "enabled": {
+          "default": false,
+          "type": "boolean"
+        },
+        "layout": {
+          "default": "main-vertical",
+          "type": "string",
+          "enum": [
+            "main-horizontal",
+            "main-vertical",
+            "tiled",
+            "even-horizontal",
+            "even-vertical"
+          ]
+        },
+        "main_pane_size": {
+          "default": 60,
+          "type": "number",
+          "minimum": 20,
+          "maximum": 80
+        },
+        "main_pane_min_width": {
+          "default": 120,
+          "type": "number",
+          "minimum": 40
+        },
+        "agent_pane_min_width": {
+          "default": 40,
+          "type": "number",
+          "minimum": 20
+        }
+      }
    }
  }
 }
--- a/bun.lock
+++ b/bun.lock
@@ -27,13 +27,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.0.0-beta.11",
-        "oh-my-opencode-darwin-x64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-arm64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-arm64-musl": "3.0.0-beta.11",
-        "oh-my-opencode-linux-x64": "3.0.0-beta.11",
-        "oh-my-opencode-linux-x64-musl": "3.0.0-beta.11",
-        "oh-my-opencode-windows-x64": "3.0.0-beta.11",
+        "oh-my-opencode-darwin-arm64": "3.0.1",
+        "oh-my-opencode-darwin-x64": "3.0.1",
+        "oh-my-opencode-linux-arm64": "3.0.1",
+        "oh-my-opencode-linux-arm64-musl": "3.0.1",
+        "oh-my-opencode-linux-x64": "3.0.1",
+        "oh-my-opencode-linux-x64-musl": "3.0.1",
+        "oh-my-opencode-windows-x64": "3.0.1",
      },
    },
  },
@@ -225,19 +225,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7cFv2bbz9HTY7sshgVTu+IhvYf7CT0czDYqHEB+dYfEqFU6TaoSMimq6uHqcWegUUR1T7PNmc0dyjYVw69FeVA=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.0.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-LRcLVi6DsmGh3ICFeN4yVJ0KinvCM5jotd2z7tZQ74n0sziHO7grjK1CmJaPV9eCv0clatoK5xfFCeEJ3FvXYg=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.0.0-beta.11", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-rGAbDdUySWITIdm2yiuNFB9lFYaSXT8LMtg97LTlOO5vZbI3M+obIS3QlIkBtAhgOTIPB7Ni+T0W44OmJpHoYA=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.0.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ZaC0ZBe5M2f2aMncNsAMu9IZ3MjSPfNVcfUTCgJkp03db8lLPsajgjeG3556Er72hxignDPsEbrLkJBNlsDbAA=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-F9dqwWwGAdqeSkE7Tre5DmHQXwDpU2Z8Jk0lwTJMLj+kMqYFDVPjLPo4iVUdwPpxpmm0pR84u/oonG/2+84/zw=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.0.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-pcOvV6Y2GSwKr0exDndeB2BtFt297XhJFQgrq1cbeEJawoRONDRp7LNSpjwILSQpQ7YkkYnO2bIczBmxI5llNA=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-H+zOtHkHd+TmdPj64M1A0zLOk7OHIK4C8yqfLFhfizOIBffT1yOhAs6EpK3EqPhfPLu54ADgcQcu8W96VP24UA=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.0.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7kXKaVbgFnOMSaw+j4JbZNs7O7mkvCekcfWPwh/9I/0WD21/n4PbAGl01ePhRoQh+u9MC6t8FH046hEjL2sk1g=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-IG+KODTJ8rs6cEJ2wN6Zpr6YtvCS5OpYP6jBdGJltmUpjQdMhdMsaY3ysZk+9Vxpx2KC3xj5KLHV1USg3uBTeg=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.0.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-1BOV1EnKa5BErhZmWiddnbriHwm1KFrPr+0BUCDdFX/d/hrMAJTo1733zaEnvKuXzvrdHSp/VznXheeUI1VjkA=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.0.0-beta.11", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-irV+AuWrHqNm7VT7HO56qgymR0+vEfJbtB3vCq68kprH2V4NQmGp2MNKIYPnUCYL7NEK3H2NX+h06YFZJ/8ELQ=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.0.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ASyTVatvU1nNJ0mk9o+A/GjybT5vOdgU172ystzCsnQ+12Mnv68GgaeMu/UFJgJNaZmKdhyUAP9XhnOKvEDBGQ=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.0.0-beta.11", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-exZ/NEwGBlxyWszN7dvOfzbYX0cuhBZXftqAAFOlVP26elDHdo+AmSmLR/4cJyzpR9nCWz4xvl/RYF84bY6OEA=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.0.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-QIuA564mVpwzCprhhAoyd8TSw0Rt2VM6M9y7H0fOoC/UjXuU+d7wIuUNuqUUMVaUnMedkctTZop0X0i2Q+Bvhg=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/docs/category-skill-guide.md
+++ b/docs/category-skill-guide.md
@@ -21,13 +21,13 @@ A Category is an agent configuration preset optimized for specific domains.

 | Category | Default Model | Use Cases |
 |----------|---------------|-----------|
-| `visual-engineering` | `google/gemini-3-pro-preview` | Frontend, UI/UX, design, styling, animation |
+| `visual-engineering` | `google/gemini-3-pro` | Frontend, UI/UX, design, styling, animation |
 | `ultrabrain` | `openai/gpt-5.2-codex` (xhigh) | Deep logical reasoning, complex architecture decisions requiring extensive analysis |
-| `artistry` | `google/gemini-3-pro-preview` (max) | Highly creative/artistic tasks, novel ideas |
+| `artistry` | `google/gemini-3-pro` (max) | Highly creative/artistic tasks, novel ideas |
 | `quick` | `anthropic/claude-haiku-4-5` | Trivial tasks - single file changes, typo fixes, simple modifications |
 | `unspecified-low` | `anthropic/claude-sonnet-4-5` | Tasks that don't fit other categories, low effort required |
 | `unspecified-high` | `anthropic/claude-opus-4-5` (max) | Tasks that don't fit other categories, high effort required |
-| `writing` | `google/gemini-3-flash-preview` | Documentation, prose, technical writing |
+| `writing` | `google/gemini-3-flash` | Documentation, prose, technical writing |

 ### Usage

@@ -70,12 +70,12 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to

 ### Usage

-Add desired skill names to the `skills` array.
+Add desired skill names to the `load_skills` array.

 ```typescript
 delegate_task(
  category="quick",
-  skills=["git-master"],
+  load_skills=["git-master"],
  prompt="Commit current changes. Follow commit message style."
 )
 ```
@@ -110,17 +110,17 @@ You can create powerful specialized agents by combining Categories and Skills.

 ### 🎨 The Designer (UI Implementation)
 - **Category**: `visual-engineering`
- **Skills**: `["frontend-ui-ux", "playwright"]`
+- **load_skills**: `["frontend-ui-ux", "playwright"]`
 - **Effect**: Implements aesthetic UI and verifies rendering results directly in browser.

 ### 🏗️ The Architect (Design Review)
 - **Category**: `ultrabrain`
- **Skills**: `[]` (pure reasoning)
+- **load_skills**: `[]` (pure reasoning)
 - **Effect**: Leverages GPT-5.2's logical reasoning for in-depth system architecture analysis.

 ### ⚡ The Maintainer (Quick Fixes)
 - **Category**: `quick`
- **Skills**: `["git-master"]`
+- **load_skills**: `["git-master"]`
 - **Effect**: Uses cost-effective models to quickly fix code and generate clean commits.

 ---
@@ -131,7 +131,7 @@ When delegating, **clear and specific** prompts are essential. Include these 7 e

 1. **TASK**: What needs to be done? (single objective)
 2. **EXPECTED OUTCOME**: What is the deliverable?
-3. **REQUIRED SKILLS**: Which skills should be used?
+3. **REQUIRED SKILLS**: Which skills should be loaded via `load_skills`?
 4. **REQUIRED TOOLS**: Which tools must be used? (whitelist)
 5. **MUST DO**: What must be done (constraints)
 6. **MUST NOT DO**: What must never be done
@@ -177,7 +177,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
  "categories": {
    // 1. Define new custom category
    "korean-writer": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
      "temperature": 0.5,
      "prompt_append": "You are a Korean technical writer. Maintain a friendly and clear tone."
    },
--- a/docs/cli-guide.md
+++ b/docs/cli-guide.md
@@ -175,7 +175,7 @@ Configuration files support **JSONC (JSON with Comments)** format. You can use c
  /* Category customization */
  "categories": {
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
  },
 }
--- a/docs/configurations.md
+++ b/docs/configurations.md
@@ -22,13 +22,13 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
  "agents": {
    "oracle": { "model": "openai/gpt-5.2" },           // Use GPT for debugging
    "librarian": { "model": "zai-coding-plan/glm-4.7" }, // Cheap model for research
-    "explore": { "model": "opencode/grok-code" }        // Free model for grep
+    "explore": { "model": "opencode/gpt-5-nano" }        // Free model for grep
  },
  
  // Override category models (used by delegate_task)
  "categories": {
-    "quick": { "model": "opencode/grok-code" },         // Fast/cheap for trivial tasks
-    "visual-engineering": { "model": "google/gemini-3-pro-preview" } // Gemini for UI
+    "quick": { "model": "opencode/gpt-5-nano" },         // Fast/cheap for trivial tasks
+    "visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
  }
 }
 ```
@@ -75,7 +75,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`
      "model": "openai/gpt-5.2"  // GPT for strategic reasoning
    },
    "explore": {
-      "model": "opencode/grok-code"  // Free & fast for exploration
+      "model": "opencode/gpt-5-nano"  // Free & fast for exploration
    },
  },
 }
@@ -83,7 +83,7 @@ When both `oh-my-opencode.jsonc` and `oh-my-opencode.json` files exist, `.jsonc`

 ## Google Auth

-**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin. It provides multi-account load balancing, more models (including Claude via Antigravity), and active maintenance. See [Installation > Google Gemini](../README.md#google-gemini-antigravity-oauth).
+**Recommended**: For Google Gemini authentication, install the [`opencode-antigravity-auth`](https://github.com/NoeFabris/opencode-antigravity-auth) plugin (`@latest`). It provides multi-account load balancing, variant-based thinking levels, a dual quota system (Antigravity + Gemini CLI), and active maintenance. See [Installation > Google Gemini](guide/installation.md#google-gemini-antigravity-oauth).

 ## Agents

@@ -159,8 +159,8 @@ Available agents: `oracle`, `librarian`, `explore`, `multimodal-looker`

 Oh My OpenCode includes built-in skills that provide additional capabilities:

- **playwright**: Browser automation with Playwright MCP. Use for web scraping, testing, screenshots, and browser interactions.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', skills=['git-master'], ...)` to save context.
+- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
+- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.

 Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:

@@ -170,7 +170,54 @@ Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-openc
 }
 ```

-Available built-in skills: `playwright`, `git-master`
+Available built-in skills: `playwright`, `agent-browser`, `git-master`
+
+## Browser Automation
+
+Choose between two browser automation providers:
+
+| Provider | Interface | Features | Installation |
+|----------|-----------|----------|--------------|
+| **playwright** (default) | MCP tools | Playwright MCP server with structured tool calls | Auto-installed via npx |
+| **agent-browser** | Bash CLI | Vercel's CLI with session management, parallel browsers | Requires `bun add -g agent-browser` |
+
+**Switch providers** via `browser_automation_engine` in `oh-my-opencode.json`:
+
+```json
+{
+  "browser_automation_engine": {
+    "provider": "agent-browser"
+  }
+}
+```
+
+### Playwright (Default)
+
+Uses the official Playwright MCP server (`@playwright/mcp`). Browser automation happens through structured MCP tool calls.
+
+### agent-browser
+
+Uses [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser). Key advantages:
+- **Session management**: Run multiple isolated browser instances with `--session` flag
+- **Persistent profiles**: Keep browser state across restarts with `--profile`
+- **Snapshot-based workflow**: Get element refs via `snapshot -i`, interact with `@e1`, `@e2`, etc.
+- **CLI-first**: All commands via Bash - great for scripting
+
+**Installation required**:
+```bash
+bun add -g agent-browser
+agent-browser install  # Download Chromium
+```
+
+**Example workflow**:
+```bash
+agent-browser open https://example.com
+agent-browser snapshot -i  # Get interactive elements with refs
+agent-browser fill @e1 "user@example.com"
+agent-browser click @e2
+agent-browser screenshot result.png
+agent-browser close
+```

 ## Git Master

@@ -305,7 +352,7 @@ Categories enable domain-specific task delegation via the `delegate_task` tool.

 | Category         | Model                         | Description                                                                  |
 | ---------------- | ----------------------------- | ---------------------------------------------------------------------------- |
-| `visual`         | `google/gemini-3-pro-preview` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7).           |
+| `visual`         | `google/gemini-3-pro` | Frontend, UI/UX, design-focused tasks. High creativity (temp 0.7).           |
 | `business-logic` | `openai/gpt-5.2`              | Backend logic, architecture, strategic reasoning. Low creativity (temp 0.1). |

 **Usage:**
@@ -332,7 +379,7 @@ Add custom categories in `oh-my-opencode.json`:
      "prompt_append": "Focus on data analysis, ML pipelines, and statistical methods."
    },
    "visual": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "prompt_append": "Use shadcn/ui components and Tailwind CSS."
    }
  }
@@ -403,9 +450,9 @@ Each agent has a defined provider priority chain. The system tries providers in
 |-------|-------------------|-------------------------|
 | **Sisyphus** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **oracle** | `gpt-5.2` | openai → anthropic → google → github-copilot → opencode |
-| **librarian** | `glm-4.7-free` | opencode → github-copilot → anthropic |
-| **explore** | `grok-code` | opencode → anthropic → github-copilot |
-| **multimodal-looker** | `gemini-3-flash-preview` | google → anthropic → zai → openai → github-copilot → opencode |
+| **librarian** | `big-pickle` | opencode → github-copilot → anthropic |
+| **explore** | `gpt-5-nano` | anthropic → opencode |
+| **multimodal-looker** | `gemini-3-flash` | google → openai → zai-coding-plan → anthropic → opencode |
 | **Prometheus (Planner)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **Metis (Plan Consultant)** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **Momus (Plan Reviewer)** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
@@ -417,13 +464,13 @@ Categories follow the same resolution logic:

 | Category | Model (no prefix) | Provider Priority Chain |
 |----------|-------------------|-------------------------|
-| **visual-engineering** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
+| **visual-engineering** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
 | **ultrabrain** | `gpt-5.2-codex` | openai → anthropic → google → github-copilot → opencode |
-| **artistry** | `gemini-3-pro-preview` | google → openai → anthropic → github-copilot → opencode |
+| **artistry** | `gemini-3-pro` | google → openai → anthropic → github-copilot → opencode |
 | **quick** | `claude-haiku-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **unspecified-low** | `claude-sonnet-4-5` | anthropic → github-copilot → opencode → antigravity → google |
 | **unspecified-high** | `claude-opus-4-5` | anthropic → github-copilot → opencode → antigravity → google |
-| **writing** | `gemini-3-flash-preview` | google → openai → anthropic → github-copilot → opencode |
+| **writing** | `gemini-3-flash` | google → openai → anthropic → github-copilot → opencode |

 ### Checking Your Configuration

--- a/docs/features.md
+++ b/docs/features.md
@@ -12,9 +12,9 @@ Oh-My-OpenCode provides 10 specialized AI agents. Each has distinct expertise, o
 |-------|-------|---------|
 | **Sisyphus** | `anthropic/claude-opus-4-5` | **The default orchestrator.** Plans, delegates, and executes complex tasks using specialized subagents with aggressive parallel execution. Todo-driven workflow with extended thinking (32k budget). |
 | **oracle** | `openai/gpt-5.2` | Architecture decisions, code review, debugging. Read-only consultation - stellar logical reasoning and deep analysis. Inspired by AmpCode. |
-| **librarian** | `opencode/glm-4.7-free` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
-| **explore** | `opencode/grok-code` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise Grok. Inspired by Claude Code. |
-| **multimodal-looker** | `google/gemini-3-flash-preview` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Saves tokens by having another agent process media. |
+| **librarian** | `opencode/big-pickle` | Multi-repo analysis, documentation lookup, OSS implementation examples. Deep codebase understanding with evidence-based answers. Inspired by AmpCode. |
+| **explore** | `opencode/gpt-5-nano` | Fast codebase exploration and contextual grep. Uses Gemini 3 Flash when Antigravity auth is configured, Haiku when Claude max20 is available, otherwise GPT-5 Nano. Inspired by Claude Code. |
+| **multimodal-looker** | `google/gemini-3-flash` | Visual content specialist. Analyzes PDFs, images, diagrams to extract information. Saves tokens by having another agent process media. |

 ### Planning Agents

@@ -78,11 +78,15 @@ Skills provide specialized workflows with embedded MCP servers and detailed inst
 | **frontend-ui-ux** | UI/UX tasks, styling | Designer-turned-developer persona. Crafts stunning UI/UX even without design mockups. Emphasizes bold aesthetic direction, distinctive typography, cohesive color palettes. |
 | **git-master** | commit, rebase, squash, blame | MUST USE for ANY git operations. Atomic commits with automatic splitting, rebase/squash workflows, history search (blame, bisect, log -S). |

-### Skill: playwright
+### Skill: Browser Automation (playwright / agent-browser)

 **Trigger**: Any browser-related request

-Provides browser automation via Playwright MCP server:
+Oh-My-OpenCode provides two browser automation providers, configurable via `browser_automation_engine.provider`:
+
+#### Option 1: Playwright MCP (Default)
+
+The default provider uses Playwright MCP server:

 ```yaml
 mcp:
@@ -91,18 +95,41 @@ mcp:
    args: ["@playwright/mcp@latest"]
 ```

-**Capabilities**:
+**Usage**:
+```
+/playwright Navigate to example.com and take a screenshot
+```
+
+#### Option 2: Agent Browser CLI (Vercel)
+
+Alternative provider using [Vercel's agent-browser CLI](https://github.com/vercel-labs/agent-browser):
+
+```json
+{
+  "browser_automation_engine": {
+    "provider": "agent-browser"
+  }
+}
+```
+
+**Requires installation**:
+```bash
+bun add -g agent-browser
+```
+
+**Usage**:
+```
+Use agent-browser to navigate to example.com and extract the main heading
+```
+
+#### Capabilities (Both Providers)
+
 - Navigate and interact with web pages
 - Take screenshots and PDFs
 - Fill forms and click elements
 - Wait for network requests
 - Scrape content

-**Usage**:
-```
-/playwright Navigate to example.com and take a screenshot
-```
-
 ### Skill: frontend-ui-ux

 **Trigger**: UI design tasks, visual changes
--- a/docs/guide/installation.md
+++ b/docs/guide/installation.md
@@ -132,7 +132,7 @@ First, add the opencode-antigravity-auth plugin:
 {
  "plugin": [
    "oh-my-opencode",
-    "opencode-antigravity-auth@1.2.8"
+    "opencode-antigravity-auth@latest"
  ]
 }
 ```
@@ -140,7 +140,7 @@ First, add the opencode-antigravity-auth plugin:
 ##### Model Configuration

 You'll also need full model settings in `opencode.json`.
-Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy provider/models config from the README, and merge carefully to avoid breaking the user's existing setup.
+Read the [opencode-antigravity-auth documentation](https://github.com/NoeFabris/opencode-antigravity-auth), copy the full model configuration from the README, and merge carefully to avoid breaking the user's existing setup. The plugin now uses a **variant system** — models like `antigravity-gemini-3-pro` support `low`/`high` variants instead of separate `-low`/`-high` model entries.

 ##### oh-my-opencode Agent Model Override

@@ -154,7 +154,17 @@ The `opencode-antigravity-auth` plugin uses different model names than the built
 }
 ```

-**Available model names**: `google/antigravity-gemini-3-pro-high`, `google/antigravity-gemini-3-pro-low`, `google/antigravity-gemini-3-flash`, `google/antigravity-claude-sonnet-4-5`, `google/antigravity-claude-sonnet-4-5-thinking-low`, `google/antigravity-claude-sonnet-4-5-thinking-medium`, `google/antigravity-claude-sonnet-4-5-thinking-high`, `google/antigravity-claude-opus-4-5-thinking-low`, `google/antigravity-claude-opus-4-5-thinking-medium`, `google/antigravity-claude-opus-4-5-thinking-high`, `google/gemini-3-pro-preview`, `google/gemini-3-flash-preview`, `google/gemini-2.5-pro`, `google/gemini-2.5-flash`
+**Available models (Antigravity quota)**:
+- `google/antigravity-gemini-3-pro` — variants: `low`, `high`
+- `google/antigravity-gemini-3-flash` — variants: `minimal`, `low`, `medium`, `high`
+- `google/antigravity-claude-sonnet-4-5` — no variants
+- `google/antigravity-claude-sonnet-4-5-thinking` — variants: `low`, `max`
+- `google/antigravity-claude-opus-4-5-thinking` — variants: `low`, `max`
+
+**Available models (Gemini CLI quota)**:
+- `google/gemini-2.5-flash`, `google/gemini-2.5-pro`, `google/gemini-3-flash-preview`, `google/gemini-3-pro-preview`
+
+> **Note**: Legacy tier-suffixed names like `google/antigravity-gemini-3-pro-high` still work, but variants are recommended. Use `--variant=high` with the base model name instead.

 Then authenticate:

@@ -183,7 +193,7 @@ When GitHub Copilot is the best available provider, oh-my-opencode uses these mo
 | ------------- | -------------------------------- |
 | **Sisyphus**  | `github-copilot/claude-opus-4.5` |
 | **Oracle**    | `github-copilot/gpt-5.2`         |
-| **Explore**   | `github-copilot/grok-code-fast-1`|
+| **Explore**   | `opencode/gpt-5-nano`              |
 | **Librarian** | `zai-coding-plan/glm-4.7` (if Z.ai available) or fallback |

 GitHub Copilot acts as a proxy provider, routing requests to underlying models based on your subscription.
@@ -203,7 +213,7 @@ If Z.ai is the only provider available, all agents will use GLM models:

 #### OpenCode Zen

-OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/grok-code`, and `opencode/glm-4.7-free`.
+OpenCode Zen provides access to `opencode/` prefixed models including `opencode/claude-opus-4-5`, `opencode/gpt-5.2`, `opencode/gpt-5-nano`, and `opencode/big-pickle`.

 When OpenCode Zen is the best available provider (no native or Copilot), these models are used:

@@ -211,8 +221,8 @@ When OpenCode Zen is the best available provider (no native or Copilot), these m
 | ------------- | -------------------------------- |
 | **Sisyphus**  | `opencode/claude-opus-4-5`       |
 | **Oracle**    | `opencode/gpt-5.2`               |
-| **Explore**   | `opencode/grok-code`             |
-| **Librarian** | `opencode/glm-4.7-free`          |
+| **Explore**   | `opencode/gpt-5-nano`             |
+| **Librarian** | `opencode/big-pickle`          |

 ##### Setup

--- a/docs/guide/overview.md
+++ b/docs/guide/overview.md
@@ -112,12 +112,12 @@ Each agent has a **provider priority chain**. The system tries providers in orde

 ```
 Example: multimodal-looker
-google → anthropic → zai → openai → github-copilot → opencode
-   ↓         ↓         ↓        ↓           ↓            ↓
-gemini   haiku     glm-4.6v  gpt-5.2    fallback     fallback
+google → openai → zai-coding-plan → anthropic → opencode
+   ↓        ↓           ↓              ↓           ↓
+gemini   gpt-5.2     glm-4.6v       haiku     gpt-5-nano
 ```

-If you have Gemini, it uses `google/gemini-3-flash-preview`. No Gemini but have Claude? Uses `anthropic/claude-haiku-4-5`. And so on.
+If you have Gemini, it uses `google/gemini-3-flash`. No Gemini but have OpenAI? Uses `openai/gpt-5.2`. No Gemini, OpenAI, or Z.ai but have Claude? Uses `anthropic/claude-haiku-4-5`. And so on.

 ### Example Configuration

@@ -128,14 +128,14 @@ Here's a real-world config for a user with **Claude, OpenAI, Gemini, and Z.ai**
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    // Override specific agents only - rest use fallback chain
-    "Atlas": { "model": "anthropic/claude-sonnet-4-5", "variant": "max" },
+    "atlas": { "model": "anthropic/claude-sonnet-4-5", "variant": "max" },
    "librarian": { "model": "zai-coding-plan/glm-4.7" },
-    "explore": { "model": "opencode/grok-code" },
+    "explore": { "model": "opencode/gpt-5-nano" },
    "multimodal-looker": { "model": "zai-coding-plan/glm-4.6v" }
  },
  "categories": {
    // Override categories for cost optimization
-    "quick": { "model": "opencode/grok-code" },
+    "quick": { "model": "opencode/gpt-5-nano" },
    "unspecified-low": { "model": "zai-coding-plan/glm-4.7" }
  },
  "experimental": {
--- a/docs/guide/understanding-orchestration-system.md
+++ b/docs/guide/understanding-orchestration-system.md
@@ -326,13 +326,13 @@ Skills prepend specialized instructions to subagent prompts:
 // Category + Skill combination
 delegate_task(
  category="visual-engineering", 
-  skills=["frontend-ui-ux"],  // Adds UI/UX expertise
+  load_skills=["frontend-ui-ux"],  // Adds UI/UX expertise
  prompt="..."
 )

 delegate_task(
  category="general",
-  skills=["playwright"],  // Adds browser automation expertise
+  load_skills=["playwright"],  // Adds browser automation expertise
  prompt="..."
 )
 ```
@@ -341,8 +341,8 @@ delegate_task(

 | Before | After |
 |--------|-------|
-| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + skills=["frontend-ui-ux"]` |
-| One-size-fits-all | `category="visual-engineering" + skills=["unity-master"]` |
+| Hardcoded: `frontend-ui-ux-engineer` (Gemini 3 Pro) | `category="visual-engineering" + load_skills=["frontend-ui-ux"]` |
+| One-size-fits-all | `category="visual-engineering" + load_skills=["unity-master"]` |
 | Model bias | Category-based: model abstraction eliminates bias |

 ---
@@ -365,7 +365,7 @@ sequenceDiagram
        
        Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
        
-        Orchestrator->>Junior: delegate_task(category, skills, prompt)
+        Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
        
        Junior->>Junior: Create todos, execute
        Junior->>Junior: Verify (lsp_diagnostics, tests)
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -73,13 +73,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.0.0-beta.14",
-    "oh-my-opencode-darwin-x64": "3.0.0-beta.14",
-    "oh-my-opencode-linux-arm64": "3.0.0-beta.14",
-    "oh-my-opencode-linux-arm64-musl": "3.0.0-beta.14",
-    "oh-my-opencode-linux-x64": "3.0.0-beta.14",
-    "oh-my-opencode-linux-x64-musl": "3.0.0-beta.14",
-    "oh-my-opencode-windows-x64": "3.0.0-beta.14"
+    "oh-my-opencode-darwin-arm64": "3.1.0",
+    "oh-my-opencode-darwin-x64": "3.1.0",
+    "oh-my-opencode-linux-arm64": "3.1.0",
+    "oh-my-opencode-linux-arm64-musl": "3.1.0",
+    "oh-my-opencode-linux-x64": "3.1.0",
+    "oh-my-opencode-linux-x64-musl": "3.1.0",
+    "oh-my-opencode-windows-x64": "3.1.0"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.0.0-beta.14",
+  "version": "3.1.0",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -735,6 +735,86 @@
      "created_at": "2026-01-23T08:27:02Z",
      "repoId": 1108837393,
      "pullRequestNo": 985
+    },
+    {
+      "name": "RouHim",
+      "id": 3582050,
+      "comment_id": 3791988227,
+      "created_at": "2026-01-23T19:32:01Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1031
+    },
+    {
+      "name": "gongxh0901",
+      "id": 15622561,
+      "comment_id": 3793478620,
+      "created_at": "2026-01-24T02:15:02Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1037
+    },
+    {
+      "name": "gongxh0901",
+      "id": 15622561,
+      "comment_id": 3793521632,
+      "created_at": "2026-01-24T02:23:34Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1037
+    },
+    {
+      "name": "AndersHsueh",
+      "id": 121805544,
+      "comment_id": 3793787614,
+      "created_at": "2026-01-24T04:41:46Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1042
+    },
+    {
+      "name": "AamiRobin",
+      "id": 22963668,
+      "comment_id": 3794632200,
+      "created_at": "2026-01-24T13:28:22Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1067
+    },
+    {
+      "name": "ThanhNguyxn",
+      "id": 74597207,
+      "comment_id": 3795232176,
+      "created_at": "2026-01-24T17:41:53Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1075
+    },
+    {
+      "name": "sadnow",
+      "id": 87896100,
+      "comment_id": 3795495342,
+      "created_at": "2026-01-24T20:49:29Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1080
+    },
+    {
+      "name": "jsl9208",
+      "id": 4048787,
+      "comment_id": 3795582626,
+      "created_at": "2026-01-24T21:41:24Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1082
+    },
+    {
+      "name": "potb",
+      "id": 10779093,
+      "comment_id": 3795856573,
+      "created_at": "2026-01-25T02:38:16Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1083
+    },
+    {
+      "name": "kvokka",
+      "id": 15954013,
+      "comment_id": 3795884358,
+      "created_at": "2026-01-25T03:13:52Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1084
    }
  ]
 }
--- a/sisyphus-prompt.md
+++ b/sisyphus-prompt.md
@@ -239,7 +239,7 @@ Ask yourself:
 I will use delegate_task with:
 - **Category**: [selected-category-name]
 - **Why this category**: [how category description matches task domain]
- **Skills**: [list of selected skills]
+- **load_skills**: [list of selected skills]
 - **Skill evaluation**:
  - [skill-1]: INCLUDED because [reason based on skill description]
  - [skill-2]: OMITTED because [reason why skill domain doesn't apply]
@@ -256,7 +256,7 @@ I will use delegate_task with:
 I will use delegate_task with:
 - **Category**: [category-name]
 - **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **Skills**: ["skill-a", "skill-b"]
+- **load_skills**: ["skill-a", "skill-b"]
 - **Skill evaluation**:
  - skill-a: INCLUDED - description says "[quote]" which applies to this task
  - skill-b: INCLUDED - description says "[quote]" which is needed here
@@ -265,7 +265,7 @@ I will use delegate_task with:

 delegate_task(
  category="[category-name]",
-  skills=["skill-a", "skill-b"],
+  load_skills=["skill-a", "skill-b"],
  prompt="..."
 )
 ```
@@ -276,12 +276,12 @@ delegate_task(
 I will use delegate_task with:
 - **Agent**: [agent-name]
 - **Reason**: This requires [agent's specialty] based on agent description
- **Skills**: [] (agents have built-in expertise)
+- **load_skills**: [] (agents have built-in expertise)
 - **Expected Outcome**: [what agent should return]

 delegate_task(
  subagent_type="[agent-name]",
-  skills=[],
+  load_skills=[],
  prompt="..."
 )
 ```
@@ -292,13 +292,13 @@ delegate_task(
 I will use delegate_task with:
 - **Agent**: explore
 - **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **Skills**: []
+- **load_skills**: []
 - **Expected Outcome**: List of files containing auth patterns

 delegate_task(
  subagent_type="explore",
  run_in_background=true,
-  skills=[],
+  load_skills=[],
  prompt="Find all authentication implementations in the codebase"
 )
 ```
@@ -306,7 +306,7 @@ delegate_task(
 **WRONG: No Skill Evaluation**

 ```
-delegate_task(category="...", skills=[], prompt="...")  // Where's the justification?
+delegate_task(category="...", load_skills=[], prompt="...")  // Where's the justification?
 ```

 **WRONG: Vague Category Selection**
@@ -329,11 +329,11 @@ I'll use this category because it seems right.
 ```typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
@@ -416,7 +416,7 @@ Skills inject specialized instructions into the subagent. Read the description t
 For EVERY skill listed above, ask yourself:
 > "Does this skill's expertise domain overlap with my task?"

- If YES → INCLUDE in `skills=[...]`
+- If YES → INCLUDE in `load_skills=[...]`
 - If NO → You MUST justify why (see below)

 **STEP 3: Justify Omissions**
@@ -444,14 +444,14 @@ SKILL EVALUATION for "[skill-name]":
 ```typescript
 delegate_task(
  category="[selected-category]",
-  skills=["skill-1", "skill-2"],  // Include ALL relevant skills
+  load_skills=["skill-1", "skill-2"],  // Include ALL relevant skills
  prompt="..."
 )
 ```

 **ANTI-PATTERN (will produce poor results):**
 ```typescript
-delegate_task(category="...", skills=[], prompt="...")  // Empty skills without justification
+delegate_task(category="...", load_skills=[], prompt="...")  // Empty load_skills without justification
 ```
 ### Delegation Table:

@@ -724,7 +724,7 @@ If the user's approach seems problematic:
 | **Error Handling** | Empty catch blocks `catch(e) {}` |
 | **Testing** | Deleting failing tests to "pass" |
 | **Search** | Firing agents for single-line typos or obvious syntax errors |
-| **Delegation** | Using `skills=[]` without justifying why no skills apply |
+| **Delegation** | Using `load_skills=[]` without justifying why no skills apply |
 | **Debugging** | Shotgun debugging, random changes |
 ## Soft Guidelines

--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -1,53 +1,48 @@
 # AGENTS KNOWLEDGE BASE

 ## OVERVIEW
-
 10 AI agents for multi-model orchestration. Sisyphus (primary), Atlas (orchestrator), oracle, librarian, explore, multimodal-looker, Prometheus, Metis, Momus, Sisyphus-Junior.

 ## STRUCTURE
-
 ```
 agents/
-├── atlas.ts                    # Master Orchestrator (543 lines)
-├── sisyphus.ts                 # Main prompt (615 lines)
-├── sisyphus-junior.ts          # Delegated task executor
-├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation
+├── atlas.ts                    # Master Orchestrator (holds todo list)
+├── sisyphus.ts                 # Main prompt (SF Bay Area engineer identity)
+├── sisyphus-junior.ts          # Delegated task executor (category-spawned)
 ├── oracle.ts                   # Strategic advisor (GPT-5.2)
-├── librarian.ts                # Multi-repo research (GLM-4.7-free)
-├── explore.ts                  # Fast grep (Grok Code)
+├── librarian.ts                # Multi-repo research (GitHub CLI, Context7)
+├── explore.ts                  # Fast contextual grep (Grok Code)
 ├── multimodal-looker.ts        # Media analyzer (Gemini 3 Flash)
-├── prometheus-prompt.ts        # Planning (1196 lines)
-├── metis.ts                    # Plan consultant
-├── momus.ts                    # Plan reviewer
+├── prometheus-prompt.ts        # Planning (Interview/Consultant mode, 1196 lines)
+├── metis.ts                    # Pre-planning analysis (Gap detection)
+├── momus.ts                    # Plan reviewer (Ruthless fault-finding)
+├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation
 ├── types.ts                    # AgentModelConfig, AgentPromptMetadata
 ├── utils.ts                    # createBuiltinAgents(), resolveModelWithFallback()
 └── index.ts                    # builtinAgents export
 ```

 ## AGENT MODELS
-
 | Agent | Model | Temp | Purpose |
 |-------|-------|------|---------|
 | Sisyphus | anthropic/claude-opus-4-5 | 0.1 | Primary orchestrator |
 | Atlas | anthropic/claude-opus-4-5 | 0.1 | Master orchestrator |
 | oracle | openai/gpt-5.2 | 0.1 | Consultation, debugging |
-| librarian | opencode/glm-4.7-free | 0.1 | Docs, GitHub search |
-| explore | opencode/grok-code | 0.1 | Fast contextual grep |
-| multimodal-looker | google/gemini-3-flash-preview | 0.1 | PDF/image analysis |
+| librarian | opencode/big-pickle | 0.1 | Docs, GitHub search |
+| explore | opencode/gpt-5-nano | 0.1 | Fast contextual grep |
+| multimodal-looker | google/gemini-3-flash | 0.1 | PDF/image analysis |
 | Prometheus | anthropic/claude-opus-4-5 | 0.1 | Strategic planning |
 | Metis | anthropic/claude-sonnet-4-5 | 0.3 | Pre-planning analysis |
 | Momus | anthropic/claude-sonnet-4-5 | 0.1 | Plan validation |
 | Sisyphus-Junior | anthropic/claude-sonnet-4-5 | 0.1 | Category-spawned executor |

 ## HOW TO ADD
-
-1. Create `src/agents/my-agent.ts` exporting factory + metadata
-2. Add to `agentSources` in `src/agents/utils.ts`
-3. Update `AgentNameSchema` in `src/config/schema.ts`
-4. Register in `src/index.ts` initialization
+1. Create `src/agents/my-agent.ts` exporting factory + metadata.
+2. Add to `agentSources` in `src/agents/utils.ts`.
+3. Update `AgentNameSchema` in `src/config/schema.ts`.
+4. Register in `src/index.ts` initialization.

 ## TOOL RESTRICTIONS
-
 | Agent | Denied Tools |
 |-------|-------------|
 | oracle | write, edit, task, delegate_task |
@@ -57,14 +52,13 @@ agents/
 | Sisyphus-Junior | task, delegate_task |

 ## PATTERNS
-
- **Factory**: `createXXXAgent(model?: string): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers
- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`
- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas
+- **Factory**: `createXXXAgent(model: string): AgentConfig`
+- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers.
+- **Tool restrictions**: `createAgentToolRestrictions(tools)` or `createAgentToolAllowlist(tools)`.
+- **Thinking**: 32k budget tokens for Sisyphus, Oracle, Prometheus, Atlas.

 ## ANTI-PATTERNS
-
- **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background`
+- **Trust reports**: NEVER trust "I'm done" - verify outputs.
+- **High temp**: Don't use >0.3 for code agents.
+- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration.
+- **Prometheus writing code**: Planner only - never implements.
--- a/src/agents/atlas.ts
+++ b/src/agents/atlas.ts
@@ -58,7 +58,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
 ${categoryRows.join("\n")}

 \`\`\`typescript
-delegate_task(category="[category-name]", skills=[...], prompt="...")
+delegate_task(category="[category-name]", load_skills=[...], prompt="...")
 \`\`\``
 }

@@ -84,12 +84,12 @@ ${skillRows.join("\n")}
 **MANDATORY: Evaluate ALL skills for relevance to your task.**

 Read each skill's description and ask: "Does this skill's domain overlap with my task?"
- If YES: INCLUDE in skills=[...]
+- If YES: INCLUDE in load_skills=[...]
 - If NO: You MUST justify why in your pre-delegation declaration

 **Usage:**
 \`\`\`typescript
-delegate_task(category="[category]", skills=["skill-1", "skill-2"], prompt="...")
+delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], prompt="...")
 \`\`\`

 **IMPORTANT:**
@@ -102,7 +102,7 @@ function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: Record<s
  const allCategories = { ...DEFAULT_CATEGORIES, ...userCategories }

  const categoryRows = Object.entries(allCategories).map(([name]) =>
-    `| ${getCategoryDescription(name, userCategories)} | \`category="${name}", skills=[...]\` |`
+    `| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
  )

  const agentRows = agents.map((a) => {
@@ -323,7 +323,7 @@ delegate_task(
 **If verification fails**: Resume the SAME session with the ACTUAL error output:
 \`\`\`typescript
 delegate_task(
-  resume="ses_xyz789",  // ALWAYS use the session from the failed task
+  session_id="ses_xyz789",  // ALWAYS use the session from the failed task
  load_skills=[...],
  prompt="Verification failed: {actual error}. Fix."
 )
@@ -331,24 +331,24 @@ delegate_task(

-### 3.5 Handle Failures (USE RESUME)
+### 3.5 Handle Failures (USE session_id)

-**CRITICAL: When re-delegating, ALWAYS use \`resume\` parameter.**
+**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**

 Every \`delegate_task()\` output includes a session_id. STORE IT.

 If task fails:
 1. Identify what went wrong
 2. **Resume the SAME session** - subagent has full context already:
-   \`\`\`typescript
-   delegate_task(
-     resume="ses_xyz789",  // Session from failed task
-     load_skills=[...],
-     prompt="FAILED: {error}. Fix by: {specific instruction}"
-   )
-   \`\`\`
+    \`\`\`typescript
+    delegate_task(
+      session_id="ses_xyz789",  // Session from failed task
+      load_skills=[...],
+      prompt="FAILED: {error}. Fix by: {specific instruction}"
+    )
+    \`\`\`
 3. Maximum 3 retry attempts with the SAME session
 4. If blocked after 3 attempts: Document and continue to independent tasks

-**Why resume is MANDATORY for failures:**
+**Why session_id is MANDATORY for failures:**
 - Subagent already read all files, knows the context
 - No repeated exploration = 70%+ token savings
 - Subagent knows what approaches already failed
@@ -493,7 +493,7 @@ You are the QA gate. Subagents lie. Verify EVERYTHING.
 - Parallelize independent tasks
 - Verify with your own tools
 - **Store session_id from every delegation output**
- **Use \`resume="{session_id}"\` for retries, fixes, and follow-ups**
+- **Use \`session_id="{session_id}"\` for retries, fixes, and follow-ups**
 </critical_overrides>
 `

--- a/src/agents/sisyphus-junior.ts
+++ b/src/agents/sisyphus-junior.ts
@@ -20,32 +20,6 @@ ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
 You work ALONE for implementation. No delegation of implementation tasks.
 </Critical_Constraints>

-<Work_Context>
-## Notepad Location (for recording learnings)
-NOTEPAD PATH: .sisyphus/notepads/{plan-name}/
- learnings.md: Record patterns, conventions, successful approaches
- issues.md: Record problems, blockers, gotchas encountered
- decisions.md: Record architectural choices and rationales
- problems.md: Record unresolved issues, technical debt
-
-You SHOULD append findings to notepad files after completing work.
-IMPORTANT: Always APPEND to notepad files - never overwrite or use Edit tool.
-
-## Plan Location (READ ONLY)
-PLAN PATH: .sisyphus/plans/{plan-name}.md
-
-CRITICAL RULE: NEVER MODIFY THE PLAN FILE
-
-The plan file (.sisyphus/plans/*.md) is SACRED and READ-ONLY.
- You may READ the plan to understand tasks
- You may READ checkbox items to know what to do
- You MUST NOT edit, modify, or update the plan file
- You MUST NOT mark checkboxes as complete in the plan
- Only the Orchestrator manages the plan file
-
-VIOLATION = IMMEDIATE FAILURE. The Orchestrator tracks plan state.
-</Work_Context>
-
 <Todo_Discipline>
 TODO OBSESSION (NON-NEGOTIABLE):
 - 2+ steps → todowrite FIRST, atomic breakdown
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -144,11 +144,11 @@ ${librarianSection}
 \`\`\`typescript
 // CORRECT: Always background, always parallel
 // Contextual Grep (internal)
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find auth implementations in our codebase...")
-delegate_task(subagent_type="explore", run_in_background=true, skills=[], prompt="Find error handling patterns here...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
+delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
 // Reference Grep (external)
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find JWT best practices in official docs...")
-delegate_task(subagent_type="librarian", run_in_background=true, skills=[], prompt="Find how production apps handle auth in Express...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
+delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
 // Continue working immediately. Collect with background_output when needed.

 // WRONG: Sequential or blocking
@@ -209,15 +209,15 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:

 Every \`delegate_task()\` output includes a session_id. **USE IT.**

-**ALWAYS resume when:**
+**ALWAYS continue when:**
 | Scenario | Action |
 |----------|--------|
-| Task failed/incomplete | \`resume="{session_id}", prompt="Fix: {specific error}"\` |
-| Follow-up question on result | \`resume="{session_id}", prompt="Also: {question}"\` |
-| Multi-turn with same agent | \`resume="{session_id}"\` - NEVER start fresh |
-| Verification failed | \`resume="{session_id}", prompt="Failed verification: {error}. Fix."\` |
+| Task failed/incomplete | \`session_id="{session_id}", prompt="Fix: {specific error}"\` |
+| Follow-up question on result | \`session_id="{session_id}", prompt="Also: {question}"\` |
+| Multi-turn with same agent | \`session_id="{session_id}"\` - NEVER start fresh |
+| Verification failed | \`session_id="{session_id}", prompt="Failed verification: {error}. Fix."\` |

-**Why resume is CRITICAL:**
+**Why session_id is CRITICAL:**
 - Subagent has FULL conversation context preserved
 - No repeated file reads, exploration, or setup
 - Saves 70%+ tokens on follow-ups
@@ -228,10 +228,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
 delegate_task(category="quick", prompt="Fix the type error in auth.ts...")

-// CORRECT: Resume preserves everything
+// CORRECT: Continuing with session_id preserves everything
-delegate_task(resume="ses_abc123", prompt="Fix: Type error on line 42")
+delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
 \`\`\`

-**After EVERY delegation, STORE the session_id for potential resume.**
+**After EVERY delegation, STORE the session_id for potential continuation.**

 ### Code Changes:
 - Match existing patterns (if codebase is disciplined)
--- a/src/agents/utils.test.ts
+++ b/src/agents/utils.test.ts
@@ -1,6 +1,7 @@
-import { describe, test, expect } from "bun:test"
+import { describe, test, expect, beforeEach } from "bun:test"
 import { createBuiltinAgents } from "./utils"
 import type { AgentConfig } from "@opencode-ai/sdk"
+import { clearSkillCache } from "../features/opencode-skill-loader/skill-content"

 const TEST_DEFAULT_MODEL = "anthropic/claude-opus-4-5"

@@ -109,6 +110,10 @@ describe("buildAgent with category and skills", () => {
  const { buildAgent } = require("./utils")
  const TEST_MODEL = "anthropic/claude-opus-4-5"

+  beforeEach(() => {
+    clearSkillCache()
+  })
+
  test("agent with category inherits category settings", () => {
    // #given - agent factory that sets category but no model
    const source = {
@@ -123,7 +128,7 @@ describe("buildAgent with category and skills", () => {
    const agent = buildAgent(source["test-agent"], TEST_MODEL)

    // #then - category's built-in model is applied
-    expect(agent.model).toBe("google/gemini-3-pro-preview")
+    expect(agent.model).toBe("google/gemini-3-pro")
  })

  test("agent with category and existing model keeps existing model", () => {
@@ -308,4 +313,42 @@ describe("buildAgent with category and skills", () => {
    // #then
    expect(agent.prompt).toBe("Base prompt")
  })
+
+  test("agent with agent-browser skill resolves when browserProvider is set", () => {
+    // #given
+    const source = {
+      "test-agent": () =>
+        ({
+          description: "Test agent",
+          skills: ["agent-browser"],
+          prompt: "Base prompt",
+        }) as AgentConfig,
+    }
+
+    // #when - browserProvider is "agent-browser"
+    const agent = buildAgent(source["test-agent"], TEST_MODEL, undefined, undefined, "agent-browser")
+
+    // #then - agent-browser skill content should be in prompt
+    expect(agent.prompt).toContain("agent-browser")
+    expect(agent.prompt).toContain("Base prompt")
+  })
+
+  test("agent with agent-browser skill NOT resolved when browserProvider not set", () => {
+    // #given
+    const source = {
+      "test-agent": () =>
+        ({
+          description: "Test agent",
+          skills: ["agent-browser"],
+          prompt: "Base prompt",
+        }) as AgentConfig,
+    }
+
+    // #when - no browserProvider (defaults to playwright)
+    const agent = buildAgent(source["test-agent"], TEST_MODEL)
+
+    // #then - agent-browser skill not found, only base prompt remains
+    expect(agent.prompt).toBe("Base prompt")
+    expect(agent.prompt).not.toContain("agent-browser open")
+  })
 })
--- a/src/agents/utils.ts
+++ b/src/agents/utils.ts
@@ -10,11 +10,12 @@ import { createMetisAgent } from "./metis"
 import { createAtlasAgent } from "./atlas"
 import { createMomusAgent } from "./momus"
 import type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive } from "../shared"
+import { deepMerge, fetchAvailableModels, resolveModelWithFallback, AGENT_MODEL_REQUIREMENTS, findCaseInsensitive, includesCaseInsensitive, readConnectedProvidersCache } from "../shared"
 import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
 import { resolveMultipleSkills } from "../features/opencode-skill-loader/skill-content"
 import { createBuiltinSkills } from "../features/builtin-skills"
 import type { LoadedSkill, SkillScope } from "../features/opencode-skill-loader/types"
+import type { BrowserAutomationProvider } from "../config/schema"

 type AgentSource = AgentFactory | AgentConfig

@@ -50,7 +51,8 @@ export function buildAgent(
  source: AgentSource,
  model: string,
  categories?: CategoriesConfig,
-  gitMasterConfig?: GitMasterConfig
+  gitMasterConfig?: GitMasterConfig,
+  browserProvider?: BrowserAutomationProvider
 ): AgentConfig {
  const base = isFactory(source) ? source(model) : source
  const categoryConfigs: Record<string, CategoryConfig> = categories
@@ -74,7 +76,7 @@ export function buildAgent(
  }

  if (agentWithCategory.skills?.length) {
-    const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig })
+    const { resolved } = resolveMultipleSkills(agentWithCategory.skills, { gitMasterConfig, browserProvider })
    if (resolved.size > 0) {
      const skillContent = Array.from(resolved.values()).join("\n\n")
      base.prompt = skillContent + (base.prompt ? "\n\n" + base.prompt : "")
@@ -146,14 +148,17 @@ export async function createBuiltinAgents(
  categories?: CategoriesConfig,
  gitMasterConfig?: GitMasterConfig,
  discoveredSkills: LoadedSkill[] = [],
-  client?: any
+  client?: any,
+  browserProvider?: BrowserAutomationProvider
 ): Promise<Record<string, AgentConfig>> {
  if (!systemDefaultModel) {
    throw new Error("createBuiltinAgents requires systemDefaultModel")
  }

-  // Fetch available models at plugin init
-  const availableModels = client ? await fetchAvailableModels(client) : new Set<string>()
+  const connectedProviders = readConnectedProvidersCache()
+  const availableModels = client
+    ? await fetchAvailableModels(client, { connectedProviders: connectedProviders ?? undefined })
+    : new Set<string>()

  const result: Record<string, AgentConfig> = {}
  const availableAgents: AvailableAgent[] = []
@@ -167,7 +172,7 @@ export async function createBuiltinAgents(
    description: categories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks",
  }))

-  const builtinSkills = createBuiltinSkills()
+  const builtinSkills = createBuiltinSkills({ browserProvider })
  const builtinSkillNames = new Set(builtinSkills.map(s => s.name))

  const builtinAvailable: AvailableSkill[] = builtinSkills.map((skill) => ({
@@ -204,7 +209,7 @@ export async function createBuiltinAgents(
      systemDefaultModel,
    })

-    let config = buildAgent(source, model, mergedCategories, gitMasterConfig)
+    let config = buildAgent(source, model, mergedCategories, gitMasterConfig, browserProvider)
    
    // Apply variant from override or resolved fallback chain
    if (override?.variant) {
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -8,16 +8,17 @@ CLI entry: `bunx oh-my-opencode`. Interactive installer, doctor diagnostics. Com

 ```
 cli/
-├── index.ts              # Commander.js entry
+├── index.ts              # Commander.js entry (4 commands)
 ├── install.ts            # Interactive TUI (520 lines)
-├── config-manager.ts     # JSONC parsing (641 lines)
+├── config-manager.ts     # JSONC parsing (664 lines)
 ├── types.ts              # InstallArgs, InstallConfig
+├── model-fallback.ts     # Model fallback configuration
 ├── doctor/
 │   ├── index.ts          # Doctor entry
 │   ├── runner.ts         # Check orchestration
 │   ├── formatter.ts      # Colored output
 │   ├── constants.ts      # Check IDs, symbols
-│   ├── types.ts          # CheckResult, CheckDefinition
+│   ├── types.ts          # CheckResult, CheckDefinition (114 lines)
 │   └── checks/           # 14 checks, 21 files
 │       ├── version.ts    # OpenCode + plugin version
 │       ├── config.ts     # JSONC validity, Zod
@@ -25,6 +26,7 @@ cli/
 │       ├── dependencies.ts # AST-Grep, Comment Checker
 │       ├── lsp.ts        # LSP connectivity
 │       ├── mcp.ts        # MCP validation
+│       ├── model-resolution.ts # Model resolution check
 │       └── gh.ts         # GitHub CLI
 ├── run/
 │   └── index.ts          # Session launcher
@@ -36,36 +38,37 @@ cli/

 | Command | Purpose |
 |---------|---------|
-| `install` | Interactive setup |
-| `doctor` | 14 health checks |
-| `run` | Launch session |
-| `get-local-version` | Version check |
+| `install` | Interactive setup with provider selection |
+| `doctor` | 14 health checks for diagnostics |
+| `run` | Launch session with todo enforcement |
+| `get-local-version` | Version detection and update check |

-## DOCTOR CATEGORIES
+## DOCTOR CATEGORIES (14 Checks)

 | Category | Checks |
 |----------|--------|
 | installation | opencode, plugin |
-| configuration | config validity, Zod |
+| configuration | config validity, Zod, model-resolution |
 | authentication | anthropic, openai, google |
-| dependencies | ast-grep, comment-checker |
+| dependencies | ast-grep, comment-checker, gh-cli |
 | tools | LSP, MCP |
 | updates | version comparison |

 ## HOW TO ADD CHECK

 1. Create `src/cli/doctor/checks/my-check.ts`
-2. Export from `checks/index.ts`
-3. Add to `getAllCheckDefinitions()`
+2. Export `getXXXCheckDefinition()` factory returning `CheckDefinition`
+3. Add to `getAllCheckDefinitions()` in `checks/index.ts`

 ## TUI FRAMEWORK

- **@clack/prompts**: `select()`, `spinner()`, `intro()`
- **picocolors**: Terminal colors
- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn)
+- **@clack/prompts**: `select()`, `spinner()`, `intro()`, `outro()`
+- **picocolors**: Terminal colors for status and headers
+- **Symbols**: ✓ (pass), ✗ (fail), ⚠ (warn), ℹ (info)

 ## ANTI-PATTERNS

- **Blocking in non-TTY**: Check `process.stdout.isTTY`
- **Direct JSON.parse**: Use `parseJsonc()`
- **Silent failures**: Return warn/fail in doctor
+- **Blocking in non-TTY**: Always check `process.stdout.isTTY`
+- **Direct JSON.parse**: Use `parseJsonc()` from shared utils
+- **Silent failures**: Return `warn` or `fail` in doctor instead of throwing
+- **Hardcoded paths**: Use `getOpenCodeConfigPaths()` from `config-manager.ts`
--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
@@ -5,54 +5,54 @@ exports[`generateModelConfig no providers available returns ULTIMATE_FALLBACK fo
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "explore": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "sisyphus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
  },
  "categories": {
    "artistry": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "visual-engineering": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "writing": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
  },
 }
@@ -196,10 +196,10 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
      "model": "openai/gpt-5.2",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
      "model": "openai/gpt-5.2",
@@ -230,7 +230,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
      "model": "openai/gpt-5.2",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "ultrabrain": {
      "model": "openai/gpt-5.2-codex",
@@ -263,10 +263,10 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "model": "openai/gpt-5.2",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
      "model": "openai/gpt-5.2",
@@ -297,7 +297,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "model": "openai/gpt-5.2",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "ultrabrain": {
      "model": "openai/gpt-5.2-codex",
@@ -327,57 +327,57 @@ exports[`generateModelConfig single native provider uses Gemini models when only
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "momus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "prometheus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "sisyphus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "ultrabrain": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "unspecified-high": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "unspecified-low": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -388,57 +388,57 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "momus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "prometheus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "sisyphus": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "ultrabrain": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "unspecified-high": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "unspecified-low": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -466,7 +466,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
      "model": "openai/gpt-5.2",
@@ -482,7 +482,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -499,10 +499,10 @@ exports[`generateModelConfig all native providers uses preferred models from fal
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -530,7 +530,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
      "model": "openai/gpt-5.2",
@@ -547,7 +547,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -565,10 +565,10 @@ exports[`generateModelConfig all native providers uses preferred models with isM
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -585,7 +585,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "model": "opencode/claude-haiku-4-5",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
      "model": "opencode/claude-opus-4-5",
@@ -596,7 +596,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
    "oracle": {
      "model": "opencode/gpt-5.2",
@@ -612,7 +612,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
  },
  "categories": {
    "artistry": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -629,10 +629,10 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
      "model": "opencode/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
    },
    "writing": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
  },
 }
@@ -649,7 +649,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "model": "opencode/claude-haiku-4-5",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
      "model": "opencode/claude-opus-4-5",
@@ -660,7 +660,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
    "oracle": {
      "model": "opencode/gpt-5.2",
@@ -677,7 +677,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
  },
  "categories": {
    "artistry": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -695,10 +695,10 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
      "model": "opencode/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
    },
    "writing": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
  },
 }
@@ -712,7 +712,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "github-copilot/gpt-5-mini",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
@@ -726,7 +726,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
    "oracle": {
      "model": "github-copilot/gpt-5.2",
@@ -742,7 +742,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
  },
  "categories": {
    "artistry": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -759,10 +759,10 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "visual-engineering": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
    },
    "writing": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
  },
 }
@@ -776,7 +776,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "github-copilot/gpt-5-mini",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
@@ -790,7 +790,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
    "oracle": {
      "model": "github-copilot/gpt-5.2",
@@ -807,7 +807,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
  },
  "categories": {
    "artistry": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -825,10 +825,10 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "visual-engineering": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
    },
    "writing": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
  },
 }
@@ -839,51 +839,51 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "multimodal-looker": {
      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "sisyphus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
  },
  "categories": {
    "artistry": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "visual-engineering": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "writing": {
      "model": "zai-coding-plan/glm-4.7",
@@ -897,28 +897,28 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "multimodal-looker": {
      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "sisyphus": {
      "model": "zai-coding-plan/glm-4.7",
@@ -926,22 +926,22 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
  },
  "categories": {
    "artistry": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "visual-engineering": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "writing": {
      "model": "zai-coding-plan/glm-4.7",
@@ -961,7 +961,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "model": "anthropic/claude-haiku-4-5",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/big-pickle",
    },
    "metis": {
      "model": "anthropic/claude-opus-4-5",
@@ -972,7 +972,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
    "oracle": {
      "model": "opencode/gpt-5.2",
@@ -988,7 +988,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
  },
  "categories": {
    "artistry": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1005,10 +1005,10 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "opencode/gemini-3-pro-preview",
+      "model": "opencode/gemini-3-pro",
    },
    "writing": {
-      "model": "opencode/gemini-3-flash-preview",
+      "model": "opencode/gemini-3-flash",
    },
  },
 }
@@ -1022,7 +1022,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "explore": {
-      "model": "opencode/grok-code",
+      "model": "github-copilot/gpt-5-mini",
    },
    "librarian": {
      "model": "github-copilot/claude-sonnet-4.5",
@@ -1036,7 +1036,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
    "oracle": {
      "model": "openai/gpt-5.2",
@@ -1052,7 +1052,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
  },
  "categories": {
    "artistry": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1069,10 +1069,10 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "visual-engineering": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
    },
    "writing": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
  },
 }
@@ -1099,7 +1099,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "anthropic/claude-opus-4-5",
    },
    "multimodal-looker": {
-      "model": "anthropic/claude-haiku-4-5",
+      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
      "model": "anthropic/claude-opus-4-5",
@@ -1163,7 +1163,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
      "model": "anthropic/claude-opus-4-5",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
      "model": "anthropic/claude-opus-4-5",
@@ -1179,7 +1179,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1196,10 +1196,10 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -1227,7 +1227,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
    "oracle": {
      "model": "github-copilot/gpt-5.2",
@@ -1243,7 +1243,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
  },
  "categories": {
    "artistry": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1260,10 +1260,10 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
      "model": "github-copilot/claude-sonnet-4.5",
    },
    "visual-engineering": {
-      "model": "github-copilot/gemini-3-pro-preview",
+      "model": "github-copilot/gemini-3-pro",
    },
    "writing": {
-      "model": "github-copilot/gemini-3-flash-preview",
+      "model": "github-copilot/gemini-3-flash",
    },
  },
 }
@@ -1291,7 +1291,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
      "model": "openai/gpt-5.2",
@@ -1307,7 +1307,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1324,10 +1324,10 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
@@ -1355,7 +1355,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
      "variant": "medium",
    },
    "multimodal-looker": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
    "oracle": {
      "model": "openai/gpt-5.2",
@@ -1372,7 +1372,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
  },
  "categories": {
    "artistry": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
      "variant": "max",
    },
    "quick": {
@@ -1390,10 +1390,10 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "google/gemini-3-pro-preview",
+      "model": "google/gemini-3-pro",
    },
    "writing": {
-      "model": "google/gemini-3-flash-preview",
+      "model": "google/gemini-3-flash",
    },
  },
 }
--- a/src/cli/config-manager.test.ts
+++ b/src/cli/config-manager.test.ts
@@ -170,7 +170,7 @@ describe("fetchNpmDistTags", () => {
 })

 describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
-  test("Gemini models include full spec (limit + modalities)", () => {
+  test("all models include full spec (limit + modalities + Antigravity label)", () => {
    const google = (ANTIGRAVITY_PROVIDER_CONFIG as any).google
    expect(google).toBeTruthy()

@@ -178,9 +178,11 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
    expect(models).toBeTruthy()

    const required = [
-      "antigravity-gemini-3-pro-high",
-      "antigravity-gemini-3-pro-low",
+      "antigravity-gemini-3-pro",
      "antigravity-gemini-3-flash",
+      "antigravity-claude-sonnet-4-5",
+      "antigravity-claude-sonnet-4-5-thinking",
+      "antigravity-claude-opus-4-5-thinking",
    ]

    for (const key of required) {
@@ -198,6 +200,43 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => {
      expect(Array.isArray(model.modalities.output)).toBe(true)
    }
  })
+
+  test("Gemini models have variant definitions", () => {
+    // #given the antigravity provider config
+    const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
+
+    // #when checking Gemini Pro variants
+    const pro = models["antigravity-gemini-3-pro"]
+    // #then should have low and high variants
+    expect(pro.variants).toBeTruthy()
+    expect(pro.variants.low).toBeTruthy()
+    expect(pro.variants.high).toBeTruthy()
+
+    // #when checking Gemini Flash variants
+    const flash = models["antigravity-gemini-3-flash"]
+    // #then should have minimal, low, medium, high variants
+    expect(flash.variants).toBeTruthy()
+    expect(flash.variants.minimal).toBeTruthy()
+    expect(flash.variants.low).toBeTruthy()
+    expect(flash.variants.medium).toBeTruthy()
+    expect(flash.variants.high).toBeTruthy()
+  })
+
+  test("Claude thinking models have variant definitions", () => {
+    // #given the antigravity provider config
+    const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record<string, any>
+
+    // #when checking Claude thinking variants
+    const sonnetThinking = models["antigravity-claude-sonnet-4-5-thinking"]
+    const opusThinking = models["antigravity-claude-opus-4-5-thinking"]
+
+    // #then both should have low and max variants
+    for (const model of [sonnetThinking, opusThinking]) {
+      expect(model.variants).toBeTruthy()
+      expect(model.variants.low).toBeTruthy()
+      expect(model.variants.max).toBeTruthy()
+    }
+  })
 })

 describe("generateOmoConfig - model fallback system", () => {
@@ -277,7 +316,7 @@ describe("generateOmoConfig - model fallback system", () => {

    // #then should use ultimate fallback for all agents
    expect(result.$schema).toBe("https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json")
-    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("opencode/glm-4.7-free")
+    expect((result.agents as Record<string, { model: string }>).sisyphus.model).toBe("opencode/big-pickle")
  })

  test("uses zai-coding-plan/glm-4.7 for librarian when Z.ai available", () => {
--- a/src/cli/config-manager.ts
+++ b/src/cli/config-manager.ts
@@ -497,38 +497,61 @@ export async function runBunInstallWithDetails(): Promise<BunInstallResult> {
 *
 * IMPORTANT: Model names MUST use `antigravity-` prefix for stability.
 *
- * The opencode-antigravity-auth plugin supports two naming conventions:
- * - `antigravity-gemini-3-pro-high` (RECOMMENDED, explicit Antigravity quota routing)
- * - `gemini-3-pro-high` (LEGACY, backward compatible but may break in future)
+ * Since opencode-antigravity-auth v1.3.0, models use a variant system:
+ * - `antigravity-gemini-3-pro` with variants: low, high
+ * - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high
 *
- * Legacy names rely on Gemini CLI using `-preview` suffix for disambiguation.
- * If Google removes `-preview`, legacy names may route to wrong quota.
+ * Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work
+ * but variants are the recommended approach.
 *
- * @see https://github.com/NoeFabris/opencode-antigravity-auth#migration-guide-v127
+ * @see https://github.com/NoeFabris/opencode-antigravity-auth#models
 */
 export const ANTIGRAVITY_PROVIDER_CONFIG = {
  google: {
    name: "Google",
    models: {
-      "antigravity-gemini-3-pro-high": {
-        name: "Gemini 3 Pro High (Antigravity)",
-        thinking: true,
-        attachment: true,
-        limit: { context: 1048576, output: 65535 },
-        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
-      },
-      "antigravity-gemini-3-pro-low": {
-        name: "Gemini 3 Pro Low (Antigravity)",
-        thinking: true,
-        attachment: true,
+      "antigravity-gemini-3-pro": {
+        name: "Gemini 3 Pro (Antigravity)",
        limit: { context: 1048576, output: 65535 },
        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingLevel: "low" },
+          high: { thinkingLevel: "high" },
+        },
      },
      "antigravity-gemini-3-flash": {
        name: "Gemini 3 Flash (Antigravity)",
-        attachment: true,
        limit: { context: 1048576, output: 65536 },
        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          minimal: { thinkingLevel: "minimal" },
+          low: { thinkingLevel: "low" },
+          medium: { thinkingLevel: "medium" },
+          high: { thinkingLevel: "high" },
+        },
+      },
+      "antigravity-claude-sonnet-4-5": {
+        name: "Claude Sonnet 4.5 (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+      },
+      "antigravity-claude-sonnet-4-5-thinking": {
+        name: "Claude Sonnet 4.5 Thinking (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingConfig: { thinkingBudget: 8192 } },
+          max: { thinkingConfig: { thinkingBudget: 32768 } },
+        },
+      },
+      "antigravity-claude-opus-4-5-thinking": {
+        name: "Claude Opus 4.5 Thinking (Antigravity)",
+        limit: { context: 200000, output: 64000 },
+        modalities: { input: ["text", "image", "pdf"], output: ["text"] },
+        variants: {
+          low: { thinkingConfig: { thinkingBudget: 8192 } },
+          max: { thinkingConfig: { thinkingBudget: 32768 } },
+        },
      },
    },
  },
--- a/src/cli/doctor/checks/model-resolution.test.ts
+++ b/src/cli/doctor/checks/model-resolution.test.ts
@@ -27,7 +27,7 @@ describe("model-resolution check", () => {
      // #then: Should have category entries
      const visual = info.categories.find((c) => c.name === "visual-engineering")
      expect(visual).toBeDefined()
-      expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro-preview")
+      expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro")
      expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google")
    })
  })
--- a/src/cli/doctor/checks/model-resolution.ts
+++ b/src/cli/doctor/checks/model-resolution.ts
@@ -199,9 +199,11 @@ function buildDetailsArray(info: ModelResolutionInfo, available: AvailableModels
  details.push("═══ Available Models (from cache) ═══")
  details.push("")
  if (available.cacheExists) {
-    details.push(`  Providers: ${available.providers.length} (${available.providers.slice(0, 8).join(", ")}${available.providers.length > 8 ? "..." : ""})`)
+    details.push(`  Providers in cache: ${available.providers.length}`)
+    details.push(`  Sample: ${available.providers.slice(0, 6).join(", ")}${available.providers.length > 6 ? "..." : ""}`)
    details.push(`  Total models: ${available.modelCount}`)
    details.push(`  Cache: ~/.cache/opencode/models.json`)
+    details.push(`  ℹ Runtime: only connected providers used`)
    details.push(`  Refresh: opencode models --refresh`)
  } else {
    details.push("  ⚠ Cache not found. Run 'opencode' to populate.")
--- a/src/cli/install.ts
+++ b/src/cli/install.ts
@@ -178,7 +178,7 @@ async function runTuiMode(detected: DetectedConfig): Promise<InstallConfig | nul
  const claude = await p.select({
    message: "Do you have a Claude Pro/Max subscription?",
    options: [
-      { value: "no" as const, label: "No", hint: "Will use opencode/glm-4.7-free as fallback" },
+      { value: "no" as const, label: "No", hint: "Will use opencode/big-pickle as fallback" },
      { value: "yes" as const, label: "Yes (standard)", hint: "Claude Opus 4.5 for orchestration" },
      { value: "max20" as const, label: "Yes (max20 mode)", hint: "Full power with Claude Sonnet 4.5 for Librarian" },
    ],
@@ -363,7 +363,7 @@ async function runNonTuiInstall(args: InstallArgs): Promise<number> {
  }

  if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
-    printWarning("No model providers configured. Using opencode/glm-4.7-free as fallback.")
+    printWarning("No model providers configured. Using opencode/big-pickle as fallback.")
  }

  console.log(`${SYMBOLS.star} ${color.bold(color.green(isUpdate ? "Configuration updated!" : "Installation complete!"))}`)
@@ -480,7 +480,7 @@ export async function install(args: InstallArgs): Promise<number> {
  }

  if (!config.hasClaude && !config.hasOpenAI && !config.hasGemini && !config.hasCopilot && !config.hasOpencodeZen) {
-    p.log.warn("No model providers configured. Using opencode/glm-4.7-free as fallback.")
+    p.log.warn("No model providers configured. Using opencode/big-pickle as fallback.")
  }

  p.note(formatConfigSummary(config), isUpdate ? "Updated Configuration" : "Installation Complete")
--- a/src/cli/model-fallback.test.ts
+++ b/src/cli/model-fallback.test.ts
@@ -310,15 +310,15 @@ describe("generateModelConfig", () => {
  })

  describe("explore agent special cases", () => {
-    test("explore uses grok-code when only Gemini available (no Claude)", () => {
+    test("explore uses gpt-5-nano when only Gemini available (no Claude)", () => {
      // #given only Gemini is available (no Claude)
      const config = createConfig({ hasGemini: true })

      // #when generateModelConfig is called
      const result = generateModelConfig(config)

-      // #then explore should use grok-code (Claude haiku not available)
-      expect(result.agents?.explore?.model).toBe("opencode/grok-code")
+      // #then explore should use gpt-5-nano (Claude haiku not available)
+      expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
    })

    test("explore uses Claude haiku when Claude available", () => {
@@ -343,15 +343,26 @@ describe("generateModelConfig", () => {
      expect(result.agents?.explore?.model).toBe("anthropic/claude-haiku-4-5")
    })

-    test("explore uses grok-code when only OpenAI available", () => {
+    test("explore uses gpt-5-nano when only OpenAI available", () => {
      // #given only OpenAI is available
      const config = createConfig({ hasOpenAI: true })

      // #when generateModelConfig is called
      const result = generateModelConfig(config)

-      // #then explore should use grok-code (fallback)
-      expect(result.agents?.explore?.model).toBe("opencode/grok-code")
+      // #then explore should use gpt-5-nano (fallback)
+      expect(result.agents?.explore?.model).toBe("opencode/gpt-5-nano")
+    })
+
+    test("explore uses gpt-5-mini when only Copilot available", () => {
+      // #given only Copilot is available
+      const config = createConfig({ hasCopilot: true })
+
+      // #when generateModelConfig is called
+      const result = generateModelConfig(config)
+
+      // #then explore should use gpt-5-mini (Copilot fallback)
+      expect(result.agents?.explore?.model).toBe("github-copilot/gpt-5-mini")
    })
  })

--- a/src/cli/model-fallback.ts
+++ b/src/cli/model-fallback.ts
@@ -36,7 +36,7 @@ export interface GeneratedOmoConfig {

 const ZAI_MODEL = "zai-coding-plan/glm-4.7"

-const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
+const ULTIMATE_FALLBACK = "opencode/big-pickle"
 const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json"

 function toProviderAvailability(config: InstallConfig): ProviderAvailability {
@@ -103,7 +103,7 @@ function getSisyphusFallbackChain(isMaxPlan: boolean): FallbackEntry[] {
  return [
    { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
    { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" },
-    { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro-preview" },
+    { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
  ]
 }

@@ -139,14 +139,16 @@ export function generateModelConfig(config: InstallConfig): GeneratedOmoConfig {
      continue
    }

-    // Special case: explore uses Claude haiku → OpenCode grok-code
+    // Special case: explore uses Claude haiku (native Anthropic, else OpenCode Zen) → GitHub Copilot gpt-5-mini → OpenCode gpt-5-nano
    if (role === "explore") {
      if (avail.native.claude) {
        agents[role] = { model: "anthropic/claude-haiku-4-5" }
      } else if (avail.opencodeZen) {
        agents[role] = { model: "opencode/claude-haiku-4-5" }
+      } else if (avail.copilot) {
+        agents[role] = { model: "github-copilot/gpt-5-mini" }
      } else {
-        agents[role] = { model: "opencode/grok-code" }
+        agents[role] = { model: "opencode/gpt-5-nano" }
      }
      continue
    }
--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -9,6 +9,8 @@ export {
  SisyphusAgentConfigSchema,
  ExperimentalConfigSchema,
  RalphLoopConfigSchema,
+  TmuxConfigSchema,
+  TmuxLayoutSchema,
 } from "./schema"

 export type {
@@ -23,4 +25,6 @@ export type {
  ExperimentalConfig,
  DynamicContextPruningConfig,
  RalphLoopConfig,
+  TmuxConfig,
+  TmuxLayout,
 } from "./schema"
--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -1,5 +1,12 @@
 import { describe, expect, test } from "bun:test"
-import { AgentOverrideConfigSchema, BuiltinCategoryNameSchema, CategoryConfigSchema, OhMyOpenCodeConfigSchema } from "./schema"
+import {
+  AgentOverrideConfigSchema,
+  BrowserAutomationConfigSchema,
+  BrowserAutomationProviderSchema,
+  BuiltinCategoryNameSchema,
+  CategoryConfigSchema,
+  OhMyOpenCodeConfigSchema,
+} from "./schema"

 describe("disabled_mcps schema", () => {
  test("should accept built-in MCP names", () => {
@@ -345,6 +352,20 @@ describe("CategoryConfigSchema", () => {
    }
  })

+  test("accepts reasoningEffort as optional string with xhigh", () => {
+    // #given
+    const config = { reasoningEffort: "xhigh" }
+
+    // #when
+    const result = CategoryConfigSchema.safeParse(config)
+
+    // #then
+    expect(result.success).toBe(true)
+    if (result.success) {
+      expect(result.data.reasoningEffort).toBe("xhigh")
+    }
+  })
+
  test("rejects non-string variant", () => {
    // #given
    const config = { model: "openai/gpt-5.2", variant: 123 }
@@ -494,3 +515,94 @@ describe("Sisyphus-Junior agent override", () => {
    }
  })
 })
+
+describe("BrowserAutomationProviderSchema", () => {
+  test("accepts 'playwright' as valid provider", () => {
+    // #given
+    const input = "playwright"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data).toBe("playwright")
+  })
+
+  test("accepts 'agent-browser' as valid provider", () => {
+    // #given
+    const input = "agent-browser"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data).toBe("agent-browser")
+  })
+
+  test("rejects invalid provider", () => {
+    // #given
+    const input = "invalid-provider"
+
+    // #when
+    const result = BrowserAutomationProviderSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(false)
+  })
+})
+
+describe("BrowserAutomationConfigSchema", () => {
+  test("defaults provider to 'playwright' when not specified", () => {
+    // #given
+    const input = {}
+
+    // #when
+    const result = BrowserAutomationConfigSchema.parse(input)
+
+    // #then
+    expect(result.provider).toBe("playwright")
+  })
+
+  test("accepts agent-browser provider", () => {
+    // #given
+    const input = { provider: "agent-browser" }
+
+    // #when
+    const result = BrowserAutomationConfigSchema.parse(input)
+
+    // #then
+    expect(result.provider).toBe("agent-browser")
+  })
+})
+
+describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
+  test("accepts browser_automation_engine config", () => {
+    // #given
+    const input = {
+      browser_automation_engine: {
+        provider: "agent-browser",
+      },
+    }
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data?.browser_automation_engine?.provider).toBe("agent-browser")
+  })
+
+  test("accepts config without browser_automation_engine", () => {
+    // #given
+    const input = {}
+
+    // #when
+    const result = OhMyOpenCodeConfigSchema.safeParse(input)
+
+    // #then
+    expect(result.success).toBe(true)
+    expect(result.data?.browser_automation_engine).toBeUndefined()
+  })
+})
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -30,6 +30,7 @@ export const BuiltinAgentNameSchema = z.enum([

 export const BuiltinSkillNameSchema = z.enum([
  "playwright",
+  "agent-browser",
  "frontend-ui-ux",
  "git-master",
 ])
@@ -76,6 +77,7 @@ export const HookNameSchema = z.enum([

  "thinking-block-validator",
  "ralph-loop",
+  "category-skill-reminder",

  "compaction-context-injector",
  "claude-code-hooks",
@@ -83,6 +85,7 @@ export const HookNameSchema = z.enum([
  "edit-error-recovery",
  "delegate-task-retry",
  "prometheus-md-only",
+  "sisyphus-junior-notepad",
  "start-work",
  "atlas",
 ])
@@ -160,7 +163,7 @@ export const CategoryConfigSchema = z.object({
    type: z.enum(["enabled", "disabled"]),
    budgetTokens: z.number().optional(),
  }).optional(),
-  reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
+  reasoningEffort: z.enum(["low", "medium", "high", "xhigh"]).optional(),
  textVerbosity: z.enum(["low", "medium", "high"]).optional(),
  tools: z.record(z.string(), z.boolean()).optional(),
  prompt_append: z.string().optional(),
@@ -297,6 +300,32 @@ export const GitMasterConfigSchema = z.object({
  include_co_authored_by: z.boolean().default(true),
 })

+export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser"])
+
+export const BrowserAutomationConfigSchema = z.object({
+  /**
+   * Browser automation provider to use for the "playwright" skill.
+   * - "playwright": Uses Playwright MCP server (@playwright/mcp) - default
+   * - "agent-browser": Uses Vercel's agent-browser CLI (requires: bun add -g agent-browser)
+   */
+  provider: BrowserAutomationProviderSchema.default("playwright"),
+})
+
+export const TmuxLayoutSchema = z.enum([
+  'main-horizontal',  // main pane top, agent panes bottom stack
+  'main-vertical',    // main pane left, agent panes right stack (default)
+  'tiled',            // all panes same size grid
+  'even-horizontal',  // all panes horizontal row
+  'even-vertical',    // all panes vertical stack
+])
+
+export const TmuxConfigSchema = z.object({
+  enabled: z.boolean().default(false),
+  layout: TmuxLayoutSchema.default('main-vertical'),
+  main_pane_size: z.number().min(20).max(80).default(60),
+  main_pane_min_width: z.number().min(40).default(120),
+  agent_pane_min_width: z.number().min(20).default(40),
+})
 export const OhMyOpenCodeConfigSchema = z.object({
  $schema: z.string().optional(),
  disabled_mcps: z.array(AnyMcpNameSchema).optional(),
@@ -316,6 +345,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
  background_task: BackgroundTaskConfigSchema.optional(),
  notification: NotificationConfigSchema.optional(),
  git_master: GitMasterConfigSchema.optional(),
+  browser_automation_engine: BrowserAutomationConfigSchema.optional(),
+  tmux: TmuxConfigSchema.optional(),
 })

 export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
@@ -338,5 +369,9 @@ export type CategoryConfig = z.infer<typeof CategoryConfigSchema>
 export type CategoriesConfig = z.infer<typeof CategoriesConfigSchema>
 export type BuiltinCategoryName = z.infer<typeof BuiltinCategoryNameSchema>
 export type GitMasterConfig = z.infer<typeof GitMasterConfigSchema>
+export type BrowserAutomationProvider = z.infer<typeof BrowserAutomationProviderSchema>
+export type BrowserAutomationConfig = z.infer<typeof BrowserAutomationConfigSchema>
+export type TmuxConfig = z.infer<typeof TmuxConfigSchema>
+export type TmuxLayout = z.infer<typeof TmuxLayoutSchema>

 export { AnyMcpNameSchema, type AnyMcpName, McpNameSchema, type McpName } from "../mcp/types"
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -2,31 +2,31 @@

 ## OVERVIEW

-Core feature modules + Claude Code compatibility layer. Background agents, skill MCP, builtin skills/commands, 5 loaders.
+Core feature modules + Claude Code compatibility layer. Orchestrates background agents, skill MCPs, builtin skills/commands, and 16 feature modules.

 ## STRUCTURE

 ```
 features/
-├── background-agent/           # Task lifecycle (1335 lines)
+├── background-agent/           # Task lifecycle (1377 lines)
 │   ├── manager.ts              # Launch → poll → complete
-│   ├── concurrency.ts          # Per-provider limits
-│   └── types.ts                # BackgroundTask, LaunchInput
-├── skill-mcp-manager/          # MCP client lifecycle
-│   ├── manager.ts              # Lazy loading, cleanup
-│   └── types.ts                # SkillMcpConfig
-├── builtin-skills/             # Playwright, git-master, frontend-ui-ux
-│   └── skills.ts               # 1203 lines
-├── builtin-commands/           # ralph-loop, refactor, init-deep
+│   └── concurrency.ts          # Per-provider limits
+├── builtin-skills/             # Core skills (1729 lines)
+│   └── skills.ts               # agent-browser, dev-browser, frontend-ui-ux, git-master, typescript-programmer
+├── builtin-commands/           # ralph-loop, refactor, ulw-loop, init-deep, start-work, cancel-ralph
 ├── claude-code-agent-loader/   # ~/.claude/agents/*.md
 ├── claude-code-command-loader/ # ~/.claude/commands/*.md
-├── claude-code-mcp-loader/     # .mcp.json
+├── claude-code-mcp-loader/     # .mcp.json with ${VAR} expansion
 ├── claude-code-plugin-loader/  # installed_plugins.json
 ├── claude-code-session-state/  # Session persistence
 ├── opencode-skill-loader/      # Skills from 6 directories
 ├── context-injector/           # AGENTS.md/README.md injection
 ├── boulder-state/              # Todo state persistence
-└── hook-message-injector/      # Message injection
+├── hook-message-injector/      # Message injection
+├── task-toast-manager/         # Background task notifications
+├── skill-mcp-manager/          # MCP client lifecycle (520 lines)
+├── tmux-subagent/              # Tmux session management
+└── ... (16 modules total)
 ```

 ## LOADER PRIORITY
@@ -41,8 +41,9 @@ features/

 - **Lifecycle**: `launch` → `poll` (2s) → `complete`
 - **Stability**: 3 consecutive polls = idle
- **Concurrency**: Per-provider/model limits
+- **Concurrency**: Per-provider/model limits via `ConcurrencyManager`
 - **Cleanup**: 30m TTL, 3m stale timeout
+- **State**: Per-session Maps, cleaned on `session.deleted`

 ## SKILL MCP

@@ -55,3 +56,4 @@ features/
 - **Sequential delegation**: Use `delegate_task` parallel
 - **Trust self-reports**: ALWAYS verify
 - **Main thread blocks**: No heavy I/O in loader init
+- **Direct state mutation**: Use managers for boulder/session state
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -776,7 +776,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      parentModel: { providerID: "old", modelID: "old-model" },
    }
    const currentMessage: CurrentMessage = {
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "anthropic", modelID: "claude-opus-4-5" },
    }

@@ -784,7 +784,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, currentMessage)

    // #then - uses currentMessage values, not task.parentModel/parentAgent
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect(promptBody.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-5" })
  })

@@ -827,11 +827,11 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      status: "completed",
      startedAt: new Date(),
      completedAt: new Date(),
-      parentAgent: "Sisyphus",
+      parentAgent: "sisyphus",
      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
    }
    const currentMessage: CurrentMessage = {
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "anthropic" },
    }

@@ -839,7 +839,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, currentMessage)

    // #then - model not passed due to incomplete data
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect("model" in promptBody).toBe(false)
  })

@@ -856,7 +856,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
      status: "completed",
      startedAt: new Date(),
      completedAt: new Date(),
-      parentAgent: "Sisyphus",
+      parentAgent: "sisyphus",
      parentModel: { providerID: "anthropic", modelID: "claude-opus" },
    }

@@ -864,7 +864,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
    const promptBody = buildNotificationPromptBody(task, null)

    // #then - falls back to task.parentAgent, no model
-    expect(promptBody.agent).toBe("Sisyphus")
+    expect(promptBody.agent).toBe("sisyphus")
    expect("model" in promptBody).toBe(false)
  })
 })
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -7,7 +7,8 @@ import type {
 } from "./types"
 import { log, getAgentToolRestrictions } from "../../shared"
 import { ConcurrencyManager } from "./concurrency"
-import type { BackgroundTaskConfig } from "../../config/schema"
+import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
+import { isInsideTmux } from "../../shared/tmux"

 import { subagentSessions } from "../claude-code-session-state"
 import { getTaskToastManager } from "../task-toast-manager"
@@ -54,6 +55,14 @@ interface QueueItem {
  input: LaunchInput
 }

+export interface SubagentSessionCreatedEvent {
+  sessionID: string
+  parentID: string
+  title: string
+}
+
+export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>
+
 export class BackgroundManager {
  private static cleanupManagers = new Set<BackgroundManager>()
  private static cleanupRegistered = false
@@ -68,12 +77,20 @@ export class BackgroundManager {
  private concurrencyManager: ConcurrencyManager
  private shutdownTriggered = false
  private config?: BackgroundTaskConfig
-
+  private tmuxEnabled: boolean
+  private onSubagentSessionCreated?: OnSubagentSessionCreated

  private queuesByKey: Map<string, QueueItem[]> = new Map()
  private processingKeys: Set<string> = new Set()

-  constructor(ctx: PluginInput, config?: BackgroundTaskConfig) {
+  constructor(
+    ctx: PluginInput,
+    config?: BackgroundTaskConfig,
+    options?: {
+      tmuxConfig?: TmuxConfig
+      onSubagentSessionCreated?: OnSubagentSessionCreated
+    }
+  ) {
    this.tasks = new Map()
    this.notifications = new Map()
    this.pendingByParent = new Map()
@@ -81,6 +98,8 @@ export class BackgroundManager {
    this.directory = ctx.directory
    this.concurrencyManager = new ConcurrencyManager(config)
    this.config = config
+    this.tmuxEnabled = options?.tmuxConfig?.enabled ?? false
+    this.onSubagentSessionCreated = options?.onSubagentSessionCreated
    this.registerProcessCleanup()
  }

@@ -222,6 +241,29 @@ export class BackgroundManager {
    const sessionID = createResult.data.id
    subagentSessions.add(sessionID)

+    log("[background-agent] tmux callback check", {
+      hasCallback: !!this.onSubagentSessionCreated,
+      tmuxEnabled: this.tmuxEnabled,
+      isInsideTmux: isInsideTmux(),
+      sessionID,
+      parentID: input.parentSessionID,
+    })
+
+    if (this.onSubagentSessionCreated && this.tmuxEnabled && isInsideTmux()) {
+      log("[background-agent] Invoking tmux callback NOW", { sessionID })
+      await this.onSubagentSessionCreated({
+        sessionID,
+        parentID: input.parentSessionID,
+        title: input.description,
+      }).catch((err) => {
+        log("[background-agent] Failed to spawn tmux pane:", err)
+      })
+      log("[background-agent] tmux callback completed, waiting 200ms")
+      await new Promise(r => setTimeout(r, 200))
+    } else {
+      log("[background-agent] SKIP tmux callback - conditions not met")
+    }
+
    // Update task to running state
    task.status = "running"
    task.startedAt = new Date()
--- a/src/features/builtin-commands/commands.ts
+++ b/src/features/builtin-commands/commands.ts
@@ -55,7 +55,7 @@ ${REFACTOR_TEMPLATE}
  },
  "start-work": {
    description: "(builtin) Start Sisyphus work session from Prometheus plan",
-    agent: "Atlas",
+    agent: "atlas",
    template: `<command-instruction>
 ${START_WORK_TEMPLATE}
 </command-instruction>
@@ -81,7 +81,7 @@ export function loadBuiltinCommands(
  for (const [name, definition] of Object.entries(BUILTIN_COMMAND_DEFINITIONS)) {
    if (!disabled.has(name as BuiltinCommandName)) {
      const { argumentHint: _argumentHint, ...openCodeCompatible } = definition
-      commands[name] = openCodeCompatible as CommandDefinition
+      commands[name] = { ...openCodeCompatible, name } as CommandDefinition
    }
  }

--- a/src/features/builtin-commands/templates/ralph-loop.ts
+++ b/src/features/builtin-commands/templates/ralph-loop.ts
@@ -17,7 +17,7 @@ export const RALPH_LOOP_TEMPLATE = `You are starting a Ralph Loop - a self-refer

 ## Exit Conditions

-1. **Completion**: Output \`<promise>DONE</promise>\` (or custom promise text) when fully complete
+1. **Completion**: Output your completion promise tag when fully complete
 2. **Max Iterations**: Loop stops automatically at limit
 3. **Cancel**: User runs \`/cancel-ralph\` command

--- a/src/features/builtin-commands/templates/start-work.ts
+++ b/src/features/builtin-commands/templates/start-work.ts
@@ -25,7 +25,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
   }
   \`\`\`

-5. **Read the plan file** and start executing tasks according to Orchestrator Sisyphus workflow
+5. **Read the plan file** and start executing tasks according to atlas workflow

 ## OUTPUT FORMAT

@@ -69,4 +69,4 @@ Reading plan and beginning execution...
 - The session_id is injected by the hook - use it directly
 - Always update boulder.json BEFORE starting work
 - Read the FULL plan file before delegating any tasks
- Follow Orchestrator Sisyphus delegation protocols (7-section format)`
+- Follow atlas delegation protocols (7-section format)`
--- a/src/features/builtin-skills/agent-browser/SKILL.md
+++ b/src/features/builtin-skills/agent-browser/SKILL.md
@@ -0,0 +1,336 @@
+---
+name: agent-browser
+description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
+---
+
+# Browser Automation with agent-browser
+
+## Quick start
+
+```bash
+agent-browser open <url>        # Navigate to page
+agent-browser snapshot -i       # Get interactive elements with refs
+agent-browser click @e1         # Click element by ref
+agent-browser fill @e2 "text"   # Fill input by ref
+agent-browser close             # Close browser
+```
+
+## Core workflow
+
+1. Navigate: `agent-browser open <url>`
+2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
+3. Interact using refs from the snapshot
+4. Re-snapshot after navigation or significant DOM changes
+
+## Commands
+
+### Navigation
+```bash
+agent-browser open <url>      # Navigate to URL
+agent-browser back            # Go back
+agent-browser forward         # Go forward
+agent-browser reload          # Reload page
+agent-browser close           # Close browser
+```
+
+### Snapshot (page analysis)
+```bash
+agent-browser snapshot            # Full accessibility tree
+agent-browser snapshot -i         # Interactive elements only (recommended)
+agent-browser snapshot -c         # Compact output
+agent-browser snapshot -d 3       # Limit depth to 3
+agent-browser snapshot -s "#main" # Scope to CSS selector
+```
+
+### Interactions (use @refs from snapshot)
+```bash
+agent-browser click @e1           # Click
+agent-browser dblclick @e1        # Double-click
+agent-browser focus @e1           # Focus element
+agent-browser fill @e2 "text"     # Clear and type
+agent-browser type @e2 "text"     # Type without clearing
+agent-browser press Enter         # Press key
+agent-browser press Control+a     # Key combination
+agent-browser keydown Shift       # Hold key down
+agent-browser keyup Shift         # Release key
+agent-browser hover @e1           # Hover
+agent-browser check @e1           # Check checkbox
+agent-browser uncheck @e1         # Uncheck checkbox
+agent-browser select @e1 "value"  # Select dropdown
+agent-browser scroll down 500     # Scroll page
+agent-browser scrollintoview @e1  # Scroll element into view
+agent-browser drag @e1 @e2        # Drag and drop
+agent-browser upload @e1 file.pdf # Upload files
+```
+
+### Get information
+```bash
+agent-browser get text @e1        # Get element text
+agent-browser get html @e1        # Get innerHTML
+agent-browser get value @e1       # Get input value
+agent-browser get attr @e1 href   # Get attribute
+agent-browser get title           # Get page title
+agent-browser get url             # Get current URL
+agent-browser get count ".item"   # Count matching elements
+agent-browser get box @e1         # Get bounding box
+```
+
+### Check state
+```bash
+agent-browser is visible @e1      # Check if visible
+agent-browser is enabled @e1      # Check if enabled
+agent-browser is checked @e1      # Check if checked
+```
+
+### Screenshots & PDF
+```bash
+agent-browser screenshot          # Screenshot to stdout
+agent-browser screenshot path.png # Save to file
+agent-browser screenshot --full   # Full page
+agent-browser pdf output.pdf      # Save as PDF
+```
+
+### Video recording
+```bash
+agent-browser record start ./demo.webm    # Start recording (uses current URL + state)
+agent-browser click @e1                   # Perform actions
+agent-browser record stop                 # Stop and save video
+agent-browser record restart ./take2.webm # Stop current + start new recording
+```
+Recording creates a fresh context but preserves cookies/storage from your session.
+
+### Wait
+```bash
+agent-browser wait @e1                     # Wait for element
+agent-browser wait 2000                    # Wait milliseconds
+agent-browser wait --text "Success"        # Wait for text
+agent-browser wait --url "**/dashboard"    # Wait for URL pattern
+agent-browser wait --load networkidle      # Wait for network idle
+agent-browser wait --fn "window.ready"     # Wait for JS condition
+```
+
+### Mouse control
+```bash
+agent-browser mouse move 100 200      # Move mouse
+agent-browser mouse down left         # Press button
+agent-browser mouse up left           # Release button
+agent-browser mouse wheel 100         # Scroll wheel
+```
+
+### Semantic locators (alternative to refs)
+```bash
+agent-browser find role button click --name "Submit"
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find first ".item" click
+agent-browser find nth 2 "a" text
+```
+
+### Browser settings
+```bash
+agent-browser set viewport 1920 1080      # Set viewport size
+agent-browser set device "iPhone 14"      # Emulate device
+agent-browser set geo 37.7749 -122.4194   # Set geolocation
+agent-browser set offline on              # Toggle offline mode
+agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
+agent-browser set credentials user pass   # HTTP basic auth
+agent-browser set media dark              # Emulate color scheme
+```
+
+### Cookies & Storage
+```bash
+agent-browser cookies                     # Get all cookies
+agent-browser cookies set name value      # Set cookie
+agent-browser cookies clear               # Clear cookies
+agent-browser storage local               # Get all localStorage
+agent-browser storage local key           # Get specific key
+agent-browser storage local set k v       # Set value
+agent-browser storage local clear         # Clear all
+agent-browser storage session             # Get all sessionStorage
+agent-browser storage session key         # Get specific key
+agent-browser storage session set k v     # Set value
+agent-browser storage session clear       # Clear all
+```
+
+### Network
+```bash
+agent-browser network route <url>              # Intercept requests
+agent-browser network route <url> --abort      # Block requests
+agent-browser network route <url> --body '{}'  # Mock response
+agent-browser network unroute [url]            # Remove routes
+agent-browser network requests                 # View tracked requests
+agent-browser network requests --filter api    # Filter requests
+```
+
+### Tabs & Windows
+```bash
+agent-browser tab                 # List tabs
+agent-browser tab new [url]       # New tab
+agent-browser tab 2               # Switch to tab
+agent-browser tab close           # Close tab
+agent-browser window new          # New window
+```
+
+### Frames
+```bash
+agent-browser frame "#iframe"     # Switch to iframe
+agent-browser frame main          # Back to main frame
+```
+
+### Dialogs
+```bash
+agent-browser dialog accept [text]  # Accept dialog
+agent-browser dialog dismiss        # Dismiss dialog
+```
+
+### JavaScript
+```bash
+agent-browser eval "document.title"   # Run JavaScript
+```
+
+## Global Options
+
+| Option | Description |
+|--------|-------------|
+| `--session <name>` | Isolated browser session (`AGENT_BROWSER_SESSION` env) |
+| `--profile <path>` | Persistent browser profile (`AGENT_BROWSER_PROFILE` env) |
+| `--headers <json>` | HTTP headers scoped to URL's origin |
+| `--executable-path <path>` | Custom browser binary (`AGENT_BROWSER_EXECUTABLE_PATH` env) |
+| `--args <args>` | Browser launch args (`AGENT_BROWSER_ARGS` env) |
+| `--user-agent <ua>` | Custom User-Agent (`AGENT_BROWSER_USER_AGENT` env) |
+| `--proxy <url>` | Proxy server (`AGENT_BROWSER_PROXY` env) |
+| `--proxy-bypass <hosts>` | Hosts to bypass proxy (`AGENT_BROWSER_PROXY_BYPASS` env) |
+| `-p, --provider <name>` | Cloud browser provider (`AGENT_BROWSER_PROVIDER` env) |
+| `--json` | Machine-readable JSON output |
+| `--headed` | Show browser window (not headless) |
+| `--cdp <port\|wss://url>` | Connect via Chrome DevTools Protocol |
+| `--debug` | Debug output |
+
+## Example: Form submission
+
+```bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i  # Check result
+```
+
+## Example: Authentication with saved state
+
+```bash
+# Login once
+agent-browser open https://app.example.com/login
+agent-browser snapshot -i
+agent-browser fill @e1 "username"
+agent-browser fill @e2 "password"
+agent-browser click @e3
+agent-browser wait --url "**/dashboard"
+agent-browser state save auth.json
+
+# Later sessions: load saved state
+agent-browser state load auth.json
+agent-browser open https://app.example.com/dashboard
+```
+
+### Header-based Auth (Skip login flows)
+```bash
+# Headers scoped to api.example.com only
+agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
+# Navigate to another domain - headers NOT sent (safe)
+agent-browser open other-site.com
+# Global headers (all domains)
+agent-browser set headers '{"X-Custom-Header": "value"}'
+```
+
+## Sessions & Persistent Profiles
+
+### Sessions (parallel browsers)
+```bash
+agent-browser --session test1 open site-a.com
+agent-browser --session test2 open site-b.com
+agent-browser session list
+```
+
+### Persistent Profiles
+Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
+```bash
+agent-browser --profile ~/.myapp-profile open myapp.com
+# Or via env var
+AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
+```
+- Use different profile paths for different projects
+- Login once → restart browser → still logged in
+- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
+
+## JSON output (for parsing)
+
+Add `--json` for machine-readable output:
+```bash
+agent-browser snapshot -i --json
+agent-browser get text @e1 --json
+```
+
+## Debugging
+
+```bash
+agent-browser open example.com --headed              # Show browser window
+agent-browser console                                # View console messages
+agent-browser errors                                 # View page errors
+agent-browser record start ./debug.webm              # Record from current page
+agent-browser record stop                            # Save recording
+agent-browser connect 9222                           # Local CDP port
+agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot  # Remote via WebSocket
+agent-browser console --clear                        # Clear console
+agent-browser errors --clear                         # Clear errors
+agent-browser highlight @e1                          # Highlight element
+agent-browser trace start                            # Start recording trace
+agent-browser trace stop trace.zip                   # Stop and save trace
+```
+
+---
+
+## Installation
+
+### Step 1: Install agent-browser CLI
+
+```bash
+bun add -g agent-browser
+```
+
+### Step 2: Install Playwright browsers
+
+**IMPORTANT**: `agent-browser install` may fail on some platforms (e.g., darwin-arm64) with a "No binary found" error. In that case, install the Playwright browsers directly:
+
+```bash
+# Create a temp project and install playwright
+cd /tmp && bun init -y && bun add playwright
+
+# Install Chromium browser
+bun playwright install chromium
+```
+
+This downloads Chrome for Testing to Playwright's browser cache (`~/Library/Caches/ms-playwright/` on macOS, `~/.cache/ms-playwright/` on Linux).
+
+### Verify installation
+
+```bash
+agent-browser open https://example.com --headed
+```
+
+If the browser opens successfully, installation is complete.
+
+### Troubleshooting
+
+| Error | Solution |
+|-------|----------|
+| `No binary found for darwin-arm64` | Run `bun playwright install chromium` in a project with playwright dependency |
+| `Executable doesn't exist at .../chromium-XXXX` | Re-run `bun playwright install chromium` |
+| Browser doesn't open | Pass the `--headed` flag to show a visible browser window |
+
+---
+Run `agent-browser --help` for all commands. Repo: https://github.com/vercel-labs/agent-browser
--- a/src/features/builtin-skills/dev-browser/SKILL.md
+++ b/src/features/builtin-skills/dev-browser/SKILL.md
@@ -0,0 +1,213 @@
+---
+name: dev-browser
+description: Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include "go to [url]", "click on", "fill out the form", "take a screenshot", "scrape", "automate", "test the website", "log into", or any browser interaction request.
+---
+
+# Dev Browser Skill
+
+Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
+
+## Choosing Your Approach
+
+- **Local/source-available sites**: Read the source code first to write selectors directly
+- **Unknown page layouts**: Use `getAISnapshot()` to discover elements and `selectSnapshotRef()` to interact with them
+- **Visual feedback**: Take screenshots to see what the user sees
+
+## Setup
+
+> **Installation**: See [references/installation.md](references/installation.md) for detailed setup instructions including Windows support.
+
+Two modes available. Ask the user if unclear which to use.
+
+### Standalone Mode (Default)
+
+Launches a new Chromium browser for fresh automation sessions.
+
+```bash
+./skills/dev-browser/server.sh &
+```
+
+Add `--headless` flag if user requests it. **Wait for the `Ready` message before running scripts.**
+
+### Extension Mode
+
+Connects to user's existing Chrome browser. Use this when:
+
+- The user is already logged into a site and wants you to work inside that authenticated session (i.e., the target is not a local dev environment).
+- The user asks you to use the extension
+
+**Important**: The core flow is still the same. You create named pages inside of their browser.
+
+**Start the relay server:**
+
+```bash
+cd skills/dev-browser && npm i && npm run start-extension &
+```
+
+Wait for `Waiting for extension to connect...` followed by `Extension connected` in the console. The second message confirms that a client has connected and the browser is ready to be controlled.
+**Workflow:**
+
+1. Scripts call `client.page("name")`, just as in standalone mode, to create new pages or connect to existing ones.
+2. Automation runs on the user's actual browser session
+
+If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
+
+## Writing Scripts
+
+> **Run all scripts from `skills/dev-browser/` directory.** The `@/` import alias requires this directory's config.
+
+Execute scripts inline using heredocs:
+
+```bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+// Create page with custom viewport size (optional)
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+EOF
+```
+
+**Write to `tmp/` files only when** the script needs reuse, is complex, or user explicitly requests it.
+
+### Key Principles
+
+1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
+2. **Evaluate state**: Log/return state at the end to decide next steps
+3. **Descriptive page names**: Use `"checkout"`, `"login"`, not `"main"`
+4. **Disconnect to exit**: `await client.disconnect()` - pages persist on server
+5. **Plain JS in evaluate**: `page.evaluate()` runs in browser - no TypeScript syntax
+
+## Workflow Loop
+
+Follow this pattern for complex tasks:
+
+1. **Write a script** to perform one action
+2. **Run it** and observe the output
+3. **Evaluate** - did it work? What's the current state?
+4. **Decide** - is the task complete or do we need another script?
+5. **Repeat** until task is done
+
+### No TypeScript in Browser Context
+
+Code passed to `page.evaluate()` runs in the browser, which doesn't understand TypeScript:
+
+```typescript
+// ✅ Correct: plain JavaScript
+const text = await page.evaluate(() => {
+  return document.body.innerText;
+});
+
+// ❌ Wrong: TypeScript syntax will fail at runtime
+const text = await page.evaluate(() => {
+  const el: HTMLElement = document.body; // Type annotation breaks in browser!
+  return el.innerText;
+});
+```
+
+## Scraping Data
+
+For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide covering request capture, schema discovery, and paginated API replay.
+
+## Client API
+
+```typescript
+const client = await connect();
+
+// Get or create named page (viewport only applies to new pages)
+const page = await client.page("name");
+const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
+
+const pages = await client.list(); // List all page names
+await client.close("name"); // Close a page
+await client.disconnect(); // Disconnect (pages persist)
+
+// ARIA Snapshot methods
+const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
+const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
+```
+
+The `page` object is a standard Playwright Page.
+
+## Waiting
+
+```typescript
+import { waitForPageLoad } from "@/client.js";
+
+await waitForPageLoad(page); // After navigation
+await page.waitForSelector(".results"); // For specific elements
+await page.waitForURL("**/success"); // For specific URL
+```
+
+## Inspecting Page State
+
+### Screenshots
+
+```typescript
+await page.screenshot({ path: "tmp/screenshot.png" });
+await page.screenshot({ path: "tmp/full.png", fullPage: true });
+```
+
+### ARIA Snapshot (Element Discovery)
+
+Use `getAISnapshot()` to discover page elements. Returns YAML-formatted accessibility tree:
+
+```yaml
+- banner:
+  - link "Hacker News" [ref=e1]
+  - navigation:
+    - link "new" [ref=e2]
+- main:
+  - list:
+    - listitem:
+      - link "Article Title" [ref=e8]
+      - link "328 comments" [ref=e9]
+- contentinfo:
+  - textbox [ref=e10]
+    - /placeholder: "Search"
+```
+
+**Interpreting refs:**
+
+- `[ref=eN]` - Element reference for interaction (visible, clickable elements only)
+- `[checked]`, `[disabled]`, `[expanded]` - Element states
+- `[level=N]` - Heading level
+- `/url:`, `/placeholder:` - Element properties
+
+**Interacting with refs:**
+
+```typescript
+const snapshot = await client.getAISnapshot("hackernews");
+console.log(snapshot); // Find the ref you need
+
+const element = await client.selectSnapshotRef("hackernews", "e2");
+await element.click();
+```
+
+## Error Recovery
+
+Page state persists after failures. Debug with:
+
+```bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("hackernews");
+
+await page.screenshot({ path: "tmp/debug.png" });
+console.log({
+  url: page.url(),
+  title: await page.title(),
+  bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
+});
+
+await client.disconnect();
+EOF
+```
--- a/src/features/builtin-skills/dev-browser/references/installation.md
+++ b/src/features/builtin-skills/dev-browser/references/installation.md
@@ -0,0 +1,193 @@
+# Dev Browser Installation Guide
+
+This guide covers installation for all platforms: macOS, Linux, and Windows.
+
+## Prerequisites
+
+- [Node.js](https://nodejs.org) v18 or later with npm
+- Git (for cloning the skill)
+
+## Installation
+
+### Step 1: Clone the Skill
+
+```bash
+# Clone dev-browser to a temporary location
+git clone https://github.com/sawyerhood/dev-browser /tmp/dev-browser-skill
+
+# Copy to skills directory (adjust path as needed)
+# For oh-my-opencode: already bundled
+# For manual installation:
+mkdir -p ~/.config/opencode/skills
+cp -r /tmp/dev-browser-skill/skills/dev-browser ~/.config/opencode/skills/dev-browser
+
+# Cleanup
+rm -rf /tmp/dev-browser-skill
+```
+
+**Windows (PowerShell):**
+```powershell
+# Clone dev-browser to temp location
+git clone https://github.com/sawyerhood/dev-browser $env:TEMP\dev-browser-skill
+
+# Copy to skills directory
+New-Item -ItemType Directory -Force -Path "$env:USERPROFILE\.config\opencode\skills"
+Copy-Item -Recurse "$env:TEMP\dev-browser-skill\skills\dev-browser" "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+
+# Cleanup
+Remove-Item -Recurse -Force "$env:TEMP\dev-browser-skill"
+```
+
+### Step 2: Install Dependencies
+
+```bash
+cd ~/.config/opencode/skills/dev-browser
+npm install
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+npm install
+```
+
+### Step 3: Start the Server
+
+#### Standalone Mode (New Browser Instance)
+
+**macOS/Linux:**
+```bash
+cd ~/.config/opencode/skills/dev-browser
+./server.sh &
+# Or for headless:
+./server.sh --headless &
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js"
+# Or for headless:
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "server.js", "--headless"
+```
+
+**Windows (CMD):**
+```cmd
+cd %USERPROFILE%\.config\opencode\skills\dev-browser
+start /B node server.js
+```
+
+Wait for the `Ready` message before running scripts.
+
+#### Extension Mode (Use Existing Chrome)
+
+**macOS/Linux:**
+```bash
+cd ~/.config/opencode/skills/dev-browser
+npm run start-extension &
+```
+
+**Windows (PowerShell):**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
+```
+
+Wait for `Extension connected` message.
+
+## Chrome Extension Setup (Optional)
+
+The Chrome extension allows controlling your existing Chrome browser with all your logged-in sessions.
+
+### Installation
+
+1. Download `extension.zip` from [latest release](https://github.com/sawyerhood/dev-browser/releases/latest)
+2. Extract to a permanent location:
+   - **macOS/Linux:** `~/.dev-browser-extension`
+   - **Windows:** `%USERPROFILE%\.dev-browser-extension`
+3. Open Chrome → `chrome://extensions`
+4. Enable "Developer mode" (toggle in top right)
+5. Click "Load unpacked" → select the extracted folder
+
+### Usage
+
+1. Click the Dev Browser extension icon in Chrome toolbar
+2. Toggle to "Active"
+3. Start the extension relay server (see above)
+4. Use dev-browser scripts - they'll control your existing Chrome
+
+## Troubleshooting
+
+### Server Won't Start
+
+**Check Node.js version:**
+```bash
+node --version  # Should be v18+
+```
+
+**Check port availability:**
+```bash
+# macOS/Linux
+lsof -i :3000
+
+# Windows
+netstat -ano | findstr :3000
+```
+
+### Playwright Installation Issues
+
+If Chromium fails to install:
+```bash
+npx playwright install chromium
+```
+
+### Windows-Specific Issues
+
+**Execution Policy:**
+If PowerShell scripts are blocked:
+```powershell
+Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
+```
+
+**Path Issues:**
+Use forward slashes or backslashes inside a quoted path (PowerShell does not treat `\` as an escape character, so no escaping is needed):
+```powershell
+# Good
+cd "$env:USERPROFILE/.config/opencode/skills/dev-browser"
+# Also good
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+```
+
+### Extension Not Connecting
+
+1. Ensure extension is "Active" (click icon to toggle)
+2. Check relay server is running (`npm run start-extension`)
+3. Look for `Extension connected` message in console
+4. Try reloading the extension in `chrome://extensions`
+
+## Permissions
+
+To skip permission prompts in Claude Code, add to `~/.claude/settings.json`:
+
+```json
+{
+  "permissions": {
+    "allow": ["Skill(dev-browser:dev-browser)", "Bash(npx tsx:*)"]
+  }
+}
+```
+
+## Updating
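+**Note**: `git pull` only works if the skill directory is itself a git checkout. If you installed by copying the files as in Step 1, repeat Step 1 to fetch the latest version, then run `npm install`.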
+```bash
+cd ~/.config/opencode/skills/dev-browser
+git pull
+npm install
+```
+
+**Windows:**
+```powershell
+cd "$env:USERPROFILE\.config\opencode\skills\dev-browser"
+git pull
+npm install
+```
--- a/src/features/builtin-skills/dev-browser/references/scraping.md
+++ b/src/features/builtin-skills/dev-browser/references/scraping.md
@@ -0,0 +1,155 @@
+# Data Scraping Guide
+
+For large datasets (followers, posts, search results), **intercept and replay network requests** rather than scrolling and parsing the DOM. This is faster, more reliable, and handles pagination automatically.
+
+## Why Not Scroll?
+
+Scrolling is slow, unreliable, and wastes time. APIs return structured data with pagination built in. Always prefer API replay.
+
+## Start Small, Then Scale
+
+**Don't try to automate everything at once.** Work incrementally:
+
+1. **Capture one request** - verify you're intercepting the right endpoint
+2. **Inspect one response** - understand the schema before writing extraction code
+3. **Extract a few items** - make sure your parsing logic works
+4. **Then scale up** - add pagination loop only after the basics work
+
+This prevents wasting time debugging a complex script when the real issue is a simple path mismatch such as `data.user.timeline` vs `data.user.result.timeline`.
+
+## Step-by-Step Workflow
+
+### 1. Capture Request Details
+
+First, intercept a request to understand URL structure and required headers:
+
+```typescript
+import { connect, waitForPageLoad } from "@/client.js";
+import * as fs from "node:fs";
+
+const client = await connect();
+const page = await client.page("site");
+
+let capturedRequest = null;
+page.on("request", (request) => {
+  const url = request.url();
+  // Look for API endpoints (adjust pattern for your target site)
+  if (url.includes("/api/") || url.includes("/graphql/")) {
+    capturedRequest = {
+      url: url,
+      headers: request.headers(),
+      method: request.method(),
+    };
+    fs.writeFileSync("tmp/request-details.json", JSON.stringify(capturedRequest, null, 2));
+    console.log("Captured request:", url.substring(0, 80) + "...");
+  }
+});
+
+await page.goto("https://example.com/profile");
+await waitForPageLoad(page);
+await page.waitForTimeout(3000);
+
+await client.disconnect();
+```
+
+### 2. Capture Response to Understand Schema
+
+Save a raw response to inspect the data structure:
+
+```typescript
+page.on("response", async (response) => {
+  const url = response.url();
+  if (url.includes("UserTweets") || url.includes("/api/data")) {
+    const json = await response.json();
+    fs.writeFileSync("tmp/api-response.json", JSON.stringify(json, null, 2));
+    console.log("Captured response");
+  }
+});
+```
+
+Then analyze the structure to find:
+
+- Where the data array lives (e.g., `data.user.result.timeline.instructions[].entries`)
+- Where pagination cursors are (e.g., `cursor-bottom` entries)
+- What fields you need to extract
+
+### 3. Replay API with Pagination
+
+Once you understand the schema, replay requests directly:
+
+```typescript
+import { connect } from "@/client.js";
+import * as fs from "node:fs";
+
+const client = await connect();
+const page = await client.page("site");
+
+const results = new Map(); // Use Map for deduplication
+const headers = JSON.parse(fs.readFileSync("tmp/request-details.json", "utf8")).headers;
+const baseUrl = "https://example.com/api/data";
+
+let cursor = null;
+let hasMore = true;
+
+while (hasMore) {
+  // Build URL with pagination cursor
+  const params = { count: 20 };
+  if (cursor) params.cursor = cursor;
+  const url = `${baseUrl}?params=${encodeURIComponent(JSON.stringify(params))}`;
+
+  // Execute fetch in browser context (has auth cookies/headers)
+  const response = await page.evaluate(
+    async ({ url, headers }) => {
+      const res = await fetch(url, { headers });
+      return res.json();
+    },
+    { url, headers }
+  );
+
+  // Extract data and cursor (adjust paths for your API)
+  const entries = response?.data?.entries || [];
+  for (const entry of entries) {
+    if (entry.type === "cursor-bottom") {
+      cursor = entry.value;
+    } else if (entry.id && !results.has(entry.id)) {
+      results.set(entry.id, {
+        id: entry.id,
+        text: entry.content,
+        timestamp: entry.created_at,
+      });
+    }
+  }
+
+  console.log(`Fetched page, total: ${results.size}`);
+
+  // Check stop conditions
+  if (!cursor || entries.length === 0) hasMore = false;
+
+  // Rate limiting - be respectful
+  await new Promise((r) => setTimeout(r, 500));
+}
+
+// Export results
+const data = Array.from(results.values());
+fs.writeFileSync("tmp/results.json", JSON.stringify(data, null, 2));
+console.log(`Saved ${data.length} items`);
+
+await client.disconnect();
+```
+
+## Key Patterns
+
+| Pattern                 | Description                                            |
+| ----------------------- | ------------------------------------------------------ |
+| `page.on('request')`    | Capture outgoing request URL + headers                 |
+| `page.on('response')`   | Capture response data to understand schema             |
+| `page.evaluate(fetch)`  | Replay requests in browser context (inherits auth)     |
+| `Map` for deduplication | APIs often return overlapping data across pages        |
+| Cursor-based pagination | Look for `cursor`, `next_token`, `offset` in responses |
+
+## Tips
+
+- **Extension mode**: `page.context().cookies()` doesn't work - capture auth headers from intercepted requests instead
+- **Rate limiting**: Add 500ms+ delays between requests to avoid blocks
+- **Stop conditions**: Check for empty results, missing cursor, or reaching a date/ID threshold
+- **GraphQL APIs**: URL params often include `variables` and `features` JSON objects - capture and reuse them
--- a/src/features/builtin-skills/git-master/SKILL.md
+++ b/src/features/builtin-skills/git-master/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: git-master
-description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
+description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
 ---

 # Git Master Agent
--- a/src/features/builtin-skills/index.ts
+++ b/src/features/builtin-skills/index.ts
@@ -1,2 +1,2 @@
 export * from "./types"
-export { createBuiltinSkills } from "./skills"
+export { createBuiltinSkills, type CreateBuiltinSkillsOptions } from "./skills"
--- a/src/features/builtin-skills/skills.test.ts
+++ b/src/features/builtin-skills/skills.test.ts
@@ -0,0 +1,89 @@
+import { describe, test, expect } from "bun:test"
+import { createBuiltinSkills } from "./skills"
+
+describe("createBuiltinSkills", () => {
+	test("returns playwright skill by default", () => {
+		// #given - no options are passed, so the default browser provider applies
+
+		// #when - skills are created without arguments
+		const skills = createBuiltinSkills()
+
+		// #then - the playwright skill is present and carries a "playwright" mcpConfig entry
+		const browserSkill = skills.find((s) => s.name === "playwright")
+		expect(browserSkill).toBeDefined()
+		expect(browserSkill!.description).toContain("browser")
+		expect(browserSkill!.mcpConfig).toHaveProperty("playwright")
+	})
+
+	test("returns playwright skill when browserProvider is 'playwright'", () => {
+		// #given - the playwright provider is requested explicitly
+		const options = { browserProvider: "playwright" as const }
+
+		// #when - skills are created with that provider
+		const skills = createBuiltinSkills(options)
+
+		// #then - the playwright skill is returned and the agent-browser skill is not
+		const playwrightSkill = skills.find((s) => s.name === "playwright")
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+		expect(playwrightSkill).toBeDefined()
+		expect(agentBrowserSkill).toBeUndefined()
+	})
+
+	test("returns agent-browser skill when browserProvider is 'agent-browser'", () => {
+		// #given - the agent-browser provider is requested
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when - skills are created with that provider
+		const skills = createBuiltinSkills(options)
+
+		// #then - agent-browser is returned instead of playwright and lists the Bash(agent-browser:*) tool pattern
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+		const playwrightSkill = skills.find((s) => s.name === "playwright")
+		expect(agentBrowserSkill).toBeDefined()
+		expect(agentBrowserSkill!.description).toContain("browser")
+		expect(agentBrowserSkill!.allowedTools).toContain("Bash(agent-browser:*)")
+		expect(agentBrowserSkill!.template).toContain("agent-browser")
+		expect(playwrightSkill).toBeUndefined()
+	})
+
+	test("agent-browser skill template is inlined (not loaded from file)", () => {
+		// #given - the agent-browser provider is requested
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when - skills are created and the agent-browser skill is looked up
+		const skills = createBuiltinSkills(options)
+		const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
+
+		// #then - the template contains real section headings and commands (inlined content, not a fallback)
+		expect(agentBrowserSkill!.template).toContain("## Quick start")
+		expect(agentBrowserSkill!.template).toContain("## Commands")
+		expect(agentBrowserSkill!.template).toContain("agent-browser open")
+		expect(agentBrowserSkill!.template).toContain("agent-browser snapshot")
+	})
+
+	test("always includes frontend-ui-ux and git-master skills", () => {
+		// #given - the default provider and the agent-browser provider
+
+		// #when - skills are created once per provider
+		const defaultSkills = createBuiltinSkills()
+		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
+
+		// #then - both skill lists contain frontend-ui-ux and git-master
+		for (const skills of [defaultSkills, agentBrowserSkills]) {
+			expect(skills.find((s) => s.name === "frontend-ui-ux")).toBeDefined()
+			expect(skills.find((s) => s.name === "git-master")).toBeDefined()
+		}
+	})
+
+	test("returns exactly 4 skills regardless of provider", () => {
+		// #given - the default provider and the agent-browser provider
+
+		// #when - skills are created once per provider
+		const defaultSkills = createBuiltinSkills()
+		const agentBrowserSkills = createBuiltinSkills({ browserProvider: "agent-browser" })
+
+		// #then - switching provider changes which browser skill is returned, not how many skills there are
+		expect(defaultSkills).toHaveLength(4)
+		expect(agentBrowserSkills).toHaveLength(4)
+	})
+})
--- a/src/features/builtin-skills/skills.ts
+++ b/src/features/builtin-skills/skills.ts
@@ -1,4 +1,5 @@
 import type { BuiltinSkill } from "./types"
+import type { BrowserAutomationProvider } from "../../config/schema"

 const playwrightSkill: BuiltinSkill = {
  name: "playwright",
@@ -14,6 +15,303 @@ This skill provides browser automation capabilities via the Playwright MCP serve
  },
 }

+const agentBrowserSkill: BuiltinSkill = {
+  name: "agent-browser",
+  description: "MUST USE for any browser-related tasks. Browser automation via agent-browser CLI - verification, browsing, information gathering, web scraping, testing, screenshots, and all browser interactions.",
+  template: `# Browser Automation with agent-browser
+
+## Quick start
+
+\`\`\`bash
+agent-browser open <url>        # Navigate to page
+agent-browser snapshot -i       # Get interactive elements with refs
+agent-browser click @e1         # Click element by ref
+agent-browser fill @e2 "text"   # Fill input by ref
+agent-browser close             # Close browser
+\`\`\`
+
+## Core workflow
+
+1. Navigate: \`agent-browser open <url>\`
+2. Snapshot: \`agent-browser snapshot -i\` (returns elements with refs like \`@e1\`, \`@e2\`)
+3. Interact using refs from the snapshot
+4. Re-snapshot after navigation or significant DOM changes
+
+## Commands
+
+### Navigation
+\`\`\`bash
+agent-browser open <url>      # Navigate to URL
+agent-browser back            # Go back
+agent-browser forward         # Go forward
+agent-browser reload          # Reload page
+agent-browser close           # Close browser
+\`\`\`
+
+### Snapshot (page analysis)
+\`\`\`bash
+agent-browser snapshot            # Full accessibility tree
+agent-browser snapshot -i         # Interactive elements only (recommended)
+agent-browser snapshot -c         # Compact output
+agent-browser snapshot -d 3       # Limit depth to 3
+agent-browser snapshot -s "#main" # Scope to CSS selector
+\`\`\`
+
+### Interactions (use @refs from snapshot)
+\`\`\`bash
+agent-browser click @e1           # Click
+agent-browser dblclick @e1        # Double-click
+agent-browser focus @e1           # Focus element
+agent-browser fill @e2 "text"     # Clear and type
+agent-browser type @e2 "text"     # Type without clearing
+agent-browser press Enter         # Press key
+agent-browser press Control+a     # Key combination
+agent-browser keydown Shift       # Hold key down
+agent-browser keyup Shift         # Release key
+agent-browser hover @e1           # Hover
+agent-browser check @e1           # Check checkbox
+agent-browser uncheck @e1         # Uncheck checkbox
+agent-browser select @e1 "value"  # Select dropdown
+agent-browser scroll down 500     # Scroll page
+agent-browser scrollintoview @e1  # Scroll element into view
+agent-browser drag @e1 @e2        # Drag and drop
+agent-browser upload @e1 file.pdf # Upload files
+\`\`\`
+
+### Get information
+\`\`\`bash
+agent-browser get text @e1        # Get element text
+agent-browser get html @e1        # Get innerHTML
+agent-browser get value @e1       # Get input value
+agent-browser get attr @e1 href   # Get attribute
+agent-browser get title           # Get page title
+agent-browser get url             # Get current URL
+agent-browser get count ".item"   # Count matching elements
+agent-browser get box @e1         # Get bounding box
+\`\`\`
+
+### Check state
+\`\`\`bash
+agent-browser is visible @e1      # Check if visible
+agent-browser is enabled @e1      # Check if enabled
+agent-browser is checked @e1      # Check if checked
+\`\`\`
+
+### Screenshots & PDF
+\`\`\`bash
+agent-browser screenshot          # Screenshot to stdout
+agent-browser screenshot path.png # Save to file
+agent-browser screenshot --full   # Full page
+agent-browser pdf output.pdf      # Save as PDF
+\`\`\`
+
+### Video recording
+\`\`\`bash
+agent-browser record start ./demo.webm    # Start recording (uses current URL + state)
+agent-browser click @e1                   # Perform actions
+agent-browser record stop                 # Stop and save video
+agent-browser record restart ./take2.webm # Stop current + start new recording
+\`\`\`
+Recording creates a fresh context but preserves cookies/storage from your session.
+
+### Wait
+\`\`\`bash
+agent-browser wait @e1                     # Wait for element
+agent-browser wait 2000                    # Wait milliseconds
+agent-browser wait --text "Success"        # Wait for text
+agent-browser wait --url "**/dashboard"    # Wait for URL pattern
+agent-browser wait --load networkidle      # Wait for network idle
+agent-browser wait --fn "window.ready"     # Wait for JS condition
+\`\`\`
+
+### Mouse control
+\`\`\`bash
+agent-browser mouse move 100 200      # Move mouse
+agent-browser mouse down left         # Press button
+agent-browser mouse up left           # Release button
+agent-browser mouse wheel 100         # Scroll wheel
+\`\`\`
+
+### Semantic locators (alternative to refs)
+\`\`\`bash
+agent-browser find role button click --name "Submit"
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find first ".item" click
+agent-browser find nth 2 "a" text
+\`\`\`
+
+### Browser settings
+\`\`\`bash
+agent-browser set viewport 1920 1080      # Set viewport size
+agent-browser set device "iPhone 14"      # Emulate device
+agent-browser set geo 37.7749 -122.4194   # Set geolocation
+agent-browser set offline on              # Toggle offline mode
+agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
+agent-browser set credentials user pass   # HTTP basic auth
+agent-browser set media dark              # Emulate color scheme
+\`\`\`
+
+### Cookies & Storage
+\`\`\`bash
+agent-browser cookies                     # Get all cookies
+agent-browser cookies set name value      # Set cookie
+agent-browser cookies clear               # Clear cookies
+agent-browser storage local               # Get all localStorage
+agent-browser storage local key           # Get specific key
+agent-browser storage local set k v       # Set value
+agent-browser storage local clear         # Clear all
+agent-browser storage session             # Get all sessionStorage
+agent-browser storage session key         # Get specific key
+agent-browser storage session set k v     # Set value
+agent-browser storage session clear       # Clear all
+\`\`\`
+
+### Network
+\`\`\`bash
+agent-browser network route <url>              # Intercept requests
+agent-browser network route <url> --abort      # Block requests
+agent-browser network route <url> --body '{}'  # Mock response
+agent-browser network unroute [url]            # Remove routes
+agent-browser network requests                 # View tracked requests
+agent-browser network requests --filter api    # Filter requests
+\`\`\`
+
+### Tabs & Windows
+\`\`\`bash
+agent-browser tab                 # List tabs
+agent-browser tab new [url]       # New tab
+agent-browser tab 2               # Switch to tab
+agent-browser tab close           # Close tab
+agent-browser window new          # New window
+\`\`\`
+
+### Frames
+\`\`\`bash
+agent-browser frame "#iframe"     # Switch to iframe
+agent-browser frame main          # Back to main frame
+\`\`\`
+
+### Dialogs
+\`\`\`bash
+agent-browser dialog accept [text]  # Accept dialog
+agent-browser dialog dismiss        # Dismiss dialog
+\`\`\`
+
+### JavaScript
+\`\`\`bash
+agent-browser eval "document.title"   # Run JavaScript
+\`\`\`
+
+## Global Options
+
+| Option | Description |
+|--------|-------------|
+| \`--session <name>\` | Isolated browser session (\`AGENT_BROWSER_SESSION\` env) |
+| \`--profile <path>\` | Persistent browser profile (\`AGENT_BROWSER_PROFILE\` env) |
+| \`--headers <json>\` | HTTP headers scoped to URL's origin |
+| \`--executable-path <path>\` | Custom browser binary (\`AGENT_BROWSER_EXECUTABLE_PATH\` env) |
+| \`--args <args>\` | Browser launch args (\`AGENT_BROWSER_ARGS\` env) |
+| \`--user-agent <ua>\` | Custom User-Agent (\`AGENT_BROWSER_USER_AGENT\` env) |
+| \`--proxy <url>\` | Proxy server (\`AGENT_BROWSER_PROXY\` env) |
+| \`--proxy-bypass <hosts>\` | Hosts to bypass proxy (\`AGENT_BROWSER_PROXY_BYPASS\` env) |
+| \`-p, --provider <name>\` | Cloud browser provider (\`AGENT_BROWSER_PROVIDER\` env) |
+| \`--json\` | Machine-readable JSON output |
+| \`--headed\` | Show browser window (not headless) |
+| \`--cdp <port\\|wss://url>\` | Connect via Chrome DevTools Protocol |
+| \`--debug\` | Debug output |
+
+## Example: Form submission
+
+\`\`\`bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i  # Check result
+\`\`\`
+
+## Example: Authentication with saved state
+
+\`\`\`bash
+# Login once
+agent-browser open https://app.example.com/login
+agent-browser snapshot -i
+agent-browser fill @e1 "username"
+agent-browser fill @e2 "password"
+agent-browser click @e3
+agent-browser wait --url "**/dashboard"
+agent-browser state save auth.json
+
+# Later sessions: load saved state
+agent-browser state load auth.json
+agent-browser open https://app.example.com/dashboard
+\`\`\`
+
+### Header-based Auth (Skip login flows)
+\`\`\`bash
+# Headers scoped to api.example.com only
+agent-browser open api.example.com --headers '{"Authorization": "Bearer <token>"}'
+# Navigate to another domain - headers NOT sent (safe)
+agent-browser open other-site.com
+# Global headers (all domains)
+agent-browser set headers '{"X-Custom-Header": "value"}'
+\`\`\`
+
+## Sessions & Persistent Profiles
+
+### Sessions (parallel browsers)
+\`\`\`bash
+agent-browser --session test1 open site-a.com
+agent-browser --session test2 open site-b.com
+agent-browser session list
+\`\`\`
+
+### Persistent Profiles
+Persists cookies, localStorage, IndexedDB, service workers, cache, login sessions across browser restarts.
+\`\`\`bash
+agent-browser --profile ~/.myapp-profile open myapp.com
+# Or via env var
+AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
+\`\`\`
+- Use different profile paths for different projects
+- Login once → restart browser → still logged in
+- Stores: cookies, localStorage, IndexedDB, service workers, browser cache
+
+## JSON output (for parsing)
+
+Add \`--json\` for machine-readable output:
+\`\`\`bash
+agent-browser snapshot -i --json
+agent-browser get text @e1 --json
+\`\`\`
+
+## Debugging
+
+\`\`\`bash
+agent-browser open example.com --headed              # Show browser window
+agent-browser console                                # View console messages
+agent-browser errors                                 # View page errors
+agent-browser record start ./debug.webm              # Record from current page
+agent-browser record stop                            # Save recording
+agent-browser connect 9222                           # Local CDP port
+agent-browser --cdp "wss://browser-service.com/cdp?token=..." snapshot  # Remote via WebSocket
+agent-browser console --clear                        # Clear console
+agent-browser errors --clear                         # Clear errors
+agent-browser highlight @e1                          # Highlight element
+agent-browser trace start                            # Start recording trace
+agent-browser trace stop trace.zip                   # Stop and save trace
+\`\`\`
+
+---
+Install: \`bun add -g agent-browser && agent-browser install\`. Run \`agent-browser --help\` for all commands. Repo: https://github.com/vercel-labs/agent-browser`,
+  allowedTools: ["Bash(agent-browser:*)"],
+}
+
 const frontendUiUxSkill: BuiltinSkill = {
  name: "frontend-ui-ux",
  description: "Designer-turned-developer who crafts stunning UI/UX even without design mockups",
@@ -95,7 +393,7 @@ Interpret creatively and make unexpected choices that feel genuinely designed fo
 const gitMasterSkill: BuiltinSkill = {
  name: "git-master",
  description:
-    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
+    "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
  template: `# Git Master Agent

 You are a Git expert combining three specializations:
@@ -1198,6 +1496,234 @@ POTENTIAL ACTIONS:
 - Bisect without proper good/bad boundaries -> Wasted time`,
 }

-export function createBuiltinSkills(): BuiltinSkill[] {
-  return [playwrightSkill, frontendUiUxSkill, gitMasterSkill]
+const devBrowserSkill: BuiltinSkill = { // built-in "dev-browser" skill; createBuiltinSkills() returns it for every browserProvider
+  name: "dev-browser",
+  description:
+    "Browser automation with persistent page state. Use when users ask to navigate websites, fill forms, take screenshots, extract web data, test web apps, or automate browser workflows. Trigger phrases include 'go to [url]', 'click on', 'fill out the form', 'take a screenshot', 'scrape', 'automate', 'test the website', 'log into', or any browser interaction request.",
+  template: `# Dev Browser Skill
+
+Browser automation that maintains page state across script executions. Write small, focused scripts to accomplish tasks incrementally. Once you've proven out part of a workflow and there is repeated work to be done, you can write a script to do the repeated work in a single execution.
+
+## Choosing Your Approach
+
+- **Local/source-available sites**: Read the source code first to write selectors directly
+- **Unknown page layouts**: Use \`getAISnapshot()\` to discover elements and \`selectSnapshotRef()\` to interact with them
+- **Visual feedback**: Take screenshots to see what the user sees
+
+## Setup
+
+**IMPORTANT**: Before using this skill, ensure the server is running. See [references/installation.md](references/installation.md) for platform-specific setup instructions (macOS, Linux, Windows).
+
+Two modes available. Ask the user if unclear which to use.
+
+### Standalone Mode (Default)
+
+Launches a new Chromium browser for fresh automation sessions.
+
+**macOS/Linux:**
+\`\`\`bash
+./skills/dev-browser/server.sh &
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+Start-Process -NoNewWindow -FilePath "node" -ArgumentList "skills/dev-browser/server.js"
+\`\`\`
+
+Add \`--headless\` flag if user requests it. **Wait for the \`Ready\` message before running scripts.**
+
+### Extension Mode
+
+Connects to user's existing Chrome browser. Use this when:
+
+- The user is already logged into sites and wants you to do things behind an authed experience that isn't local dev.
+- The user asks you to use the extension
+
+**Important**: The core flow is still the same. You create named pages inside of their browser.
+
+**Start the relay server:**
+
+**macOS/Linux:**
+\`\`\`bash
+cd skills/dev-browser && npm i && npm run start-extension &
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+cd skills/dev-browser; npm i; Start-Process -NoNewWindow -FilePath "npm" -ArgumentList "run", "start-extension"
+\`\`\`
+
+Wait for \`Waiting for extension to connect...\` followed by \`Extension connected\` in the console.
+
+If the extension hasn't connected yet, tell the user to launch and activate it. Download link: https://github.com/SawyerHood/dev-browser/releases
+
+## Writing Scripts
+
+> **Run all scripts from \`skills/dev-browser/\` directory.** The \`@/\` import alias requires this directory's config.
+
+Execute scripts inline using heredocs:
+
+**macOS/Linux:**
+\`\`\`bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+EOF
+\`\`\`
+
+**Windows (PowerShell):**
+\`\`\`powershell
+cd skills/dev-browser
+@"
+import { connect, waitForPageLoad } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("example", { viewport: { width: 1920, height: 1080 } });
+
+await page.goto("https://example.com");
+await waitForPageLoad(page);
+
+console.log({ title: await page.title(), url: page.url() });
+await client.disconnect();
+"@ | npx tsx --input-type=module
+\`\`\`
+
+### Key Principles
+
+1. **Small scripts**: Each script does ONE thing (navigate, click, fill, check)
+2. **Evaluate state**: Log/return state at the end to decide next steps
+3. **Descriptive page names**: Use \`"checkout"\`, \`"login"\`, not \`"main"\`
+4. **Disconnect to exit**: \`await client.disconnect()\` - pages persist on server
+5. **Plain JS in evaluate**: \`page.evaluate()\` runs in browser - no TypeScript syntax
+
+## Workflow Loop
+
+1. **Write a script** to perform one action
+2. **Run it** and observe the output
+3. **Evaluate** - did it work? What's the current state?
+4. **Decide** - is the task complete or do we need another script?
+5. **Repeat** until task is done
+
+### No TypeScript in Browser Context
+
+Code passed to \`page.evaluate()\` runs in the browser, which doesn't understand TypeScript:
+
+\`\`\`typescript
+// Correct: plain JavaScript
+const text = await page.evaluate(() => {
+  return document.body.innerText;
+});
+
+// Wrong: TypeScript syntax will fail at runtime
+const text = await page.evaluate(() => {
+  const el: HTMLElement = document.body; // Type annotation breaks in browser!
+  return el.innerText;
+});
+\`\`\`
+
+## Scraping Data
+
+For scraping large datasets, intercept and replay network requests rather than scrolling the DOM. See [references/scraping.md](references/scraping.md) for the complete guide.
+
+## Client API
+
+\`\`\`typescript
+const client = await connect();
+
+// Get or create named page
+const page = await client.page("name");
+const pageWithSize = await client.page("name", { viewport: { width: 1920, height: 1080 } });
+
+const pages = await client.list(); // List all page names
+await client.close("name"); // Close a page
+await client.disconnect(); // Disconnect (pages persist)
+
+// ARIA Snapshot methods
+const snapshot = await client.getAISnapshot("name"); // Get accessibility tree
+const element = await client.selectSnapshotRef("name", "e5"); // Get element by ref
+\`\`\`
+
+## Waiting
+
+\`\`\`typescript
+import { waitForPageLoad } from "@/client.js";
+
+await waitForPageLoad(page); // After navigation
+await page.waitForSelector(".results"); // For specific elements
+await page.waitForURL("**/success"); // For specific URL
+\`\`\`
+
+## Screenshots
+
+\`\`\`typescript
+await page.screenshot({ path: "tmp/screenshot.png" });
+await page.screenshot({ path: "tmp/full.png", fullPage: true });
+\`\`\`
+
+## ARIA Snapshot (Element Discovery)
+
+Use \`getAISnapshot()\` to discover page elements. Returns YAML-formatted accessibility tree:
+
+\`\`\`yaml
+- banner:
+  - link "Hacker News" [ref=e1]
+  - navigation:
+    - link "new" [ref=e2]
+- main:
+  - list:
+    - listitem:
+      - link "Article Title" [ref=e8]
+\`\`\`
+
+**Interacting with refs:**
+
+\`\`\`typescript
+const snapshot = await client.getAISnapshot("hackernews");
+console.log(snapshot); // Find the ref you need
+
+const element = await client.selectSnapshotRef("hackernews", "e2");
+await element.click();
+\`\`\`
+
+## Error Recovery
+
+Page state persists after failures. Debug with:
+
+\`\`\`bash
+cd skills/dev-browser && npx tsx <<'EOF'
+import { connect } from "@/client.js";
+
+const client = await connect();
+const page = await client.page("hackernews");
+
+await page.screenshot({ path: "tmp/debug.png" });
+console.log({
+  url: page.url(),
+  title: await page.title(),
+  bodyText: await page.textContent("body").then((t) => t?.slice(0, 200)),
+});
+
+await client.disconnect();
+EOF
+\`\`\``,
+} // declares no allowedTools entry, unlike the agent-browser skill object that precedes it
+
+/** Options for createBuiltinSkills: which browser-automation skill to bundle. */
+export interface CreateBuiltinSkillsOptions {
+  browserProvider?: BrowserAutomationProvider
+}
+
+/** Returns the built-in skills; "agent-browser" swaps in agentBrowserSkill, any other provider keeps playwrightSkill. */
+export function createBuiltinSkills(options: CreateBuiltinSkillsOptions = {}): BuiltinSkill[] {
+  const provider = options.browserProvider ?? "playwright"
+  const browserSkill = provider !== "agent-browser" ? playwrightSkill : agentBrowserSkill
+  return [browserSkill, frontendUiUxSkill, gitMasterSkill, devBrowserSkill]
 }
--- a/src/features/claude-code-mcp-loader/loader.ts
+++ b/src/features/claude-code-mcp-loader/loader.ts
@@ -77,7 +77,13 @@ export async function loadMcpConfigs(): Promise<McpLoadResult> {

    for (const [name, serverConfig] of Object.entries(config.mcpServers)) {
      if (serverConfig.disabled) {
-        log(`Skipping disabled MCP server "${name}"`, { path })
+        log(`Disabling MCP server "${name}"`, { path })
+        delete servers[name]
+        const existingIndex = loadedServers.findIndex((s) => s.name === name)
+        if (existingIndex !== -1) {
+          loadedServers.splice(existingIndex, 1)
+          log(`Removed previously loaded MCP server "${name}"`, { path })
+        }
        continue
      }

--- a/src/features/claude-code-session-state/state.test.ts
+++ b/src/features/claude-code-session-state/state.test.ts
@@ -37,7 +37,7 @@ describe("claude-code-session-state", () => {
      setSessionAgent(sessionID, "Prometheus (Planner)")

      // #when - try to overwrite
-      setSessionAgent(sessionID, "Sisyphus")
+      setSessionAgent(sessionID, "sisyphus")

      // #then - first agent preserved
      expect(getSessionAgent(sessionID)).toBe("Prometheus (Planner)")
@@ -58,10 +58,10 @@ describe("claude-code-session-state", () => {
      setSessionAgent(sessionID, "Prometheus (Planner)")

      // #when - force update
-      updateSessionAgent(sessionID, "Sisyphus")
+      updateSessionAgent(sessionID, "sisyphus")

      // #then
-      expect(getSessionAgent(sessionID)).toBe("Sisyphus")
+      expect(getSessionAgent(sessionID)).toBe("sisyphus")
    })
  })

@@ -129,7 +129,7 @@ describe("claude-code-session-state", () => {
      // #given - user switches to custom agent "MyCustomAgent"
      const sessionID = "test-session-custom"
      const customAgent = "MyCustomAgent"
-      const defaultAgent = "Sisyphus"
+      const defaultAgent = "sisyphus"

      // User switches to custom agent (via UI)
      setSessionAgent(sessionID, customAgent)
--- a/src/features/context-injector/injector.test.ts
+++ b/src/features/context-injector/injector.test.ts
@@ -21,7 +21,7 @@ describe("createContextInjectorMessagesTransformHook", () => {
      sessionID,
      role,
      time: { created: Date.now() },
-      agent: "Sisyphus",
+      agent: "sisyphus",
      model: { providerID: "test", modelID: "test" },
      path: { cwd: "/", root: "/" },
    },
--- a/src/features/opencode-skill-loader/skill-content.test.ts
+++ b/src/features/opencode-skill-loader/skill-content.test.ts
@@ -265,3 +265,66 @@ describe("resolveMultipleSkillsAsync", () => {
 		expect(result.notFound).toEqual([])
 	})
 })
+
+describe("resolveSkillContent with browserProvider", () => {
+	const agentBrowserOptions = { browserProvider: "agent-browser" as const }
+
+	it("should resolve agent-browser skill when browserProvider is 'agent-browser'", () => {
+		// #given: agentBrowserOptions selects the agent-browser provider
+
+		// #when: the 'agent-browser' skill is looked up with those options
+		const content = resolveSkillContent("agent-browser", agentBrowserOptions)
+
+		// #then: a template mentioning agent-browser comes back
+		expect(content).not.toBeNull()
+		expect(content).toContain("agent-browser")
+	})
+
+	it("should return null for agent-browser when browserProvider is default", () => {
+		// #given: no options at all, so the provider falls back to playwright
+
+		// #when: the 'agent-browser' skill is looked up
+		const content = resolveSkillContent("agent-browser")
+
+		// #then: nothing is found - agent-browser is absent from the default builtin skills
+		expect(content).toBeNull()
+	})
+
+	it("should return null for playwright when browserProvider is agent-browser", () => {
+		// #given: agentBrowserOptions selects the agent-browser provider
+
+		// #when: the 'playwright' skill is looked up with those options
+		const content = resolveSkillContent("playwright", agentBrowserOptions)
+
+		// #then: nothing is found - agent-browser took playwright's place
+		expect(content).toBeNull()
+	})
+})
+
+describe("resolveMultipleSkills with browserProvider", () => {
+	it("should resolve agent-browser when browserProvider is set", () => {
+		// #given: two skills are requested while the agent-browser provider is selected
+		const requested = ["agent-browser", "git-master"]
+		const options = { browserProvider: "agent-browser" as const }
+
+		// #when: both are resolved in one call
+		const { resolved, notFound } = resolveMultipleSkills(requested, options)
+
+		// #then: each skill resolves and nothing is reported missing
+		expect(resolved.has("agent-browser")).toBe(true)
+		expect(resolved.has("git-master")).toBe(true)
+		expect(notFound).toHaveLength(0)
+	})
+
+	it("should not resolve agent-browser without browserProvider option", () => {
+		// #given: agent-browser is requested with no options at all
+		const requested = ["agent-browser"]
+
+		// #when: the request is resolved
+		const { resolved, notFound } = resolveMultipleSkills(requested)
+
+		// #then: the skill lands in notFound instead of resolved
+		expect(resolved.has("agent-browser")).toBe(false)
+		expect(notFound).toContain("agent-browser")
+	})
+})
--- a/src/features/opencode-skill-loader/skill-content.ts
+++ b/src/features/opencode-skill-loader/skill-content.ts
@@ -3,24 +3,27 @@ import { discoverSkills } from "./loader"
 import type { LoadedSkill } from "./types"
 import { parseFrontmatter } from "../../shared/frontmatter"
 import { readFileSync } from "node:fs"
-import type { GitMasterConfig } from "../../config/schema"
+import type { GitMasterConfig, BrowserAutomationProvider } from "../../config/schema"

 export interface SkillResolutionOptions {
 	gitMasterConfig?: GitMasterConfig
+	browserProvider?: BrowserAutomationProvider
 }

-let cachedSkills: LoadedSkill[] | null = null
+const cachedSkillsByProvider = new Map<string, LoadedSkill[]>()

 function clearSkillCache(): void {
-	cachedSkills = null
+	cachedSkillsByProvider.clear()
 }

-async function getAllSkills(): Promise<LoadedSkill[]> {
-	if (cachedSkills) return cachedSkills
+async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSkill[]> {
+	const cacheKey = options?.browserProvider ?? "playwright"
+	const cached = cachedSkillsByProvider.get(cacheKey)
+	if (cached) return cached

 	const [discoveredSkills, builtinSkillDefs] = await Promise.all([
 		discoverSkills({ includeClaudeCodePaths: true }),
-		Promise.resolve(createBuiltinSkills()),
+		Promise.resolve(createBuiltinSkills({ browserProvider: options?.browserProvider })),
 	])

 	const builtinSkillsAsLoaded: LoadedSkill[] = builtinSkillDefs.map((skill) => ({
@@ -44,8 +47,9 @@ async function getAllSkills(): Promise<LoadedSkill[]> {
 	const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
 	const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))

-	cachedSkills = [...discoveredSkills, ...uniqueBuiltins]
-	return cachedSkills
+	const allSkills = [...discoveredSkills, ...uniqueBuiltins]
+	cachedSkillsByProvider.set(cacheKey, allSkills)
+	return allSkills
 }

 async function extractSkillTemplate(skill: LoadedSkill): Promise<string> {
@@ -118,7 +122,7 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
 }

 export function resolveSkillContent(skillName: string, options?: SkillResolutionOptions): string | null {
-	const skills = createBuiltinSkills()
+	const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
 	const skill = skills.find((s) => s.name === skillName)
 	if (!skill) return null

@@ -133,7 +137,7 @@ export function resolveMultipleSkills(skillNames: string[], options?: SkillResol
 	resolved: Map<string, string>
 	notFound: string[]
 } {
-	const skills = createBuiltinSkills()
+	const skills = createBuiltinSkills({ browserProvider: options?.browserProvider })
 	const skillMap = new Map(skills.map((s) => [s.name, s.template]))

 	const resolved = new Map<string, string>()
@@ -159,7 +163,7 @@ export async function resolveSkillContentAsync(
 	skillName: string,
 	options?: SkillResolutionOptions
 ): Promise<string | null> {
-	const allSkills = await getAllSkills()
+	const allSkills = await getAllSkills(options)
 	const skill = allSkills.find((s) => s.name === skillName)
 	if (!skill) return null

@@ -179,7 +183,7 @@ export async function resolveMultipleSkillsAsync(
 	resolved: Map<string, string>
 	notFound: string[]
 }> {
-	const allSkills = await getAllSkills()
+	const allSkills = await getAllSkills(options)
 	const skillMap = new Map<string, LoadedSkill>()
 	for (const skill of allSkills) {
 		skillMap.set(skill.name, skill)
--- a/src/features/task-toast-manager/manager.test.ts
+++ b/src/features/task-toast-manager/manager.test.ts
@@ -30,7 +30,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Test task",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: true,
        skills: ["playwright", "git-master"],
      }
@@ -127,7 +127,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Full info task",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: true,
        skills: ["frontend-ui-ux"],
      }
@@ -149,9 +149,9 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1",
        description: "Task with category default model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
-        modelInfo: { model: "google/gemini-3-pro-preview", type: "category-default" as const },
+        modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const },
      }

      // #when - addTask is called
@@ -169,7 +169,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_1b",
        description: "Task with system default model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "anthropic/claude-sonnet-4-5", type: "system-default" as const },
      }
@@ -190,7 +190,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_2",
        description: "Task with inherited model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "cliproxy/claude-opus-4-5", type: "inherited" as const },
      }
@@ -211,7 +211,7 @@ describe("TaskToastManager", () => {
      const task = {
        id: "task_3",
        description: "Task with user model",
-        agent: "Sisyphus-Junior",
+        agent: "sisyphus-junior",
        isBackground: false,
        modelInfo: { model: "my-provider/my-model", type: "user-defined" as const },
      }
--- a/src/features/tmux-subagent/action-executor.ts
+++ b/src/features/tmux-subagent/action-executor.ts
@@ -0,0 +1,97 @@
+import type { TmuxConfig } from "../../config/schema"
+import type { PaneAction, WindowState } from "./types"
+import { spawnTmuxPane, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane } from "../../shared/tmux"
+import { log } from "../../shared"
+
+export interface ActionResult {
+  success: boolean // outcome reported by the tmux helper that ran the action
+  paneId?: string // pane id returned by the spawn/replace helpers; not set for a close
+  error?: string // failure detail; executeActions includes it in its "action failed" log entry
+}
+
+export interface ExecuteActionsResult {
+  success: boolean // false as soon as one action fails; later actions are not executed
+  spawnedPaneId?: string // pane id from the last successful spawn/replace action, if there was one
+  results: Array<{ action: PaneAction; result: ActionResult }> // one entry per attempted action, in execution order
+}
+
+export interface ExecuteContext {
+  config: TmuxConfig // forwarded unchanged to spawnTmuxPane / replaceTmuxPane
+  serverUrl: string // forwarded unchanged to spawnTmuxPane / replaceTmuxPane
+  windowState: WindowState // supplies the main pane id and window width used when re-enforcing the main pane width
+}
+
+/** Calls enforceMainPaneWidth with the main pane id and the window width; does nothing when the window has no main pane. */
+async function enforceMainPane(windowState: WindowState): Promise<void> {
+  if (windowState.mainPane) await enforceMainPaneWidth(windowState.mainPane.paneId, windowState.windowWidth)
+}
+
+/**
+ * Executes a single pane action and reports the outcome.
+ *
+ * - "close" closes `action.paneId`.
+ * - "replace" passes `action.paneId` and `action.newSessionId` to replaceTmuxPane.
+ * - any other action is handled as a spawn via spawnTmuxPane.
+ *
+ * The main pane width is enforced again after a successful close or spawn;
+ * the replace path does not do so. A failed action carries an `error`
+ * message so the failure log in executeActions has something to print.
+ *
+ * @param action the pane action to run
+ * @param ctx tmux config, server URL and window state handed to the helpers
+ * @returns the success flag, the pane id reported by spawn/replace, and `error` on failure
+ */
+export async function executeAction(
+  action: PaneAction,
+  ctx: ExecuteContext
+): Promise<ActionResult> {
+  if (action.type === "close") {
+    const success = await closeTmuxPane(action.paneId)
+    // Width enforcement is skipped when the close did not succeed.
+    if (!success) return { success, error: `failed to close pane ${action.paneId}` }
+    await enforceMainPane(ctx.windowState)
+    return { success }
+  }
+
+  if (action.type === "replace") {
+    const { paneId, newSessionId, description } = action
+    const replaced = await replaceTmuxPane(paneId, newSessionId, description, ctx.config, ctx.serverUrl)
+    // No enforceMainPane call on this path.
+    if (replaced.success) return { success: replaced.success, paneId: replaced.paneId }
+    return { success: replaced.success, paneId: replaced.paneId, error: `failed to replace pane ${paneId}` }
+  }
+
+  const { sessionId, description, targetPaneId, splitDirection } = action
+  const spawned = await spawnTmuxPane(sessionId, description, ctx.config, ctx.serverUrl, targetPaneId, splitDirection)
+  if (!spawned.success) {
+    return { success: spawned.success, paneId: spawned.paneId, error: `failed to spawn pane for session ${sessionId}` }
+  }
+
+  await enforceMainPane(ctx.windowState)
+  return { success: spawned.success, paneId: spawned.paneId }
+}
+
+/** Runs the actions one after another, stopping at the first failure; `spawnedPaneId` is the pane id of the last successful spawn/replace. */
+export async function executeActions(
+  actions: PaneAction[],
+  ctx: ExecuteContext
+): Promise<ExecuteActionsResult> {
+  const executed: ExecuteActionsResult["results"] = []
+  let newestPaneId: string | undefined
+
+  for (const action of actions) {
+    const { type } = action
+    log("[action-executor] executing", { type })
+    const outcome = await executeAction(action, ctx)
+    executed.push({ action, result: outcome })
+
+    if (outcome.success) {
+      // Remember the pane id reported by the most recent spawn/replace.
+      if ((type === "spawn" || type === "replace") && outcome.paneId) newestPaneId = outcome.paneId
+      continue
+    }
+    log("[action-executor] action failed", { type, error: outcome.error })
+    return { success: false, results: executed }
+  }
+  return { success: true, spawnedPaneId: newestPaneId, results: executed }
+}
--- a/src/features/tmux-subagent/decision-engine.test.ts
+++ b/src/features/tmux-subagent/decision-engine.test.ts
@@ -0,0 +1,354 @@
+import { describe, it, expect } from "bun:test"
+import { 
+  decideSpawnActions, 
+  calculateCapacity, 
+  canSplitPane, 
+  canSplitPaneAnyDirection,
+  getBestSplitDirection,
+  type SessionMapping 
+} from "./decision-engine"
+import type { WindowState, CapacityConfig, TmuxPaneInfo } from "./types"
+import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
+
+const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + 1 // same formula as the non-exported MIN_SPLIT_WIDTH in decision-engine.ts (DIVIDER_SIZE is 1 there)
+const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + 1 // same formula as the non-exported MIN_SPLIT_HEIGHT in decision-engine.ts
+
+describe("canSplitPane", () => {
+  const createPane = (width: number, height: number): TmuxPaneInfo => ({ // fixture: only width and height vary per case
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns true for horizontal split when width >= 2*MIN+1", () => {
+    //#given - pane whose width is exactly MIN_SPLIT_WIDTH (the boundary value)
+    const pane = createPane(MIN_SPLIT_WIDTH, 20)
+
+    //#when - checking the "-h" direction, which is limited by width
+    const result = canSplitPane(pane, "-h")
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns false for horizontal split when width < 2*MIN+1", () => {
+    //#given - pane whose width is MIN_SPLIT_WIDTH - 1
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, 20)
+
+    //#when - checking the "-h" direction, which is limited by width
+    const result = canSplitPane(pane, "-h")
+
+    //#then
+    expect(result).toBe(false)
+  })
+
+  it("returns true for vertical split when height >= 2*MIN+1", () => {
+    //#given - pane whose height is exactly MIN_SPLIT_HEIGHT (the boundary value)
+    const pane = createPane(50, MIN_SPLIT_HEIGHT)
+
+    //#when - checking the "-v" direction, which is limited by height
+    const result = canSplitPane(pane, "-v")
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns false for vertical split when height < 2*MIN+1", () => {
+    //#given - pane whose height is MIN_SPLIT_HEIGHT - 1
+    const pane = createPane(50, MIN_SPLIT_HEIGHT - 1)
+
+    //#when - checking the "-v" direction, which is limited by height
+    const result = canSplitPane(pane, "-v")
+
+    //#then
+    expect(result).toBe(false)
+  })
+})
+
+describe("canSplitPaneAnyDirection", () => {
+  const createPane = (width: number, height: number): TmuxPaneInfo => ({ // fixture: only width and height vary per case
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns true when can split horizontally but not vertically", () => {
+    //#given - width is exactly MIN_SPLIT_WIDTH, height is MIN_SPLIT_HEIGHT - 1
+    const pane = createPane(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const result = canSplitPaneAnyDirection(pane)
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns true when can split vertically but not horizontally", () => {
+    //#given - width is MIN_SPLIT_WIDTH - 1, height is exactly MIN_SPLIT_HEIGHT
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const result = canSplitPaneAnyDirection(pane)
+
+    //#then
+    expect(result).toBe(true)
+  })
+
+  it("returns false when cannot split in any direction", () => {
+    //#given - pane one below the split minimum in both dimensions
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const result = canSplitPaneAnyDirection(pane)
+
+    //#then
+    expect(result).toBe(false)
+  })
+})
+
+describe("getBestSplitDirection", () => {
+  const createPane = (width: number, height: number): TmuxPaneInfo => ({ // fixture: only width and height vary per case
+    paneId: "%1",
+    width,
+    height,
+    left: 100,
+    top: 0,
+    title: "test",
+    isActive: false,
+  })
+
+  it("returns -h when only horizontal split possible", () => {
+    //#given - width is exactly MIN_SPLIT_WIDTH, height is MIN_SPLIT_HEIGHT - 1
+    const pane = createPane(MIN_SPLIT_WIDTH, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const result = getBestSplitDirection(pane)
+
+    //#then
+    expect(result).toBe("-h")
+  })
+
+  it("returns -v when only vertical split possible", () => {
+    //#given - width is MIN_SPLIT_WIDTH - 1, height is exactly MIN_SPLIT_HEIGHT
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const result = getBestSplitDirection(pane)
+
+    //#then
+    expect(result).toBe("-v")
+  })
+
+  it("returns null when no split possible", () => {
+    //#given - pane one below the split minimum in both dimensions
+    const pane = createPane(MIN_SPLIT_WIDTH - 1, MIN_SPLIT_HEIGHT - 1)
+
+    //#when
+    const result = getBestSplitDirection(pane)
+
+    //#then
+    expect(result).toBe(null)
+  })
+
+  it("returns -h when width >= height and both splits possible", () => {
+    //#given - both dimensions meet their split minimum; the test title expects width >= height here
+    const pane = createPane(MIN_SPLIT_WIDTH + 10, MIN_SPLIT_HEIGHT)
+
+    //#when
+    const result = getBestSplitDirection(pane)
+
+    //#then
+    expect(result).toBe("-h")
+  })
+
+  it("returns -v when height > width and both splits possible", () => {
+    //#given - height is MIN_SPLIT_WIDTH + 10 (not MIN_SPLIT_HEIGHT) so that it exceeds the width
+    const pane = createPane(MIN_SPLIT_WIDTH, MIN_SPLIT_WIDTH + 10)
+
+    //#when
+    const result = getBestSplitDirection(pane)
+
+    //#then
+    expect(result).toBe("-v")
+  })
+})
+
+describe("decideSpawnActions", () => {
+  const defaultConfig: CapacityConfig = {
+    mainPaneMinWidth: 120,
+    agentPaneWidth: 40,
+  }
+
+  const createWindowState = ( // fixture: main pane "%0" gets the left half (floor(windowWidth / 2)) at full height
+    windowWidth: number,
+    windowHeight: number,
+    agentPanes: Array<{ paneId: string; width: number; height: number; left: number; top: number }> = []
+  ): WindowState => ({
+    windowWidth,
+    windowHeight,
+    mainPane: { paneId: "%0", width: Math.floor(windowWidth / 2), height: windowHeight, left: 0, top: 0, title: "main", isActive: true },
+    agentPanes: agentPanes.map((p, i) => ({
+      ...p,
+      title: `agent-${i}`,
+      isActive: false,
+    })),
+  })
+
+  describe("minimum size enforcement", () => {
+    it("returns canSpawn=false when window too small", () => {
+      //#given - 50x5 window with no agent panes (main pane is 25 wide)
+      const state = createWindowState(50, 5)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(false)
+      expect(result.reason).toContain("too small")
+    })
+
+    it("returns canSpawn=true when main pane can be split", () => {
+      //#given - 220-wide window, so the fixture's main pane is 110 wide (expected to be >= MIN_SPLIT_WIDTH)
+      const state = createWindowState(220, 44)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+
+    it("closes oldest pane when existing panes are too small to split", () => {
+      //#given - a single 50x15 agent pane, mapped to session "old-ses"
+      const state = createWindowState(220, 30, [
+        { paneId: "%1", width: 50, height: 15, left: 110, top: 0 },
+      ])
+      const mappings: SessionMapping[] = [
+        { sessionId: "old-ses", paneId: "%1", createdAt: new Date("2024-01-01") },
+      ]
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, mappings)
+
+      //#then - the plan is a close followed by a spawn
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(2)
+      expect(result.actions[0].type).toBe("close")
+      expect(result.actions[1].type).toBe("spawn")
+    })
+
+    it("can spawn when existing pane is large enough to split", () => {
+      //#given - existing pane exceeds the split minimum by 10 in both dimensions
+      const state = createWindowState(320, 50, [
+        { paneId: "%1", width: MIN_SPLIT_WIDTH + 10, height: MIN_SPLIT_HEIGHT + 10, left: 160, top: 0 },
+      ])
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+  })
+
+  describe("basic spawn decisions", () => {
+    it("returns canSpawn=true when capacity allows new pane", () => {
+      //#given - 220x44 window, mainPane width=110 (expected to be >= MIN_SPLIT_WIDTH)
+      const state = createWindowState(220, 44)
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions.length).toBe(1)
+      expect(result.actions[0].type).toBe("spawn")
+    })
+
+    it("spawns with splitDirection", () => {
+      //#given - one agent pane sized exactly MIN_SPLIT_WIDTH x MIN_SPLIT_HEIGHT
+      const state = createWindowState(212, 44, [
+        { paneId: "%1", width: MIN_SPLIT_WIDTH, height: MIN_SPLIT_HEIGHT, left: 106, top: 0 },
+      ])
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then - the type guard below narrows the action so the spawn-only fields can be read
+      expect(result.canSpawn).toBe(true)
+      expect(result.actions[0].type).toBe("spawn")
+      if (result.actions[0].type === "spawn") {
+        expect(result.actions[0].sessionId).toBe("ses1")
+        expect(result.actions[0].splitDirection).toBeDefined()
+      }
+    })
+
+    it("returns canSpawn=false when no main pane", () => {
+      //#given - window state built by hand because the fixture always adds a main pane
+      const state: WindowState = { windowWidth: 212, windowHeight: 44, mainPane: null, agentPanes: [] }
+
+      //#when
+      const result = decideSpawnActions(state, "ses1", "test", defaultConfig, [])
+
+      //#then
+      expect(result.canSpawn).toBe(false)
+      expect(result.reason).toBe("no main pane found")
+    })
+  })
+})
+
+describe("calculateCapacity", () => {
+  it("calculates 2D grid capacity (cols x rows)", () => {
+    //#given - 212x44 window (user's actual screen)
+    //#when
+    const capacity = calculateCapacity(212, 44)
+
+    //#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
+    expect(capacity.cols).toBe(2)
+    expect(capacity.rows).toBe(3)
+    expect(capacity.total).toBe(6)
+  })
+
+  it("returns 0 cols when agent area too narrow", () => {
+    //#given - window too narrow for even 1 agent pane
+    //#when
+    const capacity = calculateCapacity(100, 44)
+
+    //#then - availableWidth=50, cols=(50+1)/(52+1)=0 (same formula as the first test)
+    expect(capacity.cols).toBe(0)
+    expect(capacity.total).toBe(0)
+  })
+
+  it("returns 0 rows when window too short", () => {
+    //#given - window too short
+    //#when
+    const capacity = calculateCapacity(212, 10)
+
+    //#then - rows=(10+1)/(11+1)=0 (same formula as the first test)
+    expect(capacity.rows).toBe(0)
+    expect(capacity.total).toBe(0)
+  })
+
+  it("scales with larger screens but caps at MAX_GRID_SIZE=4", () => {
+    //#given - larger 4K-like screen (400x100)
+    //#when
+    const capacity = calculateCapacity(400, 100)
+
+    //#then - availableWidth=200, cols=(200+1)/(52+1)=3 (under the cap); rows=(100+1)/(11+1)=8, capped at 4 (MAX_GRID_SIZE)
+    expect(capacity.cols).toBe(3)
+    expect(capacity.rows).toBe(4)
+    expect(capacity.total).toBe(12)
+  })
+})
--- a/src/features/tmux-subagent/decision-engine.ts
+++ b/src/features/tmux-subagent/decision-engine.ts
@@ -0,0 +1,386 @@
+import type { WindowState, PaneAction, SpawnDecision, CapacityConfig, TmuxPaneInfo, SplitDirection } from "./types"
+import { MIN_PANE_WIDTH, MIN_PANE_HEIGHT } from "./types"
+
+export interface SessionMapping {
+  sessionId: string // session shown in the pane
+  paneId: string // id of the pane the session is mapped to
+  createdAt: Date // earlier values are treated as older when choosing a pane to evict or replace
+}
+
+export interface GridCapacity {
+  cols: number // columns of agent panes that fit
+  rows: number // rows of agent panes that fit
+  total: number // cols * rows
+}
+
+export interface GridSlot {
+  row: number // zero-based row index within a GridPlan
+  col: number // zero-based column index within a GridPlan
+}
+
+export interface GridPlan {
+  cols: number // columns in the planned grid
+  rows: number // rows in the planned grid
+  slotWidth: number // width of one slot; 0 when no grid could be planned
+  slotHeight: number // height of one slot; 0 when no grid could be planned
+}
+
+export interface SpawnTarget {
+  targetPaneId: string // pane that will be split to host the new agent pane
+  splitDirection: SplitDirection // direction to split the target pane ("-h" or "-v" in this module)
+}
+
+const MAIN_PANE_RATIO = 0.5 // share of the window width assigned to the main pane
+const MAX_COLS = 2 // upper bound on the column count returned by getColumnCount
+const MAX_ROWS = 3 // panes per column before getColumnCount adds another column
+const MAX_GRID_SIZE = 4 // cap applied to both cols and rows in calculateCapacity
+const DIVIDER_SIZE = 1 // cells reserved for the divider between two adjacent panes
+const MIN_SPLIT_WIDTH = 2 * MIN_PANE_WIDTH + DIVIDER_SIZE // two minimum-width panes plus one divider
+const MIN_SPLIT_HEIGHT = 2 * MIN_PANE_HEIGHT + DIVIDER_SIZE // two minimum-height panes plus one divider
+
+/** Columns used for `paneCount` agent panes: one per MAX_ROWS panes, at least 1 and at most MAX_COLS. */
+export function getColumnCount(paneCount: number): number {
+  return paneCount <= 0 ? 1 : Math.min(MAX_COLS, Math.max(1, Math.ceil(paneCount / MAX_ROWS)))
+}
+
+/** Width left for each column of the agent area after the dividers between columns are subtracted. */
+export function getColumnWidth(agentAreaWidth: number, paneCount: number): number {
+  const columnCount = getColumnCount(paneCount)
+  return Math.floor((agentAreaWidth - (columnCount - 1) * DIVIDER_SIZE) / columnCount)
+}
+
+/** True when, with `paneCount` agent panes, a column is still at least MIN_SPLIT_WIDTH wide. */
+export function isSplittableAtCount(agentAreaWidth: number, paneCount: number): boolean {
+  return getColumnWidth(agentAreaWidth, paneCount) >= MIN_SPLIT_WIDTH
+}
+
+/** Smallest number of panes to evict (1..currentCount) so that a column becomes splittable, or null if none works. */
+export function findMinimalEvictions(agentAreaWidth: number, currentCount: number): number | null {
+  for (let remaining = currentCount - 1; remaining >= 0; remaining--) {
+    const splittable = isSplittableAtCount(agentAreaWidth, remaining)
+    if (splittable) return currentCount - remaining
+  }
+  return null
+}
+
+/** Whether `pane` meets the minimum size for a split: its width for "-h", its height for any other direction. */
+export function canSplitPane(pane: TmuxPaneInfo, direction: SplitDirection): boolean {
+  const horizontal = direction === "-h"
+  const size = horizontal ? pane.width : pane.height
+  return size >= (horizontal ? MIN_SPLIT_WIDTH : MIN_SPLIT_HEIGHT)
+}
+
+export function canSplitPaneAnyDirection(pane: TmuxPaneInfo): boolean {
+  return canSplitPane(pane, "-h") || canSplitPane(pane, "-v") // wide enough or tall enough to split
+}
+
+/**
+ * Chooses "-h" or "-v" for `pane`, or null when neither fits; if both fit, "-h" wins unless the pane is taller than wide.
+ */
+export function getBestSplitDirection(pane: TmuxPaneInfo): SplitDirection | null {
+  const fitsWide = pane.width >= MIN_SPLIT_WIDTH
+  const fitsTall = pane.height >= MIN_SPLIT_HEIGHT
+  if (fitsWide && fitsTall) return pane.width >= pane.height ? "-h" : "-v"
+  return fitsWide ? "-h" : fitsTall ? "-v" : null
+}
+
+/**
+ * Counts the minimum-size agent panes (with dividers) that fit beside the main pane, each axis clamped to 0..MAX_GRID_SIZE.
+ */
+export function calculateCapacity(windowWidth: number, windowHeight: number): GridCapacity {
+  const fit = (space: number, minSize: number): number =>
+    Math.min(MAX_GRID_SIZE, Math.max(0, Math.floor((space + DIVIDER_SIZE) / (minSize + DIVIDER_SIZE))))
+  const cols = fit(Math.floor(windowWidth * (1 - MAIN_PANE_RATIO)), MIN_PANE_WIDTH)
+  const rows = fit(windowHeight, MIN_PANE_HEIGHT)
+  return { cols, rows, total: cols * rows }
+}
+
+/**
+ * Chooses the grid shape for `paneCount` agent panes within the window's capacity.
+ * Among shapes with at least `paneCount` cells, the one with the fewest cells wins (fewer rows on a
+ * tie). If no shape within capacity is large enough, the plan stays 1x1. Slot sizes are 0 only
+ * when the capacity is zero in either axis or `paneCount` is 0.
+ */
+export function computeGridPlan(
+  windowWidth: number,
+  windowHeight: number,
+  paneCount: number
+): GridPlan {
+  const limit = calculateCapacity(windowWidth, windowHeight)
+  if (limit.cols === 0 || limit.rows === 0 || paneCount === 0) {
+    return { cols: 1, rows: 1, slotWidth: 0, slotHeight: 0 }
+  }
+
+  // Start from 1x1 so an over-capacity pane count still produces a plan.
+  let chosen = { cols: 1, rows: 1, area: Infinity }
+  for (let r = 1; r <= limit.rows; r++) {
+    for (let c = 1; c <= limit.cols; c++) {
+      const cells = c * r
+      if (!(cells >= paneCount)) continue
+      const better = cells < chosen.area || (cells === chosen.area && r < chosen.rows)
+      if (better) chosen = { cols: c, rows: r, area: cells }
+    }
+  }
+
+  const agentWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
+  return {
+    cols: chosen.cols,
+    rows: chosen.rows,
+    slotWidth: Math.floor(agentWidth / chosen.cols),
+    slotHeight: Math.floor(windowHeight / chosen.rows),
+  }
+}
+
+/**
+ * Maps a pane's top-left corner to a slot of `plan`. The column comes from the pane's offset to
+ * the right of `mainPaneWidth` (negative offsets count as 0). Indices are clamped to the last row
+ * and column, and an axis whose slot size is 0 always maps to index 0.
+ */
+export function mapPaneToSlot(
+  pane: TmuxPaneInfo,
+  plan: GridPlan,
+  mainPaneWidth: number
+): GridSlot {
+  const indexFor = (offset: number, slotSize: number, count: number): number =>
+    slotSize > 0 ? Math.min(count - 1, Math.floor(offset / slotSize)) : 0
+
+  const xOffset = Math.max(0, pane.left - mainPaneWidth)
+  const row = indexFor(pane.top, plan.slotHeight, plan.rows)
+  const col = indexFor(xOffset, plan.slotWidth, plan.cols)
+  return { row, col }
+}
+
+/** Indexes agent panes by their "row:col" slot key; when two panes map to one slot, the later pane wins. */
+function buildOccupancy(
+  agentPanes: TmuxPaneInfo[],
+  plan: GridPlan,
+  mainPaneWidth: number
+): Map<string, TmuxPaneInfo> {
+  const bySlot = new Map<string, TmuxPaneInfo>()
+  for (const agentPane of agentPanes) {
+    const { row, col } = mapPaneToSlot(agentPane, plan, mainPaneWidth)
+    bySlot.set(`${row}:${col}`, agentPane)
+  }
+  return bySlot
+}
+
+/** First unoccupied slot in row-major order; falls back to the bottom-right slot when every slot is taken. */
+function findFirstEmptySlot(
+  occupancy: Map<string, TmuxPaneInfo>,
+  plan: GridPlan
+): GridSlot {
+  for (let row = 0; row < plan.rows; row += 1) {
+    for (let col = 0; col < plan.cols; col += 1) {
+      if (occupancy.has(`${row}:${col}`)) continue
+      return { row, col }
+    }
+  }
+  const lastSlot: GridSlot = { row: plan.rows - 1, col: plan.cols - 1 }
+  return lastSlot
+}
+
+/**
+ * Finds a pane to split so that one more agent pane can be shown.
+ *
+ * With no agent panes, the main pane is the target, judged as if it spanned the full window width.
+ * Otherwise the first empty slot of a grid planned for one extra pane is located and its left
+ * neighbour ("-h"), then its upper neighbour ("-v"), is tried; failing that, the largest pane (by
+ * area) that can be split in any direction is used. `preferredDirection` is currently not consulted.
+ */
+function findSplittableTarget(
+  state: WindowState,
+  preferredDirection?: SplitDirection
+): SpawnTarget | null {
+  const { mainPane, agentPanes, windowWidth, windowHeight } = state
+  if (!mainPane) return null
+
+  if (agentPanes.length === 0) {
+    const fullWidthMain: TmuxPaneInfo = { ...mainPane, width: windowWidth }
+    return canSplitPane(fullWidthMain, "-h")
+      ? { targetPaneId: mainPane.paneId, splitDirection: "-h" }
+      : null
+  }
+
+  const plan = computeGridPlan(windowWidth, windowHeight, agentPanes.length + 1)
+  const occupancy = buildOccupancy(agentPanes, plan, Math.floor(windowWidth * MAIN_PANE_RATIO))
+  const { row, col } = findFirstEmptySlot(occupancy, plan)
+
+  // Neighbours of the empty slot in order of preference: left (split "-h"), then above (split "-v").
+  const neighbours: Array<[string, SplitDirection]> = [
+    [`${row}:${col - 1}`, "-h"],
+    [`${row - 1}:${col}`, "-v"],
+  ]
+  for (const [key, direction] of neighbours) {
+    const neighbour = occupancy.get(key)
+    if (neighbour && canSplitPane(neighbour, direction)) {
+      return { targetPaneId: neighbour.paneId, splitDirection: direction }
+    }
+  }
+
+  // Fallback: the largest pane that can still be split in some direction.
+  const candidates = agentPanes.flatMap((pane) => {
+    const direction = getBestSplitDirection(pane)
+    return direction === null ? [] : [{ pane, direction }]
+  })
+  candidates.sort((a, b) => b.pane.width * b.pane.height - a.pane.width * a.pane.height)
+  const largest = candidates[0]
+  if (!largest) return null
+  return { targetPaneId: largest.pane.paneId, splitDirection: largest.direction }
+}
+
+export function findSpawnTarget(state: WindowState): SpawnTarget | null {
+  return findSplittableTarget(state) // exported entry point; no preferred direction is passed
+}
+
+/** Mapping with the earliest `createdAt` (the first such mapping on ties), or null for an empty list. */
+function findOldestSession(mappings: SessionMapping[]): SessionMapping | null {
+  const [first, ...rest] = mappings
+  if (first === undefined) return null
+  return rest.reduce((oldest, next) => (next.createdAt < oldest.createdAt ? next : oldest), first)
+}
+
+/**
+ * Picks the agent pane to evict or replace.
+ *
+ * Panes that have a session mapping are ranked by `createdAt`, earliest first (pane order breaks
+ * ties). If no pane has a mapping, the top-most pane is returned, preferring the left-most one
+ * among panes with the same `top`. Returns null when there are no agent panes.
+ */
+function findOldestAgentPane(
+  agentPanes: TmuxPaneInfo[],
+  sessionMappings: SessionMapping[]
+): TmuxPaneInfo | null {
+  if (agentPanes.length === 0) return null
+
+  // A later mapping for the same pane id overrides an earlier one.
+  const createdAtByPane = new Map(sessionMappings.map((m): [string, Date] => [m.paneId, m.createdAt]))
+  const dated = agentPanes.flatMap((pane) => {
+    const createdAt = createdAtByPane.get(pane.paneId)
+    return createdAt === undefined ? [] : [{ pane, createdAt }]
+  })
+  if (dated.length > 0) {
+    return dated.sort((a, b) => a.createdAt.getTime() - b.createdAt.getTime())[0].pane
+  }
+
+  const isBefore = (a: TmuxPaneInfo, b: TmuxPaneInfo): boolean =>
+    a.top < b.top || (a.top === b.top && a.left < b.left)
+  return agentPanes.reduce((best, pane) => (isBefore(pane, best) ? pane : best))
+}
+
+/**
+ * Decides which pane actions are needed to show a new agent session.
+ *
+ * Decision order:
+ * 1. No main pane, or an agent area narrower than MIN_PANE_WIDTH: cannot spawn.
+ * 2. No agent panes yet: split the main pane with "-h" if the full window width allows it.
+ * 3. Columns are splittable at the current pane count and a target is found: split that target.
+ * 4. Evicting exactly one pane would allow a split: close the oldest pane, then split the main pane.
+ * 5. Otherwise: replace the oldest pane in place.
+ * `_config` is not read by this function.
+ */
+export function decideSpawnActions(
+  state: WindowState,
+  sessionId: string,
+  description: string,
+  _config: CapacityConfig,
+  sessionMappings: SessionMapping[]
+): SpawnDecision {
+  const { mainPane, agentPanes } = state
+  if (!mainPane) {
+    return {
+      canSpawn: false,
+      actions: [],
+      reason: "no main pane found",
+    }
+  }
+
+  // The agent area is the share of the window width that the main pane does not claim.
+  const agentAreaWidth = Math.floor(state.windowWidth * (1 - MAIN_PANE_RATIO))
+  if (agentAreaWidth < MIN_PANE_WIDTH) {
+    return {
+      canSpawn: false,
+      actions: [],
+      reason: `window too small for agent panes: ${state.windowWidth}x${state.windowHeight}`,
+    }
+  }
+
+  // Every branch that splits a pane emits the same "spawn" action shape.
+  const spawnBySplitting = (targetPaneId: string, splitDirection: SplitDirection): PaneAction => ({
+    type: "spawn",
+    sessionId,
+    description,
+    targetPaneId,
+    splitDirection,
+  })
+
+  // First agent pane: the main pane is judged as if it still spanned the whole window.
+  if (agentPanes.length === 0) {
+    const fullWidthMain: TmuxPaneInfo = { ...mainPane, width: state.windowWidth }
+    if (!canSplitPane(fullWidthMain, "-h")) {
+      return { canSpawn: false, actions: [], reason: "mainPane too small to split" }
+    }
+    return {
+      canSpawn: true,
+      actions: [spawnBySplitting(mainPane.paneId, "-h")],
+    }
+  }
+
+  // Room for another split: use the grid-based target search.
+  if (isSplittableAtCount(agentAreaWidth, agentPanes.length)) {
+    const target = findSplittableTarget(state)
+    if (target) {
+      return {
+        canSpawn: true,
+        actions: [spawnBySplitting(target.targetPaneId, target.splitDirection)],
+      }
+    }
+  }
+
+  // No split possible: the oldest pane is the one that gets closed or replaced.
+  const oldestPane = findOldestAgentPane(agentPanes, sessionMappings)
+  if (!oldestPane) {
+    return {
+      canSpawn: false,
+      actions: [],
+      reason: "no pane available to replace",
+    }
+  }
+  const previousSessionId = sessionMappings.find((m) => m.paneId === oldestPane.paneId)?.sessionId || ""
+
+  if (findMinimalEvictions(agentAreaWidth, agentPanes.length) === 1) {
+    return {
+      canSpawn: true,
+      actions: [
+        { type: "close", paneId: oldestPane.paneId, sessionId: previousSessionId },
+        spawnBySplitting(mainPane.paneId, "-h"),
+      ],
+      reason: "closed 1 pane to make room for split",
+    }
+  }
+
+  return {
+    canSpawn: true,
+    actions: [{
+      type: "replace",
+      paneId: oldestPane.paneId,
+      oldSessionId: previousSessionId,
+      newSessionId: sessionId,
+      description,
+    }],
+    reason: "replaced oldest pane (no split possible)",
+  }
+}
+
+/**
+ * Builds the "close" action for `sessionId`; null when it has no mapping or its pane is not a current agent pane.
+ */
+export function decideCloseAction(
+  state: WindowState,
+  sessionId: string,
+  sessionMappings: SessionMapping[]
+): PaneAction | null {
+  const mapping = sessionMappings.find((entry) => entry.sessionId === sessionId)
+  const isLive = (paneId: string): boolean => state.agentPanes.some((pane) => pane.paneId === paneId)
+  if (!mapping || !isLive(mapping.paneId)) return null
+  return { type: "close", paneId: mapping.paneId, sessionId }
+}
--- a/src/features/tmux-subagent/index.ts
+++ b/src/features/tmux-subagent/index.ts
@@ -0,0 +1,5 @@
+export * from "./manager"
+export * from "./types"
+export * from "./pane-state-querier"
+export * from "./decision-engine"
+export * from "./action-executor"
--- a/src/features/tmux-subagent/manager.test.ts
+++ b/src/features/tmux-subagent/manager.test.ts
@@ -0,0 +1,690 @@
+import { describe, test, expect, mock, beforeEach } from 'bun:test'
+import type { TmuxConfig } from '../../config/schema'
+import type { WindowState, PaneAction } from './types'
+import type { ActionResult, ExecuteContext } from './action-executor'
+
+type ExecuteActionsResult = {
+  success: boolean
+  spawnedPaneId?: string
+  results: Array<{ action: PaneAction; result: ActionResult }>
+}
+
+const mockQueryWindowState = mock<(paneId: string) => Promise<WindowState | null>>(
+  async () => ({
+    windowWidth: 212,
+    windowHeight: 44,
+    mainPane: { paneId: '%0', width: 106, height: 44, left: 0, top: 0, title: 'main', isActive: true },
+    agentPanes: [],
+  })
+)
+const mockPaneExists = mock<(paneId: string) => Promise<boolean>>(async () => true)
+const mockExecuteActions = mock<(
+  actions: PaneAction[],
+  ctx: ExecuteContext
+) => Promise<ExecuteActionsResult>>(async () => ({
+  success: true,
+  spawnedPaneId: '%mock',
+  results: [],
+}))
+const mockExecuteAction = mock<(
+  action: PaneAction,
+  ctx: ExecuteContext
+) => Promise<ActionResult>>(async () => ({ success: true }))
+const mockIsInsideTmux = mock<() => boolean>(() => true)
+const mockGetCurrentPaneId = mock<() => string | undefined>(() => '%0')
+
+mock.module('./pane-state-querier', () => ({
+  queryWindowState: mockQueryWindowState,
+  paneExists: mockPaneExists,
+  getRightmostAgentPane: (state: WindowState) =>
+    state.agentPanes.length > 0
+      ? state.agentPanes.reduce((r, p) => (p.left > r.left ? p : r))
+      : null,
+  getOldestAgentPane: (state: WindowState) =>
+    state.agentPanes.length > 0
+      ? state.agentPanes.reduce((o, p) => (p.left < o.left ? p : o))
+      : null,
+}))
+
+mock.module('./action-executor', () => ({
+  executeActions: mockExecuteActions,
+  executeAction: mockExecuteAction,
+}))
+
+mock.module('../../shared/tmux', () => ({
+  isInsideTmux: mockIsInsideTmux,
+  getCurrentPaneId: mockGetCurrentPaneId,
+  POLL_INTERVAL_BACKGROUND_MS: 2000,
+  SESSION_TIMEOUT_MS: 600000,
+  SESSION_MISSING_GRACE_MS: 6000,
+  SESSION_READY_POLL_INTERVAL_MS: 100,
+  SESSION_READY_TIMEOUT_MS: 500,
+}))
+
+const trackedSessions = new Set<string>()
+
+/**
+ * Fake plugin context for the manager. `client.session.status` resolves to the override when one
+ * is supplied; otherwise it reports every id in `trackedSessions` as 'running'.
+ */
+function createMockContext(overrides?: {
+  sessionStatusResult?: { data?: Record<string, { type: string }> }
+}) {
+  const status = mock(async () => {
+    const forced = overrides?.sessionStatusResult
+    if (forced) return forced
+    const data: Record<string, { type: string }> = {}
+    trackedSessions.forEach((sessionId) => {
+      data[sessionId] = { type: 'running' }
+    })
+    return { data }
+  })
+  return {
+    serverUrl: new URL('http://localhost:4096'),
+    client: { session: { status } },
+  } as any
+}
+
+/** Builds a minimal 'session.created' event whose `info` carries the given id, parent id and title. */
+function createSessionCreatedEvent(
+  id: string,
+  parentID: string | undefined,
+  title: string
+) {
+  const info = { id, parentID, title }
+  return {
+    type: 'session.created',
+    properties: { info },
+  }
+}
+
+function createWindowState(overrides?: Partial<WindowState>): WindowState {
+  const defaults: WindowState = { // 220x44 window, 110-wide main pane, no agent panes
+    windowWidth: 220,
+    windowHeight: 44,
+    mainPane: { paneId: '%0', width: 110, height: 44, left: 0, top: 0, title: 'main', isActive: true },
+    agentPanes: [],
+  }
+  return { ...defaults, ...overrides }
+}
+
+describe('TmuxSessionManager', () => {
+  beforeEach(() => {
+    mockQueryWindowState.mockClear()
+    mockPaneExists.mockClear()
+    mockExecuteActions.mockClear()
+    mockExecuteAction.mockClear()
+    mockIsInsideTmux.mockClear()
+    mockGetCurrentPaneId.mockClear()
+    trackedSessions.clear()
+
+    mockQueryWindowState.mockImplementation(async () => createWindowState())
+    mockExecuteActions.mockImplementation(async (actions) => {
+      for (const action of actions) {
+        if (action.type === 'spawn') {
+          trackedSessions.add(action.sessionId)
+        }
+      }
+      return {
+        success: true,
+        spawnedPaneId: '%mock',
+        results: [],
+      }
+    })
+  })
+
+  describe('constructor', () => {
+    test('enabled when config.enabled=true and isInsideTmux=true', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+
+      //#when
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#then
+      expect(manager).toBeDefined()
+    })
+
+    test('disabled when config.enabled=true but isInsideTmux=false', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(false)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+
+      //#when
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#then - only asserts that construction succeeds; the disabled state itself is not checked
+      expect(manager).toBeDefined()
+    })
+
+    test('disabled when config.enabled=false', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: false,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+
+      //#when
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#then - only asserts that construction succeeds; the disabled state itself is not checked
+      expect(manager).toBeDefined()
+    })
+  })
+
+  describe('onSessionCreated', () => {
+    test('first agent spawns from source pane via decision engine', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      mockQueryWindowState.mockImplementation(async () => createWindowState())
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+      const event = createSessionCreatedEvent(
+        'ses_child',
+        'ses_parent',
+        'Background: Test Task'
+      )
+
+      //#when
+      await manager.onSessionCreated(event)
+
+      //#then
+      expect(mockQueryWindowState).toHaveBeenCalledTimes(1)
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+
+      const call = mockExecuteActions.mock.calls[0]
+      expect(call).toBeDefined()
+      const actionsArg = call![0]
+      expect(actionsArg).toHaveLength(1)
+      expect(actionsArg[0].type).toBe('spawn')
+      if (actionsArg[0].type === 'spawn') {
+        expect(actionsArg[0].sessionId).toBe('ses_child')
+        expect(actionsArg[0].description).toBe('Background: Test Task')
+        expect(actionsArg[0].targetPaneId).toBe('%0')
+        expect(actionsArg[0].splitDirection).toBe('-h')
+      }
+    })
+
+    test('second agent spawns with correct split direction', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let callCount = 0
+      mockQueryWindowState.mockImplementation(async () => {
+        callCount++
+        if (callCount === 1) {
+          return createWindowState()
+        }
+        return createWindowState({
+          agentPanes: [
+            {
+              paneId: '%1',
+              width: 40,
+              height: 44,
+              left: 100,
+              top: 0,
+              title: 'omo-subagent-Task 1',
+              isActive: false,
+            },
+          ],
+        })
+      })
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#when - first agent
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
+      )
+      mockExecuteActions.mockClear()
+
+      //#when - second agent
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
+      )
+
+      //#then - only the action count and type are asserted; the split direction itself is not checked
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+      const call = mockExecuteActions.mock.calls[0]
+      expect(call).toBeDefined()
+      const actionsArg = call![0]
+      expect(actionsArg).toHaveLength(1)
+      expect(actionsArg[0].type).toBe('spawn')
+    })
+
+    test('does NOT spawn pane when session has no parentID', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+      const event = createSessionCreatedEvent('ses_root', undefined, 'Root Session')
+
+      //#when
+      await manager.onSessionCreated(event)
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('does NOT spawn pane when disabled', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: false,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+      const event = createSessionCreatedEvent(
+        'ses_child',
+        'ses_parent',
+        'Background: Test Task'
+      )
+
+      //#when
+      await manager.onSessionCreated(event)
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('does NOT spawn pane for non session.created event type', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+      const event = {
+        type: 'session.deleted',
+        properties: {
+          info: { id: 'ses_child', parentID: 'ses_parent', title: 'Task' },
+        },
+      }
+
+      //#when
+      await manager.onSessionCreated(event)
+
+      //#then
+      expect(mockExecuteActions).toHaveBeenCalledTimes(0)
+    })
+
+    test('replaces oldest agent when unsplittable (small window)', async () => {
+      //#given - small window where split is not possible
+      mockIsInsideTmux.mockReturnValue(true)
+      mockQueryWindowState.mockImplementation(async () =>
+        createWindowState({
+          windowWidth: 160,
+          windowHeight: 11,
+          agentPanes: [
+            {
+              paneId: '%1',
+              width: 40,
+              height: 11,
+              left: 80,
+              top: 0,
+              title: 'omo-subagent-Task 1',
+              isActive: false,
+            },
+          ],
+        })
+      )
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 120,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#when
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_new', 'ses_parent', 'New Task')
+      )
+
+      //#then - with small window, replace action is used instead of close+spawn
+      expect(mockExecuteActions).toHaveBeenCalledTimes(1)
+      const call = mockExecuteActions.mock.calls[0]
+      expect(call).toBeDefined()
+      const actionsArg = call![0]
+      expect(actionsArg).toHaveLength(1)
+      expect(actionsArg[0].type).toBe('replace')
+    })
+  })
+
+  describe('onSessionDeleted', () => {
+    test('closes pane when tracked session is deleted', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let stateCallCount = 0
+      mockQueryWindowState.mockImplementation(async () => {
+        stateCallCount++
+        if (stateCallCount === 1) {
+          return createWindowState()
+        }
+        return createWindowState({
+          agentPanes: [
+            {
+              paneId: '%mock',
+              width: 40,
+              height: 44,
+              left: 100,
+              top: 0,
+              title: 'omo-subagent-Task',
+              isActive: false,
+            },
+          ],
+        })
+      })
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      await manager.onSessionCreated(
+        createSessionCreatedEvent(
+          'ses_child',
+          'ses_parent',
+          'Background: Test Task'
+        )
+      )
+      mockExecuteAction.mockClear()
+
+      //#when
+      await manager.onSessionDeleted({ sessionID: 'ses_child' })
+
+      //#then
+      expect(mockExecuteAction).toHaveBeenCalledTimes(1)
+      const call = mockExecuteAction.mock.calls[0]
+      expect(call).toBeDefined()
+      expect(call![0]).toEqual({
+        type: 'close',
+        paneId: '%mock',
+        sessionId: 'ses_child',
+      })
+    })
+
+    test('does nothing when untracked session is deleted', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      //#when
+      await manager.onSessionDeleted({ sessionID: 'ses_unknown' })
+
+      //#then
+      expect(mockExecuteAction).toHaveBeenCalledTimes(0)
+    })
+  })
+
+  describe('cleanup', () => {
+    test('closes all tracked panes', async () => {
+      //#given
+      mockIsInsideTmux.mockReturnValue(true)
+
+      let callCount = 0
+      mockExecuteActions.mockImplementation(async () => {
+        callCount++
+        return {
+          success: true,
+          spawnedPaneId: `%${callCount}`,
+          results: [],
+        }
+      })
+
+      const { TmuxSessionManager } = await import('./manager')
+      const ctx = createMockContext()
+      const config: TmuxConfig = {
+        enabled: true,
+        layout: 'main-vertical',
+        main_pane_size: 60,
+        main_pane_min_width: 80,
+        agent_pane_min_width: 40,
+      }
+      const manager = new TmuxSessionManager(ctx, config)
+
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_1', 'ses_parent', 'Task 1')
+      )
+      await manager.onSessionCreated(
+        createSessionCreatedEvent('ses_2', 'ses_parent', 'Task 2')
+      )
+
+      mockExecuteAction.mockClear()
+
+      //#when
+      await manager.cleanup()
+
+      //#then
+      expect(mockExecuteAction).toHaveBeenCalledTimes(2)
+    })
+  })
+})
+
+describe('DecisionEngine', () => {
+  describe('calculateCapacity', () => {
+    test('calculates correct 2D grid capacity', async () => {
+      //#given
+      const { calculateCapacity } = await import('./decision-engine')
+
+      //#when
+      const result = calculateCapacity(212, 44)
+
+      //#then - availableWidth=106, cols=(106+1)/(52+1)=2, rows=(44+1)/(11+1)=3 (accounting for dividers)
+      expect(result.cols).toBe(2)
+      expect(result.rows).toBe(3)
+      expect(result.total).toBe(6)
+    })
+
+    test('returns 0 cols when agent area too narrow', async () => {
+      //#given
+      const { calculateCapacity } = await import('./decision-engine')
+
+      //#when
+      const result = calculateCapacity(100, 44)
+
+      //#then - availableWidth=50, cols=floor((50+1)/(52+1))=0
+      expect(result.cols).toBe(0)
+      expect(result.total).toBe(0)
+    })
+  })
+
+  describe('decideSpawnActions', () => {
+    test('returns spawn action with splitDirection when under capacity', async () => {
+      //#given
+      const { decideSpawnActions } = await import('./decision-engine')
+      const state: WindowState = {
+        windowWidth: 212,
+        windowHeight: 44,
+        mainPane: {
+          paneId: '%0',
+          width: 106,
+          height: 44,
+          left: 0,
+          top: 0,
+          title: 'main',
+          isActive: true,
+        },
+        agentPanes: [],
+      }
+
+      //#when
+      const decision = decideSpawnActions(
+        state,
+        'ses_1',
+        'Test Task',
+        { mainPaneMinWidth: 120, agentPaneWidth: 40 },
+        []
+      )
+
+      //#then
+      expect(decision.canSpawn).toBe(true)
+      expect(decision.actions).toHaveLength(1)
+      expect(decision.actions[0].type).toBe('spawn')
+      if (decision.actions[0].type === 'spawn') {
+        expect(decision.actions[0].sessionId).toBe('ses_1')
+        expect(decision.actions[0].description).toBe('Test Task')
+        expect(decision.actions[0].targetPaneId).toBe('%0')
+        expect(decision.actions[0].splitDirection).toBe('-h')
+      }
+    })
+
+    test('returns replace when split not possible', async () => {
+      //#given - small window where split is never possible
+      const { decideSpawnActions } = await import('./decision-engine')
+      const state: WindowState = {
+        windowWidth: 160,
+        windowHeight: 11,
+        mainPane: {
+          paneId: '%0',
+          width: 80,
+          height: 11,
+          left: 0,
+          top: 0,
+          title: 'main',
+          isActive: true,
+        },
+        agentPanes: [
+          {
+            paneId: '%1',
+            width: 80,
+            height: 11,
+            left: 80,
+            top: 0,
+            title: 'omo-subagent-Old',
+            isActive: false,
+          },
+        ],
+      }
+      const sessionMappings = [
+        { sessionId: 'ses_old', paneId: '%1', createdAt: new Date('2024-01-01') },
+      ]
+
+      //#when
+      const decision = decideSpawnActions(
+        state,
+        'ses_new',
+        'New Task',
+        { mainPaneMinWidth: 120, agentPaneWidth: 40 },
+        sessionMappings
+      )
+
+      //#then - agent area (80) < MIN_SPLIT_WIDTH (105), so replace is used
+      expect(decision.canSpawn).toBe(true)
+      expect(decision.actions).toHaveLength(1)
+      expect(decision.actions[0].type).toBe('replace')
+    })
+
+    test('returns canSpawn=false when window too small', async () => {
+      //#given
+      const { decideSpawnActions } = await import('./decision-engine')
+      const state: WindowState = {
+        windowWidth: 60,
+        windowHeight: 5,
+        mainPane: {
+          paneId: '%0',
+          width: 30,
+          height: 5,
+          left: 0,
+          top: 0,
+          title: 'main',
+          isActive: true,
+        },
+        agentPanes: [],
+      }
+
+      //#when
+      const decision = decideSpawnActions(
+        state,
+        'ses_1',
+        'Test Task',
+        { mainPaneMinWidth: 120, agentPaneWidth: 40 },
+        []
+      )
+
+      //#then
+      expect(decision.canSpawn).toBe(false)
+      expect(decision.reason).toContain('too small')
+    })
+  })
+})
--- a/src/features/tmux-subagent/manager.ts
+++ b/src/features/tmux-subagent/manager.ts
@@ -0,0 +1,396 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import type { TmuxConfig } from "../../config/schema"
+import type { TrackedSession, CapacityConfig } from "./types"
+import {
+  isInsideTmux,
+  getCurrentPaneId,
+  POLL_INTERVAL_BACKGROUND_MS,
+  SESSION_MISSING_GRACE_MS,
+  SESSION_READY_POLL_INTERVAL_MS,
+  SESSION_READY_TIMEOUT_MS,
+} from "../../shared/tmux"
+import { log } from "../../shared"
+import { queryWindowState } from "./pane-state-querier"
+import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
+import { executeActions, executeAction } from "./action-executor"
+
+// Client type derived from the plugin input rather than imported separately.
+type OpencodeClient = PluginInput["client"]
+
+/**
+ * Minimal shape of the event read by `onSessionCreated`.
+ * Every nested field is optional, so callers must null-check before use.
+ */
+interface SessionCreatedEvent {
+  type: string
+  properties?: { info?: { id?: string; parentID?: string; title?: string } }
+}
+
+// Absolute lifetime of a tracked pane (10 minutes), measured from `createdAt` in the poll loop.
+const SESSION_TIMEOUT_MS = 10 * 60 * 1000
+
+/**
+ * State-first tmux session manager.
+ *
+ * Architecture:
+ * 1. QUERY: Get actual tmux pane state (source of truth)
+ * 2. DECIDE: Pure function determines actions based on state
+ * 3. EXECUTE: Execute actions with verification
+ * 4. UPDATE: On the spawn path, update the internal cache only after the
+ *    executor reports success. The close paths (`onSessionDeleted`,
+ *    `closeSessionById`, `cleanup`) drop cache entries unconditionally.
+ *
+ * The internal `sessions` Map is just a cache for the sessionId<->paneId mapping.
+ * Pane geometry is always re-queried from tmux before a decision is made.
+ */
+export class TmuxSessionManager {
+  private client: OpencodeClient
+  private tmuxConfig: TmuxConfig
+  private serverUrl: string
+  // Pane this process was started in; undefined when it could not be determined.
+  private sourcePaneId: string | undefined
+  // Cache of sessions that currently own a pane, keyed by session id.
+  private sessions = new Map<string, TrackedSession>()
+  // Session ids whose `onSessionCreated` handling is still in flight (duplicate-event guard).
+  private pendingSessions = new Set<string>()
+  // Handle of the background poll timer; undefined while polling is stopped.
+  private pollInterval?: ReturnType<typeof setInterval>
+
+  /**
+   * @param ctx Plugin input; supplies the API client and, optionally, the server URL.
+   * @param tmuxConfig Tmux feature configuration (enable flag and pane width limits).
+   */
+  constructor(ctx: PluginInput, tmuxConfig: TmuxConfig) {
+    this.client = ctx.client
+    this.tmuxConfig = tmuxConfig
+    // Fall back to localhost on OPENCODE_PORT (default 4096) when the plugin input has no URL.
+    const defaultPort = process.env.OPENCODE_PORT ?? "4096"
+    this.serverUrl = ctx.serverUrl?.toString() ?? `http://localhost:${defaultPort}`
+    this.sourcePaneId = getCurrentPaneId()
+
+    log("[tmux-session-manager] initialized", {
+      configEnabled: this.tmuxConfig.enabled,
+      tmuxConfig: this.tmuxConfig,
+      serverUrl: this.serverUrl,
+      sourcePaneId: this.sourcePaneId,
+    })
+  }
+
+  /** True only when the feature is enabled in config AND the process runs inside tmux. */
+  private isEnabled(): boolean {
+    return this.tmuxConfig.enabled && isInsideTmux()
+  }
+
+  /** Translates the snake_case config fields into the shape the decision engine expects. */
+  private getCapacityConfig(): CapacityConfig {
+    return {
+      mainPaneMinWidth: this.tmuxConfig.main_pane_min_width,
+      agentPaneWidth: this.tmuxConfig.agent_pane_min_width,
+    }
+  }
+
+  /** Snapshot of the cache as plain session/pane/creation-time records for the decision engine. */
+  private getSessionMappings(): SessionMapping[] {
+    return Array.from(this.sessions.values()).map((s) => ({
+      sessionId: s.sessionId,
+      paneId: s.paneId,
+      createdAt: s.createdAt,
+    }))
+  }
+
+  /**
+   * Polls the session status endpoint until `sessionId` appears in the response.
+   *
+   * @returns `true` as soon as the session shows up, `false` once
+   *   SESSION_READY_TIMEOUT_MS has elapsed. Errors from the status call are
+   *   logged and the loop keeps retrying.
+   */
+  private async waitForSessionReady(sessionId: string): Promise<boolean> {
+    const startTime = Date.now()
+    
+    while (Date.now() - startTime < SESSION_READY_TIMEOUT_MS) {
+      try {
+        const statusResult = await this.client.session.status({ path: undefined })
+        const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+        
+        // Any status entry counts as "ready"; the status type itself is only logged.
+        if (allStatuses[sessionId]) {
+          log("[tmux-session-manager] session ready", {
+            sessionId,
+            status: allStatuses[sessionId].type,
+            waitedMs: Date.now() - startTime,
+          })
+          return true
+        }
+      } catch (err) {
+        log("[tmux-session-manager] session status check error", { error: String(err) })
+      }
+      
+      await new Promise((resolve) => setTimeout(resolve, SESSION_READY_POLL_INTERVAL_MS))
+    }
+    
+    log("[tmux-session-manager] session ready timeout", {
+      sessionId,
+      timeoutMs: SESSION_READY_TIMEOUT_MS,
+    })
+    return false
+  }
+
+  /**
+   * Handles a `session.created` event by querying tmux, asking the decision
+   * engine what to do, executing the resulting actions and caching the new pane.
+   *
+   * The event is ignored when the feature is disabled, the type is not
+   * `session.created`, the session has no id or no parent, the session is
+   * already tracked or pending, or the source pane id is unknown.
+   */
+  async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
+    const enabled = this.isEnabled()
+    log("[tmux-session-manager] onSessionCreated called", {
+      enabled,
+      tmuxConfigEnabled: this.tmuxConfig.enabled,
+      isInsideTmux: isInsideTmux(),
+      eventType: event.type,
+      infoId: event.properties?.info?.id,
+      infoParentID: event.properties?.info?.parentID,
+    })
+
+    if (!enabled) return
+    if (event.type !== "session.created") return
+
+    const info = event.properties?.info
+    // Only sessions with a parent get a pane — presumably these are subagent sessions; confirm.
+    if (!info?.id || !info?.parentID) return
+
+    const sessionId = info.id
+    const title = info.title ?? "Subagent"
+
+    if (this.sessions.has(sessionId) || this.pendingSessions.has(sessionId)) {
+      log("[tmux-session-manager] session already tracked or pending", { sessionId })
+      return
+    }
+
+    if (!this.sourcePaneId) {
+      log("[tmux-session-manager] no source pane id")
+      return
+    }
+
+    // Mark as in-flight before the first await so a duplicate event is rejected above.
+    this.pendingSessions.add(sessionId)
+
+    try {
+      const state = await queryWindowState(this.sourcePaneId)
+      if (!state) {
+        log("[tmux-session-manager] failed to query window state")
+        return
+      }
+
+      log("[tmux-session-manager] window state queried", {
+        windowWidth: state.windowWidth,
+        mainPane: state.mainPane?.paneId,
+        agentPaneCount: state.agentPanes.length,
+        agentPanes: state.agentPanes.map((p) => p.paneId),
+      })
+
+      const decision = decideSpawnActions(
+        state,
+        sessionId,
+        title,
+        this.getCapacityConfig(),
+        this.getSessionMappings()
+      )
+
+      log("[tmux-session-manager] spawn decision", {
+        canSpawn: decision.canSpawn,
+        reason: decision.reason,
+        actionCount: decision.actions.length,
+        actions: decision.actions.map((a) => {
+          if (a.type === "close") return { type: "close", paneId: a.paneId }
+          if (a.type === "replace") return { type: "replace", paneId: a.paneId, newSessionId: a.newSessionId }
+          return { type: "spawn", sessionId: a.sessionId }
+        }),
+      })
+
+      if (!decision.canSpawn) {
+        log("[tmux-session-manager] cannot spawn", { reason: decision.reason })
+        return
+      }
+
+      const result = await executeActions(
+        decision.actions,
+        { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+      )
+
+      // Evict cache entries for every pane that was successfully closed or replaced,
+      // even if a later action in the same batch failed.
+      for (const { action, result: actionResult } of result.results) {
+        if (action.type === "close" && actionResult.success) {
+          this.sessions.delete(action.sessionId)
+          log("[tmux-session-manager] removed closed session from cache", {
+            sessionId: action.sessionId,
+          })
+        }
+        if (action.type === "replace" && actionResult.success) {
+          this.sessions.delete(action.oldSessionId)
+          log("[tmux-session-manager] removed replaced session from cache", {
+            oldSessionId: action.oldSessionId,
+            newSessionId: action.newSessionId,
+          })
+        }
+      }
+
+      if (result.success && result.spawnedPaneId) {
+        const sessionReady = await this.waitForSessionReady(sessionId)
+        
+        // A readiness timeout is not fatal: the pane exists, so it is tracked regardless.
+        if (!sessionReady) {
+          log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
+            sessionId,
+            paneId: result.spawnedPaneId,
+          })
+        }
+        
+        // createdAt and lastSeenAt start from the same instant.
+        const now = Date.now()
+        this.sessions.set(sessionId, {
+          sessionId,
+          paneId: result.spawnedPaneId,
+          description: title,
+          createdAt: new Date(now),
+          lastSeenAt: new Date(now),
+        })
+        log("[tmux-session-manager] pane spawned and tracked", {
+          sessionId,
+          paneId: result.spawnedPaneId,
+          sessionReady,
+        })
+        this.startPolling()
+      } else {
+        log("[tmux-session-manager] spawn failed", {
+          success: result.success,
+          results: result.results.map((r) => ({
+            type: r.action.type,
+            success: r.result.success,
+            error: r.result.error,
+          })),
+        })
+      }
+    } finally {
+      // Always release the in-flight marker, whatever path was taken above.
+      this.pendingSessions.delete(sessionId)
+    }
+  }
+
+  /**
+   * Closes the pane of a deleted session and drops it from the cache.
+   *
+   * Does nothing for untracked sessions. The cache entry is removed even when
+   * tmux cannot be queried or the decision engine returns no close action.
+   * NOTE(review): `createEventHandler` does not route events here; the caller
+   * must be elsewhere — confirm.
+   */
+  async onSessionDeleted(event: { sessionID: string }): Promise<void> {
+    if (!this.isEnabled()) return
+    if (!this.sourcePaneId) return
+
+    const tracked = this.sessions.get(event.sessionID)
+    if (!tracked) return
+
+    log("[tmux-session-manager] onSessionDeleted", { sessionId: event.sessionID })
+
+    const state = await queryWindowState(this.sourcePaneId)
+    if (!state) {
+      // NOTE(review): this early return skips the stopPolling() check below; the
+      // poll loop stops itself on its next tick once the cache is empty.
+      this.sessions.delete(event.sessionID)
+      return
+    }
+
+    const closeAction = decideCloseAction(state, event.sessionID, this.getSessionMappings())
+    if (closeAction) {
+      await executeAction(closeAction, { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state })
+    }
+
+    this.sessions.delete(event.sessionID)
+
+    if (this.sessions.size === 0) {
+      this.stopPolling()
+    }
+  }
+
+  /** Starts the background poll timer; a no-op when it is already running. */
+  private startPolling(): void {
+    if (this.pollInterval) return
+
+    this.pollInterval = setInterval(
+      () => this.pollSessions(),
+      POLL_INTERVAL_BACKGROUND_MS,
+    )
+    log("[tmux-session-manager] polling started")
+  }
+
+  /** Stops the background poll timer if one is active. */
+  private stopPolling(): void {
+    if (this.pollInterval) {
+      clearInterval(this.pollInterval)
+      this.pollInterval = undefined
+      log("[tmux-session-manager] polling stopped")
+    }
+  }
+
+  /**
+   * One poll tick: fetches all session statuses and closes every tracked pane
+   * whose session is idle, has been absent from the status response for at
+   * least SESSION_MISSING_GRACE_MS, or is older than SESSION_TIMEOUT_MS.
+   * Errors are logged and never propagate to the timer.
+   */
+  private async pollSessions(): Promise<void> {
+    if (this.sessions.size === 0) {
+      this.stopPolling()
+      return
+    }
+
+    try {
+      const statusResult = await this.client.session.status({ path: undefined })
+      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+
+      log("[tmux-session-manager] pollSessions", {
+        trackedSessions: Array.from(this.sessions.keys()),
+        allStatusKeys: Object.keys(allStatuses),
+      })
+
+      const now = Date.now()
+      // Collect first, close afterwards, so the map is not mutated while it is iterated.
+      const sessionsToClose: string[] = []
+
+      for (const [sessionId, tracked] of this.sessions.entries()) {
+        const status = allStatuses[sessionId]
+        const isIdle = status?.type === "idle"
+
+        if (status) {
+          tracked.lastSeenAt = new Date(now)
+        }
+
+        // Time since the session last appeared in a status response; 0 while it is present.
+        const missingSince = !status ? now - tracked.lastSeenAt.getTime() : 0
+        const missingTooLong = missingSince >= SESSION_MISSING_GRACE_MS
+        // Hard cap measured from creation, independent of activity.
+        const isTimedOut = now - tracked.createdAt.getTime() > SESSION_TIMEOUT_MS
+
+        log("[tmux-session-manager] session check", {
+          sessionId,
+          statusType: status?.type,
+          isIdle,
+          missingSince,
+          missingTooLong,
+          isTimedOut,
+          shouldClose: isIdle || missingTooLong || isTimedOut,
+        })
+
+        if (isIdle || missingTooLong || isTimedOut) {
+          sessionsToClose.push(sessionId)
+        }
+      }
+
+      for (const sessionId of sessionsToClose) {
+        log("[tmux-session-manager] closing session due to poll", { sessionId })
+        await this.closeSessionById(sessionId)
+      }
+    } catch (err) {
+      log("[tmux-session-manager] poll error", { error: String(err) })
+    }
+  }
+
+  /**
+   * Closes the pane of a tracked session and removes it from the cache.
+   * The close action is only attempted when the window state can be queried;
+   * the cache entry is removed either way.
+   */
+  private async closeSessionById(sessionId: string): Promise<void> {
+    const tracked = this.sessions.get(sessionId)
+    if (!tracked) return
+
+    log("[tmux-session-manager] closing session pane", {
+      sessionId,
+      paneId: tracked.paneId,
+    })
+
+    const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+    if (state) {
+      await executeAction(
+        { type: "close", paneId: tracked.paneId, sessionId },
+        { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+      )
+    }
+
+    this.sessions.delete(sessionId)
+
+    if (this.sessions.size === 0) {
+      this.stopPolling()
+    }
+  }
+
+  /**
+   * Returns a plugin event handler that forwards every event to
+   * `onSessionCreated`, which filters on the event type itself.
+   */
+  createEventHandler(): (input: { event: { type: string; properties?: unknown } }) => Promise<void> {
+    return async (input) => {
+      await this.onSessionCreated(input.event as SessionCreatedEvent)
+    }
+  }
+
+  /**
+   * Stops polling, issues a close action for every tracked pane in parallel and
+   * clears the cache. Per-pane rejections are logged and do not abort the others.
+   */
+  async cleanup(): Promise<void> {
+    this.stopPolling()
+
+    if (this.sessions.size > 0) {
+      log("[tmux-session-manager] closing all panes", { count: this.sessions.size })
+      const state = this.sourcePaneId ? await queryWindowState(this.sourcePaneId) : null
+      
+      if (state) {
+        const closePromises = Array.from(this.sessions.values()).map((s) =>
+          executeAction(
+            { type: "close", paneId: s.paneId, sessionId: s.sessionId },
+            { config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
+          ).catch((err) =>
+            log("[tmux-session-manager] cleanup error for pane", {
+              paneId: s.paneId,
+              error: String(err),
+            }),
+          ),
+        )
+        await Promise.all(closePromises)
+      }
+      // Cleared even when the window state could not be queried.
+      this.sessions.clear()
+    }
+
+    log("[tmux-session-manager] cleanup complete")
+  }
+}
--- a/src/features/tmux-subagent/pane-state-querier.ts
+++ b/src/features/tmux-subagent/pane-state-querier.ts
@@ -0,0 +1,73 @@
+import { spawn } from "bun"
+import type { WindowState, TmuxPaneInfo } from "./types"
+import { getTmuxPath } from "../../tools/interactive-bash/utils"
+import { log } from "../../shared"
+
+/**
+ * Queries tmux for the panes reported by `list-panes -t <sourcePaneId>` and
+ * packages their geometry as a {@link WindowState}.
+ *
+ * Each output line has the comma-separated layout
+ * `pane_id,width,height,left,top,title,active,window_width,window_height`.
+ * The title is free text and may itself contain commas, so the line is parsed
+ * from both ends: the first five and last three fields are fixed, and
+ * everything in between is rejoined as the title.
+ *
+ * @param sourcePaneId tmux pane id (e.g. `%0`) of the pane treated as the main pane.
+ * @returns The window state with panes sorted left-to-right, then top-to-bottom;
+ *   `null` when tmux cannot be located, `list-panes` exits non-zero, no pane
+ *   lines are returned, or `sourcePaneId` is not among the listed panes.
+ */
+export async function queryWindowState(sourcePaneId: string): Promise<WindowState | null> {
+  const tmux = await getTmuxPath()
+  if (!tmux) return null
+
+  const proc = spawn(
+    [
+      tmux,
+      "list-panes",
+      "-t",
+      sourcePaneId,
+      "-F",
+      "#{pane_id},#{pane_width},#{pane_height},#{pane_left},#{pane_top},#{pane_title},#{pane_active},#{window_width},#{window_height}",
+    ],
+    { stdout: "pipe", stderr: "pipe" }
+  )
+
+  const exitCode = await proc.exited
+  const stdout = await new Response(proc.stdout).text()
+
+  if (exitCode !== 0) {
+    log("[pane-state-querier] list-panes failed", { exitCode })
+    return null
+  }
+
+  const lines = stdout.trim().split("\n").filter(Boolean)
+  if (lines.length === 0) return null
+
+  // Number of fixed fields before and after the title in the -F format above.
+  const LEADING_FIELDS = 5
+  const TRAILING_FIELDS = 3
+
+  let windowWidth = 0
+  let windowHeight = 0
+  const panes: TmuxPaneInfo[] = []
+
+  for (const line of lines) {
+    const fields = line.split(",")
+    // A well-formed line has at least the fixed fields plus one (possibly empty) title field.
+    if (fields.length < LEADING_FIELDS + 1 + TRAILING_FIELDS) continue
+
+    const [paneId, widthStr, heightStr, leftStr, topStr] = fields
+    const [activeStr, windowWidthStr, windowHeightStr] = fields.slice(-TRAILING_FIELDS)
+    // Rejoin the middle so a title such as "build, test" survives intact.
+    const title = fields.slice(LEADING_FIELDS, -TRAILING_FIELDS).join(",")
+
+    const width = parseInt(widthStr, 10)
+    const height = parseInt(heightStr, 10)
+    const left = parseInt(leftStr, 10)
+    const top = parseInt(topStr, 10)
+    const isActive = activeStr === "1"
+
+    // Only accept parsable window dimensions so a bad line cannot overwrite them with NaN.
+    const parsedWindowWidth = parseInt(windowWidthStr, 10)
+    const parsedWindowHeight = parseInt(windowHeightStr, 10)
+    if (!isNaN(parsedWindowWidth)) windowWidth = parsedWindowWidth
+    if (!isNaN(parsedWindowHeight)) windowHeight = parsedWindowHeight
+
+    if (!isNaN(width) && !isNaN(left) && !isNaN(height) && !isNaN(top)) {
+      panes.push({ paneId, width, height, left, top, title, isActive })
+    }
+  }
+
+  // Order panes by column first, then by row within a column.
+  panes.sort((a, b) => a.left - b.left || a.top - b.top)
+
+  const mainPane = panes.find((p) => p.paneId === sourcePaneId)
+  if (!mainPane) {
+    log("[pane-state-querier] CRITICAL: sourcePaneId not found in panes", {
+      sourcePaneId,
+      availablePanes: panes.map((p) => p.paneId),
+    })
+    return null
+  }
+
+  // Every listed pane other than the source pane is treated as an agent pane.
+  const agentPanes = panes.filter((p) => p.paneId !== mainPane.paneId)
+
+  log("[pane-state-querier] window state", {
+    windowWidth,
+    windowHeight,
+    mainPane: mainPane.paneId,
+    agentPaneCount: agentPanes.length,
+  })
+
+  return { windowWidth, windowHeight, mainPane, agentPanes }
+}
--- a/src/features/tmux-subagent/types.ts
+++ b/src/features/tmux-subagent/types.ts
@@ -0,0 +1,45 @@
+/** Cache entry linking one session to the tmux pane that displays it. */
+export interface TrackedSession {
+  sessionId: string
+  paneId: string
+  // Human-readable label; the manager fills this from the session title.
+  description: string
+  // Set when the pane is first tracked; the manager's absolute timeout is measured from here.
+  createdAt: Date
+  // Refreshed by the manager's poll whenever the session appears in the status response.
+  lastSeenAt: Date
+}
+
+// Minimum pane dimensions — presumably in tmux cells and consumed by the
+// decision engine; not referenced in this file, confirm against callers.
+export const MIN_PANE_WIDTH = 52
+export const MIN_PANE_HEIGHT = 11
+
+/** Geometry and metadata of a single tmux pane, as parsed from `list-panes` output. */
+export interface TmuxPaneInfo {
+  paneId: string
+  width: number
+  height: number
+  left: number
+  top: number
+  title: string
+  isActive: boolean
+}
+
+/** Snapshot of one tmux window: its size, the main pane, and all other (agent) panes. */
+export interface WindowState {
+  windowWidth: number
+  windowHeight: number
+  // Nullable in the type, although the pane-state querier returns no state at all
+  // when the main pane cannot be found.
+  mainPane: TmuxPaneInfo | null
+  agentPanes: TmuxPaneInfo[]
+}
+
+// Presumably the tmux split-window flags (-h / -v) — confirm in the action executor.
+export type SplitDirection = "-h" | "-v"
+
+/**
+ * One step of a pane plan, discriminated by `type`:
+ * - `close`: remove the pane that belongs to `sessionId`.
+ * - `spawn`: create a pane for `sessionId` by splitting `targetPaneId`.
+ * - `replace`: reuse `paneId` for `newSessionId`, dropping `oldSessionId`
+ *   (per the release notes this is done with tmux `respawn-pane`, which keeps the layout).
+ */
+export type PaneAction =
+  | { type: "close"; paneId: string; sessionId: string }
+  | { type: "spawn"; sessionId: string; description: string; targetPaneId: string; splitDirection: SplitDirection }
+  | { type: "replace"; paneId: string; oldSessionId: string; newSessionId: string; description: string }
+
+/** Result of the spawn decision: whether a pane can be provided and the actions to get there. */
+export interface SpawnDecision {
+  canSpawn: boolean
+  actions: PaneAction[]
+  // Explanation for the decision; the manager logs it when `canSpawn` is false.
+  reason?: string
+}
+
+/** Width limits fed to the decision engine; the manager fills them from the tmux config. */
+export interface CapacityConfig {
+  mainPaneMinWidth: number
+  agentPaneWidth: number
+}
--- a/src/hooks/AGENTS.md
+++ b/src/hooks/AGENTS.md
@@ -1,16 +1,14 @@
 # HOOKS KNOWLEDGE BASE

 ## OVERVIEW
-
-31 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.
+32 lifecycle hooks intercepting/modifying agent behavior. Events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, onSummarize.

 ## STRUCTURE
-
 ```
 hooks/
-├── atlas/                      # Main orchestration (773 lines)
-├── anthropic-context-window-limit-recovery/  # Auto-summarize
-├── todo-continuation-enforcer.ts # Force TODO completion
+├── atlas/                      # Main orchestration (752 lines)
+├── anthropic-context-window-limit-recovery/ # Auto-summarize
+├── todo-continuation-enforcer.ts # Force TODO completion (16k lines)
 ├── ralph-loop/                 # Self-referential dev loop
 ├── claude-code-hooks/          # settings.json compat layer - see AGENTS.md
 ├── comment-checker/            # Prevents AI slop
@@ -28,44 +26,61 @@ hooks/
 ├── prometheus-md-only/         # Planner read-only mode
 ├── agent-usage-reminder/       # Specialized agent hints
 ├── auto-update-checker/        # Plugin update check
-└── tool-output-truncator.ts    # Prevents context bloat
+├── tool-output-truncator.ts    # Prevents context bloat
+├── compaction-context-injector/ # Injects context on compaction
+├── delegate-task-retry/        # Retries failed delegations
+├── interactive-bash-session/   # Tmux session management
+├── non-interactive-env/        # Non-TTY environment handling
+├── start-work/                 # Sisyphus work session starter
+├── task-resume-info/           # Resume info for cancelled tasks
+├── question-label-truncator/   # Auto-truncates question labels
+├── category-skill-reminder/    # Reminds of category skills
+├── empty-task-response-detector.ts # Detects empty responses
+├── sisyphus-junior-notepad/    # Sisyphus Junior notepad
+└── index.ts                    # Hook aggregation + registration
 ```

 ## HOOK EVENTS
-
 | Event | Timing | Can Block | Use Case |
 |-------|--------|-----------|----------|
-| PreToolUse | Before tool | Yes | Validate/modify inputs |
-| PostToolUse | After tool | No | Append warnings, truncate |
-| UserPromptSubmit | On prompt | Yes | Keyword detection |
-| Stop | Session idle | No | Auto-continue |
-| onSummarize | Compaction | No | Preserve state |
+| UserPromptSubmit | `chat.message` | Yes | Keyword detection, slash commands |
+| PreToolUse | `tool.execute.before` | Yes | Validate/modify inputs, inject context |
+| PostToolUse | `tool.execute.after` | No | Truncate output, error recovery |
+| Stop | `event` (session.stop) | No | Auto-continue, notifications |
+| onSummarize | Compaction | No | Preserve state, inject summary context |

 ## EXECUTION ORDER
-
-**chat.message**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork → ralphLoop
-
-**tool.execute.before**: claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → rulesInjector
-
-**tool.execute.after**: editErrorRecovery → delegateTaskRetry → commentChecker → toolOutputTruncator → claudeCodeHooks
+- **UserPromptSubmit**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork
+- **PreToolUse**: questionLabelTruncator → claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → prometheusMdOnly → sisyphusJuniorNotepad → atlasHook
+- **PostToolUse**: claudeCodeHooks → toolOutputTruncator → contextWindowMonitor → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → emptyTaskResponseDetector → agentUsageReminder → interactiveBashSession → editErrorRecovery → delegateTaskRetry → atlasHook → taskResumeInfo

 ## HOW TO ADD
-
 1. Create `src/hooks/name/` with `index.ts` exporting `createMyHook(ctx)`
 2. Add hook name to `HookNameSchema` in `src/config/schema.ts`
-3. Register in `src/index.ts`:
-   ```typescript
-   const myHook = isHookEnabled("my-hook") ? createMyHook(ctx) : null
-   ```
+3. Register in `src/index.ts` and add to relevant lifecycle methods

-## PATTERNS
+## HOOK PATTERNS

- **Session-scoped state**: `Map<sessionID, Set<string>>`
- **Conditional execution**: Check `input.tool` before processing
- **Output modification**: `output.output += "\n${REMINDER}"`
+**Simple Single-Event**:
+```typescript
+export function createToolOutputTruncatorHook(ctx) {
+  return { "tool.execute.after": async (input, output) => { ... } }
+}
+```
+
+**Multi-Event with State**:
+```typescript
+export function createThinkModeHook() {
+  const state = new Map<string, ThinkModeState>()
+  return {
+    "chat.params": async (output, sessionID) => { ... },
+    "event": async ({ event }) => { /* cleanup */ }
+  }
+}
+```

 ## ANTI-PATTERNS
-
 - **Blocking non-critical**: Use PostToolUse warnings instead
- **Heavy computation**: Keep PreToolUse light
- **Redundant injection**: Track injected files
+- **Heavy computation**: Keep PreToolUse light to avoid latency
+- **Redundant injection**: Track injected files to avoid context bloat
+- **Direct state mutation**: Append with `output.output +=` instead of overwriting the existing output
--- a/src/hooks/atlas/index.test.ts
+++ b/src/hooks/atlas/index.test.ts
@@ -123,7 +123,7 @@ describe("atlas hook", () => {
     test("should append standalone verification when no boulder state but caller is Atlas", async () => {
       // #given - no boulder state, but caller is Atlas
       const sessionID = "session-no-boulder-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const hook = createAtlasHook(createMockPluginInput())
      const output = {
@@ -141,7 +141,7 @@ describe("atlas hook", () => {
      // #then - standalone verification reminder appended
      expect(output.output).toContain("Task completed successfully")
      expect(output.output).toContain("MANDATORY:")
-      expect(output.output).toContain("delegate_task(resume=")
+      expect(output.output).toContain("delegate_task(session_id=")
      
      cleanupMessageStorage(sessionID)
    })
@@ -149,7 +149,7 @@ describe("atlas hook", () => {
     test("should transform output when caller is Atlas with boulder state", async () => {
       // #given - Atlas caller with boulder state
       const sessionID = "session-transform-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [x] Task 2")
@@ -180,7 +180,7 @@ describe("atlas hook", () => {
      expect(output.output).toContain("SUBAGENT WORK COMPLETED")
      expect(output.output).toContain("test-plan")
      expect(output.output).toContain("LIE")
-      expect(output.output).toContain("delegate_task(resume=")
+      expect(output.output).toContain("delegate_task(session_id=")
      
      cleanupMessageStorage(sessionID)
    })
@@ -188,7 +188,7 @@ describe("atlas hook", () => {
     test("should still transform when plan is complete (shows progress)", async () => {
       // #given - boulder state with complete plan, Atlas caller
       const sessionID = "session-complete-plan-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "complete-plan.md")
      writeFileSync(planPath, "# Plan\n- [x] Task 1\n- [x] Task 2")
@@ -225,7 +225,7 @@ describe("atlas hook", () => {
     test("should append session ID to boulder state if not present", async () => {
       // #given - boulder state without session-append-test, Atlas caller
       const sessionID = "session-append-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -261,7 +261,7 @@ describe("atlas hook", () => {
     test("should not duplicate existing session ID", async () => {
       // #given - boulder state already has session-dup-test, Atlas caller
       const sessionID = "session-dup-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -298,7 +298,7 @@ describe("atlas hook", () => {
     test("should include boulder.json path and notepad path in transformed output", async () => {
       // #given - boulder state, Atlas caller
       const sessionID = "session-path-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "my-feature.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3")
@@ -332,10 +332,10 @@ describe("atlas hook", () => {
      cleanupMessageStorage(sessionID)
    })

-     test("should include resume and checkbox instructions in reminder", async () => {
+     test("should include session_id and checkbox instructions in reminder", async () => {
       // #given - boulder state, Atlas caller
       const sessionID = "session-resume-test"
-       setupMessageStorage(sessionID, "Atlas")
+       setupMessageStorage(sessionID, "atlas")
      
      const planPath = join(TEST_DIR, "test-plan.md")
      writeFileSync(planPath, "# Plan\n- [ ] Task 1")
@@ -361,8 +361,8 @@ describe("atlas hook", () => {
        output
      )

-      // #then - should include resume instructions and verification
-      expect(output.output).toContain("delegate_task(resume=")
+      // #then - should include session_id instructions and verification
+      expect(output.output).toContain("delegate_task(session_id=")
      expect(output.output).toContain("[x]")
      expect(output.output).toContain("MANDATORY:")
      
@@ -373,7 +373,7 @@ describe("atlas hook", () => {
      const ORCHESTRATOR_SESSION = "orchestrator-write-test"

       beforeEach(() => {
-         setupMessageStorage(ORCHESTRATOR_SESSION, "Atlas")
+         setupMessageStorage(ORCHESTRATOR_SESSION, "atlas")
       })

      afterEach(() => {
@@ -444,7 +444,7 @@ describe("atlas hook", () => {
      test("should NOT append reminder when non-orchestrator writes outside .sisyphus/", async () => {
        // #given
        const nonOrchestratorSession = "non-orchestrator-session"
-        setupMessageStorage(nonOrchestratorSession, "Sisyphus-Junior")
+        setupMessageStorage(nonOrchestratorSession, "sisyphus-junior")
        
        const hook = createAtlasHook(createMockPluginInput())
        const originalOutput = "File written successfully"
@@ -601,7 +601,7 @@ describe("atlas hook", () => {
         getMainSessionID: () => MAIN_SESSION_ID,
         subagentSessions: new Set<string>(),
       }))
-       setupMessageStorage(MAIN_SESSION_ID, "Atlas")
+       setupMessageStorage(MAIN_SESSION_ID, "atlas")
     })

    afterEach(() => {
@@ -845,7 +845,7 @@ describe("atlas hook", () => {

       // #given - last agent is NOT Atlas
       cleanupMessageStorage(MAIN_SESSION_ID)
-       setupMessageStorage(MAIN_SESSION_ID, "Sisyphus")
+       setupMessageStorage(MAIN_SESSION_ID, "sisyphus")

       const mockInput = createMockPluginInput()
       const hook = createAtlasHook(mockInput)
--- a/src/hooks/atlas/index.ts
+++ b/src/hooks/atlas/index.ts
@@ -11,6 +11,7 @@ import { getMainSessionID, subagentSessions } from "../../features/claude-code-s
 import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { log } from "../../shared/logger"
 import { createSystemDirective, SYSTEM_DIRECTIVE_PREFIX, SystemDirectiveTypes } from "../../shared/system-directive"
+import { isCallerOrchestrator, getMessageDir } from "../../shared/session-utils"
 import type { BackgroundManager } from "../../features/background-agent"

 export const HOOK_NAME = "atlas"
@@ -179,13 +180,13 @@ If you were NOT given **exactly ONE atomic task**, you MUST:
 `

 function buildVerificationReminder(sessionId: string): string {
-  return `${VERIFICATION_REMINDER}
+   return `${VERIFICATION_REMINDER}

 ---

 **If ANY verification fails, use this immediately:**
 \`\`\`
-delegate_task(resume="${sessionId}", prompt="fix: [describe the specific failure]")
+delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
 \`\`\``
 }

@@ -380,28 +381,6 @@ interface ToolExecuteAfterOutput {
  metadata: Record<string, unknown>
 }

-function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
-
-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-
-  return null
-}
-
-function isCallerOrchestrator(sessionID?: string): boolean {
-   if (!sessionID) return false
-   const messageDir = getMessageDir(sessionID)
-   if (!messageDir) return false
-   const nearest = findNearestMessageWithFields(messageDir)
-   return nearest?.agent?.toLowerCase() === "atlas"
- }
-
 interface SessionState {
  lastEventWasAbortError?: boolean
  lastContinuationInjectedAt?: number
@@ -498,7 +477,7 @@ export function createAtlasHook(
       await ctx.client.session.prompt({
         path: { id: sessionID },
         body: {
-            agent: "Atlas",
+            agent: "atlas",
           ...(model !== undefined ? { model } : {}),
           parts: [{ type: "text", text: prompt }],
         },
@@ -672,7 +651,7 @@ export function createAtlasHook(
      if (input.tool === "delegate_task") {
        const prompt = output.args.prompt as string | undefined
        if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
-          output.args.prompt = prompt + `\n<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>`
+          output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
          log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
            sessionID: input.sessionID,
          })
@@ -711,8 +690,8 @@ export function createAtlasHook(
        return
      }

-      const outputStr = output.output && typeof output.output === "string" ? output.output : ""
-      const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task resumed")
+       const outputStr = output.output && typeof output.output === "string" ? output.output : ""
+       const isBackgroundLaunch = outputStr.includes("Background task launched") || outputStr.includes("Background task continued")
      
      if (isBackgroundLaunch) {
        return
--- a/src/hooks/auto-update-checker/index.ts
+++ b/src/hooks/auto-update-checker/index.ts
@@ -6,6 +6,7 @@ import { log } from "../../shared/logger"
 import { getConfigLoadErrors, clearConfigLoadErrors } from "../../shared/config-errors"
 import { runBunInstall } from "../../cli/config-manager"
 import { isModelCacheAvailable } from "../../shared/model-availability"
+import { hasConnectedProvidersCache, updateConnectedProvidersCache } from "../../shared/connected-providers-cache"
 import type { AutoUpdateCheckerOptions } from "./types"

 const SISYPHUS_SPINNER = ["·", "•", "●", "○", "◌", "◦", " "]
@@ -77,6 +78,7 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat

        await showConfigErrorsIfAny(ctx)
        await showModelCacheWarningIfNeeded(ctx)
+        await updateAndShowConnectedProvidersCacheStatus(ctx)

        if (localDevVersion) {
          if (showStartupToast) {
@@ -186,6 +188,29 @@ async function showModelCacheWarningIfNeeded(ctx: PluginInput): Promise<void> {
  log("[auto-update-checker] Model cache warning shown")
 }

+/**
+ * Starts a background refresh of the connected-providers cache and, if no cache existed
+ * before that refresh was started, shows a toast suggesting a restart of OpenCode.
+ */
+async function updateAndShowConnectedProvidersCacheStatus(ctx: PluginInput): Promise<void> {
+  const hadCache = hasConnectedProvidersCache() // sampled before the refresh starts
+
+  // Deliberately not awaited; a failed refresh is logged instead of silently dropped.
+  void updateConnectedProvidersCache(ctx.client).catch((error: unknown) => {
+    log("[auto-update-checker] Connected providers cache update failed", { error: String(error) })
+  })
+
+  if (hadCache) {
+    log("[auto-update-checker] Connected providers cache exists, updating in background")
+    return
+  }
+
+  const message = "Building provider cache for first time. Restart OpenCode for full model filtering."
+  const body = { title: "Connected Providers Cache", message, variant: "info" as const, duration: 8000 }
+  await ctx.client.tui.showToast({ body }).catch(() => {}) // a rejected toast call is non-fatal
+  log("[auto-update-checker] Connected providers cache toast shown (first run)")
+}
+
 async function showConfigErrorsIfAny(ctx: PluginInput): Promise<void> {
  const errors = getConfigLoadErrors()
  if (errors.length === 0) return
--- a/src/hooks/background-compaction/index.ts
+++ b/src/hooks/background-compaction/index.ts
@@ -1,87 +0,0 @@
-import type { BackgroundManager } from "../../features/background-agent"
-
-interface CompactingInput {
-  sessionID: string
-}
-
-interface CompactingOutput {
-  context: string[]
-  prompt?: string
-}
-
-/**
- * Background agent compaction hook - preserves task state during context compaction.
- * 
- * When OpenCode compacts session context to save tokens, this hook injects
- * information about running and recently completed background tasks so the
- * agent doesn't lose awareness of delegated work.
- */
-export function createBackgroundCompactionHook(manager: BackgroundManager) {
-  return {
-    "experimental.session.compacting": async (
-      input: CompactingInput,
-      output: CompactingOutput
-    ): Promise<void> => {
-      const { sessionID } = input
-
-      // Get running tasks for this session
-      const running = manager.getRunningTasks()
-        .filter(t => t.parentSessionID === sessionID)
-        .map(t => ({
-          id: t.id,
-          agent: t.agent,
-          description: t.description,
-          startedAt: t.startedAt,
-        }))
-
-      // Get recently completed tasks (still in memory within 5-min retention)
-      const completed = manager.getCompletedTasks()
-        .filter(t => t.parentSessionID === sessionID)
-        .slice(-10) // Last 10 completed
-        .map(t => ({
-          id: t.id,
-          agent: t.agent,
-          description: t.description,
-          status: t.status,
-        }))
-
-      // Early exit if nothing to preserve
-      if (running.length === 0 && completed.length === 0) return
-
-      const sections: string[] = ["<background-tasks>"]
-
-      // Running tasks section
-      if (running.length > 0) {
-        sections.push("## Running Background Tasks")
-        sections.push("")
-        for (const t of running) {
-          const elapsed = t.startedAt 
-            ? Math.floor((Date.now() - t.startedAt.getTime()) / 1000)
-            : 0
-          sections.push(`- **\`${t.id}\`** (${t.agent}): ${t.description} [${elapsed}s elapsed]`)
-        }
-        sections.push("")
-        sections.push("> **Note:** You WILL be notified when tasks complete.")
-        sections.push("> Do NOT poll - continue productive work.")
-        sections.push("")
-      }
-
-      // Completed tasks section
-      if (completed.length > 0) {
-        sections.push("## Recently Completed Tasks")
-        sections.push("")
-        for (const t of completed) {
-          const statusLabel = t.status === "completed" ? "[DONE]" : t.status === "error" ? "[ERROR]" : "[PENDING]"
-          sections.push(`- ${statusLabel} **\`${t.id}\`**: ${t.description}`)
-        }
-        sections.push("")
-      }
-
-      sections.push("## Retrieval")
-      sections.push('Use `background_output(task_id="<id>")` to retrieve task results.')
-      sections.push("</background-tasks>")
-
-      output.context.push(sections.join("\n"))
-    }
-  }
-}
--- a/src/hooks/category-skill-reminder/index.test.ts
+++ b/src/hooks/category-skill-reminder/index.test.ts
@@ -0,0 +1,346 @@
+import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test"
+import { createCategorySkillReminderHook } from "./index"
+import { updateSessionAgent, clearSessionAgent, _resetForTesting } from "../../features/claude-code-session-state"
+import * as sharedModule from "../../shared"
+
+describe("category-skill-reminder hook", () => {
+  let logCalls: Array<{ msg: string; data?: unknown }>
+  let logSpy: ReturnType<typeof spyOn>
+
+  beforeEach(() => {
+    _resetForTesting() // NOTE(review): presumably clears shared session-agent state between tests - confirm
+    logCalls = []
+    // Replace the shared log() with a recorder; logCalls is collected but not asserted on in this suite.
+    logSpy = spyOn(sharedModule, "log").mockImplementation((msg: string, data?: unknown) => {
+      logCalls.push({ msg, data })
+    })
+  })
+
+  afterEach(() => {
+    logSpy?.mockRestore()
+  })
+
+  function createMockPluginInput() { // minimal stand-in: only client.tui.showToast is stubbed
+    return {
+      client: {
+        tui: {
+          showToast: async () => {},
+        },
+      },
+    } as any
+  }
+
+  describe("target agent detection", () => {
+    test("should inject reminder for sisyphus agent after 3 tool calls", async () => {
+      // #given - sisyphus agent session with multiple tool calls
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "sisyphus-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "file content", metadata: {} } // same object is passed to every call
+
+      // #when - 3 edit tool calls are made
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+      expect(output.output).toContain("delegate_task")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should inject reminder for atlas agent", async () => {
+      // #given - atlas agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "atlas-session"
+      updateSessionAgent(sessionID, "Atlas")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "bash", sessionID, callID: "3" }, output)
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should inject reminder for sisyphus-junior agent", async () => {
+      // #given - sisyphus-junior agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "junior-session"
+      updateSessionAgent(sessionID, "sisyphus-junior")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "write", sessionID, callID: "3" }, output)
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder for non-target agents", async () => {
+      // #given - librarian agent session (not a target)
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "librarian-session"
+      updateSessionAgent(sessionID, "librarian")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls are made
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should detect agent from input.agent when session state is empty", async () => {
+      // #given - no updateSessionAgent call for this session; the agent is supplied only via input.agent
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "input-agent-session"
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - 3 tool calls with agent in input
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1", agent: "Sisyphus" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2", agent: "Sisyphus" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3", agent: "Sisyphus" }, output)
+
+      // #then - reminder should be injected
+      expect(output.output).toContain("[Category+Skill Reminder]")
+    })
+  })
+
+  describe("delegation tool tracking", () => {
+    test("should NOT inject reminder if delegate_task is used", async () => {
+      // #given - sisyphus agent that uses delegate_task
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "delegation-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - delegate_task is used, then 3 countable edit calls follow
+      await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
+
+      // #then - reminder should NOT be injected (delegation was used)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder if call_omo_agent is used", async () => {
+      // #given - sisyphus agent that uses call_omo_agent
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "omo-agent-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - call_omo_agent is used first, then 3 countable edit calls follow
+      await hook["tool.execute.after"]({ tool: "call_omo_agent", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should NOT inject reminder if task tool is used", async () => {
+      // #given - sisyphus agent that uses task tool
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "task-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - task tool is used first, then 3 countable edit calls follow
+      await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
+
+      // #then - reminder should NOT be injected
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("tool call counting", () => {
+    test("should NOT inject reminder before 3 tool calls", async () => {
+      // #given - sisyphus agent with only 2 tool calls
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "few-calls-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - only 2 tool calls are made
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+
+      // #then - reminder should NOT be injected yet
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should only inject reminder once per session", async () => {
+      // #given - sisyphus agent session
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "once-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} } // receives calls 1-3
+      const output2 = { title: "", output: "result2", metadata: {} } // receives calls 4-6
+
+      // #when - 6 tool calls are made (should trigger at 3, not again at 6)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "5" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "6" }, output2)
+
+      // #then - reminder should be in output1 but not output2
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+      expect(output2.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should only count delegatable work tools", async () => {
+      // #given - sisyphus agent that only calls LSP tools
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "mixed-tools-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - non-delegatable tools are called (should not count)
+      await hook["tool.execute.after"]({ tool: "lsp_goto_definition", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "lsp_find_references", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "lsp_symbols", sessionID, callID: "3" }, output)
+
+      // #then - reminder should NOT be injected (LSP tools don't count)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("event handling", () => {
+    test("should reset state on session.deleted event", async () => {
+      // #given - sisyphus agent with reminder already shown
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "delete-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} }
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output1)
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+
+      // #when - session is deleted (id passed as properties.info.id) and the same id is used again
+      await hook.event({ event: { type: "session.deleted", properties: { info: { id: sessionID } } } })
+
+      const output2 = { title: "", output: "result2", metadata: {} }
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "5" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "6" }, output2)
+
+      // #then - reminder should be shown again (state was reset)
+      expect(output2.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should reset state on session.compacted event", async () => {
+      // #given - sisyphus agent with reminder already shown
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "compact-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output1 = { title: "", output: "result1", metadata: {} }
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "1" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output1)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output1)
+      expect(output1.output).toContain("[Category+Skill Reminder]")
+
+      // #when - session is compacted (id passed as properties.sessionID)
+      await hook.event({ event: { type: "session.compacted", properties: { sessionID } } })
+
+      const output2 = { title: "", output: "result2", metadata: {} }
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "5" }, output2)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "6" }, output2)
+
+      // #then - reminder should be shown again (state was reset)
+      expect(output2.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+
+  describe("case insensitivity", () => {
+    test("should handle tool names case-insensitively", async () => {
+      // #given - sisyphus agent with mixed case tool names
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "case-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - tool calls with different cases
+      await hook["tool.execute.after"]({ tool: "EDIT", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "Edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+
+      // #then - reminder should be injected (all counted)
+      expect(output.output).toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+
+    test("should handle delegation tool names case-insensitively", async () => {
+      // #given - sisyphus agent using DELEGATE_TASK in uppercase
+      const hook = createCategorySkillReminderHook(createMockPluginInput())
+      const sessionID = "case-delegate-session"
+      updateSessionAgent(sessionID, "Sisyphus")
+
+      const output = { title: "", output: "result", metadata: {} }
+
+      // #when - DELEGATE_TASK in uppercase is used
+      await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
+      await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
+
+      // #then - reminder should NOT be injected (delegation was detected)
+      expect(output.output).not.toContain("[Category+Skill Reminder]")
+
+      clearSessionAgent(sessionID)
+    })
+  })
+})
--- a/src/hooks/category-skill-reminder/index.ts
+++ b/src/hooks/category-skill-reminder/index.ts
@@ -0,0 +1,165 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import { getSessionAgent } from "../../features/claude-code-session-state"
+import { log } from "../../shared"
+
+/**
+ * Lowercase names of the orchestrator agents that receive category+skill reminders.
+ * Agent names are lowercased before lookup, so every entry here must stay lowercase.
+ */
+const TARGET_AGENTS = new Set([
+  "sisyphus",
+  "sisyphus-junior",
+  "atlas",
+])
+
+/**
+ * Lowercase tool names whose calls count toward the reminder threshold.
+ * Calls to tools outside this set (and outside DELEGATION_TOOLS) are not counted.
+ */
+const DELEGATABLE_WORK_TOOLS = new Set([
+  "edit",
+  "write",
+  "bash",
+  "read",
+  "grep",
+  "glob",
+])
+
+/**
+ * Lowercase tool names that mark a session as already delegating, which suppresses the reminder.
+ */
+const DELEGATION_TOOLS = new Set([
+  "delegate_task",
+  "call_omo_agent",
+  "task",
+])
+
+const REMINDER_MESSAGE = `
+[Category+Skill Reminder]
+
+You are an orchestrator agent. Consider whether this work should be delegated:
+
+**DELEGATE when:**
+- UI/Frontend work → category: "visual-engineering", skills: ["frontend-ui-ux"]
+- Complex logic/architecture → category: "ultrabrain"
+- Quick/trivial tasks → category: "quick"
+- Git operations → skills: ["git-master"]
+- Browser automation → skills: ["playwright"] or ["agent-browser"]
+
+**DO IT YOURSELF when:**
+- Gathering context/exploring codebase
+- Simple edits that are part of a larger task you're coordinating
+- Tasks requiring your full context understanding
+
+Example delegation:
+\`\`\`
+delegate_task(
+  category="visual-engineering",
+  load_skills=["frontend-ui-ux"],
+  description="Implement responsive navbar with animations",
+  run_in_background=true
+)
+\`\`\`
+` // appended verbatim to a tool's output when the reminder fires
+
+interface ToolExecuteInput {
+  tool: string // lowercased before being matched against the tool sets above
+  sessionID: string // key of the per-session reminder state
+  callID: string // not read by this hook
+  agent?: string // used only when no agent is recorded for the session
+}
+
+interface ToolExecuteOutput {
+  title: string // not read by this hook
+  output: string // the reminder text is appended to this field
+  metadata: unknown // not read by this hook
+}
+
+interface SessionState {
+  delegationUsed: boolean // true once any DELEGATION_TOOLS call was seen in the session
+  reminderShown: boolean // true once the reminder has been appended for the session
+  toolCallCount: number // number of DELEGATABLE_WORK_TOOLS calls seen so far
+}
+
+/**
+ * Creates the hook that nudges orchestrator agents toward delegation.
+ *
+ * Per session it counts calls to DELEGATABLE_WORK_TOOLS. Once at least three were
+ * seen, no DELEGATION_TOOLS call happened and no reminder was added yet, it appends
+ * REMINDER_MESSAGE to that tool's output. State is dropped on `session.deleted`
+ * and `session.compacted` events, so the reminder can fire again afterwards.
+ *
+ * @param _ctx Plugin input; currently unused.
+ * @returns Handlers for `tool.execute.after` and `event`.
+ */
+export function createCategorySkillReminderHook(_ctx: PluginInput) {
+  const sessionStates = new Map<string, SessionState>()
+
+  /** Returns the state for a session, creating a zeroed entry on first use. */
+  function getOrCreateState(sessionID: string): SessionState {
+    let state = sessionStates.get(sessionID)
+    if (!state) {
+      state = { delegationUsed: false, reminderShown: false, toolCallCount: 0 }
+      sessionStates.set(sessionID, state)
+    }
+    return state
+  }
+
+  /** True when the recorded session agent (or, failing that, `inputAgent`) is a target. */
+  function isTargetAgent(sessionID: string, inputAgent?: string): boolean {
+    const agent = getSessionAgent(sessionID) ?? inputAgent
+    if (!agent) return false
+    const agentLower = agent.toLowerCase()
+    // The substring checks also accept longer agent names that merely contain a target name.
+    return TARGET_AGENTS.has(agentLower) || agentLower.includes("sisyphus") || agentLower.includes("atlas")
+  }
+
+  /** Tracks one finished tool call and appends the reminder when the conditions are met. */
+  const toolExecuteAfter = async (input: ToolExecuteInput, output: ToolExecuteOutput) => {
+    const { tool, sessionID } = input
+    const toolLower = tool.toLowerCase()
+
+    if (!isTargetAgent(sessionID, input.agent)) return
+
+    const state = getOrCreateState(sessionID)
+
+    // Any delegation call marks the session as delegating until its state is reset.
+    if (DELEGATION_TOOLS.has(toolLower)) {
+      state.delegationUsed = true
+      log("[category-skill-reminder] Delegation tool used", { sessionID, tool })
+      return
+    }
+
+    if (!DELEGATABLE_WORK_TOOLS.has(toolLower)) return
+
+    state.toolCallCount++
+    if (state.toolCallCount < 3 || state.delegationUsed || state.reminderShown) return
+
+    // A tool may hand back a non-string output at runtime; never append to "undefined".
+    const existing = typeof output.output === "string" ? output.output : ""
+    output.output = existing + REMINDER_MESSAGE
+    state.reminderShown = true
+    log("[category-skill-reminder] Reminder injected", { sessionID, toolCallCount: state.toolCallCount })
+  }
+
+  /** Drops the per-session state when a session is deleted or compacted. */
+  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
+    const props = event.properties as Record<string, unknown> | undefined
+    const infoID = (props?.info as { id?: string } | undefined)?.id
+
+    if (event.type === "session.deleted" && infoID) {
+      sessionStates.delete(infoID)
+    }
+
+    if (event.type === "session.compacted") {
+      // Prefer `properties.sessionID`, falling back to `properties.info.id`.
+      const sessionID = (props?.sessionID ?? infoID) as string | undefined
+      if (sessionID) sessionStates.delete(sessionID)
+    }
+  }
+
+  return {
+    "tool.execute.after": toolExecuteAfter,
+    event: eventHandler,
+  }
+}
--- a/src/hooks/claude-code-hooks/AGENTS.md
+++ b/src/hooks/claude-code-hooks/AGENTS.md
@@ -1,51 +1,48 @@
 # CLAUDE CODE HOOKS COMPATIBILITY

 ## OVERVIEW
-
-Full Claude Code settings.json hook compatibility. 5 lifecycle events: PreToolUse, PostToolUse, UserPromptSubmit, Stop, PreCompact.
+Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode events to execute external scripts/commands defined in Claude Code configuration.

 ## STRUCTURE
-
 ```
 claude-code-hooks/
 ├── index.ts              # Main factory (401 lines)
 ├── config.ts             # Loads ~/.claude/settings.json
-├── config-loader.ts      # Extended config
+├── config-loader.ts      # Extended config (disabledHooks)
 ├── pre-tool-use.ts       # PreToolUse executor
 ├── post-tool-use.ts      # PostToolUse executor
 ├── user-prompt-submit.ts # UserPromptSubmit executor
-├── stop.ts               # Stop hook executor
+├── stop.ts               # Stop hook executor (with active state tracking)
 ├── pre-compact.ts        # PreCompact executor
 ├── transcript.ts         # Tool use recording
-├── tool-input-cache.ts   # Pre→post caching
-├── types.ts              # Hook types
-└── todo.ts               # Todo JSON fix
+├── tool-input-cache.ts   # Pre→post input caching
+└── types.ts              # Hook & IO type definitions
 ```

 ## HOOK LIFECYCLE
-
-| Event | When | Can Block | Context |
-|-------|------|-----------|---------|
-| PreToolUse | Before tool | Yes | sessionId, toolName, toolInput |
-| PostToolUse | After tool | Warn | + toolOutput, transcriptPath |
-| UserPromptSubmit | On message | Yes | sessionId, prompt, parts |
-| Stop | Session idle | inject | sessionId, parentSessionId |
-| PreCompact | Before summarize | No | sessionId |
+| Event | Timing | Can Block | Context Provided |
+|-------|--------|-----------|------------------|
+| PreToolUse | Before tool exec | Yes | sessionId, toolName, toolInput, cwd |
+| PostToolUse | After tool exec | Warn | + toolOutput, transcriptPath |
+| UserPromptSubmit | On message send | Yes | sessionId, prompt, parts, cwd |
+| Stop | Session idle/end | Inject | sessionId, parentSessionId, cwd |
+| PreCompact | Before summarize | No | sessionId, cwd |

 ## CONFIG SOURCES
-
 Priority (highest first):
-1. `.claude/settings.json` (project)
-2. `~/.claude/settings.json` (user)
+1. `.claude/settings.json` (Project-local)
+2. `~/.claude/settings.json` (Global user)

 ## HOOK EXECUTION
-
-1. Hooks loaded from settings.json
-2. Matchers filter by tool name
-3. Commands via subprocess with `$SESSION_ID`, `$TOOL_NAME`
-4. Exit codes: 0=pass, 1=warn, 2=block
+- **Matchers**: Hooks filter by tool name or event type via regex/glob.
+- **Commands**: Executed via subprocess with env vars (`$SESSION_ID`, `$TOOL_NAME`).
+- **Exit Codes**:
+  - `0`: Pass (Success)
+  - `1`: Warn (Continue with system message)
+  - `2`: Block (Abort operation/prompt)

 ## ANTI-PATTERNS
-
- **Heavy PreToolUse**: Runs before EVERY tool call
- **Blocking non-critical**: Use PostToolUse warnings
+- **Heavy PreToolUse**: Runs before EVERY tool; keep logic light to avoid latency.
+- **Blocking non-critical**: Prefer PostToolUse warnings for non-fatal issues.
+- **Direct state mutation**: Use `updatedInput` in PreToolUse instead of side effects.
+- **Ignoring Exit Codes**: Ensure scripts return `2` to properly block sensitive tools.
--- a/src/hooks/compaction-context-injector/index.ts
+++ b/src/hooks/compaction-context-injector/index.ts
@@ -33,7 +33,13 @@ When summarizing this session, you MUST include the following sections in your s
 - Pending items from the original request
 - Follow-up tasks identified during the work

-## 5. MUST NOT Do (Critical Constraints)
+## 5. Active Working Context (For Seamless Continuation)
+- **Files**: Paths of files currently being edited or frequently referenced
+- **Code in Progress**: Key code snippets, function signatures, or data structures under active development
+- **External References**: Documentation URLs, library APIs, or external resources being consulted
+- **State & Variables**: Important variable names, configuration values, or runtime state relevant to ongoing work
+
+## 6. MUST NOT Do (Critical Constraints)
 - Things that were explicitly forbidden
 - Approaches that failed and should not be retried
 - User's explicit restrictions or preferences
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -22,11 +22,14 @@ export { createNonInteractiveEnvHook } from "./non-interactive-env";
 export { createInteractiveBashSessionHook } from "./interactive-bash-session";

 export { createThinkingBlockValidatorHook } from "./thinking-block-validator";
+export { createCategorySkillReminderHook } from "./category-skill-reminder";
 export { createRalphLoopHook, type RalphLoopHook } from "./ralph-loop";
 export { createAutoSlashCommandHook } from "./auto-slash-command";
 export { createEditErrorRecoveryHook } from "./edit-error-recovery";
 export { createPrometheusMdOnlyHook } from "./prometheus-md-only";
+export { createSisyphusJuniorNotepadHook } from "./sisyphus-junior-notepad";
 export { createTaskResumeInfoHook } from "./task-resume-info";
 export { createStartWorkHook } from "./start-work";
 export { createAtlasHook } from "./atlas";
 export { createDelegateTaskRetryHook } from "./delegate-task-retry";
+export { createQuestionLabelTruncatorHook } from "./question-label-truncator";
--- a/src/hooks/keyword-detector/constants.ts
+++ b/src/hooks/keyword-detector/constants.ts
@@ -169,10 +169,10 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
 ## AGENTS / **CATEGORY + SKILLS** UTILIZATION PRINCIPLES (by capability, not by name)
 - **Codebase Exploration**: Spawn exploration agents using BACKGROUND TASKS for file patterns, internal implementations, project structure
 - **Documentation & References**: Use librarian-type agents via BACKGROUND TASKS for API references, examples, external library docs
- **Planning & Strategy**: NEVER plan yourself - ALWAYS spawn a dedicated planning agent for work breakdown
-  - MUST USE PLAN AGENT. MUST USE PLAN AGENT. MUST USE PLAN AGENT.
-  - ALWAYS ASK PLAN AGENT TO WHAT CATEGORY + SKILLS / AGENTS TO LEVERAGE.
-  - IF IMPLEMENT TASK, MUST ADD TODO NOW: "CONSULT WITH PLAN AGENT WITH CATEGORY + SKILLS"
+- **Planning & Strategy**: NEVER plan yourself - ALWAYS spawn the Plan agent for work breakdown
+  - MUST invoke: \`delegate_task(subagent_type="plan", prompt="<gathered context + user request>")\`
+  - In your prompt to the Plan agent, ASK it to recommend which CATEGORY + SKILLS / AGENTS to leverage for implementation.
+  - IF IMPLEMENT TASK, MUST ADD TODO NOW: "Consult Plan agent via delegate_task(subagent_type='plan') for work breakdown with category + skills recommendations"
 - **High-IQ Reasoning**: Leverage specialized agents for architecture decisions, code review, strategic planning
 - **SPECIAL TASKS COVERED WITH CATEGORY + LOAD_SKILLS**: Delegate to specialized agents with category+skills for design and implementation, as following guide:
  - CATEGORY + SKILL GUIDE
@@ -192,7 +192,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
 ## WORKFLOW
 1. Analyze the request and identify required capabilities
 2. Spawn exploration/librarian agents via delegate_task(background=true) in PARALLEL (10+ if needed)
-3. Always Use Plan agent with gathered context to create detailed work breakdown
+3. Spawn Plan agent: \`delegate_task(subagent_type="plan", prompt="<context + request>")\` to create detailed work breakdown
 4. Execute with continuous verification against original requirements

 ## VERIFICATION GUARANTEE (NON-NEGOTIABLE)
@@ -266,9 +266,9 @@ Write these criteria explicitly. Share with user if scope is non-trivial.

 THE USER ASKED FOR X. DELIVER EXACTLY X. NOT A SUBSET. NOT A DEMO. NOT A STARTING POINT.

-1. EXPLORES + LIBRARIANS
-2. GATHER -> PLAN AGENT SPAWN
-3. WORK BY DELEGATING TO ANOTHER AGENTS
+1. EXPLORES + LIBRARIANS (background)
+2. GATHER -> delegate_task(subagent_type="plan", prompt="<context + request>")
+3. WORK BY DELEGATING TO CATEGORY + SKILLS AGENTS

 NOW.

--- a/src/hooks/keyword-detector/index.test.ts
+++ b/src/hooks/keyword-detector/index.test.ts
@@ -419,7 +419,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
    }

    // #when - ultrawork keyword detected with Sisyphus agent
-    await hook["chat.message"]({ sessionID, agent: "Sisyphus" }, output)
+    await hook["chat.message"]({ sessionID, agent: "sisyphus" }, output)

    // #then - should use normal ultrawork message with agent utilization instructions
    const textPart = output.parts.find(p => p.type === "text")
@@ -471,7 +471,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
      message: {} as Record<string, unknown>,
      parts: [{ type: "text", text: "ultrawork implement" }],
    }
-    await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "Sisyphus" }, sisyphusOutput)
+    await hook["chat.message"]({ sessionID: sisyphusSessionID, agent: "sisyphus" }, sisyphusOutput)

    // #then - each session should have the correct message type
    const prometheusTextPart = prometheusOutput.parts.find(p => p.type === "text")
@@ -492,7 +492,7 @@ describe("keyword-detector agent-specific ultrawork messages", () => {
    const sessionID = "same-session-agent-switch"

    // Simulate: session state was updated to sisyphus (by index.ts updateSessionAgent)
-    updateSessionAgent(sessionID, "Sisyphus")
+    updateSessionAgent(sessionID, "sisyphus")

    const output = {
      message: {} as Record<string, unknown>,
--- a/src/hooks/prometheus-md-only/index.test.ts
+++ b/src/hooks/prometheus-md-only/index.test.ts
@@ -277,7 +277,7 @@ describe("prometheus-md-only", () => {

  describe("with non-Prometheus agent in message storage", () => {
    beforeEach(() => {
-      setupMessageStorage(TEST_SESSION_ID, "Sisyphus")
+      setupMessageStorage(TEST_SESSION_ID, "sisyphus")
    })

    test("should not affect non-Prometheus agents", async () => {
--- a/src/hooks/prometheus-md-only/index.ts
+++ b/src/hooks/prometheus-md-only/index.ts
@@ -89,10 +89,10 @@ export function createPrometheusMdOnlyHook(ctx: PluginInput) {
      const toolName = input.tool

      // Inject read-only warning for task tools called by Prometheus
-      if (TASK_TOOLS.includes(toolName)) {
-        const prompt = output.args.prompt as string | undefined
-        if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
-          output.args.prompt = prompt + PLANNING_CONSULT_WARNING
+       if (TASK_TOOLS.includes(toolName)) {
+         const prompt = output.args.prompt as string | undefined
+         if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
+           output.args.prompt = PLANNING_CONSULT_WARNING + prompt
          log(`[${HOOK_NAME}] Injected read-only planning warning to ${toolName}`, {
            sessionID: input.sessionID,
            tool: toolName,
--- a/src/hooks/question-label-truncator/index.test.ts
+++ b/src/hooks/question-label-truncator/index.test.ts
@@ -0,0 +1,136 @@
+import { describe, it, expect } from "bun:test";
+import { createQuestionLabelTruncatorHook } from "./index";
+
+describe("createQuestionLabelTruncatorHook", () => {
+  const hook = createQuestionLabelTruncatorHook();
+
+  // Runs the hook's before-handler for `tool` and returns the args object it was handed.
+  const runBefore = async (tool: string, args: Record<string, unknown>): Promise<any> => {
+    const output = { args };
+    await hook["tool.execute.before"]?.({ tool } as any, output as any);
+    return output.args;
+  };
+
+  // Reads the label of option `optionIndex` in question `questionIndex`.
+  const labelAt = (args: any, questionIndex: number, optionIndex: number): string =>
+    args.questions[questionIndex].options[optionIndex].label;
+
+  describe("tool.execute.before", () => {
+    it("truncates labels exceeding 30 characters with ellipsis", async () => {
+      // #given
+      const overlong = "This is a very long label that exceeds thirty characters";
+      const args = {
+        questions: [
+          {
+            question: "Choose an option",
+            options: [{ label: overlong, description: "A long option" }],
+          },
+        ],
+      };
+
+      // #when
+      const result = await runBefore("AskUserQuestion", args);
+
+      // #then
+      const label = labelAt(result, 0, 0);
+      expect(label.length).toBeLessThanOrEqual(30);
+      expect(label).toBe("This is a very long label t...");
+      expect(label.endsWith("...")).toBe(true);
+    });
+
+    it("preserves labels within 30 characters", async () => {
+      // #given
+      const brief = "Short label";
+      const args = {
+        questions: [
+          {
+            question: "Choose an option",
+            options: [{ label: brief, description: "A short option" }],
+          },
+        ],
+      };
+
+      // #when
+      const result = await runBefore("AskUserQuestion", args);
+
+      // #then
+      expect(labelAt(result, 0, 0)).toBe(brief);
+    });
+
+    it("handles exactly 30 character labels without truncation", async () => {
+      // #given
+      const boundary = "Exactly thirty chars here!!!!!"; // 30 chars
+      expect(boundary.length).toBe(30);
+      const args = {
+        questions: [{ question: "Choose", options: [{ label: boundary }] }],
+      };
+
+      // #when - snake_case spelling of the tool name
+      const result = await runBefore("ask_user_question", args);
+
+      // #then
+      expect(labelAt(result, 0, 0)).toBe(boundary);
+    });
+
+    it("ignores non-AskUserQuestion tools", async () => {
+      // #given
+      const args = { command: "echo hello" };
+      const snapshot = { ...args };
+
+      // #when
+      const result = await runBefore("Bash", args);
+
+      // #then
+      expect(result).toEqual(snapshot);
+    });
+
+    it("handles multiple questions with multiple options", async () => {
+      // #given
+      const args = {
+        questions: [
+          {
+            question: "Q1",
+            options: [
+              { label: "Very long label number one that needs truncation" },
+              { label: "Short" },
+            ],
+          },
+          {
+            question: "Q2",
+            options: [{ label: "Another extremely long label for testing purposes" }],
+          },
+        ],
+      };
+
+      // #when
+      const result = await runBefore("AskUserQuestion", args);
+
+      // #then
+      const firstOfQ1 = labelAt(result, 0, 0);
+      const firstOfQ2 = labelAt(result, 1, 0);
+
+      expect(firstOfQ1).toBe("Very long label number one ...");
+      expect(firstOfQ1.length).toBeLessThanOrEqual(30);
+      expect(labelAt(result, 0, 1)).toBe("Short");
+      expect(firstOfQ2).toBe("Another extremely long labe...");
+      expect(firstOfQ2.length).toBeLessThanOrEqual(30);
+    });
+  });
+});
--- a/src/hooks/question-label-truncator/index.ts
+++ b/src/hooks/question-label-truncator/index.ts
@@ -0,0 +1,61 @@
+const MAX_LABEL_LENGTH = 30; // Longest label, in characters, that is left untouched; default limit of truncateLabel.
+
+interface QuestionOption { // One selectable answer of a question.
+  label: string; // The only field this module rewrites.
+  description?: string;
+}
+
+interface Question { // One question entry of the tool arguments.
+  question: string;
+  header?: string;
+  options: QuestionOption[]; // Each option's label is passed through truncateLabel.
+  multiSelect?: boolean;
+}
+
+interface AskUserQuestionArgs { // Shape this module assumes for the question tool's args; applied via a cast, not validated at runtime.
+  questions: Question[];
+}
+
+/**
+ * Shortens `label` to at most `maxLength` UTF-16 code units, ending it with
+ * "..." when it had to be cut.
+ *
+ * @param label - Option label to shorten; returned unchanged when it already fits.
+ * @param maxLength - Upper bound on the returned length (defaults to MAX_LABEL_LENGTH).
+ * @returns The original label, or its leading characters followed by "...".
+ */
+function truncateLabel(label: string, maxLength: number = MAX_LABEL_LENGTH): string {
+  // Labels come from runtime tool arguments that are only cast to the expected
+  // type, so a non-string may arrive here; pass it through instead of throwing.
+  if (typeof label !== "string" || label.length <= maxLength) {
+    return label;
+  }
+  // Below three characters there is no room for the ellipsis; hard-cut so the
+  // result still respects maxLength instead of returning a longer "...".
+  if (maxLength < 3) {
+    return label.substring(0, Math.max(0, maxLength));
+  }
+  let cut = maxLength - 3;
+  // Do not end on a lone high surrogate (first half of an emoji or other
+  // astral character); dropping it keeps the output well-formed.
+  const lastUnit = label.charCodeAt(cut - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
+    cut -= 1;
+  }
+  return label.substring(0, cut) + "...";
+}
+
+/**
+ * Returns a copy of `args` in which every string option label has been passed
+ * through truncateLabel.
+ *
+ * The input is not mutated: the questions array, each processed question and
+ * each rewritten option are shallow copies. `args` itself is returned when
+ * `questions` is missing or not an array.
+ *
+ * @param args - Arguments of an AskUserQuestion tool call.
+ * @returns The arguments with over-long option labels shortened.
+ */
+function truncateQuestionLabels(args: AskUserQuestionArgs): AskUserQuestionArgs {
+  if (!args.questions || !Array.isArray(args.questions)) {
+    return args;
+  }
+
+  return {
+    ...args,
+    questions: args.questions.map((question) => {
+      const options = question?.options;
+      // The arguments are only cast to this shape, never validated. A missing
+      // question or a non-array `options` is handed back untouched so the hook
+      // does not throw and abort the tool call.
+      if (question == null || (options != null && !Array.isArray(options))) {
+        return question;
+      }
+      return {
+        ...question,
+        // Absent options still become an empty list, as before.
+        options: (options ?? []).map((option) =>
+          // Only string labels can be truncated; any other option is kept verbatim.
+          typeof option?.label === "string"
+            ? { ...option, label: truncateLabel(option.label) }
+            : option
+        ),
+      };
+    }),
+  };
+}
+
+/**
+ * Builds a hook object whose "tool.execute.before" handler shortens over-long
+ * option labels in the arguments of the question tool before it runs.
+ *
+ * The tool name is compared case-insensitively against "askuserquestion" and
+ * "ask_user_question"; calls to any other tool are left untouched.
+ */
+export function createQuestionLabelTruncatorHook() {
+  const beforeToolExecute = async (
+    input: { tool: string },
+    output: { args: Record<string, unknown> }
+  ): Promise<void> => {
+    const normalizedTool = input.tool?.toLowerCase();
+    const isQuestionTool =
+      normalizedTool === "askuserquestion" || normalizedTool === "ask_user_question";
+    if (!isQuestionTool) {
+      return;
+    }
+
+    const questionArgs = output.args as unknown as AskUserQuestionArgs | undefined;
+    if (!questionArgs?.questions) {
+      return;
+    }
+
+    // Written back onto the existing args object so the caller's reference sees the change.
+    Object.assign(output.args, truncateQuestionLabels(questionArgs));
+  };
+
+  return { "tool.execute.before": beforeToolExecute };
+}
--- a/src/hooks/ralph-loop/index.test.ts
+++ b/src/hooks/ralph-loop/index.test.ts
@@ -459,7 +459,7 @@ describe("ralph-loop", () => {
      })
      hook.startLoop("session-123", "Build something", { completionPromise: "COMPLETE" })

-      writeFileSync(transcriptPath, JSON.stringify({ content: "Task done <promise>COMPLETE</promise>" }))
+      writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "Task done <promise>COMPLETE</promise>" } }) + "\n")

      // #when - session goes idle (transcriptPath now derived from sessionID via getTranscriptPath)
      await hook.event({
@@ -703,10 +703,105 @@ describe("ralph-loop", () => {
      expect(promptCalls[0].text).toContain("2/50")
    })

+    test("should NOT detect completion from user message in transcript (issue #622)", async () => {
+      // #given - transcript contains user message with template text that includes completion promise
+      // This reproduces the bug where the RALPH_LOOP_TEMPLATE instructional text
+      // containing `<promise>DONE</promise>` is recorded as a user message and
+      // falsely triggers completion detection
+      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
+      const templateText = `You are starting a Ralph Loop...
+Output <promise>DONE</promise> when fully complete`
+      const userEntry = JSON.stringify({
+        type: "user",
+        timestamp: new Date().toISOString(),
+        content: templateText,
+      })
+      writeFileSync(transcriptPath, userEntry + "\n")
+
+      const hook = createRalphLoopHook(createMockPluginInput(), {
+        getTranscriptPath: () => transcriptPath,
+      })
+      hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
+
+      // #when - session goes idle
+      await hook.event({
+        event: {
+          type: "session.idle",
+          properties: { sessionID: "session-123" },
+        },
+      })
+
+      // #then - loop should CONTINUE (user message completion promise is instructional, not actual)
+      expect(promptCalls.length).toBe(1)
+      expect(hook.getState()?.iteration).toBe(2)
+    })
+
+    test("should NOT detect completion from continuation prompt in transcript (issue #622)", async () => {
+      // #given - transcript contains continuation prompt (also a user message) with completion promise
+      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
+      const continuationText = `RALPH LOOP 2/100
+When FULLY complete, output: <promise>DONE</promise>
+Original task: Build something`
+      const userEntry = JSON.stringify({
+        type: "user",
+        timestamp: new Date().toISOString(),
+        content: continuationText,
+      })
+      writeFileSync(transcriptPath, userEntry + "\n")
+
+      const hook = createRalphLoopHook(createMockPluginInput(), {
+        getTranscriptPath: () => transcriptPath,
+      })
+      hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
+
+      // #when - session goes idle
+      await hook.event({
+        event: {
+          type: "session.idle",
+          properties: { sessionID: "session-123" },
+        },
+      })
+
+      // #then - loop should CONTINUE (continuation prompt text is not actual completion)
+      expect(promptCalls.length).toBe(1)
+      expect(hook.getState()?.iteration).toBe(2)
+    })
+
+    test("should detect completion from tool_result entry in transcript", async () => {
+      // #given - transcript contains a tool_result with completion promise
+      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
+      const toolResultEntry = JSON.stringify({
+        type: "tool_result",
+        timestamp: new Date().toISOString(),
+        tool_name: "write",
+        tool_input: {},
+        tool_output: { output: "Task complete! <promise>DONE</promise>" },
+      })
+      writeFileSync(transcriptPath, toolResultEntry + "\n")
+
+      const hook = createRalphLoopHook(createMockPluginInput(), {
+        getTranscriptPath: () => transcriptPath,
+      })
+      hook.startLoop("session-123", "Build something", { completionPromise: "DONE" })
+
+      // #when - session goes idle
+      await hook.event({
+        event: {
+          type: "session.idle",
+          properties: { sessionID: "session-123" },
+        },
+      })
+
+      // #then - loop should complete (tool_result contains actual completion output)
+      expect(promptCalls.length).toBe(0)
+      expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true)
+      expect(hook.getState()).toBeNull()
+    })
+
    test("should check transcript BEFORE API to optimize performance", async () => {
      // #given - transcript has completion promise
      const transcriptPath = join(TEST_DIR, "transcript.jsonl")
-      writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
+      writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
      mockSessionMessages = [
        { info: { role: "assistant" }, parts: [{ type: "text", text: "No promise here" }] },
      ]
@@ -736,7 +831,7 @@ describe("ralph-loop", () => {
      const hook = createRalphLoopHook(createMockPluginInput(), {
        getTranscriptPath: () => transcriptPath,
      })
-      writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
+      writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
      hook.startLoop("test-id", "Build API", { ultrawork: true })

      // #when - idle event triggered
@@ -754,7 +849,7 @@ describe("ralph-loop", () => {
      const hook = createRalphLoopHook(createMockPluginInput(), {
        getTranscriptPath: () => transcriptPath,
      })
-      writeFileSync(transcriptPath, JSON.stringify({ content: "<promise>DONE</promise>" }))
+      writeFileSync(transcriptPath, JSON.stringify({ type: "tool_result", tool_name: "write", tool_output: { output: "<promise>DONE</promise>" } }) + "\n")
      hook.startLoop("test-id", "Build API")

      // #when - idle event triggered
--- a/src/hooks/ralph-loop/index.ts
+++ b/src/hooks/ralph-loop/index.ts
@@ -100,7 +100,18 @@ export function createRalphLoopHook(

      const content = readFileSync(transcriptPath, "utf-8")
      const pattern = new RegExp(`<promise>\\s*${escapeRegex(promise)}\\s*</promise>`, "is")
-      return pattern.test(content)
+      const lines = content.split("\n").filter(l => l.trim())
+
+      for (const line of lines) {
+        try {
+          const entry = JSON.parse(line)
+          if (entry.type === "user") continue
+          if (pattern.test(line)) return true
+        } catch {
+          continue
+        }
+      }
+      return false
    } catch {
      return false
    }
--- a/src/hooks/sisyphus-junior-notepad/constants.ts
+++ b/src/hooks/sisyphus-junior-notepad/constants.ts
@@ -0,0 +1,29 @@
+export const HOOK_NAME = "sisyphus-junior-notepad" // Identifier of this hook; the hook factory uses it as its log prefix.
+
+export const NOTEPAD_DIRECTIVE = `
+<Work_Context>
+## Notepad Location (for recording learnings)
+NOTEPAD PATH: .sisyphus/notepads/{plan-name}/
+- learnings.md: Record patterns, conventions, successful approaches
+- issues.md: Record problems, blockers, gotchas encountered
+- decisions.md: Record architectural choices and rationales
+- problems.md: Record unresolved issues, technical debt
+
+You SHOULD append findings to notepad files after completing work.
+IMPORTANT: Always APPEND to notepad files - never overwrite or use Edit tool.
+
+## Plan Location (READ ONLY)
+PLAN PATH: .sisyphus/plans/{plan-name}.md
+
+CRITICAL RULE: NEVER MODIFY THE PLAN FILE
+
+The plan file (.sisyphus/plans/*.md) is SACRED and READ-ONLY.
+- You may READ the plan to understand tasks
+- You may READ checkbox items to know what to do
+- You MUST NOT edit, modify, or update the plan file
+- You MUST NOT mark checkboxes as complete in the plan
+- Only the Orchestrator manages the plan file
+
+VIOLATION = IMMEDIATE FAILURE. The Orchestrator tracks plan state.
+</Work_Context>
+` // Prepended verbatim to delegate_task prompts by the hook; `{plan-name}` is emitted literally — NOTE(review): confirm the receiving agent is expected to resolve it.
--- a/src/hooks/sisyphus-junior-notepad/index.ts
+++ b/src/hooks/sisyphus-junior-notepad/index.ts
@@ -0,0 +1,45 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import { isCallerOrchestrator } from "../../shared/session-utils"
+import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
+import { log } from "../../shared/logger"
+import { HOOK_NAME, NOTEPAD_DIRECTIVE } from "./constants"
+
+export * from "./constants"
+
+/**
+ * Builds a hook object whose "tool.execute.before" handler prepends
+ * NOTEPAD_DIRECTIVE to the prompt of delegate_task calls that
+ * isCallerOrchestrator attributes to an orchestrator session.
+ *
+ * NOTE(review): `ctx` is accepted but never read in this function.
+ */
+export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
+  const injectNotepadDirective = async (
+    input: { tool: string; sessionID: string; callID: string },
+    output: { args: Record<string, unknown>; message?: string }
+  ): Promise<void> => {
+    const { tool, sessionID } = input
+
+    // Only delegate_task calls issued from an orchestrator session are rewritten.
+    if (tool !== "delegate_task" || !isCallerOrchestrator(sessionID)) {
+      return
+    }
+
+    // Leave the call alone when there is no prompt, or when the prompt already
+    // contains SYSTEM_DIRECTIVE_PREFIX.
+    // NOTE(review): the visible NOTEPAD_DIRECTIVE text does not appear to include
+    // that prefix, so this check may not detect this hook's own earlier injection — confirm.
+    const originalPrompt = output.args.prompt as string | undefined
+    if (!originalPrompt || originalPrompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
+      return
+    }
+
+    output.args.prompt = NOTEPAD_DIRECTIVE + originalPrompt
+    log(`[${HOOK_NAME}] Injected notepad directive to delegate_task`, { sessionID })
+  }
+
+  return { "tool.execute.before": injectNotepadDirective }
+}
--- a/src/hooks/start-work/index.test.ts
+++ b/src/hooks/start-work/index.test.ts
@@ -395,7 +395,7 @@ describe("start-work hook", () => {
      )

      // #then
-      expect(updateSpy).toHaveBeenCalledWith("ses-prometheus-to-sisyphus", "Atlas")
+      expect(updateSpy).toHaveBeenCalledWith("ses-prometheus-to-sisyphus", "atlas")
      updateSpy.mockRestore()
    })
  })
--- a/src/hooks/start-work/index.ts
+++ b/src/hooks/start-work/index.ts
@@ -71,7 +71,7 @@ export function createStartWorkHook(ctx: PluginInput) {
        sessionID: input.sessionID,
      })

-      updateSessionAgent(input.sessionID, "Atlas")
+      updateSessionAgent(input.sessionID, "atlas")

      const existingState = readBoulderState(ctx.directory)
      const sessionId = input.sessionID
--- a/src/hooks/task-resume-info/index.ts
+++ b/src/hooks/task-resume-info/index.ts
@@ -16,21 +16,21 @@ function extractSessionId(output: string): string | null {
 }

 export function createTaskResumeInfoHook() {
-  const toolExecuteAfter = async (
-    input: { tool: string; sessionID: string; callID: string },
-    output: { title: string; output: string; metadata: unknown }
-  ) => {
-    if (!TARGET_TOOLS.includes(input.tool)) return
-    if (output.output.startsWith("Error:") || output.output.startsWith("Failed")) return
-    if (output.output.includes("\nto resume:")) return
+   const toolExecuteAfter = async (
+     input: { tool: string; sessionID: string; callID: string },
+     output: { title: string; output: string; metadata: unknown }
+   ) => {
+     if (!TARGET_TOOLS.includes(input.tool)) return
+     if (output.output.startsWith("Error:") || output.output.startsWith("Failed")) return
+     if (output.output.includes("\nto continue:")) return

-    const sessionId = extractSessionId(output.output)
-    if (!sessionId) return
+     const sessionId = extractSessionId(output.output)
+     if (!sessionId) return

-    output.output = output.output.trimEnd() + `\n\nto resume: delegate_task(resume="${sessionId}", prompt="...")`
-  }
+     output.output = output.output.trimEnd() + `\n\nto continue: delegate_task(session_id="${sessionId}", prompt="...")`
+   }

-  return {
-    "tool.execute.after": toolExecuteAfter,
-  }
+   return {
+     "tool.execute.after": toolExecuteAfter,
+   }
 }
--- a/src/hooks/think-mode/index.test.ts
+++ b/src/hooks/think-mode/index.test.ts
@@ -103,7 +103,7 @@ describe("createThinkModeHook integration", () => {
        const hook = createThinkModeHook()
        const input = createMockInput(
          "github-copilot",
-          "gemini-3-pro-preview",
+          "gemini-3-pro",
          "think about this"
        )

@@ -112,7 +112,7 @@ describe("createThinkModeHook integration", () => {

        // #then should upgrade to high variant and inject google thinking config
        const message = input.message as MessageWithInjectedProps
-        expect(input.message.model?.modelID).toBe("gemini-3-pro-preview-high")
+        expect(input.message.model?.modelID).toBe("gemini-3-pro-high")
        expect(message.providerOptions).toBeDefined()
        const googleOptions = (
          message.providerOptions as Record<string, unknown>
@@ -125,7 +125,7 @@ describe("createThinkModeHook integration", () => {
        const hook = createThinkModeHook()
        const input = createMockInput(
          "github-copilot",
-          "gemini-3-flash-preview",
+          "gemini-3-flash",
          "ultrathink"
        )

@@ -134,7 +134,7 @@ describe("createThinkModeHook integration", () => {

        // #then should upgrade to high variant
        const message = input.message as MessageWithInjectedProps
-        expect(input.message.model?.modelID).toBe("gemini-3-flash-preview-high")
+        expect(input.message.model?.modelID).toBe("gemini-3-flash-high")
        expect(message.providerOptions).toBeDefined()
      })
    })
--- a/src/hooks/think-mode/switcher.test.ts
+++ b/src/hooks/think-mode/switcher.test.ts
@@ -50,7 +50,7 @@ describe("think-mode switcher", () => {
    describe("Gemini models via github-copilot", () => {
      it("should resolve github-copilot Gemini Pro to google config", () => {
        // #given a github-copilot provider with Gemini Pro model
-        const config = getThinkingConfig("github-copilot", "gemini-3-pro-preview")
+        const config = getThinkingConfig("github-copilot", "gemini-3-pro")

        // #then should return google thinking config
        expect(config).not.toBeNull()
@@ -65,7 +65,7 @@ describe("think-mode switcher", () => {
        // #given a github-copilot provider with Gemini Flash model
        const config = getThinkingConfig(
          "github-copilot",
-          "gemini-3-flash-preview"
+          "gemini-3-flash"
        )

        // #then should return google thinking config
@@ -159,11 +159,11 @@ describe("think-mode switcher", () => {

      it("should handle Gemini preview variants", () => {
        // #given Gemini preview model IDs
-        expect(getHighVariant("gemini-3-pro-preview")).toBe(
-          "gemini-3-pro-preview-high"
+        expect(getHighVariant("gemini-3-pro")).toBe(
+          "gemini-3-pro-high"
        )
-        expect(getHighVariant("gemini-3-flash-preview")).toBe(
-          "gemini-3-flash-preview-high"
+        expect(getHighVariant("gemini-3-flash")).toBe(
+          "gemini-3-flash-high"
        )
      })

--- a/src/hooks/think-mode/switcher.ts
+++ b/src/hooks/think-mode/switcher.ts
@@ -89,12 +89,10 @@ const HIGH_VARIANT_MAP: Record<string, string> = {
  // Claude
  "claude-sonnet-4-5": "claude-sonnet-4-5-high",
  "claude-opus-4-5": "claude-opus-4-5-high",
-  // Gemini
-  "gemini-3-pro": "gemini-3-pro-high",
-  "gemini-3-pro-low": "gemini-3-pro-high",
-  "gemini-3-pro-preview": "gemini-3-pro-preview-high",
-  "gemini-3-flash": "gemini-3-flash-high",
-  "gemini-3-flash-preview": "gemini-3-flash-preview-high",
+   // Gemini
+   "gemini-3-pro": "gemini-3-pro-high",
+   "gemini-3-pro-low": "gemini-3-pro-high",
+   "gemini-3-flash": "gemini-3-flash-high",
  // GPT-5
  "gpt-5": "gpt-5-high",
  "gpt-5-mini": "gpt-5-mini-high",
--- a/src/hooks/todo-continuation-enforcer.test.ts
+++ b/src/hooks/todo-continuation-enforcer.test.ts
@@ -835,8 +835,8 @@ describe("todo-continuation-enforcer", () => {

    // OpenCode returns assistant messages with flat modelID/providerID, not nested model object
    const mockMessagesWithAssistant = [
-      { info: { id: "msg-1", role: "user", agent: "Sisyphus", model: { providerID: "openai", modelID: "gpt-5.2" } } },
-      { info: { id: "msg-2", role: "assistant", agent: "Sisyphus", modelID: "gpt-5.2", providerID: "openai" } },
+      { info: { id: "msg-1", role: "user", agent: "sisyphus", model: { providerID: "openai", modelID: "gpt-5.2" } } },
+      { info: { id: "msg-2", role: "assistant", agent: "sisyphus", modelID: "gpt-5.2", providerID: "openai" } },
    ]

    const mockInput = {
@@ -873,4 +873,193 @@ describe("todo-continuation-enforcer", () => {
    expect(promptCalls.length).toBe(1)
    expect(promptCalls[0].model).toEqual({ providerID: "openai", modelID: "gpt-5.2" })
  })
+
+  // ============================================================
+  // COMPACTION AGENT FILTERING TESTS
+  // These tests verify that compaction agent messages are filtered
+  // when resolving agent info, preventing infinite continuation loops
+  // ============================================================
+
+  test("should skip compaction agent messages when resolving agent info", async () => {
+    // #given - session where last message is from compaction agent but previous was Sisyphus
+    const sessionID = "main-compaction-filter"
+    setMainSession(sessionID)
+
+    const mockMessagesWithCompaction = [
+      { info: { id: "msg-1", role: "user", agent: "sisyphus", model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" } } },
+      { info: { id: "msg-2", role: "assistant", agent: "sisyphus", modelID: "claude-sonnet-4-5", providerID: "anthropic" } },
+      { info: { id: "msg-3", role: "assistant", agent: "compaction", modelID: "claude-sonnet-4-5", providerID: "anthropic" } },
+    ]
+
+    const mockInput = {
+      client: {
+        session: {
+          todo: async () => ({
+            data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
+          }),
+          messages: async () => ({ data: mockMessagesWithCompaction }),
+          prompt: async (opts: any) => {
+            promptCalls.push({
+              sessionID: opts.path.id,
+              agent: opts.body.agent,
+              model: opts.body.model,
+              text: opts.body.parts[0].text,
+            })
+            return {}
+          },
+        },
+        tui: { showToast: async () => ({}) },
+      },
+      directory: "/tmp/test",
+    } as any
+
+    const hook = createTodoContinuationEnforcer(mockInput, {
+      backgroundManager: createMockBackgroundManager(false),
+    })
+
+    // #when - session goes idle
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await new Promise(r => setTimeout(r, 2500))
+
+    // #then - continuation uses Sisyphus (skipped compaction agent)
+    expect(promptCalls.length).toBe(1)
+    expect(promptCalls[0].agent).toBe("sisyphus")
+  })
+
+  test("should skip injection when only compaction agent messages exist", async () => {
+    // #given - session with only compaction agent (post-compaction, no prior agent info)
+    const sessionID = "main-only-compaction"
+    setMainSession(sessionID)
+
+    const mockMessagesOnlyCompaction = [
+      { info: { id: "msg-1", role: "assistant", agent: "compaction" } },
+    ]
+
+    const mockInput = {
+      client: {
+        session: {
+          todo: async () => ({
+            data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
+          }),
+          messages: async () => ({ data: mockMessagesOnlyCompaction }),
+          prompt: async (opts: any) => {
+            promptCalls.push({
+              sessionID: opts.path.id,
+              agent: opts.body.agent,
+              model: opts.body.model,
+              text: opts.body.parts[0].text,
+            })
+            return {}
+          },
+        },
+        tui: { showToast: async () => ({}) },
+      },
+      directory: "/tmp/test",
+    } as any
+
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    // #when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await new Promise(r => setTimeout(r, 3000))
+
+    // #then - no continuation (compaction is in default skipAgents)
+    expect(promptCalls).toHaveLength(0)
+  })
+
+  test("should skip injection when prometheus agent is after compaction", async () => {
+    // #given - prometheus session that was compacted
+    const sessionID = "main-prometheus-compacted"
+    setMainSession(sessionID)
+
+    const mockMessagesPrometheusCompacted = [
+      { info: { id: "msg-1", role: "user", agent: "prometheus" } },
+      { info: { id: "msg-2", role: "assistant", agent: "prometheus" } },
+      { info: { id: "msg-3", role: "assistant", agent: "compaction" } },
+    ]
+
+    const mockInput = {
+      client: {
+        session: {
+          todo: async () => ({
+            data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
+          }),
+          messages: async () => ({ data: mockMessagesPrometheusCompacted }),
+          prompt: async (opts: any) => {
+            promptCalls.push({
+              sessionID: opts.path.id,
+              agent: opts.body.agent,
+              model: opts.body.model,
+              text: opts.body.parts[0].text,
+            })
+            return {}
+          },
+        },
+        tui: { showToast: async () => ({}) },
+      },
+      directory: "/tmp/test",
+    } as any
+
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    // #when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await new Promise(r => setTimeout(r, 3000))
+
+    // #then - no continuation (prometheus found after filtering compaction, prometheus is in skipAgents)
+    expect(promptCalls).toHaveLength(0)
+  })
+
+  test("should inject when agent info is undefined but skipAgents is empty", async () => {
+    // #given - session with no agent info but skipAgents is empty
+    const sessionID = "main-no-agent-no-skip"
+    setMainSession(sessionID)
+
+    const mockMessagesNoAgent = [
+      { info: { id: "msg-1", role: "user" } },
+      { info: { id: "msg-2", role: "assistant" } },
+    ]
+
+    const mockInput = {
+      client: {
+        session: {
+          todo: async () => ({
+            data: [{ id: "1", content: "Task 1", status: "pending", priority: "high" }],
+          }),
+          messages: async () => ({ data: mockMessagesNoAgent }),
+          prompt: async (opts: any) => {
+            promptCalls.push({
+              sessionID: opts.path.id,
+              agent: opts.body.agent,
+              model: opts.body.model,
+              text: opts.body.parts[0].text,
+            })
+            return {}
+          },
+        },
+        tui: { showToast: async () => ({}) },
+      },
+      directory: "/tmp/test",
+    } as any
+
+    const hook = createTodoContinuationEnforcer(mockInput, {
+      skipAgents: [],
+    })
+
+    // #when - session goes idle
+    await hook.handler({
+      event: { type: "session.idle", properties: { sessionID } },
+    })
+
+    await new Promise(r => setTimeout(r, 3000))
+
+    // #then - continuation injected (no agents to skip)
+    expect(promptCalls.length).toBe(1)
+  })
 })
--- a/src/hooks/todo-continuation-enforcer.ts
+++ b/src/hooks/todo-continuation-enforcer.ts
@@ -13,7 +13,7 @@ import { createSystemDirective, SystemDirectiveTypes } from "../shared/system-di

 const HOOK_NAME = "todo-continuation-enforcer"

-const DEFAULT_SKIP_AGENTS = ["prometheus"]
+const DEFAULT_SKIP_AGENTS = ["prometheus", "compaction"]

 export interface TodoContinuationEnforcerOptions {
  backgroundManager?: BackgroundManager
@@ -373,6 +373,7 @@ export function createTodoContinuationEnforcer(
      }

      let resolvedInfo: ResolvedMessageInfo | undefined
+      let hasCompactionMessage = false
      try {
        const messagesResp = await ctx.client.session.messages({
          path: { id: sessionID },
@@ -388,6 +389,10 @@ export function createTodoContinuationEnforcer(
        }>
        for (let i = messages.length - 1; i >= 0; i--) {
          const info = messages[i].info
+          if (info?.agent === "compaction") {
+            hasCompactionMessage = true
+            continue
+          }
          if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
            resolvedInfo = {
              agent: info.agent,
@@ -401,11 +406,15 @@ export function createTodoContinuationEnforcer(
        log(`[${HOOK_NAME}] Failed to fetch messages for agent check`, { sessionID, error: String(err) })
      }

-      log(`[${HOOK_NAME}] Agent check`, { sessionID, agentName: resolvedInfo?.agent, skipAgents })
+      log(`[${HOOK_NAME}] Agent check`, { sessionID, agentName: resolvedInfo?.agent, skipAgents, hasCompactionMessage })
      if (resolvedInfo?.agent && skipAgents.includes(resolvedInfo.agent)) {
        log(`[${HOOK_NAME}] Skipped: agent in skipAgents list`, { sessionID, agent: resolvedInfo.agent })
        return
      }
+      if (hasCompactionMessage && !resolvedInfo?.agent) {
+        log(`[${HOOK_NAME}] Skipped: compaction occurred but no agent info resolved`, { sessionID })
+        return
+      }

      startCountdown(sessionID, incompleteCount, todos.length, resolvedInfo)
      return
--- a/Show More
+++ b/Show More