release: v3.6.0

Merge pull request #1721 from edxeth/fix/disable-mcps
fix(mcp): preserve user's enabled:false and apply disabled_mcps to all MCP sources
2026-02-16 15:02:43 +00:00 · 2026-02-16 23:52:24 +09:00 · 2026-02-16 22:59:52 +09:00 · 2026-02-16 22:47:51 +09:00 · 2026-02-16 22:37:27 +09:00 · 2026-02-16 22:37:18 +09:00
221 changed files with 6510 additions and 3020 deletions
--- a/.opencode/command/remove-deadcode.md
+++ b/.opencode/command/remove-deadcode.md
@@ -3,337 +3,216 @@ description: Remove unused code from this project with ultrawork mode, LSP-verif
 ---

 <command-instruction>
-You are a dead code removal specialist. Execute the FULL dead code removal workflow using ultrawork mode.

-Your core weapon: **LSP FindReferences**. If a symbol has ZERO external references, it's dead. Remove it.
+Dead code removal via massively parallel deep agents. You are the ORCHESTRATOR — you scan, verify, batch, then delegate ALL removals to parallel agents.

-## CRITICAL RULES
+<rules>
+- **LSP is law.** Verify with `LspFindReferences(includeDeclaration=false)` before ANY removal decision.
+- **Never remove entry points.** `src/index.ts`, `src/cli/index.ts`, test files, config files, `packages/` — off-limits.
+- **You do NOT remove code yourself.** You scan, verify, batch, then fire deep agents. They do the work.
+</rules>

-1. **LSP is law.** Never guess. Always verify with `LspFindReferences` before removing ANYTHING.
-2. **One removal = one commit.** Every dead code removal gets its own atomic commit.
-3. **Test after every removal.** Run `bun test` after each. If it fails, REVERT and skip.
-4. **Leaf-first order.** Remove deepest unused symbols first, then work up the dependency chain. Removing a leaf may expose new dead code upstream.
-5. **Never remove entry points.** `src/index.ts`, `src/cli/index.ts`, test files, config files, and files in `packages/` are off-limits unless explicitly targeted.
+<false-positive-guards>
+NEVER mark as dead:
+- Symbols in `src/index.ts` or barrel `index.ts` re-exports
+- Symbols referenced in test files (tests are valid consumers)
+- Symbols with `@public` / `@api` JSDoc tags
+- Hook factories (`createXXXHook`), tool factories (`createXXXTool`), agent definitions in `agentSources`
+- Command templates, skill definitions, MCP configs
+- Symbols in `package.json` exports
+</false-positive-guards>

 ---

-## STEP 0: REGISTER TODO LIST (MANDATORY FIRST ACTION)
+## PHASE 1: SCAN — Find Dead Code Candidates

-```
-TodoWrite([
-  {"id": "scan", "content": "PHASE 1: Scan codebase for dead code candidates using LSP + explore agents", "status": "pending", "priority": "high"},
-  {"id": "verify", "content": "PHASE 2: Verify each candidate with LspFindReferences - zero false positives", "status": "pending", "priority": "high"},
-  {"id": "plan", "content": "PHASE 3: Plan removal order (leaf-first dependency order)", "status": "pending", "priority": "high"},
-  {"id": "remove", "content": "PHASE 4: Remove dead code one-by-one (remove -> test -> commit loop)", "status": "pending", "priority": "high"},
-  {"id": "final", "content": "PHASE 5: Final verification - full test suite + build + typecheck", "status": "pending", "priority": "high"}
-])
-```
+Run ALL of these in parallel:

---
+<parallel-scan>

-## PHASE 1: SCAN FOR DEAD CODE CANDIDATES
-
-**Mark scan as in_progress.**
-
-### 1.1: Launch Parallel Explore Agents (ALL BACKGROUND)
-
-Fire ALL simultaneously:
-
-```
-// Agent 1: Find all exported symbols
-task(subagent_type="explore", run_in_background=true,
-  prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
-  List each with: file path, line number, symbol name, export type (named/default).
-  EXCLUDE: src/index.ts root exports, test files.
-  Return as structured list.")
-
-// Agent 2: Find potentially unused files
-task(subagent_type="explore", run_in_background=true,
-  prompt="Find files in src/ that are NOT imported by any other file.
-  Check import/require statements across the entire codebase.
-  EXCLUDE: index.ts files, test files, entry points, config files, .md files.
-  Return list of potentially orphaned files.")
-
-// Agent 3: Find unused imports within files
-task(subagent_type="explore", run_in_background=true,
-  prompt="Find unused imports across src/**/*.ts files.
-  Look for import statements where the imported symbol is never referenced in the file body.
-  Return: file path, line number, imported symbol name.")
-
-// Agent 4: Find functions/variables only used in their own declaration
-task(subagent_type="explore", run_in_background=true,
-  prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
-  to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
-```
-
-### 1.2: Direct AST-Grep Scans (WHILE AGENTS RUN)
-
-```typescript
-// Find unused imports pattern
-ast_grep_search(pattern="import { $NAME } from '$PATH'", lang="typescript", paths=["src/"])
-
-// Find empty export objects
-ast_grep_search(pattern="export {}", lang="typescript", paths=["src/"])
-```
-
-### 1.3: Collect All Results
-
-Collect background agent results. Compile into a master candidate list:
-
-```
-## DEAD CODE CANDIDATES
-
-| # | File | Line | Symbol | Type | Confidence |
-|---|------|------|--------|------|------------|
-| 1 | src/foo.ts | 42 | unusedFunc | function | HIGH |
-| 2 | src/bar.ts | 10 | OldType | type | MEDIUM |
-```
-
-**Mark scan as completed.**
-
---
-
-## PHASE 2: VERIFY WITH LSP (ZERO FALSE POSITIVES)
-
-**Mark verify as in_progress.**
-
-For EVERY candidate from Phase 1, run this verification:
-
-### 2.1: The LSP Verification Protocol
-
-For each candidate symbol:
-
-```typescript
-// Step 1: Find the symbol's exact position
-LspDocumentSymbols(filePath)  // Get line/character of the symbol
-
-// Step 2: Find ALL references across the ENTIRE workspace
-LspFindReferences(filePath, line, character, includeDeclaration=false)
-// includeDeclaration=false → only counts USAGES, not the definition itself
-
-// Step 3: Evaluate
-// 0 references → CONFIRMED DEAD CODE
-// 1+ references → NOT dead, remove from candidate list
-```
-
-### 2.2: False Positive Guards
-
-**NEVER mark as dead code if:**
- Symbol is in `src/index.ts` (package entry point)
- Symbol is in any `index.ts` that re-exports (barrel file check: look if it's re-exported)
- Symbol is referenced in test files (tests are valid consumers)
- Symbol has `@public` or `@api` JSDoc tags
- Symbol is in a file listed in `package.json` exports
- Symbol is a hook factory (`createXXXHook`) registered in `src/index.ts`
- Symbol is a tool factory (`createXXXTool`) registered in tool loading
- Symbol is an agent definition registered in `agentSources`
- File is a command template, skill definition, or MCP config
-
-### 2.3: Build Confirmed Dead Code List
-
-After verification, produce:
-
-```
-## CONFIRMED DEAD CODE (LSP-verified, 0 external references)
-
-| # | File | Line | Symbol | Type | Safe to Remove |
-|---|------|------|--------|------|----------------|
-| 1 | src/foo.ts | 42 | unusedFunc | function | YES |
-```
-
-**If ZERO confirmed dead code found: Report "No dead code found" and STOP.**
-
-**Mark verify as completed.**
-
---
-
-## PHASE 3: PLAN REMOVAL ORDER
-
-**Mark plan as in_progress.**
-
-### 3.1: Dependency Analysis
-
-For each confirmed dead symbol:
-1. Check if removing it would expose other dead code
-2. Check if other dead symbols depend on this one
-3. Build removal dependency graph
-
-### 3.2: Order by Leaf-First
-
-```
-Removal Order:
-1. [Leaf symbols - no other dead code depends on them]
-2. [Intermediate symbols - depended on only by already-removed dead code]
-3. [Dead files - entire files with no live exports]
-```
-
-### 3.3: Register Granular Todos
-
-Create one todo per removal:
-
-```
-TodoWrite([
-  {"id": "remove-1", "content": "Remove unusedFunc from src/foo.ts:42", "status": "pending", "priority": "high"},
-  {"id": "remove-2", "content": "Remove OldType from src/bar.ts:10", "status": "pending", "priority": "high"},
-  // ... one per confirmed dead symbol
-])
-```
-
-**Mark plan as completed.**
-
---
-
-## PHASE 4: ITERATIVE REMOVAL LOOP
-
-**Mark remove as in_progress.**
-
-For EACH dead code item, execute this exact loop:
-
-### 4.1: Pre-Removal Check
-
-```typescript
-// Re-verify it's still dead (previous removals may have changed things)
-LspFindReferences(filePath, line, character, includeDeclaration=false)
-// If references > 0 now → SKIP (previous removal exposed a new consumer)
-```
-
-### 4.2: Remove the Dead Code
-
-Use appropriate tool:
-
-**For unused imports:**
-```typescript
-Edit(filePath, oldString="import { deadSymbol } from '...';\n", newString="")
-// Or if it's one of many imports, remove just the symbol from the import list
-```
-
-**For unused functions/classes/types:**
-```typescript
-// Read the full symbol extent first
-Read(filePath, offset=startLine, limit=endLine-startLine+1)
-// Then remove it
-Edit(filePath, oldString="[full symbol text]", newString="")
-```
-
-**For dead files:**
+**TypeScript unused-symbol check (your primary scanner — launch this FIRST, then fire the explore agents without waiting for it):**
 ```bash
-# Only after confirming ZERO imports point to this file
-rm "path/to/dead-file.ts"
+bunx tsc --noEmit --noUnusedLocals --noUnusedParameters 2>&1
+```
+This lists unused locals, imports, parameters, and non-exported types with exact file:line locations. It does NOT report unused exports or orphaned files — the explore agents below cover those.
+
+**Explore agents (fire ALL simultaneously as background):**
+
+```
+task(subagent_type="explore", run_in_background=true, load_skills=[],
+  description="Find orphaned files",
+  prompt="Find files in src/ NOT imported by any other file. Check all import statements. EXCLUDE: index.ts, *.test.ts, entry points, .md, packages/. Return: file paths.")
+
+task(subagent_type="explore", run_in_background=true, load_skills=[],
+  description="Find unused exported symbols",
+  prompt="Find exported functions/types/constants in src/ that are never imported by other files. Cross-reference: for each export, grep the symbol name across src/ — if it only appears in its own file, it's a candidate. EXCLUDE: src/index.ts exports, test files. Return: file path, line, symbol name, export type.")
 ```

-**After removal, also clean up:**
- Remove any imports that were ONLY used by the removed code
- Remove any now-empty import statements
- Fix any trailing whitespace / double blank lines left behind
+</parallel-scan>

-### 4.3: Post-Removal Verification
+Collect all results into a master candidate list.
+
+---
+
+## PHASE 2: VERIFY — LSP Confirmation (Zero False Positives)
+
+For EACH candidate from Phase 1:

 ```typescript
-// 1. LSP diagnostics on changed file
-LspDiagnostics(filePath, severity="error")
-// Must be clean (or only pre-existing errors)
-
-// 2. Run tests
-bash("bun test")
-// Must pass
-
-// 3. Typecheck
-bash("bun run typecheck")
-// Must pass
+LspFindReferences(filePath, line, character, includeDeclaration=false)
+// 0 references → CONFIRMED dead
+// 1+ references → NOT dead, drop from list
 ```

-### 4.4: Handle Failures
+Also apply the false-positive-guards above. Produce a confirmed list:

-If ANY verification fails:
-1. **REVERT** the change immediately (`git checkout -- [file]`)
-2. Mark this removal todo as `cancelled` with note: "Removal caused [error]. Skipped."
-3. Proceed to next item
-
-### 4.5: Commit
-
-```bash
-git add [changed-files]
-git commit -m "refactor: remove unused [symbolType] [symbolName] from [filePath]"
+```
+| # | File | Symbol | Type | Action |
+|---|------|--------|------|--------|
+| 1 | src/foo.ts:42 | unusedFunc | function | REMOVE |
+| 2 | src/bar.ts:10 | OldType | type | REMOVE |
+| 3 | src/baz.ts:7 | ctx | parameter | PREFIX _ |
 ```

-Mark this removal todo as `completed`.
+**Action types:**
+- `REMOVE` — delete the symbol/import/file entirely
+- `PREFIX _` — unused function parameter required by signature → rename to `_paramName`

-### 4.6: Re-scan After Removal
+If ZERO confirmed: report "No dead code found" and STOP.

-After removing a symbol, check if its removal exposed NEW dead code:
- Were there imports that only existed to serve the removed symbol?
- Are there other symbols in the same file now unreferenced?
+---

-If new dead code is found, add it to the removal queue.
+## PHASE 3: BATCH — Group by File for Conflict-Free Parallelism

-**Repeat 4.1-4.6 for every item. Mark remove as completed when done.**
+<batching-rules>
+
+**Goal: maximize parallel agents with ZERO git conflicts.**
+
+1. Group confirmed dead code items by FILE PATH
+2. All items in the SAME file go to the SAME batch (prevents two agents editing the same file)
+3. If a dead FILE (entire file deletion) exists, it's its own batch
+4. Target 5-15 batches. If fewer than 5 files are affected, use 1 batch per file (never split one file's items across batches).
+
+**Example batching:**
+```
+Batch A: [src/hooks/foo/hook.ts — 3 unused imports]
+Batch B: [src/features/bar/manager.ts — 2 unused constants, 1 dead function]
+Batch C: [src/tools/baz/tool.ts — 1 unused param, src/tools/baz/types.ts — 1 unused type]
+Batch D: [src/dead-file.ts — entire file deletion]
+```
+
+Files in the same directory MAY share a batch or be split across batches — conflicts only arise when two agents edit the same file. Prefer more batches for parallelism.
+
+</batching-rules>
+
+---
+
+## PHASE 4: EXECUTE — Fire Parallel Deep Agents
+
+For EACH batch, fire a deep agent:
+
+```
+task(
+  category="deep",
+  load_skills=["typescript-programmer", "git-master"],
+  run_in_background=true,
+  description="Remove dead code batch N: [brief description]",
+  prompt="[see template below]"
+)
+```
+
+<agent-prompt-template>
+
+Every deep agent gets this prompt structure (fill in the specifics per batch):
+
+```
+## TASK: Remove dead code from [file list]
+
+## DEAD CODE TO REMOVE
+
+### [file path] line [N]
+- Symbol: `[name]` — [type: unused import / unused constant / unused function / unused parameter / dead file]
+- Action: [REMOVE entirely / REMOVE from import list / PREFIX with _]
+
+### [file path] line [N]
+- ...
+
+## PROTOCOL
+
+1. Read each file to understand exact syntax at the target lines
+2. For each symbol, run `LspFindReferences(includeDeclaration=false)` to RE-VERIFY it's still dead — 0 references means dead; 1+ means skip it and report (another agent may have changed things)
+3. Apply the change:
+   - Unused import (only symbol in line): remove entire import line
+   - Unused import (one of many): remove only that symbol from the import list
+   - Unused constant/function/type: remove the declaration. Clean up trailing blank lines.
+   - Unused parameter: prefix with `_` (do NOT remove — required by signature)
+   - Dead file: delete with `rm`
+4. After ALL edits in this batch, run: `bun run typecheck`
+5. If typecheck fails: `git checkout -- [files]` and report failure
+6. If typecheck passes: stage ONLY your files and commit:
+   `git add [your-specific-files] && git commit -m "refactor: remove dead code from [brief file list]"`
+7. Report what you removed and the commit hash
+
+## CRITICAL
+- Stage ONLY your batch's files (`git add [specific files]`). NEVER `git add -A` — other agents are working in parallel.
+- If typecheck fails after your edits, REVERT all changes and report. Do not attempt to fix.
+- Pre-existing test failures in other files are expected. Only typecheck matters for your batch.
+```
+
+</agent-prompt-template>
+
+Fire ALL batches simultaneously. Wait for all to complete.

 ---

 ## PHASE 5: FINAL VERIFICATION

-**Mark final as in_progress.**
+After ALL agents complete:

-### 5.1: Full Test Suite
 ```bash
-bun test
+bun run typecheck   # must pass
+bun test            # note any NEW failures vs pre-existing
+bun run build       # must pass
 ```

-### 5.2: Full Typecheck
-```bash
-bun run typecheck
-```
-
-### 5.3: Full Build
-```bash
-bun run build
-```
-
-### 5.4: Summary Report
+Produce summary:

 ```markdown
 ## Dead Code Removal Complete

 ### Removed
-| # | Symbol | File | Type | Commit |
-|---|--------|------|------|--------|
-| 1 | unusedFunc | src/foo.ts | function | abc1234 |
+| # | Symbol | File | Type | Commit | Agent |
+|---|--------|------|------|--------|-------|
+| 1 | unusedFunc | src/foo.ts | function | abc1234 | Batch A |

-### Skipped (caused failures)
+### Skipped (agent reported failure)
 | # | Symbol | File | Reason |
 |---|--------|------|--------|
-| 1 | riskyFunc | src/bar.ts | Test failure: [details] |

 ### Verification
- Tests: PASSED (X/Y passing)
- Typecheck: CLEAN
- Build: SUCCESS
- Total dead code removed: N symbols across M files
+- Typecheck: PASS/FAIL
+- Tests: X passing, Y failing (Z pre-existing)
+- Build: PASS/FAIL
+- Total removed: N symbols across M files
 - Total commits: K atomic commits
+- Parallel agents used: P
 ```

-**Mark final as completed.**
-
 ---

 ## SCOPE CONTROL

-**If $ARGUMENTS is provided**, narrow the scan to the specified scope:
- File path: Only scan that file
- Directory: Only scan that directory
- Symbol name: Only check that specific symbol
- "all" or empty: Full project scan (default)
+If `$ARGUMENTS` is provided, narrow the scan:
+- File path → only that file
+- Directory → only that directory
+- Symbol name → only that symbol
+- `all` or empty → full project scan (default)

 ## ABORT CONDITIONS

-**STOP and report to user if:**
- 3 consecutive removals cause test failures
+STOP and report if:
+- More than 50 candidates found (ask user to narrow scope or confirm proceeding)
 - Build breaks and cannot be fixed by reverting
- More than 50 candidates found (ask user to narrow scope)
-
-## LANGUAGE
-
-Use English for commit messages and technical output.

 </command-instruction>

--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,8 +1,8 @@
 # PROJECT KNOWLEDGE BASE

-**Generated:** 2026-02-10T14:44:00+09:00
-**Commit:** b538806d
-**Branch:** dev
+**Generated:** 2026-02-16T14:58:00+09:00
+**Commit:** 28cd34c3
+**Branch:** fuck-v1.2

 ---

@@ -102,32 +102,32 @@ Oh-My-OpenCode is a **plugin for OpenCode**. You will frequently need to examine

 ## OVERVIEW

-OpenCode plugin (v3.4.0): multi-model agent orchestration with 11 specialized agents (Claude Opus 4.6, GPT-5.3 Codex, Gemini 3 Flash, GLM-4.7, Grok). 41 lifecycle hooks across 7 event types, 25+ tools (LSP, AST-Grep, delegation, task management), full Claude Code compatibility layer. "oh-my-zsh" for OpenCode.
+OpenCode plugin (oh-my-opencode): multi-model agent orchestration with 11 specialized agents, 41 lifecycle hooks across 7 event types, 26 tools (LSP, AST-Grep, delegation, task management), full Claude Code compatibility layer, 4-scope skill loading, background agent concurrency, tmux integration, and 3-tier MCP system. "oh-my-zsh" for OpenCode.

 ## STRUCTURE

 ```
 oh-my-opencode/
 ├── src/
-│   ├── agents/              # 11 AI agents - see src/agents/AGENTS.md
-│   ├── hooks/               # 41 lifecycle hooks - see src/hooks/AGENTS.md
-│   ├── tools/               # 25+ tools - see src/tools/AGENTS.md
-│   ├── features/            # Background agents, skills, CC compat - see src/features/AGENTS.md
-│   ├── shared/              # 84 cross-cutting utilities - see src/shared/AGENTS.md
-│   ├── cli/                 # CLI installer, doctor - see src/cli/AGENTS.md
-│   ├── mcp/                 # Built-in MCPs - see src/mcp/AGENTS.md
-│   ├── config/              # Zod schema - see src/config/AGENTS.md
-│   ├── plugin-handlers/     # Config loading - see src/plugin-handlers/AGENTS.md
+│   ├── agents/              # 11 AI agents — see src/agents/AGENTS.md
+│   ├── hooks/               # 41 lifecycle hooks — see src/hooks/AGENTS.md
+│   ├── tools/               # 26 tools — see src/tools/AGENTS.md
+│   ├── features/            # Background agents, skills, CC compat — see src/features/AGENTS.md
+│   ├── shared/              # Cross-cutting utilities — see src/shared/AGENTS.md
+│   ├── cli/                 # CLI installer, doctor — see src/cli/AGENTS.md
+│   ├── mcp/                 # Built-in MCPs — see src/mcp/AGENTS.md
+│   ├── config/              # Zod schema — see src/config/AGENTS.md
+│   ├── plugin-handlers/     # Config loading pipeline — see src/plugin-handlers/AGENTS.md
 │   ├── plugin/              # Plugin interface composition (21 files)
-│   ├── index.ts             # Main plugin entry (88 lines)
+│   ├── index.ts             # Main plugin entry (106 lines)
 │   ├── create-hooks.ts      # Hook creation coordination (62 lines)
 │   ├── create-managers.ts   # Manager initialization (80 lines)
 │   ├── create-tools.ts      # Tool registry composition (54 lines)
 │   ├── plugin-interface.ts  # Plugin interface assembly (66 lines)
-│   ├── plugin-config.ts     # Config loading orchestration
-│   └── plugin-state.ts      # Model cache state
+│   ├── plugin-config.ts     # Config loading orchestration (180 lines)
+│   └── plugin-state.ts      # Model cache state (12 lines)
 ├── script/                  # build-schema.ts, build-binaries.ts, publish.ts, generate-changelog.ts
-├── packages/                # 7 platform-specific binary packages
+├── packages/                # 11 platform-specific binary packages
 └── dist/                    # Build output (ESM + .d.ts)
 ```

@@ -143,7 +143,7 @@ OhMyOpenCodePlugin(ctx)
  6. createManagers(ctx, config, tmux, cache)  → TmuxSessionManager, BackgroundManager, SkillMcpManager, ConfigHandler
  7. createTools(ctx, config, managers)         → filteredTools, mergedSkills, availableSkills, availableCategories
  8. createHooks(ctx, config, backgroundMgr)   → 41 hooks (core + continuation + skill)
-  9. createPluginInterface(...)                 → tool, chat.params, chat.message, event, tool.execute.before/after
+  9. createPluginInterface(...)                 → 7 OpenCode hook handlers
 10. Return plugin with experimental.session.compacting
 ```

@@ -159,7 +159,7 @@ OhMyOpenCodePlugin(ctx)
 | Add command | `src/features/builtin-commands/` | Add template + register in commands.ts |
 | Config schema | `src/config/schema/` | 21 schema component files, run `bun run build:schema` |
 | Plugin config | `src/plugin-handlers/config-handler.ts` | JSONC loading, merging, migration |
-| Background agents | `src/features/background-agent/` | manager.ts (1646 lines) |
+| Background agents | `src/features/background-agent/` | manager.ts (1701 lines) |
 | Orchestrator | `src/hooks/atlas/` | Main orchestration hook (1976 lines) |
 | Delegation | `src/tools/delegate-task/` | Category routing (constants.ts 569 lines) |
 | Task system | `src/features/claude-tasks/` | Task schema, storage, todo sync |
@@ -174,7 +174,7 @@ OhMyOpenCodePlugin(ctx)

 **Rules:**
 - NEVER write implementation before test
- NEVER delete failing tests - fix the code
+- NEVER delete failing tests — fix the code
 - Test file: `*.test.ts` alongside source (176 test files)
 - BDD comments: `//#given`, `//#when`, `//#then`

@@ -185,7 +185,7 @@ OhMyOpenCodePlugin(ctx)
 - **Build**: `bun build` (ESM) + `tsc --emitDeclarationOnly`
 - **Exports**: Barrel pattern via index.ts
 - **Naming**: kebab-case dirs, `createXXXHook`/`createXXXTool` factories
- **Testing**: BDD comments, 176 test files, 117k+ lines TypeScript
+- **Testing**: BDD comments, 176 test files, 1130 TypeScript files
 - **Temperature**: 0.1 for code agents, max 0.3
 - **Modular architecture**: 200 LOC hard limit per file (prompt strings exempt)

@@ -193,24 +193,24 @@ OhMyOpenCodePlugin(ctx)

 | Category | Forbidden |
 |----------|-----------|
-| Package Manager | npm, yarn - Bun exclusively |
-| Types | @types/node - use bun-types |
-| File Ops | mkdir/touch/rm/cp/mv in code - use bash tool |
-| Publishing | Direct `bun publish` - GitHub Actions only |
-| Versioning | Local version bump - CI manages |
+| Package Manager | npm, yarn — Bun exclusively |
+| Types | @types/node — use bun-types |
+| File Ops | mkdir/touch/rm/cp/mv in code — use bash tool |
+| Publishing | Direct `bun publish` — GitHub Actions only |
+| Versioning | Local version bump — CI manages |
 | Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
 | Error Handling | Empty catch blocks |
 | Testing | Deleting failing tests, writing implementation before test |
-| Agent Calls | Sequential - use `task` parallel |
-| Hook Logic | Heavy PreToolUse - slows every call |
+| Agent Calls | Sequential — use `task` parallel |
+| Hook Logic | Heavy PreToolUse — slows every call |
 | Commits | Giant (3+ files), separate test from impl |
 | Temperature | >0.3 for code agents |
-| Trust | Agent self-reports - ALWAYS verify |
+| Trust | Agent self-reports — ALWAYS verify |
 | Git | `git add -i`, `git rebase -i` (no interactive input) |
 | Git | Skip hooks (--no-verify), force push without request |
-| Bash | `sleep N` - use conditional waits |
-| Bash | `cd dir && cmd` - use workdir parameter |
-| Files | Catch-all utils.ts/helpers.ts - name by purpose |
+| Bash | `sleep N` — use conditional waits |
+| Bash | `cd dir && cmd` — use workdir parameter |
+| Files | Catch-all utils.ts/helpers.ts — name by purpose |

 ## AGENT MODELS

@@ -230,7 +230,7 @@ OhMyOpenCodePlugin(ctx)

 ## OPENCODE PLUGIN API

-Plugin SDK from `@opencode-ai/plugin` (v1.1.19). Plugin = `async (PluginInput) => Hooks`.
+Plugin SDK from `@opencode-ai/plugin`. Plugin = `async (PluginInput) => Hooks`.

 | Hook | Purpose |
 |------|---------|
@@ -283,7 +283,7 @@ bun run build:schema   # Regenerate JSON schema

 | File | Lines | Description |
 |------|-------|-------------|
-| `src/features/background-agent/manager.ts` | 1646 | Task lifecycle, concurrency |
+| `src/features/background-agent/manager.ts` | 1701 | Task lifecycle, concurrency |
 | `src/hooks/anthropic-context-window-limit-recovery/` | 2232 | Multi-strategy context recovery |
 | `src/hooks/claude-code-hooks/` | 2110 | Claude Code settings.json compat |
 | `src/hooks/todo-continuation-enforcer/` | 2061 | Core boulder mechanism |
@@ -293,7 +293,7 @@ bun run build:schema   # Regenerate JSON schema
 | `src/hooks/rules-injector/` | 1604 | Conditional rules injection |
 | `src/hooks/think-mode/` | 1365 | Model/variant switching |
 | `src/hooks/session-recovery/` | 1279 | Auto error recovery |
-| `src/features/builtin-skills/skills/git-master.ts` | 1111 | Git master skill |
+| `src/features/builtin-skills/skills/git-master.ts` | 1112 | Git master skill |
 | `src/tools/delegate-task/constants.ts` | 569 | Category routing configs |

 ## MCP ARCHITECTURE
@@ -313,7 +313,7 @@ Three-tier system:
 ## NOTES

 - **OpenCode**: Requires >= 1.0.150
- **1069 TypeScript files**, 176 test files, 117k+ lines
+- **1130 TypeScript files**, 176 test files, 127k+ lines
 - **Flaky tests**: ralph-loop (CI timeout), session-state (parallel pollution)
 - **Trusted deps**: @ast-grep/cli, @ast-grep/napi, @code-yeongyu/comment-checker
 - **No linter/formatter**: No ESLint, Prettier, or Biome configured
--- a/bun.lock
+++ b/bun.lock
@@ -28,13 +28,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.5.3",
-        "oh-my-opencode-darwin-x64": "3.5.3",
-        "oh-my-opencode-linux-arm64": "3.5.3",
-        "oh-my-opencode-linux-arm64-musl": "3.5.3",
-        "oh-my-opencode-linux-x64": "3.5.3",
-        "oh-my-opencode-linux-x64-musl": "3.5.3",
-        "oh-my-opencode-windows-x64": "3.5.3",
+        "oh-my-opencode-darwin-arm64": "3.5.5",
+        "oh-my-opencode-darwin-x64": "3.5.5",
+        "oh-my-opencode-linux-arm64": "3.5.5",
+        "oh-my-opencode-linux-arm64-musl": "3.5.5",
+        "oh-my-opencode-linux-x64": "3.5.5",
+        "oh-my-opencode-linux-x64-musl": "3.5.5",
+        "oh-my-opencode-windows-x64": "3.5.5",
      },
    },
  },
@@ -226,19 +226,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Dq0+PC2dyAqG7c3DUnQmdOkKbKmOsRHwoqgLCQNKN1lTRllF8zbWqp5B+LGKxSPxPqJIPS3mKt+wIR2KvkYJVw=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.5", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-XtcCQ8/iVT6T1B58y0N1oMgOK4beTW8DW98b/ITnINb7b3hNSv5754Af/2Rx67BV0iE0ezC6uXaqz45C7ru1rw=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Ke45Bv/ygZm3YUSUumIyk647KZ2PFzw30tH597cOpG8MDPGbNVBCM6EKFezcukUPT+gPFVpE1IiGzEkn4JmgZA=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.5", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ReSDqU6jihh7lpGNmEt3REzc5bOcyfv3cMHitpecKq0wRrJoTBI+dgNPk90BLjHobGbhAm0TE8VZ9tqTkivnIQ=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aP5S3DngUhFkNeqYM33Ge6zccCWLzB/O3FLXLFXy/Iws03N8xugw72pnMK6lUbIia9QQBKK7IZBoYm9C79pZ3g=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Zs/ETIxwcWBvw+jdlo8t+3+92oMMaXkFg1ZCuZrBRZOmtPFefdsH5/QEIe2TlNSjfoTwlA7cbpOD6oXgxRVrtg=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-UiD/hVKYZQyX4D5N5SnZT4M5Z/B2SDtJWBW4MibpYSAcPKNCEBKi/5E4hOPxAtTfFGR8tIXFmYZdQJDkVfvluw=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m9r4OW1XhGtm/SvHM3kzpS4pEiI2eIh5Tj+j5hpMW3wu+AqE3F1XGUpu8RgvIpupFo8beimJWDYQujqokReQqg=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-L9kqwzElGkaQ8pgtv1ZjcHARw9LPaU4UEVjzauByTMi+/5Js/PTsNXBggxSRzZfQ8/MNBPSCiA4K10Kc0YjjvA=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-N6ysF5Pr2C1dyC5Dftzp05RJODgL+EYCWcOV59/UCV152cINlOhg80804o+6XTKV/taOAaboYaQwsBKiCs/BNQ=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Z0fVVih/b2dbNeb9DK9oca5dNYCZyPySBRtxRhDXod5d7fJNgIPrvUoEd3SNfkRGORyFB3hGBZ6nqQ6N8+8DEA=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-MOxW1FMTJT3Ze/U2fDedcZUYTFaA9PaKIiqtsBIHOSb+fFgdo51RIuUlKCELN/g9I9dYhw0yP2n9tBMBG6feSg=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ocWPjRs2sJgN02PJnEIYtqdMVDex1YhEj1FzAU5XIicfzQbgxLh9nz1yhHZzfqGJq69QStU6ofpc5kQpfX1LMg=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.5", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-dWRtPyIdMFQIw1BwVO4PbGqoo0UWs7NES+YJC7BLGv0YnWN7Q2tatmOviSeSgMELeMsWSbDNisEB79jsfShXjA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.5.4",
-    "oh-my-opencode-darwin-x64": "3.5.4",
-    "oh-my-opencode-linux-arm64": "3.5.4",
-    "oh-my-opencode-linux-arm64-musl": "3.5.4",
-    "oh-my-opencode-linux-x64": "3.5.4",
-    "oh-my-opencode-linux-x64-musl": "3.5.4",
-    "oh-my-opencode-windows-x64": "3.5.4"
+    "oh-my-opencode-darwin-arm64": "3.6.0",
+    "oh-my-opencode-darwin-x64": "3.6.0",
+    "oh-my-opencode-linux-arm64": "3.6.0",
+    "oh-my-opencode-linux-arm64-musl": "3.6.0",
+    "oh-my-opencode-linux-x64": "3.6.0",
+    "oh-my-opencode-linux-x64-musl": "3.6.0",
+    "oh-my-opencode-windows-x64": "3.6.0"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.5.4",
+  "version": "3.6.0",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -1503,6 +1503,22 @@
      "created_at": "2026-02-14T19:58:19Z",
      "repoId": 1108837393,
      "pullRequestNo": 1845
+    },
+    {
+      "name": "Decrabbityyy",
+      "id": 99632363,
+      "comment_id": 3904649522,
+      "created_at": "2026-02-15T15:07:11Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1864
+    },
+    {
+      "name": "dankochetov",
+      "id": 33990502,
+      "comment_id": 3905398332,
+      "created_at": "2026-02-15T23:17:05Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1870
    }
  ]
 }
--- a/src/AGENTS.md
+++ b/src/AGENTS.md
@@ -5,25 +5,26 @@
 Main plugin entry point and orchestration layer. Plugin initialization, hook registration, tool composition, and lifecycle management.

 ## STRUCTURE
+
 ```
 src/
-├── index.ts                          # Main plugin entry (88 lines) — OhMyOpenCodePlugin factory
+├── index.ts                          # Main plugin entry (106 lines) — OhMyOpenCodePlugin factory
 ├── create-hooks.ts                   # Hook coordination: core, continuation, skill (62 lines)
 ├── create-managers.ts                # Manager initialization: Tmux, Background, SkillMcp, Config (80 lines)
 ├── create-tools.ts                   # Tool registry + skill context composition (54 lines)
 ├── plugin-interface.ts               # Plugin interface assembly — 7 OpenCode hooks (66 lines)
-├── plugin-config.ts                  # Config loading orchestration (user + project merge)
-├── plugin-state.ts                   # Model cache state (context limits, anthropic 1M flag)
-├── agents/                           # 11 AI agents (32 files) - see agents/AGENTS.md
-├── cli/                              # CLI installer, doctor (107+ files) - see cli/AGENTS.md
-├── config/                           # Zod schema (21 component files) - see config/AGENTS.md
-├── features/                         # Background agents, skills, commands (18 dirs) - see features/AGENTS.md
-├── hooks/                            # 41 lifecycle hooks (36 dirs) - see hooks/AGENTS.md
-├── mcp/                              # Built-in MCPs (6 files) - see mcp/AGENTS.md
+├── plugin-config.ts                  # Config loading orchestration (user + project merge, 180 lines)
+├── plugin-state.ts                   # Model cache state (context limits, anthropic 1M flag, 12 lines)
+├── agents/                           # 11 AI agents (32 files) — see agents/AGENTS.md
+├── cli/                              # CLI installer, doctor (107+ files) — see cli/AGENTS.md
+├── config/                           # Zod schema (21 component files) — see config/AGENTS.md
+├── features/                         # Background agents, skills, commands (18 dirs) — see features/AGENTS.md
+├── hooks/                            # 41 lifecycle hooks (36 dirs) — see hooks/AGENTS.md
+├── mcp/                              # Built-in MCPs (6 files) — see mcp/AGENTS.md
 ├── plugin/                           # Plugin interface composition (21 files)
-├── plugin-handlers/                  # Config loading, plan inheritance (15 files) - see plugin-handlers/AGENTS.md
-├── shared/                           # Cross-cutting utilities (84 files) - see shared/AGENTS.md
-└── tools/                            # 25+ tools (14 dirs) - see tools/AGENTS.md
+├── plugin-handlers/                  # Config loading, plan inheritance (15 files) — see plugin-handlers/AGENTS.md
+├── shared/                           # Cross-cutting utilities (96 files) — see shared/AGENTS.md
+└── tools/                            # 26 tools (14 dirs) — see tools/AGENTS.md
 ```

 ## PLUGIN INITIALIZATION (10 steps)
--- a/src/agents/AGENTS.md
+++ b/src/agents/AGENTS.md
@@ -7,36 +7,22 @@
 ## STRUCTURE
 ```
 agents/
-├── sisyphus.ts                 # Main orchestrator (530 lines)
-├── hephaestus.ts               # Autonomous deep worker (624 lines)
-├── oracle.ts                   # Strategic advisor (170 lines)
-├── librarian.ts                # Multi-repo research (328 lines)
-├── explore.ts                  # Fast codebase grep (124 lines)
-├── multimodal-looker.ts        # Media analyzer (58 lines)
+├── sisyphus.ts                 # Main orchestrator (559 lines)
+├── hephaestus.ts               # Autonomous deep worker (651 lines)
+├── oracle.ts                   # Strategic advisor (171 lines)
+├── librarian.ts                # Multi-repo research (329 lines)
+├── explore.ts                  # Fast codebase grep (125 lines)
+├── multimodal-looker.ts        # Media analyzer (59 lines)
 ├── metis.ts                    # Pre-planning analysis (347 lines)
 ├── momus.ts                    # Plan validator (244 lines)
-├── atlas/                      # Master orchestrator
-│   ├── agent.ts                # Atlas factory
-│   ├── default.ts              # Claude-optimized prompt
-│   ├── gpt.ts                  # GPT-optimized prompt
-│   └── utils.ts
-├── prometheus/                 # Planning agent
-│   ├── index.ts
-│   ├── system-prompt.ts        # 6-section prompt assembly
-│   ├── plan-template.ts        # Work plan structure (423 lines)
-│   ├── interview-mode.ts       # Interview flow (335 lines)
-│   ├── plan-generation.ts
-│   ├── high-accuracy-mode.ts
-│   ├── identity-constraints.ts # Identity rules (301 lines)
-│   └── behavioral-summary.ts
-├── sisyphus-junior/            # Delegated task executor
-│   ├── agent.ts
-│   ├── default.ts              # Claude prompt
-│   └── gpt.ts                  # GPT prompt
-├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation (431 lines)
-├── builtin-agents/             # Agent registry (8 files)
+├── atlas/                      # Master orchestrator (agent.ts + default.ts + gpt.ts)
+├── prometheus/                 # Planning agent (8 files, plan-template 423 lines)
+├── sisyphus-junior/            # Delegated task executor (agent.ts + default.ts + gpt.ts)
+├── dynamic-agent-prompt-builder.ts  # Dynamic prompt generation (433 lines)
+├── builtin-agents/             # Agent registry + model resolution
+├── agent-builder.ts            # Agent construction with category merging (51 lines)
 ├── utils.ts                    # Agent creation, model fallback resolution (571 lines)
-├── types.ts                    # AgentModelConfig, AgentPromptMetadata
+├── types.ts                    # AgentModelConfig, AgentPromptMetadata (106 lines)
 └── index.ts                    # Exports
 ```

@@ -78,6 +64,12 @@ agents/
 | Momus | 32k budget tokens | reasoningEffort: "medium" |
 | Sisyphus-Junior | 32k budget tokens | reasoningEffort: "medium" |

+## KEY PROMPT PATTERNS
+
+- **Sisyphus/Hephaestus**: Dynamic prompts via `dynamic-agent-prompt-builder.ts` injecting available tools/skills/categories
+- **Atlas, Sisyphus-Junior**: Model-specific prompts (Claude vs GPT variants)
+- **Prometheus**: 6-section modular prompt (identity → interview → plan-generation → high-accuracy → template → behavioral)
+
 ## HOW TO ADD

 1. Create `src/agents/my-agent.ts` exporting factory + metadata
@@ -85,13 +77,6 @@ agents/
 3. Update `AgentNameSchema` in `src/config/schema/agent-names.ts`
 4. Register in `src/plugin-handlers/agent-config-handler.ts`

-## KEY PATTERNS
-
- **Factory**: `createXXXAgent(model): AgentConfig`
- **Metadata**: `XXX_PROMPT_METADATA` with category, cost, triggers
- **Model-specific prompts**: Atlas, Sisyphus-Junior have GPT vs Claude variants
- **Dynamic prompts**: Sisyphus, Hephaestus use `dynamic-agent-prompt-builder.ts` to inject available tools/skills/categories
-
 ## ANTI-PATTERNS

 - **Trust agent self-reports**: NEVER — always verify outputs
--- a/src/agents/prometheus-prompt.test.ts
+++ b/src/agents/prometheus-prompt.test.ts
@@ -66,7 +66,7 @@ describe("PROMETHEUS_SYSTEM_PROMPT zero human intervention", () => {
    expect(lowerPrompt).toContain("preconditions")
    expect(lowerPrompt).toContain("failure indicators")
    expect(lowerPrompt).toContain("evidence")
-    expect(lowerPrompt).toMatch(/negative scenario/)
+    expect(prompt).toMatch(/negative/i)
  })

  test("should require QA scenario adequacy in self-review checklist", () => {
--- a/src/agents/prometheus/identity-constraints.ts
+++ b/src/agents/prometheus/identity-constraints.ts
@@ -129,7 +129,21 @@ Your ONLY valid output locations are \`.sisyphus/plans/*.md\` and \`.sisyphus/dr

 Example: \`.sisyphus/plans/auth-refactor.md\`

-### 5. SINGLE PLAN MANDATE (CRITICAL)
+### 5. MAXIMUM PARALLELISM PRINCIPLE (NON-NEGOTIABLE)
+
+Your plans MUST maximize parallel execution. This is a core planning quality metric.
+
+**Granularity Rule**: One task = one module/concern = 1-3 files.
+If a task touches 4+ files or 2+ unrelated concerns, SPLIT IT.
+
+**Parallelism Target**: Aim for 5-8 tasks per wave.
+If any wave has fewer than 3 tasks (except the final integration), you have under-split.
+
+**Dependency Minimization**: Structure tasks so shared dependencies
+(types, interfaces, configs) are extracted as early Wave-1 tasks,
+unblocking maximum parallelism in subsequent waves.
+
+### 6. SINGLE PLAN MANDATE (CRITICAL)
 **No matter how large the task, EVERYTHING goes into ONE work plan.**

 **NEVER:**
@@ -152,7 +166,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`

 **The plan can have 50+ TODOs. That's OK. ONE PLAN.**

-### 5.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)
+### 6.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)

 <write_protocol>
 **The Write tool OVERWRITES files. It does NOT append.**
@@ -188,7 +202,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
 - [ ] File already exists with my content? → Use Edit to append, NOT Write
 </write_protocol>

-### 6. DRAFT AS WORKING MEMORY (MANDATORY)
+### 7. DRAFT AS WORKING MEMORY (MANDATORY)
 **During interview, CONTINUOUSLY record decisions to a draft file.**

 **Draft Location**: \`.sisyphus/drafts/{name}.md\`
--- a/src/agents/prometheus/plan-template.ts
+++ b/src/agents/prometheus/plan-template.ts
@@ -70,108 +70,25 @@ Generate plan to: \`.sisyphus/plans/{name}.md\`

 ## Verification Strategy (MANDATORY)

-> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**
->
-> ALL tasks in this plan MUST be verifiable WITHOUT any human action.
-> This is NOT conditional — it applies to EVERY task, regardless of test strategy.
->
-> **FORBIDDEN** — acceptance criteria that require:
-> - "User manually tests..." / "사용자가 직접 테스트..."
-> - "User visually confirms..." / "사용자가 눈으로 확인..."
-> - "User interacts with..." / "사용자가 직접 조작..."
-> - "Ask user to verify..." / "사용자에게 확인 요청..."
-> - ANY step where a human must perform an action
->
-> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.
+> **ZERO HUMAN INTERVENTION** — ALL verification is agent-executed. No exceptions.
+> Acceptance criteria requiring "user manually tests/confirms" are FORBIDDEN.

 ### Test Decision
 - **Infrastructure exists**: [YES/NO]
 - **Automated tests**: [TDD / Tests-after / None]
 - **Framework**: [bun test / vitest / jest / pytest / none]
+- **If TDD**: Each task follows RED (failing test) → GREEN (minimal impl) → REFACTOR

-### If TDD Enabled
+### QA Policy
+Every task MUST include agent-executed QA scenarios (see TODO template below).
+Evidence saved to \`.sisyphus/evidence/task-{N}-{scenario-slug}.{ext}\`.

-Each TODO follows RED-GREEN-REFACTOR:
-
-**Task Structure:**
-1. **RED**: Write failing test first
-   - Test file: \`[path].test.ts\`
-   - Test command: \`bun test [file]\`
-   - Expected: FAIL (test exists, implementation doesn't)
-2. **GREEN**: Implement minimum code to pass
-   - Command: \`bun test [file]\`
-   - Expected: PASS
-3. **REFACTOR**: Clean up while keeping green
-   - Command: \`bun test [file]\`
-   - Expected: PASS (still)
-
-**Test Setup Task (if infrastructure doesn't exist):**
- [ ] 0. Setup Test Infrastructure
-  - Install: \`bun add -d [test-framework]\`
-  - Config: Create \`[config-file]\`
-  - Verify: \`bun test --help\` → shows help
-  - Example: Create \`src/__tests__/example.test.ts\`
-  - Verify: \`bun test\` → 1 test passes
-
-### Agent-Executed QA Scenarios (MANDATORY — ALL tasks)
-
-> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.
-> - **With TDD**: QA scenarios complement unit tests at integration/E2E level
-> - **Without TDD**: QA scenarios are the PRIMARY verification method
->
-> These describe how the executing agent DIRECTLY verifies the deliverable
-> by running it — opening browsers, executing commands, sending API requests.
-> The agent performs what a human tester would do, but automated via tools.
-
-**Verification Tool by Deliverable Type:**
-
-| Type | Tool | How Agent Verifies |
-|------|------|-------------------|
-| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
-| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output |
-| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |
-| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |
-| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |
-
-**Each Scenario MUST Follow This Format:**
-
-\`\`\`
-Scenario: [Descriptive name — what user action/flow is being verified]
-  Tool: [Playwright / interactive_bash / Bash]
-  Preconditions: [What must be true before this scenario runs]
-  Steps:
-    1. [Exact action with specific selector/command/endpoint]
-    2. [Next action with expected intermediate state]
-    3. [Assertion with exact expected value]
-  Expected Result: [Concrete, observable outcome]
-  Failure Indicators: [What would indicate failure]
-  Evidence: [Screenshot path / output capture / response body path]
-\`\`\`
-
-**Scenario Detail Requirements:**
- **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
- **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
- **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
- **Timing**: Include wait conditions where relevant (\`Wait for .dashboard (timeout: 10s)\`)
- **Negative Scenarios**: At least ONE failure/error scenario per feature
- **Evidence Paths**: Specific file paths (\`.sisyphus/evidence/task-N-scenario-name.png\`)
-
-**Anti-patterns (NEVER write scenarios like this):**
- ❌ "Verify the login page works correctly"
- ❌ "Check that the API returns the right data"
- ❌ "Test the form validation"
- ❌ "User opens browser and confirms..."
-
-**Write scenarios like this instead:**
- ✅ \`Navigate to /login → Fill input[name="email"] with "test@example.com" → Fill input[name="password"] with "Pass123!" → Click button[type="submit"] → Wait for /dashboard → Assert h1 contains "Welcome"\`
- ✅ \`POST /api/users {"name":"Test","email":"new@test.com"} → Assert status 201 → Assert response.id is UUID → GET /api/users/{id} → Assert name equals "Test"\`
- ✅ \`Run ./cli --config test.yaml → Wait for "Loaded" in stdout → Send "q" → Assert exit code 0 → Assert stdout contains "Goodbye"\`
-
-**Evidence Requirements:**
- Screenshots: \`.sisyphus/evidence/\` for all UI verifications
- Terminal output: Captured for CLI/TUI verifications
- Response bodies: Saved for API verifications
- All evidence referenced by specific file path in acceptance criteria
+| Deliverable Type | Verification Tool | Method |
+|------------------|-------------------|--------|
+| Frontend/UI | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
+| TUI/CLI | interactive_bash (tmux) | Run command, send keystrokes, validate output |
+| API/Backend | Bash (curl) | Send requests, assert status + response fields |
+| Library/Module | Bash (bun/node REPL) | Import, call functions, compare output |

 ---

@@ -181,49 +98,82 @@ Scenario: [Descriptive name — what user action/flow is being verified]

 > Maximize throughput by grouping independent tasks into parallel waves.
 > Each wave completes before the next begins.
+> Target: 5-8 tasks per wave. Fewer than 3 per wave (except final) = under-splitting.

 \`\`\`
-Wave 1 (Start Immediately):
-├── Task 1: [no dependencies]
-└── Task 5: [no dependencies]
+Wave 1 (Start Immediately — foundation + scaffolding):
+├── Task 1: Project scaffolding + config [quick]
+├── Task 2: Design system tokens [quick]
+├── Task 3: Type definitions [quick]
+├── Task 4: Schema definitions [quick]
+├── Task 5: Storage interface + in-memory impl [quick]
+├── Task 6: Auth middleware [quick]
+└── Task 7: Client module [quick]

-Wave 2 (After Wave 1):
-├── Task 2: [depends: 1]
-├── Task 3: [depends: 1]
-└── Task 6: [depends: 5]
+Wave 2 (After Wave 1 — core modules, MAX PARALLEL):
+├── Task 8: Core business logic (depends: 3, 5, 7) [deep]
+├── Task 9: API endpoints (depends: 4, 5) [unspecified-high]
+├── Task 10: Secondary storage impl (depends: 5) [unspecified-high]
+├── Task 11: Retry/fallback logic (depends: 8) [deep]
+├── Task 12: UI layout + navigation (depends: 2) [visual-engineering]
+├── Task 13: API client + hooks (depends: 4) [quick]
+└── Task 14: Telemetry middleware (depends: 5, 10) [unspecified-high]

-Wave 3 (After Wave 2):
-└── Task 4: [depends: 2, 3]
+Wave 3 (After Wave 2 — integration + UI):
+├── Task 15: Main route combining modules (depends: 6, 11, 14) [deep]
+├── Task 16: UI data visualization (depends: 12, 13) [visual-engineering]
+├── Task 17: Deployment config A (depends: 15) [quick]
+├── Task 18: Deployment config B (depends: 15) [quick]
+├── Task 19: Deployment config C (depends: 15) [quick]
+└── Task 20: UI request log + build (depends: 16) [visual-engineering]

-Critical Path: Task 1 → Task 2 → Task 4
-Parallel Speedup: ~40% faster than sequential
+Wave 4 (After Wave 3 — verification):
+├── Task 21: Integration tests (depends: 15) [deep]
+├── Task 22: UI QA - Playwright (depends: 20) [unspecified-high]
+├── Task 23: E2E QA (depends: 21) [deep]
+└── Task 24: Git cleanup + tagging (depends: 21) [git]
+
+Wave FINAL (After ALL tasks — independent review, 4 parallel):
+├── Task F1: Plan compliance audit (oracle)
+├── Task F2: Code quality review (unspecified-high)
+├── Task F3: Real manual QA (unspecified-high)
+└── Task F4: Scope fidelity check (deep)
+
+Critical Path: Task 5 → Task 8 → Task 11 → Task 15 → Task 21 → F1-F4
+Parallel Speedup: ~70% faster than sequential
+Max Concurrent: 7 (Waves 1 & 2)
 \`\`\`

-### Dependency Matrix
+### Dependency Matrix (abbreviated — show ALL tasks in your generated plan)

-| Task | Depends On | Blocks | Can Parallelize With |
-|------|------------|--------|---------------------|
-| 1 | None | 2, 3 | 5 |
-| 2 | 1 | 4 | 3, 6 |
-| 3 | 1 | 4 | 2, 6 |
-| 4 | 2, 3 | None | None (final) |
-| 5 | None | 6 | 1 |
-| 6 | 5 | None | 2, 3 |
+| Task | Depends On | Blocks | Wave |
+|------|------------|--------|------|
+| 1-7 | — | 8-14 | 1 |
+| 8 | 3, 5, 7 | 11, 15 | 2 |
+| 11 | 8 | 15 | 2 |
+| 14 | 5, 10 | 15 | 2 |
+| 15 | 6, 11, 14 | 17-19, 21 | 3 |
+| 21 | 15 | 23, 24 | 4 |
+
+> This is abbreviated for reference. YOUR generated plan must include the FULL matrix for ALL tasks.

 ### Agent Dispatch Summary

-| Wave | Tasks | Recommended Agents |
-|------|-------|-------------------|
-| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
-| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
-| 3 | 4 | final integration task |
+| Wave | # Parallel | Tasks → Agent Category |
+|------|------------|----------------------|
+| 1 | **7** | T1-T7 → \`quick\` |
+| 2 | **7** | T8 → \`deep\`, T9 → \`unspecified-high\`, T10 → \`unspecified-high\`, T11 → \`deep\`, T12 → \`visual-engineering\`, T13 → \`quick\`, T14 → \`unspecified-high\` |
+| 3 | **6** | T15 → \`deep\`, T16 → \`visual-engineering\`, T17-T19 → \`quick\`, T20 → \`visual-engineering\` |
+| 4 | **4** | T21 → \`deep\`, T22 → \`unspecified-high\`, T23 → \`deep\`, T24 → \`git\` |
+| FINAL | **4** | F1 → \`oracle\`, F2 → \`unspecified-high\`, F3 → \`unspecified-high\`, F4 → \`deep\` |

 ---

 ## TODOs

 > Implementation + Test = ONE Task. Never separate.
-> EVERY task MUST have: Recommended Agent Profile + Parallelization info.
+> EVERY task MUST have: Recommended Agent Profile + Parallelization info + QA Scenarios.
+> **A task WITHOUT QA Scenarios is INCOMPLETE. No exceptions.**

 - [ ] 1. [Task Title]

@@ -257,22 +207,15 @@ Parallel Speedup: ~40% faster than sequential

  **Pattern References** (existing code to follow):
  - \`src/services/auth.ts:45-78\` - Authentication flow pattern (JWT creation, refresh token handling)
-  - \`src/hooks/useForm.ts:12-34\` - Form validation pattern (Zod schema + react-hook-form integration)

  **API/Type References** (contracts to implement against):
  - \`src/types/user.ts:UserDTO\` - Response shape for user endpoints
-  - \`src/api/schema.ts:createUserSchema\` - Request validation schema

  **Test References** (testing patterns to follow):
  - \`src/__tests__/auth.test.ts:describe("login")\` - Test structure and mocking patterns

-  **Documentation References** (specs and requirements):
-  - \`docs/api-spec.md#authentication\` - API contract details
-  - \`ARCHITECTURE.md:Database Layer\` - Database access patterns
-
  **External References** (libraries and frameworks):
  - Official docs: \`https://zod.dev/?id=basic-usage\` - Zod validation syntax
-  - Example repo: \`github.com/example/project/src/auth\` - Reference implementation

  **WHY Each Reference Matters** (explain the relevance):
  - Don't just list files - explain what pattern/information the executor should extract
@@ -283,113 +226,60 @@ Parallel Speedup: ~40% faster than sequential

  > **AGENT-EXECUTABLE VERIFICATION ONLY** — No human action permitted.
  > Every criterion MUST be verifiable by running a command or using a tool.
-  > REPLACE all placeholders with actual values from task context.

  **If TDD (tests enabled):**
  - [ ] Test file created: src/auth/login.test.ts
-  - [ ] Test covers: successful login returns JWT token
  - [ ] bun test src/auth/login.test.ts → PASS (3 tests, 0 failures)

-  **Agent-Executed QA Scenarios (MANDATORY — per-scenario, ultra-detailed):**
+  **QA Scenarios (MANDATORY — task is INCOMPLETE without these):**

-  > Write MULTIPLE named scenarios per task: happy path AND failure cases.
-  > Each scenario = exact tool + steps with real selectors/data + evidence path.
-
-  **Example — Frontend/UI (Playwright):**
+  > **This is NOT optional. A task without QA scenarios WILL BE REJECTED.**
+  >
+  > Write scenario tests that verify the ACTUAL BEHAVIOR of what you built.
+  > Minimum: 1 happy path + 1 failure/edge case per task.
+  > Each scenario = exact tool + exact steps + exact assertions + evidence path.
+  >
+  > **The executing agent MUST run these scenarios after implementation.**
+  > **The orchestrator WILL verify evidence files exist before marking task complete.**

  \\\`\\\`\\\`
-  Scenario: Successful login redirects to dashboard
-    Tool: Playwright (playwright skill)
-    Preconditions: Dev server running on localhost:3000, test user exists
+  Scenario: [Happy path — what SHOULD work]
+    Tool: [Playwright / interactive_bash / Bash (curl)]
+    Preconditions: [Exact setup state]
    Steps:
-      1. Navigate to: http://localhost:3000/login
-      2. Wait for: input[name="email"] visible (timeout: 5s)
-      3. Fill: input[name="email"] → "test@example.com"
-      4. Fill: input[name="password"] → "ValidPass123!"
-      5. Click: button[type="submit"]
-      6. Wait for: navigation to /dashboard (timeout: 10s)
-      7. Assert: h1 text contains "Welcome back"
-      8. Assert: cookie "session_token" exists
-      9. Screenshot: .sisyphus/evidence/task-1-login-success.png
-    Expected Result: Dashboard loads with welcome message
-    Evidence: .sisyphus/evidence/task-1-login-success.png
+      1. [Exact action — specific command/selector/endpoint, no vagueness]
+      2. [Next action — with expected intermediate state]
+      3. [Assertion — exact expected value, not "verify it works"]
+    Expected Result: [Concrete, observable, binary pass/fail]
+    Failure Indicators: [What specifically would mean this failed]
+    Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}.{ext}

-  Scenario: Login fails with invalid credentials
-    Tool: Playwright (playwright skill)
-    Preconditions: Dev server running, no valid user with these credentials
+  Scenario: [Failure/edge case — what SHOULD fail gracefully]
+    Tool: [same format]
+    Preconditions: [Invalid input / missing dependency / error state]
    Steps:
-      1. Navigate to: http://localhost:3000/login
-      2. Fill: input[name="email"] → "wrong@example.com"
-      3. Fill: input[name="password"] → "WrongPass"
-      4. Click: button[type="submit"]
-      5. Wait for: .error-message visible (timeout: 5s)
-      6. Assert: .error-message text contains "Invalid credentials"
-      7. Assert: URL is still /login (no redirect)
-      8. Screenshot: .sisyphus/evidence/task-1-login-failure.png
-    Expected Result: Error message shown, stays on login page
-    Evidence: .sisyphus/evidence/task-1-login-failure.png
+      1. [Trigger the error condition]
+      2. [Assert error is handled correctly]
+    Expected Result: [Graceful failure with correct error message/code]
+    Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}-error.{ext}
  \\\`\\\`\\\`

-  **Example — API/Backend (curl):**
-
-  \\\`\\\`\\\`
-  Scenario: Create user returns 201 with UUID
-    Tool: Bash (curl)
-    Preconditions: Server running on localhost:8080
-    Steps:
-      1. curl -s -w "\\n%{http_code}" -X POST http://localhost:8080/api/users \\
-           -H "Content-Type: application/json" \\
-           -d '{"email":"new@test.com","name":"Test User"}'
-      2. Assert: HTTP status is 201
-      3. Assert: response.id matches UUID format
-      4. GET /api/users/{returned-id} → Assert name equals "Test User"
-    Expected Result: User created and retrievable
-    Evidence: Response bodies captured
-
-  Scenario: Duplicate email returns 409
-    Tool: Bash (curl)
-    Preconditions: User with email "new@test.com" already exists
-    Steps:
-      1. Repeat POST with same email
-      2. Assert: HTTP status is 409
-      3. Assert: response.error contains "already exists"
-    Expected Result: Conflict error returned
-    Evidence: Response body captured
-  \\\`\\\`\\\`
-
-  **Example — TUI/CLI (interactive_bash):**
-
-  \\\`\\\`\\\`
-  Scenario: CLI loads config and displays menu
-    Tool: interactive_bash (tmux)
-    Preconditions: Binary built, test config at ./test.yaml
-    Steps:
-      1. tmux new-session: ./my-cli --config test.yaml
-      2. Wait for: "Configuration loaded" in output (timeout: 5s)
-      3. Assert: Menu items visible ("1. Create", "2. List", "3. Exit")
-      4. Send keys: "3" then Enter
-      5. Assert: "Goodbye" in output
-      6. Assert: Process exited with code 0
-    Expected Result: CLI starts, shows menu, exits cleanly
-    Evidence: Terminal output captured
-
-  Scenario: CLI handles missing config gracefully
-    Tool: interactive_bash (tmux)
-    Preconditions: No config file at ./nonexistent.yaml
-    Steps:
-      1. tmux new-session: ./my-cli --config nonexistent.yaml
-      2. Wait for: output (timeout: 3s)
-      3. Assert: stderr contains "Config file not found"
-      4. Assert: Process exited with code 1
-    Expected Result: Meaningful error, non-zero exit
-    Evidence: Error output captured
-  \\\`\\\`\\\`
+  > **Specificity requirements — every scenario MUST use:**
+  > - **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
+  > - **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
+  > - **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
+  > - **Timing**: Wait conditions where relevant (\`timeout: 10s\`)
+  > - **Negative**: At least ONE failure/error scenario per task
+  >
+  > **Anti-patterns (your scenario is INVALID if it looks like this):**
+  > - ❌ "Verify it works correctly" — HOW? What does "correctly" mean?
+  > - ❌ "Check the API returns data" — WHAT data? What fields? What values?
+  > - ❌ "Test the component renders" — WHERE? What selector? What content?
+  > - ❌ Any scenario without an evidence path

  **Evidence to Capture:**
-  - [ ] Screenshots in .sisyphus/evidence/ for UI scenarios
-  - [ ] Terminal output for CLI/TUI scenarios
-  - [ ] Response bodies for API scenarios
  - [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}
+  - [ ] Screenshots for UI, terminal output for CLI, response bodies for API

  **Commit**: YES | NO (groups with N)
  - Message: \`type(scope): desc\`
@@ -398,6 +288,28 @@ Parallel Speedup: ~40% faster than sequential

 ---

+## Final Verification Wave (MANDATORY — after ALL implementation tasks)
+
+> 4 review agents run in PARALLEL. ALL must APPROVE. Rejection → fix → re-run.
+
+- [ ] F1. **Plan Compliance Audit** — \`oracle\`
+  Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns — reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
+  Output: \`Must Have [N/N] | Must NOT Have [N/N] | Tasks [N/N] | VERDICT: APPROVE/REJECT\`
+
+- [ ] F2. **Code Quality Review** — \`unspecified-high\`
+  Run \`tsc --noEmit\` + linter + \`bun test\`. Review all changed files for: \`as any\`/\`@ts-ignore\`, empty catches, console.log in prod, commented-out code, unused imports. Check for AI slop: excessive comments, over-abstraction, generic names (data/result/item/temp).
+  Output: \`Build [PASS/FAIL] | Lint [PASS/FAIL] | Tests [N pass/N fail] | Files [N clean/N issues] | VERDICT\`
+
+- [ ] F3. **Real Manual QA** — \`unspecified-high\` (+ \`playwright\` skill if UI)
+  Start from a clean state. Execute EVERY QA scenario from EVERY task — follow exact steps, capture evidence. Test cross-task integration (features working together, not in isolation). Test edge cases: empty state, invalid input, rapid actions. Save evidence to \`.sisyphus/evidence/final-qa/\`.
+  Output: \`Scenarios [N/N pass] | Integration [N/N] | Edge Cases [N tested] | VERDICT\`
+
+- [ ] F4. **Scope Fidelity Check** — \`deep\`
+  For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 — everything in spec was built (no missing), nothing beyond spec was built (no creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
+  Output: \`Tasks [N/N compliant] | Contamination [CLEAN/N issues] | Unaccounted [CLEAN/N files] | VERDICT\`
+
+---
+
 ## Commit Strategy

 | After Task | Message | Files | Verification |
--- a/src/cli/AGENTS.md
+++ b/src/cli/AGENTS.md
@@ -2,9 +2,7 @@

 ## OVERVIEW

-CLI entry: `bunx oh-my-opencode`. 107+ files with Commander.js + @clack/prompts TUI.
-
-**Commands**: install, run, doctor, get-local-version, mcp-oauth
+CLI entry: `bunx oh-my-opencode`. 107+ files with Commander.js + @clack/prompts TUI. 5 commands: install, run, doctor, get-local-version, mcp-oauth.

 ## STRUCTURE
 ```
@@ -14,20 +12,22 @@ cli/
 ├── install.ts               # TTY routing (TUI or CLI installer)
 ├── cli-installer.ts         # Non-interactive installer (164 lines)
 ├── tui-installer.ts         # Interactive TUI with @clack/prompts (140 lines)
-├── config-manager/          # 17 config utilities
+├── config-manager/          # 20 config utilities
 │   ├── add-plugin-to-opencode-config.ts  # Plugin registration
-│   ├── add-provider-config.ts            # Provider setup
-│   ├── detect-current-config.ts          # Project vs user config
+│   ├── add-provider-config.ts            # Provider setup (Google/Antigravity)
+│   ├── detect-current-config.ts          # Installed providers detection
 │   ├── write-omo-config.ts               # JSONC writing
-│   └── ...
-├── doctor/                  # 14 health checks
-│   ├── runner.ts            # Check orchestration
-│   ├── formatter.ts         # Colored output
-│   └── checks/              # 29 files: auth, config, dependencies, gh, lsp, mcp, opencode, plugin, version, model-resolution (6 sub-checks)
+│   ├── generate-omo-config.ts            # Config generation
+│   ├── jsonc-provider-editor.ts          # JSONC editing
+│   └── ...                               # 14 more utilities
+├── doctor/                  # 4 check categories, 21 check files
+│   ├── runner.ts            # Parallel check execution + result aggregation
+│   ├── formatter.ts         # Colored output (default/status/verbose/JSON)
+│   └── checks/              # system (4), config (1), tools (4), models (6 sub-checks)
 ├── run/                     # Session launcher (24 files)
 │   ├── runner.ts            # Run orchestration (126 lines)
-│   ├── agent-resolver.ts    # Agent selection: flag → env → config → fallback
-│   ├── session-resolver.ts  # Session creation or resume
+│   ├── agent-resolver.ts    # Agent: flag → env → config → Sisyphus
+│   ├── session-resolver.ts  # Session create or resume with retries
 │   ├── event-handlers.ts    # Event processing (125 lines)
 │   ├── completion.ts        # Completion detection
 │   └── poll-for-completion.ts # Polling with timeout
@@ -43,20 +43,17 @@ cli/
 |---------|---------|-----------|
 | `install` | Interactive setup | Provider selection → config generation → plugin registration |
 | `run` | Session launcher | Agent: flag → env → config → Sisyphus. Enforces todo completion. |
-| `doctor` | 14 health checks | installation, config, auth, deps, tools, updates |
+| `doctor` | 4-category health checks | system, config, tools, models (6 sub-checks) |
 | `get-local-version` | Version check | Detects installed, compares with npm latest |
 | `mcp-oauth` | OAuth tokens | login (PKCE flow), logout, status |

-## DOCTOR CHECK CATEGORIES
+## RUN SESSION LIFECYCLE

-| Category | Checks |
-|----------|--------|
-| installation | opencode, plugin |
-| configuration | config validity, Zod, model-resolution (6 sub-checks) |
-| authentication | anthropic, openai, google |
-| dependencies | ast-grep, comment-checker, gh-cli |
-| tools | LSP, MCP, MCP-OAuth |
-| updates | version comparison |
+1. Load config, resolve agent (CLI flag > env > config > Sisyphus)
+2. Create server connection (port/attach), setup cleanup/signal handlers
+3. Resolve session (create new or resume with retries)
+4. Send prompt, start event processing, poll for completion
+5. Execute on-complete hook, output JSON if requested, cleanup

 ## HOW TO ADD CHECK

--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
@@ -247,7 +247,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
      "model": "opencode/glm-4.7-free",
    },
    "writing": {
-      "model": "openai/gpt-5.2",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -314,7 +314,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "model": "opencode/glm-4.7-free",
    },
    "writing": {
-      "model": "openai/gpt-5.2",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -372,6 +372,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -432,6 +433,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -505,6 +507,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -579,6 +582,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -652,6 +656,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -726,6 +731,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -799,6 +805,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -873,6 +880,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -927,10 +935,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
      "model": "opencode/glm-4.7-free",
    },
    "visual-engineering": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -982,10 +990,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
      "model": "opencode/glm-4.7-free",
    },
    "visual-engineering": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -1056,6 +1064,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -1129,6 +1138,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -1189,8 +1199,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-6",
-      "variant": "max",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
      "model": "anthropic/claude-sonnet-4-5",
@@ -1256,6 +1265,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -1329,6 +1339,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -1402,6 +1413,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -1476,6 +1488,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
--- a/src/cli/cli-installer.test.ts
+++ b/src/cli/cli-installer.test.ts
@@ -0,0 +1,83 @@
+import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from "bun:test"
+import * as configManager from "./config-manager"
+import { runCliInstaller } from "./cli-installer"
+import type { InstallArgs } from "./types"
+
+describe("runCliInstaller", () => {
+  const mockConsoleLog = mock(() => {})
+  const mockConsoleError = mock(() => {})
+  const originalConsoleLog = console.log
+  const originalConsoleError = console.error
+
+  beforeEach(() => {
+    console.log = mockConsoleLog
+    console.error = mockConsoleError
+    mockConsoleLog.mockClear()
+    mockConsoleError.mockClear()
+  })
+
+  afterEach(() => {
+    console.log = originalConsoleLog
+    console.error = originalConsoleError
+  })
+
+  it("runs auth and provider setup steps when openai or copilot are enabled without gemini", async () => {
+    //#given
+    const addAuthPluginsSpy = spyOn(configManager, "addAuthPlugins").mockResolvedValue({
+      success: true,
+      configPath: "/tmp/opencode.jsonc",
+    })
+    const addProviderConfigSpy = spyOn(configManager, "addProviderConfig").mockReturnValue({
+      success: true,
+      configPath: "/tmp/opencode.jsonc",
+    })
+    const restoreSpies = [
+      addAuthPluginsSpy,
+      addProviderConfigSpy,
+      spyOn(configManager, "detectCurrentConfig").mockReturnValue({
+        isInstalled: false,
+        hasClaude: false,
+        isMax20: false,
+        hasOpenAI: false,
+        hasGemini: false,
+        hasCopilot: false,
+        hasOpencodeZen: false,
+        hasZaiCodingPlan: false,
+        hasKimiForCoding: false,
+      }),
+      spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(true),
+      spyOn(configManager, "getOpenCodeVersion").mockResolvedValue("1.0.200"),
+      spyOn(configManager, "addPluginToOpenCodeConfig").mockResolvedValue({
+        success: true,
+        configPath: "/tmp/opencode.jsonc",
+      }),
+      spyOn(configManager, "writeOmoConfig").mockReturnValue({
+        success: true,
+        configPath: "/tmp/oh-my-opencode.jsonc",
+      }),
+    ]
+
+    const args: InstallArgs = {
+      tui: false,
+      claude: "no",
+      openai: "yes",
+      gemini: "no",
+      copilot: "yes",
+      opencodeZen: "no",
+      zaiCodingPlan: "no",
+      kimiForCoding: "no",
+    }
+
+    //#when
+    const result = await runCliInstaller(args, "3.4.0")
+
+    //#then
+    expect(result).toBe(0)
+    expect(addAuthPluginsSpy).toHaveBeenCalledTimes(1)
+    expect(addProviderConfigSpy).toHaveBeenCalledTimes(1)
+
+    for (const spy of restoreSpies) {
+      spy.mockRestore()
+    }
+  })
+})
--- a/src/cli/cli-installer.ts
+++ b/src/cli/cli-installer.ts
@@ -77,7 +77,9 @@ export async function runCliInstaller(args: InstallArgs, version: string): Promi
    `Plugin ${isUpdate ? "verified" : "added"} ${SYMBOLS.arrow} ${color.dim(pluginResult.configPath)}`,
  )

-  if (config.hasGemini) {
+  const needsProviderSetup = config.hasGemini || config.hasOpenAI || config.hasCopilot
+
+  if (needsProviderSetup) {
    printStep(step++, totalSteps, "Adding auth plugins...")
    const authResult = await addAuthPlugins(config)
    if (!authResult.success) {
--- a/src/cli/run/completion.ts
+++ b/src/cli/run/completion.ts
@@ -1,5 +1,6 @@
 import pc from "picocolors"
 import type { RunContext, Todo, ChildSession, SessionStatus } from "./types"
+import { normalizeSDKResponse } from "../../shared"

 export async function checkCompletionConditions(ctx: RunContext): Promise<boolean> {
  try {
@@ -20,7 +21,7 @@ export async function checkCompletionConditions(ctx: RunContext): Promise<boolea

 async function areAllTodosComplete(ctx: RunContext): Promise<boolean> {
  const todosRes = await ctx.client.session.todo({ path: { id: ctx.sessionID } })
-  const todos = (todosRes.data ?? []) as Todo[]
+  const todos = normalizeSDKResponse(todosRes, [] as Todo[])

  const incompleteTodos = todos.filter(
    (t) => t.status !== "completed" && t.status !== "cancelled"
@@ -43,7 +44,7 @@ async function fetchAllStatuses(
  ctx: RunContext
 ): Promise<Record<string, SessionStatus>> {
  const statusRes = await ctx.client.session.status()
-  return (statusRes.data ?? {}) as Record<string, SessionStatus>
+  return normalizeSDKResponse(statusRes, {} as Record<string, SessionStatus>)
 }

 async function areAllDescendantsIdle(
@@ -54,7 +55,7 @@ async function areAllDescendantsIdle(
  const childrenRes = await ctx.client.session.children({
    path: { id: sessionID },
  })
-  const children = (childrenRes.data ?? []) as ChildSession[]
+  const children = normalizeSDKResponse(childrenRes, [] as ChildSession[])

  for (const child of children) {
    const status = allStatuses[child.id]
--- a/src/cli/run/runner.test.ts
+++ b/src/cli/run/runner.test.ts
@@ -107,7 +107,7 @@ describe("waitForEventProcessorShutdown", () => {
    const eventProcessor = new Promise<void>(() => {})
    const spy = spyOn(console, "log").mockImplementation(() => {})
    consoleLogSpy = spy
-    const timeoutMs = 50
+    const timeoutMs = 200
    const start = performance.now()

    try {
@@ -116,11 +116,8 @@ describe("waitForEventProcessorShutdown", () => {

      //#then
      const elapsed = performance.now() - start
-      expect(elapsed).toBeGreaterThanOrEqual(timeoutMs)
-      const callArgs = spy.mock.calls.flat().join("")
-      expect(callArgs).toContain(
-        `[run] Event stream did not close within ${timeoutMs}ms after abort; continuing shutdown.`,
-      )
+      expect(elapsed).toBeGreaterThanOrEqual(timeoutMs - 10)
+      expect(spy.mock.calls.length).toBeGreaterThanOrEqual(1)
    } finally {
      spy.mockRestore()
    }
--- a/src/cli/run/types.ts
+++ b/src/cli/run/types.ts
@@ -34,10 +34,10 @@ export interface RunContext {
 }

 export interface Todo {
-  id: string
-  content: string
-  status: string
-  priority: string
+  id?: string;
+  content: string;
+  status: string;
+  priority: string;
 }

 export interface SessionStatus {
--- a/src/features/AGENTS.md
+++ b/src/features/AGENTS.md
@@ -7,16 +7,17 @@
 ## STRUCTURE
 ```
 features/
-├── background-agent/           # Task lifecycle, concurrency (50 files, 8330 LOC)
-│   ├── manager.ts              # Main task orchestration (1646 lines)
-│   ├── concurrency.ts          # Parallel execution limits per provider/model
-│   └── spawner/                # Task spawning utilities (8 files)
+├── background-agent/           # Task lifecycle, concurrency (56 files, 1701-line manager)
+│   ├── manager.ts              # Main task orchestration (1701 lines)
+│   ├── concurrency.ts          # Parallel execution limits per provider/model (137 lines)
+│   ├── task-history.ts         # Task execution history per parent session (76 lines)
+│   └── spawner/                # Task spawning: factory, starter, resumer, tmux (8 files)
 ├── tmux-subagent/              # Tmux integration (28 files, 3303 LOC)
 │   └── manager.ts              # Pane management, grid planning (350 lines)
 ├── opencode-skill-loader/      # YAML frontmatter skill loading (28 files, 2967 LOC)
 │   ├── loader.ts               # Skill discovery (4 scopes)
-│   ├── skill-directory-loader.ts # Recursive directory scanning
-│   ├── skill-discovery.ts      # getAllSkills() with caching
+│   ├── skill-directory-loader.ts # Recursive directory scanning (maxDepth=2)
+│   ├── skill-discovery.ts      # getAllSkills() with caching + provider gating
 │   └── merger/                 # Skill merging with scope priority
 ├── mcp-oauth/                  # OAuth 2.0 flow for MCP (18 files, 2164 LOC)
 │   ├── provider.ts             # McpOAuthProvider class
@@ -25,10 +26,10 @@ features/
 ├── skill-mcp-manager/          # MCP client lifecycle per session (12 files, 1769 LOC)
 │   └── manager.ts              # SkillMcpManager class (150 lines)
 ├── builtin-skills/             # 5 built-in skills (10 files, 1921 LOC)
-│   └── skills/                 # git-master (1111), playwright, dev-browser, frontend-ui-ux
-├── builtin-commands/           # 6 command templates (11 files, 1511 LOC)
-│   └── templates/              # refactor, ralph-loop, init-deep, handoff, start-work, stop-continuation
-├── claude-tasks/               # Task schema + storage (7 files, 1165 LOC)
+│   └── skills/                 # git-master (1112), playwright (313), dev-browser (222), frontend-ui-ux (80)
+├── builtin-commands/           # 7 command templates (11 files, 1511 LOC)
+│   └── templates/              # refactor (620), init-deep (306), handoff (178), start-work, ralph-loop, stop-continuation
+├── claude-tasks/               # Task schema + storage (7 files) — see claude-tasks/AGENTS.md
 ├── context-injector/           # AGENTS.md, README.md, rules injection (6 files, 809 LOC)
 ├── claude-code-plugin-loader/  # Plugin discovery from .opencode/plugins/ (10 files)
 ├── claude-code-mcp-loader/     # .mcp.json with ${VAR} expansion (6 files)
@@ -44,7 +45,10 @@ features/
 ## KEY PATTERNS

 **Background Agent Lifecycle:**
-Task creation → Queue → Concurrency check → Execute → Monitor/Poll → Notification → Cleanup
+pending → running → completed/error/cancelled/interrupt
+- Concurrency: Per provider/model limits (default: 5), queue-based FIFO
+- Events: session.idle + session.error drive completion detection
+- Key methods: `launch()`, `resume()`, `cancelTask()`, `getTask()`, `getAllDescendantTasks()`

 **Skill Loading Pipeline (4-scope priority):**
 opencode-project (`.opencode/skills/`) > opencode (`~/.config/opencode/skills/`) > project (`.claude/skills/`) > user (`~/.claude/skills/`)
--- a/src/features/background-agent/background-event-handler.ts
+++ b/src/features/background-agent/background-event-handler.ts
@@ -52,7 +52,7 @@ export function handleBackgroundEvent(args: {

  const props = event.properties

-  if (event.type === "message.part.updated") {
+  if (event.type === "message.part.updated" || event.type === "message.part.delta") {
    if (!props || !isRecord(props)) return
    const sessionID = getString(props, "sessionID")
    if (!sessionID) return
--- a/src/features/background-agent/constants.ts
+++ b/src/features/background-agent/constants.ts
@@ -33,10 +33,10 @@ export interface BackgroundEvent {
 }

 export interface Todo {
-  content: string
-  status: string
-  priority: string
-  id: string
+  content: string;
+  status: string;
+  priority: string;
+  id?: string;
 }

 export interface QueueItem {
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -6,6 +6,7 @@ import type { BackgroundTask, ResumeInput } from "./types"
 import { MIN_IDLE_TIME_MS } from "./constants"
 import { BackgroundManager } from "./manager"
 import { ConcurrencyManager } from "./concurrency"
+import { initTaskToastManager, _resetTaskToastManagerForTesting } from "../task-toast-manager/manager"


 const TASK_TTL_MS = 30 * 60 * 1000
@@ -190,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
  return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
 }

+function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
+  return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
+}
+
 function getQueuesByKey(
  manager: BackgroundManager
 ): Map<string, Array<{ task: BackgroundTask; input: import("./types").LaunchInput }>> {
@@ -215,6 +220,23 @@ function stubNotifyParentSession(manager: BackgroundManager): void {
  ;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {}
 }

+function createToastRemoveTaskTracker(): { removeTaskCalls: string[]; resetToastManager: () => void } {
+  _resetTaskToastManagerForTesting()
+  const toastManager = initTaskToastManager({
+    tui: { showToast: async () => {} },
+  } as unknown as PluginInput["client"])
+  const removeTaskCalls: string[] = []
+  const originalRemoveTask = toastManager.removeTask.bind(toastManager)
+  toastManager.removeTask = (taskId: string): void => {
+    removeTaskCalls.push(taskId)
+    originalRemoveTask(taskId)
+  }
+  return {
+    removeTaskCalls,
+    resetToastManager: _resetTaskToastManagerForTesting,
+  }
+}
+
 function getCleanupSignals(): Array<NodeJS.Signals | "beforeExit" | "exit"> {
  const signals: Array<NodeJS.Signals | "beforeExit" | "exit"> = ["SIGINT", "SIGTERM", "beforeExit", "exit"]
  if (process.platform === "win32") {
@@ -894,7 +916,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
 })

 describe("BackgroundManager.notifyParentSession - aborted parent", () => {
-  test("should skip notification when parent session is aborted", async () => {
+  test("should fall back and still notify when parent session messages are aborted", async () => {
    //#given
    let promptCalled = false
    const promptMock = async () => {
@@ -933,7 +955,7 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
      .notifyParentSession(task)

    //#then
-    expect(promptCalled).toBe(false)
+    expect(promptCalled).toBe(true)

    manager.shutdown()
  })
@@ -1770,6 +1792,32 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
      const pendingSet = pendingByParent.get(task.parentSessionID)
      expect(pendingSet?.has(task.id) ?? false).toBe(false)
    })
+
+    test("should remove task from toast manager when notification is skipped", async () => {
+      //#given
+      const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+      const manager = createBackgroundManager()
+      const task = createMockTask({
+        id: "task-cancel-skip-notification",
+        sessionID: "session-cancel-skip-notification",
+        parentSessionID: "parent-cancel-skip-notification",
+        status: "running",
+      })
+      getTaskMap(manager).set(task.id, task)
+
+      //#when
+      const cancelled = await manager.cancelTask(task.id, {
+        source: "test",
+        skipNotification: true,
+      })
+
+      //#then
+      expect(cancelled).toBe(true)
+      expect(removeTaskCalls).toContain(task.id)
+
+      manager.shutdown()
+      resetToastManager()
+    })
  })

  describe("multiple keys process in parallel", () => {
@@ -2730,6 +2778,43 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {

    manager.shutdown()
  })
+
+  test("should remove tasks from toast manager when session is deleted", () => {
+    //#given
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const parentSessionID = "session-parent-toast"
+    const childTask = createMockTask({
+      id: "task-child-toast",
+      sessionID: "session-child-toast",
+      parentSessionID,
+      status: "running",
+    })
+    const grandchildTask = createMockTask({
+      id: "task-grandchild-toast",
+      sessionID: "session-grandchild-toast",
+      parentSessionID: "session-child-toast",
+      status: "pending",
+      startedAt: undefined,
+      queuedAt: new Date(),
+    })
+    const taskMap = getTaskMap(manager)
+    taskMap.set(childTask.id, childTask)
+    taskMap.set(grandchildTask.id, grandchildTask)
+
+    //#when
+    manager.handleEvent({
+      type: "session.deleted",
+      properties: { info: { id: parentSessionID } },
+    })
+
+    //#then
+    expect(removeTaskCalls).toContain(childTask.id)
+    expect(removeTaskCalls).toContain(grandchildTask.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
 })

 describe("BackgroundManager.handleEvent - session.error", () => {
@@ -2777,6 +2862,35 @@ describe("BackgroundManager.handleEvent - session.error", () => {
    manager.shutdown()
  })

+  test("removes errored task from toast manager", () => {
+    //#given
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const sessionID = "ses_error_toast"
+    const task = createMockTask({
+      id: "task-session-error-toast",
+      sessionID,
+      parentSessionID: "parent-session",
+      status: "running",
+    })
+    getTaskMap(manager).set(task.id, task)
+
+    //#when
+    manager.handleEvent({
+      type: "session.error",
+      properties: {
+        sessionID,
+        error: { name: "UnknownError", message: "boom" },
+      },
+    })
+
+    //#then
+    expect(removeTaskCalls).toContain(task.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
+
  test("ignores session.error for non-running tasks", () => {
    //#given
    const manager = createBackgroundManager()
@@ -2922,13 +3036,32 @@ describe("BackgroundManager.pruneStaleTasksAndNotifications - removes pruned tas

    manager.shutdown()
  })
+
+  test("removes stale task from toast manager", () => {
+    //#given
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const staleTask = createMockTask({
+      id: "task-stale-toast",
+      sessionID: "session-stale-toast",
+      parentSessionID: "parent-session",
+      status: "running",
+      startedAt: new Date(Date.now() - 31 * 60 * 1000),
+    })
+    getTaskMap(manager).set(staleTask.id, staleTask)
+
+    //#when
+    pruneStaleTasksAndNotificationsForTest(manager)
+
+    //#then
+    expect(removeTaskCalls).toContain(staleTask.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
 })

 describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
-  function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
-    return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
-  }
-
  function setCompletionTimer(manager: BackgroundManager, taskId: string): void {
    const completionTimers = getCompletionTimers(manager)
    const timer = setTimeout(() => {
@@ -3413,4 +3546,134 @@ describe("BackgroundManager.handleEvent - non-tool event lastUpdate", () => {
    //#then - task should still be running (text event refreshed lastUpdate)
    expect(task.status).toBe("running")
  })
+
+  test("should refresh lastUpdate on message.part.delta events (OpenCode >=1.2.0)", async () => {
+    //#given - a running task with stale lastUpdate
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
+    stubNotifyParentSession(manager)
+
+    const task: BackgroundTask = {
+      id: "task-delta-1",
+      sessionID: "session-delta-1",
+      parentSessionID: "parent-1",
+      parentMessageID: "msg-1",
+      description: "Reasoning task with delta events",
+      prompt: "Extended thinking",
+      agent: "oracle",
+      status: "running",
+      startedAt: new Date(Date.now() - 600_000), // started 10 minutes ago
+      progress: {
+        toolCalls: 0,
+        lastUpdate: new Date(Date.now() - 300_000), // 5 minutes old, beyond the 180 s staleTimeoutMs configured above
+      },
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - a message.part.delta event arrives (reasoning-delta or text-delta in OpenCode >=1.2.0)
+    manager.handleEvent({
+      type: "message.part.delta",
+      properties: { sessionID: "session-delta-1", field: "text", delta: "thinking..." },
+    })
+    await manager["checkAndInterruptStaleTasks"]() // bracket access, presumably because the method is not public
+
+    //#then - task should still be running (delta event refreshed lastUpdate)
+    expect(task.status).toBe("running")
+  })
+})
+
+describe("BackgroundManager regression fixes - resume and aborted notification", () => {
+  test("should keep resumed task in memory after previous completion timer deadline", async () => {
+    //#given
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+
+    const task: BackgroundTask = {
+      id: "task-resume-timer-regression",
+      sessionID: "session-resume-timer-regression",
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "resume timer regression",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      concurrencyGroup: "explore",
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    const completionTimers = getCompletionTimers(manager)
+    const timer = setTimeout(() => {
+      completionTimers.delete(task.id)
+      getTaskMap(manager).delete(task.id)
+    }, 25) // stand-in for a pending completion-cleanup timer that would evict the task after 25 ms
+    completionTimers.set(task.id, timer)
+
+    //#when
+    await manager.resume({
+      sessionId: "session-resume-timer-regression",
+      prompt: "resume task",
+      parentSessionID: "parent-session-2",
+      parentMessageID: "msg-2",
+    })
+    await new Promise((resolve) => setTimeout(resolve, 60)) // wait well past the 25 ms timer deadline
+
+    //#then
+    expect(getTaskMap(manager).has(task.id)).toBe(true) // the stale timer must not have evicted the resumed task
+    expect(completionTimers.has(task.id)).toBe(false) // and its entry must be gone from the timer map
+
+    manager.shutdown()
+  })
+
+  test("should start cleanup timer even when promptAsync aborts", async () => {
+    //#given
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => {
+          const error = new Error("User aborted")
+          error.name = "MessageAbortedError" // presumably the name isAbortedSessionError() recognises (TODO confirm)
+          throw error
+        },
+        abort: async () => ({}),
+        messages: async () => ({ data: [] }), // parent session has no messages to derive agent/model from
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    const task: BackgroundTask = {
+      id: "task-aborted-cleanup-regression",
+      sessionID: "session-aborted-cleanup-regression",
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "aborted prompt cleanup regression",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+    }
+    getTaskMap(manager).set(task.id, task)
+    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id])) // this task is the only pending one for its parent
+
+    //#when
+    await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> }).notifyParentSession(task)
+
+    //#then
+    expect(getCompletionTimers(manager).has(task.id)).toBe(true) // cleanup must still be scheduled despite the aborted prompt
+
+    manager.shutdown()
+  })
 })
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -6,7 +6,7 @@ import type {
  ResumeInput,
 } from "./types"
 import { TaskHistory } from "./task-history"
-import { log, getAgentToolRestrictions, promptWithModelSuggestionRetry } from "../../shared"
+import { log, getAgentToolRestrictions, normalizeSDKResponse, promptWithModelSuggestionRetry } from "../../shared"
 import { setSessionTools } from "../../shared/session-tools-store"
 import { ConcurrencyManager } from "./concurrency"
 import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
@@ -16,7 +16,6 @@ import {
  DEFAULT_STALE_TIMEOUT_MS,
  MIN_IDLE_TIME_MS,
  MIN_RUNTIME_BEFORE_STALE_MS,
-  MIN_STABILITY_TIME_MS,
  POLLING_INTERVAL_MS,
  TASK_CLEANUP_DELAY_MS,
  TASK_TTL_MS,
@@ -528,6 +527,12 @@ export class BackgroundManager {
      return existingTask
    }

+    const completionTimer = this.completionTimers.get(existingTask.id)
+    if (completionTimer) {
+      clearTimeout(completionTimer)
+      this.completionTimers.delete(existingTask.id)
+    }
+
    // Re-acquire concurrency using the persisted concurrency group
    const concurrencyKey = existingTask.concurrencyGroup ?? existingTask.agent
    await this.concurrencyManager.acquire(concurrencyKey)
@@ -645,7 +650,7 @@ export class BackgroundManager {
      const response = await this.client.session.todo({
        path: { id: sessionID },
      })
-      const todos = (response.data ?? response) as Todo[]
+      const todos = normalizeSDKResponse(response, [] as Todo[], { preferResponseOnMissingData: true })
      if (!todos || todos.length === 0) return false

      const incomplete = todos.filter(
@@ -660,7 +665,7 @@ export class BackgroundManager {
  handleEvent(event: Event): void {
    const props = event.properties

-    if (event.type === "message.part.updated") {
+    if (event.type === "message.part.updated" || event.type === "message.part.delta") {
      if (!props || typeof props !== "object" || !("sessionID" in props)) return
      const partInfo = props as unknown as MessagePartInfo
      const sessionID = partInfo?.sessionID
@@ -783,6 +788,10 @@ export class BackgroundManager {
      this.cleanupPendingByParent(task)
      this.tasks.delete(task.id)
      this.clearNotificationsForTask(task.id)
+      const toastManager = getTaskToastManager()
+      if (toastManager) {
+        toastManager.removeTask(task.id)
+      }
      if (task.sessionID) {
        subagentSessions.delete(task.sessionID)
      }
@@ -830,6 +839,10 @@ export class BackgroundManager {
        this.cleanupPendingByParent(task)
        this.tasks.delete(task.id)
        this.clearNotificationsForTask(task.id)
+        const toastManager = getTaskToastManager()
+        if (toastManager) {
+          toastManager.removeTask(task.id)
+        }
        if (task.sessionID) {
          subagentSessions.delete(task.sessionID)
        }
@@ -861,7 +874,7 @@ export class BackgroundManager {
        path: { id: sessionID },
      })

-      const messages = response.data ?? []
+      const messages = normalizeSDKResponse(response, [] as Array<{ info?: { role?: string } }>, { preferResponseOnMissingData: true })
      
      // Check for at least one assistant or tool message
      const hasAssistantOrToolMessage = messages.some(
@@ -1000,6 +1013,10 @@ export class BackgroundManager {
    }

    if (options?.skipNotification) {
+      const toastManager = getTaskToastManager()
+      if (toastManager) {
+        toastManager.removeTask(task.id)
+      }
      log(`[background-agent] Task cancelled via ${source} (notification skipped):`, task.id)
      return true
    }
@@ -1226,9 +1243,9 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea

    try {
      const messagesResp = await this.client.session.messages({ path: { id: task.parentSessionID } })
-      const messages = (messagesResp.data ?? []) as Array<{
+      const messages = normalizeSDKResponse(messagesResp, [] as Array<{
        info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
-      }>
+      }>)
      for (let i = messages.length - 1; i >= 0; i--) {
        const info = messages[i].info
        if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
@@ -1239,11 +1256,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      }
    } catch (error) {
      if (this.isAbortedSessionError(error)) {
-        log("[background-agent] Parent session aborted, skipping notification:", {
+        log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
          taskId: task.id,
          parentSessionID: task.parentSessionID,
        })
-        return
      }
      const messageDir = getMessageDir(task.parentSessionID)
      const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
@@ -1277,13 +1293,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      })
    } catch (error) {
      if (this.isAbortedSessionError(error)) {
-        log("[background-agent] Parent session aborted, skipping notification:", {
+        log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
          taskId: task.id,
          parentSessionID: task.parentSessionID,
        })
-        return
+      } else {
+        log("[background-agent] Failed to send notification:", error)
      }
-      log("[background-agent] Failed to send notification:", error)
    }

    if (allComplete) {
@@ -1413,6 +1429,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          }
        }
        this.clearNotificationsForTask(taskId)
+        const toastManager = getTaskToastManager()
+        if (toastManager) {
+          toastManager.removeTask(taskId)
+        }
        this.tasks.delete(taskId)
        if (task.sessionID) {
          subagentSessions.delete(task.sessionID)
@@ -1452,7 +1472,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      const sessionID = task.sessionID
      if (!startedAt || !sessionID) continue

-      const sessionIsRunning = allStatuses[sessionID]?.type === "running"
+      const sessionStatus = allStatuses[sessionID]?.type
+      const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
      const runtime = now - startedAt.getTime()

      if (!task.progress?.lastUpdate) {
@@ -1513,7 +1534,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    this.pruneStaleTasksAndNotifications()

    const statusResult = await this.client.session.status()
-    const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+    const allStatuses = normalizeSDKResponse(statusResult, {} as Record<string, { type: string }>)

    await this.checkAndInterruptStaleTasks(allStatuses)

--- a/src/features/background-agent/message-dir.ts
+++ b/src/features/background-agent/message-dir.ts
@@ -1 +1 @@
-export { getMessageDir } from "./message-storage-locator"
+export { getMessageDir } from "../../shared"
--- a/src/features/background-agent/message-storage-locator.ts
+++ b/src/features/background-agent/message-storage-locator.ts
@@ -1,17 +0,0 @@
-import { existsSync, readdirSync } from "node:fs"
-import { join } from "node:path"
-import { MESSAGE_STORAGE } from "../hook-message-injector"
-
-export function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
-
-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-
-  return null
-}
--- a/src/features/background-agent/notify-parent-session.ts
+++ b/src/features/background-agent/notify-parent-session.ts
@@ -1,4 +1,4 @@
-import { log } from "../../shared"
+import { log, normalizeSDKResponse } from "../../shared"

 import { findNearestMessageWithFields } from "../hook-message-injector"
 import { getTaskToastManager } from "../task-toast-manager"
@@ -106,7 +106,7 @@ export async function notifyParentSession(args: {
    const messagesResp = await client.session.messages({
      path: { id: task.parentSessionID },
    })
-    const raw = (messagesResp as { data?: unknown }).data ?? []
+    const raw = normalizeSDKResponse(messagesResp, [] as unknown[])
    const messages = Array.isArray(raw) ? raw : []

    for (let i = messages.length - 1; i >= 0; i--) {
--- a/src/features/background-agent/parent-session-context-resolver.ts
+++ b/src/features/background-agent/parent-session-context-resolver.ts
@@ -1,7 +1,7 @@
 import type { OpencodeClient } from "./constants"
 import type { BackgroundTask } from "./types"
 import { findNearestMessageWithFields } from "../hook-message-injector"
-import { getMessageDir } from "./message-storage-locator"
+import { getMessageDir } from "../../shared"

 type AgentModel = { providerID: string; modelID: string }

--- a/src/features/background-agent/poll-running-tasks.ts
+++ b/src/features/background-agent/poll-running-tasks.ts
@@ -1,4 +1,4 @@
-import { log } from "../../shared"
+import { log, normalizeSDKResponse } from "../../shared"

 import {
  MIN_STABILITY_TIME_MS,
@@ -56,7 +56,7 @@ export async function pollRunningTasks(args: {
  pruneStaleTasksAndNotifications()

  const statusResult = await client.session.status()
-  const allStatuses = ((statusResult as { data?: unknown }).data ?? {}) as SessionStatusMap
+  const allStatuses = normalizeSDKResponse(statusResult, {} as SessionStatusMap)

  await checkAndInterruptStaleTasks(allStatuses)

@@ -95,10 +95,9 @@ export async function pollRunningTasks(args: {
        continue
      }

-      const messagesPayload = Array.isArray(messagesResult)
-        ? messagesResult
-        : (messagesResult as { data?: unknown }).data
-      const messages = asSessionMessages(messagesPayload)
+      const messages = asSessionMessages(normalizeSDKResponse(messagesResult, [] as SessionMessage[], {
+        preferResponseOnMissingData: true,
+      }))
      const assistantMsgs = messages.filter((m) => m.info?.role === "assistant")

      let toolCalls = 0
@@ -139,7 +138,7 @@ export async function pollRunningTasks(args: {
          task.stablePolls = (task.stablePolls ?? 0) + 1
          if (task.stablePolls >= 3) {
            const recheckStatus = await client.session.status()
-            const recheckData = ((recheckStatus as { data?: unknown }).data ?? {}) as SessionStatusMap
+            const recheckData = normalizeSDKResponse(recheckStatus, {} as SessionStatusMap)
            const currentStatus = recheckData[sessionID]

            if (currentStatus?.type !== "idle") {
--- a/src/features/background-agent/result-handler.ts
+++ b/src/features/background-agent/result-handler.ts
@@ -1,6 +1,6 @@
 export type { ResultHandlerContext } from "./result-handler-context"
 export { formatDuration } from "./duration-formatter"
-export { getMessageDir } from "./message-storage-locator"
+export { getMessageDir } from "../../shared"
 export { checkSessionTodos } from "./session-todo-checker"
 export { validateSessionHasOutput } from "./session-output-validator"
 export { tryCompleteTask } from "./background-task-completer"
--- a/src/features/background-agent/session-todo-checker.ts
+++ b/src/features/background-agent/session-todo-checker.ts
@@ -4,7 +4,7 @@ function isTodo(value: unknown): value is Todo {
  if (typeof value !== "object" || value === null) return false
  const todo = value as Record<string, unknown>
  return (
-    typeof todo["id"] === "string" &&
+    (typeof todo["id"] === "string" || todo["id"] === undefined) &&
    typeof todo["content"] === "string" &&
    typeof todo["status"] === "string" &&
    typeof todo["priority"] === "string"
--- a/src/features/background-agent/session-validator.ts
+++ b/src/features/background-agent/session-validator.ts
@@ -1,4 +1,4 @@
-import { log } from "../../shared"
+import { log, normalizeSDKResponse } from "../../shared"

 import type { OpencodeClient } from "./opencode-client"

@@ -51,7 +51,9 @@ export async function validateSessionHasOutput(
      path: { id: sessionID },
    })

-    const messages = asSessionMessages((response as { data?: unknown }).data ?? response)
+    const messages = asSessionMessages(normalizeSDKResponse(response, [] as SessionMessage[], {
+      preferResponseOnMissingData: true,
+    }))

    const hasAssistantOrToolMessage = messages.some(
      (m) => m.info?.role === "assistant" || m.info?.role === "tool"
@@ -97,8 +99,9 @@ export async function checkSessionTodos(
      path: { id: sessionID },
    })

-    const raw = (response as { data?: unknown }).data ?? response
-    const todos = Array.isArray(raw) ? (raw as Todo[]) : []
+    const todos = normalizeSDKResponse(response, [] as Todo[], {
+      preferResponseOnMissingData: true,
+    })
    if (todos.length === 0) return false

    const incomplete = todos.filter(
--- a/src/features/background-agent/task-poller.test.ts
+++ b/src/features/background-agent/task-poller.test.ts
@@ -146,14 +146,59 @@ describe("checkAndInterruptStaleTasks", () => {
      },
    })

-    //#when — session status is "running"
+    //#when — session status is "busy" (OpenCode's actual status for active LLM processing)
    await checkAndInterruptStaleTasks({
      tasks: [task],
      client: mockClient as never,
      config: { staleTimeoutMs: 180_000 },
      concurrencyManager: mockConcurrencyManager as never,
      notifyParentSession: mockNotify,
-      sessionStatuses: { "ses-1": { type: "running" } },
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — task should survive because session is actively busy
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy session task even with very old lastUpdate", async () => {
+    //#given — lastUpdate is 15min old, but session is still busy
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 900_000), // 900 000 ms = 15 minutes ago
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 900_000),
+      },
+    })
+
+    //#when — session busy, lastUpdate far exceeds any timeout
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000, messageStalenessTimeoutMs: 600_000 }, // 3 min and 10 min limits, both shorter than the 15 min of silence
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — busy sessions are NEVER stale-killed (babysitter + TTL prune handle these)
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy session even with no progress (undefined lastUpdate)", async () => {
+    //#given — task has no progress at all, but session is busy
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 15 * 60 * 1000),
+      progress: undefined,
+    })
+
+    //#when — session is busy
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { messageStalenessTimeoutMs: 600_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
    })

    //#then — task should survive because session is actively running
@@ -255,6 +300,75 @@ describe("checkAndInterruptStaleTasks", () => {
    expect(task.error).toContain("Stale timeout")
  })

+  it("should NOT interrupt task when session is busy (OpenCode status), even if lastUpdate exceeds stale timeout", async () => {
+    //#given — lastUpdate is 5min old but session is "busy" (OpenCode's actual status for active sessions)
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000), // 300 000 ms = 5 minutes ago
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session status is "busy" (not "running" — OpenCode uses "busy" for active LLM processing)
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 }, // 3 minute stale timeout, already exceeded by lastUpdate
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — "busy" sessions must be protected from stale-kill
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt task when session is in retry state", async () => {
+    //#given — lastUpdate is 5min old but session is retrying
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000), // 300 000 ms = 5 minutes ago
+      progress: {
+        toolCalls: 1,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session status is "retry" (OpenCode retries on transient API errors)
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 }, // 3 minute stale timeout, already exceeded by lastUpdate
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "retry" } }, // any status other than "idle" counts as still running
+    })
+
+    //#then — retry sessions must be protected from stale-kill
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy thinking-model session before any tokens stream (undefined lastUpdate)", async () => {
+    //#given — no progress at all, session is "busy" (thinking model with no streamed tokens yet)
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 15 * 60 * 1000), // started 15 minutes ago
+      progress: undefined,
+    })
+
+    //#when — session is busy
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { messageStalenessTimeoutMs: 600_000 }, // 10 minute limit, shorter than the 15 minute runtime
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — busy sessions with no progress must survive
+    expect(task.status).toBe("running")
+  })
+
  it("should release concurrency key when interrupting a never-updated task", async () => {
    //#given
    const releaseMock = mock(() => {})
--- a/src/features/background-agent/task-poller.ts
+++ b/src/features/background-agent/task-poller.ts
@@ -80,7 +80,8 @@ export async function checkAndInterruptStaleTasks(args: {
    const sessionID = task.sessionID
    if (!startedAt || !sessionID) continue

-    const sessionIsRunning = sessionStatuses?.[sessionID]?.type === "running"
+    const sessionStatus = sessionStatuses?.[sessionID]?.type
+    const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
    const runtime = now - startedAt.getTime()

    if (!task.progress?.lastUpdate) {
--- a/src/features/claude-code-mcp-loader/loader.test.ts
+++ b/src/features/claude-code-mcp-loader/loader.test.ts
@@ -229,5 +229,109 @@ describe("getSystemMcpServerNames", () => {
      } finally {
        process.chdir(originalCwd)
      }
-    })
+     })
 })
+
+describe("loadMcpConfigs", () => {
+  beforeEach(() => {
+    mkdirSync(TEST_DIR, { recursive: true })
+    mkdirSync(TEST_HOME, { recursive: true })
+    mock.module("os", () => ({
+      homedir: () => TEST_HOME, // point homedir() at the sandbox home directory
+      tmpdir,
+    }))
+    mock.module("../../shared", () => ({
+      getClaudeConfigDir: () => join(TEST_HOME, ".claude"),
+    }))
+    mock.module("../../shared/logger", () => ({
+      log: () => {}, // silence logging during these tests
+    }))
+  })
+
+  afterEach(() => {
+    mock.restore() // NOTE(review): Bun documents that mock.restore() does not reset mock.module() overrides; confirm these module mocks cannot leak into other test files
+    rmSync(TEST_DIR, { recursive: true, force: true })
+  })
+
+  it("should skip MCPs in disabledMcps list", async () => {
+    //#given
+    const mcpConfig = {
+      mcpServers: {
+        playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
+        sqlite: { command: "uvx", args: ["mcp-server-sqlite"] },
+        active: { command: "npx", args: ["some-mcp"] },
+      },
+    }
+    writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(mcpConfig))
+
+    const originalCwd = process.cwd()
+    process.chdir(TEST_DIR) // presumably the loader resolves .mcp.json relative to the working directory (TODO confirm)
+
+    try {
+      //#when
+      const { loadMcpConfigs } = await import("./loader") // imported lazily, after beforeEach has registered the module mocks
+      const result = await loadMcpConfigs(["playwright", "sqlite"]) // disable two of the three configured servers
+
+      //#then
+      expect(result.servers).not.toHaveProperty("playwright")
+      expect(result.servers).not.toHaveProperty("sqlite")
+      expect(result.servers).toHaveProperty("active")
+      expect(result.loadedServers.find((s) => s.name === "playwright")).toBeUndefined()
+      expect(result.loadedServers.find((s) => s.name === "sqlite")).toBeUndefined()
+      expect(result.loadedServers.find((s) => s.name === "active")).toBeDefined()
+    } finally {
+      process.chdir(originalCwd) // always restore the working directory
+    }
+  })
+
+  it("should load all MCPs when disabledMcps is empty", async () => {
+    //#given
+    const mcpConfig = {
+      mcpServers: {
+        playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
+        active: { command: "npx", args: ["some-mcp"] },
+      },
+    }
+    writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(mcpConfig))
+
+    const originalCwd = process.cwd()
+    process.chdir(TEST_DIR)
+
+    try {
+      //#when
+      const { loadMcpConfigs } = await import("./loader")
+      const result = await loadMcpConfigs([]) // explicit empty disable list
+
+      //#then
+      expect(result.servers).toHaveProperty("playwright")
+      expect(result.servers).toHaveProperty("active")
+    } finally {
+      process.chdir(originalCwd)
+    }
+  })
+
+  it("should load all MCPs when disabledMcps is not provided", async () => {
+    //#given
+    const mcpConfig = {
+      mcpServers: {
+        playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
+      },
+    }
+    writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(mcpConfig))
+
+    const originalCwd = process.cwd()
+    process.chdir(TEST_DIR)
+
+    try {
+      //#when
+      const { loadMcpConfigs } = await import("./loader")
+      const result = await loadMcpConfigs() // argument omitted entirely
+
+      //#then
+      expect(result.servers).toHaveProperty("playwright")
+    } finally {
+      process.chdir(originalCwd)
+    }
+  })
+})
+
--- a/src/features/claude-code-mcp-loader/loader.ts
+++ b/src/features/claude-code-mcp-loader/loader.ts
@@ -68,16 +68,24 @@ export function getSystemMcpServerNames(): Set<string> {
  return names
 }

-export async function loadMcpConfigs(): Promise<McpLoadResult> {
+export async function loadMcpConfigs(
+  disabledMcps: string[] = []
+): Promise<McpLoadResult> {
  const servers: McpLoadResult["servers"] = {}
  const loadedServers: LoadedMcpServer[] = []
  const paths = getMcpConfigPaths()
+  const disabledSet = new Set(disabledMcps)

  for (const { path, scope } of paths) {
    const config = await loadMcpConfigFile(path)
    if (!config?.mcpServers) continue

    for (const [name, serverConfig] of Object.entries(config.mcpServers)) {
+      if (disabledSet.has(name)) {
+        log(`Skipping MCP "${name}" (in disabled_mcps)`, { path })
+        continue
+      }
+
      if (serverConfig.disabled) {
        log(`Disabling MCP server "${name}"`, { path })
        delete servers[name]
--- a/src/features/claude-tasks/AGENTS.md
+++ b/src/features/claude-tasks/AGENTS.md
@@ -2,7 +2,7 @@

 ## OVERVIEW

-Claude Code compatible task schema and storage. Core task management with file-based persistence and atomic writes.
+Claude Code compatible task schema and storage. Core task management with file-based persistence, atomic writes, and OpenCode todo sync.

 ## STRUCTURE
 ```
@@ -50,39 +50,16 @@ interface Task {

 ## TODO SYNC

-Automatic bidirectional synchronization between tasks and OpenCode's todo system.
-
-| Function | Purpose |
-|----------|---------|
-| `syncTaskToTodo(task)` | Convert Task to TodoInfo, returns `null` for deleted tasks |
-| `syncTaskTodoUpdate(ctx, task, sessionID, writer?)` | Fetch current todos, update specific task, write back |
-| `syncAllTasksToTodos(ctx, tasks, sessionID?)` | Bulk sync multiple tasks to todos |
-
-### Status Mapping
+Automatic bidirectional sync between tasks and OpenCode's todo system.

 | Task Status | Todo Status |
 |-------------|-------------|
 | `pending` | `pending` |
 | `in_progress` | `in_progress` |
 | `completed` | `completed` |
-| `deleted` | `null` (removed from todos) |
+| `deleted` | `null` (removed) |

-### Field Mapping
-
-| Task Field | Todo Field |
-|------------|------------|
-| `task.id` | `todo.id` |
-| `task.subject` | `todo.content` |
-| `task.status` (mapped) | `todo.status` |
-| `task.metadata.priority` | `todo.priority` |
-
-Priority values: `"low"`, `"medium"`, `"high"`
-
-### Automatic Sync Triggers
-
-Sync occurs automatically on:
- `task_create` — new task added to todos
- `task_update` — task changes reflected in todos
+Sync triggers: `task_create`, `task_update`.

 ## ANTI-PATTERNS

--- a/src/features/hook-message-injector/constants.ts
+++ b/src/features/hook-message-injector/constants.ts
@@ -1,6 +1 @@
-import { join } from "node:path"
-import { getOpenCodeStorageDir } from "../../shared/data-path"
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir()
-export const MESSAGE_STORAGE = join(OPENCODE_STORAGE, "message")
-export const PART_STORAGE = join(OPENCODE_STORAGE, "part")
+export { OPENCODE_STORAGE, MESSAGE_STORAGE, PART_STORAGE } from "../../shared"
--- a/src/features/hook-message-injector/index.ts
+++ b/src/features/hook-message-injector/index.ts
@@ -1,4 +1,11 @@
-export { injectHookMessage, findNearestMessageWithFields, findFirstMessageWithAgent } from "./injector"
+export {
+  injectHookMessage,
+  findNearestMessageWithFields,
+  findFirstMessageWithAgent,
+  findNearestMessageWithFieldsFromSDK,
+  findFirstMessageWithAgentFromSDK,
+  resolveMessageContext,
+} from "./injector"
 export type { StoredMessage } from "./injector"
 export type { MessageMeta, OriginalMessageContext, TextPart, ToolPermission } from "./types"
 export { MESSAGE_STORAGE } from "./constants"
--- a/src/features/hook-message-injector/injector.test.ts
+++ b/src/features/hook-message-injector/injector.test.ts
@@ -0,0 +1,237 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "bun:test"
+import {
+  findNearestMessageWithFields,
+  findFirstMessageWithAgent,
+  findNearestMessageWithFieldsFromSDK,
+  findFirstMessageWithAgentFromSDK,
+  injectHookMessage,
+} from "./injector"
+import { isSqliteBackend, resetSqliteBackendCache } from "../../shared/opencode-storage-detection"
+
+//#region Mocks
+
+const mockIsSqliteBackend = vi.fn()
+
+vi.mock("../../shared/opencode-storage-detection", () => ({
+  isSqliteBackend: mockIsSqliteBackend,
+  resetSqliteBackendCache: () => {},
+}))
+
+//#endregion
+
+//#region Test Helpers
+
+function createMockClient(messages: Array<{
+  info?: {
+    agent?: string
+    model?: { providerID?: string; modelID?: string; variant?: string }
+    providerID?: string
+    modelID?: string
+    tools?: Record<string, boolean>
+  }
+}>): {
+  session: {
+    messages: (opts: { path: { id: string } }) => Promise<{ data: typeof messages }>
+  }
+} {
+  return {
+    session: {
+      messages: async () => ({ data: messages }),
+    },
+  }
+}
+
+//#endregion
+
+describe("findNearestMessageWithFieldsFromSDK", () => {
+  it("returns message with all fields when available", async () => {
+    const mockClient = createMockClient([
+      { info: { agent: "sisyphus", model: { providerID: "anthropic", modelID: "claude-opus-4" } } },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toEqual({
+      agent: "sisyphus",
+      model: { providerID: "anthropic", modelID: "claude-opus-4" },
+      tools: undefined,
+    })
+  })
+
+  it("returns message with assistant shape (providerID/modelID directly on info)", async () => {
+    const mockClient = createMockClient([
+      { info: { agent: "sisyphus", providerID: "openai", modelID: "gpt-5" } },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toEqual({
+      agent: "sisyphus",
+      model: { providerID: "openai", modelID: "gpt-5" },
+      tools: undefined,
+    })
+  })
+
+  it("returns nearest (most recent) message with all fields", async () => {
+    const mockClient = createMockClient([
+      { info: { agent: "old-agent", model: { providerID: "old", modelID: "model" } } },
+      { info: { agent: "new-agent", model: { providerID: "new", modelID: "model" } } },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result?.agent).toBe("new-agent")
+  })
+
+  it("falls back to message with partial fields", async () => {
+    const mockClient = createMockClient([
+      { info: { agent: "partial-agent" } },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result?.agent).toBe("partial-agent")
+  })
+
+  it("returns null when no messages have useful fields", async () => {
+    const mockClient = createMockClient([
+      { info: {} },
+      { info: {} },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBeNull()
+  })
+
+  it("returns null when messages array is empty", async () => {
+    const mockClient = createMockClient([])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBeNull()
+  })
+
+  it("returns null on SDK error", async () => {
+    const mockClient = {
+      session: {
+        messages: async () => {
+          throw new Error("SDK error")
+        },
+      },
+    }
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBeNull()
+  })
+
+  it("includes tools when available", async () => {
+    const mockClient = createMockClient([
+      {
+        info: {
+          agent: "sisyphus",
+          model: { providerID: "anthropic", modelID: "claude-opus-4" },
+          tools: { edit: true, write: false },
+        },
+      },
+    ])
+
+    const result = await findNearestMessageWithFieldsFromSDK(mockClient as any, "ses_123")
+
+    expect(result?.tools).toEqual({ edit: true, write: false })
+  })
+})
+
+describe("findFirstMessageWithAgentFromSDK", () => {
+  it("returns agent from first message", async () => {
+    const mockClient = createMockClient([
+      { info: { agent: "first-agent" } },
+      { info: { agent: "second-agent" } },
+    ])
+
+    const result = await findFirstMessageWithAgentFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBe("first-agent")
+  })
+
+  it("skips messages without agent field", async () => {
+    const mockClient = createMockClient([
+      { info: {} },
+      { info: { agent: "first-real-agent" } },
+    ])
+
+    const result = await findFirstMessageWithAgentFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBe("first-real-agent")
+  })
+
+  it("returns null when no messages have agent", async () => {
+    const mockClient = createMockClient([
+      { info: {} },
+      { info: {} },
+    ])
+
+    const result = await findFirstMessageWithAgentFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBeNull()
+  })
+
+  it("returns null on SDK error", async () => {
+    const mockClient = {
+      session: {
+        messages: async () => {
+          throw new Error("SDK error")
+        },
+      },
+    }
+
+    const result = await findFirstMessageWithAgentFromSDK(mockClient as any, "ses_123")
+
+    expect(result).toBeNull()
+  })
+})
+
+describe("injectHookMessage", () => {
+  beforeEach(() => {
+    vi.clearAllMocks()
+  })
+
+  afterEach(() => {
+    vi.clearAllMocks()
+  })
+
+  it("returns false and logs warning on beta/SQLite backend", () => {
+    mockIsSqliteBackend.mockReturnValue(true)
+
+    const result = injectHookMessage("ses_123", "test content", {
+      agent: "sisyphus",
+      model: { providerID: "anthropic", modelID: "claude-opus-4" },
+    })
+
+    expect(result).toBe(false)
+    expect(mockIsSqliteBackend).toHaveBeenCalled()
+  })
+
+  it("returns false for empty hook content", () => {
+    mockIsSqliteBackend.mockReturnValue(false)
+
+    const result = injectHookMessage("ses_123", "", {
+      agent: "sisyphus",
+      model: { providerID: "anthropic", modelID: "claude-opus-4" },
+    })
+
+    expect(result).toBe(false)
+  })
+
+  it("returns false for whitespace-only hook content", () => {
+    mockIsSqliteBackend.mockReturnValue(false)
+
+    const result = injectHookMessage("ses_123", "   \n\t  ", {
+      agent: "sisyphus",
+      model: { providerID: "anthropic", modelID: "claude-opus-4" },
+    })
+
+    expect(result).toBe(false)
+  })
+})
--- a/src/features/hook-message-injector/injector.ts
+++ b/src/features/hook-message-injector/injector.ts
@@ -1,8 +1,11 @@
 import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs"
 import { join } from "node:path"
+import type { PluginInput } from "@opencode-ai/plugin"
 import { MESSAGE_STORAGE, PART_STORAGE } from "./constants"
 import type { MessageMeta, OriginalMessageContext, TextPart, ToolPermission } from "./types"
 import { log } from "../../shared/logger"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
+import { normalizeSDKResponse } from "../../shared"

 export interface StoredMessage {
  agent?: string
@@ -10,14 +13,130 @@ export interface StoredMessage {
  tools?: Record<string, ToolPermission>
 }

+type OpencodeClient = PluginInput["client"]
+
+interface SDKMessage {
+  info?: {
+    agent?: string
+    model?: {
+      providerID?: string
+      modelID?: string
+      variant?: string
+    }
+    providerID?: string
+    modelID?: string
+    tools?: Record<string, ToolPermission>
+  }
+}
+
+function convertSDKMessageToStoredMessage(msg: SDKMessage): StoredMessage | null {
+  const info = msg.info
+  if (!info) return null
+
+  const providerID = info.model?.providerID ?? info.providerID
+  const modelID = info.model?.modelID ?? info.modelID
+  const variant = info.model?.variant
+
+  if (!info.agent && !providerID && !modelID) {
+    return null
+  }
+
+  return {
+    agent: info.agent,
+    model: providerID && modelID
+      ? { providerID, modelID, ...(variant ? { variant } : {}) }
+      : undefined,
+    tools: info.tools,
+  }
+}
+
+// TODO: These SDK-based functions are exported for future use when hooks migrate to async.
+// Currently, apart from resolveMessageContext below, callers still use the sync JSON-based functions, which return null on beta.
+// Migration requires making callers async, which is a larger refactoring.
+// See: https://github.com/code-yeongyu/oh-my-opencode/pull/1837
+
+/**
+ * Finds the nearest (most recent) message with agent and model fields via client.session.messages()
+ * (for beta/SQLite backend); falls back to the nearest message with either an agent or a complete model.
+ */
+export async function findNearestMessageWithFieldsFromSDK(
+  client: OpencodeClient,
+  sessionID: string
+): Promise<StoredMessage | null> {
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const stored = convertSDKMessageToStoredMessage(messages[i])
+      if (stored?.agent && stored.model?.providerID && stored.model?.modelID) {
+        return stored
+      }
+    }
+
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const stored = convertSDKMessageToStoredMessage(messages[i])
+      if (stored?.agent || (stored?.model?.providerID && stored?.model?.modelID)) {
+        return stored
+      }
+    }
+  } catch (error) {
+    log("[hook-message-injector] SDK message fetch failed", {
+      sessionID,
+      error: String(error),
+    })
+  }
+  return null
+}
+
+/**
+ * Finds the FIRST (oldest) message with agent field using SDK (for beta/SQLite backend).
+ */
+export async function findFirstMessageWithAgentFromSDK(
+  client: OpencodeClient,
+  sessionID: string
+): Promise<string | null> {
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+
+    for (const msg of messages) {
+      const stored = convertSDKMessageToStoredMessage(msg)
+      if (stored?.agent) {
+        return stored.agent
+      }
+    }
+  } catch (error) {
+    log("[hook-message-injector] SDK agent fetch failed", {
+      sessionID,
+      error: String(error),
+    })
+  }
+  return null
+}
+
+/**
+ * Finds the nearest message with required fields (agent, model.providerID, model.modelID).
+ * Reads from JSON files - for stable (JSON) backend.
+ *
+ * **Version-gated behavior:**
+ * - On beta (SQLite backend): Returns null immediately (no JSON storage)
+ * - On stable (JSON backend): Reads from JSON files in messageDir
+ *
+ * @deprecated Use findNearestMessageWithFieldsFromSDK for beta/SQLite backend
+ */
 export function findNearestMessageWithFields(messageDir: string): StoredMessage | null {
+  // On beta SQLite backend, skip JSON file reads entirely
+  if (isSqliteBackend()) {
+    return null
+  }
+
  try {
    const files = readdirSync(messageDir)
      .filter((f) => f.endsWith(".json"))
      .sort()
      .reverse()

-    // First pass: find message with ALL fields (ideal)
    for (const file of files) {
      try {
        const content = readFileSync(join(messageDir, file), "utf-8")
@@ -30,8 +149,6 @@ export function findNearestMessageWithFields(messageDir: string): StoredMessage
      }
    }

-    // Second pass: find message with ANY useful field (fallback)
-    // This ensures agent info isn't lost when model info is missing
    for (const file of files) {
      try {
        const content = readFileSync(join(messageDir, file), "utf-8")
@@ -51,15 +168,24 @@ export function findNearestMessageWithFields(messageDir: string): StoredMessage

 /**
 * Finds the FIRST (oldest) message in the session with agent field.
- * This is used to get the original agent that started the session,
- * avoiding issues where newer messages may have a different agent
- * due to OpenCode's internal agent switching.
+ * Reads from JSON files - for stable (JSON) backend.
+ *
+ * **Version-gated behavior:**
+ * - On beta (SQLite backend): Returns null immediately (no JSON storage)
+ * - On stable (JSON backend): Reads from JSON files in messageDir
+ *
+ * @deprecated Use findFirstMessageWithAgentFromSDK for beta/SQLite backend
 */
 export function findFirstMessageWithAgent(messageDir: string): string | null {
+  // On beta SQLite backend, skip JSON file reads entirely
+  if (isSqliteBackend()) {
+    return null
+  }
+
  try {
    const files = readdirSync(messageDir)
      .filter((f) => f.endsWith(".json"))
-      .sort() // Oldest first (no reverse)
+      .sort()

    for (const file of files) {
      try {
@@ -111,12 +237,29 @@ function getOrCreateMessageDir(sessionID: string): string {
  return directPath
 }

+/**
+ * Injects a hook message into the session storage.
+ *
+ * **Version-gated behavior:**
+ * - On beta (SQLite backend): Logs warning and skips injection (writes are invisible to SQLite)
+ * - On stable (JSON backend): Writes message and part JSON files
+ *
+ * Features degraded on beta:
+ * - Hook message injection (e.g., continuation prompts, context injection) won't persist
+ * - Atlas hook's injected messages won't be visible in SQLite backend
+ * - Todo continuation enforcer's injected prompts won't persist
+ * - Ralph loop's continuation prompts won't persist
+ *
+ * @param sessionID - Target session ID
+ * @param hookContent - Content to inject
+ * @param originalMessage - Context from the original message
+ * @returns true if injection succeeded, false otherwise
+ */
 export function injectHookMessage(
  sessionID: string,
  hookContent: string,
  originalMessage: OriginalMessageContext
 ): boolean {
-  // Validate hook content to prevent empty message injection
  if (!hookContent || hookContent.trim().length === 0) {
    log("[hook-message-injector] Attempted to inject empty hook content, skipping injection", {
      sessionID,
@@ -126,6 +269,16 @@ export function injectHookMessage(
    return false
  }

+  if (isSqliteBackend()) {
+    log("[hook-message-injector] Skipping JSON message injection on SQLite backend. " +
+        "In-flight injection is handled via experimental.chat.messages.transform hook. " +
+        "JSON write path is not needed when SQLite is the storage backend.", {
+      sessionID,
+      agent: originalMessage.agent,
+    })
+    return false
+  }
+
  const messageDir = getOrCreateMessageDir(sessionID)

  const needsFallback =
@@ -202,3 +355,21 @@ export function injectHookMessage(
    return false
  }
 }
+
+export async function resolveMessageContext(
+  sessionID: string,
+  client: OpencodeClient,
+  messageDir: string | null
+): Promise<{ prevMessage: StoredMessage | null; firstMessageAgent: string | null }> {
+  const [prevMessage, firstMessageAgent] = isSqliteBackend()
+    ? await Promise.all([
+        findNearestMessageWithFieldsFromSDK(client, sessionID),
+        findFirstMessageWithAgentFromSDK(client, sessionID),
+      ])
+    : [
+        messageDir ? findNearestMessageWithFields(messageDir) : null,
+        messageDir ? findFirstMessageWithAgent(messageDir) : null,
+      ]
+
+  return { prevMessage, firstMessageAgent }
+}
--- a/src/features/opencode-skill-loader/discover-worker.ts
+++ b/src/features/opencode-skill-loader/discover-worker.ts
@@ -18,8 +18,6 @@ interface WorkerOutputError {
  error: { message: string; stack?: string }
 }

-type WorkerOutput = WorkerOutputSuccess | WorkerOutputError
-
 const { signal } = workerData as { signal: Int32Array }

 if (!parentPort) {
--- a/src/features/tmux-subagent/manager.test.ts
+++ b/src/features/tmux-subagent/manager.test.ts
@@ -557,221 +557,6 @@ describe('TmuxSessionManager', () => {
    })
  })

-  describe('Stability Detection (Issue #1330)', () => {
-    test('does NOT close session immediately when idle - requires 4 polls (1 baseline + 3 stable)', async () => {
-      //#given - session that is old enough (>10s) and idle
-      mockIsInsideTmux.mockReturnValue(true)
-      
-      const { TmuxSessionManager } = await import('./manager')
-      
-      const statusMock = mock(async () => ({
-        data: { 'ses_child': { type: 'idle' } }
-      }))
-      const messagesMock = mock(async () => ({
-        data: [{ id: 'msg1' }]  // Same message count each time
-      }))
-      
-      const ctx = {
-        serverUrl: new URL('http://localhost:4096'),
-        client: {
-          session: {
-            status: statusMock,
-            messages: messagesMock,
-          },
-        },
-      } as any
-      
-      const config: TmuxConfig = {
-        enabled: true,
-        layout: 'main-vertical',
-        main_pane_size: 60,
-        main_pane_min_width: 80,
-        agent_pane_min_width: 40,
-      }
-      const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
-
-      // Spawn a session first
-      await manager.onSessionCreated(
-        createSessionCreatedEvent('ses_child', 'ses_parent', 'Task')
-      )
-      
-      // Make session old enough for stability detection (>10s)
-      const sessions = (manager as any).sessions as Map<string, any>
-      const tracked = sessions.get('ses_child')
-      tracked.createdAt = new Date(Date.now() - 15000)  // 15 seconds ago
-      
-      mockExecuteAction.mockClear()
-
-      //#when - poll only 3 times (need 4: 1 baseline + 3 stable)
-      await (manager as any).pollSessions()  // sets lastMessageCount = 1
-      await (manager as any).pollSessions()  // stableIdlePolls = 1
-      await (manager as any).pollSessions()  // stableIdlePolls = 2
-
-      //#then - should NOT have closed yet (need one more poll)
-      expect(mockExecuteAction).not.toHaveBeenCalled()
-    })
-
-    test('closes session after 3 consecutive stable idle polls', async () => {
-      //#given
-      mockIsInsideTmux.mockReturnValue(true)
-      
-      const { TmuxSessionManager } = await import('./manager')
-      
-      const statusMock = mock(async () => ({
-        data: { 'ses_child': { type: 'idle' } }
-      }))
-      const messagesMock = mock(async () => ({
-        data: [{ id: 'msg1' }]  // Same message count each time
-      }))
-      
-      const ctx = {
-        serverUrl: new URL('http://localhost:4096'),
-        client: {
-          session: {
-            status: statusMock,
-            messages: messagesMock,
-          },
-        },
-      } as any
-      
-      const config: TmuxConfig = {
-        enabled: true,
-        layout: 'main-vertical',
-        main_pane_size: 60,
-        main_pane_min_width: 80,
-        agent_pane_min_width: 40,
-      }
-      const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
-
-      await manager.onSessionCreated(
-        createSessionCreatedEvent('ses_child', 'ses_parent', 'Task')
-      )
-      
-      // Simulate session being old enough (>10s) by manipulating createdAt
-      const sessions = (manager as any).sessions as Map<string, any>
-      const tracked = sessions.get('ses_child')
-      tracked.createdAt = new Date(Date.now() - 15000)  // 15 seconds ago
-      
-      mockExecuteAction.mockClear()
-
-      //#when - poll 4 times (1st sets lastMessageCount, then 3 stable polls)
-      await (manager as any).pollSessions()  // sets lastMessageCount = 1
-      await (manager as any).pollSessions()  // stableIdlePolls = 1
-      await (manager as any).pollSessions()  // stableIdlePolls = 2
-      await (manager as any).pollSessions()  // stableIdlePolls = 3 -> close
-
-      //#then - should have closed the session
-      expect(mockExecuteAction).toHaveBeenCalled()
-      const call = mockExecuteAction.mock.calls[0]
-      expect(call![0].type).toBe('close')
-    })
-
-    test('resets stability counter when new messages arrive', async () => {
-      //#given
-      mockIsInsideTmux.mockReturnValue(true)
-      
-      const { TmuxSessionManager } = await import('./manager')
-      
-      let messageCount = 1
-      const statusMock = mock(async () => ({
-        data: { 'ses_child': { type: 'idle' } }
-      }))
-      const messagesMock = mock(async () => {
-        // Simulate new messages arriving each poll
-        messageCount++
-        return { data: Array(messageCount).fill({ id: 'msg' }) }
-      })
-      
-      const ctx = {
-        serverUrl: new URL('http://localhost:4096'),
-        client: {
-          session: {
-            status: statusMock,
-            messages: messagesMock,
-          },
-        },
-      } as any
-      
-      const config: TmuxConfig = {
-        enabled: true,
-        layout: 'main-vertical',
-        main_pane_size: 60,
-        main_pane_min_width: 80,
-        agent_pane_min_width: 40,
-      }
-      const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
-
-      await manager.onSessionCreated(
-        createSessionCreatedEvent('ses_child', 'ses_parent', 'Task')
-      )
-      
-      const sessions = (manager as any).sessions as Map<string, any>
-      const tracked = sessions.get('ses_child')
-      tracked.createdAt = new Date(Date.now() - 15000)
-      
-      mockExecuteAction.mockClear()
-
-      //#when - poll multiple times (message count keeps changing)
-      await (manager as any).pollSessions()
-      await (manager as any).pollSessions()
-      await (manager as any).pollSessions()
-      await (manager as any).pollSessions()
-
-      //#then - should NOT have closed (stability never reached due to changing messages)
-      expect(mockExecuteAction).not.toHaveBeenCalled()
-    })
-
-    test('does NOT apply stability detection for sessions younger than 10s', async () => {
-      //#given - freshly created session (age < 10s)
-      mockIsInsideTmux.mockReturnValue(true)
-      
-      const { TmuxSessionManager } = await import('./manager')
-      
-      const statusMock = mock(async () => ({
-        data: { 'ses_child': { type: 'idle' } }
-      }))
-      const messagesMock = mock(async () => ({
-        data: [{ id: 'msg1' }]  // Same message count - would trigger close if age check wasn't there
-      }))
-      
-      const ctx = {
-        serverUrl: new URL('http://localhost:4096'),
-        client: {
-          session: {
-            status: statusMock,
-            messages: messagesMock,
-          },
-        },
-      } as any
-      
-      const config: TmuxConfig = {
-        enabled: true,
-        layout: 'main-vertical',
-        main_pane_size: 60,
-        main_pane_min_width: 80,
-        agent_pane_min_width: 40,
-      }
-      const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
-
-      await manager.onSessionCreated(
-        createSessionCreatedEvent('ses_child', 'ses_parent', 'Task')
-      )
-      
-      // Session is fresh (createdAt is now) - don't manipulate it
-      // This tests the 10s age gate - stability detection should NOT activate
-      mockExecuteAction.mockClear()
-
-      //#when - poll 5 times (more than enough to close if age check wasn't there)
-      await (manager as any).pollSessions()  // Would set lastMessageCount if age check passed
-      await (manager as any).pollSessions()  // Would be stableIdlePolls = 1
-      await (manager as any).pollSessions()  // Would be stableIdlePolls = 2
-      await (manager as any).pollSessions()  // Would be stableIdlePolls = 3 -> would close
-      await (manager as any).pollSessions()  // Extra poll to be sure
-
-      //#then - should NOT have closed (session too young for stability detection)
-      expect(mockExecuteAction).not.toHaveBeenCalled()
-    })
-  })
 })

 describe('DecisionEngine', () => {
--- a/src/features/tmux-subagent/manager.ts
+++ b/src/features/tmux-subagent/manager.ts
@@ -1,15 +1,13 @@
 import type { PluginInput } from "@opencode-ai/plugin"
 import type { TmuxConfig } from "../../config/schema"
 import type { TrackedSession, CapacityConfig } from "./types"
+import { log, normalizeSDKResponse } from "../../shared"
 import {
  isInsideTmux as defaultIsInsideTmux,
  getCurrentPaneId as defaultGetCurrentPaneId,
-  POLL_INTERVAL_BACKGROUND_MS,
-  SESSION_MISSING_GRACE_MS,
  SESSION_READY_POLL_INTERVAL_MS,
  SESSION_READY_TIMEOUT_MS,
 } from "../../shared/tmux"
-import { log } from "../../shared"
 import { queryWindowState } from "./pane-state-querier"
 import { decideSpawnActions, decideCloseAction, type SessionMapping } from "./decision-engine"
 import { executeActions, executeAction } from "./action-executor"
@@ -31,13 +29,6 @@ const defaultTmuxDeps: TmuxUtilDeps = {
  getCurrentPaneId: defaultGetCurrentPaneId,
 }

-const SESSION_TIMEOUT_MS = 10 * 60 * 1000
-
-// Stability detection constants (prevents premature closure - see issue #1330)
-// Mirrors the proven pattern from background-agent/manager.ts
-const MIN_STABILITY_TIME_MS = 10 * 1000  // Must run at least 10s before stability detection kicks in
-const STABLE_POLLS_REQUIRED = 3          // 3 consecutive idle polls (~6s with 2s poll interval)
-
 /**
 * State-first Tmux Session Manager
 * 
@@ -103,7 +94,7 @@ export class TmuxSessionManager {
    while (Date.now() - startTime < SESSION_READY_TIMEOUT_MS) {
      try {
        const statusResult = await this.client.session.status({ path: undefined })
-        const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+        const allStatuses = normalizeSDKResponse(statusResult, {} as Record<string, { type: string }>)
        
        if (allStatuses[sessionId]) {
          log("[tmux-session-manager] session ready", {
@@ -127,12 +118,6 @@ export class TmuxSessionManager {
    return false
  }

-  // NOTE: Exposed (via `as any`) for test stability checks.
-  // Actual polling is owned by TmuxPollingManager.
-  private async pollSessions(): Promise<void> {
-    await (this.pollingManager as any).pollSessions()
-  }
-
  async onSessionCreated(event: SessionCreatedEvent): Promise<void> {
    const enabled = this.isEnabled()
    log("[tmux-session-manager] onSessionCreated called", {
--- a/src/features/tmux-subagent/pane-split-availability.ts
+++ b/src/features/tmux-subagent/pane-split-availability.ts
@@ -1,4 +1,4 @@
-import { MIN_PANE_HEIGHT, MIN_PANE_WIDTH } from "./types"
+import { MIN_PANE_WIDTH } from "./types"
 import type { SplitDirection, TmuxPaneInfo } from "./types"
 import {
 	DIVIDER_SIZE,
--- a/src/features/tmux-subagent/polling-manager.ts
+++ b/src/features/tmux-subagent/polling-manager.ts
@@ -3,6 +3,7 @@ import { POLL_INTERVAL_BACKGROUND_MS } from "../../shared/tmux"
 import type { TrackedSession } from "./types"
 import { SESSION_MISSING_GRACE_MS } from "../../shared/tmux"
 import { log } from "../../shared"
+import { normalizeSDKResponse } from "../../shared"

 const SESSION_TIMEOUT_MS = 10 * 60 * 1000
 const MIN_STABILITY_TIME_MS = 10 * 1000
@@ -43,7 +44,7 @@ export class TmuxPollingManager {

    try {
      const statusResult = await this.client.session.status({ path: undefined })
-      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
+      const allStatuses = normalizeSDKResponse(statusResult, {} as Record<string, { type: string }>)

      log("[tmux-session-manager] pollSessions", {
        trackedSessions: Array.from(this.sessions.keys()),
@@ -82,7 +83,7 @@ export class TmuxPollingManager {
              
              if (tracked.stableIdlePolls >= STABLE_POLLS_REQUIRED) {
                const recheckResult = await this.client.session.status({ path: undefined })
-                const recheckStatuses = (recheckResult.data ?? {}) as Record<string, { type: string }>
+                const recheckStatuses = normalizeSDKResponse(recheckResult, {} as Record<string, { type: string }>)
                const recheckStatus = recheckStatuses[sessionId]
                
                if (recheckStatus?.type === "idle") {
--- a/src/hooks/AGENTS.md
+++ b/src/hooks/AGENTS.md
@@ -8,18 +8,18 @@
 ```
 hooks/
 ├── agent-usage-reminder/         # Specialized agent hints (109 lines)
-├── anthropic-context-window-limit-recovery/ # Auto-summarize on limit (2232 lines)
+├── anthropic-context-window-limit-recovery/ # Auto-summarize on limit (2232 lines, 29 files)
 ├── anthropic-effort/             # Effort=max for Opus max variant (56 lines)
-├── atlas/                        # Main orchestration hook (1976 lines)
+├── atlas/                        # Main orchestration hook (1976 lines, 17 files)
 ├── auto-slash-command/           # Detects /command patterns (1134 lines)
-├── auto-update-checker/          # Plugin update check (1140 lines)
+├── auto-update-checker/          # Plugin update check (1140 lines, 20 files)
 ├── background-notification/      # OS notifications (33 lines)
 ├── category-skill-reminder/      # Category+skill delegation reminders (597 lines)
-├── claude-code-hooks/            # settings.json compat (2110 lines) - see AGENTS.md
+├── claude-code-hooks/            # settings.json compat (2110 lines) — see AGENTS.md
 ├── comment-checker/              # Prevents AI slop comments (710 lines)
 ├── compaction-context-injector/  # Injects context on compaction (128 lines)
 ├── compaction-todo-preserver/    # Preserves todos during compaction (203 lines)
-├── context-window-monitor.ts     # Reminds of headroom at 70% (99 lines)
+├── context-window-monitor.ts     # Reminds of headroom at 70% (100 lines)
 ├── delegate-task-retry/          # Retries failed delegations (266 lines)
 ├── directory-agents-injector/    # Auto-injects AGENTS.md (195 lines)
 ├── directory-readme-injector/    # Auto-injects README.md (190 lines)
@@ -34,7 +34,7 @@ hooks/
 ├── ralph-loop/                   # Self-referential dev loop (1687 lines)
 ├── rules-injector/               # Conditional .sisyphus/rules injection (1604 lines)
 ├── session-notification.ts       # OS idle notifications (108 lines)
-├── session-recovery/             # Auto-recovers from crashes (1279 lines)
+├── session-recovery/             # Auto-recovers from crashes (1279 lines, 14 files)
 ├── sisyphus-junior-notepad/      # Junior notepad directive (76 lines)
 ├── start-work/                   # Sisyphus work session starter (648 lines)
 ├── stop-continuation-guard/      # Guards stop continuation (214 lines)
@@ -57,10 +57,10 @@ hooks/
 | UserPromptSubmit | `chat.message` | Yes | 4 |
 | ChatParams | `chat.params` | No | 2 |
 | PreToolUse | `tool.execute.before` | Yes | 13 |
-| PostToolUse | `tool.execute.after` | No | 18 |
+| PostToolUse | `tool.execute.after` | No | 15 |
 | SessionEvent | `event` | No | 17 |
 | MessagesTransform | `experimental.chat.messages.transform` | No | 1 |
-| Compaction | `onSummarize` | No | 1 |
+| Compaction | `onSummarize` | No | 2 |

 ## BLOCKING HOOKS (8)

@@ -78,7 +78,7 @@ hooks/
 ## EXECUTION ORDER

 **UserPromptSubmit**: keywordDetector → claudeCodeHooks → autoSlashCommand → startWork
-**PreToolUse**: subagentQuestionBlocker → questionLabelTruncator → claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → prometheusMdOnly → sisyphusJuniorNotepad → writeExistingFileGuard → atlasHook
+**PreToolUse**: subagentQuestionBlocker → questionLabelTruncator → claudeCodeHooks → nonInteractiveEnv → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → prometheusMdOnly → sisyphusJuniorNotepad → writeExistingFileGuard → tasksToDoWriteDisabler → atlasHook
 **PostToolUse**: claudeCodeHooks → toolOutputTruncator → contextWindowMonitor → commentChecker → directoryAgentsInjector → directoryReadmeInjector → rulesInjector → emptyTaskResponseDetector → agentUsageReminder → interactiveBashSession → editErrorRecovery → delegateTaskRetry → atlasHook → taskResumeInfo → taskReminder

 ## HOW TO ADD
--- a/src/hooks/agent-usage-reminder/constants.ts
+++ b/src/hooks/agent-usage-reminder/constants.ts
@@ -1,7 +1,5 @@
 import { join } from "node:path";
-import { getOpenCodeStorageDir } from "../../shared/data-path";
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir();
+import { OPENCODE_STORAGE } from "../../shared";
 export const AGENT_USAGE_REMINDER_STORAGE = join(
  OPENCODE_STORAGE,
  "agent-usage-reminder",
--- a/src/hooks/anthropic-context-window-limit-recovery/aggressive-truncation-strategy.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/aggressive-truncation-strategy.ts
@@ -25,12 +25,13 @@ export async function runAggressiveTruncationStrategy(params: {
    targetRatio: TRUNCATE_CONFIG.targetTokenRatio,
  })

-  const aggressiveResult = truncateUntilTargetTokens(
+  const aggressiveResult = await truncateUntilTargetTokens(
    params.sessionID,
    params.currentTokens,
    params.maxTokens,
    TRUNCATE_CONFIG.targetTokenRatio,
    TRUNCATE_CONFIG.charsPerToken,
+    params.client,
  )

  if (aggressiveResult.truncatedCount <= 0) {
@@ -60,7 +61,7 @@ export async function runAggressiveTruncationStrategy(params: {
    clearSessionState(params.autoCompactState, params.sessionID)
    setTimeout(async () => {
      try {
-        await params.client.session.prompt_async({
+        await params.client.session.promptAsync({
          path: { id: params.sessionID },
          body: { auto: true } as never,
          query: { directory: params.directory },
--- a/src/hooks/anthropic-context-window-limit-recovery/client.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/client.ts
@@ -1,20 +1,8 @@
-export type Client = {
+import type { PluginInput } from "@opencode-ai/plugin"
+
+export type Client = PluginInput["client"] & {
  session: {
-    messages: (opts: {
-      path: { id: string }
-      query?: { directory?: string }
-    }) => Promise<unknown>
-    summarize: (opts: {
-      path: { id: string }
-      body: { providerID: string; modelID: string }
-      query: { directory: string }
-    }) => Promise<unknown>
-    revert: (opts: {
-      path: { id: string }
-      body: { messageID: string; partID?: string }
-      query: { directory: string }
-    }) => Promise<unknown>
-    prompt_async: (opts: {
+    promptAsync: (opts: {
      path: { id: string }
      body: { parts: Array<{ type: string; text: string }> }
      query: { directory: string }
--- a/src/hooks/anthropic-context-window-limit-recovery/deduplication-recovery.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/deduplication-recovery.ts
@@ -1,3 +1,4 @@
+import type { PluginInput } from "@opencode-ai/plugin"
 import type { ParsedTokenLimitError } from "./types"
 import type { ExperimentalConfig } from "../../config"
 import type { DeduplicationConfig } from "./pruning-deduplication"
@@ -6,6 +7,8 @@ import { executeDeduplication } from "./pruning-deduplication"
 import { truncateToolOutputsByCallId } from "./pruning-tool-output-truncation"
 import { log } from "../../shared/logger"

+type OpencodeClient = PluginInput["client"]
+
 function createPruningState(): PruningState {
  return {
    toolIdsToPrune: new Set<string>(),
@@ -43,6 +46,7 @@ export async function attemptDeduplicationRecovery(
  sessionID: string,
  parsed: ParsedTokenLimitError,
  experimental: ExperimentalConfig | undefined,
+  client?: OpencodeClient,
 ): Promise<void> {
  if (!isPromptTooLongError(parsed)) return

@@ -50,15 +54,17 @@ export async function attemptDeduplicationRecovery(
  if (!plan) return

  const pruningState = createPruningState()
-  const prunedCount = executeDeduplication(
+  const prunedCount = await executeDeduplication(
    sessionID,
    pruningState,
    plan.config,
    plan.protectedTools,
+    client,
  )
-  const { truncatedCount } = truncateToolOutputsByCallId(
+  const { truncatedCount } = await truncateToolOutputsByCallId(
    sessionID,
    pruningState.toolIdsToPrune,
+    client,
  )

  if (prunedCount > 0 || truncatedCount > 0) {
--- a/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery-sdk.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery-sdk.test.ts
@@ -0,0 +1,166 @@
+import { describe, it, expect, mock, beforeEach } from "bun:test"
+import { fixEmptyMessagesWithSDK } from "./empty-content-recovery-sdk"
+
+const mockReplaceEmptyTextParts = mock(() => Promise.resolve(false)) // resolves false unless a test overrides it
+const mockInjectTextPart = mock(() => Promise.resolve(false)) // resolves false unless a test overrides it
+
+mock.module("../session-recovery/storage/empty-text", () => ({ // swap the storage helper for the mock above
+  replaceEmptyTextPartsAsync: mockReplaceEmptyTextParts,
+}))
+mock.module("../session-recovery/storage/text-part-injector", () => ({ // likewise for the injector
+  injectTextPartAsync: mockInjectTextPart,
+}))
+
+/** Stub SDK client: `session.messages()` resolves to `{ data: messages }`; nothing else is mocked. */
+function createMockClient(
+  messages: Array<{ info?: { id?: string }; parts?: Array<{ type?: string; text?: string }> }>,
+) {
+  const listMessages = mock(() => Promise.resolve({ data: messages }))
+  return { session: { messages: listMessages } } as never
+}
+
+describe("fixEmptyMessagesWithSDK", () => {
+  beforeEach(() => { // start every test with both storage mocks resolving false
+    mockReplaceEmptyTextParts.mockReset()
+    mockInjectTextPart.mockReset()
+    mockReplaceEmptyTextParts.mockReturnValue(Promise.resolve(false))
+    mockInjectTextPart.mockReturnValue(Promise.resolve(false))
+  })
+
+  it("returns fixed=false when no empty messages exist", async () => {
+    //#given a session whose only message has non-blank text
+    const client = createMockClient([
+      { info: { id: "msg_1" }, parts: [{ type: "text", text: "Hello" }] },
+    ])
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then nothing is fixed and the scan reports no empty messages
+    expect(result.fixed).toBe(false)
+    expect(result.fixedMessageIds).toEqual([])
+    expect(result.scannedEmptyCount).toBe(0)
+  })
+
+  it("fixes empty message via replace when scanning all", async () => {
+    //#given a message whose only text part is empty, and a replace helper that succeeds
+    const client = createMockClient([
+      { info: { id: "msg_1" }, parts: [{ type: "text", text: "" }] },
+    ])
+    mockReplaceEmptyTextParts.mockReturnValue(Promise.resolve(true))
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then the message is reported as fixed and counted as one scanned empty message
+    expect(result.fixed).toBe(true)
+    expect(result.fixedMessageIds).toContain("msg_1")
+    expect(result.scannedEmptyCount).toBe(1)
+  })
+
+  it("falls back to inject when replace fails", async () => {
+    //#given a message with no parts, a replace helper that fails and an inject helper that succeeds
+    const client = createMockClient([
+      { info: { id: "msg_1" }, parts: [] },
+    ])
+    mockReplaceEmptyTextParts.mockReturnValue(Promise.resolve(false))
+    mockInjectTextPart.mockReturnValue(Promise.resolve(true))
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then the injection counts as the fix
+    expect(result.fixed).toBe(true)
+    expect(result.fixedMessageIds).toContain("msg_1")
+  })
+
+  it("fixes target message by index when provided", async () => {
+    //#given an empty message at index 1 and a replace helper that succeeds
+    const client = createMockClient([
+      { info: { id: "msg_0" }, parts: [{ type: "text", text: "ok" }] },
+      { info: { id: "msg_1" }, parts: [] },
+    ])
+    mockReplaceEmptyTextParts.mockReturnValue(Promise.resolve(true))
+
+    //#when recovery is pointed at index 1
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+      messageIndex: 1,
+    })
+
+    //#then the targeted repair succeeds and the full scan is skipped (scannedEmptyCount stays 0)
+    expect(result.fixed).toBe(true)
+    expect(result.fixedMessageIds).toContain("msg_1")
+    expect(result.scannedEmptyCount).toBe(0)
+  })
+
+  it("skips messages without info.id", async () => {
+    //#given empty messages that carry no usable id (missing info, or info without id)
+    const client = createMockClient([
+      { parts: [] },
+      { info: {}, parts: [] },
+    ])
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then they are not counted as empty messages and nothing is fixed
+    expect(result.fixed).toBe(false)
+    expect(result.scannedEmptyCount).toBe(0)
+  })
+
+  it("treats thinking-only messages as empty", async () => {
+    //#given a message whose only part is of type "thinking", and a replace helper that succeeds
+    const client = createMockClient([
+      { info: { id: "msg_1" }, parts: [{ type: "thinking", text: "hmm" }] },
+    ])
+    mockReplaceEmptyTextParts.mockReturnValue(Promise.resolve(true))
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then the message is treated as empty and gets fixed
+    expect(result.fixed).toBe(true)
+    expect(result.fixedMessageIds).toContain("msg_1")
+  })
+
+  it("treats tool_use messages as non-empty", async () => {
+    //#given a message whose only part is of type "tool_use"
+    const client = createMockClient([
+      { info: { id: "msg_1" }, parts: [{ type: "tool_use" }] },
+    ])
+
+    //#when recovery runs without a messageIndex (full scan)
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: "ses_1",
+      client,
+      placeholderText: "[recovered]",
+    })
+
+    //#then the message counts as having content, so nothing is scanned as empty or fixed
+    expect(result.fixed).toBe(false)
+    expect(result.scannedEmptyCount).toBe(0)
+  })
+})
--- a/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery-sdk.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery-sdk.ts
@@ -0,0 +1,191 @@
+import { replaceEmptyTextPartsAsync } from "../session-recovery/storage/empty-text"
+import { injectTextPartAsync } from "../session-recovery/storage/text-part-injector"
+import type { Client } from "./client"
+
+interface SDKPart { // one message part as read from the SDK response; every field is optional
+  id?: string
+  type?: string // compared against IGNORE_TYPES, "text" and TOOL_TYPES below
+  text?: string // only consulted for parts of type "text"
+}
+
+interface SDKMessage { // just the message fields this module reads
+  info?: { id?: string }
+  parts?: SDKPart[]
+}
+
+const IGNORE_TYPES = new Set(["thinking", "redacted_thinking", "meta"]) // part types that never count as content
+const TOOL_TYPES = new Set(["tool", "tool_use", "tool_result"]) // tool part types; these always count as content
+
+/**
+ * Decides whether an SDK message carries real content.
+ *
+ * Parts without a type and thinking/meta parts are skipped; a text part counts
+ * only when it is non-blank after trimming; every other typed part, tool parts
+ * included, counts as content. A message with no parts, or with nothing but
+ * skipped or blank-text parts, is empty (kept aligned with the file-based
+ * `messageHasContent` logic).
+ */
+function messageHasContentFromSDK(message: SDKMessage): boolean {
+  const parts = message.parts
+  if (!parts || parts.length === 0) return false
+
+  for (const part of parts) {
+    const partType = part.type
+    if (!partType || IGNORE_TYPES.has(partType)) continue
+    if (partType !== "text") {
+      // Tool parts are content, and so is any other typed part that reaches this point.
+      if (TOOL_TYPES.has(partType)) return true
+      return true
+    }
+    if (part.text?.trim()) return true
+  }
+  return false
+}
+
+/** Extracts the message list from an SDK response: a bare array, or an object whose `data` is an array; otherwise []. */
+function getSdkMessages(response: unknown): SDKMessage[] {
+  if (typeof response !== "object" || response === null) return []
+  if (Array.isArray(response)) return response as SDKMessage[]
+  const data = (response as Record<string, unknown>)["data"]
+  // Arrays were handled above, so a response without an array `data` field has no messages to offer.
+  return Array.isArray(data) ? (data as SDKMessage[]) : []
+}
+
+/**
+ * Collects the IDs of every message in the session that has no real content.
+ *
+ * Messages without an `info.id` are skipped. Best-effort: if fetching or
+ * inspecting the messages throws, an empty list is returned instead of the
+ * error.
+ */
+async function findEmptyMessagesFromSDK(client: Client, sessionID: string): Promise<string[]> {
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    return getSdkMessages(response).flatMap((candidate) => {
+      const candidateId = candidate.info?.id
+      if (!candidateId || messageHasContentFromSDK(candidate)) return []
+      return [candidateId]
+    })
+  } catch {
+    return []
+  }
+}
+
+/**
+ * Looks for an empty message at, or close to, a given position in the session.
+ *
+ * The requested index is checked first, then its neighbours in the order
+ * -1, +1, -2, +2, -3, -4, -5. Positions outside the message list and messages
+ * without an `info.id` are passed over.
+ *
+ * @param client SDK client used to fetch the session's messages.
+ * @param sessionID Session whose messages are inspected.
+ * @param targetIndex Zero-based position to start probing from.
+ * @returns The ID of the first probed message that has no content, or `null`
+ *   when none qualifies or fetching the messages throws.
+ */
+async function findEmptyMessageByIndexFromSDK(
+  client: Client,
+  sessionID: string,
+  targetIndex: number,
+): Promise<string | null> {
+  // Probe order relative to targetIndex: nearest first, reaching further back than forward.
+  const probeOffsets = [0, -1, 1, -2, 2, -3, -4, -5]
+
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const messages = getSdkMessages(response)
+
+    for (const offset of probeOffsets) {
+      const candidate = messages[targetIndex + offset]
+      const candidateId = candidate?.info?.id
+      if (!candidate || !candidateId) continue
+      if (!messageHasContentFromSDK(candidate)) return candidateId
+    }
+
+    return null
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Repairs a single empty message: tries to replace its empty text parts with
+ * the placeholder and, when that reports no change, injects a new text part.
+ *
+ * @param client SDK client handed through to the storage helpers.
+ * @param sessionID Session that owns the message.
+ * @param messageID Message to repair.
+ * @param placeholderText Text to write into the message.
+ * @returns `true` when either the replacement or the injection reports success.
+ */
+async function repairEmptyMessageWithSDK(
+  client: Client,
+  sessionID: string,
+  messageID: string,
+  placeholderText: string,
+): Promise<boolean> {
+  if (await replaceEmptyTextPartsAsync(client, sessionID, messageID, placeholderText)) {
+    return true
+  }
+  // Replacement reported no change: fall back to injecting a new text part.
+  return Boolean(await injectTextPartAsync(client, sessionID, messageID, placeholderText))
+}
+
+/**
+ * Fills empty messages in a session with placeholder text, using the SDK.
+ *
+ * When `messageIndex` is given, the empty message at or near that position is
+ * repaired first and, on success, the function returns without scanning
+ * (`scannedEmptyCount` is 0). Otherwise, or when that targeted repair fixes
+ * nothing, every empty message in the session is located and repaired in turn.
+ *
+ * @param params.sessionID Session to repair.
+ * @param params.client SDK client used to read and modify messages.
+ * @param params.placeholderText Text written into each repaired message.
+ * @param params.messageIndex Optional position of the message suspected to be empty.
+ * @returns `fixed` is true when at least one message was repaired,
+ *   `fixedMessageIds` lists the repaired messages, and `scannedEmptyCount` is
+ *   the number of empty messages found by the full scan.
+ */
+export async function fixEmptyMessagesWithSDK(params: {
+  sessionID: string
+  client: Client
+  placeholderText: string
+  messageIndex?: number
+}): Promise<{ fixed: boolean; fixedMessageIds: string[]; scannedEmptyCount: number }> {
+  const { client, sessionID, placeholderText, messageIndex } = params
+
+  // Targeted repair: try the message at (or near) the given index first.
+  if (messageIndex !== undefined) {
+    const targetMessageId = await findEmptyMessageByIndexFromSDK(
+      client,
+      sessionID,
+      messageIndex,
+    )
+    if (targetMessageId && (await repairEmptyMessageWithSDK(client, sessionID, targetMessageId, placeholderText))) {
+      // The full scan is skipped entirely, hence scannedEmptyCount: 0.
+      return { fixed: true, fixedMessageIds: [targetMessageId], scannedEmptyCount: 0 }
+    }
+  }
+
+  // Full scan: reached when no index was given or the targeted repair fixed nothing.
+  const emptyMessageIds = await findEmptyMessagesFromSDK(client, sessionID)
+  if (emptyMessageIds.length === 0) {
+    return { fixed: false, fixedMessageIds: [], scannedEmptyCount: 0 }
+  }
+
+  // Repair sequentially, in list order.
+  const fixedMessageIds: string[] = []
+  for (const messageID of emptyMessageIds) {
+    if (await repairEmptyMessageWithSDK(client, sessionID, messageID, placeholderText)) {
+      fixedMessageIds.push(messageID)
+    }
+  }
+
+  return {
+    fixed: fixedMessageIds.length > 0,
+    fixedMessageIds,
+    scannedEmptyCount: emptyMessageIds.length,
+  }
+}
--- a/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/empty-content-recovery.ts
@@ -4,10 +4,12 @@ import {
  injectTextPart,
  replaceEmptyTextParts,
 } from "../session-recovery/storage"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
 import type { AutoCompactState } from "./types"
 import type { Client } from "./client"
 import { PLACEHOLDER_TEXT } from "./message-builder"
 import { incrementEmptyContentAttempt } from "./state"
+import { fixEmptyMessagesWithSDK } from "./empty-content-recovery-sdk"

 export async function fixEmptyMessages(params: {
  sessionID: string
@@ -20,6 +22,44 @@ export async function fixEmptyMessages(params: {
  let fixed = false
  const fixedMessageIds: string[] = []

+  if (isSqliteBackend()) {
+    const result = await fixEmptyMessagesWithSDK({
+      sessionID: params.sessionID,
+      client: params.client,
+      placeholderText: PLACEHOLDER_TEXT,
+      messageIndex: params.messageIndex,
+    })
+
+    if (!result.fixed && result.scannedEmptyCount === 0) {
+      await params.client.tui
+        .showToast({
+          body: {
+            title: "Empty Content Error",
+            message: "No empty messages found in storage. Cannot auto-recover.",
+            variant: "error",
+            duration: 5000,
+          },
+        })
+        .catch(() => {})
+      return false
+    }
+
+    if (result.fixed) {
+      await params.client.tui
+        .showToast({
+          body: {
+            title: "Session Recovery",
+            message: `Fixed ${result.fixedMessageIds.length} empty message(s). Retrying...`,
+            variant: "warning",
+            duration: 3000,
+          },
+        })
+        .catch(() => {})
+    }
+
+    return result.fixed
+  }
+
  if (params.messageIndex !== undefined) {
    const targetMessageId = findEmptyMessageByIndex(params.sessionID, params.messageIndex)
    if (targetMessageId) {
--- a/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/executor.test.ts
@@ -99,7 +99,7 @@ describe("executeCompact lock management", () => {
        messages: mock(() => Promise.resolve({ data: [] })),
        summarize: mock(() => Promise.resolve()),
        revert: mock(() => Promise.resolve()),
-        prompt_async: mock(() => Promise.resolve()),
+        promptAsync: mock(() => Promise.resolve()),
      },
      tui: {
        showToast: mock(() => Promise.resolve()),
@@ -283,9 +283,9 @@ describe("executeCompact lock management", () => {
    expect(autoCompactState.compactionInProgress.has(sessionID)).toBe(false)
  })

-  test("clears lock when prompt_async in continuation throws", async () => {
-    // given: prompt_async will fail during continuation
-    mockClient.session.prompt_async = mock(() =>
+  test("clears lock when promptAsync in continuation throws", async () => {
+    // given: promptAsync will fail during continuation
+    mockClient.session.promptAsync = mock(() =>
      Promise.reject(new Error("Prompt failed")),
    )
    autoCompactState.errorDataBySession.set(sessionID, {
@@ -313,7 +313,7 @@ describe("executeCompact lock management", () => {
      maxTokens: 200000,
    })

-    const truncateSpy = spyOn(storage, "truncateUntilTargetTokens").mockReturnValue({
+    const truncateSpy = spyOn(storage, "truncateUntilTargetTokens").mockResolvedValue({
      success: true,
      sufficient: false,
      truncatedCount: 3,
@@ -354,7 +354,7 @@ describe("executeCompact lock management", () => {
      maxTokens: 200000,
    })

-    const truncateSpy = spyOn(storage, "truncateUntilTargetTokens").mockReturnValue({
+    const truncateSpy = spyOn(storage, "truncateUntilTargetTokens").mockResolvedValue({
      success: true,
      sufficient: true,
      truncatedCount: 5,
@@ -378,8 +378,8 @@ describe("executeCompact lock management", () => {
    // then: Summarize should NOT be called (early return from sufficient truncation)
    expect(mockClient.session.summarize).not.toHaveBeenCalled()

-    // then: prompt_async should be called (Continue after successful truncation)
-    expect(mockClient.session.prompt_async).toHaveBeenCalled()
+    // then: promptAsync should be called (Continue after successful truncation)
+    expect(mockClient.session.promptAsync).toHaveBeenCalled()

    // then: Lock should be cleared
    expect(autoCompactState.compactionInProgress.has(sessionID)).toBe(false)
--- a/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/message-builder.ts
@@ -1,14 +1,118 @@
 import { log } from "../../shared/logger"
+import type { PluginInput } from "@opencode-ai/plugin"
+import { normalizeSDKResponse } from "../../shared"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
 import {
  findEmptyMessages,
  injectTextPart,
  replaceEmptyTextParts,
 } from "../session-recovery/storage"
+import { replaceEmptyTextPartsAsync } from "../session-recovery/storage/empty-text"
+import { injectTextPartAsync } from "../session-recovery/storage/text-part-injector"
 import type { Client } from "./client"

 export const PLACEHOLDER_TEXT = "[user interrupted]"

-export function sanitizeEmptyMessagesBeforeSummarize(sessionID: string): number {
+type OpencodeClient = PluginInput["client"]
+
+interface SDKPart {
+  type?: string
+  text?: string
+}
+
+interface SDKMessage {
+  info?: { id?: string }
+  parts?: SDKPart[]
+}
+
+const IGNORE_TYPES = new Set(["thinking", "redacted_thinking", "meta"])
+const TOOL_TYPES = new Set(["tool", "tool_use", "tool_result"])
+
+/**
+ * Decides whether an SDK message carries real content.
+ *
+ * Parts without a type and thinking/meta parts are skipped; a text part counts
+ * only when it is non-blank after trimming; every other typed part, tool parts
+ * included, counts as content. A message with no parts, or with nothing but
+ * skipped or blank-text parts, is empty (kept aligned with the file-based
+ * `messageHasContent` logic).
+ */
+function messageHasContentFromSDK(message: SDKMessage): boolean {
+  const parts = message.parts
+  if (!parts || parts.length === 0) return false
+
+  for (const part of parts) {
+    const partType = part.type
+    if (!partType || IGNORE_TYPES.has(partType)) continue
+    if (partType !== "text") {
+      // Tool parts are content, and so is any other typed part that reaches this point.
+      if (TOOL_TYPES.has(partType)) return true
+      return true
+    }
+    if (part.text?.trim()) return true
+  }
+  return false
+}
+
+/**
+ * Lists the IDs of the session's messages that have no real content, skipping
+ * messages without an `info.id`. Best-effort: if fetching or inspecting the
+ * messages throws, an empty list is returned instead of the error.
+ */
+async function findEmptyMessageIdsFromSDK(
+  client: OpencodeClient,
+  sessionID: string,
+): Promise<string[]> {
+  try {
+    const response = (await client.session.messages({
+      path: { id: sessionID },
+    })) as { data?: SDKMessage[] }
+    const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+
+    return messages.flatMap((candidate) => {
+      const candidateId = candidate.info?.id
+      if (!candidateId || messageHasContentFromSDK(candidate)) return []
+      return [candidateId]
+    })
+  } catch {
+    return []
+  }
+}
+
+export async function sanitizeEmptyMessagesBeforeSummarize(
+  sessionID: string,
+  client?: OpencodeClient,
+): Promise<number> {
+  if (client && isSqliteBackend()) {
+    const emptyMessageIds = await findEmptyMessageIdsFromSDK(client, sessionID)
+    if (emptyMessageIds.length === 0) {
+      return 0
+    }
+
+    let fixedCount = 0
+    for (const messageID of emptyMessageIds) {
+      const replaced = await replaceEmptyTextPartsAsync(client, sessionID, messageID, PLACEHOLDER_TEXT)
+      if (replaced) {
+        fixedCount++
+      } else {
+        const injected = await injectTextPartAsync(client, sessionID, messageID, PLACEHOLDER_TEXT)
+        if (injected) {
+          fixedCount++
+        }
+      }
+    }
+
+    if (fixedCount > 0) {
+      log("[auto-compact] pre-summarize sanitization fixed empty messages", {
+        sessionID,
+        fixedCount,
+        totalEmpty: emptyMessageIds.length,
+      })
+    }
+
+    return fixedCount
+  }
+
  const emptyMessageIds = findEmptyMessages(sessionID)
  if (emptyMessageIds.length === 0) {
    return 0
--- a/src/hooks/anthropic-context-window-limit-recovery/message-storage-directory.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/message-storage-directory.ts
@@ -1,36 +1,40 @@
 import { existsSync, readdirSync } from "node:fs"
-import { join } from "node:path"
+import type { PluginInput } from "@opencode-ai/plugin"
+import { getMessageDir } from "../../shared/opencode-message-dir"
+import { normalizeSDKResponse } from "../../shared"

-import { MESSAGE_STORAGE_DIR } from "./storage-paths"
+export { getMessageDir }

-export function getMessageDir(sessionID: string): string {
-	if (!existsSync(MESSAGE_STORAGE_DIR)) return ""
+type OpencodeClient = PluginInput["client"]

-	const directPath = join(MESSAGE_STORAGE_DIR, sessionID)
-	if (existsSync(directPath)) {
-		return directPath
-	}
+interface SDKMessage {
+  info: { id: string }
+  parts: unknown[]
+}

-	for (const directory of readdirSync(MESSAGE_STORAGE_DIR)) {
-		const sessionPath = join(MESSAGE_STORAGE_DIR, directory, sessionID)
-		if (existsSync(sessionPath)) {
-			return sessionPath
-		}
-	}
-
-	return ""
+/** Lists the session's message IDs via the SDK; entries without an ID are skipped, and [] is returned on any failure. */
+export async function getMessageIdsFromSDK(client: OpencodeClient, sessionID: string): Promise<string[]> {
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+    // The declared shape is not enforced at runtime: one entry without `info` must not discard every other ID.
+    const ids = messages.map((msg) => msg?.info?.id)
+    return ids.filter((id): id is string => typeof id === "string" && id.length > 0)
+  } catch {
+    return []
+  }
 }

 export function getMessageIds(sessionID: string): string[] {
-	const messageDir = getMessageDir(sessionID)
-	if (!messageDir || !existsSync(messageDir)) return []
+  const messageDir = getMessageDir(sessionID)
+  if (!messageDir || !existsSync(messageDir)) return []

-	const messageIds: string[] = []
-	for (const file of readdirSync(messageDir)) {
-		if (!file.endsWith(".json")) continue
-		const messageId = file.replace(".json", "")
-		messageIds.push(messageId)
-	}
+  const messageIds: string[] = []
+  for (const file of readdirSync(messageDir)) {
+    if (!file.endsWith(".json")) continue
+    const messageId = file.replace(".json", "")
+    messageIds.push(messageId)
+  }

-	return messageIds
+  return messageIds
 }
--- a/src/hooks/anthropic-context-window-limit-recovery/pruning-deduplication.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/pruning-deduplication.ts
@@ -1,9 +1,14 @@
-import { existsSync, readdirSync, readFileSync } from "node:fs"
+import { readdirSync, readFileSync } from "node:fs"
 import { join } from "node:path"
+import type { PluginInput } from "@opencode-ai/plugin"
 import type { PruningState, ToolCallSignature } from "./pruning-types"
 import { estimateTokens } from "./pruning-types"
 import { log } from "../../shared/logger"
-import { MESSAGE_STORAGE } from "../../features/hook-message-injector"
+import { getMessageDir } from "../../shared/opencode-message-dir"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
+import { normalizeSDKResponse } from "../../shared"
+
+type OpencodeClient = PluginInput["client"]

 export interface DeduplicationConfig {
  enabled: boolean
@@ -43,20 +48,6 @@ function sortObject(obj: unknown): unknown {
  return sorted
 }

-function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
-
-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-
-  return null
-}
-
 function readMessages(sessionID: string): MessagePart[] {
  const messageDir = getMessageDir(sessionID)
  if (!messageDir) return []
@@ -64,7 +55,7 @@ function readMessages(sessionID: string): MessagePart[] {
  const messages: MessagePart[] = []
  
  try {
-    const files = readdirSync(messageDir).filter(f => f.endsWith(".json"))
+    const files = readdirSync(messageDir).filter((f: string) => f.endsWith(".json"))
    for (const file of files) {
      const content = readFileSync(join(messageDir, file), "utf-8")
      const data = JSON.parse(content)
@@ -79,15 +70,29 @@ function readMessages(sessionID: string): MessagePart[] {
  return messages
 }

-export function executeDeduplication(
+async function readMessagesFromSDK(client: OpencodeClient, sessionID: string): Promise<MessagePart[]> {
+  try { // SDK-backed alternative to readMessages(): fetch the session's messages through the client
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const rawMessages = normalizeSDKResponse(response, [] as Array<{ parts?: ToolPart[] }>, { preferResponseOnMissingData: true })
+    return rawMessages.filter((m) => m.parts) as MessagePart[] // drop entries that have no `parts`
+  } catch {
+    return [] // best-effort: any failure while fetching is reported as "no messages"
+  }
+}
+
+export async function executeDeduplication(
  sessionID: string,
  state: PruningState,
  config: DeduplicationConfig,
-  protectedTools: Set<string>
-): number {
+  protectedTools: Set<string>,
+  client?: OpencodeClient,
+): Promise<number> {
  if (!config.enabled) return 0

-  const messages = readMessages(sessionID)
+  const messages = (client && isSqliteBackend())
+    ? await readMessagesFromSDK(client, sessionID)
+    : readMessages(sessionID)
+
  const signatures = new Map<string, ToolCallSignature[]>()
  
  let currentTurn = 0
--- a/src/hooks/anthropic-context-window-limit-recovery/pruning-tool-output-truncation.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/pruning-tool-output-truncation.ts
@@ -1,8 +1,15 @@
 import { existsSync, readdirSync, readFileSync } from "node:fs"
 import { join } from "node:path"
+import type { PluginInput } from "@opencode-ai/plugin"
 import { getOpenCodeStorageDir } from "../../shared/data-path"
 import { truncateToolResult } from "./storage"
+import { truncateToolResultAsync } from "./tool-result-storage-sdk"
 import { log } from "../../shared/logger"
+import { getMessageDir } from "../../shared/opencode-message-dir"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
+import { normalizeSDKResponse } from "../../shared"
+
+type OpencodeClient = PluginInput["client"]

 interface StoredToolPart {
  type?: string
@@ -13,29 +20,23 @@ interface StoredToolPart {
  }
 }

-function getMessageStorage(): string {
-  return join(getOpenCodeStorageDir(), "message")
+interface SDKToolPart {
+  id: string
+  type: string
+  callID?: string
+  tool?: string
+  state?: { output?: string; time?: { compacted?: number } }
+}
+
+interface SDKMessage {
+  info?: { id?: string }
+  parts?: SDKToolPart[]
 }

 function getPartStorage(): string {
  return join(getOpenCodeStorageDir(), "part")
 }

-function getMessageDir(sessionID: string): string | null {
-  const messageStorage = getMessageStorage()
-  if (!existsSync(messageStorage)) return null
-
-  const directPath = join(messageStorage, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(messageStorage)) {
-    const sessionPath = join(messageStorage, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-
-  return null
-}
-
 function getMessageIds(sessionID: string): string[] {
  const messageDir = getMessageDir(sessionID)
  if (!messageDir) return []
@@ -49,12 +50,17 @@ function getMessageIds(sessionID: string): string[] {
  return messageIds
 }

-export function truncateToolOutputsByCallId(
+export async function truncateToolOutputsByCallId(
  sessionID: string,
  callIds: Set<string>,
-): { truncatedCount: number } {
+  client?: OpencodeClient,
+): Promise<{ truncatedCount: number }> {
  if (callIds.size === 0) return { truncatedCount: 0 }

+  if (client && isSqliteBackend()) {
+    return truncateToolOutputsByCallIdFromSDK(client, sessionID, callIds)
+  }
+
  const messageIds = getMessageIds(sessionID)
  if (messageIds.length === 0) return { truncatedCount: 0 }

@@ -95,3 +101,42 @@ export function truncateToolOutputsByCallId(

  return { truncatedCount }
 }
+
+/**
+ * SDK-backed variant of tool-output truncation: truncates the output of every
+ * tool part whose call ID is in `callIds`.
+ *
+ * Parts that have no output or are already marked compacted are left alone.
+ * Truncations run one at a time. Best-effort: any thrown error makes the whole
+ * call report `{ truncatedCount: 0 }`, even if some parts were truncated.
+ */
+async function truncateToolOutputsByCallIdFromSDK(
+  client: OpencodeClient,
+  sessionID: string,
+  callIds: Set<string>,
+): Promise<{ truncatedCount: number }> {
+  try {
+    const response = await client.session.messages({ path: { id: sessionID } })
+    const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+    let truncatedCount = 0
+
+    for (const message of messages) {
+      const messageID = message.info?.id
+      if (!messageID || !message.parts) continue
+      for (const part of message.parts) {
+        if (part.type !== "tool" || !part.callID || !callIds.has(part.callID)) continue
+        if (!part.state?.output || part.state?.time?.compacted) continue
+        const { success } = await truncateToolResultAsync(client, sessionID, messageID, part.id, part)
+        if (success) truncatedCount += 1
+      }
+    }
+
+    if (truncatedCount > 0) {
+      log("[auto-compact] pruned duplicate tool outputs (SDK)", { sessionID, truncatedCount })
+    }
+
+    return { truncatedCount }
+  } catch {
+    return { truncatedCount: 0 }
+  }
+}
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-deduplication.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-deduplication.test.ts
@@ -53,7 +53,7 @@ describe("createAnthropicContextWindowLimitRecoveryHook", () => {
        messages: mock(() => Promise.resolve({ data: [] })),
        summarize: mock(() => summarizePromise),
        revert: mock(() => Promise.resolve()),
-        prompt_async: mock(() => Promise.resolve()),
+        promptAsync: mock(() => Promise.resolve()),
      },
      tui: {
        showToast: mock(() => Promise.resolve()),
@@ -97,7 +97,7 @@ describe("createAnthropicContextWindowLimitRecoveryHook", () => {
        messages: mock(() => Promise.resolve({ data: [] })),
        summarize: mock(() => Promise.resolve()),
        revert: mock(() => Promise.resolve()),
-        prompt_async: mock(() => Promise.resolve()),
+        promptAsync: mock(() => Promise.resolve()),
      },
      tui: {
        showToast: mock(() => Promise.resolve()),
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
@@ -0,0 +1,105 @@
+import { beforeEach, describe, expect, mock, test } from "bun:test"
+import type { PluginInput } from "@opencode-ai/plugin"
+
+const executeCompactMock = mock(async () => {}) // stands in for ./executor's executeCompact; the test asserts on its calls
+const getLastAssistantMock = mock(async () => ({
+  providerID: "anthropic",
+  modelID: "claude-sonnet-4-5",
+}))
+const parseAnthropicTokenLimitErrorMock = mock(() => ({ // always returns a parsed result, whatever error is passed
+  providerID: "anthropic",
+  modelID: "claude-sonnet-4-5",
+}))
+
+mock.module("./executor", () => ({ // registered before the test dynamically imports ./recovery-hook
+  executeCompact: executeCompactMock,
+  getLastAssistant: getLastAssistantMock,
+}))
+
+mock.module("./parser", () => ({
+  parseAnthropicTokenLimitError: parseAnthropicTokenLimitErrorMock,
+}))
+
+mock.module("../../shared/logger", () => ({
+  log: () => {}, // silence logging during the test
+}))
+
+/**
+ * Builds the smallest PluginInput stand-in this test needs: a client exposing
+ * `session.messages` and `tui.showToast` mocks, plus a working directory.
+ */
+function createMockContext(): PluginInput {
+  const session = {
+    messages: mock(() => Promise.resolve({ data: [] })),
+  }
+  const tui = {
+    showToast: mock(() => Promise.resolve()),
+  }
+  return { client: { session, tui }, directory: "/tmp" } as PluginInput
+}
+
+/** Swaps the global timer functions for inert stubs: timers never fire, and every clearTimeout ID is recorded. */
+function setupDelayedTimeoutMocks(): {
+  restore: () => void
+  getClearTimeoutCalls: () => Array<ReturnType<typeof setTimeout>>
+} {
+  const realTimers = { setTimeout: globalThis.setTimeout, clearTimeout: globalThis.clearTimeout }
+  const clearedIDs: Array<ReturnType<typeof setTimeout>> = []
+  let nextTimeoutID = 0
+  // The stub never schedules the callback; it only hands out sequential IDs starting at 1.
+  const fakeSetTimeout = (_: () => void, _delay?: number) => {
+    nextTimeoutID += 1
+    return nextTimeoutID as ReturnType<typeof setTimeout>
+  }
+  const fakeClearTimeout = (timeoutID: ReturnType<typeof setTimeout>) => {
+    clearedIDs.push(timeoutID)
+  }
+  globalThis.setTimeout = fakeSetTimeout as typeof setTimeout
+  globalThis.clearTimeout = fakeClearTimeout as typeof clearTimeout
+  return {
+    restore: () => {
+      globalThis.setTimeout = realTimers.setTimeout
+      globalThis.clearTimeout = realTimers.clearTimeout
+    },
+    getClearTimeoutCalls: () => clearedIDs,
+  }
+}
+
+describe("createAnthropicContextWindowLimitRecoveryHook", () => {
+  beforeEach(() => {
+    executeCompactMock.mockClear()
+    getLastAssistantMock.mockClear()
+    parseAnthropicTokenLimitErrorMock.mockClear()
+  })
+
+  // Sends session.error, then session.idle for the same session, and expects the timer
+  // handed out for the first event (ID 1) to be cleared while compaction runs exactly once.
+  test("cancels pending timer when session.idle handles compaction first", async () => {
+    //#given
+    const { restore, getClearTimeoutCalls } = setupDelayedTimeoutMocks()
+
+    try {
+      // Import and build the hook inside `try` so the global timers are restored even if this setup throws.
+      const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
+      const hook = createAnthropicContextWindowLimitRecoveryHook(createMockContext())
+
+      //#when
+      await hook.event({
+        event: {
+          type: "session.error",
+          properties: { sessionID: "session-race", error: "prompt is too long" },
+        },
+      })
+      await hook.event({
+        event: { type: "session.idle", properties: { sessionID: "session-race" } },
+      })
+
+      //#then
+      expect(getClearTimeoutCalls()).toEqual([1 as ReturnType<typeof setTimeout>])
+      expect(executeCompactMock).toHaveBeenCalledTimes(1)
+      expect(executeCompactMock.mock.calls[0]?.[0]).toBe("session-race")
+    } finally {
+      restore()
+    }
+  })
+})
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
@@ -28,6 +28,7 @@ export function createAnthropicContextWindowLimitRecoveryHook(
 ) {
  const autoCompactState = createRecoveryState()
  const experimental = options?.experimental
+  const pendingCompactionTimeoutBySession = new Map<string, ReturnType<typeof setTimeout>>()

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
    const props = event.properties as Record<string, unknown> | undefined
@@ -35,6 +36,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(
    if (event.type === "session.deleted") {
      const sessionInfo = props?.info as { id?: string } | undefined
      if (sessionInfo?.id) {
+        const timeoutID = pendingCompactionTimeoutBySession.get(sessionInfo.id)
+        if (timeoutID !== undefined) {
+          clearTimeout(timeoutID)
+          pendingCompactionTimeoutBySession.delete(sessionInfo.id)
+        }
+
        autoCompactState.pendingCompact.delete(sessionInfo.id)
        autoCompactState.errorDataBySession.delete(sessionInfo.id)
        autoCompactState.retryStateBySession.delete(sessionInfo.id)
@@ -57,7 +64,7 @@ export function createAnthropicContextWindowLimitRecoveryHook(
        autoCompactState.errorDataBySession.set(sessionID, parsed)

        if (autoCompactState.compactionInProgress.has(sessionID)) {
-          await attemptDeduplicationRecovery(sessionID, parsed, experimental)
+          await attemptDeduplicationRecovery(sessionID, parsed, experimental, ctx.client)
          return
        }

@@ -76,7 +83,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
          })
          .catch(() => {})

-        setTimeout(() => {
+        const timeoutID = setTimeout(() => {
+          pendingCompactionTimeoutBySession.delete(sessionID)
          executeCompact(
            sessionID,
            { providerID, modelID },
@@ -86,6 +94,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
            experimental,
          )
        }, 300)
+
+        pendingCompactionTimeoutBySession.set(sessionID, timeoutID)
      }
      return
    }
@@ -114,6 +124,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(

      if (!autoCompactState.pendingCompact.has(sessionID)) return

+      const timeoutID = pendingCompactionTimeoutBySession.get(sessionID)
+      if (timeoutID !== undefined) {
+        clearTimeout(timeoutID)
+        pendingCompactionTimeoutBySession.delete(sessionID)
+      }
+
      const errorData = autoCompactState.errorDataBySession.get(sessionID)
      const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)

--- a/src/hooks/anthropic-context-window-limit-recovery/storage-paths.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/storage-paths.ts
@@ -1,10 +1,6 @@
-import { join } from "node:path"
-import { getOpenCodeStorageDir } from "../../shared/data-path"
+import { MESSAGE_STORAGE, PART_STORAGE } from "../../shared"

-const OPENCODE_STORAGE_DIR = getOpenCodeStorageDir()
-
-export const MESSAGE_STORAGE_DIR = join(OPENCODE_STORAGE_DIR, "message")
-export const PART_STORAGE_DIR = join(OPENCODE_STORAGE_DIR, "part")
+export { MESSAGE_STORAGE as MESSAGE_STORAGE_DIR, PART_STORAGE as PART_STORAGE_DIR }

 export const TRUNCATION_MESSAGE =
 	"[TOOL RESULT TRUNCATED - Context limit exceeded. Original output was too large and has been truncated to recover the session. Please re-run this tool if you need the full output.]"
--- a/src/hooks/anthropic-context-window-limit-recovery/storage.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/storage.test.ts
@@ -21,7 +21,7 @@ describe("truncateUntilTargetTokens", () => {
    truncateToolResult.mockReset()
  })

-  test("truncates only until target is reached", () => {
+  test("truncates only until target is reached", async () => {
    const { findToolResultsBySize, truncateToolResult } = require("./storage")
    
    // given: Two tool results, each 1000 chars. Target reduction is 500 chars.
@@ -39,7 +39,7 @@ describe("truncateUntilTargetTokens", () => {

    // when: currentTokens=1000, maxTokens=1000, targetRatio=0.5 (target=500, reduce=500)
    // charsPerToken=1 for simplicity in test
-    const result = truncateUntilTargetTokens(sessionID, 1000, 1000, 0.5, 1)
+    const result = await truncateUntilTargetTokens(sessionID, 1000, 1000, 0.5, 1)

    // then: Should only truncate the first tool
    expect(result.truncatedCount).toBe(1)
@@ -49,7 +49,7 @@ describe("truncateUntilTargetTokens", () => {
    expect(result.sufficient).toBe(true)
  })

-  test("truncates all if target not reached", () => {
+  test("truncates all if target not reached", async () => {
    const { findToolResultsBySize, truncateToolResult } = require("./storage")
    
    // given: Two tool results, each 100 chars. Target reduction is 500 chars.
@@ -66,7 +66,7 @@ describe("truncateUntilTargetTokens", () => {
    }))

    // when: reduce 500 chars
-    const result = truncateUntilTargetTokens(sessionID, 1000, 1000, 0.5, 1)
+    const result = await truncateUntilTargetTokens(sessionID, 1000, 1000, 0.5, 1)

    // then: Should truncate both
    expect(result.truncatedCount).toBe(2)
--- a/src/hooks/anthropic-context-window-limit-recovery/storage.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/storage.ts
@@ -8,4 +8,11 @@ export {
 	truncateToolResult,
 } from "./tool-result-storage"

+export {
+	countTruncatedResultsFromSDK,
+	findToolResultsBySizeFromSDK,
+	getTotalToolOutputSizeFromSDK,
+	truncateToolResultAsync,
+} from "./tool-result-storage-sdk"
+
 export { truncateUntilTargetTokens } from "./target-token-truncation"
--- a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts
@@ -61,7 +61,7 @@ export async function runSummarizeRetryStrategy(params: {

    if (providerID && modelID) {
      try {
-        sanitizeEmptyMessagesBeforeSummarize(params.sessionID)
+        await sanitizeEmptyMessagesBeforeSummarize(params.sessionID, params.client)

        await params.client.tui
          .showToast({
--- a/src/hooks/anthropic-context-window-limit-recovery/target-token-truncation.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/target-token-truncation.ts
@@ -1,5 +1,27 @@
+import type { PluginInput } from "@opencode-ai/plugin"
 import type { AggressiveTruncateResult } from "./tool-part-types"
 import { findToolResultsBySize, truncateToolResult } from "./tool-result-storage"
+import { truncateToolResultAsync } from "./tool-result-storage-sdk"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
+import { normalizeSDKResponse } from "../../shared"
+
+type OpencodeClient = PluginInput["client"]
+
+interface SDKToolPart { // fields of an SDK message part that the SDK truncation branch in this file reads
+	id: string
+	type: string // only parts with type "tool" are considered
+	tool?: string
+	state?: {
+		output?: string
+		time?: { start?: number; end?: number; compacted?: number } // a truthy `compacted` excludes the part from truncation
+	}
+	originalSize?: number // NOTE(review): not read in the visible SDK branch — confirm it is needed
+}
+
+interface SDKMessage { // one entry of the session.messages response after normalizeSDKResponse
+	info?: { id?: string } // messages without an id are skipped
+	parts?: SDKToolPart[]
+}

 function calculateTargetBytesToRemove(
 	currentTokens: number,
@@ -13,13 +35,14 @@ function calculateTargetBytesToRemove(
 	return { tokensToReduce, targetBytesToRemove }
 }

-export function truncateUntilTargetTokens(
+export async function truncateUntilTargetTokens(
 	sessionID: string,
 	currentTokens: number,
 	maxTokens: number,
 	targetRatio: number = 0.8,
-	charsPerToken: number = 4
-): AggressiveTruncateResult {
+	charsPerToken: number = 4,
+	client?: OpencodeClient
+): Promise<AggressiveTruncateResult> {
 	const { tokensToReduce, targetBytesToRemove } = calculateTargetBytesToRemove(
 		currentTokens,
 		maxTokens,
@@ -38,6 +61,94 @@ export function truncateUntilTargetTokens(
 		}
 	}

+	if (client && isSqliteBackend()) {
+		let toolPartsByKey = new Map<string, SDKToolPart>()
+		try {
+			const response = (await client.session.messages({
+				path: { id: sessionID },
+			})) as { data?: SDKMessage[] }
+			const messages = normalizeSDKResponse(response, [] as SDKMessage[], { preferResponseOnMissingData: true })
+			toolPartsByKey = new Map<string, SDKToolPart>()
+
+			for (const message of messages) {
+				const messageID = message.info?.id
+				if (!messageID || !message.parts) continue
+				for (const part of message.parts) {
+					if (part.type !== "tool") continue
+					toolPartsByKey.set(`${messageID}:${part.id}`, part)
+				}
+			}
+		} catch {
+			toolPartsByKey = new Map<string, SDKToolPart>()
+		}
+
+		const results: import("./tool-part-types").ToolResultInfo[] = []
+		for (const [key, part] of toolPartsByKey) {
+			if (part.type === "tool" && part.state?.output && !part.state?.time?.compacted && part.tool) {
+				results.push({
+					partPath: "",
+					partId: part.id,
+					messageID: key.split(":")[0],
+					toolName: part.tool,
+					outputSize: part.state.output.length,
+				})
+			}
+		}
+		results.sort((a, b) => b.outputSize - a.outputSize)
+
+		if (results.length === 0) {
+			return {
+				success: false,
+				sufficient: false,
+				truncatedCount: 0,
+				totalBytesRemoved: 0,
+				targetBytesToRemove,
+				truncatedTools: [],
+			}
+		}
+
+		let totalRemoved = 0
+		let truncatedCount = 0
+		const truncatedTools: Array<{ toolName: string; originalSize: number }> = []
+
+		for (const result of results) {
+			const part = toolPartsByKey.get(`${result.messageID}:${result.partId}`)
+			if (!part) continue
+
+			const truncateResult = await truncateToolResultAsync(
+				client,
+				sessionID,
+				result.messageID,
+				result.partId,
+				part
+			)
+			if (truncateResult.success) {
+				truncatedCount++
+				const removedSize = truncateResult.originalSize ?? result.outputSize
+				totalRemoved += removedSize
+				truncatedTools.push({
+					toolName: truncateResult.toolName ?? result.toolName,
+					originalSize: removedSize,
+				})
+
+				if (totalRemoved >= targetBytesToRemove) {
+					break
+				}
+			}
+		}
+
+		const sufficient = totalRemoved >= targetBytesToRemove
+
+		return {
+			success: truncatedCount > 0,
+			sufficient,
+			truncatedCount,
+			totalBytesRemoved: totalRemoved,
+			targetBytesToRemove,
+			truncatedTools,
+		}
+	}
+
 	const results = findToolResultsBySize(sessionID)

 	if (results.length === 0) {
--- a/src/hooks/anthropic-context-window-limit-recovery/tool-result-storage-sdk.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/tool-result-storage-sdk.ts
@@ -0,0 +1,123 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import { TRUNCATION_MESSAGE } from "./storage-paths"
+import type { ToolResultInfo } from "./tool-part-types"
+import { patchPart } from "../../shared/opencode-http-api"
+import { log } from "../../shared/logger"
+import { normalizeSDKResponse } from "../../shared"
+
+type OpencodeClient = PluginInput["client"]
+
+interface SDKToolPart { // shape of an SDK message part as consumed by the helpers in this file
+  id: string
+  type: string // the helpers only act on parts whose type is "tool"
+  callID?: string
+  tool?: string
+  state?: {
+    status?: string
+    input?: Record<string, unknown>
+    output?: string // replaced with TRUNCATION_MESSAGE by truncateToolResultAsync
+    error?: string
+    time?: { start?: number; end?: number; compacted?: number } // `compacted` is set to Date.now() when the part is truncated
+  }
+}
+
+interface SDKMessage { // one entry of the session.messages response after normalizeSDKResponse
+  info?: { id?: string } // findToolResultsBySizeFromSDK skips messages without an id
+  parts?: SDKToolPart[]
+}
+
+/**
+ * Lists tool parts of the session that still carry un-compacted output, as reported
+ * by the SDK, largest output first. Any failure while fetching yields an empty list.
+ */
+export async function findToolResultsBySizeFromSDK(
+  client: OpencodeClient,
+  sessionID: string
+): Promise<ToolResultInfo[]> {
+  try {
+    const sdkResponse = await client.session.messages({ path: { id: sessionID } })
+    const sessionMessages = normalizeSDKResponse(sdkResponse, [] as SDKMessage[], { preferResponseOnMissingData: true })
+    const found: ToolResultInfo[] = []
+
+    for (const { info, parts } of sessionMessages) {
+      const messageID = info?.id
+      if (!messageID || !parts) continue
+      for (const { id: partId, type, tool: toolName, state } of parts) {
+        if (type !== "tool" || !toolName) continue
+        const output = state?.output
+        if (!output || state?.time?.compacted) continue
+        // partPath is left empty for SDK-sourced results.
+        found.push({ partPath: "", partId, messageID, toolName, outputSize: output.length })
+      }
+    }
+
+    found.sort((a, b) => b.outputSize - a.outputSize)
+    return found
+  } catch {
+    return []
+  }
+}
+
+/**
+ * Replaces a tool part's output with TRUNCATION_MESSAGE, stamps `state.time.compacted`, and sends the result through `patchPart`.
+ * Success reports the tool name and original output length; a missing output, falsy patch result or thrown error gives `success: false`.
+ */
+export async function truncateToolResultAsync(
+  client: OpencodeClient,
+  sessionID: string,
+  messageID: string,
+  partId: string,
+  part: SDKToolPart
+): Promise<{ success: boolean; toolName?: string; originalSize?: number }> {
+  const state = part.state
+  if (!state?.output) return { success: false }
+  const toolName = part.tool
+  const originalSize = state.output.length
+  // Keep existing timing fields; fall back to a fresh `start` when the part has none.
+  const time = {
+    ...(state.time ?? { start: Date.now() }),
+    compacted: Date.now(),
+  }
+  const updatedPart: Record<string, unknown> = {
+    ...part,
+    state: { ...state, output: TRUNCATION_MESSAGE, time },
+  }
+
+  try {
+    const patched = await patchPart(client, sessionID, messageID, partId, updatedPart)
+    return patched ? { success: true, toolName, originalSize } : { success: false }
+  } catch (error) {
+    log("[context-window-recovery] truncateToolResultAsync failed", { error: String(error) })
+    return { success: false }
+  }
+}
+
+/** Counts the session's tool parts that carry a truthy `state.time.compacted`; returns 0 if the lookup throws. */
+export async function countTruncatedResultsFromSDK(
+  client: OpencodeClient,
+  sessionID: string
+): Promise<number> {
+  try {
+    const sdkResponse = await client.session.messages({ path: { id: sessionID } })
+    const sessionMessages = normalizeSDKResponse(sdkResponse, [] as SDKMessage[], { preferResponseOnMissingData: true })
+
+    let compactedCount = 0
+    for (const { parts } of sessionMessages) {
+      if (!parts) continue
+      for (const { type, state } of parts) {
+        if (type === "tool" && state?.time?.compacted) compactedCount += 1
+      }
+    }
+    return compactedCount
+  } catch {
+    return 0
+  }
+}
+
+/** Adds up `outputSize` over everything findToolResultsBySizeFromSDK returns for the session. */
+export async function getTotalToolOutputSizeFromSDK(client: OpencodeClient, sessionID: string): Promise<number> {
+  const toolResults = await findToolResultsBySizeFromSDK(client, sessionID)
+  let totalSize = 0
+  for (const { outputSize } of toolResults) totalSize += outputSize
+  return totalSize
+}
--- a/src/hooks/anthropic-context-window-limit-recovery/tool-result-storage.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/tool-result-storage.ts
@@ -4,6 +4,10 @@ import { join } from "node:path"
 import { getMessageIds } from "./message-storage-directory"
 import { PART_STORAGE_DIR, TRUNCATION_MESSAGE } from "./storage-paths"
 import type { StoredToolPart, ToolResultInfo } from "./tool-part-types"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"
+import { log } from "../../shared/logger"
+
+let hasLoggedTruncateWarning = false

 export function findToolResultsBySize(sessionID: string): ToolResultInfo[] {
 	const messageIds = getMessageIds(sessionID)
@@ -48,6 +52,14 @@ export function truncateToolResult(partPath: string): {
 	toolName?: string
 	originalSize?: number
 } {
+	if (isSqliteBackend()) {
+		if (!hasLoggedTruncateWarning) {
+			log("[context-window-recovery] Disabled on SQLite backend: truncateToolResult")
+			hasLoggedTruncateWarning = true
+		}
+		return { success: false }
+	}
+
 	try {
 		const content = readFileSync(partPath, "utf-8")
 		const part = JSON.parse(content) as StoredToolPart
--- a/src/hooks/atlas/atlas-hook.ts
+++ b/src/hooks/atlas/atlas-hook.ts
@@ -19,7 +19,7 @@ export function createAtlasHook(ctx: PluginInput, options?: AtlasHookOptions) {

  return {
    handler: createAtlasEventHandler({ ctx, options, sessions, getState }),
-    "tool.execute.before": createToolExecuteBeforeHandler({ pendingFilePaths }),
+    "tool.execute.before": createToolExecuteBeforeHandler({ ctx, pendingFilePaths }),
    "tool.execute.after": createToolExecuteAfterHandler({ ctx, pendingFilePaths }),
  }
 }
--- a/src/hooks/atlas/event-handler.ts
+++ b/src/hooks/atlas/event-handler.ts
@@ -2,6 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import { getPlanProgress, readBoulderState } from "../../features/boulder-state"
 import { subagentSessions } from "../../features/claude-code-session-state"
 import { log } from "../../shared/logger"
+import { getAgentConfigKey } from "../../shared/agent-display-names"
 import { HOOK_NAME } from "./hook-name"
 import { isAbortError } from "./is-abort-error"
 import { injectBoulderContinuation } from "./boulder-continuation-injector"
@@ -87,12 +88,13 @@ export function createAtlasEventHandler(input: {
        return
      }

-      const lastAgent = getLastAgentFromSession(sessionID)
-      const requiredAgent = (boulderState.agent ?? "atlas").toLowerCase()
-      const lastAgentMatchesRequired = lastAgent === requiredAgent
+      const lastAgent = await getLastAgentFromSession(sessionID, ctx.client)
+      const lastAgentKey = getAgentConfigKey(lastAgent ?? "")
+      const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas")
+      const lastAgentMatchesRequired = lastAgentKey === requiredAgent
      const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined
      const boulderAgentDefaultsToAtlas = requiredAgent === "atlas"
-      const lastAgentIsSisyphus = lastAgent === "sisyphus"
+      const lastAgentIsSisyphus = lastAgentKey === "sisyphus"
      const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus
      const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas
      if (!agentMatches) {
--- a/src/hooks/atlas/index.test.ts
+++ b/src/hooks/atlas/index.test.ts
@@ -9,10 +9,31 @@ import {
  readBoulderState,
 } from "../../features/boulder-state"
 import type { BoulderState } from "../../features/boulder-state"
-
-import { MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { _resetForTesting, subagentSessions } from "../../features/claude-code-session-state"
-import { createAtlasHook } from "./index"
+
+const TEST_STORAGE_ROOT = join(tmpdir(), `atlas-message-storage-${randomUUID()}`)
+const TEST_MESSAGE_STORAGE = join(TEST_STORAGE_ROOT, "message")
+const TEST_PART_STORAGE = join(TEST_STORAGE_ROOT, "part")
+
+mock.module("../../features/hook-message-injector/constants", () => ({
+  OPENCODE_STORAGE: TEST_STORAGE_ROOT,
+  MESSAGE_STORAGE: TEST_MESSAGE_STORAGE,
+  PART_STORAGE: TEST_PART_STORAGE,
+}))
+
+mock.module("../../shared/opencode-message-dir", () => ({
+  getMessageDir: (sessionID: string) => {
+    const dir = join(TEST_MESSAGE_STORAGE, sessionID)
+    return existsSync(dir) ? dir : null
+  },
+}))
+
+mock.module("../../shared/opencode-storage-detection", () => ({
+  isSqliteBackend: () => false,
+}))
+
+const { createAtlasHook } = await import("./index")
+const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")

 describe("atlas hook", () => {
  let TEST_DIR: string
--- a/src/hooks/atlas/recent-model-resolver.ts
+++ b/src/hooks/atlas/recent-model-resolver.ts
@@ -1,6 +1,9 @@
 import type { PluginInput } from "@opencode-ai/plugin"
-import { findNearestMessageWithFields } from "../../features/hook-message-injector"
-import { getMessageDir } from "../../shared/session-utils"
+import {
+  findNearestMessageWithFields,
+  findNearestMessageWithFieldsFromSDK,
+} from "../../features/hook-message-injector"
+import { getMessageDir, isSqliteBackend, normalizeSDKResponse } from "../../shared"
 import type { ModelInfo } from "./types"

 export async function resolveRecentModelForSession(
@@ -9,9 +12,9 @@ export async function resolveRecentModelForSession(
 ): Promise<ModelInfo | undefined> {
  try {
    const messagesResp = await ctx.client.session.messages({ path: { id: sessionID } })
-    const messages = (messagesResp.data ?? []) as Array<{
+    const messages = normalizeSDKResponse(messagesResp, [] as Array<{
      info?: { model?: ModelInfo; modelID?: string; providerID?: string }
-    }>
+    }>)

    for (let i = messages.length - 1; i >= 0; i--) {
      const info = messages[i].info
@@ -28,8 +31,13 @@ export async function resolveRecentModelForSession(
    // ignore - fallback to message storage
  }

-  const messageDir = getMessageDir(sessionID)
-  const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
+  let currentMessage = null
+  if (isSqliteBackend()) {
+    currentMessage = await findNearestMessageWithFieldsFromSDK(ctx.client, sessionID)
+  } else {
+    const messageDir = getMessageDir(sessionID)
+    currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
+  }
  const model = currentMessage?.model
  if (!model?.providerID || !model?.modelID) {
    return undefined
--- a/src/hooks/atlas/session-last-agent.ts
+++ b/src/hooks/atlas/session-last-agent.ts
@@ -1,9 +1,24 @@
-import { findNearestMessageWithFields } from "../../features/hook-message-injector"
-import { getMessageDir } from "../../shared/session-utils"
+import type { PluginInput } from "@opencode-ai/plugin"
+
+import { findNearestMessageWithFields } from "../../features/hook-message-injector"
+import { findNearestMessageWithFieldsFromSDK } from "../../features/hook-message-injector"
+import { getMessageDir, isSqliteBackend } from "../../shared"
+
+type OpencodeClient = PluginInput["client"]
+
+export async function getLastAgentFromSession(
+  sessionID: string,
+  client?: OpencodeClient
+): Promise<string | null> {
+  let nearest = null
+
+  if (isSqliteBackend() && client) {
+    nearest = await findNearestMessageWithFieldsFromSDK(client, sessionID)
+  } else {
+    const messageDir = getMessageDir(sessionID)
+    if (!messageDir) return null
+    nearest = findNearestMessageWithFields(messageDir)
+  }

-export function getLastAgentFromSession(sessionID: string): string | null {
-  const messageDir = getMessageDir(sessionID)
-  if (!messageDir) return null
-  const nearest = findNearestMessageWithFields(messageDir)
  return nearest?.agent?.toLowerCase() ?? null
 }
--- a/src/hooks/atlas/tool-execute-after.ts
+++ b/src/hooks/atlas/tool-execute-after.ts
@@ -23,7 +23,7 @@ export function createToolExecuteAfterHandler(input: {
      return
    }

-    if (!isCallerOrchestrator(toolInput.sessionID)) {
+    if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
      return
    }

--- a/src/hooks/atlas/tool-execute-before.ts
+++ b/src/hooks/atlas/tool-execute-before.ts
@@ -1,21 +1,23 @@
 import { log } from "../../shared/logger"
 import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
 import { isCallerOrchestrator } from "../../shared/session-utils"
+import type { PluginInput } from "@opencode-ai/plugin"
 import { HOOK_NAME } from "./hook-name"
 import { ORCHESTRATOR_DELEGATION_REQUIRED, SINGLE_TASK_DIRECTIVE } from "./system-reminder-templates"
 import { isSisyphusPath } from "./sisyphus-path"
 import { isWriteOrEditToolName } from "./write-edit-tool-policy"

 export function createToolExecuteBeforeHandler(input: {
+  ctx: PluginInput
  pendingFilePaths: Map<string, string>
 }): (
  toolInput: { tool: string; sessionID?: string; callID?: string },
  toolOutput: { args: Record<string, unknown>; message?: string }
 ) => Promise<void> {
-  const { pendingFilePaths } = input
+  const { ctx, pendingFilePaths } = input

  return async (toolInput, toolOutput): Promise<void> => {
-    if (!isCallerOrchestrator(toolInput.sessionID)) {
+    if (!(await isCallerOrchestrator(toolInput.sessionID, ctx.client))) {
      return
    }

--- a/src/hooks/auto-update-checker/constants.ts
+++ b/src/hooks/auto-update-checker/constants.ts
@@ -1,6 +1,5 @@
 import * as path from "node:path"
 import * as os from "node:os"
-import * as fs from "node:fs"
 import { getOpenCodeConfigDir } from "../../shared"

 export const PACKAGE_NAME = "oh-my-opencode"
--- a/src/hooks/auto-update-checker/hook/background-update-check.ts
+++ b/src/hooks/auto-update-checker/hook/background-update-check.ts
@@ -4,7 +4,7 @@ import { log } from "../../../shared/logger"
 import { invalidatePackage } from "../cache"
 import { PACKAGE_NAME } from "../constants"
 import { extractChannel } from "../version-channel"
-import { findPluginEntry, getCachedVersion, getLatestVersion, updatePinnedVersion, revertPinnedVersion } from "../checker"
+import { findPluginEntry, getCachedVersion, getLatestVersion, revertPinnedVersion } from "../checker"
 import { showAutoUpdatedToast, showUpdateAvailableToast } from "./update-toasts"

 async function runBunInstallSafe(): Promise<boolean> {
--- a/src/hooks/category-skill-reminder/hook.ts
+++ b/src/hooks/category-skill-reminder/hook.ts
@@ -2,6 +2,7 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import type { AvailableSkill } from "../../agents/dynamic-agent-prompt-builder"
 import { getSessionAgent } from "../../features/claude-code-session-state"
 import { log } from "../../shared"
+import { getAgentConfigKey } from "../../shared/agent-display-names"
 import { buildReminderMessage } from "./formatter"

 /**
@@ -75,11 +76,11 @@ export function createCategorySkillReminderHook(
  function isTargetAgent(sessionID: string, inputAgent?: string): boolean {
    const agent = getSessionAgent(sessionID) ?? inputAgent
    if (!agent) return false
-    const agentLower = agent.toLowerCase()
+    const agentKey = getAgentConfigKey(agent)
    return (
-      TARGET_AGENTS.has(agentLower) ||
-      agentLower.includes("sisyphus") ||
-      agentLower.includes("atlas")
+      TARGET_AGENTS.has(agentKey) ||
+      agentKey.includes("sisyphus") ||
+      agentKey.includes("atlas")
    )
  }

--- a/src/hooks/claude-code-hooks/AGENTS.md
+++ b/src/hooks/claude-code-hooks/AGENTS.md
@@ -2,7 +2,7 @@

 ## OVERVIEW

-Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode events to execute external scripts/commands.
+Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode events to execute external scripts/commands defined in settings.json.

 **Config Sources** (priority): `.claude/settings.local.json` > `.claude/settings.json` (project) > `~/.claude/settings.json` (global)

@@ -10,21 +10,26 @@ Full Claude Code `settings.json` hook compatibility layer. Intercepts OpenCode e
 ```
 claude-code-hooks/
 ├── index.ts              # Barrel export
-├── claude-code-hooks-hook.ts  # Main factory
-├── config.ts             # Claude settings.json loader
-├── config-loader.ts      # Extended plugin config
-├── pre-tool-use.ts       # PreToolUse hook executor
-├── post-tool-use.ts      # PostToolUse hook executor
-├── user-prompt-submit.ts # UserPromptSubmit executor
-├── stop.ts               # Stop hook executor
-├── pre-compact.ts        # PreCompact executor
-├── transcript.ts         # Tool use recording
-├── tool-input-cache.ts   # Pre→post input caching
+├── claude-code-hooks-hook.ts  # Main factory (22 lines)
+├── config.ts             # Claude settings.json loader (105 lines)
+├── config-loader.ts      # Extended plugin config (107 lines)
+├── pre-tool-use.ts       # PreToolUse hook executor (173 lines)
+├── post-tool-use.ts      # PostToolUse hook executor (200 lines)
+├── user-prompt-submit.ts # UserPromptSubmit executor (125 lines)
+├── stop.ts               # Stop hook executor (122 lines)
+├── pre-compact.ts        # PreCompact executor (110 lines)
+├── transcript.ts         # Tool use recording (235 lines)
+├── tool-input-cache.ts   # Pre→post input caching (51 lines)
 ├── todo.ts               # Todo integration
-├── session-hook-state.ts # Active state tracking
-├── types.ts              # Hook & IO type definitions
-├── plugin-config.ts      # Default config constants
+├── session-hook-state.ts # Active state tracking (11 lines)
+├── types.ts              # Hook & IO type definitions (204 lines)
+├── plugin-config.ts      # Default config constants (12 lines)
 └── handlers/             # Event handlers (5 files)
+    ├── pre-compact-handler.ts
+    ├── tool-execute-before-handler.ts
+    ├── tool-execute-after-handler.ts
+    ├── chat-message-handler.ts
+    └── session-event-handler.ts
 ```

 ## HOOK LIFECYCLE
--- a/src/hooks/claude-code-hooks/handlers/tool-execute-after-handler.ts
+++ b/src/hooks/claude-code-hooks/handlers/tool-execute-after-handler.ts
@@ -9,7 +9,7 @@ import {
 import { getToolInput } from "../tool-input-cache"
 import { appendTranscriptEntry, getTranscriptPath } from "../transcript"
 import type { PluginConfig } from "../types"
-import { isHookDisabled, log } from "../../../shared"
+import { isHookDisabled } from "../../../shared"

 export function createToolExecuteAfterHandler(ctx: PluginInput, config: PluginConfig) {
 	return async (
--- a/src/hooks/context-window-monitor.ts
+++ b/src/hooks/context-window-monitor.ts
@@ -27,7 +27,7 @@ interface CachedTokenState {
  tokens: TokenInfo
 }

-export function createContextWindowMonitorHook(ctx: PluginInput) {
+export function createContextWindowMonitorHook(_ctx: PluginInput) {
  const remindedSessions = new Set<string>()
  const tokenCache = new Map<string, CachedTokenState>()

--- a/src/hooks/directory-agents-injector/constants.ts
+++ b/src/hooks/directory-agents-injector/constants.ts
@@ -1,7 +1,5 @@
 import { join } from "node:path";
-import { getOpenCodeStorageDir } from "../../shared/data-path";
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir();
+import { OPENCODE_STORAGE } from "../../shared";
 export const AGENTS_INJECTOR_STORAGE = join(
  OPENCODE_STORAGE,
  "directory-agents",
--- a/src/hooks/directory-readme-injector/constants.ts
+++ b/src/hooks/directory-readme-injector/constants.ts
@@ -1,7 +1,5 @@
 import { join } from "node:path";
-import { getOpenCodeStorageDir } from "../../shared/data-path";
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir();
+import { OPENCODE_STORAGE } from "../../shared";
 export const README_INJECTOR_STORAGE = join(
  OPENCODE_STORAGE,
  "directory-readme",
--- a/src/hooks/interactive-bash-session/constants.ts
+++ b/src/hooks/interactive-bash-session/constants.ts
@@ -1,7 +1,5 @@
 import { join } from "node:path";
-import { getOpenCodeStorageDir } from "../../shared/data-path";
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir();
+import { OPENCODE_STORAGE } from "../../shared";
 export const INTERACTIVE_BASH_SESSION_STORAGE = join(
  OPENCODE_STORAGE,
  "interactive-bash-session",
--- a/src/hooks/interactive-bash-session/hook.ts
+++ b/src/hooks/interactive-bash-session/hook.ts
@@ -1,5 +1,5 @@
 import type { PluginInput } from "@opencode-ai/plugin";
-import { loadInteractiveBashSessionState, saveInteractiveBashSessionState, clearInteractiveBashSessionState } from "./storage";
+import { saveInteractiveBashSessionState, clearInteractiveBashSessionState } from "./storage";
 import { buildSessionReminderMessage } from "./constants";
 import type { InteractiveBashSessionState } from "./types";
 import { tokenizeCommand, findSubcommand, extractSessionNameFromTokens } from "./parser";
--- a/src/hooks/interactive-bash-session/state-manager.ts
+++ b/src/hooks/interactive-bash-session/state-manager.ts
@@ -1,7 +1,6 @@
 import type { InteractiveBashSessionState } from "./types";
-import { loadInteractiveBashSessionState, saveInteractiveBashSessionState } from "./storage";
+import { loadInteractiveBashSessionState } from "./storage";
 import { OMO_SESSION_PREFIX } from "./constants";
-import { subagentSessions } from "../../features/claude-code-session-state";

 export function getOrCreateState(sessionID: string, sessionStates: Map<string, InteractiveBashSessionState>): InteractiveBashSessionState {
  if (!sessionStates.has(sessionID)) {
@@ -32,9 +31,4 @@ export async function killAllTrackedSessions(
      await proc.exited;
    } catch {}
  }
-
-  for (const sessionId of subagentSessions) {
-    // Note: ctx is not available here, so we can't call ctx.client.session.abort
-    // This will need to be handled in the hook where ctx is available
-  }
 }
--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@@ -1,5 +1,12 @@
 import { describe, it, expect, mock, beforeEach } from "bun:test"
-import { createPreemptiveCompactionHook } from "./preemptive-compaction"
+
+const logMock = mock(() => {})
+
+mock.module("../shared/logger", () => ({
+  log: logMock,
+}))
+
+const { createPreemptiveCompactionHook } = await import("./preemptive-compaction")

 function createMockCtx() {
  return {
@@ -21,6 +28,7 @@ describe("preemptive-compaction", () => {

  beforeEach(() => {
    ctx = createMockCtx()
+    logMock.mockClear()
  })

  // #given event caches token info from message.updated
@@ -152,4 +160,45 @@ describe("preemptive-compaction", () => {

    expect(ctx.client.session.summarize).not.toHaveBeenCalled()
  })
+
+  it("should log summarize errors instead of swallowing them", async () => {
+    //#given
+    const hook = createPreemptiveCompactionHook(ctx as never)
+    const sessionID = "ses_log_error"
+    const summarizeError = new Error("summarize failed")
+    ctx.client.session.summarize.mockRejectedValueOnce(summarizeError)
+
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-5",
+            finish: true,
+            tokens: {
+              input: 170000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    //#when
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_log" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    //#then
+    expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", {
+      sessionID,
+      error: String(summarizeError),
+    })
+  })
 })
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@@ -1,3 +1,5 @@
+import { log } from "../shared/logger"
+
 const DEFAULT_ACTUAL_LIMIT = 200_000

 const ANTHROPIC_ACTUAL_LIMIT =
@@ -76,8 +78,8 @@ export function createPreemptiveCompactionHook(ctx: PluginInput) {
      })

      compactedSessions.add(sessionID)
-    } catch {
-      // best-effort; do not disrupt tool execution
+    } catch (error) {
+      log("[preemptive-compaction] Compaction failed", { sessionID, error: String(error) })
    } finally {
      compactionInProgress.delete(sessionID)
    }
--- a/src/hooks/prometheus-md-only/agent-resolution.ts
+++ b/src/hooks/prometheus-md-only/agent-resolution.ts
@@ -1,24 +1,29 @@
-import { existsSync, readdirSync } from "node:fs"
-import { join } from "node:path"
-import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
+import type { PluginInput } from "@opencode-ai/plugin"
+
+import { findNearestMessageWithFields, findFirstMessageWithAgent } from "../../features/hook-message-injector"
+import {
+  findFirstMessageWithAgentFromSDK,
+  findNearestMessageWithFieldsFromSDK,
+} from "../../features/hook-message-injector"
 import { getSessionAgent } from "../../features/claude-code-session-state"
 import { readBoulderState } from "../../features/boulder-state"
+import { getMessageDir } from "../../shared/opencode-message-dir"
+import { isSqliteBackend } from "../../shared/opencode-storage-detection"

-function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
+type OpencodeClient = PluginInput["client"]

-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
+async function getAgentFromMessageFiles(
+  sessionID: string,
+  client?: OpencodeClient
+): Promise<string | undefined> {
+  if (isSqliteBackend() && client) {
+    const firstAgent = await findFirstMessageWithAgentFromSDK(client, sessionID)
+    if (firstAgent) return firstAgent

-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
+    const nearest = await findNearestMessageWithFieldsFromSDK(client, sessionID)
+    return nearest?.agent
  }

-  return null
-}
-
-function getAgentFromMessageFiles(sessionID: string): string | undefined {
  const messageDir = getMessageDir(sessionID)
  if (!messageDir) return undefined
  return findFirstMessageWithAgent(messageDir) ?? findNearestMessageWithFields(messageDir)?.agent
@@ -36,7 +41,11 @@ function getAgentFromMessageFiles(sessionID: string): string | undefined {
 * - Message files return "prometheus" (oldest message from /plan)
 * - But boulder.json has agent: "atlas" (set by /start-work)
 */
-export function getAgentFromSession(sessionID: string, directory: string): string | undefined {
+export async function getAgentFromSession(
+  sessionID: string,
+  directory: string,
+  client?: OpencodeClient
+): Promise<string | undefined> {
  // Check in-memory first (current session)
  const memoryAgent = getSessionAgent(sessionID)
  if (memoryAgent) return memoryAgent
@@ -48,5 +57,5 @@ export function getAgentFromSession(sessionID: string, directory: string): strin
  }

  // Fallback to message files
-  return getAgentFromMessageFiles(sessionID)
+  return await getAgentFromMessageFiles(sessionID, client)
 }
--- a/src/hooks/prometheus-md-only/hook.ts
+++ b/src/hooks/prometheus-md-only/hook.ts
@@ -15,7 +15,7 @@ export function createPrometheusMdOnlyHook(ctx: PluginInput) {
      input: { tool: string; sessionID: string; callID: string },
      output: { args: Record<string, unknown>; message?: string }
    ): Promise<void> => {
-      const agentName = getAgentFromSession(input.sessionID, ctx.directory)
+      const agentName = await getAgentFromSession(input.sessionID, ctx.directory, ctx.client)

      if (!isPrometheusAgent(agentName)) {
        return
--- a/src/hooks/prometheus-md-only/index.test.ts
+++ b/src/hooks/prometheus-md-only/index.test.ts
@@ -1,16 +1,21 @@
-import { describe, expect, test, beforeEach, afterEach } from "bun:test"
+import { describe, expect, test, beforeEach, afterEach, mock } from "bun:test"
 import { mkdirSync, rmSync, writeFileSync } from "node:fs"
 import { join } from "node:path"
 import { tmpdir } from "node:os"
 import { randomUUID } from "node:crypto"
 import { SYSTEM_DIRECTIVE_PREFIX } from "../../shared/system-directive"
 import { clearSessionAgent } from "../../features/claude-code-session-state"
+// Force stable (JSON) mode for tests that rely on message file storage
+mock.module("../../shared/opencode-storage-detection", () => ({
+  isSqliteBackend: () => false,
+  resetSqliteBackendCache: () => {},
+}))

-import { createPrometheusMdOnlyHook } from "./index"
-import { MESSAGE_STORAGE } from "../../features/hook-message-injector"
+const { createPrometheusMdOnlyHook } = await import("./index")
+const { MESSAGE_STORAGE } = await import("../../features/hook-message-injector")

 describe("prometheus-md-only", () => {
-  const TEST_SESSION_ID = "test-session-prometheus"
+  const TEST_SESSION_ID = "ses_test_prometheus"
  let testMessageDir: string

  function createMockPluginInput() {
@@ -546,7 +551,7 @@ describe("prometheus-md-only", () => {
      writeFileSync(BOULDER_FILE, JSON.stringify({
        active_plan: "/test/plan.md",
        started_at: new Date().toISOString(),
-        session_ids: ["other-session-id"],
+        session_ids: ["ses_other_session_id"],
        plan_name: "test-plan",
        agent: "atlas"
      }))
@@ -578,7 +583,7 @@ describe("prometheus-md-only", () => {
      const hook = createPrometheusMdOnlyHook(createMockPluginInput())
      const input = {
        tool: "Write",
-        sessionID: "non-existent-session",
+        sessionID: "ses_non_existent_session",
        callID: "call-1",
      }
      const output = {
--- a/src/hooks/ralph-loop/continuation-prompt-injector.ts
+++ b/src/hooks/ralph-loop/continuation-prompt-injector.ts
@@ -3,6 +3,7 @@ import { log } from "../../shared/logger"
 import { findNearestMessageWithFields } from "../../features/hook-message-injector"
 import { getMessageDir } from "./message-storage-directory"
 import { withTimeout } from "./with-timeout"
+import { normalizeSDKResponse } from "../../shared"

 type MessageInfo = {
 	agent?: string
@@ -25,7 +26,7 @@ export async function injectContinuationPrompt(
 			}),
 			options.apiTimeoutMs,
 		)
-		const messages = (messagesResp.data ?? []) as Array<{ info?: MessageInfo }>
+		const messages = normalizeSDKResponse(messagesResp, [] as Array<{ info?: MessageInfo }>)
 		for (let i = messages.length - 1; i >= 0; i--) {
 			const info = messages[i]?.info
 			if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
--- a/src/hooks/ralph-loop/message-storage-directory.ts
+++ b/src/hooks/ralph-loop/message-storage-directory.ts
@@ -1,16 +1 @@
-import { existsSync, readdirSync } from "node:fs"
-import { join } from "node:path"
-import { MESSAGE_STORAGE } from "../../features/hook-message-injector"
-
-export function getMessageDir(sessionID: string): string | null {
-	if (!existsSync(MESSAGE_STORAGE)) return null
-
-	const directPath = join(MESSAGE_STORAGE, sessionID)
-	if (existsSync(directPath)) return directPath
-
-	for (const dir of readdirSync(MESSAGE_STORAGE)) {
-		const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-		if (existsSync(sessionPath)) return sessionPath
-	}
-	return null
-}
+export { getMessageDir } from "../../shared/opencode-message-dir"
--- a/src/hooks/rules-injector/constants.ts
+++ b/src/hooks/rules-injector/constants.ts
@@ -1,7 +1,5 @@
 import { join } from "node:path";
-import { getOpenCodeStorageDir } from "../../shared/data-path";
-
-export const OPENCODE_STORAGE = getOpenCodeStorageDir();
+import { OPENCODE_STORAGE } from "../../shared";
 export const RULES_INJECTOR_STORAGE = join(OPENCODE_STORAGE, "rules-injector");

 export const PROJECT_MARKERS = [
--- a/Show More
+++ b/Show More