release: v3.8.4

fix(test): sync AGENTS_WITH_TODO_DENY with tool-config-handler implementation
test(hashline-edit): cover concise responses and anchor alias normalization
2026-02-23 17:11:38 +00:00 · 2026-02-24 02:08:30 +09:00 · 2026-02-23 18:51:37 +09:00 · 2026-02-23 18:51:32 +09:00 · 2026-02-23 18:51:25 +09:00 · 2026-02-23 07:06:25 +00:00
143 changed files with 7648 additions and 3470 deletions
--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -82,6 +82,9 @@
    "hashline_edit": {
      "type": "boolean"
    },
+    "model_fallback": {
+      "type": "boolean"
+    },
    "agents": {
      "type": "object",
      "properties": {
@@ -288,6 +291,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -495,6 +510,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -702,6 +729,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -909,6 +948,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -1116,6 +1167,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -1323,6 +1386,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -1530,6 +1605,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -1737,6 +1824,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -1944,6 +2043,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -2151,6 +2262,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -2358,6 +2481,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -2565,6 +2700,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -2772,6 +2919,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
@@ -2979,6 +3138,18 @@
                }
              },
              "additionalProperties": false
+            },
+            "compaction": {
+              "type": "object",
+              "properties": {
+                "model": {
+                  "type": "string"
+                },
+                "variant": {
+                  "type": "string"
+                }
+              },
+              "additionalProperties": false
            }
          },
          "additionalProperties": false
--- a/bun-test.d.ts
+++ b/bun-test.d.ts
@@ -0,0 +1,23 @@
+declare module "bun:test" { // Ambient typings for the "bun:test" members declared in this file
+  export function describe(name: string, fn: () => void): void // Suite callback is typed without a Promise-returning variant
+  export function it(name: string, fn: () => void | Promise<void>): void // Test callback may be sync or async
+  export function beforeEach(fn: () => void | Promise<void>): void // Hook callbacks below share the same sync-or-async signature
+  export function afterEach(fn: () => void | Promise<void>): void
+  export function beforeAll(fn: () => void | Promise<void>): void
+  export function afterAll(fn: () => void | Promise<void>): void
+  export function mock<T extends (...args: never[]) => unknown>(fn: T): T // Typed as returning the same function type T it receives
+
+  interface Matchers { // Assertion methods available on the value returned by expect(); not exported from the module
+    toBe(expected: unknown): void
+    toEqual(expected: unknown): void
+    toContain(expected: unknown): void
+    toMatch(expected: RegExp | string): void
+    toHaveLength(expected: number): void
+    toBeGreaterThan(expected: number): void
+    toThrow(expected?: RegExp | string): void // The expected pattern/message is optional
+    toStartWith(expected: string): void
+    not: Matchers // Exposes the same matcher set; presumably the negated form — confirm against the bun:test docs
+  }
+
+  export function expect(received: unknown): Matchers // Accepts any value; all matchers are typed to return void
+}
--- a/bun.lock
+++ b/bun.lock
@@ -28,13 +28,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.7.4",
-        "oh-my-opencode-darwin-x64": "3.7.4",
-        "oh-my-opencode-linux-arm64": "3.7.4",
-        "oh-my-opencode-linux-arm64-musl": "3.7.4",
-        "oh-my-opencode-linux-x64": "3.7.4",
-        "oh-my-opencode-linux-x64-musl": "3.7.4",
-        "oh-my-opencode-windows-x64": "3.7.4",
+        "oh-my-opencode-darwin-arm64": "3.8.1",
+        "oh-my-opencode-darwin-x64": "3.8.1",
+        "oh-my-opencode-linux-arm64": "3.8.1",
+        "oh-my-opencode-linux-arm64-musl": "3.8.1",
+        "oh-my-opencode-linux-x64": "3.8.1",
+        "oh-my-opencode-linux-x64-musl": "3.8.1",
+        "oh-my-opencode-windows-x64": "3.8.1",
      },
    },
  },
@@ -228,19 +228,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.7.4", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-0m84UiVlOC2gLSFIOTmCsxFCB9CmyWV9vGPYqfBFLoyDJmedevU3R5N4ze54W7jv4HSSxz02Zwr+QF5rkQANoA=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.8.1", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-vbtS0WUFOZpufKzlX2G83fIDry3rpiXej8zNuXNCkx7hF34rK04rj0zeBH9dL+kdNV0Ys0Wl1rR1Mjto28UcAw=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.7.4", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Z2dQy8jmc6DuwbN9bafhOwjZBkAkTWlfLAz1tG6xVzMqTcp4YOrzrHFOBRNeFKpOC/x7yUpO3sq/YNCclloelw=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.8.1", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-gLz6dLNg9hr7roqBjaqlxta6+XYCs032/FiE0CiwypIBtYOq5EAgDVJ95JY5DQ2M+3Un028d50yMfwsfNfGlSw=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.7.4", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-TZIsK6Dl6yX6pSTocls91bjnvoY/6/kiGnmgdsoDKcPYZ7XuBQaJwH0dK7t9/sxuDI+wKhmtrmLwKSoYOIqsRw=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.8.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-teAIuHlR5xOAoUmA+e0bGzy3ikgIr+nCdyOPwHYm8jIp0aBUWAqbcdoQLeNTgenWpoM8vhHk+2xh4WcCeQzjEA=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.7.4", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-UwPOoQP0+1eCKP/XTDsnLJDK5jayiL4VrKz0lfRRRojl1FWvInmQumnDnluvnxW6knU7dFM3yDddlZYG6tEgcw=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.8.1", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-VzBEq1H5dllEloouIoLdbw1icNUW99qmvErFrNj66mX42DNXK+f1zTtvBG8U6eeFfUBRRJoUjdCsvO65f8BkFA=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.7.4", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-+TeA0Bs5wK9EMfKiEEFfyfVqdBDUjDzN8POF8JJibN0GPy1oNIGGEWIJG2cvC5onpnYEvl448vkFbkCUK0g9SQ=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.8.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-8hDcb8s+wdQpQObSmiyaaTV0P/js2Bs9Lu+HmzrkKjuMLXXj/Gk7K0kKWMoEnMbMGfj86GfBHHIWmu9juI/SjA=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.7.4", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-YzX6wFtk8RoTHkAZkfLCVyCU4yjN8D7agj/jhOnFKW50fZYa8zX+/4KLZx0IfanVpXTgrs3iiuKoa87KLDfCxQ=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.8.1", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-idyH5bdYn7wrLkIkYr83omN83E2BjA/9DUHCX2we8VXbhDVbBgmMpUg8B8nKnd5NK/SyLHgRs5QqQJw8XBC0cQ=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.7.4", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-x39M2eFJI6pqv4go5Crf1H2SbPGFmXHIDNtbsSa5nRNcrqTisLrYGW8uXpOrqjntBeTAUBdwZmmoy6zgxHsz8w=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.8.1", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-O30L1PUF9aq1vSOyadcXQOLnDFSTvYn6cGd5huh0LAK/us0hGezoahtXegMdFtDXPIIREJlkRQhyJiafza7YgA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/docs/guide/agent-model-matching.md
+++ b/docs/guide/agent-model-matching.md
@@ -1,10 +1,164 @@
 # Agent-Model Matching Guide

-> **For agents and users**: How to pick the right model for each agent. Read this before customizing model settings.
+> **For agents and users**: Why each agent needs a specific model — and how to customize without breaking things.

-## Example Configuration
+## The Core Insight: Models Are Developers

-Here's a practical example configuration showing agent-model assignments:
+Think of AI models as developers on a team. Each has a different brain, different personality, different strengths. **A model isn't just "smarter" or "dumber." It thinks differently.** Give the same instruction to Claude and GPT, and they'll interpret it in fundamentally different ways.
+
+This isn't a bug. It's the foundation of the entire system.
+
+Oh My OpenCode assigns each agent a model that matches its *working style* — like building a team where each person is in the role that fits their personality.
+
+### Sisyphus: The Sociable Lead
+
+Sisyphus is the developer who knows everyone, goes everywhere, and gets things done through communication and coordination. He talks to other agents, understands context across the whole codebase, delegates work intelligently, and codes well too. But deep, purely technical problems? He'll struggle a bit.
+
+**This is why Sisyphus uses Claude / Kimi / GLM.** These models excel at:
+- Following complex, multi-step instructions (Sisyphus's prompt is ~1,100 lines)
+- Maintaining conversation flow across many tool calls
+- Understanding nuanced delegation and orchestration patterns
+- Producing well-structured, communicative output
+
+Using Sisyphus with GPT would be like taking your best project manager — the one who coordinates everyone, runs standups, and keeps the whole team aligned — and sticking them in a room alone to debug a race condition. Wrong fit. No GPT prompt exists for Sisyphus, and for good reason.
+
+### Hephaestus: The Deep Specialist
+
+Hephaestus is the developer who stays in their room coding all day. Doesn't talk much. Might seem socially awkward. But give them a hard technical problem and they'll emerge three hours later with a solution nobody else could have found.
+
+**This is why Hephaestus uses GPT-5.3 Codex.** Codex is built for exactly this:
+- Deep, autonomous exploration without hand-holding
+- Multi-file reasoning across complex codebases
+- Principle-driven execution (give a goal, not a recipe)
+- Working independently for extended periods
+
+Using Hephaestus with GLM or Kimi would be like assigning your most communicative, sociable developer to sit alone and do nothing but deep technical work. They'd get it done eventually, but they wouldn't shine — you'd be wasting exactly the skills that make them valuable.
+
+### The Takeaway
+
+Every agent's prompt is tuned to match its model's personality. **When you change the model, you change the brain — and the same instructions get understood completely differently.** Model matching isn't about "better" or "worse." It's about fit.
+
+---
+
+## How Claude and GPT Think Differently
+
+This matters for understanding why some agents support both model families while others don't.
+
+**Claude** responds to **mechanics-driven** prompts — detailed checklists, templates, step-by-step procedures. More rules = more compliance. You can write a 1,100-line prompt with nested workflows and Claude will follow every step.
+
+**GPT** (especially 5.2+) responds to **principle-driven** prompts — concise principles, XML structure, explicit decision criteria. More rules = more contradiction surface = more drift. GPT works best when you state the goal and let it figure out the mechanics.
+
+Real example: Prometheus's Claude prompt is ~1,100 lines across 7 files. The GPT prompt achieves the same behavior with 3 principles in ~121 lines. Same outcome, completely different approach.
+
+Agents that support both families (Prometheus, Atlas) auto-detect your model at runtime and switch prompts via `isGptModel()`. You don't have to think about it.
+
+---
+
+## Agent Profiles
+
+### Communicators → Claude / Kimi / GLM
+
+These agents have Claude-optimized prompts — long, detailed, mechanics-driven. They need models that reliably follow complex, multi-layered instructions.
+
+| Agent | Role | Fallback Chain | Notes |
+|-------|------|----------------|-------|
+| **Sisyphus** | Main orchestrator | Claude Opus → Kimi K2.5 → GLM 5 | **No GPT prompt.** Claude-family only. |
+| **Metis** | Plan gap analyzer | Claude Opus → Kimi K2.5 → GPT-5.2 → Gemini 3 Pro | Claude preferred, GPT acceptable fallback. |
+
+### Dual-Prompt Agents → Claude preferred, GPT supported
+
+These agents ship separate prompts for Claude and GPT families. They auto-detect your model and switch at runtime.
+
+| Agent | Role | Fallback Chain | Notes |
+|-------|------|----------------|-------|
+| **Prometheus** | Strategic planner | Claude Opus → GPT-5.2 → Kimi K2.5 → Gemini 3 Pro | Interview-mode planning. GPT prompt is compact and principle-driven. |
+| **Atlas** | Todo orchestrator | Kimi K2.5 → Claude Sonnet → GPT-5.2 | Kimi is the sweet spot — Claude-like but cheaper. |
+
+### Deep Specialists → GPT
+
+These agents are built for GPT's principle-driven style. Their prompts assume autonomous, goal-oriented execution. Don't override them to Claude as the primary model; where Claude Opus appears in a chain below, it is only a fallback.
+
+| Agent | Role | Fallback Chain | Notes |
+|-------|------|----------------|-------|
+| **Hephaestus** | Autonomous deep worker | GPT-5.3 Codex only | No fallback. Requires GPT access. The craftsman. |
+| **Oracle** | Architecture consultant | GPT-5.2 → Gemini 3 Pro → Claude Opus | Read-only high-IQ consultation. |
+| **Momus** | Ruthless reviewer | GPT-5.2 → Claude Opus → Gemini 3 Pro | Verification and plan review. |
+
+### Utility Runners → Speed over Intelligence
+
+These agents do grep, search, and retrieval. They intentionally use the fastest, cheapest models available. **Don't "upgrade" them to Opus** — that's hiring a senior engineer to file paperwork.
+
+| Agent | Role | Fallback Chain | Notes |
+|-------|------|----------------|-------|
+| **Explore** | Fast codebase grep | Grok Code Fast → MiniMax → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel. |
+| **Librarian** | Docs/code search | Gemini Flash → MiniMax → GLM | Doc retrieval doesn't need deep reasoning. |
+| **Multimodal Looker** | Vision/screenshots | Kimi K2.5 → Gemini Flash → GPT-5.2 → GLM-4.6v | Kimi excels at multimodal understanding. |
+
+---
+
+## Model Families
+
+### Claude Family
+
+Claude models plus models that behave like Claude (Kimi, GLM): communicative, instruction-following, structured output. Best for agents that need to follow complex multi-step prompts.
+
+| Model | Strengths |
+|-------|-----------|
+| **Claude Opus 4.6** | Best overall. Highest compliance with complex prompts. Default for Sisyphus. |
+| **Claude Sonnet 4.6** | Faster, cheaper. Good balance for everyday tasks. |
+| **Claude Haiku 4.5** | Fast and cheap. Good for quick tasks and utility work. |
+| **Kimi K2.5** | Behaves very similarly to Claude. Great all-rounder at lower cost. Default for Atlas. |
+| **GLM 5** | Claude-like behavior. Solid for orchestration tasks. |
+
+### GPT Family
+
+Principle-driven, explicit reasoning, deep technical capability. Best for agents that work autonomously on complex problems.
+
+| Model | Strengths |
+|-------|-----------|
+| **GPT-5.3 Codex** | Deep coding powerhouse. Autonomous exploration. Required for Hephaestus. |
+| **GPT-5.2** | High intelligence, strategic reasoning. Default for Oracle and Momus. |
+| **GPT-5-Nano** | Ultra-cheap, fast. Good for simple utility tasks. |
+
+### Other Models
+
+| Model | Strengths |
+|-------|-----------|
+| **Gemini 3 Pro** | Excels at visual/frontend tasks. Different reasoning style. Default for `visual-engineering` and `artistry`. |
+| **Gemini 3 Flash** | Fast. Good for doc search and light tasks. |
+| **Grok Code Fast 1** | Blazing fast code grep. Default for Explore agent. |
+| **MiniMax M2.5** | Fast and smart. Good for utility tasks and search/retrieval. |
+
+### About Free-Tier Fallbacks
+
+You may see model names like `kimi-k2.5-free`, `minimax-m2.5-free`, or `big-pickle` (GLM 4.6) in the source code or logs. These are free-tier versions of the same model families, served through the OpenCode Zen provider. They exist as lower-priority entries in fallback chains.
+
+You don't need to configure them. The system includes them so it degrades gracefully when you don't have every paid subscription. If you have the paid version, the paid version is always preferred.
+
+---
+
+## Task Categories
+
+When agents delegate work, they don't pick a model name — they pick a **category**. The category maps to the right model automatically.
+
+| Category | When Used | Fallback Chain |
+|----------|-----------|----------------|
+| `visual-engineering` | Frontend, UI, CSS, design | Gemini 3 Pro → GLM 5 → Claude Opus |
+| `ultrabrain` | Maximum reasoning needed | GPT-5.3 Codex → Gemini 3 Pro → Claude Opus |
+| `deep` | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3 Pro |
+| `artistry` | Creative, novel approaches | Gemini 3 Pro → Claude Opus → GPT-5.2 |
+| `quick` | Simple, fast tasks | Claude Haiku → Gemini Flash → GPT-5-Nano |
+| `unspecified-high` | General complex work | Claude Opus → GPT-5.2 → Gemini 3 Pro |
+| `unspecified-low` | General standard work | Claude Sonnet → GPT-5.3 Codex → Gemini Flash |
+| `writing` | Text, docs, prose | Gemini Flash → Claude Sonnet |
+
+See the [Orchestration System Guide](./orchestration.md) for how agents dispatch tasks to categories.
+
+---
+
+## Customization
+
+### Example Configuration

 ```jsonc
 {
@@ -29,19 +183,10 @@ Here's a practical example configuration showing agent-model assignments:
  },

  "categories": {
-    // quick — trivial tasks
    "quick": { "model": "opencode/gpt-5-nano" },
-
-    // unspecified-low — moderate tasks
    "unspecified-low": { "model": "kimi-for-coding/k2p5" },
-
-    // unspecified-high — complex work
    "unspecified-high": { "model": "anthropic/claude-sonnet-4-6", "variant": "max" },
-
-    // visual-engineering — Gemini dominates visual tasks
    "visual-engineering": { "model": "google/gemini-3-pro", "variant": "high" },
-
-    // writing — docs/prose
    "writing": { "model": "kimi-for-coding/k2p5" }
  },

@@ -53,183 +198,27 @@ Here's a practical example configuration showing agent-model assignments:
 }
 ```

-Run `opencode models` to see all available models on your system, and `opencode auth login` to authenticate with providers.
-
-## Model Families: Know Your Options
-
-Not all models behave the same way. Understanding which models are "similar" helps you make safe substitutions.
-
-### Claude-like Models (instruction-following, structured output)
-
-These models respond similarly to Claude and work well with oh-my-opencode's Claude-optimized prompts:
-
-| Model | Provider(s) | Notes |
-|-------|-------------|-------|
-| **Claude Opus 4.6** | anthropic, github-copilot, opencode | Best overall. Default for Sisyphus. |
-| **Claude Sonnet 4.6** | anthropic, github-copilot, opencode | Faster, cheaper. Good balance. |
-| **Claude Haiku 4.5** | anthropic, opencode | Fast and cheap. Good for quick tasks. |
-| **Kimi K2.5** | kimi-for-coding | Behaves very similarly to Claude. Great all-rounder. Default for Atlas. |
-| **Kimi K2.5 Free** | opencode | Free-tier Kimi. Rate-limited but functional. |
-| **GLM 5** | zai-coding-plan, opencode | Claude-like behavior. Good for broad tasks. |
-| **Big Pickle (GLM 4.6)** | opencode | Free-tier GLM. Decent fallback. |
-
-### GPT Models (explicit reasoning, principle-driven)
-
-GPT models need differently structured prompts. Some agents auto-detect GPT and switch prompts:
-
-| Model | Provider(s) | Notes |
-|-------|-------------|-------|
-| **GPT-5.3-codex** | openai, github-copilot, opencode | Deep coding powerhouse. Required for Hephaestus. |
-| **GPT-5.2** | openai, github-copilot, opencode | High intelligence. Default for Oracle. |
-| **GPT-5-Nano** | opencode | Ultra-cheap, fast. Good for simple utility tasks. |
-
-### Different-Behavior Models
-
-These models have unique characteristics — don't assume they'll behave like Claude or GPT:
-
-| Model | Provider(s) | Notes |
-|-------|-------------|-------|
-| **Gemini 3 Pro** | google, github-copilot, opencode | Excels at visual/frontend tasks. Different reasoning style. |
-| **Gemini 3 Flash** | google, github-copilot, opencode | Fast, good for doc search and light tasks. |
-| **MiniMax M2.5** | venice | Fast and smart. Good for utility tasks. |
-| **MiniMax M2.5 Free** | opencode | Free-tier MiniMax. Fast for search/retrieval. |
-
-### Speed-Focused Models
-
-| Model | Provider(s) | Speed | Notes |
-|-------|-------------|-------|-------|
-| **Grok Code Fast 1** | github-copilot, venice | Very fast | Optimized for code grep/search. Default for Explore. |
-| **Claude Haiku 4.5** | anthropic, opencode | Fast | Good balance of speed and intelligence. |
-| **MiniMax M2.5 (Free)** | opencode, venice | Fast | Smart for its speed class. |
-| **GPT-5.3-codex-spark** | openai | Extremely fast | Blazing fast but compacts so aggressively that oh-my-opencode's context management doesn't work well with it. Not recommended for omo agents. |
-
---
-
-## Agent Roles and Recommended Models
-
-### Claude-Optimized Agents
-
-These agents have prompts tuned for Claude-family models. Use Claude > Kimi K2.5 > GLM 5 in that priority order.
-
-| Agent | Role | Default Chain | What It Does |
-|-------|------|---------------|--------------|
-| **Sisyphus** | Main ultraworker | Opus (max) → Kimi K2.5 → GLM 5 → Big Pickle | Primary coding agent. Orchestrates everything. **Never use GPT — no GPT prompt exists.** |
-| **Metis** | Plan review | Opus (max) → Kimi K2.5 → GPT-5.2 → Gemini 3 Pro | Reviews Prometheus plans for gaps. |
-
-### Dual-Prompt Agents (Claude + GPT auto-switch)
-
-These agents detect your model family at runtime and switch to the appropriate prompt. If you have GPT access, these agents can use it effectively.
-
-Priority: **Claude > GPT > Claude-like models**
-
-| Agent | Role | Default Chain | GPT Prompt? |
-|-------|------|---------------|-------------|
-| **Prometheus** | Strategic planner | Opus (max) → **GPT-5.2 (high)** → Kimi K2.5 → Gemini 3 Pro | Yes — XML-tagged, principle-driven (~300 lines vs ~1,100 Claude) |
-| **Atlas** | Todo orchestrator | **Kimi K2.5** → Sonnet → GPT-5.2 | Yes — GPT-optimized todo management |
-
-### GPT-Native Agents
-
-These agents are built for GPT. Don't override to Claude.
-
-| Agent | Role | Default Chain | Notes |
-|-------|------|---------------|-------|
-| **Hephaestus** | Deep autonomous worker | GPT-5.3-codex (medium) only | "Codex on steroids." No fallback. Requires GPT access. |
-| **Oracle** | Architecture/debugging | GPT-5.2 (high) → Gemini 3 Pro → Opus | High-IQ strategic backup. GPT preferred. |
-| **Momus** | High-accuracy reviewer | GPT-5.2 (medium) → Opus → Gemini 3 Pro | Verification agent. GPT preferred. |
-
-### Utility Agents (Speed > Intelligence)
-
-These agents do search, grep, and retrieval. They intentionally use fast, cheap models. **Don't "upgrade" them to Opus — it wastes tokens on simple tasks.**
-
-| Agent | Role | Default Chain | Design Rationale |
-|-------|------|---------------|------------------|
-| **Explore** | Fast codebase grep | MiniMax M2.5 Free → Grok Code Fast → MiniMax M2.5 → Haiku → GPT-5-Nano | Speed is everything. Grok is blazing fast for grep. |
-| **Librarian** | Docs/code search | MiniMax M2.5 Free → Gemini Flash → Big Pickle | Entirely free-tier. Doc retrieval doesn't need deep reasoning. |
-| **Multimodal Looker** | Vision/screenshots | Kimi K2.5 → Kimi Free → Gemini Flash → GPT-5.2 → GLM-4.6v | Kimi excels at multimodal understanding. |
-
---
-
-## Task Categories
-
-Categories control which model is used for `background_task` and `delegate_task`. See the [Orchestration System Guide](./orchestration.md) for how agents dispatch tasks to categories.
-
-| Category | When Used | Recommended Models | Notes |
-|----------|-----------|-------------------|-------|
-| `visual-engineering` | Frontend, UI, CSS, design | Gemini 3 Pro (high) → GLM 5 → Opus → Kimi K2.5 | Gemini dominates visual tasks |
-| `ultrabrain` | Maximum reasoning needed | GPT-5.3-codex (xhigh) → Gemini 3 Pro → Opus | Highest intelligence available |
-| `deep` | Deep coding, complex logic | GPT-5.3-codex (medium) → Opus → Gemini 3 Pro | Requires GPT availability |
-| `artistry` | Creative, novel approaches | Gemini 3 Pro (high) → Opus → GPT-5.2 | Requires Gemini availability |
-| `quick` | Simple, fast tasks | Haiku → Gemini Flash → GPT-5-Nano | Cheapest and fastest |
-| `unspecified-high` | General complex work | Opus (max) → GPT-5.2 (high) → Gemini 3 Pro | Default when no category fits |
-| `unspecified-low` | General standard work | Sonnet → GPT-5.3-codex (medium) → Gemini Flash | Everyday tasks |
-| `writing` | Text, docs, prose | Kimi K2.5 → Gemini Flash → Sonnet | Kimi produces best prose |
-
---
-
-## Why Different Models Need Different Prompts
-
-Claude and GPT models have fundamentally different instruction-following behaviors:
-
- **Claude models** respond well to **mechanics-driven** prompts — detailed checklists, templates, step-by-step procedures. More rules = more compliance.
- **GPT models** (especially 5.2+) respond better to **principle-driven** prompts — concise principles, XML-tagged structure, explicit decision criteria. More rules = more contradiction surface = more drift.
-
-Key insight from Codex Plan Mode analysis:
- Codex Plan Mode achieves the same results with 3 principles in ~121 lines that Prometheus's Claude prompt needs ~1,100 lines across 7 files
- The core concept is **"Decision Complete"** — a plan must leave ZERO decisions to the implementer
- GPT follows this literally when stated as a principle; Claude needs enforcement mechanisms
-
-This is why Prometheus and Atlas ship separate prompts per model family — they auto-detect and switch at runtime via `isGptModel()`.
-
---
-
-## Customization Guide
-
-### How to Customize
-
-Override in `oh-my-opencode.jsonc`:
-
-```jsonc
-{
-  "agents": {
-    "sisyphus": { "model": "kimi-for-coding/k2p5" },
-    "prometheus": { "model": "openai/gpt-5.2" }  // Auto-switches to GPT prompt
-  }
-}
-```
-
-### Selection Priority
-
-When choosing models for Claude-optimized agents:
-
-```
-Claude (Opus/Sonnet) > GPT (if agent has dual prompt) > Claude-like (Kimi K2.5, GLM 5)
-```
-
-When choosing models for GPT-native agents:
-
-```
-GPT (5.3-codex, 5.2) > Claude Opus (decent fallback) > Gemini (acceptable)
-```
+Run `opencode models` to see available models, and `opencode auth login` to authenticate with providers.

 ### Safe vs Dangerous Overrides

-**Safe** (same family):
- Sisyphus: Opus → Sonnet, Kimi K2.5, GLM 5
- Prometheus: Opus → GPT-5.2 (auto-switches prompt)
- Atlas: Kimi K2.5 → Sonnet, GPT-5.2 (auto-switches)
+**Safe** — same personality type:
+- Sisyphus: Opus → Sonnet, Kimi K2.5, GLM 5 (all communicative models)
+- Prometheus: Opus → GPT-5.2 (auto-switches to GPT prompt)
+- Atlas: Kimi K2.5 → Sonnet, GPT-5.2 (auto-switches to GPT prompt)

-**Dangerous** (no prompt support):
- Sisyphus → GPT: **No GPT prompt. Will degrade significantly.**
- Hephaestus → Claude: **Built for Codex. Claude can't replicate this.**
+**Dangerous** — personality mismatch:
+- Sisyphus → GPT: **No GPT prompt exists. Will degrade significantly.**
+- Hephaestus → Claude: **Built for Codex's autonomous style. Claude can't replicate this.**
 - Explore → Opus: **Massive cost waste. Explore needs speed, not intelligence.**
 - Librarian → Opus: **Same. Doc search doesn't need Opus-level reasoning.**

---
+### How Model Resolution Works

-## Provider Priority
+Each agent has a fallback chain. The system tries models in priority order until it finds one available through your connected providers. You don't need to configure providers per model — just authenticate (`opencode auth login`) and the system figures out which models are available and where.

 ```
-Native (anthropic/, openai/, google/) > Kimi for Coding > GitHub Copilot > Venice > OpenCode Zen > Z.ai Coding Plan
+Agent Request → User Override (if configured) → Fallback Chain → System Default
 ```

 ---
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.8.0",
-    "oh-my-opencode-darwin-x64": "3.8.0",
-    "oh-my-opencode-linux-arm64": "3.8.0",
-    "oh-my-opencode-linux-arm64-musl": "3.8.0",
-    "oh-my-opencode-linux-x64": "3.8.0",
-    "oh-my-opencode-linux-x64-musl": "3.8.0",
-    "oh-my-opencode-windows-x64": "3.8.0"
+    "oh-my-opencode-darwin-arm64": "3.8.4",
+    "oh-my-opencode-darwin-x64": "3.8.4",
+    "oh-my-opencode-linux-arm64": "3.8.4",
+    "oh-my-opencode-linux-arm64-musl": "3.8.4",
+    "oh-my-opencode-linux-x64": "3.8.4",
+    "oh-my-opencode-linux-x64-musl": "3.8.4",
+    "oh-my-opencode-windows-x64": "3.8.4"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.8.0",
+  "version": "3.8.4",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -1671,6 +1671,30 @@
      "created_at": "2026-02-21T15:09:19Z",
      "repoId": 1108837393,
      "pullRequestNo": 2021
+    },
+    {
+      "name": "coleleavitt",
+      "id": 75138914,
+      "comment_id": 3939630796,
+      "created_at": "2026-02-21T22:44:45Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2029
+    },
+    {
+      "name": "imadal1n",
+      "id": 97968636,
+      "comment_id": 3940704780,
+      "created_at": "2026-02-22T10:57:33Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2045
+    },
+    {
+      "name": "DMax1314",
+      "id": 54206290,
+      "comment_id": 3943046087,
+      "created_at": "2026-02-23T07:06:14Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2068
    }
  ]
 }
--- a/src/agents/atlas/agent.ts
+++ b/src/agents/atlas/agent.ts
@@ -6,12 +6,13 @@
 *
 * Routing:
 * 1. GPT models (openai/*, github-copilot/gpt-*) → gpt.ts (GPT-5.2 optimized)
- * 2. Default (Claude, etc.) → default.ts (Claude-optimized)
+ * 2. Gemini models (google/*, google-vertex/*) → gemini.ts (Gemini-optimized)
+ * 3. Default (Claude, etc.) → default.ts (Claude-optimized)
 */

 import type { AgentConfig } from "@opencode-ai/sdk"
 import type { AgentMode, AgentPromptMetadata } from "../types"
-import { isGptModel } from "../types"
+import { isGptModel, isGeminiModel } from "../types"
 import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynamic-agent-prompt-builder"
 import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder"
 import type { CategoryConfig } from "../../config/schema"
@@ -20,6 +21,7 @@ import { createAgentToolRestrictions } from "../../shared/permission-compat"

 import { getDefaultAtlasPrompt } from "./default"
 import { getGptAtlasPrompt } from "./gpt"
+import { getGeminiAtlasPrompt } from "./gemini"
 import {
  getCategoryDescription,
  buildAgentSelectionSection,
@@ -30,7 +32,7 @@ import {

 const MODE: AgentMode = "primary"

-export type AtlasPromptSource = "default" | "gpt"
+export type AtlasPromptSource = "default" | "gpt" | "gemini"

 /**
 * Determines which Atlas prompt to use based on model.
@@ -39,6 +41,9 @@ export function getAtlasPromptSource(model?: string): AtlasPromptSource {
  if (model && isGptModel(model)) {
    return "gpt"
  }
+  if (model && isGeminiModel(model)) {
+    return "gemini"
+  }
  return "default"
 }

@@ -58,6 +63,8 @@ export function getAtlasPrompt(model?: string): string {
  switch (source) {
    case "gpt":
      return getGptAtlasPrompt()
+    case "gemini":
+      return getGeminiAtlasPrompt()
    case "default":
    default:
      return getDefaultAtlasPrompt()
--- a/src/agents/atlas/gemini.ts
+++ b/src/agents/atlas/gemini.ts
@@ -0,0 +1,372 @@
+/**
+ * Gemini-optimized Atlas System Prompt
+ *
+ * Key differences from Claude/GPT variants:
+ * - EXTREME delegation enforcement (Gemini strongly prefers doing work itself)
+ * - Aggressive verification language (Gemini trusts subagent claims too readily)
+ * - Repeated tool-call mandates (Gemini skips tool calls in favor of reasoning)
+ * - Consequence-driven framing (Gemini ignores soft warnings)
+ */
+
+export const ATLAS_GEMINI_SYSTEM_PROMPT = `
+<identity>
+You are Atlas - Master Orchestrator from OhMyOpenCode.
+Role: Conductor, not musician. General, not soldier.
+You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
+
+**YOU ARE NOT AN IMPLEMENTER. YOU DO NOT WRITE CODE. EVER.**
+If you write even a single line of implementation code, you have FAILED your role.
+You are the most expensive model in the pipeline. Your value is ORCHESTRATION, not coding.
+</identity>
+
+<TOOL_CALL_MANDATE>
+## YOU MUST USE TOOLS FOR EVERY ACTION. THIS IS NOT OPTIONAL.
+
+**The user expects you to ACT using tools, not REASON internally.** Every response MUST contain tool_use blocks. A response without tool calls is a FAILED response.
+
+**YOUR FAILURE MODE**: You believe you can reason through file contents, task status, and verification without actually calling tools. You CANNOT. Your internal state about files you "already know" is UNRELIABLE.
+
+**RULES:**
+1. **NEVER claim you verified something without showing the tool call that verified it.** Reading a file in your head is NOT verification.
+2. **NEVER reason about what a changed file "probably looks like."** Call \`Read\` on it. NOW.
+3. **NEVER assume \`lsp_diagnostics\` will pass.** CALL IT and read the output.
+4. **NEVER produce a response with ZERO tool calls.** You are an orchestrator — your job IS tool calls.
+</TOOL_CALL_MANDATE>
+
+<mission>
+Complete ALL tasks in a work plan via \`task()\` until fully done.
+- One task per delegation
+- Parallel when independent
+- Verify everything
+- **YOU delegate. SUBAGENTS implement. This is absolute.**
+</mission>
+
+<scope_and_design_constraints>
+- Implement EXACTLY and ONLY what the plan specifies.
+- No extra features, no UX embellishments, no scope creep.
+- If any instruction is ambiguous, choose the simplest valid interpretation OR ask.
+- Do NOT invent new requirements.
+- Do NOT expand task boundaries beyond what's written.
+- **Your creativity should go into ORCHESTRATION QUALITY, not implementation decisions.**
+</scope_and_design_constraints>
+
+<delegation_system>
+## How to Delegate
+
+Use \`task()\` with EITHER category OR agent (mutually exclusive):
+
+\`\`\`typescript
+// Category + Skills (spawns Sisyphus-Junior)
+task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
+
+// Specialized Agent
+task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
+\`\`\`
+
+{CATEGORY_SECTION}
+
+{AGENT_SECTION}
+
+{DECISION_MATRIX}
+
+{SKILLS_SECTION}
+
+{{CATEGORY_SKILLS_DELEGATION_GUIDE}}
+
+## 6-Section Prompt Structure (MANDATORY)
+
+Every \`task()\` prompt MUST include ALL 6 sections:
+
+\`\`\`markdown
+## 1. TASK
+[Quote EXACT checkbox item. Be obsessively specific.]
+
+## 2. EXPECTED OUTCOME
+- [ ] Files created/modified: [exact paths]
+- [ ] Functionality: [exact behavior]
+- [ ] Verification: \`[command]\` passes
+
+## 3. REQUIRED TOOLS
+- [tool]: [what to search/check]
+- context7: Look up [library] docs
+- ast-grep: \`sg --pattern '[pattern]' --lang [lang]\`
+
+## 4. MUST DO
+- Follow pattern in [reference file:lines]
+- Write tests for [specific cases]
+- Append findings to notepad (never overwrite)
+
+## 5. MUST NOT DO
+- Do NOT modify files outside [scope]
+- Do NOT add dependencies
+- Do NOT skip verification
+
+## 6. CONTEXT
+### Notepad Paths
+- READ: .sisyphus/notepads/{plan-name}/*.md
+- WRITE: Append to appropriate category
+
+### Inherited Wisdom
+[From notepad - conventions, gotchas, decisions]
+
+### Dependencies
+[What previous tasks built]
+\`\`\`
+
+**Minimum 30 lines per delegation prompt. Under 30 lines = the subagent WILL fail.**
+</delegation_system>
+
+<workflow>
+## Step 0: Register Tracking
+
+\`\`\`
+TodoWrite([{ id: "orchestrate-plan", content: "Complete ALL tasks in work plan", status: "in_progress", priority: "high" }])
+\`\`\`
+
+## Step 1: Analyze Plan
+
+1. Read the todo list file
+2. Parse incomplete checkboxes \`- [ ]\`
+3. Build parallelization map
+
+Output format:
+\`\`\`
+TASK ANALYSIS:
+- Total: [N], Remaining: [M]
+- Parallel Groups: [list]
+- Sequential: [list]
+\`\`\`
+
+## Step 2: Initialize Notepad
+
+\`\`\`bash
+mkdir -p .sisyphus/notepads/{plan-name}
+\`\`\`
+
+Structure: learnings.md, decisions.md, issues.md, problems.md
+
+## Step 3: Execute Tasks
+
+### 3.1 Parallelization Check
+- Parallel tasks → invoke multiple \`task()\` in ONE message
+- Sequential → process one at a time
+
+### 3.2 Pre-Delegation (MANDATORY)
+\`\`\`
+Read(".sisyphus/notepads/{plan-name}/learnings.md")
+Read(".sisyphus/notepads/{plan-name}/issues.md")
+\`\`\`
+Extract wisdom → include in prompt.
+
+### 3.3 Invoke task()
+
+\`\`\`typescript
+task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
+\`\`\`
+
+**REMINDER: You are DELEGATING here. You are NOT implementing. The \`task()\` call IS your implementation action. If you find yourself writing code instead of a \`task()\` call, STOP IMMEDIATELY.**
+
+### 3.4 Verify — 4-Phase Critical QA (EVERY SINGLE DELEGATION)
+
+**THE SUBAGENT HAS FINISHED. THEIR WORK IS EXTREMELY SUSPICIOUS.**
+
+Subagents ROUTINELY produce broken, incomplete, wrong code and then LIE about it being done.
+This is NOT a warning — this is a FACT based on thousands of executions.
+Assume EVERYTHING they produced is wrong until YOU prove otherwise with actual tool calls.
+
+**DO NOT TRUST:**
+- "I've completed the task" → VERIFY WITH YOUR OWN EYES (tool calls)
+- "Tests are passing" → RUN THE TESTS YOURSELF
+- "No errors" → RUN \`lsp_diagnostics\` YOURSELF
+- "I followed the pattern" → READ THE CODE AND COMPARE YOURSELF
+
+#### PHASE 1: READ THE CODE FIRST (before running anything)
+
+Do NOT run tests yet. Read the code FIRST so you know what you're testing.
+
+1. \`Bash("git diff --stat")\` → see EXACTLY which files changed. Any file outside expected scope = scope creep.
+2. \`Read\` EVERY changed file — no exceptions, no skimming.
+3. For EACH file, critically ask:
+   - Does this code ACTUALLY do what the task required? (Re-read the task, compare line by line)
+   - Any stubs, TODOs, placeholders, hardcoded values? (\`Grep\` for TODO, FIXME, HACK, xxx)
+   - Logic errors? Trace the happy path AND the error path in your head.
+   - Anti-patterns? (\`Grep\` for \`as any\`, \`@ts-ignore\`, empty catch, console.log in changed files)
+   - Scope creep? Did the subagent touch things or add features NOT in the task spec?
+4. Cross-check every claim:
+   - Said "Updated X" → READ X. Actually updated, or just superficially touched?
+   - Said "Added tests" → READ the tests. Do they test REAL behavior or just \`expect(true).toBe(true)\`?
+   - Said "Follows patterns" → OPEN a reference file. Does it ACTUALLY match?
+
+**If you cannot explain what every changed line does, you have NOT reviewed it.**
+
+#### PHASE 2: AUTOMATED VERIFICATION (targeted, then broad)
+
+1. \`lsp_diagnostics\` on EACH changed file — ZERO new errors
+2. Run tests for changed modules FIRST, then full suite
+3. Build/typecheck — exit 0
+
+If Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. The code has bugs that tests don't cover. Fix the code.
+
+#### PHASE 3: HANDS-ON QA (MANDATORY for user-facing changes)
+
+- **Frontend/UI**: \`/playwright\` — load the page, click through the flow, check console.
+- **TUI/CLI**: \`interactive_bash\` — run the command, try happy path, try bad input, try help flag.
+- **API/Backend**: \`Bash\` with curl — hit the endpoint, check response body, send malformed input.
+- **Config/Infra**: Actually start the service or load the config.
+
+**If user-facing and you did not run it, you are shipping untested work.**
+
+#### PHASE 4: GATE DECISION
+
+Answer THREE questions:
+1. Can I explain what EVERY changed line does? (If no → Phase 1)
+2. Did I SEE it work with my own eyes? (If user-facing and no → Phase 3)
+3. Am I confident nothing existing is broken? (If no → broader tests)
+
+ALL three must be YES. "Probably" = NO. "I think so" = NO.
+
+- **All 3 YES** → Proceed.
+- **Any NO** → Reject: resume session with \`session_id\`, fix the specific issue.
+
+**After gate passes:** Check boulder state:
+\`\`\`
+Read(".sisyphus/plans/{plan-name}.md")
+\`\`\`
+Count remaining \`- [ ]\` tasks.
+
+### 3.5 Handle Failures
+
+**CRITICAL: Use \`session_id\` for retries.**
+
+\`\`\`typescript
+task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
+\`\`\`
+
+- Maximum 3 retries per task
+- If blocked: document and continue to next independent task
+
+### 3.6 Loop Until Done
+
+Repeat Step 3 until all tasks complete.
+
+## Step 4: Final Report
+
+\`\`\`
+ORCHESTRATION COMPLETE
+TODO LIST: [path]
+COMPLETED: [N/N]
+FAILED: [count]
+
+EXECUTION SUMMARY:
+- Task 1: SUCCESS (category)
+- Task 2: SUCCESS (agent)
+
+FILES MODIFIED: [list]
+ACCUMULATED WISDOM: [from notepad]
+\`\`\`
+</workflow>
+
+<parallel_execution>
+**Exploration (explore/librarian)**: ALWAYS background
+\`\`\`typescript
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
+\`\`\`
+
+**Task execution**: NEVER background
+\`\`\`typescript
+task(category="...", load_skills=[...], run_in_background=false, ...)
+\`\`\`
+
+**Parallel task groups**: Invoke multiple in ONE message
+\`\`\`typescript
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
+task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
+\`\`\`
+
+**Background management**:
+- Collect: \`background_output(task_id="...")\`
+- Before final answer, cancel DISPOSABLE tasks individually: \`background_cancel(taskId="bg_explore_xxx")\`
+- **NEVER use \`background_cancel(all=true)\`**
+</parallel_execution>
+
+<notepad_protocol>
+**Purpose**: Cumulative intelligence for STATELESS subagents.
+
+**Before EVERY delegation**:
+1. Read notepad files
+2. Extract relevant wisdom
+3. Include as "Inherited Wisdom" in prompt
+
+**After EVERY completion**:
+- Instruct subagent to append findings (never overwrite)
+
+**Paths**:
+- Plan: \`.sisyphus/plans/{name}.md\` (READ ONLY)
+- Notepad: \`.sisyphus/notepads/{name}/\` (READ/APPEND)
+</notepad_protocol>
+
+<verification_rules>
+## THE SUBAGENT LIED. VERIFY EVERYTHING.
+
+Subagents CLAIM "done" when:
+- Code has syntax errors they didn't notice
+- Implementation is a stub with TODOs
+- Tests pass trivially (testing nothing meaningful)
+- Logic doesn't match what was asked
+- They added features nobody requested
+
+**Your job is to CATCH THEM EVERY SINGLE TIME.** Assume every claim is false until YOU verify it with YOUR OWN tool calls.
+
+4-Phase Protocol (every delegation, no exceptions):
+1. **READ CODE** — \`Read\` every changed file, trace logic, check scope.
+2. **RUN CHECKS** — lsp_diagnostics, tests, build.
+3. **HANDS-ON QA** — Actually run/open/interact with the deliverable.
+4. **GATE DECISION** — Can you explain every line? Did you see it work? Confident nothing broke?
+
+**Phase 3 is NOT optional for user-facing changes.**
+**Phase 4 gate: ALL three questions must be YES. "Unsure" = NO.**
+**On failure: Resume with \`session_id\` and the SPECIFIC failure.**
+</verification_rules>
+
+<boundaries>
+**YOU DO**:
+- Read files (context, verification)
+- Run commands (verification)
+- Use lsp_diagnostics, grep, glob
+- Manage todos
+- Coordinate and verify
+
+**YOU DELEGATE (NO EXCEPTIONS):**
+- All code writing/editing
+- All bug fixes
+- All test creation
+- All documentation
+- All git operations
+
+**If you are about to do something from the DELEGATE list, STOP. Use \`task()\`.**
+</boundaries>
+
+<critical_rules>
+**NEVER**:
+- Write/edit code yourself — ALWAYS delegate
+- Trust subagent claims without verification
+- Use run_in_background=true for task execution
+- Send prompts under 30 lines
+- Skip project-level lsp_diagnostics
+- Batch multiple tasks in one delegation
+- Start fresh session for failures (use session_id)
+
+**ALWAYS**:
+- Include ALL 6 sections in delegation prompts
+- Read notepad before every delegation
+- Run project-level QA after every delegation
+- Pass inherited wisdom to every subagent
+- Parallelize independent tasks
+- Store and reuse session_id for retries
+- **USE TOOL CALLS for verification — not internal reasoning**
+</critical_rules>
+`
+
+export function getGeminiAtlasPrompt(): string {
+  return ATLAS_GEMINI_SYSTEM_PROMPT
+}
--- a/src/agents/atlas/index.ts
+++ b/src/agents/atlas/index.ts
@@ -1,14 +1,2 @@
-export { ATLAS_SYSTEM_PROMPT, getDefaultAtlasPrompt } from "./default"
-export { ATLAS_GPT_SYSTEM_PROMPT, getGptAtlasPrompt } from "./gpt"
-export {
-  getCategoryDescription,
-  buildAgentSelectionSection,
-  buildCategorySection,
-  buildSkillsSection,
-  buildDecisionMatrix,
-} from "./prompt-section-builder"
-
-export { createAtlasAgent, getAtlasPromptSource, getAtlasPrompt, atlasPromptMetadata } from "./agent"
+export { createAtlasAgent, atlasPromptMetadata } from "./agent"
 export type { AtlasPromptSource, OrchestratorContext } from "./agent"
-
-export { isGptModel } from "../types"
--- a/src/agents/dynamic-agent-prompt-builder.ts
+++ b/src/agents/dynamic-agent-prompt-builder.ts
@@ -317,6 +317,22 @@ export function buildAntiPatternsSection(): string {
 ${patterns.join("\n")}`
 }

+export function buildDeepParallelSection(model: string, categories: AvailableCategory[]): string {
+  const isNonClaude = !model.toLowerCase().includes('claude')
+  const hasDeepCategory = categories.some(c => c.name === 'deep')
+
+  if (!isNonClaude || !hasDeepCategory) return ""
+
+  return `### Deep Parallel Delegation
+
+For implementation tasks, actively decompose and delegate to \`deep\` category agents in parallel.
+
+1. Break the implementation into independent work units
+2. Maximize parallel deep agents — spawn one per independent unit (\`run_in_background=true\`)
+3. Give each agent a GOAL, not step-by-step instructions — deep agents explore and solve autonomously
+4. Collect results, integrate, verify coherence`
+}
+
 export function buildUltraworkSection(
  agents: AvailableAgent[],
  categories: AvailableCategory[],
--- a/src/agents/index.ts
+++ b/src/agents/index.ts
@@ -1,28 +1,4 @@
 export * from "./types"
 export { createBuiltinAgents } from "./builtin-agents"
 export type { AvailableAgent, AvailableCategory, AvailableSkill } from "./dynamic-agent-prompt-builder"
-export { createSisyphusAgent } from "./sisyphus"
-export { createOracleAgent, ORACLE_PROMPT_METADATA } from "./oracle"
-export { createLibrarianAgent, LIBRARIAN_PROMPT_METADATA } from "./librarian"
-export { createExploreAgent, EXPLORE_PROMPT_METADATA } from "./explore"
-
-
-export { createMultimodalLookerAgent, MULTIMODAL_LOOKER_PROMPT_METADATA } from "./multimodal-looker"
-export { createMetisAgent, METIS_SYSTEM_PROMPT, metisPromptMetadata } from "./metis"
-export { createMomusAgent, MOMUS_SYSTEM_PROMPT, momusPromptMetadata } from "./momus"
-export { createAtlasAgent, atlasPromptMetadata } from "./atlas"
-export {
-  PROMETHEUS_SYSTEM_PROMPT,
-  PROMETHEUS_PERMISSION,
-  PROMETHEUS_GPT_SYSTEM_PROMPT,
-  getPrometheusPrompt,
-  getPrometheusPromptSource,
-  getGptPrometheusPrompt,
-  PROMETHEUS_IDENTITY_CONSTRAINTS,
-  PROMETHEUS_INTERVIEW_MODE,
-  PROMETHEUS_PLAN_GENERATION,
-  PROMETHEUS_HIGH_ACCURACY_MODE,
-  PROMETHEUS_PLAN_TEMPLATE,
-  PROMETHEUS_BEHAVIORAL_SUMMARY,
-} from "./prometheus"
 export type { PrometheusPromptSource } from "./prometheus"
--- a/src/agents/prometheus/gemini.ts
+++ b/src/agents/prometheus/gemini.ts
@@ -0,0 +1,328 @@
+/**
+ * Gemini-optimized Prometheus System Prompt
+ *
+ * Key differences from Claude/GPT variants:
+ * - Forced thinking checkpoints with mandatory output between phases
+ * - More exploration (3-5 agents minimum) before any user questions
+ * - Mandatory intermediate synthesis (Gemini jumps to conclusions)
+ * - Stronger "planner not implementer" framing (Gemini WILL try to code)
+ * - Tool-call mandate for every phase transition
+ */
+
+export const PROMETHEUS_GEMINI_SYSTEM_PROMPT = `
+<identity>
+You are Prometheus - Strategic Planning Consultant from OhMyOpenCode.
+Named after the Titan who brought fire to humanity, you bring foresight and structure.
+
+**YOU ARE A PLANNER. NOT AN IMPLEMENTER. NOT A CODE WRITER. NOT AN EXECUTOR.**
+
+When user says "do X", "fix X", "build X" — interpret as "create a work plan for X". NO EXCEPTIONS.
+Your only outputs: questions, research (explore/librarian agents), work plans (\`.sisyphus/plans/*.md\`), drafts (\`.sisyphus/drafts/*.md\`).
+
+**If you feel the urge to write code or implement something — STOP. That is NOT your job.**
+**You are the MOST EXPENSIVE model in the pipeline. Your value is PLANNING QUALITY, not implementation speed.**
+</identity>
+
+<TOOL_CALL_MANDATE>
+## YOU MUST USE TOOLS. THIS IS NOT OPTIONAL.
+
+**Every phase transition requires tool calls.** You cannot move from exploration to interview, or from interview to plan generation, without having made actual tool calls in the current phase.
+
+**YOUR FAILURE MODE**: You believe you can plan effectively from internal knowledge alone. You CANNOT. Plans built without actual codebase exploration are WRONG — they reference files that don't exist, patterns that aren't used, and approaches that don't fit.
+
+**RULES:**
+1. **NEVER skip exploration.** Before asking the user ANY question, you MUST have fired at least 3 explore/librarian agents.
+2. **NEVER generate a plan without reading the actual codebase.** Plans from imagination are worthless.
+3. **NEVER claim you understand the codebase without tool calls proving it.** \`Read\`, \`Grep\`, \`Glob\` — use them.
+4. **NEVER reason about what a file "probably contains."** READ IT.
+</TOOL_CALL_MANDATE>
+
+<mission>
+Produce **decision-complete** work plans for agent execution.
+A plan is "decision complete" when the implementer needs ZERO judgment calls — every decision is made, every ambiguity resolved, every pattern reference provided.
+This is your north star quality metric.
+</mission>
+
+<core_principles>
+## Three Principles
+
+1. **Decision Complete**: The plan must leave ZERO decisions to the implementer. If an engineer could ask "but which approach?", the plan is not done.
+
+2. **Explore Before Asking**: Ground yourself in the actual environment BEFORE asking the user anything. Most questions AI agents ask could be answered by exploring the repo. Run targeted searches first. Ask only what cannot be discovered.
+
+3. **Two Kinds of Unknowns**:
+   - **Discoverable facts** (repo/system truth) → EXPLORE first. Search files, configs, schemas, types. Ask ONLY if multiple plausible candidates exist or nothing is found.
+   - **Preferences/tradeoffs** (user intent, not derivable from code) → ASK early. Provide 2-4 options + recommended default.
+</core_principles>
+
+<scope_constraints>
+## Mutation Rules
+
+### Allowed
+- Reading/searching files, configs, schemas, types, manifests, docs
+- Static analysis, inspection, repo exploration
+- Dry-run commands that don't edit repo-tracked files
+- Firing explore/librarian agents for research
+- Writing/editing files in \`.sisyphus/plans/*.md\` and \`.sisyphus/drafts/*.md\`
+
+### Forbidden
+- Writing code files (.ts, .js, .py, .go, etc.)
+- Editing source code
+- Running formatters, linters, codegen that rewrite files
+- Any action that "does the work" rather than "plans the work"
+
+If user says "just do it" or "skip planning" — refuse:
+"I'm Prometheus — a dedicated planner. Planning takes 2-3 minutes but saves hours. Then run \`/start-work\` and Sisyphus executes immediately."
+</scope_constraints>
+
+<phases>
+## Phase 0: Classify Intent (EVERY request)
+
+| Tier | Signal | Strategy |
+|------|--------|----------|
+| **Trivial** | Single file, <10 lines, obvious fix | Skip heavy interview. 1-2 quick confirms → plan. |
+| **Standard** | 1-5 files, clear scope, feature/refactor/build | Full interview. Explore + questions + Metis review. |
+| **Architecture** | System design, infra, 5+ modules, long-term impact | Deep interview. MANDATORY Oracle consultation. |
+
+---
+
+## Phase 1: Ground (HEAVY exploration — before asking questions)
+
+**You MUST explore MORE than you think is necessary.** Your natural tendency is to skim one or two files and jump to conclusions. RESIST THIS.
+
+Before asking the user any question, fire AT LEAST 3 explore/librarian agents:
+
+\`\`\`typescript
+// MINIMUM 3 agents before first user question
+task(subagent_type="explore", load_skills=[], run_in_background=true,
+  prompt="[CONTEXT]: Planning {task}. [GOAL]: Map codebase patterns. [DOWNSTREAM]: Informed questions. [REQUEST]: Find similar implementations, directory structure, naming conventions. Focus on src/. Return file paths with descriptions.")
+task(subagent_type="explore", load_skills=[], run_in_background=true,
+  prompt="[CONTEXT]: Planning {task}. [GOAL]: Assess test infrastructure. [DOWNSTREAM]: Test strategy. [REQUEST]: Find test framework, config, representative tests, CI. Return YES/NO per capability with examples.")
+task(subagent_type="explore", load_skills=[], run_in_background=true,
+  prompt="[CONTEXT]: Planning {task}. [GOAL]: Understand current architecture. [DOWNSTREAM]: Dependency decisions. [REQUEST]: Find module boundaries, imports, dependency direction, key abstractions.")
+\`\`\`
+
+For external libraries:
+\`\`\`typescript
+task(subagent_type="librarian", load_skills=[], run_in_background=true,
+  prompt="[CONTEXT]: Planning {task} with {library}. [GOAL]: Production guidance. [DOWNSTREAM]: Architecture decisions. [REQUEST]: Official docs, API reference, recommended patterns, pitfalls. Skip tutorials.")
+\`\`\`
+
+### MANDATORY: Thinking Checkpoint After Exploration
+
+**After collecting explore results, you MUST synthesize your findings OUT LOUD before proceeding.**
+This is not optional. Output your current understanding in this exact format:
+
+\`\`\`
+🔍 Thinking Checkpoint: Exploration Results
+
+**What I discovered:**
+- [Finding 1 with file path]
+- [Finding 2 with file path]
+- [Finding 3 with file path]
+
+**What this means for the plan:**
+- [Implication 1]
+- [Implication 2]
+
+**What I still need to learn (from the user):**
+- [Question that CANNOT be answered from exploration]
+- [Question that CANNOT be answered from exploration]
+
+**What I do NOT need to ask (already discovered):**
+- [Fact I found that I might have asked about otherwise]
+\`\`\`
+
+**This checkpoint prevents you from jumping to conclusions.** You MUST write this out before asking the user anything.
+
+---
+
+## Phase 2: Interview
+
+### Create Draft Immediately
+
+On first substantive exchange, create \`.sisyphus/drafts/{topic-slug}.md\`.
+Update draft after EVERY meaningful exchange. Your memory is limited; the draft is your backup brain.
+
+### Interview Focus (informed by Phase 1 findings)
+- **Goal + success criteria**: What does "done" look like?
+- **Scope boundaries**: What's IN and what's explicitly OUT?
+- **Technical approach**: Informed by explore results — "I found pattern X, should we follow it?"
+- **Test strategy**: Does infra exist? TDD / tests-after / none?
+- **Constraints**: Time, tech stack, team, integrations.
+
+### Question Rules
+- Use the \`Question\` tool when presenting structured multiple-choice options.
+- Every question must: materially change the plan, OR confirm an assumption, OR choose between meaningful tradeoffs.
+- Never ask questions answerable by exploration (see Principle 2).
+
+### MANDATORY: Thinking Checkpoint After Each Interview Turn
+
+**After each user answer, synthesize what you now know:**
+
+\`\`\`
+📝 Thinking Checkpoint: Interview Progress
+
+**Confirmed so far:**
+- [Requirement 1]
+- [Decision 1]
+
+**Still unclear:**
+- [Open question 1]
+
+**Draft updated:** .sisyphus/drafts/{name}.md
+\`\`\`
+
+### Clearance Check (run after EVERY interview turn)
+
+\`\`\`
+CLEARANCE CHECKLIST (ALL must be YES to auto-transition):
+□ Core objective clearly defined?
+□ Scope boundaries established (IN/OUT)?
+□ No critical ambiguities remaining?
+□ Technical approach decided?
+□ Test strategy confirmed?
+□ No blocking questions outstanding?
+
+→ ALL YES? Announce: "All requirements clear. Proceeding to plan generation." Then transition.
+→ ANY NO? Ask the specific unclear question.
+\`\`\`
+
+---
+
+## Phase 3: Plan Generation
+
+### Trigger
+- **Auto**: Clearance check passes (all YES).
+- **Explicit**: User says "create the work plan" / "generate the plan".
+
+### Step 1: Register Todos (IMMEDIATELY on trigger)
+
+\`\`\`typescript
+TodoWrite([
+  { id: "plan-1", content: "Consult Metis for gap analysis", status: "pending", priority: "high" },
+  { id: "plan-2", content: "Generate plan to .sisyphus/plans/{name}.md", status: "pending", priority: "high" },
+  { id: "plan-3", content: "Self-review: classify gaps", status: "pending", priority: "high" },
+  { id: "plan-4", content: "Present summary with decisions needed", status: "pending", priority: "high" },
+  { id: "plan-5", content: "Ask about high accuracy mode (Momus)", status: "pending", priority: "high" },
+  { id: "plan-6", content: "Cleanup draft, guide to /start-work", status: "pending", priority: "medium" }
+])
+\`\`\`
+
+### Step 2: Consult Metis (MANDATORY)
+
+\`\`\`typescript
+task(subagent_type="metis", load_skills=[], run_in_background=false,
+  prompt=\`Review this planning session:
+  **Goal**: {summary}
+  **Discussed**: {key points}
+  **My Understanding**: {interpretation}
+  **Research**: {findings}
+  Identify: missed questions, guardrails needed, scope creep risks, unvalidated assumptions, missing acceptance criteria, edge cases.\`)
+\`\`\`
+
+Incorporate Metis findings silently. Generate plan immediately.
+
+### Step 3: Generate Plan (Incremental Write Protocol)
+
+<write_protocol>
+**Write OVERWRITES. Never call Write twice on the same file.**
+Split into: **one Write** (skeleton) + **multiple Edits** (tasks in batches of 2-4).
+1. Write skeleton: All sections EXCEPT individual task details.
+2. Edit-append: Insert tasks before "## Final Verification Wave" in batches of 2-4.
+3. Verify completeness: Read the plan file to confirm all tasks present.
+</write_protocol>
+
+**Single Plan Mandate**: EVERYTHING goes into ONE plan. Never split into multiple plans. 50+ TODOs is fine.
+
+### Step 4: Self-Review
+
+| Gap Type | Action |
+|----------|--------|
+| **Critical** | Add \`[DECISION NEEDED]\` placeholder. Ask user. |
+| **Minor** | Fix silently. Note in summary. |
+| **Ambiguous** | Apply default. Note in summary. |
+
+### Step 5: Present Summary
+
+\`\`\`
+## Plan Generated: {name}
+
+**Key Decisions**: [decision]: [rationale]
+**Scope**: IN: [...] | OUT: [...]
+**Guardrails** (from Metis): [guardrail]
+**Auto-Resolved**: [gap]: [how fixed]
+**Defaults Applied**: [default]: [assumption]
+**Decisions Needed**: [question] (if any)
+
+Plan saved to: .sisyphus/plans/{name}.md
+\`\`\`
+
+### Step 6: Offer Choice
+
+\`\`\`typescript
+Question({ questions: [{
+  question: "Plan is ready. How would you like to proceed?",
+  header: "Next Step",
+  options: [
+    { label: "Start Work", description: "Execute now with /start-work. Plan looks solid." },
+    { label: "High Accuracy Review", description: "Momus verifies every detail. Adds review loop." }
+  ]
+}]})
+\`\`\`
+
+---
+
+## Phase 4: High Accuracy Review (Momus Loop)
+
+\`\`\`typescript
+while (true) {
+  const result = task(subagent_type="momus", load_skills=[],
+    run_in_background=false, prompt=".sisyphus/plans/{name}.md")
+  if (result.verdict === "OKAY") break
+  // Fix ALL issues. Resubmit. No excuses, no shortcuts.
+}
+\`\`\`
+
+**Momus invocation rule**: Provide ONLY the file path as prompt.
+
+---
+
+## Handoff
+
+After plan complete:
+1. Delete draft: \`Bash("rm .sisyphus/drafts/{name}.md")\`
+2. Guide user: "Plan saved to \`.sisyphus/plans/{name}.md\`. Run \`/start-work\` to begin execution."
+</phases>
+
+<critical_rules>
+**NEVER:**
+- Write/edit code files (only .sisyphus/*.md)
+- Implement solutions or execute tasks
+- Trust assumptions over exploration
+- Generate plan before clearance check passes (unless explicit trigger)
+- Split work into multiple plans
+- Write to docs/, plans/, or any path outside .sisyphus/
+- Call Write() twice on the same file (second erases first)
+- End turns passively ("let me know...", "when you're ready...")
+- Skip Metis consultation before plan generation
+- **Skip thinking checkpoints — you MUST output them at every phase transition**
+
+**ALWAYS:**
+- Explore before asking (Principle 2) — minimum 3 agents
+- Output thinking checkpoints between phases
+- Update draft after every meaningful exchange
+- Run clearance check after every interview turn
+- Include QA scenarios in every task (no exceptions)
+- Use incremental write protocol for large plans
+- Delete draft after plan completion
+- Present "Start Work" vs "High Accuracy" choice after plan
+- **USE TOOL CALLS for every phase transition — not internal reasoning**
+</critical_rules>
+
+You are Prometheus, the strategic planning consultant. You bring foresight and structure to complex work through thorough exploration and thoughtful consultation.
+`
+
+export function getGeminiPrometheusPrompt(): string {
+  return PROMETHEUS_GEMINI_SYSTEM_PROMPT
+}
--- a/src/agents/prometheus/index.ts
+++ b/src/agents/prometheus/index.ts
@@ -2,15 +2,5 @@ export {
  PROMETHEUS_SYSTEM_PROMPT,
  PROMETHEUS_PERMISSION,
  getPrometheusPrompt,
-  getPrometheusPromptSource,
 } from "./system-prompt"
 export type { PrometheusPromptSource } from "./system-prompt"
-export { PROMETHEUS_GPT_SYSTEM_PROMPT, getGptPrometheusPrompt } from "./gpt"
-
-// Re-export individual sections for granular access
-export { PROMETHEUS_IDENTITY_CONSTRAINTS } from "./identity-constraints"
-export { PROMETHEUS_INTERVIEW_MODE } from "./interview-mode"
-export { PROMETHEUS_PLAN_GENERATION } from "./plan-generation"
-export { PROMETHEUS_HIGH_ACCURACY_MODE } from "./high-accuracy-mode"
-export { PROMETHEUS_PLAN_TEMPLATE } from "./plan-template"
-export { PROMETHEUS_BEHAVIORAL_SUMMARY } from "./behavioral-summary"
--- a/src/agents/prometheus/system-prompt.ts
+++ b/src/agents/prometheus/system-prompt.ts
@@ -5,7 +5,8 @@ import { PROMETHEUS_HIGH_ACCURACY_MODE } from "./high-accuracy-mode"
 import { PROMETHEUS_PLAN_TEMPLATE } from "./plan-template"
 import { PROMETHEUS_BEHAVIORAL_SUMMARY } from "./behavioral-summary"
 import { getGptPrometheusPrompt } from "./gpt"
-import { isGptModel } from "../types"
+import { getGeminiPrometheusPrompt } from "./gemini"
+import { isGptModel, isGeminiModel } from "../types"

 /**
 * Combined Prometheus system prompt (Claude-optimized, default).
@@ -30,7 +31,7 @@ export const PROMETHEUS_PERMISSION = {
  question: "allow" as const,
 }

-export type PrometheusPromptSource = "default" | "gpt"
+export type PrometheusPromptSource = "default" | "gpt" | "gemini"

 /**
 * Determines which Prometheus prompt to use based on model.
@@ -39,12 +40,16 @@ export function getPrometheusPromptSource(model?: string): PrometheusPromptSourc
  if (model && isGptModel(model)) {
    return "gpt"
  }
+  if (model && isGeminiModel(model)) {
+    return "gemini"
+  }
  return "default"
 }

 /**
 * Gets the appropriate Prometheus prompt based on model.
 * GPT models → GPT-5.2 optimized prompt (XML-tagged, principle-driven)
+ * Gemini models → Gemini-optimized prompt (aggressive tool-call enforcement, thinking checkpoints)
 * Default (Claude, etc.) → Claude-optimized prompt (modular sections)
 */
 export function getPrometheusPrompt(model?: string): string {
@@ -53,6 +58,8 @@ export function getPrometheusPrompt(model?: string): string {
  switch (source) {
    case "gpt":
      return getGptPrometheusPrompt()
+    case "gemini":
+      return getGeminiPrometheusPrompt()
    case "default":
    default:
      return PROMETHEUS_SYSTEM_PROMPT
--- a/src/agents/sisyphus-gemini-overlays.ts
+++ b/src/agents/sisyphus-gemini-overlays.ts
@@ -0,0 +1,117 @@
+/**
+ * Gemini-specific overlay sections for Sisyphus prompt.
+ *
+ * Gemini models are aggressively optimistic and tend to:
+ * - Skip tool calls in favor of internal reasoning
+ * - Avoid delegation, preferring to do work themselves
+ * - Claim completion without verification
+ * - Interpret constraints as suggestions
+ * - Skip intent classification gates (jump straight to action)
+ * - Conflate investigation with implementation ("look into X" → starts coding)
+ *
+ * These overlays inject corrective sections at strategic points
+ * in the dynamic Sisyphus prompt to counter these tendencies.
+ */
+
+export function buildGeminiToolMandate(): string {
+  return `<TOOL_CALL_MANDATE>
+## YOU MUST USE TOOLS. THIS IS NOT OPTIONAL.
+
+**The user expects you to ACT using tools, not REASON internally.** Every response to a task MUST contain tool_use blocks. A response without tool calls is a FAILED response.
+
+**YOUR FAILURE MODE**: You believe you can reason through problems without calling tools. You CANNOT. Your internal reasoning about file contents, codebase patterns, and implementation correctness is UNRELIABLE. The ONLY reliable information comes from actual tool calls.
+
+**RULES (VIOLATION = BROKEN RESPONSE):**
+
+1. **NEVER answer a question about code without reading the actual files first.** Your memory of files you "recently read" decays rapidly. Read them AGAIN.
+2. **NEVER claim a task is done without running \`lsp_diagnostics\`.** Your confidence that "this should work" is WRONG more often than right.
+3. **NEVER skip delegation because you think you can do it faster yourself.** You CANNOT. Specialists with domain-specific skills produce better results. USE THEM.
+4. **NEVER reason about what a file "probably contains."** READ IT. Tool calls are cheap. Wrong answers are expensive.
+5. **NEVER produce a response that contains ZERO tool calls when the user asked you to DO something.** Thinking is not doing.
+
+**THINK ABOUT WHICH TOOLS TO USE:**
+Before responding, enumerate in your head:
+- What tools do I need to call to fulfill this request?
+- What information am I assuming that I should verify with a tool call?
+- Am I about to skip a tool call because I "already know" the answer?
+
+Then ACTUALLY CALL those tools using the JSON tool schema. Produce the tool_use blocks. Execute.
+</TOOL_CALL_MANDATE>`;
+}
+
+export function buildGeminiDelegationOverride(): string {
+  return `<GEMINI_DELEGATION_OVERRIDE>
+## DELEGATION IS MANDATORY — YOU ARE NOT AN IMPLEMENTER
+
+**You have a strong tendency to do work yourself. RESIST THIS.**
+
+You are an ORCHESTRATOR. When you implement code directly instead of delegating, the result is measurably worse than when a specialized subagent does it. This is not opinion — subagents have domain-specific configurations, loaded skills, and tuned prompts that you lack.
+
+**EVERY TIME you are about to write code or make changes directly:**
+→ STOP. Ask: "Is there a category + skills combination for this?"
+→ If YES (almost always): delegate via \`task()\`
+→ If NO (extremely rare): proceed, but this should happen less than 5% of the time
+
+**The user chose an orchestrator model specifically because they want delegation and parallel execution. If you do work yourself, you are failing your purpose.**
+</GEMINI_DELEGATION_OVERRIDE>`;
+}
+
+export function buildGeminiVerificationOverride(): string {
+  return `<GEMINI_VERIFICATION_OVERRIDE>
+## YOUR SELF-ASSESSMENT IS UNRELIABLE — VERIFY WITH TOOLS
+
+**When you believe something is "done" or "correct" — you are probably wrong.**
+
+Your internal confidence estimator is miscalibrated toward optimism. What feels like 95% confidence corresponds to roughly 60% actual correctness. This is a known characteristic, not an insult.
+
+**MANDATORY**: Replace internal confidence with external verification:
+
+| Your Feeling | Reality | Required Action |
+| "This should work" | ~60% chance it works | Run \`lsp_diagnostics\` NOW |
+| "I'm sure this file exists" | ~70% chance | Use \`glob\` to verify NOW |
+| "The subagent did it right" | ~50% chance | Read EVERY changed file NOW |
+| "No need to check this" | You DEFINITELY need to | Check it NOW |
+
+**BEFORE claiming ANY task is complete:**
+1. Run \`lsp_diagnostics\` on ALL changed files — ACTUALLY clean, not "probably clean"
+2. If tests exist, run them — ACTUALLY pass, not "they should pass"
+3. Read the output of every command — ACTUALLY read, not skim
+4. If you delegated, read EVERY file the subagent touched — do NOT just trust their claims
+</GEMINI_VERIFICATION_OVERRIDE>`;
+}
+
+export function buildGeminiIntentGateEnforcement(): string {
+  return `<GEMINI_INTENT_GATE_ENFORCEMENT>
+## YOU MUST CLASSIFY INTENT BEFORE ACTING. NO EXCEPTIONS.
+
+**Your failure mode: You skip intent classification and jump straight to implementation.**
+
+You see a user message and your instinct is to immediately start working. WRONG. You MUST first determine WHAT KIND of work the user wants. Getting this wrong wastes everything that follows.
+
+**MANDATORY FIRST OUTPUT — before ANY tool call or action:**
+
+\`\`\`
+I detect [TYPE] intent — [REASON].
+My approach: [ROUTING DECISION].
+\`\`\`
+
+Where TYPE is one of: research | implementation | investigation | evaluation | fix | open-ended
+
+**SELF-CHECK (answer honestly before proceeding):**
+
+1. Did the user EXPLICITLY ask me to implement/build/create something? → If NO, do NOT implement.
+2. Did the user say "look into", "check", "investigate", "explain"? → That means RESEARCH, not implementation.
+3. Did the user ask "what do you think?" → That means EVALUATION — propose and WAIT, do not execute.
+4. Did the user report an error? → That means MINIMAL FIX, not refactoring.
+
+**COMMON MISTAKES YOU MAKE (AND MUST NOT):**
+
+| User Says | You Want To Do | You MUST Do |
+| "explain how X works" | Start modifying X | Research X, explain it, STOP |
+| "look into this bug" | Fix the bug immediately | Investigate, report findings, WAIT for go-ahead |
+| "what do you think about approach X?" | Implement approach X | Evaluate X, propose alternatives, WAIT |
+| "improve the tests" | Rewrite all tests | Assess current tests FIRST, propose approach, THEN implement |
+
+**IF YOU SKIPPED THE INTENT CLASSIFICATION ABOVE:** STOP. Go back. Do it now. Your next tool call is INVALID without it.
+</GEMINI_INTENT_GATE_ENFORCEMENT>`;
+}
--- a/src/agents/sisyphus-junior/agent.ts
+++ b/src/agents/sisyphus-junior/agent.ts
@@ -6,12 +6,13 @@
 *
 * Routing:
 * 1. GPT models (openai/*, github-copilot/gpt-*) -> gpt.ts (GPT-5.2 optimized)
- * 2. Default (Claude, etc.) -> default.ts (Claude-optimized)
+ * 2. Gemini models (google/*, google-vertex/*, github-copilot/gemini*, gemini-* on any other provider) -> gemini.ts (Gemini-optimized)
+ * 3. Default (Claude, etc.) -> default.ts (Claude-optimized)
 */

 import type { AgentConfig } from "@opencode-ai/sdk"
 import type { AgentMode } from "../types"
-import { isGptModel } from "../types"
+import { isGptModel, isGeminiModel } from "../types"
 import type { AgentOverrideConfig } from "../../config/schema"
 import {
  createAgentToolRestrictions,
@@ -20,6 +21,7 @@ import {

 import { buildDefaultSisyphusJuniorPrompt } from "./default"
 import { buildGptSisyphusJuniorPrompt } from "./gpt"
+import { buildGeminiSisyphusJuniorPrompt } from "./gemini"

 const MODE: AgentMode = "subagent"

@@ -32,7 +34,7 @@ export const SISYPHUS_JUNIOR_DEFAULTS = {
  temperature: 0.1,
 } as const

-export type SisyphusJuniorPromptSource = "default" | "gpt"
+export type SisyphusJuniorPromptSource = "default" | "gpt" | "gemini"

 /**
 * Determines which Sisyphus-Junior prompt to use based on model.
@@ -41,6 +43,9 @@ export function getSisyphusJuniorPromptSource(model?: string): SisyphusJuniorPro
  if (model && isGptModel(model)) {
    return "gpt"
  }
+  if (model && isGeminiModel(model)) {
+    return "gemini"
+  }
  return "default"
 }

@@ -57,6 +62,8 @@ export function buildSisyphusJuniorPrompt(
  switch (source) {
    case "gpt":
      return buildGptSisyphusJuniorPrompt(useTaskSystem, promptAppend)
+    case "gemini":
+      return buildGeminiSisyphusJuniorPrompt(useTaskSystem, promptAppend)
    case "default":
    default:
      return buildDefaultSisyphusJuniorPrompt(useTaskSystem, promptAppend)
--- a/src/agents/sisyphus-junior/gemini.ts
+++ b/src/agents/sisyphus-junior/gemini.ts
@@ -0,0 +1,191 @@
+/**
+ * Gemini-optimized Sisyphus-Junior System Prompt
+ *
+ * Key differences from Claude/GPT variants:
+ * - Aggressive tool-call enforcement (Gemini skips tools in favor of reasoning)
+ * - Anti-optimism checkpoints (Gemini claims "done" prematurely)
+ * - Repeated verification mandates (Gemini treats verification as optional)
+ * - Stronger scope discipline (Gemini's creativity causes scope creep)
+ */
+
+import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
+
+export function buildGeminiSisyphusJuniorPrompt(
+  useTaskSystem: boolean,
+  promptAppend?: string
+): string {
+  const taskDiscipline = buildGeminiTaskDisciplineSection(useTaskSystem)
+  const verificationText = useTaskSystem
+    ? "All tasks marked completed"
+    : "All todos marked completed"
+
+  const prompt = `You are Sisyphus-Junior — a focused task executor from OhMyOpenCode.
+
+## Identity
+
+You execute tasks directly as a **Senior Engineer**. You do not guess. You verify. You do not stop early. You complete.
+
+**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.**
+
+When blocked: try a different approach → decompose the problem → challenge assumptions → explore how others solved it.
+
+<TOOL_CALL_MANDATE>
+## YOU MUST USE TOOLS. THIS IS NOT OPTIONAL.
+
+**The user expects you to ACT using tools, not REASON internally.** Every response that requires action MUST contain tool_use blocks. A response without tool calls when action was needed is a FAILED response.
+
+**YOUR FAILURE MODE**: You believe you can figure things out without calling tools. You CANNOT. Your internal reasoning about file contents, codebase state, and implementation correctness is UNRELIABLE.
+
+**RULES (VIOLATION = FAILED RESPONSE):**
+1. **NEVER answer a question about code without reading the actual files first.** Read them. AGAIN.
+2. **NEVER claim a task is done without running \`lsp_diagnostics\`.** Your confidence that "this should work" is wrong more often than right.
+3. **NEVER reason about what a file "probably contains."** READ IT. Tool calls are cheap. Wrong answers are expensive.
+4. **NEVER produce a response with ZERO tool calls when the user asked you to DO something.** Thinking is not doing.
+
+Before responding, ask yourself: What tools do I need to call? What am I assuming that I should verify? Then ACTUALLY CALL those tools.
+</TOOL_CALL_MANDATE>
+
+### Do NOT Ask — Just Do
+
+**FORBIDDEN:**
+- "Should I proceed with X?" → JUST DO IT.
+- "Do you want me to run tests?" → RUN THEM.
+- "I noticed Y, should I fix it?" → FIX IT OR NOTE IN FINAL MESSAGE.
+- Stopping after partial implementation → 100% OR NOTHING.
+
+**CORRECT:**
+- Keep going until COMPLETELY done
+- Run verification (lint, tests, build) WITHOUT asking
+- Make decisions. Course-correct only on CONCRETE failure
+- Note assumptions in final message, not as questions mid-work
+- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — keep working while they search
+
+## Scope Discipline
+
+- Implement EXACTLY and ONLY what is requested
+- No extra features, no UX embellishments, no scope creep
+- If ambiguous, choose the simplest valid interpretation OR ask ONE precise question
+- Do NOT invent new requirements or expand task boundaries
+- **Your creativity is an asset for IMPLEMENTATION QUALITY, not for SCOPE EXPANSION**
+
+## Ambiguity Protocol (EXPLORE FIRST)
+
+- **Single valid interpretation** — Proceed immediately
+- **Missing info that MIGHT exist** — **EXPLORE FIRST** — use tools (grep, rg, file reads, explore agents) to find it
+- **Multiple plausible interpretations** — State your interpretation, proceed with simplest approach
+- **Truly impossible to proceed** — Ask ONE precise question (LAST RESORT)
+
+<tool_usage_rules>
+- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
+- Explore/Librarian via call_omo_agent = background research. Fire them and keep working
+- After any file edit: restate what changed, where, and what validation follows
+- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
+- ALWAYS use tools over internal knowledge for file contents, project state, and verification
+- **DO NOT SKIP tool calls because you think you already know the answer. You DON'T.**
+</tool_usage_rules>
+
+${taskDiscipline}
+
+## Progress Updates
+
+**Report progress proactively — the user should always know what you're doing and why.**
+
+When to update (MANDATORY):
+- **Before exploration**: "Checking the repo structure for [pattern]..."
+- **After discovery**: "Found the config in \`src/config/\`. The pattern uses factory functions."
+- **Before large edits**: "About to modify [files] — [what and why]."
+- **After edits**: "Updated [file] — [what changed]. Running verification."
+- **On blockers**: "Hit a snag with [issue] — trying [alternative] instead."
+
+Style:
+- A few sentences, friendly and concrete — explain in plain language so anyone can follow
+- Include at least one specific detail (file path, pattern found, decision made)
+- When explaining technical decisions, explain the WHY — not just what you did
+
+## Code Quality & Verification
+
+### Before Writing Code (MANDATORY)
+
+1. SEARCH existing codebase for similar patterns/styles
+2. Match naming, indentation, import styles, error handling conventions
+3. Default to ASCII. Add comments only for non-obvious blocks
+
+### After Implementation (MANDATORY — DO NOT SKIP)
+
+**THIS IS THE STEP YOU ARE MOST TEMPTED TO SKIP. DO NOT SKIP IT.**
+
+Your natural instinct is to implement something and immediately claim "done." RESIST THIS.
+Between implementation and completion, there is VERIFICATION. Every. Single. Time.
+
+1. **\`lsp_diagnostics\`** on ALL modified files — zero errors required. RUN IT, don't assume.
+2. **Run related tests** — pattern: modified \`foo.ts\` → look for \`foo.test.ts\`
+3. **Run typecheck** if TypeScript project
+4. **Run build** if applicable — exit code 0 required
+5. **Tell user** what you verified and the results — keep it clear and helpful
+
+- **Diagnostics**: Use lsp_diagnostics — ZERO errors on changed files
+- **Build**: Use Bash — Exit code 0 (if applicable)
+- **Tracking**: Use ${useTaskSystem ? "task_update" : "todowrite"} — ${verificationText}
+
+**No evidence = not complete. "I think it works" is NOT evidence. Tool output IS evidence.**
+
+<ANTI_OPTIMISM_CHECKPOINT>
+## BEFORE YOU CLAIM THIS TASK IS DONE, ANSWER THESE HONESTLY:
+
+1. Did I run \`lsp_diagnostics\` and see ZERO errors? (not "I'm sure there are none")
+2. Did I run the tests and see them PASS? (not "they should pass")
+3. Did I read the actual output of every command I ran? (not skim)
+4. Is EVERY requirement from the task actually implemented? (re-read the task spec NOW)
+
+If ANY answer is no → GO BACK AND DO IT. Do not claim completion.
+</ANTI_OPTIMISM_CHECKPOINT>
+
+## Output Contract
+
+<output_contract>
+**Format:**
+- Default: 3-6 sentences or ≤5 bullets
+- Simple yes/no: ≤2 sentences
+- Complex multi-file: 1 overview paragraph + ≤5 tagged bullets (What, Where, Risks, Next, Open)
+
+**Style:**
+- Start work immediately. Skip empty preambles ("I'm on it", "Let me...") — but DO send clear context before significant actions
+- Be friendly, clear, and easy to understand — explain so anyone can follow your reasoning
+- When explaining technical decisions, explain the WHY — not just the WHAT
+</output_contract>
+
+## Failure Recovery
+
+1. Fix root causes, not symptoms. Re-verify after EVERY attempt.
+2. If first approach fails → try alternative (different algorithm, pattern, library)
+3. After 3 DIFFERENT approaches fail → STOP and report what you tried clearly`
+
+  if (!promptAppend) return prompt
+  return prompt + "\n\n" + resolvePromptAppend(promptAppend)
+}
+
+function buildGeminiTaskDisciplineSection(useTaskSystem: boolean): string {
+  if (useTaskSystem) {
+    return `## Task Discipline (NON-NEGOTIABLE)
+
+**You WILL forget to track tasks if not forced. This section forces you.**
+
+- **2+ steps** — task_create FIRST, atomic breakdown. DO THIS BEFORE ANY IMPLEMENTATION.
+- **Starting step** — task_update(status="in_progress") — ONE at a time
+- **Completing step** — task_update(status="completed") IMMEDIATELY after verification passes
+- **Batching** — NEVER batch completions. Mark EACH task individually.
+
+No tasks on multi-step work = INCOMPLETE WORK. The user tracks your progress through tasks.`
+  }
+
+  return `## Todo Discipline (NON-NEGOTIABLE)
+
+**You WILL forget to track todos if not forced. This section forces you.**
+
+- **2+ steps** — todowrite FIRST, atomic breakdown. DO THIS BEFORE ANY IMPLEMENTATION.
+- **Starting step** — Mark in_progress — ONE at a time
+- **Completing step** — Mark completed IMMEDIATELY after verification passes
+- **Batching** — NEVER batch completions. Mark EACH todo individually.
+
+No todos on multi-step work = INCOMPLETE WORK. The user tracks your progress through todos.`
+}
--- a/src/agents/sisyphus-junior/index.ts
+++ b/src/agents/sisyphus-junior/index.ts
@@ -1,5 +1,6 @@
 export { buildDefaultSisyphusJuniorPrompt } from "./default"
 export { buildGptSisyphusJuniorPrompt } from "./gpt"
+export { buildGeminiSisyphusJuniorPrompt } from "./gemini"

 export {
  SISYPHUS_JUNIOR_DEFAULTS,
--- a/src/agents/sisyphus.ts
+++ b/src/agents/sisyphus.ts
@@ -1,6 +1,12 @@
 import type { AgentConfig } from "@opencode-ai/sdk";
 import type { AgentMode, AgentPromptMetadata } from "./types";
-import { isGptModel } from "./types";
+import { isGptModel, isGeminiModel } from "./types";
+import {
+  buildGeminiToolMandate,
+  buildGeminiDelegationOverride,
+  buildGeminiVerificationOverride,
+  buildGeminiIntentGateEnforcement,
+} from "./sisyphus-gemini-overlays";

 const MODE: AgentMode = "primary";
 export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = {
@@ -25,6 +31,7 @@ import {
  buildOracleSection,
  buildHardBlocksSection,
  buildAntiPatternsSection,
+  buildDeepParallelSection,
  categorizeTools,
 } from "./dynamic-agent-prompt-builder";

@@ -139,6 +146,7 @@ Should I proceed with [recommendation], or would you prefer differently?
 }

 function buildDynamicSisyphusPrompt(
+  model: string,
  availableAgents: AvailableAgent[],
  availableTools: AvailableTool[] = [],
  availableSkills: AvailableSkill[] = [],
@@ -161,6 +169,7 @@ function buildDynamicSisyphusPrompt(
  const oracleSection = buildOracleSection(availableAgents);
  const hardBlocks = buildHardBlocksSection();
  const antiPatterns = buildAntiPatternsSection();
+  const deepParallelSection = buildDeepParallelSection(model, availableCategories);
  const taskManagementSection = buildTaskManagementSection(useTaskSystem);
  const todoHookNote = useTaskSystem
    ? "YOUR TASK CREATION WOULD BE TRACKED BY HOOK([SYSTEM REMINDER - TASK CONTINUATION])"
@@ -356,6 +365,8 @@ STOP searching when:

 ${categorySkillsGuide}

+${deepParallelSection}
+
 ${delegationTable}

 ### Delegation Prompt Structure (MANDATORY - ALL 6 sections):
@@ -543,15 +554,25 @@ export function createSisyphusAgent(
  const tools = availableToolNames ? categorizeTools(availableToolNames) : [];
  const skills = availableSkills ?? [];
  const categories = availableCategories ?? [];
-  const prompt = availableAgents
+  let prompt = availableAgents
    ? buildDynamicSisyphusPrompt(
+        model,
        availableAgents,
        tools,
        skills,
        categories,
        useTaskSystem,
      )
-    : buildDynamicSisyphusPrompt([], tools, skills, categories, useTaskSystem);
+    : buildDynamicSisyphusPrompt(model, [], tools, skills, categories, useTaskSystem);
+
+  if (isGeminiModel(model)) {
+    prompt = prompt.replace(
+      "</intent_verbalization>",
+      `</intent_verbalization>\n\n${buildGeminiIntentGateEnforcement()}\n\n${buildGeminiToolMandate()}`
+    );
+    prompt += "\n" + buildGeminiDelegationOverride();
+    prompt += "\n" + buildGeminiVerificationOverride();
+  }

  const permission = {
    question: "allow",
--- a/src/agents/types.test.ts
+++ b/src/agents/types.test.ts
@@ -1,5 +1,5 @@
 import { describe, test, expect } from "bun:test";
-import { isGptModel } from "./types";
+import { isGptModel, isGeminiModel } from "./types";

 describe("isGptModel", () => {
  test("standard openai provider models", () => {
@@ -47,3 +47,47 @@ describe("isGptModel", () => {
    expect(isGptModel("opencode/claude-opus-4-6")).toBe(false);
  });
 });
+
+describe("isGeminiModel", () => {
+  test("#given google provider models #then returns true", () => {
+    expect(isGeminiModel("google/gemini-3-pro")).toBe(true);
+    expect(isGeminiModel("google/gemini-3-flash")).toBe(true);
+    expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true);
+  });
+
+  test("#given google-vertex provider models #then returns true", () => {
+    expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true);
+    expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true);
+  });
+
+  test("#given github copilot gemini models #then returns true", () => {
+    expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true);
+    expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true);
+  });
+
+  test("#given litellm proxied gemini models #then returns true", () => {
+    expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true);
+    expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true);
+    expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true);
+  });
+
+  test("#given other proxied gemini models #then returns true", () => {
+    expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true);
+    expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true);
+  });
+
+  test("#given gpt models #then returns false", () => {
+    expect(isGeminiModel("openai/gpt-5.2")).toBe(false);
+    expect(isGeminiModel("openai/o3-mini")).toBe(false);
+    expect(isGeminiModel("litellm/gpt-4o")).toBe(false);
+  });
+
+  test("#given claude models #then returns false", () => {
+    expect(isGeminiModel("anthropic/claude-opus-4-6")).toBe(false);
+    expect(isGeminiModel("anthropic/claude-sonnet-4-6")).toBe(false);
+  });
+
+  test("#given opencode provider #then returns false", () => {
+    expect(isGeminiModel("opencode/claude-opus-4-6")).toBe(false);
+  });
+});
--- a/src/agents/types.ts
+++ b/src/agents/types.ts
@@ -80,6 +80,19 @@ export function isGptModel(model: string): boolean {
  return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix))
 }

+const GEMINI_PROVIDERS = ["google/", "google-vertex/"]
+
+export function isGeminiModel(model: string): boolean {
+  if (GEMINI_PROVIDERS.some((prefix) => model.startsWith(prefix)))
+    return true
+
+  if (model.startsWith("github-copilot/") && extractModelName(model).toLowerCase().startsWith("gemini"))
+    return true
+
+  const modelName = extractModelName(model).toLowerCase()
+  return modelName.startsWith("gemini-")
+}
+
 export type BuiltinAgentName =
  | "sisyphus"
  | "hephaestus"
--- a/src/cli/doctor/formatter.test.ts
+++ b/src/cli/doctor/formatter.test.ts
@@ -1,4 +1,5 @@
-import { afterEach, describe, expect, it, mock } from "bun:test"
+import { describe, expect, it } from "bun:test"
+import { stripAnsi } from "./format-shared"
 import type { DoctorResult } from "./types"

 function createDoctorResult(): DoctorResult {
@@ -39,78 +40,122 @@ function createDoctorResult(): DoctorResult {
  }
 }

-describe("formatter", () => {
-  afterEach(() => {
-    mock.restore()
+function createDoctorResultWithIssues(): DoctorResult {
+  const base = createDoctorResult()
+  base.results[1].issues = [
+    { title: "Config issue", description: "Bad config", severity: "error" as const, fix: "Fix it" },
+    { title: "Tool warning", description: "Missing tool", severity: "warning" as const },
+  ]
+  base.summary.failed = 1
+  base.summary.warnings = 1
+  return base
+}
+
+describe("formatDoctorOutput", () => {
+  describe("#given default mode", () => {
+    it("shows System OK when no issues", async () => {
+      //#given
+      const result = createDoctorResult()
+      const { formatDoctorOutput } = await import(`./formatter?default-ok-${Date.now()}`)
+
+      //#when
+      const output = stripAnsi(formatDoctorOutput(result, "default"))
+
+      //#then
+      expect(output).toContain("System OK (opencode 1.0.200 · oh-my-opencode 3.4.0)")
+    })
+
+    it("shows issue count and details when issues exist", async () => {
+      //#given
+      const result = createDoctorResultWithIssues()
+      const { formatDoctorOutput } = await import(`./formatter?default-issues-${Date.now()}`)
+
+      //#when
+      const output = stripAnsi(formatDoctorOutput(result, "default"))
+
+      //#then
+      expect(output).toContain("issues found:")
+      expect(output).toContain("1. Config issue")
+      expect(output).toContain("2. Tool warning")
+    })
  })

-  describe("formatDoctorOutput", () => {
-    it("dispatches to default formatter for default mode", async () => {
+  describe("#given status mode", () => {
+    it("renders system version line", async () => {
      //#given
-      const formatDefaultMock = mock(() => "default-output")
-      const formatStatusMock = mock(() => "status-output")
-      const formatVerboseMock = mock(() => "verbose-output")
-      mock.module("./format-default", () => ({ formatDefault: formatDefaultMock }))
-      mock.module("./format-status", () => ({ formatStatus: formatStatusMock }))
-      mock.module("./format-verbose", () => ({ formatVerbose: formatVerboseMock }))
-      const { formatDoctorOutput } = await import(`./formatter?default=${Date.now()}`)
+      const result = createDoctorResult()
+      const { formatDoctorOutput } = await import(`./formatter?status-ver-${Date.now()}`)

      //#when
-      const output = formatDoctorOutput(createDoctorResult(), "default")
+      const output = stripAnsi(formatDoctorOutput(result, "status"))

      //#then
-      expect(output).toBe("default-output")
-      expect(formatDefaultMock).toHaveBeenCalledTimes(1)
-      expect(formatStatusMock).toHaveBeenCalledTimes(0)
-      expect(formatVerboseMock).toHaveBeenCalledTimes(0)
+      expect(output).toContain("1.0.200 · 3.4.0 · Bun 1.2.0")
    })

-    it("dispatches to status formatter for status mode", async () => {
+    it("renders tool and MCP info", async () => {
      //#given
-      const formatDefaultMock = mock(() => "default-output")
-      const formatStatusMock = mock(() => "status-output")
-      const formatVerboseMock = mock(() => "verbose-output")
-      mock.module("./format-default", () => ({ formatDefault: formatDefaultMock }))
-      mock.module("./format-status", () => ({ formatStatus: formatStatusMock }))
-      mock.module("./format-verbose", () => ({ formatVerbose: formatVerboseMock }))
-      const { formatDoctorOutput } = await import(`./formatter?status=${Date.now()}`)
+      const result = createDoctorResult()
+      const { formatDoctorOutput } = await import(`./formatter?status-tools-${Date.now()}`)

      //#when
-      const output = formatDoctorOutput(createDoctorResult(), "status")
+      const output = stripAnsi(formatDoctorOutput(result, "status"))

      //#then
-      expect(output).toBe("status-output")
-      expect(formatDefaultMock).toHaveBeenCalledTimes(0)
-      expect(formatStatusMock).toHaveBeenCalledTimes(1)
-      expect(formatVerboseMock).toHaveBeenCalledTimes(0)
+      expect(output).toContain("LSP 2/4")
+      expect(output).toContain("context7")
+    })
+  })
+
+  describe("#given verbose mode", () => {
+    it("includes all section headers", async () => {
+      //#given
+      const result = createDoctorResult()
+      const { formatDoctorOutput } = await import(`./formatter?verbose-headers-${Date.now()}`)
+
+      //#when
+      const output = stripAnsi(formatDoctorOutput(result, "verbose"))
+
+      //#then
+      expect(output).toContain("System Information")
+      expect(output).toContain("Configuration")
+      expect(output).toContain("Tools")
+      expect(output).toContain("MCPs")
+      expect(output).toContain("Summary")
    })

-    it("dispatches to verbose formatter for verbose mode", async () => {
+    it("shows check summary counts", async () => {
      //#given
-      const formatDefaultMock = mock(() => "default-output")
-      const formatStatusMock = mock(() => "status-output")
-      const formatVerboseMock = mock(() => "verbose-output")
-      mock.module("./format-default", () => ({ formatDefault: formatDefaultMock }))
-      mock.module("./format-status", () => ({ formatStatus: formatStatusMock }))
-      mock.module("./format-verbose", () => ({ formatVerbose: formatVerboseMock }))
-      const { formatDoctorOutput } = await import(`./formatter?verbose=${Date.now()}`)
+      const result = createDoctorResult()
+      const { formatDoctorOutput } = await import(`./formatter?verbose-summary-${Date.now()}`)

      //#when
-      const output = formatDoctorOutput(createDoctorResult(), "verbose")
+      const output = stripAnsi(formatDoctorOutput(result, "verbose"))

      //#then
-      expect(output).toBe("verbose-output")
-      expect(formatDefaultMock).toHaveBeenCalledTimes(0)
-      expect(formatStatusMock).toHaveBeenCalledTimes(0)
-      expect(formatVerboseMock).toHaveBeenCalledTimes(1)
+      expect(output).toContain("1 passed")
+      expect(output).toContain("0 failed")
+      expect(output).toContain("1 warnings")
    })
  })

  describe("formatJsonOutput", () => {
-    it("returns valid JSON payload", async () => {
+    it("returns valid JSON", async () => {
      //#given
-      const { formatJsonOutput } = await import(`./formatter?json=${Date.now()}`)
      const result = createDoctorResult()
+      const { formatJsonOutput } = await import(`./formatter?json-valid-${Date.now()}`)
+
+      //#when
+      const output = formatJsonOutput(result)
+
+      //#then
+      expect(() => JSON.parse(output)).not.toThrow()
+    })
+
+    it("preserves all result fields", async () => {
+      //#given
+      const result = createDoctorResult()
+      const { formatJsonOutput } = await import(`./formatter?json-fields-${Date.now()}`)

      //#when
      const output = formatJsonOutput(result)
@@ -119,7 +164,6 @@ describe("formatter", () => {
      //#then
      expect(parsed.summary.total).toBe(2)
      expect(parsed.systemInfo.pluginVersion).toBe("3.4.0")
-      expect(parsed.tools.ghCli.username).toBe("yeongyu")
      expect(parsed.exitCode).toBe(0)
    })
  })
--- a/src/cli/run/session-resolver.ts
+++ b/src/cli/run/session-resolver.ts
@@ -31,7 +31,7 @@ export async function resolveSession(options: {
        permission: [
          { permission: "question", action: "deny" as const, pattern: "*" },
        ],
-      } as any,
+      } as Record<string, unknown>,
      query: { directory },
    })

--- a/src/config/index.ts
+++ b/src/config/index.ts
@@ -1,18 +1,5 @@
 export {
  OhMyOpenCodeConfigSchema,
-  AgentOverrideConfigSchema,
-  AgentOverridesSchema,
-  McpNameSchema,
-  AgentNameSchema,
-  HookNameSchema,
-  BuiltinCommandNameSchema,
-  SisyphusAgentConfigSchema,
-  ExperimentalConfigSchema,
-  RalphLoopConfigSchema,
-  TmuxConfigSchema,
-  TmuxLayoutSchema,
-  RuntimeFallbackConfigSchema,
-  FallbackModelsSchema,
 } from "./schema"

 export type {
--- a/src/config/schema/agent-overrides.ts
+++ b/src/config/schema/agent-overrides.ts
@@ -47,6 +47,12 @@ export const AgentOverrideConfigSchema = z.object({
      variant: z.string().optional(),
    })
    .optional(),
+  compaction: z
+    .object({
+      model: z.string().optional(),
+      variant: z.string().optional(),
+    })
+    .optional(),
 })

 export const AgentOverridesSchema = z.object({
--- a/src/config/schema/oh-my-opencode-config.ts
+++ b/src/config/schema/oh-my-opencode-config.ts
@@ -35,6 +35,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
  disabled_tools: z.array(z.string()).optional(),
  /** Enable hashline_edit tool/hook integrations (default: true at call site) */
  hashline_edit: z.boolean().optional(),
+  /** Enable model fallback on API errors (default: false). Set to true to enable automatic model switching when model errors occur. */
+  model_fallback: z.boolean().optional(),
  agents: AgentOverridesSchema.optional(),
  categories: CategoriesConfigSchema.optional(),
  claude_code: ClaudeCodeConfigSchema.optional(),
--- a/src/features/background-agent/background-task-completer.ts
+++ b/src/features/background-agent/background-task-completer.ts
@@ -1,40 +0,0 @@
-import type { BackgroundTask } from "./types"
-import type { ResultHandlerContext } from "./result-handler-context"
-import { log } from "../../shared"
-import { notifyParentSession } from "./parent-session-notifier"
-
-export async function tryCompleteTask(
-  task: BackgroundTask,
-  source: string,
-  ctx: ResultHandlerContext
-): Promise<boolean> {
-  const { concurrencyManager, state } = ctx
-
-  if (task.status !== "running") {
-    log("[background-agent] Task already completed, skipping:", {
-      taskId: task.id,
-      status: task.status,
-      source,
-    })
-    return false
-  }
-
-  task.status = "completed"
-  task.completedAt = new Date()
-
-  if (task.concurrencyKey) {
-    concurrencyManager.release(task.concurrencyKey)
-    task.concurrencyKey = undefined
-  }
-
-  state.markForNotification(task)
-
-  try {
-    await notifyParentSession(task, ctx)
-    log(`[background-agent] Task completed via ${source}:`, task.id)
-  } catch (error) {
-    log("[background-agent] Error in notifyParentSession:", { taskId: task.id, error })
-  }
-
-  return true
-}
--- a/src/features/background-agent/compaction-aware-message-resolver.test.ts
+++ b/src/features/background-agent/compaction-aware-message-resolver.test.ts
@@ -0,0 +1,190 @@
+import { describe, test, expect, beforeEach, afterEach } from "bun:test"
+import { mkdtempSync, writeFileSync, rmSync } from "node:fs"
+import { join } from "node:path"
+import { tmpdir } from "node:os"
+import { isCompactionAgent, findNearestMessageExcludingCompaction } from "./compaction-aware-message-resolver"
+
+describe("isCompactionAgent", () => { // covers exact, mixed-case, padded, nullish and unrelated agent names
+  describe("#given agent name variations", () => {
+    test("returns true for 'compaction'", () => {
+      // when
+      const result = isCompactionAgent("compaction")
+
+      // then
+      expect(result).toBe(true)
+    })
+
+    test("returns true for 'Compaction' (case insensitive)", () => {
+      // when
+      const result = isCompactionAgent("Compaction")
+
+      // then
+      expect(result).toBe(true)
+    })
+
+    test("returns true for ' compaction ' (with whitespace)", () => {
+      // when
+      const result = isCompactionAgent(" compaction ")
+
+      // then
+      expect(result).toBe(true)
+    })
+
+    test("returns false for undefined", () => {
+      // when
+      const result = isCompactionAgent(undefined)
+
+      // then
+      expect(result).toBe(false)
+    })
+
+    test("returns false for null", () => {
+      // when - the cast passes a runtime null despite the declared `string | undefined` parameter
+      const result = isCompactionAgent(null as unknown as string)
+
+      // then
+      expect(result).toBe(false)
+    })
+
+    test("returns false for non-compaction agent like 'sisyphus'", () => {
+      // when
+      const result = isCompactionAgent("sisyphus")
+
+      // then
+      expect(result).toBe(false)
+    })
+  })
+})
+
+describe("findNearestMessageExcludingCompaction", () => {
+  let tempDir: string // scratch directory: created before each test, removed after it
+
+  beforeEach(() => {
+    tempDir = mkdtempSync(join(tmpdir(), "compaction-test-"))
+  })
+
+  afterEach(() => {
+    rmSync(tempDir, { force: true, recursive: true })
+  })
+
+  describe("#given directory with messages", () => {
+    test("finds message with full agent and model", () => {
+      // given
+      const message = {
+        agent: "sisyphus",
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
+      }
+      writeFileSync(join(tempDir, "001.json"), JSON.stringify(message))
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).not.toBeNull()
+      expect(result?.agent).toBe("sisyphus")
+      expect(result?.model?.providerID).toBe("anthropic")
+      expect(result?.model?.modelID).toBe("claude-opus-4-6")
+    })
+
+    test("skips compaction agent messages", () => {
+      // given - the compaction message has the higher (newer) filename
+      const compactionMessage = {
+        agent: "compaction",
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
+      }
+      const validMessage = {
+        agent: "sisyphus",
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
+      }
+      writeFileSync(join(tempDir, "002.json"), JSON.stringify(compactionMessage))
+      writeFileSync(join(tempDir, "001.json"), JSON.stringify(validMessage))
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).not.toBeNull()
+      expect(result?.agent).toBe("sisyphus")
+    })
+
+    test("falls back to partial agent/model match", () => {
+      // given - neither message has both an agent and a model
+      const messageWithAgentOnly = {
+        agent: "hephaestus",
+      }
+      const messageWithModelOnly = {
+        model: { providerID: "openai", modelID: "gpt-5.3" },
+      }
+      writeFileSync(join(tempDir, "001.json"), JSON.stringify(messageWithModelOnly))
+      writeFileSync(join(tempDir, "002.json"), JSON.stringify(messageWithAgentOnly))
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).not.toBeNull()
+      // Should find the one with agent first (sorted reverse, so 002 is checked first)
+      expect(result?.agent).toBe("hephaestus")
+    })
+
+    test("returns null for empty directory", () => {
+      // given - empty directory (tempDir is already empty)
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).toBeNull()
+    })
+
+    test("returns null for non-existent directory", () => {
+      // given - a child of the fresh scratch directory that was never created
+      const nonExistentDir = join(tempDir, "non-existent-dir")
+
+      // when
+      const result = findNearestMessageExcludingCompaction(nonExistentDir)
+
+      // then
+      expect(result).toBeNull()
+    })
+
+    test("skips invalid JSON files and finds valid message", () => {
+      // given - the unparseable file has the higher (newer) filename
+      const invalidJson = "{ invalid json"
+      const validMessage = {
+        agent: "oracle",
+        model: { providerID: "google", modelID: "gemini-2-flash" },
+      }
+      writeFileSync(join(tempDir, "002.json"), invalidJson)
+      writeFileSync(join(tempDir, "001.json"), JSON.stringify(validMessage))
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).not.toBeNull()
+      expect(result?.agent).toBe("oracle")
+    })
+
+    test("finds newest valid message (sorted by filename reverse)", () => {
+      // given
+      const olderMessage = {
+        agent: "older",
+        model: { providerID: "a", modelID: "b" },
+      }
+      const newerMessage = {
+        agent: "newer",
+        model: { providerID: "c", modelID: "d" },
+      }
+      writeFileSync(join(tempDir, "001.json"), JSON.stringify(olderMessage))
+      writeFileSync(join(tempDir, "010.json"), JSON.stringify(newerMessage))
+
+      // when
+      const result = findNearestMessageExcludingCompaction(tempDir)
+
+      // then
+      expect(result).not.toBeNull()
+      expect(result?.agent).toBe("newer")
+    })
+  })
+})
--- a/src/features/background-agent/compaction-aware-message-resolver.ts
+++ b/src/features/background-agent/compaction-aware-message-resolver.ts
@@ -0,0 +1,57 @@
+import { readdirSync, readFileSync } from "node:fs"
+import { join } from "node:path"
+import type { StoredMessage } from "../hook-message-injector"
+
+export function isCompactionAgent(agent: string | undefined): boolean {
+  return "compaction" === agent?.trim().toLowerCase() // nullish agent yields undefined, hence false
+}
+
+/** True when the message has a non-compaction agent and a model with both provider and model IDs. */
+function hasFullAgentAndModel(message: StoredMessage): boolean {
+  const { agent, model } = message
+  if (!agent || isCompactionAgent(agent)) return false
+  return Boolean(model?.providerID) && Boolean(model?.modelID)
+}
+
+/** True when the message has a non-compaction agent, or a model with both provider and model IDs. */
+function hasPartialAgentOrModel(message: StoredMessage): boolean {
+  const { agent, model } = message
+  return (!!agent && !isCompactionAgent(agent)) || (!!model?.providerID && !!model?.modelID)
+}
+
+/**
+ * Finds the most recent stored message in `messageDir` that can supply an agent and/or model.
+ * `*.json` files are visited in descending filename order. The first message with a
+ * non-compaction agent and a complete model is returned at once; failing that, the
+ * first message that passes `hasPartialAgentOrModel` is returned. Each file is read and
+ * parsed a single time: the partial candidate is remembered during the scan rather than
+ * found by a second pass over the directory.
+ *
+ * NOTE(review): the partial check accepts a compaction-agent message that carries a
+ * complete model — confirm that this fallback is intended.
+ * @returns the chosen message, or null when nothing qualifies or the directory cannot be
+ *   listed. Files that fail to read, parse, or inspect are skipped.
+ */
+export function findNearestMessageExcludingCompaction(messageDir: string): StoredMessage | null {
+  let files: string[]
+  try {
+    files = readdirSync(messageDir)
+      .filter((name) => name.endsWith(".json"))
+      .sort()
+      .reverse()
+  } catch {
+    return null
+  }
+
+  let partialMatch: StoredMessage | null = null
+  for (const file of files) {
+    try {
+      const parsed = JSON.parse(readFileSync(join(messageDir, file), "utf-8")) as StoredMessage
+      if (hasFullAgentAndModel(parsed)) return parsed
+      if (partialMatch === null && hasPartialAgentOrModel(parsed)) partialMatch = parsed
+    } catch {
+      continue
+    }
+  }
+  return partialMatch
+}
--- a/src/features/background-agent/error-classifier.test.ts
+++ b/src/features/background-agent/error-classifier.test.ts
@@ -0,0 +1,351 @@
+import { describe, test, expect } from "bun:test"
+import {
+  isRecord,
+  isAbortedSessionError,
+  getErrorText,
+  extractErrorName,
+  extractErrorMessage,
+  getSessionErrorMessage,
+} from "./error-classifier"
+
+describe("isRecord", () => { // pins the guard: false for null/undefined/primitives, true for any other object
+  describe("#given null or primitive values", () => {
+    test("returns false for null", () => {
+      expect(isRecord(null)).toBe(false)
+    })
+
+    test("returns false for undefined", () => {
+      expect(isRecord(undefined)).toBe(false)
+    })
+
+    test("returns false for string", () => {
+      expect(isRecord("hello")).toBe(false)
+    })
+
+    test("returns false for number", () => {
+      expect(isRecord(42)).toBe(false)
+    })
+
+    test("returns false for boolean", () => {
+      expect(isRecord(true)).toBe(false)
+    })
+
+    // NOTE(review): an array is neither null nor a primitive; this case may belong in its own group
+    test("returns true for array (arrays are objects)", () => {
+      expect(isRecord([1, 2, 3])).toBe(true)
+    })
+  })
+
+  describe("#given plain objects", () => {
+    test("returns true for empty object", () => {
+      expect(isRecord({})).toBe(true)
+    })
+
+    test("returns true for object with properties", () => {
+      expect(isRecord({ key: "value" })).toBe(true)
+    })
+
+    test("returns true for object with nested objects", () => {
+      expect(isRecord({ nested: { deep: true } })).toBe(true)
+    })
+  })
+
+  describe("#given Error instances", () => {
+    test("returns true for Error instance", () => {
+      expect(isRecord(new Error("test"))).toBe(true)
+    })
+
+    test("returns true for TypeError instance", () => {
+      expect(isRecord(new TypeError("test"))).toBe(true)
+    })
+  })
+})
+
+describe("isAbortedSessionError", () => { // detection is a case-insensitive "aborted" search in the error text
+  describe("#given error with aborted message", () => {
+    test("returns true for string containing aborted", () => {
+      expect(isAbortedSessionError("Session aborted")).toBe(true)
+    })
+
+    test("returns true for string with ABORTED uppercase", () => {
+      expect(isAbortedSessionError("Session ABORTED")).toBe(true)
+    })
+
+    test("returns true for Error with aborted in message", () => {
+      expect(isAbortedSessionError(new Error("Session aborted"))).toBe(true)
+    })
+
+    test("returns true for object with message containing aborted", () => {
+      expect(isAbortedSessionError({ message: "The session was aborted" })).toBe(true)
+    })
+  })
+
+  describe("#given error without aborted message", () => {
+    test("returns false for string without aborted", () => {
+      expect(isAbortedSessionError("Session completed")).toBe(false)
+    })
+
+    test("returns false for Error without aborted", () => {
+      expect(isAbortedSessionError(new Error("Something went wrong"))).toBe(false)
+    })
+
+    test("returns false for empty string", () => {
+      expect(isAbortedSessionError("")).toBe(false)
+    })
+  })
+
+  describe("#given invalid inputs", () => {
+    test("returns false for null", () => {
+      expect(isAbortedSessionError(null)).toBe(false)
+    })
+
+    test("returns false for undefined", () => {
+      expect(isAbortedSessionError(undefined)).toBe(false)
+    })
+
+    test("returns false for object without message", () => {
+      expect(isAbortedSessionError({ code: "ABORTED" })).toBe(false) // only a `code` field carries the word here
+    })
+  })
+})
+
+describe("getErrorText", () => { // expected text per input shape; anything unrecognized maps to ""
+  describe("#given string input", () => {
+    test("returns the string as-is", () => {
+      expect(getErrorText("Something went wrong")).toBe("Something went wrong")
+    })
+
+    test("returns empty string for empty string", () => {
+      expect(getErrorText("")).toBe("")
+    })
+  })
+
+  describe("#given Error instance", () => {
+    test("returns name and message format", () => {
+      expect(getErrorText(new Error("test message"))).toBe("Error: test message")
+    })
+
+    test("returns TypeError format", () => {
+      expect(getErrorText(new TypeError("type error"))).toBe("TypeError: type error")
+    })
+  })
+
+  describe("#given object with message property", () => {
+    test("returns message property as string", () => {
+      expect(getErrorText({ message: "custom error" })).toBe("custom error")
+    })
+
+    test("returns name property when message not available", () => {
+      expect(getErrorText({ name: "CustomError" })).toBe("CustomError")
+    })
+
+    test("prefers message over name", () => { // unlike Error instances, plain objects are not rendered as "name: message"
+      expect(getErrorText({ name: "CustomError", message: "error message" })).toBe("error message")
+    })
+  })
+
+  describe("#given invalid inputs", () => {
+    test("returns empty string for null", () => {
+      expect(getErrorText(null)).toBe("")
+    })
+
+    test("returns empty string for undefined", () => {
+      expect(getErrorText(undefined)).toBe("")
+    })
+
+    test("returns empty string for object without message or name", () => {
+      expect(getErrorText({ code: 500 })).toBe("")
+    })
+  })
+})
+
+describe("extractErrorName", () => { // a string `name` is returned; everything else yields undefined
+  describe("#given Error instance", () => {
+    test("returns Error for generic Error", () => {
+      expect(extractErrorName(new Error("test"))).toBe("Error")
+    })
+
+    test("returns TypeError name", () => {
+      expect(extractErrorName(new TypeError("test"))).toBe("TypeError")
+    })
+
+    test("returns RangeError name", () => {
+      expect(extractErrorName(new RangeError("test"))).toBe("RangeError")
+    })
+  })
+
+  describe("#given plain object with name property", () => {
+    test("returns name property when string", () => {
+      expect(extractErrorName({ name: "CustomError" })).toBe("CustomError")
+    })
+
+    test("returns undefined when name is not string", () => {
+      expect(extractErrorName({ name: 123 })).toBe(undefined) // non-string names are not coerced
+    })
+  })
+
+  describe("#given invalid inputs", () => {
+    test("returns undefined for null", () => {
+      expect(extractErrorName(null)).toBe(undefined)
+    })
+
+    test("returns undefined for undefined", () => {
+      expect(extractErrorName(undefined)).toBe(undefined)
+    })
+
+    test("returns undefined for string", () => {
+      expect(extractErrorName("Error message")).toBe(undefined)
+    })
+
+    test("returns undefined for object without name property", () => {
+      expect(extractErrorName({ message: "test" })).toBe(undefined)
+    })
+  })
+})
+
+describe("extractErrorMessage", () => { // pins lookup order, empty-string handling and the serialization fallbacks
+  describe("#given string input", () => {
+    test("returns the string as-is", () => {
+      expect(extractErrorMessage("error message")).toBe("error message")
+    })
+
+    test("returns undefined for empty string", () => {
+      expect(extractErrorMessage("")).toBe(undefined)
+    })
+  })
+
+  describe("#given Error instance", () => {
+    test("returns error message", () => {
+      expect(extractErrorMessage(new Error("test error"))).toBe("test error")
+    })
+
+    test("returns empty string for Error with no message", () => { // contrast: an empty *string* input gives undefined
+      expect(extractErrorMessage(new Error())).toBe("")
+    })
+  })
+
+  describe("#given object with message property", () => {
+    test("returns message property", () => {
+      expect(extractErrorMessage({ message: "custom message" })).toBe("custom message")
+    })
+
+    test("falls through to JSON.stringify for empty message value", () => {
+      expect(extractErrorMessage({ message: "" })).toBe('{"message":""}')
+    })
+  })
+
+  describe("#given nested error structure", () => {
+    test("extracts message from nested error object", () => {
+      expect(extractErrorMessage({ error: { message: "nested error" } })).toBe("nested error")
+    })
+
+    test("extracts message from data.error structure", () => {
+      expect(extractErrorMessage({ data: { error: "data error" } })).toBe("data error")
+    })
+
+    test("extracts message from cause property", () => {
+      expect(extractErrorMessage({ cause: "cause error" })).toBe("cause error")
+    })
+
+    test("extracts message from cause object with message", () => {
+      expect(extractErrorMessage({ cause: { message: "cause message" } })).toBe("cause message")
+    })
+  })
+
+  describe("#given complex error with data wrapper", () => {
+    test("extracts from error.data.message", () => {
+      const error = {
+        data: {
+          message: "data message",
+        },
+      }
+      expect(extractErrorMessage(error)).toBe("data message")
+    })
+
+    test("prefers top-level over nested message", () => {
+      const error = {
+        message: "top level",
+        data: { message: "nested" },
+      }
+      expect(extractErrorMessage(error)).toBe("top level")
+    })
+  })
+
+  describe("#given invalid inputs", () => {
+    test("returns undefined for null", () => {
+      expect(extractErrorMessage(null)).toBe(undefined)
+    })
+
+    test("returns undefined for undefined", () => {
+      expect(extractErrorMessage(undefined)).toBe(undefined)
+    })
+  })
+
+  describe("#given object without extractable message", () => {
+    test("falls back to JSON.stringify for object", () => {
+      const obj = { code: 500, details: "error" }
+      const result = extractErrorMessage(obj)
+      expect(result).toContain('"code":500')
+    })
+
+    test("falls back to String() for non-serializable object", () => {
+      const circular: Record<string, unknown> = { a: 1 }
+      circular.self = circular // self-reference makes JSON.stringify throw
+      const result = extractErrorMessage(circular)
+      expect(result).toBe("[object Object]")
+    })
+  })
+})
+
+describe("getSessionErrorMessage", () => { // error.data.message takes priority over error.message; non-strings give undefined
+  describe("#given valid error properties", () => {
+    test("extracts message from error.message", () => {
+      const properties = { error: { message: "session error" } }
+      expect(getSessionErrorMessage(properties)).toBe("session error")
+    })
+
+    test("extracts message from error.data.message", () => {
+      const properties = {
+        error: {
+          data: { message: "data error message" },
+        },
+      }
+      expect(getSessionErrorMessage(properties)).toBe("data error message")
+    })
+
+    test("prefers error.data.message over error.message", () => {
+      const properties = {
+        error: {
+          message: "top level",
+          data: { message: "nested" },
+        },
+      }
+      expect(getSessionErrorMessage(properties)).toBe("nested")
+    })
+  })
+
+  describe("#given missing or invalid properties", () => {
+    test("returns undefined when error is missing", () => {
+      expect(getSessionErrorMessage({})).toBe(undefined)
+    })
+
+    test("returns undefined when error is null", () => {
+      expect(getSessionErrorMessage({ error: null })).toBe(undefined)
+    })
+
+    test("returns undefined when error is string", () => {
+      expect(getSessionErrorMessage({ error: "error string" })).toBe(undefined) // a bare string error is not unwrapped
+    })
+
+    test("returns undefined when data is not an object", () => {
+      expect(getSessionErrorMessage({ error: { data: "not an object" } })).toBe(undefined)
+    })
+
+    test("returns undefined when message is not string", () => {
+      expect(getSessionErrorMessage({ error: { message: 123 } })).toBe(undefined)
+    })
+
+    test("returns undefined when data.message is not string", () => {
+      expect(getSessionErrorMessage({ error: { data: { message: null } } })).toBe(undefined)
+    })
+  })
+})
--- a/src/features/background-agent/error-classifier.ts
+++ b/src/features/background-agent/error-classifier.ts
@@ -1,3 +1,7 @@
+export function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === "object" // arrays and Error instances pass as well
+}
+
 export function isAbortedSessionError(error: unknown): boolean {
  const message = getErrorText(error)
  return message.toLowerCase().includes("aborted")
@@ -19,3 +23,61 @@ export function getErrorText(error: unknown): string {
  }
  return ""
 }
+
+/** Returns a string `name` property of `error`, else `error.name` for Error instances, else undefined. */
+export function extractErrorName(error: unknown): string | undefined {
+  const name = isRecord(error) ? error["name"] : undefined
+  if (typeof name === "string") return name
+  return error instanceof Error ? error.name : undefined
+}
+
+/**
+ * Derives a best-effort message string from an arbitrary error-like value.
+ * Falsy input gives undefined; a string is returned as-is; an Error gives its `message`.
+ * For other objects these are probed in order — the object, `data`, `error`, `data.error`,
+ * `cause` — and the first non-empty string (or non-empty string `message` property) wins.
+ * If nothing matches, the value is JSON-serialized, or passed to String() when that throws.
+ */
+export function extractErrorMessage(error: unknown): string | undefined {
+  if (!error) return undefined
+  if (typeof error === "string") return error
+  if (error instanceof Error) return error.message
+
+  if (isRecord(error)) {
+    const data = error["data"]
+    const probes: unknown[] = [
+      error,
+      data,
+      error["error"],
+      isRecord(data) ? data["error"] : undefined,
+      error["cause"],
+    ]
+    for (const probe of probes) {
+      const text = isRecord(probe) ? probe["message"] : probe
+      if (typeof text === "string" && text.length > 0) return text
+    }
+  }
+
+  try {
+    return JSON.stringify(error)
+  } catch {
+    return String(error)
+  }
+}
+
+interface EventPropertiesLike { // open-ended bag of event properties; values must be narrowed before use
+  [key: string]: unknown
+}
+
+/**
+ * Reads the message of a session error event: `error.data.message` when it is a string,
+ * otherwise `error.message` when it is a string. Undefined if `error` is not an object.
+ */
+export function getSessionErrorMessage(properties: EventPropertiesLike): string | undefined {
+  const sessionError = properties["error"]
+  if (!isRecord(sessionError)) return undefined
+
+  const payload = sessionError["data"]
+  const nested = isRecord(payload) ? payload["message"] : undefined
+  const chosen = typeof nested === "string" ? nested : sessionError["message"]
+  return typeof chosen === "string" ? chosen : undefined
+}
--- a/src/features/background-agent/fallback-retry-handler.test.ts
+++ b/src/features/background-agent/fallback-retry-handler.test.ts
@@ -0,0 +1,270 @@
+import { describe, test, expect, mock, beforeEach } from "bun:test"
+
+mock.module("../../shared", () => ({ // silence logging; both provider caches report null
+  log: mock(() => {}),
+  readConnectedProvidersCache: mock(() => null),
+  readProviderModelsCache: mock(() => null),
+}))
+
+mock.module("../../shared/model-error-classifier", () => ({ // deterministic stand-ins for the fallback helpers
+  shouldRetryError: mock(() => true),
+  getNextFallback: mock((chain: Array<{ model: string }>, attempt: number) => chain[attempt]),
+  hasMoreFallbacks: mock((chain: Array<{ model: string }>, attempt: number) => attempt < chain.length),
+  selectFallbackProvider: mock((providers: string[]) => providers[0]),
+}))
+
+mock.module("../../shared/provider-model-id-transform", () => ({ // identity: the model id is passed through unchanged
+  transformModelForProvider: mock((_provider: string, model: string) => model),
+}))
+
+import { tryFallbackRetry } from "./fallback-retry-handler"
+import { shouldRetryError } from "../../shared/model-error-classifier"
+import type { BackgroundTask } from "./types"
+import type { ConcurrencyManager } from "./concurrency"
+
+function createMockTask(overrides: Partial<BackgroundTask> = {}): BackgroundTask {
+  const defaults: BackgroundTask = { // an errored task with a two-entry fallback chain
+    id: "test-task-1",
+    description: "test task",
+    prompt: "test prompt",
+    agent: "sisyphus-junior",
+    status: "error",
+    parentSessionID: "parent-session-1",
+    parentMessageID: "parent-message-1",
+    fallbackChain: [
+      { model: "fallback-model-1", providers: ["provider-a"], variant: undefined },
+      { model: "fallback-model-2", providers: ["provider-b"], variant: undefined },
+    ],
+    attemptCount: 0,
+    concurrencyKey: "provider-a/original-model",
+    model: { providerID: "provider-a", modelID: "original-model" },
+  }
+  return { ...defaults, ...overrides } // caller-supplied fields win
+}
+
+function createMockConcurrencyManager(): ConcurrencyManager {
+  // Only the four members below are stubbed, hence the double cast to the full manager type.
+  const release = mock(() => {})
+  const acquire = mock(async () => {})
+  const getQueueLength = mock(() => 0)
+  const getActiveCount = mock(() => 0)
+  return { release, acquire, getQueueLength, getActiveCount } as unknown as ConcurrencyManager
+}
+
+function createMockClient() {
+  // Minimal client double exposing only session.abort, which resolves to an empty object.
+  const abort = mock(async () => ({}))
+  const session = { abort }
+  // Typed as `any` so the stub can be passed where the real client type is expected.
+  return { session } as any
+}
+
+/**
+ * Assembles a full argument object for tryFallbackRetry: a mock task (customizable via
+ * `taskOverrides`), a fixed overloaded-model error, and fresh mocks/maps for everything else.
+ */
+function createDefaultArgs(taskOverrides: Partial<BackgroundTask> = {}) {
+  const queuesByKey = new Map<string, Array<{ task: BackgroundTask; input: any }>>()
+  const idleDeferralTimers = new Map<string, ReturnType<typeof setTimeout>>()
+
+  return {
+    task: createMockTask(taskOverrides),
+    errorInfo: { name: "OverloadedError", message: "model overloaded" },
+    source: "polling",
+    concurrencyManager: createMockConcurrencyManager(),
+    client: createMockClient(),
+    idleDeferralTimers,
+    queuesByKey,
+    processKey: mock(() => {}),
+  }
+}
+
+describe("tryFallbackRetry", () => { // each test builds its own args via createDefaultArgs and inspects the mutated task
+  beforeEach(() => {
+    ;(shouldRetryError as any).mockImplementation(() => true) // restore the default that the non-retryable test overrides
+  })
+
+  describe("#given retryable error with fallback chain", () => {
+    test("returns true and enqueues retry", () => {
+      const args = createDefaultArgs()
+
+      const result = tryFallbackRetry(args)
+
+      expect(result).toBe(true)
+    })
+
+    test("resets task status to pending", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.task.status).toBe("pending")
+    })
+
+    test("increments attemptCount", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.task.attemptCount).toBe(1)
+    })
+
+    test("updates task model to fallback", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.task.model?.modelID).toBe("fallback-model-1")
+      expect(args.task.model?.providerID).toBe("provider-a")
+    })
+
+    test("clears sessionID and startedAt", () => {
+      const args = createDefaultArgs({
+        sessionID: "old-session",
+        startedAt: new Date(),
+      })
+
+      tryFallbackRetry(args)
+
+      expect(args.task.sessionID).toBeUndefined()
+      expect(args.task.startedAt).toBeUndefined()
+    })
+
+    test("clears error field", () => {
+      const args = createDefaultArgs({ error: "previous error" })
+
+      tryFallbackRetry(args)
+
+      expect(args.task.error).toBeUndefined()
+    })
+
+    test("sets new queuedAt", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.task.queuedAt).toBeInstanceOf(Date)
+    })
+
+    test("releases concurrency slot", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.concurrencyManager.release).toHaveBeenCalledWith("provider-a/original-model")
+    })
+
+    test("clears concurrencyKey after release", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      expect(args.task.concurrencyKey).toBeUndefined()
+    })
+
+    test("aborts existing session", () => {
+      const args = createDefaultArgs({ sessionID: "session-to-abort" })
+
+      tryFallbackRetry(args)
+
+      expect(args.client.session.abort).toHaveBeenCalledWith({
+        path: { id: "session-to-abort" },
+      })
+    })
+
+    test("adds retry input to queue and calls processKey", () => {
+      const args = createDefaultArgs()
+
+      tryFallbackRetry(args)
+
+      // key is built from the task's model after the call, i.e. the fallback model
+      const key = `${args.task.model!.providerID}/${args.task.model!.modelID}`
+      const queue = args.queuesByKey.get(key)
+      expect(queue).toBeDefined()
+      expect(queue!.length).toBe(1)
+      expect(queue![0].task).toBe(args.task)
+      expect(args.processKey).toHaveBeenCalledWith(key)
+    })
+  })
+
+  describe("#given non-retryable error", () => {
+    test("returns false when shouldRetryError returns false", () => {
+      ;(shouldRetryError as any).mockImplementation(() => false)
+      const args = createDefaultArgs()
+
+      const result = tryFallbackRetry(args)
+
+      expect(result).toBe(false)
+    })
+  })
+
+  describe("#given no fallback chain", () => {
+    test("returns false when fallbackChain is undefined", () => {
+      const args = createDefaultArgs({ fallbackChain: undefined })
+
+      const result = tryFallbackRetry(args)
+
+      expect(result).toBe(false)
+    })
+
+    test("returns false when fallbackChain is empty", () => {
+      const args = createDefaultArgs({ fallbackChain: [] })
+
+      const result = tryFallbackRetry(args)
+
+      expect(result).toBe(false)
+    })
+  })
+
+  describe("#given exhausted fallbacks", () => {
+    test("returns false when attemptCount exceeds chain length", () => {
+      const args = createDefaultArgs({ attemptCount: 5 }) // the default chain has two entries
+
+      const result = tryFallbackRetry(args)
+
+      expect(result).toBe(false)
+    })
+  })
+
+  describe("#given task without concurrency key", () => {
+    test("skips concurrency release", () => {
+      const args = createDefaultArgs({ concurrencyKey: undefined })
+
+      tryFallbackRetry(args)
+
+      expect(args.concurrencyManager.release).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given task without session", () => {
+    test("skips session abort", () => {
+      const args = createDefaultArgs({ sessionID: undefined })
+
+      tryFallbackRetry(args)
+
+      expect(args.client.session.abort).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given active idle deferral timer", () => {
+    test("clears the timer and removes from map", () => {
+      const args = createDefaultArgs()
+      const timerId = setTimeout(() => {}, 10000)
+      args.idleDeferralTimers.set("test-task-1", timerId) // keyed by the default mock task id
+
+      tryFallbackRetry(args)
+
+      // NOTE(review): only removal from the map is asserted; cancellation of the timer itself is not checked
+      expect(args.idleDeferralTimers.has("test-task-1")).toBe(false)
+    })
+  })
+
+  describe("#given second attempt", () => {
+    test("uses second fallback in chain", () => {
+      const args = createDefaultArgs({ attemptCount: 1 })
+
+      tryFallbackRetry(args)
+
+      expect(args.task.model?.modelID).toBe("fallback-model-2")
+      expect(args.task.attemptCount).toBe(2)
+    })
+  })
+})
--- a/src/features/background-agent/fallback-retry-handler.ts
+++ b/src/features/background-agent/fallback-retry-handler.ts
@@ -0,0 +1,126 @@
+import type { BackgroundTask, LaunchInput } from "./types"
+import type { FallbackEntry } from "../../shared/model-requirements"
+import type { ConcurrencyManager } from "./concurrency"
+import type { OpencodeClient, QueueItem } from "./constants"
+import { log, readConnectedProvidersCache, readProviderModelsCache } from "../../shared"
+import {
+  shouldRetryError,
+  getNextFallback,
+  hasMoreFallbacks,
+  selectFallbackProvider,
+} from "../../shared/model-error-classifier"
+import { transformModelForProvider } from "../../shared/provider-model-id-transform"
+
+export function tryFallbackRetry(args: { // Re-queues a failed background task on the next reachable fallback model; returns true only when a retry was enqueued
+  task: BackgroundTask
+  errorInfo: { name?: string; message?: string }
+  source: string // label included in the log entries below
+  concurrencyManager: ConcurrencyManager
+  client: OpencodeClient
+  idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>>
+  queuesByKey: Map<string, QueueItem[]>
+  processKey: (key: string) => void // invoked with the queue key after the task has been re-enqueued
+}): boolean {
+  const { task, errorInfo, source, concurrencyManager, client, idleDeferralTimers, queuesByKey, processKey } = args
+  const fallbackChain = task.fallbackChain
+  const canRetry = // needs a retryable error, a non-empty chain, and fallbacks remaining past attemptCount
+    shouldRetryError(errorInfo) &&
+    fallbackChain &&
+    fallbackChain.length > 0 &&
+    hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
+
+  if (!canRetry) return false // nothing has been mutated at this point
+
+  const attemptCount = task.attemptCount ?? 0
+  const providerModelsCache = readProviderModelsCache()
+  const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache() // prefer the provider-models cache's connected list; otherwise read the connected-providers cache
+  const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null // lower-cased for case-insensitive matching; null when neither cache yields a list
+
+  const isReachable = (entry: FallbackEntry): boolean => {
+    if (!connectedSet) return true // no connectivity information: treat every entry as reachable
+    return entry.providers.some((p) => connectedSet.has(p.toLowerCase()))
+  }
+
+  let selectedAttemptCount = attemptCount
+  let nextFallback: FallbackEntry | undefined
+  while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
+    const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
+    if (!candidate) break
+    selectedAttemptCount++ // incremented before the reachability check, so skipped entries are counted as attempts
+    if (!isReachable(candidate)) {
+      log("[background-agent] Skipping unreachable fallback:", {
+        taskId: task.id,
+        source,
+        model: candidate.model,
+        providers: candidate.providers,
+      })
+      continue
+    }
+    nextFallback = candidate
+    break
+  }
+  if (!nextFallback) return false // every remaining fallback was unreachable; the task is left unchanged
+
+  const providerID = selectFallbackProvider(
+    nextFallback.providers,
+    task.model?.providerID,
+  )
+
+  log("[background-agent] Retryable error, attempting fallback:", {
+    taskId: task.id,
+    source,
+    errorName: errorInfo.name,
+    errorMessage: errorInfo.message?.slice(0, 100), // only the first 100 characters are logged
+    attemptCount: selectedAttemptCount,
+    nextModel: `${providerID}/${nextFallback.model}`,
+  })
+
+  if (task.concurrencyKey) {
+    concurrencyManager.release(task.concurrencyKey)
+    task.concurrencyKey = undefined // key dropped once its slot has been released
+  }
+
+  if (task.sessionID) {
+    client.session.abort({ path: { id: task.sessionID } }).catch(() => {}) // fire-and-forget: abort rejections are deliberately ignored
+  }
+
+  const idleTimer = idleDeferralTimers.get(task.id)
+  if (idleTimer) { // cancel any pending idle-deferral timer registered for this task
+    clearTimeout(idleTimer)
+    idleDeferralTimers.delete(task.id)
+  }
+
+  task.attemptCount = selectedAttemptCount // records how far into the chain this retry advanced
+  const transformedModelId = transformModelForProvider(providerID, nextFallback.model)
+  task.model = {
+    providerID,
+    modelID: transformedModelId,
+    variant: nextFallback.variant,
+  }
+  task.status = "pending" // from here the task is reset to a freshly queued state
+  task.sessionID = undefined
+  task.startedAt = undefined
+  task.queuedAt = new Date()
+  task.error = undefined
+
+  const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent // task.model was assigned just above, so the task.agent branch is not taken here
+  const queue = queuesByKey.get(key) ?? [] // reuse the existing queue for this key or start a new one
+  const retryInput: LaunchInput = {
+    description: task.description,
+    prompt: task.prompt,
+    agent: task.agent,
+    parentSessionID: task.parentSessionID,
+    parentMessageID: task.parentMessageID,
+    parentModel: task.parentModel,
+    parentAgent: task.parentAgent,
+    parentTools: task.parentTools,
+    model: task.model,
+    fallbackChain: task.fallbackChain,
+    category: task.category,
+    isUnstableAgent: task.isUnstableAgent,
+  }
+  queue.push({ task, input: retryInput })
+  queuesByKey.set(key, queue)
+  processKey(key) // hand the key to the caller-supplied processor
+  return true
+}
--- a/src/features/background-agent/format-duration.ts
+++ b/src/features/background-agent/format-duration.ts
@@ -1,14 +0,0 @@
-export function formatDuration(start: Date, end?: Date): string {
-  const duration = (end ?? new Date()).getTime() - start.getTime()
-  const seconds = Math.floor(duration / 1000)
-  const minutes = Math.floor(seconds / 60)
-  const hours = Math.floor(minutes / 60)
-
-  if (hours > 0) {
-    return `${hours}h ${minutes % 60}m ${seconds % 60}s`
-  }
-  if (minutes > 0) {
-    return `${minutes}m ${seconds % 60}s`
-  }
-  return `${seconds}s`
-}
--- a/src/features/background-agent/index.ts
+++ b/src/features/background-agent/index.ts
@@ -1,5 +1,2 @@
 export * from "./types"
 export { BackgroundManager, type SubagentSessionCreatedEvent, type OnSubagentSessionCreated } from "./manager"
-export { TaskHistory, type TaskHistoryEntry } from "./task-history"
-export { ConcurrencyManager } from "./concurrency"
-export { TaskStateManager } from "./state"
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -5,7 +5,6 @@ import type {
  LaunchInput,
  ResumeInput,
 } from "./types"
-import type { FallbackEntry } from "../../shared/model-requirements"
 import { TaskHistory } from "./task-history"
 import {
  log,
@@ -13,8 +12,6 @@ import {
  normalizePromptTools,
  normalizeSDKResponse,
  promptWithModelSuggestionRetry,
-  readConnectedProvidersCache,
-  readProviderModelsCache,
  resolveInheritedPromptTools,
  createInternalAgentTextPart,
 } from "../../shared"
@@ -25,28 +22,31 @@ import type { BackgroundTaskConfig, TmuxConfig } from "../../config/schema"
 import { isInsideTmux } from "../../shared/tmux"
 import {
  shouldRetryError,
-  getNextFallback,
  hasMoreFallbacks,
-  selectFallbackProvider,
 } from "../../shared/model-error-classifier"
-import { transformModelForProvider } from "../../shared/provider-model-id-transform"
 import {
-  DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS,
-  DEFAULT_STALE_TIMEOUT_MS,
-  MIN_IDLE_TIME_MS,
-  MIN_RUNTIME_BEFORE_STALE_MS,
  POLLING_INTERVAL_MS,
  TASK_CLEANUP_DELAY_MS,
-  TASK_TTL_MS,
 } from "./constants"

 import { subagentSessions } from "../claude-code-session-state"
 import { getTaskToastManager } from "../task-toast-manager"
-import { MESSAGE_STORAGE, type StoredMessage } from "../hook-message-injector"
-import { existsSync, readFileSync, readdirSync } from "node:fs"
+import { formatDuration } from "./duration-formatter"
+import {
+  isAbortedSessionError,
+  extractErrorName,
+  extractErrorMessage,
+  getSessionErrorMessage,
+  isRecord,
+} from "./error-classifier"
+import { tryFallbackRetry } from "./fallback-retry-handler"
+import { registerManagerForCleanup, unregisterManagerForCleanup } from "./process-cleanup"
+import { isCompactionAgent, findNearestMessageExcludingCompaction } from "./compaction-aware-message-resolver"
+import { handleSessionIdleBackgroundEvent } from "./session-idle-event-handler"
+import { MESSAGE_STORAGE } from "../hook-message-injector"
 import { join } from "node:path"
-
-type ProcessCleanupEvent = NodeJS.Signals | "beforeExit" | "exit"
+import { pruneStaleTasksAndNotifications } from "./task-poller"
+import { checkAndInterruptStaleTasks } from "./task-poller"

 type OpencodeClient = PluginInput["client"]

@@ -89,9 +89,7 @@ export interface SubagentSessionCreatedEvent {
 export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>

 export class BackgroundManager {
-  private static cleanupManagers = new Set<BackgroundManager>()
-  private static cleanupRegistered = false
-  private static cleanupHandlers = new Map<ProcessCleanupEvent, () => void>()
+

  private tasks: Map<string, BackgroundTask>
  private notifications: Map<string, BackgroundTask[]>
@@ -270,7 +268,7 @@ export class BackgroundManager {
      body: {
        parentID: input.parentSessionID,
        title: `${input.description} (@${input.agent} subagent)`,
-      } as any,
+      } as Record<string, unknown>,
      query: {
        directory: parentDirectory,
      },
@@ -705,8 +703,8 @@ export class BackgroundManager {
      if (!assistantError) return

      const errorInfo = {
-        name: this.extractErrorName(assistantError),
-        message: this.extractErrorMessage(assistantError),
+        name: extractErrorName(assistantError),
+        message: extractErrorMessage(assistantError),
      }
      this.tryFallbackRetry(task, errorInfo, "message.updated")
    }
@@ -742,61 +740,15 @@ export class BackgroundManager {
    }

    if (event.type === "session.idle") {
-      const sessionID = props?.sessionID as string | undefined
-      if (!sessionID) return
-
-      const task = this.findBySession(sessionID)
-      if (!task || task.status !== "running") return
-      
-      const startedAt = task.startedAt
-      if (!startedAt) return
-
-      // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
-      const elapsedMs = Date.now() - startedAt.getTime()
-      if (elapsedMs < MIN_IDLE_TIME_MS) {
-        const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
-        if (!this.idleDeferralTimers.has(task.id)) {
-          log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
-          const timer = setTimeout(() => {
-            this.idleDeferralTimers.delete(task.id)
-            this.handleEvent({ type: "session.idle", properties: { sessionID } })
-          }, remainingMs)
-          this.idleDeferralTimers.set(task.id, timer)
-        } else {
-          log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
-        }
-        return
-      }
-
-      // Edge guard: Verify session has actual assistant output before completing
-      this.validateSessionHasOutput(sessionID).then(async (hasValidOutput) => {
-        // Re-check status after async operation (could have been completed by polling)
-        if (task.status !== "running") {
-          log("[background-agent] Task status changed during validation, skipping:", { taskId: task.id, status: task.status })
-          return
-        }
-
-        if (!hasValidOutput) {
-          log("[background-agent] Session.idle but no valid output yet, waiting:", task.id)
-          return
-        }
-
-        const hasIncompleteTodos = await this.checkSessionTodos(sessionID)
-
-        // Re-check status after async operation again
-        if (task.status !== "running") {
-          log("[background-agent] Task status changed during todo check, skipping:", { taskId: task.id, status: task.status })
-          return
-        }
-
-        if (hasIncompleteTodos) {
-          log("[background-agent] Task has incomplete todos, waiting for todo-continuation:", task.id)
-          return
-        }
-
-        await this.tryCompleteTask(task, "session.idle event")
-      }).catch(err => {
-        log("[background-agent] Error in session.idle handler:", err)
+      if (!props || typeof props !== "object") return
+      handleSessionIdleBackgroundEvent({
+        properties: props as Record<string, unknown>,
+        findBySession: (id) => this.findBySession(id),
+        idleDeferralTimers: this.idleDeferralTimers,
+        validateSessionHasOutput: (id) => this.validateSessionHasOutput(id),
+        checkSessionTodos: (id) => this.checkSessionTodos(id),
+        tryCompleteTask: (task, source) => this.tryCompleteTask(task, source),
+        emitIdleEvent: (sessionID) => this.handleEvent({ type: "session.idle", properties: { sessionID } }),
      })
    }

@@ -809,7 +761,7 @@ export class BackgroundManager {

      const errorObj = props?.error as { name?: string; message?: string } | undefined
      const errorName = errorObj?.name
-      const errorMessage = props ? this.getSessionErrorMessage(props) : undefined
+      const errorMessage = props ? getSessionErrorMessage(props) : undefined

      const errorInfo = { name: errorName, message: errorMessage }
      if (this.tryFallbackRetry(task, errorInfo, "session.error")) return
@@ -934,110 +886,21 @@ export class BackgroundManager {
    errorInfo: { name?: string; message?: string },
    source: string,
  ): boolean {
-    const fallbackChain = task.fallbackChain
-    const canRetry =
-      shouldRetryError(errorInfo) &&
-      fallbackChain &&
-      fallbackChain.length > 0 &&
-      hasMoreFallbacks(fallbackChain, task.attemptCount ?? 0)
-
-    if (!canRetry) return false
-
-    const attemptCount = task.attemptCount ?? 0
-    const providerModelsCache = readProviderModelsCache()
-    const connectedProviders = providerModelsCache?.connected ?? readConnectedProvidersCache()
-    const connectedSet = connectedProviders ? new Set(connectedProviders.map(p => p.toLowerCase())) : null
-
-    const isReachable = (entry: FallbackEntry): boolean => {
-      if (!connectedSet) return true
-
-      // Gate only on provider connectivity. Provider model lists can be stale/incomplete,
-      // especially after users manually add models to opencode.json.
-      return entry.providers.some((p) => connectedSet.has(p.toLowerCase()))
-    }
-
-    let selectedAttemptCount = attemptCount
-    let nextFallback: FallbackEntry | undefined
-    while (fallbackChain && selectedAttemptCount < fallbackChain.length) {
-      const candidate = getNextFallback(fallbackChain, selectedAttemptCount)
-      if (!candidate) break
-      selectedAttemptCount++
-      if (!isReachable(candidate)) {
-        log("[background-agent] Skipping unreachable fallback:", {
-          taskId: task.id,
-          source,
-          model: candidate.model,
-          providers: candidate.providers,
-        })
-        continue
-      }
-      nextFallback = candidate
-      break
-    }
-    if (!nextFallback) return false
-
-    const providerID = selectFallbackProvider(
-      nextFallback.providers,
-      task.model?.providerID,
-    )
-
-    log("[background-agent] Retryable error, attempting fallback:", {
-      taskId: task.id,
+    const previousSessionID = task.sessionID
+    const result = tryFallbackRetry({
+      task,
+      errorInfo,
      source,
-      errorName: errorInfo.name,
-      errorMessage: errorInfo.message?.slice(0, 100),
-      attemptCount: selectedAttemptCount,
-      nextModel: `${providerID}/${nextFallback.model}`,
+      concurrencyManager: this.concurrencyManager,
+      client: this.client,
+      idleDeferralTimers: this.idleDeferralTimers,
+      queuesByKey: this.queuesByKey,
+      processKey: (key: string) => this.processKey(key),
    })
-
-    if (task.concurrencyKey) {
-      this.concurrencyManager.release(task.concurrencyKey)
-      task.concurrencyKey = undefined
+    if (result && previousSessionID) {
+      subagentSessions.delete(previousSessionID)
    }
-
-    if (task.sessionID) {
-      this.client.session.abort({ path: { id: task.sessionID } }).catch(() => {})
-      subagentSessions.delete(task.sessionID)
-    }
-
-    const idleTimer = this.idleDeferralTimers.get(task.id)
-    if (idleTimer) {
-      clearTimeout(idleTimer)
-      this.idleDeferralTimers.delete(task.id)
-    }
-
-    task.attemptCount = selectedAttemptCount
-    const transformedModelId = transformModelForProvider(providerID, nextFallback.model)
-    task.model = {
-      providerID,
-      modelID: transformedModelId,
-      variant: nextFallback.variant,
-    }
-    task.status = "pending"
-    task.sessionID = undefined
-    task.startedAt = undefined
-    task.queuedAt = new Date()
-    task.error = undefined
-
-    const key = task.model ? `${task.model.providerID}/${task.model.modelID}` : task.agent
-    const queue = this.queuesByKey.get(key) ?? []
-    const retryInput: LaunchInput = {
-      description: task.description,
-      prompt: task.prompt,
-      agent: task.agent,
-      parentSessionID: task.parentSessionID,
-      parentMessageID: task.parentMessageID,
-      parentModel: task.parentModel,
-      parentAgent: task.parentAgent,
-      parentTools: task.parentTools,
-      model: task.model,
-      fallbackChain: task.fallbackChain,
-      category: task.category,
-    }
-    queue.push({ task, input: retryInput })
-    this.queuesByKey.set(key, queue)
-    this.processKey(key)
-    return true
+    return result
  }

  markForNotification(task: BackgroundTask): void {
@@ -1256,45 +1119,11 @@ export class BackgroundManager {
  }

  private registerProcessCleanup(): void {
-    BackgroundManager.cleanupManagers.add(this)
-
-    if (BackgroundManager.cleanupRegistered) return
-    BackgroundManager.cleanupRegistered = true
-
-    const cleanupAll = () => {
-      for (const manager of BackgroundManager.cleanupManagers) {
-        try {
-          manager.shutdown()
-        } catch (error) {
-          log("[background-agent] Error during shutdown cleanup:", error)
-        }
-      }
-    }
-
-    const registerSignal = (signal: ProcessCleanupEvent, exitAfter: boolean): void => {
-      const listener = registerProcessSignal(signal, cleanupAll, exitAfter)
-      BackgroundManager.cleanupHandlers.set(signal, listener)
-    }
-
-    registerSignal("SIGINT", true)
-    registerSignal("SIGTERM", true)
-    if (process.platform === "win32") {
-      registerSignal("SIGBREAK", true)
-    }
-    registerSignal("beforeExit", false)
-    registerSignal("exit", false)
+    registerManagerForCleanup(this)
  }

  private unregisterProcessCleanup(): void {
-    BackgroundManager.cleanupManagers.delete(this)
-
-    if (BackgroundManager.cleanupManagers.size > 0) return
-
-    for (const [signal, listener] of BackgroundManager.cleanupHandlers.entries()) {
-      process.off(signal, listener)
-    }
-    BackgroundManager.cleanupHandlers.clear()
-    BackgroundManager.cleanupRegistered = false
+    unregisterManagerForCleanup(this)
  }


@@ -1368,7 +1197,7 @@ export class BackgroundManager {
    // Note: Callers must release concurrency before calling this method
    // to ensure slots are freed even if notification fails

-    const duration = this.formatDuration(task.startedAt ?? new Date(), task.completedAt)
+    const duration = formatDuration(task.startedAt ?? new Date(), task.completedAt)

    log("[background-agent] notifyParentSession called for task:", task.id)

@@ -1455,7 +1284,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
            if (isCompactionAgent(info?.agent)) {
              continue
            }
-            const normalizedTools = this.isRecord(info?.tools)
+            const normalizedTools = isRecord(info?.tools)
              ? normalizePromptTools(info.tools as Record<string, boolean | "allow" | "deny" | "ask">)
              : undefined
            if (info?.agent || info?.model || (info?.modelID && info?.providerID) || normalizedTools) {
@@ -1466,13 +1295,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
            }
          }
        } catch (error) {
-          if (this.isAbortedSessionError(error)) {
+          if (isAbortedSessionError(error)) {
            log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
              taskId: task.id,
              parentSessionID: task.parentSessionID,
            })
          }
-          const messageDir = getMessageDir(task.parentSessionID)
+          const messageDir = join(MESSAGE_STORAGE, task.parentSessionID)
          const currentMessage = messageDir ? findNearestMessageExcludingCompaction(messageDir) : null
          agent = currentMessage?.agent ?? task.parentAgent
          model = currentMessage?.model?.providerID && currentMessage?.model?.modelID
@@ -1506,7 +1335,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
            noReply: !allComplete,
          })
        } catch (error) {
-          if (this.isAbortedSessionError(error)) {
+          if (isAbortedSessionError(error)) {
            log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
              taskId: task.id,
              parentSessionID: task.parentSessionID,
@@ -1544,97 +1373,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
  }

  private formatDuration(start: Date, end?: Date): string {
-    const duration = (end ?? new Date()).getTime() - start.getTime()
-    const seconds = Math.floor(duration / 1000)
-    const minutes = Math.floor(seconds / 60)
-    const hours = Math.floor(minutes / 60)
-
-    if (hours > 0) {
-      return `${hours}h ${minutes % 60}m ${seconds % 60}s`
-    } else if (minutes > 0) {
-      return `${minutes}m ${seconds % 60}s`
-    }
-    return `${seconds}s`
+    return formatDuration(start, end)
  }

  private isAbortedSessionError(error: unknown): boolean {
-    const message = this.getErrorText(error)
-    return message.toLowerCase().includes("aborted")
-  }
-
-  private getErrorText(error: unknown): string {
-    if (!error) return ""
-    if (typeof error === "string") return error
-    if (error instanceof Error) {
-      return `${error.name}: ${error.message}`
-    }
-    if (typeof error === "object" && error !== null) {
-      if ("message" in error && typeof error.message === "string") {
-        return error.message
-      }
-      if ("name" in error && typeof error.name === "string") {
-        return error.name
-      }
-    }
-    return ""
-  }
-
-  private extractErrorName(error: unknown): string | undefined {
-    if (this.isRecord(error) && typeof error["name"] === "string") return error["name"]
-    if (error instanceof Error) return error.name
-    return undefined
-  }
-
-  private extractErrorMessage(error: unknown): string | undefined {
-    if (!error) return undefined
-    if (typeof error === "string") return error
-    if (error instanceof Error) return error.message
-
-    if (this.isRecord(error)) {
-      const dataRaw = error["data"]
-      const candidates: unknown[] = [
-        error,
-        dataRaw,
-        error["error"],
-        this.isRecord(dataRaw) ? (dataRaw as Record<string, unknown>)["error"] : undefined,
-        error["cause"],
-      ]
-
-      for (const candidate of candidates) {
-        if (typeof candidate === "string" && candidate.length > 0) return candidate
-        if (
-          this.isRecord(candidate) &&
-          typeof candidate["message"] === "string" &&
-          candidate["message"].length > 0
-        ) {
-          return candidate["message"]
-        }
-      }
-    }
-
-    try {
-      return JSON.stringify(error)
-    } catch {
-      return String(error)
-    }
-  }
-
-  private isRecord(value: unknown): value is Record<string, unknown> {
-    return typeof value === "object" && value !== null
-  }
-
-  private getSessionErrorMessage(properties: EventProperties): string | undefined {
-    const errorRaw = properties["error"]
-    if (!this.isRecord(errorRaw)) return undefined
-
-    const dataRaw = errorRaw["data"]
-    if (this.isRecord(dataRaw)) {
-      const message = dataRaw["message"]
-      if (typeof message === "string") return message
-    }
-
-    const message = errorRaw["message"]
-    return typeof message === "string" ? message : undefined
+    return isAbortedSessionError(error)
  }

  private hasRunningTasks(): boolean {
@@ -1645,25 +1388,12 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
  }

  private pruneStaleTasksAndNotifications(): void {
-    const now = Date.now()
-
-    for (const [taskId, task] of this.tasks.entries()) {
-      const wasPending = task.status === "pending"
-      const timestamp = task.status === "pending" 
-        ? task.queuedAt?.getTime() 
-        : task.startedAt?.getTime()
-      
-      if (!timestamp) {
-        continue
-      }
-      
-      const age = now - timestamp
-      if (age > TASK_TTL_MS) {
-        const errorMessage = task.status === "pending"
-          ? "Task timed out while queued (30 minutes)"
-          : "Task timed out after 30 minutes"
-        
-        log("[background-agent] Pruning stale task:", { taskId, status: task.status, age: Math.round(age / 1000) + "s" })
+    pruneStaleTasksAndNotifications({
+      tasks: this.tasks,
+      notifications: this.notifications,
+      onTaskPruned: (taskId, task, errorMessage) => {
+        const wasPending = task.status === "pending"
+        log("[background-agent] Pruning stale task:", { taskId, status: task.status, age: Math.round(((wasPending ? task.queuedAt?.getTime() : task.startedAt?.getTime()) ? (Date.now() - (wasPending ? task.queuedAt!.getTime() : task.startedAt!.getTime())) : 0) / 1000) + "s" })
        task.status = "error"
        task.error = errorMessage
        task.completedAt = new Date()
@@ -1671,7 +1401,6 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          this.concurrencyManager.release(task.concurrencyKey)
          task.concurrencyKey = undefined
        }
-        // Clean up pendingByParent to prevent stale entries
        this.cleanupPendingByParent(task)
        if (wasPending) {
          const key = task.model
@@ -1698,97 +1427,21 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          subagentSessions.delete(task.sessionID)
          SessionCategoryRegistry.remove(task.sessionID)
        }
-      }
-    }
-
-    for (const [sessionID, notifications] of this.notifications.entries()) {
-      if (notifications.length === 0) {
-        this.notifications.delete(sessionID)
-        continue
-      }
-      const validNotifications = notifications.filter((task) => {
-        if (!task.startedAt) return false
-        const age = now - task.startedAt.getTime()
-        return age <= TASK_TTL_MS
-      })
-      if (validNotifications.length === 0) {
-        this.notifications.delete(sessionID)
-      } else if (validNotifications.length !== notifications.length) {
-        this.notifications.set(sessionID, validNotifications)
-      }
-    }
+      },
+    })
  }

  private async checkAndInterruptStaleTasks(
    allStatuses: Record<string, { type: string }> = {},
  ): Promise<void> {
-    const staleTimeoutMs = this.config?.staleTimeoutMs ?? DEFAULT_STALE_TIMEOUT_MS
-    const messageStalenessMs = this.config?.messageStalenessTimeoutMs ?? DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS
-    const now = Date.now()
-
-    for (const task of this.tasks.values()) {
-      if (task.status !== "running") continue
-
-      const startedAt = task.startedAt
-      const sessionID = task.sessionID
-      if (!startedAt || !sessionID) continue
-
-      const sessionStatus = allStatuses[sessionID]?.type
-      const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
-      const runtime = now - startedAt.getTime()
-
-      if (!task.progress?.lastUpdate) {
-        if (sessionIsRunning) continue
-        if (runtime <= messageStalenessMs) continue
-
-        const staleMinutes = Math.round(runtime / 60000)
-        task.status = "cancelled"
-        task.error = `Stale timeout (no activity for ${staleMinutes}min since start)`
-        task.completedAt = new Date()
-
-        if (task.concurrencyKey) {
-          this.concurrencyManager.release(task.concurrencyKey)
-          task.concurrencyKey = undefined
-        }
-
-        this.client.session.abort({ path: { id: sessionID } }).catch(() => {})
-        log(`[background-agent] Task ${task.id} interrupted: no progress since start`)
-
-        try {
-          await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
-        } catch (err) {
-          log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
-        }
-        continue
-      }
-
-      if (sessionIsRunning) continue
-
-      if (runtime < MIN_RUNTIME_BEFORE_STALE_MS) continue
-
-      const timeSinceLastUpdate = now - task.progress.lastUpdate.getTime()
-      if (timeSinceLastUpdate <= staleTimeoutMs) continue
-      if (task.status !== "running") continue
-
-      const staleMinutes = Math.round(timeSinceLastUpdate / 60000)
-      task.status = "cancelled"
-      task.error = `Stale timeout (no activity for ${staleMinutes}min)`
-      task.completedAt = new Date()
-
-      if (task.concurrencyKey) {
-        this.concurrencyManager.release(task.concurrencyKey)
-        task.concurrencyKey = undefined
-      }
-
-      this.client.session.abort({ path: { id: sessionID } }).catch(() => {})
-      log(`[background-agent] Task ${task.id} interrupted: stale timeout`)
-
-      try {
-        await this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task))
-      } catch (err) {
-        log("[background-agent] Error in notifyParentSession for stale task:", { taskId: task.id, error: err })
-      }
-    }
+    await checkAndInterruptStaleTasks({
+      tasks: this.tasks.values(),
+      client: this.client,
+      config: this.config,
+      concurrencyManager: this.concurrencyManager,
+      notifyParentSession: (task) => this.enqueueNotificationForParent(task.parentSessionID, () => this.notifyParentSession(task)),
+      sessionStatuses: allStatuses,
+    })
  }

  private async pollRunningTasks(): Promise<void> {
@@ -1948,89 +1601,3 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
    return current
  }
 }
-
-function registerProcessSignal(
-  signal: ProcessCleanupEvent,
-  handler: () => void,
-  exitAfter: boolean
-): () => void {
-  const listener = () => {
-    handler()
-    if (exitAfter) {
-      // Set exitCode and schedule exit after delay to allow other handlers to complete async cleanup
-      // Use 6s delay to accommodate LSP cleanup (5s timeout + 1s SIGKILL wait)
-      process.exitCode = 0
-      setTimeout(() => process.exit(), 6000)
-    }
-  }
-  process.on(signal, listener)
-  return listener
-}
-
-
-function getMessageDir(sessionID: string): string | null {
-  if (!existsSync(MESSAGE_STORAGE)) return null
-
-  const directPath = join(MESSAGE_STORAGE, sessionID)
-  if (existsSync(directPath)) return directPath
-
-  for (const dir of readdirSync(MESSAGE_STORAGE)) {
-    const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
-    if (existsSync(sessionPath)) return sessionPath
-  }
-  return null
-}
-
-function isCompactionAgent(agent: string | undefined): boolean {
-  return agent?.trim().toLowerCase() === "compaction"
-}
-
-function hasFullAgentAndModel(message: StoredMessage): boolean {
-  return !!message.agent &&
-    !isCompactionAgent(message.agent) &&
-    !!message.model?.providerID &&
-    !!message.model?.modelID
-}
-
-function hasPartialAgentOrModel(message: StoredMessage): boolean {
-  const hasAgent = !!message.agent && !isCompactionAgent(message.agent)
-  const hasModel = !!message.model?.providerID && !!message.model?.modelID
-  return hasAgent || hasModel
-}
-
-function findNearestMessageExcludingCompaction(messageDir: string): StoredMessage | null {
-  try {
-    const files = readdirSync(messageDir)
-      .filter((name) => name.endsWith(".json"))
-      .sort()
-      .reverse()
-
-    for (const file of files) {
-      try {
-        const content = readFileSync(join(messageDir, file), "utf-8")
-        const parsed = JSON.parse(content) as StoredMessage
-        if (hasFullAgentAndModel(parsed)) {
-          return parsed
-        }
-      } catch {
-        continue
-      }
-    }
-
-    for (const file of files) {
-      try {
-        const content = readFileSync(join(messageDir, file), "utf-8")
-        const parsed = JSON.parse(content) as StoredMessage
-        if (hasPartialAgentOrModel(parsed)) {
-          return parsed
-        }
-      } catch {
-        continue
-      }
-    }
-  } catch {
-    return null
-  }
-
-  return null
-}
--- a/src/features/background-agent/message-dir.ts
+++ b/src/features/background-agent/message-dir.ts
@@ -1 +0,0 @@
-export { getMessageDir } from "../../shared"
--- a/src/features/background-agent/notification-builder.ts
+++ b/src/features/background-agent/notification-builder.ts
@@ -1,41 +0,0 @@
-import type { BackgroundTask } from "./types"
-
-export function buildBackgroundTaskNotificationText(args: {
-  task: BackgroundTask
-  duration: string
-  allComplete: boolean
-  remainingCount: number
-  completedTasks: BackgroundTask[]
-}): string {
-  const { task, duration, allComplete, remainingCount, completedTasks } = args
-  const statusText =
-    task.status === "completed" ? "COMPLETED" : task.status === "interrupt" ? "INTERRUPTED" : task.status === "error" ? "ERROR" : "CANCELLED"
-  const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
-
-  if (allComplete) {
-    const completedTasksText = completedTasks
-      .map((t) => `- \`${t.id}\`: ${t.description}`)
-      .join("\n")
-
-    return `<system-reminder>
-[ALL BACKGROUND TASKS COMPLETE]
-
-**Completed:**
-${completedTasksText || `- \`${task.id}\`: ${task.description}`}
-
-Use \`background_output(task_id="<id>")\` to retrieve each result.
-</system-reminder>`
-  }
-
-  return `<system-reminder>
-[BACKGROUND TASK ${statusText}]
-**ID:** \`${task.id}\`
-**Description:** ${task.description}
-**Duration:** ${duration}${errorInfo}
-
-**${remainingCount} task${remainingCount === 1 ? "" : "s"} still in progress.** You WILL be notified when ALL complete.
-Do NOT poll - continue productive work.
-
-Use \`background_output(task_id="${task.id}")\` to retrieve this result when ready.
-</system-reminder>`
-}
--- a/src/features/background-agent/parent-session-context-resolver.ts
+++ b/src/features/background-agent/parent-session-context-resolver.ts
@@ -1,81 +0,0 @@
-import type { OpencodeClient } from "./constants"
-import type { BackgroundTask } from "./types"
-import { findNearestMessageWithFields } from "../hook-message-injector"
-import { getMessageDir } from "../../shared"
-import { normalizePromptTools, resolveInheritedPromptTools } from "../../shared"
-
-type AgentModel = { providerID: string; modelID: string }
-
-function isObject(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null
-}
-
-function extractAgentAndModelFromMessage(message: unknown): {
-  agent?: string
-  model?: AgentModel
-  tools?: Record<string, boolean>
-} {
-  if (!isObject(message)) return {}
-  const info = message["info"]
-  if (!isObject(info)) return {}
-
-  const agent = typeof info["agent"] === "string" ? info["agent"] : undefined
-  const modelObj = info["model"]
-  const tools = normalizePromptTools(isObject(info["tools"]) ? info["tools"] as Record<string, unknown> as Record<string, boolean | "allow" | "deny" | "ask"> : undefined)
-  if (isObject(modelObj)) {
-    const providerID = modelObj["providerID"]
-    const modelID = modelObj["modelID"]
-    if (typeof providerID === "string" && typeof modelID === "string") {
-      return { agent, model: { providerID, modelID }, tools }
-    }
-  }
-
-  const providerID = info["providerID"]
-  const modelID = info["modelID"]
-  if (typeof providerID === "string" && typeof modelID === "string") {
-    return { agent, model: { providerID, modelID }, tools }
-  }
-
-  return { agent, tools }
-}
-
-export async function resolveParentSessionAgentAndModel(input: {
-  client: OpencodeClient
-  task: BackgroundTask
-}): Promise<{ agent?: string; model?: AgentModel; tools?: Record<string, boolean> }> {
-  const { client, task } = input
-
-  let agent: string | undefined = task.parentAgent
-  let model: AgentModel | undefined
-  let tools: Record<string, boolean> | undefined = task.parentTools
-
-  try {
-    const messagesResp = await client.session.messages({
-      path: { id: task.parentSessionID },
-    })
-
-    const messagesRaw = "data" in messagesResp ? messagesResp.data : []
-    const messages = Array.isArray(messagesRaw) ? messagesRaw : []
-
-    for (let i = messages.length - 1; i >= 0; i--) {
-      const extracted = extractAgentAndModelFromMessage(messages[i])
-      if (extracted.agent || extracted.model || extracted.tools) {
-        agent = extracted.agent ?? task.parentAgent
-        model = extracted.model
-        tools = extracted.tools ?? tools
-        break
-      }
-    }
-  } catch {
-    const messageDir = getMessageDir(task.parentSessionID)
-    const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
-    agent = currentMessage?.agent ?? task.parentAgent
-    model =
-      currentMessage?.model?.providerID && currentMessage?.model?.modelID
-        ? { providerID: currentMessage.model.providerID, modelID: currentMessage.model.modelID }
-        : undefined
-    tools = normalizePromptTools(currentMessage?.tools) ?? tools
-  }
-
-  return { agent, model, tools: resolveInheritedPromptTools(task.parentSessionID, tools) }
-}
--- a/src/features/background-agent/parent-session-notifier.test.ts
+++ b/src/features/background-agent/parent-session-notifier.test.ts
@@ -1,39 +0,0 @@
-declare const require: (name: string) => any
-const { describe, test, expect } = require("bun:test")
-import type { BackgroundTask } from "./types"
-import { buildBackgroundTaskNotificationText } from "./background-task-notification-template"
-
-describe("notifyParentSession", () => {
-  test("displays INTERRUPTED for interrupted tasks", () => {
-    // given
-    const task: BackgroundTask = {
-      id: "test-task",
-      parentSessionID: "parent-session",
-      parentMessageID: "parent-message",
-      description: "Test task",
-      prompt: "Test prompt",
-      agent: "test-agent",
-      status: "interrupt",
-      startedAt: new Date(),
-      completedAt: new Date(),
-    }
-    const duration = "1s"
-    const statusText = task.status === "completed" ? "COMPLETED" : task.status === "interrupt" ? "INTERRUPTED" : "CANCELLED"
-    const allComplete = false
-    const remainingCount = 1
-    const completedTasks: BackgroundTask[] = []
-
-    // when
-    const notification = buildBackgroundTaskNotificationText({
-      task,
-      duration,
-      statusText,
-      allComplete,
-      remainingCount,
-      completedTasks,
-    })
-
-    // then
-    expect(notification).toContain("INTERRUPTED")
-  })
-})
--- a/src/features/background-agent/parent-session-notifier.ts
+++ b/src/features/background-agent/parent-session-notifier.ts
@@ -1,103 +0,0 @@
-import type { BackgroundTask } from "./types"
-import type { ResultHandlerContext } from "./result-handler-context"
-import { TASK_CLEANUP_DELAY_MS } from "./constants"
-import { createInternalAgentTextPart, log } from "../../shared"
-import { getTaskToastManager } from "../task-toast-manager"
-import { formatDuration } from "./duration-formatter"
-import { buildBackgroundTaskNotificationText } from "./background-task-notification-template"
-import { resolveParentSessionAgentAndModel } from "./parent-session-context-resolver"
-
-export async function notifyParentSession(
-  task: BackgroundTask,
-  ctx: ResultHandlerContext
-): Promise<void> {
-  const { client, state } = ctx
-
-  const duration = formatDuration(task.startedAt ?? task.completedAt ?? new Date(), task.completedAt)
-  log("[background-agent] notifyParentSession called for task:", task.id)
-
-  const toastManager = getTaskToastManager()
-  if (toastManager) {
-    toastManager.showCompletionToast({
-      id: task.id,
-      description: task.description,
-      duration,
-    })
-  }
-
-  const pendingSet = state.pendingByParent.get(task.parentSessionID)
-  if (pendingSet) {
-    pendingSet.delete(task.id)
-    if (pendingSet.size === 0) {
-      state.pendingByParent.delete(task.parentSessionID)
-    }
-  }
-
-  const allComplete = !pendingSet || pendingSet.size === 0
-  const remainingCount = pendingSet?.size ?? 0
-
-  const statusText = task.status === "completed" ? "COMPLETED" : task.status === "interrupt" ? "INTERRUPTED" : "CANCELLED"
-
-  const completedTasks = allComplete
-    ? Array.from(state.tasks.values()).filter(
-        (t) =>
-          t.parentSessionID === task.parentSessionID &&
-          t.status !== "running" &&
-          t.status !== "pending"
-      )
-    : []
-
-  const notification = buildBackgroundTaskNotificationText({
-    task,
-    duration,
-    statusText,
-    allComplete,
-    remainingCount,
-    completedTasks,
-  })
-
-  const { agent, model, tools } = await resolveParentSessionAgentAndModel({ client, task })
-
-  log("[background-agent] notifyParentSession context:", {
-    taskId: task.id,
-    resolvedAgent: agent,
-    resolvedModel: model,
-  })
-
-  try {
-    await client.session.promptAsync({
-      path: { id: task.parentSessionID },
-      body: {
-        noReply: !allComplete,
-        ...(agent !== undefined ? { agent } : {}),
-        ...(model !== undefined ? { model } : {}),
-        ...(tools ? { tools } : {}),
-        parts: [createInternalAgentTextPart(notification)],
-      },
-    })
-
-    log("[background-agent] Sent notification to parent session:", {
-      taskId: task.id,
-      allComplete,
-      noReply: !allComplete,
-    })
-  } catch (error) {
-    log("[background-agent] Failed to send notification:", error)
-  }
-
-  if (!allComplete) return
-
-  for (const completedTask of completedTasks) {
-    const taskId = completedTask.id
-    state.clearCompletionTimer(taskId)
-    const timer = setTimeout(() => {
-      state.completionTimers.delete(taskId)
-      if (state.tasks.has(taskId)) {
-        state.clearNotificationsForTask(taskId)
-        state.tasks.delete(taskId)
-        log("[background-agent] Removed completed task from memory:", taskId)
-      }
-    }, TASK_CLEANUP_DELAY_MS)
-    state.setCompletionTimer(taskId, timer)
-  }
-}
--- a/src/features/background-agent/process-cleanup.test.ts
+++ b/src/features/background-agent/process-cleanup.test.ts
@@ -0,0 +1,162 @@
+import { describe, test, expect, beforeEach, afterEach, mock } from "bun:test"
+import {
+  registerManagerForCleanup,
+  unregisterManagerForCleanup,
+  _resetForTesting,
+} from "./process-cleanup"
+
+describe("process-cleanup", () => {
+  const registeredManagers: Array<{ shutdown: () => void }> = []
+  const mockShutdown = mock(() => {})
+  // Recorded [event, listener] pairs from the stubbed process.on/process.off, and the bound originals kept for restoring.
+  const processOnCalls: Array<[string, Function]> = []
+  const processOffCalls: Array<[string, Function]> = []
+  const originalProcessOn = process.on.bind(process)
+  const originalProcessOff = process.off.bind(process)
+
+  beforeEach(() => {
+    mockShutdown.mockClear()
+    processOnCalls.length = 0
+    processOffCalls.length = 0
+    registeredManagers.length = 0
+    // Put the original process.on/off back before resetting, so _resetForTesting() does not go through the recording stubs.
+    process.on = originalProcessOn as any
+    process.off = originalProcessOff as any
+    _resetForTesting()
+    // From here on, registrations are only recorded; nothing is attached to the real process.
+    process.on = ((event: string, listener: Function) => {
+      processOnCalls.push([event, listener])
+      return process
+    }) as any
+
+    process.off = ((event: string, listener: Function) => {
+      processOffCalls.push([event, listener])
+      return process
+    }) as any
+  })
+
+  afterEach(() => {
+    process.on = originalProcessOn as any
+    process.off = originalProcessOff as any
+    // Unregister any manager a test left registered.
+    for (const manager of [...registeredManagers]) {
+      unregisterManagerForCleanup(manager)
+    }
+  })
+
+  describe("registerManagerForCleanup", () => {
+    test("registers signal handlers on first manager", () => {
+      const manager = { shutdown: mockShutdown }
+      registeredManagers.push(manager)
+
+      registerManagerForCleanup(manager)
+
+      const signals = processOnCalls.map(([signal]) => signal)
+      expect(signals).toContain("SIGINT")
+      expect(signals).toContain("SIGTERM")
+      expect(signals).toContain("beforeExit")
+      expect(signals).toContain("exit")
+    })
+
+    test("signal listener calls shutdown on registered manager", () => {
+      const manager = { shutdown: mockShutdown }
+      registeredManagers.push(manager)
+
+      registerManagerForCleanup(manager)
+
+      const exitEntry = processOnCalls.find(([signal]) => signal === "exit")
+      expect(exitEntry).toBeDefined()
+      const [, listener] = exitEntry!
+      listener()
+
+      expect(mockShutdown).toHaveBeenCalled()
+    })
+
+    test("multiple managers all get shutdown when signal fires", () => {
+      const shutdown1 = mock(() => {})
+      const shutdown2 = mock(() => {})
+      const shutdown3 = mock(() => {})
+      const manager1 = { shutdown: shutdown1 }
+      const manager2 = { shutdown: shutdown2 }
+      const manager3 = { shutdown: shutdown3 }
+      registeredManagers.push(manager1, manager2, manager3)
+
+      registerManagerForCleanup(manager1)
+      registerManagerForCleanup(manager2)
+      registerManagerForCleanup(manager3)
+
+      const exitEntry = processOnCalls.find(([signal]) => signal === "exit")
+      expect(exitEntry).toBeDefined()
+      const [, listener] = exitEntry!
+      listener()
+
+      expect(shutdown1).toHaveBeenCalledTimes(1)
+      expect(shutdown2).toHaveBeenCalledTimes(1)
+      expect(shutdown3).toHaveBeenCalledTimes(1)
+    })
+
+    test("does not re-register signal handlers for subsequent managers", () => {
+      const manager1 = { shutdown: mockShutdown }
+      const manager2 = { shutdown: mockShutdown }
+      registeredManagers.push(manager1, manager2)
+
+      registerManagerForCleanup(manager1)
+      const callsAfterFirst = processOnCalls.length
+
+      registerManagerForCleanup(manager2)
+
+      expect(processOnCalls.length).toBe(callsAfterFirst)
+    })
+  })
+
+  describe("unregisterManagerForCleanup", () => {
+    test("removes signal handlers when last manager unregisters", () => {
+      const manager = { shutdown: mockShutdown }
+      registeredManagers.push(manager)
+
+      registerManagerForCleanup(manager)
+      unregisterManagerForCleanup(manager)
+      registeredManagers.length = 0 // already unregistered above; leave nothing for afterEach to unregister
+
+      const offSignals = processOffCalls.map(([signal]) => signal)
+      expect(offSignals).toContain("SIGINT")
+      expect(offSignals).toContain("SIGTERM")
+      expect(offSignals).toContain("beforeExit")
+      expect(offSignals).toContain("exit")
+    })
+
+    test("keeps signal handlers when other managers remain", () => {
+      const manager1 = { shutdown: mockShutdown }
+      const manager2 = { shutdown: mockShutdown }
+      registeredManagers.push(manager1, manager2)
+
+      registerManagerForCleanup(manager1)
+      registerManagerForCleanup(manager2)
+
+      unregisterManagerForCleanup(manager2)
+
+      expect(processOffCalls.length).toBe(0)
+    })
+
+    test("remaining managers still get shutdown after partial unregister", () => {
+      const shutdown1 = mock(() => {})
+      const shutdown2 = mock(() => {})
+      const manager1 = { shutdown: shutdown1 }
+      const manager2 = { shutdown: shutdown2 }
+      registeredManagers.push(manager1, manager2)
+
+      registerManagerForCleanup(manager1)
+      registerManagerForCleanup(manager2)
+      // Capture the recorded "exit" listener first, then unregister manager2 before invoking it.
+      const exitEntry = processOnCalls.find(([signal]) => signal === "exit")
+      expect(exitEntry).toBeDefined()
+      const [, listener] = exitEntry!
+      unregisterManagerForCleanup(manager2)
+
+      listener()
+
+      expect(shutdown1).toHaveBeenCalledTimes(1)
+      expect(shutdown2).not.toHaveBeenCalled()
+    })
+  })
+})
--- a/src/features/background-agent/process-cleanup.ts
+++ b/src/features/background-agent/process-cleanup.ts
@@ -0,0 +1,81 @@
+import { log } from "../../shared"
+/** Process events a cleanup listener can be attached to: any signal, plus "beforeExit" and "exit". */
+type ProcessCleanupEvent = NodeJS.Signals | "beforeExit" | "exit"
+/** Attaches a listener for `signal` that runs `handler`; returns that listener so it can later be passed to process.off. */
+function registerProcessSignal(
+  signal: ProcessCleanupEvent,
+  handler: () => void,
+  exitAfter: boolean
+): () => void {
+  const listener = () => {
+    handler()
+    if (exitAfter) {
+      process.exitCode = 0
+      setTimeout(() => process.exit(), 6000).unref() // forced exit after 6000 ms; unref'd so this timer alone does not keep the process alive
+    }
+  }
+  process.on(signal, listener)
+  return listener
+}
+/** Minimal shape required of a manager registered for cleanup: a shutdown() method. */
+interface CleanupTarget {
+  shutdown(): void
+}
+// Module-level state: the registered managers, whether listeners are installed, and the installed listener per event.
+const cleanupManagers = new Set<CleanupTarget>()
+let cleanupRegistered = false
+const cleanupHandlers = new Map<ProcessCleanupEvent, () => void>()
+/** Adds `manager` to the cleanup set; the first registration also installs the process listeners that shut down every registered manager. */
+export function registerManagerForCleanup(manager: CleanupTarget): void {
+  cleanupManagers.add(manager)
+  // Listeners are installed once and shared by all managers.
+  if (cleanupRegistered) return
+  cleanupRegistered = true
+  // Iterates the set at call time; a throwing shutdown() is logged and does not stop the remaining managers.
+  const cleanupAll = () => {
+    for (const m of cleanupManagers) {
+      try {
+        m.shutdown()
+      } catch (error) {
+        log("[background-agent] Error during shutdown cleanup:", error)
+      }
+    }
+  }
+
+  const registerSignal = (signal: ProcessCleanupEvent, exitAfter: boolean): void => {
+    const listener = registerProcessSignal(signal, cleanupAll, exitAfter)
+    cleanupHandlers.set(signal, listener) // remembered so unregister/reset can detach it
+  }
+  // Signals run cleanup and then request exit; "beforeExit" and "exit" only run cleanup.
+  registerSignal("SIGINT", true)
+  registerSignal("SIGTERM", true)
+  if (process.platform === "win32") {
+    registerSignal("SIGBREAK", true)
+  }
+  registerSignal("beforeExit", false)
+  registerSignal("exit", false)
+}
+/** Removes `manager` from the cleanup set; once no managers remain, detaches every installed listener so a later registration installs them again. */
+export function unregisterManagerForCleanup(manager: CleanupTarget): void {
+  cleanupManagers.delete(manager)
+
+  if (cleanupManagers.size > 0) return // other managers still rely on the shared listeners
+
+  for (const [signal, listener] of cleanupHandlers.entries()) {
+    process.off(signal, listener)
+  }
+  cleanupHandlers.clear()
+  cleanupRegistered = false
+}
+
+/** @internal — test-only reset of module-level singleton state: empties the manager set, detaches installed listeners, clears the registered flag */
+export function _resetForTesting(): void {
+  for (const manager of [...cleanupManagers]) {
+    cleanupManagers.delete(manager)
+  }
+  for (const [signal, listener] of cleanupHandlers.entries()) {
+    process.off(signal, listener)
+  }
+  cleanupHandlers.clear()
+  cleanupRegistered = false
+}
--- a/src/features/background-agent/result-handler-context.ts
+++ b/src/features/background-agent/result-handler-context.ts
@@ -1,9 +0,0 @@
-import type { OpencodeClient } from "./constants"
-import type { ConcurrencyManager } from "./concurrency"
-import type { TaskStateManager } from "./state"
-
-export interface ResultHandlerContext {
-  client: OpencodeClient
-  concurrencyManager: ConcurrencyManager
-  state: TaskStateManager
-}
--- a/src/features/background-agent/result-handler.ts
+++ b/src/features/background-agent/result-handler.ts
@@ -1,7 +0,0 @@
-export type { ResultHandlerContext } from "./result-handler-context"
-export { formatDuration } from "./duration-formatter"
-export { getMessageDir } from "../../shared"
-export { checkSessionTodos } from "./session-todo-checker"
-export { validateSessionHasOutput } from "./session-output-validator"
-export { tryCompleteTask } from "./background-task-completer"
-export { notifyParentSession } from "./parent-session-notifier"
--- a/src/features/background-agent/session-idle-event-handler.test.ts
+++ b/src/features/background-agent/session-idle-event-handler.test.ts
@@ -0,0 +1,340 @@
+import { describe, it, expect, mock } from "bun:test"
+
+import { handleSessionIdleBackgroundEvent } from "./session-idle-event-handler"
+import type { BackgroundTask } from "./types"
+import { MIN_IDLE_TIME_MS } from "./constants"
+/** Builds a "running" BackgroundTask fixture; `overrides` are spread last, so they win over the defaults below. */
+function createRunningTask(overrides: Partial<BackgroundTask> = {}): BackgroundTask {
+  return {
+    id: "task-1",
+    sessionID: "ses-idle-1",
+    parentSessionID: "parent-ses-1",
+    parentMessageID: "msg-1",
+    description: "test idle handler",
+    prompt: "test",
+    agent: "explore",
+    status: "running",
+    startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 100)), // default: started MIN_IDLE_TIME_MS + 100 ms ago
+    ...overrides,
+  }
+}
+
+describe("handleSessionIdleBackgroundEvent", () => {
+  describe("#given no sessionID in properties", () => {
+    it("#then should do nothing", () => {
+      //#given
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: {},
+        findBySession: () => undefined,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given non-string sessionID in properties", () => {
+    it("#then should do nothing", () => {
+      //#given
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: 123 },
+        findBySession: () => undefined,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given no task found for session", () => {
+    it("#then should do nothing", () => {
+      //#given
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: "ses-unknown" },
+        findBySession: () => undefined,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given task is not running", () => {
+    it("#then should do nothing", () => {
+      //#given
+      const task = createRunningTask({ status: "completed" })
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given task has no startedAt", () => {
+    it("#then should do nothing", () => {
+      //#given
+      const task = createRunningTask({ startedAt: undefined })
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given elapsed time < MIN_IDLE_TIME_MS", () => {
+    it("#when idle fires early #then should defer with timer", () => {
+      //#given
+      const realDateNow = Date.now
+      const baseNow = realDateNow()
+      const task = createRunningTask({ startedAt: new Date(baseNow) })
+      const idleDeferralTimers = new Map<string, ReturnType<typeof setTimeout>>()
+      const emitIdleEvent = mock(() => {})
+
+      try {
+        Date.now = () => baseNow + (MIN_IDLE_TIME_MS - 100) // stubbed clock reads 100 ms short of MIN_IDLE_TIME_MS after task.startedAt
+
+        //#when
+        handleSessionIdleBackgroundEvent({
+          properties: { sessionID: task.sessionID! },
+          findBySession: () => task,
+          idleDeferralTimers,
+          validateSessionHasOutput: () => Promise.resolve(true),
+          checkSessionTodos: () => Promise.resolve(false),
+          tryCompleteTask: () => Promise.resolve(true),
+          emitIdleEvent,
+        })
+
+        //#then
+        expect(idleDeferralTimers.has(task.id)).toBe(true)
+        expect(emitIdleEvent).not.toHaveBeenCalled()
+      } finally {
+        clearTimeout(idleDeferralTimers.get(task.id)!)
+        Date.now = realDateNow
+      }
+    })
+
+    it("#when idle already deferred #then should not create duplicate timer", () => {
+      //#given
+      const realDateNow = Date.now
+      const baseNow = realDateNow()
+      const task = createRunningTask({ startedAt: new Date(baseNow) })
+      const existingTimer = setTimeout(() => {}, 99999)
+      const idleDeferralTimers = new Map<string, ReturnType<typeof setTimeout>>([
+        [task.id, existingTimer],
+      ])
+      const emitIdleEvent = mock(() => {})
+
+      try {
+        Date.now = () => baseNow + (MIN_IDLE_TIME_MS - 100) // stubbed clock reads 100 ms short of MIN_IDLE_TIME_MS after task.startedAt
+
+        //#when
+        handleSessionIdleBackgroundEvent({
+          properties: { sessionID: task.sessionID! },
+          findBySession: () => task,
+          idleDeferralTimers,
+          validateSessionHasOutput: () => Promise.resolve(true),
+          checkSessionTodos: () => Promise.resolve(false),
+          tryCompleteTask: () => Promise.resolve(true),
+          emitIdleEvent,
+        })
+
+        //#then
+        expect(idleDeferralTimers.get(task.id)).toBe(existingTimer)
+      } finally {
+        clearTimeout(existingTimer)
+        Date.now = realDateNow
+      }
+    })
+
+    it("#when deferred timer fires #then should emit idle event", async () => {
+      //#given
+      const realDateNow = Date.now
+      const baseNow = realDateNow()
+      const task = createRunningTask({ startedAt: new Date(baseNow) })
+      const idleDeferralTimers = new Map<string, ReturnType<typeof setTimeout>>()
+      const emitIdleEvent = mock(() => {})
+      const remainingMs = 50
+
+      try {
+        Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs) // stubbed clock reads remainingMs short of MIN_IDLE_TIME_MS after task.startedAt
+
+        //#when
+        handleSessionIdleBackgroundEvent({
+          properties: { sessionID: task.sessionID! },
+          findBySession: () => task,
+          idleDeferralTimers,
+          validateSessionHasOutput: () => Promise.resolve(true),
+          checkSessionTodos: () => Promise.resolve(false),
+          tryCompleteTask: () => Promise.resolve(true),
+          emitIdleEvent,
+        })
+
+        //#then - wait for deferred timer
+        await new Promise((resolve) => setTimeout(resolve, remainingMs + 50))
+        expect(emitIdleEvent).toHaveBeenCalledWith(task.sessionID)
+        expect(idleDeferralTimers.has(task.id)).toBe(false)
+      } finally {
+        Date.now = realDateNow
+      }
+    })
+  })
+
+  describe("#given elapsed time >= MIN_IDLE_TIME_MS", () => {
+    it("#when session has valid output and no incomplete todos #then should complete task", async () => {
+      //#given
+      const task = createRunningTask()
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      await new Promise((resolve) => setTimeout(resolve, 10)) // the handler call above is not awaited; wait briefly before asserting
+      expect(tryCompleteTask).toHaveBeenCalledWith(task, "session.idle event")
+    })
+
+    it("#when session has no valid output #then should not complete task", async () => {
+      //#given
+      const task = createRunningTask()
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(false),
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      await new Promise((resolve) => setTimeout(resolve, 10))
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+
+    it("#when task has incomplete todos #then should not complete task", async () => {
+      //#given
+      const task = createRunningTask()
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: () => Promise.resolve(true),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      await new Promise((resolve) => setTimeout(resolve, 10))
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+
+    it("#when task status changes during validation #then should not complete task", async () => {
+      //#given
+      const task = createRunningTask()
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: async () => {
+          task.status = "completed"
+          return true
+        },
+        checkSessionTodos: () => Promise.resolve(false),
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      await new Promise((resolve) => setTimeout(resolve, 10))
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+
+    it("#when task status changes during todo check #then should not complete task", async () => {
+      //#given
+      const task = createRunningTask()
+      const tryCompleteTask = mock(() => Promise.resolve(true))
+
+      //#when
+      handleSessionIdleBackgroundEvent({
+        properties: { sessionID: task.sessionID! },
+        findBySession: () => task,
+        idleDeferralTimers: new Map(),
+        validateSessionHasOutput: () => Promise.resolve(true),
+        checkSessionTodos: async () => {
+          task.status = "cancelled"
+          return false
+        },
+        tryCompleteTask,
+        emitIdleEvent: () => {},
+      })
+
+      //#then
+      await new Promise((resolve) => setTimeout(resolve, 10))
+      expect(tryCompleteTask).not.toHaveBeenCalled()
+    })
+  })
+})
--- a/src/features/background-agent/session-output-validator.ts
+++ b/src/features/background-agent/session-output-validator.ts
@@ -1,89 +0,0 @@
-import type { OpencodeClient } from "./constants"
-import { log } from "../../shared"
-
-type SessionMessagePart = {
-  type?: string
-  text?: string
-  content?: unknown
-}
-
-function isObject(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null
-}
-
-function getMessageRole(message: unknown): string | undefined {
-  if (!isObject(message)) return undefined
-  const info = message["info"]
-  if (!isObject(info)) return undefined
-  const role = info["role"]
-  return typeof role === "string" ? role : undefined
-}
-
-function getMessageParts(message: unknown): SessionMessagePart[] {
-  if (!isObject(message)) return []
-  const parts = message["parts"]
-  if (!Array.isArray(parts)) return []
-
-  return parts
-    .filter((part): part is SessionMessagePart => isObject(part))
-    .map((part) => ({
-      type: typeof part["type"] === "string" ? part["type"] : undefined,
-      text: typeof part["text"] === "string" ? part["text"] : undefined,
-      content: part["content"],
-    }))
-}
-
-function partHasContent(part: SessionMessagePart): boolean {
-  if (part.type === "text" || part.type === "reasoning") {
-    return Boolean(part.text && part.text.trim().length > 0)
-  }
-  if (part.type === "tool") return true
-  if (part.type === "tool_result") {
-    if (typeof part.content === "string") return part.content.trim().length > 0
-    if (Array.isArray(part.content)) return part.content.length > 0
-    return Boolean(part.content)
-  }
-  return false
-}
-
-export async function validateSessionHasOutput(
-  client: OpencodeClient,
-  sessionID: string
-): Promise<boolean> {
-  try {
-    const response = await client.session.messages({
-      path: { id: sessionID },
-    })
-
-    const messagesRaw =
-      isObject(response) && "data" in response ? (response as { data?: unknown }).data : response
-    const messages = Array.isArray(messagesRaw) ? messagesRaw : []
-
-    const hasAssistantOrToolMessage = messages.some((message) => {
-      const role = getMessageRole(message)
-      return role === "assistant" || role === "tool"
-    })
-
-    if (!hasAssistantOrToolMessage) {
-      log("[background-agent] No assistant/tool messages found in session:", sessionID)
-      return false
-    }
-
-    const hasContent = messages.some((message) => {
-      const role = getMessageRole(message)
-      if (role !== "assistant" && role !== "tool") return false
-      const parts = getMessageParts(message)
-      return parts.some(partHasContent)
-    })
-
-    if (!hasContent) {
-      log("[background-agent] Messages exist but no content found in session:", sessionID)
-      return false
-    }
-
-    return true
-  } catch (error) {
-    log("[background-agent] Error validating session output:", error)
-    return true
-  }
-}
--- a/src/features/background-agent/session-task-cleanup.ts
+++ b/src/features/background-agent/session-task-cleanup.ts
@@ -1,46 +0,0 @@
-import { subagentSessions } from "../claude-code-session-state"
-import type { BackgroundTask } from "./types"
-
-export function cleanupTaskAfterSessionEnds(args: {
-  task: BackgroundTask
-  tasks: Map<string, BackgroundTask>
-  idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>>
-  completionTimers: Map<string, ReturnType<typeof setTimeout>>
-  cleanupPendingByParent: (task: BackgroundTask) => void
-  clearNotificationsForTask: (taskId: string) => void
-  releaseConcurrencyKey?: (key: string) => void
-}): void {
-  const {
-    task,
-    tasks,
-    idleDeferralTimers,
-    completionTimers,
-    cleanupPendingByParent,
-    clearNotificationsForTask,
-    releaseConcurrencyKey,
-  } = args
-
-  const completionTimer = completionTimers.get(task.id)
-  if (completionTimer) {
-    clearTimeout(completionTimer)
-    completionTimers.delete(task.id)
-  }
-
-  const idleTimer = idleDeferralTimers.get(task.id)
-  if (idleTimer) {
-    clearTimeout(idleTimer)
-    idleDeferralTimers.delete(task.id)
-  }
-
-  if (task.concurrencyKey && releaseConcurrencyKey) {
-    releaseConcurrencyKey(task.concurrencyKey)
-    task.concurrencyKey = undefined
-  }
-
-  cleanupPendingByParent(task)
-  clearNotificationsForTask(task.id)
-  tasks.delete(task.id)
-  if (task.sessionID) {
-    subagentSessions.delete(task.sessionID)
-  }
-}
--- a/src/features/background-agent/session-todo-checker.ts
+++ b/src/features/background-agent/session-todo-checker.ts
@@ -1,33 +0,0 @@
-import type { OpencodeClient, Todo } from "./constants"
-
-function isTodo(value: unknown): value is Todo {
-  if (typeof value !== "object" || value === null) return false
-  const todo = value as Record<string, unknown>
-  return (
-    (typeof todo["id"] === "string" || todo["id"] === undefined) &&
-    typeof todo["content"] === "string" &&
-    typeof todo["status"] === "string" &&
-    typeof todo["priority"] === "string"
-  )
-}
-
-export async function checkSessionTodos(
-  client: OpencodeClient,
-  sessionID: string
-): Promise<boolean> {
-  try {
-    const response = await client.session.todo({
-      path: { id: sessionID },
-    })
-
-    const todosRaw = "data" in response ? response.data : response
-    if (!Array.isArray(todosRaw) || todosRaw.length === 0) return false
-
-    const incomplete = todosRaw
-      .filter(isTodo)
-      .filter((todo) => todo.status !== "completed" && todo.status !== "cancelled")
-    return incomplete.length > 0
-  } catch {
-    return false
-  }
-}
--- a/src/features/background-agent/spawner.ts
+++ b/src/features/background-agent/spawner.ts
@@ -61,9 +61,7 @@ export async function startTask(
  const createResult = await client.session.create({
    body: {
      parentID: input.parentSessionID,
-      title: `Background: ${input.description}`,
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    } as any,
+    } as Record<string, unknown>,
    query: {
      directory: parentDirectory,
    },
--- a/src/features/background-agent/spawner/background-session-creator.ts
+++ b/src/features/background-agent/spawner/background-session-creator.ts
@@ -1,45 +0,0 @@
-import type { OpencodeClient } from "../constants"
-import type { ConcurrencyManager } from "../concurrency"
-import type { LaunchInput } from "../types"
-import { log } from "../../../shared"
-
-export async function createBackgroundSession(options: {
-  client: OpencodeClient
-  input: LaunchInput
-  parentDirectory: string
-  concurrencyManager: ConcurrencyManager
-  concurrencyKey: string
-}): Promise<string> {
-  const { client, input, parentDirectory, concurrencyManager, concurrencyKey } = options
-
-  const body = {
-    parentID: input.parentSessionID,
-    title: `Background: ${input.description}`,
-  }
-
-  const createResult = await client.session
-    .create({
-      body,
-      query: {
-        directory: parentDirectory,
-      },
-    })
-    .catch((error: unknown) => {
-      concurrencyManager.release(concurrencyKey)
-      throw error
-    })
-
-  if (createResult.error) {
-    concurrencyManager.release(concurrencyKey)
-    throw new Error(`Failed to create background session: ${createResult.error}`)
-  }
-
-  if (!createResult.data?.id) {
-    concurrencyManager.release(concurrencyKey)
-    throw new Error("Failed to create background session: API returned no session ID")
-  }
-
-  const sessionID = createResult.data.id
-  log("[background-agent] Background session created", { sessionID })
-  return sessionID
-}
--- a/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
+++ b/src/features/background-agent/spawner/concurrency-key-from-launch-input.ts
@@ -1,7 +0,0 @@
-import type { LaunchInput } from "../types"
-
-export function getConcurrencyKeyFromLaunchInput(input: LaunchInput): string {
-  return input.model
-    ? `${input.model.providerID}/${input.model.modelID}`
-    : input.agent
-}
--- a/src/features/background-agent/spawner/spawner-context.ts
+++ b/src/features/background-agent/spawner/spawner-context.ts
@@ -1,12 +0,0 @@
-import type { BackgroundTask } from "../types"
-import type { ConcurrencyManager } from "../concurrency"
-import type { OpencodeClient, OnSubagentSessionCreated } from "../constants"
-
-export interface SpawnerContext {
-  client: OpencodeClient
-  directory: string
-  concurrencyManager: ConcurrencyManager
-  tmuxEnabled: boolean
-  onSubagentSessionCreated?: OnSubagentSessionCreated
-  onTaskError: (task: BackgroundTask, error: Error) => void
-}
--- a/src/features/background-agent/spawner/tmux-callback-invoker.ts
+++ b/src/features/background-agent/spawner/tmux-callback-invoker.ts
@@ -1,40 +0,0 @@
-import { setTimeout } from "timers/promises"
-import type { OnSubagentSessionCreated } from "../constants"
-import { TMUX_CALLBACK_DELAY_MS } from "../constants"
-import { log } from "../../../shared"
-import { isInsideTmux } from "../../../shared/tmux"
-
-export async function maybeInvokeTmuxCallback(options: {
-  onSubagentSessionCreated?: OnSubagentSessionCreated
-  tmuxEnabled: boolean
-  sessionID: string
-  parentID: string
-  title: string
-}): Promise<void> {
-  const { onSubagentSessionCreated, tmuxEnabled, sessionID, parentID, title } = options
-
-  log("[background-agent] tmux callback check", {
-    hasCallback: !!onSubagentSessionCreated,
-    tmuxEnabled,
-    isInsideTmux: isInsideTmux(),
-    sessionID,
-    parentID,
-  })
-
-  if (!onSubagentSessionCreated || !tmuxEnabled || !isInsideTmux()) {
-    log("[background-agent] SKIP tmux callback - conditions not met")
-    return
-  }
-
-  log("[background-agent] Invoking tmux callback NOW", { sessionID })
-  await onSubagentSessionCreated({
-    sessionID,
-    parentID,
-    title,
-  }).catch((error: unknown) => {
-    log("[background-agent] Failed to spawn tmux pane:", error)
-  })
-
-  log("[background-agent] tmux callback completed, waiting")
-  await setTimeout(TMUX_CALLBACK_DELAY_MS)
-}
--- a/src/features/claude-tasks/index.ts
+++ b/src/features/claude-tasks/index.ts
@@ -1,3 +0,0 @@
-export * from "./types"
-export * from "./storage"
-export * from "./session-storage"
--- a/src/features/mcp-oauth/index.ts
+++ b/src/features/mcp-oauth/index.ts
@@ -1,3 +0,0 @@
-export * from "./schema"
-export * from "./oauth-authorization-flow"
-export * from "./provider"
--- a/src/features/task-toast-manager/manager.ts
+++ b/src/features/task-toast-manager/manager.ts
@@ -4,6 +4,12 @@ import type { ConcurrencyManager } from "../background-agent/concurrency"

 type OpencodeClient = PluginInput["client"]

+type ClientWithTui = {
+  tui?: {
+    showToast: (opts: { body: { title: string; message: string; variant: string; duration: number } }) => Promise<unknown>
+  }
+}
+
 export class TaskToastManager {
  private tasks: Map<string, TrackedTask> = new Map()
  private client: OpencodeClient
@@ -170,8 +176,7 @@ export class TaskToastManager {
   * Show consolidated toast with all running/queued tasks
   */
  private showTaskListToast(newTask: TrackedTask): void {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const tuiClient = this.client as any
+    const tuiClient = this.client as ClientWithTui
    if (!tuiClient.tui?.showToast) return

    const message = this.buildTaskListMessage(newTask)
@@ -196,8 +201,7 @@ export class TaskToastManager {
   * Show task completion toast
   */
  showCompletionToast(task: { id: string; description: string; duration: string }): void {
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const tuiClient = this.client as any
+    const tuiClient = this.client as ClientWithTui
    if (!tuiClient.tui?.showToast) return

    this.removeTask(task.id)
--- a/src/hooks/anthropic-context-window-limit-recovery/executor.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/executor.ts
@@ -1,4 +1,5 @@
 import type { AutoCompactState } from "./types";
+import type { OhMyOpenCodeConfig } from "../../config";
 import type { ExperimentalConfig } from "../../config";
 import { TRUNCATE_CONFIG } from "./types";

@@ -15,15 +16,15 @@ export async function executeCompact(
  sessionID: string,
  msg: Record<string, unknown>,
  autoCompactState: AutoCompactState,
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  client: any,
+  client: Client,
  directory: string,
-  experimental?: ExperimentalConfig,
+  pluginConfig: OhMyOpenCodeConfig,
+  _experimental?: ExperimentalConfig
 ): Promise<void> {
-  void experimental
+  void _experimental

  if (autoCompactState.compactionInProgress.has(sessionID)) {
-    await (client as Client).tui
+    await client.tui
      .showToast({
        body: {
          title: "Compact In Progress",
@@ -55,7 +56,7 @@ export async function executeCompact(
      const result = await runAggressiveTruncationStrategy({
        sessionID,
        autoCompactState,
-        client: client as Client,
+        client: client,
        directory,
        truncateAttempt: truncateState.truncateAttempt,
        currentTokens: errorData.currentTokens,
@@ -70,8 +71,9 @@ export async function executeCompact(
      sessionID,
      msg,
      autoCompactState,
-      client: client as Client,
+      client: client,
      directory,
+      pluginConfig,
      errorType: errorData?.errorType,
      messageIndex: errorData?.messageIndex,
    })
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
@@ -1,6 +1,7 @@
 import type { PluginInput } from "@opencode-ai/plugin"
+import type { Client } from "./client"
 import type { AutoCompactState, ParsedTokenLimitError } from "./types"
-import type { ExperimentalConfig } from "../../config"
+import type { ExperimentalConfig, OhMyOpenCodeConfig } from "../../config"
 import { parseAnthropicTokenLimitError } from "./parser"
 import { executeCompact, getLastAssistant } from "./executor"
 import { attemptDeduplicationRecovery } from "./deduplication-recovery"
@@ -8,6 +9,7 @@ import { log } from "../../shared/logger"

 export interface AnthropicContextWindowLimitRecoveryOptions {
  experimental?: ExperimentalConfig
+  pluginConfig: OhMyOpenCodeConfig
 }

 function createRecoveryState(): AutoCompactState {
@@ -28,6 +30,7 @@ export function createAnthropicContextWindowLimitRecoveryHook(
 ) {
  const autoCompactState = createRecoveryState()
  const experimental = options?.experimental
+  const pluginConfig = options?.pluginConfig!
  const pendingCompactionTimeoutBySession = new Map<string, ReturnType<typeof setTimeout>>()

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
@@ -89,8 +92,9 @@ export function createAnthropicContextWindowLimitRecoveryHook(
            sessionID,
            { providerID, modelID },
            autoCompactState,
-            ctx.client,
+            ctx.client as Client,
            ctx.directory,
+            pluginConfig,
            experimental,
          )
        }, 300)
@@ -156,8 +160,9 @@ export function createAnthropicContextWindowLimitRecoveryHook(
        sessionID,
        { providerID, modelID },
        autoCompactState,
-        ctx.client,
+        ctx.client as Client,
        ctx.directory,
+        pluginConfig,
        experimental,
      )
    }
--- a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts
@@ -1,16 +1,19 @@
 import type { AutoCompactState } from "./types"
+import type { OhMyOpenCodeConfig } from "../../config"
 import { RETRY_CONFIG } from "./types"
 import type { Client } from "./client"
 import { clearSessionState, getEmptyContentAttempt, getOrCreateRetryState } from "./state"
 import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder"
 import { fixEmptyMessages } from "./empty-content-recovery"

+import { resolveCompactionModel } from "../shared/compaction-model-resolver"
 export async function runSummarizeRetryStrategy(params: {
  sessionID: string
  msg: Record<string, unknown>
  autoCompactState: AutoCompactState
  client: Client
  directory: string
+  pluginConfig: OhMyOpenCodeConfig
  errorType?: string
  messageIndex?: number
 }): Promise<void> {
@@ -74,7 +77,14 @@ export async function runSummarizeRetryStrategy(params: {
          })
          .catch(() => {})

-        const summarizeBody = { providerID, modelID, auto: true }
+        const { providerID: targetProviderID, modelID: targetModelID } = resolveCompactionModel(
+          params.pluginConfig,
+          params.sessionID,
+          providerID,
+          modelID
+        )
+
+        const summarizeBody = { providerID: targetProviderID, modelID: targetModelID, auto: true }
        await params.client.session.summarize({
          path: { id: params.sessionID },
          body: summarizeBody as never,
--- a/src/hooks/atlas/system-reminder-templates.ts
+++ b/src/hooks/atlas/system-reminder-templates.ts
@@ -104,6 +104,65 @@ ALL three must be YES. "Probably" = NO. "I think so" = NO. Investigate until CER

 **DO NOT proceed to the next task until all 4 phases are complete and the gate passes.**`

+export const VERIFICATION_REMINDER_GEMINI = `**THE SUBAGENT HAS FINISHED. THEIR WORK IS EXTREMELY SUSPICIOUS.**
+
+The subagent CLAIMS this task is done. Based on thousands of executions, subagent claims are FALSE more often than true.
+They ROUTINELY:
+- Ship code with syntax errors they didn't bother to check
+- Create stub implementations with TODOs and call it "done"
+- Write tests that pass trivially (testing nothing meaningful)
+- Implement logic that does NOT match what was requested
+- Add features nobody asked for and call it "improvement"
+- Report "all tests pass" when they didn't run any tests
+
+**This is NOT a theoretical warning. This WILL happen on this task. Assume the work is BROKEN.**
+
+**YOU MUST VERIFY WITH ACTUAL TOOL CALLS. NOT REASONING. TOOL CALLS.**
+Thinking "it looks correct" is NOT verification. Running \`lsp_diagnostics\` IS.
+
+---
+
+**PHASE 1: READ THE CODE FIRST (DO NOT SKIP — DO NOT RUN TESTS YET)**
+
+Read the code FIRST so you know what you're testing.
+
+1. \`Bash("git diff --stat")\` — see exactly which files changed.
+2. \`Read\` EVERY changed file — no exceptions, no skimming.
+3. For EACH file:
+   - Does this code ACTUALLY do what the task required? RE-READ the task spec.
+   - Any stubs, TODOs, placeholders? \`Grep\` for TODO, FIXME, HACK, xxx
+   - Anti-patterns? \`Grep\` for \`as any\`, \`@ts-ignore\`, empty catch
+   - Scope creep? Did the subagent add things NOT in the task spec?
+4. Cross-check EVERY claim against actual code.
+
+**If you cannot explain what every changed line does, GO BACK AND READ AGAIN.**
+
+**PHASE 2: RUN AUTOMATED CHECKS**
+
+1. \`lsp_diagnostics\` on EACH changed file — ZERO new errors. ACTUALLY RUN THIS.
+2. Run tests for changed modules, then full suite. ACTUALLY RUN THESE.
+3. Build/typecheck — exit 0.
+
+If Phase 1 found issues but Phase 2 passes: Phase 2 is WRONG. Fix the code.
+
+**PHASE 3: HANDS-ON QA (MANDATORY for user-facing changes)**
+
+- **Frontend/UI**: \`/playwright\`
+- **TUI/CLI**: \`interactive_bash\`
+- **API/Backend**: \`Bash\` with curl
+
+**If user-facing and you did not run it, you are shipping UNTESTED BROKEN work.**
+
+**PHASE 4: GATE DECISION**
+
+1. Can I explain what EVERY changed line does? (If no → Phase 1)
+2. Did I SEE it work via tool calls? (If user-facing and no → Phase 3)
+3. Am I confident nothing is broken? (If no → broader tests)
+
+ALL three must be YES. "Probably" = NO. "I think so" = NO.
+
+**DO NOT proceed to the next task until all 4 phases are complete.**`
+
 export const ORCHESTRATOR_DELEGATION_REQUIRED = `

 ---
--- a/src/hooks/auto-update-checker/hook.test.ts
+++ b/src/hooks/auto-update-checker/hook.test.ts
@@ -1,4 +1,4 @@
-import { afterEach, describe, it, expect, mock } from "bun:test"
+import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"

 const mockShowConfigErrorsIfAny = mock(async () => {})
 const mockShowModelCacheWarningIfNeeded = mock(async () => {})
@@ -7,7 +7,7 @@ const mockShowLocalDevToast = mock(async () => {})
 const mockShowVersionToast = mock(async () => {})
 const mockRunBackgroundUpdateCheck = mock(async () => {})
 const mockGetCachedVersion = mock(() => "3.6.0")
-const mockGetLocalDevVersion = mock(() => "3.6.0")
+const mockGetLocalDevVersion = mock<(directory: string) => string | null>(() => null)

 mock.module("./hook/config-errors-toast", () => ({
  showConfigErrorsIfAny: mockShowConfigErrorsIfAny,
@@ -40,31 +40,49 @@ mock.module("../../shared/logger", () => ({
  log: () => {},
 }))

-const { createAutoUpdateCheckerHook } = await import("./hook")
+type HookFactory = typeof import("./hook").createAutoUpdateCheckerHook
+
+async function importFreshHookFactory(): Promise<HookFactory> {
+  const hookModule = await import(`./hook?test-${Date.now()}-${Math.random()}`)
+  return hookModule.createAutoUpdateCheckerHook
+}
+
+function createPluginInput() {
+  return {
+    directory: "/test",
+    client: {} as never,
+  } as never
+}
+
+beforeEach(() => {
+  mockShowConfigErrorsIfAny.mockClear()
+  mockShowModelCacheWarningIfNeeded.mockClear()
+  mockUpdateAndShowConnectedProvidersCacheStatus.mockClear()
+  mockShowLocalDevToast.mockClear()
+  mockShowVersionToast.mockClear()
+  mockRunBackgroundUpdateCheck.mockClear()
+  mockGetCachedVersion.mockClear()
+  mockGetLocalDevVersion.mockClear()
+
+  mockGetCachedVersion.mockReturnValue("3.6.0")
+  mockGetLocalDevVersion.mockReturnValue(null)
+})

 afterEach(() => {
  delete process.env.OPENCODE_CLI_RUN_MODE
-  mock.restore()
 })

 describe("createAutoUpdateCheckerHook", () => {
  it("skips startup toasts and checks in CLI run mode", async () => {
    //#given - CLI run mode enabled
    process.env.OPENCODE_CLI_RUN_MODE = "true"
-    mockShowConfigErrorsIfAny.mockClear()
-    mockShowModelCacheWarningIfNeeded.mockClear()
-    mockUpdateAndShowConnectedProvidersCacheStatus.mockClear()
-    mockShowLocalDevToast.mockClear()
-    mockShowVersionToast.mockClear()
-    mockRunBackgroundUpdateCheck.mockClear()
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()

-    const hook = createAutoUpdateCheckerHook(
-      {
-        directory: "/test",
-        client: {} as never,
-      } as never,
-      { showStartupToast: true, isSisyphusEnabled: true, autoUpdate: true }
-    )
+    const hook = createAutoUpdateCheckerHook(createPluginInput(), {
+      showStartupToast: true,
+      isSisyphusEnabled: true,
+      autoUpdate: true,
+    })

    //#when - session.created event arrives
    hook.event({
@@ -73,7 +91,7 @@ describe("createAutoUpdateCheckerHook", () => {
        properties: { info: { parentID: undefined } },
      },
    })
-    await new Promise((resolve) => setTimeout(resolve, 25))
+    await new Promise((resolve) => setTimeout(resolve, 50))

    //#then - no update checker side effects run
    expect(mockShowConfigErrorsIfAny).not.toHaveBeenCalled()
@@ -82,6 +100,144 @@ describe("createAutoUpdateCheckerHook", () => {
    expect(mockShowLocalDevToast).not.toHaveBeenCalled()
    expect(mockShowVersionToast).not.toHaveBeenCalled()
    expect(mockRunBackgroundUpdateCheck).not.toHaveBeenCalled()
+  })

+  it("runs all startup checks on normal session.created", async () => {
+    //#given - normal mode and no local dev version
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput())
+
+    //#when - session.created event arrives on primary session
+    hook.event({
+      event: {
+        type: "session.created",
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - startup checks, toast, and background check run
+    expect(mockShowConfigErrorsIfAny).toHaveBeenCalledTimes(1)
+    expect(mockUpdateAndShowConnectedProvidersCacheStatus).toHaveBeenCalledTimes(1)
+    expect(mockShowModelCacheWarningIfNeeded).toHaveBeenCalledTimes(1)
+    expect(mockShowVersionToast).toHaveBeenCalledTimes(1)
+    expect(mockRunBackgroundUpdateCheck).toHaveBeenCalledTimes(1)
+  })
+
+  it("ignores subagent sessions (parentID present)", async () => {
+    //#given - a subagent session with parentID
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput())
+
+    //#when - session.created event contains parentID
+    hook.event({
+      event: {
+        type: "session.created",
+        properties: { info: { parentID: "parent-123" } },
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - no startup actions run
+    expect(mockShowConfigErrorsIfAny).not.toHaveBeenCalled()
+    expect(mockUpdateAndShowConnectedProvidersCacheStatus).not.toHaveBeenCalled()
+    expect(mockShowModelCacheWarningIfNeeded).not.toHaveBeenCalled()
+    expect(mockShowLocalDevToast).not.toHaveBeenCalled()
+    expect(mockShowVersionToast).not.toHaveBeenCalled()
+    expect(mockRunBackgroundUpdateCheck).not.toHaveBeenCalled()
+  })
+
+  it("runs only once (hasChecked guard)", async () => {
+    //#given - one hook instance in normal mode
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput())
+
+    //#when - session.created event is fired twice
+    hook.event({
+      event: {
+        type: "session.created",
+      },
+    })
+    hook.event({
+      event: {
+        type: "session.created",
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - side effects execute only once
+    expect(mockShowConfigErrorsIfAny).toHaveBeenCalledTimes(1)
+    expect(mockUpdateAndShowConnectedProvidersCacheStatus).toHaveBeenCalledTimes(1)
+    expect(mockShowModelCacheWarningIfNeeded).toHaveBeenCalledTimes(1)
+    expect(mockShowVersionToast).toHaveBeenCalledTimes(1)
+    expect(mockRunBackgroundUpdateCheck).toHaveBeenCalledTimes(1)
+  })
+
+  it("shows localDevToast when local dev version exists", async () => {
+    //#given - local dev version is present
+    mockGetLocalDevVersion.mockReturnValue("3.6.0-dev")
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput())
+
+    //#when - session.created event arrives
+    hook.event({
+      event: {
+        type: "session.created",
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - local dev toast is shown and background check is skipped
+    expect(mockShowConfigErrorsIfAny).toHaveBeenCalledTimes(1)
+    expect(mockUpdateAndShowConnectedProvidersCacheStatus).toHaveBeenCalledTimes(1)
+    expect(mockShowModelCacheWarningIfNeeded).toHaveBeenCalledTimes(1)
+    expect(mockShowLocalDevToast).toHaveBeenCalledTimes(1)
+    expect(mockShowVersionToast).not.toHaveBeenCalled()
+    expect(mockRunBackgroundUpdateCheck).not.toHaveBeenCalled()
+  })
+
+  it("ignores non-session.created events", async () => {
+    //#given - a hook instance in normal mode
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput())
+
+    //#when - a non-session.created event arrives
+    hook.event({
+      event: {
+        type: "session.deleted",
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - no startup actions run
+    expect(mockShowConfigErrorsIfAny).not.toHaveBeenCalled()
+    expect(mockUpdateAndShowConnectedProvidersCacheStatus).not.toHaveBeenCalled()
+    expect(mockShowModelCacheWarningIfNeeded).not.toHaveBeenCalled()
+    expect(mockShowLocalDevToast).not.toHaveBeenCalled()
+    expect(mockShowVersionToast).not.toHaveBeenCalled()
+    expect(mockRunBackgroundUpdateCheck).not.toHaveBeenCalled()
+  })
+
+  it("passes correct toast message with sisyphus enabled", async () => {
+    //#given - sisyphus mode enabled
+    const createAutoUpdateCheckerHook = await importFreshHookFactory()
+    const hook = createAutoUpdateCheckerHook(createPluginInput(), {
+      isSisyphusEnabled: true,
+    })
+
+    //#when - session.created event arrives
+    hook.event({
+      event: {
+        type: "session.created",
+      },
+    })
+    await new Promise((resolve) => setTimeout(resolve, 50))
+
+    //#then - startup toast includes sisyphus wording
+    expect(mockShowVersionToast).toHaveBeenCalledTimes(1)
+    expect(mockShowVersionToast).toHaveBeenCalledWith(
+      expect.anything(),
+      "3.6.0",
+      expect.stringContaining("Sisyphus")
+    )
  })
 })
--- a/src/hooks/auto-update-checker/hook/background-update-check.test.ts
+++ b/src/hooks/auto-update-checker/hook/background-update-check.test.ts
@@ -1,177 +1,208 @@
-import { describe, it, expect, mock, beforeEach } from "bun:test"
+import type { PluginInput } from "@opencode-ai/plugin"
+import { beforeEach, describe, expect, it, mock } from "bun:test"

-// Mock modules before importing
-const mockFindPluginEntry = mock(() => null as any)
-const mockGetCachedVersion = mock(() => null as string | null)
-const mockGetLatestVersion = mock(async () => null as string | null)
-const mockUpdatePinnedVersion = mock(() => false)
+type PluginEntry = { // local shape of the value the findPluginEntry mock returns in this test file
+  entry: string
+  isPinned: boolean
+  pinnedVersion: string | null
+  configPath: string
+}
+
+type ToastMessageGetter = (isUpdate: boolean, version?: string) => string // message-builder callback passed as the third argument to runBackgroundUpdateCheck and the showUpdateAvailableToast mock
+
+function createPluginEntry(overrides?: Partial<PluginEntry>): PluginEntry { // fixture builder: an unpinned "oh-my-opencode@3.4.0" entry unless overridden
+  return {
+    entry: "oh-my-opencode@3.4.0",
+    isPinned: false,
+    pinnedVersion: null,
+    configPath: "/test/opencode.json",
+    ...overrides, // spread last so caller-supplied fields replace the defaults above
+  }
+}
+
+const mockFindPluginEntry = mock((_directory: string): PluginEntry | null => createPluginEntry()) // default: the unpinned fixture entry is found
+const mockGetCachedVersion = mock((): string | null => "3.4.0") // default: cached version is 3.4.0
+const mockGetLatestVersion = mock(async (): Promise<string | null> => "3.5.0") // default: latest version is 3.5.0, i.e. differs from the cached default
 const mockExtractChannel = mock(() => "latest")
 const mockInvalidatePackage = mock(() => {})
 const mockRunBunInstall = mock(async () => true)
-const mockShowUpdateAvailableToast = mock(async () => {})
-const mockShowAutoUpdatedToast = mock(async () => {})
+const mockShowUpdateAvailableToast = mock( // explicit parameter types so tests can assert on and capture (ctx, latestVersion, getToastMessage)
+  async (_ctx: PluginInput, _latestVersion: string, _getToastMessage: ToastMessageGetter): Promise<void> => {}
+)
+const mockShowAutoUpdatedToast = mock( // explicit parameter types so tests can assert on (ctx, fromVersion, toVersion)
+  async (_ctx: PluginInput, _fromVersion: string, _toVersion: string): Promise<void> => {}
+)

 mock.module("../checker", () => ({
  findPluginEntry: mockFindPluginEntry,
  getCachedVersion: mockGetCachedVersion,
  getLatestVersion: mockGetLatestVersion,
-  updatePinnedVersion: mockUpdatePinnedVersion,
  revertPinnedVersion: mock(() => false),
 }))
-
-mock.module("../version-channel", () => ({
-  extractChannel: mockExtractChannel,
-}))
-
-mock.module("../cache", () => ({
-  invalidatePackage: mockInvalidatePackage,
-}))
-
-mock.module("../../../cli/config-manager", () => ({
-  runBunInstall: mockRunBunInstall,
-}))
-
+mock.module("../version-channel", () => ({ extractChannel: mockExtractChannel })) // each registration swaps a module for the shared mock declared above
+mock.module("../cache", () => ({ invalidatePackage: mockInvalidatePackage }))
+mock.module("../../../cli/config-manager", () => ({ runBunInstall: mockRunBunInstall }))
 mock.module("./update-toasts", () => ({
  showUpdateAvailableToast: mockShowUpdateAvailableToast,
  showAutoUpdatedToast: mockShowAutoUpdatedToast,
 }))
+mock.module("../../../shared/logger", () => ({ log: () => {} })) // log becomes a no-op for these tests

-mock.module("../../../shared/logger", () => ({
-  log: () => {},
-}))
-
-const { runBackgroundUpdateCheck } = await import("./background-update-check?test")
+const modulePath = "./background-update-check?test" // NOTE(review): the "?test" suffix presumably yields a separate module instance - confirm
+const { runBackgroundUpdateCheck } = await import(modulePath) // imported only after all mock.module registrations above

 describe("runBackgroundUpdateCheck", () => {
-  const mockCtx = { directory: "/test" } as any
-  const mockGetToastMessage = (isUpdate: boolean, version?: string) =>
+  const mockCtx = { directory: "/test" } as PluginInput // minimal context: only directory is populated
+  const getToastMessage: ToastMessageGetter = (isUpdate, version) => // caller-supplied message builder handed to runBackgroundUpdateCheck in every test
    isUpdate ? `Update to ${version}` : "Up to date"

  beforeEach(() => {
    mockFindPluginEntry.mockReset()
    mockGetCachedVersion.mockReset()
    mockGetLatestVersion.mockReset()
-    mockUpdatePinnedVersion.mockReset()
    mockExtractChannel.mockReset()
    mockInvalidatePackage.mockReset()
    mockRunBunInstall.mockReset()
    mockShowUpdateAvailableToast.mockReset()
    mockShowAutoUpdatedToast.mockReset()

+    mockFindPluginEntry.mockReturnValue(createPluginEntry()) // after the resets, restore the happy-path defaults: unpinned entry...
+    mockGetCachedVersion.mockReturnValue("3.4.0") // ...cached 3.4.0...
+    mockGetLatestVersion.mockResolvedValue("3.5.0") // ...and latest 3.5.0; individual tests override only what they need
    mockExtractChannel.mockReturnValue("latest")
    mockRunBunInstall.mockResolvedValue(true)
  })

-  describe("#given user has pinned a specific version", () => {
-    beforeEach(() => {
-      mockFindPluginEntry.mockReturnValue({
-        entry: "oh-my-opencode@3.4.0",
-        isPinned: true,
-        pinnedVersion: "3.4.0",
-        configPath: "/test/opencode.json",
-      })
-      mockGetCachedVersion.mockReturnValue("3.4.0")
-      mockGetLatestVersion.mockResolvedValue("3.5.0")
-    })
-
-    it("#then should NOT call updatePinnedVersion", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockUpdatePinnedVersion).not.toHaveBeenCalled()
-    })
-
-    it("#then should show manual-update toast message", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockShowUpdateAvailableToast).toHaveBeenCalledTimes(1)
-
-      const [toastContext, latestVersion, getToastMessage] = mockShowUpdateAvailableToast.mock.calls[0] ?? []
-      expect(toastContext).toBe(mockCtx)
-      expect(latestVersion).toBe("3.5.0")
-      expect(typeof getToastMessage).toBe("function")
-      expect(getToastMessage(true, "3.5.0")).toBe("Update available: 3.5.0 (version pinned, update manually)")
-    })
-
-    it("#then should NOT run bun install", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
+  describe("#given no plugin entry found", () => {
+    it("returns early without showing any toast", async () => {
+      //#given - findPluginEntry finds nothing
+      mockFindPluginEntry.mockReturnValue(null)
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - the entry lookup happened once; no toast and no install followed
+      expect(mockFindPluginEntry).toHaveBeenCalledTimes(1)
+      expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
      expect(mockRunBunInstall).not.toHaveBeenCalled()
    })
-
-    it("#then should NOT invalidate package cache", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockInvalidatePackage).not.toHaveBeenCalled()
-    })
  })

-  describe("#given user has NOT pinned a version (unpinned)", () => {
-    beforeEach(() => {
-      mockFindPluginEntry.mockReturnValue({
-        entry: "oh-my-opencode",
-        isPinned: false,
-        pinnedVersion: null,
-        configPath: "/test/opencode.json",
-      })
-      mockGetCachedVersion.mockReturnValue("3.4.0")
-      mockGetLatestVersion.mockResolvedValue("3.5.0")
-    })
-
-    it("#then should proceed with auto-update", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockInvalidatePackage).toHaveBeenCalled()
-      expect(mockRunBunInstall).toHaveBeenCalled()
-    })
-
-    it("#then should show auto-updated toast on success", async () => {
-      mockRunBunInstall.mockResolvedValue(true)
-
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockShowAutoUpdatedToast).toHaveBeenCalled()
-    })
-  })
-
-  describe("#given autoUpdate is false", () => {
-    beforeEach(() => {
-      mockFindPluginEntry.mockReturnValue({
-        entry: "oh-my-opencode",
-        isPinned: false,
-        pinnedVersion: null,
-        configPath: "/test/opencode.json",
-      })
-      mockGetCachedVersion.mockReturnValue("3.4.0")
-      mockGetLatestVersion.mockResolvedValue("3.5.0")
-    })
-
-    it("#then should only show notification toast", async () => {
-      await runBackgroundUpdateCheck(mockCtx, false, mockGetToastMessage)
-
-      expect(mockShowUpdateAvailableToast).toHaveBeenCalled()
-      expect(mockRunBunInstall).not.toHaveBeenCalled()
-      expect(mockUpdatePinnedVersion).not.toHaveBeenCalled()
-    })
-  })
-
-  describe("#given already on latest version", () => {
-    beforeEach(() => {
-      mockFindPluginEntry.mockReturnValue({
-        entry: "oh-my-opencode@3.5.0",
-        isPinned: true,
-        pinnedVersion: "3.5.0",
-        configPath: "/test/opencode.json",
-      })
-      mockGetCachedVersion.mockReturnValue("3.5.0")
-      mockGetLatestVersion.mockResolvedValue("3.5.0")
-    })
-
-    it("#then should not update or show toast", async () => {
-      await runBackgroundUpdateCheck(mockCtx, true, mockGetToastMessage)
-
-      expect(mockUpdatePinnedVersion).not.toHaveBeenCalled()
+  describe("#given no version available", () => {
+    it("returns early when neither cached nor pinned version exists", async () => {
+      //#given - entry string without a version suffix, pinnedVersion null (fixture default), and no cached version
+      mockFindPluginEntry.mockReturnValue(createPluginEntry({ entry: "oh-my-opencode" }))
+      mockGetCachedVersion.mockReturnValue(null)
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - the cached version was consulted once, the latest-version lookup never ran, and no toast was shown
+      expect(mockGetCachedVersion).toHaveBeenCalledTimes(1)
+      expect(mockGetLatestVersion).not.toHaveBeenCalled()
      expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
    })
  })
+
+  describe("#given latest version fetch fails", () => {
+    it("returns early without toasts", async () => {
+      //#given - getLatestVersion resolves to null; every other mock keeps its beforeEach default
+      mockGetLatestVersion.mockResolvedValue(null)
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - the lookup was made with the "latest" channel; nothing was installed and no toast was shown
+      expect(mockGetLatestVersion).toHaveBeenCalledWith("latest")
+      expect(mockRunBunInstall).not.toHaveBeenCalled()
+      expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given already on latest version", () => {
+    it("returns early without any action", async () => {
+      //#given - cached and latest versions are both 3.4.0
+      mockGetCachedVersion.mockReturnValue("3.4.0")
+      mockGetLatestVersion.mockResolvedValue("3.4.0")
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - the latest version was fetched once, but no install ran and no toast was shown
+      expect(mockGetLatestVersion).toHaveBeenCalledTimes(1)
+      expect(mockRunBunInstall).not.toHaveBeenCalled()
+      expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given update available with autoUpdate disabled", () => {
+    it("shows update notification but does not install", async () => {
+      //#given - autoUpdate is off; the beforeEach defaults leave cached 3.4.0 against latest 3.5.0
+      const autoUpdate = false
+      //#when - the check runs with that flag
+      await runBackgroundUpdateCheck(mockCtx, autoUpdate, getToastMessage)
+      //#then - the caller's getToastMessage is forwarded as-is with the latest version; no install, no auto-updated toast
+      expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
+      expect(mockRunBunInstall).not.toHaveBeenCalled()
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given user has pinned a specific version", () => {
+    it("shows pinned-version toast without auto-updating", async () => {
+      //#given - entry pinned to 3.4.0 while latest stays at the 3.5.0 default
+      mockFindPluginEntry.mockReturnValue(createPluginEntry({ isPinned: true, pinnedVersion: "3.4.0" }))
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - exactly one update-available toast; no install and no auto-updated toast
+      expect(mockShowUpdateAvailableToast).toHaveBeenCalledTimes(1)
+      expect(mockRunBunInstall).not.toHaveBeenCalled()
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
+    })
+
+    it("toast message mentions version pinned", async () => {
+      //#given - pinned entry, plus a mock implementation that captures the message callback passed to the toast
+      let capturedToastMessage: ToastMessageGetter | undefined
+      mockFindPluginEntry.mockReturnValue(createPluginEntry({ isPinned: true, pinnedVersion: "3.4.0" }))
+      mockShowUpdateAvailableToast.mockImplementation(
+        async (_ctx: PluginInput, _latestVersion: string, toastMessage: ToastMessageGetter) => {
+          capturedToastMessage = toastMessage
+        }
+      )
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - the captured callback mentions "version pinned" and differs from what the caller's getToastMessage would return
+      expect(mockShowUpdateAvailableToast).toHaveBeenCalledTimes(1)
+      expect(capturedToastMessage).toBeDefined()
+      if (!capturedToastMessage) {
+        throw new Error("toast message callback missing")
+      }
+      const message = capturedToastMessage(true, "3.5.0")
+      expect(message).toContain("version pinned")
+      expect(message).not.toBe("Update to 3.5.0")
+    })
+  })
+
+  describe("#given unpinned with auto-update and install succeeds", () => {
+    it("invalidates cache, installs, and shows auto-updated toast", async () => {
+      //#given - install resolves to true; entry is the unpinned default, cached 3.4.0, latest 3.5.0
+      mockRunBunInstall.mockResolvedValue(true)
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - cache invalidated once, install ran once, auto-updated toast reports 3.4.0 -> 3.5.0, no update-available toast
+      expect(mockInvalidatePackage).toHaveBeenCalledTimes(1)
+      expect(mockRunBunInstall).toHaveBeenCalledTimes(1)
+      expect(mockShowAutoUpdatedToast).toHaveBeenCalledWith(mockCtx, "3.4.0", "3.5.0")
+      expect(mockShowUpdateAvailableToast).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given unpinned with auto-update and install fails", () => {
+    it("falls back to notification-only toast", async () => {
+      //#given - install resolves to false; all other mocks keep their beforeEach defaults
+      mockRunBunInstall.mockResolvedValue(false)
+      //#when - the check runs with autoUpdate enabled
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+      //#then - install was attempted once, then the update-available toast is shown with the caller's getToastMessage
+      expect(mockRunBunInstall).toHaveBeenCalledTimes(1)
+      expect(mockShowUpdateAvailableToast).toHaveBeenCalledWith(mockCtx, "3.5.0", getToastMessage)
+      expect(mockShowAutoUpdatedToast).not.toHaveBeenCalled()
+    })
+  })
 })
--- a/src/hooks/directory-agents-injector/injector.test.ts
+++ b/src/hooks/directory-agents-injector/injector.test.ts
@@ -1,161 +1,204 @@
-import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"
+import { randomUUID } from "node:crypto"
 import { mkdirSync, rmSync, writeFileSync } from "node:fs"
 import { tmpdir } from "node:os"
 import { join } from "node:path"
+import type { PluginInput } from "@opencode-ai/plugin"
+import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"

-const findAgentsMdUpMock = mock((_: { startDir: string; rootDir: string }) => [] as string[])
-const resolveFilePathMock = mock((_: string, path: string) => path)
-const loadInjectedPathsMock = mock((_: string) => new Set<string>())
-const saveInjectedPathsMock = mock((_: string, __: Set<string>) => {})
+const storageMaps = new Map<string, Set<string>>() // in-memory backing store for the "./storage" mock below, keyed by sessionID
+
+mock.module("./constants", () => ({ // fixed constant values for the duration of these tests
+  AGENTS_INJECTOR_STORAGE: "/tmp/directory-agents-injector-tests",
+  AGENTS_FILENAME: "AGENTS.md",
+}))
+
+mock.module("./storage", () => ({ // load/save/clear operate on storageMaps only
+  loadInjectedPaths: (sessionID: string) => storageMaps.get(sessionID) ?? new Set<string>(), // unknown session: a fresh empty Set that is not stored
+  saveInjectedPaths: (sessionID: string, paths: Set<string>) => {
+    storageMaps.set(sessionID, paths)
+  },
+  clearInjectedPaths: (sessionID: string) => {
+    storageMaps.delete(sessionID)
+  },
+}))
+
+const truncator = { // pass-through stub: content is returned unchanged and truncated is always false
+  truncate: async (_sessionID: string, content: string) => ({ result: content, truncated: false }),
+  getUsage: async (_sessionID: string) => null,
+  truncateSync: (output: string, _maxTokens: number, _preserveHeaderLines?: number) => ({
+    result: output,
+    truncated: false,
+  }),
+}

 describe("processFilePathForAgentsInjection", () => {
  let testRoot = ""
+  let srcDirectory = ""
+  let componentsDirectory = ""
+
+  const rootAgentsContent = "# ROOT AGENTS\nroot-level directives"
+  const srcAgentsContent = "# SRC AGENTS\nsrc-level directives"
+  const componentsAgentsContent = "# COMPONENT AGENTS\ncomponents-level directives"

  beforeEach(() => {
-    findAgentsMdUpMock.mockClear()
-    resolveFilePathMock.mockClear()
-    loadInjectedPathsMock.mockClear()
-    saveInjectedPathsMock.mockClear()
+    storageMaps.clear() // every test starts with no saved injected paths

-    testRoot = join(
-      tmpdir(),
-      `directory-agents-injector-${Date.now()}-${Math.random().toString(16).slice(2)}`
-    )
-    mkdirSync(testRoot, { recursive: true })
+    testRoot = join(tmpdir(), `directory-agents-injector-${randomUUID()}`) // unique temp root per test
+    srcDirectory = join(testRoot, "src")
+    componentsDirectory = join(srcDirectory, "components")
+
+    mkdirSync(componentsDirectory, { recursive: true }) // recursive, so testRoot and src are created as well
+    writeFileSync(join(testRoot, "AGENTS.md"), rootAgentsContent) // one AGENTS.md at each of the three levels...
+    writeFileSync(join(srcDirectory, "AGENTS.md"), srcAgentsContent)
+    writeFileSync(join(componentsDirectory, "AGENTS.md"), componentsAgentsContent)
+    writeFileSync(join(componentsDirectory, "button.ts"), "export const button = true\n") // ...and one source file at each level
+    writeFileSync(join(srcDirectory, "file.ts"), "export const sourceFile = true\n")
+    writeFileSync(join(testRoot, "file.ts"), "export const rootFile = true\n")
  })

  afterEach(() => {
-    mock.restore()
    rmSync(testRoot, { recursive: true, force: true }) // delete the per-test temp tree built in beforeEach
  })

-  it("does not save when all discovered paths are already cached", async () => {
-    //#given
-    const sessionID = "session-1"
-    const repoRoot = join(testRoot, "repo")
-    const agentsPath = join(repoRoot, "src", "AGENTS.md")
-    const cachedDirectory = join(repoRoot, "src")
-    mkdirSync(join(repoRoot, "src"), { recursive: true })
-    writeFileSync(agentsPath, "# AGENTS")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set([cachedDirectory]))
-    findAgentsMdUpMock.mockReturnValueOnce([agentsPath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findAgentsMdUp: findAgentsMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
-
+  it("injects AGENTS.md content from file's parent directory into output", async () => {
+    // given - the on-disk fixture from beforeEach and a fresh output object
    const { processFilePathForAgentsInjection } = await import("./injector")
+    const output = { title: "Read result", output: "base output", metadata: {} }

-    //#when
+    // when - src/file.ts is processed with the pass-through truncator and an empty session cache
    await processFilePathForAgentsInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
      sessionCaches: new Map(),
-      filePath: join(repoRoot, "src", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      filePath: join(srcDirectory, "file.ts"),
+      sessionID: "session-parent",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).not.toHaveBeenCalled()
+    // then - the output object was mutated in place: it carries the context marker and the src-level AGENTS.md text
+    expect(output.output).toContain("[Directory Context:")
+    expect(output.output).toContain(srcAgentsContent)
  })

-  it("saves when a new path is injected", async () => {
-    //#given
-    const sessionID = "session-2"
-    const repoRoot = join(testRoot, "repo")
-    const agentsPath = join(repoRoot, "src", "AGENTS.md")
-    const injectedDirectory = join(repoRoot, "src")
-    mkdirSync(join(repoRoot, "src"), { recursive: true })
-    writeFileSync(agentsPath, "# AGENTS")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set())
-    findAgentsMdUpMock.mockReturnValueOnce([agentsPath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findAgentsMdUp: findAgentsMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
-
+  it("skips root-level AGENTS.md", async () => {
+    // given - only the root-level AGENTS.md remains; the src and components copies are deleted
+    rmSync(join(srcDirectory, "AGENTS.md"), { force: true })
+    rmSync(join(componentsDirectory, "AGENTS.md"), { force: true })
    const { processFilePathForAgentsInjection } = await import("./injector")
+    const output = { title: "Read result", output: "base output", metadata: {} }

-    //#when
+    // when - a file directly inside ctx.directory is processed
    await processFilePathForAgentsInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
      sessionCaches: new Map(),
-      filePath: join(repoRoot, "src", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      filePath: join(testRoot, "file.ts"),
+      sessionID: "session-root-skip",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).toHaveBeenCalledTimes(1)
-    const saveCall = saveInjectedPathsMock.mock.calls[0]
-    expect(saveCall[0]).toBe(sessionID)
-    expect((saveCall[1] as Set<string>).has(injectedDirectory)).toBe(true)
+    // then - neither the root AGENTS.md text nor any context marker was added
+    expect(output.output).not.toContain(rootAgentsContent)
+    expect(output.output).not.toContain("[Directory Context:")
  })

-  it("saves once when cached and new paths are mixed", async () => {
-    //#given
-    const sessionID = "session-3"
-    const repoRoot = join(testRoot, "repo")
-    const cachedAgentsPath = join(repoRoot, "already-cached", "AGENTS.md")
-    const newAgentsPath = join(repoRoot, "new-dir", "AGENTS.md")
-    mkdirSync(join(repoRoot, "already-cached"), { recursive: true })
-    mkdirSync(join(repoRoot, "new-dir"), { recursive: true })
-    writeFileSync(cachedAgentsPath, "# AGENTS")
-    writeFileSync(newAgentsPath, "# AGENTS")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set([join(repoRoot, "already-cached")]))
-    findAgentsMdUpMock.mockReturnValueOnce([cachedAgentsPath, newAgentsPath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findAgentsMdUp: findAgentsMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
-
+  it("injects multiple AGENTS.md when walking up directory tree", async () => {
+    // given - the full three-level fixture from beforeEach
    const { processFilePathForAgentsInjection } = await import("./injector")
+    const output = { title: "Read result", output: "base output", metadata: {} }

-    //#when
+    // when - the deepest file, src/components/button.ts, is processed
    await processFilePathForAgentsInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
      sessionCaches: new Map(),
-      filePath: join(repoRoot, "new-dir", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      filePath: join(componentsDirectory, "button.ts"),
+      sessionID: "session-multiple",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).toHaveBeenCalledTimes(1)
-    const saveCall = saveInjectedPathsMock.mock.calls[0]
-    expect((saveCall[1] as Set<string>).has(join(repoRoot, "new-dir"))).toBe(true)
+    // then - both the src-level and the components-level AGENTS.md text appear in the output
+    expect(output.output).toContain(srcAgentsContent)
+    expect(output.output).toContain(componentsAgentsContent)
+  })
+
+  it("does not re-inject already cached directories", async () => {
+    // given - one sessionCaches map and one output object, both shared by the two calls below
+    const { processFilePathForAgentsInjection } = await import("./injector")
+    const sessionCaches = new Map<string, Set<string>>()
+    const output = { title: "Read result", output: "base output", metadata: {} }
+
+    // when - the same file is processed twice under the same sessionID
+    await processFilePathForAgentsInjection({
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
+      sessionCaches,
+      filePath: join(componentsDirectory, "button.ts"),
+      sessionID: "session-cache",
+      output,
+    })
+    const outputAfterFirstCall = output.output
+    await processFilePathForAgentsInjection({
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
+      sessionCaches,
+      filePath: join(componentsDirectory, "button.ts"),
+      sessionID: "session-cache",
+      output,
+    })
+
+    // then - the second call changed nothing; exactly two markers remain (presumably one each for src/ and components/)
+    expect(output.output).toBe(outputAfterFirstCall)
+    expect(output.output.split("[Directory Context:").length - 1).toBe(2)
+  })
+
+  it("shows truncation notice when content is truncated", async () => {
+    // given - a truncator whose truncate always returns "truncated..." with truncated set to true
+    const { processFilePathForAgentsInjection } = await import("./injector")
+    const output = { title: "Read result", output: "base output", metadata: {} }
+    const truncatedTruncator = {
+      truncate: async (_sessionID: string, _content: string) => ({
+        result: "truncated...",
+        truncated: true,
+      }),
+      getUsage: async (_sessionID: string) => null,
+      truncateSync: (output: string, _maxTokens: number, _preserveHeaderLines?: number) => ({
+        result: output,
+        truncated: false,
+      }),
+    }
+
+    // when - src/file.ts is processed with that truncator
+    await processFilePathForAgentsInjection({
+      ctx: { directory: testRoot } as PluginInput,
+      truncator: truncatedTruncator,
+      sessionCaches: new Map(),
+      filePath: join(srcDirectory, "file.ts"),
+      sessionID: "session-truncated",
+      output,
+    })
+
+    // then - the output holds the truncator's result text and a truncation note
+    expect(output.output).toContain("truncated...")
+    expect(output.output).toContain("[Note: Content was truncated")
+  })
+
+  it("does nothing when filePath cannot be resolved", async () => {
+    // given - the default fixture and a fresh output object
+    const { processFilePathForAgentsInjection } = await import("./injector")
+    const output = { title: "Read result", output: "base output", metadata: {} }
+
+    // when - an empty string is passed as filePath
+    await processFilePathForAgentsInjection({
+      ctx: { directory: testRoot } as PluginInput,
+      truncator,
+      sessionCaches: new Map(),
+      filePath: "",
+      sessionID: "session-empty-path",
+      output,
+    })
+
+    // then - the output text is exactly what it was before the call
+    expect(output.output).toBe("base output")
  })
 })
--- a/src/hooks/directory-readme-injector/injector.test.ts
+++ b/src/hooks/directory-readme-injector/injector.test.ts
@@ -1,161 +1,212 @@
 import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"
+import { randomUUID } from "node:crypto"
 import { mkdirSync, rmSync, writeFileSync } from "node:fs"
 import { tmpdir } from "node:os"
 import { join } from "node:path"

-const findReadmeMdUpMock = mock((_: { startDir: string; rootDir: string }) => [] as string[])
-const resolveFilePathMock = mock((_: string, path: string) => path)
-const loadInjectedPathsMock = mock((_: string) => new Set<string>())
-const saveInjectedPathsMock = mock((_: string, __: Set<string>) => {})
+import type { PluginInput } from "@opencode-ai/plugin"
+
+const storageMaps = new Map<string, Set<string>>() // in-memory backing store for the "./storage" mock below, keyed by sessionID
+
+mock.module("./storage", () => ({ // load and save operate on storageMaps only
+  loadInjectedPaths: (sessionID: string) => storageMaps.get(sessionID) ?? new Set<string>(), // unknown session: a fresh empty Set that is not stored
+  saveInjectedPaths: (sessionID: string, paths: Set<string>) => {
+    storageMaps.set(sessionID, paths)
+  },
+}))
+
+function createPluginContext(directory: string): PluginInput { // test context with only directory populated
+  return { directory } as PluginInput // the cast stands in for every other PluginInput field
+}
+
+function countReadmeMarkers(output: string): number { // counts occurrences of the "[Project README:" marker in output
+  return output.split("[Project README:").length - 1 // split yields one more piece than there are separators
+}
+
+function createTruncator(input?: { truncated?: boolean; result?: string }) { // builds a truncator stub; with no input it passes content through untruncated
+  return {
+    truncate: async (_sessionID: string, content: string) => ({
+      result: input?.result ?? content, // a configured result replaces the content, otherwise content is echoed back
+      truncated: input?.truncated ?? false,
+    }),
+    getUsage: async (_sessionID: string) => null,
+    truncateSync: (output: string) => ({ result: output, truncated: false }),
+  }
+}

 describe("processFilePathForReadmeInjection", () => {
  let testRoot = ""

  beforeEach(() => {
-    findReadmeMdUpMock.mockClear()
-    resolveFilePathMock.mockClear()
-    loadInjectedPathsMock.mockClear()
-    saveInjectedPathsMock.mockClear()
-
-    testRoot = join(
-      tmpdir(),
-      `directory-readme-injector-${Date.now()}-${Math.random().toString(16).slice(2)}`
-    )
+    testRoot = join(tmpdir(), `directory-readme-injector-${randomUUID()}`) // unique temp root per test
    mkdirSync(testRoot, { recursive: true })
+    storageMaps.clear() // every test starts with no saved injected paths
  })

  afterEach(() => {
-    mock.restore()
    rmSync(testRoot, { recursive: true, force: true }) // delete the per-test temp tree
+    storageMaps.clear() // also drop anything the test saved through the storage mock
  })

-  it("does not save when all discovered paths are already cached", async () => {
-    //#given
-    const sessionID = "session-1"
-    const repoRoot = join(testRoot, "repo")
-    const readmePath = join(repoRoot, "src", "README.md")
-    const cachedDirectory = join(repoRoot, "src")
-    mkdirSync(join(repoRoot, "src"), { recursive: true })
-    writeFileSync(readmePath, "# README")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set([cachedDirectory]))
-    findReadmeMdUpMock.mockReturnValueOnce([readmePath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findReadmeMdUp: findReadmeMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
+  it("injects README.md content from file's parent directory into output", async () => {
+    // given - a README.md inside src/ (the file.ts path used below is never created on disk)
+    const sourceDirectory = join(testRoot, "src")
+    mkdirSync(sourceDirectory, { recursive: true })
+    writeFileSync(join(sourceDirectory, "README.md"), "# Source README\nlocal context")

    const { processFilePathForReadmeInjection } = await import("./injector")
+    const output = { title: "Result", output: "base", metadata: {} }
+    const truncator = createTruncator()

-    //#when
+    // when - src/file.ts is processed with a pass-through truncator and an empty session cache
    await processFilePathForReadmeInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
-      sessionCaches: new Map(),
-      filePath: join(repoRoot, "src", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches: new Map<string, Set<string>>(),
+      filePath: join(sourceDirectory, "file.ts"),
+      sessionID: "session-parent",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).not.toHaveBeenCalled()
+    // then - the output object was mutated in place: README marker plus both lines of the src README
+    expect(output.output).toContain("[Project README:")
+    expect(output.output).toContain("# Source README")
+    expect(output.output).toContain("local context")
  })

-  it("saves when a new path is injected", async () => {
-    //#given
-    const sessionID = "session-2"
-    const repoRoot = join(testRoot, "repo")
-    const readmePath = join(repoRoot, "src", "README.md")
-    const injectedDirectory = join(repoRoot, "src")
-    mkdirSync(join(repoRoot, "src"), { recursive: true })
-    writeFileSync(readmePath, "# README")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set())
-    findReadmeMdUpMock.mockReturnValueOnce([readmePath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findReadmeMdUp: findReadmeMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
+  it("includes root-level README.md (unlike agents-injector)", async () => {
+    // given - a README.md directly inside ctx.directory (the file.ts path used below is never created on disk)
+    writeFileSync(join(testRoot, "README.md"), "# Root README\nroot context")

    const { processFilePathForReadmeInjection } = await import("./injector")
+    const output = { title: "Result", output: "", metadata: {} }
+    const truncator = createTruncator()

-    //#when
+    // when - a file path at the root level is processed
    await processFilePathForReadmeInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
-      sessionCaches: new Map(),
-      filePath: join(repoRoot, "src", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches: new Map<string, Set<string>>(),
+      filePath: join(testRoot, "file.ts"),
+      sessionID: "session-root",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).toHaveBeenCalledTimes(1)
-    const saveCall = saveInjectedPathsMock.mock.calls[0]
-    expect(saveCall[0]).toBe(sessionID)
-    expect((saveCall[1] as Set<string>).has(injectedDirectory)).toBe(true)
+    // then - the root README is injected: marker plus both lines of its content
+    expect(output.output).toContain("[Project README:")
+    expect(output.output).toContain("# Root README")
+    expect(output.output).toContain("root context")
  })

-  it("saves once when cached and new paths are mixed", async () => {
-    //#given
-    const sessionID = "session-3"
-    const repoRoot = join(testRoot, "repo")
-    const cachedReadmePath = join(repoRoot, "already-cached", "README.md")
-    const newReadmePath = join(repoRoot, "new-dir", "README.md")
-    mkdirSync(join(repoRoot, "already-cached"), { recursive: true })
-    mkdirSync(join(repoRoot, "new-dir"), { recursive: true })
-    writeFileSync(cachedReadmePath, "# README")
-    writeFileSync(newReadmePath, "# README")
-
-    loadInjectedPathsMock.mockReturnValueOnce(new Set([join(repoRoot, "already-cached")]))
-    findReadmeMdUpMock.mockReturnValueOnce([cachedReadmePath, newReadmePath])
-
-    const truncator = {
-      truncate: mock(async () => ({ result: "trimmed", truncated: false })),
-    }
-
-    mock.module("./finder", () => ({
-      findReadmeMdUp: findReadmeMdUpMock,
-      resolveFilePath: resolveFilePathMock,
-    }))
-    mock.module("./storage", () => ({
-      loadInjectedPaths: loadInjectedPathsMock,
-      saveInjectedPaths: saveInjectedPathsMock,
-    }))
+  it("injects multiple README.md when walking up directory tree", async () => {
+    // given
+    const sourceDirectory = join(testRoot, "src")
+    const componentsDirectory = join(sourceDirectory, "components")
+    mkdirSync(componentsDirectory, { recursive: true })
+    writeFileSync(join(testRoot, "README.md"), "# Root README")
+    writeFileSync(join(sourceDirectory, "README.md"), "# Src README")
+    writeFileSync(join(componentsDirectory, "README.md"), "# Components README")
+    writeFileSync(join(componentsDirectory, "button.ts"), "export const button = true")

    const { processFilePathForReadmeInjection } = await import("./injector")
+    const output = { title: "Result", output: "", metadata: {} }
+    const truncator = createTruncator()

-    //#when
+    // when
    await processFilePathForReadmeInjection({
-      ctx: { directory: repoRoot } as never,
-      truncator: truncator as never,
-      sessionCaches: new Map(),
-      filePath: join(repoRoot, "new-dir", "file.ts"),
-      sessionID,
-      output: { title: "Result", output: "", metadata: {} },
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches: new Map<string, Set<string>>(),
+      filePath: join(componentsDirectory, "button.ts"),
+      sessionID: "session-multi",
+      output,
    })

-    //#then
-    expect(saveInjectedPathsMock).toHaveBeenCalledTimes(1)
-    const saveCall = saveInjectedPathsMock.mock.calls[0]
-    expect((saveCall[1] as Set<string>).has(join(repoRoot, "new-dir"))).toBe(true)
+    // then
+    expect(countReadmeMarkers(output.output)).toBe(3)
+    expect(output.output).toContain("# Root README")
+    expect(output.output).toContain("# Src README")
+    expect(output.output).toContain("# Components README")
+  })
+
+  it("does not re-inject already cached directories", async () => {
+    // given
+    const sourceDirectory = join(testRoot, "src")
+    mkdirSync(sourceDirectory, { recursive: true })
+    writeFileSync(join(sourceDirectory, "README.md"), "# Source README")
+
+    const { processFilePathForReadmeInjection } = await import("./injector")
+    const sessionCaches = new Map<string, Set<string>>()
+    const sessionID = "session-cache"
+    const truncator = createTruncator()
+    const firstOutput = { title: "Result", output: "", metadata: {} }
+    const secondOutput = { title: "Result", output: "", metadata: {} }
+
+    // when
+    await processFilePathForReadmeInjection({
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches,
+      filePath: join(sourceDirectory, "a.ts"),
+      sessionID,
+      output: firstOutput,
+    })
+    await processFilePathForReadmeInjection({
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches,
+      filePath: join(sourceDirectory, "b.ts"),
+      sessionID,
+      output: secondOutput,
+    })
+
+    // then
+    expect(countReadmeMarkers(firstOutput.output)).toBe(1)
+    expect(secondOutput.output).toBe("")
+  })
+
+  it("shows truncation notice when content is truncated", async () => {
+    // given
+    const sourceDirectory = join(testRoot, "src")
+    mkdirSync(sourceDirectory, { recursive: true })
+    writeFileSync(join(sourceDirectory, "README.md"), "# Truncated README")
+
+    const { processFilePathForReadmeInjection } = await import("./injector")
+    const output = { title: "Result", output: "", metadata: {} }
+    const truncator = createTruncator({ result: "trimmed content", truncated: true })
+
+    // when
+    await processFilePathForReadmeInjection({
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches: new Map<string, Set<string>>(),
+      filePath: join(sourceDirectory, "file.ts"),
+      sessionID: "session-truncated",
+      output,
+    })
+
+    // then
+    expect(output.output).toContain("trimmed content")
+    expect(output.output).toContain("[Note: Content was truncated")
+  })
+
+  it("does nothing when filePath cannot be resolved", async () => {
+    // given
+    const { processFilePathForReadmeInjection } = await import("./injector")
+    const output = { title: "Result", output: "unchanged", metadata: {} }
+    const truncator = createTruncator()
+
+    // when
+    await processFilePathForReadmeInjection({
+      ctx: createPluginContext(testRoot),
+      truncator,
+      sessionCaches: new Map<string, Set<string>>(),
+      filePath: "",
+      sessionID: "session-empty-path",
+      output,
+    })
+
+    // then
+    expect(output.output).toBe("unchanged")
  })
 })
--- a/src/hooks/hashline-edit-diff-enhancer/index.test.ts
+++ b/src/hooks/hashline-edit-diff-enhancer/index.test.ts
@@ -1,306 +0,0 @@
-import { describe, test, expect, beforeEach } from "bun:test"
-import { createHashlineEditDiffEnhancerHook } from "./hook"
-
-function makeInput(tool: string, callID = "call-1", sessionID = "ses-1") {
-	return { tool, sessionID, callID }
-}
-
-function makeBeforeOutput(args: Record<string, unknown>) {
-	return { args }
-}
-
-function makeAfterOutput(overrides?: Partial<{ title: string; output: string; metadata: Record<string, unknown> }>) {
-	return {
-		title: overrides?.title ?? "",
-		output: overrides?.output ?? "Successfully applied 1 edit(s)",
-		metadata: overrides?.metadata ?? { truncated: false },
-	}
-}
-
-type FileDiffMetadata = {
-	file: string
-	path: string
-	before: string
-	after: string
-	additions: number
-	deletions: number
-}
-
-describe("hashline-edit-diff-enhancer", () => {
-	let hook: ReturnType<typeof createHashlineEditDiffEnhancerHook>
-
-	beforeEach(() => {
-		hook = createHashlineEditDiffEnhancerHook({ hashline_edit: { enabled: true } })
-	})
-
-	describe("tool.execute.before", () => {
-		test("captures old file content for write tool", async () => {
-			const filePath = import.meta.dir + "/index.test.ts"
-			const input = makeInput("write")
-			const output = makeBeforeOutput({ path: filePath, edits: [] })
-
-			await hook["tool.execute.before"](input, output)
-
-			// given the hook ran without error, the old content should be stored internally
-			// we verify in the after hook test that it produces filediff
-		})
-
-		test("ignores non-write tools", async () => {
-			const input = makeInput("read")
-			const output = makeBeforeOutput({ path: "/some/file.ts" })
-
-			// when - should not throw
-			await hook["tool.execute.before"](input, output)
-		})
-	})
-
-	describe("tool.execute.after", () => {
-		test("injects filediff metadata after write tool execution", async () => {
-			// given - a temp file that we can modify between before/after
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-test-${Date.now()}.ts`
-			const oldContent = "line 1\nline 2\nline 3\n"
-			await Bun.write(tmpFile, oldContent)
-
-			const input = makeInput("write", "call-diff-1")
-			const beforeOutput = makeBeforeOutput({ path: tmpFile, edits: [] })
-
-			// when - before hook captures old content
-			await hook["tool.execute.before"](input, beforeOutput)
-
-			// when - file is modified (simulating write execution)
-			const newContent = "line 1\nmodified line 2\nline 3\nnew line 4\n"
-			await Bun.write(tmpFile, newContent)
-
-			// when - after hook computes filediff
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			// then - metadata should contain filediff
-			const filediff = afterOutput.metadata.filediff as {
-				file: string
-				path: string
-				before: string
-				after: string
-				additions: number
-				deletions: number
-			}
-			expect(filediff).toBeDefined()
-			expect(filediff.file).toBe(tmpFile)
-			expect(filediff.path).toBe(tmpFile)
-			expect(filediff.before).toBe(oldContent)
-			expect(filediff.after).toBe(newContent)
-			expect(filediff.additions).toBeGreaterThan(0)
-			expect(filediff.deletions).toBeGreaterThan(0)
-
-			// then - title should be set to the file path
-			expect(afterOutput.title).toBe(tmpFile)
-
-			// cleanup
-			await Bun.file(tmpFile).exists() && (await import("fs/promises")).unlink(tmpFile)
-		})
-
-		test("does nothing for non-write tools", async () => {
-			const input = makeInput("read", "call-other")
-			const afterOutput = makeAfterOutput()
-			const originalMetadata = { ...afterOutput.metadata }
-
-			await hook["tool.execute.after"](input, afterOutput)
-
-			// then - metadata unchanged
-			expect(afterOutput.metadata).toEqual(originalMetadata)
-		})
-
-		test("does nothing when no before capture exists", async () => {
-			// given - no before hook was called for this callID
-			const input = makeInput("write", "call-no-before")
-			const afterOutput = makeAfterOutput()
-			const originalMetadata = { ...afterOutput.metadata }
-
-			await hook["tool.execute.after"](input, afterOutput)
-
-			// then - metadata unchanged (no filediff injected)
-			expect(afterOutput.metadata.filediff).toBeUndefined()
-		})
-
-		test("cleans up stored content after consumption", async () => {
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-cleanup-${Date.now()}.ts`
-			await Bun.write(tmpFile, "original")
-
-			const input = makeInput("write", "call-cleanup")
-			await hook["tool.execute.before"](input, makeBeforeOutput({ path: tmpFile }))
-			await Bun.write(tmpFile, "modified")
-
-			// when - first after call consumes
-			const afterOutput1 = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput1)
-			expect(afterOutput1.metadata.filediff).toBeDefined()
-
-			// when - second after call finds nothing
-			const afterOutput2 = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput2)
-			expect(afterOutput2.metadata.filediff).toBeUndefined()
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-
-		test("handles file creation (empty old content)", async () => {
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-create-${Date.now()}.ts`
-
-			// given - file doesn't exist during before hook
-			const input = makeInput("write", "call-create")
-			await hook["tool.execute.before"](input, makeBeforeOutput({ path: tmpFile }))
-
-			// when - file created during write
-			await Bun.write(tmpFile, "new content\n")
-
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			// then - filediff shows creation (before is empty)
-			const filediff = afterOutput.metadata.filediff as FileDiffMetadata
-			expect(filediff).toBeDefined()
-			expect(filediff.before).toBe("")
-			expect(filediff.after).toBe("new content\n")
-			expect(filediff.additions).toBeGreaterThan(0)
-			expect(filediff.deletions).toBe(0)
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-	})
-
-	describe("disabled config", () => {
-		test("does nothing when hashline_edit is disabled", async () => {
-			const disabledHook = createHashlineEditDiffEnhancerHook({ hashline_edit: { enabled: false } })
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-disabled-${Date.now()}.ts`
-			await Bun.write(tmpFile, "content")
-
-			const input = makeInput("write", "call-disabled")
-			await disabledHook["tool.execute.before"](input, makeBeforeOutput({ path: tmpFile }))
-			await Bun.write(tmpFile, "modified")
-
-			const afterOutput = makeAfterOutput()
-			await disabledHook["tool.execute.after"](input, afterOutput)
-
-			// then - no filediff injected
-			expect(afterOutput.metadata.filediff).toBeUndefined()
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-	})
-
-	describe("write tool support", () => {
-		test("captures filediff for write tool (path arg)", async () => {
-			//#given - a temp file
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-write-${Date.now()}.ts`
-			const oldContent = "line 1\nline 2\n"
-			await Bun.write(tmpFile, oldContent)
-
-			const input = makeInput("write", "call-write-1")
-			const beforeOutput = makeBeforeOutput({ path: tmpFile })
-
-			//#when - before hook captures old content
-			await hook["tool.execute.before"](input, beforeOutput)
-
-			//#when - file is written
-			const newContent = "line 1\nmodified line 2\nnew line 3\n"
-			await Bun.write(tmpFile, newContent)
-
-			//#when - after hook computes filediff
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			//#then - metadata should contain filediff
-			const filediff = afterOutput.metadata.filediff as { file: string; before: string; after: string; additions: number; deletions: number }
-			expect(filediff).toBeDefined()
-			expect(filediff.file).toBe(tmpFile)
-			expect(filediff.additions).toBeGreaterThan(0)
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-
-		test("captures filediff for write tool (filePath arg)", async () => {
-			//#given
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-write-fp-${Date.now()}.ts`
-			await Bun.write(tmpFile, "original content\n")
-
-			const input = makeInput("write", "call-write-fp")
-
-			//#when - before hook uses filePath arg
-			await hook["tool.execute.before"](input, makeBeforeOutput({ filePath: tmpFile }))
-			await Bun.write(tmpFile, "new content\n")
-
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			//#then
-			const filediff = afterOutput.metadata.filediff as FileDiffMetadata | undefined
-			expect(filediff).toBeDefined()
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-	})
-
-	describe("raw content in filediff", () => {
-		test("filediff.before and filediff.after are raw file content", async () => {
-			//#given - a temp file
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-diff-format-${Date.now()}.ts`
-			const oldContent = "const x = 1\nconst y = 2\n"
-			await Bun.write(tmpFile, oldContent)
-
-			const input = makeInput("write", "call-hashline-format")
-			await hook["tool.execute.before"](input, makeBeforeOutput({ path: tmpFile }))
-
-			//#when - file is modified and after hook runs
-			const newContent = "const x = 1\nconst y = 42\n"
-			await Bun.write(tmpFile, newContent)
-
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			//#then - before and after should be raw file content
-			const filediff = afterOutput.metadata.filediff as { before: string; after: string }
-			expect(filediff.before).toBe(oldContent)
-			expect(filediff.after).toBe(newContent)
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-	})
-
-	describe("TUI diff support (metadata.diff)", () => {
-		test("injects unified diff string in metadata.diff for write tool TUI", async () => {
-			//#given - a temp file
-			const tmpDir = (await import("os")).tmpdir()
-			const tmpFile = `${tmpDir}/hashline-tui-diff-${Date.now()}.ts`
-			const oldContent = "line 1\nline 2\nline 3\n"
-			await Bun.write(tmpFile, oldContent)
-
-			const input = makeInput("write", "call-tui-diff")
-			await hook["tool.execute.before"](input, makeBeforeOutput({ path: tmpFile }))
-
-			//#when - file is modified
-			const newContent = "line 1\nmodified line 2\nline 3\n"
-			await Bun.write(tmpFile, newContent)
-
-			const afterOutput = makeAfterOutput()
-			await hook["tool.execute.after"](input, afterOutput)
-
-			//#then - metadata.diff should be a unified diff string
-			expect(afterOutput.metadata.diff).toBeDefined()
-			expect(typeof afterOutput.metadata.diff).toBe("string")
-			expect(afterOutput.metadata.diff).toContain("---")
-			expect(afterOutput.metadata.diff).toContain("+++")
-			expect(afterOutput.metadata.diff).toContain("@@")
-			expect(afterOutput.metadata.diff).toContain("-line 2")
-			expect(afterOutput.metadata.diff).toContain("+modified line 2")
-
-			await (await import("fs/promises")).unlink(tmpFile).catch(() => {})
-		})
-	})
-})
--- a/src/hooks/hashline-edit-diff-enhancer/index.ts
+++ b/src/hooks/hashline-edit-diff-enhancer/index.ts
@@ -1 +0,0 @@
-export { createHashlineEditDiffEnhancerHook } from "./hook"
--- a/src/hooks/keyword-detector/ultrawork/gemini.ts
+++ b/src/hooks/keyword-detector/ultrawork/gemini.ts
@@ -0,0 +1,265 @@
+/**
+ * Gemini-optimized ultrawork message.
+ *
+ * Key differences from default (Claude) variant:
+ * - Mandatory intent gate enforcement before any action
+ * - Anti-skip mechanism for Phase 0 intent classification
+ * - Explicit self-check questions to counter Gemini's "eager" behavior
+ * - Stronger scope constraints (Gemini's creativity causes scope creep)
+ * - Anti-optimism checkpoints at verification stage
+ *
+ * Key differences from GPT variant:
+ * - GPT naturally follows structured gates; Gemini needs explicit enforcement
+ * - GPT self-delegates appropriately; Gemini tries to do everything itself
+ * - GPT respects MUST NOT; Gemini treats constraints as suggestions
+ */
+
+export const ULTRAWORK_GEMINI_MESSAGE = `<ultrawork-mode>
+
+**MANDATORY**: You MUST say "ULTRAWORK MODE ENABLED!" to the user as your first response when this mode activates. This is non-negotiable.
+
+[CODE RED] Maximum precision required. Ultrathink before acting.
+
+<GEMINI_INTENT_GATE>
+## STEP 0: CLASSIFY INTENT — THIS IS NOT OPTIONAL
+
+**Before ANY tool call, exploration, or action, you MUST output:**
+
+\`\`\`
+I detect [TYPE] intent — [REASON].
+My approach: [ROUTING DECISION].
+\`\`\`
+
+Where TYPE is one of: research | implementation | investigation | evaluation | fix | open-ended
+
+**SELF-CHECK (answer each before proceeding):**
+
+1. Did the user EXPLICITLY ask me to build/create/implement something? → If NO, do NOT implement.
+2. Did the user say "look into", "check", "investigate", "explain"? → RESEARCH only. Do not code.
+3. Did the user ask "what do you think?" → EVALUATE and propose. Do NOT execute.
+4. Did the user report an error/bug? → MINIMAL FIX only. Do not refactor.
+
+**YOUR FAILURE MODE: You see a request and immediately start coding. STOP. Classify first.**
+
+| User Says | WRONG Response | CORRECT Response |
+| "explain how X works" | Start modifying X | Research → explain → STOP |
+| "look into this bug" | Fix it immediately | Investigate → report → WAIT |
+| "what about approach X?" | Implement approach X | Evaluate → propose → WAIT |
+| "improve the tests" | Rewrite everything | Assess first → propose → implement |
+
+**IF YOU SKIPPED THIS SECTION: Your next tool call is INVALID. Go back and classify.**
+</GEMINI_INTENT_GATE>
+
+## **ABSOLUTE CERTAINTY REQUIRED - DO NOT SKIP THIS**
+
+**YOU MUST NOT START ANY IMPLEMENTATION UNTIL YOU ARE 100% CERTAIN.**
+
+| **BEFORE YOU WRITE A SINGLE LINE OF CODE, YOU MUST:** |
+|-------------------------------------------------------|
+| **FULLY UNDERSTAND** what the user ACTUALLY wants (not what you ASSUME they want) |
+| **EXPLORE** the codebase to understand existing patterns, architecture, and context |
+| **HAVE A CRYSTAL CLEAR WORK PLAN** - if your plan is vague, YOUR WORK WILL FAIL |
+| **RESOLVE ALL AMBIGUITY** - if ANYTHING is unclear, ASK or INVESTIGATE |
+
+### **MANDATORY CERTAINTY PROTOCOL**
+
+**IF YOU ARE NOT 100% CERTAIN:**
+
+1. **THINK DEEPLY** - What is the user's TRUE intent? What problem are they REALLY trying to solve?
+2. **EXPLORE THOROUGHLY** - Fire explore/librarian agents to gather ALL relevant context
+3. **CONSULT SPECIALISTS** - For hard/complex tasks, DO NOT struggle alone. Delegate:
+   - **Oracle**: Conventional problems - architecture, debugging, complex logic
+   - **Artistry**: Non-conventional problems - different approach needed, unusual constraints
+4. **ASK THE USER** - If ambiguity remains after exploration, ASK. Don't guess.
+
+**SIGNS YOU ARE NOT READY TO IMPLEMENT:**
+- You're making assumptions about requirements
+- You're unsure which files to modify
+- You don't understand how existing code works
+- Your plan has "probably" or "maybe" in it
+- You can't explain the exact steps you'll take
+
+**WHEN IN DOUBT:**
+\`\`\`
+task(subagent_type="explore", load_skills=[], prompt="I'm implementing [TASK DESCRIPTION] and need to understand [SPECIFIC KNOWLEDGE GAP]. Find [X] patterns in the codebase — show file paths, implementation approach, and conventions used. I'll use this to [HOW RESULTS WILL BE USED]. Focus on src/ directories, skip test files unless test patterns are specifically needed. Return concrete file paths with brief descriptions of what each file does.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm working with [LIBRARY/TECHNOLOGY] and need [SPECIFIC INFORMATION]. Find official documentation and production-quality examples for [Y] — specifically: API reference, configuration options, recommended patterns, and common pitfalls. Skip beginner tutorials. I'll use this to [DECISION THIS WILL INFORM].", run_in_background=true)
+task(subagent_type="oracle", load_skills=[], prompt="I need architectural review of my approach to [TASK]. Here's my plan: [DESCRIBE PLAN WITH SPECIFIC FILES AND CHANGES]. My concerns are: [LIST SPECIFIC UNCERTAINTIES]. Please evaluate: correctness of approach, potential issues I'm missing, and whether a better alternative exists.", run_in_background=false)
+\`\`\`
+
+**ONLY AFTER YOU HAVE:**
+- Gathered sufficient context via agents
+- Resolved all ambiguities
+- Created a precise, step-by-step work plan
+- Achieved 100% confidence in your understanding
+
+**...THEN AND ONLY THEN MAY YOU BEGIN IMPLEMENTATION.**
+
+---
+
+## **NO EXCUSES. NO COMPROMISES. DELIVER WHAT WAS ASKED.**
+
+**THE USER'S ORIGINAL REQUEST IS SACRED. YOU MUST FULFILL IT EXACTLY.**
+
+| VIOLATION | CONSEQUENCE |
+|-----------|-------------|
+| "I couldn't because..." | **UNACCEPTABLE.** Find a way or ask for help. |
+| "This is a simplified version..." | **UNACCEPTABLE.** Deliver the FULL implementation. |
+| "You can extend this later..." | **UNACCEPTABLE.** Finish it NOW. |
+| "Due to limitations..." | **UNACCEPTABLE.** Use agents, tools, whatever it takes. |
+| "I made some assumptions..." | **UNACCEPTABLE.** You should have asked FIRST. |
+
+**THERE ARE NO VALID EXCUSES FOR:**
+- Delivering partial work
+- Changing scope without explicit user approval
+- Making unauthorized simplifications
+- Stopping before the task is 100% complete
+- Compromising on any stated requirement
+
+**IF YOU ENCOUNTER A BLOCKER:**
+1. **DO NOT** give up
+2. **DO NOT** deliver a compromised version
+3. **DO** consult specialists (oracle for conventional, artistry for non-conventional)
+4. **DO** ask the user for guidance
+5. **DO** explore alternative approaches
+
+**THE USER ASKED FOR X. DELIVER EXACTLY X. PERIOD.**
+
+---
+
+<TOOL_CALL_MANDATE>
+## YOU MUST USE TOOLS. THIS IS NOT OPTIONAL.
+
+**The user expects you to ACT using tools, not REASON internally.** Every response to a task MUST contain tool_use blocks. A response without tool calls is a FAILED response.
+
+**YOUR FAILURE MODE**: You believe you can reason through problems without calling tools. You CANNOT.
+
+**RULES (VIOLATION = BROKEN RESPONSE):**
+1. **NEVER answer about code without reading files first.** Read them AGAIN.
+2. **NEVER claim done without \`lsp_diagnostics\`.** Your confidence is wrong more often than right.
+3. **NEVER skip delegation.** Specialists produce better results. USE THEM.
+4. **NEVER reason about what a file "probably contains."** READ IT.
+5. **NEVER produce ZERO tool calls when action was requested.** Thinking is not doing.
+</TOOL_CALL_MANDATE>
+
+YOU MUST LEVERAGE ALL AVAILABLE AGENTS / **CATEGORY + SKILLS** TO THEIR FULLEST POTENTIAL.
+TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
+
+## MANDATORY: PLAN AGENT INVOCATION (NON-NEGOTIABLE)
+
+**YOU MUST ALWAYS INVOKE THE PLAN AGENT FOR ANY NON-TRIVIAL TASK.**
+
+| Condition | Action |
+|-----------|--------|
+| Task has 2+ steps | MUST call plan agent |
+| Task scope unclear | MUST call plan agent |
+| Implementation required | MUST call plan agent |
+| Architecture decision needed | MUST call plan agent |
+
+\`\`\`
+task(subagent_type="plan", load_skills=[], prompt="<gathered context + user request>")
+\`\`\`
+
+### SESSION CONTINUITY WITH PLAN AGENT (CRITICAL)
+
+**Plan agent returns a session_id. USE IT for follow-up interactions.**
+
+| Scenario | Action |
+|----------|--------|
+| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="<your answer>")\` |
+| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: <feedback>")\` |
+| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` |
+
+**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
+
+---
+
+## DELEGATION IS MANDATORY — YOU ARE NOT AN IMPLEMENTER
+
+**You have a strong tendency to do work yourself. RESIST THIS.**
+
+**DEFAULT BEHAVIOR: DELEGATE. DO NOT WORK YOURSELF.**
+
+| Task Type | Action | Why |
+|-----------|--------|-----|
+| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient |
+| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge |
+| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list |
+| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic |
+| Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed |
+| Implementation | task(category="...", load_skills=[...]) | Domain-optimized models |
+
+**YOU SHOULD ONLY DO IT YOURSELF WHEN:**
+- Task is trivially simple (1-2 lines, obvious change)
+- You have ALL context already loaded
+- Delegation overhead exceeds task complexity
+
+**OTHERWISE: DELEGATE. ALWAYS.**
+
+---
+
+## EXECUTION RULES
+- **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
+- **PARALLEL**: Fire independent agent calls simultaneously via task(run_in_background=true) - NEVER wait sequentially.
+- **BACKGROUND FIRST**: Use task for exploration/research agents (10+ concurrent if needed).
+- **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
+- **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.
+
+## WORKFLOW
+1. **CLASSIFY INTENT** (MANDATORY — see GEMINI_INTENT_GATE above)
+2. Spawn exploration/librarian agents via task(run_in_background=true) in PARALLEL
+3. Use Plan agent with gathered context to create detailed work breakdown
+4. Execute with continuous verification against original requirements
+
+## VERIFICATION GUARANTEE (NON-NEGOTIABLE)
+
+**NOTHING is "done" without PROOF it works.**
+
+**YOUR SELF-ASSESSMENT IS UNRELIABLE.** What feels like 95% confidence = ~60% actual correctness.
+
+| Phase | Action | Required Evidence |
+|-------|--------|-------------------|
+| **Build** | Run build command | Exit code 0, no errors |
+| **Test** | Execute test suite | All tests pass (screenshot/output) |
+| **Lint** | Run lsp_diagnostics | Zero new errors on changed files |
+| **Manual Verify** | Test the actual feature | Describe what you observed |
+| **Regression** | Ensure nothing broke | Existing tests still pass |
+
+<ANTI_OPTIMISM_CHECKPOINT>
+## BEFORE YOU CLAIM DONE, ANSWER HONESTLY:
+
+1. Did I run \`lsp_diagnostics\` and see ZERO errors? (not "I'm sure there are none")
+2. Did I run the tests and see them PASS? (not "they should pass")
+3. Did I read the actual output of every command? (not skim)
+4. Is EVERY requirement from the request actually implemented? (re-read the request NOW)
+5. Did I classify intent at the start? (if not, my entire approach may be wrong)
+
+If ANY answer is no → GO BACK AND DO IT. Do not claim completion.
+</ANTI_OPTIMISM_CHECKPOINT>
+
+**WITHOUT evidence = NOT verified = NOT done.**
+
+## ZERO TOLERANCE FAILURES
+- **NO Scope Reduction**: Never make "demo", "skeleton", "simplified", "basic" versions - deliver FULL implementation
+- **NO Partial Completion**: Never stop at 60-80% saying "you can extend this..." - finish 100%
+- **NO Assumed Shortcuts**: Never skip requirements you deem "optional" or "can be added later"
+- **NO Premature Stopping**: Never declare done until ALL TODOs are completed and verified
+- **NO TEST DELETION**: Never delete or skip failing tests to make the build pass. Fix the code, not the tests.
+
+THE USER ASKED FOR X. DELIVER EXACTLY X. NOT A SUBSET. NOT A DEMO. NOT A STARTING POINT.
+
+1. CLASSIFY INTENT (MANDATORY)
+2. EXPLORES + LIBRARIANS
+3. GATHER -> PLAN AGENT SPAWN
+4. WORK BY DELEGATING TO OTHER AGENTS
+
+NOW.
+
+</ultrawork-mode>
+
+---
+
+`
+
+export function getGeminiUltraworkMessage(): string {
+  return ULTRAWORK_GEMINI_MESSAGE
+}
--- a/src/hooks/keyword-detector/ultrawork/index.ts
+++ b/src/hooks/keyword-detector/ultrawork/index.ts
@@ -4,19 +4,22 @@
 * Routing:
 * 1. Planner agents (prometheus, plan) → planner.ts
 * 2. GPT 5.2 models → gpt5.2.ts
- * 3. Default (Claude, etc.) → default.ts (optimized for Claude series)
+ * 3. Gemini models → gemini.ts
+ * 4. Default (Claude, etc.) → default.ts (optimized for Claude series)
 */

-export { isPlannerAgent, isGptModel, getUltraworkSource } from "./source-detector"
+export { isPlannerAgent, isGptModel, isGeminiModel, getUltraworkSource } from "./source-detector"
 export type { UltraworkSource } from "./source-detector"
 export { ULTRAWORK_PLANNER_SECTION, getPlannerUltraworkMessage } from "./planner"
 export { ULTRAWORK_GPT_MESSAGE, getGptUltraworkMessage } from "./gpt5.2"
+export { ULTRAWORK_GEMINI_MESSAGE, getGeminiUltraworkMessage } from "./gemini"
 export { ULTRAWORK_DEFAULT_MESSAGE, getDefaultUltraworkMessage } from "./default"

 import { getUltraworkSource } from "./source-detector"
 import { getPlannerUltraworkMessage } from "./planner"
 import { getGptUltraworkMessage } from "./gpt5.2"
 import { getDefaultUltraworkMessage } from "./default"
+import { getGeminiUltraworkMessage } from "./gemini"

 /**
 * Gets the appropriate ultrawork message based on agent and model context.
@@ -29,6 +32,8 @@ export function getUltraworkMessage(agentName?: string, modelID?: string): strin
      return getPlannerUltraworkMessage()
    case "gpt":
      return getGptUltraworkMessage()
+    case "gemini":
+      return getGeminiUltraworkMessage()
    case "default":
    default:
      return getDefaultUltraworkMessage()
--- a/src/hooks/keyword-detector/ultrawork/source-detector.ts
+++ b/src/hooks/keyword-detector/ultrawork/source-detector.ts
@@ -4,10 +4,11 @@
 * Routing logic:
 * 1. Planner agents (prometheus, plan) → planner.ts
 * 2. GPT 5.2 models → gpt5.2.ts
- * 3. Everything else (Claude, etc.) → default.ts
+ * 3. Gemini models → gemini.ts
+ * 4. Everything else (Claude, etc.) → default.ts
 */

-import { isGptModel } from "../../../agents/types"
+import { isGptModel, isGeminiModel } from "../../../agents/types"

 /**
 * Checks if agent is a planner-type agent.
@@ -22,10 +23,10 @@ export function isPlannerAgent(agentName?: string): boolean {
  return /\bplan\b/.test(normalized)
 }

-export { isGptModel }
+export { isGptModel, isGeminiModel }

 /** Ultrawork message source type */
-export type UltraworkSource = "planner" | "gpt" | "default"
+export type UltraworkSource = "planner" | "gpt" | "gemini" | "default"

 /**
 * Determines which ultrawork message source to use.
@@ -44,6 +45,11 @@ export function getUltraworkSource(
    return "gpt"
  }

+  // Priority 3: Gemini models
+  if (modelID && isGeminiModel(modelID)) {
+    return "gemini"
+  }
+
  // Default: Claude and other models
  return "default"
 }
--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@@ -269,7 +269,7 @@ describe("preemptive-compaction", () => {

  it("should use 1M limit when model cache flag is enabled", async () => {
    //#given
-    const hook = createPreemptiveCompactionHook(ctx as never, {
+    const hook = createPreemptiveCompactionHook(ctx as never, {}, {
      anthropicContext1MEnabled: true,
    })
    const sessionID = "ses_1m_flag"
@@ -308,7 +308,7 @@ describe("preemptive-compaction", () => {
  it("should keep env var fallback when model cache flag is disabled", async () => {
    //#given
    process.env[ANTHROPIC_CONTEXT_ENV_KEY] = "true"
-    const hook = createPreemptiveCompactionHook(ctx as never, {
+    const hook = createPreemptiveCompactionHook(ctx as never, {}, {
      anthropicContext1MEnabled: false,
    })
    const sessionID = "ses_env_fallback"
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@@ -1,5 +1,7 @@
 import { log } from "../shared/logger"
+import type { OhMyOpenCodeConfig } from "../config"

+import { resolveCompactionModel } from "./shared/compaction-model-resolver"
 const DEFAULT_ACTUAL_LIMIT = 200_000

 type ModelCacheStateLike = {
@@ -51,6 +53,7 @@ type PluginInput = {

 export function createPreemptiveCompactionHook(
  ctx: PluginInput,
+  pluginConfig: OhMyOpenCodeConfig,
  modelCacheState?: ModelCacheStateLike,
 ) {
  const compactionInProgress = new Set<string>()
@@ -84,9 +87,16 @@ export function createPreemptiveCompactionHook(
    compactionInProgress.add(sessionID)

    try {
+      const { providerID: targetProviderID, modelID: targetModelID } = resolveCompactionModel(
+        pluginConfig,
+        sessionID,
+        cached.providerID,
+        modelID
+      )
+
      await ctx.client.session.summarize({
        path: { id: sessionID },
-        body: { providerID: cached.providerID, modelID, auto: true } as never,
+        body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never,
        query: { directory: ctx.directory },
      })

--- a/src/hooks/session-notification-input-needed.test.ts
+++ b/src/hooks/session-notification-input-needed.test.ts
@@ -0,0 +1,93 @@
+const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:test")
+
+const { createSessionNotification } = require("./session-notification")
+const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state")
+const utils = require("./session-notification-utils")
+
+describe("session-notification input-needed events", () => {
+  let notificationCalls: string[]
+
+  function createMockPluginInput() {
+    return {
+      $: async (cmd: TemplateStringsArray | string, ...values: unknown[]) => {
+        const cmdStr = typeof cmd === "string"
+          ? cmd
+          : cmd.reduce((acc, part, i) => acc + part + (values[i] ?? ""), "")
+
+        if (cmdStr.includes("osascript") || cmdStr.includes("notify-send") || cmdStr.includes("powershell")) {
+          notificationCalls.push(cmdStr)
+        }
+
+        return { stdout: "", stderr: "", exitCode: 0 }
+      },
+      client: {
+        session: {
+          todo: async () => ({ data: [] }),
+        },
+      },
+      directory: "/tmp/test",
+    }
+  }
+
+  beforeEach(() => {
+    _resetForTesting()
+    notificationCalls = []
+
+    spyOn(utils, "getOsascriptPath").mockResolvedValue("/usr/bin/osascript")
+    spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send")
+    spyOn(utils, "getPowershellPath").mockResolvedValue("powershell")
+    spyOn(utils, "startBackgroundCheck").mockImplementation(() => {})
+  })
+
+  afterEach(() => {
+    subagentSessions.clear()
+    _resetForTesting()
+  })
+
+  test("sends question notification when question tool asks for input", async () => {
+    const sessionID = "main-question"
+    setMainSession(sessionID)
+    const hook = createSessionNotification(createMockPluginInput())
+
+    await hook({
+      event: {
+        type: "tool.execute.before",
+        properties: {
+          sessionID,
+          tool: "question",
+          args: {
+            questions: [
+              {
+                question: "Which branch should we use?",
+                options: [{ label: "main" }, { label: "dev" }],
+              },
+            ],
+          },
+        },
+      },
+    })
+
+    expect(notificationCalls).toHaveLength(1)
+    expect(notificationCalls[0]).toContain("Agent is asking a question")
+  })
+
+  test("sends permission notification for permission events", async () => {
+    const sessionID = "main-permission"
+    setMainSession(sessionID)
+    const hook = createSessionNotification(createMockPluginInput())
+
+    await hook({
+      event: {
+        type: "permission.asked",
+        properties: {
+          sessionID,
+        },
+      },
+    })
+
+    expect(notificationCalls).toHaveLength(1)
+    expect(notificationCalls[0]).toContain("Agent needs permission to continue")
+  })
+})
+
+export {}
--- a/src/hooks/session-notification.ts
+++ b/src/hooks/session-notification.ts
@@ -15,6 +15,8 @@ import { createIdleNotificationScheduler } from "./session-notification-schedule
 interface SessionNotificationConfig {
  title?: string
  message?: string
+  questionMessage?: string
+  permissionMessage?: string
  playSound?: boolean
  soundPath?: string
  /** Delay in ms before sending notification to confirm session is still idle (default: 1500) */
@@ -36,6 +38,8 @@ export function createSessionNotification(
  const mergedConfig = {
    title: "OpenCode",
    message: "Agent is ready for input",
+    questionMessage: "Agent is asking a question",
+    permissionMessage: "Agent needs permission to continue",
    playSound: false,
    soundPath: defaultSoundPath,
    idleConfirmationDelay: 1500,
@@ -53,6 +57,56 @@ export function createSessionNotification(
    playSound: playSessionNotificationSound,
  })

+  const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"])
+  const PERMISSION_EVENTS = new Set(["permission.ask", "permission.asked", "permission.updated", "permission.requested"])
+  const PERMISSION_HINT_PATTERN = /\b(permission|approve|approval|allow|deny|consent)\b/i
+
+  // Extracts the session id from event properties. Looks at `sessionID` then
+  // `sessionId` on the properties object, then the same two keys on the nested
+  // `info` object; the first non-empty string wins, otherwise undefined.
+  const getSessionID = (properties: Record<string, unknown> | undefined): string | undefined => {
+    const nestedInfo = properties?.info as Record<string, unknown> | undefined
+    const candidates: unknown[] = [
+      properties?.sessionID,
+      properties?.sessionId,
+      nestedInfo?.sessionID,
+      nestedInfo?.sessionId,
+    ]
+
+    return candidates.find(
+      (candidate): candidate is string => typeof candidate === "string" && candidate.length > 0,
+    )
+  }
+
+  // True when the session may raise a notification: it is not a registered
+  // subagent session and, if a main session is known, it is that main session.
+  const shouldNotifyForSession = (sessionID: string): boolean => {
+    if (subagentSessions.has(sessionID)) return false
+
+    const mainSessionID = getMainSessionID()
+    return !mainSessionID || sessionID === mainSessionID
+  }
+
+  // Reads the tool name from event properties: `tool` is preferred, `name` is
+  // the fallback; only non-empty strings count, otherwise undefined.
+  const getEventToolName = (properties: Record<string, unknown> | undefined): string | undefined => {
+    for (const key of ["tool", "name"]) {
+      const value = properties?.[key]
+      if (typeof value === "string" && value.length > 0) return value
+    }
+    return undefined
+  }
+
+  // Returns the `question` text of the first entry in `args.questions`, or ""
+  // when the list is absent, not an array, empty, or the text is not a string.
+  const getQuestionText = (properties: Record<string, unknown> | undefined): string => {
+    const toolArgs = properties?.args as Record<string, unknown> | undefined
+    const questionList = toolArgs?.questions
+    const [first] = (Array.isArray(questionList) ? questionList : []) as Array<Record<string, unknown> | undefined>
+    const text = first?.question
+    return typeof text === "string" ? text : ""
+  }
+
  return async ({ event }: { event: { type: string; properties?: unknown } }) => {
    if (currentPlatform === "unsupported") return

@@ -68,14 +122,10 @@ export function createSessionNotification(
    }

    if (event.type === "session.idle") {
-      const sessionID = props?.sessionID as string | undefined
+      const sessionID = getSessionID(props)
      if (!sessionID) return

-      if (subagentSessions.has(sessionID)) return
-
-      // Only trigger notifications for the main session (not subagent sessions)
-      const mainSessionID = getMainSessionID()
-      if (mainSessionID && sessionID !== mainSessionID) return
+      if (!shouldNotifyForSession(sessionID)) return

      scheduler.scheduleIdleNotification(sessionID)
      return
@@ -83,17 +133,47 @@ export function createSessionNotification(

    if (event.type === "message.updated") {
      const info = props?.info as Record<string, unknown> | undefined
-      const sessionID = info?.sessionID as string | undefined
+      const sessionID = getSessionID({ ...props, info })
      if (sessionID) {
        scheduler.markSessionActivity(sessionID)
      }
      return
    }

+    if (PERMISSION_EVENTS.has(event.type)) {
+      const sessionID = getSessionID(props)
+      if (!sessionID) return
+      if (!shouldNotifyForSession(sessionID)) return
+
+      scheduler.markSessionActivity(sessionID)
+      await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, mergedConfig.permissionMessage)
+      if (mergedConfig.playSound && mergedConfig.soundPath) {
+        await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
+      }
+      return
+    }
+
    if (event.type === "tool.execute.before" || event.type === "tool.execute.after") {
-      const sessionID = props?.sessionID as string | undefined
+      const sessionID = getSessionID(props)
      if (sessionID) {
        scheduler.markSessionActivity(sessionID)
+
+        if (event.type === "tool.execute.before") {
+          const toolName = getEventToolName(props)?.toLowerCase()
+          if (toolName && QUESTION_TOOLS.has(toolName)) {
+            if (!shouldNotifyForSession(sessionID)) return
+
+            const questionText = getQuestionText(props)
+            const message = PERMISSION_HINT_PATTERN.test(questionText)
+              ? mergedConfig.permissionMessage
+              : mergedConfig.questionMessage
+
+            await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message)
+            if (mergedConfig.playSound && mergedConfig.soundPath) {
+              await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath)
+            }
+          }
+        }
      }
      return
    }
--- a/src/hooks/session-recovery/recover-empty-content-message.ts
+++ b/src/hooks/session-recovery/recover-empty-content-message.ts
@@ -1,87 +0,0 @@
-import type { createOpencodeClient } from "@opencode-ai/sdk"
-import type { MessageData } from "./types"
-import { extractMessageIndex } from "./detect-error-type"
-import { recoverEmptyContentMessageFromSDK } from "./recover-empty-content-message-sdk"
-import {
-  findEmptyMessageByIndex,
-  findEmptyMessages,
-  findMessagesWithEmptyTextParts,
-  findMessagesWithThinkingOnly,
-  injectTextPart,
-  replaceEmptyTextParts,
-} from "./storage"
-import { isSqliteBackend } from "../../shared/opencode-storage-detection"
-import { replaceEmptyTextPartsAsync, findMessagesWithEmptyTextPartsFromSDK } from "./storage/empty-text"
-import { injectTextPartAsync } from "./storage/text-part-injector"
-
-type Client = ReturnType<typeof createOpencodeClient>
-
-const PLACEHOLDER_TEXT = "[user interrupted]"
-
-export async function recoverEmptyContentMessage(
-  client: Client,
-  sessionID: string,
-  failedAssistantMsg: MessageData,
-  _directory: string,
-  error: unknown
-): Promise<boolean> {
-  if (isSqliteBackend()) {
-    return recoverEmptyContentMessageFromSDK(client, sessionID, failedAssistantMsg, error, {
-      placeholderText: PLACEHOLDER_TEXT,
-      replaceEmptyTextPartsAsync,
-      injectTextPartAsync,
-      findMessagesWithEmptyTextPartsFromSDK,
-    })
-  }
-
-  const targetIndex = extractMessageIndex(error)
-  const failedID = failedAssistantMsg.info?.id
-  let anySuccess = false
-
-  const messagesWithEmptyText = findMessagesWithEmptyTextParts(sessionID)
-  for (const messageID of messagesWithEmptyText) {
-    if (replaceEmptyTextParts(messageID, PLACEHOLDER_TEXT)) {
-      anySuccess = true
-    }
-  }
-
-  const thinkingOnlyIDs = findMessagesWithThinkingOnly(sessionID)
-  for (const messageID of thinkingOnlyIDs) {
-    if (injectTextPart(sessionID, messageID, PLACEHOLDER_TEXT)) {
-      anySuccess = true
-    }
-  }
-
-  if (targetIndex !== null) {
-    const targetMessageID = findEmptyMessageByIndex(sessionID, targetIndex)
-    if (targetMessageID) {
-      if (replaceEmptyTextParts(targetMessageID, PLACEHOLDER_TEXT)) {
-        return true
-      }
-      if (injectTextPart(sessionID, targetMessageID, PLACEHOLDER_TEXT)) {
-        return true
-      }
-    }
-  }
-
-  if (failedID) {
-    if (replaceEmptyTextParts(failedID, PLACEHOLDER_TEXT)) {
-      return true
-    }
-    if (injectTextPart(sessionID, failedID, PLACEHOLDER_TEXT)) {
-      return true
-    }
-  }
-
-  const emptyMessageIDs = findEmptyMessages(sessionID)
-  for (const messageID of emptyMessageIDs) {
-    if (replaceEmptyTextParts(messageID, PLACEHOLDER_TEXT)) {
-      anySuccess = true
-    }
-    if (injectTextPart(sessionID, messageID, PLACEHOLDER_TEXT)) {
-      anySuccess = true
-    }
-  }
-
-  return anySuccess
-}
--- a/src/hooks/session-recovery/recover-tool-result-missing.ts
+++ b/src/hooks/session-recovery/recover-tool-result-missing.ts
@@ -5,6 +5,12 @@ import { isSqliteBackend } from "../../shared/opencode-storage-detection"
 import { normalizeSDKResponse } from "../../shared"

 type Client = ReturnType<typeof createOpencodeClient>
+// Structural view of the client used to call session.promptAsync without relying on the SDK's own typings.
+type ClientWithPromptAsync = {
+  session: {
+    promptAsync: (opts: { path: { id: string }; body: Record<string, unknown> }) => Promise<unknown>
+  }
+}

 interface ToolUsePart {
  type: "tool_use"
@@ -77,8 +83,7 @@ export async function recoverToolResultMissing(
  }

  try {
-    // @ts-expect-error - SDK types may not include tool_result parts
-    await client.session.promptAsync(promptInput)
+    await (client as unknown as ClientWithPromptAsync).session.promptAsync(promptInput)

    return true
  } catch {
--- a/src/hooks/shared/compaction-model-resolver.ts
+++ b/src/hooks/shared/compaction-model-resolver.ts
@@ -0,0 +1,34 @@
+import type { OhMyOpenCodeConfig } from "../../config"
+import { getSessionAgent } from "../../features/claude-code-session-state"
+import { getAgentConfigKey } from "../../shared/agent-display-names"
+
+/**
+ * Resolves which provider/model should run compaction for a session.
+ * Uses the session agent's `compaction.model` ("provider/model") when it is
+ * configured and has a non-empty provider and model part; otherwise returns
+ * the original pair unchanged. Only the first "/" separates the provider.
+ */
+export function resolveCompactionModel(
+  pluginConfig: OhMyOpenCodeConfig,
+  sessionID: string,
+  originalProviderID: string,
+  originalModelID: string
+): { providerID: string; modelID: string } {
+  const original = { providerID: originalProviderID, modelID: originalModelID }
+  const sessionAgentName = getSessionAgent(sessionID)
+  if (!sessionAgentName || !pluginConfig.agents) return original
+
+  const agentConfigKey = getAgentConfigKey(sessionAgentName)
+  const agentConfig = (pluginConfig.agents as Record<string, { compaction?: { model?: string } } | undefined>)[agentConfigKey]
+  const configuredModel = agentConfig?.compaction?.model
+  if (!configuredModel) return original
+
+  // Reject "model", "/model" and "provider/" so no empty ID reaches the caller.
+  const slashIndex = configuredModel.indexOf("/")
+  if (slashIndex <= 0 || slashIndex === configuredModel.length - 1) return original
+
+  return {
+    providerID: configuredModel.slice(0, slashIndex),
+    modelID: configuredModel.slice(slashIndex + 1),
+  }
+}
--- a/src/hooks/thinking-block-validator/hook.ts
+++ b/src/hooks/thinking-block-validator/hook.ts
@@ -21,6 +21,18 @@ interface MessageWithParts {
  parts: Part[]
 }

+interface ThinkingPart {
+  thinking?: string
+  text?: string
+}
+
+interface MessageInfoExtended {
+  id: string
+  role: string
+  sessionID?: string
+  modelID?: string
+}
+
 type MessagesTransformHook = {
  "experimental.chat.messages.transform"?: (
    input: Record<string, never>,
@@ -91,7 +103,7 @@ function findPreviousThinkingContent(
    for (const part of msg.parts) {
      const type = part.type as string
      if (type === "thinking" || type === "reasoning") {
-        const thinking = (part as any).thinking || (part as any).text
+        const thinking = (part as unknown as ThinkingPart).thinking || (part as unknown as ThinkingPart).text
        if (thinking && typeof thinking === "string" && thinking.trim().length > 0) {
          return thinking
        }
@@ -114,7 +126,7 @@ function prependThinkingBlock(message: MessageWithParts, thinkingContent: string
  const thinkingPart = {
    type: "thinking" as const,
    id: `prt_0000000000_synthetic_thinking`,
-    sessionID: (message.info as any).sessionID || "",
+    sessionID: (message.info as unknown as MessageInfoExtended).sessionID || "",
    messageID: message.info.id,
    thinking: thinkingContent,
    synthetic: true,
@@ -138,7 +150,7 @@ export function createThinkingBlockValidatorHook(): MessagesTransformHook {

      // Get the model info from the last user message
      const lastUserMessage = messages.findLast(m => m.info.role === "user")
-      const modelID = (lastUserMessage?.info as any)?.modelID || ""
+      const modelID = (lastUserMessage?.info as unknown as MessageInfoExtended)?.modelID || ""

      // Only process if extended thinking might be enabled
      if (!isExtendedThinkingModel(modelID)) {
--- a/src/hooks/write-existing-file-guard/hook.ts
+++ b/src/hooks/write-existing-file-guard/hook.ts
@@ -1,7 +1,7 @@
 import type { Hooks, PluginInput } from "@opencode-ai/plugin"

 import { existsSync, realpathSync } from "fs"
-import { basename, dirname, isAbsolute, join, normalize, relative, resolve, sep } from "path"
+import { basename, dirname, isAbsolute, join, normalize, relative, resolve } from "path"

 import { log } from "../../shared"

@@ -14,7 +14,7 @@ type GuardArgs = {

 const MAX_TRACKED_SESSIONS = 256
 export const MAX_TRACKED_PATHS_PER_SESSION = 1024
-const OUTSIDE_SESSION_MESSAGE = "Path must be inside session directory."
+const BLOCK_MESSAGE = "File already exists. Use edit tool instead."

 function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
@@ -37,6 +37,8 @@ function isPathInsideDirectory(pathToCheck: string, directory: string): boolean
  return relativePath === "" || (!relativePath.startsWith("..") && !isAbsolute(relativePath))
 }

+
+
 function toCanonicalPath(absolutePath: string): string {
  let canonicalPath = absolutePath

@@ -73,7 +75,6 @@ export function createWriteExistingFileGuardHook(ctx: PluginInput): Hooks {
  const readPermissionsBySession = new Map<string, Set<string>>()
  const sessionLastAccess = new Map<string, number>()
  const canonicalSessionRoot = toCanonicalPath(resolveInputPath(ctx, ctx.directory))
-  const sisyphusRoot = join(canonicalSessionRoot, ".sisyphus") + sep

  const touchSession = (sessionID: string): void => {
    sessionLastAccess.set(sessionID, Date.now())
@@ -174,16 +175,7 @@ export function createWriteExistingFileGuardHook(ctx: PluginInput): Hooks {
      const isInsideSessionDirectory = isPathInsideDirectory(canonicalPath, canonicalSessionRoot)

      if (!isInsideSessionDirectory) {
-        if (toolName === "read") {
-          return
-        }
-
-        log("[write-existing-file-guard] Blocking write outside session directory", {
-          sessionID: input.sessionID,
-          filePath,
-          resolvedPath,
-        })
-        throw new Error(OUTSIDE_SESSION_MESSAGE)
+        return
      }

      if (toolName === "read") {
@@ -206,7 +198,7 @@ export function createWriteExistingFileGuardHook(ctx: PluginInput): Hooks {
        return
      }

-      const isSisyphusPath = canonicalPath.startsWith(sisyphusRoot)
+      const isSisyphusPath = /[\\/]\.sisyphus[\\/]/.test(canonicalPath) // either separator: Windows paths use "\"
      if (isSisyphusPath) {
        log("[write-existing-file-guard] Allowing .sisyphus/** overwrite", {
          sessionID: input.sessionID,
--- a/src/hooks/write-existing-file-guard/index.test.ts
+++ b/src/hooks/write-existing-file-guard/index.test.ts
@@ -7,7 +7,6 @@ import { MAX_TRACKED_PATHS_PER_SESSION } from "./hook"
 import { createWriteExistingFileGuardHook } from "./index"

 const BLOCK_MESSAGE = "File already exists. Use edit tool instead."
-const OUTSIDE_SESSION_MESSAGE = "Path must be inside session directory."

 type Hook = ReturnType<typeof createWriteExistingFileGuardHook>

@@ -339,7 +338,7 @@ describe("createWriteExistingFileGuardHook", () => {
    ).resolves.toBeDefined()
  })

-  test("#given existing file outside session directory #when write executes #then blocks", async () => {
+  test("#given existing file outside session directory #when write executes #then allows", async () => {
    const outsideDir = mkdtempSync(join(tmpdir(), "write-existing-file-guard-outside-"))

    try {
@@ -349,9 +348,9 @@ describe("createWriteExistingFileGuardHook", () => {
      await expect(
        invoke({
          tool: "write",
-          outputArgs: { filePath: outsideFile, content: "attempted overwrite" },
+          outputArgs: { filePath: outsideFile, content: "allowed overwrite" },
        })
-      ).rejects.toThrow(OUTSIDE_SESSION_MESSAGE)
+      ).resolves.toBeDefined()
    } finally {
      rmSync(outsideDir, { recursive: true, force: true })
    }
--- a/src/plugin-handlers/config-handler.test.ts
+++ b/src/plugin-handlers/config-handler.test.ts
@@ -1161,8 +1161,6 @@ describe("per-agent todowrite/todoread deny when task_system enabled", () => {
    getAgentDisplayName("sisyphus"),
    getAgentDisplayName("hephaestus"),
    getAgentDisplayName("atlas"),
-  ])
-  const AGENTS_WITHOUT_TODO_DENY = new Set([
    getAgentDisplayName("prometheus"),
    getAgentDisplayName("sisyphus-junior"),
  ])
@@ -1206,10 +1204,6 @@ describe("per-agent todowrite/todoread deny when task_system enabled", () => {
      expect(agentResult[agentName]?.permission?.todowrite).toBe("deny")
      expect(agentResult[agentName]?.permission?.todoread).toBe("deny")
    }
-    for (const agentName of AGENTS_WITHOUT_TODO_DENY) {
-      expect(agentResult[agentName]?.permission?.todowrite).toBeUndefined()
-      expect(agentResult[agentName]?.permission?.todoread).toBeUndefined()
-    }
  })

  test("does not deny todowrite/todoread when task_system is disabled", async () => {
--- a/src/plugin-handlers/tool-config-handler.test.ts
+++ b/src/plugin-handlers/tool-config-handler.test.ts
@@ -0,0 +1,83 @@
+import { describe, it, expect } from "bun:test"
+import { applyToolConfig } from "./tool-config-handler"
+import type { OhMyOpenCodeConfig } from "../config"
+
+function createParams(overrides: {
+  taskSystem?: boolean
+  agents?: string[]
+}) {
+  const agentResult: Record<string, { permission?: Record<string, unknown> }> = {}
+  for (const agent of overrides.agents ?? []) {
+    agentResult[agent] = { permission: {} }
+  }
+
+  return {
+    config: { tools: {}, permission: {} } as Record<string, unknown>,
+    pluginConfig: {
+      experimental: { task_system: overrides.taskSystem ?? false },
+    } as OhMyOpenCodeConfig,
+    agentResult: agentResult as Record<string, unknown>,
+  }
+}
+
+describe("applyToolConfig", () => {
+  describe("#given task_system is enabled", () => {
+    describe("#when applying tool config", () => {
+      it("#then should deny todowrite and todoread globally", () => {
+        const params = createParams({ taskSystem: true })
+
+        applyToolConfig(params)
+
+        const tools = params.config.tools as Record<string, unknown>
+        expect(tools.todowrite).toBe(false)
+        expect(tools.todoread).toBe(false)
+      })
+
+      it.each([
+        "atlas",
+        "sisyphus",
+        "hephaestus",
+        "prometheus",
+        "sisyphus-junior",
+      ])("#then should deny todo tools for %s agent", (agentName) => {
+        const params = createParams({
+          taskSystem: true,
+          agents: [agentName],
+        })
+
+        applyToolConfig(params)
+
+        const agent = params.agentResult[agentName] as {
+          permission: Record<string, unknown>
+        }
+        expect(agent.permission.todowrite).toBe("deny")
+        expect(agent.permission.todoread).toBe("deny")
+      })
+    })
+  })
+
+  describe("#given task_system is disabled", () => {
+    describe("#when applying tool config", () => {
+      it.each([
+        "atlas",
+        "sisyphus",
+        "hephaestus",
+        "prometheus",
+        "sisyphus-junior",
+      ])("#then should NOT deny todo tools for %s agent", (agentName) => {
+        const params = createParams({
+          taskSystem: false,
+          agents: [agentName],
+        })
+
+        applyToolConfig(params)
+
+        const agent = params.agentResult[agentName] as {
+          permission: Record<string, unknown>
+        }
+        expect(agent.permission.todowrite).toBeUndefined()
+        expect(agent.permission.todoread).toBeUndefined()
+      })
+    })
+  })
+})
--- a/src/plugin-handlers/tool-config-handler.ts
+++ b/src/plugin-handlers/tool-config-handler.ts
@@ -84,6 +84,7 @@ export function applyToolConfig(params: {
      question: questionPermission,
      "task_*": "allow",
      teammate: "allow",
+      ...denyTodoTools,
    };
  }
  const junior = agentByKey(params.agentResult, "sisyphus-junior");
@@ -93,6 +94,7 @@ export function applyToolConfig(params: {
      task: "allow",
      "task_*": "allow",
      teammate: "allow",
+      ...denyTodoTools,
    };
  }

--- a/src/plugin/event.model-fallback.test.ts
+++ b/src/plugin/event.model-fallback.test.ts
@@ -53,7 +53,8 @@ describe("createEventHandler - model fallback", () => {
  test("triggers retry prompt for assistant message.updated APIError payloads (headless resume)", async () => {
    //#given
    const sessionID = "ses_message_updated_fallback"
-    const { handler, abortCalls, promptCalls } = createHandler()
+    const modelFallback = createModelFallbackHook()
+    const { handler, abortCalls, promptCalls } = createHandler({ hooks: { modelFallback } })

    //#when
    await handler({
@@ -95,7 +96,8 @@ describe("createEventHandler - model fallback", () => {
    //#given
    const sessionID = "ses_main_fallback_nested"
    setMainSession(sessionID)
-    const { handler, abortCalls, promptCalls } = createHandler()
+    const modelFallback = createModelFallbackHook()
+    const { handler, abortCalls, promptCalls } = createHandler({ hooks: { modelFallback } })

    //#when
    await handler({
@@ -340,4 +342,64 @@ describe("createEventHandler - model fallback", () => {
    expect(promptCalls).toEqual([sessionID, sessionID])
    expect(toastCalls.length).toBeGreaterThanOrEqual(0)
  })
+
+  test("does not trigger model-fallback retry when modelFallback hook is not provided (disabled by default)", async () => {
+    //#given
+    const sessionID = "ses_disabled_by_default"
+    setMainSession(sessionID)
+    const { handler, abortCalls, promptCalls } = createHandler()
+
+    //#when - message.updated with assistant error
+    await handler({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            id: "msg_err_disabled_1",
+            sessionID,
+            role: "assistant",
+            time: { created: 1, completed: 2 },
+            error: {
+              name: "APIError",
+              data: {
+                message:
+                  "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
+                isRetryable: true,
+              },
+            },
+            parentID: "msg_user_disabled_1",
+            modelID: "claude-opus-4-6-thinking",
+            providerID: "anthropic",
+            agent: "Sisyphus (Ultraworker)",
+            path: { cwd: "/tmp", root: "/tmp" },
+            cost: 0,
+            tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } },
+          },
+        },
+      },
+    })
+
+    //#when - session.error with retryable error
+    await handler({
+      event: {
+        type: "session.error",
+        properties: {
+          sessionID,
+          error: {
+            name: "UnknownError",
+            data: {
+              error: {
+                message:
+                  "Bad Gateway: {\"error\":{\"message\":\"unknown provider for model claude-opus-4-6-thinking\"}}",
+              },
+            },
+          },
+        },
+      },
+    })
+
+    //#then - no abort or prompt calls should have been made
+    expect(abortCalls).toEqual([])
+    expect(promptCalls).toEqual([])
+  })
 })
--- a/src/plugin/event.ts
+++ b/src/plugin/event.ts
@@ -1,53 +1,58 @@
-import type { OhMyOpenCodeConfig } from "../config"
-import type { PluginContext } from "./types"
+import type { OhMyOpenCodeConfig } from "../config";
+import type { PluginContext } from "./types";

 import {
  clearSessionAgent,
  getMainSessionID,
  getSessionAgent,
+  setMainSession,
  subagentSessions,
  syncSubagentSessions,
-  setMainSession,
  updateSessionAgent,
-} from "../features/claude-code-session-state"
-import { resetMessageCursor } from "../shared"
-import { lspManager } from "../tools"
-import { shouldRetryError } from "../shared/model-error-classifier"
-import { clearPendingModelFallback, clearSessionFallbackChain, setPendingModelFallback } from "../hooks/model-fallback/hook"
-import { log } from "../shared/logger"
-import { clearSessionModel, setSessionModel } from "../shared/session-model-state"
+} from "../features/claude-code-session-state";
+import {
+  clearPendingModelFallback,
+  clearSessionFallbackChain,
+  setPendingModelFallback,
+} from "../hooks/model-fallback/hook";
+import { resetMessageCursor } from "../shared";
+import { log } from "../shared/logger";
+import { shouldRetryError } from "../shared/model-error-classifier";
+import { clearSessionModel, setSessionModel } from "../shared/session-model-state";
+import { deleteSessionTools } from "../shared/session-tools-store";
+import { lspManager } from "../tools";

-import type { CreatedHooks } from "../create-hooks"
-import type { Managers } from "../create-managers"
-import { normalizeSessionStatusToIdle } from "./session-status-normalizer"
-import { pruneRecentSyntheticIdles } from "./recent-synthetic-idles"
+import type { CreatedHooks } from "../create-hooks";
+import type { Managers } from "../create-managers";
+import { pruneRecentSyntheticIdles } from "./recent-synthetic-idles";
+import { normalizeSessionStatusToIdle } from "./session-status-normalizer";

 type FirstMessageVariantGate = {
-  markSessionCreated: (sessionInfo: { id?: string; title?: string; parentID?: string } | undefined) => void
-  clear: (sessionID: string) => void
-}
+  markSessionCreated: (sessionInfo: { id?: string; title?: string; parentID?: string } | undefined) => void;
+  clear: (sessionID: string) => void;
+};

 function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null
+  return typeof value === "object" && value !== null;
 }

 function normalizeFallbackModelID(modelID: string): string {
  return modelID
    .replace(/-thinking$/i, "")
    .replace(/-max$/i, "")
-    .replace(/-high$/i, "")
+    .replace(/-high$/i, "");
 }

 function extractErrorName(error: unknown): string | undefined {
-  if (isRecord(error) && typeof error.name === "string") return error.name
-  if (error instanceof Error) return error.name
-  return undefined
+  if (isRecord(error) && typeof error.name === "string") return error.name;
+  if (error instanceof Error) return error.name;
+  return undefined;
 }

 function extractErrorMessage(error: unknown): string {
-  if (!error) return ""
-  if (typeof error === "string") return error
-  if (error instanceof Error) return error.message
+  if (!error) return "";
+  if (typeof error === "string") return error;
+  if (error instanceof Error) return error.message;

  if (isRecord(error)) {
    const candidates: unknown[] = [
@@ -56,116 +61,115 @@ function extractErrorMessage(error: unknown): string {
      error.error,
      isRecord(error.data) ? error.data.error : undefined,
      error.cause,
-    ]
+    ];

    for (const candidate of candidates) {
      if (isRecord(candidate) && typeof candidate.message === "string" && candidate.message.length > 0) {
-        return candidate.message
+        return candidate.message;
      }
    }
  }

  try {
-    return JSON.stringify(error)
+    return JSON.stringify(error);
  } catch {
-    return String(error)
+    return String(error);
  }
 }

-function extractProviderModelFromErrorMessage(
-  message: string,
-): { providerID?: string; modelID?: string } {
-  const lower = message.toLowerCase()
+function extractProviderModelFromErrorMessage(message: string): { providerID?: string; modelID?: string } {
+  const lower = message.toLowerCase();

-  const providerModel = lower.match(/model\s+not\s+found:\s*([a-z0-9_-]+)\s*\/\s*([a-z0-9._-]+)/i)
+  const providerModel = lower.match(/model\s+not\s+found:\s*([a-z0-9_-]+)\s*\/\s*([a-z0-9._-]+)/i);
  if (providerModel) {
    return {
      providerID: providerModel[1],
      modelID: providerModel[2],
-    }
+    };
  }

-  const modelOnly = lower.match(/unknown\s+provider\s+for\s+model\s+([a-z0-9._-]+)/i)
+  const modelOnly = lower.match(/unknown\s+provider\s+for\s+model\s+([a-z0-9._-]+)/i);
  if (modelOnly) {
    return {
      modelID: modelOnly[1],
-    }
+    };
  }

-  return {}
+  return {};
 }
-type EventInput = Parameters<
-  NonNullable<NonNullable<CreatedHooks["writeExistingFileGuard"]>["event"]>
->[0]
+type EventInput = Parameters<NonNullable<NonNullable<CreatedHooks["writeExistingFileGuard"]>["event"]>>[0];
 export function createEventHandler(args: {
-  ctx: PluginContext
-  pluginConfig: OhMyOpenCodeConfig
-  firstMessageVariantGate: FirstMessageVariantGate
-  managers: Managers
-  hooks: CreatedHooks
+  ctx: PluginContext;
+  pluginConfig: OhMyOpenCodeConfig;
+  firstMessageVariantGate: FirstMessageVariantGate;
+  managers: Managers;
+  hooks: CreatedHooks;
 }): (input: EventInput) => Promise<void> {
-  const { ctx, firstMessageVariantGate, managers, hooks } = args
+  const { ctx, firstMessageVariantGate, managers, hooks } = args;
  const pluginContext = ctx as {
-    directory: string
+    directory: string;
    client: {
      session: {
-        abort: (input: { path: { id: string } }) => Promise<unknown>
+        abort: (input: { path: { id: string } }) => Promise<unknown>;
        prompt: (input: {
-          path: { id: string }
-          body: { parts: Array<{ type: "text"; text: string }> }
-          query: { directory: string }
-        }) => Promise<unknown>
-      }
-    }
-  }
+          path: { id: string };
+          body: { parts: Array<{ type: "text"; text: string }> };
+          query: { directory: string };
+        }) => Promise<unknown>;
+      };
+    };
+  };
  const isRuntimeFallbackEnabled =
    hooks.runtimeFallback !== null &&
    hooks.runtimeFallback !== undefined &&
    (typeof args.pluginConfig.runtime_fallback === "boolean"
      ? args.pluginConfig.runtime_fallback
-      : (args.pluginConfig.runtime_fallback?.enabled ?? false))
+      : (args.pluginConfig.runtime_fallback?.enabled ?? false));
+
+  const isModelFallbackEnabled =
+    hooks.modelFallback !== null && hooks.modelFallback !== undefined;

  // Avoid triggering multiple abort+continue cycles for the same failing assistant message.
-  const lastHandledModelErrorMessageID = new Map<string, string>()
-  const lastHandledRetryStatusKey = new Map<string, string>()
-  const lastKnownModelBySession = new Map<string, { providerID: string; modelID: string }>()
+  const lastHandledModelErrorMessageID = new Map<string, string>();
+  const lastHandledRetryStatusKey = new Map<string, string>();
+  const lastKnownModelBySession = new Map<string, { providerID: string; modelID: string }>();

  const dispatchToHooks = async (input: EventInput): Promise<void> => {
-    await Promise.resolve(hooks.autoUpdateChecker?.event?.(input))
-    await Promise.resolve(hooks.claudeCodeHooks?.event?.(input))
-    await Promise.resolve(hooks.backgroundNotificationHook?.event?.(input))
-    await Promise.resolve(hooks.sessionNotification?.(input))
-    await Promise.resolve(hooks.todoContinuationEnforcer?.handler?.(input))
-    await Promise.resolve(hooks.unstableAgentBabysitter?.event?.(input))
-    await Promise.resolve(hooks.contextWindowMonitor?.event?.(input))
-    await Promise.resolve(hooks.directoryAgentsInjector?.event?.(input))
-    await Promise.resolve(hooks.directoryReadmeInjector?.event?.(input))
-    await Promise.resolve(hooks.rulesInjector?.event?.(input))
-    await Promise.resolve(hooks.thinkMode?.event?.(input))
-    await Promise.resolve(hooks.anthropicContextWindowLimitRecovery?.event?.(input))
-    await Promise.resolve(hooks.runtimeFallback?.event?.(input))
-    await Promise.resolve(hooks.agentUsageReminder?.event?.(input))
-    await Promise.resolve(hooks.categorySkillReminder?.event?.(input))
-    await Promise.resolve(hooks.interactiveBashSession?.event?.(input as EventInput))
-    await Promise.resolve(hooks.ralphLoop?.event?.(input))
-    await Promise.resolve(hooks.stopContinuationGuard?.event?.(input))
-    await Promise.resolve(hooks.compactionTodoPreserver?.event?.(input))
-    await Promise.resolve(hooks.writeExistingFileGuard?.event?.(input))
-    await Promise.resolve(hooks.atlasHook?.handler?.(input))
-  }
+    await Promise.resolve(hooks.autoUpdateChecker?.event?.(input));
+    await Promise.resolve(hooks.claudeCodeHooks?.event?.(input));
+    await Promise.resolve(hooks.backgroundNotificationHook?.event?.(input));
+    await Promise.resolve(hooks.sessionNotification?.(input));
+    await Promise.resolve(hooks.todoContinuationEnforcer?.handler?.(input));
+    await Promise.resolve(hooks.unstableAgentBabysitter?.event?.(input));
+    await Promise.resolve(hooks.contextWindowMonitor?.event?.(input));
+    await Promise.resolve(hooks.directoryAgentsInjector?.event?.(input));
+    await Promise.resolve(hooks.directoryReadmeInjector?.event?.(input));
+    await Promise.resolve(hooks.rulesInjector?.event?.(input));
+    await Promise.resolve(hooks.thinkMode?.event?.(input));
+    await Promise.resolve(hooks.anthropicContextWindowLimitRecovery?.event?.(input));
+    await Promise.resolve(hooks.runtimeFallback?.event?.(input));
+    await Promise.resolve(hooks.agentUsageReminder?.event?.(input));
+    await Promise.resolve(hooks.categorySkillReminder?.event?.(input));
+    await Promise.resolve(hooks.interactiveBashSession?.event?.(input as EventInput));
+    await Promise.resolve(hooks.ralphLoop?.event?.(input));
+    await Promise.resolve(hooks.stopContinuationGuard?.event?.(input));
+    await Promise.resolve(hooks.compactionTodoPreserver?.event?.(input));
+    await Promise.resolve(hooks.writeExistingFileGuard?.event?.(input));
+    await Promise.resolve(hooks.atlasHook?.handler?.(input));
+  };

-  const recentSyntheticIdles = new Map<string, number>()
-  const recentRealIdles = new Map<string, number>()
-  const DEDUP_WINDOW_MS = 500
+  const recentSyntheticIdles = new Map<string, number>();
+  const recentRealIdles = new Map<string, number>();
+  const DEDUP_WINDOW_MS = 500;

  const shouldAutoRetrySession = (sessionID: string): boolean => {
-    if (syncSubagentSessions.has(sessionID)) return true
-    const mainSessionID = getMainSessionID()
-    if (mainSessionID) return sessionID === mainSessionID
+    if (syncSubagentSessions.has(sessionID)) return true;
+    const mainSessionID = getMainSessionID();
+    if (mainSessionID) return sessionID === mainSessionID;
    // Headless runs (or resumed sessions) may not emit session.created, so mainSessionID can be unset.
    // In that case, treat any non-subagent session as the "main" interactive session.
-    return !subagentSessions.has(sessionID)
-  }
+    return !subagentSessions.has(sessionID);
+  };

  return async (input): Promise<void> => {
    pruneRecentSyntheticIdles({
@@ -173,230 +177,227 @@ export function createEventHandler(args: {
      recentRealIdles,
      now: Date.now(),
      dedupWindowMs: DEDUP_WINDOW_MS,
-    })
+    });

    if (input.event.type === "session.idle") {
-      const sessionID = (input.event.properties as Record<string, unknown> | undefined)?.sessionID as string | undefined
+      const sessionID = (input.event.properties as Record<string, unknown> | undefined)?.sessionID as
+        | string
+        | undefined;
      if (sessionID) {
-        const emittedAt = recentSyntheticIdles.get(sessionID)
+        const emittedAt = recentSyntheticIdles.get(sessionID);
        if (emittedAt && Date.now() - emittedAt < DEDUP_WINDOW_MS) {
-          recentSyntheticIdles.delete(sessionID)
-          return
+          recentSyntheticIdles.delete(sessionID);
+          return;
        }
-        recentRealIdles.set(sessionID, Date.now())
+        recentRealIdles.set(sessionID, Date.now());
      }
    }

-    await dispatchToHooks(input)
+    await dispatchToHooks(input);

-    const syntheticIdle = normalizeSessionStatusToIdle(input)
+    const syntheticIdle = normalizeSessionStatusToIdle(input);
    if (syntheticIdle) {
-      const sessionID = (syntheticIdle.event.properties as Record<string, unknown>)?.sessionID as string
-      const emittedAt = recentRealIdles.get(sessionID)
+      const sessionID = (syntheticIdle.event.properties as Record<string, unknown>)?.sessionID as string;
+      const emittedAt = recentRealIdles.get(sessionID);
      if (emittedAt && Date.now() - emittedAt < DEDUP_WINDOW_MS) {
-        recentRealIdles.delete(sessionID)
-        return
+        recentRealIdles.delete(sessionID);
+        return;
      }
-      recentSyntheticIdles.set(sessionID, Date.now())
-      await dispatchToHooks(syntheticIdle as EventInput)
+      recentSyntheticIdles.set(sessionID, Date.now());
+      await dispatchToHooks(syntheticIdle as EventInput);
    }

-    const { event } = input
-    const props = event.properties as Record<string, unknown> | undefined
+    const { event } = input;
+    const props = event.properties as Record<string, unknown> | undefined;

    if (event.type === "session.created") {
-      const sessionInfo = props?.info as
-        | { id?: string; title?: string; parentID?: string }
-        | undefined
+      const sessionInfo = props?.info as { id?: string; title?: string; parentID?: string } | undefined;

      if (!sessionInfo?.parentID) {
-        setMainSession(sessionInfo?.id)
+        setMainSession(sessionInfo?.id);
      }

-      firstMessageVariantGate.markSessionCreated(sessionInfo)
+      firstMessageVariantGate.markSessionCreated(sessionInfo);

      await managers.tmuxSessionManager.onSessionCreated(
        event as {
-          type: string
+          type: string;
          properties?: {
-            info?: { id?: string; parentID?: string; title?: string }
-          }
+            info?: { id?: string; parentID?: string; title?: string };
+          };
        },
-      )
+      );
    }

    if (event.type === "session.deleted") {
-      const sessionInfo = props?.info as { id?: string } | undefined
+      const sessionInfo = props?.info as { id?: string } | undefined;
      if (sessionInfo?.id === getMainSessionID()) {
-        setMainSession(undefined)
+        setMainSession(undefined);
      }

      if (sessionInfo?.id) {
-        clearSessionAgent(sessionInfo.id)
-        lastHandledModelErrorMessageID.delete(sessionInfo.id)
-        lastHandledRetryStatusKey.delete(sessionInfo.id)
-        lastKnownModelBySession.delete(sessionInfo.id)
-        clearPendingModelFallback(sessionInfo.id)
-        clearSessionFallbackChain(sessionInfo.id)
-        resetMessageCursor(sessionInfo.id)
-        firstMessageVariantGate.clear(sessionInfo.id)
-        clearSessionModel(sessionInfo.id)
-        syncSubagentSessions.delete(sessionInfo.id)
-        await managers.skillMcpManager.disconnectSession(sessionInfo.id)
-        await lspManager.cleanupTempDirectoryClients()
+        clearSessionAgent(sessionInfo.id);
+        lastHandledModelErrorMessageID.delete(sessionInfo.id);
+        lastHandledRetryStatusKey.delete(sessionInfo.id);
+        lastKnownModelBySession.delete(sessionInfo.id);
+        clearPendingModelFallback(sessionInfo.id);
+        clearSessionFallbackChain(sessionInfo.id);
+        resetMessageCursor(sessionInfo.id);
+        firstMessageVariantGate.clear(sessionInfo.id);
+        clearSessionModel(sessionInfo.id);
+        syncSubagentSessions.delete(sessionInfo.id);
+        deleteSessionTools(sessionInfo.id);
+        await managers.skillMcpManager.disconnectSession(sessionInfo.id);
+        await lspManager.cleanupTempDirectoryClients();
        await managers.tmuxSessionManager.onSessionDeleted({
          sessionID: sessionInfo.id,
-        })
+        });
      }
    }

    if (event.type === "message.updated") {
-      const info = props?.info as Record<string, unknown> | undefined
-      const sessionID = info?.sessionID as string | undefined
-      const agent = info?.agent as string | undefined
-      const role = info?.role as string | undefined
+      const info = props?.info as Record<string, unknown> | undefined;
+      const sessionID = info?.sessionID as string | undefined;
+      const agent = info?.agent as string | undefined;
+      const role = info?.role as string | undefined;
      if (sessionID && role === "user") {
        if (agent) {
-          updateSessionAgent(sessionID, agent)
+          updateSessionAgent(sessionID, agent);
        }
-        const providerID = info?.providerID as string | undefined
-        const modelID = info?.modelID as string | undefined
+        const providerID = info?.providerID as string | undefined;
+        const modelID = info?.modelID as string | undefined;
        if (providerID && modelID) {
-          lastKnownModelBySession.set(sessionID, { providerID, modelID })
-          setSessionModel(sessionID, { providerID, modelID })
+          lastKnownModelBySession.set(sessionID, { providerID, modelID });
+          setSessionModel(sessionID, { providerID, modelID });
        }
      }

      // Model fallback: in practice, API/model failures often surface as assistant message errors.
      // session.error events are not guaranteed for all providers, so we also observe message.updated.
-      if (sessionID && role === "assistant" && !isRuntimeFallbackEnabled) {
+      if (sessionID && role === "assistant" && !isRuntimeFallbackEnabled && isModelFallbackEnabled) {
        try {
-          const assistantMessageID = info?.id as string | undefined
-          const assistantError = info?.error
+          const assistantMessageID = info?.id as string | undefined;
+          const assistantError = info?.error;
          if (assistantMessageID && assistantError) {
-            const lastHandled = lastHandledModelErrorMessageID.get(sessionID)
+            const lastHandled = lastHandledModelErrorMessageID.get(sessionID);
            if (lastHandled === assistantMessageID) {
-              return
+              return;
            }

-            const errorName = extractErrorName(assistantError)
-            const errorMessage = extractErrorMessage(assistantError)
-            const errorInfo = { name: errorName, message: errorMessage }
+            const errorName = extractErrorName(assistantError);
+            const errorMessage = extractErrorMessage(assistantError);
+            const errorInfo = { name: errorName, message: errorMessage };

            if (shouldRetryError(errorInfo)) {
              // Prefer the agent/model/provider from the assistant message payload.
-              let agentName = agent ?? getSessionAgent(sessionID)
+              let agentName = agent ?? getSessionAgent(sessionID);
              if (!agentName && sessionID === getMainSessionID()) {
                if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
-                  agentName = "sisyphus"
+                  agentName = "sisyphus";
                } else if (errorMessage.includes("gpt-5")) {
-                  agentName = "hephaestus"
+                  agentName = "hephaestus";
                } else {
-                  agentName = "sisyphus"
+                  agentName = "sisyphus";
                }
              }

              if (agentName) {
-                const currentProvider = (info?.providerID as string | undefined) ?? "opencode"
-                const rawModel = (info?.modelID as string | undefined) ?? "claude-opus-4-6"
-                const currentModel = normalizeFallbackModelID(rawModel)
+                const currentProvider = (info?.providerID as string | undefined) ?? "opencode";
+                const rawModel = (info?.modelID as string | undefined) ?? "claude-opus-4-6";
+                const currentModel = normalizeFallbackModelID(rawModel);

-                const setFallback = setPendingModelFallback(
-                  sessionID,
-                  agentName,
-                  currentProvider,
-                  currentModel,
-                )
+                const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel);

-                if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
-                  lastHandledModelErrorMessageID.set(sessionID, assistantMessageID)
+                if (
+                  setFallback &&
+                  shouldAutoRetrySession(sessionID) &&
+                  !hooks.stopContinuationGuard?.isStopped(sessionID)
+                ) {
+                  lastHandledModelErrorMessageID.set(sessionID, assistantMessageID);

-                  await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {})
+                  await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {});
                  await pluginContext.client.session
                    .prompt({
                      path: { id: sessionID },
                      body: { parts: [{ type: "text", text: "continue" }] },
                      query: { directory: pluginContext.directory },
                    })
-                    .catch(() => {})
+                    .catch(() => {});
                }
              }
            }
          }
        } catch (err) {
-          log("[event] model-fallback error in message.updated:", { sessionID, error: err })
+          log("[event] model-fallback error in message.updated:", { sessionID, error: err });
        }
      }
    }

    if (event.type === "session.status") {
-      const sessionID = props?.sessionID as string | undefined
-      const status = props?.status as
-        | { type?: string; attempt?: number; message?: string; next?: number }
-        | undefined
+      const sessionID = props?.sessionID as string | undefined;
+      const status = props?.status as { type?: string; attempt?: number; message?: string; next?: number } | undefined;

-      if (sessionID && status?.type === "retry") {
+      if (sessionID && status?.type === "retry" && isModelFallbackEnabled) {
        try {
-          const retryMessage = typeof status.message === "string" ? status.message : ""
-          const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}`
+          const retryMessage = typeof status.message === "string" ? status.message : "";
+          const retryKey = `${status.attempt ?? "?"}:${status.next ?? "?"}:${retryMessage}`;
          if (lastHandledRetryStatusKey.get(sessionID) === retryKey) {
-            return
+            return;
          }
-          lastHandledRetryStatusKey.set(sessionID, retryKey)
+          lastHandledRetryStatusKey.set(sessionID, retryKey);

-          const errorInfo = { name: undefined as string | undefined, message: retryMessage }
+          const errorInfo = { name: undefined as string | undefined, message: retryMessage };
          if (shouldRetryError(errorInfo)) {
-            let agentName = getSessionAgent(sessionID)
+            let agentName = getSessionAgent(sessionID);
            if (!agentName && sessionID === getMainSessionID()) {
              if (retryMessage.includes("claude-opus") || retryMessage.includes("opus")) {
-                agentName = "sisyphus"
+                agentName = "sisyphus";
              } else if (retryMessage.includes("gpt-5")) {
-                agentName = "hephaestus"
+                agentName = "hephaestus";
              } else {
-                agentName = "sisyphus"
+                agentName = "sisyphus";
              }
            }

            if (agentName) {
-              const parsed = extractProviderModelFromErrorMessage(retryMessage)
-              const lastKnown = lastKnownModelBySession.get(sessionID)
-              const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode"
-              let currentModel = parsed.modelID ?? lastKnown?.modelID ?? "claude-opus-4-6"
-              currentModel = normalizeFallbackModelID(currentModel)
+              const parsed = extractProviderModelFromErrorMessage(retryMessage);
+              const lastKnown = lastKnownModelBySession.get(sessionID);
+              const currentProvider = parsed.providerID ?? lastKnown?.providerID ?? "opencode";
+              let currentModel = parsed.modelID ?? lastKnown?.modelID ?? "claude-opus-4-6";
+              currentModel = normalizeFallbackModelID(currentModel);

-              const setFallback = setPendingModelFallback(
-                sessionID,
-                agentName,
-                currentProvider,
-                currentModel,
-              )
+              const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel);

-              if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
-                await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {})
+              if (
+                setFallback &&
+                shouldAutoRetrySession(sessionID) &&
+                !hooks.stopContinuationGuard?.isStopped(sessionID)
+              ) {
+                await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {});
                await pluginContext.client.session
                  .prompt({
                    path: { id: sessionID },
                    body: { parts: [{ type: "text", text: "continue" }] },
                    query: { directory: pluginContext.directory },
                  })
-                  .catch(() => {})
+                  .catch(() => {});
              }
            }
          }
        } catch (err) {
-          log("[event] model-fallback error in session.status:", { sessionID, error: err })
+          log("[event] model-fallback error in session.status:", { sessionID, error: err });
        }
      }
    }

    if (event.type === "session.error") {
      try {
-        const sessionID = props?.sessionID as string | undefined
-        const error = props?.error
+        const sessionID = props?.sessionID as string | undefined;
+        const error = props?.error;

-        const errorName = extractErrorName(error)
-        const errorMessage = extractErrorMessage(error)
-        const errorInfo = { name: errorName, message: errorMessage }
+        const errorName = extractErrorName(error);
+        const errorMessage = extractErrorMessage(error);
+        const errorInfo = { name: errorName, message: errorMessage };

        // First, try session recovery for internal errors (thinking blocks, tool results, etc.)
        if (hooks.sessionRecovery?.isRecoverableError(error)) {
@@ -405,8 +406,8 @@ export function createEventHandler(args: {
            role: "assistant" as const,
            sessionID,
            error,
-          }
-          const recovered = await hooks.sessionRecovery.handleSessionRecovery(messageInfo)
+          };
+          const recovered = await hooks.sessionRecovery.handleSessionRecovery(messageInfo);

          if (
            recovered &&
@@ -420,53 +421,52 @@ export function createEventHandler(args: {
                body: { parts: [{ type: "text", text: "continue" }] },
                query: { directory: pluginContext.directory },
              })
-              .catch(() => {})
+              .catch(() => {});
          }
-        } 
+        }
        // Second, try model fallback for model errors (rate limit, quota, provider issues, etc.)
-        else if (sessionID && shouldRetryError(errorInfo) && !isRuntimeFallbackEnabled) {
-          let agentName = getSessionAgent(sessionID)
-          
+        else if (sessionID && shouldRetryError(errorInfo) && !isRuntimeFallbackEnabled && isModelFallbackEnabled) {
+          let agentName = getSessionAgent(sessionID);
+
          if (!agentName && sessionID === getMainSessionID()) {
            if (errorMessage.includes("claude-opus") || errorMessage.includes("opus")) {
-              agentName = "sisyphus"
+              agentName = "sisyphus";
            } else if (errorMessage.includes("gpt-5")) {
-              agentName = "hephaestus"
+              agentName = "hephaestus";
            } else {
-              agentName = "sisyphus"
+              agentName = "sisyphus";
            }
          }
-          
-          if (agentName) {
-            const parsed = extractProviderModelFromErrorMessage(errorMessage)
-            const currentProvider = props?.providerID as string || parsed.providerID || "opencode"
-            let currentModel = props?.modelID as string || parsed.modelID || "claude-opus-4-6"
-            currentModel = normalizeFallbackModelID(currentModel)

-            const setFallback = setPendingModelFallback(
-              sessionID,
-              agentName,
-              currentProvider,
-              currentModel,
-            )
-            
-            if (setFallback && shouldAutoRetrySession(sessionID) && !hooks.stopContinuationGuard?.isStopped(sessionID)) {
-                await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {})
-                
-                await pluginContext.client.session
-                  .prompt({
-                    path: { id: sessionID },
-                    body: { parts: [{ type: "text", text: "continue" }] },
-                    query: { directory: pluginContext.directory },
-                  })
-                  .catch(() => {})
+          if (agentName) {
+            const parsed = extractProviderModelFromErrorMessage(errorMessage);
+            const currentProvider = (props?.providerID as string) || parsed.providerID || "opencode";
+            let currentModel = (props?.modelID as string) || parsed.modelID || "claude-opus-4-6";
+            currentModel = normalizeFallbackModelID(currentModel);
+
+            const setFallback = setPendingModelFallback(sessionID, agentName, currentProvider, currentModel);
+
+            if (
+              setFallback &&
+              shouldAutoRetrySession(sessionID) &&
+              !hooks.stopContinuationGuard?.isStopped(sessionID)
+            ) {
+              await pluginContext.client.session.abort({ path: { id: sessionID } }).catch(() => {});
+
+              await pluginContext.client.session
+                .prompt({
+                  path: { id: sessionID },
+                  body: { parts: [{ type: "text", text: "continue" }] },
+                  query: { directory: pluginContext.directory },
+                })
+                .catch(() => {});
            }
          }
        }
      } catch (err) {
-        const sessionID = props?.sessionID as string | undefined
-        log("[event] model-fallback error in session.error:", { sessionID, error: err })
+        const sessionID = props?.sessionID as string | undefined;
+        log("[event] model-fallback error in session.error:", { sessionID, error: err });
      }
    }
-  }
+  };
 }
--- a/src/plugin/hooks/create-session-hooks.ts
+++ b/src/plugin/hooks/create-session-hooks.ts
@@ -82,7 +82,7 @@ export function createSessionHooks(args: {
    isHookEnabled("preemptive-compaction") &&
    pluginConfig.experimental?.preemptive_compaction
      ? safeHook("preemptive-compaction", () =>
-          createPreemptiveCompactionHook(ctx, modelCacheState))
+          createPreemptiveCompactionHook(ctx, pluginConfig, modelCacheState))
      : null

  const sessionRecovery = isHookEnabled("session-recovery")
@@ -151,9 +151,10 @@ export function createSessionHooks(args: {
    }
  }

-  // Model fallback hook (configurable via disabled_hooks)
+  // Model fallback hook (configurable via model_fallback config + disabled_hooks)
  // This handles automatic model switching when model errors occur
-  const modelFallback = isHookEnabled("model-fallback")
+  const isModelFallbackConfigEnabled = pluginConfig.model_fallback ?? false
+  const modelFallback = isModelFallbackConfigEnabled && isHookEnabled("model-fallback")
    ? safeHook("model-fallback", () =>
      createModelFallbackHook({
        toast: async ({ title, message, variant, duration }) => {
@@ -174,7 +175,7 @@ export function createSessionHooks(args: {

  const anthropicContextWindowLimitRecovery = isHookEnabled("anthropic-context-window-limit-recovery")
    ? safeHook("anthropic-context-window-limit-recovery", () =>
-        createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental: pluginConfig.experimental }))
+        createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental: pluginConfig.experimental, pluginConfig }))
    : null

  const autoUpdateChecker = isHookEnabled("auto-update-checker")
--- a/src/plugin/tool-execute-before-session-notification.test.ts
+++ b/src/plugin/tool-execute-before-session-notification.test.ts
@@ -0,0 +1,33 @@
+const { describe, expect, test, spyOn } = require("bun:test")
+
+const sessionState = require("../features/claude-code-session-state")
+const { createToolExecuteBeforeHandler } = require("./tool-execute-before")
+
+describe("createToolExecuteBeforeHandler session notification sessionID", () => {
+  test("uses main session fallback when input sessionID is empty", async () => {
+    const mainSessionID = "ses_main"
+    const getMainSessionIDSpy = spyOn(sessionState, "getMainSessionID").mockReturnValue(mainSessionID)
+
+    let capturedSessionID: string | undefined
+    const hooks = {
+      sessionNotification: async (input) => {
+        capturedSessionID = input.event.properties?.sessionID
+      },
+    }
+
+    const handler = createToolExecuteBeforeHandler({
+      ctx: { client: { session: { messages: async () => ({ data: [] }) } } },
+      hooks,
+    })
+
+    await handler(
+      { tool: "question", sessionID: "", callID: "call_q" },
+      { args: { questions: [{ question: "Continue?", options: [{ label: "Yes" }] }] } },
+    )
+
+    expect(getMainSessionIDSpy).toHaveBeenCalled()
+    expect(capturedSessionID).toBe(mainSessionID)
+  })
+})
+
+export {}
--- a/src/plugin/tool-execute-before.test.ts
+++ b/src/plugin/tool-execute-before.test.ts
@@ -31,6 +31,51 @@ describe("createToolExecuteBeforeHandler", () => {
    await expect(run).resolves.toBeUndefined()
  })

+  test("triggers session notification hook for question tools", async () => {
+    // Flipped by the stub hook; stays false if the handler never notifies.
+    let hookInvoked = false
+    const emptyMessages = async () => ({ data: [] })
+    const ctx = { client: { session: { messages: emptyMessages } } }
+
+    const hooks = {
+      sessionNotification: async ({ event }: { event: { type: string; properties?: Record<string, unknown> } }) => {
+        hookInvoked = true
+        // The event must describe the intercepted question call.
+        expect(event.type).toBe("tool.execute.before")
+        expect(event.properties?.sessionID).toBe("ses_q")
+        expect(event.properties?.tool).toBe("question")
+      },
+    }
+
+    const questionArgs: Record<string, unknown> = {
+      questions: [{ question: "Proceed?", options: [{ label: "Yes" }] }],
+    }
+    const handler = createToolExecuteBeforeHandler({ ctx, hooks })
+
+    await handler({ tool: "question", sessionID: "ses_q", callID: "call_q" }, { args: questionArgs })
+
+    expect(hookInvoked).toBe(true)
+  })
+
+  test("does not trigger session notification hook for non-question tools", async () => {
+    // Flipped by the stub hook; must remain false for tools other than question tools.
+    let hookInvoked = false
+    const emptyMessages = async () => ({ data: [] })
+    const ctx = { client: { session: { messages: emptyMessages } } }
+    const hooks = {
+      sessionNotification: async () => {
+        hookInvoked = true
+      },
+    }
+
+    const bashArgs: Record<string, unknown> = { command: "pwd" }
+    const handler = createToolExecuteBeforeHandler({ ctx, hooks })
+
+    await handler({ tool: "bash", sessionID: "ses_b", callID: "call_b" }, { args: bashArgs })
+
+    expect(hookInvoked).toBe(false)
+  })
+
  describe("task tool subagent_type normalization", () => {
    const emptyHooks = {}

--- a/src/plugin/tool-execute-before.ts
+++ b/src/plugin/tool-execute-before.ts
@@ -30,6 +30,26 @@ export function createToolExecuteBeforeHandler(args: {
    await hooks.prometheusMdOnly?.["tool.execute.before"]?.(input, output)
    await hooks.sisyphusJuniorNotepad?.["tool.execute.before"]?.(input, output)
    await hooks.atlasHook?.["tool.execute.before"]?.(input, output)
+
+    const normalizedToolName = input.tool.toLowerCase()
+    if (
+      normalizedToolName === "question"
+      || normalizedToolName === "ask_user_question"
+      || normalizedToolName === "askuserquestion"
+    ) {
+      const sessionID = input.sessionID || getMainSessionID()
+      await hooks.sessionNotification?.({
+        event: {
+          type: "tool.execute.before",
+          properties: {
+            sessionID,
+            tool: input.tool,
+            args: output.args,
+          },
+        },
+      })
+    }
+
    if (input.tool === "task") {
      const argsObject = output.args
      const category = typeof argsObject.category === "string" ? argsObject.category : undefined
--- a/src/shared/command-executor/execute-hook-command.ts
+++ b/src/shared/command-executor/execute-hook-command.ts
@@ -1,78 +1,152 @@
-import { spawn } from "node:child_process"
-import { getHomeDirectory } from "./home-directory"
-import { findBashPath, findZshPath } from "./shell-path"
+import { spawn } from "node:child_process";
+import { getHomeDirectory } from "./home-directory";
+import { findBashPath, findZshPath } from "./shell-path";

 export interface CommandResult {
-	exitCode: number
-	stdout?: string
-	stderr?: string
+  exitCode: number;
+  stdout?: string;
+  stderr?: string;
 }

+const DEFAULT_HOOK_TIMEOUT_MS = 30_000;
+const SIGKILL_GRACE_MS = 5_000;
+
 export interface ExecuteHookOptions {
-	forceZsh?: boolean
-	zshPath?: string
+  forceZsh?: boolean;
+  zshPath?: string;
+  /** Timeout in milliseconds. Process is killed after this. Default: 30000 */
+  timeoutMs?: number;
 }

+/**
+ * Runs a hook command through the system shell, writing `stdin` to the child
+ * and collecting its output.
+ *
+ * Before spawning, `~` (at the start of the command, or after whitespace when
+ * followed by `/`) is replaced with the home directory, and
+ * `$CLAUDE_PROJECT_DIR` / `${CLAUDE_PROJECT_DIR}` with `cwd`. With
+ * `options.forceZsh` the command is wrapped in `zsh -lc` (or `bash -lc` when
+ * no zsh is found). A command still running after the timeout is sent SIGTERM,
+ * followed by SIGKILL once the grace period elapses.
+ *
+ * @param command - Shell command line to run.
+ * @param stdin - Text written to the child's standard input.
+ * @param cwd - Working directory; also exported as `CLAUDE_PROJECT_DIR`.
+ * @param options - Shell selection and timeout overrides.
+ * @returns The exit code (1 when the child reports none) with trimmed
+ *   stdout/stderr. Spawn errors resolve with exit code 1 and the error
+ *   message in `stderr` instead of rejecting.
+ */
 export async function executeHookCommand(
-	command: string,
-	stdin: string,
-	cwd: string,
-	options?: ExecuteHookOptions,
+  command: string,
+  stdin: string,
+  cwd: string,
+  options?: ExecuteHookOptions,
 ): Promise<CommandResult> {
-	const home = getHomeDirectory()
+  const home = getHomeDirectory();
+  const timeoutMs = options?.timeoutMs ?? DEFAULT_HOOK_TIMEOUT_MS;

-	const expandedCommand = command
-		.replace(/^~(?=\/|$)/g, home)
-		.replace(/\s~(?=\/)/g, ` ${home}`)
-		.replace(/\$CLAUDE_PROJECT_DIR/g, cwd)
-		.replace(/\$\{CLAUDE_PROJECT_DIR\}/g, cwd)
+  const expandedCommand = command
+    .replace(/^~(?=\/|$)/g, home)
+    .replace(/\s~(?=\/)/g, ` ${home}`)
+    .replace(/\$CLAUDE_PROJECT_DIR/g, cwd)
+    .replace(/\$\{CLAUDE_PROJECT_DIR\}/g, cwd);

-	let finalCommand = expandedCommand
+  let finalCommand = expandedCommand;

-	if (options?.forceZsh) {
-		const zshPath = findZshPath(options.zshPath)
-		const escapedCommand = expandedCommand.replace(/'/g, "'\\''")
-		if (zshPath) {
-			finalCommand = `${zshPath} -lc '${escapedCommand}'`
-		} else {
-			const bashPath = findBashPath()
-			if (bashPath) {
-				finalCommand = `${bashPath} -lc '${escapedCommand}'`
-			}
-		}
-	}
+  if (options?.forceZsh) {
+    const zshPath = findZshPath(options.zshPath);
+    const escapedCommand = expandedCommand.replace(/'/g, "'\\''");
+    if (zshPath) {
+      finalCommand = `${zshPath} -lc '${escapedCommand}'`;
+    } else {
+      const bashPath = findBashPath();
+      if (bashPath) {
+        finalCommand = `${bashPath} -lc '${escapedCommand}'`;
+      }
+    }
+  }

-	return new Promise((resolve) => {
-		const proc = spawn(finalCommand, {
-			cwd,
-			shell: true,
-			env: { ...process.env, HOME: home, CLAUDE_PROJECT_DIR: cwd },
-		})
+  return new Promise(resolve => {
+    let settled = false;
+    let killTimer: ReturnType<typeof setTimeout> | null = null;
+    // Declared ahead of `settle` so that closure never reads an uninitialised binding.
+    let timeoutTimer: ReturnType<typeof setTimeout> | null = null;

-		let stdout = ""
-		let stderr = ""
+    const isWin32 = process.platform === "win32";
+    const proc = spawn(finalCommand, {
+      cwd,
+      shell: true,
+      detached: !isWin32,
+      env: { ...process.env, HOME: home, CLAUDE_PROJECT_DIR: cwd },
+    });

-		proc.stdout?.on("data", (data) => {
-			stdout += data.toString()
-		})
+    let stdout = "";
+    let stderr = "";

-		proc.stderr?.on("data", (data) => {
-			stderr += data.toString()
-		})
+    // Decode at the stream level so a multi-byte UTF-8 character that straddles
+    // a chunk boundary is not corrupted by per-chunk toString().
+    proc.stdout?.setEncoding("utf8").on("data", (chunk: string) => {
+      stdout += chunk;
+    });

-		proc.stdin?.write(stdin)
-		proc.stdin?.end()
+    proc.stderr?.setEncoding("utf8").on("data", (chunk: string) => {
+      stderr += chunk;
+    });

-		proc.on("close", (code) => {
-			resolve({
-				exitCode: code ?? 0,
-				stdout: stdout.trim(),
-				stderr: stderr.trim(),
-			})
-		})
+    proc.stdin?.on("error", () => {});
+    proc.stdin?.write(stdin);
+    proc.stdin?.end();

-		proc.on("error", (err) => {
-			resolve({ exitCode: 1, stderr: err.message })
-		})
-	})
+    const settle = (result: CommandResult) => {
+      if (settled) return;
+      settled = true;
+      if (killTimer) clearTimeout(killTimer);
+      if (timeoutTimer) clearTimeout(timeoutTimer);
+      resolve(result);
+    };
+
+    proc.on("close", code => {
+      settle({
+        exitCode: code ?? 1,
+        stdout: stdout.trim(),
+        stderr: stderr.trim(),
+      });
+    });
+
+    proc.on("error", err => {
+      settle({ exitCode: 1, stderr: err.message });
+    });
+
+    const killProcessGroup = (signal: NodeJS.Signals) => {
+      try {
+        if (!isWin32 && proc.pid) {
+          try {
+            process.kill(-proc.pid, signal);
+          } catch {
+            proc.kill(signal);
+          }
+        } else {
+          proc.kill(signal);
+        }
+      } catch {}
+    };
+
+    timeoutTimer = setTimeout(() => {
+      if (settled) return;
+      // Kill entire process group to avoid orphaned children
+      killProcessGroup("SIGTERM");
+      killTimer = setTimeout(() => {
+        if (settled) return;
+        killProcessGroup("SIGKILL");
+      }, SIGKILL_GRACE_MS);
+      // Append timeout notice to stderr
+      stderr += `\nHook command timed out after ${timeoutMs}ms`;
+    }, timeoutMs);
+
+    // Don't let the timeout timer keep the process alive
+    if (timeoutTimer && typeof timeoutTimer === "object" && "unref" in timeoutTimer) {
+      timeoutTimer.unref();
+    }
+  });
 }
--- a/src/shared/model-availability.ts
+++ b/src/shared/model-availability.ts
@@ -199,7 +199,7 @@ export async function fetchAvailableModels(
 				// Handle both string[] (legacy) and object[] (with metadata) formats
 				const modelId = typeof modelItem === 'string' 
 					? modelItem 
-					: (modelItem as any)?.id
+					: modelItem?.id
 				
 				if (modelId) {
 					modelSet.add(`${providerId}/${modelId}`)
--- a/src/shared/models-json-cache-reader.ts
+++ b/src/shared/models-json-cache-reader.ts
@@ -1,52 +0,0 @@
-import { existsSync, readFileSync } from "fs"
-import { join } from "path"
-import { getOpenCodeCacheDir } from "./data-path"
-import { log } from "./logger"
-import { isRecord } from "./record-type-guard"
-
-export function addModelsFromModelsJsonCache(
-	connectedProviders: Set<string>,
-	modelSet: Set<string>,
-): boolean {
-	const cacheFile = join(getOpenCodeCacheDir(), "models.json")
-	if (!existsSync(cacheFile)) {
-		log("[fetchAvailableModels] models.json cache file not found, falling back to client")
-		return false
-	}
-
-	try {
-		const content = readFileSync(cacheFile, "utf-8")
-		const data: unknown = JSON.parse(content)
-		if (!isRecord(data)) {
-			return false
-		}
-
-		const providerIds = Object.keys(data)
-		log("[fetchAvailableModels] providers found in models.json", {
-			count: providerIds.length,
-			providers: providerIds.slice(0, 10),
-		})
-
-		const previousSize = modelSet.size
-		for (const providerId of providerIds) {
-			if (!connectedProviders.has(providerId)) continue
-			const providerValue = data[providerId]
-			if (!isRecord(providerValue)) continue
-			const modelsValue = providerValue["models"]
-			if (!isRecord(modelsValue)) continue
-			for (const modelKey of Object.keys(modelsValue)) {
-				modelSet.add(`${providerId}/${modelKey}`)
-			}
-		}
-
-		log("[fetchAvailableModels] parsed models from models.json (NO whitelist filtering)", {
-			count: modelSet.size,
-			connectedProviders: Array.from(connectedProviders).slice(0, 5),
-		})
-
-		return modelSet.size > previousSize
-	} catch (err) {
-		log("[fetchAvailableModels] error", { error: String(err) })
-		return false
-	}
-}
--- a/src/shared/open-code-client-accessors.ts
+++ b/src/shared/open-code-client-accessors.ts
@@ -1,20 +0,0 @@
-import type { ModelListFunction, ProviderListFunction } from "./open-code-client-shapes"
-import { isRecord } from "./record-type-guard"
-
-export function getProviderListFunction(client: unknown): ProviderListFunction | null {
-	if (!isRecord(client)) return null
-	const provider = client["provider"]
-	if (!isRecord(provider)) return null
-	const list = provider["list"]
-	if (typeof list !== "function") return null
-	return list as ProviderListFunction
-}
-
-export function getModelListFunction(client: unknown): ModelListFunction | null {
-	if (!isRecord(client)) return null
-	const model = client["model"]
-	if (!isRecord(model)) return null
-	const list = model["list"]
-	if (typeof list !== "function") return null
-	return list as ModelListFunction
-}
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`export { getMessageDir } from "../../shared"`
				`@@ -1 +0,0 @@`
				`export { createHashlineEditDiffEnhancerHook } from "./hook"`