chore: remove console.* debug logging from non-CLI source files

revert(todo-continuation): remove [TODO-DIAG] console.error debug logging
diag(todo-continuation): add comprehensive debug logging for session idle handling
2026-03-18 15:29:50 +09:00 · 2026-03-18 15:10:51 +09:00 · 2026-03-18 14:45:14 +09:00 · 2026-03-18 14:32:27 +09:00 · 2026-03-18 14:19:23 +09:00 · 2026-03-18 14:19:12 +09:00
125 changed files with 4666 additions and 821 deletions
--- a/.github/workflows/publish-platform.yml
+++ b/.github/workflows/publish-platform.yml
@@ -59,20 +59,39 @@ jobs:
      - name: Check if already published
        id: check
        run: |
-          PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
          VERSION="${{ inputs.version }}"
-          STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
-          # Convert platform name for output (replace - with _)
          PLATFORM_KEY="${{ matrix.platform }}"
          PLATFORM_KEY="${PLATFORM_KEY//-/_}"
-          if [ "$STATUS" = "200" ]; then
+          
+          # Check oh-my-opencode
+          OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
+          # Check oh-my-openagent
+          OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
+          
+          echo "oh-my-opencode-${{ matrix.platform }}@${VERSION}: ${OC_STATUS}"
+          echo "oh-my-openagent-${{ matrix.platform }}@${VERSION}: ${OA_STATUS}"
+          
+          if [ "$OC_STATUS" = "200" ]; then
+            echo "skip_opencode=true" >> $GITHUB_OUTPUT
+            echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
+          else
+            echo "skip_opencode=false" >> $GITHUB_OUTPUT
+            echo "→ oh-my-opencode-${{ matrix.platform }}@${VERSION} needs publishing"
+          fi
+          
+          if [ "$OA_STATUS" = "200" ]; then
+            echo "skip_openagent=true" >> $GITHUB_OUTPUT
+            echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
+          else
+            echo "skip_openagent=false" >> $GITHUB_OUTPUT
+            echo "→ oh-my-openagent-${{ matrix.platform }}@${VERSION} needs publishing"
+          fi
+          
+          # Skip build only if BOTH are already published
+          if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
            echo "skip=true" >> $GITHUB_OUTPUT
-            echo "skip_${PLATFORM_KEY}=true" >> $GITHUB_OUTPUT
-            echo "✓ ${PKG_NAME}@${VERSION} already published"
          else
            echo "skip=false" >> $GITHUB_OUTPUT
-            echo "skip_${PLATFORM_KEY}=false" >> $GITHUB_OUTPUT
-            echo "→ ${PKG_NAME}@${VERSION} needs publishing"
          fi

      - name: Update version in package.json
@@ -207,23 +226,38 @@ jobs:
      matrix:
        platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline]
    steps:
-      - name: Check if oh-my-opencode already published
+      - name: Check if already published
        id: check
        run: |
-          PKG_NAME="oh-my-opencode-${{ matrix.platform }}"
          VERSION="${{ inputs.version }}"
-          STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/${PKG_NAME}/${VERSION}")
-          if [ "$STATUS" = "200" ]; then
-            echo "skip=true" >> $GITHUB_OUTPUT
-            echo "✓ ${PKG_NAME}@${VERSION} already published, skipping"
+          
+          OC_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-opencode-${{ matrix.platform }}/${VERSION}")
+          OA_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent-${{ matrix.platform }}/${VERSION}")
+          
+          if [ "$OC_STATUS" = "200" ]; then
+            echo "skip_opencode=true" >> $GITHUB_OUTPUT
+            echo "✓ oh-my-opencode-${{ matrix.platform }}@${VERSION} already published"
          else
-            echo "skip=false" >> $GITHUB_OUTPUT
-            echo "→ ${PKG_NAME}@${VERSION} will be published"
+            echo "skip_opencode=false" >> $GITHUB_OUTPUT
+          fi
+          
+          if [ "$OA_STATUS" = "200" ]; then
+            echo "skip_openagent=true" >> $GITHUB_OUTPUT
+            echo "✓ oh-my-openagent-${{ matrix.platform }}@${VERSION} already published"
+          else
+            echo "skip_openagent=false" >> $GITHUB_OUTPUT
+          fi
+          
+          # skip_all=true only when BOTH packages are already published; otherwise the artifact is still needed
+          if [ "$OC_STATUS" = "200" ] && [ "$OA_STATUS" = "200" ]; then
+            echo "skip_all=true" >> $GITHUB_OUTPUT
+          else
+            echo "skip_all=false" >> $GITHUB_OUTPUT
          fi

      - name: Download artifact
        id: download
-        if: steps.check.outputs.skip != 'true'
+        if: steps.check.outputs.skip_all != 'true'
        continue-on-error: true
        uses: actions/download-artifact@v4
        with:
@@ -231,7 +265,7 @@ jobs:
          path: .

      - name: Extract artifact
-        if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
+        if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
        run: |
          PLATFORM="${{ matrix.platform }}"
          mkdir -p packages/${PLATFORM}
@@ -247,13 +281,13 @@ jobs:
          ls -la packages/${PLATFORM}/bin/

      - uses: actions/setup-node@v4
-        if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
+        if: steps.check.outputs.skip_all != 'true' && steps.download.outcome == 'success'
        with:
          node-version: "24"
          registry-url: "https://registry.npmjs.org"

-      - name: Publish ${{ matrix.platform }}
-        if: steps.check.outputs.skip != 'true' && steps.download.outcome == 'success'
+      - name: Publish oh-my-opencode-${{ matrix.platform }}
+        if: steps.check.outputs.skip_opencode != 'true' && steps.download.outcome == 'success'
        run: |
          cd packages/${{ matrix.platform }}
          
@@ -267,3 +301,25 @@ jobs:
          NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
          NPM_CONFIG_PROVENANCE: true
        timeout-minutes: 15
+
+      - name: Publish oh-my-openagent-${{ matrix.platform }}
+        if: steps.check.outputs.skip_openagent != 'true' && steps.download.outcome == 'success'
+        run: |
+          cd packages/${{ matrix.platform }}
+          # Retarget package.json at oh-my-openagent-<platform>: set name and description, and
+          # replace bin with a single "oh-my-openagent" entry pointing at the first existing bin target.
+          jq --arg name "oh-my-openagent-${{ matrix.platform }}" \
+             --arg desc "Platform-specific binary for oh-my-openagent (${{ matrix.platform }})" \
+             '.name = $name | .description = $desc | .bin = {"oh-my-openagent": (.bin | to_entries | .[0].value)}' \
+             package.json > tmp.json && mv tmp.json package.json
+          
+          TAG_ARG=""
+          if [ -n "${{ inputs.dist_tag }}" ]; then
+            TAG_ARG="--tag ${{ inputs.dist_tag }}"
+          fi
+          # $TAG_ARG is unquoted: empty adds no argument; "--tag <value>" splits into two arguments.
+          npm publish --access public --provenance $TAG_ARG
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
+          NPM_CONFIG_PROVENANCE: true
+        timeout-minutes: 15
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -216,6 +216,48 @@ jobs:
          NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
          NPM_CONFIG_PROVENANCE: true

+      - name: Check if oh-my-openagent already published
+        id: check-openagent  # sets outputs.skip: 'true' when the registry returns 200 for this version
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://registry.npmjs.org/oh-my-openagent/${VERSION}")
+          if [ "$STATUS" = "200" ]; then
+            echo "skip=true" >> "$GITHUB_OUTPUT"
+            echo "✓ oh-my-openagent@${VERSION} already published"
+          else
+            echo "skip=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Publish oh-my-openagent
+        if: steps.check-openagent.outputs.skip != 'true'
+        run: |
+          VERSION="${{ steps.version.outputs.version }}"
+          # Retarget package.json at oh-my-openagent: set name and version, rename each
+          # oh-my-opencode-* optional dependency to oh-my-openagent-*, and pin all of them to $VERSION.
+          jq --arg v "$VERSION" '
+            .name = "oh-my-openagent" |
+            .version = $v |
+            .optionalDependencies = (
+              .optionalDependencies | to_entries |
+              map(.key = (.key | sub("^oh-my-opencode-"; "oh-my-openagent-")) | .value = $v) |
+              from_entries
+            )
+          ' package.json > tmp.json && mv tmp.json package.json
+          # Best-effort publish: a failed npm publish is reported as a workflow warning instead of failing the step.
+          TAG_ARG=""
+          if [ -n "${{ steps.version.outputs.dist_tag }}" ]; then
+            TAG_ARG="--tag ${{ steps.version.outputs.dist_tag }}"
+          fi
+          npm publish --access public --provenance $TAG_ARG || echo "::warning::oh-my-openagent publish failed"
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NODE_AUTH_TOKEN }}
+          NPM_CONFIG_PROVENANCE: true
+
+      - name: Restore package.json
+        if: steps.check-openagent.outputs.skip != 'true'
+        run: |
+          git checkout -- package.json  # discard the oh-my-openagent rewrite of package.json made by the publish step
+
  trigger-platform:
    runs-on: ubuntu-latest
    needs: publish-main
--- a/.opencode/skills/github-triage/SKILL.md
+++ b/.opencode/skills/github-triage/SKILL.md
@@ -136,7 +136,36 @@ fi

 ---

-## Phase 3: Spawn Subagents
+## Phase 3: Spawn Subagents (Individual Tool Calls)
+
+**CRITICAL: Create tasks ONE BY ONE using individual `task_create` tool calls. NEVER batch or script.**
+
+For each item, execute these steps sequentially:
+
+### Step 3.1: Create Task Record
+```typescript
+task_create(
+  subject="Triage: #{number} {title}",
+  description="GitHub {issue|PR} triage analysis - {type}",
+  metadata={"type": "{ISSUE_QUESTION|ISSUE_BUG|ISSUE_FEATURE|ISSUE_OTHER|PR_BUGFIX|PR_OTHER}", "number": {number}}
+)
+```
+
+### Step 3.2: Spawn Analysis Subagent (Background)
+```typescript
+task(
+  category="quick",
+  run_in_background=true,
+  load_skills=[],
+  prompt=SUBAGENT_PROMPT
+)
+```
+
+**ABSOLUTE RULES for Subagents:**
+- **ONLY ANALYZE** - Never take action on GitHub (no comments, merges, closes)
+- **READ-ONLY** - Use tools only for reading code/GitHub data
+- **WRITE REPORT ONLY** - Output goes to `{REPORT_DIR}/{issue|pr}-{number}.md` via Write tool
+- **EVIDENCE REQUIRED** - Every claim must have GitHub permalink as proof

 ```
 For each item:
@@ -170,6 +199,7 @@ ABSOLUTE RULES (violating ANY = critical failure):
 - Your ONLY writable output: {REPORT_DIR}/{issue|pr}-{number}.md via the Write tool
 ```

+
 ---

 ### ISSUE_QUESTION
--- a/FIX-BLOCKS.md
+++ b/FIX-BLOCKS.md
@@ -0,0 +1,122 @@
+# Pre-Publish BLOCK Issues: Fix ALL Before Release
+
+Two independent pre-publish reviews (Opus 4.6 + GPT-5.4) both concluded **BLOCK -- do not publish**. You must fix ALL blocking issues below using UltraBrain parallel agents. Work TDD-style: write/update tests first, then fix, verify tests pass.
+
+## Strategy
+
+Use ultrawork (ulw) to spawn UltraBrain agents in parallel. Each UB agent gets a non-overlapping scope. After all agents complete, run bun test to verify everything passes. Commit atomically per fix group.
+
+---
+
+## CRITICAL BLOCKERS (must fix -- 6 items)
+
+### C1: Hashline Backward Compatibility
+**Problem:** Strict whitespace hashing in hashline changes LINE#ID values for indented lines. Breaks existing anchors in cached/persisted edit operations.
+**Fix:** Add a compatibility shim -- when lookup by the new hash fails, fall back to the legacy hash (without strict whitespace). Alternatively, version the hash format.
+**Files:** Look for hashline-related files in src/tools/ or src/shared/
+
+### C2: OpenAI-Only Model Catalog Broken with OpenCode-Go
+**Problem:** isOpenAiOnlyAvailability() does not exclude availability.opencodeGo. When OpenCode-Go is present, OpenAI-only detection is wrong -- models get misrouted.
+**Fix:** Add !availability.opencodeGo check to isOpenAiOnlyAvailability().
+**Files:** Model/provider system files -- search for isOpenAiOnlyAvailability
+
+### C3: CLI/Runtime Model Table Divergence
+**Problem:** Model tables disagree between CLI install-time and runtime:
+- ultrabrain: gpt-5.3-codex in CLI vs gpt-5.4 in runtime
+- atlas: claude-sonnet-4-5 in CLI vs claude-sonnet-4-6 in runtime
+- unspecified-high: the CLI and runtime values also diverge
+**Fix:** Reconcile all model tables. Pick the correct model for each and make CLI + runtime match.
+**Files:** Search for model table definitions, agent configs, CLI model references
+
+### C4: atlas/metis/sisyphus-junior Missing OpenAI Fallbacks
+**Problem:** These agents can resolve to opencode/glm-4.7-free or undefined in OpenAI-only environments. No valid OpenAI fallback paths exist.
+**Fix:** Add valid OpenAI model fallback paths for all agents that need them.
+**Files:** Agent config/model resolution code
+
+### C5: model_fallback Default Mismatch
+**Problem:** Schema and docs say model_fallback defaults to false, but runtime treats unset as true. Silent behavior change for all users.
+**Fix:** Align -- either update schema/docs to say true, or fix runtime to default to false. Check what the intended behavior is from git history.
+**Files:** Schema definition, runtime config loading
+
+### C6: background_output Default Changed
+**Problem:** background_output now defaults to full_session=true. Old callers get different output format without code changes.
+**Fix:** Either document this change clearly, or restore old default and make full_session opt-in.
+**Files:** Background output handling code
+
+---
+
+## HIGH PRIORITY (strongly recommended -- 4 items)
+
+### H1: Runtime Fallback session-status-handler Race
+**Problem:** When fallback model is already pending, the handler cannot advance the chain on subsequent cooldown events.
+**Fix:** Allow override like message-update-handler does.
+**Files:** Search for session-status-handler, message-update-handler
+
+### H2: Atlas Final-Wave Approval Gate Logic
+**Problem:** Approval gate logic does not match real Prometheus plan structure (nested checkboxes, parallel execution). Trigger logic is wrong.
+**Fix:** Update to handle real plan structures.
+**Files:** Atlas agent code, approval gate logic
+
+### H3: delegate-task-english-directive Dead Code
+**Problem:** The hook is never dispatched from tool-execute-before.ts and has the wrong hook signature, so it is currently dead code. It must either be wired up properly or removed entirely.
+**Fix:** Remove if not needed (cleaner). If needed, fix dispatch + signature.
+**Files:** src/hooks/, tool-execute-before.ts
+
+### H4: Auto-Slash-Command Session-Lifetime Dedup
+**Problem:** Dedup uses session lifetime, suppressing legitimate repeated identical commands.
+**Fix:** Change to short TTL (e.g., 30 seconds) instead of session lifetime.
+**Files:** Slash command handling code
+
+---
+
+## ADDITIONAL BLOCKERS FROM GPT-5.4 REVIEW
+
+### G1: Package Identity Split-Brain
+**Problem:** Installer writes oh-my-openagent but doctor, auto-update, version lookup, publish workflow still reference oh-my-opencode. Half-migrated state.
+**Fix:** Audit ALL references to package name. Either complete the migration consistently or revert to single name for this release.
+**Files:** Installer, doctor, auto-update, version lookup, publish workflow -- grep for both package names
+
+### G2: OpenCode-Go --opencode-go Value Validation
+**Problem:** No validation for --opencode-go CLI value. No detection of existing OpenCode-Go installations.
+**Fix:** Add value validation + existing install detection.
+**Files:** CLI option handling code
+
+### G3: Skill/Hook Reference Errors
+**Problem:**
+- work-with-pr references non-existent git tool category
+- github-triage references TaskCreate/TaskUpdate which are not real tool names
+**Fix:** Fix tool references to use actual tool names.
+**Files:** Skill definition files in .opencode/skills/
+
+### G4: Stale Context-Limit Cache
+**Problem:** Shared context-limit resolver caches provider config. When config changes, stale removed limits persist and corrupt compaction/truncation decisions.
+**Fix:** Add cache invalidation when provider config changes, or make the resolver stateless.
+**Files:** Context-limit resolver, compaction code
+
+### G5: disabled_hooks Schema vs Runtime Contract Mismatch
+**Problem:** Schema is strict (rejects unknown hook names) but runtime is permissive (ignores unknown). Contract disagreement.
+**Fix:** Align -- either make both strict or both permissive.
+**Files:** Hook schema definition, runtime hook loading
+
+---
+
+## EXECUTION INSTRUCTIONS
+
+1. Spawn UltraBrain agents to fix these in parallel -- group by file proximity:
+   - UB-1: C1 (hashline) + H4 (slash-command dedup)
+   - UB-2: C2 + C3 + C4 (model/provider system) + G2
+   - UB-3: C5 + C6 (config defaults) + G5
+   - UB-4: H1 + H2 (runtime handlers + Atlas gate)
+   - UB-5: H3 + G3 (dead code + skill references)
+   - UB-6: G1 (package identity -- full audit)
+   - UB-7: G4 (context-limit cache)
+
+2. Each UB agent MUST:
+   - Write or update tests FIRST (TDD)
+   - Implement the fix
+   - Run bun test on affected test files
+   - Commit with descriptive message
+
+3. After all UB agents complete, run full bun test to verify no regressions.
+
+ulw
--- a/README.md
+++ b/README.md
@@ -1,9 +1,3 @@
-> [!WARNING]
-> **TEMP NOTICE (This Week): Reduced Maintainer Availability**
->
-> Core maintainer Q got injured, so issue/PR responses and releases may be delayed this week.
-> Thank you for your patience and support.
-
 > [!NOTE]
 >
 > [![Sisyphus Labs - Sisyphus is the agent that codes like your team.](./.github/assets/sisyphuslabs.png?v=2)](https://sisyphuslabs.ai)
--- a/assets/oh-my-opencode.schema.json
+++ b/assets/oh-my-opencode.schema.json
@@ -43,57 +43,7 @@
    "disabled_hooks": {
      "type": "array",
      "items": {
-        "type": "string",
-        "enum": [
-          "gpt-permission-continuation",
-          "todo-continuation-enforcer",
-          "context-window-monitor",
-          "session-recovery",
-          "session-notification",
-          "comment-checker",
-          "tool-output-truncator",
-          "question-label-truncator",
-          "directory-agents-injector",
-          "directory-readme-injector",
-          "empty-task-response-detector",
-          "think-mode",
-          "model-fallback",
-          "anthropic-context-window-limit-recovery",
-          "preemptive-compaction",
-          "rules-injector",
-          "background-notification",
-          "auto-update-checker",
-          "startup-toast",
-          "keyword-detector",
-          "agent-usage-reminder",
-          "non-interactive-env",
-          "interactive-bash-session",
-          "thinking-block-validator",
-          "ralph-loop",
-          "category-skill-reminder",
-          "compaction-context-injector",
-          "compaction-todo-preserver",
-          "claude-code-hooks",
-          "auto-slash-command",
-          "edit-error-recovery",
-          "json-error-recovery",
-          "delegate-task-retry",
-          "prometheus-md-only",
-          "sisyphus-junior-notepad",
-          "no-sisyphus-gpt",
-          "no-hephaestus-non-gpt",
-          "start-work",
-          "atlas",
-          "unstable-agent-babysitter",
-          "task-resume-info",
-          "stop-continuation-guard",
-          "tasks-todowrite-disabler",
-          "runtime-fallback",
-          "write-existing-file-guard",
-          "anthropic-effort",
-          "hashline-read-enhancer",
-          "read-image-resizer"
-        ]
+        "type": "string"
      }
    },
    "disabled_commands": {
@@ -3749,6 +3699,30 @@
        "syncPollTimeoutMs": {
          "type": "number",
          "minimum": 60000
+        },
+        "maxToolCalls": {
+          "type": "integer",
+          "minimum": 10,
+          "maximum": 9007199254740991
+        },
+        "circuitBreaker": {
+          "type": "object",
+          "properties": {
+            "enabled": {
+              "type": "boolean"
+            },
+            "maxToolCalls": {
+              "type": "integer",
+              "minimum": 10,
+              "maximum": 9007199254740991
+            },
+            "consecutiveThreshold": {
+              "type": "integer",
+              "minimum": 5,
+              "maximum": 9007199254740991
+            }
+          },
+          "additionalProperties": false
        }
      },
      "additionalProperties": false
@@ -3927,4 +3901,4 @@
    }
  },
  "additionalProperties": false
-}
+}
--- a/benchmarks/package.json
+++ b/benchmarks/package.json
@@ -1,18 +0,0 @@
-{
-  "name": "hashline-edit-benchmark",
-  "version": "0.1.0",
-  "private": true,
-  "type": "module",
-  "description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider",
-  "scripts": {
-    "bench:basic": "bun run test-edit-ops.ts",
-    "bench:edge": "bun run test-edge-cases.ts",
-    "bench:multi": "bun run test-multi-model.ts",
-    "bench:all": "bun run bench:basic && bun run bench:edge"
-  },
-  "dependencies": {
-    "@friendliai/ai-provider": "^1.0.9",
-    "ai": "^6.0.94",
-    "zod": "^4.1.0"
-  }
-}
--- a/docs/guide/agent-model-matching.md
+++ b/docs/guide/agent-model-matching.md
@@ -64,8 +64,8 @@ These agents have Claude-optimized prompts — long, detailed, mechanics-driven.

 | Agent        | Role              | Fallback Chain                         | Notes                                                                                             |
 | ------------ | ----------------- | -------------------------------------- | ------------------------------------------------------------------------------------------------- |
-| **Sisyphus** | Main orchestrator | Claude Opus → opencode-go/kimi-k2.5 → K2P5 → GPT-5.4 → GLM-5 → Big Pickle | Claude-family first. GPT-5.4 has dedicated prompt support. Kimi/GLM as intermediate fallbacks. |
-| **Metis**    | Plan gap analyzer | Claude Opus → opencode-go/glm-5 → K2P5 | Claude preferred. Uses opencode-go for reliable GLM-5 access.                                     |
+| **Sisyphus** | Main orchestrator | Claude Opus → opencode-go/kimi-k2.5 → K2P5 → Kimi K2.5 → GPT-5.4 → GLM-5 → Big Pickle | Claude-family first. GPT-5.4 has dedicated prompt support. Kimi available through multiple providers. |
+| **Metis**    | Plan gap analyzer | Claude Opus → GPT-5.4 → opencode-go/glm-5 → K2P5 | Claude preferred. GPT-5.4 as secondary before GLM-5 fallback.                                     |

 ### Dual-Prompt Agents → Claude preferred, GPT supported

@@ -74,7 +74,7 @@ These agents ship separate prompts for Claude and GPT families. They auto-detect
 | Agent          | Role              | Fallback Chain                         | Notes                                                                |
 | -------------- | ----------------- | -------------------------------------- | -------------------------------------------------------------------- |
 | **Prometheus** | Strategic planner | Claude Opus → GPT-5.4 → opencode-go/glm-5 → Gemini 3.1 Pro | Interview-mode planning. GPT prompt is compact and principle-driven. |
-| **Atlas**      | Todo orchestrator | Claude Sonnet → opencode-go/kimi-k2.5  | Claude first, opencode-go as the current fallback path.              |
+| **Atlas**      | Todo orchestrator | Claude Sonnet → opencode-go/kimi-k2.5 → GPT-5.4 | Claude first, opencode-go as intermediate, GPT-5.4 as last resort.   |

 ### Deep Specialists → GPT

@@ -82,9 +82,9 @@ These agents are built for GPT's principle-driven style. Their prompts assume au

 | Agent          | Role                    | Fallback Chain                         | Notes                                            |
 | -------------- | ----------------------- | -------------------------------------- | ------------------------------------------------ |
-| **Hephaestus** | Autonomous deep worker  | GPT-5.3 Codex only                     | No fallback. Requires GPT access. The craftsman. |
-| **Oracle**     | Architecture consultant | GPT-5.4 → Gemini 3.1 Pro → Claude Opus | Read-only high-IQ consultation.                  |
-| **Momus**      | Ruthless reviewer       | GPT-5.4 → Claude Opus → Gemini 3.1 Pro | Verification and plan review.                    |
+| **Hephaestus** | Autonomous deep worker  | GPT-5.3 Codex → GPT-5.4 (Copilot)     | Requires GPT access. GPT-5.4 via Copilot as fallback. The craftsman. |
+| **Oracle**     | Architecture consultant | GPT-5.4 → Gemini 3.1 Pro → Claude Opus → opencode-go/glm-5 | Read-only high-IQ consultation.                  |
+| **Momus**      | Ruthless reviewer       | GPT-5.4 → Claude Opus → Gemini 3.1 Pro → opencode-go/glm-5 | Verification and plan review. GPT-5.4 uses xhigh variant. |

 ### Utility Runners → Speed over Intelligence

@@ -95,6 +95,7 @@ These agents do grep, search, and retrieval. They intentionally use the fastest,
 | **Explore**           | Fast codebase grep | Grok Code Fast → opencode-go/minimax-m2.5 → MiniMax Free → Haiku → GPT-5-Nano | Speed is everything. Fire 10 in parallel.             |
 | **Librarian**         | Docs/code search   | opencode-go/minimax-m2.5 → MiniMax Free → Haiku → GPT-5-Nano                  | Doc retrieval doesn't need deep reasoning.            |
 | **Multimodal Looker** | Vision/screenshots | GPT-5.4 → opencode-go/kimi-k2.5 → GLM-4.6v → GPT-5-Nano                       | Uses the first available multimodal-capable fallback. |
+| **Sisyphus-Junior**   | Category executor  | Claude Sonnet → opencode-go/kimi-k2.5 → GPT-5.4 → Big Pickle                  | Handles delegated category tasks. Sonnet-tier default. |

 ---

@@ -119,8 +120,7 @@ Principle-driven, explicit reasoning, deep technical capability. Best for agents
 | Model             | Strengths                                                                                       |
 | ----------------- | ----------------------------------------------------------------------------------------------- |
 | **GPT-5.3 Codex** | Deep coding powerhouse. Autonomous exploration. Required for Hephaestus.                        |
-| **GPT-5.4**       | High intelligence, strategic reasoning. Default for Oracle.                                     |
-| **GPT-5.4**       | Strong principle-driven reasoning. Default for Momus and a key fallback for Prometheus / Atlas. |
+| **GPT-5.4**       | High intelligence, strategic reasoning. Default for Oracle and Momus (xhigh variant for Momus), and a key fallback for Prometheus / Atlas. |
 | **GPT-5-Nano**    | Ultra-cheap, fast. Good for simple utility tasks.                                               |

 ### Other Models
@@ -166,14 +166,14 @@ When agents delegate work, they don't pick a model name — they pick a **catego

 | Category             | When Used                  | Fallback Chain                               |
 | -------------------- | -------------------------- | -------------------------------------------- |
-| `visual-engineering` | Frontend, UI, CSS, design  | Gemini 3.1 Pro → GLM 5 → Claude Opus         |
-| `ultrabrain`         | Maximum reasoning needed   | GPT-5.4 → Gemini 3.1 Pro → Claude Opus       |
+| `visual-engineering` | Frontend, UI, CSS, design  | Gemini 3.1 Pro → GLM 5 → Claude Opus → opencode-go/glm-5 → K2P5 |
+| `ultrabrain`         | Maximum reasoning needed   | GPT-5.4 → Gemini 3.1 Pro → Claude Opus → opencode-go/glm-5 |
 | `deep`               | Deep coding, complex logic | GPT-5.3 Codex → Claude Opus → Gemini 3.1 Pro |
 | `artistry`           | Creative, novel approaches | Gemini 3.1 Pro → Claude Opus → GPT-5.4       |
-| `quick`              | Simple, fast tasks         | Claude Haiku → Gemini Flash → GPT-5-Nano     |
-| `unspecified-high`   | General complex work       | Claude Opus → GPT-5.4 (high) → GLM 5 → K2P5  |
-| `unspecified-low`    | General standard work      | Claude Sonnet → GPT-5.3 Codex → Gemini Flash |
-| `writing`            | Text, docs, prose          | Gemini Flash → Claude Sonnet                 |
+| `quick`              | Simple, fast tasks         | Claude Haiku → Gemini Flash → opencode-go/minimax-m2.5 → GPT-5-Nano |
+| `unspecified-high`   | General complex work       | Claude Opus → GPT-5.4 → GLM 5 → K2P5 → opencode-go/glm-5 → Kimi K2.5 |
+| `unspecified-low`    | General standard work      | Claude Sonnet → GPT-5.3 Codex → opencode-go/kimi-k2.5 → Gemini Flash |
+| `writing`            | Text, docs, prose          | Gemini Flash → opencode-go/kimi-k2.5 → Claude Sonnet |

 See the [Orchestration System Guide](./orchestration.md) for how agents dispatch tasks to categories.

--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -2207,6 +2207,38 @@
      "created_at": "2026-03-16T04:55:10Z",
      "repoId": 1108837393,
      "pullRequestNo": 2604
+    },
+    {
+      "name": "gxlife",
+      "id": 110413359,
+      "comment_id": 4068427047,
+      "created_at": "2026-03-16T15:17:01Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2625
+    },
+    {
+      "name": "HaD0Yun",
+      "id": 102889891,
+      "comment_id": 4073195308,
+      "created_at": "2026-03-17T08:27:45Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2640
+    },
+    {
+      "name": "tad-hq",
+      "id": 213478119,
+      "comment_id": 4077697128,
+      "created_at": "2026-03-17T20:07:09Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2655
+    },
+    {
+      "name": "ogormans-deptstack",
+      "id": 208788555,
+      "comment_id": 4077893096,
+      "created_at": "2026-03-17T20:42:42Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 2656
    }
  ]
 }
--- a/src/agents/builtin-agents/resolve-file-uri.test.ts
+++ b/src/agents/builtin-agents/resolve-file-uri.test.ts
@@ -1,20 +1,32 @@
-import { afterAll, beforeAll, describe, expect, test } from "bun:test"
+import { afterAll, beforeAll, describe, expect, mock, test } from "bun:test"
 import { mkdirSync, rmSync, writeFileSync } from "node:fs"
-import { homedir, tmpdir } from "node:os"
+import * as os from "node:os"
+import { tmpdir } from "node:os"
 import { join } from "node:path"
-import { resolvePromptAppend } from "./resolve-file-uri"
+
+const originalHomedir = os.homedir.bind(os)
+let mockedHomeDir = ""
+let moduleImportCounter = 0
+let resolvePromptAppend: typeof import("./resolve-file-uri").resolvePromptAppend
+
+mock.module("node:os", () => ({
+  ...os,
+  homedir: () => mockedHomeDir || originalHomedir(),
+}))

 describe("resolvePromptAppend", () => {
  const fixtureRoot = join(tmpdir(), `resolve-file-uri-${Date.now()}`)
  const configDir = join(fixtureRoot, "config")
-  const homeFixtureDir = join(homedir(), `.resolve-file-uri-home-${Date.now()}`)
+  const homeFixtureRoot = join(fixtureRoot, "home")
+  const homeFixtureDir = join(homeFixtureRoot, "fixture-home")

  const absoluteFilePath = join(fixtureRoot, "absolute.txt")
  const relativeFilePath = join(configDir, "relative.txt")
  const spacedFilePath = join(fixtureRoot, "with space.txt")
  const homeFilePath = join(homeFixtureDir, "home.txt")

-  beforeAll(() => {
+  beforeAll(async () => {
+    mockedHomeDir = homeFixtureRoot
    mkdirSync(fixtureRoot, { recursive: true })
    mkdirSync(configDir, { recursive: true })
    mkdirSync(homeFixtureDir, { recursive: true })
@@ -23,11 +35,14 @@ describe("resolvePromptAppend", () => {
    writeFileSync(relativeFilePath, "relative-content", "utf8")
    writeFileSync(spacedFilePath, "encoded-content", "utf8")
    writeFileSync(homeFilePath, "home-content", "utf8")
+
+    moduleImportCounter += 1
+    ;({ resolvePromptAppend } = await import(`./resolve-file-uri?test=${moduleImportCounter}`))
  })

  afterAll(() => {
    rmSync(fixtureRoot, { recursive: true, force: true })
-    rmSync(homeFixtureDir, { recursive: true, force: true })
+    mock.restore()
  })

  test("returns non-file URI strings unchanged", () => {
@@ -65,7 +80,7 @@ describe("resolvePromptAppend", () => {

  test("resolves home directory URI path", () => {
    //#given
-    const input = `file://~/${homeFixtureDir.split("/").pop()}/home.txt`
+    const input = "file://~/fixture-home/home.txt"

    //#when
    const resolved = resolvePromptAppend(input)
--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
@@ -5,60 +5,60 @@ exports[`generateModelConfig no providers available returns ULTIMATE_FALLBACK fo
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "explore": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "hephaestus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "librarian": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "sisyphus-junior": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
  "categories": {
    "artistry": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "deep": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "visual-engineering": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "writing": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
 }
@@ -83,7 +83,7 @@ exports[`generateModelConfig single native provider uses Claude models when only
      "variant": "max",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
      "model": "anthropic/claude-opus-4-6",
@@ -145,7 +145,7 @@ exports[`generateModelConfig single native provider uses Claude models with isMa
      "variant": "max",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
      "model": "anthropic/claude-opus-4-6",
@@ -366,20 +366,20 @@ exports[`generateModelConfig single native provider uses Gemini models when only
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "explore": {
      "model": "opencode/gpt-5-nano",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "momus": {
      "model": "google/gemini-3.1-pro-preview",
      "variant": "high",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
      "model": "google/gemini-3.1-pro-preview",
@@ -389,7 +389,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
      "model": "google/gemini-3.1-pro-preview",
    },
    "sisyphus-junior": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
  "categories": {
@@ -426,20 +426,20 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "explore": {
      "model": "opencode/gpt-5-nano",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "momus": {
      "model": "google/gemini-3.1-pro-preview",
      "variant": "high",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
      "model": "google/gemini-3.1-pro-preview",
@@ -449,7 +449,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
      "model": "google/gemini-3.1-pro-preview",
    },
    "sisyphus-junior": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
  "categories": {
@@ -465,7 +465,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
      "variant": "high",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-low": {
      "model": "google/gemini-3-flash-preview",
@@ -929,7 +929,7 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "explore": {
      "model": "opencode/gpt-5-nano",
@@ -938,45 +938,45 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "multimodal-looker": {
      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "sisyphus": {
      "model": "zai-coding-plan/glm-5",
    },
    "sisyphus-junior": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
  "categories": {
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-high": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "visual-engineering": {
      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
 }
@@ -987,7 +987,7 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
  "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json",
  "agents": {
    "atlas": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "explore": {
      "model": "opencode/gpt-5-nano",
@@ -996,45 +996,45 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
      "model": "zai-coding-plan/glm-4.7",
    },
    "metis": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "momus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "multimodal-looker": {
      "model": "zai-coding-plan/glm-4.6v",
    },
    "oracle": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "prometheus": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "sisyphus": {
      "model": "zai-coding-plan/glm-5",
    },
    "sisyphus-junior": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
  "categories": {
    "quick": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "ultrabrain": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "unspecified-high": {
      "model": "zai-coding-plan/glm-5",
    },
    "unspecified-low": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "visual-engineering": {
      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
  },
 }
@@ -1273,7 +1273,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
      "variant": "max",
    },
    "multimodal-looker": {
-      "model": "opencode/glm-4.7-free",
+      "model": "opencode/gpt-5-nano",
    },
    "oracle": {
      "model": "google/gemini-3.1-pro-preview",
--- a/src/cli/config-manager/add-plugin-to-opencode-config.ts
+++ b/src/cli/config-manager/add-plugin-to-opencode-config.ts
@@ -1,5 +1,6 @@
 import { readFileSync, writeFileSync } from "node:fs"
 import type { ConfigMergeResult } from "../types"
+import { PLUGIN_NAME, LEGACY_PLUGIN_NAME } from "../../shared"
 import { getConfigDir } from "./config-context"
 import { ensureConfigDirectoryExists } from "./ensure-config-directory-exists"
 import { formatErrorWithSuggestion } from "./format-error-with-suggestion"
@@ -7,8 +8,6 @@ import { detectConfigFormat } from "./opencode-config-format"
 import { parseOpenCodeConfigFileWithError, type OpenCodeConfig } from "./parse-opencode-config-file"
 import { getPluginNameWithVersion } from "./plugin-name-with-version"

-const PACKAGE_NAME = "oh-my-opencode"
-
 export async function addPluginToOpenCodeConfig(currentVersion: string): Promise<ConfigMergeResult> {
  try {
    ensureConfigDirectoryExists()
@@ -21,7 +20,7 @@ export async function addPluginToOpenCodeConfig(currentVersion: string): Promise
  }

  const { format, path } = detectConfigFormat()
-  const pluginEntry = await getPluginNameWithVersion(currentVersion, PACKAGE_NAME)
+  const pluginEntry = await getPluginNameWithVersion(currentVersion, PLUGIN_NAME)

  try {
    if (format === "none") {
@@ -41,13 +40,24 @@ export async function addPluginToOpenCodeConfig(currentVersion: string): Promise

    const config = parseResult.config
    const plugins = config.plugin ?? []
-    const existingIndex = plugins.findIndex((plugin) => plugin === PACKAGE_NAME || plugin.startsWith(`${PACKAGE_NAME}@`))

-    if (existingIndex !== -1) {
-      if (plugins[existingIndex] === pluginEntry) {
+    // Check for existing plugin (either current or legacy name)
+    const currentNameIndex = plugins.findIndex(
+      (plugin) => plugin === PLUGIN_NAME || plugin.startsWith(`${PLUGIN_NAME}@`)
+    )
+    const legacyNameIndex = plugins.findIndex(
+      (plugin) => plugin === LEGACY_PLUGIN_NAME || plugin.startsWith(`${LEGACY_PLUGIN_NAME}@`)
+    )
+
+    // If either name exists, update to new name
+    if (currentNameIndex !== -1) {
+      if (plugins[currentNameIndex] === pluginEntry) {
        return { success: true, configPath: path }
      }
-      plugins[existingIndex] = pluginEntry
+      plugins[currentNameIndex] = pluginEntry
+    } else if (legacyNameIndex !== -1) {
+      // Upgrade legacy name to new name
+      plugins[legacyNameIndex] = pluginEntry
    } else {
      plugins.push(pluginEntry)
    }
--- a/src/cli/config-manager/bun-install.ts
+++ b/src/cli/config-manager/bun-install.ts
@@ -11,6 +11,8 @@ type BunInstallOutputMode = "inherit" | "pipe"

 interface RunBunInstallOptions {
  outputMode?: BunInstallOutputMode
+  /** Workspace directory to install to. Defaults to cache dir if not provided. */
+  workspaceDir?: string
 }

 interface BunInstallOutput {
@@ -65,7 +67,7 @@ function logCapturedOutputOnFailure(outputMode: BunInstallOutputMode, output: Bu

 export async function runBunInstallWithDetails(options?: RunBunInstallOptions): Promise<BunInstallResult> {
  const outputMode = options?.outputMode ?? "pipe"
-  const cacheDir = getOpenCodeCacheDir()
+  const cacheDir = options?.workspaceDir ?? getOpenCodeCacheDir()
  const packageJsonPath = `${cacheDir}/package.json`

  if (!existsSync(packageJsonPath)) {
--- a/src/cli/config-manager/detect-current-config.ts
+++ b/src/cli/config-manager/detect-current-config.ts
@@ -1,5 +1,5 @@
 import { existsSync, readFileSync } from "node:fs"
-import { parseJsonc } from "../../shared"
+import { parseJsonc, LEGACY_PLUGIN_NAME, PLUGIN_NAME } from "../../shared"
 import type { DetectedConfig } from "../types"
 import { getOmoConfigPath } from "./config-context"
 import { detectConfigFormat } from "./opencode-config-format"
@@ -55,8 +55,12 @@ function detectProvidersFromOmoConfig(): {
  }
 }

+// True for a bare or "@version"-pinned entry under the current or legacy plugin name.
+function isOurPlugin(plugin: string): boolean {
+  return [PLUGIN_NAME, LEGACY_PLUGIN_NAME].some((n) => plugin === n || plugin.startsWith(`${n}@`))
+}
+
 export function detectCurrentConfig(): DetectedConfig {
-  const PACKAGE_NAME = "oh-my-opencode"
  const result: DetectedConfig = {
    isInstalled: false,
    hasClaude: true,
@@ -82,7 +86,7 @@ export function detectCurrentConfig(): DetectedConfig {

  const openCodeConfig = parseResult.config
  const plugins = openCodeConfig.plugin ?? []
-  result.isInstalled = plugins.some((plugin) => plugin.startsWith(PACKAGE_NAME))
+  result.isInstalled = plugins.some(isOurPlugin)

  if (!result.isInstalled) {
    return result
--- a/src/cli/config-manager/plugin-detection.test.ts
+++ b/src/cli/config-manager/plugin-detection.test.ts
@@ -52,6 +52,30 @@ describe("detectCurrentConfig - single package detection", () => {
    expect(result.isInstalled).toBe(true)
  })

+  it("detects oh-my-openagent as installed (legacy name)", () => {
+    // given
+    const config = { plugin: ["oh-my-openagent"] }
+    writeFileSync(testConfigPath, JSON.stringify(config, null, 2) + "\n", "utf-8")
+
+    // when
+    const result = detectCurrentConfig()
+
+    // then
+    expect(result.isInstalled).toBe(true)
+  })
+
+  it("detects oh-my-openagent with version pin as installed (legacy name)", () => {
+    // given
+    const config = { plugin: ["oh-my-openagent@3.11.0"] }
+    writeFileSync(testConfigPath, JSON.stringify(config, null, 2) + "\n", "utf-8")
+
+    // when
+    const result = detectCurrentConfig()
+
+    // then
+    expect(result.isInstalled).toBe(true)
+  })
+
  it("returns false when plugin not present", () => {
    // given
    const config = { plugin: ["some-other-plugin"] }
@@ -64,6 +88,18 @@ describe("detectCurrentConfig - single package detection", () => {
    expect(result.isInstalled).toBe(false)
  })

+  it("returns false when plugin not present (even with similar name)", () => {
+    // given - not exactly oh-my-openagent
+    const config = { plugin: ["oh-my-openagent-extra"] }
+    writeFileSync(testConfigPath, JSON.stringify(config, null, 2) + "\n", "utf-8")
+
+    // when
+    const result = detectCurrentConfig()
+
+    // then
+    expect(result.isInstalled).toBe(false)
+  })
+
  it("detects OpenCode Go from the existing omo config", () => {
    // given
    writeFileSync(testConfigPath, JSON.stringify({ plugin: ["oh-my-opencode"] }, null, 2) + "\n", "utf-8")
@@ -130,6 +166,38 @@ describe("addPluginToOpenCodeConfig - single package writes", () => {
    expect(savedConfig.plugin).not.toContain("oh-my-opencode@3.10.0")
  })

+  it("recognizes oh-my-openagent as already installed (legacy name)", async () => {
+    // given
+    const config = { plugin: ["oh-my-openagent"] }
+    writeFileSync(testConfigPath, JSON.stringify(config, null, 2) + "\n", "utf-8")
+
+    // when
+    const result = await addPluginToOpenCodeConfig("3.11.0")
+
+    // then
+    expect(result.success).toBe(true)
+    const savedConfig = JSON.parse(readFileSync(testConfigPath, "utf-8"))
+    // Should upgrade to new name
+    expect(savedConfig.plugin).toContain("oh-my-opencode")
+    expect(savedConfig.plugin).not.toContain("oh-my-openagent")
+  })
+
+  it("replaces version-pinned oh-my-openagent@X.Y.Z with new name", async () => {
+    // given
+    const config = { plugin: ["oh-my-openagent@3.10.0"] }
+    writeFileSync(testConfigPath, JSON.stringify(config, null, 2) + "\n", "utf-8")
+
+    // when
+    const result = await addPluginToOpenCodeConfig("3.11.0")
+
+    // then
+    expect(result.success).toBe(true)
+    const savedConfig = JSON.parse(readFileSync(testConfigPath, "utf-8"))
+    // Legacy should be replaced with new name
+    expect(savedConfig.plugin).toContain("oh-my-opencode")
+    expect(savedConfig.plugin).not.toContain("oh-my-openagent")
+  })
+
  it("adds new plugin when none exists", async () => {
    // given
    const config = {}
--- a/src/cli/doctor/checks/system-plugin.ts
+++ b/src/cli/doctor/checks/system-plugin.ts
@@ -1,7 +1,6 @@
 import { existsSync, readFileSync } from "node:fs"

-import { PACKAGE_NAME } from "../constants"
-import { getOpenCodeConfigPaths, parseJsonc } from "../../../shared"
+import { LEGACY_PLUGIN_NAME, PLUGIN_NAME, getOpenCodeConfigPaths, parseJsonc } from "../../../shared"

 export interface PluginInfo {
  registered: boolean
@@ -24,18 +23,33 @@ function detectConfigPath(): string | null {
 }

 function parsePluginVersion(entry: string): string | null {
-  if (!entry.startsWith(`${PACKAGE_NAME}@`)) return null
-  const value = entry.slice(PACKAGE_NAME.length + 1)
-  if (!value || value === "latest") return null
-  return value
+  // Extract the pinned version from "name@version" for either accepted plugin name.
+  // Returns null for unrecognised entries, an empty version, or the "latest" tag.
+  for (const name of [PLUGIN_NAME, LEGACY_PLUGIN_NAME]) {
+    const prefix = `${name}@`
+    if (!entry.startsWith(prefix)) continue
+
+    const version = entry.slice(prefix.length)
+    if (version === "" || version === "latest") return null
+    return version
+  }
+
+  // Neither the current nor the legacy name prefixes this entry.
+  return null
 }

 function findPluginEntry(entries: string[]): { entry: string; isLocalDev: boolean } | null {
  for (const entry of entries) {
-    if (entry === PACKAGE_NAME || entry.startsWith(`${PACKAGE_NAME}@`)) {
+    // Check for current package name
+    if (entry === PLUGIN_NAME || entry.startsWith(`${PLUGIN_NAME}@`)) {
      return { entry, isLocalDev: false }
    }
-    if (entry.startsWith("file://") && entry.includes(PACKAGE_NAME)) {
+    // Check for legacy package name
+    if (entry === LEGACY_PLUGIN_NAME || entry.startsWith(`${LEGACY_PLUGIN_NAME}@`)) {
+      return { entry, isLocalDev: false }
+    }
+    // Check for file:// paths that include either name
+    if (entry.startsWith("file://") && (entry.includes(PLUGIN_NAME) || entry.includes(LEGACY_PLUGIN_NAME))) {
      return { entry, isLocalDev: true }
    }
  }
@@ -76,7 +90,7 @@ export function getPluginInfo(): PluginInfo {
      registered: true,
      configPath,
      entry: pluginEntry.entry,
-      isPinned: pinnedVersion !== null && /^\d+\.\d+\.\d+/.test(pinnedVersion),
+      isPinned: pinnedVersion !== null && /^\d+\.\d+\.\d+/.test(pinnedVersion ?? ""),
      pinnedVersion,
      isLocalDev: pluginEntry.isLocalDev,
    }
--- a/src/cli/model-fallback.ts
+++ b/src/cli/model-fallback.ts
@@ -19,7 +19,7 @@ export type { GeneratedOmoConfig } from "./model-fallback-types"

 const ZAI_MODEL = "zai-coding-plan/glm-4.7"

-const ULTIMATE_FALLBACK = "opencode/glm-4.7-free"
+const ULTIMATE_FALLBACK = "opencode/gpt-5-nano"
 const SCHEMA_URL = "https://raw.githubusercontent.com/code-yeongyu/oh-my-openagent/dev/assets/oh-my-opencode.schema.json"


--- a/src/cli/run/output-renderer.ts
+++ b/src/cli/run/output-renderer.ts
@@ -45,26 +45,26 @@ export function writePaddedText(
    return { output: text, atLineStart: text.endsWith("\n") }
  }

-  let output = ""
+  const parts: string[] = []
  let lineStart = atLineStart

  for (let i = 0; i < text.length; i++) {
    const ch = text[i]
    if (lineStart) {
-      output += "  "
+      parts.push("  ")
      lineStart = false
    }

    if (ch === "\n") {
-      output += "  \n"
+      parts.push("  \n")
      lineStart = true
      continue
    }

-    output += ch
+    parts.push(ch)
  }

-  return { output, atLineStart: lineStart }
+  return { output: parts.join(""), atLineStart: lineStart }
 }

 function colorizeWithProfileColor(text: string, hexColor?: string): string {
--- a/src/cli/run/runner.test.ts
+++ b/src/cli/run/runner.test.ts
@@ -1,6 +1,6 @@
 /// <reference types="bun-types" />

-import { describe, it, expect } from "bun:test"
+import { describe, it, expect, beforeEach, afterEach, vi } from "bun:test"
 import type { OhMyOpenCodeConfig } from "../../config"
 import { resolveRunAgent, waitForEventProcessorShutdown } from "./runner"

@@ -83,7 +83,6 @@ describe("resolveRunAgent", () => {
 })

 describe("waitForEventProcessorShutdown", () => {
-
  it("returns quickly when event processor completes", async () => {
    //#given
    const eventProcessor = new Promise<void>((resolve) => {
@@ -115,3 +114,44 @@ describe("waitForEventProcessorShutdown", () => {
    expect(elapsed).toBeGreaterThanOrEqual(timeoutMs - 10)
  })
 })
+
+describe("run with invalid model", () => {
+  it("given invalid --model value, when run, then returns exit code 1 with error message", async () => {
+    // given: capture console.error output and intercept process.exit so the test process survives
+    const originalExit = process.exit
+    const originalError = console.error
+    const errorMessages: string[] = []
+    const exitCodes: number[] = []
+
+    console.error = (...args: unknown[]) => {
+      errorMessages.push(args.map(String).join(" "))
+    }
+    process.exit = ((code?: number) => {
+      exitCodes.push(code ?? 0)
+      throw new Error("exit")
+    }) as typeof process.exit
+
+    let returnedCode: number | undefined
+    try {
+      // when
+      const { run } = await import("./runner")
+      try {
+        returnedCode = await run({
+          message: "test",
+          model: "invalid",
+        })
+      } catch {
+        // The process.exit stub throws; any exit code it saw is recorded in exitCodes.
+      }
+    } finally {
+      console.error = originalError
+      process.exit = originalExit
+    }
+
+    // then: the failure surfaces as exit code 1, whether returned or passed to process.exit
+    expect(returnedCode ?? exitCodes[0]).toBe(1)
+
+    // NOTE(review): errorMessages is captured but not asserted, because the exact
+    // wording run() logs is not visible from this file; add a check once confirmed.
+  })
+})
--- a/src/cli/run/runner.ts
+++ b/src/cli/run/runner.ts
@@ -47,10 +47,11 @@ export async function run(options: RunOptions): Promise<number> {

  const pluginConfig = loadPluginConfig(directory, { command: "run" })
  const resolvedAgent = resolveRunAgent(options, pluginConfig)
-  const resolvedModel = resolveRunModel(options.model)
  const abortController = new AbortController()

  try {
+    const resolvedModel = resolveRunModel(options.model)
+
    const { client, cleanup: serverCleanup } = await createServerConnection({
      port: options.port,
      attach: options.attach,
--- a/src/config/schema/background-task-circuit-breaker.test.ts
+++ b/src/config/schema/background-task-circuit-breaker.test.ts
@@ -0,0 +1,58 @@
+import { describe, expect, test } from "bun:test"
+// Import ZodError from the same "zod" entry point that ./background-task builds its
+// schema with, so the instanceof assertions compare against the class actually thrown.
+import { ZodError } from "zod"
+import { BackgroundTaskConfigSchema } from "./background-task"
+
+describe("BackgroundTaskConfigSchema.circuitBreaker", () => {
+  describe("#given valid circuit breaker settings", () => {
+    test("#when parsed #then returns nested config", () => {
+      const result = BackgroundTaskConfigSchema.parse({
+        circuitBreaker: {
+          maxToolCalls: 150,
+          consecutiveThreshold: 10,
+        },
+      })
+      expect(result.circuitBreaker).toEqual({
+        maxToolCalls: 150,
+        consecutiveThreshold: 10,
+      })
+    })
+  })
+
+  describe("#given consecutiveThreshold below minimum", () => {
+    test("#when parsed #then throws ZodError", () => {
+      let thrownError: unknown
+
+      try {
+        BackgroundTaskConfigSchema.parse({
+          circuitBreaker: {
+            consecutiveThreshold: 4,
+          },
+        })
+      } catch (error) {
+        thrownError = error
+      }
+
+      expect(thrownError).toBeInstanceOf(ZodError)
+    })
+  })
+
+  describe("#given consecutiveThreshold is zero", () => {
+    test("#when parsed #then throws ZodError", () => {
+      let thrownError: unknown
+
+      try {
+        BackgroundTaskConfigSchema.parse({
+          circuitBreaker: {
+            consecutiveThreshold: 0,
+          },
+        })
+      } catch (error) {
+        thrownError = error
+      }
+
+      expect(thrownError).toBeInstanceOf(ZodError)
+    })
+  })
+})
--- a/src/config/schema/background-task.ts
+++ b/src/config/schema/background-task.ts
@@ -1,5 +1,11 @@
 import { z } from "zod"

+const CircuitBreakerConfigSchema = z.object({
+  enabled: z.boolean().optional(), // optional boolean switch
+  maxToolCalls: z.number().int().min(10).optional(), // optional integer, at least 10
+  consecutiveThreshold: z.number().int().min(5).optional(), // optional integer, at least 5
+})
+
 export const BackgroundTaskConfigSchema = z.object({
  defaultConcurrency: z.number().min(1).optional(),
  providerConcurrency: z.record(z.string(), z.number().min(0)).optional(),
@@ -11,6 +17,9 @@ export const BackgroundTaskConfigSchema = z.object({
  /** Timeout for tasks that never received any progress update, falling back to startedAt (default: 1800000 = 30 minutes, minimum: 60000 = 1 minute) */
  messageStalenessTimeoutMs: z.number().min(60000).optional(),
  syncPollTimeoutMs: z.number().min(60000).optional(),
+  /** Maximum tool calls per subagent task before circuit breaker triggers (default: 200, minimum: 10). Prevents runaway loops from burning unlimited tokens. */
+  maxToolCalls: z.number().int().min(10).optional(),
+  circuitBreaker: CircuitBreakerConfigSchema.optional(),
 })

 export type BackgroundTaskConfig = z.infer<typeof BackgroundTaskConfigSchema>
--- a/src/config/schema/hooks.ts
+++ b/src/config/schema/hooks.ts
@@ -51,6 +51,7 @@ export const HookNameSchema = z.enum([
  "anthropic-effort",
  "hashline-read-enhancer",
  "read-image-resizer",
+  "todo-description-override",
 ])

 export type HookName = z.infer<typeof HookNameSchema>
--- a/src/features/background-agent/constants.ts
+++ b/src/features/background-agent/constants.ts
@@ -2,9 +2,13 @@ import type { PluginInput } from "@opencode-ai/plugin"
 import type { BackgroundTask, LaunchInput } from "./types"

 export const TASK_TTL_MS = 30 * 60 * 1000
+export const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000
 export const MIN_STABILITY_TIME_MS = 10 * 1000
-export const DEFAULT_STALE_TIMEOUT_MS = 180_000
+export const DEFAULT_STALE_TIMEOUT_MS = 1_200_000
 export const DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS = 1_800_000
+export const DEFAULT_MAX_TOOL_CALLS = 200
+export const DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD = 20
+export const DEFAULT_CIRCUIT_BREAKER_ENABLED = true
 export const MIN_RUNTIME_BEFORE_STALE_MS = 30_000
 export const MIN_IDLE_TIME_MS = 5000
 export const POLLING_INTERVAL_MS = 3000
--- a/src/features/background-agent/default-stale-timeout.test.ts
+++ b/src/features/background-agent/default-stale-timeout.test.ts
@@ -0,0 +1,17 @@
+import { describe, expect, test } from "bun:test"
+
+import { DEFAULT_STALE_TIMEOUT_MS } from "./constants"
+
+// Asserts that the exported default equals 20 minutes expressed in milliseconds.
+describe("DEFAULT_STALE_TIMEOUT_MS", () => {
+  test("uses a 20 minute default", () => {
+    // #given
+    const expectedTimeout = 20 * 60 * 1000
+
+    // #when
+    const timeout = DEFAULT_STALE_TIMEOUT_MS
+
+    // #then
+    expect(timeout).toBe(expectedTimeout)
+  })
+})
--- a/src/features/background-agent/loop-detector.test.ts
+++ b/src/features/background-agent/loop-detector.test.ts
@@ -0,0 +1,240 @@
+import { describe, expect, test } from "bun:test"
+import {
+  createToolCallSignature,
+  detectRepetitiveToolUse,
+  recordToolCall,
+  resolveCircuitBreakerSettings,
+} from "./loop-detector"
+
+function buildWindow(
+  toolNames: string[],
+  override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
+) {
+  const settings = resolveCircuitBreakerSettings(override)
+  let window: ReturnType<typeof recordToolCall> | undefined = undefined
+  for (const toolName of toolNames) {
+    window = recordToolCall(window, toolName, settings)
+  }
+  return window
+}
+
+function buildWindowWithInputs(
+  calls: Array<{ tool: string; input?: Record<string, unknown> }>,
+  override?: Parameters<typeof resolveCircuitBreakerSettings>[0]
+) {
+  const settings = resolveCircuitBreakerSettings(override)
+  let window: ReturnType<typeof recordToolCall> | undefined = undefined
+  // Replay each call in order, threading the window through recordToolCall.
+  for (const call of calls) window = recordToolCall(window, call.tool, settings, call.input)
+  return window
+}
+
+describe("loop-detector", () => {
+  describe("resolveCircuitBreakerSettings", () => {
+    describe("#given nested circuit breaker config", () => {
+      test("#when resolved #then nested values override defaults", () => {
+        const result = resolveCircuitBreakerSettings({
+          maxToolCalls: 200,
+          circuitBreaker: {
+            maxToolCalls: 120,
+            consecutiveThreshold: 7,
+          },
+        })
+
+        expect(result).toEqual({
+          enabled: true,
+          maxToolCalls: 120,
+          consecutiveThreshold: 7,
+        })
+      })
+    })
+
+    describe("#given no enabled config", () => {
+      test("#when resolved #then enabled defaults to true", () => {
+        const result = resolveCircuitBreakerSettings({
+          circuitBreaker: {
+            maxToolCalls: 100,
+            consecutiveThreshold: 5,
+          },
+        })
+
+        expect(result.enabled).toBe(true)
+      })
+    })
+
+    describe("#given enabled is false in config", () => {
+      test("#when resolved #then enabled is false", () => {
+        const result = resolveCircuitBreakerSettings({
+          circuitBreaker: {
+            enabled: false,
+            maxToolCalls: 100,
+            consecutiveThreshold: 5,
+          },
+        })
+
+        expect(result.enabled).toBe(false)
+      })
+    })
+
+    describe("#given enabled is true in config", () => {
+      test("#when resolved #then enabled is true", () => {
+        const result = resolveCircuitBreakerSettings({
+          circuitBreaker: {
+            enabled: true,
+            maxToolCalls: 100,
+            consecutiveThreshold: 5,
+          },
+        })
+
+        expect(result.enabled).toBe(true)
+      })
+    })
+  })
+
+  describe("createToolCallSignature", () => {
+    test("#given tool with input #when signature created #then includes tool and sorted input", () => {
+      const result = createToolCallSignature("read", { filePath: "/a.ts" })
+
+      expect(result).toBe('read::{"filePath":"/a.ts"}')
+    })
+
+    test("#given tool with undefined input #when signature created #then returns bare tool name", () => {
+      const result = createToolCallSignature("read", undefined)
+
+      expect(result).toBe("read")
+    })
+
+    test("#given tool with null input #when signature created #then returns bare tool name", () => {
+      const result = createToolCallSignature("read", null)
+
+      expect(result).toBe("read")
+    })
+
+    test("#given tool with empty object input #when signature created #then returns bare tool name", () => {
+      const result = createToolCallSignature("read", {})
+
+      expect(result).toBe("read")
+    })
+
+    test("#given same input different key order #when signatures compared #then they are equal", () => {
+      const first = createToolCallSignature("read", { filePath: "/a.ts", offset: 0 })
+      const second = createToolCallSignature("read", { offset: 0, filePath: "/a.ts" })
+
+      expect(first).toBe(second)
+    })
+  })
+
+  describe("detectRepetitiveToolUse", () => {
+    describe("#given recent tools are diverse", () => {
+      test("#when evaluated #then it does not trigger", () => {
+        const window = buildWindow([
+          "read",
+          "grep",
+          "edit",
+          "bash",
+          "read",
+          "glob",
+          "lsp_diagnostics",
+          "read",
+          "grep",
+          "edit",
+        ])
+
+        const result = detectRepetitiveToolUse(window)
+
+        expect(result.triggered).toBe(false)
+      })
+    })
+
+    describe("#given the same tool is called consecutively", () => {
+      test("#when evaluated #then it triggers", () => {
+        const window = buildWindow(Array.from({ length: 20 }, () => "read"))
+
+        const result = detectRepetitiveToolUse(window)
+
+        expect(result).toEqual({
+          triggered: true,
+          toolName: "read",
+          repeatedCount: 20,
+        })
+      })
+    })
+
+    describe("#given consecutive calls are interrupted by different tool", () => {
+      test("#when evaluated #then it does not trigger", () => {
+        const window = buildWindow([
+          ...Array.from({ length: 19 }, () => "read"),
+          "edit",
+          "read",
+        ])
+
+        const result = detectRepetitiveToolUse(window)
+
+        expect(result).toEqual({ triggered: false })
+      })
+    })
+
+    describe("#given threshold boundary", () => {
+      test("#when below threshold #then it does not trigger", () => {
+        const belowThresholdWindow = buildWindow(Array.from({ length: 19 }, () => "read"))
+
+        const result = detectRepetitiveToolUse(belowThresholdWindow)
+
+        expect(result).toEqual({ triggered: false })
+      })
+
+      test("#when equal to threshold #then it triggers", () => {
+        const atThresholdWindow = buildWindow(Array.from({ length: 20 }, () => "read"))
+
+        const result = detectRepetitiveToolUse(atThresholdWindow)
+
+        expect(result).toEqual({
+          triggered: true,
+          toolName: "read",
+          repeatedCount: 20,
+        })
+      })
+    })
+
+    describe("#given same tool with different file inputs", () => {
+      test("#when evaluated #then it does not trigger", () => {
+        const calls = Array.from({ length: 20 }, (_, i) => ({
+          tool: "read",
+          input: { filePath: `/src/file-${i}.ts` },
+        }))
+        const window = buildWindowWithInputs(calls)
+        const result = detectRepetitiveToolUse(window)
+        expect(result.triggered).toBe(false)
+      })
+    })
+
+    describe("#given same tool with identical file inputs", () => {
+      test("#when evaluated #then it triggers with bare tool name", () => {
+        const calls = Array.from({ length: 20 }, () => ({
+          tool: "read",
+          input: { filePath: "/src/same.ts" },
+        }))
+        const window = buildWindowWithInputs(calls)
+        const result = detectRepetitiveToolUse(window)
+        expect(result).toEqual({
+          triggered: true,
+          toolName: "read",
+          repeatedCount: 20,
+        })
+      })
+    })
+
+    describe("#given tool calls with no input", () => {
+      test("#when evaluated #then it triggers", () => {
+        const calls = Array.from({ length: 20 }, () => ({ tool: "read" }))
+        const window = buildWindowWithInputs(calls)
+        const result = detectRepetitiveToolUse(window)
+        expect(result).toEqual({
+          triggered: true,
+          toolName: "read",
+          repeatedCount: 20,
+        })
+      })
+    })
+  })
+})
--- a/src/features/background-agent/loop-detector.ts
+++ b/src/features/background-agent/loop-detector.ts
@@ -0,0 +1,94 @@
+import type { BackgroundTaskConfig } from "../../config/schema"
+import {
+  DEFAULT_CIRCUIT_BREAKER_ENABLED,
+  DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD,
+  DEFAULT_MAX_TOOL_CALLS,
+} from "./constants"
+import type { ToolCallWindow } from "./types"
+
+export interface CircuitBreakerSettings {
+  enabled: boolean // gates repetition detection; resolved default is DEFAULT_CIRCUIT_BREAKER_ENABLED
+  maxToolCalls: number // tool-call cap; nested circuitBreaker value wins over top-level maxToolCalls
+  consecutiveThreshold: number // copied into each ToolCallWindow as its trigger threshold
+}
+
+export interface ToolLoopDetectionResult {
+  triggered: boolean // true once the window's consecutive count reaches its threshold
+  toolName?: string // signature text before "::" (the bare tool name); set only when triggered
+  repeatedCount?: number // the window's consecutive count; set only when triggered
+}
+
+export function resolveCircuitBreakerSettings(
+  config?: BackgroundTaskConfig
+): CircuitBreakerSettings {
+  // Nested `circuitBreaker` values win; top-level `maxToolCalls` is the fallback.
+  const nested = config?.circuitBreaker
+  const enabled = nested?.enabled ?? DEFAULT_CIRCUIT_BREAKER_ENABLED
+  const maxToolCalls = nested?.maxToolCalls ?? config?.maxToolCalls ?? DEFAULT_MAX_TOOL_CALLS
+  const consecutiveThreshold =
+    nested?.consecutiveThreshold ?? DEFAULT_CIRCUIT_BREAKER_CONSECUTIVE_THRESHOLD
+  return { enabled, maxToolCalls, consecutiveThreshold }
+}
+
+/**
+ * Folds one tool call into the consecutive-call window.
+ *
+ * The counter grows only while the signature (tool name plus key-sorted
+ * input) matches the previous call; any other call restarts it at 1.
+ * The threshold is copied from `settings` on every call.
+ */
+export function recordToolCall(
+  window: ToolCallWindow | undefined,
+  toolName: string,
+  settings: CircuitBreakerSettings,
+  toolInput?: Record<string, unknown> | null
+): ToolCallWindow {
+  const signature = createToolCallSignature(toolName, toolInput)
+  const priorRun = window?.lastSignature === signature ? window.consecutiveCount : 0
+
+  return {
+    lastSignature: signature,
+    consecutiveCount: priorRun + 1,
+    threshold: settings.consecutiveThreshold,
+  }
+}
+
+// Recursively rebuilds plain objects with their keys in sorted order; arrays keep element order.
+function sortObject(obj: unknown): unknown {
+  if (obj === null || typeof obj !== "object") return obj
+  if (Array.isArray(obj)) return obj.map(sortObject)
+
+  const source = obj as Record<string, unknown>
+  const sorted: Record<string, unknown> = {}
+  for (const key of Object.keys(source).sort()) {
+    sorted[key] = sortObject(source[key])
+  }
+  return sorted
+}
+
+export function createToolCallSignature(
+  toolName: string,
+  toolInput?: Record<string, unknown> | null
+): string {
+  // Missing or empty input adds nothing to distinguish calls: use the bare name.
+  const hasInput =
+    toolInput !== undefined && toolInput !== null && Object.keys(toolInput).length > 0
+  if (!hasInput) {
+    return toolName
+  }
+  return `${toolName}::${JSON.stringify(sortObject(toolInput))}`
+}
+
+export function detectRepetitiveToolUse(
+  window: ToolCallWindow | undefined
+): ToolLoopDetectionResult {
+  if (!window) return { triggered: false }
+  if (window.consecutiveCount < window.threshold) return { triggered: false }
+  // Signatures are "<tool>" or "<tool>::<json input>"; report only the text before "::".
+  const [toolName] = window.lastSignature.split("::")
+  return {
+    triggered: true,
+    toolName,
+    repeatedCount: window.consecutiveCount,
+  }
+}
--- a/src/features/background-agent/manager-circuit-breaker.test.ts
+++ b/src/features/background-agent/manager-circuit-breaker.test.ts
@@ -0,0 +1,387 @@
+import { describe, expect, test } from "bun:test"
+import type { PluginInput } from "@opencode-ai/plugin"
+import { tmpdir } from "node:os"
+import type { BackgroundTaskConfig } from "../../config/schema"
+import { BackgroundManager } from "./manager"
+import type { BackgroundTask } from "./types"
+
+function createManager(config?: BackgroundTaskConfig): BackgroundManager {
+  // Minimal client stub: each session call used here resolves to an empty object.
+  const resolveEmpty = async () => ({})
+  const client = {
+    session: {
+      prompt: resolveEmpty,
+      promptAsync: resolveEmpty,
+      abort: resolveEmpty,
+    },
+  }
+  const ctx = { client, directory: tmpdir() } as unknown as PluginInput
+  const manager = new BackgroundManager(ctx, config)
+
+  // Override internals: queued notifications run inline, notifyParentSession is a no-op.
+  const internals = manager as unknown as {
+    enqueueNotificationForParent: (sessionID: string, fn: () => Promise<void>) => Promise<void>
+    notifyParentSession: (task: BackgroundTask) => Promise<void>
+    tasks: Map<string, BackgroundTask>
+  }
+  internals.enqueueNotificationForParent = async (_sessionID, fn) => { await fn() }
+  internals.notifyParentSession = async () => {}
+  return manager
+}
+
+function getTaskMap(manager: BackgroundManager): Map<string, BackgroundTask> {
+  return (manager as unknown as { tasks: Map<string, BackgroundTask> }).tasks // cast exposes the manager's `tasks` map so tests can inject tasks
+}
+
+async function flushAsyncWork() {
+  await new Promise(resolve => setTimeout(resolve, 0)) // wait one zero-delay timer tick before asserting
+}
+
+describe("BackgroundManager circuit breaker", () => {
+  describe("#given the same tool is called consecutively", () => {
+    test("#when consecutive tool events arrive #then the task is cancelled", async () => {
+      const manager = createManager({
+        circuitBreaker: {
+          consecutiveThreshold: 20,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-loop-1",
+        sessionID: "session-loop-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Looping task",
+        prompt: "loop",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (let i = 0; i < 20; i++) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: { sessionID: task.sessionID, type: "tool", tool: "read" },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("cancelled")
+      expect(task.error).toContain("read 20 consecutive times")
+    })
+  })
+
+  describe("#given recent tool calls are diverse", () => {
+    test("#when the window fills #then the task keeps running", async () => {
+      const manager = createManager({
+        circuitBreaker: {
+          consecutiveThreshold: 10,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-diverse-1",
+        sessionID: "session-diverse-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Healthy task",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (const toolName of [
+        "read",
+        "grep",
+        "edit",
+        "bash",
+        "glob",
+        "read",
+        "lsp_diagnostics",
+        "grep",
+        "edit",
+        "read",
+      ]) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("running")
+      expect(task.progress?.toolCalls).toBe(10)
+    })
+  })
+
+  describe("#given the absolute cap is configured lower than the repetition detector needs", () => {
+    test("#when the raw tool-call cap is reached #then the backstop still cancels the task", async () => {
+      const manager = createManager({
+        maxToolCalls: 3,
+        circuitBreaker: {
+          consecutiveThreshold: 95,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-cap-1",
+        sessionID: "session-cap-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Backstop task",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (const toolName of ["read", "grep", "edit"]) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("cancelled")
+      expect(task.error).toContain("maximum tool call limit (3)")
+    })
+  })
+
+  describe("#given the same running tool part emits multiple updates", () => {
+    test("#when duplicate running updates arrive #then it only counts the tool once", async () => {
+      const manager = createManager({
+        maxToolCalls: 2,
+        circuitBreaker: {
+          consecutiveThreshold: 5,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-dedupe-1",
+        sessionID: "session-dedupe-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Dedupe task",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (let index = 0; index < 3; index += 1) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: {
+            part: {
+              id: "tool-1",
+              sessionID: task.sessionID,
+              type: "tool",
+              tool: "bash",
+              state: { status: "running" },
+            },
+          },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("running")
+      expect(task.progress?.toolCalls).toBe(1)
+      expect(task.progress?.countedToolPartIDs).toEqual(new Set(["tool-1"]))
+    })
+  })
+
+  describe("#given same tool reading different files", () => {
+    test("#when tool events arrive with state.input #then task keeps running", async () => {
+      const manager = createManager({
+        circuitBreaker: {
+          consecutiveThreshold: 20,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-diff-files-1",
+        sessionID: "session-diff-files-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Reading different files",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (let i = 0; i < 20; i++) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: {
+            part: {
+              sessionID: task.sessionID,
+              type: "tool",
+              tool: "read",
+              state: { status: "running", input: { filePath: `/src/file-${i}.ts` } },
+            },
+          },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("running")
+      expect(task.progress?.toolCalls).toBe(20)
+    })
+  })
+
+  describe("#given same tool reading same file repeatedly", () => {
+    test("#when tool events arrive with state.input #then task is cancelled with bare tool name in error", async () => {
+      const manager = createManager({
+        circuitBreaker: {
+          consecutiveThreshold: 20,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-same-file-1",
+        sessionID: "session-same-file-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Reading same file repeatedly",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (let i = 0; i < 20; i++) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: {
+            part: {
+              sessionID: task.sessionID,
+              type: "tool",
+              tool: "read",
+              state: { status: "running", input: { filePath: "/src/same.ts" } },
+            },
+          },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("cancelled")
+      expect(task.error).toContain("read 20 consecutive times")
+      expect(task.error).not.toContain("::")
+    })
+  })
+
+  describe("#given circuit breaker enabled is false", () => {
+    test("#when repetitive tools arrive #then task keeps running", async () => {
+      const manager = createManager({
+        circuitBreaker: {
+          enabled: false,
+          consecutiveThreshold: 20,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-disabled-1",
+        sessionID: "session-disabled-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Disabled circuit breaker task",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (let i = 0; i < 20; i++) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: {
+            sessionID: task.sessionID,
+            type: "tool",
+            tool: "read",
+          },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("running")
+    })
+  })
+
+  describe("#given circuit breaker enabled is false but absolute cap is low", () => {
+    test("#when max tool calls exceeded #then task is still cancelled by absolute cap", async () => {
+      const manager = createManager({
+        maxToolCalls: 3,
+        circuitBreaker: {
+          enabled: false,
+          consecutiveThreshold: 95,
+        },
+      })
+      const task: BackgroundTask = {
+        id: "task-cap-disabled-1",
+        sessionID: "session-cap-disabled-1",
+        parentSessionID: "parent-1",
+        parentMessageID: "msg-1",
+        description: "Backstop task with disabled circuit breaker",
+        prompt: "work",
+        agent: "explore",
+        status: "running",
+        startedAt: new Date(Date.now() - 60_000),
+        progress: {
+          toolCalls: 0,
+          lastUpdate: new Date(Date.now() - 60_000),
+        },
+      }
+      getTaskMap(manager).set(task.id, task)
+
+      for (const toolName of ["read", "grep", "edit"]) {
+        manager.handleEvent({
+          type: "message.part.updated",
+          properties: { sessionID: task.sessionID, type: "tool", tool: toolName },
+        })
+      }
+
+      await flushAsyncWork()
+
+      expect(task.status).toBe("cancelled")
+      expect(task.error).toContain("maximum tool call limit (3)")
+    })
+  })
+})
--- a/src/features/background-agent/manager.polling.test.ts
+++ b/src/features/background-agent/manager.polling.test.ts
@@ -153,4 +153,42 @@ describe("BackgroundManager pollRunningTasks", () => {
      expect(task.status).toBe("running")
    })
  })
+
+  describe("#given a running task whose session has terminal non-idle status", () => {
+    test('#when session status is "interrupted" #then completes the task', async () => {
+      //#given
+      const manager = createManagerWithClient({
+        status: async () => ({ data: { "ses-interrupted": { type: "interrupted" } } }),
+      })
+      const task = createRunningTask("ses-interrupted")
+      injectTask(manager, task)
+
+      //#when
+      const poll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks
+      await poll.call(manager)
+      manager.shutdown()
+
+      //#then
+      expect(task.status).toBe("completed")
+      expect(task.completedAt).toBeDefined()
+    })
+
+    test('#when session status is an unknown type #then completes the task', async () => {
+      //#given
+      const manager = createManagerWithClient({
+        status: async () => ({ data: { "ses-unknown": { type: "some-weird-status" } } }),
+      })
+      const task = createRunningTask("ses-unknown")
+      injectTask(manager, task)
+
+      //#when
+      const poll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks
+      await poll.call(manager)
+      manager.shutdown()
+
+      //#then
+      expect(task.status).toBe("completed")
+      expect(task.completedAt).toBeDefined()
+    })
+  })
 })
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -3027,10 +3027,10 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
      prompt: "Test",
      agent: "test-agent",
      status: "running",
-      startedAt: new Date(Date.now() - 300_000),
+      startedAt: new Date(Date.now() - 25 * 60 * 1000),
      progress: {
        toolCalls: 1,
-        lastUpdate: new Date(Date.now() - 200_000),
+        lastUpdate: new Date(Date.now() - 21 * 60 * 1000),
      },
    }

--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -27,6 +27,7 @@ import {
 import {
  POLLING_INTERVAL_MS,
  TASK_CLEANUP_DELAY_MS,
+  TASK_TTL_MS,
 } from "./constants"

 import { subagentSessions } from "../claude-code-session-state"
@@ -51,6 +52,13 @@ import { join } from "node:path"
 import { pruneStaleTasksAndNotifications } from "./task-poller"
 import { checkAndInterruptStaleTasks } from "./task-poller"
 import { removeTaskToastTracking } from "./remove-task-toast-tracking"
+import { isActiveSessionStatus, isTerminalSessionStatus } from "./session-status-classifier"
+import {
+  detectRepetitiveToolUse,
+  recordToolCall,
+  resolveCircuitBreakerSettings,
+  type CircuitBreakerSettings,
+} from "./loop-detector"
 import {
  createSubagentDepthLimitError,
  createSubagentDescendantLimitError,
@@ -64,9 +72,11 @@ type OpencodeClient = PluginInput["client"]


 interface MessagePartInfo {
+  id?: string
  sessionID?: string
  type?: string
  tool?: string
+  state?: { status?: string; input?: Record<string, unknown> }
 }

 interface EventProperties {
@@ -80,6 +90,19 @@ interface Event {
  properties?: EventProperties
 }

+// Picks the part payload from an event: the nested `properties.part` object
+// when one is present, otherwise the flat `properties` object itself.
+function resolveMessagePartInfo(properties: EventProperties | undefined): MessagePartInfo | undefined {
+  if (!properties || typeof properties !== "object") return undefined
+
+  const candidate = properties.part
+  if (candidate && typeof candidate === "object") {
+    return candidate as MessagePartInfo
+  }
+
+  return properties as MessagePartInfo
+}
+
 interface Todo {
  content: string
  status: string
@@ -100,6 +123,8 @@ export interface SubagentSessionCreatedEvent {

 export type OnSubagentSessionCreated = (event: SubagentSessionCreatedEvent) => Promise<void>

+const MAX_TASK_REMOVAL_RESCHEDULES = 6
+
 export class BackgroundManager {


@@ -128,6 +153,7 @@ export class BackgroundManager {
  private preStartDescendantReservations: Set<string>
  private enableParentSessionNotifications: boolean
  readonly taskHistory = new TaskHistory()
+  private cachedCircuitBreakerSettings?: CircuitBreakerSettings

  constructor(
    ctx: PluginInput,
@@ -720,6 +746,8 @@ export class BackgroundManager {

    existingTask.progress = {
      toolCalls: existingTask.progress?.toolCalls ?? 0,
+      toolCallWindow: existingTask.progress?.toolCallWindow,
+      countedToolPartIDs: existingTask.progress?.countedToolPartIDs,
      lastUpdate: new Date(),
    }

@@ -852,8 +880,7 @@ export class BackgroundManager {
    }

    if (event.type === "message.part.updated" || event.type === "message.part.delta") {
-      if (!props || typeof props !== "object" || !("sessionID" in props)) return
-      const partInfo = props as unknown as MessagePartInfo
+      const partInfo = resolveMessagePartInfo(props)
      const sessionID = partInfo?.sessionID
      if (!sessionID) return

@@ -876,8 +903,65 @@ export class BackgroundManager {
      task.progress.lastUpdate = new Date()

      if (partInfo?.type === "tool" || partInfo?.tool) {
+        const countedToolPartIDs = task.progress.countedToolPartIDs ?? new Set<string>()
+        const shouldCountToolCall =
+          !partInfo.id ||
+          partInfo.state?.status !== "running" ||
+          !countedToolPartIDs.has(partInfo.id)
+
+        if (!shouldCountToolCall) {
+          return
+        }
+
+        if (partInfo.id && partInfo.state?.status === "running") {
+          countedToolPartIDs.add(partInfo.id)
+          task.progress.countedToolPartIDs = countedToolPartIDs
+        }
+
        task.progress.toolCalls += 1
        task.progress.lastTool = partInfo.tool
+        const circuitBreaker = this.cachedCircuitBreakerSettings ?? (this.cachedCircuitBreakerSettings = resolveCircuitBreakerSettings(this.config))
+        if (partInfo.tool) {
+         task.progress.toolCallWindow = recordToolCall(
+             task.progress.toolCallWindow,
+             partInfo.tool,
+             circuitBreaker,
+             partInfo.state?.input
+           )
+
+           if (circuitBreaker.enabled) {
+             const loopDetection = detectRepetitiveToolUse(task.progress.toolCallWindow)
+             if (loopDetection.triggered) {
+               log("[background-agent] Circuit breaker: consecutive tool usage detected", {
+                 taskId: task.id,
+                 agent: task.agent,
+                 sessionID,
+                 toolName: loopDetection.toolName,
+                 repeatedCount: loopDetection.repeatedCount,
+               })
+               void this.cancelTask(task.id, {
+                 source: "circuit-breaker",
+                 reason: `Subagent called ${loopDetection.toolName} ${loopDetection.repeatedCount} consecutive times (threshold: ${circuitBreaker.consecutiveThreshold}). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
+               })
+               return
+             }
+           }
+        }
+
+        const maxToolCalls = circuitBreaker.maxToolCalls
+        if (task.progress.toolCalls >= maxToolCalls) {
+          log("[background-agent] Circuit breaker: tool call limit reached", {
+            taskId: task.id,
+            toolCalls: task.progress.toolCalls,
+            maxToolCalls,
+            agent: task.agent,
+            sessionID,
+          })
+          void this.cancelTask(task.id, {
+            source: "circuit-breaker",
+            reason: `Subagent exceeded maximum tool call limit (${maxToolCalls}). This usually indicates an infinite loop. The task was automatically cancelled to prevent excessive token usage.`,
+          })
+        }
      }
    }

@@ -1188,7 +1272,7 @@ export class BackgroundManager {
    this.completedTaskSummaries.delete(parentSessionID)
  }

-  private scheduleTaskRemoval(taskId: string): void {
+  private scheduleTaskRemoval(taskId: string, rescheduleCount = 0): void {
    const existingTimer = this.completionTimers.get(taskId)
    if (existingTimer) {
      clearTimeout(existingTimer)
@@ -1198,17 +1282,29 @@ export class BackgroundManager {
    const timer = setTimeout(() => {
      this.completionTimers.delete(taskId)
      const task = this.tasks.get(taskId)
-      if (task) {
-        this.clearNotificationsForTask(taskId)
-        this.tasks.delete(taskId)
-        this.clearTaskHistoryWhenParentTasksGone(task.parentSessionID)
-        if (task.sessionID) {
-          subagentSessions.delete(task.sessionID)
-          SessionCategoryRegistry.remove(task.sessionID)
+      if (!task) return
+
+      if (task.parentSessionID) {
+        const siblings = this.getTasksByParentSession(task.parentSessionID)
+        const runningOrPendingSiblings = siblings.filter(
+          sibling => sibling.id !== taskId && (sibling.status === "running" || sibling.status === "pending"),
+        )
+        const completedAtTimestamp = task.completedAt?.getTime()
+        const reachedTaskTtl = completedAtTimestamp !== undefined && (Date.now() - completedAtTimestamp) >= TASK_TTL_MS
+        if (runningOrPendingSiblings.length > 0 && rescheduleCount < MAX_TASK_REMOVAL_RESCHEDULES && !reachedTaskTtl) {
+          this.scheduleTaskRemoval(taskId, rescheduleCount + 1)
+          return
        }
-        log("[background-agent] Removed completed task from memory:", taskId)
-        this.clearTaskHistoryWhenParentTasksGone(task?.parentSessionID)
      }
+
+      this.clearNotificationsForTask(taskId)
+      this.tasks.delete(taskId)
+      this.clearTaskHistoryWhenParentTasksGone(task.parentSessionID)
+      if (task.sessionID) {
+        subagentSessions.delete(task.sessionID)
+        SessionCategoryRegistry.remove(task.sessionID)
+      }
+      log("[background-agent] Removed completed task from memory:", taskId)
    }, TASK_CLEANUP_DELAY_MS)

    this.completionTimers.set(taskId, timer)
@@ -1688,11 +1784,9 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          }
        }

-        // Match sync-session-poller pattern: only skip completion check when
-        // status EXISTS and is not idle (i.e., session is actively running).
-        // When sessionStatus is undefined, the session has completed and dropped
-        // from the status response — fall through to completion detection.
-        if (sessionStatus && sessionStatus.type !== "idle") {
+        // Only skip completion when session status is actively running.
+        // Unknown or terminal statuses (like "interrupted") fall through to completion.
+        if (sessionStatus && isActiveSessionStatus(sessionStatus.type)) {
          log("[background-agent] Session still running, relying on event-based progress:", {
            taskId: task.id,
            sessionID,
@@ -1702,6 +1796,24 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          continue
        }

+        // Explicit terminal non-idle status (e.g., "interrupted") — complete immediately,
+        // skipping output validation (session will never produce more output).
+        // Unknown statuses fall through to the idle/gone path with output validation.
+        if (sessionStatus && isTerminalSessionStatus(sessionStatus.type)) {
+          await this.tryCompleteTask(task, `polling (terminal session status: ${sessionStatus.type})`)
+          continue
+        }
+
+        // Unknown non-idle status — not active, not terminal, not idle.
+        // Fall through to idle/gone completion path with output validation.
+        if (sessionStatus && sessionStatus.type !== "idle") {
+          log("[background-agent] Unknown session status, treating as potentially idle:", {
+            taskId: task.id,
+            sessionID,
+            sessionStatus: sessionStatus.type,
+          })
+        }
+
        // Session is idle or no longer in status response (completed/disappeared)
        const completionSource = sessionStatus?.type === "idle"
          ? "polling (idle status)"
--- a/src/features/background-agent/session-status-classifier.test.ts
+++ b/src/features/background-agent/session-status-classifier.test.ts
@@ -0,0 +1,71 @@
+import { describe, test, expect, mock } from "bun:test"
+import { isActiveSessionStatus, isTerminalSessionStatus } from "./session-status-classifier"
+
+// Replace the shared module's `log` export with a mock so the tests below can
+// assert whether the classifier logged anything.
+const mockLog = mock()
+mock.module("../../shared", () => ({ log: mockLog }))
+
+// isActiveSessionStatus: true for "busy"/"retry"/"running", false otherwise;
+// only a status outside the known sets is expected to be logged.
+describe("isActiveSessionStatus", () => {
+  describe("#given a known active session status", () => {
+    test('#when type is "busy" #then returns true', () => {
+      expect(isActiveSessionStatus("busy")).toBe(true)
+    })
+
+    test('#when type is "retry" #then returns true', () => {
+      expect(isActiveSessionStatus("retry")).toBe(true)
+    })
+
+    test('#when type is "running" #then returns true', () => {
+      expect(isActiveSessionStatus("running")).toBe(true)
+    })
+  })
+
+  describe("#given a known terminal session status", () => {
+    test('#when type is "idle" #then returns false', () => {
+      expect(isActiveSessionStatus("idle")).toBe(false)
+    })
+
+    test('#when type is "interrupted" #then returns false and does not log', () => {
+      mockLog.mockClear()
+      expect(isActiveSessionStatus("interrupted")).toBe(false)
+      expect(mockLog).not.toHaveBeenCalled()
+    })
+  })
+
+  describe("#given an unknown session status", () => {
+    test('#when type is an arbitrary unknown string #then returns false and logs warning', () => {
+      mockLog.mockClear()
+      expect(isActiveSessionStatus("some-unknown-status")).toBe(false)
+      expect(mockLog).toHaveBeenCalledWith(
+        "[background-agent] Unknown session status type encountered:",
+        "some-unknown-status",
+      )
+    })
+
+    test('#when type is empty string #then returns false', () => {
+      expect(isActiveSessionStatus("")).toBe(false)
+    })
+  })
+})
+
+// isTerminalSessionStatus: of the cases below, only "interrupted" is terminal.
+describe("isTerminalSessionStatus", () => {
+  test('#when type is "interrupted" #then returns true', () => {
+    expect(isTerminalSessionStatus("interrupted")).toBe(true)
+  })
+
+  test('#when type is "idle" #then returns false (idle is handled separately)', () => {
+    expect(isTerminalSessionStatus("idle")).toBe(false)
+  })
+
+  test('#when type is "busy" #then returns false', () => {
+    expect(isTerminalSessionStatus("busy")).toBe(false)
+  })
+
+  test('#when type is an unknown string #then returns false', () => {
+    expect(isTerminalSessionStatus("some-unknown")).toBe(false)
+  })
+})
--- a/src/features/background-agent/session-status-classifier.ts
+++ b/src/features/background-agent/session-status-classifier.ts
@@ -0,0 +1,32 @@
+import { log } from "../../shared"
+
+// Status types that mean the session is still doing work.
+const ACTIVE_SESSION_STATUSES = new Set(["busy", "retry", "running"])
+// Status types recognised as not active. Anything in neither set is "unknown".
+const KNOWN_TERMINAL_STATUSES = new Set(["idle", "interrupted"])
+
+/**
+ * Reports whether `type` is one of the active statuses ("busy", "retry", "running").
+ *
+ * Side effect: a `type` found in neither the active nor the known-terminal set
+ * is passed to `log` before returning false, so unknown statuses become visible.
+ */
+export function isActiveSessionStatus(type: string): boolean {
+  if (ACTIVE_SESSION_STATUSES.has(type)) {
+    return true
+  }
+
+  if (!KNOWN_TERMINAL_STATUSES.has(type)) {
+    log("[background-agent] Unknown session status type encountered:", type)
+  }
+
+  return false
+}
+
+/**
+ * Reports whether `type` is a known terminal status other than "idle"
+ * (with the current sets, only "interrupted"). Unknown types return false.
+ */
+export function isTerminalSessionStatus(type: string): boolean {
+  return KNOWN_TERMINAL_STATUSES.has(type) && type !== "idle"
+}
--- a/src/features/background-agent/task-completion-cleanup.test.ts
+++ b/src/features/background-agent/task-completion-cleanup.test.ts
@@ -1,6 +1,5 @@
-declare const require: (name: string) => any
-const { describe, test, expect, afterEach } = require("bun:test")
 import { tmpdir } from "node:os"
+import { afterEach, describe, expect, test } from "bun:test"
 import type { PluginInput } from "@opencode-ai/plugin"
 import { TASK_CLEANUP_DELAY_MS } from "./constants"
 import { BackgroundManager } from "./manager"
@@ -157,17 +156,19 @@ function getRequiredTimer(manager: BackgroundManager, taskID: string): ReturnTyp
 }

 describe("BackgroundManager.notifyParentSession cleanup scheduling", () => {
-  describe("#given 2 tasks for same parent and task A completed", () => {
-    test("#when task B is still running #then task A is cleaned up from this.tasks after delay even though task B is not done", async () => {
+  describe("#given 3 tasks for same parent and task A completed first", () => {
+    test("#when siblings are still running or pending #then task A remains until siblings also complete", async () => {
      // given
      const { manager } = createManager(false)
      managerUnderTest = manager
      fakeTimers = installFakeTimers()
-      const taskA = createTask({ id: "task-a", parentSessionID: "parent-1", description: "task A", status: "completed", completedAt: new Date("2026-03-11T00:01:00.000Z") })
+      const taskA = createTask({ id: "task-a", parentSessionID: "parent-1", description: "task A", status: "completed", completedAt: new Date() })
      const taskB = createTask({ id: "task-b", parentSessionID: "parent-1", description: "task B", status: "running" })
+      const taskC = createTask({ id: "task-c", parentSessionID: "parent-1", description: "task C", status: "pending" })
      getTasks(manager).set(taskA.id, taskA)
      getTasks(manager).set(taskB.id, taskB)
-      getPendingByParent(manager).set(taskA.parentSessionID, new Set([taskA.id, taskB.id]))
+      getTasks(manager).set(taskC.id, taskC)
+      getPendingByParent(manager).set(taskA.parentSessionID, new Set([taskA.id, taskB.id, taskC.id]))

      // when
      await notifyParentSessionForTest(manager, taskA)
@@ -177,8 +178,23 @@ describe("BackgroundManager.notifyParentSession cleanup scheduling", () => {

      // then
      expect(fakeTimers.getDelay(taskATimer)).toBeUndefined()
-      expect(getTasks(manager).has(taskA.id)).toBe(false)
+      expect(getTasks(manager).has(taskA.id)).toBe(true)
      expect(getTasks(manager).get(taskB.id)).toBe(taskB)
+      expect(getTasks(manager).get(taskC.id)).toBe(taskC)
+
+      // when
+      taskB.status = "completed"
+      taskB.completedAt = new Date()
+      taskC.status = "completed"
+      taskC.completedAt = new Date()
+      await notifyParentSessionForTest(manager, taskB)
+      await notifyParentSessionForTest(manager, taskC)
+      const rescheduledTaskATimer = getRequiredTimer(manager, taskA.id)
+      expect(fakeTimers.getDelay(rescheduledTaskATimer)).toBe(TASK_CLEANUP_DELAY_MS)
+      fakeTimers.run(rescheduledTaskATimer)
+
+      // then
+      expect(getTasks(manager).has(taskA.id)).toBe(false)
    })
  })

--- a/src/features/background-agent/task-poller.test.ts
+++ b/src/features/background-agent/task-poller.test.ts
@@ -417,6 +417,56 @@ describe("checkAndInterruptStaleTasks", () => {
    expect(task.status).toBe("cancelled")
    expect(onTaskInterrupted).toHaveBeenCalledWith(task)
  })
+
+  it('should NOT protect task when session has terminal non-idle status like "interrupted"', async () => {
+    //#given — lastUpdate is 5min old, session is "interrupted" (terminal, not active)
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000),
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session status is "interrupted" (terminal)
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "interrupted" } },
+    })
+
+    //#then — terminal statuses should not protect from stale timeout
+    expect(task.status).toBe("cancelled")
+    expect(task.error).toContain("Stale timeout")
+  })
+
+  it('should NOT protect task when session has unknown status type', async () => {
+    //#given — lastUpdate is 5min old, session has an unknown status
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000),
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session has unknown status type
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "some-weird-status" } },
+    })
+
+    //#then — unknown statuses should not protect from stale timeout
+    expect(task.status).toBe("cancelled")
+    expect(task.error).toContain("Stale timeout")
+  })
 })

 describe("pruneStaleTasksAndNotifications", () => {
--- a/src/features/background-agent/task-poller.ts
+++ b/src/features/background-agent/task-poller.ts
@@ -9,12 +9,12 @@ import {
  DEFAULT_MESSAGE_STALENESS_TIMEOUT_MS,
  DEFAULT_STALE_TIMEOUT_MS,
  MIN_RUNTIME_BEFORE_STALE_MS,
+  TERMINAL_TASK_TTL_MS,
  TASK_TTL_MS,
 } from "./constants"
 import { removeTaskToastTracking } from "./remove-task-toast-tracking"

-const TERMINAL_TASK_TTL_MS = 30 * 60 * 1000
-
+import { isActiveSessionStatus } from "./session-status-classifier"
 const TERMINAL_TASK_STATUSES = new Set<BackgroundTask["status"]>([
  "completed",
  "error",
@@ -121,7 +121,7 @@ export async function checkAndInterruptStaleTasks(args: {
    if (!startedAt || !sessionID) continue

    const sessionStatus = sessionStatuses?.[sessionID]?.type
-    const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
+    const sessionIsRunning = sessionStatus !== undefined && isActiveSessionStatus(sessionStatus)
    const runtime = now - startedAt.getTime()

    if (!task.progress?.lastUpdate) {
--- a/src/features/background-agent/types.ts
+++ b/src/features/background-agent/types.ts
@@ -9,9 +9,17 @@ export type BackgroundTaskStatus =
  | "cancelled"
  | "interrupt"

+export interface ToolCallWindow {
+  lastSignature: string
+  consecutiveCount: number
+  threshold: number
+}
+
 export interface TaskProgress {
  toolCalls: number
  lastTool?: string
+  toolCallWindow?: ToolCallWindow
+  countedToolPartIDs?: Set<string>
  lastUpdate: Date
  lastMessage?: string
  lastMessageAt?: Date
--- a/src/features/boulder-state/storage.ts
+++ b/src/features/boulder-state/storage.ts
@@ -59,10 +59,13 @@ export function appendSessionId(directory: string, sessionId: string): BoulderSt
    if (!Array.isArray(state.session_ids)) {
      state.session_ids = []
    }
+    const originalSessionIds = [...state.session_ids]
    state.session_ids.push(sessionId)
    if (writeBoulderState(directory, state)) {
      return state
    }
+    state.session_ids = originalSessionIds
+    return null
  }

  return state
--- a/src/features/builtin-commands/templates/start-work.ts
+++ b/src/features/builtin-commands/templates/start-work.ts
@@ -7,7 +7,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
  - \`--worktree <path>\` (optional): absolute path to an existing git worktree to work in
    - If specified and valid: hook pre-sets worktree_path in boulder.json
    - If specified but invalid: you must run \`git worktree add <path> <branch>\` first
-    - If omitted: you MUST choose or create a worktree (see Worktree Setup below)
+    - If omitted: work directly in the current project directory (no worktree)

 ## WHAT TO DO

@@ -24,7 +24,7 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session.
     - If ONE plan: auto-select it
     - If MULTIPLE plans: show list with timestamps, ask user to select

-4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json):
+4. **Worktree Setup** (ONLY when \`--worktree\` was explicitly specified and \`worktree_path\` not already set in boulder.json):
   1. \`git worktree list --porcelain\` — see available worktrees
   2. Create: \`git worktree add <absolute-path> <branch-or-HEAD>\`
   3. Update boulder.json to add \`"worktree_path": "<absolute-path>"\`
@@ -86,6 +86,38 @@ Reading plan and beginning execution...

 - The session_id is injected by the hook - use it directly
 - Always update boulder.json BEFORE starting work
-- Always set worktree_path in boulder.json before executing any tasks
+- If worktree_path is set in boulder.json, all work happens inside that worktree directory
 - Read the FULL plan file before delegating any tasks
-- Follow atlas delegation protocols (7-section format)`
+- Follow atlas delegation protocols (7-section format)
+
+## TASK BREAKDOWN (MANDATORY)
+
+After reading the plan file, you MUST decompose every plan task into granular, implementation-level sub-steps and register ALL of them as task/todo items BEFORE starting any work.
+
+**How to break down**:
+- Each plan checkbox item (e.g., \`- [ ] Add user authentication\`) must be split into concrete, actionable sub-tasks
+- Sub-tasks should be specific enough that each one touches a clear set of files/functions
+- Include: file to modify, what to change, expected behavior, and how to verify
+- Do NOT leave any task vague — "implement feature X" is NOT acceptable; "add validateToken() to src/auth/middleware.ts that checks JWT expiry and returns 401" IS acceptable
+
+**Example breakdown**:
+Plan task: \`- [ ] Add rate limiting to API\`
+→ Todo items:
+  1. Create \`src/middleware/rate-limiter.ts\` with sliding window algorithm (max 100 req/min per IP)
+  2. Add RateLimiter middleware to \`src/app.ts\` router chain, before auth middleware
+  3. Add rate limit headers (X-RateLimit-Limit, X-RateLimit-Remaining) to response in \`rate-limiter.ts\`
+  4. Add test: verify 429 response after exceeding limit in \`src/middleware/rate-limiter.test.ts\`
+  5. Add test: verify headers are present on normal responses
+
+Register these as task/todo items so progress is tracked and visible throughout the session.
+
+## WORKTREE COMPLETION
+
+When working in a worktree (\`worktree_path\` is set in boulder.json) and ALL plan tasks are complete:
+1. Commit all remaining changes in the worktree
+2. Switch to the main working directory (the original repo, NOT the worktree)
+3. Merge the worktree branch into the current branch: \`git merge <worktree-branch>\`
+4. If merge succeeds, clean up: \`git worktree remove <worktree-path>\`
+5. Remove the boulder.json state
+
+This is the DEFAULT behavior when \`--worktree\` was used. Skip merge only if the user explicitly instructs otherwise (e.g., asks to create a PR instead).`
--- a/src/features/claude-code-agent-loader/claude-model-mapper.test.ts
+++ b/src/features/claude-code-agent-loader/claude-model-mapper.test.ts
@@ -75,6 +75,10 @@ describe("mapClaudeModelToOpenCode", () => {
      expect(mapClaudeModelToOpenCode("anthropic/claude-sonnet-4-6")).toEqual({ providerID: "anthropic", modelID: "claude-sonnet-4-6" })
    })

+    it("#when called with anthropic/claude-3.5-sonnet #then normalizes dots before splitting into object format", () => {
+      expect(mapClaudeModelToOpenCode("anthropic/claude-3.5-sonnet")).toEqual({ providerID: "anthropic", modelID: "claude-3-5-sonnet" })
+    })
+
    it("#when called with openai/gpt-5.2 #then splits into object format", () => {
      expect(mapClaudeModelToOpenCode("openai/gpt-5.2")).toEqual({ providerID: "openai", modelID: "gpt-5.2" })
    })
--- a/src/features/claude-code-agent-loader/claude-model-mapper.ts
+++ b/src/features/claude-code-agent-loader/claude-model-mapper.ts
@@ -20,7 +20,16 @@ function mapClaudeModelString(model: string | undefined): string | undefined {
  const aliasResult = CLAUDE_CODE_ALIAS_MAP.get(trimmed.toLowerCase())
  if (aliasResult) return aliasResult

-  if (trimmed.includes("/")) return trimmed
+  if (trimmed.includes("/")) {
+    const [providerID, ...modelParts] = trimmed.split("/")
+    const modelID = modelParts.join("/")
+
+    if (providerID.length === 0 || modelID.length === 0) return trimmed
+
+    return modelID.startsWith("claude-")
+      ? `${providerID}/${normalizeModelID(modelID)}`
+      : trimmed
+  }

  const normalized = normalizeModelID(trimmed)

--- a/src/features/opencode-skill-loader/git-master-template-injection.test.ts
+++ b/src/features/opencode-skill-loader/git-master-template-injection.test.ts
@@ -153,3 +153,25 @@ describe("#given git_env_prefix with commit footer", () => {
 		})
 	})
 })
+
+describe("#given idempotency of prefixGitCommandsInBashCodeBlocks", () => {
+	describe("#when git_env_prefix is provided and template already has prefixed commands in env prefix section", () => {
+		it("#then does NOT double-prefix the already-prefixed commands", () => {
+			const result = injectGitMasterConfig(SAMPLE_TEMPLATE, {
+				commit_footer: false,
+				include_co_authored_by: false,
+				git_env_prefix: "GIT_MASTER=1",
+			})
+
+			expect(result).not.toContain("GIT_MASTER=1 GIT_MASTER=1 git status")
+			expect(result).not.toContain("GIT_MASTER=1 GIT_MASTER=1 git add")
+			expect(result).not.toContain("GIT_MASTER=1 GIT_MASTER=1 git commit")
+			expect(result).not.toContain("GIT_MASTER=1 GIT_MASTER=1 git push")
+
+			expect(result).toContain("GIT_MASTER=1 git status")
+			expect(result).toContain("GIT_MASTER=1 git add")
+			expect(result).toContain("GIT_MASTER=1 git commit")
+			expect(result).toContain("GIT_MASTER=1 git push")
+		})
+	})
+})
--- a/src/features/opencode-skill-loader/git-master-template-injection.ts
+++ b/src/features/opencode-skill-loader/git-master-template-injection.ts
@@ -72,8 +72,16 @@ function prefixGitCommandsInBashCodeBlocks(template: string, prefix: string): st

 function prefixGitCommandsInCodeBlock(codeBlock: string, prefix: string): string {
 	return codeBlock
-		.replace(LEADING_GIT_COMMAND_PATTERN, `$1${prefix} git`)
-		.replace(INLINE_GIT_COMMAND_PATTERN, `$1${prefix} git`)
+		.split("\n")
+		.map((line) =>
+			// Skip lines already carrying the prefix so re-injection never double-prefixes.
+			line.includes(prefix)
+				? line
+				: line
+						.replace(LEADING_GIT_COMMAND_PATTERN, `$1${prefix} git`)
+						.replace(INLINE_GIT_COMMAND_PATTERN, `$1${prefix} git`),
+		)
+		.join("\n")
 }

 function buildCommitFooterInjection(
--- a/src/features/skill-mcp-manager/env-cleaner.test.ts
+++ b/src/features/skill-mcp-manager/env-cleaner.test.ts
@@ -199,3 +199,236 @@ describe("EXCLUDED_ENV_PATTERNS", () => {
    }
  })
 })
+describe("secret env var filtering", () => {
+  it("filters out ANTHROPIC_API_KEY", () => {
+    // given
+    process.env.ANTHROPIC_API_KEY = "sk-ant-api03-secret"
+    process.env.PATH = "/usr/bin"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.ANTHROPIC_API_KEY).toBeUndefined()
+    expect(cleanEnv.PATH).toBe("/usr/bin")
+  })
+
+  it("filters out AWS_SECRET_ACCESS_KEY", () => {
+    // given
+    process.env.AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+    process.env.AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE"
+    process.env.HOME = "/home/user"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.AWS_SECRET_ACCESS_KEY).toBeUndefined()
+    expect(cleanEnv.AWS_ACCESS_KEY_ID).toBeUndefined()
+    expect(cleanEnv.HOME).toBe("/home/user")
+  })
+
+  it("filters out GITHUB_TOKEN", () => {
+    // given
+    process.env.GITHUB_TOKEN = "ghp_secrettoken123456789"
+    process.env.GITHUB_API_TOKEN = "another_secret_token"
+    process.env.SHELL = "/bin/bash"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.GITHUB_TOKEN).toBeUndefined()
+    expect(cleanEnv.GITHUB_API_TOKEN).toBeUndefined()
+    expect(cleanEnv.SHELL).toBe("/bin/bash")
+  })
+
+  it("filters out OPENAI_API_KEY", () => {
+    // given
+    process.env.OPENAI_API_KEY = "sk-secret123456789"
+    process.env.LANG = "en_US.UTF-8"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.OPENAI_API_KEY).toBeUndefined()
+    expect(cleanEnv.LANG).toBe("en_US.UTF-8")
+  })
+
+  it("filters out DATABASE_URL with credentials", () => {
+    // given
+    process.env.DATABASE_URL = "postgresql://user:password@localhost:5432/db"
+    process.env.DB_PASSWORD = "supersecretpassword"
+    process.env.TERM = "xterm-256color"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.DATABASE_URL).toBeUndefined()
+    expect(cleanEnv.DB_PASSWORD).toBeUndefined()
+    expect(cleanEnv.TERM).toBe("xterm-256color")
+  })
+})
+
+describe("suffix-based secret filtering", () => {
+  it("filters variables ending with _KEY", () => {
+    // given
+    process.env.MY_API_KEY = "secret-value"
+    process.env.SOME_KEY = "another-secret"
+    process.env.TMPDIR = "/tmp"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.MY_API_KEY).toBeUndefined()
+    expect(cleanEnv.SOME_KEY).toBeUndefined()
+    expect(cleanEnv.TMPDIR).toBe("/tmp")
+  })
+
+  it("filters variables ending with _SECRET", () => {
+    // given
+    process.env.AWS_SECRET = "secret-value"
+    process.env.JWT_SECRET = "jwt-secret-token"
+    process.env.USER = "testuser"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.AWS_SECRET).toBeUndefined()
+    expect(cleanEnv.JWT_SECRET).toBeUndefined()
+    expect(cleanEnv.USER).toBe("testuser")
+  })
+
+  it("filters variables ending with _TOKEN", () => {
+    // given
+    process.env.ACCESS_TOKEN = "token-value"
+    process.env.BEARER_TOKEN = "bearer-token"
+    process.env.HOME = "/home/user"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.ACCESS_TOKEN).toBeUndefined()
+    expect(cleanEnv.BEARER_TOKEN).toBeUndefined()
+    expect(cleanEnv.HOME).toBe("/home/user")
+  })
+
+  it("filters variables ending with _PASSWORD", () => {
+    // given
+    process.env.DB_PASSWORD = "db-password"
+    process.env.APP_PASSWORD = "app-secret"
+    process.env.NODE_ENV = "production"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.DB_PASSWORD).toBeUndefined()
+    expect(cleanEnv.APP_PASSWORD).toBeUndefined()
+    expect(cleanEnv.NODE_ENV).toBe("production")
+  })
+
+  it("filters variables ending with _CREDENTIAL", () => {
+    // given
+    process.env.GCP_CREDENTIAL = "json-credential"
+    process.env.AZURE_CREDENTIAL = "azure-creds"
+    process.env.PWD = "/current/dir"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.GCP_CREDENTIAL).toBeUndefined()
+    expect(cleanEnv.AZURE_CREDENTIAL).toBeUndefined()
+    expect(cleanEnv.PWD).toBe("/current/dir")
+  })
+
+  it("filters variables ending with _API_KEY", () => {
+    // given
+    // two vendor keys ending in _API_KEY plus a benign SHELL value
+    process.env.STRIPE_API_KEY = "sk_live_secret"
+    process.env.SENDGRID_API_KEY = "SG.secret"
+    process.env.SHELL = "/bin/zsh"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.STRIPE_API_KEY).toBeUndefined()
+    expect(cleanEnv.SENDGRID_API_KEY).toBeUndefined()
+    expect(cleanEnv.SHELL).toBe("/bin/zsh")
+  })
+})
+
+describe("safe environment variables preserved", () => {
+  it("preserves PATH", () => {
+    // given
+    process.env.PATH = "/usr/bin:/usr/local/bin"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.PATH).toBe("/usr/bin:/usr/local/bin")
+  })
+
+  it("preserves HOME", () => {
+    // given
+    process.env.HOME = "/home/testuser"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.HOME).toBe("/home/testuser")
+  })
+
+  it("preserves SHELL", () => {
+    // given
+    process.env.SHELL = "/bin/bash"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.SHELL).toBe("/bin/bash")
+  })
+
+  it("preserves LANG", () => {
+    // given
+    process.env.LANG = "en_US.UTF-8"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.LANG).toBe("en_US.UTF-8")
+  })
+
+  it("preserves TERM", () => {
+    // given
+    process.env.TERM = "xterm-256color"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then
+    expect(cleanEnv.TERM).toBe("xterm-256color")
+  })
+
+  it("preserves TMPDIR", () => {
+    // given
+    process.env.TMPDIR = "/tmp"
+
+    // when
+    const cleanEnv = createCleanMcpEnvironment()
+
+    // then - TMPDIR is expected to come through unchanged
+    expect(cleanEnv.TMPDIR).toBe("/tmp")
+  })
+})
--- a/src/features/skill-mcp-manager/env-cleaner.ts
+++ b/src/features/skill-mcp-manager/env-cleaner.ts
@@ -1,10 +1,28 @@
 // Filters npm/pnpm/yarn config env vars that break MCP servers in pnpm projects (#456)
+// Also filters secret-containing env vars to prevent exposure to malicious stdio MCP servers (#B-02)
 export const EXCLUDED_ENV_PATTERNS: RegExp[] = [
+  // npm/pnpm/yarn config patterns (original)
   /^NPM_CONFIG_/i,
   /^npm_config_/,
   /^YARN_/,
   /^PNPM_/,
   /^NO_UPDATE_NOTIFIER$/,
+
+  // Specific high-risk secret env vars (explicit, whole-name matches)
+  /^ANTHROPIC_API_KEY$/i,
+  /^AWS_ACCESS_KEY_ID$/i,
+  /^AWS_SECRET_ACCESS_KEY$/i,
+  /^GITHUB_TOKEN$/i,
+  /^DATABASE_URL$/i,
+  /^OPENAI_API_KEY$/i,
+
+  // Suffix-based patterns for common secret naming conventions (case-insensitive)
+  /_KEY$/i,
+  /_SECRET$/i,
+  /_TOKEN$/i,
+  /_PASSWORD$/i,
+  /_CREDENTIALS?$/i, // singular and plural, e.g. GOOGLE_APPLICATION_CREDENTIALS
+  /_API_KEY$/i, // already implied by /_KEY$/i; kept as an explicit entry
 ]

 export function createCleanMcpEnvironment(
--- a/src/features/task-toast-manager/manager.test.ts
+++ b/src/features/task-toast-manager/manager.test.ts
@@ -279,6 +279,116 @@ describe("TaskToastManager", () => {
    })
  })

+  describe("model name display in task line", () => {
+    test("should show model name before category when modelInfo exists", () => {
+      // given - a task with category and modelInfo
+      const task = {
+        id: "task_model_display",
+        description: "Build UI component",
+        agent: "sisyphus-junior",
+        isBackground: true,
+        category: "deep",
+        modelInfo: { model: "openai/gpt-5.3-codex", type: "category-default" as const },
+      }
+
+      // when - addTask is called
+      toastManager.addTask(task)
+
+      // then - toast should show model name before category like "gpt-5.3-codex: deep"
+      const call = mockClient.tui.showToast.mock.calls[0][0]
+      expect(call.body.message).toContain("gpt-5.3-codex: deep")
+      expect(call.body.message).not.toContain("sisyphus-junior/deep")
+    })
+
+    test("should strip provider prefix from model name", () => {
+      // given - a task with provider-prefixed model
+      const task = {
+        id: "task_strip_provider",
+        description: "Fix styles",
+        agent: "sisyphus-junior",
+        isBackground: false,
+        category: "visual-engineering",
+        modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const },
+      }
+
+      // when - addTask is called
+      toastManager.addTask(task)
+
+      // then - should show model ID without provider prefix
+      const call = mockClient.tui.showToast.mock.calls[0][0]
+      expect(call.body.message).toContain("gemini-3.1-pro: visual-engineering")
+    })
+
+    test("should fall back to agent/category format when no modelInfo", () => {
+      // given - a task without modelInfo
+      const task = {
+        id: "task_no_model",
+        description: "Quick fix",
+        agent: "sisyphus-junior",
+        isBackground: true,
+        category: "quick",
+      }
+
+      // when - addTask is called
+      toastManager.addTask(task)
+
+      // then - should use old format with agent name
+      const call = mockClient.tui.showToast.mock.calls[0][0]
+      expect(call.body.message).toContain("sisyphus-junior/quick")
+    })
+
+    test("should show model name without category when category is absent", () => {
+      // given - a task with modelInfo but no category
+      const task = {
+        id: "task_model_no_cat",
+        description: "Explore codebase",
+        agent: "explore",
+        isBackground: true,
+        modelInfo: { model: "anthropic/claude-sonnet-4-6", type: "category-default" as const },
+      }
+
+      // when - addTask is called
+      toastManager.addTask(task)
+
+      // then - should show just the model name in parens
+      const call = mockClient.tui.showToast.mock.calls[0][0]
+      expect(call.body.message).toContain("(claude-sonnet-4-6)")
+    })
+
+    test("should show model name in queued tasks too", () => {
+      // given - a concurrency manager that limits to 1
+      const limitedConcurrency = {
+        getConcurrencyLimit: mock(() => 1),
+      } as unknown as ConcurrencyManager
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      const limitedManager = new TaskToastManager(mockClient as any, limitedConcurrency)
+
+      limitedManager.addTask({
+        id: "task_running",
+        description: "Running task",
+        agent: "sisyphus-junior",
+        isBackground: true,
+        category: "deep",
+        modelInfo: { model: "openai/gpt-5.3-codex", type: "category-default" as const },
+      })
+      limitedManager.addTask({
+        id: "task_queued",
+        description: "Queued task",
+        agent: "sisyphus-junior",
+        isBackground: true,
+        category: "quick",
+        status: "queued",
+        modelInfo: { model: "anthropic/claude-haiku-4-5", type: "category-default" as const },
+      })
+
+      // when - the queued task toast fires
+      const lastCall = mockClient.tui.showToast.mock.calls[1][0]
+
+      // then - queued task should also show model name
+      expect(lastCall.body.message).toContain("claude-haiku-4-5: quick")
+    })
+  })
+
  describe("updateTaskModelBySession", () => {
    test("updates task model info and shows fallback toast", () => {
      // given - task without model info
--- a/src/features/task-toast-manager/manager.ts
+++ b/src/features/task-toast-manager/manager.ts
@@ -127,6 +127,13 @@ export class TaskToastManager {
    const queued = this.getQueuedTasks()
    const concurrencyInfo = this.getConcurrencyInfo()

+    const formatTaskIdentifier = (task: TrackedTask): string => {
+      // Label priority: "model: category" > model > "agent/category" > agent.
+      const { agent, category } = task
+      const shortModel = task.modelInfo?.model?.split("/").pop() // text after the last "/"
+      if (shortModel) return category ? `${shortModel}: ${category}` : shortModel
+      return category ? `${agent}/${category}` : agent
+    }
    const lines: string[] = []

    const isFallback = newTask.modelInfo && (
@@ -151,9 +158,9 @@ export class TaskToastManager {
        const duration = this.formatDuration(task.startedAt)
        const bgIcon = task.isBackground ? "[BG]" : "[RUN]"
        const isNew = task.id === newTask.id ? " ← NEW" : ""
-        const categoryInfo = task.category ? `/${task.category}` : ""
+        const taskId = formatTaskIdentifier(task)
        const skillsInfo = task.skills?.length ? ` [${task.skills.join(", ")}]` : ""
-        lines.push(`${bgIcon} ${task.description} (${task.agent}${categoryInfo})${skillsInfo} - ${duration}${isNew}`)
+        lines.push(`${bgIcon} ${task.description} (${taskId})${skillsInfo} - ${duration}${isNew}`)
      }
    }

@@ -162,10 +169,10 @@ export class TaskToastManager {
      lines.push(`Queued (${queued.length}):`)
      for (const task of queued) {
        const bgIcon = task.isBackground ? "[Q]" : "[W]"
-        const categoryInfo = task.category ? `/${task.category}` : ""
+        const taskId = formatTaskIdentifier(task)
        const skillsInfo = task.skills?.length ? ` [${task.skills.join(", ")}]` : ""
        const isNew = task.id === newTask.id ? " ← NEW" : ""
-        lines.push(`${bgIcon} ${task.description} (${task.agent}${categoryInfo})${skillsInfo} - Queued${isNew}`)
+        lines.push(`${bgIcon} ${task.description} (${taskId})${skillsInfo} - Queued${isNew}`)
      }
    }

--- a/src/hooks/anthropic-context-window-limit-recovery/parser.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/parser.ts
@@ -70,7 +70,7 @@ function isTokenLimitError(text: string): boolean {
    return false
  }
  const lower = text.toLowerCase()
-  return TOKEN_LIMIT_KEYWORDS.some((kw) => lower.includes(kw.toLowerCase()))
+  return TOKEN_LIMIT_KEYWORDS.some((kw) => lower.includes(kw))
 }

 export function parseAnthropicTokenLimitError(err: unknown): ParsedTokenLimitError | null {
--- a/src/hooks/atlas/session-last-agent.ts
+++ b/src/hooks/atlas/session-last-agent.ts
@@ -18,9 +18,9 @@ function getLastAgentFromMessageDir(messageDir: string): string | null {
    const files = readdirSync(messageDir)
      .filter((fileName) => fileName.endsWith(".json"))
      .sort()
-      .reverse()

-    for (const fileName of files) {
+    for (let i = files.length - 1; i >= 0; i--) {
+      const fileName = files[i]
      try {
        const content = readFileSync(join(messageDir, fileName), "utf-8")
        const parsed = JSON.parse(content) as { agent?: unknown }
--- a/src/hooks/auto-slash-command/auto-slash-command-leak.test.ts
+++ b/src/hooks/auto-slash-command/auto-slash-command-leak.test.ts
@@ -58,8 +58,8 @@ describe("createAutoSlashCommandHook leak prevention", () => {
  })

  describe("#given hook with sessionProcessedCommandExecutions", () => {
-    describe("#when same command executed twice within TTL for same session", () => {
-      it("#then second execution is deduplicated", async () => {
+    describe("#when same command executed twice after fallback dedup window", () => {
+      it("#then second execution is treated as intentional rerun", async () => {
        //#given
        const nowSpy = spyOn(Date, "now")
        try {
@@ -68,6 +68,61 @@ describe("createAutoSlashCommandHook leak prevention", () => {
          const firstOutput = createCommandOutput("first")
          const secondOutput = createCommandOutput("second")

+          //#when
+          nowSpy.mockReturnValue(0)
+          await hook["command.execute.before"](input, firstOutput)
+          nowSpy.mockReturnValue(101)
+          await hook["command.execute.before"](input, secondOutput)
+
+          //#then
+          expect(executeSlashCommandMock).toHaveBeenCalledTimes(2)
+          expect(firstOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
+          expect(secondOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
+        } finally {
+          nowSpy.mockRestore()
+        }
+      })
+    })
+
+    describe("#when same command is repeated within fallback dedup window", () => {
+      it("#then duplicate dispatch is suppressed", async () => {
+        //#given
+        const nowSpy = spyOn(Date, "now")
+        try {
+          const hook = createAutoSlashCommandHook()
+          const input = createCommandInput("session-dedup", "leak-test-command")
+          const firstOutput = createCommandOutput("first")
+          const secondOutput = createCommandOutput("second")
+
+          //#when
+          nowSpy.mockReturnValue(0)
+          await hook["command.execute.before"](input, firstOutput)
+          nowSpy.mockReturnValue(99)
+          await hook["command.execute.before"](input, secondOutput)
+
+          //#then
+          expect(executeSlashCommandMock).toHaveBeenCalledTimes(1)
+          expect(firstOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
+          expect(secondOutput.parts[0].text).toBe("second")
+        } finally {
+          nowSpy.mockRestore()
+        }
+      })
+    })
+
+    describe("#when same event identifier is dispatched twice", () => {
+      it("#then second dispatch is deduplicated regardless of elapsed seconds", async () => {
+        //#given
+        const nowSpy = spyOn(Date, "now")
+        try {
+          const hook = createAutoSlashCommandHook()
+          const input: CommandExecuteBeforeInput = {
+            ...createCommandInput("session-dedup", "leak-test-command"),
+            eventID: "event-1",
+          }
+          const firstOutput = createCommandOutput("first")
+          const secondOutput = createCommandOutput("second")
+
          //#when
          nowSpy.mockReturnValue(0)
          await hook["command.execute.before"](input, firstOutput)
@@ -83,32 +138,6 @@ describe("createAutoSlashCommandHook leak prevention", () => {
        }
      })
    })
-
-    describe("#when same command is repeated after TTL expires", () => {
-      it("#then command executes again", async () => {
-        //#given
-        const nowSpy = spyOn(Date, "now")
-        try {
-          const hook = createAutoSlashCommandHook()
-          const input = createCommandInput("session-dedup", "leak-test-command")
-          const firstOutput = createCommandOutput("first")
-          const secondOutput = createCommandOutput("second")
-
-          //#when
-          nowSpy.mockReturnValue(0)
-          await hook["command.execute.before"](input, firstOutput)
-          nowSpy.mockReturnValue(30_001)
-          await hook["command.execute.before"](input, secondOutput)
-
-          //#then
-          expect(executeSlashCommandMock).toHaveBeenCalledTimes(2)
-          expect(firstOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
-          expect(secondOutput.parts[0].text).toContain(AUTO_SLASH_COMMAND_TAG_OPEN)
-        } finally {
-          nowSpy.mockRestore()
-        }
-      })
-    })
  })

  describe("#given hook with entries from multiple sessions", () => {
--- a/src/hooks/auto-slash-command/executor-resolution.test.ts
+++ b/src/hooks/auto-slash-command/executor-resolution.test.ts
@@ -0,0 +1,98 @@
+import { describe, expect, it, mock } from "bun:test"
+import type { LoadedSkill } from "../../features/opencode-skill-loader"
+
+mock.module("../../shared", () => ({
+  resolveCommandsInText: async (content: string) => content,
+  resolveFileReferencesInText: async (content: string) => content,
+}))
+
+mock.module("../../tools/slashcommand", () => ({
+  discoverCommandsSync: () => [
+    {
+      name: "shadowed",
+      metadata: { name: "shadowed", description: "builtin" },
+      content: "builtin template",
+      scope: "builtin",
+    },
+    {
+      name: "shadowed",
+      metadata: { name: "shadowed", description: "project" },
+      content: "project template",
+      scope: "project",
+    },
+  ],
+}))
+
+mock.module("../../features/opencode-skill-loader", () => ({
+  discoverAllSkills: async (): Promise<LoadedSkill[]> => [],
+}))
+
+const { executeSlashCommand } = await import("./executor")
+
+function createRestrictedSkill(): LoadedSkill { // fixture: a skill limited to one agent
+  return {
+    name: "restricted-skill",
+    definition: {
+      name: "restricted-skill",
+      description: "restricted",
+      template: "restricted template",
+      agent: "hephaestus", // only this agent may invoke the skill in the tests below
+    },
+    scope: "user",
+  }
+}
+
+describe("executeSlashCommand resolution semantics", () => {
+  it("returns project command when project and builtin names collide", async () => {
+    //#given
+    const parsed = {
+      command: "shadowed",
+      args: "",
+      raw: "/shadowed",
+    }
+
+    //#when
+    const result = await executeSlashCommand(parsed, { skills: [] })
+
+    //#then
+    expect(result.success).toBe(true)
+    expect(result.replacementText).toContain("**Scope**: project")
+    expect(result.replacementText).toContain("project template")
+    expect(result.replacementText).not.toContain("builtin template")
+  })
+
+  it("blocks slash skill invocation when invoking agent is missing", async () => {
+    //#given
+    const parsed = {
+      command: "restricted-skill",
+      args: "",
+      raw: "/restricted-skill",
+    }
+
+    //#when
+    const result = await executeSlashCommand(parsed, { skills: [createRestrictedSkill()] })
+
+    //#then
+    expect(result.success).toBe(false)
+    expect(result.error).toBe('Skill "restricted-skill" is restricted to agent "hephaestus"')
+  })
+
+  it("allows slash skill invocation when invoking agent matches restriction", async () => {
+    //#given
+    const parsed = {
+      command: "restricted-skill",
+      args: "",
+      raw: "/restricted-skill",
+    }
+
+    //#when
+    const result = await executeSlashCommand(parsed, {
+      skills: [createRestrictedSkill()],
+      agent: "hephaestus",
+    })
+
+    //#then
+    expect(result.success).toBe(true)
+    expect(result.replacementText).toContain("restricted template")
+  })
+})
--- a/src/hooks/auto-slash-command/executor.ts
+++ b/src/hooks/auto-slash-command/executor.ts
@@ -41,14 +41,9 @@ export interface ExecutorOptions {
  skills?: LoadedSkill[]
  pluginsEnabled?: boolean
  enabledPluginsOverride?: Record<string, boolean>
+  agent?: string
 }

-function filterDiscoveredCommandsByScope(
-  commands: DiscoveredCommandInfo[],
-  scope: DiscoveredCommandInfo["scope"],
-): DiscoveredCommandInfo[] {
-  return commands.filter(command => command.scope === scope)
-}

 async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandInfo[]> {
  const discoveredCommands = discoverCommandsSync(process.cwd(), {
@@ -59,14 +54,18 @@ async function discoverAllCommands(options?: ExecutorOptions): Promise<CommandIn
  const skills = options?.skills ?? await discoverAllSkills()
  const skillCommands = skills.map(skillToCommandInfo)

+  const scopeOrder: DiscoveredCommandInfo["scope"][] = ["project", "user", "opencode-project", "opencode", "builtin", "plugin"]
+  const grouped = new Map<string, DiscoveredCommandInfo[]>()
+  for (const cmd of discoveredCommands) {
+    const list = grouped.get(cmd.scope) ?? []
+    list.push(cmd)
+    grouped.set(cmd.scope, list)
+  }
+  const orderedCommands = scopeOrder.flatMap((scope) => grouped.get(scope) ?? [])
+
  return [
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "builtin"),
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "opencode-project"),
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "project"),
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "opencode"),
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "user"),
    ...skillCommands,
-    ...filterDiscoveredCommandsByScope(discoveredCommands, "plugin"),
+    ...orderedCommands,
  ]
 }

@@ -141,6 +140,15 @@ export async function executeSlashCommand(parsed: ParsedSlashCommand, options?:
    }
  }

+  if (command.scope === "skill" && command.metadata.agent) {
+    if (!options?.agent || command.metadata.agent !== options.agent) {
+      return {
+        success: false,
+        error: `Skill "${command.name}" is restricted to agent "${command.metadata.agent}"`,
+      }
+    }
+  }
+
  try {
    const template = await formatCommandTemplate(command, parsed.args)
    return {
--- a/src/hooks/auto-slash-command/hook.ts
+++ b/src/hooks/auto-slash-command/hook.ts
@@ -18,6 +18,8 @@ import type {
 } from "./types"
 import type { LoadedSkill } from "../../features/opencode-skill-loader"

+const COMMAND_EXECUTE_FALLBACK_DEDUP_TTL_MS = 100
+
 function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null
 }
@@ -35,6 +37,33 @@ function getDeletedSessionID(properties: unknown): string | null {
  return typeof info.id === "string" ? info.id : null
 }

+function getCommandExecutionEventID(input: CommandExecuteBeforeInput): string | null {
+  // Field names that can carry a per-dispatch identifier, probed in this order;
+  // the value of the first one holding a non-empty string is returned.
+  const idFieldNames = [
+    "messageID",
+    "messageId",
+    "eventID",
+    "eventId",
+    "invocationID",
+    "invocationId",
+    "commandID",
+    "commandId",
+  ]
+
+  const rawInput: unknown = input
+  if (!isRecord(rawInput)) return null
+
+  const matchingField = idFieldNames.find((fieldName) => {
+    const value = rawInput[fieldName]
+    return typeof value === "string" && value.length > 0
+  })
+
+  // `find` yields undefined when no field qualified, which maps to null here.
+  const eventID = matchingField === undefined ? null : rawInput[matchingField]
+  return typeof eventID === "string" ? eventID : null
+}
+
 export interface AutoSlashCommandHookOptions {
  skills?: LoadedSkill[]
  pluginsEnabled?: boolean
@@ -96,7 +125,12 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
        args: parsed.args,
      })

-      const result = await executeSlashCommand(parsed, executorOptions)
+      const executionOptions: ExecutorOptions = {
+        ...executorOptions,
+        agent: input.agent,
+      }
+
+      const result = await executeSlashCommand(parsed, executionOptions)

      const idx = findSlashCommandPartIndex(output.parts)
      if (idx < 0) {
@@ -125,7 +159,10 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
      input: CommandExecuteBeforeInput,
      output: CommandExecuteBeforeOutput
    ): Promise<void> => {
-      const commandKey = `${input.sessionID}:${input.command.toLowerCase()}:${input.arguments || ""}`
+      const eventID = getCommandExecutionEventID(input)
+      const commandKey = eventID
+        ? `${input.sessionID}:event:${eventID}`
+        : `${input.sessionID}:fallback:${input.command.toLowerCase()}:${input.arguments || ""}`
      if (sessionProcessedCommandExecutions.has(commandKey)) {
        return
      }
@@ -142,7 +179,12 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
        raw: `/${input.command}${input.arguments ? " " + input.arguments : ""}`,
      }

-      const result = await executeSlashCommand(parsed, executorOptions)
+      const executionOptions: ExecutorOptions = {
+        ...executorOptions,
+        agent: input.agent,
+      }
+
+      const result = await executeSlashCommand(parsed, executionOptions)

      if (!result.success || !result.replacementText) {
        log(`[auto-slash-command] command.execute.before - command not found in our executor`, {
@@ -153,7 +195,10 @@ export function createAutoSlashCommandHook(options?: AutoSlashCommandHookOptions
        return
      }

-      sessionProcessedCommandExecutions.add(commandKey)
+      sessionProcessedCommandExecutions.add(
+        commandKey,
+        eventID ? undefined : COMMAND_EXECUTE_FALLBACK_DEDUP_TTL_MS
+      )

      const taggedContent = `${AUTO_SLASH_COMMAND_TAG_OPEN}\n${result.replacementText}\n${AUTO_SLASH_COMMAND_TAG_CLOSE}`

--- a/src/hooks/auto-slash-command/processed-command-store.ts
+++ b/src/hooks/auto-slash-command/processed-command-store.ts
@@ -24,7 +24,7 @@ function removeSessionEntries(entries: Map<string, number>, sessionID: string):

 export interface ProcessedCommandStore {
  has(commandKey: string): boolean
-  add(commandKey: string): void
+  add(commandKey: string, ttlMs?: number): void
  cleanupSession(sessionID: string): void
  clear(): void
 }
@@ -38,11 +38,11 @@ export function createProcessedCommandStore(): ProcessedCommandStore {
      entries = pruneExpiredEntries(entries, now)
      return entries.has(commandKey)
    },
-    add(commandKey: string): void {
+    add(commandKey: string, ttlMs = PROCESSED_COMMAND_TTL_MS): void {
      const now = Date.now()
      entries = pruneExpiredEntries(entries, now)
      entries.delete(commandKey)
-      entries.set(commandKey, now + PROCESSED_COMMAND_TTL_MS)
+      entries.set(commandKey, now + ttlMs)
      entries = trimProcessedEntries(entries)
    },
    cleanupSession(sessionID: string): void {
--- a/src/hooks/auto-slash-command/types.ts
+++ b/src/hooks/auto-slash-command/types.ts
@@ -26,6 +26,15 @@ export interface CommandExecuteBeforeInput {
  command: string
  sessionID: string
  arguments: string
+  agent?: string
+  messageID?: string
+  messageId?: string
+  eventID?: string
+  eventId?: string
+  invocationID?: string
+  invocationId?: string
+  commandID?: string
+  commandId?: string
 }

 export interface CommandExecuteBeforeOutput {
--- a/src/hooks/auto-update-checker/hook/background-update-check.ts
+++ b/src/hooks/auto-update-checker/hook/background-update-check.ts
@@ -1,6 +1,9 @@
 import type { PluginInput } from "@opencode-ai/plugin"
+import { existsSync } from "node:fs"
+import { join } from "node:path"
 import { runBunInstallWithDetails } from "../../../cli/config-manager"
 import { log } from "../../../shared/logger"
+import { getOpenCodeCacheDir, getOpenCodeConfigPaths } from "../../../shared"
 import { invalidatePackage } from "../cache"
 import { PACKAGE_NAME } from "../constants"
 import { extractChannel } from "../version-channel"
@@ -11,9 +14,36 @@ function getPinnedVersionToastMessage(latestVersion: string): string {
  return `Update available: ${latestVersion} (version pinned, update manually)`
 }

-async function runBunInstallSafe(): Promise<boolean> {
+/**
+ * Picks the directory that the background update should run `bun install` in.
+ * Same logic as the doctor check: config-dir wins when the package is installed
+ * there, then cache-dir, and config-dir again when no install is detected.
+ */
+function resolveActiveInstallWorkspace(): string {
+  const { configDir } = getOpenCodeConfigPaths({ binary: "opencode" })
+  const cacheDir = getOpenCodeCacheDir()
+
+  // An install is detected by the package's own package.json under node_modules.
+  const hasInstall = (workspaceDir: string): boolean =>
+    existsSync(join(workspaceDir, "node_modules", PACKAGE_NAME, "package.json"))
+
+  if (hasInstall(configDir)) {
+    log(`[auto-update-checker] Active workspace: config-dir (${configDir})`)
+    return configDir
+  }
+
+  if (hasInstall(cacheDir)) {
+    log(`[auto-update-checker] Active workspace: cache-dir (${cacheDir})`)
+    return cacheDir
+  }
+
+  log(`[auto-update-checker] Active workspace: config-dir (default, no install detected)`)
+  return configDir
+}
+
+async function runBunInstallSafe(workspaceDir: string): Promise<boolean> {
  try {
-    const result = await runBunInstallWithDetails({ outputMode: "pipe" })
+    const result = await runBunInstallWithDetails({ outputMode: "pipe", workspaceDir })
    if (!result.success && result.error) {
      log("[auto-update-checker] bun install error:", result.error)
    }
@@ -82,7 +112,8 @@ export async function runBackgroundUpdateCheck(

  invalidatePackage(PACKAGE_NAME)

-  const installSuccess = await runBunInstallSafe()
+  const activeWorkspace = resolveActiveInstallWorkspace()
+  const installSuccess = await runBunInstallSafe(activeWorkspace)

  if (installSuccess) {
    await showAutoUpdatedToast(ctx, currentVersion, latestVersion)
--- a/src/hooks/auto-update-checker/hook/workspace-resolution.test.ts
+++ b/src/hooks/auto-update-checker/hook/workspace-resolution.test.ts
@@ -0,0 +1,223 @@
+import type { PluginInput } from "@opencode-ai/plugin"
+import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test"
+import { existsSync, mkdirSync, rmSync, writeFileSync } from "node:fs"
+import { join } from "node:path"
+
+type PluginEntry = {
+  entry: string
+  isPinned: boolean
+  pinnedVersion: string | null
+  configPath: string
+}
+
+type ToastMessageGetter = (isUpdate: boolean, version?: string) => string
+
+function createPluginEntry(overrides?: Partial<PluginEntry>): PluginEntry {
+  const baseline: PluginEntry = {
+    entry: "oh-my-opencode@3.4.0",
+    isPinned: false,
+    pinnedVersion: null,
+    configPath: "/test/opencode.json",
+  }
+  return { ...baseline, ...overrides } // later spread wins, so overrides replace defaults
+}
+
+const TEST_DIR = join(import.meta.dir, "__test-workspace-resolution__")
+const TEST_CACHE_DIR = join(TEST_DIR, "cache")
+const TEST_CONFIG_DIR = join(TEST_DIR, "config")
+
+const mockFindPluginEntry = mock((_directory: string): PluginEntry | null => createPluginEntry())
+const mockGetCachedVersion = mock((): string | null => "3.4.0")
+const mockGetLatestVersion = mock(async (): Promise<string | null> => "3.5.0")
+const mockExtractChannel = mock(() => "latest")
+const mockInvalidatePackage = mock(() => {})
+const mockShowUpdateAvailableToast = mock(
+  async (_ctx: PluginInput, _latestVersion: string, _getToastMessage: ToastMessageGetter): Promise<void> => {}
+)
+const mockShowAutoUpdatedToast = mock(
+  async (_ctx: PluginInput, _fromVersion: string, _toVersion: string): Promise<void> => {}
+)
+const mockSyncCachePackageJsonToIntent = mock(() => ({ synced: true, error: null }))
+
+const mockRunBunInstallWithDetails = mock(
+  async (opts?: { outputMode?: string; workspaceDir?: string }) => {
+    return { success: true }
+  }
+)
+
+mock.module("../checker", () => ({
+  findPluginEntry: mockFindPluginEntry,
+  getCachedVersion: mockGetCachedVersion,
+  getLatestVersion: mockGetLatestVersion,
+  revertPinnedVersion: mock(() => false),
+  syncCachePackageJsonToIntent: mockSyncCachePackageJsonToIntent,
+}))
+mock.module("../version-channel", () => ({ extractChannel: mockExtractChannel }))
+mock.module("../cache", () => ({ invalidatePackage: mockInvalidatePackage }))
+mock.module("../../../cli/config-manager", () => ({
+  runBunInstallWithDetails: mockRunBunInstallWithDetails,
+}))
+mock.module("./update-toasts", () => ({
+  showUpdateAvailableToast: mockShowUpdateAvailableToast,
+  showAutoUpdatedToast: mockShowAutoUpdatedToast,
+}))
+mock.module("../../../shared/logger", () => ({ log: () => {} }))
+mock.module("../../../shared", () => ({
+  getOpenCodeCacheDir: () => TEST_CACHE_DIR,
+  getOpenCodeConfigPaths: () => ({
+    configDir: TEST_CONFIG_DIR,
+    configJson: join(TEST_CONFIG_DIR, "opencode.json"),
+    configJsonc: join(TEST_CONFIG_DIR, "opencode.jsonc"),
+    packageJson: join(TEST_CONFIG_DIR, "package.json"),
+    omoConfig: join(TEST_CONFIG_DIR, "oh-my-opencode.json"),
+  }),
+  getOpenCodeConfigDir: () => TEST_CONFIG_DIR,
+}))
+
+// Mock constants BEFORE importing the module
+const ORIGINAL_PACKAGE_NAME = "oh-my-opencode"
+mock.module("../constants", () => ({
+  PACKAGE_NAME: ORIGINAL_PACKAGE_NAME,
+  CACHE_DIR: TEST_CACHE_DIR,
+  USER_CONFIG_DIR: TEST_CONFIG_DIR,
+}))
+
+// Need to mock getOpenCodeCacheDir and getOpenCodeConfigPaths before importing the module
+mock.module("../../../shared/data-path", () => ({
+  getDataDir: () => join(TEST_DIR, "data"),
+  getOpenCodeStorageDir: () => join(TEST_DIR, "data", "opencode", "storage"),
+  getCacheDir: () => TEST_DIR,
+  getOmoOpenCodeCacheDir: () => join(TEST_DIR, "oh-my-opencode"),
+  getOpenCodeCacheDir: () => TEST_CACHE_DIR,
+}))
+mock.module("../../../shared/opencode-config-dir", () => ({
+  getOpenCodeConfigDir: () => TEST_CONFIG_DIR,
+  getOpenCodeConfigPaths: () => ({
+    configDir: TEST_CONFIG_DIR,
+    configJson: join(TEST_CONFIG_DIR, "opencode.json"),
+    configJsonc: join(TEST_CONFIG_DIR, "opencode.jsonc"),
+    packageJson: join(TEST_CONFIG_DIR, "package.json"),
+    omoConfig: join(TEST_CONFIG_DIR, "oh-my-opencode.json"),
+  }),
+}))
+
+const modulePath = "./background-update-check?test"
+const { runBackgroundUpdateCheck } = await import(modulePath)
+
+describe("workspace resolution", () => {
+  const mockCtx = { directory: "/test" } as PluginInput
+  const getToastMessage: ToastMessageGetter = (isUpdate, version) =>
+    isUpdate ? `Update to ${version}` : "Up to date"
+
+  beforeEach(() => {
+    // Setup test directories
+    if (existsSync(TEST_DIR)) {
+      rmSync(TEST_DIR, { recursive: true, force: true })
+    }
+    mkdirSync(TEST_DIR, { recursive: true })
+
+    mockFindPluginEntry.mockReset()
+    mockGetCachedVersion.mockReset()
+    mockGetLatestVersion.mockReset()
+    mockExtractChannel.mockReset()
+    mockInvalidatePackage.mockReset()
+    mockRunBunInstallWithDetails.mockReset()
+    mockShowUpdateAvailableToast.mockReset()
+    mockShowAutoUpdatedToast.mockReset()
+
+    mockFindPluginEntry.mockReturnValue(createPluginEntry())
+    mockGetCachedVersion.mockReturnValue("3.4.0")
+    mockGetLatestVersion.mockResolvedValue("3.5.0")
+    mockExtractChannel.mockReturnValue("latest")
+    // Note: Don't call mockResolvedValue on mockRunBunInstallWithDetails here - it overrides the function that captures args
+    mockSyncCachePackageJsonToIntent.mockReturnValue({ synced: true, error: null })
+  })
+
+  afterEach(() => {
+    if (existsSync(TEST_DIR)) {
+      rmSync(TEST_DIR, { recursive: true, force: true })
+    }
+  })
+
+  describe("#given config-dir install exists but cache-dir does not", () => {
+    it("installs to config-dir, not cache-dir", async () => {
+      //#given - config-dir has installation, cache-dir does not
+      mkdirSync(join(TEST_CONFIG_DIR, "node_modules", "oh-my-opencode"), { recursive: true })
+      writeFileSync(
+        join(TEST_CONFIG_DIR, "package.json"),
+        JSON.stringify({ dependencies: { "oh-my-opencode": "3.4.0" } }, null, 2)
+      )
+      writeFileSync(
+        join(TEST_CONFIG_DIR, "node_modules", "oh-my-opencode", "package.json"),
+        JSON.stringify({ name: "oh-my-opencode", version: "3.4.0" }, null, 2)
+      )
+
+      // cache-dir should NOT exist
+      expect(existsSync(TEST_CACHE_DIR)).toBe(false)
+
+      //#when
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+
+      //#then - install should be called with config-dir
+      const mockCalls = mockRunBunInstallWithDetails.mock.calls
+      expect(mockCalls[0][0]?.workspaceDir).toBe(TEST_CONFIG_DIR)
+    })
+  })
+
+  describe("#given both config-dir and cache-dir exist", () => {
+    it("prefers config-dir over cache-dir", async () => {
+      //#given - both directories have installations
+      mkdirSync(join(TEST_CONFIG_DIR, "node_modules", "oh-my-opencode"), { recursive: true })
+      writeFileSync(
+        join(TEST_CONFIG_DIR, "package.json"),
+        JSON.stringify({ dependencies: { "oh-my-opencode": "3.4.0" } }, null, 2)
+      )
+      writeFileSync(
+        join(TEST_CONFIG_DIR, "node_modules", "oh-my-opencode", "package.json"),
+        JSON.stringify({ name: "oh-my-opencode", version: "3.4.0" }, null, 2)
+      )
+
+      mkdirSync(join(TEST_CACHE_DIR, "node_modules", "oh-my-opencode"), { recursive: true })
+      writeFileSync(
+        join(TEST_CACHE_DIR, "package.json"),
+        JSON.stringify({ dependencies: { "oh-my-opencode": "3.4.0" } }, null, 2)
+      )
+      writeFileSync(
+        join(TEST_CACHE_DIR, "node_modules", "oh-my-opencode", "package.json"),
+        JSON.stringify({ name: "oh-my-opencode", version: "3.4.0" }, null, 2)
+      )
+
+      //#when
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+
+      //#then - install should prefer config-dir
+      const mockCalls2 = mockRunBunInstallWithDetails.mock.calls
+      expect(mockCalls2[0][0]?.workspaceDir).toBe(TEST_CONFIG_DIR)
+    })
+  })
+
+  describe("#given only cache-dir install exists", () => {
+    it("falls back to cache-dir", async () => {
+      //#given - only cache-dir has installation
+      mkdirSync(join(TEST_CACHE_DIR, "node_modules", "oh-my-opencode"), { recursive: true })
+      writeFileSync(
+        join(TEST_CACHE_DIR, "package.json"),
+        JSON.stringify({ dependencies: { "oh-my-opencode": "3.4.0" } }, null, 2)
+      )
+      writeFileSync(
+        join(TEST_CACHE_DIR, "node_modules", "oh-my-opencode", "package.json"),
+        JSON.stringify({ name: "oh-my-opencode", version: "3.4.0" }, null, 2)
+      )
+
+      // config-dir should NOT exist
+      expect(existsSync(TEST_CONFIG_DIR)).toBe(false)
+
+      //#when
+      await runBackgroundUpdateCheck(mockCtx, true, getToastMessage)
+
+      //#then - install should fall back to cache-dir
+      const mockCalls3 = mockRunBunInstallWithDetails.mock.calls
+      expect(mockCalls3[0][0]?.workspaceDir).toBe(TEST_CACHE_DIR)
+    })
+  })
+})
--- a/src/hooks/claude-code-hooks/handlers/tool-execute-after-handler.ts
+++ b/src/hooks/claude-code-hooks/handlers/tool-execute-after-handler.ts
@@ -79,8 +79,6 @@ export function createToolExecuteAfterHandler(ctx: PluginInput, config: PluginCo
 			return
 		}

-		const claudeConfig = await loadClaudeHooksConfig()
-		const extendedConfig = await loadPluginExtendedConfig()

 		const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {}

@@ -96,6 +94,9 @@ export function createToolExecuteAfterHandler(ctx: PluginInput, config: PluginCo
 			return
 		}

+		const claudeConfig = await loadClaudeHooksConfig()
+		const extendedConfig = await loadPluginExtendedConfig()
+
 		const postClient: PostToolUseClient = {
 			session: {
 				messages: (opts) => ctx.client.session.messages(opts),
--- a/src/hooks/claude-code-hooks/handlers/tool-execute-before-handler.ts
+++ b/src/hooks/claude-code-hooks/handlers/tool-execute-before-handler.ts
@@ -43,8 +43,6 @@ export function createToolExecuteBeforeHandler(ctx: PluginInput, config: PluginC
 			log("todowrite: parsed todos string to array", { sessionID: input.sessionID })
 		}

-		const claudeConfig = await loadClaudeHooksConfig()
-		const extendedConfig = await loadPluginExtendedConfig()

 		appendTranscriptEntry(input.sessionID, {
 			type: "tool_use",
@@ -59,6 +57,9 @@ export function createToolExecuteBeforeHandler(ctx: PluginInput, config: PluginC
 			return
 		}

+		const claudeConfig = await loadClaudeHooksConfig()
+		const extendedConfig = await loadPluginExtendedConfig()
+
 		const preCtx: PreToolUseContext = {
 			sessionId: input.sessionID,
 			toolName: input.tool,
--- a/src/hooks/comment-checker/hook.ts
+++ b/src/hooks/comment-checker/hook.ts
@@ -3,6 +3,18 @@ import type { CommentCheckerConfig } from "../../config/schema"

 import z from "zod"

+const ApplyPatchMetadataSchema = z.object({
+  files: z.array(
+    z.object({
+      filePath: z.string(),
+      movePath: z.string().optional(),
+      before: z.string(),
+      after: z.string(),
+      type: z.string().optional(),
+    }),
+  ),
+})
+
 import {
  initializeCommentCheckerCli,
  getCommentCheckerCliPathPromise,
@@ -104,17 +116,6 @@ export function createCommentCheckerHooks(config?: CommentCheckerConfig) {
        return
      }

-      const ApplyPatchMetadataSchema = z.object({
-        files: z.array(
-          z.object({
-            filePath: z.string(),
-            movePath: z.string().optional(),
-            before: z.string(),
-            after: z.string(),
-            type: z.string().optional(),
-          }),
-        ),
-      })

      if (toolLower === "apply_patch") {
        const parsed = ApplyPatchMetadataSchema.safeParse(output.metadata)
--- a/src/hooks/index.ts
+++ b/src/hooks/index.ts
@@ -52,3 +52,4 @@ export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
 export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";
 export { createJsonErrorRecoveryHook, JSON_ERROR_TOOL_EXCLUDE_LIST, JSON_ERROR_PATTERNS, JSON_ERROR_REMINDER } from "./json-error-recovery";
 export { createReadImageResizerHook } from "./read-image-resizer"
+export { createTodoDescriptionOverrideHook } from "./todo-description-override"
--- a/src/hooks/ralph-loop/completion-handler.ts
+++ b/src/hooks/ralph-loop/completion-handler.ts
@@ -23,6 +23,10 @@ export async function handleDetectedCompletion(
 	const { sessionID, state, loopState, directory, apiTimeoutMs } = input

 	if (state.ultrawork && !state.verification_pending) {
+		if (state.verification_session_id) {
+			ctx.client.session.abort({ path: { id: state.verification_session_id } }).catch(() => {})
+		}
+
 		const verificationState = loopState.markVerificationPending(sessionID)
 		if (!verificationState) {
 			log(`[${HOOK_NAME}] Failed to transition ultrawork loop to verification`, {
--- a/src/hooks/ralph-loop/pending-verification-handler.ts
+++ b/src/hooks/ralph-loop/pending-verification-handler.ts
@@ -1,11 +1,96 @@
 import type { PluginInput } from "@opencode-ai/plugin"
 import { log } from "../../shared/logger"
 import { HOOK_NAME } from "./constants"
+import { ULTRAWORK_VERIFICATION_PROMISE } from "./constants"
 import type { RalphLoopState } from "./types"
 import { handleFailedVerification } from "./verification-failure-handler"
+import { withTimeout } from "./with-timeout"
+
+type OpenCodeSessionMessage = {
+	info?: { role?: string }
+	parts?: Array<{ type?: string; text?: string }>
+}
+
+const ORACLE_AGENT_PATTERN = /Agent:\s*oracle/i // case-insensitive "Agent: oracle" marker
+const TASK_METADATA_SESSION_PATTERN = /<task_metadata>[\s\S]*?session_id:\s*([^\s<]+)[\s\S]*?<\/task_metadata>/i // group 1 = session_id value inside a <task_metadata> block
+const VERIFIED_PROMISE_PATTERN = new RegExp( // <promise>…</promise> tag wrapping the ultrawork verification promise
+	`<promise>\\s*${ULTRAWORK_VERIFICATION_PROMISE}\\s*<\\/promise>`, // the constant is interpolated unescaped, i.e. treated as regex source
+	"i", // no "g" flag, so repeated .test() calls carry no lastIndex state
+)
+
+// Joins the `text` parts of a message with newlines; non-text parts are
+// skipped, and a missing `parts` array or a missing `text` contributes "".
+function collectAssistantText(message: OpenCodeSessionMessage): string {
+	const { parts } = message
+	if (!Array.isArray(parts)) {
+		return ""
+	}
+	return parts
+		.filter((part) => part.type === "text")
+		.reduce((joined, part) => {
+			// Separator only once something has accumulated, so leading empty parts add none.
+			const separator = joined ? "\n" : ""
+			return joined + separator + (part.text ?? "")
+		}, "")
+}
+
+async function detectOracleVerificationFromParentSession( // scans the parent session's messages for an oracle session id; resolves to undefined when none is found
+	ctx: PluginInput,
+	parentSessionID: string,
+	directory: string,
+	apiTimeoutMs: number, // passed to withTimeout around the messages request
+): Promise<string | undefined> {
+	try {
+		const response = await withTimeout(
+			ctx.client.session.messages({
+				path: { id: parentSessionID },
+				query: { directory },
+			}),
+			apiTimeoutMs,
+		)
+		// Accept either a bare array or an object carrying the array under `data`; any other shape scans nothing.
+		const messagesResponse: unknown = response
+		const responseData =
+			typeof messagesResponse === "object" && messagesResponse !== null && "data" in messagesResponse
+				? (messagesResponse as { data?: unknown }).data
+				: undefined
+		const messageArray: unknown[] = Array.isArray(messagesResponse)
+			? messagesResponse
+			: Array.isArray(responseData)
+				? responseData
+				: []
+		// Walk from the end of the array backwards and return on the first usable match.
+		for (let index = messageArray.length - 1; index >= 0; index -= 1) {
+			const message = messageArray[index] as OpenCodeSessionMessage
+			if (message.info?.role !== "assistant") {
+				continue
+			}
+			// Evidence needs both the verification promise tag and the oracle agent marker in the same message.
+			const assistantText = collectAssistantText(message)
+			if (!VERIFIED_PROMISE_PATTERN.test(assistantText) || !ORACLE_AGENT_PATTERN.test(assistantText)) {
+				continue
+			}
+			// Group 1 is the session_id inside <task_metadata>; if it is absent, keep scanning toward the start.
+			const sessionMatch = assistantText.match(TASK_METADATA_SESSION_PATTERN)
+			const detectedOracleSessionID = sessionMatch?.[1]?.trim()
+			if (detectedOracleSessionID) {
+				return detectedOracleSessionID
+			}
+		}
+		// No assistant message carried usable evidence.
+		return undefined
+	} catch (error) { // best-effort: any failure is logged and reported as "no evidence"
+		log(`[${HOOK_NAME}] Failed to scan parent session for oracle verification evidence`, {
+			parentSessionID,
+			error: String(error),
+		})
+		return undefined
+	}
+}

 type LoopStateController = {
 	restartAfterFailedVerification: (sessionID: string, messageCountAtStart?: number) => RalphLoopState | null
+	setVerificationSessionID: (sessionID: string, verificationSessionID: string) => RalphLoopState | null
 }

 export async function handlePendingVerification(
@@ -33,6 +118,29 @@ export async function handlePendingVerification(
 	} = input

 	if (matchesParentSession || (verificationSessionID && matchesVerificationSession)) {
+		if (!verificationSessionID && state.session_id) {
+			const recoveredVerificationSessionID = await detectOracleVerificationFromParentSession(
+				ctx,
+				state.session_id,
+				directory,
+				apiTimeoutMs,
+			)
+
+			if (recoveredVerificationSessionID) {
+				const updatedState = loopState.setVerificationSessionID(
+					state.session_id,
+					recoveredVerificationSessionID,
+				)
+				if (updatedState) {
+					log(`[${HOOK_NAME}] Recovered missing verification session from parent evidence`, {
+						parentSessionID: state.session_id,
+						recoveredVerificationSessionID,
+					})
+					return
+				}
+			}
+		}
+
 		const restarted = await handleFailedVerification(ctx, {
 			state,
 			loopState,
--- a/src/hooks/ralph-loop/ralph-loop-event-handler.ts
+++ b/src/hooks/ralph-loop/ralph-loop-event-handler.ts
@@ -136,6 +136,13 @@ export function createRalphLoopEventHandler(
 				}

 				if (state.verification_pending) {
+					if (!verificationSessionID && matchesParentSession) {
+						log(`[${HOOK_NAME}] Verification pending without tracked oracle session, running recovery check`, {
+							sessionID,
+							iteration: state.iteration,
+						})
+					}
+
 					await handlePendingVerification(ctx, {
 						sessionID,
 						state,
--- a/src/hooks/ralph-loop/ulw-loop-verification.test.ts
+++ b/src/hooks/ralph-loop/ulw-loop-verification.test.ts
@@ -10,6 +10,7 @@ describe("ulw-loop verification", () => {
 	const testDir = join(tmpdir(), `ulw-loop-verification-${Date.now()}`)
 	let promptCalls: Array<{ sessionID: string; text: string }>
 	let toastCalls: Array<{ title: string; message: string; variant: string }>
+	let abortCalls: Array<{ id: string }>
 	let parentTranscriptPath: string
 	let oracleTranscriptPath: string

@@ -25,6 +26,10 @@ describe("ulw-loop verification", () => {
 						return {}
 					},
 					messages: async () => ({ data: [] }),
+					abort: async (opts: { path: { id: string } }) => {
+						abortCalls.push({ id: opts.path.id })
+						return {}
+					},
 				},
 				tui: {
 					showToast: async (opts: { body: { title: string; message: string; variant: string } }) => {
@@ -40,6 +45,7 @@ describe("ulw-loop verification", () => {
 	beforeEach(() => {
 		promptCalls = []
 		toastCalls = []
+		abortCalls = []
 		parentTranscriptPath = join(testDir, "transcript-parent.jsonl")
 		oracleTranscriptPath = join(testDir, "transcript-oracle.jsonl")

@@ -385,4 +391,96 @@ describe("ulw-loop verification", () => {
 		expect(promptCalls).toHaveLength(2)
 		expect(promptCalls[1]?.text).toContain("Verification failed")
 	})
+
+	test("#given oracle verification fails #when loop restarts #then old oracle session is aborted", async () => {
+		const sessionMessages: Record<string, unknown[]> = {
+			"session-123": [{}, {}, {}],
+		}
+		const hook = createRalphLoopHook({
+			...createMockPluginInput(),
+			client: {
+				...createMockPluginInput().client,
+				session: {
+					...createMockPluginInput().client.session,
+					messages: async (opts: { path: { id: string } }) => ({
+						data: sessionMessages[opts.path.id] ?? [],
+					}),
+				},
+			},
+		} as Parameters<typeof createRalphLoopHook>[0], {
+			getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
+		})
+		hook.startLoop("session-123", "Build API", { ultrawork: true })
+		writeFileSync(
+			parentTranscriptPath,
+			`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
+		)
+
+		await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+		writeState(testDir, {
+			...hook.getState()!,
+			verification_session_id: "ses-oracle",
+		})
+		writeFileSync(
+			oracleTranscriptPath,
+			`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "verification failed: missing tests" } })}\n`,
+		)
+
+		await hook.event({ event: { type: "session.idle", properties: { sessionID: "ses-oracle" } } })
+
+		expect(abortCalls).toHaveLength(1)
+		expect(abortCalls[0].id).toBe("ses-oracle")
+	})
+
+	test("#given ulw loop re-enters verification #when DONE detected again after failed verification #then previous verification session is aborted", async () => {
+		const sessionMessages: Record<string, unknown[]> = {
+			"session-123": [{}, {}, {}],
+		}
+		const hook = createRalphLoopHook({
+			...createMockPluginInput(),
+			client: {
+				...createMockPluginInput().client,
+				session: {
+					...createMockPluginInput().client.session,
+					messages: async (opts: { path: { id: string } }) => ({
+						data: sessionMessages[opts.path.id] ?? [],
+					}),
+				},
+			},
+		} as Parameters<typeof createRalphLoopHook>[0], {
+			getTranscriptPath: (sessionID) => sessionID === "ses-oracle" ? oracleTranscriptPath : parentTranscriptPath,
+		})
+		hook.startLoop("session-123", "Build API", { ultrawork: true })
+		writeFileSync(
+			parentTranscriptPath,
+			`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "done <promise>DONE</promise>" } })}\n`,
+		)
+
+		await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+		writeState(testDir, {
+			...hook.getState()!,
+			verification_session_id: "ses-oracle",
+		})
+		writeFileSync(
+			oracleTranscriptPath,
+			`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "failed" } })}\n`,
+		)
+
+		await hook.event({ event: { type: "session.idle", properties: { sessionID: "ses-oracle" } } })
+		abortCalls.length = 0
+
+		writeFileSync(
+			parentTranscriptPath,
+			`${JSON.stringify({ type: "tool_result", timestamp: new Date().toISOString(), tool_output: { output: "fixed it <promise>DONE</promise>" } })}\n`,
+		)
+		writeState(testDir, {
+			...hook.getState()!,
+			verification_session_id: "ses-oracle-old",
+		})
+
+		await hook.event({ event: { type: "session.idle", properties: { sessionID: "session-123" } } })
+
+		expect(abortCalls).toHaveLength(1)
+		expect(abortCalls[0].id).toBe("ses-oracle-old")
+	})
 })
--- a/src/hooks/ralph-loop/verification-failure-handler.ts
+++ b/src/hooks/ralph-loop/verification-failure-handler.ts
@@ -68,6 +68,10 @@ export async function handleFailedVerification(
 		return false
 	}

+	if (state.verification_session_id) {
+		ctx.client.session.abort({ path: { id: state.verification_session_id } }).catch(() => {})
+	}
+
 	const resumedState = loopState.restartAfterFailedVerification(
 		parentSessionID,
 		messageCountAtStart,
--- a/src/hooks/runtime-fallback/error-classifier.test.ts
+++ b/src/hooks/runtime-fallback/error-classifier.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from "bun:test"

-import { classifyErrorType, extractAutoRetrySignal, isRetryableError } from "./error-classifier"
+import { classifyErrorType, extractAutoRetrySignal, extractStatusCode, isRetryableError } from "./error-classifier"

 describe("runtime-fallback error classifier", () => {
  test("detects cooling-down auto-retry status signals", () => {
@@ -97,3 +97,72 @@ describe("runtime-fallback error classifier", () => {
    expect(signal).toBeUndefined()
  })
 })
+
+describe("extractStatusCode", () => {
+  test("extracts numeric statusCode from top-level", () => {
+    expect(extractStatusCode({ statusCode: 429 })).toBe(429)
+  })
+
+  test("extracts numeric status from top-level", () => {
+    expect(extractStatusCode({ status: 503 })).toBe(503)
+  })
+
+  test("extracts statusCode from nested data", () => {
+    expect(extractStatusCode({ data: { statusCode: 500 } })).toBe(500)
+  })
+
+  test("extracts statusCode from nested error", () => {
+    expect(extractStatusCode({ error: { statusCode: 502 } })).toBe(502)
+  })
+
+  test("extracts statusCode from nested cause", () => {
+    expect(extractStatusCode({ cause: { statusCode: 504 } })).toBe(504)
+  })
+
+  test("skips non-numeric status and finds deeper numeric statusCode", () => {
+    //#given — status is a string, but error.statusCode is numeric
+    const error = {
+      status: "error",
+      error: { statusCode: 429 },
+    }
+
+    //#when
+    const code = extractStatusCode(error)
+
+    //#then
+    expect(code).toBe(429)
+  })
+
+  test("skips non-numeric statusCode string and finds numeric in cause", () => {
+    const error = {
+      statusCode: "UNKNOWN",
+      status: "failed",
+      cause: { statusCode: 503 },
+    }
+
+    expect(extractStatusCode(error)).toBe(503)
+  })
+
+  test("returns undefined when no numeric status exists", () => {
+    expect(extractStatusCode({ status: "error", message: "something broke" })).toBeUndefined()
+  })
+
+  test("returns undefined for null/undefined error", () => {
+    expect(extractStatusCode(null)).toBeUndefined()
+    expect(extractStatusCode(undefined)).toBeUndefined()
+  })
+
+  test("falls back to regex match in error message", () => {
+    const error = { message: "Request failed with status code 429" }
+    expect(extractStatusCode(error, [429, 503])).toBe(429)
+  })
+
+  test("prefers top-level numeric over nested numeric", () => {
+    const error = {
+      statusCode: 400,
+      error: { statusCode: 429 },
+      cause: { statusCode: 503 },
+    }
+    expect(extractStatusCode(error)).toBe(400)
+  })
+})
--- a/src/hooks/runtime-fallback/error-classifier.ts
+++ b/src/hooks/runtime-fallback/error-classifier.ts
@@ -28,18 +28,28 @@ export function getErrorMessage(error: unknown): string {
  }
 }

+const DEFAULT_RETRY_PATTERN = new RegExp(`\\b(${DEFAULT_CONFIG.retry_on_errors.join("|")})\\b`) // built once at module load; extractStatusCode uses it when no retryOnErrors is passed
+
 export function extractStatusCode(error: unknown, retryOnErrors?: number[]): number | undefined {
  if (!error) return undefined

  const errorObj = error as Record<string, unknown>

-  const statusCode = errorObj.statusCode ?? errorObj.status ?? (errorObj.data as Record<string, unknown>)?.statusCode
-  if (typeof statusCode === "number") {
+  const statusCode = [
+    errorObj.statusCode,
+    errorObj.status,
+    (errorObj.data as Record<string, unknown>)?.statusCode,
+    (errorObj.error as Record<string, unknown>)?.statusCode,
+    (errorObj.cause as Record<string, unknown>)?.statusCode,
+  ].find((code): code is number => typeof code === "number")
+
+  if (statusCode !== undefined) {
    return statusCode
  }

-  const codes = retryOnErrors ?? DEFAULT_CONFIG.retry_on_errors
-  const pattern = new RegExp(`\\b(${codes.join("|")})\\b`)
+  const pattern = retryOnErrors 
+    ? new RegExp(`\\b(${retryOnErrors.join("|")})\\b`)
+    : DEFAULT_RETRY_PATTERN
  const message = getErrorMessage(error)
  const statusMatch = message.match(pattern)
  if (statusMatch) {
--- a/src/hooks/think-mode/detector.ts
+++ b/src/hooks/think-mode/detector.ts
@@ -32,8 +32,10 @@ const MULTILINGUAL_KEYWORDS = [
  "fikir", "berfikir",
 ]

-const MULTILINGUAL_PATTERNS = MULTILINGUAL_KEYWORDS.map((kw) => new RegExp(kw, "i"))
-const THINK_PATTERNS = [...ENGLISH_PATTERNS, ...MULTILINGUAL_PATTERNS]
+const COMBINED_THINK_PATTERN = new RegExp( // single regex tested by detectThinkKeyword
+  `\\b(?:ultrathink|think)\\b|${MULTILINGUAL_KEYWORDS.join("|")}`, // English words are word-bounded; multilingual keywords are joined unescaped and match as substrings
+  "i" // case-insensitive; no "g" flag, so .test() keeps no lastIndex state between calls
+)

 const CODE_BLOCK_PATTERN = /```[\s\S]*?```/g
 const INLINE_CODE_PATTERN = /`[^`]+`/g
@@ -44,7 +46,7 @@ function removeCodeBlocks(text: string): string {

 export function detectThinkKeyword(text: string): boolean {
  const textWithoutCode = removeCodeBlocks(text)
-  return THINK_PATTERNS.some((pattern) => pattern.test(textWithoutCode))
+  return COMBINED_THINK_PATTERN.test(textWithoutCode)
 }

 export function extractPromptText(
--- a/src/hooks/todo-continuation-enforcer/idle-event.ts
+++ b/src/hooks/todo-continuation-enforcer/idle-event.ts
@@ -124,22 +124,14 @@ export async function handleSessionIdle(args: {
  }

  if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
-    log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, {
-      sessionID,
-      consecutiveFailures: state.consecutiveFailures,
-      maxConsecutiveFailures: MAX_CONSECUTIVE_FAILURES,
-    })
+    log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, { sessionID, consecutiveFailures: state.consecutiveFailures })
    return
  }

  const effectiveCooldown =
    CONTINUATION_COOLDOWN_MS * Math.pow(2, Math.min(state.consecutiveFailures, 5))
  if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < effectiveCooldown) {
-    log(`[${HOOK_NAME}] Skipped: cooldown active`, {
-      sessionID,
-      effectiveCooldown,
-      consecutiveFailures: state.consecutiveFailures,
-    })
+    log(`[${HOOK_NAME}] Skipped: cooldown active`, { sessionID, effectiveCooldown, consecutiveFailures: state.consecutiveFailures })
    return
  }

--- a/src/hooks/todo-continuation-enforcer/session-state.regression.test.ts
+++ b/src/hooks/todo-continuation-enforcer/session-state.regression.test.ts
@@ -18,7 +18,7 @@ describe("createSessionStateStore regressions", () => {

  describe("#given external activity happens after a successful continuation", () => {
    describe("#when todos stay unchanged", () => {
-      test("#then it treats the activity as progress instead of stagnation", () => {
+      test("#then it keeps counting stagnation", () => {
        const sessionID = "ses-activity-progress"
        const todos = [
          { id: "1", content: "Task 1", status: "pending", priority: "high" },
@@ -37,9 +37,9 @@ describe("createSessionStateStore regressions", () => {
        trackedState.abortDetectedAt = undefined
        const progressUpdate = sessionStateStore.trackContinuationProgress(sessionID, 2, todos)

-        expect(progressUpdate.hasProgressed).toBe(true)
-        expect(progressUpdate.progressSource).toBe("activity")
-        expect(progressUpdate.stagnationCount).toBe(0)
+        expect(progressUpdate.hasProgressed).toBe(false)
+        expect(progressUpdate.progressSource).toBe("none")
+        expect(progressUpdate.stagnationCount).toBe(1)
      })
    })
  })
@@ -72,7 +72,7 @@ describe("createSessionStateStore regressions", () => {

  describe("#given stagnation already halted a session", () => {
    describe("#when new activity appears before the next idle check", () => {
-      test("#then it resets the stop condition on the next progress check", () => {
+      test("#then it does not reset the stop condition", () => {
        const sessionID = "ses-stagnation-recovery"
        const todos = [
          { id: "1", content: "Task 1", status: "pending", priority: "high" },
@@ -96,9 +96,9 @@ describe("createSessionStateStore regressions", () => {
        const progressUpdate = sessionStateStore.trackContinuationProgress(sessionID, 2, todos)

        expect(progressUpdate.previousStagnationCount).toBe(MAX_STAGNATION_COUNT)
-        expect(progressUpdate.hasProgressed).toBe(true)
-        expect(progressUpdate.progressSource).toBe("activity")
-        expect(progressUpdate.stagnationCount).toBe(0)
+        expect(progressUpdate.hasProgressed).toBe(false)
+        expect(progressUpdate.progressSource).toBe("none")
+        expect(progressUpdate.stagnationCount).toBe(MAX_STAGNATION_COUNT)
      })
    })
  })
--- a/src/hooks/todo-continuation-enforcer/session-state.ts
+++ b/src/hooks/todo-continuation-enforcer/session-state.ts
@@ -16,8 +16,6 @@ interface TrackedSessionState {
  lastAccessedAt: number
  lastCompletedCount?: number
  lastTodoSnapshot?: string
-  activitySignalCount: number
-  lastObservedActivitySignalCount?: number
 }

 export interface ContinuationProgressUpdate {
@@ -25,7 +23,7 @@ export interface ContinuationProgressUpdate {
  previousStagnationCount: number
  stagnationCount: number
  hasProgressed: boolean
-  progressSource: "none" | "todo" | "activity"
+  progressSource: "none" | "todo"
 }

 export interface SessionStateStore {
@@ -98,17 +96,7 @@ export function createSessionStateStore(): SessionStateStore {
    const trackedSession: TrackedSessionState = {
      state: rawState,
      lastAccessedAt: Date.now(),
-      activitySignalCount: 0,
    }
-    trackedSession.state = new Proxy(rawState, {
-      set(target, property, value, receiver) {
-        if (property === "abortDetectedAt" && value === undefined) {
-          trackedSession.activitySignalCount += 1
-        }
-
-        return Reflect.set(target, property, value, receiver)
-      },
-    })
    sessions.set(sessionID, trackedSession)
    return trackedSession
  }
@@ -137,7 +125,6 @@ export function createSessionStateStore(): SessionStateStore {
    const previousStagnationCount = state.stagnationCount
    const currentCompletedCount = todos?.filter((todo) => todo.status === "completed").length
    const currentTodoSnapshot = todos ? getTodoSnapshot(todos) : undefined
-    const currentActivitySignalCount = trackedSession.activitySignalCount
    const hasCompletedMoreTodos =
      currentCompletedCount !== undefined
      && trackedSession.lastCompletedCount !== undefined
@@ -146,9 +133,6 @@ export function createSessionStateStore(): SessionStateStore {
      currentTodoSnapshot !== undefined
      && trackedSession.lastTodoSnapshot !== undefined
      && currentTodoSnapshot !== trackedSession.lastTodoSnapshot
-    const hasObservedExternalActivity =
-      trackedSession.lastObservedActivitySignalCount !== undefined
-      && currentActivitySignalCount > trackedSession.lastObservedActivitySignalCount
    const hadSuccessfulInjectionAwaitingProgressCheck = state.awaitingPostInjectionProgressCheck === true

    state.lastIncompleteCount = incompleteCount
@@ -158,7 +142,6 @@ export function createSessionStateStore(): SessionStateStore {
    if (currentTodoSnapshot !== undefined) {
      trackedSession.lastTodoSnapshot = currentTodoSnapshot
    }
-    trackedSession.lastObservedActivitySignalCount = currentActivitySignalCount

    if (previousIncompleteCount === undefined) {
      state.stagnationCount = 0
@@ -173,9 +156,7 @@ export function createSessionStateStore(): SessionStateStore {

    const progressSource = incompleteCount < previousIncompleteCount || hasCompletedMoreTodos || hasTodoSnapshotChanged
      ? "todo"
-      : hasObservedExternalActivity
-        ? "activity"
-        : "none"
+      : "none"

    if (progressSource !== "none") {
      state.stagnationCount = 0
@@ -223,8 +204,6 @@ export function createSessionStateStore(): SessionStateStore {
    state.awaitingPostInjectionProgressCheck = false
    trackedSession.lastCompletedCount = undefined
    trackedSession.lastTodoSnapshot = undefined
-    trackedSession.activitySignalCount = 0
-    trackedSession.lastObservedActivitySignalCount = undefined
  }

  function cancelCountdown(sessionID: string): void {
--- a/src/hooks/todo-continuation-enforcer/stagnation-detection.test.ts
+++ b/src/hooks/todo-continuation-enforcer/stagnation-detection.test.ts
@@ -3,6 +3,8 @@
 import { describe, expect, it as test } from "bun:test"

 import { MAX_STAGNATION_COUNT } from "./constants"
+import { handleNonIdleEvent } from "./non-idle-events"
+import { createSessionStateStore } from "./session-state"
 import { shouldStopForStagnation } from "./stagnation-detection"

 describe("shouldStopForStagnation", () => {
@@ -25,7 +27,7 @@ describe("shouldStopForStagnation", () => {
      })
    })

-    describe("#when activity progress is detected after the halt", () => {
+    describe("#when todo progress is detected after the halt", () => {
      test("#then it clears the stop condition", () => {
        const shouldStop = shouldStopForStagnation({
          sessionID: "ses-recovered",
@@ -35,7 +37,7 @@ describe("shouldStopForStagnation", () => {
            previousStagnationCount: MAX_STAGNATION_COUNT,
            stagnationCount: 0,
            hasProgressed: true,
-            progressSource: "activity",
+            progressSource: "todo",
          },
        })

@@ -43,4 +45,60 @@ describe("shouldStopForStagnation", () => {
      })
    })
  })
+
+  describe("#given only non-idle tool and message events happen between idle checks", () => {
+    describe("#when todo state does not change across three idle cycles", () => {
+      test("#then stagnation count reaches three", () => {
+        // given
+        const sessionStateStore = createSessionStateStore()
+        const sessionID = "ses-non-idle-activity-without-progress"
+        const state = sessionStateStore.getState(sessionID)
+        const todos = [
+          { id: "1", content: "Task 1", status: "pending", priority: "high" },
+          { id: "2", content: "Task 2", status: "pending", priority: "medium" },
+        ]
+
+        sessionStateStore.trackContinuationProgress(sessionID, 2, todos)
+
+        // when
+        state.awaitingPostInjectionProgressCheck = true
+        const firstCycle = sessionStateStore.trackContinuationProgress(sessionID, 2, todos)
+
+        handleNonIdleEvent({
+          eventType: "tool.execute.before",
+          properties: { sessionID },
+          sessionStateStore,
+        })
+        handleNonIdleEvent({
+          eventType: "message.updated",
+          properties: { info: { sessionID, role: "assistant" } },
+          sessionStateStore,
+        })
+
+        state.awaitingPostInjectionProgressCheck = true
+        const secondCycle = sessionStateStore.trackContinuationProgress(sessionID, 2, todos)
+
+        handleNonIdleEvent({
+          eventType: "tool.execute.after",
+          properties: { sessionID },
+          sessionStateStore,
+        })
+        handleNonIdleEvent({
+          eventType: "message.part.updated",
+          properties: { info: { sessionID, role: "assistant" } },
+          sessionStateStore,
+        })
+
+        state.awaitingPostInjectionProgressCheck = true
+        const thirdCycle = sessionStateStore.trackContinuationProgress(sessionID, 2, todos)
+
+        // then
+        expect(firstCycle.stagnationCount).toBe(1)
+        expect(secondCycle.stagnationCount).toBe(2)
+        expect(thirdCycle.stagnationCount).toBe(3)
+
+        sessionStateStore.shutdown()
+      })
+    })
+  })
 })
--- a/src/hooks/todo-description-override/description.ts
+++ b/src/hooks/todo-description-override/description.ts
@@ -0,0 +1,28 @@
+export const TODOWRITE_DESCRIPTION = `Use this tool to create and manage a structured task list for tracking progress on multi-step work.
+
+## Todo Format (MANDATORY)
+
+Each todo title MUST encode four elements: WHERE, WHY, HOW, and EXPECTED RESULT.
+
+Format: "[WHERE] [HOW] to [WHY] — expect [RESULT]"
+
+GOOD:
+- "src/utils/validation.ts: Add validateEmail() for input sanitization — returns boolean"
+- "UserService.create(): Call validateEmail() before DB insert — rejects invalid emails with 400"
+- "validation.test.ts: Add test for missing @ sign — expect validateEmail('foo') to return false"
+
+BAD:
+- "Implement email validation" (where? how? what result?)
+- "Add dark mode" (this is a feature, not a todo)
+- "Fix auth" (what file? what changes? what's expected?)
+
+## Granularity Rules
+
+Each todo MUST be a single atomic action completable in 1-3 tool calls. If it needs more, split it.
+
+**Size test**: Can you complete this todo by editing one file or running one command? If not, it's too big.
+
+## Task Management
+- One in_progress at a time. Complete it before starting the next.
+- Mark completed immediately after finishing each item.
+- Skip this tool for single trivial tasks (one-step, obvious action).`
--- a/src/hooks/todo-description-override/hook.ts
+++ b/src/hooks/todo-description-override/hook.ts
@@ -0,0 +1,14 @@
+import { TODOWRITE_DESCRIPTION } from "./description"
+
+/** Builds a hook whose "tool.definition" handler swaps in TODOWRITE_DESCRIPTION for the `todowrite` tool. */
+export function createTodoDescriptionOverrideHook() {
+  const overrideDefinition = async (
+    input: { toolID: string },
+    output: { description: string; parameters: unknown },
+  ) => {
+    // Exact, case-sensitive match; every other tool definition is left untouched.
+    if (input.toolID !== "todowrite") return
+    output.description = TODOWRITE_DESCRIPTION
+  }
+  return { "tool.definition": overrideDefinition }
+}
--- a/src/hooks/todo-description-override/index.test.ts
+++ b/src/hooks/todo-description-override/index.test.ts
@@ -0,0 +1,40 @@
+import { describe, it, expect } from "bun:test"
+import { createTodoDescriptionOverrideHook } from "./hook"
+import { TODOWRITE_DESCRIPTION } from "./description"
+
+describe("createTodoDescriptionOverrideHook", () => {
+  describe("#given hook is created", () => {
+    describe("#when tool.definition is called with todowrite", () => {
+      it("#then should override the description", async () => {
+        const hook = createTodoDescriptionOverrideHook()
+        const output = { description: "original description", parameters: {} }
+
+        await hook["tool.definition"]({ toolID: "todowrite" }, output)
+
+        expect(output.description).toBe(TODOWRITE_DESCRIPTION)
+      })
+    })
+
+    describe("#when tool.definition is called with non-todowrite tool", () => {
+      it("#then should not modify the description", async () => {
+        const hook = createTodoDescriptionOverrideHook()
+        const output = { description: "original description", parameters: {} }
+
+        await hook["tool.definition"]({ toolID: "bash" }, output)
+
+        expect(output.description).toBe("original description")
+      })
+    })
+
+    describe("#when tool.definition is called with TodoWrite (different casing)", () => {
+      it("#then should not override for different casing since OpenCode sends lowercase", async () => {
+        const hook = createTodoDescriptionOverrideHook()
+        const output = { description: "original description", parameters: {} }
+        // The toolID differs from "todowrite" only by letter case; the hook is expected to ignore it.
+        await hook["tool.definition"]({ toolID: "TodoWrite" }, output)
+        // The description must be exactly what the caller supplied.
+        expect(output.description).toBe("original description")
+      })
+    })
+  })
+})
--- a/src/hooks/todo-description-override/index.ts
+++ b/src/hooks/todo-description-override/index.ts
@@ -0,0 +1 @@
+export { createTodoDescriptionOverrideHook } from "./hook"
--- a/src/hooks/write-existing-file-guard/index.test.ts
+++ b/src/hooks/write-existing-file-guard/index.test.ts
@@ -411,10 +411,7 @@ describe("createWriteExistingFileGuardHook", () => {
    try {
      symlinkSync(targetFile, symlinkPath)
    } catch (error) {
-      console.warn(
-        "Skipping symlink test: symlinks are not supported or cannot be created in this environment.",
-        error
-      )
+      // Symlinks not supported in this environment — skip
      return
    }

--- a/src/plugin-interface.ts
+++ b/src/plugin-interface.ts
@@ -32,10 +32,7 @@ export function createPluginInterface(args: {
  return {
    tool: tools,

-    "chat.params": async (input: unknown, output: unknown) => {
-      const handler = createChatParamsHandler({ anthropicEffort: hooks.anthropicEffort })
-      await handler(input, output)
-    },
+    "chat.params": createChatParamsHandler({ anthropicEffort: hooks.anthropicEffort }),

    "chat.headers": createChatHeadersHandler({ ctx }),

@@ -71,5 +68,9 @@ export function createPluginInterface(args: {
      ctx,
      hooks,
    }),
+
+    "tool.definition": async (input, output) => {
+      await hooks.todoDescriptionOverride?.["tool.definition"]?.(input, output)
+    },
  }
 }
--- a/src/plugin/event.test.ts
+++ b/src/plugin/event.test.ts
@@ -1,8 +1,15 @@
-import { describe, it, expect } from "bun:test"
+import { describe, it, expect, afterEach } from "bun:test"

 import { createEventHandler } from "./event"
+import { createChatMessageHandler } from "./chat-message"
+import { _resetForTesting, setMainSession } from "../features/claude-code-session-state"
+import { clearPendingModelFallback, createModelFallbackHook } from "../hooks/model-fallback/hook"

-type EventInput = { event: { type: string; properties?: Record<string, unknown> } }
+type EventInput = { event: { type: string; properties?: unknown } }
+
+afterEach(() => {
+	_resetForTesting()
+})

 	describe("createEventHandler - idle deduplication", () => {
 	it("Order A (status→idle): synthetic idle deduped - real idle not dispatched again", async () => {
@@ -66,7 +73,7 @@ type EventInput = { event: { type: string; properties?: Record<string, unknown>
 		//#then - synthetic idle dispatched once
 		expect(dispatchCalls.length).toBe(1)
 		expect(dispatchCalls[0].event.type).toBe("session.idle")
-		expect(dispatchCalls[0].event.properties?.sessionID).toBe(sessionId)
+		expect((dispatchCalls[0].event.properties as { sessionID?: string } | undefined)?.sessionID).toBe(sessionId)

 		//#when - real session.idle arrives
 		await eventHandler({
@@ -142,7 +149,7 @@ type EventInput = { event: { type: string; properties?: Record<string, unknown>
 		//#then - real idle dispatched once
 		expect(dispatchCalls.length).toBe(1)
 		expect(dispatchCalls[0].event.type).toBe("session.idle")
-		expect(dispatchCalls[0].event.properties?.sessionID).toBe(sessionId)
+		expect((dispatchCalls[0].event.properties as { sessionID?: string } | undefined)?.sessionID).toBe(sessionId)

 		//#when - session.status with idle (generates synthetic idle)
 		await eventHandler({
@@ -245,7 +252,7 @@ type EventInput = { event: { type: string; properties?: Record<string, unknown>
 			event: {
 				type: "message.updated",
 			},
-		})
+		} as any)

 		//#then - both maps should be pruned (no dedup should occur for new events)
 		// We verify by checking that a new idle event for same session is dispatched
@@ -287,7 +294,7 @@ type EventInput = { event: { type: string; properties?: Record<string, unknown>
 				stopContinuationGuard: { event: async () => {} },
 				compactionTodoPreserver: { event: async () => {} },
 				atlasHook: { handler: async () => {} },
-			},
+			} as any,
 		})

 		await eventHandlerWithMock({
@@ -426,7 +433,7 @@ describe("createEventHandler - event forwarding", () => {
 				type: "session.deleted",
 				properties: { info: { id: sessionID } },
 			},
-		})
+		} as any)

 		//#then
 		expect(forwardedEvents.length).toBe(1)
@@ -435,3 +442,146 @@ describe("createEventHandler - event forwarding", () => {
 		expect(deletedSessions).toEqual([sessionID])
 	})
 })
+
+describe("createEventHandler - retry dedupe lifecycle", () => {
+	it("re-handles same retry key after session recovers to idle status", async () => {
+		//#given - a main session whose event handler and chat.message handler share one model-fallback hook; abort/prompt calls on the fake client are recorded
+		const sessionID = "ses_retry_recovery_rearm"
+		setMainSession(sessionID)
+		clearPendingModelFallback(sessionID)
+
+		const abortCalls: string[] = []
+		const promptCalls: string[] = []
+		const modelFallback = createModelFallbackHook()
+
+		const eventHandler = createEventHandler({
+			ctx: {
+				directory: "/tmp",
+				client: {
+					session: {
+						abort: async ({ path }: { path: { id: string } }) => {
+							abortCalls.push(path.id)
+							return {}
+						},
+						prompt: async ({ path }: { path: { id: string } }) => {
+							promptCalls.push(path.id)
+							return {}
+						},
+					},
+				},
+			} as any,
+			pluginConfig: {} as any,
+			firstMessageVariantGate: {
+				markSessionCreated: () => {},
+				clear: () => {},
+			},
+			managers: {
+				tmuxSessionManager: {
+					onSessionCreated: async () => {},
+					onSessionDeleted: async () => {},
+				},
+				skillMcpManager: {
+					disconnectSession: async () => {},
+				},
+			} as any,
+			hooks: {
+				modelFallback,
+				stopContinuationGuard: { isStopped: () => false },
+			} as any,
+		})
+
+		const chatMessageHandler = createChatMessageHandler({
+			ctx: {
+				client: {
+					tui: {
+						showToast: async () => ({}),
+					},
+				},
+			} as any,
+			pluginConfig: {} as any,
+			firstMessageVariantGate: {
+				shouldOverride: () => false,
+				markApplied: () => {},
+			},
+			hooks: {
+				modelFallback,
+				stopContinuationGuard: null,
+				keywordDetector: null,
+				claudeCodeHooks: null,
+				autoSlashCommand: null,
+				startWork: null,
+				ralphLoop: null,
+			} as any,
+		})
+
+		const retryStatus = {
+			type: "retry",
+			attempt: 1,
+			message: "All credentials for model claude-opus-4-6-thinking are cooling down [retrying in 7m 56s attempt #1]",
+			next: 476,
+		} as const
+
+		await eventHandler({
+			event: {
+				type: "message.updated",
+				properties: {
+					info: {
+						id: "msg_user_retry_rearm",
+						sessionID,
+						role: "user",
+						modelID: "claude-opus-4-6-thinking",
+						providerID: "anthropic",
+						agent: "Sisyphus (Ultraworker)",
+					},
+				},
+			},
+		} as any)
+
+		//#when - the retry status is delivered for the first time, then a chat.message pass runs through the shared model-fallback hook
+		await eventHandler({
+			event: {
+				type: "session.status",
+				properties: {
+					sessionID,
+					status: retryStatus,
+				},
+			},
+		} as any)
+
+		const firstOutput = { message: {}, parts: [] as Array<{ type: string; text?: string }> }
+		await chatMessageHandler(
+			{
+				sessionID,
+				agent: "sisyphus",
+				model: { providerID: "anthropic", modelID: "claude-opus-4-6-thinking" },
+			},
+			firstOutput,
+		)
+
+		//#when - session reports a plain (non-retry) idle status, which is expected to clear the remembered retry key
+		await eventHandler({
+			event: {
+				type: "session.status",
+				properties: {
+					sessionID,
+					status: { type: "idle" },
+				},
+			},
+		} as any)
+
+		//#when - the identical retry status object (same attempt, message and next) is delivered again after recovery
+		await eventHandler({
+			event: {
+				type: "session.status",
+				properties: {
+					sessionID,
+					status: retryStatus,
+				},
+			},
+		} as any)
+
+		//#then - both deliveries were acted on: one abort and one prompt recorded per retry status
+		expect(abortCalls).toEqual([sessionID, sessionID])
+		expect(promptCalls).toEqual([sessionID, sessionID])
+	})
+})
--- a/src/plugin/event.ts
+++ b/src/plugin/event.ts
@@ -421,6 +421,12 @@ export function createEventHandler(args: {
      const sessionID = props?.sessionID as string | undefined;
      const status = props?.status as { type?: string; attempt?: number; message?: string; next?: number } | undefined;

+      // Retry dedupe lifecycle: set key when a retry status is handled, clear it after recovery
+      // (non-retry idle) so future failures with the same key can trigger fallback again.
+      if (sessionID && status?.type === "idle") {
+        lastHandledRetryStatusKey.delete(sessionID);
+      }
+
      if (sessionID && status?.type === "retry" && isModelFallbackEnabled && !isRuntimeFallbackEnabled) {
        try {
          const retryMessage = typeof status.message === "string" ? status.message : "";
--- a/src/plugin/hooks/create-tool-guard-hooks.ts
+++ b/src/plugin/hooks/create-tool-guard-hooks.ts
@@ -14,6 +14,7 @@ import {
  createHashlineReadEnhancerHook,
  createReadImageResizerHook,
  createJsonErrorRecoveryHook,
+  createTodoDescriptionOverrideHook,
 } from "../../hooks"
 import {
  getOpenCodeVersion,
@@ -35,6 +36,7 @@ export type ToolGuardHooks = {
  hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
  jsonErrorRecovery: ReturnType<typeof createJsonErrorRecoveryHook> | null
  readImageResizer: ReturnType<typeof createReadImageResizerHook> | null
+  todoDescriptionOverride: ReturnType<typeof createTodoDescriptionOverrideHook> | null
 }

 export function createToolGuardHooks(args: {
@@ -111,6 +113,10 @@ export function createToolGuardHooks(args: {
    ? safeHook("read-image-resizer", () => createReadImageResizerHook(ctx))
    : null

+  const todoDescriptionOverride = isHookEnabled("todo-description-override")
+    ? safeHook("todo-description-override", () => createTodoDescriptionOverrideHook())
+    : null
+
  return {
    commentChecker,
    toolOutputTruncator,
@@ -123,5 +129,6 @@ export function createToolGuardHooks(args: {
    hashlineReadEnhancer,
    jsonErrorRecovery,
    readImageResizer,
+    todoDescriptionOverride,
  }
 }
--- a/src/plugin/tool-execute-after.ts
+++ b/src/plugin/tool-execute-after.ts
@@ -48,22 +48,50 @@ export function createToolExecuteAfterHandler(args: {
      const prompt = typeof output.metadata?.prompt === "string" ? output.metadata.prompt : undefined
      const verificationAttemptId = prompt?.match(VERIFICATION_ATTEMPT_PATTERN)?.[1]?.trim()
      const loopState = directory ? readState(directory) : null
-
-      if (
+      const isVerificationContext =
        agent === "oracle"
-        && sessionId
-        && verificationAttemptId
-        && directory
+        && !!sessionId
+        && !!directory
        && loopState?.active === true
        && loopState.ultrawork === true
        && loopState.verification_pending === true
        && loopState.session_id === input.sessionID
+
+      log("[tool-execute-after] ULW verification tracking check", {
+        tool: input.tool,
+        agent,
+        parentSessionID: input.sessionID,
+        oracleSessionID: sessionId,
+        hasPromptInMetadata: typeof prompt === "string",
+        extractedVerificationAttemptId: verificationAttemptId,
+      })
+
+      if (
+        isVerificationContext
+        && verificationAttemptId
        && loopState.verification_attempt_id === verificationAttemptId
      ) {
        writeState(directory, {
          ...loopState,
          verification_session_id: sessionId,
        })
+        log("[tool-execute-after] Stored oracle verification session via attempt match", {
+          parentSessionID: input.sessionID,
+          oracleSessionID: sessionId,
+          verificationAttemptId,
+        })
+      } else if (isVerificationContext && !verificationAttemptId) {
+        writeState(directory, {
+          ...loopState,
+          verification_session_id: sessionId,
+        })
+        log("[tool-execute-after] Fallback: stored oracle verification session without attempt match", {
+          parentSessionID: input.sessionID,
+          oracleSessionID: sessionId,
+          hasPromptInMetadata: typeof prompt === "string",
+          expectedAttemptId: loopState.verification_attempt_id,
+          extractedAttemptId: verificationAttemptId,
+        })
      }
    }

--- a/src/plugin/tool-execute-before.ts
+++ b/src/plugin/tool-execute-before.ts
@@ -79,6 +79,12 @@ export function createToolExecuteBeforeHandler(args: {

      if (shouldInjectOracleVerification) {
        const verificationAttemptId = randomUUID()
+        log("[tool-execute-before] Injecting ULW oracle verification attempt", {
+          sessionID: input.sessionID,
+          callID: input.callID,
+          verificationAttemptId,
+          loopSessionID: loopState.session_id,
+        })
        writeState(ctx.directory, {
          ...loopState,
          verification_attempt_id: verificationAttemptId,
--- a/src/plugin/tool-execute-before.ulw-loop.test.ts
+++ b/src/plugin/tool-execute-before.ulw-loop.test.ts
@@ -19,6 +19,27 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 		}
 	}

+	/** Builds the `task` tool args for a background oracle run carrying the given prompt. */
+	function createOracleTaskArgs(prompt: string): Record<string, unknown> {
+		const oracleArgs: Record<string, unknown> = { subagent_type: "oracle", run_in_background: true }
+		oracleArgs.prompt = prompt
+		return oracleArgs
+	}
+
+	/**
+	 * Builds the metadata object these tests hand to the after-handler for a sync
+	 * oracle task: prompt and run_in_background are copied from `args`, the rest is fixed.
+	 */
+	function createSyncTaskMetadata(
+		args: Record<string, unknown>,
+		sessionId: string,
+	): Record<string, unknown> {
+		const { prompt, run_in_background } = args
+		return { prompt, agent: "oracle", run_in_background, sessionId, sync: true }
+	}
+
 	test("#given ulw loop is awaiting verification #when oracle task runs #then oracle prompt is enforced and sync", async () => {
 		const directory = join(tmpdir(), `tool-before-ulw-${Date.now()}`)
 		mkdirSync(directory, { recursive: true })
@@ -38,13 +59,7 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 			ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
 			hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
 		})
-		const output = {
-			args: {
-				subagent_type: "oracle",
-				run_in_background: true,
-				prompt: "Check it",
-			} as Record<string, unknown>,
-		}
+		const output = { args: createOracleTaskArgs("Check it") }

 		await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

@@ -64,13 +79,7 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 			ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
 			hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
 		})
-		const output = {
-			args: {
-				subagent_type: "oracle",
-				run_in_background: true,
-				prompt: "Check it",
-			} as Record<string, unknown>,
-		}
+		const output = { args: createOracleTaskArgs("Check it") }

 		await handler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, output)

@@ -80,7 +89,7 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 		rmSync(directory, { recursive: true, force: true })
 	})

-	test("#given ulw loop is awaiting verification #when oracle task finishes #then oracle session id is stored", async () => {
+	test("#given ulw loop is awaiting verification #when oracle sync task metadata is persisted #then oracle session id is stored", async () => {
 		const directory = join(tmpdir(), `tool-after-ulw-${Date.now()}`)
 		mkdirSync(directory, { recursive: true })
 		writeState(directory, {
@@ -99,14 +108,44 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 			ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteBeforeHandler>[0]["ctx"],
 			hooks: {} as Parameters<typeof createToolExecuteBeforeHandler>[0]["hooks"],
 		})
-		const beforeOutput = {
-			args: {
-				subagent_type: "oracle",
-				run_in_background: true,
-				prompt: "Check it",
-			} as Record<string, unknown>,
-		}
+		const beforeOutput = { args: createOracleTaskArgs("Check it") }
 		await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, beforeOutput)
+		const metadataFromSyncTask = createSyncTaskMetadata(beforeOutput.args, "ses-oracle")
+
+		const handler = createToolExecuteAfterHandler({
+			ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
+			hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
+		})
+
+		await handler(
+			{ tool: "task", sessionID: "ses-main", callID: "call-1" },
+			{
+				title: "oracle task",
+				output: "done",
+				metadata: metadataFromSyncTask,
+			},
+		)
+
+		expect(readState(directory)?.verification_session_id).toBe("ses-oracle")
+
+		clearState(directory)
+		rmSync(directory, { recursive: true, force: true })
+	})
+
+	test("#given ulw loop is awaiting verification #when oracle metadata prompt is missing #then oracle session fallback is stored", async () => {
+		const directory = join(tmpdir(), `tool-after-ulw-fallback-${Date.now()}`)
+		mkdirSync(directory, { recursive: true })
+		writeState(directory, {
+			active: true,
+			iteration: 3,
+			completion_promise: ULTRAWORK_VERIFICATION_PROMISE,
+			initial_completion_promise: "DONE",
+			started_at: new Date().toISOString(),
+			prompt: "Ship feature",
+			session_id: "ses-main",
+			ultrawork: true,
+			verification_pending: true,
+		})

 		const handler = createToolExecuteAfterHandler({
 			ctx: createCtx(directory) as unknown as Parameters<typeof createToolExecuteAfterHandler>[0]["ctx"],
@@ -120,13 +159,13 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 				output: "done",
 				metadata: {
 					agent: "oracle",
-					prompt: String(beforeOutput.args.prompt),
-					sessionId: "ses-oracle",
+					sessionId: "ses-oracle-fallback",
+					sync: true,
 				},
 			},
 		)

-		expect(readState(directory)?.verification_session_id).toBe("ses-oracle")
+		expect(readState(directory)?.verification_session_id).toBe("ses-oracle-fallback")

 		clearState(directory)
 		rmSync(directory, { recursive: true, force: true })
@@ -156,23 +195,11 @@ describe("tool.execute.before ultrawork oracle verification", () => {
 			hooks: {} as Parameters<typeof createToolExecuteAfterHandler>[0]["hooks"],
 		})

-		const firstOutput = {
-			args: {
-				subagent_type: "oracle",
-				run_in_background: true,
-				prompt: "Check it",
-			} as Record<string, unknown>,
-		}
+		const firstOutput = { args: createOracleTaskArgs("Check it") }
 		await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-1" }, firstOutput)
 		const firstAttemptId = readState(directory)?.verification_attempt_id

-		const secondOutput = {
-			args: {
-				subagent_type: "oracle",
-				run_in_background: true,
-				prompt: "Check it again",
-			} as Record<string, unknown>,
-		}
+		const secondOutput = { args: createOracleTaskArgs("Check it again") }
 		await beforeHandler({ tool: "task", sessionID: "ses-main", callID: "call-2" }, secondOutput)
 		const secondAttemptId = readState(directory)?.verification_attempt_id

--- a/src/plugin/ultrawork-model-override.ts
+++ b/src/plugin/ultrawork-model-override.ts
@@ -110,12 +110,16 @@ function applyResolvedUltraworkOverride(args: {
  if (!override.providerID || !override.modelID) return

  const targetModel = { providerID: override.providerID, modelID: override.modelID }
+  const messageId = output.message["id"] as string | undefined
  if (isSameModel(output.message.model, targetModel)) {
+    if (validatedVariant && messageId) {
+      scheduleDeferredModelOverride(messageId, targetModel, validatedVariant)
+      log(`[ultrawork-model-override] Persist validated variant for active model: ${override.modelID}`)
+      return
+    }
    log(`[ultrawork-model-override] Skip override; target model already active: ${override.modelID}`)
    return
  }
-
-  const messageId = output.message["id"] as string | undefined
  if (!messageId) {
    log("[ultrawork-model-override] No message ID found, falling back to direct mutation")
    output.message.model = targetModel
--- a/src/shared/connected-providers-cache.test.ts
+++ b/src/shared/connected-providers-cache.test.ts
@@ -1,45 +1,30 @@
 /// <reference types="bun-types" />

-import { beforeAll, beforeEach, afterEach, describe, expect, mock, test } from "bun:test"
+import { beforeEach, afterEach, describe, expect, test } from "bun:test"

 import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs"
 import { tmpdir } from "node:os"
 import { join } from "node:path"
-import * as dataPath from "./data-path"
+import {
+	createConnectedProvidersCacheStore,
+} from "./connected-providers-cache"

+let fakeUserCacheRoot = ""
 let testCacheDir = ""
-let moduleImportCounter = 0
-
-const getOmoOpenCodeCacheDirMock = mock(() => testCacheDir)
-
-let updateConnectedProvidersCache: typeof import("./connected-providers-cache").updateConnectedProvidersCache
-let readProviderModelsCache: typeof import("./connected-providers-cache").readProviderModelsCache
-
-async function prepareConnectedProvidersCacheTestModule(): Promise<void> {
-	testCacheDir = mkdtempSync(join(tmpdir(), "connected-providers-cache-test-"))
-	getOmoOpenCodeCacheDirMock.mockClear()
-	mock.module("./data-path", () => ({
-		getOmoOpenCodeCacheDir: getOmoOpenCodeCacheDirMock,
-	}))
-	moduleImportCounter += 1
-	;({ updateConnectedProvidersCache, readProviderModelsCache } = await import(`./connected-providers-cache?test=${moduleImportCounter}`))
-}
+let testCacheStore: ReturnType<typeof createConnectedProvidersCacheStore>

 describe("updateConnectedProvidersCache", () => {
-	beforeAll(() => {
-		mock.restore()
-	})
-
-	beforeEach(async () => {
-		mock.restore()
-		await prepareConnectedProvidersCacheTestModule()
+	beforeEach(() => {
+		fakeUserCacheRoot = mkdtempSync(join(tmpdir(), "connected-providers-user-cache-"))
+		testCacheDir = join(fakeUserCacheRoot, "oh-my-opencode")
+		testCacheStore = createConnectedProvidersCacheStore(() => testCacheDir)
 	})

 	afterEach(() => {
-		mock.restore()
-		if (existsSync(testCacheDir)) {
-			rmSync(testCacheDir, { recursive: true, force: true })
+		if (existsSync(fakeUserCacheRoot)) {
+			rmSync(fakeUserCacheRoot, { recursive: true, force: true })
 		}
+		fakeUserCacheRoot = ""
 		testCacheDir = ""
 	})

@@ -76,10 +61,10 @@ describe("updateConnectedProvidersCache", () => {
 		}

 		//#when
-		await updateConnectedProvidersCache(mockClient)
+		await testCacheStore.updateConnectedProvidersCache(mockClient)

 		//#then
-		const cache = readProviderModelsCache()
+		const cache = testCacheStore.readProviderModelsCache()
 		expect(cache).not.toBeNull()
 		expect(cache!.connected).toEqual(["openai", "anthropic"])
 		expect(cache!.models).toEqual({
@@ -109,10 +94,10 @@ describe("updateConnectedProvidersCache", () => {
 		}

 		//#when
-		await updateConnectedProvidersCache(mockClient)
+		await testCacheStore.updateConnectedProvidersCache(mockClient)

 		//#then
-		const cache = readProviderModelsCache()
+		const cache = testCacheStore.readProviderModelsCache()
 		expect(cache).not.toBeNull()
 		expect(cache!.models).toEqual({})
 	})
@@ -130,10 +115,10 @@ describe("updateConnectedProvidersCache", () => {
 		}

 		//#when
-		await updateConnectedProvidersCache(mockClient)
+		await testCacheStore.updateConnectedProvidersCache(mockClient)

 		//#then
-		const cache = readProviderModelsCache()
+		const cache = testCacheStore.readProviderModelsCache()
 		expect(cache).not.toBeNull()
 		expect(cache!.models).toEqual({})
 	})
@@ -143,25 +128,44 @@ describe("updateConnectedProvidersCache", () => {
 		const mockClient = {}

 		//#when
-		await updateConnectedProvidersCache(mockClient)
+		await testCacheStore.updateConnectedProvidersCache(mockClient)

 		//#then
-		const cache = readProviderModelsCache()
+		const cache = testCacheStore.readProviderModelsCache()
 		expect(cache).toBeNull()
 	})

-	test("does not remove the user's real cache directory during test setup", async () => {
+	test("does not remove unrelated files in the cache directory", async () => {
 		//#given
-		const realCacheDir = join(dataPath.getCacheDir(), "oh-my-opencode")
+		const realCacheDir = join(fakeUserCacheRoot, "oh-my-opencode")
 		const sentinelPath = join(realCacheDir, "connected-providers-cache.test-sentinel.json")
 		mkdirSync(realCacheDir, { recursive: true })
 		writeFileSync(sentinelPath, JSON.stringify({ keep: true }))

+		const mockClient = {
+			provider: {
+				list: async () => ({
+					data: {
+						connected: ["openai"],
+						all: [
+							{
+								id: "openai",
+								models: {
+									"gpt-5.4": { id: "gpt-5.4" },
+								},
+							},
+						],
+					},
+				}),
+			},
+		}
+
 		try {
 			//#when
-			await prepareConnectedProvidersCacheTestModule()
+			await testCacheStore.updateConnectedProvidersCache(mockClient)

 			//#then
+			expect(testCacheStore.readConnectedProvidersCache()).toEqual(["openai"])
 			expect(existsSync(sentinelPath)).toBe(true)
 			expect(readFileSync(sentinelPath, "utf-8")).toBe(JSON.stringify({ keep: true }))
 		} finally {
--- a/src/shared/connected-providers-cache.ts
+++ b/src/shared/connected-providers-cache.ts
@@ -25,172 +25,190 @@ interface ProviderModelsCache {
 	updatedAt: string
 }

-function getCacheFilePath(filename: string): string {
-	return join(dataPath.getOmoOpenCodeCacheDir(), filename)
-}
-
-function ensureCacheDir(): void {
-	const cacheDir = dataPath.getOmoOpenCodeCacheDir()
-	if (!existsSync(cacheDir)) {
-		mkdirSync(cacheDir, { recursive: true })
-	}
-}
-
-/**
- * Read the connected providers cache.
- * Returns the list of connected provider IDs, or null if cache doesn't exist.
- */
-export function readConnectedProvidersCache(): string[] | null {
-	const cacheFile = getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE)
-
-	if (!existsSync(cacheFile)) {
-		log("[connected-providers-cache] Cache file not found", { cacheFile })
-		return null
+export function createConnectedProvidersCacheStore(
+	getCacheDir: () => string = dataPath.getOmoOpenCodeCacheDir
+) {
+	function getCacheFilePath(filename: string): string {
+		return join(getCacheDir(), filename)
 	}

-	try {
-		const content = readFileSync(cacheFile, "utf-8")
-		const data = JSON.parse(content) as ConnectedProvidersCache
-		log("[connected-providers-cache] Read cache", { count: data.connected.length, updatedAt: data.updatedAt })
-		return data.connected
-	} catch (err) {
-		log("[connected-providers-cache] Error reading cache", { error: String(err) })
-		return null
-	}
-}
+	let memConnected: string[] | null | undefined
+	let memProviderModels: ProviderModelsCache | null | undefined

-/**
- * Check if connected providers cache exists.
- */
-export function hasConnectedProvidersCache(): boolean {
-	const cacheFile = getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE)
-	return existsSync(cacheFile)
-}
-
-/**
- * Write the connected providers cache.
- */
-function writeConnectedProvidersCache(connected: string[]): void {
-	ensureCacheDir()
-	const cacheFile = getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE)
-
-	const data: ConnectedProvidersCache = {
-		connected,
-		updatedAt: new Date().toISOString(),
+	function ensureCacheDir(): void {
+		const cacheDir = getCacheDir()
+		if (!existsSync(cacheDir)) {
+			mkdirSync(cacheDir, { recursive: true })
+		}
 	}

-	try {
-		writeFileSync(cacheFile, JSON.stringify(data, null, 2))
-		log("[connected-providers-cache] Cache written", { count: connected.length })
-	} catch (err) {
-		log("[connected-providers-cache] Error writing cache", { error: String(err) })
-	}
-}
+	function readConnectedProvidersCache(): string[] | null {
+		if (memConnected !== undefined) return memConnected
+		const cacheFile = getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE)

-/**
- * Read the provider-models cache.
- * Returns the cache data, or null if cache doesn't exist.
- */
-export function readProviderModelsCache(): ProviderModelsCache | null {
-	const cacheFile = getCacheFilePath(PROVIDER_MODELS_CACHE_FILE)
-
-	if (!existsSync(cacheFile)) {
-		log("[connected-providers-cache] Provider-models cache file not found", { cacheFile })
-		return null
-	}
-
-	try {
-		const content = readFileSync(cacheFile, "utf-8")
-		const data = JSON.parse(content) as ProviderModelsCache
-		log("[connected-providers-cache] Read provider-models cache", { 
-			providerCount: Object.keys(data.models).length, 
-			updatedAt: data.updatedAt 
-		})
-		return data
-	} catch (err) {
-		log("[connected-providers-cache] Error reading provider-models cache", { error: String(err) })
-		return null
-	}
-}
-
-/**
- * Check if provider-models cache exists.
- */
-export function hasProviderModelsCache(): boolean {
-	const cacheFile = getCacheFilePath(PROVIDER_MODELS_CACHE_FILE)
-	return existsSync(cacheFile)
-}
-
-/**
- * Write the provider-models cache.
- */
-export function writeProviderModelsCache(data: { models: Record<string, string[]>; connected: string[] }): void {
-	ensureCacheDir()
-	const cacheFile = getCacheFilePath(PROVIDER_MODELS_CACHE_FILE)
-
-	const cacheData: ProviderModelsCache = {
-		...data,
-		updatedAt: new Date().toISOString(),
-	}
-
-	try {
-		writeFileSync(cacheFile, JSON.stringify(cacheData, null, 2))
-		log("[connected-providers-cache] Provider-models cache written", { 
-			providerCount: Object.keys(data.models).length 
-		})
-	} catch (err) {
-		log("[connected-providers-cache] Error writing provider-models cache", { error: String(err) })
-	}
-}
-
-/**
- * Update the connected providers cache by fetching from the client.
- * Also updates the provider-models cache with model lists per provider.
- */
-export async function updateConnectedProvidersCache(client: {
-	provider?: {
-		list?: () => Promise<{
-			data?: {
-				connected?: string[]
-				all?: Array<{ id: string; models?: Record<string, unknown> }>
-			}
-		}>
-	}
-}): Promise<void> {
-	if (!client?.provider?.list) {
-		log("[connected-providers-cache] client.provider.list not available")
-		return
-	}
-
-	try {
-		const result = await client.provider.list()
-		const connected = result.data?.connected ?? []
-		log("[connected-providers-cache] Fetched connected providers", { count: connected.length, providers: connected })
-
-		writeConnectedProvidersCache(connected)
-
-		const modelsByProvider: Record<string, string[]> = {}
-		const allProviders = result.data?.all ?? []
-
-		for (const provider of allProviders) {
-			if (provider.models) {
-				const modelIds = Object.keys(provider.models)
-				if (modelIds.length > 0) {
-					modelsByProvider[provider.id] = modelIds
-				}
-			}
+		if (!existsSync(cacheFile)) {
+			log("[connected-providers-cache] Cache file not found", { cacheFile })
+			memConnected = null
+			return null
 		}

-		log("[connected-providers-cache] Extracted models from provider list", {
-			providerCount: Object.keys(modelsByProvider).length,
-			totalModels: Object.values(modelsByProvider).reduce((sum, ids) => sum + ids.length, 0),
-		})
+		try {
+			const content = readFileSync(cacheFile, "utf-8")
+			const data = JSON.parse(content) as ConnectedProvidersCache
+			log("[connected-providers-cache] Read cache", { count: data.connected.length, updatedAt: data.updatedAt })
+			memConnected = data.connected
+			return data.connected
+		} catch (err) {
+			log("[connected-providers-cache] Error reading cache", { error: String(err) })
+			memConnected = null
+			return null
+		}
+	}

-		writeProviderModelsCache({
-			models: modelsByProvider,
+	/** Reports whether the connected-providers cache file exists on disk; the in-memory copy is not consulted. */
+	function hasConnectedProvidersCache(): boolean {
+		return existsSync(getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE))
+	}
+
+	function writeConnectedProvidersCache(connected: string[]): void {
+		ensureCacheDir()
+		const cacheFile = getCacheFilePath(CONNECTED_PROVIDERS_CACHE_FILE)
+
+		const data: ConnectedProvidersCache = {
 			connected,
-		})
-	} catch (err) {
-		log("[connected-providers-cache] Error updating cache", { error: String(err) })
+			updatedAt: new Date().toISOString(),
+		}
+
+		try {
+			writeFileSync(cacheFile, JSON.stringify(data, null, 2))
+			memConnected = connected
+			log("[connected-providers-cache] Cache written", { count: connected.length })
+		} catch (err) {
+			log("[connected-providers-cache] Error writing cache", { error: String(err) })
+		}
+	}
+
+	/**
+	 * Returns the provider-models cache, or null when the file is missing or unreadable.
+	 * The outcome of the first lookup (including a null miss) is memoized in
+	 * `memProviderModels` and served on every later call.
+	 * NOTE(review): because misses are memoized too, a file that appears later without
+	 * going through this store's write path is not picked up, while
+	 * hasProviderModelsCache() still checks the disk — confirm this is intended.
+	 */
+	function readProviderModelsCache(): ProviderModelsCache | null {
+		if (memProviderModels !== undefined) return memProviderModels
+		const cacheFile = getCacheFilePath(PROVIDER_MODELS_CACHE_FILE)
+
+		let loaded: ProviderModelsCache | null = null
+		if (existsSync(cacheFile)) {
+			try {
+				const parsed = JSON.parse(readFileSync(cacheFile, "utf-8")) as ProviderModelsCache
+				log("[connected-providers-cache] Read provider-models cache", {
+					providerCount: Object.keys(parsed.models).length,
+					updatedAt: parsed.updatedAt,
+				})
+				loaded = parsed
+			} catch (err) {
+				// Unreadable or malformed file: treated the same as a miss.
+				log("[connected-providers-cache] Error reading provider-models cache", { error: String(err) })
+			}
+		} else {
+			log("[connected-providers-cache] Provider-models cache file not found", { cacheFile })
+		}
+
+		memProviderModels = loaded
+		return loaded
+	}
+
+	/** Reports whether the provider-models cache file exists on disk; the in-memory copy is not consulted. */
+	function hasProviderModelsCache(): boolean {
+		return existsSync(getCacheFilePath(PROVIDER_MODELS_CACHE_FILE))
+	}
+
+	/**
+	 * Writes the provider → model-id map and connected list to the provider-models
+	 * cache file, stamped with the current time. The in-memory copy is replaced only
+	 * after the file write succeeds; failures are logged and otherwise ignored.
+	 */
+	function writeProviderModelsCache(data: { models: Record<string, string[]>; connected: string[] }): void {
+		ensureCacheDir()
+		const target = getCacheFilePath(PROVIDER_MODELS_CACHE_FILE)
+		const snapshot: ProviderModelsCache = { ...data, updatedAt: new Date().toISOString() }
+
+		try {
+			const serialized = JSON.stringify(snapshot, null, 2)
+			writeFileSync(target, serialized)
+			// Mirror into memory only once the disk write went through.
+			memProviderModels = snapshot
+			const providerCount = Object.keys(data.models).length
+			log("[connected-providers-cache] Provider-models cache written", { providerCount })
+		} catch (err) {
+			log("[connected-providers-cache] Error writing provider-models cache", { error: String(err) })
+		}
+	}
+
+	/**
+	 * Refreshes both caches from `client.provider.list()`.
+	 * Writes the connected provider IDs first, then a map of provider ID → model IDs
+	 * (providers without any models are left out). Does nothing when the client has
+	 * no `provider.list`; any error while fetching or writing is logged and swallowed.
+	 */
+	async function updateConnectedProvidersCache(client: {
+		provider?: {
+			list?: () => Promise<{
+				data?: {
+					connected?: string[]
+					all?: Array<{ id: string; models?: Record<string, unknown> }>
+				}
+			}>
+		}
+	}): Promise<void> {
+		if (!client?.provider?.list) {
+			log("[connected-providers-cache] client.provider.list not available")
+			return
+		}
+
+		try {
+			const response = await client.provider.list()
+			const connected = response.data?.connected ?? []
+			log("[connected-providers-cache] Fetched connected providers", {
+				count: connected.length,
+				providers: connected,
+			})
+
+			writeConnectedProvidersCache(connected)
+
+			// Collect model IDs per provider, skipping providers with no models at all.
+			const modelsByProvider: Record<string, string[]> = {}
+			for (const provider of response.data?.all ?? []) {
+				if (!provider.models) continue
+				const ids = Object.keys(provider.models)
+				if (ids.length === 0) continue
+				modelsByProvider[provider.id] = ids
+			}
+
+			let totalModels = 0
+			for (const ids of Object.values(modelsByProvider)) {
+				totalModels += ids.length
+			}
+			log("[connected-providers-cache] Extracted models from provider list", {
+				providerCount: Object.keys(modelsByProvider).length,
+				totalModels,
+			})
+
+			writeProviderModelsCache({ models: modelsByProvider, connected })
+		} catch (err) {
+			log("[connected-providers-cache] Error updating cache", { error: String(err) })
+		}
+	}
+
+	return {
+		readConnectedProvidersCache,
+		hasConnectedProvidersCache,
+		readProviderModelsCache,
+		hasProviderModelsCache,
+		writeProviderModelsCache,
+		updateConnectedProvidersCache,
 	}
 }
+
+// Module-level store backing the exported functions below. The cache directory is
+// looked up through dataPath.getOmoOpenCodeCacheDir() each time a path is needed.
+const defaultConnectedProvidersCacheStore = createConnectedProvidersCacheStore(() => {
+	return dataPath.getOmoOpenCodeCacheDir()
+})
+
+export const readConnectedProvidersCache = defaultConnectedProvidersCacheStore.readConnectedProvidersCache
+export const hasConnectedProvidersCache = defaultConnectedProvidersCacheStore.hasConnectedProvidersCache
+export const readProviderModelsCache = defaultConnectedProvidersCacheStore.readProviderModelsCache
+export const hasProviderModelsCache = defaultConnectedProvidersCacheStore.hasProviderModelsCache
+export const writeProviderModelsCache = defaultConnectedProvidersCacheStore.writeProviderModelsCache
+export const updateConnectedProvidersCache = defaultConnectedProvidersCacheStore.updateConnectedProvidersCache
--- a/src/shared/file-reference-resolver.ts
+++ b/src/shared/file-reference-resolver.ts
@@ -74,7 +74,7 @@ export async function resolveFileReferencesInText(

  let resolved = text
  for (const [pattern, replacement] of replacements.entries()) {
-    resolved = resolved.split(pattern).join(replacement)
+    // Use a function replacer: a plain string replacement would expand "$&",
+    // "$$", "$1"… found in the inserted text, which split/join never did.
+    resolved = resolved.replaceAll(pattern, () => replacement)
  }

  if (findFileReferences(resolved).length > 0 && depth + 1 < maxDepth) {
--- a/src/shared/logger.ts
+++ b/src/shared/logger.ts
@@ -1,16 +1,42 @@
-// Shared logging utility for the plugin
-
 import * as fs from "fs"
 import * as os from "os"
 import * as path from "path"

 const logFile = path.join(os.tmpdir(), "oh-my-opencode.log")

+// Log entries waiting to be appended to logFile.
+let buffer: string[] = []
+// Handle of the pending delayed flush, or null when none is scheduled.
+let flushTimer: ReturnType<typeof setTimeout> | null = null
+// Delay in milliseconds before the timer flushes buffered entries.
+const FLUSH_INTERVAL_MS = 500
+// Number of buffered entries at which log() flushes immediately.
+const BUFFER_SIZE_LIMIT = 50
+
+/**
+ * Write all buffered entries to the log file in one synchronous append.
+ *
+ * Does nothing when the buffer is empty. The buffer is emptied before the
+ * write, and a failed write is ignored, so those entries are dropped.
+ */
+function flush(): void {
+  if (buffer.length > 0) {
+    const pending = buffer.join("")
+    buffer = []
+    try {
+      fs.appendFileSync(logFile, pending)
+    } catch {
+      // Best effort: a failing log write must not surface to callers.
+    }
+  }
+}
+
+/**
+ * Arm a one-shot timer that flushes the buffer after FLUSH_INTERVAL_MS.
+ * A timer that is already pending is left untouched.
+ */
+function scheduleFlush(): void {
+  const onTimeout = (): void => {
+    // Clear the handle first so a later log() call can schedule a new timer.
+    flushTimer = null
+    flush()
+  }
+  if (!flushTimer) {
+    flushTimer = setTimeout(onTimeout, FLUSH_INTERVAL_MS)
+  }
+}
+
+/**
+ * Queue a timestamped entry for the log file.
+ *
+ * The entry is pushed onto the in-memory buffer. Once the buffer holds
+ * BUFFER_SIZE_LIMIT entries it is flushed immediately; otherwise a delayed
+ * flush is scheduled. Any error raised while building or queuing the entry
+ * is swallowed.
+ *
+ * @param message - Text placed after the ISO timestamp.
+ * @param data - Optional value appended as JSON; falsy values are omitted.
+ */
 export function log(message: string, data?: unknown): void {
  try {
    const timestamp = new Date().toISOString()
    const logEntry = `[${timestamp}] ${message} ${data ? JSON.stringify(data) : ""}\n`
-    fs.appendFileSync(logFile, logEntry)
+    buffer.push(logEntry)
+    if (buffer.length >= BUFFER_SIZE_LIMIT) {
+      flush()
+    } else {
+      scheduleFlush()
+    }
  } catch {
  }
 }
--- a/src/shared/pattern-matcher.ts
+++ b/src/shared/pattern-matcher.ts
@@ -9,6 +9,8 @@ function escapeRegexExceptAsterisk(str: string): string {
  return str.replace(/[.+?^${}()|[\]\\]/g, "\\$&")
 }

+const regexCache = new Map<string, RegExp>()
+
 export function matchesToolMatcher(toolName: string, matcher: string): boolean {
  if (!matcher) {
    return true
@@ -17,8 +19,12 @@ export function matchesToolMatcher(toolName: string, matcher: string): boolean {
  return patterns.some((p) => {
    if (p.includes("*")) {
      // First escape regex special chars (except *), then convert * to .*
-      const escaped = escapeRegexExceptAsterisk(p)
-      const regex = new RegExp(`^${escaped.replace(/\*/g, ".*")}$`, "i")
+      let regex = regexCache.get(p)
+      if (!regex) {
+        const escaped = escapeRegexExceptAsterisk(p)
+        regex = new RegExp(`^${escaped.replace(/\*/g, ".*")}$`, "i")
+        regexCache.set(p, regex)
+      }
      return regex.test(toolName)
    }
    return p.toLowerCase() === toolName.toLowerCase()
--- a/src/shared/plugin-identity.ts
+++ b/src/shared/plugin-identity.ts
@@ -1,4 +1,5 @@
 export const PLUGIN_NAME = "oh-my-opencode"
+// NOTE(review): second package name the project is published under (the
+// publish workflow checks both names on npm) — confirm this is the legacy one.
+export const LEGACY_PLUGIN_NAME = "oh-my-openagent"
 export const CONFIG_BASENAME = "oh-my-opencode"
 export const LOG_FILENAME = "oh-my-opencode.log"
 export const CACHE_DIR_NAME = "oh-my-opencode"
--- a/src/shared/port-utils.test.ts
+++ b/src/shared/port-utils.test.ts
@@ -1,4 +1,4 @@
-import { describe, it, expect, beforeAll, afterAll } from "bun:test"
+import { afterEach, beforeEach, describe, expect, it, spyOn } from "bun:test"
 import {
  isPortAvailable,
  findAvailablePort,
@@ -6,96 +6,283 @@ import {
  DEFAULT_SERVER_PORT,
 } from "./port-utils"

+const HOSTNAME = "127.0.0.1"
+const REAL_PORT_SEARCH_WINDOW = 200
+
+/**
+ * Probe whether this environment allows binding a real listening socket.
+ *
+ * Starts a throwaway Bun server on port 0 at HOSTNAME and stops it right away.
+ *
+ * @returns true when the server could be started and stopped, false if either step threw
+ */
+function supportsRealSocketBinding(): boolean {
+  let bindable = true
+  try {
+    Bun.serve({
+      port: 0,
+      hostname: HOSTNAME,
+      fetch: () => new Response("probe"),
+    }).stop(true)
+  } catch {
+    bindable = false
+  }
+  return bindable
+}
+
+const canBindRealSockets = supportsRealSocketBinding()
+
 describe("port-utils", () => {
-  describe("isPortAvailable", () => {
-    it("#given unused port #when checking availability #then returns true", async () => {
-      const port = 59999
-      const result = await isPortAvailable(port)
-      expect(result).toBe(true)
-    })
-
-    it("#given port in use #when checking availability #then returns false", async () => {
-      const port = 59998
-      const blocker = Bun.serve({
+  if (canBindRealSockets) {
+    function startRealBlocker(port: number = 0) {
+      return Bun.serve({
        port,
-        hostname: "127.0.0.1",
+        hostname: HOSTNAME,
        fetch: () => new Response("blocked"),
      })
+    }

-      try {
-        const result = await isPortAvailable(port)
-        expect(result).toBe(false)
-      } finally {
-        blocker.stop(true)
+    async function findContiguousAvailableStart(length: number): Promise<number> {
+      const probe = startRealBlocker()
+      const seedPort = probe.port
+      probe.stop(true)
+
+      for (let candidate = seedPort; candidate < seedPort + REAL_PORT_SEARCH_WINDOW; candidate++) {
+        const checks = await Promise.all(
+          Array.from({ length }, async (_, offset) => isPortAvailable(candidate + offset, HOSTNAME))
+        )
+        if (checks.every(Boolean)) {
+          return candidate
+        }
      }
-    })
-  })

-  describe("findAvailablePort", () => {
-    it("#given start port available #when finding port #then returns start port", async () => {
-      const startPort = 59997
-      const result = await findAvailablePort(startPort)
-      expect(result).toBe(startPort)
-    })
+      throw new Error(`Could not find ${length} contiguous available ports`)
+    }

-    it("#given start port blocked #when finding port #then returns next available", async () => {
-      const startPort = 59996
-      const blocker = Bun.serve({
-        port: startPort,
-        hostname: "127.0.0.1",
-        fetch: () => new Response("blocked"),
+    describe("with real sockets", () => {
+      describe("isPortAvailable", () => {
+        it("#given unused port #when checking availability #then returns true", async () => {
+          const blocker = startRealBlocker()
+          const port = blocker.port
+          blocker.stop(true)
+
+          const result = await isPortAvailable(port)
+          expect(result).toBe(true)
+        })
+
+        it("#given port in use #when checking availability #then returns false", async () => {
+          const blocker = startRealBlocker()
+          const port = blocker.port
+
+          try {
+            const result = await isPortAvailable(port)
+            expect(result).toBe(false)
+          } finally {
+            blocker.stop(true)
+          }
+        })
      })

-      try {
-        const result = await findAvailablePort(startPort)
-        expect(result).toBe(startPort + 1)
-      } finally {
-        blocker.stop(true)
-      }
-    })
+      describe("findAvailablePort", () => {
+        it("#given start port available #when finding port #then returns start port", async () => {
+          const startPort = await findContiguousAvailableStart(1)
+          const result = await findAvailablePort(startPort)
+          expect(result).toBe(startPort)
+        })

-    it("#given multiple ports blocked #when finding port #then skips all blocked", async () => {
-      const startPort = 59993
-      const blockers = [
-        Bun.serve({ port: startPort, hostname: "127.0.0.1", fetch: () => new Response() }),
-        Bun.serve({ port: startPort + 1, hostname: "127.0.0.1", fetch: () => new Response() }),
-        Bun.serve({ port: startPort + 2, hostname: "127.0.0.1", fetch: () => new Response() }),
-      ]
+        it("#given start port blocked #when finding port #then returns next available", async () => {
+          const startPort = await findContiguousAvailableStart(2)
+          const blocker = startRealBlocker(startPort)

-      try {
-        const result = await findAvailablePort(startPort)
-        expect(result).toBe(startPort + 3)
-      } finally {
-        blockers.forEach((b) => b.stop(true))
-      }
-    })
-  })
+          try {
+            const result = await findAvailablePort(startPort)
+            expect(result).toBe(startPort + 1)
+          } finally {
+            blocker.stop(true)
+          }
+        })

-  describe("getAvailableServerPort", () => {
-    it("#given preferred port available #when getting port #then returns preferred with wasAutoSelected=false", async () => {
-      const preferredPort = 59990
-      const result = await getAvailableServerPort(preferredPort)
-      expect(result.port).toBe(preferredPort)
-      expect(result.wasAutoSelected).toBe(false)
-    })
+        it("#given multiple ports blocked #when finding port #then skips all blocked", async () => {
+          const startPort = await findContiguousAvailableStart(4)
+          const blockers = [
+            startRealBlocker(startPort),
+            startRealBlocker(startPort + 1),
+            startRealBlocker(startPort + 2),
+          ]

-    it("#given preferred port blocked #when getting port #then returns alternative with wasAutoSelected=true", async () => {
-      const preferredPort = 59989
-      const blocker = Bun.serve({
-        port: preferredPort,
-        hostname: "127.0.0.1",
-        fetch: () => new Response("blocked"),
+          try {
+            const result = await findAvailablePort(startPort)
+            expect(result).toBe(startPort + 3)
+          } finally {
+            blockers.forEach((blocker) => blocker.stop(true))
+          }
+        })
      })

-      try {
-        const result = await getAvailableServerPort(preferredPort)
-        expect(result.port).toBeGreaterThan(preferredPort)
-        expect(result.wasAutoSelected).toBe(true)
-      } finally {
-        blocker.stop(true)
-      }
+      describe("getAvailableServerPort", () => {
+        it("#given preferred port available #when getting port #then returns preferred with wasAutoSelected=false", async () => {
+          const preferredPort = await findContiguousAvailableStart(1)
+          const result = await getAvailableServerPort(preferredPort)
+          expect(result.port).toBe(preferredPort)
+          expect(result.wasAutoSelected).toBe(false)
+        })
+
+        it("#given preferred port blocked #when getting port #then returns alternative with wasAutoSelected=true", async () => {
+          const preferredPort = await findContiguousAvailableStart(2)
+          const blocker = startRealBlocker(preferredPort)
+
+          try {
+            const result = await getAvailableServerPort(preferredPort)
+            expect(result.port).toBe(preferredPort + 1)
+            expect(result.wasAutoSelected).toBe(true)
+          } finally {
+            blocker.stop(true)
+          }
+        })
+      })
    })
-  })
+  } else {
+    const blockedSockets = new Set<string>()
+    let serveSpy: ReturnType<typeof spyOn>
+
+    /** Build the "hostname:port" key under which a mocked socket is tracked. */
+    function getSocketKey(port: number, hostname: string): string {
+      return [hostname, port].join(":")
+    }
+
+    // Replace Bun.serve with an in-memory fake before every test: a
+    // hostname:port pair can be "bound" once, and binding it again throws an
+    // EADDRINUSE-shaped error until the fake server is stopped.
+    beforeEach(() => {
+      blockedSockets.clear()
+      serveSpy = spyOn(Bun, "serve").mockImplementation(({ port, hostname }) => {
+        if (typeof port !== "number") {
+          throw new Error("Test expected numeric port")
+        }
+        const boundHostname = typeof hostname === "string" ? hostname : HOSTNAME
+        const key = getSocketKey(port, boundHostname)
+
+        if (blockedSockets.has(key)) {
+          // Attach the same fields the test sets on an address-in-use failure.
+          throw Object.assign(new Error(`Failed to start server. Is port ${port} in use?`), {
+            code: "EADDRINUSE",
+            syscall: "listen",
+            errno: 0,
+            address: boundHostname,
+            port,
+          })
+        }
+
+        blockedSockets.add(key)
+        return {
+          // Stopping the fake server frees the socket again.
+          stop: (_force?: boolean) => {
+            blockedSockets.delete(key)
+          },
+        } as { stop: (force?: boolean) => void }
+      })
+    })
+
+    // Verify that every fake server was stopped, then undo the Bun.serve mock.
+    // The cleanup sits in `finally` so a failing leak assertion cannot leave
+    // the mock installed for tests that run afterwards.
+    afterEach(() => {
+      try {
+        expect(blockedSockets.size).toBe(0)
+      } finally {
+        serveSpy.mockRestore()
+        blockedSockets.clear()
+      }
+    })
+
+    describe("with mocked sockets fallback", () => {
+      describe("isPortAvailable", () => {
+        it("#given unused port #when checking availability #then returns true", async () => {
+          const port = 59999
+
+          const result = await isPortAvailable(port)
+          expect(result).toBe(true)
+          expect(blockedSockets.size).toBe(0)
+        })
+
+        it("#given port in use #when checking availability #then returns false", async () => {
+          const port = 59998
+          const blocker = Bun.serve({
+            port,
+            hostname: HOSTNAME,
+            fetch: () => new Response("blocked"),
+          })
+
+          try {
+            const result = await isPortAvailable(port)
+            expect(result).toBe(false)
+          } finally {
+            blocker.stop(true)
+          }
+        })
+
+        it("#given custom hostname #when checking availability #then passes hostname through to Bun.serve", async () => {
+          const hostname = "192.0.2.10"
+          await isPortAvailable(59995, hostname)
+
+          expect(serveSpy.mock.calls[0]?.[0]?.hostname).toBe(hostname)
+        })
+      })
+
+      describe("findAvailablePort", () => {
+        it("#given start port available #when finding port #then returns start port", async () => {
+          const startPort = 59997
+          const result = await findAvailablePort(startPort)
+          expect(result).toBe(startPort)
+        })
+
+        it("#given start port blocked #when finding port #then returns next available", async () => {
+          const startPort = 59996
+          const blocker = Bun.serve({
+            port: startPort,
+            hostname: HOSTNAME,
+            fetch: () => new Response("blocked"),
+          })
+
+          try {
+            const result = await findAvailablePort(startPort)
+            expect(result).toBe(startPort + 1)
+          } finally {
+            blocker.stop(true)
+          }
+        })
+
+        it("#given multiple ports blocked #when finding port #then skips all blocked", async () => {
+          const startPort = 59993
+          const blockers = [
+            Bun.serve({ port: startPort, hostname: HOSTNAME, fetch: () => new Response() }),
+            Bun.serve({ port: startPort + 1, hostname: HOSTNAME, fetch: () => new Response() }),
+            Bun.serve({ port: startPort + 2, hostname: HOSTNAME, fetch: () => new Response() }),
+          ]
+
+          try {
+            const result = await findAvailablePort(startPort)
+            expect(result).toBe(startPort + 3)
+          } finally {
+            blockers.forEach((blocker) => blocker.stop(true))
+          }
+        })
+      })
+
+      describe("getAvailableServerPort", () => {
+        it("#given preferred port available #when getting port #then returns preferred with wasAutoSelected=false", async () => {
+          const preferredPort = 59990
+          const result = await getAvailableServerPort(preferredPort)
+          expect(result.port).toBe(preferredPort)
+          expect(result.wasAutoSelected).toBe(false)
+        })
+
+        it("#given preferred port blocked #when getting port #then returns alternative with wasAutoSelected=true", async () => {
+          const preferredPort = 59989
+          const blocker = Bun.serve({
+            port: preferredPort,
+            hostname: HOSTNAME,
+            fetch: () => new Response("blocked"),
+          })
+
+          try {
+            const result = await getAvailableServerPort(preferredPort)
+            expect(result.port).toBe(preferredPort + 1)
+            expect(result.wasAutoSelected).toBe(true)
+          } finally {
+            blocker.stop(true)
+          }
+        })
+      })
+    })
+  }

  describe("DEFAULT_SERVER_PORT", () => {
    it("#given constant #when accessed #then returns 4096", () => {
--- a/src/shared/shell-env.ts
+++ b/src/shared/shell-env.ts
@@ -109,3 +109,44 @@ export function buildEnvPrefix(
      return ""
  }
 }
+
+/**
+ * Escape a value so it can be embedded in a `<shell> -c "..."` command string.
+ *
+ * Such a string is parsed twice: the shell that receives the whole command
+ * line removes the double quotes, then the shell started with `-c` parses the
+ * result again as ordinary, unquoted words. The value therefore gets two layers:
+ *
+ * 1. Inner layer (unquoted word): every ASCII character outside
+ *    `A-Z a-z 0-9 _ . / : @ % + , -` is backslash-escaped. A newline is wrapped
+ *    in single quotes instead, because backslash-newline is a line continuation.
+ * 2. Outer layer (double quotes): backslash, `$`, backtick and `"` are
+ *    backslash-escaped; these are the characters a backslash acts on there.
+ *
+ * Plain values such as `http://localhost:3000` are returned unchanged.
+ *
+ * @param value - The value to escape
+ * @returns Value that the two shell layers reduce back to `value` as one literal word
+ *
+ * @example
+ * ```ts
+ * // For malicious input
+ * const url = "http://localhost:3000'; cat /etc/passwd; echo '"
+ * const escaped = shellEscapeForDoubleQuotedCommand(url)
+ * // => http://localhost:3000\\'\\;\\ cat\\ /etc/passwd\\;\\ echo\\ \\'
+ *
+ * // Usage in command:
+ * const cmd = `/bin/sh -c "opencode attach ${escaped} --session ${sessionId}"`
+ * ```
+ */
+export function shellEscapeForDoubleQuotedCommand(value: string): string {
+  // Inner layer first. Non-ASCII characters are not shell syntax and stay as-is.
+  const escapedForInnerShell = value.replace(
+    /[^A-Za-z0-9_.\/:@%+,\-\u0080-\uffff]/g,
+    (char) => (char === "\n" ? "'\n'" : `\\${char}`)
+  )
+  // Outer layer: "\\$&" puts a backslash in front of each matched character,
+  // including the backslashes added by the inner layer.
+  return escapedForInnerShell.replace(/[\\$`"]/g, "\\$&")
+}
--- a/src/shared/tmux/tmux-utils/pane-replace.ts
+++ b/src/shared/tmux/tmux-utils/pane-replace.ts
@@ -3,6 +3,7 @@ import type { TmuxConfig } from "../../../config/schema"
 import { getTmuxPath } from "../../../tools/interactive-bash/tmux-path-resolver"
 import type { SpawnPaneResult } from "../types"
 import { isInsideTmux } from "./environment"
+import { shellEscapeForDoubleQuotedCommand } from "../../shell-env"

 export async function replaceTmuxPane(
 	paneId: string,
@@ -34,7 +35,9 @@ export async function replaceTmuxPane(
 	})
 	await ctrlCProc.exited

-	const opencodeCmd = `zsh -c 'opencode attach ${serverUrl} --session ${sessionId}'`
+	const shell = process.env.SHELL || "/bin/sh"
+	const escapedUrl = shellEscapeForDoubleQuotedCommand(serverUrl)
+	const opencodeCmd = `${shell} -c "opencode attach ${escapedUrl} --session ${sessionId}"`

 	const proc = spawn([tmux, "respawn-pane", "-k", "-t", paneId, opencodeCmd], {
 		stdout: "pipe",
@@ -59,6 +62,7 @@ export async function replaceTmuxPane(
 		const titleStderr = await stderrPromise
 		log("[replaceTmuxPane] WARNING: failed to set pane title", {
 			paneId,
+			title,
 			exitCode: titleExitCode,
 			stderr: titleStderr.trim(),
 		})
--- a/src/shared/tmux/tmux-utils/pane-spawn.test.ts
+++ b/src/shared/tmux/tmux-utils/pane-spawn.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from "bun:test"
+import { shellEscapeForDoubleQuotedCommand } from "../../shell-env"
+
+const SESSION_ID = "test-session"
+const SHELL = "/bin/sh"
+
+/**
+ * Wrap a server URL in the `<shell> -c "opencode attach … --session …"`
+ * template, escaping the URL with shellEscapeForDoubleQuotedCommand.
+ */
+function buildAttachCommand(serverUrl: string): string {
+  const escapedUrl = shellEscapeForDoubleQuotedCommand(serverUrl)
+  return `${SHELL} -c "opencode attach ${escapedUrl} --session ${SESSION_ID}"`
+}
+
+describe("given a serverUrl with shell metacharacters", () => {
+  describe("when building tmux spawn command with double quotes", () => {
+    it("then serverUrl is escaped to prevent shell injection", () => {
+      const command = buildAttachCommand("http://localhost:3000'; cat /etc/passwd; echo '")
+
+      // An escaped semicolon must be present...
+      expect(command).toContain("\\;")
+      // ...and no semicolon without a preceding backslash may lead into `cat`.
+      expect(command).not.toMatch(/[^\\];\s*cat/)
+    })
+  })
+
+  describe("when building tmux replace command", () => {
+    it("then serverUrl is escaped to prevent shell injection", () => {
+      const command = buildAttachCommand("http://localhost:3000'; rm -rf /; '")
+
+      expect(command).toContain("\\;")
+      expect(command).not.toMatch(/[^\\];\s*rm/)
+    })
+  })
+})
+
+describe("given a normal serverUrl without shell metacharacters", () => {
+  describe("when building tmux spawn command", () => {
+    it("then serverUrl works correctly", () => {
+      const serverUrl = "http://localhost:3000"
+
+      // A URL without metacharacters must appear verbatim in the command.
+      expect(buildAttachCommand(serverUrl)).toContain(serverUrl)
+    })
+  })
+})
+
+describe("given a serverUrl with dollar sign (command injection)", () => {
+  describe("when building tmux command", () => {
+    it("then dollar sign is escaped properly", () => {
+      const command = buildAttachCommand("http://localhost:3000$(whoami)")
+
+      // The dollar sign must carry a backslash.
+      expect(command).toContain("\\$")
+    })
+  })
+})
+
+describe("given a serverUrl with backticks (command injection)", () => {
+  describe("when building tmux command", () => {
+    it("then backticks are escaped properly", () => {
+      const command = buildAttachCommand("http://localhost:3000`whoami`")
+
+      expect(command).toContain("\\`")
+    })
+  })
+})
+
+describe("given a serverUrl with pipe operator", () => {
+  describe("when building tmux command", () => {
+    it("then pipe is escaped properly", () => {
+      const command = buildAttachCommand("http://localhost:3000 | ls")
+
+      expect(command).toContain("\\|")
+    })
+  })
+})
--- a/src/shared/tmux/tmux-utils/pane-spawn.ts
+++ b/src/shared/tmux/tmux-utils/pane-spawn.ts
@@ -5,6 +5,7 @@ import type { SpawnPaneResult } from "../types"
 import type { SplitDirection } from "./environment"
 import { isInsideTmux } from "./environment"
 import { isServerRunning } from "./server-health"
+import { shellEscapeForDoubleQuotedCommand } from "../../shell-env"

 export async function spawnTmuxPane(
 	sessionId: string,
@@ -48,7 +49,9 @@ export async function spawnTmuxPane(

 	log("[spawnTmuxPane] all checks passed, spawning...")

-	const opencodeCmd = `zsh -c 'opencode attach ${serverUrl} --session ${sessionId}'`
+	const shell = process.env.SHELL || "/bin/sh"
+	const escapedUrl = shellEscapeForDoubleQuotedCommand(serverUrl)
+	const opencodeCmd = `${shell} -c "opencode attach ${escapedUrl} --session ${sessionId}"`

 	const args = [
 		"split-window",
--- a/src/tools/delegate-task/category-resolver.test.ts
+++ b/src/tools/delegate-task/category-resolver.test.ts
@@ -7,16 +7,22 @@ import * as connectedProvidersCache from "../../shared/connected-providers-cache
 describe("resolveCategoryExecution", () => {
 	let connectedProvidersSpy: ReturnType<typeof spyOn> | undefined
 	let providerModelsSpy: ReturnType<typeof spyOn> | undefined
+	let hasConnectedProvidersSpy: ReturnType<typeof spyOn> | undefined
+	let hasProviderModelsSpy: ReturnType<typeof spyOn> | undefined

 	beforeEach(() => {
 		mock.restore()
 		connectedProvidersSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(null)
 		providerModelsSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue(null)
+		hasConnectedProvidersSpy = spyOn(connectedProvidersCache, "hasConnectedProvidersCache").mockReturnValue(false)
+		hasProviderModelsSpy = spyOn(connectedProvidersCache, "hasProviderModelsCache").mockReturnValue(false)
 	})

 	afterEach(() => {
 		connectedProvidersSpy?.mockRestore()
 		providerModelsSpy?.mockRestore()
+		hasConnectedProvidersSpy?.mockRestore()
+		hasProviderModelsSpy?.mockRestore()
 	})

 	const createMockExecutorContext = (): ExecutorContext => ({
@@ -27,7 +33,7 @@ describe("resolveCategoryExecution", () => {
 		sisyphusJuniorModel: undefined,
 	})

-	test("returns clear error when category exists but required model is not available", async () => {
+	test("returns unpinned resolution when category cache is not ready on first run", async () => {
 		//#given
 		const args = {
 			category: "deep",
@@ -39,6 +45,9 @@ describe("resolveCategoryExecution", () => {
 			enableSkillTools: false,
 		}
 		const executorCtx = createMockExecutorContext()
+		executorCtx.userCategories = {
+			deep: {},
+		}
 		const inheritedModel = undefined
 		const systemDefaultModel = "anthropic/claude-sonnet-4-6"

@@ -46,10 +55,10 @@ describe("resolveCategoryExecution", () => {
 		const result = await resolveCategoryExecution(args, executorCtx, inheritedModel, systemDefaultModel)

 		//#then
-		expect(result.error).toBeDefined()
-		expect(result.error).toContain("deep")
-		expect(result.error).toMatch(/model.*not.*available|requires.*model/i)
-		expect(result.error).not.toContain("Unknown category")
+		expect(result.error).toBeUndefined()
+		expect(result.actualModel).toBeUndefined()
+		expect(result.categoryModel).toBeUndefined()
+		expect(result.agentToUse).toBeDefined()
 	})

 	test("returns 'unknown category' error for truly unknown categories", async () => {
--- a/src/tools/delegate-task/category-resolver.ts
+++ b/src/tools/delegate-task/category-resolver.ts
@@ -85,6 +85,7 @@ Available categories: ${allCategoryNames}`,
  let actualModel: string | undefined
  let modelInfo: ModelFallbackInfo | undefined
  let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
+  let isModelResolutionSkipped = false

  const overrideModel = sisyphusJuniorModel
  const explicitCategoryModel = userCategories?.[args.category!]?.model
@@ -98,6 +99,11 @@ Available categories: ${allCategoryNames}`,
      modelInfo = explicitCategoryModel || overrideModel
        ? { model: actualModel, type: "user-defined", source: "override" }
        : { model: actualModel, type: "system-default", source: "system-default" }
+      const parsedModel = parseModelString(actualModel)
+      const variantToUse = userCategories?.[args.category!]?.variant ?? resolved.config.variant
+      categoryModel = parsedModel
+        ? (variantToUse ? { ...parsedModel, variant: variantToUse } : parsedModel)
+        : undefined
    }
  } else {
    const resolution = resolveModelForDelegateTask({
@@ -109,7 +115,9 @@ Available categories: ${allCategoryNames}`,
      systemDefaultModel,
    })

-    if (resolution) {
+    if (resolution && "skipped" in resolution) {
+      isModelResolutionSkipped = true
+    } else if (resolution) {
      const { model: resolvedModel, variant: resolvedVariant } = resolution
      actualModel = resolvedModel

@@ -156,7 +164,7 @@ Available categories: ${allCategoryNames}`,
  }
  const categoryPromptAppend = resolved.promptAppend || undefined

-  if (!categoryModel && !actualModel) {
+  if (!categoryModel && !actualModel && !isModelResolutionSkipped) {
    const categoryNames = Object.keys(enabledCategories)
    return {
      agentToUse: "",
--- a/src/tools/delegate-task/model-selection.test.ts
+++ b/src/tools/delegate-task/model-selection.test.ts
@@ -1,5 +1,5 @@
 declare const require: (name: string) => any
-const { describe, test, expect, beforeEach, afterEach, spyOn, mock } = require("bun:test")
+const { afterEach, beforeEach, describe, expect, mock, spyOn, test } = require("bun:test")
 import { resolveModelForDelegateTask } from "./model-selection"
 import * as connectedProvidersCache from "../../shared/connected-providers-cache"

@@ -23,7 +23,7 @@ describe("resolveModelForDelegateTask", () => {
 		})

 		describe("#when availableModels is empty and no user model override", () => {
-			test("#then returns undefined to let OpenCode use system default", () => {
+			test("#then returns skipped sentinel to leave model unpinned", () => {
 				const result = resolveModelForDelegateTask({
 					categoryDefaultModel: "anthropic/claude-sonnet-4-6",
 					fallbackChain: [
@@ -33,7 +33,7 @@ describe("resolveModelForDelegateTask", () => {
 					systemDefaultModel: "anthropic/claude-sonnet-4-6",
 				})

-				expect(result).toBeUndefined()
+				expect(result).toEqual({ skipped: true })
 			})
 		})

@@ -54,7 +54,7 @@ describe("resolveModelForDelegateTask", () => {
 		})

 		describe("#when user set fallback_models but no cache exists", () => {
-			test("#then returns undefined (skip fallback resolution without cache)", () => {
+			test("#then returns skipped sentinel (skip fallback resolution without cache)", () => {
 				const result = resolveModelForDelegateTask({
 					userFallbackModels: ["openai/gpt-5.4", "google/gemini-3.1-pro"],
 					categoryDefaultModel: "anthropic/claude-sonnet-4-6",
@@ -64,7 +64,7 @@ describe("resolveModelForDelegateTask", () => {
 					availableModels: new Set(),
 				})

-				expect(result).toBeUndefined()
+				expect(result).toEqual({ skipped: true })
 			})
 		})
 	})
@@ -86,8 +86,7 @@ describe("resolveModelForDelegateTask", () => {
 					systemDefaultModel: "anthropic/claude-sonnet-4-6",
 				})

-				expect(result).toBeDefined()
-				expect(result!.model).toBe("anthropic/claude-sonnet-4-6")
+				expect(result).toEqual({ model: "anthropic/claude-sonnet-4-6" })
 			})
 		})

@@ -101,8 +100,27 @@ describe("resolveModelForDelegateTask", () => {
 					availableModels: new Set(["anthropic/claude-sonnet-4-6"]),
 				})

-				expect(result).toBeDefined()
-				expect(result!.model).toBe("anthropic/claude-sonnet-4-6")
+				expect(result).toEqual({ model: "anthropic/claude-sonnet-4-6" })
+			})
+		})
+
+		describe("#when user fallback models include variant syntax", () => {
+			test("#then resolves a parenthesized variant against the base available model", () => {
+				const result = resolveModelForDelegateTask({
+					userFallbackModels: ["openai/gpt-5.2(high)"],
+					availableModels: new Set(["openai/gpt-5.2"]),
+				})
+
+				expect(result).toEqual({ model: "openai/gpt-5.2", variant: "high" })
+			})
+
+			test("#then resolves a space-separated variant against the base available model", () => {
+				const result = resolveModelForDelegateTask({
+					userFallbackModels: ["gpt-5.2 medium"],
+					availableModels: new Set(["openai/gpt-5.2"]),
+				})
+
+				expect(result).toEqual({ model: "openai/gpt-5.2", variant: "medium" })
 			})
 		})
 	})
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`export { createTodoDescriptionOverrideHook } from "./hook"`