diff --git a/.github/workflows/publish-platform.yml b/.github/workflows/publish-platform.yml index 173c11795..74089679f 100644 --- a/.github/workflows/publish-platform.yml +++ b/.github/workflows/publish-platform.yml @@ -35,15 +35,15 @@ jobs: # - Uploads compressed artifacts for the publish job # ============================================================================= build: - runs-on: ${{ matrix.platform == 'windows-x64' && 'windows-latest' || 'ubuntu-latest' }} + runs-on: ${{ startsWith(matrix.platform, 'windows-') && 'windows-latest' || 'ubuntu-latest' }} defaults: run: shell: bash strategy: fail-fast: false - max-parallel: 7 + max-parallel: 11 matrix: - platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] + platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline] steps: - uses: actions/checkout@v4 @@ -95,14 +95,18 @@ jobs: case "$PLATFORM" in darwin-arm64) TARGET="bun-darwin-arm64" ;; darwin-x64) TARGET="bun-darwin-x64" ;; + darwin-x64-baseline) TARGET="bun-darwin-x64-baseline" ;; linux-x64) TARGET="bun-linux-x64" ;; + linux-x64-baseline) TARGET="bun-linux-x64-baseline" ;; linux-arm64) TARGET="bun-linux-arm64" ;; linux-x64-musl) TARGET="bun-linux-x64-musl" ;; + linux-x64-musl-baseline) TARGET="bun-linux-x64-musl-baseline" ;; linux-arm64-musl) TARGET="bun-linux-arm64-musl" ;; windows-x64) TARGET="bun-windows-x64" ;; + windows-x64-baseline) TARGET="bun-windows-x64-baseline" ;; esac - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode.exe" else OUTPUT="packages/${PLATFORM}/bin/oh-my-opencode" @@ -119,7 +123,7 @@ jobs: PLATFORM="${{ matrix.platform }}" cd packages/${PLATFORM} - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then # Windows: use 7z (pre-installed 
on windows-latest) 7z a -tzip ../../binary-${PLATFORM}.zip bin/ package.json else @@ -155,7 +159,7 @@ jobs: fail-fast: false max-parallel: 2 matrix: - platform: [darwin-arm64, darwin-x64, linux-x64, linux-arm64, linux-x64-musl, linux-arm64-musl, windows-x64] + platform: [darwin-arm64, darwin-x64, darwin-x64-baseline, linux-x64, linux-x64-baseline, linux-arm64, linux-x64-musl, linux-x64-musl-baseline, linux-arm64-musl, windows-x64, windows-x64-baseline] steps: - name: Check if already published id: check @@ -184,7 +188,7 @@ jobs: PLATFORM="${{ matrix.platform }}" mkdir -p packages/${PLATFORM} - if [ "$PLATFORM" = "windows-x64" ]; then + if [[ "$PLATFORM" == windows-* ]]; then unzip binary-${PLATFORM}.zip -d packages/${PLATFORM}/ else tar -xzvf binary-${PLATFORM}.tar.gz -C packages/${PLATFORM}/ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index d430e7caf..a64ddd55f 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -189,7 +189,7 @@ jobs: VERSION="${{ steps.version.outputs.version }}" jq --arg v "$VERSION" '.version = $v' package.json > tmp.json && mv tmp.json package.json - for platform in darwin-arm64 darwin-x64 linux-x64 linux-arm64 linux-x64-musl linux-arm64-musl windows-x64; do + for platform in darwin-arm64 darwin-x64 darwin-x64-baseline linux-x64 linux-x64-baseline linux-arm64 linux-x64-musl linux-x64-musl-baseline linux-arm64-musl windows-x64 windows-x64-baseline; do jq --arg v "$VERSION" '.version = $v' "packages/${platform}/package.json" > tmp.json mv tmp.json "packages/${platform}/package.json" done diff --git a/assets/oh-my-opencode.schema.json b/assets/oh-my-opencode.schema.json index 75a2a26f3..30757523b 100644 --- a/assets/oh-my-opencode.schema.json +++ b/assets/oh-my-opencode.schema.json @@ -24,19 +24,7 @@ "disabled_agents": { "type": "array", "items": { - "type": "string", - "enum": [ - "sisyphus", - "hephaestus", - "prometheus", - "oracle", - "librarian", - "explore", - "multimodal-looker", 
- "metis", - "momus", - "atlas" - ] + "type": "string" } }, "disabled_skills": { @@ -960,6 +948,9 @@ } }, "additionalProperties": false + }, + "allow_non_gpt_model": { + "type": "boolean" } }, "additionalProperties": false @@ -3474,6 +3465,11 @@ "prompt_append": { "type": "string" }, + "max_prompt_tokens": { + "type": "integer", + "exclusiveMinimum": 0, + "maximum": 9007199254740991 + }, "is_unstable_agent": { "type": "boolean" }, diff --git a/benchmarks/bun.lock b/benchmarks/bun.lock new file mode 100644 index 000000000..3a31bf1c5 --- /dev/null +++ b/benchmarks/bun.lock @@ -0,0 +1,62 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "hashline-edit-benchmark", + "dependencies": { + "@ai-sdk/openai": "^1.3.0", + "@friendliai/ai-provider": "^1.0.9", + "ai": "^6.0.94", + "zod": "^4.1.0", + }, + }, + }, + "packages": { + "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.55", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@vercel/oidc": "3.1.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-7xMeTJnCjwRwXKVCiv4Ly4qzWvDuW3+W1WIV0X1EFu6W83d4mEhV9bFArto10MeTw40ewuDjrbrZd21mXKohkw=="], + + "@ai-sdk/openai": ["@ai-sdk/openai@1.3.24", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "@ai-sdk/provider-utils": "2.2.8" }, "peerDependencies": { "zod": "^3.0.0" } }, "sha512-GYXnGJTHRTZc4gJMSmFRgEQudjqd4PUN0ZjQhPwOAYH1yOAvQoG/Ikqs+HyISRbLPCrhbZnPKCNHuRU4OfpW0Q=="], + + "@ai-sdk/openai-compatible": ["@ai-sdk/openai-compatible@2.0.30", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-iTjumHf1/u4NhjXYFn/aONM2GId3/o7J1Lp5ql8FCbgIMyRwrmanR5xy1S3aaVkfTscuDvLTzWiy1mAbGzK3nQ=="], + + "@ai-sdk/provider": ["@ai-sdk/provider@1.1.3", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-qZMxYJ0qqX/RfnuIaab+zp8UAeJn/ygXXAffR5I4N0n1IrvA6qBsjc8hXLmBiMV2zoXlifkacF7sEFnYnjBcqg=="], + + 
"@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@2.2.8", "", { "dependencies": { "@ai-sdk/provider": "1.1.3", "nanoid": "^3.3.8", "secure-json-parse": "^2.7.0" }, "peerDependencies": { "zod": "^3.23.8" } }, "sha512-fqhG+4sCVv8x7nFzYnFo19ryhAa3w096Kmc3hWxMQfW/TubPOmt3A6tYZhl4mUfQWWQMsuSkLrtjlWuXBVSGQA=="], + + "@friendliai/ai-provider": ["@friendliai/ai-provider@1.1.4", "", { "dependencies": { "@ai-sdk/openai-compatible": "2.0.30", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.12" } }, "sha512-9TU4B1QFqPhbkONjI5afCF7Ox4jOqtGg1xw8mA9QHZdtlEbZxU+mBNvMPlI5pU5kPoN6s7wkXmFmxpID+own1A=="], + + "@opentelemetry/api": ["@opentelemetry/api@1.9.0", "", {}, "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg=="], + + "@standard-schema/spec": ["@standard-schema/spec@1.1.0", "", {}, "sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w=="], + + "@vercel/oidc": ["@vercel/oidc@3.1.0", "", {}, "sha512-Fw28YZpRnA3cAHHDlkt7xQHiJ0fcL+NRcIqsocZQUSmbzeIKRpwttJjik5ZGanXP+vlA4SbTg+AbA3bP363l+w=="], + + "ai": ["ai@6.0.101", "", { "dependencies": { "@ai-sdk/gateway": "3.0.55", "@ai-sdk/provider": "3.0.8", "@ai-sdk/provider-utils": "4.0.15", "@opentelemetry/api": "1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-Ur/NgbgOp1rdhyDiKDk6EOpSgd1g5ADlbcD1cjQJtQsnmhEngz3Rf8nK5JetDh0vnbLy2aEBpaQeL+zvLRWuaA=="], + + "eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="], + + "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], + + "nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], + + "secure-json-parse": 
["secure-json-parse@2.7.0", "", {}, "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw=="], + + "zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], + + "@ai-sdk/gateway/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@ai-sdk/gateway/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "@ai-sdk/openai-compatible/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@ai-sdk/openai-compatible/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "@friendliai/ai-provider/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "@friendliai/ai-provider/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, 
"sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + + "ai/@ai-sdk/provider": ["@ai-sdk/provider@3.0.8", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-oGMAgGoQdBXbZqNG0Ze56CHjDZ1IDYOwGYxYjO5KLSlz5HiNQ9udIXsPZ61VWaHGZ5XW/jyjmr6t2xz2jGVwbQ=="], + + "ai/@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.15", "", { "dependencies": { "@ai-sdk/provider": "3.0.8", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.6" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-8XiKWbemmCbvNN0CLR9u3PQiet4gtEVIrX4zzLxnCj06AwsEDJwJVBbKrEI4t6qE8XRSIvU2irka0dcpziKW6w=="], + } +} diff --git a/benchmarks/headless.ts b/benchmarks/headless.ts new file mode 100644 index 000000000..ae18853af --- /dev/null +++ b/benchmarks/headless.ts @@ -0,0 +1,193 @@ +#!/usr/bin/env bun +import { readFile, writeFile, mkdir } from "node:fs/promises" +import { join, dirname } from "node:path" +import { stepCountIs, streamText, type CoreMessage } from "ai" +import { tool } from "ai" +import { createFriendli } from "@friendliai/ai-provider" +import { z } from "zod" +import { formatHashLines } from "../src/tools/hashline-edit/hash-computation" +import { normalizeHashlineEdits } from "../src/tools/hashline-edit/normalize-edits" +import { applyHashlineEditsWithReport } from "../src/tools/hashline-edit/edit-operations" +import { canonicalizeFileText, restoreFileText } from "../src/tools/hashline-edit/file-text-canonicalization" + +const DEFAULT_MODEL = "MiniMaxAI/MiniMax-M2.5" +const MAX_STEPS = 50 +const sessionId = `bench-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` + +const emit = (event: Record) => + console.log(JSON.stringify({ sessionId, timestamp: new Date().toISOString(), ...event })) + +// ── CLI ────────────────────────────────────────────────────── +function parseArgs(): { prompt: string; modelId: string } { + const args = process.argv.slice(2) + let prompt = "" + let modelId = DEFAULT_MODEL + for 
(let i = 0; i < args.length; i++) { + if ((args[i] === "-p" || args[i] === "--prompt") && args[i + 1]) { + prompt = args[++i] + } else if ((args[i] === "-m" || args[i] === "--model") && args[i + 1]) { + modelId = args[++i] + } else if (args[i] === "--reasoning-mode" && args[i + 1]) { + i++ // consume + } + // --no-translate, --think consumed silently + } + if (!prompt) { + console.error("Usage: bun run benchmarks/headless.ts -p [-m ]") + process.exit(1) + } + return { prompt, modelId } +} + +// ── Tools ──────────────────────────────────────────────────── +const readFileTool = tool({ + description: "Read a file with hashline-tagged content (LINE#ID format)", + inputSchema: z.object({ path: z.string().describe("File path") }), + execute: async ({ path }) => { + const fullPath = join(process.cwd(), path) + try { + const content = await readFile(fullPath, "utf-8") + const lines = content.split("\n") + const tagged = formatHashLines(content) + return `OK - read file\npath: ${path}\nlines: ${lines.length}\n\n${tagged}` + } catch { + return `Error: File not found: ${path}` + } + }, +}) + +const editFileTool = tool({ + description: "Edit a file using hashline anchors (LINE#ID format)", + inputSchema: z.object({ + path: z.string(), + edits: z.array( + z.object({ + op: z.enum(["replace", "append", "prepend"]), + pos: z.string().optional(), + end: z.string().optional(), + lines: z.union([z.array(z.string()), z.string(), z.null()]), + }) + ).min(1), + }), + execute: async ({ path, edits }) => { + const fullPath = join(process.cwd(), path) + try { + let rawContent = "" + let exists = true + try { + rawContent = await readFile(fullPath, "utf-8") + } catch { + exists = false + } + + const normalized = normalizeHashlineEdits(edits) + + if (!exists) { + const canCreate = normalized.every( + (e) => (e.op === "append" || e.op === "prepend") && !e.pos + ) + if (!canCreate) return `Error: File not found: ${path}` + } + + const envelope = canonicalizeFileText(rawContent) + const result 
= applyHashlineEditsWithReport(envelope.content, normalized) + + if (result.content === envelope.content) { + return `Error: No changes made to ${path}. The edits produced identical content.` + } + + const writeContent = restoreFileText(result.content, envelope) + await mkdir(dirname(fullPath), { recursive: true }) + await writeFile(fullPath, writeContent, "utf-8") + + const oldLineCount = rawContent.split("\n").length + const newLineCount = writeContent.split("\n").length + const delta = newLineCount - oldLineCount + const sign = delta > 0 ? "+" : "" + const action = exists ? "Updated" : "Created" + return `${action} ${path}\n${edits.length} edit(s) applied, ${sign}${delta} line(s)` + } catch (error) { + return `Error: ${error instanceof Error ? error.message : String(error)}` + } + }, +}) + +// ── Agent Loop ─────────────────────────────────────────────── +async function run() { + const { prompt, modelId } = parseArgs() + + const friendli = createFriendli({ apiKey: process.env.FRIENDLI_TOKEN! }) + const model = friendli(modelId) + const tools = { read_file: readFileTool, edit_file: editFileTool } + + emit({ type: "user", content: prompt }) + + const messages: CoreMessage[] = [{ role: "user", content: prompt }] + const system = + "You are a code editing assistant. Use read_file to read files and edit_file to edit them. " + + "Always read a file before editing it to get fresh LINE#ID anchors." + + for (let step = 0; step < MAX_STEPS; step++) { + const stream = streamText({ + model, + tools, + messages, + system, + stopWhen: stepCountIs(1), + }) + + let currentText = "" + for await (const part of stream.fullStream) { + switch (part.type) { + case "text-delta": + currentText += part.text + break + case "tool-call": + emit({ + type: "tool_call", + tool_call_id: part.toolCallId, + tool_name: part.toolName, + tool_input: part.args, + model: modelId, + }) + break + case "tool-result": { + const output = typeof part.result === "string" ? 
part.result : JSON.stringify(part.result) + const isError = typeof output === "string" && output.startsWith("Error:") + emit({ + type: "tool_result", + tool_call_id: part.toolCallId, + output, + ...(isError ? { error: output } : {}), + }) + break + } + } + + const response = await stream.response + messages.push(...response.messages) + + const finishReason = await stream.finishReason + if (finishReason !== "tool-calls") { + if (currentText.trim()) { + emit({ type: "assistant", content: currentText, model: modelId }) + } + break + } + } +} + +// ── Signal + Startup ───────────────────────────────────────── +process.once("SIGINT", () => process.exit(0)) +process.once("SIGTERM", () => process.exit(143)) + +const startTime = Date.now() +run() + .catch((error) => { + emit({ type: "error", error: error instanceof Error ? error.message : String(error) }) + process.exit(1) + }) + .then(() => { + const elapsed = ((Date.now() - startTime) / 1000).toFixed(2) + console.error(`[headless] Completed in ${elapsed}s`) + }) diff --git a/benchmarks/package.json b/benchmarks/package.json new file mode 100644 index 000000000..bbddfed8a --- /dev/null +++ b/benchmarks/package.json @@ -0,0 +1,19 @@ +{ + "name": "hashline-edit-benchmark", + "version": "0.1.0", + "private": true, + "type": "module", + "description": "Hashline edit tool benchmark using Vercel AI SDK with FriendliAI provider", + "scripts": { + "bench:basic": "bun run test-edit-ops.ts", + "bench:edge": "bun run test-edge-cases.ts", + "bench:multi": "bun run test-multi-model.ts", + "bench:all": "bun run bench:basic && bun run bench:edge" + }, + "dependencies": { + "ai": "^6.0.94", + "@ai-sdk/openai": "^1.3.0", + "@friendliai/ai-provider": "^1.0.9", + "zod": "^4.1.0" + } +} diff --git a/benchmarks/test-edge-cases.ts b/benchmarks/test-edge-cases.ts new file mode 100644 index 000000000..b00b0302d --- /dev/null +++ b/benchmarks/test-edge-cases.ts @@ -0,0 +1,1121 @@ +#!/usr/bin/env bun +/** + * Comprehensive headless edit_file 
stress test: 25 edge cases + * + * Tests: 5 basic ops + 14 creative cases + 6 whitespace cases + * Each runs via headless mode with its own demo file + prompt. + * + * Usage: + * bun run scripts/test-headless-edit-edge-cases.ts [-m ] [--provider ] + */ + +import { spawn } from "node:child_process"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +// ── CLI arg passthrough ─────────────────────────────────────── +const extraArgs: string[] = []; +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + const arg = rawArgs[i]; + if ( + (arg === "-m" || arg === "--model" || arg === "--provider") && + i + 1 < rawArgs.length + ) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } else if (arg === "--think" || arg === "--no-translate") { + extraArgs.push(arg); + } else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } +} + +// ── Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +const pass = (msg: string) => console.log(` ${GREEN}✓${RESET} ${msg}`); +const fail = (msg: string) => console.log(` ${RED}✗${RESET} ${msg}`); +const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`); +const warn = (msg: string) => console.log(` ${YELLOW}⚠${RESET} ${msg}`); + +// ── Test case definition ───────────────────────────────────── +interface TestCase { + fileContent: string; + fileName: string; + name: string; + prompt: string; + skipFileCreate?: boolean; + validate: (content: string) => { passed: boolean; reason: string }; +} + +const TEST_CASES: TestCase[] = [ + { + name: "1. 
Single-line file — replace only line", + fileName: "single-line.txt", + fileContent: "only_line_original", + prompt: [ + "Read single-line.txt with read_file.", + "Replace the only line using edit_file with edits: [{ op: 'replace', pos: '', lines: ['only_line_updated'] }].", + "Expected final content exactly one line: only_line_updated.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + const lines = normalized.split("\n"); + if (lines.length === 1 && lines[0] === "only_line_updated") { + return { passed: true, reason: "single line replaced correctly" }; + } + if (normalized.includes("only_line_original")) { + return { passed: false, reason: "original line still present" }; + } + return { + passed: false, + reason: `expected one line 'only_line_updated', got ${lines.length} lines`, + }; + }, + }, + { + name: "2. Large file (20 lines) — replace middle line 11", + fileName: "twenty-lines.txt", + fileContent: Array.from( + { length: 20 }, + (_, i) => `line${String(i + 1).padStart(2, "0")}: value-${i + 1}` + ).join("\n"), + prompt: [ + "Read twenty-lines.txt with read_file.", + "Replace line 11 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['line11: UPDATED-MIDDLE'] }].", + "Keep all other lines unchanged.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 20) { + return { + passed: false, + reason: `expected 20 lines, got ${lines.length}`, + }; + } + if (lines[10] !== "line11: UPDATED-MIDDLE") { + return { + passed: false, + reason: `line 11 mismatch: '${lines[10] ?? ""}'`, + }; + } + if (lines[9] !== "line10: value-10" || lines[11] !== "line12: value-12") { + return { + passed: false, + reason: "neighboring lines changed unexpectedly", + }; + } + return { + passed: true, + reason: "line 11 replaced and surrounding lines preserved", + }; + }, + }, + { + name: "3. 
Range replace entire file (first→last to one line)", + fileName: "range-all.txt", + fileContent: ["first", "second", "third", "fourth", "fifth"].join("\n"), + prompt: [ + "Read range-all.txt with read_file.", + "Replace the full file from first line to last line using one range edit: edits: [{ op: 'replace', pos: '', end: '', lines: ['collapsed-to-one-line'] }].", + "Expected final content exactly: collapsed-to-one-line.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + if (normalized === "collapsed-to-one-line") { + return { + passed: true, + reason: "entire file collapsed to single replacement line", + }; + } + if (normalized.includes("first") || normalized.includes("fifth")) { + return { + passed: false, + reason: "original range content still present", + }; + } + return { + passed: false, + reason: `unexpected final content: '${normalized.slice(0, 120)}'`, + }; + }, + }, + { + name: "4. Mixed ops in one call (replace + append + prepend)", + fileName: "mixed-one-call.txt", + fileContent: ["alpha", "beta", "gamma"].join("\n"), + prompt: [ + "Read mixed-one-call.txt with read_file.", + "Call edit_file exactly once with three edits in one edits array:", + "edits: [", + "{ op: 'replace', pos: '', lines: ['BETA'] },", + "{ op: 'append', pos: '', lines: ['delta'] },", + "{ op: 'prepend', pos: '', lines: ['start'] }", + "].", + "Expected final content: start, alpha, BETA, gamma, delta.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = ["start", "alpha", "BETA", "gamma", "delta"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: 
true, + reason: "single call applied replace, append, and prepend", + }; + }, + }, + { + name: "5. Large batch (5 replaces) in one call", + fileName: "batch-five.txt", + fileContent: [ + "row-1", + "row-2", + "row-3", + "row-4", + "row-5", + "row-6", + "row-7", + "row-8", + "row-9", + "row-10", + ].join("\n"), + prompt: [ + "Read batch-five.txt with read_file.", + "Call edit_file once with five replace edits in one edits array:", + "edits: [", + "{ op: 'replace', pos: '', lines: ['ROW-1'] },", + "{ op: 'replace', pos: '', lines: ['ROW-3'] },", + "{ op: 'replace', pos: '', lines: ['ROW-5'] },", + "{ op: 'replace', pos: '', lines: ['ROW-7'] },", + "{ op: 'replace', pos: '', lines: ['ROW-10'] }", + "].", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 10) { + return { + passed: false, + reason: `expected 10 lines, got ${lines.length}`, + }; + } + const checks: [number, string][] = [ + [0, "ROW-1"], + [2, "ROW-3"], + [4, "ROW-5"], + [6, "ROW-7"], + [9, "ROW-10"], + ]; + for (const [idx, expected] of checks) { + if (lines[idx] !== expected) { + return { + passed: false, + reason: `line ${idx + 1} expected '${expected}' but got '${lines[idx]}'`, + }; + } + } + if ( + lines[1] !== "row-2" || + lines[3] !== "row-4" || + lines[8] !== "row-9" + ) { + return { + passed: false, + reason: "unchanged lines were unexpectedly modified", + }; + } + return { + passed: true, + reason: "all 5 replacements succeeded in one edit_file call", + }; + }, + }, + { + name: "6. 
Consecutive edits (read→edit→read→edit)", + fileName: "consecutive.txt", + fileContent: ["stage: one", "value: 1", "status: draft"].join("\n"), + prompt: [ + "Read consecutive.txt with read_file.", + "First call edit_file with edits: [{ op: 'replace', pos: '', lines: ['value: 2'] }].", + "Then read consecutive.txt with read_file again.", + "Second, call edit_file again with edits: [{ op: 'replace', pos: '', lines: ['status: final'] }].", + "Expected final content: stage: one, value: 2, status: final.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = ["stage: one", "value: 2", "status: final"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "two sequential edit_file calls produced expected final state", + }; + }, + }, + { + name: "7. Create new file via append", + fileName: "create-via-append.txt", + fileContent: "", + skipFileCreate: true, + prompt: [ + "Create create-via-append.txt via edit_file append (do not call read_file first).", + "Use one call with edits: [{ op: 'append', lines: ['created line 1', 'created line 2'] }].", + "Expected final content exactly two lines: created line 1 and created line 2.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, "").trimEnd(); + const lines = normalized === "" ? 
[] : normalized.split("\n"); + if (lines.length !== 2) { + return { + passed: false, + reason: `expected 2 lines, got ${lines.length}`, + }; + } + if (lines[0] !== "created line 1" || lines[1] !== "created line 2") { + return { + passed: false, + reason: `unexpected file content: '${normalized.slice(0, 120)}'`, + }; + } + return { + passed: true, + reason: "append created expected two-line content", + }; + }, + }, + { + name: "8. Unicode/emoji line replacement", + fileName: "unicode.txt", + fileContent: ["status: pending", "message: old"].join("\n"), + prompt: [ + "Read unicode.txt with read_file.", + "Replace line 2 with Unicode content using edit_file and edits: [{ op: 'replace', pos: '', lines: ['message: 🎉🚀 한국어 테스트 완료'] }].", + "Expected line 2 exactly: message: 🎉🚀 한국어 테스트 완료.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== "message: 🎉🚀 한국어 테스트 완료") { + return { + passed: false, + reason: `line 2 mismatch: '${lines[1] ?? ""}'`, + }; + } + if (content.includes("message: old")) { + return { passed: false, reason: "old message still present" }; + } + return { + passed: true, + reason: "Unicode and emoji content replaced correctly", + }; + }, + }, + { + name: "9. Backticks/template literal content", + fileName: "template.ts", + fileContent: ["const name = 'dev';", "const msg = 'old';"].join("\n"), + prompt: [ + "Read template.ts with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['const msg = `hello \u0024{name}`;'] }].", + "Expected line 2 exactly: const msg = `hello \u0024{name}`;", + ].join(" "), + validate: (content) => { + const expected = "const msg = `hello \u0024{name}`;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== expected) { + return { + passed: false, + reason: `line 2 expected '${expected}' but got '${lines[1] ?? 
""}'`, + }; + } + if (content.includes("const msg = 'old';")) { + return { passed: false, reason: "old msg assignment still present" }; + } + return { + passed: true, + reason: "template literal with backticks preserved", + }; + }, + }, + { + name: "10. Regex pattern content", + fileName: "regex.ts", + fileContent: ["const re = /old/;", "const ok = true;"].join("\n"), + prompt: [ + "Read regex.ts with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['const re = /^[a-z]+\\d{2,}$/gi;'] }].", + "Expected line 1 exactly: const re = /^[a-z]+\\d{2,}$/gi;", + ].join(" "), + validate: (content) => { + const expected = "const re = /^[a-z]+\\d{2,}$/gi;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `regex line mismatch: '${lines[0] ?? ""}'`, + }; + } + if (content.includes("const re = /old/;")) { + return { passed: false, reason: "old regex still present" }; + } + return { + passed: true, + reason: "regex pattern replacement preserved escaping", + }; + }, + }, + { + name: "11. Escaped quotes and backslashes", + fileName: "path.cfg", + fileContent: ['path = "/tmp/file.txt"', "mode = rw"].join("\n"), + prompt: [ + "Read path.cfg with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['path = \"C:\\\\Users\\\\admin\\\\file.txt\"'] }].", + 'The file should contain a Windows-style path with backslashes: C:\\Users\\admin\\file.txt.', + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const line1 = lines[0] ?? 
""; + // Accept either single or double backslashes — both are valid model interpretations + const hasSingleBS = line1.includes('C:\\Users\\admin\\file.txt'); + const hasDoubleBS = line1.includes('C:\\\\Users\\\\admin\\\\file.txt'); + const hasPath = hasSingleBS || hasDoubleBS; + const hasQuotes = line1.includes('"'); + if (hasPath && hasQuotes) { + return { + passed: true, + reason: "backslash path content preserved correctly", + }; + } + return { + passed: false, + reason: `expected Windows path with backslashes but got '${line1}'`, + }; + }, + }, + { + name: "12. HTML tags in content", + fileName: "html-snippet.txt", + fileContent: ["snippet: old", "done: true"].join("\n"), + prompt: [ + "Read html-snippet.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['

Hello

'] }].", + 'Expected line 1 exactly:

Hello

.', + ].join(" "), + validate: (content) => { + const expected = '

Hello

'; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `HTML line mismatch: '${lines[0] ?? ""}'`, + }; + } + if (content.includes("snippet: old")) { + return { passed: false, reason: "old snippet line still present" }; + } + return { passed: true, reason: "HTML tag content inserted exactly" }; + }, + }, + { + name: "13. Very long line (180 chars)", + fileName: "long-line.txt", + fileContent: ["line-1", "short-line"].join("\n"), + prompt: [ + "Read long-line.txt with read_file.", + `Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['${"L".repeat(180)}'] }].`, + "Expected line 2 to be exactly 180 characters.", + ].join(" "), + validate: (content) => { + const expected = "L".repeat(180); + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (!lines[1]) { + return { passed: false, reason: "line 2 is missing" }; + } + if (Math.abs(lines[1].length - 180) > 2) { + return { + passed: false, + reason: `line 2 length expected ~180 but got ${lines[1].length}`, + }; + } + if (!lines[1].startsWith("LLLL")) { + return { + passed: false, + reason: "line 2 content does not match expected repeated-L string", + }; + } + return { passed: true, reason: `long line replaced (${lines[1].length} chars)` }; + }, + }, + { + name: "14. 
SQL query content", + fileName: "sql-content.txt", + fileContent: ["SELECT 1;", "done"].join("\n"), + prompt: [ + "Read sql-content.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.total > 100;'] }].", + "Expected line 1 exactly the provided SQL query.", + ].join(" "), + validate: (content) => { + const expected = + "SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.total > 100;"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== expected) { + return { + passed: false, + reason: `SQL line mismatch: '${lines[0] ?? ""}'`, + }; + } + return { passed: true, reason: "SQL query line replaced exactly" }; + }, + }, + { + name: "15. Mixed indentation (tab -> spaces)", + fileName: "mixed-indent.ts", + fileContent: [ + "function run() {", + "\tconst tabIndented = true;", + " const twoSpaces = true;", + "}", + ].join("\n"), + prompt: [ + "Read mixed-indent.ts with read_file.", + "Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' const tabIndented = true;'] }].", + "Expected line 2 to be 4 spaces + const tabIndented = true;", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines[1] !== " const tabIndented = true;") { + return { + passed: false, + reason: `line 2 mismatch: '${lines[1] ?? ""}'`, + }; + } + if (lines[1].includes("\t")) { + return { + passed: false, + reason: "line 2 still contains a tab character", + }; + } + if (lines[2] !== " const twoSpaces = true;") { + return { passed: false, reason: "line 3 changed unexpectedly" }; + } + return { + passed: true, + reason: "tab-indented line replaced with space-indented line", + }; + }, + }, + { + name: "16. 
Trailing whitespace preservation", + fileName: "trailing-whitespace.txt", + fileContent: ["start", "text ", "end"].join("\n"), + prompt: [ + "Read trailing-whitespace.txt with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['new_text '] }].", + "Keep exactly three trailing spaces after new_text.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (!lines[1]) { + return { passed: false, reason: "line 2 missing" }; + } + if (lines[1] === "new_text ") { + return { + passed: true, + reason: "trailing spaces preserved on replaced line", + }; + } + if (lines[1] === "new_text") { + return { passed: false, reason: "trailing spaces were stripped" }; + } + return { + passed: false, + reason: `line 2 unexpected value: ${JSON.stringify(lines[1])}`, + }; + }, + }, + { + name: "17. Replace line containing only spaces", + fileName: "spaces-only-line.txt", + fileContent: ["alpha", " ", "omega"].join("\n"), + prompt: [ + "Read spaces-only-line.txt with read_file.", + "Replace the line that contains only 4 spaces (line 2) using edit_file with edits: [{ op: 'replace', pos: '', lines: ['middle-content'] }].", + "Expected final content: alpha, middle-content, omega.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? 
normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if (lines[0] !== "alpha" || lines[2] !== "omega") { + return { + passed: false, + reason: "non-target lines changed unexpectedly", + }; + } + if (lines[1] !== "middle-content") { + return { + passed: false, + reason: `line 2 expected 'middle-content' but got ${JSON.stringify(lines[1])}`, + }; + } + return { + passed: true, + reason: "4-space-only line replaced with content", + }; + }, + }, + { + name: "18. Delete middle blank from consecutive blank lines", + fileName: "consecutive-blanks.txt", + fileContent: ["top", "", "", "", "bottom"].join("\n"), + prompt: [ + "Read consecutive-blanks.txt with read_file.", + "Delete only the middle blank line (line 3 of 5) using edit_file with edits: [{ op: 'replace', pos: '', lines: [] }].", + "Keep the other two blank lines intact.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + const expected = ["top", "", "", "bottom"]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines after deleting one blank, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected ${JSON.stringify(expected[i])} but got ${JSON.stringify(lines[i])}`, + }; + } + } + return { passed: true, reason: "only the middle blank line was deleted" }; + }, + }, + { + name: "19. 
Indentation increase (2 spaces -> 8 spaces)", + fileName: "indent-increase.js", + fileContent: ["if (flag) {", " execute();", "}"].join("\n"), + prompt: [ + "Read indent-increase.js with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' execute();'] }].", + "Expected line 2 indentation increased from 2 spaces to 8 spaces.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.endsWith("\n") + ? normalized.slice(0, -1).split("\n") + : normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if (lines[1] !== " execute();") { + return { + passed: false, + reason: `line 2 expected 8-space indentation, got ${JSON.stringify(lines[1])}`, + }; + } + if (lines[0] !== "if (flag) {" || lines[2] !== "}") { + return { passed: false, reason: "outer lines changed unexpectedly" }; + } + return { + passed: true, + reason: "indentation increased to 8 spaces as expected", + }; + }, + }, + { + name: "20. Content that resembles hashline format", + fileName: "hashline-content.txt", + fileContent: ["anchor: old", "tail"].join("\n"), + prompt: [ + "Read hashline-content.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['anchor: 1#AB format is used'] }].", + "Expected line 1 exactly: anchor: 1#AB format is used.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[0] !== "anchor: 1#AB format is used") { + return { + passed: false, + reason: `line 1 mismatch: '${lines[0] ?? ""}'`, + }; + } + return { + passed: true, + reason: "hashline-like literal content preserved correctly", + }; + }, + }, + { + name: "21. 
Literal backslash-n content", + fileName: "literal-backslash-n.txt", + fileContent: ["placeholder", "tail"].join("\n"), + prompt: [ + "Read literal-backslash-n.txt with read_file.", + "Replace line 1 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['line1\\nline2 (literal backslash-n, not newline)'] }].", + "Expected first line to contain literal \\n characters, not an actual newline split.", + ].join(" "), + validate: (content) => { + const expected = "line1\\nline2 (literal backslash-n, not newline)"; + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines.length !== 2) { + return { + passed: false, + reason: `expected 2 lines total, got ${lines.length}`, + }; + } + if (lines[0] !== expected) { + return { + passed: false, + reason: `line 1 expected '${expected}' but got '${lines[0] ?? ""}'`, + }; + } + return { + passed: true, + reason: "literal \\n sequence preserved in a single line", + }; + }, + }, + { + name: "22. Append multiple lines at once", + fileName: "append-multi.txt", + fileContent: ["header", "anchor-line", "footer"].join("\n"), + prompt: [ + "Read append-multi.txt with read_file.", + "Append three lines after anchor-line (line 2) using edit_file with edits: [{ op: 'append', pos: '', lines: ['item-a', 'item-b', 'item-c'] }].", + "Expected final order: header, anchor-line, item-a, item-b, item-c, footer.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = [ + "header", + "anchor-line", + "item-a", + "item-b", + "item-c", + "footer", + ]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "three lines appended in a 
single append edit", + }; + }, + }, + { + name: "23. Replace long line with single short word", + fileName: "shrink-line.txt", + fileContent: [ + "prefix", + "this line is intentionally very long so that replacing it with one short token verifies a major length reduction edge case", + "suffix", + ].join("\n"), + prompt: [ + "Read shrink-line.txt with read_file.", + "Replace the long line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['short'] }].", + "Expected final line 2 exactly: short.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + if (lines[1] !== "short") { + return { + passed: false, + reason: `line 2 expected 'short' but got '${lines[1] ?? ""}'`, + }; + } + if (content.includes("intentionally very long")) { + return { passed: false, reason: "old long line text still present" }; + } + return { + passed: true, + reason: "long line replaced by single short word", + }; + }, + }, + { + name: "24. Edit file with no trailing newline", + fileName: "no-trailing-newline.txt", + fileContent: "first\nsecond\nthird", + prompt: [ + "Read no-trailing-newline.txt with read_file.", + "Replace line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: ['SECOND'] }].", + "Expected final content lines: first, SECOND, third, and no trailing newline at EOF.", + ].join(" "), + validate: (content) => { + const normalized = content.replace(/\r/g, ""); + const lines = normalized.split("\n"); + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if ( + lines[0] !== "first" || + lines[1] !== "SECOND" || + lines[2] !== "third" + ) { + return { + passed: false, + reason: `unexpected lines: ${JSON.stringify(lines)}`, + }; + } + if (normalized.endsWith("\n")) { + return { + passed: false, + reason: "file now has trailing newline but should not", + }; + } + return { + passed: true, + reason: "edited correctly without introducing 
trailing newline", + }; + }, + }, + { + name: "25. Prepend at BOF without pos anchor", + fileName: "prepend-bof.js", + fileContent: ["console.log('hello');", "console.log('done');"].join("\n"), + prompt: [ + "Read prepend-bof.js with read_file.", + "Prepend a shebang at beginning of file using edit_file with no pos: edits: [{ op: 'prepend', lines: ['#!/usr/bin/env node'] }].", + "Do not include a pos field. Expected first line: #!/usr/bin/env node.", + ].join(" "), + validate: (content) => { + const lines = content.replace(/\r/g, "").trimEnd().split("\n"); + const expected = [ + "#!/usr/bin/env node", + "console.log('hello');", + "console.log('done');", + ]; + if (lines.length !== expected.length) { + return { + passed: false, + reason: `expected ${expected.length} lines, got ${lines.length}`, + }; + } + for (let i = 0; i < expected.length; i++) { + if (lines[i] !== expected[i]) { + return { + passed: false, + reason: `line ${i + 1} expected '${expected[i]}' but got '${lines[i]}'`, + }; + } + } + return { + passed: true, + reason: "shebang prepended at BOF without pos anchor", + }; + }, + }, +]; + +// ── JSONL event types ───────────────────────────────────────── +interface ToolCallEvent { + tool_call_id: string; + tool_input: Record; + tool_name: string; + type: "tool_call"; +} + +interface ToolResultEvent { + error?: string; + output: string; + tool_call_id: string; + type: "tool_result"; +} + +interface AnyEvent { + type: string; + [key: string]: unknown; +} + +// ── Run single test case ───────────────────────────────────── +async function runTestCase( + tc: TestCase, + testDir: string +): Promise<{ + passed: boolean; + editCalls: number; + editSuccesses: number; + duration: number; +}> { + const testFile = join(testDir, tc.fileName); + if (!tc.skipFileCreate) { + writeFileSync(testFile, tc.fileContent, "utf-8"); + } + + const headlessScript = resolve(import.meta.dir, "headless.ts"); + const headlessArgs = [ + "run", + headlessScript, + "-p", + tc.prompt, + 
"--no-translate", + ...extraArgs, + ]; + + const startTime = Date.now(); + + const output = await new Promise((res, reject) => { + const proc = spawn("bun", headlessArgs, { + cwd: testDir, + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout( + () => { + proc.kill("SIGTERM"); + reject(new Error("Timed out after 4 minutes")); + }, + 4 * 60 * 1000 + ); + + proc.on("close", (code) => { + clearTimeout(timeout); + if (code !== 0) { + reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`)); + } else { + res(stdout); + } + }); + proc.on("error", (err) => { + clearTimeout(timeout); + reject(err); + }); + }); + + const duration = Date.now() - startTime; + + // Parse events + const events: AnyEvent[] = []; + for (const line of output.split("\n").filter((l) => l.trim())) { + try { + events.push(JSON.parse(line) as AnyEvent); + } catch { + // skip non-JSON + } + } + + const toolCalls = events.filter( + (e) => e.type === "tool_call" + ) as unknown as ToolCallEvent[]; + const toolResults = events.filter( + (e) => e.type === "tool_result" + ) as unknown as ToolResultEvent[]; + + const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file"); + const editCallIds = new Set(editCalls.map((e) => e.tool_call_id)); + const editResults = toolResults.filter((e) => + editCallIds.has(e.tool_call_id) + ); + const editSuccesses = editResults.filter((e) => !e.error); + + // Show blocked calls + const editErrors = editResults.filter((e) => e.error); + for (const err of editErrors) { + const matchingCall = editCalls.find( + (c) => c.tool_call_id === err.tool_call_id + ); + info(` blocked: ${err.error?.slice(0, 120)}`); + if (matchingCall) { + info(` input: 
${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`); + } + } + + // Validate file content + let finalContent: string; + try { + finalContent = readFileSync(testFile, "utf-8"); + } catch { + return { + passed: false, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; + } + + const validation = tc.validate(finalContent); + + return { + passed: validation.passed, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log( + `\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n` + ); + + const testDir = join(tmpdir(), `edit-ops-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + info(`Test dir: ${testDir}`); + console.log(); + + let totalPassed = 0; + const results: { name: string; passed: boolean; detail: string }[] = []; + + for (const tc of TEST_CASES) { + console.log(`${CYAN}${BOLD}${tc.name}${RESET}`); + info(`File: ${tc.fileName}`); + info(`Prompt: "${tc.prompt.slice(0, 80)}..."`); + + try { + const result = await runTestCase(tc, testDir); + const status = result.passed + ? 
`${GREEN}PASS${RESET}` + : `${RED}FAIL${RESET}`; + const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`; + + console.log(` ${status} — ${detail}`); + + if (result.passed) { + totalPassed++; + // Validate the file to show reason + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + pass(v.reason); + } else { + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + fail(v.reason); + info( + `Final content:\n${content + .split("\n") + .map((l, i) => ` ${i + 1}: ${l}`) + .join("\n")}` + ); + } + + results.push({ name: tc.name, passed: result.passed, detail }); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`); + fail(msg.slice(0, 200)); + results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) }); + } + + // Reset file for next test (in case of side effects) + try { + rmSync(join(testDir, tc.fileName), { force: true }); + } catch (error) { + warn(`cleanup failed for ${tc.fileName}: ${error}`); + } + + console.log(); + } + + // Summary + console.log(`${BOLD}━━━ Summary ━━━${RESET}`); + for (const r of results) { + const icon = r.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${r.name} — ${r.detail}`); + } + console.log(); + console.log( + `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}` + ); + + // Cleanup + try { + rmSync(testDir, { recursive: true, force: true }); + } catch (error) { + warn(`cleanup failed for ${testDir}: ${error}`); + } + + if (totalPassed === TEST_CASES.length) { + console.log( + `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n` + ); + process.exit(0); + } else { + console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`); + process.exit(1); + } +}; + +main(); diff --git a/benchmarks/test-edit-ops.ts b/benchmarks/test-edit-ops.ts new file mode 100644 index 000000000..05d63b4d2 --- /dev/null +++ b/benchmarks/test-edit-ops.ts @@ -0,0 +1,808 @@ +#!/usr/bin/env bun +/** + * Comprehensive headless edit_file stress test: 21 operation types + * + * Tests: 5 basic ops + 10 creative cases + 6 whitespace cases + * Each runs via headless mode with its own demo file + prompt. 
+ * + * Usage: + * bun run benchmarks/test-edit-ops.ts [-m ] [--provider ] + */ + +import { spawn } from "node:child_process"; +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; + +// ── CLI arg passthrough ─────────────────────────────────────── +const extraArgs: string[] = []; +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + const arg = rawArgs[i]; + if ( + (arg === "-m" || arg === "--model" || arg === "--provider") && + i + 1 < rawArgs.length + ) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } else if (arg === "--think" || arg === "--no-translate") { + extraArgs.push(arg); + } else if (arg === "--reasoning-mode" && i + 1 < rawArgs.length) { + extraArgs.push(arg, rawArgs[i + 1]); + i++; + } +} + +// ── Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +const pass = (msg: string) => console.log(` ${GREEN}✓${RESET} ${msg}`); +const fail = (msg: string) => console.log(` ${RED}✗${RESET} ${msg}`); +const info = (msg: string) => console.log(` ${DIM}${msg}${RESET}`); +const warn = (msg: string) => console.log(` ${YELLOW}⚠${RESET} ${msg}`); + +// ── Test case definition ───────────────────────────────────── +interface TestCase { + fileContent: string; + fileName: string; + name: string; + prompt: string; + validate: (content: string) => { passed: boolean; reason: string }; +} + +const TEST_CASES: TestCase[] = [ + { + name: "1.
Replace single line", + fileName: "config.txt", + fileContent: [ + "host: localhost", + "port: 3000", + "debug: false", + "timeout: 30", + "retries: 3", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on config.txt.", + "Step 2: Note the anchor for the port line (line 2).", + "Step 3: Call edit_file with path='config.txt' and edits containing ONE object:", + " { op: 'replace', pos: '', lines: ['port: 8080'] }", + "IMPORTANT: pos must be ONLY the anchor (like '2#KB'). lines must be a SEPARATE array field with the new content.", + ].join(" "), + validate: (content) => { + const has8080 = content.includes("port: 8080"); + const has3000 = content.includes("port: 3000"); + if (has8080 && !has3000) { + return { passed: true, reason: "port changed to 8080" }; + } + if (has3000) { + return { passed: false, reason: "port still 3000 — edit not applied" }; + } + return { + passed: false, + reason: `unexpected content: ${content.slice(0, 100)}`, + }; + }, + }, + { + name: "2. Append after line", + fileName: "fruits.txt", + fileContent: ["apple", "banana", "cherry"].join("\n"), + prompt: + "Read fruits.txt with read_file. Then use edit_file with op='append' to insert a new line 'grape' after the 'banana' line. 
Use pos='LINE#HASH' of the banana line and lines=['grape'].", + validate: (content) => { + const lines = content.trim().split("\n"); + const bananaIdx = lines.findIndex((l) => l.trim() === "banana"); + const grapeIdx = lines.findIndex((l) => l.trim() === "grape"); + if (grapeIdx === -1) { + return { passed: false, reason: '"grape" not found in file' }; + } + if (bananaIdx === -1) { + return { passed: false, reason: '"banana" was removed' }; + } + if (grapeIdx !== bananaIdx + 1) { + return { + passed: false, + reason: `"grape" at line ${grapeIdx + 1} but expected after "banana" at line ${bananaIdx + 1}`, + }; + } + if (lines.length !== 4) { + return { + passed: false, + reason: `expected 4 lines, got ${lines.length}`, + }; + } + return { + passed: true, + reason: '"grape" correctly appended after "banana"', + }; + }, + }, + { + name: "3. Prepend before line", + fileName: "code.txt", + fileContent: ["function greet() {", ' return "hello";', "}"].join("\n"), + prompt: + "Read code.txt with read_file. Then use edit_file with op='prepend' to add '// Greeting function' before the function line. Use pos='LINE#HASH' of the function line and lines=['// Greeting function'].", + validate: (content) => { + const lines = content.trim().split("\n"); + const commentIdx = lines.findIndex( + (l) => l.trim().startsWith("//") && l.toLowerCase().includes("greet") + ); + const funcIdx = lines.findIndex((l) => + l.trim().startsWith("function greet") + ); + if (commentIdx === -1) { + return { passed: false, reason: "comment line not found" }; + } + if (funcIdx === -1) { + return { passed: false, reason: '"function greet" line was removed' }; + } + if (commentIdx !== funcIdx - 1) { + return { + passed: false, + reason: `comment at line ${commentIdx + 1} but function at ${funcIdx + 1} — not directly before`, + }; + } + return { + passed: true, + reason: "comment correctly prepended before function", + }; + }, + }, + { + name: "4. 
Range replace (multi-line → single line)", + fileName: "log.txt", + fileContent: [ + "=== Log Start ===", + "INFO: started", + "WARN: slow query", + "ERROR: timeout", + "INFO: recovered", + "=== Log End ===", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on log.txt to see line anchors.", + "Step 2: Note the anchor for 'WARN: slow query' (line 3) and 'ERROR: timeout' (line 4).", + "Step 3: Call edit_file with path='log.txt' and edits containing ONE object with THREE separate JSON fields:", + " { op: 'replace', pos: '', end: '', lines: ['RESOLVED: issues cleared'] }", + "CRITICAL: pos, end, and lines are THREE SEPARATE JSON fields. pos is ONLY '3#XX'. end is ONLY '4#YY'. lines is ['RESOLVED: issues cleared'].", + "If edit_file fails or errors, use write_file to write the complete correct file content instead.", + "The correct final content should be: === Log Start ===, INFO: started, RESOLVED: issues cleared, INFO: recovered, === Log End ===", + "Do not make any other changes.", + ].join(" "), + validate: (content) => { + const lines = content.trim().split("\n"); + const hasResolved = lines.some( + (l) => l.trim() === "RESOLVED: issues cleared" + ); + const hasWarn = content.includes("WARN: slow query"); + const hasError = content.includes("ERROR: timeout"); + if (!hasResolved) { + return { + passed: false, + reason: '"RESOLVED: issues cleared" not found', + }; + } + if (hasWarn || hasError) { + return { passed: false, reason: "old WARN/ERROR lines still present" }; + } + // Core assertion: 2 old lines removed, 1 new line added = net -1 line + // Allow slight overshoot from model adding extra content + if (lines.length < 4 || lines.length > 6) { + return { + passed: false, + reason: `expected ~5 lines, got ${lines.length}`, + }; + } + return { + passed: true, + reason: "range replace succeeded — 2 lines → 1 line", + }; + }, + }, + { + name: "5. 
Delete line", + fileName: "settings.txt", + fileContent: [ + "mode: production", + "debug: true", + "cache: enabled", + "log_level: info", + ].join("\n"), + prompt: [ + "Follow these steps exactly:", + "Step 1: Call read_file on settings.txt to see line anchors.", + "Step 2: Note the anchor for 'debug: true' (line 2).", + "Step 3: Call edit_file with path='settings.txt' and edits containing ONE object:", + " { op: 'replace', pos: '', lines: [] }", + "IMPORTANT: lines must be an empty array [] to delete the line. pos must be ONLY the anchor like '2#SR'.", + ].join(" "), + validate: (content) => { + const lines = content.trim().split("\n"); + const hasDebug = content.includes("debug: true"); + if (hasDebug) { + return { passed: false, reason: '"debug: true" still present' }; + } + if (lines.length !== 3) { + return { + passed: false, + reason: `expected 3 lines, got ${lines.length}`, + }; + } + if ( + !( + content.includes("mode: production") && + content.includes("cache: enabled") + ) + ) { + return { passed: false, reason: "other lines were removed" }; + } + return { passed: true, reason: '"debug: true" successfully deleted' }; + }, + }, + + // ── Creative cases (6-15) ──────────────────────────────────── + { + name: "6. 
Batch edit — two replacements in one call", + fileName: "batch.txt", + fileContent: ["red", "green", "blue", "yellow"].join("\n"), + prompt: [ + "Read batch.txt with read_file.", + "Then call edit_file ONCE with path='batch.txt' and edits containing TWO objects:", + " 1) { op: 'replace', pos: '', lines: ['crimson'] }", + " 2) { op: 'replace', pos: '', lines: ['navy'] }", + "Both edits must be in the SAME edits array in a single edit_file call.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("crimson")) return { passed: false, reason: "'crimson' not found" }; + if (!c.includes("navy")) return { passed: false, reason: "'navy' not found" }; + if (c.includes("red")) return { passed: false, reason: "'red' still present" }; + if (c.includes("blue")) return { passed: false, reason: "'blue' still present" }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "both lines replaced in single call" }; + }, + }, + { + name: "7. 
Line expansion — 1 line → 3 lines", + fileName: "expand.txt", + fileContent: ["header", "TODO: implement", "footer"].join("\n"), + prompt: [ + "Read expand.txt with read_file.", + "Replace the 'TODO: implement' line (line 2) with THREE lines:", + " 'step 1: init', 'step 2: process', 'step 3: cleanup'", + "Use edit_file with op='replace', pos=, lines=['step 1: init', 'step 2: process', 'step 3: cleanup'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("TODO")) return { passed: false, reason: "TODO line still present" }; + if (!c.includes("step 1: init")) return { passed: false, reason: "'step 1: init' not found" }; + if (!c.includes("step 3: cleanup")) return { passed: false, reason: "'step 3: cleanup' not found" }; + if (lines.length !== 5) return { passed: false, reason: `expected 5 lines, got ${lines.length}` }; + return { passed: true, reason: "1 line expanded to 3 lines" }; + }, + }, + { + name: "8. Append at EOF", + fileName: "eof.txt", + fileContent: ["line one", "line two"].join("\n"), + prompt: [ + "Read eof.txt with read_file.", + "Use edit_file to append 'line three' after the LAST line of the file.", + "Use op='append', pos=, lines=['line three'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("line three")) return { passed: false, reason: "'line three' not found" }; + if (lines[lines.length - 1].trim() !== "line three") + return { passed: false, reason: "'line three' not at end" }; + if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` }; + return { passed: true, reason: "appended at EOF" }; + }, + }, + { + name: "9. 
Special characters in content", + fileName: "special.json", + fileContent: [ + '{', + ' "name": "old-value",', + ' "count": 42', + '}', + ].join("\n"), + prompt: [ + "Read special.json with read_file.", + 'Replace the line containing \"name\": \"old-value\" with \"name\": \"new-value\".', + "Use edit_file with op='replace', pos=, lines=[' \"name\": \"new-value\",'].", + ].join(" "), + validate: (c) => { + if (c.includes("old-value")) return { passed: false, reason: "'old-value' still present" }; + if (!c.includes('"new-value"')) return { passed: false, reason: "'new-value' not found" }; + if (!c.includes('"count": 42')) return { passed: false, reason: "other content was modified" }; + return { passed: true, reason: "JSON value replaced with special chars intact" }; + }, + }, + { + name: "10. Replace first line", + fileName: "first.txt", + fileContent: ["OLD HEADER", "body content", "footer"].join("\n"), + prompt: [ + "Read first.txt with read_file.", + "Replace the very first line 'OLD HEADER' with 'NEW HEADER'.", + "Use edit_file with op='replace', pos=, lines=['NEW HEADER'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("OLD HEADER")) return { passed: false, reason: "'OLD HEADER' still present" }; + if (lines[0].trim() !== "NEW HEADER") return { passed: false, reason: "first line is not 'NEW HEADER'" }; + if (!c.includes("body content")) return { passed: false, reason: "body was modified" }; + return { passed: true, reason: "first line replaced" }; + }, + }, + { + name: "11. 
Replace last line", + fileName: "last.txt", + fileContent: ["alpha", "bravo", "OLD_FOOTER"].join("\n"), + prompt: [ + "Read last.txt with read_file.", + "Replace the last line 'OLD_FOOTER' with 'NEW_FOOTER'.", + "Use edit_file with op='replace', pos=, lines=['NEW_FOOTER'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("OLD_FOOTER")) return { passed: false, reason: "'OLD_FOOTER' still present" }; + if (lines[lines.length - 1].trim() !== "NEW_FOOTER") + return { passed: false, reason: "last line is not 'NEW_FOOTER'" }; + return { passed: true, reason: "last line replaced" }; + }, + }, + { + name: "12. Adjacent line edits", + fileName: "adjacent.txt", + fileContent: ["aaa", "bbb", "ccc", "ddd"].join("\n"), + prompt: [ + "Read adjacent.txt with read_file.", + "Replace line 2 ('bbb') with 'BBB' and line 3 ('ccc') with 'CCC'.", + "Use edit_file with TWO edits in the same call:", + " { op: 'replace', pos: , lines: ['BBB'] }", + " { op: 'replace', pos: , lines: ['CCC'] }", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("bbb")) return { passed: false, reason: "'bbb' still present" }; + if (c.includes("ccc")) return { passed: false, reason: "'ccc' still present" }; + if (!c.includes("BBB")) return { passed: false, reason: "'BBB' not found" }; + if (!c.includes("CCC")) return { passed: false, reason: "'CCC' not found" }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "two adjacent lines replaced" }; + }, + }, + { + name: "13. 
Prepend multi-line block", + fileName: "block.py", + fileContent: ["def main():", " print('hello')", "", "main()"].join("\n"), + prompt: [ + "Read block.py with read_file.", + "Prepend a 2-line comment block before 'def main():' (line 1).", + "The two lines are: '# Author: test' and '# Date: 2025-01-01'.", + "Use edit_file with op='prepend', pos=, lines=['# Author: test', '# Date: 2025-01-01'].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("# Author: test")) return { passed: false, reason: "author comment not found" }; + if (!c.includes("# Date: 2025-01-01")) return { passed: false, reason: "date comment not found" }; + const defIdx = lines.findIndex((l) => l.startsWith("def main")); + const authorIdx = lines.findIndex((l) => l.includes("Author")); + if (authorIdx >= defIdx) return { passed: false, reason: "comments not before def" }; + return { passed: true, reason: "2-line block prepended before function" }; + }, + }, + { + name: "14. Delete range — 3 consecutive lines", + fileName: "cleanup.txt", + fileContent: ["keep1", "remove-a", "remove-b", "remove-c", "keep2"].join("\n"), + prompt: [ + "Read cleanup.txt with read_file.", + "Delete lines 2-4 ('remove-a', 'remove-b', 'remove-c') using a single range replace.", + "Use edit_file with op='replace', pos=, end=, lines=[].", + "An empty lines array deletes the range.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (c.includes("remove")) return { passed: false, reason: "'remove' lines still present" }; + if (!c.includes("keep1")) return { passed: false, reason: "'keep1' was deleted" }; + if (!c.includes("keep2")) return { passed: false, reason: "'keep2' was deleted" }; + if (lines.length !== 2) return { passed: false, reason: `expected 2 lines, got ${lines.length}` }; + return { passed: true, reason: "3 consecutive lines deleted via range" }; + }, + }, + { + name: "15. 
Replace with duplicate-content line", + fileName: "dupes.txt", + fileContent: ["item", "item", "item", "item"].join("\n"), + prompt: [ + "Read dupes.txt with read_file. All 4 lines have the same text 'item'.", + "Replace ONLY line 3 with 'CHANGED'. Do NOT modify any other line.", + "Use edit_file with op='replace', pos=, lines=['CHANGED'].", + "The anchor hash uniquely identifies line 3 even though the content is identical.", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (!c.includes("CHANGED")) return { passed: false, reason: "'CHANGED' not found" }; + const changedCount = lines.filter((l) => l.trim() === "CHANGED").length; + const itemCount = lines.filter((l) => l.trim() === "item").length; + if (changedCount !== 1) return { passed: false, reason: `expected 1 CHANGED, got ${changedCount}` }; + if (itemCount !== 3) return { passed: false, reason: `expected 3 item lines, got ${itemCount}` }; + if (lines.length !== 4) return { passed: false, reason: `expected 4 lines, got ${lines.length}` }; + return { passed: true, reason: "only line 3 changed among duplicates" }; + }, + }, + + // ── Whitespace cases (16-21) ────────────────────────────────── + { + name: "16. Fix indentation — 2 spaces → 4 spaces", + fileName: "indent.js", + fileContent: ["function foo() {", " const x = 1;", " return x;", "}"].join("\n"), + prompt: [ + "Read indent.js with read_file.", + "Replace line 2 ' const x = 1;' (2-space indent) with ' const x = 1;' (4-space indent).", + "Use edit_file with op='replace', pos=, lines=[' const x = 1;'].", + "The ONLY change is the indentation: 2 spaces → 4 spaces. 
Content stays the same.", + ].join(" "), + validate: (c) => { + const lines = c.split("\n"); + const line2 = lines[1]; + if (!line2) return { passed: false, reason: "line 2 missing" }; + if (line2 === " const x = 1;") return { passed: true, reason: "indentation fixed to 4 spaces" }; + if (line2 === " const x = 1;") return { passed: false, reason: "still 2-space indent" }; + return { passed: false, reason: `unexpected line 2: '${line2}'` }; + }, + }, + { + name: "17. Replace preserving leading whitespace", + fileName: "preserve.py", + fileContent: [ + "class Foo:", + " def old_method(self):", + " pass", + ].join("\n"), + prompt: [ + "Read preserve.py with read_file.", + "Replace line 2 ' def old_method(self):' with ' def new_method(self):'.", + "Keep the 4-space indentation. Only change the method name.", + "Use edit_file with op='replace', pos=, lines=[' def new_method(self):'].", + ].join(" "), + validate: (c) => { + if (c.includes("old_method")) return { passed: false, reason: "'old_method' still present" }; + const lines = c.split("\n"); + const methodLine = lines.find((l) => l.includes("new_method")); + if (!methodLine) return { passed: false, reason: "'new_method' not found" }; + if (!methodLine.startsWith(" ")) return { passed: false, reason: "indentation lost" }; + return { passed: true, reason: "method renamed with indentation preserved" }; + }, + }, + { + name: "18. 
Insert blank line between sections", + fileName: "sections.txt", + fileContent: ["[section-a]", "value-a=1", "[section-b]", "value-b=2"].join("\n"), + prompt: [ + "Read sections.txt with read_file.", + "Insert a blank empty line between 'value-a=1' (line 2) and '[section-b]' (line 3).", + "Use edit_file with op='append', pos=, lines=[''].", + "lines=[''] inserts one empty line.", + ].join(" "), + validate: (c) => { + const lines = c.split("\n"); + const valAIdx = lines.findIndex((l) => l.includes("value-a=1")); + const secBIdx = lines.findIndex((l) => l.includes("[section-b]")); + if (valAIdx === -1) return { passed: false, reason: "'value-a=1' missing" }; + if (secBIdx === -1) return { passed: false, reason: "'[section-b]' missing" }; + if (secBIdx - valAIdx < 2) return { passed: false, reason: "no blank line between sections" }; + const between = lines[valAIdx + 1]; + if (between.trim() !== "") return { passed: false, reason: `line between is '${between}', not blank` }; + return { passed: true, reason: "blank line inserted between sections" }; + }, + }, + { + name: "19. Delete blank line", + fileName: "noblank.txt", + fileContent: ["first", "", "second", "third"].join("\n"), + prompt: [ + "Read noblank.txt with read_file.", + "Delete the empty blank line (line 2). Use edit_file with op='replace', pos=, lines=[].", + ].join(" "), + validate: (c) => { + const lines = c.trim().split("\n"); + if (lines.length !== 3) return { passed: false, reason: `expected 3 lines, got ${lines.length}` }; + if (lines[0].trim() !== "first") return { passed: false, reason: "'first' not on line 1" }; + if (lines[1].trim() !== "second") return { passed: false, reason: "'second' not on line 2" }; + return { passed: true, reason: "blank line deleted" }; + }, + }, + { + name: "20. 
Tab → spaces conversion", + fileName: "tabs.txt", + fileContent: ["start", "\tindented-with-tab", "end"].join("\n"), + prompt: [ + "Read tabs.txt with read_file.", + "Replace the tab-indented line 2 using edit_file with edits: [{ op: 'replace', pos: '', lines: [' indented-with-spaces'] }].", + "Expected final line 2 to be 4 spaces followed by indented-with-spaces.", + ].join(" "), + validate: (c) => { + if (c.includes("\t")) return { passed: false, reason: "tab still present" }; + if (!c.includes(" indented-with-spaces")) + return { passed: false, reason: "' indented-with-spaces' not found" }; + if (!c.includes("start")) return { passed: false, reason: "'start' was modified" }; + return { passed: true, reason: "tab converted to 4 spaces" }; + }, + }, + { + name: "21. Deeply nested indent replacement", + fileName: "nested.ts", + fileContent: [ + "if (a) {", + " if (b) {", + " if (c) {", + " old_call();", + " }", + " }", + "}", + ].join("\n"), + prompt: [ + "Read nested.ts with read_file.", + "Replace line 4 ' old_call();' with ' new_call();'.", + "Preserve the exact 6-space indentation. Only change the function name.", + "Use edit_file with op='replace', pos=, lines=[' new_call();'].", + ].join(" "), + validate: (c) => { + if (c.includes("old_call")) return { passed: false, reason: "'old_call' still present" }; + const lines = c.split("\n"); + const callLine = lines.find((l) => l.includes("new_call")); + if (!callLine) return { passed: false, reason: "'new_call' not found" }; + const leadingSpaces = callLine.match(/^ */)?.[0].length ?? 
0; + if (leadingSpaces !== 6) return { passed: false, reason: `expected 6-space indent, got ${leadingSpaces}` }; + return { passed: true, reason: "deeply nested line replaced with indent preserved" }; + }, + }, +]; + +// ── JSONL event types ───────────────────────────────────────── +interface ToolCallEvent { + tool_call_id: string; + tool_input: Record; + tool_name: string; + type: "tool_call"; +} + +interface ToolResultEvent { + error?: string; + output: string; + tool_call_id: string; + type: "tool_result"; +} + +interface AnyEvent { + type: string; + [key: string]: unknown; +} + +// ── Run single test case ───────────────────────────────────── +async function runTestCase( + tc: TestCase, + testDir: string +): Promise<{ + passed: boolean; + editCalls: number; + editSuccesses: number; + duration: number; +}> { + const testFile = join(testDir, tc.fileName); + writeFileSync(testFile, tc.fileContent, "utf-8"); + + const headlessScript = resolve(import.meta.dir, "headless.ts"); + const headlessArgs = [ + "run", + headlessScript, + "-p", + tc.prompt, + "--no-translate", + ...extraArgs, + ]; + + const startTime = Date.now(); + + const output = await new Promise((res, reject) => { + const proc = spawn("bun", headlessArgs, { + cwd: testDir, + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout( + () => { + proc.kill("SIGTERM"); + reject(new Error("Timed out after 4 minutes")); + }, + 4 * 60 * 1000 + ); + + proc.on("close", (code) => { + clearTimeout(timeout); + if (code !== 0) { + reject(new Error(`Exit code ${code}\n${stderr.slice(-500)}`)); + } else { + res(stdout); + } + }); + proc.on("error", (err) => { + clearTimeout(timeout); + reject(err); + }); + }); + + const duration = 
Date.now() - startTime; + + // Parse events + const events: AnyEvent[] = []; + for (const line of output.split("\n").filter((l) => l.trim())) { + try { + events.push(JSON.parse(line) as AnyEvent); + } catch { + // skip non-JSON + } + } + + const toolCalls = events.filter( + (e) => e.type === "tool_call" + ) as unknown as ToolCallEvent[]; + const toolResults = events.filter( + (e) => e.type === "tool_result" + ) as unknown as ToolResultEvent[]; + + const editCalls = toolCalls.filter((e) => e.tool_name === "edit_file"); + const editCallIds = new Set(editCalls.map((e) => e.tool_call_id)); + const editResults = toolResults.filter((e) => + editCallIds.has(e.tool_call_id) + ); + const editSuccesses = editResults.filter((e) => !e.error); + + // Show blocked calls + const editErrors = editResults.filter((e) => e.error); + for (const err of editErrors) { + const matchingCall = editCalls.find( + (c) => c.tool_call_id === err.tool_call_id + ); + info(` blocked: ${err.error?.slice(0, 120)}`); + if (matchingCall) { + info(` input: ${JSON.stringify(matchingCall.tool_input).slice(0, 200)}`); + } + } + + // Validate file content + let finalContent: string; + try { + finalContent = readFileSync(testFile, "utf-8"); + } catch { + return { + passed: false, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; + } + + const validation = tc.validate(finalContent); + + return { + passed: validation.passed, + editCalls: editCalls.length, + editSuccesses: editSuccesses.length, + duration, + }; +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log(`\n${BOLD}Headless Edit Operations Test — ${TEST_CASES.length} Types${RESET}\n`); + + const testDir = join(tmpdir(), `edit-ops-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + info(`Test dir: ${testDir}`); + console.log(); + + let totalPassed = 0; + const results: { name: string; passed: boolean; detail: string }[] = []; + + for (const tc of 
TEST_CASES) { + console.log(`${CYAN}${BOLD}${tc.name}${RESET}`); + info(`File: ${tc.fileName}`); + info(`Prompt: "${tc.prompt.slice(0, 80)}..."`); + + try { + const result = await runTestCase(tc, testDir); + const status = result.passed + ? `${GREEN}PASS${RESET}` + : `${RED}FAIL${RESET}`; + const detail = `edit_file: ${result.editSuccesses}/${result.editCalls} succeeded, ${(result.duration / 1000).toFixed(1)}s`; + + console.log(` ${status} — ${detail}`); + + if (result.passed) { + totalPassed++; + // Validate the file to show reason + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + pass(v.reason); + } else { + const content = readFileSync(join(testDir, tc.fileName), "utf-8"); + const v = tc.validate(content); + fail(v.reason); + info( + `Final content:\n${content + .split("\n") + .map((l, i) => ` ${i + 1}: ${l}`) + .join("\n")}` + ); + } + + results.push({ name: tc.name, passed: result.passed, detail }); + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + console.log(` ${RED}ERROR${RESET} — ${msg.slice(0, 200)}`); + fail(msg.slice(0, 200)); + results.push({ name: tc.name, passed: false, detail: msg.slice(0, 100) }); + } + + // Reset file for next test (in case of side effects) + try { + rmSync(join(testDir, tc.fileName), { force: true }); + } catch {} + + console.log(); + } + + // Summary + console.log(`${BOLD}━━━ Summary ━━━${RESET}`); + for (const r of results) { + const icon = r.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${r.name} — ${r.detail}`); + } + console.log(); + console.log( + `${BOLD}Result: ${totalPassed}/${TEST_CASES.length} passed (${Math.round((totalPassed / TEST_CASES.length) * 100)}%)${RESET}` + ); + + // Cleanup + try { + rmSync(testDir, { recursive: true, force: true }); + } catch {} + + if (totalPassed === TEST_CASES.length) { + console.log( + `\n${BOLD}${GREEN}🎉 ALL TESTS PASSED — 100% success rate!${RESET}\n` + ); + process.exit(0); + } else { + console.log(`\n${BOLD}${RED}Some tests failed.${RESET}\n`); + process.exit(1); + } +}; + +main(); diff --git a/benchmarks/test-multi-model.ts b/benchmarks/test-multi-model.ts new file mode 100644 index 000000000..29ee4bb93 --- /dev/null +++ b/benchmarks/test-multi-model.ts @@ -0,0 +1,280 @@ +#!/usr/bin/env bun +/** + * Multi-model edit_file test runner + * + * Runs test-headless-edit-ops.ts against every available model + * and produces a summary table. + * + * Usage: + * bun run scripts/test-multi-model-edit.ts [--timeout ] + */ + +import { spawn } from "node:child_process"; +import { resolve } from "node:path"; + +// ── Models ──────────────────────────────────────────────────── +const MODELS = [ + { id: "MiniMaxAI/MiniMax-M2.5", short: "M2.5" }, + // { id: "MiniMaxAI/MiniMax-M2.1", short: "M2.1" }, // masked: slow + timeout-prone + // { id: "zai-org/GLM-5", short: "GLM-5" }, // masked: API 503 + { id: "zai-org/GLM-4.7", short: "GLM-4.7" }, +]; + +// ── CLI args ────────────────────────────────────────────────── +let perModelTimeoutSec = 900; // 15 min default per model (5 tests) +const rawArgs = process.argv.slice(2); +for (let i = 0; i < rawArgs.length; i++) { + if (rawArgs[i] === "--timeout" && i + 1 < rawArgs.length) { + const parsed = Number.parseInt(rawArgs[i + 1], 10); + if (Number.isNaN(parsed) || parsed <= 0) { + console.error(`Invalid --timeout value: ${rawArgs[i + 1]}`); + process.exit(1); + } + perModelTimeoutSec = parsed; + i++; +} + +// ── 
Colors ──────────────────────────────────────────────────── +const BOLD = "\x1b[1m"; +const GREEN = "\x1b[32m"; +const RED = "\x1b[31m"; +const YELLOW = "\x1b[33m"; +const DIM = "\x1b[2m"; +const CYAN = "\x1b[36m"; +const RESET = "\x1b[0m"; + +// ── Types ───────────────────────────────────────────────────── +interface TestResult { + detail: string; + name: string; + passed: boolean; +} + +interface ModelResult { + durationMs: number; + error?: string; + modelId: string; + modelShort: string; + tests: TestResult[]; + totalPassed: number; + totalTests: number; +} + +// ── Parse test-headless-edit-ops stdout ─────────────────────── +function parseOpsOutput(stdout: string): TestResult[] { + const results: TestResult[] = []; + + // Match lines like: " PASS — edit_file: 1/1 succeeded, 32.5s" + // or " FAIL — edit_file: 0/3 succeeded, 15.2s" + // or " ERROR — Timed out after 10 minutes" + // Following a line like: "1. Replace single line" + const lines = stdout.split("\n"); + + let currentTestName = ""; + for (const line of lines) { + // Detect test name: starts with ANSI-colored bold cyan + "N. Name" + // Strip ANSI codes for matching + const stripped = line.replace(/\x1b\[[0-9;]*m/g, ""); + + // Test name pattern: "N. 
" + const testNameMatch = stripped.match(/^\s*(\d+\.\s+.+)$/); + if ( + testNameMatch && + !stripped.includes("—") && + !stripped.includes("✓") && + !stripped.includes("✗") + ) { + currentTestName = testNameMatch[1].trim(); + continue; + } + + // Result line: PASS/FAIL/ERROR + if (currentTestName && stripped.includes("PASS")) { + const detail = stripped.replace(/^\s*PASS\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: true, + detail: detail || "passed", + }); + currentTestName = ""; + } else if (currentTestName && stripped.includes("FAIL")) { + const detail = stripped.replace(/^\s*FAIL\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: false, + detail: detail || "failed", + }); + currentTestName = ""; + } else if (currentTestName && stripped.includes("ERROR")) { + const detail = stripped.replace(/^\s*ERROR\s*—?\s*/, "").trim(); + results.push({ + name: currentTestName, + passed: false, + detail: detail || "error", + }); + currentTestName = ""; + } + } + + return results; +} + +// ── Run one model ──────────────────────────────────────────── +async function runModel(model: { + id: string; + short: string; +}): Promise { + const opsScript = resolve(import.meta.dir, "test-edit-ops.ts"); + const startTime = Date.now(); + + return new Promise((resolvePromise) => { + const proc = spawn( + "bun", + ["run", opsScript, "-m", model.id, "--no-translate"], + { + cwd: resolve(import.meta.dir), + env: { ...process.env, BUN_INSTALL: process.env.BUN_INSTALL }, + stdio: ["ignore", "pipe", "pipe"], + } + ); + + let stdout = ""; + let stderr = ""; + + proc.stdout.on("data", (chunk: Buffer) => { + stdout += chunk.toString(); + }); + proc.stderr.on("data", (chunk: Buffer) => { + stderr += chunk.toString(); + }); + + const timeout = setTimeout(() => { + proc.kill("SIGTERM"); + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests: [], + totalPassed: 0, + totalTests: 0, + durationMs: Date.now() - startTime, + error: 
`Timed out after ${perModelTimeoutSec}s`, + }); + }, perModelTimeoutSec * 1000); + + proc.on("close", () => { + clearTimeout(timeout); + const tests = parseOpsOutput(stdout); + const totalPassed = tests.filter((t) => t.passed).length; + + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests, + totalPassed, + totalTests: Math.max(tests.length, 5), + durationMs: Date.now() - startTime, + }); + }); + + proc.on("error", (err) => { + clearTimeout(timeout); + resolvePromise({ + modelId: model.id, + modelShort: model.short, + tests: [], + totalPassed: 0, + totalTests: 0, + durationMs: Date.now() - startTime, + error: err.message, + }); + }); + }); +} + +// ── Main ────────────────────────────────────────────────────── +const main = async () => { + console.log(`\n${BOLD}═══ Multi-Model edit_file Test Runner ═══${RESET}\n`); + console.log(`${DIM}Models: ${MODELS.map((m) => m.short).join(", ")}${RESET}`); + console.log(`${DIM}Timeout: ${perModelTimeoutSec}s per model${RESET}`); + console.log(); + + const allResults: ModelResult[] = []; + + for (const model of MODELS) { + console.log(`${CYAN}${BOLD}▶ Testing ${model.short} (${model.id})${RESET}`); + const result = await runModel(model); + allResults.push(result); + + const timeStr = `${(result.durationMs / 1000).toFixed(1)}s`; + if (result.error) { + console.log(` ${RED}ERROR${RESET}: ${result.error} (${timeStr})`); + } else { + const color = + result.totalPassed === result.totalTests + ? GREEN + : result.totalPassed > 0 + ? YELLOW + : RED; + console.log( + ` ${color}${result.totalPassed}/${result.totalTests} passed${RESET} (${timeStr})` + ); + for (const t of result.tests) { + const icon = t.passed ? 
`${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${t.name}`); + } + } + console.log(); + } + + // ── Summary Table ────────────────────────────────────────── + console.log(`${BOLD}═══ Summary ═══${RESET}\n`); + + // Per-model results + for (const r of allResults) { + const timeStr = `${(r.durationMs / 1000).toFixed(0)}s`; + const color = r.error ? RED : r.totalPassed === r.totalTests ? GREEN : r.totalPassed > 0 ? YELLOW : RED; + const label = r.error ? `ERROR: ${r.error}` : `${r.totalPassed}/${r.totalTests}`; + console.log(` ${r.modelShort.padEnd(8)} ${color}${label}${RESET} (${timeStr})`); + for (const t of r.tests) { + const icon = t.passed ? `${GREEN}✓${RESET}` : `${RED}✗${RESET}`; + console.log(` ${icon} ${t.name}`); + } + } + + console.log(); + + // Overall + const totalModels = allResults.length; + const erroredModels = allResults.filter((r) => r.error).length; + const perfectModels = allResults.filter( + (r) => !r.error && r.totalPassed === r.totalTests && r.totalTests > 0 + ).length; + console.log( + `${BOLD}Models with 100%: ${perfectModels}/${totalModels}${RESET}` + ); + + const overallPassed = allResults.reduce((sum, r) => sum + r.totalPassed, 0); + const overallTotal = allResults.reduce((sum, r) => sum + r.totalTests, 0); + console.log( + `${BOLD}Overall: ${overallPassed}/${overallTotal} (${Math.round((overallPassed / overallTotal) * 100)}%)${RESET}` + ); + + console.log(); + + if (erroredModels > 0) { + console.log( + `${BOLD}${RED}${erroredModels} model(s) errored. See details above.${RESET}\n` + ); + process.exit(1); + } else if (perfectModels === totalModels) { + console.log(`${BOLD}${GREEN}🎉 ALL MODELS PASSED ALL TESTS!${RESET}\n`); + process.exit(0); + } else { + console.log( + `${BOLD}${YELLOW}Some models have failures. 
See details above.${RESET}\n` + ); + process.exit(1); + } +}; + +main(); diff --git a/bin/oh-my-opencode.js b/bin/oh-my-opencode.js index 4ad39550b..0d66e55eb 100755 --- a/bin/oh-my-opencode.js +++ b/bin/oh-my-opencode.js @@ -3,8 +3,9 @@ // Wrapper script that detects platform and spawns the correct binary import { spawnSync } from "node:child_process"; +import { readFileSync } from "node:fs"; import { createRequire } from "node:module"; -import { getPlatformPackage, getBinaryPath } from "./platform.js"; +import { getPlatformPackageCandidates, getBinaryPath } from "./platform.js"; const require = createRequire(import.meta.url); @@ -26,55 +27,116 @@ function getLibcFamily() { } } +function supportsAvx2() { + if (process.arch !== "x64") { + return null; + } + + if (process.env.OH_MY_OPENCODE_FORCE_BASELINE === "1") { + return false; + } + + if (process.platform === "linux") { + try { + const cpuInfo = readFileSync("/proc/cpuinfo", "utf8").toLowerCase(); + return cpuInfo.includes("avx2"); + } catch { + return null; + } + } + + if (process.platform === "darwin") { + const probe = spawnSync("sysctl", ["-n", "machdep.cpu.leaf7_features"], { + encoding: "utf8", + }); + + if (probe.error || probe.status !== 0) { + return null; + } + + return probe.stdout.toUpperCase().includes("AVX2"); + } + + return null; +} + +function getSignalExitCode(signal) { + const signalCodeByName = { + SIGINT: 2, + SIGILL: 4, + SIGKILL: 9, + SIGTERM: 15, + }; + + return 128 + (signalCodeByName[signal] ?? 
1); +} + function main() { const { platform, arch } = process; const libcFamily = getLibcFamily(); + const avx2Supported = supportsAvx2(); - // Get platform package name - let pkg; + let packageCandidates; try { - pkg = getPlatformPackage({ platform, arch, libcFamily }); + packageCandidates = getPlatformPackageCandidates({ + platform, + arch, + libcFamily, + preferBaseline: avx2Supported === false, + }); } catch (error) { console.error(`\noh-my-opencode: ${error.message}\n`); process.exit(1); } - - // Resolve binary path - const binRelPath = getBinaryPath(pkg, platform); - - let binPath; - try { - binPath = require.resolve(binRelPath); - } catch { + + const resolvedBinaries = packageCandidates + .map((pkg) => { + try { + return { pkg, binPath: require.resolve(getBinaryPath(pkg, platform)) }; + } catch { + return null; + } + }) + .filter((entry) => entry !== null); + + if (resolvedBinaries.length === 0) { console.error(`\noh-my-opencode: Platform binary not installed.`); console.error(`\nYour platform: ${platform}-${arch}${libcFamily === "musl" ? "-musl" : ""}`); - console.error(`Expected package: ${pkg}`); + console.error(`Expected packages (in order): ${packageCandidates.join(", ")}`); console.error(`\nTo fix, run:`); - console.error(` npm install ${pkg}\n`); + console.error(` npm install ${packageCandidates[0]}\n`); process.exit(1); } - - // Spawn the binary - const result = spawnSync(binPath, process.argv.slice(2), { - stdio: "inherit", - }); - - // Handle spawn errors - if (result.error) { - console.error(`\noh-my-opencode: Failed to execute binary.`); - console.error(`Error: ${result.error.message}\n`); - process.exit(2); - } - - // Handle signals - if (result.signal) { - const signalNum = result.signal === "SIGTERM" ? 15 : - result.signal === "SIGKILL" ? 9 : - result.signal === "SIGINT" ? 
2 : 1; - process.exit(128 + signalNum); + + for (let index = 0; index < resolvedBinaries.length; index += 1) { + const currentBinary = resolvedBinaries[index]; + const hasFallback = index < resolvedBinaries.length - 1; + const result = spawnSync(currentBinary.binPath, process.argv.slice(2), { + stdio: "inherit", + }); + + if (result.error) { + if (hasFallback) { + continue; + } + + console.error(`\noh-my-opencode: Failed to execute binary.`); + console.error(`Error: ${result.error.message}\n`); + process.exit(2); + } + + if (result.signal === "SIGILL" && hasFallback) { + continue; + } + + if (result.signal) { + process.exit(getSignalExitCode(result.signal)); + } + + process.exit(result.status ?? 1); } - process.exit(result.status ?? 1); + process.exit(1); } main(); diff --git a/bin/platform.d.ts b/bin/platform.d.ts new file mode 100644 index 000000000..ed3987957 --- /dev/null +++ b/bin/platform.d.ts @@ -0,0 +1,14 @@ +export declare function getPlatformPackage(options: { + platform: string; + arch: string; + libcFamily?: string | null; +}): string; + +export declare function getPlatformPackageCandidates(options: { + platform: string; + arch: string; + libcFamily?: string | null; + preferBaseline?: boolean; +}): string[]; + +export declare function getBinaryPath(pkg: string, platform: string): string; diff --git a/bin/platform.js b/bin/platform.js index ac728d3c8..a2a6c3c32 100644 --- a/bin/platform.js +++ b/bin/platform.js @@ -26,6 +26,50 @@ export function getPlatformPackage({ platform, arch, libcFamily }) { return `oh-my-opencode-${os}-${arch}${suffix}`; } +/** @param {{ platform: string, arch: string, libcFamily?: string | null, preferBaseline?: boolean }} options */ +export function getPlatformPackageCandidates({ platform, arch, libcFamily, preferBaseline = false }) { + const primaryPackage = getPlatformPackage({ platform, arch, libcFamily }); + const baselinePackage = getBaselinePlatformPackage({ platform, arch, libcFamily }); + + if (!baselinePackage) { + 
return [primaryPackage]; + } + + return preferBaseline ? [baselinePackage, primaryPackage] : [primaryPackage, baselinePackage]; +} + +/** @param {{ platform: string, arch: string, libcFamily?: string | null }} options */ +function getBaselinePlatformPackage({ platform, arch, libcFamily }) { + if (arch !== "x64") { + return null; + } + + if (platform === "darwin") { + return "oh-my-opencode-darwin-x64-baseline"; + } + + if (platform === "win32") { + return "oh-my-opencode-windows-x64-baseline"; + } + + if (platform === "linux") { + if (libcFamily === null || libcFamily === undefined) { + throw new Error( + "Could not detect libc on Linux. " + + "Please ensure detect-libc is installed or report this issue." + ); + } + + if (libcFamily === "musl") { + return "oh-my-opencode-linux-x64-musl-baseline"; + } + + return "oh-my-opencode-linux-x64-baseline"; + } + + return null; +} + /** * Get the path to the binary within a platform package * @param {string} pkg Package name diff --git a/bin/platform.test.ts b/bin/platform.test.ts index 775509929..88b8b877b 100644 --- a/bin/platform.test.ts +++ b/bin/platform.test.ts @@ -1,6 +1,6 @@ // bin/platform.test.ts import { describe, expect, test } from "bun:test"; -import { getPlatformPackage, getBinaryPath } from "./platform.js"; +import { getBinaryPath, getPlatformPackage, getPlatformPackageCandidates } from "./platform.js"; describe("getPlatformPackage", () => { // #region Darwin platforms @@ -146,3 +146,58 @@ describe("getBinaryPath", () => { expect(result).toBe("oh-my-opencode-linux-x64/bin/oh-my-opencode"); }); }); + +describe("getPlatformPackageCandidates", () => { + test("returns x64 and baseline candidates for Linux glibc", () => { + // #given Linux x64 with glibc + const input = { platform: "linux", arch: "x64", libcFamily: "glibc" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then returns modern first then baseline fallback + expect(result).toEqual([ + 
"oh-my-opencode-linux-x64", + "oh-my-opencode-linux-x64-baseline", + ]); + }); + + test("returns x64 musl and baseline candidates for Linux musl", () => { + // #given Linux x64 with musl + const input = { platform: "linux", arch: "x64", libcFamily: "musl" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then returns musl modern first then musl baseline fallback + expect(result).toEqual([ + "oh-my-opencode-linux-x64-musl", + "oh-my-opencode-linux-x64-musl-baseline", + ]); + }); + + test("returns baseline first when preferBaseline is true", () => { + // #given Windows x64 and baseline preference + const input = { platform: "win32", arch: "x64", preferBaseline: true }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then baseline package is preferred first + expect(result).toEqual([ + "oh-my-opencode-windows-x64-baseline", + "oh-my-opencode-windows-x64", + ]); + }); + + test("returns only one candidate for ARM64", () => { + // #given non-x64 platform + const input = { platform: "linux", arch: "arm64", libcFamily: "glibc" }; + + // #when getting package candidates + const result = getPlatformPackageCandidates(input); + + // #then baseline fallback is not included + expect(result).toEqual(["oh-my-opencode-linux-arm64"]); + }); +}); diff --git a/package.json b/package.json index 0559493b7..f5138b11c 100644 --- a/package.json +++ b/package.json @@ -77,11 +77,15 @@ "optionalDependencies": { "oh-my-opencode-darwin-arm64": "3.8.5", "oh-my-opencode-darwin-x64": "3.8.5", + "oh-my-opencode-darwin-x64-baseline": "3.8.5", "oh-my-opencode-linux-arm64": "3.8.5", "oh-my-opencode-linux-arm64-musl": "3.8.5", "oh-my-opencode-linux-x64": "3.8.5", + "oh-my-opencode-linux-x64-baseline": "3.8.5", "oh-my-opencode-linux-x64-musl": "3.8.5", - "oh-my-opencode-windows-x64": "3.8.5" + "oh-my-opencode-linux-x64-musl-baseline": "3.8.5", + "oh-my-opencode-windows-x64": "3.8.5", + 
"oh-my-opencode-windows-x64-baseline": "3.8.5" }, "trustedDependencies": [ "@ast-grep/cli", diff --git a/postinstall.mjs b/postinstall.mjs index 8243a562f..35f77a6d4 100644 --- a/postinstall.mjs +++ b/postinstall.mjs @@ -2,7 +2,7 @@ // Runs after npm install to verify platform binary is available import { createRequire } from "node:module"; -import { getPlatformPackage, getBinaryPath } from "./bin/platform.js"; +import { getPlatformPackageCandidates, getBinaryPath } from "./bin/platform.js"; const require = createRequire(import.meta.url); @@ -27,12 +27,28 @@ function main() { const libcFamily = getLibcFamily(); try { - const pkg = getPlatformPackage({ platform, arch, libcFamily }); - const binPath = getBinaryPath(pkg, platform); - - // Try to resolve the binary - require.resolve(binPath); - console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch}`); + const packageCandidates = getPlatformPackageCandidates({ + platform, + arch, + libcFamily, + }); + + const resolvedPackage = packageCandidates.find((pkg) => { + try { + require.resolve(getBinaryPath(pkg, platform)); + return true; + } catch { + return false; + } + }); + + if (!resolvedPackage) { + throw new Error( + `No platform binary package installed. 
Tried: ${packageCandidates.join(", ")}` + ); + } + + console.log(`✓ oh-my-opencode binary installed for ${platform}-${arch} (${resolvedPackage})`); } catch (error) { console.warn(`⚠ oh-my-opencode: ${error.message}`); console.warn(` The CLI may not work on this platform.`); diff --git a/signatures/cla.json b/signatures/cla.json index 6e3619fb5..5fd83619e 100644 --- a/signatures/cla.json +++ b/signatures/cla.json @@ -1719,6 +1719,54 @@ "created_at": "2026-02-24T17:12:31Z", "repoId": 1108837393, "pullRequestNo": 1983 + }, + { + "name": "east-shine", + "id": 20237288, + "comment_id": 3957576758, + "created_at": "2026-02-25T08:19:34Z", + "repoId": 1108837393, + "pullRequestNo": 2113 + }, + { + "name": "SupenBysz", + "id": 3314033, + "comment_id": 3962352704, + "created_at": "2026-02-25T22:00:54Z", + "repoId": 1108837393, + "pullRequestNo": 2119 + }, + { + "name": "zhzy0077", + "id": 8717471, + "comment_id": 3964015975, + "created_at": "2026-02-26T04:45:23Z", + "repoId": 1108837393, + "pullRequestNo": 2125 + }, + { + "name": "spacecowboy0416", + "id": 239068998, + "comment_id": 3964320737, + "created_at": "2026-02-26T06:05:27Z", + "repoId": 1108837393, + "pullRequestNo": 2126 + }, + { + "name": "imwxc", + "id": 49653609, + "comment_id": 3965127447, + "created_at": "2026-02-26T09:00:16Z", + "repoId": 1108837393, + "pullRequestNo": 2129 + }, + { + "name": "maou-shonen", + "id": 22576780, + "comment_id": 3965445132, + "created_at": "2026-02-26T09:50:46Z", + "repoId": 1108837393, + "pullRequestNo": 2131 } ] } \ No newline at end of file diff --git a/src/agents/atlas/agent.ts b/src/agents/atlas/agent.ts index 6f968b783..ccf987754 100644 --- a/src/agents/atlas/agent.ts +++ b/src/agents/atlas/agent.ts @@ -17,7 +17,6 @@ import type { AvailableAgent, AvailableSkill, AvailableCategory } from "../dynam import { buildCategorySkillsDelegationGuide } from "../dynamic-agent-prompt-builder" import type { CategoryConfig } from "../../config/schema" import { mergeCategories } from 
"../../shared/merge-categories" -import { createAgentToolRestrictions } from "../../shared/permission-compat" import { getDefaultAtlasPrompt } from "./default" import { getGptAtlasPrompt } from "./gpt" @@ -30,7 +29,7 @@ import { buildDecisionMatrix, } from "./prompt-section-builder" -const MODE: AgentMode = "primary" +const MODE: AgentMode = "all" export type AtlasPromptSource = "default" | "gpt" | "gemini" @@ -100,11 +99,6 @@ function buildDynamicOrchestratorPrompt(ctx?: OrchestratorContext): string { } export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig { - const restrictions = createAgentToolRestrictions([ - "task", - "call_omo_agent", - ]) - const baseConfig = { description: "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)", @@ -113,7 +107,6 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig { temperature: 0.1, prompt: buildDynamicOrchestratorPrompt(ctx), color: "#10B981", - ...restrictions, } return baseConfig as AgentConfig diff --git a/src/agents/env-context.test.ts b/src/agents/env-context.test.ts new file mode 100644 index 000000000..718e76a98 --- /dev/null +++ b/src/agents/env-context.test.ts @@ -0,0 +1,41 @@ +/// + +import { describe, test, expect } from "bun:test" +import { createEnvContext } from "./env-context" + +describe("createEnvContext", () => { + test("returns omo-env block with timezone and locale", () => { + // #given - no setup needed + + // #when + const result = createEnvContext() + + // #then + expect(result).toContain("") + expect(result).toContain("") + expect(result).toContain("Timezone:") + expect(result).toContain("Locale:") + expect(result).not.toContain("Current date:") + }) + + test("does not include time with seconds precision to preserve token cache", () => { + // #given - seconds-precision time changes every second, breaking cache on every request + + // #when + const result = createEnvContext() + + // #then - no HH:MM:SS 
pattern anywhere in the output + expect(result).not.toMatch(/\d{1,2}:\d{2}:\d{2}/) + }) + + test("does not include date or time fields since OpenCode already provides them", () => { + // #given - OpenCode's system.ts already injects date, platform, working directory + + // #when + const result = createEnvContext() + + // #then - only timezone and locale remain; both are stable across requests + expect(result).not.toContain("Current date:") + expect(result).not.toContain("Current time:") + }) +}) diff --git a/src/agents/env-context.ts b/src/agents/env-context.ts index 262886ca3..c8e542b44 100644 --- a/src/agents/env-context.ts +++ b/src/agents/env-context.ts @@ -1,32 +1,15 @@ /** - * Creates OmO-specific environment context (time, timezone, locale). + * Creates OmO-specific environment context (timezone, locale). * Note: Working directory, platform, and date are already provided by OpenCode's system.ts, * so we only include fields that OpenCode doesn't provide to avoid duplication. * See: https://github.com/code-yeongyu/oh-my-opencode/issues/379 */ export function createEnvContext(): string { - const now = new Date() const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone const locale = Intl.DateTimeFormat().resolvedOptions().locale - const dateStr = now.toLocaleDateString(locale, { - weekday: "short", - year: "numeric", - month: "short", - day: "numeric", - }) - - const timeStr = now.toLocaleTimeString(locale, { - hour: "2-digit", - minute: "2-digit", - second: "2-digit", - hour12: true, - }) - return ` - Current date: ${dateStr} - Current time: ${timeStr} Timezone: ${timezone} Locale: ${locale} ` diff --git a/src/agents/hephaestus.ts b/src/agents/hephaestus.ts index feac23219..e182c96f4 100644 --- a/src/agents/hephaestus.ts +++ b/src/agents/hephaestus.ts @@ -19,7 +19,7 @@ import { categorizeTools, } from "./dynamic-agent-prompt-builder"; -const MODE: AgentMode = "primary"; +const MODE: AgentMode = "all"; function buildTodoDisciplineSection(useTaskSystem: 
boolean): string { if (useTaskSystem) { @@ -448,6 +448,21 @@ ${oracleSection} 4. **Run build** if applicable — exit code 0 required 5. **Tell user** what you verified and the results — keep it clear and helpful +### Auto-Commit Policy (MANDATORY for implementation/fix work) + +1. **Auto-commit after implementation is complete** when the task includes feature/fix code changes +2. **Commit ONLY after verification gates pass**: + - \`lsp_diagnostics\` clean on all modified files + - Related tests pass + - Typecheck/build pass when applicable +3. **If any gate fails, DO NOT commit** — fix issues first, re-run verification, then commit +4. **Use Conventional Commits format** with meaningful intent-focused messages: + - \`feat(scope): add ...\` for new functionality + - \`fix(scope): resolve ...\` for bug fixes + - \`refactor(scope): simplify ...\` for internal restructuring +5. **Do not make placeholder commits** (\`wip\`, \`temp\`, \`update\`) or commit unverified code +6. **If user explicitly says not to commit**, skip commit and report that changes are left uncommitted + - **File edit** — \`lsp_diagnostics\` clean - **Build** — Exit code 0 - **Tests** — Pass (or pre-existing failures noted) diff --git a/src/agents/sisyphus.ts b/src/agents/sisyphus.ts index 72173bd48..06debf111 100644 --- a/src/agents/sisyphus.ts +++ b/src/agents/sisyphus.ts @@ -8,7 +8,7 @@ import { buildGeminiIntentGateEnforcement, } from "./sisyphus-gemini-overlays"; -const MODE: AgentMode = "primary"; +const MODE: AgentMode = "all"; export const SISYPHUS_PROMPT_METADATA: AgentPromptMetadata = { category: "utility", cost: "EXPENSIVE", diff --git a/src/agents/tool-restrictions.test.ts b/src/agents/tool-restrictions.test.ts index 685acbc1f..85facdc54 100644 --- a/src/agents/tool-restrictions.test.ts +++ b/src/agents/tool-restrictions.test.ts @@ -4,6 +4,7 @@ import { createLibrarianAgent } from "./librarian" import { createExploreAgent } from "./explore" import { createMomusAgent } from "./momus" 
import { createMetisAgent } from "./metis" +import { createAtlasAgent } from "./atlas" const TEST_MODEL = "anthropic/claude-sonnet-4-5" @@ -96,4 +97,18 @@ describe("read-only agent tool restrictions", () => { } }) }) + + describe("Atlas", () => { + test("allows delegation tools for orchestration", () => { + // given + const agent = createAtlasAgent({ model: TEST_MODEL }) + + // when + const permission = (agent.permission ?? {}) as Record + + // then + expect(permission["task"]).toBeUndefined() + expect(permission["call_omo_agent"]).toBeUndefined() + }) + }) }) diff --git a/src/agents/types.test.ts b/src/agents/types.test.ts index 614991867..dd6b1fe54 100644 --- a/src/agents/types.test.ts +++ b/src/agents/types.test.ts @@ -2,11 +2,17 @@ import { describe, test, expect } from "bun:test"; import { isGptModel, isGeminiModel } from "./types"; describe("isGptModel", () => { - test("standard openai provider models", () => { + test("standard openai provider gpt models", () => { expect(isGptModel("openai/gpt-5.2")).toBe(true); expect(isGptModel("openai/gpt-4o")).toBe(true); - expect(isGptModel("openai/o1")).toBe(true); - expect(isGptModel("openai/o3-mini")).toBe(true); + }); + + test("o-series models are not gpt by name", () => { + expect(isGptModel("openai/o1")).toBe(false); + expect(isGptModel("openai/o3-mini")).toBe(false); + expect(isGptModel("litellm/o1")).toBe(false); + expect(isGptModel("litellm/o3-mini")).toBe(false); + expect(isGptModel("litellm/o4-mini")).toBe(false); }); test("github copilot gpt models", () => { @@ -17,9 +23,6 @@ describe("isGptModel", () => { test("litellm proxied gpt models", () => { expect(isGptModel("litellm/gpt-5.2")).toBe(true); expect(isGptModel("litellm/gpt-4o")).toBe(true); - expect(isGptModel("litellm/o1")).toBe(true); - expect(isGptModel("litellm/o3-mini")).toBe(true); - expect(isGptModel("litellm/o4-mini")).toBe(true); }); test("other proxied gpt models", () => { @@ -27,6 +30,11 @@ describe("isGptModel", () => { 
expect(isGptModel("custom-provider/gpt-5.2")).toBe(true); }); + test("venice provider gpt models", () => { + expect(isGptModel("venice/gpt-5.2")).toBe(true); + expect(isGptModel("venice/gpt-4o")).toBe(true); + }); + test("gpt4 prefix without hyphen (legacy naming)", () => { expect(isGptModel("litellm/gpt4o")).toBe(true); expect(isGptModel("ollama/gpt4")).toBe(true); @@ -39,8 +47,8 @@ describe("isGptModel", () => { }); test("gemini models are not gpt", () => { - expect(isGptModel("google/gemini-3-pro")).toBe(false); - expect(isGptModel("litellm/gemini-3-pro")).toBe(false); + expect(isGptModel("google/gemini-3.1-pro")).toBe(false); + expect(isGptModel("litellm/gemini-3.1-pro")).toBe(false); }); test("opencode provider is not gpt", () => { @@ -50,29 +58,29 @@ describe("isGptModel", () => { describe("isGeminiModel", () => { test("#given google provider models #then returns true", () => { - expect(isGeminiModel("google/gemini-3-pro")).toBe(true); + expect(isGeminiModel("google/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("google/gemini-3-flash")).toBe(true); expect(isGeminiModel("google/gemini-2.5-pro")).toBe(true); }); test("#given google-vertex provider models #then returns true", () => { - expect(isGeminiModel("google-vertex/gemini-3-pro")).toBe(true); + expect(isGeminiModel("google-vertex/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("google-vertex/gemini-3-flash")).toBe(true); }); test("#given github copilot gemini models #then returns true", () => { - expect(isGeminiModel("github-copilot/gemini-3-pro")).toBe(true); + expect(isGeminiModel("github-copilot/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("github-copilot/gemini-3-flash")).toBe(true); }); test("#given litellm proxied gemini models #then returns true", () => { - expect(isGeminiModel("litellm/gemini-3-pro")).toBe(true); + expect(isGeminiModel("litellm/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("litellm/gemini-3-flash")).toBe(true); 
expect(isGeminiModel("litellm/gemini-2.5-pro")).toBe(true); }); test("#given other proxied gemini models #then returns true", () => { - expect(isGeminiModel("custom-provider/gemini-3-pro")).toBe(true); + expect(isGeminiModel("custom-provider/gemini-3.1-pro")).toBe(true); expect(isGeminiModel("ollama/gemini-3-flash")).toBe(true); }); diff --git a/src/agents/types.ts b/src/agents/types.ts index 2d4f6c0cb..bdb60007a 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -70,14 +70,9 @@ function extractModelName(model: string): string { return model.includes("/") ? model.split("/").pop() ?? model : model } -const GPT_MODEL_PREFIXES = ["gpt-", "gpt4", "o1", "o3", "o4"] - export function isGptModel(model: string): boolean { - if (model.startsWith("openai/") || model.startsWith("github-copilot/gpt-")) - return true - const modelName = extractModelName(model).toLowerCase() - return GPT_MODEL_PREFIXES.some((prefix) => modelName.startsWith(prefix)) + return modelName.includes("gpt") } const GEMINI_PROVIDERS = ["google/", "google-vertex/"] diff --git a/src/agents/utils.test.ts b/src/agents/utils.test.ts index 1095fee13..f4ecb5040 100644 --- a/src/agents/utils.test.ts +++ b/src/agents/utils.test.ts @@ -603,8 +603,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => } }) - test("hephaestus is not created when only github-copilot provider is connected", async () => { - // #given - github-copilot provider has models available + test("hephaestus IS created when github-copilot is connected with a GPT model", async () => { + // #given - github-copilot provider has gpt-5.3-codex available const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue( new Set(["github-copilot/gpt-5.3-codex"]) ) @@ -614,8 +614,8 @@ describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => // #when const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {}) - // #then - 
expect(agents.hephaestus).toBeUndefined() + // #then - github-copilot is now a valid provider for hephaestus + expect(agents.hephaestus).toBeDefined() } finally { fetchSpy.mockRestore() cacheSpy.mockRestore() @@ -1002,7 +1002,7 @@ describe("buildAgent with category and skills", () => { const agent = buildAgent(source["test-agent"], TEST_MODEL) // #then - category's built-in model is applied - expect(agent.model).toBe("google/gemini-3-pro") + expect(agent.model).toBe("google/gemini-3.1-pro") }) test("agent with category and existing model keeps existing model", () => { diff --git a/src/cli/__snapshots__/model-fallback.test.ts.snap b/src/cli/__snapshots__/model-fallback.test.ts.snap index df91e07b8..a8c3e9112 100644 --- a/src/cli/__snapshots__/model-fallback.test.ts.snap +++ b/src/cli/__snapshots__/model-fallback.test.ts.snap @@ -325,7 +325,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, "explore": { "model": "opencode/gpt-5-nano", @@ -334,34 +334,34 @@ exports[`generateModelConfig single native provider uses Gemini models when only "model": "opencode/glm-4.7-free", }, "metis": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "momus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": 
"google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "google/gemini-3-flash-preview", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -371,7 +371,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only "model": "google/gemini-3-flash-preview", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -386,7 +386,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "$schema": "https://raw.githubusercontent.com/code-yeongyu/oh-my-opencode/master/assets/oh-my-opencode.schema.json", "agents": { "atlas": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, "explore": { "model": "opencode/gpt-5-nano", @@ -395,44 +395,44 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa "model": "opencode/glm-4.7-free", }, "metis": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "momus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "multimodal-looker": { "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", }, }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "google/gemini-3-flash-preview", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { - "model": "google/gemini-3-pro-preview", + "model": 
"google/gemini-3.1-pro-preview", }, "unspecified-low": { "model": "google/gemini-3-flash-preview", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -485,7 +485,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -506,7 +506,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -559,7 +559,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -581,7 +581,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -634,7 +634,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on }, "categories": { "artistry": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -655,7 +655,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on "model": "opencode/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -708,7 +708,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is }, "categories": { "artistry": { - "model": 
"opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -730,7 +730,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is "model": "opencode/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -779,14 +779,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "github-copilot/claude-haiku-4.5", }, "ultrabrain": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -796,7 +796,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -845,14 +845,14 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "github-copilot/claude-haiku-4.5", }, "ultrabrain": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -863,7 +863,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1026,7 +1026,7 @@ exports[`generateModelConfig 
mixed provider scenarios uses Claude + OpenCode Zen }, "categories": { "artistry": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "deep": { @@ -1047,7 +1047,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "opencode/gemini-3-pro", + "model": "opencode/gemini-3.1-pro", "variant": "high", }, "writing": { @@ -1100,7 +1100,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1121,7 +1121,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1217,7 +1217,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "model": "google/gemini-3-flash-preview", }, "oracle": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "prometheus": { @@ -1231,14 +1231,14 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "quick": { "model": "anthropic/claude-haiku-4-5", }, "ultrabrain": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "unspecified-high": { @@ -1248,7 +1248,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + 
"model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1301,7 +1301,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider }, "categories": { "artistry": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1322,7 +1322,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider "model": "github-copilot/claude-sonnet-4.5", }, "visual-engineering": { - "model": "github-copilot/gemini-3-pro-preview", + "model": "github-copilot/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1375,7 +1375,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1396,7 +1396,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { @@ -1449,7 +1449,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is }, "categories": { "artistry": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "deep": { @@ -1471,7 +1471,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is "model": "anthropic/claude-sonnet-4-5", }, "visual-engineering": { - "model": "google/gemini-3-pro-preview", + "model": "google/gemini-3.1-pro-preview", "variant": "high", }, "writing": { diff --git a/src/cli/config-manager.test.ts b/src/cli/config-manager.test.ts index ad73c3d44..67571185a 100644 --- a/src/cli/config-manager.test.ts +++ b/src/cli/config-manager.test.ts @@ -178,7 +178,7 @@ describe("config-manager 
ANTIGRAVITY_PROVIDER_CONFIG", () => { expect(models).toBeTruthy() const required = [ - "antigravity-gemini-3-pro", + "antigravity-gemini-3.1-pro", "antigravity-gemini-3-flash", "antigravity-claude-sonnet-4-6", "antigravity-claude-sonnet-4-6-thinking", @@ -206,7 +206,7 @@ describe("config-manager ANTIGRAVITY_PROVIDER_CONFIG", () => { const models = (ANTIGRAVITY_PROVIDER_CONFIG as any).google.models as Record // #when checking Gemini Pro variants - const pro = models["antigravity-gemini-3-pro"] + const pro = models["antigravity-gemini-3.1-pro"] // #then should have low and high variants expect(pro.variants).toBeTruthy() expect(pro.variants.low).toBeTruthy() diff --git a/src/cli/config-manager/antigravity-provider-configuration.ts b/src/cli/config-manager/antigravity-provider-configuration.ts index 5559d0919..6d847ac5d 100644 --- a/src/cli/config-manager/antigravity-provider-configuration.ts +++ b/src/cli/config-manager/antigravity-provider-configuration.ts @@ -4,10 +4,10 @@ * IMPORTANT: Model names MUST use `antigravity-` prefix for stability. * * Since opencode-antigravity-auth v1.3.0, models use a variant system: - * - `antigravity-gemini-3-pro` with variants: low, high + * - `antigravity-gemini-3.1-pro` with variants: low, high * - `antigravity-gemini-3-flash` with variants: minimal, low, medium, high * - * Legacy tier-suffixed names (e.g., `antigravity-gemini-3-pro-high`) still work + * Legacy tier-suffixed names (e.g., `antigravity-gemini-3.1-pro-high`) still work * but variants are the recommended approach. 
* * @see https://github.com/NoeFabris/opencode-antigravity-auth#models @@ -16,7 +16,7 @@ export const ANTIGRAVITY_PROVIDER_CONFIG = { google: { name: "Google", models: { - "antigravity-gemini-3-pro": { + "antigravity-gemini-3.1-pro": { name: "Gemini 3 Pro (Antigravity)", limit: { context: 1048576, output: 65535 }, modalities: { input: ["text", "image", "pdf"], output: ["text"] }, diff --git a/src/cli/config-manager/bun-install.ts b/src/cli/config-manager/bun-install.ts index f24e77fa2..6b3225547 100644 --- a/src/cli/config-manager/bun-install.ts +++ b/src/cli/config-manager/bun-install.ts @@ -1,4 +1,5 @@ import { getConfigDir } from "./config-context" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" const BUN_INSTALL_TIMEOUT_SECONDS = 60 const BUN_INSTALL_TIMEOUT_MS = BUN_INSTALL_TIMEOUT_SECONDS * 1000 @@ -16,7 +17,7 @@ export async function runBunInstall(): Promise { export async function runBunInstallWithDetails(): Promise { try { - const proc = Bun.spawn(["bun", "install"], { + const proc = spawnWithWindowsHide(["bun", "install"], { cwd: getConfigDir(), stdout: "inherit", stderr: "inherit", diff --git a/src/cli/config-manager/opencode-binary.ts b/src/cli/config-manager/opencode-binary.ts index 6d889faee..6fb140403 100644 --- a/src/cli/config-manager/opencode-binary.ts +++ b/src/cli/config-manager/opencode-binary.ts @@ -1,4 +1,5 @@ import type { OpenCodeBinaryType } from "../../shared/opencode-config-dir-types" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" import { initConfigContext } from "./config-context" const OPENCODE_BINARIES = ["opencode", "opencode-desktop"] as const @@ -11,7 +12,7 @@ interface OpenCodeBinaryResult { async function findOpenCodeBinaryWithVersion(): Promise { for (const binary of OPENCODE_BINARIES) { try { - const proc = Bun.spawn([binary, "--version"], { + const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe", }) diff --git 
a/src/cli/config-manager/write-omo-config.test.ts b/src/cli/config-manager/write-omo-config.test.ts new file mode 100644 index 000000000..5701b53dc --- /dev/null +++ b/src/cli/config-manager/write-omo-config.test.ts @@ -0,0 +1,80 @@ +import { afterEach, beforeEach, describe, expect, it } from "bun:test" +import { mkdirSync, readFileSync, rmSync, writeFileSync } from "node:fs" +import { tmpdir } from "node:os" +import { join } from "node:path" + +import { parseJsonc } from "../../shared/jsonc-parser" +import type { InstallConfig } from "../types" +import { resetConfigContext } from "./config-context" +import { generateOmoConfig } from "./generate-omo-config" +import { writeOmoConfig } from "./write-omo-config" + +const installConfig: InstallConfig = { + hasClaude: true, + isMax20: true, + hasOpenAI: true, + hasGemini: true, + hasCopilot: false, + hasOpencodeZen: false, + hasZaiCodingPlan: false, + hasKimiForCoding: false, +} + +function getRecord(value: unknown): Record { + if (value && typeof value === "object" && !Array.isArray(value)) { + return value as Record + } + + return {} +} + +describe("writeOmoConfig", () => { + let testConfigDir = "" + let testConfigPath = "" + + beforeEach(() => { + testConfigDir = join(tmpdir(), `omo-write-config-${Date.now()}-${Math.random().toString(36).slice(2)}`) + testConfigPath = join(testConfigDir, "oh-my-opencode.json") + + mkdirSync(testConfigDir, { recursive: true }) + process.env.OPENCODE_CONFIG_DIR = testConfigDir + resetConfigContext() + }) + + afterEach(() => { + rmSync(testConfigDir, { recursive: true, force: true }) + resetConfigContext() + delete process.env.OPENCODE_CONFIG_DIR + }) + + it("preserves existing user values while adding new defaults", () => { + // given + const existingConfig = { + agents: { + sisyphus: { + model: "custom/provider-model", + }, + }, + disabled_hooks: ["comment-checker"], + } + writeFileSync(testConfigPath, JSON.stringify(existingConfig, null, 2) + "\n", "utf-8") + + const 
generatedDefaults = generateOmoConfig(installConfig) + + // when + const result = writeOmoConfig(installConfig) + + // then + expect(result.success).toBe(true) + + const savedConfig = parseJsonc>(readFileSync(testConfigPath, "utf-8")) + const savedAgents = getRecord(savedConfig.agents) + const savedSisyphus = getRecord(savedAgents.sisyphus) + expect(savedSisyphus.model).toBe("custom/provider-model") + expect(savedConfig.disabled_hooks).toEqual(["comment-checker"]) + + for (const defaultKey of Object.keys(generatedDefaults)) { + expect(savedConfig).toHaveProperty(defaultKey) + } + }) +}) diff --git a/src/cli/config-manager/write-omo-config.ts b/src/cli/config-manager/write-omo-config.ts index 09fcce15b..261175e7a 100644 --- a/src/cli/config-manager/write-omo-config.ts +++ b/src/cli/config-manager/write-omo-config.ts @@ -43,7 +43,7 @@ export function writeOmoConfig(installConfig: InstallConfig): ConfigMergeResult return { success: true, configPath: omoConfigPath } } - const merged = deepMergeRecord(existing, newConfig) + const merged = deepMergeRecord(newConfig, existing) writeFileSync(omoConfigPath, JSON.stringify(merged, null, 2) + "\n") } catch (parseErr) { if (parseErr instanceof SyntaxError) { diff --git a/src/cli/doctor/checks/dependencies.ts b/src/cli/doctor/checks/dependencies.ts index da22afcfb..f6f6ded01 100644 --- a/src/cli/doctor/checks/dependencies.ts +++ b/src/cli/doctor/checks/dependencies.ts @@ -3,6 +3,7 @@ import { createRequire } from "node:module" import { dirname, join } from "node:path" import type { DependencyInfo } from "../types" +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> { try { @@ -18,7 +19,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat async function getBinaryVersion(binary: string): Promise { try { - const proc = Bun.spawn([binary, "--version"], { stdout: "pipe", 
stderr: "pipe" }) + const proc = spawnWithWindowsHide([binary, "--version"], { stdout: "pipe", stderr: "pipe" }) const output = await new Response(proc.stdout).text() await proc.exited if (proc.exitCode === 0) { @@ -140,4 +141,3 @@ export async function checkCommentChecker(): Promise { path: resolvedPath, } } - diff --git a/src/cli/doctor/checks/model-resolution.test.ts b/src/cli/doctor/checks/model-resolution.test.ts index cca2f58b5..e311076a8 100644 --- a/src/cli/doctor/checks/model-resolution.test.ts +++ b/src/cli/doctor/checks/model-resolution.test.ts @@ -26,7 +26,7 @@ describe("model-resolution check", () => { // then: Should have category entries const visual = info.categories.find((c) => c.name === "visual-engineering") expect(visual).toBeDefined() - expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3-pro") + expect(visual!.requirement.fallbackChain[0]?.model).toBe("gemini-3.1-pro") expect(visual!.requirement.fallbackChain[0]?.providers).toContain("google") }) }) diff --git a/src/cli/doctor/checks/system-binary.ts b/src/cli/doctor/checks/system-binary.ts index 670d7ce1e..5a4d48126 100644 --- a/src/cli/doctor/checks/system-binary.ts +++ b/src/cli/doctor/checks/system-binary.ts @@ -1,6 +1,7 @@ import { existsSync } from "node:fs" import { homedir } from "node:os" import { join } from "node:path" +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" import { OPENCODE_BINARIES } from "../constants" @@ -110,7 +111,7 @@ export async function getOpenCodeVersion( ): Promise { try { const command = buildVersionCommand(binaryPath, platform) - const processResult = Bun.spawn(command, { stdout: "pipe", stderr: "pipe" }) + const processResult = spawnWithWindowsHide(command, { stdout: "pipe", stderr: "pipe" }) const output = await new Response(processResult.stdout).text() await processResult.exited diff --git a/src/cli/doctor/checks/tools-gh.ts b/src/cli/doctor/checks/tools-gh.ts index a9ac59a91..177b5c160 100644 --- 
a/src/cli/doctor/checks/tools-gh.ts +++ b/src/cli/doctor/checks/tools-gh.ts @@ -1,3 +1,5 @@ +import { spawnWithWindowsHide } from "../../../shared/spawn-with-windows-hide" + export interface GhCliInfo { installed: boolean version: string | null @@ -19,7 +21,7 @@ async function checkBinaryExists(binary: string): Promise<{ exists: boolean; pat async function getGhVersion(): Promise { try { - const processResult = Bun.spawn(["gh", "--version"], { stdout: "pipe", stderr: "pipe" }) + const processResult = spawnWithWindowsHide(["gh", "--version"], { stdout: "pipe", stderr: "pipe" }) const output = await new Response(processResult.stdout).text() await processResult.exited if (processResult.exitCode !== 0) return null @@ -38,7 +40,7 @@ async function getGhAuthStatus(): Promise<{ error: string | null }> { try { - const processResult = Bun.spawn(["gh", "auth", "status"], { + const processResult = spawnWithWindowsHide(["gh", "auth", "status"], { stdout: "pipe", stderr: "pipe", env: { ...process.env, GH_NO_UPDATE_NOTIFIER: "1" }, diff --git a/src/cli/model-fallback-requirements.ts b/src/cli/model-fallback-requirements.ts index f3f43e60b..0ff625005 100644 --- a/src/cli/model-fallback-requirements.ts +++ b/src/cli/model-fallback-requirements.ts @@ -24,7 +24,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { oracle: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -59,7 +59,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", 
variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, metis: { @@ -68,14 +68,14 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["kimi-for-coding"], model: "k2p5" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, momus: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, atlas: { @@ -84,7 +84,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, } @@ -92,7 +92,7 @@ export const CLI_AGENT_MODEL_REQUIREMENTS: Record = { export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = { "visual-engineering": { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["zai-coding-plan"], model: "glm-5" }, 
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["kimi-for-coding"], model: "k2p5" }, @@ -101,7 +101,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = ultrabrain: { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -109,17 +109,17 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], requiresModel: "gpt-5.3-codex", }, artistry: { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, ], - requiresModel: "gemini-3-pro", + requiresModel: "gemini-3.1-pro", }, quick: { fallbackChain: [ @@ -139,7 +139,7 @@ export const CLI_CATEGORY_MODEL_REQUIREMENTS: Record = fallbackChain: [ { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", 
"opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, writing: { diff --git a/src/cli/provider-model-id-transform.test.ts b/src/cli/provider-model-id-transform.test.ts index e13c7846a..17cb9dfb1 100644 --- a/src/cli/provider-model-id-transform.test.ts +++ b/src/cli/provider-model-id-transform.test.ts @@ -40,16 +40,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("claude-haiku-4.5") }) - test("transforms gemini-3-pro to gemini-3-pro-preview", () => { - // #given github-copilot provider and gemini-3-pro model + test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => { + // #given github-copilot provider and gemini-3.1-pro model const provider = "github-copilot" - const model = "gemini-3-pro" + const model = "gemini-3.1-pro" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should transform to gemini-3-pro-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should transform to gemini-3.1-pro-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("transforms gemini-3-flash to gemini-3-flash-preview", () => { @@ -64,16 +64,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("prevents double transformation of gemini-3-pro-preview", () => { - // #given github-copilot provider and gemini-3-pro-preview model (already transformed) + test("prevents double transformation of gemini-3.1-pro-preview", () => { + // #given github-copilot provider and gemini-3.1-pro-preview model (already transformed) const provider = "github-copilot" - const model = "gemini-3-pro-preview" + const model = "gemini-3.1-pro-preview" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should NOT become gemini-3-pro-preview-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should NOT 
become gemini-3.1-pro-preview-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("prevents double transformation of gemini-3-flash-preview", () => { @@ -102,16 +102,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("transforms gemini-3-pro to gemini-3-pro-preview", () => { - // #given google provider and gemini-3-pro model + test("transforms gemini-3.1-pro to gemini-3.1-pro-preview", () => { + // #given google provider and gemini-3.1-pro model const provider = "google" - const model = "gemini-3-pro" + const model = "gemini-3.1-pro" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should transform to gemini-3-pro-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should transform to gemini-3.1-pro-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("passes through other gemini models unchanged", () => { @@ -138,16 +138,16 @@ describe("transformModelForProvider", () => { expect(result).toBe("gemini-3-flash-preview") }) - test("prevents double transformation of gemini-3-pro-preview", () => { - // #given google provider and gemini-3-pro-preview model (already transformed) + test("prevents double transformation of gemini-3.1-pro-preview", () => { + // #given google provider and gemini-3.1-pro-preview model (already transformed) const provider = "google" - const model = "gemini-3-pro-preview" + const model = "gemini-3.1-pro-preview" // #when transformModelForProvider is called const result = transformModelForProvider(provider, model) - // #then should NOT become gemini-3-pro-preview-preview - expect(result).toBe("gemini-3-pro-preview") + // #then should NOT become gemini-3.1-pro-preview-preview + expect(result).toBe("gemini-3.1-pro-preview") }) test("does not transform claude models for google provider", () => { diff --git a/src/cli/run/event-handlers.test.ts b/src/cli/run/event-handlers.test.ts index 
267b394cd..b6687cf7d 100644 --- a/src/cli/run/event-handlers.test.ts +++ b/src/cli/run/event-handlers.test.ts @@ -1,4 +1,4 @@ -import { describe, it, expect, spyOn } from "bun:test" +const { describe, it, expect, spyOn } = require("bun:test") import type { RunContext } from "./types" import { createEventState } from "./events" import { handleSessionStatus, handleMessagePartUpdated, handleMessageUpdated, handleTuiToast } from "./event-handlers" @@ -235,9 +235,7 @@ describe("handleMessagePartUpdated", () => { it("prints completion metadata once when assistant text part is completed", () => { // given - const nowSpy = spyOn(Date, "now") - nowSpy.mockReturnValueOnce(1000) - nowSpy.mockReturnValueOnce(3400) + const nowSpy = spyOn(Date, "now").mockReturnValue(3400) const ctx = createMockContext("ses_main") const state = createEventState() @@ -259,6 +257,7 @@ describe("handleMessagePartUpdated", () => { } as any, state, ) + state.messageStartedAtById["msg_1"] = 1000 // when handleMessagePartUpdated( diff --git a/src/cli/run/event-state.ts b/src/cli/run/event-state.ts index 4d05f7dac..eee23f5f3 100644 --- a/src/cli/run/event-state.ts +++ b/src/cli/run/event-state.ts @@ -7,6 +7,8 @@ export interface EventState { currentTool: string | null /** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */ hasReceivedMeaningfulWork: boolean + /** Timestamp of the last received event (for watchdog detection) */ + lastEventTimestamp: number /** Count of assistant messages for the main session */ messageCount: number /** Current agent name from the latest assistant message */ @@ -54,6 +56,7 @@ export function createEventState(): EventState { lastPartText: "", currentTool: null, hasReceivedMeaningfulWork: false, + lastEventTimestamp: Date.now(), messageCount: 0, currentAgent: null, currentModel: null, diff --git a/src/cli/run/event-stream-processor.ts b/src/cli/run/event-stream-processor.ts index c5e600e91..757c1a447 100644 --- 
a/src/cli/run/event-stream-processor.ts +++ b/src/cli/run/event-stream-processor.ts @@ -35,6 +35,9 @@ export async function processEvents( logEventVerbose(ctx, payload) } + // Update last event timestamp for watchdog detection + state.lastEventTimestamp = Date.now() + handleSessionError(ctx, payload, state) handleSessionIdle(ctx, payload, state) handleSessionStatus(ctx, payload, state) diff --git a/src/cli/run/integration.test.ts b/src/cli/run/integration.test.ts index d0fc91cfb..6ac16c9f8 100644 --- a/src/cli/run/integration.test.ts +++ b/src/cli/run/integration.test.ts @@ -3,6 +3,7 @@ import type { RunResult } from "./types" import { createJsonOutputManager } from "./json-output" import { resolveSession } from "./session-resolver" import { executeOnCompleteHook } from "./on-complete-hook" +import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide" import type { OpencodeClient } from "./types" import * as originalSdk from "@opencode-ai/sdk" import * as originalPortUtils from "../../shared/port-utils" @@ -147,7 +148,7 @@ describe("integration: --session-id", () => { const result = resolveSession({ client: mockClient, sessionId, directory: "/test" }) // then - await expect(result).rejects.toThrow(`Session not found: ${sessionId}`) + await expect(result).rejects.toThrow(`Session not found: ${sessionId}`) expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId }, query: { directory: "/test" }, @@ -161,10 +162,13 @@ describe("integration: --on-complete", () => { beforeEach(() => { spyOn(console, "error").mockImplementation(() => {}) - spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ + spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({ exited: Promise.resolve(0), exitCode: 0, - } as unknown as ReturnType) + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType) }) afterEach(() => { @@ -186,7 +190,7 @@ describe("integration: --on-complete", () => { // then
expect(spawnSpy).toHaveBeenCalledTimes(1) - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.DURATION_MS).toBe("5000") @@ -208,10 +212,13 @@ describe("integration: option combinations", () => { spyOn(console, "error").mockImplementation(() => {}) mockStdout = createMockWriteStream() mockStderr = createMockWriteStream() - spawnSpy = spyOn(Bun, "spawn").mockReturnValue({ + spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue({ exited: Promise.resolve(0), exitCode: 0, - } as unknown as ReturnType) + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType) }) afterEach(() => { @@ -249,9 +256,9 @@ describe("integration: option combinations", () => { const emitted = mockStdout.writes[0]! expect(() => JSON.parse(emitted)).not.toThrow() expect(spawnSpy).toHaveBeenCalledTimes(1) - const [args] = spawnSpy.mock.calls[0] as Parameters + const [args] = spawnSpy.mock.calls[0] as Parameters expect(args).toEqual(["sh", "-c", "echo done"]) - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.SESSION_ID).toBe("session-123") expect(options?.env?.EXIT_CODE).toBe("0") expect(options?.env?.DURATION_MS).toBe("5000") diff --git a/src/cli/run/on-complete-hook.test.ts b/src/cli/run/on-complete-hook.test.ts index e560cc10c..930651a2d 100644 --- a/src/cli/run/on-complete-hook.test.ts +++ b/src/cli/run/on-complete-hook.test.ts @@ -1,4 +1,5 @@ import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test" +import * as spawnWithWindowsHideModule from "../../shared/spawn-with-windows-hide" import { executeOnCompleteHook } from "./on-complete-hook" describe("executeOnCompleteHook", () => { @@ -6,7 +7,10 @@ describe("executeOnCompleteHook", () => { return { 
exited: Promise.resolve(exitCode), exitCode, - } as unknown as ReturnType + stdout: undefined, + stderr: undefined, + kill: () => {}, + } satisfies ReturnType } let consoleErrorSpy: ReturnType> @@ -21,7 +25,7 @@ describe("executeOnCompleteHook", () => { it("executes command with correct env vars", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -35,7 +39,7 @@ describe("executeOnCompleteHook", () => { // then expect(spawnSpy).toHaveBeenCalledTimes(1) - const [args, options] = spawnSpy.mock.calls[0] as Parameters + const [args, options] = spawnSpy.mock.calls[0] as Parameters expect(args).toEqual(["sh", "-c", "echo test"]) expect(options?.env?.SESSION_ID).toBe("session-123") @@ -51,7 +55,7 @@ describe("executeOnCompleteHook", () => { it("env var values are strings", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -64,7 +68,7 @@ describe("executeOnCompleteHook", () => { }) // then - const [_, options] = spawnSpy.mock.calls[0] as Parameters + const [_, options] = spawnSpy.mock.calls[0] as Parameters expect(options?.env?.EXIT_CODE).toBe("1") expect(options?.env?.EXIT_CODE).toBeTypeOf("string") @@ -79,7 +83,7 @@ describe("executeOnCompleteHook", () => { it("empty command string is no-op", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -100,7 +104,7 @@ describe("executeOnCompleteHook", () => { it("whitespace-only command is no-op", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0)) + const spawnSpy = 
spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(0)) try { // when @@ -121,11 +125,11 @@ describe("executeOnCompleteHook", () => { it("command failure logs warning but does not throw", async () => { // given - const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1)) + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockReturnValue(createProc(1)) try { // when - await expect( + await expect( executeOnCompleteHook({ command: "false", sessionId: "session-123", @@ -149,13 +153,13 @@ describe("executeOnCompleteHook", () => { it("spawn error logs warning but does not throw", async () => { // given const spawnError = new Error("Command not found") - const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => { + const spawnSpy = spyOn(spawnWithWindowsHideModule, "spawnWithWindowsHide").mockImplementation(() => { throw spawnError }) try { // when - await expect( + await expect( executeOnCompleteHook({ command: "nonexistent-command", sessionId: "session-123", diff --git a/src/cli/run/on-complete-hook.ts b/src/cli/run/on-complete-hook.ts index 30c585439..b266ca887 100644 --- a/src/cli/run/on-complete-hook.ts +++ b/src/cli/run/on-complete-hook.ts @@ -1,4 +1,5 @@ import pc from "picocolors" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" export async function executeOnCompleteHook(options: { command: string @@ -17,7 +18,7 @@ export async function executeOnCompleteHook(options: { console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`)) try { - const proc = Bun.spawn(["sh", "-c", trimmedCommand], { + const proc = spawnWithWindowsHide(["sh", "-c", trimmedCommand], { env: { ...process.env, SESSION_ID: sessionId,
} from "node:path" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const @@ -41,7 +42,7 @@ export function collectCandidateBinaryPaths( export async function canExecuteBinary(binaryPath: string): Promise { try { - const proc = Bun.spawn([binaryPath, "--version"], { + const proc = spawnWithWindowsHide([binaryPath, "--version"], { stdout: "pipe", stderr: "pipe", }) diff --git a/src/cli/run/poll-for-completion.ts b/src/cli/run/poll-for-completion.ts index 684670cb8..529221094 100644 --- a/src/cli/run/poll-for-completion.ts +++ b/src/cli/run/poll-for-completion.ts @@ -8,11 +8,15 @@ const DEFAULT_POLL_INTERVAL_MS = 500 const DEFAULT_REQUIRED_CONSECUTIVE = 1 const ERROR_GRACE_CYCLES = 3 const MIN_STABILIZATION_MS = 1_000 +const DEFAULT_EVENT_WATCHDOG_MS = 30_000 // 30 seconds +const DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS = 60_000 // 60 seconds export interface PollOptions { pollIntervalMs?: number requiredConsecutive?: number minStabilizationMs?: number + eventWatchdogMs?: number + secondaryMeaningfulWorkTimeoutMs?: number } export async function pollForCompletion( @@ -28,9 +32,15 @@ export async function pollForCompletion( options.minStabilizationMs ?? MIN_STABILIZATION_MS const minStabilizationMs = rawMinStabilizationMs > 0 ? rawMinStabilizationMs : MIN_STABILIZATION_MS + const eventWatchdogMs = + options.eventWatchdogMs ?? DEFAULT_EVENT_WATCHDOG_MS + const secondaryMeaningfulWorkTimeoutMs = + options.secondaryMeaningfulWorkTimeoutMs ?? 
+ DEFAULT_SECONDARY_MEANINGFUL_WORK_TIMEOUT_MS let consecutiveCompleteChecks = 0 let errorCycleCount = 0 let firstWorkTimestamp: number | null = null + let secondaryTimeoutChecked = false const pollStartTimestamp = Date.now() while (!abortController.signal.aborted) { @@ -59,7 +69,37 @@ export async function pollForCompletion( errorCycleCount = 0 } - const mainSessionStatus = await getMainSessionStatus(ctx) + // Watchdog: if no events received for N seconds, verify session status via API + let mainSessionStatus: "idle" | "busy" | "retry" | null = null + if (eventState.lastEventTimestamp !== null) { + const timeSinceLastEvent = Date.now() - eventState.lastEventTimestamp + if (timeSinceLastEvent > eventWatchdogMs) { + // Events stopped coming - verify actual session state + console.log( + pc.yellow( + `\n No events for ${Math.round( + timeSinceLastEvent / 1000 + )}s, verifying session status...` + ) + ) + + // Force check session status directly + mainSessionStatus = await getMainSessionStatus(ctx) + if (mainSessionStatus === "idle") { + eventState.mainSessionIdle = true + } else if (mainSessionStatus === "busy" || mainSessionStatus === "retry") { + eventState.mainSessionIdle = false + } + + // Reset timestamp to avoid repeated checks + eventState.lastEventTimestamp = Date.now() + } + } + + // Only call getMainSessionStatus if watchdog didn't already check + if (mainSessionStatus === null) { + mainSessionStatus = await getMainSessionStatus(ctx) + } if (mainSessionStatus === "busy" || mainSessionStatus === "retry") { eventState.mainSessionIdle = false } else if (mainSessionStatus === "idle") { @@ -81,6 +121,50 @@ export async function pollForCompletion( consecutiveCompleteChecks = 0 continue } + + // Secondary timeout: if we've been polling for reasonable time but haven't + // received meaningful work via events, check if there's active work via API + // Only check once to avoid unnecessary API calls every poll cycle + if ( + Date.now() - pollStartTimestamp > 
secondaryMeaningfulWorkTimeoutMs && + !secondaryTimeoutChecked + ) { + secondaryTimeoutChecked = true + // Check if session actually has pending work (children, todos, etc.) + const childrenRes = await ctx.client.session.children({ + path: { id: ctx.sessionID }, + query: { directory: ctx.directory }, + }) + const children = normalizeSDKResponse(childrenRes, [] as unknown[]) + const todosRes = await ctx.client.session.todo({ + path: { id: ctx.sessionID }, + query: { directory: ctx.directory }, + }) + const todos = normalizeSDKResponse(todosRes, [] as unknown[]) + + const hasActiveChildren = + Array.isArray(children) && children.length > 0 + const hasActiveTodos = + Array.isArray(todos) && + todos.some( + (t: unknown) => + (t as { status?: string })?.status !== "completed" && + (t as { status?: string })?.status !== "cancelled" + ) + const hasActiveWork = hasActiveChildren || hasActiveTodos + + if (hasActiveWork) { + // Assume meaningful work is happening even without events + eventState.hasReceivedMeaningfulWork = true + console.log( + pc.yellow( + `\n No meaningful work events for ${Math.round( + secondaryMeaningfulWorkTimeoutMs / 1000 + )}s but session has active work - assuming in progress` + ) + ) + } + } } else { // Track when first meaningful work was received if (firstWorkTimestamp === null) { diff --git a/src/config/schema/agent-overrides.ts b/src/config/schema/agent-overrides.ts index 1103bf15a..eb5429fba 100644 --- a/src/config/schema/agent-overrides.ts +++ b/src/config/schema/agent-overrides.ts @@ -60,7 +60,9 @@ const BuiltinAgentOverridesSchema = z.object({ build: AgentOverrideConfigSchema.optional(), plan: AgentOverrideConfigSchema.optional(), sisyphus: AgentOverrideConfigSchema.optional(), - hephaestus: AgentOverrideConfigSchema.optional(), + hephaestus: AgentOverrideConfigSchema.extend({ + allow_non_gpt_model: z.boolean().optional(), + }).optional(), "sisyphus-junior": AgentOverrideConfigSchema.optional(), "OpenCode-Builder": 
AgentOverrideConfigSchema.optional(), prometheus: AgentOverrideConfigSchema.optional(), diff --git a/src/config/schema/categories.ts b/src/config/schema/categories.ts index b12005931..47c7d6c0b 100644 --- a/src/config/schema/categories.ts +++ b/src/config/schema/categories.ts @@ -20,6 +20,7 @@ export const CategoryConfigSchema = z.object({ textVerbosity: z.enum(["low", "medium", "high"]).optional(), tools: z.record(z.string(), z.boolean()).optional(), prompt_append: z.string().optional(), + max_prompt_tokens: z.number().int().positive().optional(), /** Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini/minimax models. */ is_unstable_agent: z.boolean().optional(), /** Disable this category. Disabled categories are excluded from task delegation. */ diff --git a/src/config/schema/oh-my-opencode-config.ts b/src/config/schema/oh-my-opencode-config.ts index ceb82d451..2ebaf43d8 100644 --- a/src/config/schema/oh-my-opencode-config.ts +++ b/src/config/schema/oh-my-opencode-config.ts @@ -27,7 +27,7 @@ export const OhMyOpenCodeConfigSchema = z.object({ /** Default agent name for `oh-my-opencode run` (env: OPENCODE_DEFAULT_AGENT) */ default_run_agent: z.string().optional(), disabled_mcps: z.array(AnyMcpNameSchema).optional(), - disabled_agents: z.array(BuiltinAgentNameSchema).optional(), + disabled_agents: z.array(z.string()).optional(), disabled_skills: z.array(BuiltinSkillNameSchema).optional(), disabled_hooks: z.array(z.string()).optional(), disabled_commands: z.array(BuiltinCommandNameSchema).optional(), diff --git a/src/features/background-agent/concurrency.test.ts b/src/features/background-agent/concurrency.test.ts index 102076eef..682d6029a 100644 --- a/src/features/background-agent/concurrency.test.ts +++ b/src/features/background-agent/concurrency.test.ts @@ -34,7 +34,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => { test("should return provider limit even when modelConcurrency exists but doesn't match", () => { // 
given const config: BackgroundTaskConfig = { - modelConcurrency: { "google/gemini-3-pro": 5 }, + modelConcurrency: { "google/gemini-3.1-pro": 5 }, providerConcurrency: { anthropic: 3 } } const manager = new ConcurrencyManager(config) @@ -95,7 +95,7 @@ describe("ConcurrencyManager.getConcurrencyLimit", () => { // when const modelLimit = manager.getConcurrencyLimit("anthropic/claude-sonnet-4-6") const providerLimit = manager.getConcurrencyLimit("anthropic/claude-opus-4-6") - const defaultLimit = manager.getConcurrencyLimit("google/gemini-3-pro") + const defaultLimit = manager.getConcurrencyLimit("google/gemini-3.1-pro") // then expect(modelLimit).toBe(10) diff --git a/src/features/background-agent/manager.test.ts b/src/features/background-agent/manager.test.ts index 7bd7709f1..2e78f63f3 100644 --- a/src/features/background-agent/manager.test.ts +++ b/src/features/background-agent/manager.test.ts @@ -191,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map return (manager as unknown as { pendingByParent: Map> }).pendingByParent } +function getPendingNotifications(manager: BackgroundManager): Map { + return (manager as unknown as { pendingNotifications: Map }).pendingNotifications +} + function getCompletionTimers(manager: BackgroundManager): Map> { return (manager as unknown as { completionTimers: Map> }).completionTimers } @@ -1057,6 +1061,49 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => { manager.shutdown() }) + + test("should queue notification when promptAsync aborts while parent is idle", async () => { + //#given + const promptMock = async () => { + const error = new Error("Request aborted while waiting for input") + error.name = "MessageAbortedError" + throw error + } + const client = { + session: { + prompt: promptMock, + promptAsync: promptMock, + abort: async () => ({}), + messages: async () => ({ data: [] }), + }, + } + const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as 
PluginInput) + const task: BackgroundTask = { + id: "task-aborted-idle-queue", + sessionID: "session-child", + parentSessionID: "session-parent", + parentMessageID: "msg-parent", + description: "task idle queue", + prompt: "test", + agent: "explore", + status: "completed", + startedAt: new Date(), + completedAt: new Date(), + } + getPendingByParent(manager).set("session-parent", new Set([task.id])) + + //#when + await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise }) + .notifyParentSession(task) + + //#then + const queuedNotifications = getPendingNotifications(manager).get("session-parent") ?? [] + expect(queuedNotifications).toHaveLength(1) + expect(queuedNotifications[0]).toContain("") + expect(queuedNotifications[0]).toContain("[ALL BACKGROUND TASKS COMPLETE]") + + manager.shutdown() + }) }) describe("BackgroundManager.notifyParentSession - notifications toggle", () => { @@ -1105,6 +1152,29 @@ describe("BackgroundManager.notifyParentSession - notifications toggle", () => { }) }) +describe("BackgroundManager.injectPendingNotificationsIntoChatMessage", () => { + test("should prepend queued notifications to first text part and clear queue", () => { + // given + const manager = createBackgroundManager() + manager.queuePendingNotification("session-parent", "queued-one") + manager.queuePendingNotification("session-parent", "queued-two") + const output = { + parts: [{ type: "text", text: "User prompt" }], + } + + // when + manager.injectPendingNotificationsIntoChatMessage(output, "session-parent") + + // then + expect(output.parts[0].text).toContain("queued-one") + expect(output.parts[0].text).toContain("queued-two") + expect(output.parts[0].text).toContain("User prompt") + expect(getPendingNotifications(manager).get("session-parent")).toBeUndefined() + + manager.shutdown() + }) +}) + function buildNotificationPromptBody( task: BackgroundTask, currentMessage: CurrentMessage | null diff --git a/src/features/background-agent/manager.ts 
b/src/features/background-agent/manager.ts index 61e5d8434..1bc9e2b4b 100644 --- a/src/features/background-agent/manager.ts +++ b/src/features/background-agent/manager.ts @@ -93,6 +93,7 @@ export class BackgroundManager { private tasks: Map private notifications: Map + private pendingNotifications: Map private pendingByParent: Map> // Track pending tasks per parent for batching private client: OpencodeClient private directory: string @@ -125,6 +126,7 @@ export class BackgroundManager { ) { this.tasks = new Map() this.notifications = new Map() + this.pendingNotifications = new Map() this.pendingByParent = new Map() this.client = ctx.client this.directory = ctx.directory @@ -917,6 +919,32 @@ export class BackgroundManager { this.notifications.delete(sessionID) } + queuePendingNotification(sessionID: string | undefined, notification: string): void { + if (!sessionID) return + const existingNotifications = this.pendingNotifications.get(sessionID) ?? [] + existingNotifications.push(notification) + this.pendingNotifications.set(sessionID, existingNotifications) + } + + injectPendingNotificationsIntoChatMessage(output: { parts: Array<{ type: string; text?: string; [key: string]: unknown }> }, sessionID: string): void { + const pendingNotifications = this.pendingNotifications.get(sessionID) + if (!pendingNotifications || pendingNotifications.length === 0) { + return + } + + this.pendingNotifications.delete(sessionID) + const notificationContent = pendingNotifications.join("\n\n") + const firstTextPartIndex = output.parts.findIndex((part) => part.type === "text") + + if (firstTextPartIndex === -1) { + output.parts.unshift(createInternalAgentTextPart(notificationContent)) + return + } + + const originalText = output.parts[firstTextPartIndex].text ?? "" + output.parts[firstTextPartIndex].text = `${notificationContent}\n\n---\n\n${originalText}` + } + /** * Validates that a session has actual assistant/tool output before marking complete. 
* Prevents premature completion when session.idle fires before agent responds. @@ -1340,6 +1368,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea taskId: task.id, parentSessionID: task.parentSessionID, }) + this.queuePendingNotification(task.parentSessionID, notification) } else { log("[background-agent] Failed to send notification:", error) } @@ -1568,6 +1597,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea this.concurrencyManager.clear() this.tasks.clear() this.notifications.clear() + this.pendingNotifications.clear() this.pendingByParent.clear() this.notificationQueueByParent.clear() this.queuesByKey.clear() diff --git a/src/features/boulder-state/storage.test.ts b/src/features/boulder-state/storage.test.ts index 967c090cf..e52174cef 100644 --- a/src/features/boulder-state/storage.test.ts +++ b/src/features/boulder-state/storage.test.ts @@ -269,6 +269,71 @@ describe("boulder-state", () => { expect(progress.isComplete).toBe(false) }) + test("should count space-indented unchecked checkbox", () => { + // given - plan file with a two-space indented checkbox + const planPath = join(TEST_DIR, "space-indented-plan.md") + writeFileSync(planPath, `# Plan + - [ ] indented task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(0) + expect(progress.isComplete).toBe(false) + }) + + test("should count tab-indented unchecked checkbox", () => { + // given - plan file with a tab-indented checkbox + const planPath = join(TEST_DIR, "tab-indented-plan.md") + writeFileSync(planPath, `# Plan + - [ ] tab-indented task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(0) + expect(progress.isComplete).toBe(false) + }) + + test("should count mixed top-level checked and indented unchecked checkboxes", () => { + // given - plan file with 
checked top-level and unchecked indented task + const planPath = join(TEST_DIR, "mixed-indented-plan.md") + writeFileSync(planPath, `# Plan +- [x] top-level completed task + - [ ] nested unchecked task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(2) + expect(progress.completed).toBe(1) + expect(progress.isComplete).toBe(false) + }) + + test("should count space-indented completed checkbox", () => { + // given - plan file with a two-space indented completed checkbox + const planPath = join(TEST_DIR, "indented-completed-plan.md") + writeFileSync(planPath, `# Plan + - [x] indented completed task +`) + + // when + const progress = getPlanProgress(planPath) + + // then + expect(progress.total).toBe(1) + expect(progress.completed).toBe(1) + expect(progress.isComplete).toBe(true) + }) + test("should return isComplete true when all checked", () => { // given - all tasks completed const planPath = join(TEST_DIR, "complete-plan.md") diff --git a/src/features/boulder-state/storage.ts b/src/features/boulder-state/storage.ts index 2b0d1bdec..ab84368b7 100644 --- a/src/features/boulder-state/storage.ts +++ b/src/features/boulder-state/storage.ts @@ -121,8 +121,8 @@ export function getPlanProgress(planPath: string): PlanProgress { const content = readFileSync(planPath, "utf-8") // Match markdown checkboxes: - [ ] or - [x] or - [X] - const uncheckedMatches = content.match(/^[-*]\s*\[\s*\]/gm) || [] - const checkedMatches = content.match(/^[-*]\s*\[[xX]\]/gm) || [] + const uncheckedMatches = content.match(/^\s*[-*]\s*\[\s*\]/gm) || [] + const checkedMatches = content.match(/^\s*[-*]\s*\[[xX]\]/gm) || [] const total = uncheckedMatches.length + checkedMatches.length const completed = checkedMatches.length @@ -150,7 +150,8 @@ export function getPlanName(planPath: string): string { export function createBoulderState( planPath: string, sessionId: string, - agent?: string + agent?: string, + worktreePath?: string, ): BoulderState { 
return { active_plan: planPath, @@ -158,5 +159,6 @@ export function createBoulderState( session_ids: [sessionId], plan_name: getPlanName(planPath), ...(agent !== undefined ? { agent } : {}), + ...(worktreePath !== undefined ? { worktree_path: worktreePath } : {}), } } diff --git a/src/features/boulder-state/types.ts b/src/features/boulder-state/types.ts index f56dcdaa2..b1a225380 100644 --- a/src/features/boulder-state/types.ts +++ b/src/features/boulder-state/types.ts @@ -16,6 +16,8 @@ export interface BoulderState { plan_name: string /** Agent type to use when resuming (e.g., 'atlas') */ agent?: string + /** Absolute path to the git worktree root where work happens */ + worktree_path?: string } export interface PlanProgress { diff --git a/src/features/builtin-commands/templates/start-work.ts b/src/features/builtin-commands/templates/start-work.ts index 4db39be1b..98ffd1e21 100644 --- a/src/features/builtin-commands/templates/start-work.ts +++ b/src/features/builtin-commands/templates/start-work.ts @@ -1,5 +1,14 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session. +## ARGUMENTS + +- \`/start-work [plan-name] [--worktree ]\` + - \`plan-name\` (optional): name or partial match of the plan to start + - \`--worktree \` (optional): absolute path to an existing git worktree to work in + - If specified and valid: hook pre-sets worktree_path in boulder.json + - If specified but invalid: you must run \`git worktree add \` first + - If omitted: you MUST choose or create a worktree (see Worktree Setup below) + ## WHAT TO DO 1. **Find available plans**: Search for Prometheus-generated plan files at \`.sisyphus/plans/\` @@ -15,17 +24,24 @@ export const START_WORK_TEMPLATE = `You are starting a Sisyphus work session. - If ONE plan: auto-select it - If MULTIPLE plans: show list with timestamps, ask user to select -4. **Create/Update boulder.json**: +4. **Worktree Setup** (when \`worktree_path\` not already set in boulder.json): + 1. 
\`git worktree list --porcelain\` — see available worktrees + 2. Create: \`git worktree add \` + 3. Update boulder.json to add \`"worktree_path": ""\` + 4. All work happens inside that worktree directory + +5. **Create/Update boulder.json**: \`\`\`json { "active_plan": "/absolute/path/to/plan.md", "started_at": "ISO_TIMESTAMP", "session_ids": ["session_id_1", "session_id_2"], - "plan_name": "plan-name" + "plan_name": "plan-name", + "worktree_path": "/absolute/path/to/git/worktree" } \`\`\` -5. **Read the plan file** and start executing tasks according to atlas workflow +6. **Read the plan file** and start executing tasks according to atlas workflow ## OUTPUT FORMAT @@ -49,6 +65,7 @@ Resuming Work Session Active Plan: {plan-name} Progress: {completed}/{total} tasks Sessions: {count} (appending current session) +Worktree: {worktree_path} Reading plan and continuing from last incomplete task... \`\`\` @@ -60,6 +77,7 @@ Starting Work Session Plan: {plan-name} Session ID: {session_id} Started: {timestamp} +Worktree: {worktree_path} Reading plan and beginning execution... \`\`\` @@ -68,5 +86,6 @@ Reading plan and beginning execution... 
- The session_id is injected by the hook - use it directly - Always update boulder.json BEFORE starting work +- Always set worktree_path in boulder.json before executing any tasks - Read the FULL plan file before delegating any tasks - Follow atlas delegation protocols (7-section format)` diff --git a/src/features/task-toast-manager/manager.test.ts b/src/features/task-toast-manager/manager.test.ts index 323792815..a490f894b 100644 --- a/src/features/task-toast-manager/manager.test.ts +++ b/src/features/task-toast-manager/manager.test.ts @@ -162,7 +162,7 @@ describe("TaskToastManager", () => { description: "Task with category default model", agent: "sisyphus-junior", isBackground: false, - modelInfo: { model: "google/gemini-3-pro", type: "category-default" as const }, + modelInfo: { model: "google/gemini-3.1-pro", type: "category-default" as const }, } // when - addTask is called diff --git a/src/hooks/anthropic-context-window-limit-recovery/state.ts b/src/hooks/anthropic-context-window-limit-recovery/state.ts index 1ee1001fc..70fd69f53 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/state.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/state.ts @@ -6,7 +6,7 @@ export function getOrCreateRetryState( ): RetryState { let state = autoCompactState.retryStateBySession.get(sessionID) if (!state) { - state = { attempt: 0, lastAttemptTime: 0 } + state = { attempt: 0, lastAttemptTime: 0, firstAttemptTime: 0 } autoCompactState.retryStateBySession.set(sessionID, state) } return state diff --git a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts new file mode 100644 index 000000000..fa0fb295d --- /dev/null +++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.test.ts @@ -0,0 +1,122 @@ +import { afterEach, beforeEach, describe, expect, mock, test } from "bun:test" +import { runSummarizeRetryStrategy } from 
"./summarize-retry-strategy" +import type { AutoCompactState, ParsedTokenLimitError, RetryState } from "./types" +import type { OhMyOpenCodeConfig } from "../../config" + +type TimeoutCall = { + delay: number +} + +function createAutoCompactState(): AutoCompactState { + return { + pendingCompact: new Set(), + errorDataBySession: new Map(), + retryStateBySession: new Map(), + truncateStateBySession: new Map(), + emptyContentAttemptBySession: new Map(), + compactionInProgress: new Set(), + } +} + +describe("runSummarizeRetryStrategy", () => { + const sessionID = "ses_retry_timeout" + const directory = "/tmp" + let autoCompactState: AutoCompactState + + const summarizeMock = mock(() => Promise.resolve()) + const showToastMock = mock(() => Promise.resolve()) + const client = { + session: { + summarize: summarizeMock, + messages: mock(() => Promise.resolve({ data: [] })), + promptAsync: mock(() => Promise.resolve()), + revert: mock(() => Promise.resolve()), + }, + tui: { + showToast: showToastMock, + }, + } + + beforeEach(() => { + autoCompactState = createAutoCompactState() + summarizeMock.mockReset() + showToastMock.mockReset() + summarizeMock.mockResolvedValue(undefined) + showToastMock.mockResolvedValue(undefined) + }) + + afterEach(() => { + globalThis.setTimeout = originalSetTimeout + }) + + const originalSetTimeout = globalThis.setTimeout + + test("stops retries when total summarize timeout is exceeded", async () => { + //#given + autoCompactState.pendingCompact.add(sessionID) + autoCompactState.errorDataBySession.set(sessionID, { + currentTokens: 250000, + maxTokens: 200000, + errorType: "token_limit_exceeded", + }) + autoCompactState.retryStateBySession.set(sessionID, { + attempt: 1, + lastAttemptTime: Date.now(), + firstAttemptTime: Date.now() - 130000, + }) + + //#when + await runSummarizeRetryStrategy({ + sessionID, + msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" }, + autoCompactState, + client: client as never, + directory, + pluginConfig: {} 
as OhMyOpenCodeConfig, + }) + + //#then + expect(summarizeMock).not.toHaveBeenCalled() + expect(autoCompactState.pendingCompact.has(sessionID)).toBe(false) + expect(autoCompactState.errorDataBySession.has(sessionID)).toBe(false) + expect(autoCompactState.retryStateBySession.has(sessionID)).toBe(false) + expect(showToastMock).toHaveBeenCalledWith( + expect.objectContaining({ + body: expect.objectContaining({ + title: "Auto Compact Timed Out", + }), + }), + ) + }) + + test("caps retry delay by remaining total timeout window", async () => { + //#given + const timeoutCalls: TimeoutCall[] = [] + globalThis.setTimeout = ((_: (...args: unknown[]) => void, delay?: number) => { + timeoutCalls.push({ delay: delay ?? 0 }) + return 1 as unknown as ReturnType + }) as typeof setTimeout + + autoCompactState.retryStateBySession.set(sessionID, { + attempt: 1, + lastAttemptTime: Date.now(), + firstAttemptTime: Date.now() - 119700, + }) + summarizeMock.mockRejectedValueOnce(new Error("rate limited")) + + //#when + await runSummarizeRetryStrategy({ + sessionID, + msg: { providerID: "anthropic", modelID: "claude-sonnet-4-6" }, + autoCompactState, + client: client as never, + directory, + pluginConfig: {} as OhMyOpenCodeConfig, + }) + + //#then + expect(timeoutCalls.length).toBe(1) + expect(timeoutCalls[0]!.delay).toBeGreaterThan(0) + expect(timeoutCalls[0]!.delay).toBeLessThanOrEqual(500) + }) +}) diff --git a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts index 1dc9e2852..008ff74a5 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/summarize-retry-strategy.ts @@ -7,6 +7,8 @@ import { sanitizeEmptyMessagesBeforeSummarize } from "./message-builder" import { fixEmptyMessages } from "./empty-content-recovery" import { resolveCompactionModel } from "../shared/compaction-model-resolver" 
+ +const SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS = 120_000 export async function runSummarizeRetryStrategy(params: { sessionID: string msg: Record @@ -18,6 +20,27 @@ export async function runSummarizeRetryStrategy(params: { messageIndex?: number }): Promise { const retryState = getOrCreateRetryState(params.autoCompactState, params.sessionID) + const now = Date.now() + + if (retryState.firstAttemptTime === 0) { + retryState.firstAttemptTime = now + } + + const elapsedTimeMs = now - retryState.firstAttemptTime + if (elapsedTimeMs >= SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS) { + clearSessionState(params.autoCompactState, params.sessionID) + await params.client.tui + .showToast({ + body: { + title: "Auto Compact Timed Out", + message: "Compaction retries exceeded the timeout window. Please start a new session.", + variant: "error", + duration: 5000, + }, + }) + .catch(() => {}) + return + } if (params.errorType?.includes("non-empty content")) { const attempt = getEmptyContentAttempt(params.autoCompactState, params.sessionID) @@ -52,6 +75,7 @@ export async function runSummarizeRetryStrategy(params: { if (Date.now() - retryState.lastAttemptTime > 300000) { retryState.attempt = 0 + retryState.firstAttemptTime = Date.now() params.autoCompactState.truncateStateBySession.delete(params.sessionID) } @@ -92,10 +116,26 @@ export async function runSummarizeRetryStrategy(params: { }) return } catch { + const remainingTimeMs = SUMMARIZE_RETRY_TOTAL_TIMEOUT_MS - (Date.now() - retryState.firstAttemptTime) + if (remainingTimeMs <= 0) { + clearSessionState(params.autoCompactState, params.sessionID) + await params.client.tui + .showToast({ + body: { + title: "Auto Compact Timed Out", + message: "Compaction retries exceeded the timeout window. 
Please start a new session.", + variant: "error", + duration: 5000, + }, + }) + .catch(() => {}) + return + } + const delay = RETRY_CONFIG.initialDelayMs * Math.pow(RETRY_CONFIG.backoffFactor, retryState.attempt - 1) - const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs) + const cappedDelay = Math.min(delay, RETRY_CONFIG.maxDelayMs, remainingTimeMs) setTimeout(() => { void runSummarizeRetryStrategy(params) diff --git a/src/hooks/anthropic-context-window-limit-recovery/types.ts b/src/hooks/anthropic-context-window-limit-recovery/types.ts index 40b31d064..5c62b81fb 100644 --- a/src/hooks/anthropic-context-window-limit-recovery/types.ts +++ b/src/hooks/anthropic-context-window-limit-recovery/types.ts @@ -11,6 +11,7 @@ export interface ParsedTokenLimitError { export interface RetryState { attempt: number lastAttemptTime: number + firstAttemptTime: number } export interface TruncateState { diff --git a/src/hooks/atlas/boulder-continuation-injector.ts b/src/hooks/atlas/boulder-continuation-injector.ts index 289668b4b..4f8e35802 100644 --- a/src/hooks/atlas/boulder-continuation-injector.ts +++ b/src/hooks/atlas/boulder-continuation-injector.ts @@ -14,6 +14,7 @@ export async function injectBoulderContinuation(input: { remaining: number total: number agent?: string + worktreePath?: string backgroundManager?: BackgroundManager sessionState: SessionState }): Promise { @@ -24,6 +25,7 @@ export async function injectBoulderContinuation(input: { remaining, total, agent, + worktreePath, backgroundManager, sessionState, } = input @@ -37,9 +39,11 @@ export async function injectBoulderContinuation(input: { return } + const worktreeContext = worktreePath ? 
`\n\n[Worktree: ${worktreePath}]` : "" const prompt = BOULDER_CONTINUATION_PROMPT.replace(/{PLAN_NAME}/g, planName) + - `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + `\n\n[Status: ${total - remaining}/${total} completed, ${remaining} remaining]` + + worktreeContext try { log(`[${HOOK_NAME}] Injecting boulder continuation`, { sessionID, planName, remaining }) @@ -62,6 +66,7 @@ export async function injectBoulderContinuation(input: { log(`[${HOOK_NAME}] Boulder continuation injected`, { sessionID }) } catch (err) { sessionState.promptFailureCount += 1 + sessionState.lastFailureAt = Date.now() log(`[${HOOK_NAME}] Boulder continuation failed`, { sessionID, error: String(err), diff --git a/src/hooks/atlas/event-handler.ts b/src/hooks/atlas/event-handler.ts index 76a3a5004..0f7187fc4 100644 --- a/src/hooks/atlas/event-handler.ts +++ b/src/hooks/atlas/event-handler.ts @@ -10,6 +10,7 @@ import { getLastAgentFromSession } from "./session-last-agent" import type { AtlasHookOptions, SessionState } from "./types" const CONTINUATION_COOLDOWN_MS = 5000 +const FAILURE_BACKOFF_MS = 5 * 60 * 1000 export function createAtlasEventHandler(input: { ctx: PluginInput @@ -53,6 +54,7 @@ export function createAtlasEventHandler(input: { } const state = getState(sessionID) + const now = Date.now() if (state.lastEventWasAbortError) { state.lastEventWasAbortError = false @@ -61,11 +63,18 @@ export function createAtlasEventHandler(input: { } if (state.promptFailureCount >= 2) { - log(`[${HOOK_NAME}] Skipped: continuation disabled after repeated prompt failures`, { - sessionID, - promptFailureCount: state.promptFailureCount, - }) - return + const timeSinceLastFailure = state.lastFailureAt !== undefined ? 
now - state.lastFailureAt : Number.POSITIVE_INFINITY + if (timeSinceLastFailure < FAILURE_BACKOFF_MS) { + log(`[${HOOK_NAME}] Skipped: continuation in backoff after repeated failures`, { + sessionID, + promptFailureCount: state.promptFailureCount, + backoffRemaining: FAILURE_BACKOFF_MS - timeSinceLastFailure, + }) + return + } + + state.promptFailureCount = 0 + state.lastFailureAt = undefined } const backgroundManager = options?.backgroundManager @@ -92,17 +101,15 @@ export function createAtlasEventHandler(input: { const lastAgentKey = getAgentConfigKey(lastAgent ?? "") const requiredAgent = getAgentConfigKey(boulderState.agent ?? "atlas") const lastAgentMatchesRequired = lastAgentKey === requiredAgent - const boulderAgentWasNotExplicitlySet = boulderState.agent === undefined const boulderAgentDefaultsToAtlas = requiredAgent === "atlas" const lastAgentIsSisyphus = lastAgentKey === "sisyphus" - const allowSisyphusWhenDefaultAtlas = boulderAgentWasNotExplicitlySet && boulderAgentDefaultsToAtlas && lastAgentIsSisyphus - const agentMatches = lastAgentMatchesRequired || allowSisyphusWhenDefaultAtlas + const allowSisyphusForAtlasBoulder = boulderAgentDefaultsToAtlas && lastAgentIsSisyphus + const agentMatches = lastAgentMatchesRequired || allowSisyphusForAtlasBoulder if (!agentMatches) { log(`[${HOOK_NAME}] Skipped: last agent does not match boulder agent`, { sessionID, lastAgent: lastAgent ?? 
"unknown", requiredAgent, - boulderAgentExplicitlySet: boulderState.agent !== undefined, }) return } @@ -113,7 +120,6 @@ export function createAtlasEventHandler(input: { return } - const now = Date.now() if (state.lastContinuationInjectedAt && now - state.lastContinuationInjectedAt < CONTINUATION_COOLDOWN_MS) { log(`[${HOOK_NAME}] Skipped: continuation cooldown active`, { sessionID, @@ -132,6 +138,7 @@ export function createAtlasEventHandler(input: { remaining, total: progress.total, agent: boulderState.agent, + worktreePath: boulderState.worktree_path, backgroundManager, sessionState: state, }) diff --git a/src/hooks/atlas/index.test.ts b/src/hooks/atlas/index.test.ts index 065f20b9e..36f308270 100644 --- a/src/hooks/atlas/index.test.ts +++ b/src/hooks/atlas/index.test.ts @@ -933,8 +933,8 @@ describe("atlas hook", () => { expect(callArgs.body.parts[0].text).toContain("2 remaining") }) - test("should not inject when last agent does not match boulder agent", async () => { - // given - boulder state with incomplete plan, but last agent does NOT match + test("should inject when last agent is sisyphus and boulder targets atlas explicitly", async () => { + // given - boulder explicitly set to atlas, but last agent is sisyphus (initial state after /start-work) const planPath = join(TEST_DIR, "test-plan.md") writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") @@ -947,7 +947,7 @@ describe("atlas hook", () => { } writeBoulderState(TEST_DIR, state) - // given - last agent is NOT the boulder agent + // given - last agent is sisyphus (typical state right after /start-work) cleanupMessageStorage(MAIN_SESSION_ID) setupMessageStorage(MAIN_SESSION_ID, "sisyphus") @@ -962,7 +962,39 @@ describe("atlas hook", () => { }, }) - // then - should NOT call prompt because agent does not match + // then - should call prompt because sisyphus is always allowed for atlas boulders + expect(mockInput._promptMock).toHaveBeenCalled() + }) + + test("should not inject when last agent is 
non-sisyphus and does not match boulder agent", async () => { + // given - boulder explicitly set to atlas, last agent is hephaestus (unrelated agent) + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + agent: "atlas", + } + writeBoulderState(TEST_DIR, state) + + cleanupMessageStorage(MAIN_SESSION_ID) + setupMessageStorage(MAIN_SESSION_ID, "hephaestus") + + const mockInput = createMockPluginInput() + const hook = createAtlasHook(mockInput) + + // when + await hook.handler({ + event: { + type: "session.idle", + properties: { sessionID: MAIN_SESSION_ID }, + }, + }) + + // then - should NOT call prompt because hephaestus does not match atlas or sisyphus expect(mockInput._promptMock).not.toHaveBeenCalled() }) @@ -1122,6 +1154,144 @@ describe("atlas hook", () => { } }) + test("should keep skipping continuation during 5-minute backoff after 2 consecutive failures", async () => { + //#given - boulder state with incomplete plan and prompt always fails + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock(() => Promise.reject(new Error("Bad Request"))) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - third idle occurs inside 5-minute backoff window + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: 
"session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 60000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - third attempt should still be skipped + expect(promptMock).toHaveBeenCalledTimes(2) + } finally { + Date.now = originalDateNow + } + }) + + test("should retry continuation after 5-minute backoff expires following 2 consecutive failures", async () => { + //#given - boulder state with incomplete plan and prompt always fails + const planPath = join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock(() => Promise.reject(new Error("Bad Request"))) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - third idle occurs after 5+ minutes + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 300000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - third attempt should run after backoff expiration + expect(promptMock).toHaveBeenCalledTimes(3) + } finally { + Date.now = originalDateNow + } + }) + + test("should reset prompt failure counter after successful retry beyond backoff window", async () => { + //#given - boulder state with incomplete plan and success on first retry after backoff + const planPath = 
join(TEST_DIR, "test-plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2") + + const state: BoulderState = { + active_plan: planPath, + started_at: "2026-01-02T10:00:00Z", + session_ids: [MAIN_SESSION_ID], + plan_name: "test-plan", + } + writeBoulderState(TEST_DIR, state) + + const promptMock = mock((): Promise => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.reject(new Error("Bad Request"))) + promptMock.mockImplementationOnce(() => Promise.resolve(undefined)) + const mockInput = createMockPluginInput({ promptMock }) + const hook = createAtlasHook(mockInput) + + const originalDateNow = Date.now + let now = 0 + Date.now = () => now + + try { + //#when - fail twice, recover after backoff with success, then fail twice again + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 300000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + now += 6000 + + await hook.handler({ event: { type: "session.idle", properties: { sessionID: MAIN_SESSION_ID } } }) + await flushMicrotasks() + + //#then - success retry resets counter, so two additional failures are allowed before skip + expect(promptMock).toHaveBeenCalledTimes(5) + } finally { + Date.now = originalDateNow + } + }) + test("should reset continuation failure state on session.compacted 
event", async () => { //#given - boulder state with incomplete plan and prompt always fails const planPath = join(TEST_DIR, "test-plan.md") diff --git a/src/hooks/atlas/types.ts b/src/hooks/atlas/types.ts index e1919cd2a..7302f8307 100644 --- a/src/hooks/atlas/types.ts +++ b/src/hooks/atlas/types.ts @@ -26,4 +26,5 @@ export interface SessionState { lastEventWasAbortError?: boolean lastContinuationInjectedAt?: number promptFailureCount: number + lastFailureAt?: number } diff --git a/src/hooks/background-notification/hook.ts b/src/hooks/background-notification/hook.ts index f417bdbad..3f40ffadb 100644 --- a/src/hooks/background-notification/hook.ts +++ b/src/hooks/background-notification/hook.ts @@ -9,6 +9,14 @@ interface EventInput { event: Event } +interface ChatMessageInput { + sessionID: string +} + +interface ChatMessageOutput { + parts: Array<{ type: string; text?: string; [key: string]: unknown }> +} + /** * Background notification hook - handles event routing to BackgroundManager. 
* @@ -20,7 +28,15 @@ export function createBackgroundNotificationHook(manager: BackgroundManager) { manager.handleEvent(event) } + const chatMessageHandler = async ( + input: ChatMessageInput, + output: ChatMessageOutput, + ): Promise => { + manager.injectPendingNotificationsIntoChatMessage(output, input.sessionID) + } + return { + "chat.message": chatMessageHandler, event: eventHandler, } } diff --git a/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts b/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts index 428d6bbaa..20db3906a 100644 --- a/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts +++ b/src/hooks/interactive-bash-session/interactive-bash-session-tracker.ts @@ -6,6 +6,7 @@ import { import { OMO_SESSION_PREFIX, buildSessionReminderMessage } from "./constants"; import type { InteractiveBashSessionState } from "./types"; import { subagentSessions } from "../../features/claude-code-session-state"; +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"; type AbortSession = (args: { path: { id: string } }) => Promise @@ -19,7 +20,7 @@ async function killAllTrackedSessions( ): Promise { for (const sessionName of state.tmuxSessions) { try { - const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { + const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], { stdout: "ignore", stderr: "ignore", }) diff --git a/src/hooks/interactive-bash-session/state-manager.ts b/src/hooks/interactive-bash-session/state-manager.ts index e655bfafd..c3a286421 100644 --- a/src/hooks/interactive-bash-session/state-manager.ts +++ b/src/hooks/interactive-bash-session/state-manager.ts @@ -1,6 +1,7 @@ import type { InteractiveBashSessionState } from "./types"; import { loadInteractiveBashSessionState } from "./storage"; import { OMO_SESSION_PREFIX } from "./constants"; +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide"; export function 
getOrCreateState(sessionID: string, sessionStates: Map): InteractiveBashSessionState { if (!sessionStates.has(sessionID)) { @@ -24,7 +25,7 @@ export async function killAllTrackedSessions( ): Promise { for (const sessionName of state.tmuxSessions) { try { - const proc = Bun.spawn(["tmux", "kill-session", "-t", sessionName], { + const proc = spawnWithWindowsHide(["tmux", "kill-session", "-t", sessionName], { stdout: "ignore", stderr: "ignore", }); diff --git a/src/hooks/no-hephaestus-non-gpt/hook.ts b/src/hooks/no-hephaestus-non-gpt/hook.ts index a1d08a2a1..e621c6d01 100644 --- a/src/hooks/no-hephaestus-non-gpt/hook.ts +++ b/src/hooks/no-hephaestus-non-gpt/hook.ts @@ -12,12 +12,16 @@ const TOAST_MESSAGE = [ ].join("\n") const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus") -function showToast(ctx: PluginInput, sessionID: string): void { +type NoHephaestusNonGptHookOptions = { + allowNonGptModel?: boolean +} + +function showToast(ctx: PluginInput, sessionID: string, variant: "error" | "warning"): void { ctx.client.tui.showToast({ body: { title: TOAST_TITLE, message: TOAST_MESSAGE, - variant: "error", + variant, duration: 10000, }, }).catch((error) => { @@ -28,7 +32,10 @@ function showToast(ctx: PluginInput, sessionID: string): void { }) } -export function createNoHephaestusNonGptHook(ctx: PluginInput) { +export function createNoHephaestusNonGptHook( + ctx: PluginInput, + options?: NoHephaestusNonGptHookOptions, +) { return { "chat.message": async (input: { sessionID: string @@ -40,9 +47,13 @@ export function createNoHephaestusNonGptHook(ctx: PluginInput) { const rawAgent = input.agent ?? getSessionAgent(input.sessionID) ?? "" const agentKey = getAgentConfigKey(rawAgent) const modelID = input.model?.modelID + const allowNonGptModel = options?.allowNonGptModel === true if (agentKey === "hephaestus" && modelID && !isGptModel(modelID)) { - showToast(ctx, input.sessionID) + showToast(ctx, input.sessionID, allowNonGptModel ? 
"warning" : "error") + if (allowNonGptModel) { + return + } input.agent = SISYPHUS_DISPLAY if (output?.message) { output.message.agent = SISYPHUS_DISPLAY diff --git a/src/hooks/no-hephaestus-non-gpt/index.test.ts b/src/hooks/no-hephaestus-non-gpt/index.test.ts index 51e1f3a0a..3440cccc8 100644 --- a/src/hooks/no-hephaestus-non-gpt/index.test.ts +++ b/src/hooks/no-hephaestus-non-gpt/index.test.ts @@ -1,3 +1,5 @@ +/// + import { describe, expect, spyOn, test } from "bun:test" import { _resetForTesting, updateSessionAgent } from "../../features/claude-code-session-state" import { getAgentDisplayName } from "../../shared/agent-display-names" @@ -8,7 +10,7 @@ const SISYPHUS_DISPLAY = getAgentDisplayName("sisyphus") function createOutput() { return { - message: {}, + message: {} as { agent?: string; [key: string]: unknown }, parts: [], } } @@ -16,7 +18,7 @@ function createOutput() { describe("no-hephaestus-non-gpt hook", () => { test("shows toast on every chat.message when hephaestus uses non-gpt model", async () => { // given - hephaestus with claude model - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -49,9 +51,38 @@ describe("no-hephaestus-non-gpt hook", () => { }) }) + test("shows warning and does not switch agent when allow_non_gpt_model is enabled", async () => { + // given - hephaestus with claude model and opt-out enabled + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") + const hook = createNoHephaestusNonGptHook({ + client: { tui: { showToast } }, + } as any, { + allowNonGptModel: true, + }) + + const output = createOutput() + + // when - chat.message runs + await hook["chat.message"]?.({ + sessionID: "ses_opt_out", + agent: HEPHAESTUS_DISPLAY, + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, output) + + // then - warning toast is shown but agent 
is not switched + expect(showToast).toHaveBeenCalledTimes(1) + expect(output.message.agent).toBeUndefined() + expect(showToast.mock.calls[0]?.[0]).toMatchObject({ + body: { + title: "NEVER Use Hephaestus with Non-GPT", + variant: "warning", + }, + }) + }) + test("does not show toast when hephaestus uses gpt model", async () => { // given - hephaestus with gpt model - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -72,7 +103,7 @@ describe("no-hephaestus-non-gpt hook", () => { test("does not show toast for non-hephaestus agent", async () => { // given - sisyphus with claude model (non-gpt) - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) @@ -95,7 +126,7 @@ describe("no-hephaestus-non-gpt hook", () => { // given - session agent saved as hephaestus _resetForTesting() updateSessionAgent("ses_4", HEPHAESTUS_DISPLAY) - const showToast = spyOn({ fn: async () => ({}) }, "fn") + const showToast = spyOn({ fn: async (_input: unknown) => ({}) }, "fn") const hook = createNoHephaestusNonGptHook({ client: { tui: { showToast } }, } as any) diff --git a/src/hooks/preemptive-compaction.test.ts b/src/hooks/preemptive-compaction.test.ts index 12912b3a8..279562aa6 100644 --- a/src/hooks/preemptive-compaction.test.ts +++ b/src/hooks/preemptive-compaction.test.ts @@ -45,6 +45,23 @@ function createMockCtx() { } } +function setupImmediateTimeouts(): () => void { + const originalSetTimeout = globalThis.setTimeout + const originalClearTimeout = globalThis.clearTimeout + + globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => { + callback(...args) + return 1 as unknown as ReturnType + }) as typeof setTimeout + 
+ globalThis.clearTimeout = (() => {}) as typeof clearTimeout + + return () => { + globalThis.setTimeout = originalSetTimeout + globalThis.clearTimeout = originalClearTimeout + } +} + describe("preemptive-compaction", () => { let ctx: ReturnType @@ -63,7 +80,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after is called // #then session.messages() should NOT be called it("should use cached token info instead of fetching session.messages()", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_test1" // Simulate message.updated with token info below threshold @@ -101,7 +118,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after is called // #then should skip without fetching it("should skip gracefully when no cached token info exists", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const output = { title: "", output: "test", metadata: null } await hook["tool.execute.after"]( @@ -116,7 +133,7 @@ describe("preemptive-compaction", () => { // #when tool.execute.after runs // #then should trigger summarize it("should trigger compaction when usage exceeds threshold", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_high" // 170K input + 10K cache = 180K → 90% of 200K @@ -153,7 +170,7 @@ describe("preemptive-compaction", () => { it("should trigger compaction for google-vertex-anthropic provider", async () => { //#given google-vertex-anthropic usage above threshold - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_vertex_anthropic_high" await hook.event({ @@ -191,7 +208,7 @@ 
describe("preemptive-compaction", () => { // #given session deleted // #then cache should be cleaned up it("should clean up cache on session.deleted", async () => { - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_del" await hook.event({ @@ -228,7 +245,7 @@ describe("preemptive-compaction", () => { it("should log summarize errors instead of swallowing them", async () => { //#given - const hook = createPreemptiveCompactionHook(ctx as never) + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) const sessionID = "ses_log_error" const summarizeError = new Error("summarize failed") ctx.client.session.summarize.mockRejectedValueOnce(summarizeError) @@ -343,4 +360,58 @@ describe("preemptive-compaction", () => { //#then expect(ctx.client.session.summarize).not.toHaveBeenCalled() }) + + it("should clear in-progress lock when summarize times out", async () => { + //#given + const restoreTimeouts = setupImmediateTimeouts() + const hook = createPreemptiveCompactionHook(ctx as never, {} as never) + const sessionID = "ses_timeout" + + ctx.client.session.summarize + .mockImplementationOnce(() => new Promise(() => {})) + .mockResolvedValueOnce({}) + + try { + await hook.event({ + event: { + type: "message.updated", + properties: { + info: { + role: "assistant", + sessionID, + providerID: "anthropic", + modelID: "claude-sonnet-4-6", + finish: true, + tokens: { + input: 170000, + output: 0, + reasoning: 0, + cache: { read: 10000, write: 0 }, + }, + }, + }, + }, + }) + + //#when + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_timeout_1" }, + { title: "", output: "test", metadata: null }, + ) + + await hook["tool.execute.after"]( + { tool: "bash", sessionID, callID: "call_timeout_2" }, + { title: "", output: "test", metadata: null }, + ) + + //#then + expect(ctx.client.session.summarize).toHaveBeenCalledTimes(2) + 
expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", { + sessionID, + error: expect.stringContaining("Compaction summarize timed out"), + }) + } finally { + restoreTimeouts() + } + }) }) diff --git a/src/hooks/preemptive-compaction.ts b/src/hooks/preemptive-compaction.ts index e2ac74718..d6c9bf130 100644 --- a/src/hooks/preemptive-compaction.ts +++ b/src/hooks/preemptive-compaction.ts @@ -3,6 +3,7 @@ import type { OhMyOpenCodeConfig } from "../config" import { resolveCompactionModel } from "./shared/compaction-model-resolver" const DEFAULT_ACTUAL_LIMIT = 200_000 +const PREEMPTIVE_COMPACTION_TIMEOUT_MS = 120_000 type ModelCacheStateLike = { anthropicContext1MEnabled: boolean @@ -31,6 +32,26 @@ interface CachedCompactionState { tokens: TokenInfo } +function withTimeout( + promise: Promise, + timeoutMs: number, + errorMessage: string, +): Promise { + let timeoutID: ReturnType | undefined + + const timeoutPromise = new Promise((_, reject) => { + timeoutID = setTimeout(() => { + reject(new Error(errorMessage)) + }, timeoutMs) + }) + + return Promise.race([promise, timeoutPromise]).finally(() => { + if (timeoutID !== undefined) { + clearTimeout(timeoutID) + } + }) +} + function isAnthropicProvider(providerID: string): boolean { return providerID === "anthropic" || providerID === "google-vertex-anthropic" } @@ -94,11 +115,15 @@ export function createPreemptiveCompactionHook( modelID ) - await ctx.client.session.summarize({ - path: { id: sessionID }, - body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never, - query: { directory: ctx.directory }, - }) + await withTimeout( + ctx.client.session.summarize({ + path: { id: sessionID }, + body: { providerID: targetProviderID, modelID: targetModelID, auto: true } as never, + query: { directory: ctx.directory }, + }), + PREEMPTIVE_COMPACTION_TIMEOUT_MS, + `Compaction summarize timed out after ${PREEMPTIVE_COMPACTION_TIMEOUT_MS}ms`, + ) compactedSessions.add(sessionID) } 
catch (error) { diff --git a/src/hooks/ralph-loop/completion-promise-detector.ts b/src/hooks/ralph-loop/completion-promise-detector.ts index d2b89b10c..95a43c289 100644 --- a/src/hooks/ralph-loop/completion-promise-detector.ts +++ b/src/hooks/ralph-loop/completion-promise-detector.ts @@ -79,8 +79,8 @@ export async function detectCompletionInSessionMessages( if (assistantMessages.length === 0) return false const pattern = buildPromisePattern(options.promise) - const recentAssistants = assistantMessages.slice(-3) - for (const assistant of recentAssistants) { + for (let index = assistantMessages.length - 1; index >= 0; index -= 1) { + const assistant = assistantMessages[index] if (!assistant.parts) continue let responseText = "" diff --git a/src/hooks/ralph-loop/index.test.ts b/src/hooks/ralph-loop/index.test.ts index 994773229..8492ec6ae 100644 --- a/src/hooks/ralph-loop/index.test.ts +++ b/src/hooks/ralph-loop/index.test.ts @@ -494,6 +494,7 @@ describe("ralph-loop", () => { config: { enabled: true, default_max_iterations: 200, + default_strategy: "continue", }, }) @@ -708,6 +709,57 @@ describe("ralph-loop", () => { expect(promptCalls[0].text).toContain("CALCULATOR_DONE") }) + test("should skip concurrent idle events for same session when handler is in flight", async () => { + // given - active loop with delayed prompt injection + let releasePromptAsync: (() => void) | undefined + const promptAsyncBlocked = new Promise((resolve) => { + releasePromptAsync = resolve + }) + let firstPromptStartedResolve: (() => void) | undefined + const firstPromptStarted = new Promise((resolve) => { + firstPromptStartedResolve = resolve + }) + + const mockInput = createMockPluginInput() as { + client: { + session: { + promptAsync: (opts: { path: { id: string }; body: { parts: Array<{ type: string; text: string }> } }) => Promise + } + } + } + + const originalPromptAsync = mockInput.client.session.promptAsync + let promptAsyncCalls = 0 + mockInput.client.session.promptAsync = async 
(opts) => { + promptAsyncCalls += 1 + if (promptAsyncCalls === 1) { + firstPromptStartedResolve?.() + } + await promptAsyncBlocked + return originalPromptAsync(opts) + } + + const hook = createRalphLoopHook(mockInput as Parameters[0]) + hook.startLoop("session-123", "Build feature", { maxIterations: 10 }) + + // when - second idle arrives while first idle processing is still in flight + const firstIdle = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + await firstPromptStarted + const secondIdle = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + releasePromptAsync?.() + await Promise.all([firstIdle, secondIdle]) + + // then - only one continuation should be injected + expect(promptAsyncCalls).toBe(1) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) + test("should clear loop state on user abort (MessageAbortedError)", async () => { // given - active loop const hook = createRalphLoopHook(createMockPluginInput()) @@ -782,8 +834,8 @@ describe("ralph-loop", () => { expect(hook.getState()).toBeNull() }) - test("should NOT detect completion if promise is older than last 3 assistant messages", async () => { - // given - promise appears in an assistant message older than last 3 + test("should detect completion even when promise is older than previous narrow window", async () => { + // given - promise appears in an older assistant message with additional assistant output after it mockSessionMessages = [ { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, { info: { role: "assistant" }, parts: [{ type: "text", text: "Promise early DONE" }] }, @@ -801,9 +853,40 @@ describe("ralph-loop", () => { event: { type: "session.idle", properties: { sessionID: "session-123" } }, }) - // then - loop should continue (promise is older than last 3 assistant messages) - expect(promptCalls.length).toBe(1) - 
expect(hook.getState()?.iteration).toBe(2) + // then - loop should complete because all assistant messages are scanned + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() + }) + + test("should detect completion when many assistant messages are emitted after promise", async () => { + // given - completion promise followed by long assistant output sequence + mockSessionMessages = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Start task" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Done now DONE" }] }, + ] + + for (let index = 1; index <= 25; index += 1) { + mockSessionMessages.push({ + info: { role: "assistant" }, + parts: [{ type: "text", text: `Post-completion assistant output ${index}` }], + }) + } + + const hook = createRalphLoopHook(createMockPluginInput(), { + getTranscriptPath: () => join(TEST_DIR, "nonexistent.jsonl"), + }) + hook.startLoop("session-123", "Build something", { completionPromise: "DONE" }) + + // when - session goes idle + await hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-123" } }, + }) + + // then - loop should complete despite large trailing output + expect(promptCalls.length).toBe(0) + expect(toastCalls.some((t) => t.title === "Ralph Loop Complete!")).toBe(true) + expect(hook.getState()).toBeNull() }) test("should allow starting new loop while previous loop is active (different session)", async () => { diff --git a/src/hooks/ralph-loop/iteration-continuation.ts b/src/hooks/ralph-loop/iteration-continuation.ts index 15fea10a9..be067b76c 100644 --- a/src/hooks/ralph-loop/iteration-continuation.ts +++ b/src/hooks/ralph-loop/iteration-continuation.ts @@ -33,15 +33,6 @@ export async function continueIteration( return } - const boundState = options.loopState.setSessionID(newSessionID) - if (!boundState) { - log(`[${HOOK_NAME}] Failed to bind loop state to new session`, { - 
previousSessionID: options.previousSessionID, - newSessionID, - }) - return - } - await injectContinuationPrompt(ctx, { sessionID: newSessionID, inheritFromSessionID: options.previousSessionID, @@ -51,6 +42,16 @@ export async function continueIteration( }) await selectSessionInTui(ctx.client, newSessionID) + + const boundState = options.loopState.setSessionID(newSessionID) + if (!boundState) { + log(`[${HOOK_NAME}] Failed to bind loop state to new session`, { + previousSessionID: options.previousSessionID, + newSessionID, + }) + return + } + return } diff --git a/src/hooks/ralph-loop/ralph-loop-event-handler.ts b/src/hooks/ralph-loop/ralph-loop-event-handler.ts index b0fa5ed71..7d86d79eb 100644 --- a/src/hooks/ralph-loop/ralph-loop-event-handler.ts +++ b/src/hooks/ralph-loop/ralph-loop-event-handler.ts @@ -25,6 +25,8 @@ export function createRalphLoopEventHandler( ctx: PluginInput, options: RalphLoopEventHandlerOptions, ) { + const inFlightSessions = new Set() + return async ({ event }: { event: { type: string; properties?: unknown } }): Promise => { const props = event.properties as Record | undefined @@ -32,115 +34,127 @@ export function createRalphLoopEventHandler( const sessionID = props?.sessionID as string | undefined if (!sessionID) return - if (options.sessionRecovery.isRecovering(sessionID)) { - log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + if (inFlightSessions.has(sessionID)) { + log(`[${HOOK_NAME}] Skipped: handler in flight`, { sessionID }) return } - const state = options.loopState.getState() - if (!state || !state.active) { - return - } - - if (state.session_id && state.session_id !== sessionID) { - if (options.checkSessionExists) { - try { - const exists = await options.checkSessionExists(state.session_id) - if (!exists) { - options.loopState.clear() - log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, { - orphanedSessionId: state.session_id, - currentSessionId: sessionID, - }) - return - } - } catch (err) { - 
log(`[${HOOK_NAME}] Failed to check session existence`, { - sessionId: state.session_id, - error: String(err), - }) - } - } - return - } - - const transcriptPath = options.getTranscriptPath(sessionID) - const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise) - const completionViaApi = completionViaTranscript - ? false - : await detectCompletionInSessionMessages(ctx, { - sessionID, - promise: state.completion_promise, - apiTimeoutMs: options.apiTimeoutMs, - directory: options.directory, - }) - - if (completionViaTranscript || completionViaApi) { - log(`[${HOOK_NAME}] Completion detected!`, { - sessionID, - iteration: state.iteration, - promise: state.completion_promise, - detectedVia: completionViaTranscript - ? "transcript_file" - : "session_messages_api", - }) - options.loopState.clear() - - const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" : "Ralph Loop Complete!" - const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)` - await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {}) - return - } - - if (state.iteration >= state.max_iterations) { - log(`[${HOOK_NAME}] Max iterations reached`, { - sessionID, - iteration: state.iteration, - max: state.max_iterations, - }) - options.loopState.clear() - - await ctx.client.tui?.showToast?.({ - body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 }, - }).catch(() => {}) - return - } - - const newState = options.loopState.incrementIteration() - if (!newState) { - log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID }) - return - } - - log(`[${HOOK_NAME}] Continuing loop`, { - sessionID, - iteration: newState.iteration, - max: newState.max_iterations, - }) - - await ctx.client.tui?.showToast?.({ - body: 
{ - title: "Ralph Loop", - message: `Iteration ${newState.iteration}/${newState.max_iterations}`, - variant: "info", - duration: 2000, - }, - }).catch(() => {}) + inFlightSessions.add(sessionID) try { - await continueIteration(ctx, newState, { - previousSessionID: sessionID, - directory: options.directory, - apiTimeoutMs: options.apiTimeoutMs, - loopState: options.loopState, - }) - } catch (err) { - log(`[${HOOK_NAME}] Failed to inject continuation`, { + + if (options.sessionRecovery.isRecovering(sessionID)) { + log(`[${HOOK_NAME}] Skipped: in recovery`, { sessionID }) + return + } + + const state = options.loopState.getState() + if (!state || !state.active) { + return + } + + if (state.session_id && state.session_id !== sessionID) { + if (options.checkSessionExists) { + try { + const exists = await options.checkSessionExists(state.session_id) + if (!exists) { + options.loopState.clear() + log(`[${HOOK_NAME}] Cleared orphaned state from deleted session`, { + orphanedSessionId: state.session_id, + currentSessionId: sessionID, + }) + return + } + } catch (err) { + log(`[${HOOK_NAME}] Failed to check session existence`, { + sessionId: state.session_id, + error: String(err), + }) + } + } + return + } + + const transcriptPath = options.getTranscriptPath(sessionID) + const completionViaTranscript = detectCompletionInTranscript(transcriptPath, state.completion_promise) + const completionViaApi = completionViaTranscript + ? false + : await detectCompletionInSessionMessages(ctx, { + sessionID, + promise: state.completion_promise, + apiTimeoutMs: options.apiTimeoutMs, + directory: options.directory, + }) + + if (completionViaTranscript || completionViaApi) { + log(`[${HOOK_NAME}] Completion detected!`, { + sessionID, + iteration: state.iteration, + promise: state.completion_promise, + detectedVia: completionViaTranscript + ? "transcript_file" + : "session_messages_api", + }) + options.loopState.clear() + + const title = state.ultrawork ? "ULTRAWORK LOOP COMPLETE!" 
: "Ralph Loop Complete!" + const message = state.ultrawork ? `JUST ULW ULW! Task completed after ${state.iteration} iteration(s)` : `Task completed after ${state.iteration} iteration(s)` + await ctx.client.tui?.showToast?.({ body: { title, message, variant: "success", duration: 5000 } }).catch(() => {}) + return + } + + if (state.iteration >= state.max_iterations) { + log(`[${HOOK_NAME}] Max iterations reached`, { + sessionID, + iteration: state.iteration, + max: state.max_iterations, + }) + options.loopState.clear() + + await ctx.client.tui?.showToast?.({ + body: { title: "Ralph Loop Stopped", message: `Max iterations (${state.max_iterations}) reached without completion`, variant: "warning", duration: 5000 }, + }).catch(() => {}) + return + } + + const newState = options.loopState.incrementIteration() + if (!newState) { + log(`[${HOOK_NAME}] Failed to increment iteration`, { sessionID }) + return + } + + log(`[${HOOK_NAME}] Continuing loop`, { sessionID, - error: String(err), + iteration: newState.iteration, + max: newState.max_iterations, }) + + await ctx.client.tui?.showToast?.({ + body: { + title: "Ralph Loop", + message: `Iteration ${newState.iteration}/${newState.max_iterations}`, + variant: "info", + duration: 2000, + }, + }).catch(() => {}) + + try { + await continueIteration(ctx, newState, { + previousSessionID: sessionID, + directory: options.directory, + apiTimeoutMs: options.apiTimeoutMs, + loopState: options.loopState, + }) + } catch (err) { + log(`[${HOOK_NAME}] Failed to inject continuation`, { + sessionID, + error: String(err), + }) + } + return + } finally { + inFlightSessions.delete(sessionID) } - return } if (event.type === "session.deleted") { diff --git a/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts b/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts new file mode 100644 index 000000000..5fcd35a2e --- /dev/null +++ b/src/hooks/ralph-loop/reset-strategy-race-condition.test.ts @@ -0,0 +1,111 @@ +/// +import { describe, 
expect, test } from "bun:test" +import { createRalphLoopHook } from "./index" + +function createDeferred(): { + promise: Promise + resolve: () => void +} { + let resolvePromise: (() => void) | null = null + const promise = new Promise((resolve) => { + resolvePromise = resolve + }) + + return { + promise, + resolve: () => { + if (resolvePromise) { + resolvePromise() + } + }, + } +} + +async function waitUntil(condition: () => boolean): Promise { + for (let index = 0; index < 100; index++) { + if (condition()) { + return + } + + await new Promise((resolve) => { + setTimeout(resolve, 0) + }) + } + + throw new Error("Condition was not met in time") +} + +describe("ralph-loop reset strategy race condition", () => { + test("should skip duplicate idle while reset iteration handling is in flight", async () => { + // given - reset strategy loop with blocked TUI session switch + const promptCalls: Array<{ sessionID: string; text: string }> = [] + const createSessionCalls: Array<{ parentID?: string }> = [] + let selectSessionCalls = 0 + const selectSessionDeferred = createDeferred() + + const hook = createRalphLoopHook({ + directory: process.cwd(), + client: { + session: { + prompt: async (options: { + path: { id: string } + body: { parts: Array<{ type: string; text: string }> } + }) => { + promptCalls.push({ + sessionID: options.path.id, + text: options.body.parts[0].text, + }) + return {} + }, + promptAsync: async (options: { + path: { id: string } + body: { parts: Array<{ type: string; text: string }> } + }) => { + promptCalls.push({ + sessionID: options.path.id, + text: options.body.parts[0].text, + }) + return {} + }, + create: async (options: { + body: { parentID?: string; title?: string } + query?: { directory?: string } + }) => { + createSessionCalls.push({ parentID: options.body.parentID }) + return { data: { id: `new-session-${createSessionCalls.length}` } } + }, + messages: async () => ({ data: [] }), + }, + tui: { + showToast: async () => ({}), + selectSession: 
async () => { + selectSessionCalls += 1 + await selectSessionDeferred.promise + return {} + }, + }, + }, + } as unknown as Parameters[0]) + + hook.startLoop("session-old", "Build feature", { strategy: "reset" }) + + // when - first idle is in-flight and old session fires idle again before TUI switch resolves + const firstIdleEvent = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-old" } }, + }) + + await waitUntil(() => selectSessionCalls > 0) + + const secondIdleEvent = hook.event({ + event: { type: "session.idle", properties: { sessionID: "session-old" } }, + }) + + selectSessionDeferred.resolve() + await Promise.all([firstIdleEvent, secondIdleEvent]) + + // then - duplicate idle should be skipped to prevent concurrent continuation injection + expect(createSessionCalls.length).toBe(1) + expect(promptCalls.length).toBe(1) + expect(hook.getState()?.iteration).toBe(2) + }) +}) diff --git a/src/hooks/runtime-fallback/index.test.ts b/src/hooks/runtime-fallback/index.test.ts index 7660f1954..d9c873f16 100644 --- a/src/hooks/runtime-fallback/index.test.ts +++ b/src/hooks/runtime-fallback/index.test.ts @@ -125,7 +125,7 @@ describe("runtime-fallback", () => { await hook.event({ event: { type: "session.created", - properties: { info: { id: sessionID, model: "google/gemini-3-pro" } }, + properties: { info: { id: sessionID, model: "google/gemini-3.1-pro" } }, }, }) @@ -1841,7 +1841,7 @@ describe("runtime-fallback", () => { test("should apply fallback model on next chat.message after error", async () => { const hook = createRuntimeFallbackHook(createMockPluginInput(), { config: createMockConfig({ notify_on_fallback: false }), - pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3-pro"]), + pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.2", "google/gemini-3.1-pro"]), }) const sessionID = "test-session-switch" SessionCategoryRegistry.register(sessionID, "test") @@ -1916,7 +1916,7 @@ 
describe("runtime-fallback", () => { const input = createMockPluginInput() const hook = createRuntimeFallbackHook(input, { config: createMockConfig({ notify_on_fallback: false }), - pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3-pro"]), + pluginConfig: createMockPluginConfigWithAgentFallback("oracle", ["openai/gpt-5.2", "google/gemini-3.1-pro"]), }) const sessionID = "test-agent-fallback" diff --git a/src/hooks/session-notification-input-needed.test.ts b/src/hooks/session-notification-input-needed.test.ts index 5e8552907..ee1614b88 100644 --- a/src/hooks/session-notification-input-needed.test.ts +++ b/src/hooks/session-notification-input-needed.test.ts @@ -3,6 +3,7 @@ const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:te const { createSessionNotification } = require("./session-notification") const { setMainSession, subagentSessions, _resetForTesting } = require("../features/claude-code-session-state") const utils = require("./session-notification-utils") +const sender = require("./session-notification-sender") describe("session-notification input-needed events", () => { let notificationCalls: string[] @@ -37,6 +38,10 @@ describe("session-notification input-needed events", () => { spyOn(utils, "getNotifySendPath").mockResolvedValue("/usr/bin/notify-send") spyOn(utils, "getPowershellPath").mockResolvedValue("powershell") spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) + spyOn(sender, "detectPlatform").mockReturnValue("darwin") + spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx: unknown, _platform: unknown, _title: unknown, message: string) => { + notificationCalls.push(message) + }) }) afterEach(() => { @@ -47,7 +52,7 @@ describe("session-notification input-needed events", () => { test("sends question notification when question tool asks for input", async () => { const sessionID = "main-question" setMainSession(sessionID) - const hook = 
createSessionNotification(createMockPluginInput()) + const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false }) await hook({ event: { @@ -74,7 +79,7 @@ describe("session-notification input-needed events", () => { test("sends permission notification for permission events", async () => { const sessionID = "main-permission" setMainSession(sessionID) - const hook = createSessionNotification(createMockPluginInput()) + const hook = createSessionNotification(createMockPluginInput(), { enforceMainSessionFilter: false }) await hook({ event: { diff --git a/src/hooks/session-notification.test.ts b/src/hooks/session-notification.test.ts index 2f0377a4c..cf895ba98 100644 --- a/src/hooks/session-notification.test.ts +++ b/src/hooks/session-notification.test.ts @@ -1,8 +1,9 @@ -import { describe, expect, test, beforeEach, afterEach, spyOn } from "bun:test" +const { describe, expect, test, beforeEach, afterEach, spyOn } = require("bun:test") import { createSessionNotification } from "./session-notification" import { setMainSession, subagentSessions, _resetForTesting } from "../features/claude-code-session-state" import * as utils from "./session-notification-utils" +import * as sender from "./session-notification-sender" describe("session-notification", () => { let notificationCalls: string[] @@ -40,6 +41,10 @@ describe("session-notification", () => { spyOn(utils, "getPaplayPath").mockResolvedValue("/usr/bin/paplay") spyOn(utils, "getAplayPath").mockResolvedValue("/usr/bin/aplay") spyOn(utils, "startBackgroundCheck").mockImplementation(() => {}) + spyOn(sender, "detectPlatform").mockReturnValue("darwin") + spyOn(sender, "sendSessionNotification").mockImplementation(async (_ctx, _platform, _title, message) => { + notificationCalls.push(message) + }) }) afterEach(() => { @@ -105,6 +110,7 @@ describe("session-notification", () => { const hook = createSessionNotification(createMockPluginInput(), { idleConfirmationDelay: 10, 
skipIfIncompleteTodos: false, + enforceMainSessionFilter: false, }) // when - main session goes idle @@ -332,6 +338,7 @@ describe("session-notification", () => { const hook = createSessionNotification(createMockPluginInput(), { idleConfirmationDelay: 10, skipIfIncompleteTodos: false, + enforceMainSessionFilter: false, }) // when - session goes idle twice diff --git a/src/hooks/session-notification.ts b/src/hooks/session-notification.ts index 48e0d288b..3b3dcc514 100644 --- a/src/hooks/session-notification.ts +++ b/src/hooks/session-notification.ts @@ -4,11 +4,9 @@ import { startBackgroundCheck, } from "./session-notification-utils" import { - detectPlatform, - getDefaultSoundPath, - playSessionNotificationSound, - sendSessionNotification, + type Platform, } from "./session-notification-sender" +import * as sessionNotificationSender from "./session-notification-sender" import { hasIncompleteTodos } from "./session-todo-status" import { createIdleNotificationScheduler } from "./session-notification-scheduler" @@ -25,13 +23,14 @@ interface SessionNotificationConfig { skipIfIncompleteTodos?: boolean /** Maximum number of sessions to track before cleanup (default: 100) */ maxTrackedSessions?: number + enforceMainSessionFilter?: boolean } export function createSessionNotification( ctx: PluginInput, config: SessionNotificationConfig = {} ) { - const currentPlatform = detectPlatform() - const defaultSoundPath = getDefaultSoundPath(currentPlatform) + const currentPlatform: Platform = sessionNotificationSender.detectPlatform() + const defaultSoundPath = sessionNotificationSender.getDefaultSoundPath(currentPlatform) startBackgroundCheck(currentPlatform) @@ -45,6 +44,7 @@ export function createSessionNotification( idleConfirmationDelay: 1500, skipIfIncompleteTodos: true, maxTrackedSessions: 100, + enforceMainSessionFilter: true, ...config, } @@ -53,8 +53,8 @@ export function createSessionNotification( platform: currentPlatform, config: mergedConfig, hasIncompleteTodos, - send: 
sendSessionNotification, - playSound: playSessionNotificationSound, + send: sessionNotificationSender.sendSessionNotification, + playSound: sessionNotificationSender.playSessionNotificationSound, }) const QUESTION_TOOLS = new Set(["question", "ask_user_question", "askuserquestion"]) @@ -81,8 +81,10 @@ export function createSessionNotification( const shouldNotifyForSession = (sessionID: string): boolean => { if (subagentSessions.has(sessionID)) return false - const mainSessionID = getMainSessionID() - if (mainSessionID && sessionID !== mainSessionID) return false + if (mergedConfig.enforceMainSessionFilter) { + const mainSessionID = getMainSessionID() + if (mainSessionID && sessionID !== mainSessionID) return false + } return true } @@ -146,9 +148,14 @@ export function createSessionNotification( if (!shouldNotifyForSession(sessionID)) return scheduler.markSessionActivity(sessionID) - await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, mergedConfig.permissionMessage) + await sessionNotificationSender.sendSessionNotification( + ctx, + currentPlatform, + mergedConfig.title, + mergedConfig.permissionMessage, + ) if (mergedConfig.playSound && mergedConfig.soundPath) { - await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) + await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) } return } @@ -168,9 +175,9 @@ export function createSessionNotification( ? 
mergedConfig.permissionMessage : mergedConfig.questionMessage - await sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message) + await sessionNotificationSender.sendSessionNotification(ctx, currentPlatform, mergedConfig.title, message) if (mergedConfig.playSound && mergedConfig.soundPath) { - await playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) + await sessionNotificationSender.playSessionNotificationSound(ctx, currentPlatform, mergedConfig.soundPath) } } } diff --git a/src/hooks/start-work/index.test.ts b/src/hooks/start-work/index.test.ts index e633e85a9..26b87eba4 100644 --- a/src/hooks/start-work/index.test.ts +++ b/src/hooks/start-work/index.test.ts @@ -7,9 +7,11 @@ import { createStartWorkHook } from "./index" import { writeBoulderState, clearBoulderState, + readBoulderState, } from "../../features/boulder-state" import type { BoulderState } from "../../features/boulder-state" import * as sessionState from "../../features/claude-code-session-state" +import * as worktreeDetector from "./worktree-detector" describe("start-work hook", () => { let testDir: string @@ -402,4 +404,152 @@ describe("start-work hook", () => { updateSpy.mockRestore() }) }) + + describe("worktree support", () => { + let detectSpy: ReturnType + + beforeEach(() => { + detectSpy = spyOn(worktreeDetector, "detectWorktreePath").mockReturnValue(null) + }) + + afterEach(() => { + detectSpy.mockRestore() + }) + + test("should inject model-decides instructions when no --worktree flag", async () => { + // given - single plan, no worktree flag + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123"
}, output) + + // then - model-decides instructions should appear + expect(output.parts[0].text).toContain("Worktree Setup Required") + expect(output.parts[0].text).toContain("git worktree list --porcelain") + expect(output.parts[0].text).toContain("git worktree add") + }) + + test("should inject worktree path when --worktree flag is valid", async () => { + // given - single plan + valid worktree path + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + detectSpy.mockReturnValue("/validated/worktree") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /validated/worktree\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - validated path shown, no model-decides instructions + expect(output.parts[0].text).toContain("**Worktree**: /validated/worktree") + expect(output.parts[0].text).not.toContain("Worktree Setup Required") + }) + + test("should store worktree_path in boulder when --worktree is valid", async () => { + // given - plan + valid worktree + const plansDir = join(testDir, ".sisyphus", "plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + detectSpy.mockReturnValue("/valid/wt") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /valid/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - boulder.json has worktree_path + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBe("/valid/wt") + }) + + test("should NOT store worktree_path when --worktree path is invalid", async () => { + // given - plan + invalid worktree path (detectWorktreePath returns null) + const plansDir = join(testDir, ".sisyphus", 
"plans") + mkdirSync(plansDir, { recursive: true }) + writeFileSync(join(plansDir, "my-plan.md"), "# Plan\n- [ ] Task 1") + // detectSpy already returns null by default + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /nonexistent/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-123" }, output) + + // then - worktree_path absent, setup instructions present + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBeUndefined() + expect(output.parts[0].text).toContain("needs setup") + expect(output.parts[0].text).toContain("git worktree add /nonexistent/wt") + }) + + test("should update boulder worktree_path on resume when new --worktree given", async () => { + // given - existing boulder with old worktree, user provides new worktree + const planPath = join(testDir, "plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1") + const existingState: BoulderState = { + active_plan: planPath, + started_at: "2026-01-01T00:00:00Z", + session_ids: ["old-session"], + plan_name: "plan", + worktree_path: "/old/wt", + } + writeBoulderState(testDir, existingState) + detectSpy.mockReturnValue("/new/wt") + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "\n--worktree /new/wt\n" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-456" }, output) + + // then - boulder reflects updated worktree and new session appended + const state = readBoulderState(testDir) + expect(state?.worktree_path).toBe("/new/wt") + expect(state?.session_ids).toContain("session-456") + }) + + test("should show existing worktree on resume when no --worktree flag", async () => { + // given - existing boulder already has worktree_path, no flag given + const planPath = join(testDir, "plan.md") + writeFileSync(planPath, "# Plan\n- [ ] Task 1") + const existingState: BoulderState = { + active_plan: planPath, + 
started_at: "2026-01-01T00:00:00Z", + session_ids: ["old-session"], + plan_name: "plan", + worktree_path: "/existing/wt", + } + writeBoulderState(testDir, existingState) + + const hook = createStartWorkHook(createMockPluginInput()) + const output = { + parts: [{ type: "text", text: "" }], + } + + // when + await hook["chat.message"]({ sessionID: "session-789" }, output) + + // then - shows existing worktree, no model-decides instructions + expect(output.parts[0].text).toContain("/existing/wt") + expect(output.parts[0].text).not.toContain("Worktree Setup Required") + }) + }) }) diff --git a/src/hooks/start-work/index.ts b/src/hooks/start-work/index.ts index 41cb0b1a4..ee270861a 100644 --- a/src/hooks/start-work/index.ts +++ b/src/hooks/start-work/index.ts @@ -1 +1,4 @@ export { HOOK_NAME, createStartWorkHook } from "./start-work-hook" +export { detectWorktreePath } from "./worktree-detector" +export type { ParsedUserRequest } from "./parse-user-request" +export { parseUserRequest } from "./parse-user-request" diff --git a/src/hooks/start-work/parse-user-request.test.ts b/src/hooks/start-work/parse-user-request.test.ts new file mode 100644 index 000000000..e5d61a4c5 --- /dev/null +++ b/src/hooks/start-work/parse-user-request.test.ts @@ -0,0 +1,78 @@ +/// + +import { describe, expect, test } from "bun:test" +import { parseUserRequest } from "./parse-user-request" + +describe("parseUserRequest", () => { + describe("when no user-request tag", () => { + test("#given prompt without tag #when parsing #then returns nulls", () => { + const result = parseUserRequest("Just a regular message without any tags") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when user-request tag is empty", () => { + test("#given empty user-request tag #when parsing #then returns nulls", () => { + const result = parseUserRequest(" ") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + 
describe("when only plan name given", () => { + test("#given plan name without worktree flag #when parsing #then returns plan name with null worktree", () => { + const result = parseUserRequest("\nmy-plan\n") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when only --worktree flag given", () => { + test("#given --worktree with path only #when parsing #then returns worktree path with null plan", () => { + const result = parseUserRequest("--worktree /home/user/repo-feat") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBe("/home/user/repo-feat") + }) + }) + + describe("when plan name and --worktree are both given", () => { + test("#given plan name before --worktree #when parsing #then returns both", () => { + const result = parseUserRequest("my-plan --worktree /path/to/worktree") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/worktree") + }) + + test("#given --worktree before plan name #when parsing #then returns both", () => { + const result = parseUserRequest("--worktree /path/to/worktree my-plan") + expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/worktree") + }) + }) + + describe("when --worktree flag has no path", () => { + test("#given --worktree without path #when parsing #then worktree path is null", () => { + const result = parseUserRequest("--worktree") + expect(result.explicitWorktreePath).toBeNull() + }) + }) + + describe("when ultrawork keywords are present", () => { + test("#given plan name with ultrawork keyword #when parsing #then strips keyword from plan name", () => { + const result = parseUserRequest("my-plan ultrawork") + expect(result.planName).toBe("my-plan") + }) + + test("#given plan name with ulw keyword and worktree #when parsing #then strips ulw, preserves worktree", () => { + const result = parseUserRequest("my-plan ulw --worktree /path/to/wt") + 
expect(result.planName).toBe("my-plan") + expect(result.explicitWorktreePath).toBe("/path/to/wt") + }) + + test("#given only ultrawork keyword with worktree #when parsing #then plan name is null, worktree preserved", () => { + const result = parseUserRequest("ultrawork --worktree /wt") + expect(result.planName).toBeNull() + expect(result.explicitWorktreePath).toBe("/wt") + }) + }) +}) diff --git a/src/hooks/start-work/parse-user-request.ts b/src/hooks/start-work/parse-user-request.ts new file mode 100644 index 000000000..627deb67a --- /dev/null +++ b/src/hooks/start-work/parse-user-request.ts @@ -0,0 +1,29 @@ +const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi +const WORKTREE_FLAG_PATTERN = /--worktree(?:\s+(\S+))?/ + +export interface ParsedUserRequest { + planName: string | null + explicitWorktreePath: string | null +} + +export function parseUserRequest(promptText: string): ParsedUserRequest { + const match = promptText.match(/\s*([\s\S]*?)\s*<\/user-request>/i) + if (!match) return { planName: null, explicitWorktreePath: null } + + let rawArg = match[1].trim() + if (!rawArg) return { planName: null, explicitWorktreePath: null } + + const worktreeMatch = rawArg.match(WORKTREE_FLAG_PATTERN) + const explicitWorktreePath = worktreeMatch ? (worktreeMatch[1] ?? 
null) : null + + if (worktreeMatch) { + rawArg = rawArg.replace(worktreeMatch[0], "").trim() + } + + const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim() + + return { + planName: cleanedArg || null, + explicitWorktreePath, + } +} diff --git a/src/hooks/start-work/start-work-hook.ts b/src/hooks/start-work/start-work-hook.ts index 77c76d240..03cdb540e 100644 --- a/src/hooks/start-work/start-work-hook.ts +++ b/src/hooks/start-work/start-work-hook.ts @@ -1,3 +1,4 @@ +import { statSync } from "node:fs" import type { PluginInput } from "@opencode-ai/plugin" import { readBoulderState, @@ -11,11 +12,11 @@ import { } from "../../features/boulder-state" import { log } from "../../shared/logger" import { updateSessionAgent } from "../../features/claude-code-session-state" +import { detectWorktreePath } from "./worktree-detector" +import { parseUserRequest } from "./parse-user-request" export const HOOK_NAME = "start-work" as const -const KEYWORD_PATTERN = /\b(ultrawork|ulw)\b/gi - interface StartWorkHookInput { sessionID: string messageID?: string @@ -25,73 +26,76 @@ interface StartWorkHookOutput { parts: Array<{ type: string; text?: string }> } -function extractUserRequestPlanName(promptText: string): string | null { - const userRequestMatch = promptText.match(/\s*([\s\S]*?)\s*<\/user-request>/i) - if (!userRequestMatch) return null - - const rawArg = userRequestMatch[1].trim() - if (!rawArg) return null - - const cleanedArg = rawArg.replace(KEYWORD_PATTERN, "").trim() - return cleanedArg || null -} - function findPlanByName(plans: string[], requestedName: string): string | null { const lowerName = requestedName.toLowerCase() - - const exactMatch = plans.find(p => getPlanName(p).toLowerCase() === lowerName) + const exactMatch = plans.find((p) => getPlanName(p).toLowerCase() === lowerName) if (exactMatch) return exactMatch - - const partialMatch = plans.find(p => getPlanName(p).toLowerCase().includes(lowerName)) + const partialMatch = plans.find((p) => 
getPlanName(p).toLowerCase().includes(lowerName)) return partialMatch || null } +const MODEL_DECIDES_WORKTREE_BLOCK = ` +## Worktree Setup Required + +No worktree specified. Before starting work, you MUST choose or create one: + +1. \`git worktree list --porcelain\` — list existing worktrees +2. Create if needed: \`git worktree add \` +3. Update \`.sisyphus/boulder.json\` — add \`"worktree_path": ""\` +4. Work exclusively inside that worktree directory` + +function resolveWorktreeContext( + explicitWorktreePath: string | null, +): { worktreePath: string | undefined; block: string } { + if (explicitWorktreePath === null) { + return { worktreePath: undefined, block: MODEL_DECIDES_WORKTREE_BLOCK } + } + + const validatedPath = detectWorktreePath(explicitWorktreePath) + if (validatedPath) { + return { worktreePath: validatedPath, block: `\n**Worktree**: ${validatedPath}` } + } + + return { + worktreePath: undefined, + block: `\n**Worktree** (needs setup): \`git worktree add ${explicitWorktreePath} \`, then add \`"worktree_path"\` to boulder.json`, + } +} + export function createStartWorkHook(ctx: PluginInput) { return { - "chat.message": async ( - input: StartWorkHookInput, - output: StartWorkHookOutput - ): Promise => { + "chat.message": async (input: StartWorkHookInput, output: StartWorkHookOutput): Promise => { const parts = output.parts - const promptText = parts - ?.filter((p) => p.type === "text" && p.text) - .map((p) => p.text) - .join("\n") - .trim() || "" + const promptText = + parts + ?.filter((p) => p.type === "text" && p.text) + .map((p) => p.text) + .join("\n") + .trim() || "" - // Only trigger on actual command execution (contains tag) - // NOT on description text like "Start Sisyphus work session from Prometheus plan" - const isStartWorkCommand = promptText.includes("") + if (!promptText.includes("")) return - if (!isStartWorkCommand) { - return - } - - log(`[${HOOK_NAME}] Processing start-work command`, { - sessionID: input.sessionID, - }) - - 
updateSessionAgent(input.sessionID, "atlas") // Always switch: fixes #1298 + log(`[${HOOK_NAME}] Processing start-work command`, { sessionID: input.sessionID }) + updateSessionAgent(input.sessionID, "atlas") const existingState = readBoulderState(ctx.directory) const sessionId = input.sessionID const timestamp = new Date().toISOString() + const { planName: explicitPlanName, explicitWorktreePath } = parseUserRequest(promptText) + const { worktreePath, block: worktreeBlock } = resolveWorktreeContext(explicitWorktreePath) + let contextInfo = "" - - const explicitPlanName = extractUserRequestPlanName(promptText) - + if (explicitPlanName) { - log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { - sessionID: input.sessionID, - }) - + log(`[${HOOK_NAME}] Explicit plan name requested: ${explicitPlanName}`, { sessionID: input.sessionID }) + const allPlans = findPrometheusPlans(ctx.directory) const matchedPlan = findPlanByName(allPlans, explicitPlanName) - + if (matchedPlan) { const progress = getPlanProgress(matchedPlan) - + if (progress.isComplete) { contextInfo = ` ## Plan Already Complete @@ -99,12 +103,10 @@ export function createStartWorkHook(ctx: PluginInput) { The requested plan "${getPlanName(matchedPlan)}" has been completed. All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` } else { - if (existingState) { - clearBoulderState(ctx.directory) - } - const newState = createBoulderState(matchedPlan, sessionId, "atlas") + if (existingState) clearBoulderState(ctx.directory) + const newState = createBoulderState(matchedPlan, sessionId, "atlas", worktreePath) writeBoulderState(ctx.directory, newState) - + contextInfo = ` ## Auto-Selected Plan @@ -113,17 +115,20 @@ All ${progress.total} tasks are done. Create a new plan with: /plan "your task"` **Progress**: ${progress.completed}/${progress.total} tasks **Session ID**: ${sessionId} **Started**: ${timestamp} +${worktreeBlock} boulder.json has been created. 
Read the plan and begin execution.` } } else { - const incompletePlans = allPlans.filter(p => !getPlanProgress(p).isComplete) + const incompletePlans = allPlans.filter((p) => !getPlanProgress(p).isComplete) if (incompletePlans.length > 0) { - const planList = incompletePlans.map((p, i) => { - const prog = getPlanProgress(p) - return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}` - }).join("\n") - + const planList = incompletePlans + .map((p, i) => { + const prog = getPlanProgress(p) + return `${i + 1}. [${getPlanName(p)}] - Progress: ${prog.completed}/${prog.total}` + }) + .join("\n") + contextInfo = ` ## Plan Not Found @@ -143,9 +148,25 @@ No incomplete plans available. Create a new plan with: /plan "your task"` } } else if (existingState) { const progress = getPlanProgress(existingState.active_plan) - + if (!progress.isComplete) { - appendSessionId(ctx.directory, sessionId) + const effectiveWorktree = worktreePath ?? existingState.worktree_path + + if (worktreePath !== undefined) { + const updatedSessions = existingState.session_ids.includes(sessionId) + ? existingState.session_ids + : [...existingState.session_ids, sessionId] + writeBoulderState(ctx.directory, { + ...existingState, + worktree_path: worktreePath, + session_ids: updatedSessions, + }) + } else { + appendSessionId(ctx.directory, sessionId) + } + + const worktreeDisplay = effectiveWorktree ? `\n**Worktree**: ${effectiveWorktree}` : worktreeBlock + contextInfo = ` ## Active Work Session Found @@ -155,6 +176,7 @@ No incomplete plans available. Create a new plan with: /plan "your task"` **Progress**: ${progress.completed}/${progress.total} tasks completed **Sessions**: ${existingState.session_ids.length + 1} (current session appended) **Started**: ${existingState.started_at} +${worktreeDisplay} The current session (${sessionId}) has been added to session_ids. 
Read the plan file and continue from the first unchecked task.` @@ -167,13 +189,15 @@ Looking for new plans...` } } - if ((!existingState && !explicitPlanName) || (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete)) { + if ( + (!existingState && !explicitPlanName) || + (existingState && !explicitPlanName && getPlanProgress(existingState.active_plan).isComplete) + ) { const plans = findPrometheusPlans(ctx.directory) - const incompletePlans = plans.filter(p => !getPlanProgress(p).isComplete) - + const incompletePlans = plans.filter((p) => !getPlanProgress(p).isComplete) + if (plans.length === 0) { contextInfo += ` - ## No Plans Found No Prometheus plan files found at .sisyphus/plans/ @@ -187,7 +211,7 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta } else if (incompletePlans.length === 1) { const planPath = incompletePlans[0] const progress = getPlanProgress(planPath) - const newState = createBoulderState(planPath, sessionId, "atlas") + const newState = createBoulderState(planPath, sessionId, "atlas", worktreePath) writeBoulderState(ctx.directory, newState) contextInfo += ` @@ -199,15 +223,17 @@ All ${plans.length} plan(s) are complete. Create a new plan with: /plan "your ta **Progress**: ${progress.completed}/${progress.total} tasks **Session ID**: ${sessionId} **Started**: ${timestamp} +${worktreeBlock} boulder.json has been created. Read the plan and begin execution.` } else { - const planList = incompletePlans.map((p, i) => { - const progress = getPlanProgress(p) - const stat = require("node:fs").statSync(p) - const modified = new Date(stat.mtimeMs).toISOString() - return `${i + 1}. [${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}` - }).join("\n") + const planList = incompletePlans + .map((p, i) => { + const progress = getPlanProgress(p) + const modified = new Date(statSync(p).mtimeMs).toISOString() + return `${i + 1}. 
[${getPlanName(p)}] - Modified: ${modified} - Progress: ${progress.completed}/${progress.total}` + }) + .join("\n") contextInfo += ` @@ -220,6 +246,7 @@ Session ID: ${sessionId} ${planList} Ask the user which plan to work on. Present the options above and wait for their response. +${worktreeBlock} ` } } @@ -229,13 +256,14 @@ Ask the user which plan to work on. Present the options above and wait for their output.parts[idx].text = output.parts[idx].text .replace(/\$SESSION_ID/g, sessionId) .replace(/\$TIMESTAMP/g, timestamp) - + output.parts[idx].text += `\n\n---\n${contextInfo}` } log(`[${HOOK_NAME}] Context injected`, { sessionID: input.sessionID, hasExistingState: !!existingState, + worktreePath, }) }, } diff --git a/src/hooks/start-work/worktree-detector.test.ts b/src/hooks/start-work/worktree-detector.test.ts new file mode 100644 index 000000000..b02d5af1b --- /dev/null +++ b/src/hooks/start-work/worktree-detector.test.ts @@ -0,0 +1,79 @@ +/// + +import { describe, expect, test, spyOn, beforeEach, afterEach } from "bun:test" +import * as childProcess from "node:child_process" +import { detectWorktreePath } from "./worktree-detector" + +describe("detectWorktreePath", () => { + let execFileSyncSpy: ReturnType + + beforeEach(() => { + execFileSyncSpy = spyOn(childProcess, "execFileSync").mockImplementation( + ((_file: string, _args: string[]) => "") as typeof childProcess.execFileSync, + ) + }) + + afterEach(() => { + execFileSyncSpy.mockRestore() + }) + + describe("when directory is a valid git worktree", () => { + test("#given valid git dir #when detecting #then returns worktree root path", () => { + execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/home/user/my-repo\n") as typeof childProcess.execFileSync, + ) + + // when + const result = detectWorktreePath("/home/user/my-repo/src") + + // then + expect(result).toBe("/home/user/my-repo") + }) + + test("#given git output with trailing newline #when detecting #then trims output", () => { 
+ execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/projects/worktree-a\n\n") as typeof childProcess.execFileSync, + ) + + const result = detectWorktreePath("/projects/worktree-a") + + expect(result).toBe("/projects/worktree-a") + }) + + test("#given valid dir #when detecting #then calls git rev-parse with cwd", () => { + execFileSyncSpy.mockImplementation( + ((_file: string, _args: string[]) => "/repo\n") as typeof childProcess.execFileSync, + ) + + detectWorktreePath("/repo/some/subdir") + + expect(execFileSyncSpy).toHaveBeenCalledWith( + "git", + ["rev-parse", "--show-toplevel"], + expect.objectContaining({ cwd: "/repo/some/subdir" }), + ) + }) + }) + + describe("when directory is not a git worktree", () => { + test("#given non-git directory #when detecting #then returns null", () => { + execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => { + throw new Error("not a git repository") + }) + + const result = detectWorktreePath("/tmp/not-a-repo") + + expect(result).toBeNull() + }) + + test("#given non-existent directory #when detecting #then returns null", () => { + execFileSyncSpy.mockImplementation((_file: string, _args: string[]) => { + throw new Error("ENOENT: no such file or directory") + }) + + const result = detectWorktreePath("/nonexistent/path") + + expect(result).toBeNull() + }) + }) +}) diff --git a/src/hooks/start-work/worktree-detector.ts b/src/hooks/start-work/worktree-detector.ts new file mode 100644 index 000000000..74c919593 --- /dev/null +++ b/src/hooks/start-work/worktree-detector.ts @@ -0,0 +1,14 @@ +import { execFileSync } from "node:child_process" + +export function detectWorktreePath(directory: string): string | null { + try { + return execFileSync("git", ["rev-parse", "--show-toplevel"], { + cwd: directory, + encoding: "utf-8", + timeout: 5000, + stdio: ["pipe", "pipe", "pipe"], + }).trim() + } catch { + return null + } +} diff --git a/src/hooks/stop-continuation-guard/hook.ts 
b/src/hooks/stop-continuation-guard/hook.ts index f7c49a563..747b7a9b6 100644 --- a/src/hooks/stop-continuation-guard/hook.ts +++ b/src/hooks/stop-continuation-guard/hook.ts @@ -1,4 +1,5 @@ import type { PluginInput } from "@opencode-ai/plugin" +import type { BackgroundManager } from "../../features/background-agent" import { clearContinuationMarker, @@ -8,6 +9,11 @@ import { log } from "../../shared/logger" const HOOK_NAME = "stop-continuation-guard" +type StopContinuationBackgroundManager = Pick< + BackgroundManager, + "getAllDescendantTasks" | "cancelTask" +> + export interface StopContinuationGuard { event: (input: { event: { type: string; properties?: unknown } }) => Promise "chat.message": (input: { sessionID?: string }) => Promise @@ -17,7 +23,10 @@ export interface StopContinuationGuard { } export function createStopContinuationGuardHook( - ctx: PluginInput + ctx: PluginInput, + options?: { + backgroundManager?: StopContinuationBackgroundManager + } ): StopContinuationGuard { const stoppedSessions = new Set() @@ -25,6 +34,38 @@ export function createStopContinuationGuardHook( stoppedSessions.add(sessionID) setContinuationMarkerSource(ctx.directory, sessionID, "stop", "stopped", "continuation stopped") log(`[${HOOK_NAME}] Continuation stopped for session`, { sessionID }) + + const backgroundManager = options?.backgroundManager + if (!backgroundManager) { + return + } + + const cancellableTasks = backgroundManager + .getAllDescendantTasks(sessionID) + .filter((task) => task.status === "running" || task.status === "pending") + + if (cancellableTasks.length === 0) { + return + } + + void Promise.allSettled( + cancellableTasks.map(async (task) => { + await backgroundManager.cancelTask(task.id, { + source: "stop-continuation", + reason: "Continuation stopped via /stop-continuation", + abortSession: task.status === "running", + skipNotification: true, + }) + }) + ).then((results) => { + const cancelledCount = results.filter((result) => result.status === 
"fulfilled").length + const failedCount = results.length - cancelledCount + log(`[${HOOK_NAME}] Cancelled background tasks for stopped session`, { + sessionID, + cancelledCount, + failedCount, + }) + }) } const isStopped = (sessionID: string): boolean => { diff --git a/src/hooks/stop-continuation-guard/index.test.ts b/src/hooks/stop-continuation-guard/index.test.ts index 9547accf2..a0d08f217 100644 --- a/src/hooks/stop-continuation-guard/index.test.ts +++ b/src/hooks/stop-continuation-guard/index.test.ts @@ -2,9 +2,15 @@ import { afterEach, describe, expect, test } from "bun:test" import { mkdtempSync, rmSync } from "node:fs" import { join } from "node:path" import { tmpdir } from "node:os" +import type { BackgroundManager, BackgroundTask } from "../../features/background-agent" import { readContinuationMarker } from "../../features/run-continuation-state" import { createStopContinuationGuardHook } from "./index" +type CancelCall = { + taskId: string + options?: Parameters[1] +} + describe("stop-continuation-guard", () => { const tempDirs: string[] = [] @@ -34,6 +40,33 @@ describe("stop-continuation-guard", () => { } as any } + function createBackgroundTask(status: BackgroundTask["status"], id: string): BackgroundTask { + return { + id, + status, + description: `${id} description`, + parentSessionID: "parent-session", + parentMessageID: "parent-message", + prompt: "prompt", + agent: "sisyphus-junior", + } + } + + function createMockBackgroundManager(tasks: BackgroundTask[], cancelCalls: CancelCall[]): Pick { + return { + getAllDescendantTasks: () => tasks, + cancelTask: async (taskId: string, options?: Parameters[1]) => { + cancelCalls.push({ taskId, options }) + return true + }, + } + } + + async function flushMicrotasks(): Promise { + await Promise.resolve() + await Promise.resolve() + } + test("should mark session as stopped", () => { // given - a guard hook with no stopped sessions const input = createMockPluginInput() @@ -166,4 +199,31 @@ 
describe("stop-continuation-guard", () => { // then - should not throw and stopped session remains stopped expect(guard.isStopped("some-session")).toBe(true) }) + + test("should cancel only running and pending background tasks on stop", async () => { + // given - a background manager with mixed task statuses + const cancelCalls: CancelCall[] = [] + const backgroundManager = createMockBackgroundManager( + [ + createBackgroundTask("running", "task-running"), + createBackgroundTask("pending", "task-pending"), + createBackgroundTask("completed", "task-completed"), + ], + cancelCalls, + ) + const guard = createStopContinuationGuardHook(createMockPluginInput(), { + backgroundManager, + }) + + // when - stop continuation is triggered + guard.stop("test-session-bg") + await flushMicrotasks() + + // then - only running and pending tasks are cancelled + expect(cancelCalls).toHaveLength(2) + expect(cancelCalls[0]?.taskId).toBe("task-running") + expect(cancelCalls[0]?.options?.abortSession).toBe(true) + expect(cancelCalls[1]?.taskId).toBe("task-pending") + expect(cancelCalls[1]?.options?.abortSession).toBe(false) + }) }) diff --git a/src/hooks/think-mode/hook.ts b/src/hooks/think-mode/hook.ts index 17cd17f79..017cb616a 100644 --- a/src/hooks/think-mode/hook.ts +++ b/src/hooks/think-mode/hook.ts @@ -1,6 +1,6 @@ import { detectThinkKeyword, extractPromptText } from "./detector" -import { getHighVariant, getThinkingConfig, isAlreadyHighVariant } from "./switcher" -import type { ThinkModeInput, ThinkModeState } from "./types" +import { getHighVariant, isAlreadyHighVariant } from "./switcher" +import type { ThinkModeState } from "./types" import { log } from "../../shared" const thinkModeState = new Map() @@ -10,53 +10,24 @@ export function clearThinkModeState(sessionID: string): void { } export function createThinkModeHook() { - function isDisabledThinkingConfig(config: Record): boolean { - const thinkingConfig = config.thinking - if ( - typeof thinkingConfig === "object" && - 
thinkingConfig !== null && - "type" in thinkingConfig && - (thinkingConfig as { type?: string }).type === "disabled" - ) { - return true - } - - const providerOptions = config.providerOptions - if (typeof providerOptions !== "object" || providerOptions === null) { - return false - } - - return Object.values(providerOptions as Record).some( - (providerConfig) => { - if (typeof providerConfig !== "object" || providerConfig === null) { - return false - } - - const providerConfigMap = providerConfig as Record - const extraBody = providerConfigMap.extra_body - if (typeof extraBody !== "object" || extraBody === null) { - return false - } - - const extraBodyMap = extraBody as Record - const extraThinking = extraBodyMap.thinking - return ( - typeof extraThinking === "object" && - extraThinking !== null && - (extraThinking as { type?: string }).type === "disabled" - ) - } - ) - } - return { - "chat.params": async (output: ThinkModeInput, sessionID: string): Promise => { + "chat.message": async ( + input: { + sessionID: string + model?: { providerID: string; modelID: string } + }, + output: { + message: Record + parts: Array<{ type: string; text?: string; [key: string]: unknown }> + } + ): Promise => { const promptText = extractPromptText(output.parts) + const sessionID = input.sessionID const state: ThinkModeState = { requested: false, modelSwitched: false, - thinkingConfigInjected: false, + variantSet: false, } if (!detectThinkKeyword(promptText)) { @@ -66,7 +37,12 @@ export function createThinkModeHook() { state.requested = true - const currentModel = output.message.model + if (typeof output.message.variant === "string") { + thinkModeState.set(sessionID, state) + return + } + + const currentModel = input.model if (!currentModel) { thinkModeState.set(sessionID, state) return @@ -81,14 +57,15 @@ export function createThinkModeHook() { } const highVariant = getHighVariant(currentModel.modelID) - const thinkingConfig = getThinkingConfig(currentModel.providerID, 
currentModel.modelID) if (highVariant) { output.message.model = { providerID: currentModel.providerID, modelID: highVariant, } + output.message.variant = "high" state.modelSwitched = true + state.variantSet = true log("Think mode: model switched to high variant", { sessionID, from: currentModel.modelID, @@ -96,42 +73,6 @@ export function createThinkModeHook() { }) } - if (thinkingConfig) { - const messageData = output.message as Record - const agentThinking = messageData.thinking as { type?: string } | undefined - const agentProviderOptions = messageData.providerOptions - - const agentDisabledThinking = agentThinking?.type === "disabled" - const agentHasCustomProviderOptions = Boolean(agentProviderOptions) - - if (agentDisabledThinking) { - log("Think mode: skipping - agent has thinking disabled", { - sessionID, - provider: currentModel.providerID, - }) - } else if (agentHasCustomProviderOptions) { - log("Think mode: skipping - agent has custom providerOptions", { - sessionID, - provider: currentModel.providerID, - }) - } else if ( - !isDisabledThinkingConfig(thinkingConfig as Record) - ) { - Object.assign(output.message, thinkingConfig) - state.thinkingConfigInjected = true - log("Think mode: thinking config injected", { - sessionID, - provider: currentModel.providerID, - config: thinkingConfig, - }) - } else { - log("Think mode: skipping disabled thinking config", { - sessionID, - provider: currentModel.providerID, - }) - } - } - thinkModeState.set(sessionID, state) }, diff --git a/src/hooks/think-mode/index.test.ts b/src/hooks/think-mode/index.test.ts index 43f8003b1..b0d744738 100644 --- a/src/hooks/think-mode/index.test.ts +++ b/src/hooks/think-mode/index.test.ts @@ -1,452 +1,155 @@ -import { describe, expect, it, beforeEach } from "bun:test" -import type { ThinkModeInput } from "./types" +import { beforeEach, describe, expect, it } from "bun:test" -const { createThinkModeHook, clearThinkModeState } = await import("./index") +const { clearThinkModeState, 
createThinkModeHook } = await import("./index") + +type ThinkModeHookInput = { + sessionID: string + model?: { providerID: string; modelID: string } +} + +type ThinkModeHookOutput = { + message: Record + parts: Array<{ type: string; text?: string; [key: string]: unknown }> +} + +function createHookInput(args: { + sessionID?: string + providerID?: string + modelID?: string +}): ThinkModeHookInput { + const { sessionID = "test-session-id", providerID, modelID } = args + + if (!providerID || !modelID) { + return { sessionID } + } -/** - * Helper to create a mock ThinkModeInput for testing - */ -function createMockInput( - providerID: string, - modelID: string, - promptText: string -): ThinkModeInput { return { - parts: [{ type: "text", text: promptText }], - message: { - model: { - providerID, - modelID, - }, - }, + sessionID, + model: { providerID, modelID }, } } -/** - * Type helper for accessing dynamically injected properties on message - */ -type MessageWithInjectedProps = Record +function createHookOutput(promptText: string, variant?: string): ThinkModeHookOutput { + return { + message: variant ? 
{ variant } : {}, + parts: [{ type: "text", text: promptText }], + } +} -describe("createThinkModeHook integration", () => { +describe("createThinkModeHook", () => { const sessionID = "test-session-id" beforeEach(() => { clearThinkModeState(sessionID) }) - describe("GitHub Copilot provider integration", () => { - describe("Claude models", () => { - it("should activate thinking mode for github-copilot Claude with think keyword", async () => { - // given a github-copilot Claude model and prompt with "think" keyword - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6", - "Please think deeply about this problem" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - expect((message.thinking as Record)?.type).toBe( - "enabled" - ) - expect( - (message.thinking as Record)?.budgetTokens - ).toBe(64000) - }) - - it("should handle github-copilot Claude with dots in version", async () => { - // given a github-copilot Claude model with dot format (claude-opus-4.6) - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4.6", - "ultrathink mode" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant (hyphen format) - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - }) - - it("should handle github-copilot Claude Sonnet", async () => { - // given a github-copilot Claude Sonnet model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-sonnet-4-6", - "think 
about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.thinking).toBeDefined() - }) + it("sets high variant and switches model when think keyword is present", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "claude-opus-4-6", }) + const output = createHookOutput("Please think deeply about this") - describe("Gemini models", () => { - it("should activate thinking mode for github-copilot Gemini Pro", async () => { - // given a github-copilot Gemini Pro model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gemini-3-pro", - "think about this" - ) + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject google thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-pro-high") - expect(message.providerOptions).toBeDefined() - const googleOptions = ( - message.providerOptions as Record - )?.google as Record - expect(googleOptions?.thinkingConfig).toBeDefined() - }) - - it("should activate thinking mode for github-copilot Gemini Flash", async () => { - // given a github-copilot Gemini Flash model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gemini-3-flash", - "ultrathink" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-flash-high") - 
expect(message.providerOptions).toBeDefined() - }) - }) - - describe("GPT models", () => { - it("should activate thinking mode for github-copilot GPT-5.2", async () => { - // given a github-copilot GPT-5.2 model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gpt-5.2", - "please think" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant and inject openai thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-2-high") - expect(message.reasoning_effort).toBe("high") - }) - - it("should activate thinking mode for github-copilot GPT-5", async () => { - // given a github-copilot GPT-5 model - const hook = createThinkModeHook() - const input = createMockInput("github-copilot", "gpt-5", "think deeply") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should upgrade to high variant - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-high") - expect(message.reasoning_effort).toBe("high") - }) - }) - - describe("No think keyword", () => { - it("should NOT activate for github-copilot without think keyword", async () => { - // given a prompt without any think keyword - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6", - "Just do this task" - ) - const originalModelID = input.message.model?.modelID - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT change model or inject config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe(originalModelID) - expect(message.thinking).toBeUndefined() - }) + // then + expect(output.message.variant).toBe("high") + expect(output.message.model).toEqual({ + providerID: 
"github-copilot", + modelID: "claude-opus-4-6-high", }) }) - describe("Backwards compatibility with direct providers", () => { - it("should still work for direct anthropic provider", async () => { - // given direct anthropic provider - const hook = createThinkModeHook() - const input = createMockInput( - "anthropic", - "claude-sonnet-4-6", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should work as before - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.thinking).toBeDefined() + it("supports dotted model IDs by switching to normalized high variant", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "gpt-5.2", }) + const output = createHookOutput("ultrathink about this") - it("should work for direct google-vertex-anthropic provider", async () => { - //#given direct google-vertex-anthropic provider - const hook = createThinkModeHook() - const input = createMockInput( - "google-vertex-anthropic", - "claude-opus-4-6", - "think deeply" - ) + // when + await hook["chat.message"](input, output) - //#when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - //#then should upgrade model and inject Claude thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - expect(message.thinking).toBeDefined() - expect((message.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should still work for direct google provider", async () => { - // given direct google provider - const hook = createThinkModeHook() - const input = createMockInput( - "google", - "gemini-3-pro", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) 
- - // then should work as before - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gemini-3-pro-high") - expect(message.providerOptions).toBeDefined() - }) - - it("should still work for direct openai provider", async () => { - // given direct openai provider - const hook = createThinkModeHook() - const input = createMockInput("openai", "gpt-5", "think about this") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should work - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5-high") - expect(message.reasoning_effort).toBe("high") - }) - - it("should still work for amazon-bedrock provider", async () => { - // given amazon-bedrock provider - const hook = createThinkModeHook() - const input = createMockInput( - "amazon-bedrock", - "claude-sonnet-4-6", - "think" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should inject bedrock thinking config - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-sonnet-4-6-high") - expect(message.reasoningConfig).toBeDefined() + // then + expect(output.message.variant).toBe("high") + expect(output.message.model).toEqual({ + providerID: "github-copilot", + modelID: "gpt-5-2-high", }) }) - describe("Already-high variants", () => { - it("should NOT re-upgrade already-high variants", async () => { - // given an already-high variant model - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "claude-opus-4-6-high", - "think deeply" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT modify the model (already high) - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("claude-opus-4-6-high") - // No additional 
thinking config should be injected - expect(message.thinking).toBeUndefined() + it("skips when message variant is already set", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "github-copilot", + modelID: "claude-sonnet-4-6", }) + const output = createHookOutput("think through this", "max") - it("should NOT re-upgrade already-high GPT variants", async () => { - // given an already-high GPT variant - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "gpt-5.2-high", - "ultrathink" - ) + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT modify the model - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("gpt-5.2-high") - expect(message.reasoning_effort).toBeUndefined() - }) + // then + expect(output.message.variant).toBe("max") + expect(output.message.model).toBeUndefined() }) - describe("Unknown models", () => { - it("should not crash for unknown models via github-copilot", async () => { - // given an unknown model type - const hook = createThinkModeHook() - const input = createMockInput( - "github-copilot", - "llama-3-70b", - "think about this" - ) - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should not crash and model should remain unchanged - expect(input.message.model?.modelID).toBe("llama-3-70b") + it("does nothing when think keyword is absent", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "google", + modelID: "gemini-3.1-pro", }) + const output = createHookOutput("Please solve this directly") + + // when + await hook["chat.message"](input, output) + + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() 
}) - describe("Edge cases", () => { - it("should handle missing model gracefully", async () => { - // given input without a model - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "think about this" }], - message: {}, - } - - // when the chat.params hook is called - // then should not crash - await expect( - hook["chat.params"](input, sessionID) - ).resolves.toBeUndefined() + it("does not modify already-high models", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ + sessionID, + providerID: "openai", + modelID: "gpt-5-high", }) + const output = createHookOutput("think deeply") - it("should handle empty prompt gracefully", async () => { - // given empty prompt - const hook = createThinkModeHook() - const input = createMockInput("github-copilot", "claude-opus-4-6", "") + // when + await hook["chat.message"](input, output) - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should not upgrade (no think keyword) - expect(input.message.model?.modelID).toBe("claude-opus-4-6") - }) + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() }) - describe("Agent-level thinking configuration respect", () => { - it("should omit Z.ai GLM disabled thinking config", async () => { - //#given a Z.ai GLM model with think prompt - const hook = createThinkModeHook() - const input = createMockInput( - "zai-coding-plan", - "glm-5", - "ultrathink mode" - ) + it("handles missing input model without crashing", async () => { + // given + const hook = createThinkModeHook() + const input = createHookInput({ sessionID }) + const output = createHookOutput("think about this") - //#when think mode resolves Z.ai thinking configuration - await hook["chat.params"](input, sessionID) + // when + await expect(hook["chat.message"](input, output)).resolves.toBeUndefined() - //#then thinking config should be 
omitted from request - const message = input.message as MessageWithInjectedProps - expect(input.message.model?.modelID).toBe("glm-5") - expect(message.thinking).toBeUndefined() - expect(message.providerOptions).toBeUndefined() - }) - - it("should NOT inject thinking config when agent has thinking disabled", async () => { - // given agent with thinking explicitly disabled - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "ultrathink deeply" }], - message: { - model: { providerID: "google", modelID: "gemini-3-pro" }, - thinking: { type: "disabled" }, - } as ThinkModeInput["message"], - } - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT override agent's thinking disabled setting - const message = input.message as MessageWithInjectedProps - expect((message.thinking as { type: string }).type).toBe("disabled") - expect(message.providerOptions).toBeUndefined() - }) - - it("should NOT inject thinking config when agent has custom providerOptions", async () => { - // given agent with custom providerOptions - const hook = createThinkModeHook() - const input: ThinkModeInput = { - parts: [{ type: "text", text: "ultrathink" }], - message: { - model: { providerID: "google", modelID: "gemini-3-flash" }, - providerOptions: { - google: { thinkingConfig: { thinkingBudget: 0 } }, - }, - } as ThinkModeInput["message"], - } - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should NOT override agent's providerOptions - const message = input.message as MessageWithInjectedProps - const providerOpts = message.providerOptions as Record - expect((providerOpts.google as Record).thinkingConfig).toEqual({ - thinkingBudget: 0, - }) - }) - - it("should still inject thinking config when agent has no thinking override", async () => { - // given agent without thinking override - const hook = createThinkModeHook() - const input = 
createMockInput("google", "gemini-3-pro", "ultrathink") - - // when the chat.params hook is called - await hook["chat.params"](input, sessionID) - - // then should inject thinking config as normal - const message = input.message as MessageWithInjectedProps - expect(message.providerOptions).toBeDefined() - }) + // then + expect(output.message.variant).toBeUndefined() + expect(output.message.model).toBeUndefined() }) }) diff --git a/src/hooks/think-mode/switcher.test.ts b/src/hooks/think-mode/switcher.test.ts index 0abc4756d..bf20122fd 100644 --- a/src/hooks/think-mode/switcher.test.ts +++ b/src/hooks/think-mode/switcher.test.ts @@ -1,128 +1,10 @@ import { describe, expect, it } from "bun:test" import { getHighVariant, - getThinkingConfig, isAlreadyHighVariant, - THINKING_CONFIGS, } from "./switcher" describe("think-mode switcher", () => { - describe("GitHub Copilot provider support", () => { - describe("Claude models via github-copilot", () => { - it("should resolve github-copilot Claude Opus to anthropic config", () => { - // given a github-copilot provider with Claude Opus model - const providerID = "github-copilot" - const modelID = "claude-opus-4-6" - - // when getting thinking config - const config = getThinkingConfig(providerID, modelID) - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe( - "enabled" - ) - expect((config?.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should resolve github-copilot Claude Sonnet to anthropic config", () => { - // given a github-copilot provider with Claude Sonnet model - const config = getThinkingConfig("github-copilot", "claude-sonnet-4-6") - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - - it("should handle Claude with dots in version number", () => { - // given a model ID with dots (claude-opus-4.6) - 
const config = getThinkingConfig("github-copilot", "claude-opus-4.6") - - // then should still return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - }) - - describe("Gemini models via github-copilot", () => { - it("should resolve github-copilot Gemini Pro to google config", () => { - // given a github-copilot provider with Gemini Pro model - const config = getThinkingConfig("github-copilot", "gemini-3-pro") - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const googleOptions = ( - config?.providerOptions as Record - )?.google as Record - expect(googleOptions?.thinkingConfig).toBeDefined() - }) - - it("should resolve github-copilot Gemini Flash to google config", () => { - // given a github-copilot provider with Gemini Flash model - const config = getThinkingConfig( - "github-copilot", - "gemini-3-flash" - ) - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - }) - - describe("GPT models via github-copilot", () => { - it("should resolve github-copilot GPT-5.2 to openai config", () => { - // given a github-copilot provider with GPT-5.2 model - const config = getThinkingConfig("github-copilot", "gpt-5.2") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot GPT-5 to openai config", () => { - // given a github-copilot provider with GPT-5 model - const config = getThinkingConfig("github-copilot", "gpt-5") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot o1 to openai config", () => { - // given a github-copilot provider with o1 model - const config = getThinkingConfig("github-copilot", "o1-preview") - - // then 
should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - - it("should resolve github-copilot o3 to openai config", () => { - // given a github-copilot provider with o3 model - const config = getThinkingConfig("github-copilot", "o3-mini") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - }) - - describe("Unknown models via github-copilot", () => { - it("should return null for unknown model types", () => { - // given a github-copilot provider with unknown model - const config = getThinkingConfig("github-copilot", "llama-3-70b") - - // then should return null (no matching provider) - expect(config).toBeNull() - }) - }) - }) - describe("Model ID normalization", () => { describe("getHighVariant with dots vs hyphens", () => { it("should handle dots in Claude version numbers", () => { @@ -167,8 +49,8 @@ describe("think-mode switcher", () => { it("should handle Gemini preview variants", () => { // given Gemini preview model IDs - expect(getHighVariant("gemini-3-pro")).toBe( - "gemini-3-pro-high" + expect(getHighVariant("gemini-3.1-pro")).toBe( + "gemini-3-1-pro-high" ) expect(getHighVariant("gemini-3-flash")).toBe( "gemini-3-flash-high" @@ -179,7 +61,7 @@ describe("think-mode switcher", () => { // given model IDs that are already high variants expect(getHighVariant("claude-opus-4-6-high")).toBeNull() expect(getHighVariant("gpt-5-2-high")).toBeNull() - expect(getHighVariant("gemini-3-pro-high")).toBeNull() + expect(getHighVariant("gemini-3-1-pro-high")).toBeNull() }) it("should return null for unknown models", () => { @@ -195,7 +77,7 @@ describe("think-mode switcher", () => { // given model IDs with -high suffix expect(isAlreadyHighVariant("claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("gpt-5-2-high")).toBe(true) - expect(isAlreadyHighVariant("gemini-3-pro-high")).toBe(true) + 
expect(isAlreadyHighVariant("gemini-3.1-pro-high")).toBe(true) }) it("should detect -high suffix after normalization", () => { @@ -208,7 +90,7 @@ describe("think-mode switcher", () => { expect(isAlreadyHighVariant("claude-opus-4-6")).toBe(false) expect(isAlreadyHighVariant("claude-opus-4.6")).toBe(false) expect(isAlreadyHighVariant("gpt-5.2")).toBe(false) - expect(isAlreadyHighVariant("gemini-3-pro")).toBe(false) + expect(isAlreadyHighVariant("gemini-3.1-pro")).toBe(false) }) it("should return false for models with 'high' in name but not suffix", () => { @@ -217,149 +99,6 @@ describe("think-mode switcher", () => { }) }) - describe("getThinkingConfig", () => { - describe("Already high variants", () => { - it("should return null for already-high variants", () => { - // given already-high model variants - expect( - getThinkingConfig("anthropic", "claude-opus-4-6-high") - ).toBeNull() - expect(getThinkingConfig("openai", "gpt-5-2-high")).toBeNull() - expect(getThinkingConfig("google", "gemini-3-pro-high")).toBeNull() - }) - - it("should return null for already-high variants via github-copilot", () => { - // given already-high model variants via github-copilot - expect( - getThinkingConfig("github-copilot", "claude-opus-4-6-high") - ).toBeNull() - expect(getThinkingConfig("github-copilot", "gpt-5.2-high")).toBeNull() - }) - }) - - describe("Non-thinking-capable models", () => { - it("should return null for non-thinking-capable models", () => { - // given models that don't support thinking mode - expect(getThinkingConfig("anthropic", "claude-2")).toBeNull() - expect(getThinkingConfig("openai", "gpt-4")).toBeNull() - expect(getThinkingConfig("google", "gemini-1")).toBeNull() - }) - }) - - describe("Unknown providers", () => { - it("should return null for unknown providers", () => { - // given unknown provider IDs - expect(getThinkingConfig("unknown-provider", "some-model")).toBeNull() - expect(getThinkingConfig("azure", "gpt-5")).toBeNull() - }) - }) - }) - - 
describe("Direct provider configs (backwards compatibility)", () => { - it("should still work for direct anthropic provider", () => { - // given direct anthropic provider - const config = getThinkingConfig("anthropic", "claude-opus-4-6") - - // then should return anthropic thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe("enabled") - }) - - it("should work for direct google-vertex-anthropic provider", () => { - //#given direct google-vertex-anthropic provider - const config = getThinkingConfig( - "google-vertex-anthropic", - "claude-opus-4-6" - ) - - //#when thinking config is resolved - - //#then it should return anthropic-style thinking config - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - expect((config?.thinking as Record)?.type).toBe("enabled") - expect((config?.thinking as Record)?.budgetTokens).toBe( - 64000 - ) - }) - - it("should still work for direct google provider", () => { - // given direct google provider - const config = getThinkingConfig("google", "gemini-3-pro") - - // then should return google thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - - it("should still work for amazon-bedrock provider", () => { - // given amazon-bedrock provider with claude model - const config = getThinkingConfig("amazon-bedrock", "claude-sonnet-4-6") - - // then should return bedrock thinking config - expect(config).not.toBeNull() - expect(config?.reasoningConfig).toBeDefined() - }) - - it("should still work for google-vertex provider", () => { - // given google-vertex provider - const config = getThinkingConfig("google-vertex", "gemini-3-pro") - - // then should return google-vertex thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const vertexOptions = (config?.providerOptions as Record)?.[ - "google-vertex" - ] as Record - 
expect(vertexOptions?.thinkingConfig).toBeDefined() - }) - - it("should work for direct openai provider", () => { - // given direct openai provider - const config = getThinkingConfig("openai", "gpt-5") - - // then should return openai thinking config - expect(config).not.toBeNull() - expect(config?.reasoning_effort).toBe("high") - }) - }) - - describe("THINKING_CONFIGS structure", () => { - it("should have correct structure for anthropic", () => { - const config = THINKING_CONFIGS.anthropic - expect(config.thinking).toBeDefined() - expect(config.maxTokens).toBe(128000) - }) - - it("should have correct structure for google-vertex-anthropic", () => { - //#given google-vertex-anthropic config entry - const config = THINKING_CONFIGS["google-vertex-anthropic"] - - //#when structure is validated - - //#then it should match anthropic style structure - expect(config.thinking).toBeDefined() - expect(config.maxTokens).toBe(128000) - }) - - it("should have correct structure for google", () => { - const config = THINKING_CONFIGS.google - expect(config.providerOptions).toBeDefined() - }) - - it("should have correct structure for openai", () => { - const config = THINKING_CONFIGS.openai - expect(config.reasoning_effort).toBe("high") - }) - - it("should have correct structure for amazon-bedrock", () => { - const config = THINKING_CONFIGS["amazon-bedrock"] - expect(config.reasoningConfig).toBeDefined() - expect(config.maxTokens).toBe(64000) - }) - }) - describe("Custom provider prefixes support", () => { describe("getHighVariant with prefixes", () => { it("should preserve vertex_ai/ prefix when getting high variant", () => { @@ -390,7 +129,7 @@ describe("think-mode switcher", () => { // given various custom prefixes expect(getHighVariant("azure/gpt-5")).toBe("azure/gpt-5-high") expect(getHighVariant("bedrock/claude-sonnet-4-6")).toBe("bedrock/claude-sonnet-4-6-high") - expect(getHighVariant("custom-llm/gemini-3-pro")).toBe("custom-llm/gemini-3-pro-high") + 
expect(getHighVariant("custom-llm/gemini-3.1-pro")).toBe("custom-llm/gemini-3-1-pro-high") }) it("should return null for prefixed models without high variant mapping", () => { @@ -411,7 +150,7 @@ describe("think-mode switcher", () => { // given prefixed model IDs with -high suffix expect(isAlreadyHighVariant("vertex_ai/claude-opus-4-6-high")).toBe(true) expect(isAlreadyHighVariant("openai/gpt-5-2-high")).toBe(true) - expect(isAlreadyHighVariant("custom/gemini-3-pro-high")).toBe(true) + expect(isAlreadyHighVariant("custom/gemini-3.1-pro-high")).toBe(true) }) it("should return false for prefixed base models", () => { @@ -426,141 +165,5 @@ describe("think-mode switcher", () => { expect(isAlreadyHighVariant("vertex_ai/gpt-5.2-high")).toBe(true) }) }) - - describe("getThinkingConfig with prefixes", () => { - it("should return null for custom providers (not in THINKING_CONFIGS)", () => { - // given custom provider with prefixed Claude model - const config = getThinkingConfig("dia-llm", "vertex_ai/claude-sonnet-4-6") - - // then should return null (custom provider not in THINKING_CONFIGS) - expect(config).toBeNull() - }) - - it("should work with prefixed models on known providers", () => { - // given known provider (anthropic) with prefixed model - // This tests that the base model name is correctly extracted for capability check - const config = getThinkingConfig("anthropic", "custom-prefix/claude-opus-4-6") - - // then should return thinking config (base model is capable) - expect(config).not.toBeNull() - expect(config?.thinking).toBeDefined() - }) - - it("should return null for prefixed models that are already high", () => { - // given prefixed already-high model - const config = getThinkingConfig("anthropic", "vertex_ai/claude-opus-4-6-high") - - // then should return null - expect(config).toBeNull() - }) - }) - - describe("Real-world custom provider scenario", () => { - it("should handle LLM proxy with vertex_ai prefix correctly", () => { - // given a custom LLM 
proxy provider using vertex_ai/ prefix - const providerID = "dia-llm" - const modelID = "vertex_ai/claude-sonnet-4-6" - - // when getting high variant - const highVariant = getHighVariant(modelID) - - // then should preserve the prefix - expect(highVariant).toBe("vertex_ai/claude-sonnet-4-6-high") - - // #and when checking if already high - expect(isAlreadyHighVariant(modelID)).toBe(false) - expect(isAlreadyHighVariant(highVariant!)).toBe(true) - - // #and when getting thinking config for custom provider - const config = getThinkingConfig(providerID, modelID) - - // then should return null (custom provider, not anthropic) - // This prevents applying incompatible thinking configs to custom providers - expect(config).toBeNull() - }) - - it("should not break when switching to high variant in think mode", () => { - // given think mode switching vertex_ai/claude model to high variant - const original = "vertex_ai/claude-opus-4-6" - const high = getHighVariant(original) - - // then the high variant should be valid - expect(high).toBe("vertex_ai/claude-opus-4-6-high") - - // #and should be recognized as already high - expect(isAlreadyHighVariant(high!)).toBe(true) - - // #and switching again should return null (already high) - expect(getHighVariant(high!)).toBeNull() - }) - }) - }) - - describe("Z.AI GLM-4.7 provider support", () => { - describe("getThinkingConfig for zai-coding-plan", () => { - it("should return thinking config for glm-5", () => { - //#given a Z.ai GLM model - const config = getThinkingConfig("zai-coding-plan", "glm-5") - - //#when thinking config is resolved - - //#then thinking type is "disabled" - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - const zaiOptions = (config?.providerOptions as Record)?.[ - "zai-coding-plan" - ] as Record - expect(zaiOptions?.extra_body).toBeDefined() - const extraBody = zaiOptions?.extra_body as Record - expect(extraBody?.thinking).toBeDefined() - expect((extraBody?.thinking as 
Record)?.type).toBe("disabled") - }) - - it("should return thinking config for glm-4.6v (multimodal)", () => { - // given zai-coding-plan provider with glm-4.6v model - const config = getThinkingConfig("zai-coding-plan", "glm-4.6v") - - // then should return zai-coding-plan thinking config - expect(config).not.toBeNull() - expect(config?.providerOptions).toBeDefined() - }) - - it("should return null for non-GLM models on zai-coding-plan", () => { - // given zai-coding-plan provider with unknown model - const config = getThinkingConfig("zai-coding-plan", "some-other-model") - - // then should return null - expect(config).toBeNull() - }) - }) - - describe("HIGH_VARIANT_MAP for GLM", () => { - it("should NOT have high variant for glm-5", () => { - // given glm-5 model - const variant = getHighVariant("glm-5") - - // then should return null (no high variant needed) - expect(variant).toBeNull() - }) - - it("should NOT have high variant for glm-4.6v", () => { - // given glm-4.6v model - const variant = getHighVariant("glm-4.6v") - - // then should return null - expect(variant).toBeNull() - }) - }) - }) - - describe("THINKING_CONFIGS structure for zai-coding-plan", () => { - it("should have correct structure for zai-coding-plan", () => { - const config = THINKING_CONFIGS["zai-coding-plan"] - expect(config.providerOptions).toBeDefined() - const zaiOptions = (config.providerOptions as Record)?.[ - "zai-coding-plan" - ] as Record - expect(zaiOptions?.extra_body).toBeDefined() - }) - }) +}) }) diff --git a/src/hooks/think-mode/switcher.ts b/src/hooks/think-mode/switcher.ts index 8d88506d6..0a1a1dd38 100644 --- a/src/hooks/think-mode/switcher.ts +++ b/src/hooks/think-mode/switcher.ts @@ -53,35 +53,7 @@ function normalizeModelID(modelID: string): string { return modelID.replace(/\.(\d+)/g, "-$1") } -/** - * Resolves proxy providers (like github-copilot) to their underlying provider. 
- * This allows GitHub Copilot to inherit thinking configurations from the actual - * model provider (Anthropic, Google, OpenAI). - * - * @example - * resolveProvider("github-copilot", "claude-opus-4-6") // "anthropic" - * resolveProvider("github-copilot", "gemini-3-pro") // "google" - * resolveProvider("github-copilot", "gpt-5.2") // "openai" - * resolveProvider("anthropic", "claude-opus-4-6") // "anthropic" (unchanged) - */ -function resolveProvider(providerID: string, modelID: string): string { - // GitHub Copilot is a proxy - infer actual provider from model name - if (providerID === "github-copilot") { - const modelLower = modelID.toLowerCase() - if (modelLower.includes("claude")) return "anthropic" - if (modelLower.includes("gemini")) return "google" - if ( - modelLower.includes("gpt") || - modelLower.includes("o1") || - modelLower.includes("o3") - ) { - return "openai" - } - } - // Direct providers or unknown - return as-is - return providerID -} // Maps model IDs to their "high reasoning" variant (internal convention) // For OpenAI models, this signals that reasoning_effort should be set to "high" @@ -90,8 +62,8 @@ const HIGH_VARIANT_MAP: Record = { "claude-sonnet-4-6": "claude-sonnet-4-6-high", "claude-opus-4-6": "claude-opus-4-6-high", // Gemini - "gemini-3-pro": "gemini-3-pro-high", - "gemini-3-pro-low": "gemini-3-pro-high", + "gemini-3-1-pro": "gemini-3-1-pro-high", + "gemini-3-1-pro-low": "gemini-3-1-pro-high", "gemini-3-flash": "gemini-3-flash-high", // GPT-5 "gpt-5": "gpt-5-high", @@ -110,77 +82,12 @@ const HIGH_VARIANT_MAP: Record = { "gpt-5-2-chat-latest": "gpt-5-2-chat-latest-high", "gpt-5-2-pro": "gpt-5-2-pro-high", // Antigravity (Google) - "antigravity-gemini-3-pro": "antigravity-gemini-3-pro-high", + "antigravity-gemini-3-1-pro": "antigravity-gemini-3-1-pro-high", "antigravity-gemini-3-flash": "antigravity-gemini-3-flash-high", } const ALREADY_HIGH: Set = new Set(Object.values(HIGH_VARIANT_MAP)) -export const THINKING_CONFIGS = { - anthropic: 
{ - thinking: { - type: "enabled", - budgetTokens: 64000, - }, - maxTokens: 128000, - }, - "google-vertex-anthropic": { - thinking: { - type: "enabled", - budgetTokens: 64000, - }, - maxTokens: 128000, - }, - "amazon-bedrock": { - reasoningConfig: { - type: "enabled", - budgetTokens: 32000, - }, - maxTokens: 64000, - }, - google: { - providerOptions: { - google: { - thinkingConfig: { - thinkingLevel: "HIGH", - }, - }, - }, - }, - "google-vertex": { - providerOptions: { - "google-vertex": { - thinkingConfig: { - thinkingLevel: "HIGH", - }, - }, - }, - }, - openai: { - reasoning_effort: "high", - }, - "zai-coding-plan": { - providerOptions: { - "zai-coding-plan": { - extra_body: { - thinking: { - type: "disabled", - }, - }, - }, - }, - }, -} as const satisfies Record> - -const THINKING_CAPABLE_MODELS = { - anthropic: ["claude-sonnet-4", "claude-opus-4", "claude-3"], - "google-vertex-anthropic": ["claude-sonnet-4", "claude-opus-4", "claude-3"], - "amazon-bedrock": ["claude", "anthropic"], - google: ["gemini-2", "gemini-3"], - "google-vertex": ["gemini-2", "gemini-3"], - openai: ["gpt-5", "o1", "o3"], - "zai-coding-plan": ["glm"], -} as const satisfies Record export function getHighVariant(modelID: string): string | null { const normalized = normalizeModelID(modelID) @@ -207,37 +114,3 @@ export function isAlreadyHighVariant(modelID: string): boolean { return ALREADY_HIGH.has(base) || base.endsWith("-high") } -type ThinkingProvider = keyof typeof THINKING_CONFIGS - -function isThinkingProvider(provider: string): provider is ThinkingProvider { - return provider in THINKING_CONFIGS -} - -export function getThinkingConfig( - providerID: string, - modelID: string -): Record | null { - const normalized = normalizeModelID(modelID) - const { base } = extractModelPrefix(normalized) - - if (isAlreadyHighVariant(normalized)) { - return null - } - - const resolvedProvider = resolveProvider(providerID, modelID) - - if (!isThinkingProvider(resolvedProvider)) { - return null - } - - 
const config = THINKING_CONFIGS[resolvedProvider] - const capablePatterns = THINKING_CAPABLE_MODELS[resolvedProvider] - - // Check capability using base model name (without prefix) - const baseLower = base.toLowerCase() - const isCapable = capablePatterns.some((pattern) => - baseLower.includes(pattern.toLowerCase()) - ) - - return isCapable ? config : null -} diff --git a/src/hooks/think-mode/types.ts b/src/hooks/think-mode/types.ts index b17d654d7..a24f1ccab 100644 --- a/src/hooks/think-mode/types.ts +++ b/src/hooks/think-mode/types.ts @@ -1,21 +1,16 @@ export interface ThinkModeState { requested: boolean modelSwitched: boolean - thinkingConfigInjected: boolean + variantSet: boolean providerID?: string modelID?: string } -export interface ModelRef { +interface ModelRef { providerID: string modelID: string } -export interface MessageWithModel { +interface MessageWithModel { model?: ModelRef } - -export interface ThinkModeInput { - parts: Array<{ type: string; text?: string }> - message: MessageWithModel -} diff --git a/src/hooks/todo-continuation-enforcer/constants.ts b/src/hooks/todo-continuation-enforcer/constants.ts index db4d7b1cc..39799c531 100644 --- a/src/hooks/todo-continuation-enforcer/constants.ts +++ b/src/hooks/todo-continuation-enforcer/constants.ts @@ -17,6 +17,6 @@ export const TOAST_DURATION_MS = 900 export const COUNTDOWN_GRACE_PERIOD_MS = 500 export const ABORT_WINDOW_MS = 3000 -export const CONTINUATION_COOLDOWN_MS = 30_000 +export const CONTINUATION_COOLDOWN_MS = 5_000 export const MAX_CONSECUTIVE_FAILURES = 5 export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000 diff --git a/src/hooks/todo-continuation-enforcer/idle-event.ts b/src/hooks/todo-continuation-enforcer/idle-event.ts index 10708d1a3..1f944db59 100644 --- a/src/hooks/todo-continuation-enforcer/idle-event.ts +++ b/src/hooks/todo-continuation-enforcer/idle-event.ts @@ -15,6 +15,7 @@ import { MAX_CONSECUTIVE_FAILURES, } from "./constants" import { isLastAssistantMessageAborted } from 
"./abort-detection" +import { hasUnansweredQuestion } from "./pending-question-detection" import { getIncompleteCount } from "./todo" import type { MessageInfo, ResolvedMessageInfo, Todo } from "./types" import type { SessionStateStore } from "./session-state" @@ -74,6 +75,10 @@ export async function handleSessionIdle(args: { log(`[${HOOK_NAME}] Skipped: last assistant message was aborted (API fallback)`, { sessionID }) return } + if (hasUnansweredQuestion(messages)) { + log(`[${HOOK_NAME}] Skipped: pending question awaiting user response`, { sessionID }) + return + } } catch (error) { log(`[${HOOK_NAME}] Messages fetch failed, continuing`, { sessionID, error: String(error) }) } diff --git a/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts b/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts new file mode 100644 index 000000000..5ea4b214c --- /dev/null +++ b/src/hooks/todo-continuation-enforcer/pending-question-detection.test.ts @@ -0,0 +1,100 @@ +/// +import { describe, expect, test } from "bun:test" + +import { hasUnansweredQuestion } from "./pending-question-detection" + +describe("hasUnansweredQuestion", () => { + test("given empty messages, returns false", () => { + expect(hasUnansweredQuestion([])).toBe(false) + }) + + test("given null-ish input, returns false", () => { + expect(hasUnansweredQuestion(undefined as never)).toBe(false) + }) + + test("given last assistant message with question tool_use, returns true", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given last assistant message with question tool-invocation, returns true", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool-invocation", toolName: "question" }, + ], + }, + ] + 
expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given user message after question (answered), returns false", () => { + const messages = [ + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + { info: { role: "user" } }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given assistant message with non-question tool, returns false", () => { + const messages = [ + { info: { role: "user" } }, + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "bash" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given assistant message with no parts, returns false", () => { + const messages = [ + { info: { role: "user" } }, + { info: { role: "assistant" } }, + ] + expect(hasUnansweredQuestion(messages)).toBe(false) + }) + + test("given role on message directly (not in info), returns true for question", () => { + const messages = [ + { role: "user" }, + { + role: "assistant", + parts: [ + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) + + test("given mixed tools including question, returns true", () => { + const messages = [ + { + info: { role: "assistant" }, + parts: [ + { type: "tool_use", name: "bash" }, + { type: "tool_use", name: "question" }, + ], + }, + ] + expect(hasUnansweredQuestion(messages)).toBe(true) + }) +}) diff --git a/src/hooks/todo-continuation-enforcer/pending-question-detection.ts b/src/hooks/todo-continuation-enforcer/pending-question-detection.ts new file mode 100644 index 000000000..fd97b6c35 --- /dev/null +++ b/src/hooks/todo-continuation-enforcer/pending-question-detection.ts @@ -0,0 +1,40 @@ +import { log } from "../../shared/logger" +import { HOOK_NAME } from "./constants" + +interface MessagePart { + type: string + name?: string + toolName?: string +} + +interface Message { + info?: { role?: string } + role?: string + parts?: MessagePart[] +} + 
+export function hasUnansweredQuestion(messages: Message[]): boolean { + if (!messages || messages.length === 0) return false + + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i] + const role = msg.info?.role ?? msg.role + + if (role === "user") return false + + if (role === "assistant" && msg.parts) { + const hasQuestion = msg.parts.some( + (part) => + (part.type === "tool_use" || part.type === "tool-invocation") && + (part.name === "question" || part.toolName === "question"), + ) + if (hasQuestion) { + log(`[${HOOK_NAME}] Detected pending question tool in last assistant message`) + return true + } + return false + } + } + + return false +} diff --git a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts index 19d2222f6..f8e7be079 100644 --- a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts +++ b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts @@ -297,6 +297,31 @@ describe("todo-continuation-enforcer", () => { expect(promptCalls).toHaveLength(0) }) + test("should not inject when remaining todos are blocked or deleted", async () => { + // given - session where non-completed todos are only blocked/deleted + const sessionID = "main-blocked-deleted" + setMainSession(sessionID) + + const mockInput = createMockPluginInput() + mockInput.client.session.todo = async () => ({ data: [ + { id: "1", content: "Blocked task", status: "blocked", priority: "high" }, + { id: "2", content: "Deleted task", status: "deleted", priority: "medium" }, + { id: "3", content: "Done task", status: "completed", priority: "low" }, + ]}) + + const hook = createTodoContinuationEnforcer(mockInput, {}) + + // when - session goes idle + await hook.handler({ + event: { type: "session.idle", properties: { sessionID } }, + }) + + await fakeTimers.advanceBy(3000) + + // then - no continuation injected + expect(promptCalls).toHaveLength(0) 
+ }) + test("should not inject when background tasks are running", async () => { // given - session with running background tasks const sessionID = "main-789" @@ -1663,7 +1688,6 @@ describe("todo-continuation-enforcer", () => { test("should cancel all countdowns via cancelAllCountdowns", async () => { // given - multiple sessions with running countdowns const session1 = "main-cancel-all-1" - const session2 = "main-cancel-all-2" setMainSession(session1) const hook = createTodoContinuationEnforcer(createMockPluginInput(), {}) diff --git a/src/hooks/todo-continuation-enforcer/todo.ts b/src/hooks/todo-continuation-enforcer/todo.ts index dbc6f5b61..1847cb527 100644 --- a/src/hooks/todo-continuation-enforcer/todo.ts +++ b/src/hooks/todo-continuation-enforcer/todo.ts @@ -1,5 +1,11 @@ import type { Todo } from "./types" export function getIncompleteCount(todos: Todo[]): number { - return todos.filter((todo) => todo.status !== "completed" && todo.status !== "cancelled").length + return todos.filter( + (todo) => + todo.status !== "completed" + && todo.status !== "cancelled" + && todo.status !== "blocked" + && todo.status !== "deleted", + ).length } diff --git a/src/plugin-config.test.ts b/src/plugin-config.test.ts index 549c5f1e8..5e2cd08aa 100644 --- a/src/plugin-config.test.ts +++ b/src/plugin-config.test.ts @@ -32,7 +32,7 @@ describe("mergeConfigs", () => { temperature: 0.3, }, visual: { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", }, }, } as unknown as OhMyOpenCodeConfig; @@ -46,7 +46,7 @@ describe("mergeConfigs", () => { // then quick should be preserved from base expect(result.categories?.quick?.model).toBe("anthropic/claude-haiku-4-5"); // then visual should be added from override - expect(result.categories?.visual?.model).toBe("google/gemini-3-pro"); + expect(result.categories?.visual?.model).toBe("google/gemini-3.1-pro"); }); it("should preserve base categories when override has no categories", () => { diff --git 
a/src/plugin-handlers/agent-config-handler.ts b/src/plugin-handlers/agent-config-handler.ts index 088bb1d06..7d8893be8 100644 --- a/src/plugin-handlers/agent-config-handler.ts +++ b/src/plugin-handlers/agent-config-handler.ts @@ -135,7 +135,14 @@ export async function applyAgentConfig(params: { useTaskSystem, disableOmoEnv, ); + const disabledAgentNames = new Set( + (migratedDisabledAgents ?? []).map(a => a.toLowerCase()) + ); + const filterDisabledAgents = (agents: Record) => + Object.fromEntries( + Object.entries(agents).filter(([name]) => !disabledAgentNames.has(name.toLowerCase())) + ); const isSisyphusEnabled = params.pluginConfig.sisyphus_agent?.disabled !== true; const builderEnabled = params.pluginConfig.sisyphus_agent?.default_builder_enabled ?? false; @@ -223,9 +230,9 @@ export async function applyAgentConfig(params: { ...Object.fromEntries( Object.entries(builtinAgents).filter(([key]) => key !== "sisyphus"), ), - ...userAgents, - ...projectAgents, - ...pluginAgents, + ...filterDisabledAgents(userAgents), + ...filterDisabledAgents(projectAgents), + ...filterDisabledAgents(pluginAgents), ...filteredConfigAgents, build: { ...migratedBuild, mode: "subagent", hidden: true }, ...(planDemoteConfig ? 
{ plan: planDemoteConfig } : {}), @@ -233,9 +240,9 @@ export async function applyAgentConfig(params: { } else { params.config.agent = { ...builtinAgents, - ...userAgents, - ...projectAgents, - ...pluginAgents, + ...filterDisabledAgents(userAgents), + ...filterDisabledAgents(projectAgents), + ...filterDisabledAgents(pluginAgents), ...configAgent, }; } diff --git a/src/plugin-handlers/agent-key-remapper.test.ts b/src/plugin-handlers/agent-key-remapper.test.ts index fe78ea739..179f54d10 100644 --- a/src/plugin-handlers/agent-key-remapper.test.ts +++ b/src/plugin-handlers/agent-key-remapper.test.ts @@ -2,7 +2,7 @@ import { describe, it, expect } from "bun:test" import { remapAgentKeysToDisplayNames } from "./agent-key-remapper" describe("remapAgentKeysToDisplayNames", () => { - it("remaps known agent keys to display names", () => { + it("remaps known agent keys to display names while preserving original keys", () => { // given agents with lowercase keys const agents = { sisyphus: { prompt: "test", mode: "primary" }, @@ -12,10 +12,11 @@ describe("remapAgentKeysToDisplayNames", () => { // when remapping const result = remapAgentKeysToDisplayNames(agents) - // then known agents get display name keys + // then known agents get display name keys and original keys remain accessible expect(result["Sisyphus (Ultraworker)"]).toBeDefined() expect(result["oracle"]).toBeDefined() - expect(result["sisyphus"]).toBeUndefined() + expect(result["sisyphus"]).toBeDefined() + expect(result["Sisyphus (Ultraworker)"]).toBe(result["sisyphus"]) }) it("preserves unknown agent keys unchanged", () => { @@ -31,7 +32,7 @@ describe("remapAgentKeysToDisplayNames", () => { expect(result["custom-agent"]).toBeDefined() }) - it("remaps all core agents", () => { + it("remaps all core agents while preserving original keys", () => { // given all core agents const agents = { sisyphus: {}, @@ -46,15 +47,20 @@ describe("remapAgentKeysToDisplayNames", () => { // when remapping const result = 
remapAgentKeysToDisplayNames(agents) - // then all get display name keys - expect(Object.keys(result)).toEqual([ - "Sisyphus (Ultraworker)", - "Hephaestus (Deep Agent)", - "Prometheus (Plan Builder)", - "Atlas (Plan Executor)", - "Metis (Plan Consultant)", - "Momus (Plan Critic)", - "Sisyphus-Junior", - ]) + // then all get display name keys while original keys still work + expect(result["Sisyphus (Ultraworker)"]).toBeDefined() + expect(result["sisyphus"]).toBeDefined() + expect(result["Hephaestus (Deep Agent)"]).toBeDefined() + expect(result["hephaestus"]).toBeDefined() + expect(result["Prometheus (Plan Builder)"]).toBeDefined() + expect(result["prometheus"]).toBeDefined() + expect(result["Atlas (Plan Executor)"]).toBeDefined() + expect(result["atlas"]).toBeDefined() + expect(result["Metis (Plan Consultant)"]).toBeDefined() + expect(result["metis"]).toBeDefined() + expect(result["Momus (Plan Critic)"]).toBeDefined() + expect(result["momus"]).toBeDefined() + expect(result["Sisyphus-Junior"]).toBeDefined() + expect(result["sisyphus-junior"]).toBeDefined() }) }) diff --git a/src/plugin-handlers/agent-key-remapper.ts b/src/plugin-handlers/agent-key-remapper.ts index dd2a127e0..c60bcfcb9 100644 --- a/src/plugin-handlers/agent-key-remapper.ts +++ b/src/plugin-handlers/agent-key-remapper.ts @@ -9,6 +9,7 @@ export function remapAgentKeysToDisplayNames( const displayName = AGENT_DISPLAY_NAMES[key] if (displayName && displayName !== key) { result[displayName] = value + result[key] = value } else { result[key] = value } diff --git a/src/plugin-handlers/config-handler-formatter.test.ts b/src/plugin-handlers/config-handler-formatter.test.ts new file mode 100644 index 000000000..d8fb8494f --- /dev/null +++ b/src/plugin-handlers/config-handler-formatter.test.ts @@ -0,0 +1,120 @@ +import { afterEach, beforeEach, describe, expect, spyOn, test } from "bun:test" + +import type { OhMyOpenCodeConfig } from "../config" +import { createConfigHandler } from "./config-handler" +import * 
as agentConfigHandler from "./agent-config-handler" +import * as commandConfigHandler from "./command-config-handler" +import * as mcpConfigHandler from "./mcp-config-handler" +import * as pluginComponentsLoader from "./plugin-components-loader" +import * as providerConfigHandler from "./provider-config-handler" +import * as shared from "../shared" +import * as toolConfigHandler from "./tool-config-handler" + +let logSpy: ReturnType +let loadPluginComponentsSpy: ReturnType +let applyAgentConfigSpy: ReturnType +let applyToolConfigSpy: ReturnType +let applyMcpConfigSpy: ReturnType +let applyCommandConfigSpy: ReturnType +let applyProviderConfigSpy: ReturnType + +beforeEach(() => { + logSpy = spyOn(shared, "log").mockImplementation(() => {}) + loadPluginComponentsSpy = spyOn( + pluginComponentsLoader, + "loadPluginComponents", + ).mockResolvedValue({ + commands: {}, + skills: {}, + agents: {}, + mcpServers: {}, + hooksConfigs: [], + plugins: [], + errors: [], + }) + applyAgentConfigSpy = spyOn(agentConfigHandler, "applyAgentConfig").mockResolvedValue( + {}, + ) + applyToolConfigSpy = spyOn(toolConfigHandler, "applyToolConfig").mockImplementation( + () => {}, + ) + applyMcpConfigSpy = spyOn(mcpConfigHandler, "applyMcpConfig").mockResolvedValue() + applyCommandConfigSpy = spyOn( + commandConfigHandler, + "applyCommandConfig", + ).mockResolvedValue() + applyProviderConfigSpy = spyOn( + providerConfigHandler, + "applyProviderConfig", + ).mockImplementation(() => {}) +}) + +afterEach(() => { + logSpy.mockRestore() + loadPluginComponentsSpy.mockRestore() + applyAgentConfigSpy.mockRestore() + applyToolConfigSpy.mockRestore() + applyMcpConfigSpy.mockRestore() + applyCommandConfigSpy.mockRestore() + applyProviderConfigSpy.mockRestore() +}) + +describe("createConfigHandler formatter pass-through", () => { + test("preserves formatter object configured in opencode config", async () => { + // given + const pluginConfig: OhMyOpenCodeConfig = {} + const formatterConfig = { + 
prettier: { + command: ["prettier", "--write"], + extensions: [".ts", ".tsx"], + environment: { + PRETTIERD_DEFAULT_CONFIG: ".prettierrc", + }, + }, + eslint: { + disabled: false, + command: ["eslint", "--fix"], + extensions: [".js", ".ts"], + }, + } + const config: Record = { + formatter: formatterConfig, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + // when + await handler(config) + + // then + expect(config.formatter).toEqual(formatterConfig) + }) + + test("preserves formatter=false configured in opencode config", async () => { + // given + const pluginConfig: OhMyOpenCodeConfig = {} + const config: Record = { + formatter: false, + } + const handler = createConfigHandler({ + ctx: { directory: "/tmp" }, + pluginConfig, + modelCacheState: { + anthropicContext1MEnabled: false, + modelContextLimitsCache: new Map(), + }, + }) + + // when + await handler(config) + + // then + expect(config.formatter).toBe(false) + }) +}) diff --git a/src/plugin-handlers/config-handler.test.ts b/src/plugin-handlers/config-handler.test.ts index 264460f1d..6896898c2 100644 --- a/src/plugin-handlers/config-handler.test.ts +++ b/src/plugin-handlers/config-handler.test.ts @@ -823,7 +823,7 @@ describe("Prometheus category config resolution", () => { // then expect(config).toBeDefined() - expect(config?.model).toBe("google/gemini-3-pro") + expect(config?.model).toBe("google/gemini-3.1-pro") }) test("user categories override default categories", () => { diff --git a/src/plugin-handlers/config-handler.ts b/src/plugin-handlers/config-handler.ts index e9b814a4f..47050300f 100644 --- a/src/plugin-handlers/config-handler.ts +++ b/src/plugin-handlers/config-handler.ts @@ -20,6 +20,8 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { const { ctx, pluginConfig, modelCacheState } = deps; return async (config: Record) => { + const 
formatterConfig = config.formatter; + applyProviderConfig({ config, modelCacheState }); const pluginComponents = await loadPluginComponents({ pluginConfig }); @@ -35,6 +37,8 @@ export function createConfigHandler(deps: ConfigHandlerDeps) { await applyMcpConfig({ config, pluginConfig, pluginComponents }); await applyCommandConfig({ config, pluginConfig, ctx, pluginComponents }); + config.formatter = formatterConfig; + log("[config-handler] config handler applied", { agentCount: Object.keys(agentResult).length, commandCount: Object.keys((config.command as Record) ?? {}) diff --git a/src/plugin/chat-message.test.ts b/src/plugin/chat-message.test.ts index 8cebd6b43..a10968303 100644 --- a/src/plugin/chat-message.test.ts +++ b/src/plugin/chat-message.test.ts @@ -19,6 +19,7 @@ function createMockHandlerArgs(overrides?: { }, hooks: { stopContinuationGuard: null, + backgroundNotificationHook: null, keywordDetector: null, claudeCodeHooks: null, autoSlashCommand: null, @@ -115,4 +116,30 @@ describe("createChatMessageHandler - TUI variant passthrough", () => { //#then - gate should still be marked as applied expect(args._appliedSessions).toContain("test-session") }) + + test("injects queued background notifications through chat.message hook", async () => { + //#given + const args = createMockHandlerArgs() + args.hooks.backgroundNotificationHook = { + "chat.message": async ( + _input: { sessionID: string }, + output: ChatMessageHandlerOutput, + ): Promise => { + output.parts.push({ + type: "text", + text: "[BACKGROUND TASK COMPLETED]", + }) + }, + } + const handler = createChatMessageHandler(args) + const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" }) + const output = createMockOutput() + + //#when + await handler(input, output) + + //#then + expect(output.parts).toHaveLength(1) + expect(output.parts[0].text).toContain("[BACKGROUND TASK COMPLETED]") + }) }) diff --git a/src/plugin/chat-message.ts b/src/plugin/chat-message.ts index 
f3c02297f..2cc55c892 100644 --- a/src/plugin/chat-message.ts +++ b/src/plugin/chat-message.ts @@ -97,8 +97,10 @@ export function createChatMessageHandler(args: { setSessionModel(input.sessionID, input.model) } await hooks.stopContinuationGuard?.["chat.message"]?.(input) + await hooks.backgroundNotificationHook?.["chat.message"]?.(input, output) await hooks.runtimeFallback?.["chat.message"]?.(input, output) await hooks.keywordDetector?.["chat.message"]?.(input, output) + await hooks.thinkMode?.["chat.message"]?.(input, output) await hooks.claudeCodeHooks?.["chat.message"]?.(input, output) await hooks.autoSlashCommand?.["chat.message"]?.(input, output) await hooks.noSisyphusGpt?.["chat.message"]?.(input, output) diff --git a/src/plugin/hooks/create-continuation-hooks.ts b/src/plugin/hooks/create-continuation-hooks.ts index 96bf5de0c..da453f58d 100644 --- a/src/plugin/hooks/create-continuation-hooks.ts +++ b/src/plugin/hooks/create-continuation-hooks.ts @@ -49,7 +49,10 @@ export function createContinuationHooks(args: { safeCreateHook(hookName, factory, { enabled: safeHookEnabled }) const stopContinuationGuard = isHookEnabled("stop-continuation-guard") - ? safeHook("stop-continuation-guard", () => createStopContinuationGuardHook(ctx)) + ? safeHook("stop-continuation-guard", () => + createStopContinuationGuardHook(ctx, { + backgroundManager, + })) : null const compactionContextInjector = isHookEnabled("compaction-context-injector") diff --git a/src/plugin/hooks/create-session-hooks.ts b/src/plugin/hooks/create-session-hooks.ts index daa4e12e0..daa5e4ff5 100644 --- a/src/plugin/hooks/create-session-hooks.ts +++ b/src/plugin/hooks/create-session-hooks.ts @@ -232,7 +232,10 @@ export function createSessionHooks(args: { : null const noHephaestusNonGpt = isHookEnabled("no-hephaestus-non-gpt") - ? safeHook("no-hephaestus-non-gpt", () => createNoHephaestusNonGptHook(ctx)) + ? 
safeHook("no-hephaestus-non-gpt", () => + createNoHephaestusNonGptHook(ctx, { + allowNonGptModel: pluginConfig.agents?.hephaestus?.allow_non_gpt_model, + })) : null const questionLabelTruncator = isHookEnabled("question-label-truncator") diff --git a/src/plugin/ultrawork-db-model-override.ts b/src/plugin/ultrawork-db-model-override.ts index 17d84a928..9009fb066 100644 --- a/src/plugin/ultrawork-db-model-override.ts +++ b/src/plugin/ultrawork-db-model-override.ts @@ -21,11 +21,10 @@ function tryUpdateMessageModel( ) const result = stmt.run(targetModel.providerID, targetModel.modelID, messageId) if (result.changes === 0) return false - if (variant) { db.prepare( - `UPDATE message SET data = json_set(data, '$.variant', ?, '$.thinking', ?) WHERE id = ?`, - ).run(variant, variant, messageId) + `UPDATE message SET data = json_set(data, '$.variant', ?) WHERE id = ?`, + ).run(variant, messageId) } return true } diff --git a/src/plugin/ultrawork-model-override.test.ts b/src/plugin/ultrawork-model-override.test.ts index 4f167e963..26dae2415 100644 --- a/src/plugin/ultrawork-model-override.test.ts +++ b/src/plugin/ultrawork-model-override.test.ts @@ -279,6 +279,30 @@ describe("applyUltraworkModelOverrideOnMessage", () => { ) }) + test("should override keyword-detector variant with configured ultrawork variant on deferred path", () => { + //#given + const config = createConfig("sisyphus", { + model: "anthropic/claude-opus-4-6", + variant: "extended", + }) + const output = createOutput("ultrawork do something", { messageId: "msg_123" }) + output.message["variant"] = "max" + output.message["thinking"] = "max" + const tui = createMockTui() + + //#when + applyUltraworkModelOverrideOnMessage(config, "sisyphus", output, tui) + + //#then + expect(dbOverrideSpy).toHaveBeenCalledWith( + "msg_123", + { providerID: "anthropic", modelID: "claude-opus-4-6" }, + "extended", + ) + expect(output.message["variant"]).toBe("extended") + expect(output.message["thinking"]).toBe("extended") + }) + 
test("should NOT mutate output.message.model when message ID present", () => { //#given const sonnetModel = { providerID: "anthropic", modelID: "claude-sonnet-4-6" } @@ -308,7 +332,6 @@ describe("applyUltraworkModelOverrideOnMessage", () => { //#then expect(output.message.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" }) expect(output.message["variant"]).toBe("max") - expect(output.message["thinking"]).toBe("max") expect(dbOverrideSpy).not.toHaveBeenCalled() }) @@ -324,7 +347,6 @@ describe("applyUltraworkModelOverrideOnMessage", () => { //#then expect(output.message.model).toBeUndefined() expect(output.message["variant"]).toBe("high") - expect(output.message["thinking"]).toBe("high") expect(dbOverrideSpy).not.toHaveBeenCalled() }) diff --git a/src/plugin/ultrawork-model-override.ts b/src/plugin/ultrawork-model-override.ts index f6aa87bd2..736926bf6 100644 --- a/src/plugin/ultrawork-model-override.ts +++ b/src/plugin/ultrawork-model-override.ts @@ -114,11 +114,12 @@ export function applyUltraworkModelOverrideOnMessage( const override = resolveUltraworkOverride(pluginConfig, inputAgentName, output, sessionID) if (!override) return + if (override.variant) { + output.message["variant"] = override.variant + output.message["thinking"] = override.variant + } + if (!override.providerID || !override.modelID) { - if (override.variant) { - output.message["variant"] = override.variant - output.message["thinking"] = override.variant - } return } @@ -132,11 +133,8 @@ export function applyUltraworkModelOverrideOnMessage( if (!messageId) { log("[ultrawork-model-override] No message ID found, falling back to direct mutation") output.message.model = targetModel - if (override.variant) { - output.message["variant"] = override.variant - output.message["thinking"] = override.variant - } return + } const fromModel = (output.message.model as { modelID?: string } | undefined)?.modelID ?? 
"unknown" diff --git a/src/shared/migration.test.ts b/src/shared/migration.test.ts index 7846cc725..eb3d1d101 100644 --- a/src/shared/migration.test.ts +++ b/src/shared/migration.test.ts @@ -774,7 +774,7 @@ describe("migrateAgentConfigToCategory", () => { test("migrates model to category when mapping exists", () => { // given: Config with a model that has a category mapping const config = { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", temperature: 0.5, top_p: 0.9, } @@ -823,7 +823,7 @@ describe("migrateAgentConfigToCategory", () => { test("handles all mapped models correctly", () => { // given: Configs for each mapped model const configs = [ - { model: "google/gemini-3-pro" }, + { model: "google/gemini-3.1-pro" }, { model: "google/gemini-3-flash" }, { model: "openai/gpt-5.2" }, { model: "anthropic/claude-haiku-4-5" }, @@ -893,7 +893,7 @@ describe("shouldDeleteAgentConfig", () => { // given: Config with fields matching category defaults const config = { category: "visual-engineering", - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", } // when: Check if config should be deleted @@ -1021,7 +1021,7 @@ describe("migrateConfigFile with backup", () => { agents: { "multimodal-looker": { model: "anthropic/claude-haiku-4-5" }, oracle: { model: "openai/gpt-5.2" }, - "my-custom-agent": { model: "google/gemini-3-pro" }, + "my-custom-agent": { model: "google/gemini-3.1-pro" }, }, } @@ -1037,7 +1037,7 @@ describe("migrateConfigFile with backup", () => { const agents = rawConfig.agents as Record> expect(agents["multimodal-looker"].model).toBe("anthropic/claude-haiku-4-5") expect(agents.oracle.model).toBe("openai/gpt-5.2") - expect(agents["my-custom-agent"].model).toBe("google/gemini-3-pro") + expect(agents["my-custom-agent"].model).toBe("google/gemini-3.1-pro") }) test("preserves category setting when explicitly set", () => { diff --git a/src/shared/migration/agent-category.ts b/src/shared/migration/agent-category.ts index 51aac23d7..8b7df75a4 
100644 --- a/src/shared/migration/agent-category.ts +++ b/src/shared/migration/agent-category.ts @@ -12,7 +12,7 @@ * This map will be removed in a future major version once migration period ends. */ export const MODEL_TO_CATEGORY_MAP: Record = { - "google/gemini-3-pro": "visual-engineering", + "google/gemini-3.1-pro": "visual-engineering", "google/gemini-3-flash": "writing", "openai/gpt-5.2": "ultrabrain", "anthropic/claude-haiku-4-5": "quick", diff --git a/src/shared/model-availability.test.ts b/src/shared/model-availability.test.ts index 23a3f00f6..cb469b960 100644 --- a/src/shared/model-availability.test.ts +++ b/src/shared/model-availability.test.ts @@ -63,7 +63,7 @@ describe("fetchAvailableModels", () => { writeModelsCache({ openai: { id: "openai", models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { id: "anthropic", models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { id: "google", models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { id: "google", models: { "gemini-3.1-pro": { id: "gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -74,7 +74,7 @@ describe("fetchAvailableModels", () => { expect(result.size).toBe(3) expect(result.has("openai/gpt-5.2")).toBe(true) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) }) it("#given connectedProviders unknown #when fetchAvailableModels called without options #then returns empty Set", async () => { @@ -97,7 +97,7 @@ describe("fetchAvailableModels", () => { list: async () => ({ data: [ { id: "gpt-5.3-codex", provider: "openai" }, - { id: "gemini-3-pro", provider: "google" }, + { id: "gemini-3.1-pro", provider: "google" }, ], }), }, @@ -107,7 +107,7 @@ describe("fetchAvailableModels", () => { expect(result).toBeInstanceOf(Set) expect(result.has("openai/gpt-5.3-codex")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(false) 
+ expect(result.has("google/gemini-3.1-pro")).toBe(false) }) it("#given cache file not found #when fetchAvailableModels called with connectedProviders #then returns empty Set", async () => { @@ -126,7 +126,7 @@ describe("fetchAvailableModels", () => { list: async () => ({ data: [ { id: "gpt-5.3-codex", provider: "openai" }, - { id: "gemini-3-pro", provider: "google" }, + { id: "gemini-3.1-pro", provider: "google" }, ], }), }, @@ -136,7 +136,7 @@ describe("fetchAvailableModels", () => { expect(result).toBeInstanceOf(Set) expect(result.has("openai/gpt-5.3-codex")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) }) it("#given cache read twice #when second call made with same providers #then reads fresh each time", async () => { @@ -515,7 +515,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { writeModelsCache({ openai: { models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { models: { "gemini-3.1-pro": { id: "gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -525,7 +525,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { expect(result.size).toBe(1) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) expect(result.has("openai/gpt-5.2")).toBe(false) - expect(result.has("google/gemini-3-pro")).toBe(false) + expect(result.has("google/gemini-3.1-pro")).toBe(false) }) // given cache with multiple providers @@ -535,7 +535,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { writeModelsCache({ openai: { models: { "gpt-5.2": { id: "gpt-5.2" } } }, anthropic: { models: { "claude-opus-4-6": { id: "claude-opus-4-6" } } }, - google: { models: { "gemini-3-pro": { id: "gemini-3-pro" } } }, + google: { models: { "gemini-3.1-pro": { id: 
"gemini-3.1-pro" } } }, }) const result = await fetchAvailableModels(undefined, { @@ -544,7 +544,7 @@ describe("fetchAvailableModels with connected providers filtering", () => { expect(result.size).toBe(2) expect(result.has("anthropic/claude-opus-4-6")).toBe(true) - expect(result.has("google/gemini-3-pro")).toBe(true) + expect(result.has("google/gemini-3.1-pro")).toBe(true) expect(result.has("openai/gpt-5.2")).toBe(false) }) @@ -759,7 +759,7 @@ describe("fetchAvailableModels with provider-models cache (whitelist-filtered)", models: { opencode: ["big-pickle"], anthropic: ["claude-opus-4-6"], - google: ["gemini-3-pro"] + google: ["gemini-3.1-pro"] }, connected: ["opencode", "anthropic", "google"] }) @@ -771,7 +771,7 @@ describe("fetchAvailableModels with provider-models cache (whitelist-filtered)", expect(result.size).toBe(1) expect(result.has("opencode/big-pickle")).toBe(true) expect(result.has("anthropic/claude-opus-4-6")).toBe(false) - expect(result.has("google/gemini-3-pro")).toBe(false) + expect(result.has("google/gemini-3.1-pro")).toBe(false) }) it("should handle object[] format with metadata (Ollama-style)", async () => { @@ -953,7 +953,7 @@ describe("fallback model availability", () => { { providers: ["openai"], model: "gpt-5.2" }, { providers: ["anthropic"], model: "claude-opus-4-6" }, ] - const availableModels = new Set(["google/gemini-3-pro"]) + const availableModels = new Set(["google/gemini-3.1-pro"]) // when const result = resolveFirstAvailableFallback(fallbackChain, availableModels) diff --git a/src/shared/model-requirements.test.ts b/src/shared/model-requirements.test.ts index 2991775eb..df4cd696a 100644 --- a/src/shared/model-requirements.test.ts +++ b/src/shared/model-requirements.test.ts @@ -168,14 +168,14 @@ describe("AGENT_MODEL_REQUIREMENTS", () => { expect(primary.providers[0]).toBe("opencode") }) - test("hephaestus requires openai/opencode provider (not github-copilot since gpt-5.3-codex unavailable there)", () => { + test("hephaestus supports 
openai, github-copilot, venice, and opencode providers", () => { // #given - hephaestus agent requirement const hephaestus = AGENT_MODEL_REQUIREMENTS["hephaestus"] // #when - accessing hephaestus requirement - // #then - requiresProvider is set to openai and opencode only (github-copilot removed) + // #then - requiresProvider includes openai, github-copilot, venice, and opencode expect(hephaestus).toBeDefined() - expect(hephaestus.requiresProvider).toEqual(["openai", "opencode"]) + expect(hephaestus.requiresProvider).toEqual(["openai", "github-copilot", "venice", "opencode"]) expect(hephaestus.requiresModel).toBeUndefined() }) @@ -248,19 +248,19 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => { expect(primary.providers[0]).toBe("openai") }) - test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => { + test("visual-engineering has valid fallbackChain with gemini-3.1-pro high as primary", () => { // given - visual-engineering category requirement const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"] // when - accessing visual-engineering requirement - // then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) + // then - fallbackChain: gemini-3.1-pro(high) → glm-5 → opus-4-6(max) expect(visualEngineering).toBeDefined() expect(visualEngineering.fallbackChain).toBeArray() expect(visualEngineering.fallbackChain).toHaveLength(3) const primary = visualEngineering.fallbackChain[0] expect(primary.providers[0]).toBe("google") - expect(primary.model).toBe("gemini-3-pro") + expect(primary.model).toBe("gemini-3.1-pro") expect(primary.variant).toBe("high") const second = visualEngineering.fallbackChain[1] @@ -319,39 +319,43 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => { expect(primary.providers).toEqual(["anthropic", "github-copilot", "opencode"]) }) - test("artistry has valid fallbackChain with gemini-3-pro as primary", () => { + test("artistry has valid fallbackChain with gemini-3.1-pro as primary", () => { 
// given - artistry category requirement const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] // when - accessing artistry requirement - // then - fallbackChain exists with gemini-3-pro as first entry + // then - fallbackChain exists with gemini-3.1-pro as first entry expect(artistry).toBeDefined() expect(artistry.fallbackChain).toBeArray() expect(artistry.fallbackChain.length).toBeGreaterThan(0) const primary = artistry.fallbackChain[0] - expect(primary.model).toBe("gemini-3-pro") + expect(primary.model).toBe("gemini-3.1-pro") expect(primary.variant).toBe("high") expect(primary.providers[0]).toBe("google") }) - test("writing has valid fallbackChain with gemini-3-flash as primary", () => { + test("writing has valid fallbackChain with kimi-k2.5-free as primary", () => { // given - writing category requirement const writing = CATEGORY_MODEL_REQUIREMENTS["writing"] // when - accessing writing requirement - // then - fallbackChain: gemini-3-flash → claude-sonnet-4-6 + // then - fallbackChain: kimi-k2.5-free -> gemini-3-flash -> claude-sonnet-4-6 expect(writing).toBeDefined() expect(writing.fallbackChain).toBeArray() - expect(writing.fallbackChain).toHaveLength(2) + expect(writing.fallbackChain).toHaveLength(3) const primary = writing.fallbackChain[0] - expect(primary.model).toBe("gemini-3-flash") - expect(primary.providers[0]).toBe("google") + expect(primary.model).toBe("kimi-k2.5-free") + expect(primary.providers[0]).toBe("opencode") const second = writing.fallbackChain[1] - expect(second.model).toBe("claude-sonnet-4-6") - expect(second.providers[0]).toBe("anthropic") + expect(second.model).toBe("gemini-3-flash") + expect(second.providers[0]).toBe("google") + + const third = writing.fallbackChain[2] + expect(third.model).toBe("claude-sonnet-4-6") + expect(third.providers[0]).toBe("anthropic") }) test("all 8 categories have valid fallbackChain arrays", () => { @@ -489,12 +493,12 @@ describe("requiresModel field in categories", () => { 
expect(deep.requiresModel).toBe("gpt-5.3-codex") }) - test("artistry category has requiresModel set to gemini-3-pro", () => { + test("artistry category has requiresModel set to gemini-3.1-pro", () => { // given const artistry = CATEGORY_MODEL_REQUIREMENTS["artistry"] // when / #then - expect(artistry.requiresModel).toBe("gemini-3-pro") + expect(artistry.requiresModel).toBe("gemini-3.1-pro") }) }) diff --git a/src/shared/model-requirements.ts b/src/shared/model-requirements.ts index 9a795ba76..f8e197ea8 100644 --- a/src/shared/model-requirements.ts +++ b/src/shared/model-requirements.ts @@ -24,14 +24,15 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { }, hephaestus: { fallbackChain: [ - { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["openai", "venice", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, + { providers: ["github-copilot"], model: "gpt-5.2", variant: "medium" }, ], - requiresProvider: ["openai", "opencode"], + requiresProvider: ["openai", "github-copilot", "venice", "opencode"], }, oracle: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -64,7 +65,7 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, metis: { @@ 
-72,14 +73,14 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, momus: { fallbackChain: [ { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], }, atlas: { @@ -94,7 +95,7 @@ export const AGENT_MODEL_REQUIREMENTS: Record = { export const CATEGORY_MODEL_REQUIREMENTS: Record = { "visual-engineering": { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["zai-coding-plan", "opencode"], model: "glm-5" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], @@ -102,7 +103,7 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { ultrabrain: { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "xhigh" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, ], }, @@ -110,17 
+111,17 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { fallbackChain: [ { providers: ["openai", "opencode"], model: "gpt-5.3-codex", variant: "medium" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, ], requiresModel: "gpt-5.3-codex", }, artistry: { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro", variant: "high" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" }, ], - requiresModel: "gemini-3-pro", + requiresModel: "gemini-3.1-pro", }, quick: { fallbackChain: [ @@ -140,11 +141,12 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record = { fallbackChain: [ { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" }, { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2", variant: "high" }, - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], }, writing: { fallbackChain: [ + { providers: ["opencode"], model: "kimi-k2.5-free" }, { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" }, { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-6" }, ], diff --git a/src/shared/model-resolver.test.ts b/src/shared/model-resolver.test.ts index fc828f340..d18b46f5a 100644 --- a/src/shared/model-resolver.test.ts +++ b/src/shared/model-resolver.test.ts @@ -10,7 +10,7 @@ describe("resolveModel", () => { const input: 
ModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -25,7 +25,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: undefined, inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -40,14 +40,14 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: undefined, inheritedModel: undefined, - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when const result = resolveModel(input) // then - expect(result).toBe("google/gemini-3-pro") + expect(result).toBe("google/gemini-3.1-pro") }) }) @@ -57,7 +57,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: "", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -72,14 +72,14 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: " ", inheritedModel: "", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when const result = resolveModel(input) // then - expect(result).toBe("google/gemini-3-pro") + expect(result).toBe("google/gemini-3.1-pro") }) }) @@ -89,7 +89,7 @@ describe("resolveModel", () => { const input: ModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", inheritedModel: "openai/gpt-5.2", - systemDefault: "google/gemini-3-pro", + systemDefault: "google/gemini-3.1-pro", } // when @@ -123,7 +123,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -141,7 
+141,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: "opencode/big-pickle", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -158,7 +158,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: " ", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -175,7 +175,7 @@ describe("resolveModelWithFallback", () => { uiSelectedModel: "", userModel: "anthropic/claude-opus-4-6", availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -195,7 +195,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -215,7 +215,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -234,7 +234,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -252,7 +252,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", 
+ systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -271,7 +271,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6" }, ], availableModels: new Set(["github-copilot/claude-opus-4-6-preview", "opencode/claude-opus-4-7"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -294,8 +294,8 @@ describe("resolveModelWithFallback", () => { fallbackChain: [ { providers: ["openai", "anthropic", "google"], model: "gpt-5.2" }, ], - availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6", "google/gemini-3-pro"]), - systemDefaultModel: "google/gemini-3-pro", + availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6", "google/gemini-3.1-pro"]), + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -313,7 +313,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "opencode"], model: "gpt-5-nano" }, ], availableModels: new Set(["opencode/gpt-5-nano"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -331,7 +331,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "github-copilot"], model: "claude-opus" }, ], availableModels: new Set(["anthropic/claude-opus-4-6", "github-copilot/claude-opus-4-6-preview"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -346,7 +346,7 @@ describe("resolveModelWithFallback", () => { // given const input: ExtendedModelResolutionInput = { availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -361,7 +361,7 @@ describe("resolveModelWithFallback", () => { const input: ExtendedModelResolutionInput = { fallbackChain: [], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: 
"google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -378,7 +378,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "CLAUDE-OPUS" }, ], availableModels: new Set(["anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -397,7 +397,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-sonnet-4-6" }, ], availableModels: new Set(["opencode/glm-5", "anthropic/claude-sonnet-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -420,7 +420,7 @@ describe("resolveModelWithFallback", () => { { providers: ["zai-coding-plan"], model: "glm-5" }, ], availableModels: new Set(["zai-coding-plan/glm-5", "opencode/glm-5"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -438,7 +438,7 @@ describe("resolveModelWithFallback", () => { { providers: ["zai-coding-plan"], model: "glm-5", variant: "high" }, ], availableModels: new Set(["opencode/glm-5"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -457,7 +457,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-sonnet-4-6" }, ], availableModels: new Set(["anthropic/claude-sonnet-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -477,14 +477,14 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "nonexistent-model" }, ], availableModels: new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") 
expect(result!.source).toBe("system-default") expect(logSpy).toHaveBeenCalledWith("No available model found in fallback chain, falling through to system default") }) @@ -516,7 +516,7 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic", "openai"], model: "claude-opus-4-6" }, ], availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -533,7 +533,7 @@ describe("resolveModelWithFallback", () => { const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["github-copilot"]) const input: ExtendedModelResolutionInput = { fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-6", @@ -544,7 +544,7 @@ describe("resolveModelWithFallback", () => { // then - should use github-copilot (second provider) since google not connected // model name is transformed to preview variant for github-copilot provider - expect(result!.model).toBe("github-copilot/gemini-3-pro-preview") + expect(result!.model).toBe("github-copilot/gemini-3.1-pro-preview") expect(result!.source).toBe("provider-fallback") cacheSpy.mockRestore() }) @@ -577,14 +577,14 @@ describe("resolveModelWithFallback", () => { { providers: ["anthropic"], model: "claude-opus-4-6" }, ], availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - should fall through to system default - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("system-default") cacheSpy.mockRestore() }) @@ -593,14 +593,14 @@ describe("resolveModelWithFallback", () => { // given const input: 
ExtendedModelResolutionInput = { availableModels: new Set(["openai/gpt-5.2"]), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when const result = resolveModelWithFallback(input) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("system-default") }) }) @@ -627,20 +627,20 @@ describe("resolveModelWithFallback", () => { test("tries all providers in first entry before moving to second entry", () => { // given - const availableModels = new Set(["google/gemini-3-pro"]) + const availableModels = new Set(["google/gemini-3.1-pro"]) // when const result = resolveModelWithFallback({ fallbackChain: [ { providers: ["openai", "anthropic"], model: "gpt-5.2" }, - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], availableModels, systemDefaultModel: "system/default", }) // then - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") expect(result!.source).toBe("provider-fallback") }) @@ -675,7 +675,7 @@ describe("resolveModelWithFallback", () => { fallbackChain: [ { providers: ["openai"], model: "gpt-5.2" }, { providers: ["anthropic"], model: "claude-opus-4-6" }, - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], availableModels, systemDefaultModel: "system/default", @@ -693,7 +693,7 @@ describe("resolveModelWithFallback", () => { const input: ExtendedModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", availableModels: new Set(), - systemDefaultModel: "google/gemini-3-pro", + systemDefaultModel: "google/gemini-3.1-pro", } // when @@ -708,32 +708,32 @@ describe("resolveModelWithFallback", () => { describe("categoryDefaultModel (fuzzy matching for category defaults)", () => { test("applies fuzzy matching to categoryDefaultModel when userModel not provided", () => { - // 
given - gemini-3-pro is the category default, but only gemini-3-pro-preview is available + // given - gemini-3.1-pro is the category default, but only gemini-3.1-pro-preview is available const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot", "opencode"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro-preview", "anthropic/claude-opus-4-6"]), + availableModels: new Set(["google/gemini-3.1-pro-preview", "anthropic/claude-opus-4-6"]), systemDefaultModel: "anthropic/claude-sonnet-4-6", } // when const result = resolveModelWithFallback(input) - // then - should fuzzy match gemini-3-pro → gemini-3-pro-preview - expect(result!.model).toBe("google/gemini-3-pro-preview") + // then - should fuzzy match gemini-3.1-pro → gemini-3.1-pro-preview + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") }) test("categoryDefaultModel uses exact match when available", () => { // given - exact match exists const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro", "google/gemini-3-pro-preview"]), + availableModels: new Set(["google/gemini-3.1-pro", "google/gemini-3.1-pro-preview"]), systemDefaultModel: "anthropic/claude-sonnet-4-6", } @@ -741,14 +741,14 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should use exact match - expect(result!.model).toBe("google/gemini-3-pro") + expect(result!.model).toBe("google/gemini-3.1-pro") 
expect(result!.source).toBe("category-default") }) test("categoryDefaultModel falls through to fallbackChain when no match in availableModels", () => { // given - categoryDefaultModel has no match, but fallbackChain does const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ { providers: ["anthropic"], model: "claude-opus-4-6" }, ], @@ -768,11 +768,11 @@ describe("resolveModelWithFallback", () => { // given - both userModel and categoryDefaultModel provided const input: ExtendedModelResolutionInput = { userModel: "anthropic/claude-opus-4-6", - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", fallbackChain: [ - { providers: ["google"], model: "gemini-3-pro" }, + { providers: ["google"], model: "gemini-3.1-pro" }, ], - availableModels: new Set(["google/gemini-3-pro-preview", "anthropic/claude-opus-4-6"]), + availableModels: new Set(["google/gemini-3.1-pro-preview", "anthropic/claude-opus-4-6"]), systemDefaultModel: "system/default", } @@ -788,7 +788,7 @@ describe("resolveModelWithFallback", () => { // given - no availableModels but connected provider cache exists const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro", + categoryDefaultModel: "google/gemini-3.1-pro", availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-6", } @@ -797,7 +797,7 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should use transformed categoryDefaultModel since google is connected - expect(result!.model).toBe("google/gemini-3-pro-preview") + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") cacheSpy.mockRestore() }) @@ -824,7 +824,7 @@ 
describe("resolveModelWithFallback", () => { // given - category default already has -preview suffix const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { - categoryDefaultModel: "google/gemini-3-pro-preview", + categoryDefaultModel: "google/gemini-3.1-pro-preview", availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-5", } @@ -832,18 +832,18 @@ describe("resolveModelWithFallback", () => { // when const result = resolveModelWithFallback(input) - // then - should NOT become gemini-3-pro-preview-preview - expect(result!.model).toBe("google/gemini-3-pro-preview") + // then - should NOT become gemini-3.1-pro-preview-preview + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("category-default") cacheSpy.mockRestore() }) - test("transforms gemini-3-pro in fallback chain for google connected provider", () => { - // given - google connected, fallback chain has gemini-3-pro + test("transforms gemini-3.1-pro in fallback chain for google connected provider", () => { + // given - google connected, fallback chain has gemini-3.1-pro const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["google"]) const input: ExtendedModelResolutionInput = { fallbackChain: [ - { providers: ["google", "github-copilot"], model: "gemini-3-pro" }, + { providers: ["google", "github-copilot"], model: "gemini-3.1-pro" }, ], availableModels: new Set(), systemDefaultModel: "anthropic/claude-sonnet-4-5", @@ -853,7 +853,7 @@ describe("resolveModelWithFallback", () => { const result = resolveModelWithFallback(input) // then - should transform to preview variant for google provider - expect(result!.model).toBe("google/gemini-3-pro-preview") + expect(result!.model).toBe("google/gemini-3.1-pro-preview") expect(result!.source).toBe("provider-fallback") cacheSpy.mockRestore() }) diff --git 
a/src/shared/model-suggestion-retry.test.ts b/src/shared/model-suggestion-retry.test.ts index 52edba3aa..9732367de 100644 --- a/src/shared/model-suggestion-retry.test.ts +++ b/src/shared/model-suggestion-retry.test.ts @@ -399,6 +399,43 @@ describe("promptSyncWithModelSuggestionRetry", () => { expect(promptAsyncMock).toHaveBeenCalledTimes(0) }) + it("should abort and throw timeout error when sync prompt hangs", async () => { + // given a client where sync prompt never resolves unless aborted + let receivedSignal: AbortSignal | undefined + const promptMock = mock((input: { signal?: AbortSignal }) => { + receivedSignal = input.signal + return new Promise((_, reject) => { + const signal = input.signal + if (!signal) { + return + } + signal.addEventListener("abort", () => { + reject(signal.reason) + }) + }) + }) + const client = { + session: { + prompt: promptMock, + promptAsync: mock(() => Promise.resolve()), + }, + } + + // when calling with short timeout + // then should abort the request and throw timeout error + await expect( + promptSyncWithModelSuggestionRetry(client as any, { + path: { id: "session-1" }, + body: { + parts: [{ type: "text", text: "hello" }], + model: { providerID: "anthropic", modelID: "claude-sonnet-4" }, + }, + }, { timeoutMs: 1 }) + ).rejects.toThrow("prompt timed out after 1ms") + + expect(receivedSignal?.aborted).toBe(true) + }) + it("should retry with suggested model on ProviderModelNotFoundError", async () => { // given a client that fails first with model-not-found, then succeeds const promptMock = mock() diff --git a/src/shared/model-suggestion-retry.ts b/src/shared/model-suggestion-retry.ts index 6a34deacb..0ff9ca86e 100644 --- a/src/shared/model-suggestion-retry.ts +++ b/src/shared/model-suggestion-retry.ts @@ -1,5 +1,10 @@ import type { createOpencodeClient } from "@opencode-ai/sdk" import { log } from "./logger" +import { + createPromptTimeoutContext, + PROMPT_TIMEOUT_MS, + type PromptRetryOptions, +} from "./prompt-timeout-context" 
type Client = ReturnType @@ -77,30 +82,36 @@ interface PromptBody { interface PromptArgs { path: { id: string } body: PromptBody + signal?: AbortSignal [key: string]: unknown } export async function promptWithModelSuggestionRetry( client: Client, args: PromptArgs, + options: PromptRetryOptions = {}, ): Promise { + const timeoutMs = options.timeoutMs ?? PROMPT_TIMEOUT_MS + const timeoutContext = createPromptTimeoutContext(args, timeoutMs) // NOTE: Model suggestion retry removed — promptAsync returns 204 immediately, // model errors happen asynchronously server-side and cannot be caught here - const promptPromise = client.session.promptAsync( - args as Parameters[0], - ) - - let timeoutID: ReturnType | null = null - const timeoutPromise = new Promise((_, reject) => { - timeoutID = setTimeout(() => { - reject(new Error("promptAsync timed out after 120000ms")) - }, 120000) - }) + const promptPromise = client.session.promptAsync({ + ...args, + signal: timeoutContext.signal, + } as Parameters[0]) try { - await Promise.race([promptPromise, timeoutPromise]) + await promptPromise + if (timeoutContext.wasTimedOut()) { + throw new Error(`promptAsync timed out after ${timeoutMs}ms`) + } + } catch (error) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`promptAsync timed out after ${timeoutMs}ms`) + } + throw error } finally { - if (timeoutID !== null) clearTimeout(timeoutID) + timeoutContext.cleanup() } } @@ -116,9 +127,28 @@ export async function promptWithModelSuggestionRetry( export async function promptSyncWithModelSuggestionRetry( client: Client, args: PromptArgs, + options: PromptRetryOptions = {}, ): Promise { + const timeoutMs = options.timeoutMs ?? 
PROMPT_TIMEOUT_MS + try { - await client.session.prompt(args as Parameters[0]) + const timeoutContext = createPromptTimeoutContext(args, timeoutMs) + try { + await client.session.prompt({ + ...args, + signal: timeoutContext.signal, + } as Parameters[0]) + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + } catch (error) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + throw error + } finally { + timeoutContext.cleanup() + } } catch (error) { const suggestion = parseModelSuggestion(error) if (!suggestion || !args.body.model) { @@ -130,7 +160,7 @@ export async function promptSyncWithModelSuggestionRetry( suggested: suggestion.suggestion, }) - await client.session.prompt({ + const retryArgs: PromptArgs = { ...args, body: { ...args.body, @@ -139,6 +169,24 @@ export async function promptSyncWithModelSuggestionRetry( modelID: suggestion.suggestion, }, }, - } as Parameters[0]) + } + + const timeoutContext = createPromptTimeoutContext(retryArgs, timeoutMs) + try { + await client.session.prompt({ + ...retryArgs, + signal: timeoutContext.signal, + } as Parameters[0]) + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + } catch (retryError) { + if (timeoutContext.wasTimedOut()) { + throw new Error(`prompt timed out after ${timeoutMs}ms`) + } + throw retryError + } finally { + timeoutContext.cleanup() + } } } diff --git a/src/shared/prompt-timeout-context.ts b/src/shared/prompt-timeout-context.ts new file mode 100644 index 000000000..99f081278 --- /dev/null +++ b/src/shared/prompt-timeout-context.ts @@ -0,0 +1,49 @@ +export interface PromptTimeoutArgs { + signal?: AbortSignal +} + +export interface PromptRetryOptions { + timeoutMs?: number +} + +export const PROMPT_TIMEOUT_MS = 120000 + +export function createPromptTimeoutContext(args: PromptTimeoutArgs, timeoutMs: number): { + signal: AbortSignal + wasTimedOut: () => 
boolean + cleanup: () => void +} { + const timeoutController = new AbortController() + let timeoutID: ReturnType | null = null + let timedOut = false + + const abortOnUpstreamSignal = (): void => { + timeoutController.abort(args.signal?.reason) + } + + if (args.signal) { + if (args.signal.aborted) { + timeoutController.abort(args.signal.reason) + } else { + args.signal.addEventListener("abort", abortOnUpstreamSignal, { once: true }) + } + } + + timeoutID = setTimeout(() => { + timedOut = true + timeoutController.abort(new Error(`prompt timed out after ${timeoutMs}ms`)) + }, timeoutMs) + + return { + signal: timeoutController.signal, + wasTimedOut: () => timedOut, + cleanup: () => { + if (timeoutID !== null) { + clearTimeout(timeoutID) + } + if (args.signal) { + args.signal.removeEventListener("abort", abortOnUpstreamSignal) + } + }, + } +} diff --git a/src/shared/provider-model-id-transform.ts b/src/shared/provider-model-id-transform.ts index 5b8c810bb..0cf8eb801 100644 --- a/src/shared/provider-model-id-transform.ts +++ b/src/shared/provider-model-id-transform.ts @@ -6,12 +6,12 @@ export function transformModelForProvider(provider: string, model: string): stri .replace("claude-sonnet-4-5", "claude-sonnet-4.5") .replace("claude-haiku-4-5", "claude-haiku-4.5") .replace("claude-sonnet-4", "claude-sonnet-4") - .replace(/gemini-3-pro(?!-)/g, "gemini-3-pro-preview") + .replace(/gemini-3\.1-pro(?!-)/g, "gemini-3.1-pro-preview") .replace(/gemini-3-flash(?!-)/g, "gemini-3-flash-preview") } if (provider === "google") { return model - .replace(/gemini-3-pro(?!-)/g, "gemini-3-pro-preview") + .replace(/gemini-3\.1-pro(?!-)/g, "gemini-3.1-pro-preview") .replace(/gemini-3-flash(?!-)/g, "gemini-3-flash-preview") } return model diff --git a/src/shared/spawn-with-windows-hide.ts b/src/shared/spawn-with-windows-hide.ts new file mode 100644 index 000000000..7da9ed086 --- /dev/null +++ b/src/shared/spawn-with-windows-hide.ts @@ -0,0 +1,84 @@ +import { spawn as bunSpawn } from "bun" 
+import { spawn as nodeSpawn, type ChildProcess } from "node:child_process" +import { Readable } from "node:stream" + +export interface SpawnOptions { + cwd?: string + env?: Record + stdin?: "pipe" | "inherit" | "ignore" + stdout?: "pipe" | "inherit" | "ignore" + stderr?: "pipe" | "inherit" | "ignore" +} + +export interface SpawnedProcess { + readonly exitCode: number | null + readonly exited: Promise + readonly stdout: ReadableStream | undefined + readonly stderr: ReadableStream | undefined + kill(signal?: NodeJS.Signals): void +} + +function toReadableStream(stream: NodeJS.ReadableStream | null): ReadableStream | undefined { + if (!stream) { + return undefined + } + + return Readable.toWeb(stream as Readable) as ReadableStream +} + +function wrapNodeProcess(proc: ChildProcess): SpawnedProcess { + let resolveExited: (exitCode: number) => void + let exitCode: number | null = null + + const exited = new Promise((resolve) => { + resolveExited = resolve + }) + + proc.on("exit", (code) => { + exitCode = code ?? 1 + resolveExited(exitCode) + }) + + proc.on("error", () => { + if (exitCode === null) { + exitCode = 1 + resolveExited(1) + } + }) + + return { + get exitCode() { + return exitCode + }, + exited, + stdout: toReadableStream(proc.stdout), + stderr: toReadableStream(proc.stderr), + kill(signal?: NodeJS.Signals): void { + try { + if (!signal) { + proc.kill() + return + } + + proc.kill(signal) + } catch {} + }, + } +} + +export function spawnWithWindowsHide(command: string[], options: SpawnOptions): SpawnedProcess { + if (process.platform !== "win32") { + return bunSpawn(command, options) + } + + const [cmd, ...args] = command + const proc = nodeSpawn(cmd, args, { + cwd: options.cwd, + env: options.env, + stdio: [options.stdin ?? "pipe", options.stdout ?? "pipe", options.stderr ?? 
"pipe"], + windowsHide: true, + shell: true, + }) + + return wrapNodeProcess(proc) +} diff --git a/src/tools/background-task/create-background-output.blocking.test.ts b/src/tools/background-task/create-background-output.blocking.test.ts new file mode 100644 index 000000000..82de143e9 --- /dev/null +++ b/src/tools/background-task/create-background-output.blocking.test.ts @@ -0,0 +1,112 @@ +/// + +import { describe, expect, test } from "bun:test" +import type { ToolContext } from "@opencode-ai/plugin/tool" +import type { BackgroundTask } from "../../features/background-agent" +import type { BackgroundOutputClient, BackgroundOutputManager } from "./clients" +import { createBackgroundOutput } from "./create-background-output" + +const projectDir = "/Users/yeongyu/local-workspaces/oh-my-opencode" + +const mockContext = { + sessionID: "test-session", + messageID: "test-message", + agent: "test-agent", + directory: projectDir, + worktree: projectDir, + abort: new AbortController().signal, + metadata: () => {}, + ask: async () => {}, +} as unknown as ToolContext + +function createTask(overrides: Partial = {}): BackgroundTask { + return { + id: "task-1", + sessionID: "ses-1", + parentSessionID: "main-1", + parentMessageID: "msg-1", + description: "background task", + prompt: "do work", + agent: "test-agent", + status: "running", + ...overrides, + } +} + +function createMockClient(): BackgroundOutputClient { + return { + session: { + messages: async () => ({ data: [] }), + }, + } +} + +describe("createBackgroundOutput block=true polling", () => { + test("returns terminal error output when task fails during blocking wait", async () => { + // #given + let pollCount = 0 + const task = createTask({ status: "running" }) + const manager: BackgroundOutputManager = { + getTask: (id: string) => { + if (id !== task.id) return undefined + + pollCount += 1 + if (pollCount >= 2) { + task.status = "error" + task.error = "task failed" + } + + return task + }, + } + + const tool = 
createBackgroundOutput(manager, createMockClient()) + + // #when + const output = await tool.execute( + { + task_id: task.id, + block: true, + timeout: 3000, + full_session: false, + }, + mockContext + ) + + // #then + expect(pollCount).toBeGreaterThanOrEqual(2) + expect(output).toContain("Status | **error**") + expect(output).not.toContain("Timed out waiting") + }) + + test("returns latest output with timeout note when task stays running", async () => { + // #given + let pollCount = 0 + const task = createTask({ status: "running" }) + const manager: BackgroundOutputManager = { + getTask: (id: string) => { + if (id !== task.id) return undefined + pollCount += 1 + return task + }, + } + + const tool = createBackgroundOutput(manager, createMockClient()) + + // #when + const output = await tool.execute( + { + task_id: task.id, + block: true, + timeout: 10, + }, + mockContext + ) + + // #then + expect(pollCount).toBeGreaterThanOrEqual(2) + expect(output).toContain("# Full Session Output") + expect(output).toContain("Timed out waiting") + expect(output).toContain("still running") + }) +}) diff --git a/src/tools/background-task/create-background-output.ts b/src/tools/background-task/create-background-output.ts index 78593a884..e12cfa9aa 100644 --- a/src/tools/background-task/create-background-output.ts +++ b/src/tools/background-task/create-background-output.ts @@ -33,6 +33,14 @@ function formatResolvedTitle(task: BackgroundTask): string { return `${label} - ${task.description}` } +function isTaskActiveStatus(status: BackgroundTask["status"]): boolean { + return status === "pending" || status === "running" +} + +function appendTimeoutNote(output: string, timeoutMs: number): string { + return `${output}\n\n> **Timed out waiting** after ${timeoutMs}ms. 
Task is still running; showing latest available output.` +} + export function createBackgroundOutput(manager: BackgroundOutputManager, client: BackgroundOutputClient): ToolDefinition { return tool({ description: BACKGROUND_OUTPUT_DESCRIPTION, @@ -83,7 +91,9 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: let resolvedTask = task - if (shouldBlock && (task.status === "pending" || task.status === "running")) { + let didTimeoutWhileActive = false + + if (shouldBlock && isTaskActiveStatus(task.status)) { const startTime = Date.now() while (Date.now() - startTime < timeoutMs) { await delay(1000) @@ -93,30 +103,39 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: return `Task was deleted: ${args.task_id}` } - if (currentTask.status !== "pending" && currentTask.status !== "running") { - resolvedTask = currentTask + resolvedTask = currentTask + + if (!isTaskActiveStatus(currentTask.status)) { break } } - const finalCheck = manager.getTask(args.task_id) - if (finalCheck) { - resolvedTask = finalCheck + if (isTaskActiveStatus(resolvedTask.status)) { + const finalCheck = manager.getTask(args.task_id) + if (finalCheck) { + resolvedTask = finalCheck + } + } + + if (isTaskActiveStatus(resolvedTask.status)) { + didTimeoutWhileActive = true } } - const isActive = resolvedTask.status === "pending" || resolvedTask.status === "running" + const isActive = isTaskActiveStatus(resolvedTask.status) const includeThinking = isActive || (args.include_thinking ?? false) const includeToolResults = isActive || (args.include_tool_results ?? false) if (fullSession) { - return await formatFullSession(resolvedTask, client, { + const output = await formatFullSession(resolvedTask, client, { includeThinking, messageLimit: args.message_limit, sinceMessageId: args.since_message_id, includeToolResults, thinkingMaxChars: args.thinking_max_chars, }) + + return didTimeoutWhileActive ? 
appendTimeoutNote(output, timeoutMs) : output } if (resolvedTask.status === "completed") { @@ -127,7 +146,8 @@ export function createBackgroundOutput(manager: BackgroundOutputManager, client: return formatTaskStatus(resolvedTask) } - return formatTaskStatus(resolvedTask) + const statusOutput = formatTaskStatus(resolvedTask) + return didTimeoutWhileActive ? appendTimeoutNote(statusOutput, timeoutMs) : statusOutput } catch (error) { return `Error getting output: ${error instanceof Error ? error.message : String(error)}` } diff --git a/src/tools/delegate-task/category-resolver.ts b/src/tools/delegate-task/category-resolver.ts index a2f5bbd36..bc516dce7 100644 --- a/src/tools/delegate-task/category-resolver.ts +++ b/src/tools/delegate-task/category-resolver.ts @@ -14,6 +14,7 @@ export interface CategoryResolutionResult { agentToUse: string categoryModel: { providerID: string; modelID: string; variant?: string } | undefined categoryPromptAppend: string | undefined + maxPromptTokens?: number modelInfo: ModelFallbackInfo | undefined actualModel: string | undefined isUnstableAgent: boolean @@ -51,6 +52,7 @@ export async function resolveCategoryExecution( agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -68,6 +70,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -111,6 +114,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: undefined, actualModel: undefined, isUnstableAgent: false, @@ -154,6 +158,7 @@ Available categories: ${allCategoryNames}`, agentToUse: "", categoryModel: undefined, categoryPromptAppend: undefined, + maxPromptTokens: undefined, modelInfo: 
undefined, actualModel: undefined, isUnstableAgent: false, @@ -177,6 +182,7 @@ Available categories: ${categoryNames.join(", ")}`, agentToUse: SISYPHUS_JUNIOR_AGENT, categoryModel, categoryPromptAppend, + maxPromptTokens: resolved.config.max_prompt_tokens, modelInfo, actualModel, isUnstableAgent, diff --git a/src/tools/delegate-task/constants.ts b/src/tools/delegate-task/constants.ts index 146838a42..ecf37135f 100644 --- a/src/tools/delegate-task/constants.ts +++ b/src/tools/delegate-task/constants.ts @@ -208,10 +208,10 @@ You are NOT an interactive assistant. You are an autonomous problem-solver. export const DEFAULT_CATEGORIES: Record = { - "visual-engineering": { model: "google/gemini-3-pro", variant: "high" }, + "visual-engineering": { model: "google/gemini-3.1-pro", variant: "high" }, ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" }, deep: { model: "openai/gpt-5.3-codex", variant: "medium" }, - artistry: { model: "google/gemini-3-pro", variant: "high" }, + artistry: { model: "google/gemini-3.1-pro", variant: "high" }, quick: { model: "anthropic/claude-haiku-4-5" }, "unspecified-low": { model: "anthropic/claude-sonnet-4-6" }, "unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" }, diff --git a/src/tools/delegate-task/prompt-builder.ts b/src/tools/delegate-task/prompt-builder.ts index 51d32366a..8230fed78 100644 --- a/src/tools/delegate-task/prompt-builder.ts +++ b/src/tools/delegate-task/prompt-builder.ts @@ -1,5 +1,21 @@ import type { BuildSystemContentInput } from "./types" import { buildPlanAgentSystemPrepend, isPlanAgent } from "./constants" +import { buildSystemContentWithTokenLimit } from "./token-limiter" + +const FREE_OR_LOCAL_PROMPT_TOKEN_LIMIT = 24000 + +function usesFreeOrLocalModel(model: { providerID: string; modelID: string; variant?: string } | undefined): boolean { + if (!model) { + return false + } + + const provider = model.providerID.toLowerCase() + const modelId = model.modelID.toLowerCase() + return 
provider.includes("local") + || provider === "ollama" + || provider === "lmstudio" + || modelId.includes("free") +} /** * Build the system content to inject into the agent prompt. @@ -8,7 +24,11 @@ import { buildPlanAgentSystemPrepend, isPlanAgent } from "./constants" export function buildSystemContent(input: BuildSystemContentInput): string | undefined { const { skillContent, + skillContents, categoryPromptAppend, + agentsContext, + maxPromptTokens, + model, agentName, availableCategories, availableSkills, @@ -18,23 +38,17 @@ export function buildSystemContent(input: BuildSystemContentInput): string | und ? buildPlanAgentSystemPrepend(availableCategories, availableSkills) : "" - if (!skillContent && !categoryPromptAppend && !planAgentPrepend) { - return undefined - } + const effectiveMaxPromptTokens = maxPromptTokens + ?? (usesFreeOrLocalModel(model) ? FREE_OR_LOCAL_PROMPT_TOKEN_LIMIT : undefined) - const parts: string[] = [] - - if (planAgentPrepend) { - parts.push(planAgentPrepend) - } - - if (skillContent) { - parts.push(skillContent) - } - - if (categoryPromptAppend) { - parts.push(categoryPromptAppend) - } - - return parts.join("\n\n") || undefined + return buildSystemContentWithTokenLimit( + { + skillContent, + skillContents, + categoryPromptAppend, + agentsContext: agentsContext ?? 
planAgentPrepend, + planAgentPrepend, + }, + effectiveMaxPromptTokens + ) } diff --git a/src/tools/delegate-task/skill-resolver.ts b/src/tools/delegate-task/skill-resolver.ts index bfd58e17b..e3bb89a50 100644 --- a/src/tools/delegate-task/skill-resolver.ts +++ b/src/tools/delegate-task/skill-resolver.ts @@ -5,17 +5,18 @@ import { discoverSkills } from "../../features/opencode-skill-loader" export async function resolveSkillContent( skills: string[], options: { gitMasterConfig?: GitMasterConfig; browserProvider?: BrowserAutomationProvider, disabledSkills?: Set, directory?: string } -): Promise<{ content: string | undefined; error: string | null }> { +): Promise<{ content: string | undefined; contents: string[]; error: string | null }> { if (skills.length === 0) { - return { content: undefined, error: null } + return { content: undefined, contents: [], error: null } } const { resolved, notFound } = await resolveMultipleSkillsAsync(skills, options) if (notFound.length > 0) { const allSkills = await discoverSkills({ includeClaudeCodePaths: true, directory: options?.directory }) const available = allSkills.map(s => s.name).join(", ") - return { content: undefined, error: `Skills not found: ${notFound.join(", ")}. Available: ${available}` } + return { content: undefined, contents: [], error: `Skills not found: ${notFound.join(", ")}. 
Available: ${available}` } } - return { content: Array.from(resolved.values()).join("\n\n"), error: null } + const contents = Array.from(resolved.values()) + return { content: contents.join("\n\n"), contents, error: null } } diff --git a/src/tools/delegate-task/token-limiter.test.ts b/src/tools/delegate-task/token-limiter.test.ts new file mode 100644 index 000000000..57ba081cd --- /dev/null +++ b/src/tools/delegate-task/token-limiter.test.ts @@ -0,0 +1,121 @@ +declare const require: (name: string) => unknown +const { describe, test, expect } = require("bun:test") as { + describe: (name: string, fn: () => void) => void + test: (name: string, fn: () => void) => void + expect: (value: unknown) => { + toBe: (expected: unknown) => void + toContain: (expected: string) => void + not: { + toContain: (expected: string) => void + } + toBeLessThanOrEqual: (expected: number) => void + toBeUndefined: () => void + } +} + +import { + buildSystemContentWithTokenLimit, + estimateTokenCount, + truncateToTokenBudget, +} from "./token-limiter" + +describe("token-limiter", () => { + test("estimateTokenCount uses 1 token per 4 chars approximation", () => { + // given + const text = "12345678" + + // when + const result = estimateTokenCount(text) + + // then + expect(result).toBe(2) + }) + + test("truncateToTokenBudget keeps text within requested token budget", () => { + // given + const content = "A".repeat(120) + const maxTokens = 10 + + // when + const result = truncateToTokenBudget(content, maxTokens) + + // then + expect(estimateTokenCount(result)).toBeLessThanOrEqual(maxTokens) + }) + + test("buildSystemContentWithTokenLimit returns undefined when there is no content", () => { + // given + const input = { + skillContent: undefined, + skillContents: [], + categoryPromptAppend: undefined, + agentsContext: undefined, + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 20) + + // then + expect(result).toBeUndefined() + }) + + 
test("buildSystemContentWithTokenLimit truncates skills before category and agents context", () => { + // given + const input = { + skillContents: [ + "SKILL_ALPHA:" + "a".repeat(180), + "SKILL_BETA:" + "b".repeat(180), + ], + categoryPromptAppend: "CATEGORY_APPEND:keep", + agentsContext: "AGENTS_CONTEXT:keep", + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 80) + + // then + expect(result).toContain("AGENTS_CONTEXT:keep") + expect(result).toContain("CATEGORY_APPEND:keep") + expect(result).toContain("SKILL_ALPHA:") + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(80) + }) + + test("buildSystemContentWithTokenLimit truncates category after skills are exhausted", () => { + // given + const input = { + skillContents: ["SKILL_ALPHA:" + "a".repeat(220)], + categoryPromptAppend: "CATEGORY_APPEND:" + "c".repeat(220), + agentsContext: "AGENTS_CONTEXT:keep", + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 30) + + // then + expect(result).toContain("AGENTS_CONTEXT:keep") + expect(result).not.toContain("SKILL_ALPHA:" + "a".repeat(80)) + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(30) + }) + + test("buildSystemContentWithTokenLimit truncates agents context last", () => { + // given + const input = { + skillContents: ["SKILL_ALPHA:" + "a".repeat(220)], + categoryPromptAppend: "CATEGORY_APPEND:" + "c".repeat(220), + agentsContext: "AGENTS_CONTEXT:" + "g".repeat(220), + planAgentPrepend: "", + } + + // when + const result = buildSystemContentWithTokenLimit(input, 10) + + // then + expect(result).toContain("AGENTS_CONTEXT:") + expect(result).not.toContain("SKILL_ALPHA:") + expect(result).not.toContain("CATEGORY_APPEND:") + expect(estimateTokenCount(result as string)).toBeLessThanOrEqual(10) + }) +}) diff --git a/src/tools/delegate-task/token-limiter.ts b/src/tools/delegate-task/token-limiter.ts new file mode 100644 index 000000000..2ed6543c1 --- 
/dev/null +++ b/src/tools/delegate-task/token-limiter.ts @@ -0,0 +1,117 @@ +import type { BuildSystemContentInput } from "./types" + +const CHARACTERS_PER_TOKEN = 4 + +export function estimateTokenCount(text: string): number { + if (!text) { + return 0 + } + + return Math.ceil(text.length / CHARACTERS_PER_TOKEN) +} + +export function truncateToTokenBudget(content: string, maxTokens: number): string { + if (!content || maxTokens <= 0) { + return "" + } + + const maxCharacters = maxTokens * CHARACTERS_PER_TOKEN + if (content.length <= maxCharacters) { + return content + } + + return content.slice(0, maxCharacters) +} + +function joinSystemParts(parts: string[]): string | undefined { + const filtered = parts.filter((part) => part.trim().length > 0) + if (filtered.length === 0) { + return undefined + } + + return filtered.join("\n\n") +} + +function reduceSegmentToFitBudget(content: string, overflowTokens: number): string { + if (overflowTokens <= 0 || !content) { + return content + } + + const currentTokens = estimateTokenCount(content) + const nextBudget = Math.max(0, currentTokens - overflowTokens) + return truncateToTokenBudget(content, nextBudget) +} + +export function buildSystemContentWithTokenLimit( + input: BuildSystemContentInput, + maxTokens: number | undefined +): string | undefined { + const skillParts = input.skillContents?.length + ? [...input.skillContents] + : input.skillContent + ? [input.skillContent] + : [] + const categoryPromptAppend = input.categoryPromptAppend ?? "" + const agentsContext = input.agentsContext ?? input.planAgentPrepend ?? 
"" + + if (maxTokens === undefined) { + return joinSystemParts([agentsContext, ...skillParts, categoryPromptAppend]) + } + + let nextSkills = [...skillParts] + let nextCategoryPromptAppend = categoryPromptAppend + let nextAgentsContext = agentsContext + + const buildCurrentContent = (): string | undefined => + joinSystemParts([nextAgentsContext, ...nextSkills, nextCategoryPromptAppend]) + + let systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + + let overflowTokens = estimateTokenCount(systemContent) - maxTokens + + if (overflowTokens > 0) { + for (let index = 0; index < nextSkills.length && overflowTokens > 0; index += 1) { + const skill = nextSkills[index] + const reducedSkill = reduceSegmentToFitBudget(skill, overflowTokens) + nextSkills[index] = reducedSkill + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + nextSkills = nextSkills.filter((skill) => skill.trim().length > 0) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + if (overflowTokens > 0 && nextCategoryPromptAppend) { + nextCategoryPromptAppend = reduceSegmentToFitBudget(nextCategoryPromptAppend, overflowTokens) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + overflowTokens = estimateTokenCount(systemContent) - maxTokens + } + + if (overflowTokens > 0 && nextAgentsContext) { + nextAgentsContext = reduceSegmentToFitBudget(nextAgentsContext, overflowTokens) + systemContent = buildCurrentContent() + if (!systemContent) { + return undefined + } + } + + if (!systemContent) { + return undefined + } + + return truncateToTokenBudget(systemContent, maxTokens) +} diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index b9b1274bc..8c0b01acf 100644 --- 
a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -17,7 +17,7 @@ const TEST_AVAILABLE_MODELS = new Set([ "anthropic/claude-opus-4-6", "anthropic/claude-sonnet-4-6", "anthropic/claude-haiku-4-5", - "google/gemini-3-pro", + "google/gemini-3.1-pro", "google/gemini-3-flash", "openai/gpt-5.2", "openai/gpt-5.3-codex", @@ -52,7 +52,7 @@ describe("sisyphus-task", () => { providerModelsSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({ models: { anthropic: ["claude-opus-4-6", "claude-sonnet-4-6", "claude-haiku-4-5"], - google: ["gemini-3-pro", "gemini-3-flash"], + google: ["gemini-3.1-pro", "gemini-3-flash"], openai: ["gpt-5.2", "gpt-5.3-codex"], }, connected: ["anthropic", "google", "openai"], @@ -73,7 +73,7 @@ describe("sisyphus-task", () => { // when / #then expect(category).toBeDefined() - expect(category.model).toBe("google/gemini-3-pro") + expect(category.model).toBe("google/gemini-3.1-pro") expect(category.variant).toBe("high") }) @@ -781,7 +781,7 @@ describe("sisyphus-task", () => { // then expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + expect(result!.config.model).toBe("google/gemini-3.1-pro") expect(result!.promptAppend).toContain("VISUAL/UI") }) @@ -805,7 +805,7 @@ describe("sisyphus-task", () => { const categoryName = "visual-engineering" const userCategories = { "visual-engineering": { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", prompt_append: "Custom instructions here", }, } @@ -845,7 +845,7 @@ describe("sisyphus-task", () => { const categoryName = "visual-engineering" const userCategories = { "visual-engineering": { - model: "google/gemini-3-pro", + model: "google/gemini-3.1-pro", temperature: 0.3, }, } @@ -868,7 +868,7 @@ describe("sisyphus-task", () => { // then - category's built-in model wins over inheritedModel expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + 
expect(result!.config.model).toBe("google/gemini-3.1-pro") }) test("systemDefaultModel is used as fallback when custom category has no model", () => { @@ -910,7 +910,7 @@ describe("sisyphus-task", () => { // then expect(result).not.toBeNull() - expect(result!.config.model).toBe("google/gemini-3-pro") + expect(result!.config.model).toBe("google/gemini-3.1-pro") }) }) @@ -1738,7 +1738,7 @@ describe("sisyphus-task", () => { const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - model: { list: async () => [{ provider: "google", id: "gemini-3-pro" }] }, + model: { list: async () => [{ provider: "google", id: "gemini-3.1-pro" }] }, session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "ses_unstable_gemini" } }), @@ -2001,7 +2001,7 @@ describe("sisyphus-task", () => { const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, - model: { list: async () => [{ provider: "google", id: "gemini-3-pro" }] }, + model: { list: async () => [{ provider: "google", id: "gemini-3.1-pro" }] }, session: { get: async () => ({ data: { directory: "/project" } }), create: async () => ({ data: { id: "ses_artistry_gemini" } }), @@ -2028,7 +2028,7 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - artistry category (gemini-3-pro with high variant) + // when - artistry category (gemini-3.1-pro with high variant) const result = await tool.execute( { description: "Test artistry forced background", @@ -3026,9 +3026,9 @@ describe("sisyphus-task", () => { // when resolveCategoryConfig is called const resolved = resolveCategoryConfig(categoryName, { userCategories, inheritedModel, systemDefaultModel: SYSTEM_DEFAULT_MODEL }) - // then should use category's built-in model (gemini-3-pro for visual-engineering) + // then should use category's built-in model 
(gemini-3.1-pro for visual-engineering) expect(resolved).not.toBeNull() - expect(resolved!.model).toBe("google/gemini-3-pro") + expect(resolved!.model).toBe("google/gemini-3.1-pro") }) test("systemDefaultModel is used when no other model is available", () => { @@ -3522,7 +3522,7 @@ describe("sisyphus-task", () => { ) // then - should resolve via AGENT_MODEL_REQUIREMENTS fallback chain for oracle - // oracle fallback chain: gpt-5.2 (openai) > gemini-3-pro (google) > claude-opus-4-6 (anthropic) + // oracle fallback chain: gpt-5.2 (openai) > gemini-3.1-pro (google) > claude-opus-4-6 (anthropic) // Since openai is in connectedProviders, should resolve to openai/gpt-5.2 expect(promptBody.model).toBeDefined() expect(promptBody.model.providerID).toBe("openai") diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 0ab4c1baa..9b0915330 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -142,7 +142,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini const runInBackground = args.run_in_background === true - const { content: skillContent, error: skillError } = await resolveSkillContent(args.load_skills, { + const { content: skillContent, contents: skillContents, error: skillError } = await resolveSkillContent(args.load_skills, { gitMasterConfig: options.gitMasterConfig, browserProvider: options.browserProvider, disabledSkills: options.disabledSkills, @@ -184,6 +184,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini let actualModel: string | undefined let isUnstableAgent = false let fallbackChain: import("../../shared/model-requirements").FallbackEntry[] | undefined + let maxPromptTokens: number | undefined if (args.category) { const resolution = await resolveCategoryExecution(args, options, inheritedModel, systemDefaultModel) @@ -197,6 +198,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini actualModel = 
resolution.actualModel isUnstableAgent = resolution.isUnstableAgent fallbackChain = resolution.fallbackChain + maxPromptTokens = resolution.maxPromptTokens const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean @@ -213,8 +215,11 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini if (isUnstableAgent && isRunInBackgroundExplicitlyFalse) { const systemContent = buildSystemContent({ skillContent, + skillContents, categoryPromptAppend, agentName: agentToUse, + maxPromptTokens, + model: categoryModel, availableCategories, availableSkills, }) @@ -239,8 +244,11 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini const systemContent = buildSystemContent({ skillContent, + skillContents, categoryPromptAppend, agentName: agentToUse, + maxPromptTokens, + model: categoryModel, availableCategories, availableSkills, }) diff --git a/src/tools/delegate-task/types.ts b/src/tools/delegate-task/types.ts index 13d1973a4..7c749d208 100644 --- a/src/tools/delegate-task/types.ts +++ b/src/tools/delegate-task/types.ts @@ -72,7 +72,12 @@ export interface DelegateTaskToolOptions { export interface BuildSystemContentInput { skillContent?: string + skillContents?: string[] categoryPromptAppend?: string + agentsContext?: string + planAgentPrepend?: string + maxPromptTokens?: number + model?: { providerID: string; modelID: string; variant?: string } agentName?: string availableCategories?: AvailableCategory[] availableSkills?: AvailableSkill[] diff --git a/src/tools/hashline-edit/diff-utils.test.ts b/src/tools/hashline-edit/diff-utils.test.ts index c3373d995..c7d218728 100644 --- a/src/tools/hashline-edit/diff-utils.test.ts +++ b/src/tools/hashline-edit/diff-utils.test.ts @@ -41,6 +41,23 @@ describe("generateUnifiedDiff", () => { expect(diff).toContain(" line 13") }) + it("limits each hunk to three context lines", () => { + //#given + const oldContent = 
createNumberedLines(20) + const newLines = oldContent.split("\n") + newLines[9] = "line 10 updated" + const newContent = newLines.join("\n") + + //#when + const diff = generateUnifiedDiff(oldContent, newContent, "sample.txt") + + //#then + expect(diff).toContain(" line 7") + expect(diff).toContain(" line 13") + expect(diff).not.toContain(" line 6") + expect(diff).not.toContain(" line 14") + }) + it("returns a diff string for identical content", () => { //#given const oldContent = "alpha\nbeta\ngamma" diff --git a/src/tools/hashline-edit/diff-utils.ts b/src/tools/hashline-edit/diff-utils.ts index 975438d27..10c3dfc9d 100644 --- a/src/tools/hashline-edit/diff-utils.ts +++ b/src/tools/hashline-edit/diff-utils.ts @@ -16,7 +16,7 @@ export function toHashlineContent(content: string): string { } export function generateUnifiedDiff(oldContent: string, newContent: string, filePath: string): string { - return createTwoFilesPatch(filePath, filePath, oldContent, newContent) + return createTwoFilesPatch(filePath, filePath, oldContent, newContent, undefined, undefined, { context: 3 }) } export function countLineDiffs(oldContent: string, newContent: string): { additions: number; deletions: number } { diff --git a/src/tools/hashline-edit/edit-deduplication.ts b/src/tools/hashline-edit/edit-deduplication.ts index e689bb53a..8818b61ae 100644 --- a/src/tools/hashline-edit/edit-deduplication.ts +++ b/src/tools/hashline-edit/edit-deduplication.ts @@ -1,18 +1,24 @@ import type { HashlineEdit } from "./types" import { toNewLines } from "./edit-text-normalization" +import { normalizeLineRef } from "./validation" function normalizeEditPayload(payload: string | string[]): string { return toNewLines(payload).join("\n") } +function canonicalAnchor(anchor: string | undefined): string { + if (!anchor) return "" + return normalizeLineRef(anchor) +} + function buildDedupeKey(edit: HashlineEdit): string { switch (edit.op) { case "replace": - return `replace|${edit.pos}|${edit.end ?? 
""}|${normalizeEditPayload(edit.lines)}` + return `replace|${canonicalAnchor(edit.pos)}|${edit.end ? canonicalAnchor(edit.end) : ""}|${normalizeEditPayload(edit.lines)}` case "append": - return `append|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}` + return `append|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}` case "prepend": - return `prepend|${edit.pos ?? ""}|${normalizeEditPayload(edit.lines)}` + return `prepend|${canonicalAnchor(edit.pos)}|${normalizeEditPayload(edit.lines)}` default: return JSON.stringify(edit) } diff --git a/src/tools/hashline-edit/edit-operations.test.ts b/src/tools/hashline-edit/edit-operations.test.ts index 5d8ad08ba..40585210f 100644 --- a/src/tools/hashline-edit/edit-operations.test.ts +++ b/src/tools/hashline-edit/edit-operations.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "bun:test" -import { applyHashlineEdits } from "./edit-operations" +import { applyHashlineEdits, applyHashlineEditsWithReport } from "./edit-operations" import { applyAppend, applyInsertAfter, applyPrepend, applyReplaceLines, applySetLine } from "./edit-operation-primitives" import { computeLineHash } from "./hash-computation" import type { HashlineEdit } from "./types" @@ -389,3 +389,23 @@ describe("hashline edit operations", () => { expect(result).toEqual("replaced A\nline 3\nreplaced B") }) }) + +describe("dedupe anchor canonicalization", () => { + it("deduplicates edits with whitespace-variant anchors", () => { + //#given + const content = "line 1\nline 2" + const lines = content.split("\n") + const canonical = `1#${computeLineHash(1, lines[0])}` + const spaced = ` 1 # ${computeLineHash(1, lines[0])} ` + + //#when + const report = applyHashlineEditsWithReport(content, [ + { op: "append", pos: canonical, lines: ["inserted"] }, + { op: "append", pos: spaced, lines: ["inserted"] }, + ]) + + //#then + expect(report.deduplicatedEdits).toBe(1) + expect(report.content).toBe("line 1\ninserted\nline 2") + }) +}) diff --git 
a/src/tools/hashline-edit/hashline-edit-executor.ts b/src/tools/hashline-edit/hashline-edit-executor.ts index e20ebbf96..d316307db 100644 --- a/src/tools/hashline-edit/hashline-edit-executor.ts +++ b/src/tools/hashline-edit/hashline-edit-executor.ts @@ -33,7 +33,7 @@ function resolveToolCallID(ctx: ToolContextWithCallID): string | undefined { function canCreateFromMissingFile(edits: HashlineEdit[]): boolean { if (edits.length === 0) return false - return edits.every((edit) => edit.op === "append" || edit.op === "prepend") + return edits.every((edit) => (edit.op === "append" || edit.op === "prepend") && !edit.pos) } function buildSuccessMeta( @@ -86,19 +86,19 @@ export async function executeHashlineEditTool(args: HashlineEditArgs, context: T const filePath = args.filePath const { delete: deleteMode, rename } = args + if (deleteMode && rename) { + return "Error: delete and rename cannot be used together" + } + if (deleteMode && args.edits.length > 0) { + return "Error: delete mode requires edits to be an empty array" + } + if (!deleteMode && (!args.edits || !Array.isArray(args.edits) || args.edits.length === 0)) { return "Error: edits parameter must be a non-empty array" } const edits = deleteMode ? 
[] : normalizeHashlineEdits(args.edits) - if (deleteMode && rename) { - return "Error: delete and rename cannot be used together" - } - if (deleteMode && edits.length > 0) { - return "Error: delete mode requires edits to be an empty array" - } - const file = Bun.file(filePath) const exists = await file.exists() if (!exists && !deleteMode && !canCreateFromMissingFile(edits)) { diff --git a/src/tools/hashline-edit/tool-description.ts b/src/tools/hashline-edit/tool-description.ts index 0b0ee00fa..2d452ccfa 100644 --- a/src/tools/hashline-edit/tool-description.ts +++ b/src/tools/hashline-edit/tool-description.ts @@ -10,7 +10,7 @@ WORKFLOW: VALIDATION: Payload shape: { "filePath": string, "edits": [...], "delete"?: boolean, "rename"?: string } Each edit must be one of: replace, append, prepend - Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines"?: string|string[]|null } + Edit shape: { "op": "replace"|"append"|"prepend", "pos"?: "LINE#ID", "end"?: "LINE#ID", "lines": string|string[]|null } lines must contain plain replacement text only (no LINE#ID prefixes, no diff + markers) CRITICAL: all operations validate against the same pre-edit file snapshot and apply bottom-up. Refs/tags are interpreted against the last-read version of the file. 
diff --git a/src/tools/hashline-edit/tools.test.ts b/src/tools/hashline-edit/tools.test.ts index cb76b834b..1158ca3d2 100644 --- a/src/tools/hashline-edit/tools.test.ts +++ b/src/tools/hashline-edit/tools.test.ts @@ -341,4 +341,81 @@ describe("createHashlineEditTool", () => { //#then expect(envelope.lineEnding).toBe("\r\n") }) + + it("rejects delete=true with non-empty edits before normalization", async () => { + //#given + const filePath = path.join(tempDir, "delete-reject.txt") + fs.writeFileSync(filePath, "line1") + + //#when + const result = await tool.execute( + { + filePath, + delete: true, + edits: [{ op: "replace", pos: "1#ZZ", lines: "bad" }], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("delete mode requires edits to be an empty array") + expect(fs.existsSync(filePath)).toBe(true) + }) + + it("rejects delete=true combined with rename", async () => { + //#given + const filePath = path.join(tempDir, "delete-rename.txt") + fs.writeFileSync(filePath, "line1") + + //#when + const result = await tool.execute( + { + filePath, + delete: true, + rename: path.join(tempDir, "new-name.txt"), + edits: [], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("delete and rename cannot be used together") + expect(fs.existsSync(filePath)).toBe(true) + }) + + it("rejects missing file creation with anchored append", async () => { + //#given + const filePath = path.join(tempDir, "nonexistent.txt") + + //#when + const result = await tool.execute( + { + filePath, + edits: [{ op: "append", pos: "1#ZZ", lines: ["bad"] }], + }, + createMockContext(), + ) + + //#then + expect(result).toContain("File not found") + }) + + it("allows missing file creation with unanchored append", async () => { + //#given + const filePath = path.join(tempDir, "newfile.txt") + + //#when + const result = await tool.execute( + { + filePath, + edits: [{ op: "append", lines: ["created"] }], + }, + createMockContext(), + ) + + //#then + 
expect(fs.existsSync(filePath)).toBe(true) + expect(fs.readFileSync(filePath, "utf-8")).toBe("created") + expect(result).toBe(`Updated ${filePath}`) + }) }) diff --git a/src/tools/hashline-edit/tools.ts b/src/tools/hashline-edit/tools.ts index 132650297..bd2bf1f90 100644 --- a/src/tools/hashline-edit/tools.ts +++ b/src/tools/hashline-edit/tools.ts @@ -31,7 +31,6 @@ export function createHashlineEditTool(): ToolDefinition { end: tool.schema.string().optional().describe("Range end anchor in LINE#ID format"), lines: tool.schema .union([tool.schema.string(), tool.schema.array(tool.schema.string()), tool.schema.null()]) - .optional() .describe("Replacement or inserted lines. null/[] deletes with replace"), }) ) diff --git a/src/tools/hashline-edit/validation.ts b/src/tools/hashline-edit/validation.ts index fc5b395a1..ed6061557 100644 --- a/src/tools/hashline-edit/validation.ts +++ b/src/tools/hashline-edit/validation.ts @@ -15,7 +15,7 @@ const MISMATCH_CONTEXT = 2 const LINE_REF_EXTRACT_PATTERN = /([0-9]+#[ZPMQVRWSNKTXJBYH]{2})/ -function normalizeLineRef(ref: string): string { +export function normalizeLineRef(ref: string): string { const originalTrimmed = ref.trim() let trimmed = originalTrimmed trimmed = trimmed.replace(/^(?:>>>|[+-])\s*/, "") diff --git a/src/tools/interactive-bash/tools.ts b/src/tools/interactive-bash/tools.ts index dac46bd60..a0795ee36 100644 --- a/src/tools/interactive-bash/tools.ts +++ b/src/tools/interactive-bash/tools.ts @@ -1,4 +1,5 @@ import { tool, type ToolDefinition } from "@opencode-ai/plugin/tool" +import { spawnWithWindowsHide } from "../../shared/spawn-with-windows-hide" import { BLOCKED_TMUX_SUBCOMMANDS, DEFAULT_TIMEOUT_MS, INTERACTIVE_BASH_DESCRIPTION } from "./constants" import { getCachedTmuxPath } from "./tmux-path-resolver" @@ -89,7 +90,7 @@ tmux capture-pane -p -t ${sessionName} -S -1000 The Bash tool can execute these commands directly. 
Do NOT retry with interactive_bash.` } - const proc = Bun.spawn([tmuxPath, ...parts], { + const proc = spawnWithWindowsHide([tmuxPath, ...parts], { stdout: "pipe", stderr: "pipe", })