Compare commits


67 Commits

Author SHA1 Message Date
YeonGyu-Kim
c37e23f244 fix: export fallback availability from traced module 2026-02-17 10:44:57 +09:00
YeonGyu-Kim
ca06ce134f fix: add fallback resolution warnings for unavailable models 2026-02-17 10:29:48 +09:00
YeonGyu-Kim
72fa2c7e65 fix(tmux): stop layout override after spawn, use configured main pane size
Remove the applyLayout (select-layout main-vertical) call after spawn, which
was destroying grid arrangements by forcing vertical stacking. Now only
enforceMainPaneWidth is called, preserving the grid created by manual
split directions. Also fix enforceMainPaneWidth to use the config's
main_pane_size percentage instead of a hardcoded 50%.
2026-02-17 09:50:17 +09:00
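The sizing fix described above can be sketched as follows. This is a minimal illustration, not the actual oh-my-opencode implementation — the function name and config shape are assumptions; only the `tmux resize-pane -x` flag is real tmux syntax.

```typescript
// Hypothetical sketch: derive the resize command from the configured
// main_pane_size percentage instead of a hardcoded 50%.
function enforceMainPaneWidthCommand(
  windowWidth: number,
  mainPaneSizePercent: number, // e.g. config.tmux.main_pane_size
  mainPaneId: string
): string {
  // Convert the percentage into an absolute column count for tmux.
  const columns = Math.max(1, Math.round((windowWidth * mainPaneSizePercent) / 100));
  return `tmux resize-pane -t ${mainPaneId} -x ${columns}`;
}
```

Because only the main pane is resized (rather than re-running `select-layout`), the grid of agent panes created by manual splits is left untouched.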
YeonGyu-Kim
b3c5f4caf5 fix(tmux): use actual pane dimensions and configured min width for grid calculation
The agent area width now uses the real mainPane.width instead of a hardcoded
50% ratio. Grid planning, split availability, and spawn target finding now
respect the user's agent_pane_min_width config instead of the hardcoded
MIN_PANE_WIDTH=52, enabling 2-column grid layouts on narrower terminals.
2026-02-17 09:48:18 +09:00
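The column-count calculation this commit changes reduces to one line; the sketch below is illustrative (the names are assumptions), but it shows why a configurable minimum width enables 2-column grids where a fixed 52-column floor did not.

```typescript
// Illustrative: how many agent-pane columns fit, given the real agent-area
// width and the user's configured minimum pane width (instead of the former
// hardcoded MIN_PANE_WIDTH = 52).
function gridColumns(agentAreaWidth: number, agentPaneMinWidth: number): number {
  return Math.max(1, Math.floor(agentAreaWidth / agentPaneMinWidth));
}
```

For example, an 80-column agent area fits two 40-column panes, but only one pane under the old 52-column minimum.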
YeonGyu-Kim
219c1f8225 update: always wait for Oracle results instead of blanket background_cancel(all=true) 2026-02-17 09:42:59 +09:00
github-actions[bot]
6208c07809 @xinpengdr has signed the CLA in code-yeongyu/oh-my-opencode#1906 2026-02-16 19:01:47 +00:00
YeonGyu-Kim
1b7a1e3f0b Merge pull request #1905 from code-yeongyu/fix/tmux-split-stability
fix: stabilize tmux split and session readiness handling
2026-02-17 03:49:30 +09:00
YeonGyu-Kim
84a83922c3 fix: stop tracking sessions that never become ready
When session readiness times out, immediately close the spawned pane and skip tracking to prevent stale mappings from causing reopen and close anomalies.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-17 03:40:55 +09:00
YeonGyu-Kim
17da22704e fix: size main pane using configured layout percentage
Main pane resize now uses main_pane_size instead of a hardcoded 50 percent fallback so post-split layout remains stable and predictable.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-17 03:40:46 +09:00
YeonGyu-Kim
da3f24b8b1 fix: align split targeting with configured pane width
Use the configured agent pane width consistently in split target selection and avoid close+spawn churn by replacing the oldest pane when eviction is required.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-02-17 03:40:37 +09:00
YeonGyu-Kim
b02721463e refactor: route status porcelain map parsing through line parser 2026-02-17 03:29:10 +09:00
YeonGyu-Kim
1f31a3d8f1 test: add dedicated status porcelain line parser with coverage 2026-02-17 03:29:01 +09:00
YeonGyu-Kim
1566cfcc1e update: Hephaestus completion guarantee, Sisyphus-Junior Hephaestus-style rewrite, snake_case tools
Hephaestus:
- Add Completion Guarantee section with Codex-style persistence framing
- Add explicit explore/librarian call syntax examples (subagent_type, not category)
- Use positive 'keep going until resolved' over negative 'NEVER stop'
- Fix tool names: TaskCreate/TaskUpdate → task_create/task_update

Sisyphus-Junior GPT:
- Full Hephaestus-style rewrite: autonomy, reporting, parallelism, tool usage
- Remove Blocked & Allowed Tools section and 'You work ALONE' messaging
- Add Progress Updates, Ambiguity Protocol, Code Quality sections
- Fix tool names: TaskCreate/TaskUpdate → task_create/task_update

Sisyphus-Junior Default:
- Remove buildConstraintsSection and blocked actions messaging
- Fix tool names: TaskCreate/TaskUpdate → task_create/task_update

Tests: update all assertions for new prompt structure (31/31 pass)
2026-02-17 03:12:32 +09:00
YeonGyu-Kim
2b5887aca3 fix: prevent overlapping poll cycles in managers
Guarding against polling re-entry avoids stacked async polls under slow responses, and calling unref on the pending-call cleanup timer reduces idle wakeups.
2026-02-17 03:06:40 +09:00
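The re-entry guard pattern referenced above can be sketched as a small wrapper — a minimal sketch assuming a setInterval-style loop, not the managers' actual code: if the previous async poll is still in flight, the current tick is skipped instead of stacking a second poll on top of it.

```typescript
// Wrap an async poll function so overlapping cycles are skipped.
function makeGuardedPoller(poll: () => Promise<void>) {
  let inFlight = false;
  return async (): Promise<boolean> => {
    if (inFlight) return false; // previous cycle still running — skip this tick
    inFlight = true;
    try {
      await poll();
      return true;
    } finally {
      inFlight = false; // always release, even if the poll throws
    }
  };
}
```

Without the guard, a poll that outlives its interval causes cycles to pile up; with it, slow responses simply stretch the effective polling period.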
YeonGyu-Kim
8c88da51e1 update: soften Hephaestus brevity bias — replace 'brief/briefly' with 'clear' throughout
Replace 7 instances of brief/briefly that caused over-terse behavior:
- 'briefly restate' → 'restate'
- 'brief summary' → 'clear summary'
- 'briefly state the WHY' → 'explain the WHY' (×2)
- 'brief context' → 'clear context'
- 'Brief updates' → 'Clear updates (a few sentences)'
- 'keep it brief and clear' → 'keep it clear and helpful'
2026-02-17 02:58:42 +09:00
YeonGyu-Kim
199992e05b update: Hephaestus prompt — restore intent gate, strengthen parallelism and reporting
- Restore Assumptions Check and When to Challenge the User from Sisyphus intent gate
- Add proactive explore/librarian firing to CORRECT behavior list
- Strengthen parallel execution with GPT-5.2 tool_usage_rules (parallelize ALL independent calls)
- Embed reporting into each Execution Loop step (Tell user pattern)
- Strengthen Progress Updates with plain-language and WHY-not-just-WHAT guidance
- Add post-edit reporting to Output Contract and After Implementation
- Fix Output Contract preamble conflict (skip empty preambles, but DO report actions)
2026-02-17 02:56:22 +09:00
YeonGyu-Kim
6b546526f3 refactor: diet Hephaestus prompt — remove redundancy, add progress updates and skill examples
- Remove router nudge (reasoning configuration section)
- Remove redundant sections: Role & Agency, Judicious Initiative, Success
  Criteria, Response Compaction, Soft Guidelines
- Merge Identity + Core Principle into compact Identity section
- Restore autonomous behavior policy (FORBIDDEN/CORRECT) from Role & Agency
- Add Progress Updates section with friendly tone and concrete examples
- Add Skill Loading Examples table (frontend-ui-ux, playwright, git-master, tauri)
- Condense Parallel Execution, Execution Loop, Verification, Failure Recovery
- Update Output Contract with friendly communication style

651 → 437 lines (33% reduction), behavior preserved
2026-02-17 02:46:11 +09:00
YeonGyu-Kim
c44509b397 fix: skip startup toasts in CLI run mode for auto-update-checker
Add OPENCODE_CLI_RUN_MODE environment variable check to skip all startup
toasts and version checks when running in CLI mode. This prevents
notification spam during automated CLI run sessions.

Includes comprehensive test coverage for CLI run mode behavior.

🤖 Generated with OhMyOpenCode assistance
2026-02-17 02:34:39 +09:00
YeonGyu-Kim
17994693af fix: add directory parameter and improve CLI run session handling
- Add directory parameter to session API calls (session.get, session.todo,
  session.status, session.children)
- Improve agent resolver with display name support via agent-display-names
- Add tool execution visibility in event handlers with running/completed
  status output
- Enhance poll-for-completion with main session status checking and
  stabilization period handling
- Add normalizeSDKResponse import for consistent response handling
- Update types with Todo, ChildSession, and toast-related interfaces

🤖 Generated with OhMyOpenCode assistance
2026-02-17 02:34:35 +09:00
YeonGyu-Kim
a31087e543 fix: add propertyNames validation to object schemas in JSON schema
Add propertyNames: { type: "string" } to all object schemas with
additionalProperties to ensure proper JSON schema validation for
dynamic property keys.

🤖 Generated with OhMyOpenCode assistance
2026-02-17 02:34:31 +09:00
YeonGyu-Kim
5c13a63758 fix: invoke claude-code-hooks PreCompact in session compacting handler
The experimental.session.compacting handler was not delegating to
claudeCodeHooks, leaving PreCompact hooks from .claude/settings.json as
dead code. Also fixed a premature early return when compactionContextInjector
was null, which would skip any subsequent hooks.
2026-02-17 02:14:01 +09:00
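The shape of that fix can be sketched as follows — the names and signatures here are assumptions for illustration, not the real module API. The key is that the optional injector no longer gates the hooks that follow it.

```typescript
type Hook = (input: unknown) => Promise<void> | void;

// Illustrative handler: run the optional context injector if present, but
// always fall through to the Claude Code PreCompact delegate instead of
// returning early when the injector is null.
async function onSessionCompacting(
  input: unknown,
  compactionContextInjector: Hook | null,
  claudeCodeHooksPreCompact: Hook
): Promise<void> {
  if (compactionContextInjector) {
    await compactionContextInjector(input); // optional step — no early return when null
  }
  await claudeCodeHooksPreCompact(input); // previously unreachable when injector was null
}
```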
YeonGyu-Kim
d9f21da026 fix: prefer a runnable opencode binary for cli run 2026-02-17 02:12:36 +09:00
YeonGyu-Kim
7d2c798ff0 Merge pull request #1893 from code-yeongyu/fix/1716-disabled-agents-enforcement
fix: enforce disabled_agents config in call_omo_agent (#1716)
2026-02-17 02:07:18 +09:00
YeonGyu-Kim
ea589e66e8 Merge remote-tracking branch 'origin/dev' into fix/1716-disabled-agents-enforcement
# Conflicts:
#	src/plugin/tool-registry.ts
#	src/tools/call-omo-agent/tools.test.ts
#	src/tools/call-omo-agent/tools.ts
2026-02-17 02:04:19 +09:00
YeonGyu-Kim
e299c09ee8 fix: include provider-models cache for Hephaestus availability 2026-02-17 02:03:03 +09:00
YeonGyu-Kim
285d8d58dd fix: skip compaction messages in parent-session context lookup 2026-02-17 02:03:03 +09:00
YeonGyu-Kim
e1e449164a Merge pull request #1898 from code-yeongyu/fix/1671-tmux-layout
fix: apply tmux layout config during pane spawning (#1671)
2026-02-17 02:01:29 +09:00
YeonGyu-Kim
324d2c1f0c Merge branch 'dev' into fix/1671-tmux-layout 2026-02-17 01:58:59 +09:00
YeonGyu-Kim
f3de0f43bd Merge pull request #1899 from code-yeongyu/fix/1700-vertex-anthropic
fix: recognize google-vertex-anthropic as Claude provider (#1700)
2026-02-17 01:58:26 +09:00
YeonGyu-Kim
5839594041 Merge pull request #1897 from code-yeongyu/fix/1679-copilot-fallback
fix: handle all model versions in normalizeModelName for fallback chains (#1679)
2026-02-17 01:58:24 +09:00
YeonGyu-Kim
ada0a233d6 Merge pull request #1894 from code-yeongyu/fix/1681-oracle-json-parse
fix: resolve Oracle JSON parse error after promptAsync refactor (#1681)
2026-02-17 01:58:21 +09:00
YeonGyu-Kim
b7497d0f9f Merge branch 'dev' into fix/1700-vertex-anthropic 2026-02-17 01:54:11 +09:00
YeonGyu-Kim
7bb03702c9 Merge branch 'dev' into fix/1671-tmux-layout 2026-02-17 01:54:08 +09:00
YeonGyu-Kim
ccbeea96c1 Merge branch 'dev' into fix/1679-copilot-fallback 2026-02-17 01:54:05 +09:00
YeonGyu-Kim
9922a94d12 Merge branch 'dev' into fix/1681-oracle-json-parse 2026-02-17 01:54:03 +09:00
YeonGyu-Kim
e78c54f6eb Merge pull request #1896 from code-yeongyu/fix/1283-review-code-silent-fail
fix: report silent subagent delegation failures (#1283)
2026-02-17 01:53:56 +09:00
YeonGyu-Kim
74be163df3 Merge pull request #1895 from code-yeongyu/fix/1718-windows-subagent-dir
fix: use correct project directory for Windows subagents (#1718)
2026-02-17 01:53:43 +09:00
YeonGyu-Kim
24789334e4 fix: detect AppData directory paths without trailing separators 2026-02-17 01:45:14 +09:00
YeonGyu-Kim
0e0bfc1cd6 Merge pull request #1849 from jkoelker/preserve-default-agent
fix(config): preserve configured default_agent
2026-02-17 01:43:04 +09:00
Jason Kölker
90ede4487b fix(config): preserve configured default_agent
oh-my-opencode overwrote OpenCode's default_agent with sisyphus whenever
Sisyphus orchestration was enabled. This made explicit defaults like
Hephaestus ineffective and forced manual agent switching in new sessions.

Only assign sisyphus as default when default_agent is missing or blank,
and preserve existing configured values. Add tests for both preservation
and fallback behavior to prevent regressions.
2026-02-17 01:41:52 +09:00
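The preservation rule described in this commit fits in a few lines. The sketch below assumes a simplified config shape (the real type lives in oh-my-opencode): sisyphus is only assigned when no default agent is configured.

```typescript
// Only fall back to "sisyphus" when default_agent is missing or blank;
// an explicitly configured value (e.g. "hephaestus") is preserved.
function resolveDefaultAgent(configured: string | undefined): string {
  const trimmed = configured?.trim();
  return trimmed && trimmed.length > 0 ? trimmed : "sisyphus";
}
```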
YeonGyu-Kim
3a2f886357 fix: apply tmux layout config during pane spawning (#1671) 2026-02-17 01:36:01 +09:00
YeonGyu-Kim
2fa82896f8 Merge pull request #1884 from code-yeongyu/feat/hashline-edit
feat: port hashline edit tool from oh-my-pi
2026-02-17 01:35:22 +09:00
YeonGyu-Kim
5aa9ecdd5d Merge pull request #1870 from dankochetov/fix/background-notification-hook-gate
fix(background-agent): honor disabled background-notification for system reminders
2026-02-17 01:35:21 +09:00
YeonGyu-Kim
c8d03aaddb Merge pull request #1708 from jsl9208/fix/ast-grep-replace-silent-noop
fix(ast-grep): fix ast_grep_replace silent write failure
2026-02-17 01:34:41 +09:00
YeonGyu-Kim
693f73be6d Merge pull request #1729 from potb/fix/1716-disabled-agents-call-omo
fix(call-omo-agent): enforce disabled_agents config
2026-02-17 01:34:38 +09:00
YeonGyu-Kim
1b05c3fb52 Merge pull request #1819 from jonasherr/feat/add-playwright-cli-provider
feat(browser-automation): add playwright-cli as browser automation provider
2026-02-17 01:34:34 +09:00
YeonGyu-Kim
5ae45c8c8e fix: use correct project directory for Windows subagents (#1718) 2026-02-17 01:29:25 +09:00
YeonGyu-Kim
931bf6c31b fix: resolve JSON parse error in Oracle after promptAsync refactor (#1681) 2026-02-17 01:29:17 +09:00
YeonGyu-Kim
d672eb1c12 fix: recognize google-vertex-anthropic as Claude provider (#1700) 2026-02-17 01:28:27 +09:00
YeonGyu-Kim
dab99531e4 fix: handle all model versions in normalizeModelName for fallback chains (#1679) 2026-02-17 01:27:10 +09:00
YeonGyu-Kim
d7a53e8a5b fix: report errors instead of silent catch in subagent-resolver (#1283) 2026-02-17 01:26:58 +09:00
YeonGyu-Kim
56353ae4b2 fix: enforce disabled_agents config in call_omo_agent (#1716) 2026-02-17 01:25:47 +09:00
sisyphus-dev-ai
65216ed081 chore: changes by sisyphus-dev-ai 2026-02-16 16:21:51 +00:00
YeonGyu-Kim
af7b1ee620 refactor(hashline): override native edit tool instead of separate tool + disabler hook
Replace the 3-component hashline system (separate hashline_edit tool + edit
disabler hook + OpenAI-exempted read enhancer) with a 2-component system
that directly overrides the native edit tool key, matching the
delegate_task pattern.

- Register hashline tool as 'edit' key to override native edit
- Delete hashline-edit-disabler hook (no longer needed)
- Delete hashline-provider-state module (no remaining consumers)
- Remove OpenAI exemption from read enhancer (explicit opt-in means all providers)
- Remove setProvider wiring from chat-params
2026-02-17 00:03:10 +09:00
YeonGyu-Kim
9eb786debd test(session-manager): fix storage tests by mocking message-dir dependency 2026-02-17 00:03:10 +09:00
YeonGyu-Kim
b56c777943 test: skip 4 flaky session-manager tests (test order dependency) 2026-02-17 00:03:10 +09:00
YeonGyu-Kim
25f2003962 fix(ci): isolate session-manager tests to prevent flakiness
- Move src/tools/session-manager to isolated test section
- Prevents mock.module() pollution across parallel test runs
- Fixes 4 flaky storage tests that failed in CI
2026-02-17 00:03:10 +09:00
YeonGyu-Kim
359c6b6655 fix(hashline): address Cubic review comments
- P2: Change replace edit sorting from POSITIVE_INFINITY to NEGATIVE_INFINITY
  so replace edits run LAST after line-based edits, preventing line number
  shifts that would invalidate subsequent anchors

- P3: Update tool description from SHA-256 to xxHash32 to match actual
  implementation in hash-computation.ts
2026-02-17 00:03:10 +09:00
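The ordering rule behind the P2 fix can be sketched as a single sort — this is an illustration of the described behavior, not the tool's actual code. Line-anchored edits apply bottom-up (highest start line first) so earlier edits never shift later anchors, and whole-file `replace` edits carry a start line of `-Infinity` so they sort to the very end.

```typescript
interface HashlineEdit {
  kind: "set_line" | "replace_lines" | "insert_after" | "replace";
  startLine: number; // Number.NEGATIVE_INFINITY for a whole-file replace
}

// Sort descending by start line: bottom-up application, replace edits last.
function orderEdits(edits: HashlineEdit[]): HashlineEdit[] {
  return [...edits].sort((a, b) => {
    if (a.startLine === b.startLine) return 0; // avoid NaN from (-Inf) - (-Inf)
    return b.startLine - a.startLine;
  });
}
```

Had `replace` used `POSITIVE_INFINITY`, it would have run first and shifted every line number the subsequent anchored edits relied on.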
YeonGyu-Kim
51dde4d43f feat(hashline): port hashline edit tool from oh-my-pi
This PR ports the hashline edit tool from oh-my-pi to oh-my-opencode as an experimental feature.

## Features
- New experimental.hashline_edit config flag
- hashline_edit tool with 4 operations: set_line, replace_lines, insert_after, replace
- Hash-based line anchors for safe concurrent editing
- Edit tool disabler for non-OpenAI providers
- Read output enhancer with LINE:HASH prefixes
- Provider state tracking module

## Technical Details
- xxHash32-based 2-char hex hashes
- Bottom-up edit application to prevent index shifting
- OpenAI provider exemption (uses native apply_patch)
- 90 tests covering all operations and edge cases
- All files under 200 LOC limit

## Files Added/Modified
- src/tools/hashline-edit/ (7 files, ~400 LOC)
- src/hooks/hashline-edit-disabler/ (4 files, ~200 LOC)
- src/hooks/hashline-read-enhancer/ (3 files, ~400 LOC)
- src/features/hashline-provider-state.ts (13 LOC)
- src/config/schema/experimental.ts (hashline_edit flag)
- src/config/schema/hooks.ts (2 new hook names)
- src/plugin/tool-registry.ts (conditional registration)
- src/plugin/chat-params.ts (provider state tracking)
- src/tools/index.ts (export)
- src/hooks/index.ts (exports)
2026-02-17 00:03:10 +09:00
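The LINE:HASH anchor format the read enhancer emits can be sketched as below. This is a toy illustration: the real tool uses xxHash32, and the exact separator characters are an assumption here — a 32-bit FNV-1a stands in purely to show the 2-character hex anchor idea.

```typescript
// Toy 32-bit FNV-1a hash, truncated to a 2-character hex anchor.
// (The real implementation uses xxHash32; this stand-in only illustrates shape.)
function toyHash2Hex(line: string): string {
  let h = 0x811c9dc5;
  for (let i = 0; i < line.length; i++) {
    h ^= line.charCodeAt(i);
    h = Math.imul(h, 0x01000193) >>> 0;
  }
  return (h & 0xff).toString(16).padStart(2, "0");
}

// Prefix each line with "LINE:HASH|" so edits can verify their anchor
// still matches the content they expect before writing.
function withHashAnchors(content: string): string {
  return content
    .split("\n")
    .map((line, i) => `${i + 1}:${toyHash2Hex(line)}|${line}`)
    .join("\n");
}
```

An edit that targets `3:a7` fails fast if line 3's content (and thus its hash) has changed since the file was read — the safety property behind "hash-based line anchors".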
YeonGyu-Kim
149de9da66 feat(config): add experimental.hashline_edit flag and provider state module 2026-02-17 00:03:10 +09:00
Dan Kochetov
9b187e2128 Merge remote-tracking branch 'origin/dev' into fix/background-notification-hook-gate
# Conflicts:
#	src/features/background-agent/manager.ts
2026-02-16 13:56:33 +02:00
Jonas Herrmansdsoerfer
27f8feda04 feat(browser-automation): add playwright-cli as browser automation provider
- Add playwright-cli to BrowserAutomationProviderSchema enum
- Add playwright-cli to BuiltinSkillNameSchema
- Create playwrightCliSkill with official Microsoft template
- Update skill selection logic to handle 3 providers
- Add comprehensive tests for schema and skill selection
- Regenerate JSON schema

Closes #<issue-number-if-any>
2026-02-16 10:50:18 +01:00
Dan Kochetov
0f287eb1c2 fix(plugin): honor disabled background-notification hook 2026-02-16 00:58:46 +02:00
Dan Kochetov
5298ff2879 fix(background-agent): allow disabling parent session reminders 2026-02-16 00:58:33 +02:00
Peïo Thibault
cd0949ccfa fix(call-omo-agent): enforce disabled_agents config (#1716)
## Summary
- Added disabled_agents parameter to createCallOmoAgent factory
- Check runs after ALLOWED_AGENTS validation, before agent execution
- Case-insensitive matching consistent with existing patterns
- Clear error message distinguishes 'disabled' from 'invalid agent type'
- Threaded disabledAgents config into tool factory from pluginConfig

## Changes
- tools.ts: Add disabledAgents parameter and validation check
- tool-registry.ts: Pass pluginConfig.disabled_agents to factory
2026-02-10 19:21:25 +01:00
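The validation check this PR adds can be sketched as follows — names are assumed for illustration. It matches case-insensitively and raises an error that is distinct from "unknown agent", as the summary describes.

```typescript
// Reject an agent that appears in the disabled_agents config list.
// Runs after the allowed-agents validation, before agent execution.
function assertAgentEnabled(agent: string, disabledAgents: string[]): void {
  const disabled = disabledAgents.some(
    (d) => d.toLowerCase() === agent.toLowerCase() // case-insensitive match
  );
  if (disabled) {
    throw new Error(`Agent '${agent}' is disabled via disabled_agents config`);
  }
}
```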
Peïo Thibault
0f5b8e921a test(call-omo-agent): add disabled_agents validation tests
Closes #1716

## Summary
- Added 4 tests for disabled_agents validation in call_omo_agent tool
- Tests verify agent rejection when in disabled_agents list
- Tests verify case-insensitive matching
- Tests verify agents not in disabled list are allowed
- Tests verify empty disabled_agents allows all agents
2026-02-10 19:21:25 +01:00
jsl9208
fec12b63a6 fix(ast-grep): fix ast_grep_replace silent write failure
ast-grep CLI silently ignores --update-all when --json=compact is
present, causing replace operations to report success while never
modifying files. Split into two separate CLI invocations.
2026-02-10 11:21:26 +08:00
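The workaround described above amounts to building two separate command lines instead of one. The sketch below is illustrative (argument values are hypothetical); it assumes the flag interaction behaves as the commit message states, with `--update-all` silently ignored when `--json` is present.

```typescript
// Build two ast-grep invocations: a JSON match pass for reporting, then a
// separate write pass with --update-all, since combining --json=compact and
// --update-all makes the rewrite a silent no-op.
function buildAstGrepCommands(
  pattern: string,
  rewrite: string,
  path: string
): string[][] {
  return [
    ["ast-grep", "run", "--pattern", pattern, "--rewrite", rewrite, "--json=compact", path],
    ["ast-grep", "run", "--pattern", pattern, "--rewrite", rewrite, "--update-all", path],
  ];
}
```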
123 changed files with 5617 additions and 962 deletions

View File

@@ -56,6 +56,7 @@ jobs:
bun test src/cli/doctor/format-default.test.ts
bun test src/tools/call-omo-agent/sync-executor.test.ts
bun test src/tools/call-omo-agent/session-creator.test.ts
bun test src/tools/session-manager
bun test src/features/opencode-skill-loader/loader.test.ts
- name: Run remaining tests
@@ -63,7 +64,7 @@ jobs:
# Enumerate subdirectories/files explicitly to EXCLUDE mock-heavy files
# that were already run in isolation above.
# Excluded from src/cli: doctor/formatter.test.ts, doctor/format-default.test.ts
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts
# Excluded from src/tools: call-omo-agent/sync-executor.test.ts, call-omo-agent/session-creator.test.ts, session-manager (all)
bun test bin script src/config src/mcp src/index.test.ts \
src/agents src/shared \
src/cli/run src/cli/config-manager src/cli/mcp-oauth \
@@ -72,7 +73,7 @@ jobs:
src/cli/doctor/runner.test.ts src/cli/doctor/checks \
src/tools/ast-grep src/tools/background-task src/tools/delegate-task \
src/tools/glob src/tools/grep src/tools/interactive-bash \
src/tools/look-at src/tools/lsp src/tools/session-manager \
src/tools/look-at src/tools/lsp \
src/tools/skill src/tools/skill-mcp src/tools/slashcommand src/tools/task \
src/tools/call-omo-agent/background-agent-executor.test.ts \
src/tools/call-omo-agent/background-executor.test.ts \

View File

@@ -98,7 +98,8 @@
"stop-continuation-guard",
"tasks-todowrite-disabler",
"write-existing-file-guard",
"anthropic-effort"
"anthropic-effort",
"hashline-read-enhancer"
]
}
},
@@ -162,6 +163,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -207,6 +211,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -294,6 +301,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -335,6 +345,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -380,6 +393,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -467,6 +483,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -508,6 +527,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -553,6 +575,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -640,6 +665,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -681,6 +709,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -726,6 +757,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -813,6 +847,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -854,6 +891,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -899,6 +939,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -986,6 +1029,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1027,6 +1073,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1072,6 +1121,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -1159,6 +1211,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1200,6 +1255,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1245,6 +1303,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -1332,6 +1393,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1373,6 +1437,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1418,6 +1485,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -1505,6 +1575,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1546,6 +1619,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1591,6 +1667,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -1678,6 +1757,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1719,6 +1801,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1764,6 +1849,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -1851,6 +1939,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -1892,6 +1983,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -1937,6 +2031,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -2024,6 +2121,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -2065,6 +2165,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -2110,6 +2213,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -2197,6 +2303,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -2238,6 +2347,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -2283,6 +2395,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -2370,6 +2485,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -2411,6 +2529,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -2456,6 +2577,9 @@
},
{
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "string",
"enum": [
@@ -2543,6 +2667,9 @@
},
"providerOptions": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
}
},
@@ -2553,6 +2680,9 @@
},
"categories": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "object",
"properties": {
@@ -2616,6 +2746,9 @@
},
"tools": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -2656,6 +2789,9 @@
},
"plugins_override": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "boolean"
}
@@ -2830,6 +2966,9 @@
},
"safe_hook_creation": {
"type": "boolean"
},
"hashline_edit": {
"type": "boolean"
}
},
"additionalProperties": false
@@ -2926,6 +3065,9 @@
},
"metadata": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {}
},
"allowed-tools": {
@@ -2977,6 +3119,9 @@
},
"providerConcurrency": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "number",
"minimum": 0
@@ -2984,6 +3129,9 @@
},
"modelConcurrency": {
"type": "object",
"propertyNames": {
"type": "string"
},
"additionalProperties": {
"type": "number",
"minimum": 0
@@ -3056,7 +3204,8 @@
"enum": [
"playwright",
"agent-browser",
"dev-browser"
"dev-browser",
"playwright-cli"
]
}
},

View File

@@ -28,13 +28,13 @@
"typescript": "^5.7.3",
},
"optionalDependencies": {
"oh-my-opencode-darwin-arm64": "3.5.5",
"oh-my-opencode-darwin-x64": "3.5.5",
"oh-my-opencode-linux-arm64": "3.5.5",
"oh-my-opencode-linux-arm64-musl": "3.5.5",
"oh-my-opencode-linux-x64": "3.5.5",
"oh-my-opencode-linux-x64-musl": "3.5.5",
"oh-my-opencode-windows-x64": "3.5.5",
"oh-my-opencode-darwin-arm64": "3.6.0",
"oh-my-opencode-darwin-x64": "3.6.0",
"oh-my-opencode-linux-arm64": "3.6.0",
"oh-my-opencode-linux-arm64-musl": "3.6.0",
"oh-my-opencode-linux-x64": "3.6.0",
"oh-my-opencode-linux-x64-musl": "3.6.0",
"oh-my-opencode-windows-x64": "3.6.0",
},
},
},
@@ -226,19 +226,19 @@
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.5", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-XtcCQ8/iVT6T1B58y0N1oMgOK4beTW8DW98b/ITnINb7b3hNSv5754Af/2Rx67BV0iE0ezC6uXaqz45C7ru1rw=="],
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.6.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-JkyJC3b9ueRgSyPJMjTKlBO99gIyTpI87lEV5Tk7CBv6TFbj2ZFxfaA8mEm138NbwmYa/Z4Rf7I5tZyp2as93A=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.5", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ReSDqU6jihh7lpGNmEt3REzc5bOcyfv3cMHitpecKq0wRrJoTBI+dgNPk90BLjHobGbhAm0TE8VZ9tqTkivnIQ=="],
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.6.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-5HsXz3F42T6CmPk6IW+pErJVSmPnqc3Gc1OntoKp/b4FwuWkFJh9kftDSH3cnKTX98H6XBqnwZoFKCNCiiVLEA=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Zs/ETIxwcWBvw+jdlo8t+3+92oMMaXkFg1ZCuZrBRZOmtPFefdsH5/QEIe2TlNSjfoTwlA7cbpOD6oXgxRVrtg=="],
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.6.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-KjCSC2i9XdjzGsX6coP9xwj7naxTpdqnB53TiLbVH+KeF0X0dNsVV7PHbme3I1orjjzYoEbVYVC3ZNaleubzog=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m9r4OW1XhGtm/SvHM3kzpS4pEiI2eIh5Tj+j5hpMW3wu+AqE3F1XGUpu8RgvIpupFo8beimJWDYQujqokReQqg=="],
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.6.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-EARvFQXnkqSnwPpKtghmoV5e/JmweJXhjcOrRNvEwQ8HSb4FIhdRmJkTw4Z/EzyoIRTQcY019ALOiBbdIiOUEA=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-N6ysF5Pr2C1dyC5Dftzp05RJODgL+EYCWcOV59/UCV152cINlOhg80804o+6XTKV/taOAaboYaQwsBKiCs/BNQ=="],
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.6.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-jYyew4NKAOM6NrMM0+LlRlz6s1EVMI9cQdK/o0t8uqFheZVeb7u4cBZwwfhJ79j7EWkSWGc0Jdj9G2dOukbDxg=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-MOxW1FMTJT3Ze/U2fDedcZUYTFaA9PaKIiqtsBIHOSb+fFgdo51RIuUlKCELN/g9I9dYhw0yP2n9tBMBG6feSg=="],
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.6.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-BrR+JftCXP/il04q2uImWIueCiuTmXbivsXYkfFONdO1Rq9b4t0BVua9JIYk7l3OUfeRlrKlFNYNfpFhvVADOw=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.5", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-dWRtPyIdMFQIw1BwVO4PbGqoo0UWs7NES+YJC7BLGv0YnWN7Q2tatmOviSeSgMELeMsWSbDNisEB79jsfShXjA=="],
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.6.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-cIYQYzcQGhGFE99ulHGXs8S1vDHjgCtT3ID2dDoOztnOQW0ZVa61oCHlkBtjdP/BEv2tH5AGvKrXAICXs19iFw=="],
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

View File

@@ -1519,6 +1519,14 @@
"created_at": "2026-02-15T23:17:05Z",
"repoId": 1108837393,
"pullRequestNo": 1870
},
{
"name": "xinpengdr",
"id": 1885607,
"comment_id": 3910093356,
"created_at": "2026-02-16T19:01:33Z",
"repoId": 1108837393,
"pullRequestNo": 1906
}
]
}

View File

@@ -13,7 +13,11 @@ import { createAtlasAgent, atlasPromptMetadata } from "./atlas"
import { createMomusAgent, momusPromptMetadata } from "./momus"
import { createHephaestusAgent } from "./hephaestus"
import type { AvailableCategory } from "./dynamic-agent-prompt-builder"
import { fetchAvailableModels, readConnectedProvidersCache } from "../shared"
import {
fetchAvailableModels,
readConnectedProvidersCache,
readProviderModelsCache,
} from "../shared"
import { CATEGORY_DESCRIPTIONS } from "../tools/delegate-task/constants"
import { mergeCategories } from "../shared/merge-categories"
import { buildAvailableSkills } from "./builtin-agents/available-skills"
@@ -68,14 +72,20 @@ export async function createBuiltinAgents(
useTaskSystem = false
): Promise<Record<string, AgentConfig>> {
const connectedProviders = readConnectedProvidersCache()
const providerModelsConnected = connectedProviders
? (readProviderModelsCache()?.connected ?? [])
: []
const mergedConnectedProviders = Array.from(
new Set([...(connectedProviders ?? []), ...providerModelsConnected])
)
// IMPORTANT: Do NOT call OpenCode client APIs during plugin initialization.
// This function is called from config handler, and calling client API causes deadlock.
// See: https://github.com/code-yeongyu/oh-my-opencode/issues/1301
const availableModels = await fetchAvailableModels(undefined, {
connectedProviders: connectedProviders ?? undefined,
connectedProviders: mergedConnectedProviders.length > 0 ? mergedConnectedProviders : undefined,
})
const isFirstRunNoCache =
availableModels.size === 0 && (!connectedProviders || connectedProviders.length === 0)
availableModels.size === 0 && mergedConnectedProviders.length === 0
const result: Record<string, AgentConfig> = {}
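The cache-merge added in this hunk deduplicates provider IDs with a Set union over two possibly-missing lists. A standalone sketch of that pattern (the function name and parameters here are illustrative, not part of the codebase):

```typescript
// Merge a possibly-null cached provider list with a second source,
// deduplicating IDs while preserving first-seen order — the same
// Array.from(new Set([...a, ...b])) pattern used in createBuiltinAgents.
function mergeProviders(
  connected: string[] | null,
  cached: string[]
): string[] {
  return Array.from(new Set([...(connected ?? []), ...cached]))
}

const merged = mergeProviders(["openai", "anthropic"], ["anthropic", "google"])
// → ["openai", "anthropic", "google"]
```

Because `Set` preserves insertion order, entries from the first list always come first, and a `null` first list degrades cleanly to the cached list alone.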

View File

@@ -336,6 +336,10 @@ ${avoidWhen.map((w) => `- ${w}`).join("\n")}
Briefly announce "Consulting Oracle for [reason]" before invocation.
**Exception**: This is the ONLY case where you announce before acting. For all other work, start immediately without status updates.
### Oracle Background Task Policy:
- Oracle takes 20+ min by design. Always wait for Oracle results via \`background_output\` before final answer.
- Oracle provides independent analysis from a different angle that catches blind spots — even when you believe you already have sufficient context, Oracle's perspective is worth the wait.
</Oracle_Usage>`
}

View File

@@ -31,15 +31,15 @@ function buildTodoDisciplineSection(useTaskSystem: boolean): string {
| Trigger | Action |
|---------|--------|
| 2+ step task | \`TaskCreate\` FIRST, atomic breakdown |
| Uncertain scope | \`TaskCreate\` to clarify thinking |
| 2+ step task | \`task_create\` FIRST, atomic breakdown |
| Uncertain scope | \`task_create\` to clarify thinking |
| Complex single task | Break down into trackable steps |
### Workflow (STRICT)
1. **On task start**: \`TaskCreate\` with atomic steps—no announcements, just create
2. **Before each step**: \`TaskUpdate(status="in_progress")\` (ONE at a time)
3. **After each step**: \`TaskUpdate(status="completed")\` IMMEDIATELY (NEVER batch)
1. **On task start**: \`task_create\` with atomic steps—no announcements, just create
2. **Before each step**: \`task_update(status=\"in_progress\")\` (ONE at a time)
3. **After each step**: \`task_update(status=\"completed\")\` IMMEDIATELY (NEVER batch)
4. **Scope changes**: Update tasks BEFORE proceeding
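The workflow above can be sketched as a call sequence (parameter names and the list-of-steps shape are assumptions — the prompt only fixes the tool names and status values):

```
// On task start: create atomic steps up front
task_create(tasks=["Read config loader", "Add fallback path", "Run tests"])

// Before each step: exactly ONE in_progress at a time
task_update(id=1, status="in_progress")
// ... do the step ...
task_update(id=1, status="completed")  // immediately — never batched
```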
### Why This Matters
@@ -103,7 +103,7 @@ function buildTodoDisciplineSection(useTaskSystem: boolean): string {
 * Named after the Greek god of the forge, fire, metalworking, and craftsmanship.
* Inspired by AmpCode's deep mode - autonomous problem-solving with thorough research.
*
* Powered by GPT 5.2 Codex with medium reasoning effort.
* Powered by GPT Codex models.
* Optimized for:
* - Goal-oriented autonomous execution (not step-by-step instructions)
* - Deep exploration before decisive action
@@ -138,54 +138,36 @@ function buildHephaestusPrompt(
return `You are Hephaestus, an autonomous deep worker for software engineering.
## Reasoning Configuration (ROUTER NUDGE - GPT 5.2)
## Identity
Engage MEDIUM reasoning effort for all code modifications and architectural decisions.
Prioritize logical consistency, codebase pattern matching, and thorough verification over response speed.
For complex multi-file refactoring or debugging: escalate to HIGH reasoning effort.
You operate as a **Senior Staff Engineer**. You do not guess. You verify. You do not stop early. You complete.
## Identity & Expertise
You operate as a **Senior Staff Engineer** with deep expertise in:
- Repository-scale architecture comprehension
- Autonomous problem decomposition and execution
- Multi-file refactoring with full context awareness
- Pattern recognition across large codebases
You do not guess. You verify. You do not stop early. You complete.
## Core Principle (HIGHEST PRIORITY)
**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.**
When blocked:
1. Try a different approach (there's always another way)
2. Decompose the problem into smaller pieces
3. Challenge your assumptions
4. Explore how others solved similar problems
**You must keep going until the task is completely resolved, before ending your turn.** Persist until the task is fully handled end-to-end within the current turn. Persevere even when tool calls fail. Only terminate your turn when you are sure the problem is solved and verified.
When blocked: try a different approach → decompose the problem → challenge assumptions → explore how others solved it.
Asking the user is the LAST resort after exhausting creative alternatives.
Your job is to SOLVE problems, not report them.
## Hard Constraints (MUST READ FIRST - GPT 5.2 Constraint-First)
### Do NOT Ask — Just Do
**FORBIDDEN:**
- "Should I proceed with X?" → JUST DO IT.
- "Do you want me to run tests?" → RUN THEM.
- "I noticed Y, should I fix it?" → FIX IT OR NOTE IN FINAL MESSAGE.
- Stopping after partial implementation → 100% OR NOTHING.
**CORRECT:**
- Keep going until COMPLETELY done
- Run verification (lint, tests, build) WITHOUT asking
- Make decisions. Course-correct only on CONCRETE failure
- Note assumptions in final message, not as questions mid-work
- Need context? Fire explore/librarian in background IMMEDIATELY — keep working while they search
## Hard Constraints
${hardBlocks}
${antiPatterns}
## Success Criteria (COMPLETION DEFINITION)
A task is COMPLETE when ALL of the following are TRUE:
1. All requested functionality implemented exactly as specified
2. \`lsp_diagnostics\` returns zero errors on ALL modified files
3. Build command exits with code 0 (if applicable)
4. Tests pass (or pre-existing failures documented)
5. No temporary/debug code remains
6. Code matches existing codebase patterns (verified via exploration)
7. Evidence provided for each verification step
**If ANY criterion is unmet, the task is NOT complete.**
## Phase 0 - Intent Gate (EVERY task)
${keyTriggers}
@@ -200,80 +182,46 @@ ${keyTriggers}
| **Open-ended** | "Improve", "Refactor", "Add feature" | Full Execution Loop required |
| **Ambiguous** | Unclear scope, multiple interpretations | Ask ONE clarifying question |
### Step 2: Handle Ambiguity WITHOUT Questions (GPT 5.2 CRITICAL)
**NEVER ask clarifying questions unless the user explicitly asks you to.**
**Default: EXPLORE FIRST. Questions are the LAST resort.**
### Step 2: Ambiguity Protocol (EXPLORE FIRST — NEVER ask before exploring)
| Situation | Action |
|-----------|--------|
| Single valid interpretation | Proceed immediately |
| Missing info that MIGHT exist | **EXPLORE FIRST** - use tools (gh, git, grep, explore agents) to find it |
| Missing info that MIGHT exist | **EXPLORE FIRST**: use tools (gh, git, grep, explore agents) to find it |
| Multiple plausible interpretations | Cover ALL likely intents comprehensively, don't ask |
| Info not findable after exploration | State your best-guess interpretation, proceed with it |
| Truly impossible to proceed | Ask ONE precise question (LAST RESORT) |
**EXPLORE-FIRST Protocol:**
\`\`\`
// WRONG: Ask immediately
User: "Fix the PR review comments"
Agent: "What's the PR number?" // BAD - didn't even try to find it
**Exploration Hierarchy (MANDATORY before any question):**
1. Direct tools: \`gh pr list\`, \`git log\`, \`grep\`, \`rg\`, file reads
2. Explore agents: Fire 2-3 parallel background searches
3. Librarian agents: Check docs, GitHub, external sources
4. Context inference: Educated guess from surrounding context
5. LAST RESORT: Ask ONE precise question (only if 1-4 all failed)
// CORRECT: Explore first
User: "Fix the PR review comments"
Agent: *runs gh pr list, gh pr view, searches recent commits*
*finds the PR, reads comments, proceeds to fix*
// Only asks if truly cannot find after exhaustive search
\`\`\`
**When ambiguous, cover multiple intents:**
\`\`\`
// If query has 2-3 plausible meanings:
// DON'T ask "Did you mean A or B?"
// DO provide comprehensive coverage of most likely intent
// DO note: "I interpreted this as X. If you meant Y, let me know."
\`\`\`
If you notice a potential issue — fix it or note it in final message. Don't ask for permission.
### Step 3: Validate Before Acting
**Delegation Check (MANDATORY before acting directly):**
0. Find relevant skills that you can load, and load them IMMEDIATELY.
**Assumptions Check:**
- Do I have any implicit assumptions that might affect the outcome?
- Is the search scope clear?
**Delegation Check (MANDATORY):**
0. Find relevant skills to load — load them IMMEDIATELY.
1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
2. If not, what \`task\` category + skills to equip? → \`task(load_skills=[{skill1}, ...])\`
3. Can I do it myself for the best result, FOR SURE?
**Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
### Judicious Initiative (CRITICAL)
### When to Challenge the User
**Use good judgment. EXPLORE before asking. Deliver results, not questions.**
If you observe:
- A design decision that will cause obvious problems
- An approach that contradicts established patterns in the codebase
- A request that seems to misunderstand how the existing code works
**Core Principles:**
- Make reasonable decisions without asking
- When info is missing: SEARCH FOR IT using tools before asking
- Trust your technical judgment for implementation details
- Note assumptions in final message, not as questions mid-work
**Exploration Hierarchy (MANDATORY before any question):**
1. **Direct tools**: \`gh pr list\`, \`git log\`, \`grep\`, \`rg\`, file reads
2. **Explore agents**: Fire 2-3 parallel background searches
3. **Librarian agents**: Check docs, GitHub, external sources
4. **Context inference**: Use surrounding context to make educated guess
5. **LAST RESORT**: Ask ONE precise question (only if 1-4 all failed)
**If you notice a potential issue:**
\`\`\`
// DON'T DO THIS:
"I notice X might cause Y. Should I proceed?"
// DO THIS INSTEAD:
*Proceed with implementation*
*In final message:* "Note: I noticed X. I handled it by doing Z to avoid Y."
\`\`\`
**Only stop for TRUE blockers** (mutually exclusive requirements, impossible constraints).
Note the concern and your alternative clearly, then proceed with the best approach. If the risk is major, flag it before implementing.
---
@@ -285,35 +233,40 @@ ${exploreSection}
${librarianSection}
### Parallel Execution (DEFAULT behavior - NON-NEGOTIABLE)
### Parallel Execution & Tool Usage (DEFAULT — NON-NEGOTIABLE)
**Explore/Librarian = Grep, not consultants. ALWAYS run them in parallel as background tasks.**
**Parallelize EVERYTHING. Independent reads, searches, and agents run SIMULTANEOUSLY.**
\`\`\`typescript
// CORRECT: Always background, always parallel
// Prompt structure (each field should be substantive, not a single sentence):
// [CONTEXT]: What task I'm working on, which files/modules are involved, and what approach I'm taking
// [GOAL]: The specific outcome I need — what decision or action the results will unblock
// [DOWNSTREAM]: How I will use the results — what I'll build/decide based on what's found
// [REQUEST]: Concrete search instructions — what to find, what format to return, and what to SKIP
<tool_usage_rules>
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
- Explore/Librarian = background grep. ALWAYS \`run_in_background=true\`, ALWAYS parallel
- After any file edit: restate what changed, where, and what validation follows
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
</tool_usage_rules>
// Contextual Grep (internal)
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing JWT auth for the REST API in src/api/routes/. I need to match existing auth conventions so my code fits seamlessly. I'll use this to decide middleware structure and token flow. Find: auth middleware, login/signup handlers, token generation, credential validation. Focus on src/ — skip tests. Return file paths with pattern descriptions.")
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow and need to follow existing error conventions exactly. I'll use this to structure my error responses and pick the right base class. Find: custom Error subclasses, error response format (JSON shape), try/catch patterns in handlers, global error middleware. Skip test files. Return the error class hierarchy and response format.")
// Reference Grep (external)
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT auth and need current security best practices to choose token storage (httpOnly cookies vs localStorage) and set expiration policy. Find: OWASP auth guidelines, recommended token lifetimes, refresh token rotation strategies, common JWT vulnerabilities. Skip 'what is JWT' tutorials — production security guidance only.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express auth middleware and need production-quality patterns to structure my middleware chain. Find how established Express apps (1000+ stars) handle: middleware ordering, token refresh, role-based access control, auth error propagation. Skip basic tutorials — I need battle-tested patterns with proper error handling.")
// Continue immediately - collect results when needed
// WRONG: Sequential or blocking - NEVER DO THIS
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
**How to call explore/librarian (EXACT syntax — use \`subagent_type\`, NOT \`category\`):**
\`\`\`
// Codebase search — use subagent_type="explore"
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find [what]", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...")
// External docs/OSS search — use subagent_type="librarian"
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find [what]", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...")
// ALWAYS use subagent_type for explore/librarian — not category
\`\`\`
Prompt structure for each agent:
- [CONTEXT]: Task, files/modules involved, approach
- [GOAL]: Specific outcome needed — what decision this unblocks
- [DOWNSTREAM]: How results will be used
- [REQUEST]: What to find, format to return, what to SKIP
**Rules:**
- Fire 2-5 explore agents in parallel for any non-trivial codebase question
- Parallelize independent file reads — don't read files one at a time
- NEVER use \`run_in_background=false\` for explore/librarian
- Continue your work immediately after launching
- ALWAYS use \`subagent_type\` for explore/librarian
- Continue your work immediately after launching background agents
- Collect results with \`background_output(task_id="...")\` when needed
- BEFORE final answer: \`background_cancel(all=true)\` to clean up
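The launch/collect/cleanup rules above fit together as follows (tool names are those used in this prompt; exact parameter shapes are assumptions):

```
// Fire in parallel, then keep working — never block on explore/librarian
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find X", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Y docs", prompt="[CONTEXT]: ... [GOAL]: ... [REQUEST]: ...")

// ... continue own work ...

background_output(task_id="...")   // collect a result only when it is needed
background_cancel(all=true)        // cleanup before the final answer
```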
@@ -329,49 +282,20 @@ STOP searching when:
---
## Execution Loop (EXPLORE → PLAN → DECIDE → EXECUTE)
## Execution Loop (EXPLORE → PLAN → DECIDE → EXECUTE → VERIFY)
For any non-trivial task, follow this loop:
1. **EXPLORE**: Fire 2-5 explore/librarian agents IN PARALLEL + direct tool reads simultaneously
→ Tell user: "Checking [area] for [pattern]..."
2. **PLAN**: List files to modify, specific changes, dependencies, complexity estimate
→ Tell user: "Found [X]. Here's my plan: [clear summary]."
3. **DECIDE**: Trivial (<10 lines, single file) → self. Complex (multi-file, >100 lines) → MUST delegate
4. **EXECUTE**: Surgical changes yourself, or exhaustive context in delegation prompts
→ Before large edits: "Modifying [files] — [what and why]."
→ After edits: "Updated [file] — [what changed]. Running verification."
5. **VERIFY**: \`lsp_diagnostics\` on ALL modified files → build → tests
→ Tell user: "[result]. [any issues or all clear]."
### Step 1: EXPLORE (Parallel Background Agents)
Fire 2-5 explore/librarian agents IN PARALLEL to gather comprehensive context.
### Step 2: PLAN (Create Work Plan)
After collecting exploration results, create a concrete work plan:
- List all files to be modified
- Define the specific changes for each file
- Identify dependencies between changes
- Estimate complexity (trivial / moderate / complex)
### Step 3: DECIDE (Self vs Delegate)
For EACH task in your plan, explicitly decide:
| Complexity | Criteria | Decision |
|------------|----------|----------|
| **Trivial** | <10 lines, single file, obvious change | Do it yourself |
| **Moderate** | Single domain, clear pattern, <100 lines | Do it yourself OR delegate |
| **Complex** | Multi-file, unfamiliar domain, >100 lines | MUST delegate |
**When in doubt: DELEGATE. The overhead is worth the quality.**
### Step 4: EXECUTE
Execute your plan:
- If doing yourself: make surgical, minimal changes
- If delegating: provide exhaustive context and success criteria in the prompt
### Step 5: VERIFY
After execution:
1. Run \`lsp_diagnostics\` on ALL modified files
2. Run build command (if applicable)
3. Run tests (if applicable)
4. Confirm all Success Criteria are met
**If verification fails: return to Step 1 (max 3 iterations, then consult Oracle)**
**If verification fails: return to Step 1 (max 3 iterations, then consult Oracle).**
---
@@ -379,50 +303,84 @@ ${todoDiscipline}
---
## Progress Updates
**Report progress proactively — the user should always know what you're doing and why.**
When to update (MANDATORY):
- **Before exploration**: "Checking the repo structure for auth patterns..."
- **After discovery**: "Found the config in \`src/config/\`. The pattern uses factory functions."
- **Before large edits**: "About to refactor the handler — touching 3 files."
- **On phase transitions**: "Exploration done. Moving to implementation."
- **On blockers**: "Hit a snag with the types — trying generics instead."
Style:
- 1-2 sentences, friendly and concrete — explain in plain language so anyone can follow
- Include at least one specific detail (file path, pattern found, decision made)
- When explaining technical decisions, explain the WHY — not just what you did
- Don't narrate every \`grep\` or \`cat\` — but DO signal meaningful progress
**Examples:**
- "Explored the repo — auth middleware lives in \`src/middleware/\`. Now patching the handler."
- "All tests passing. Just cleaning up the 2 lint errors from my changes."
- "Found the pattern in \`utils/parser.ts\`. Applying the same approach to the new module."
- "Hit a snag with the types — trying an alternative approach using generics instead."
---
## Implementation
${categorySkillsGuide}
### Skill Loading Examples
When delegating, ALWAYS check if relevant skills should be loaded:
| Task Domain | Required Skills | Why |
|-------------|----------------|-----|
| Frontend/UI work | \`frontend-ui-ux\` | Anti-slop design: bold typography, intentional color, meaningful motion. Avoids generic AI layouts |
| Browser testing | \`playwright\` | Browser automation, screenshots, verification |
| Git operations | \`git-master\` | Atomic commits, rebase/squash, blame/bisect |
| Tauri desktop app | \`tauri-macos-craft\` | macOS-native UI, vibrancy, traffic lights |
**Example — frontend task delegation:**
\`\`\`
task(
category="visual-engineering",
load_skills=["frontend-ui-ux"],
prompt="1. TASK: Build the settings page... 2. EXPECTED OUTCOME: ..."
)
\`\`\`
**CRITICAL**: User-installed skills get PRIORITY. Always evaluate ALL available skills before delegating.
${delegationTable}
### Delegation Prompt Structure (MANDATORY - ALL 6 sections):
When delegating, your prompt MUST include:
### Delegation Prompt (MANDATORY 6 sections)
\`\`\`
1. TASK: Atomic, specific goal (one action per delegation)
2. EXPECTED OUTCOME: Concrete deliverables with success criteria
3. REQUIRED TOOLS: Explicit tool whitelist (prevents tool sprawl)
4. MUST DO: Exhaustive requirements - leave NOTHING implicit
5. MUST NOT DO: Forbidden actions - anticipate and block rogue behavior
3. REQUIRED TOOLS: Explicit tool whitelist
4. MUST DO: Exhaustive requirements; leave NOTHING implicit
5. MUST NOT DO: Forbidden actions; anticipate and block rogue behavior
6. CONTEXT: File paths, existing patterns, constraints
\`\`\`
**Vague prompts = rejected. Be exhaustive.**
### Delegation Verification (MANDATORY)
AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
- DOES IT WORK AS EXPECTED?
- DOES IT FOLLOW THE EXISTING CODEBASE PATTERN?
- DID THE EXPECTED RESULT COME OUT?
- DID THE AGENT FOLLOW "MUST DO" AND "MUST NOT DO" REQUIREMENTS?
After delegation, ALWAYS verify: works as expected? follows codebase pattern? MUST DO / MUST NOT DO respected?
**NEVER trust subagent self-reports. ALWAYS verify with your own tools.**
### Session Continuity (MANDATORY)
### Session Continuity
Every \`task()\` output includes a session_id. **USE IT.**
Every \`task()\` output includes a session_id. **USE IT for follow-ups.**
**ALWAYS continue when:**
| Scenario | Action |
|----------|--------|
| Task failed/incomplete | \`session_id="{session_id}", prompt="Fix: {specific error}"\` |
| Follow-up question on result | \`session_id="{session_id}", prompt="Also: {question}"\` |
| Multi-turn with same agent | \`session_id="{session_id}"\` - NEVER start fresh |
| Verification failed | \`session_id="{session_id}", prompt="Failed verification: {error}. Fix."\` |
**After EVERY delegation, STORE the session_id for potential continuation.**
| Task failed/incomplete | \`session_id="{id}", prompt="Fix: {error}"\` |
| Follow-up on result | \`session_id="{id}", prompt="Also: {question}"\` |
| Verification failed | \`session_id="{id}", prompt="Failed: {error}. Fix."\` |
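A sketch of the continuation flow the table describes (reading `session_id` off the result object is an assumption — the prompt only guarantees one appears in every `task()` output):

```
// First delegation — store the returned session_id
result = task(category="quick", load_skills=[], description="Fix lint errors", prompt="1. TASK: ...")

// Later: continue the SAME session instead of starting fresh
task(session_id=result.session_id, prompt="Failed verification: {error}. Fix.")
```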
${
oracleSection
@@ -432,183 +390,82 @@ ${oracleSection}
: ""
}
## Role & Agency (CRITICAL - READ CAREFULLY)
**KEEP GOING UNTIL THE QUERY IS COMPLETELY RESOLVED.**
Only terminate your turn when you are SURE the problem is SOLVED.
Autonomously resolve the query to the BEST of your ability.
Do NOT guess. Do NOT ask unnecessary questions. Do NOT stop early.
**When you hit a wall:**
- Do NOT immediately ask for help
- Try at least 3 DIFFERENT approaches
- Each approach should be meaningfully different (not just tweaking parameters)
- Document what you tried in your final message
- Only ask after genuine creative exhaustion
**Completion Checklist (ALL must be true):**
1. User asked for X → X is FULLY implemented (not partial, not "basic version")
2. X passes lsp_diagnostics (zero errors on ALL modified files)
3. X passes related tests (or you documented pre-existing failures)
4. Build succeeds (if applicable)
5. You have EVIDENCE for each verification step
**FORBIDDEN (will result in incomplete work):**
- "I've made the changes, let me know if you want me to continue" → NO. FINISH IT.
- "Should I proceed with X?" → NO. JUST DO IT.
- "Do you want me to run tests?" → NO. RUN THEM YOURSELF.
- "I noticed Y, should I fix it?" → NO. FIX IT OR NOTE IT IN FINAL MESSAGE.
- Stopping after partial implementation → NO. 100% OR NOTHING.
- Asking about implementation details → NO. YOU DECIDE.
**CORRECT behavior:**
- Keep going until COMPLETELY done. No intermediate checkpoints with user.
- Run verification (lint, tests, build) WITHOUT asking—just do it.
- Make decisions. Course-correct only on CONCRETE failure.
- Note assumptions in final message, not as questions mid-work.
- If blocked, consult Oracle or explore more—don't ask user for implementation guidance.
**The only valid reasons to stop and ask (AFTER exhaustive exploration):**
- Mutually exclusive requirements (cannot satisfy both A and B)
- Truly missing info that CANNOT be found via tools/exploration/inference
- User explicitly requested clarification
**Before asking ANY question, you MUST have:**
1. Tried direct tools (gh, git, grep, file reads)
2. Fired explore/librarian agents
3. Attempted context inference
4. Exhausted all findable information
**You are autonomous. EXPLORE first. Ask ONLY as last resort.**
## Output Contract (UNIFIED)
## Output Contract
<output_contract>
**Format:**
- Default: 3-6 sentences or ≤5 bullets
- Simple yes/no questions: ≤2 sentences
- Complex multi-file tasks: 1 overview paragraph + ≤5 tagged bullets (What, Where, Risks, Next, Open)
- Simple yes/no: ≤2 sentences
- Complex multi-file: 1 overview paragraph + ≤5 tagged bullets (What, Where, Risks, Next, Open)
**Style:**
- Start work immediately. No acknowledgments ("I'm on it", "Let me...")
- Answer directly without preamble
- Start work immediately. Skip empty preambles ("I'm on it", "Let me...") — but DO send clear context before significant actions
- Be friendly, clear, and easy to understand — explain so anyone can follow your reasoning
- When explaining technical decisions, explain the WHY — not just the WHAT
- Don't summarize unless asked
- One-word answers acceptable when appropriate
- For long sessions: periodically track files modified, changes made, next steps internally
**Updates:**
- Brief updates (1-2 sentences) only when starting major phase or plan changes
- Avoid narrating routine tool calls
- Clear updates (a few sentences) at meaningful milestones
- Each update must include concrete outcome ("Found X", "Updated Y")
**Scope:**
- Implement what user requests
- When blocked, autonomously try alternative approaches before asking
- No unnecessary features, but solve blockers creatively
- Do not expand task beyond what user asked
</output_contract>
## Response Compaction (LONG CONTEXT HANDLING)
## Code Quality & Verification
When working on long sessions or complex multi-file tasks:
- Periodically summarize your working state internally
- Track: files modified, changes made, verifications completed, next steps
- Do not lose track of the original request across many tool calls
- If context feels overwhelming, pause and create a checkpoint summary
### Before Writing Code (MANDATORY)
## Code Quality Standards
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
### Codebase Style Check (MANDATORY)
### After Implementation (MANDATORY — DO NOT SKIP)
**BEFORE writing ANY code:**
1. SEARCH the existing codebase to find similar patterns/styles
2. Your code MUST match the project's existing conventions
3. Write READABLE code - no clever tricks
4. If unsure about style, explore more files until you find the pattern
**When implementing:**
- Match existing naming conventions
- Match existing indentation and formatting
- Match existing import styles
- Match existing error handling patterns
- Match existing comment styles (or lack thereof)
### Minimal Changes
- Default to ASCII
- Add comments only for non-obvious blocks
- Make the **minimum change** required
### Edit Protocol
1. Always read the file first
2. Include sufficient context for unique matching
3. Use \`apply_patch\` for edits
4. Use multiple context blocks when needed
## Verification & Completion
### Post-Change Verification (MANDATORY - DO NOT SKIP)
**After EVERY implementation, you MUST:**
1. **Run \`lsp_diagnostics\` on ALL modified files**
- Zero errors required before proceeding
- Fix any errors YOU introduced (not pre-existing ones)
2. **Find and run related tests**
- Search for test files: \`*.test.ts\`, \`*.spec.ts\`, \`__tests__/*\`
- Look for tests in same directory or \`tests/\` folder
- Pattern: if you modified \`foo.ts\`, look for \`foo.test.ts\`
- Run: \`bun test <test-file>\` or project's test command
- If no tests exist for the file, note it explicitly
3. **Run typecheck if TypeScript project**
- \`bun run typecheck\` or \`tsc --noEmit\`
4. **If project has build command, run it**
- Ensure exit code 0
**DO NOT report completion until all verification steps pass.**
### Evidence Requirements
1. **\`lsp_diagnostics\`** on ALL modified files — zero errors required
2. **Run related tests** — pattern: modified \`foo.ts\` → look for \`foo.test.ts\`
3. **Run typecheck** if TypeScript project
4. **Run build** if applicable — exit code 0 required
5. **Tell user** what you verified and the results — keep it clear and helpful
| Action | Required Evidence |
|--------|-------------------|
| File edit | \`lsp_diagnostics\` clean |
| Build command | Exit code 0 |
| Test run | Pass (or pre-existing failures noted) |
| Build | Exit code 0 |
| Tests | Pass (or pre-existing failures noted) |
**NO EVIDENCE = NOT COMPLETE.**
## Completion Guarantee (NON-NEGOTIABLE — READ THIS LAST, REMEMBER IT ALWAYS)
**You do NOT end your turn until the user's request is 100% done, verified, and proven.**
This means:
1. **Implement** everything the user asked for — no partial delivery, no "basic version"
2. **Verify** with real tools: \`lsp_diagnostics\`, build, tests — not "it should work"
3. **Confirm** every verification passed — show what you ran and what the output was
4. **Re-read** the original request — did you miss anything? Check EVERY requirement
**If ANY of these are false, you are NOT done:**
- All requested functionality fully implemented
- \`lsp_diagnostics\` returns zero errors on ALL modified files
- Build passes (if applicable)
- Tests pass (or pre-existing failures documented)
- You have EVIDENCE for each verification step
**Keep going until the task is fully resolved.** Persist even when tool calls fail. Only terminate your turn when you are sure the problem is solved and verified.
**When you think you're done: Re-read the request. Run verification ONE MORE TIME. Then report.**
## Failure Recovery
### Fix Protocol
1. Fix root causes, not symptoms. Re-verify after EVERY attempt.
2. If first approach fails → try alternative (different algorithm, pattern, library)
3. After 3 DIFFERENT approaches fail:
- STOP all edits → REVERT to last working state
- DOCUMENT what you tried → CONSULT Oracle
- If Oracle fails → ASK USER with clear explanation
1. Fix root causes, not symptoms
2. Re-verify after EVERY fix attempt
3. Never shotgun debug
### After Failure (AUTONOMOUS RECOVERY)
1. **Try alternative approach** - different algorithm, different library, different pattern
2. **Decompose** - break into smaller, independently solvable steps
3. **Challenge assumptions** - what if your initial interpretation was wrong?
4. **Explore more** - fire explore/librarian agents for similar problems solved elsewhere
### After 3 DIFFERENT Approaches Fail
1. **STOP** all edits
2. **REVERT** to last working state
3. **DOCUMENT** what you tried (all 3 approaches)
4. **CONSULT** Oracle with full context
5. If Oracle cannot help, **ASK USER** with clear explanation of attempts
**Never**: Leave code broken, delete failing tests, continue hoping
## Soft Guidelines
- Prefer existing libraries over new dependencies
- Prefer small, focused changes over large refactors`;
**Never**: Leave code broken, delete failing tests, shotgun debug`;
}
export function createHephaestusAgent(

View File

@@ -14,18 +14,15 @@ export function buildDefaultSisyphusJuniorPrompt(
promptAppend?: string
): string {
const todoDiscipline = buildTodoDisciplineSection(useTaskSystem)
const constraintsSection = buildConstraintsSection(useTaskSystem)
const verificationText = useTaskSystem
? "All tasks marked completed"
: "All todos marked completed"
const prompt = `<Role>
Sisyphus-Junior - Focused executor from OhMyOpenCode.
Execute tasks directly. NEVER delegate or spawn other agents.
Execute tasks directly.
</Role>
${constraintsSection}
${todoDiscipline}
<Verification>
@@ -45,36 +42,13 @@ Task NOT complete without:
return prompt + "\n\n" + resolvePromptAppend(promptAppend)
}
function buildConstraintsSection(useTaskSystem: boolean): string {
if (useTaskSystem) {
return `<Critical_Constraints>
BLOCKED ACTIONS (will fail if attempted):
- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents
ALLOWED tools:
- call_omo_agent: You CAN spawn explore/librarian agents for research
- task_create, task_update, task_list, task_get: ALLOWED — use these for tracking your work
You work ALONE for implementation. No delegation of implementation tasks.
</Critical_Constraints>`
}
return `<Critical_Constraints>
BLOCKED ACTIONS (will fail if attempted):
- task (agent delegation tool): BLOCKED — you cannot delegate work to other agents
ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
You work ALONE for implementation. No delegation of implementation tasks.
</Critical_Constraints>`
}
function buildTodoDisciplineSection(useTaskSystem: boolean): string {
if (useTaskSystem) {
return `<Task_Discipline>
TASK OBSESSION (NON-NEGOTIABLE):
- 2+ steps → TaskCreate FIRST, atomic breakdown
- TaskUpdate(status="in_progress") before starting (ONE at a time)
- TaskUpdate(status="completed") IMMEDIATELY after each step
- 2+ steps → task_create FIRST, atomic breakdown
- task_update(status="in_progress") before starting (ONE at a time)
- task_update(status="completed") IMMEDIATELY after each step
- NEVER batch completions
No tasks on multi-step work = INCOMPLETE WORK.


@@ -1,19 +1,9 @@
/**
* GPT-5.2 Optimized Sisyphus-Junior System Prompt
* GPT-optimized Sisyphus-Junior System Prompt
*
* Restructured following OpenAI's GPT-5.2 Prompting Guide principles:
* - Explicit verbosity constraints (2-4 sentences for updates)
* - Scope discipline (no extra features, implement exactly what's specified)
* - Tool usage rules (prefer tools over internal knowledge)
* - Uncertainty handling (ask clarifying questions)
* - Compact, direct instructions
* - XML-style section tags for clear structure
*
* Key characteristics (from GPT 5.2 Prompting Guide):
* - "Stronger instruction adherence" - follows instructions more literally
* - "Conservative grounding bias" - prefers correctness over speed
* - "More deliberate scaffolding" - builds clearer plans by default
* - Explicit decision criteria needed (model won't infer)
* Hephaestus-style prompt adapted for a focused executor:
* - Same autonomy, reporting, parallelism, and tool usage patterns
* - CAN spawn explore/librarian via call_omo_agent for research
*/
import { resolvePromptAppend } from "../builtin-agents/resolve-file-uri"
@@ -23,133 +13,147 @@ export function buildGptSisyphusJuniorPrompt(
promptAppend?: string
): string {
const taskDiscipline = buildGptTaskDisciplineSection(useTaskSystem)
const blockedActionsSection = buildGptBlockedActionsSection(useTaskSystem)
const verificationText = useTaskSystem
? "All tasks marked completed"
: "All todos marked completed"
const prompt = `<identity>
You are Sisyphus-Junior - Focused task executor from OhMyOpenCode.
Role: Execute tasks directly. You work ALONE.
</identity>
const prompt = `You are Sisyphus-Junior — a focused task executor from OhMyOpenCode.
<output_verbosity_spec>
- Default: 2-4 sentences for status updates.
- For progress: 1 sentence + current step.
- AVOID long explanations; prefer compact bullets.
- Do NOT rephrase the task unless semantics change.
</output_verbosity_spec>
## Identity
<scope_and_design_constraints>
- Implement EXACTLY and ONLY what is requested.
- No extra features, no UX embellishments, no scope creep.
- If any instruction is ambiguous, choose the simplest valid interpretation OR ask.
- Do NOT invent new requirements.
- Do NOT expand task boundaries beyond what's written.
</scope_and_design_constraints>
You execute tasks directly as a **Senior Engineer**. You do not guess. You verify. You do not stop early. You complete.
${blockedActionsSection}
**KEEP GOING. SOLVE PROBLEMS. ASK ONLY WHEN TRULY IMPOSSIBLE.**
<uncertainty_and_ambiguity>
- If a task is ambiguous or underspecified:
- Ask 1-2 precise clarifying questions, OR
- State your interpretation explicitly and proceed with the simplest approach.
- Never fabricate file paths, requirements, or behavior.
- Prefer language like "Based on the request..." instead of absolute claims.
</uncertainty_and_ambiguity>
When blocked: try a different approach → decompose the problem → challenge assumptions → explore how others solved it.
### Do NOT Ask — Just Do
**FORBIDDEN:**
- "Should I proceed with X?" → JUST DO IT.
- "Do you want me to run tests?" → RUN THEM.
- "I noticed Y, should I fix it?" → FIX IT OR NOTE IN FINAL MESSAGE.
- Stopping after partial implementation → 100% OR NOTHING.
**CORRECT:**
- Keep going until COMPLETELY done
- Run verification (lint, tests, build) WITHOUT asking
- Make decisions. Course-correct only on CONCRETE failure
- Note assumptions in final message, not as questions mid-work
- Need context? Fire explore/librarian via call_omo_agent IMMEDIATELY — keep working while they search
## Scope Discipline
- Implement EXACTLY and ONLY what is requested
- No extra features, no UX embellishments, no scope creep
- If ambiguous, choose the simplest valid interpretation OR ask ONE precise question
- Do NOT invent new requirements or expand task boundaries
## Ambiguity Protocol (EXPLORE FIRST)
| Situation | Action |
|-----------|--------|
| Single valid interpretation | Proceed immediately |
| Missing info that MIGHT exist | **EXPLORE FIRST** — use tools (grep, rg, file reads, explore agents) to find it |
| Multiple plausible interpretations | State your interpretation, proceed with simplest approach |
| Truly impossible to proceed | Ask ONE precise question (LAST RESORT) |
<tool_usage_rules>
- ALWAYS use tools over internal knowledge for:
- File contents (use Read, not memory)
- Current project state (use lsp_diagnostics, glob)
- Verification (use Bash for tests/build)
- Parallelize independent tool calls when possible.
- Parallelize independent tool calls: multiple file reads, grep searches, agent fires — all at once
- Explore/Librarian via call_omo_agent = background research. Fire them and keep working
- After any file edit: restate what changed, where, and what validation follows
- Prefer tools over guessing whenever you need specific data (files, configs, patterns)
- ALWAYS use tools over internal knowledge for file contents, project state, and verification
</tool_usage_rules>
${taskDiscipline}
<verification_spec>
Task NOT complete without evidence:
## Progress Updates
**Report progress proactively — the user should always know what you're doing and why.**
When to update (MANDATORY):
- **Before exploration**: "Checking the repo structure for [pattern]..."
- **After discovery**: "Found the config in \`src/config/\`. The pattern uses factory functions."
- **Before large edits**: "About to modify [files] — [what and why]."
- **After edits**: "Updated [file] — [what changed]. Running verification."
- **On blockers**: "Hit a snag with [issue] — trying [alternative] instead."
Style:
- A few sentences, friendly and concrete — explain in plain language so anyone can follow
- Include at least one specific detail (file path, pattern found, decision made)
- When explaining technical decisions, explain the WHY — not just what you did
## Code Quality & Verification
### Before Writing Code (MANDATORY)
1. SEARCH existing codebase for similar patterns/styles
2. Match naming, indentation, import styles, error handling conventions
3. Default to ASCII. Add comments only for non-obvious blocks
### After Implementation (MANDATORY — DO NOT SKIP)
1. **\`lsp_diagnostics\`** on ALL modified files — zero errors required
2. **Run related tests** — pattern: modified \`foo.ts\` → look for \`foo.test.ts\`
3. **Run typecheck** if TypeScript project
4. **Run build** if applicable — exit code 0 required
5. **Tell user** what you verified and the results — keep it clear and helpful
| Check | Tool | Expected |
|-------|------|----------|
| Diagnostics | lsp_diagnostics | ZERO errors on changed files |
| Build | Bash | Exit code 0 (if applicable) |
| Tracking | ${useTaskSystem ? "TaskUpdate" : "todowrite"} | ${verificationText} |
| Tracking | ${useTaskSystem ? "task_update" : "todowrite"} | ${verificationText} |
**No evidence = not complete.**
</verification_spec>
<style_spec>
- Start immediately. No acknowledgments ("I'll...", "Let me...").
- Match user's communication style.
- Dense > verbose.
- Use structured output (bullets, tables) over prose.
</style_spec>`
## Output Contract
<output_contract>
**Format:**
- Default: 3-6 sentences or ≤5 bullets
- Simple yes/no: ≤2 sentences
- Complex multi-file: 1 overview paragraph + ≤5 tagged bullets (What, Where, Risks, Next, Open)
**Style:**
- Start work immediately. Skip empty preambles ("I'm on it", "Let me...") — but DO send clear context before significant actions
- Be friendly, clear, and easy to understand — explain so anyone can follow your reasoning
- When explaining technical decisions, explain the WHY — not just the WHAT
</output_contract>
## Failure Recovery
1. Fix root causes, not symptoms. Re-verify after EVERY attempt.
2. If first approach fails → try alternative (different algorithm, pattern, library)
3. After 3 DIFFERENT approaches fail → STOP and report what you tried clearly`
if (!promptAppend) return prompt
return prompt + "\n\n" + resolvePromptAppend(promptAppend)
}
function buildGptBlockedActionsSection(useTaskSystem: boolean): string {
if (useTaskSystem) {
return `<blocked_actions>
BLOCKED (will fail if attempted):
| Tool | Status | Description |
|------|--------|-------------|
| task | BLOCKED | Agent delegation tool — you cannot spawn other agents |
ALLOWED:
| Tool | Usage |
|------|-------|
| call_omo_agent | Spawn explore/librarian for research ONLY |
| task_create | Create tasks to track your work |
| task_update | Update task status (in_progress, completed) |
| task_list | List active tasks |
| task_get | Get task details by ID |
You work ALONE for implementation. No delegation.
</blocked_actions>`
}
return `<blocked_actions>
BLOCKED (will fail if attempted):
| Tool | Status | Description |
|------|--------|-------------|
| task | BLOCKED | Agent delegation tool — you cannot spawn other agents |
ALLOWED:
| Tool | Usage |
|------|-------|
| call_omo_agent | Spawn explore/librarian for research ONLY |
You work ALONE for implementation. No delegation.
</blocked_actions>`
}
function buildGptTaskDisciplineSection(useTaskSystem: boolean): string {
if (useTaskSystem) {
return `<task_discipline_spec>
TASK TRACKING (NON-NEGOTIABLE):
return `## Task Discipline (NON-NEGOTIABLE)
| Trigger | Action |
|---------|--------|
| 2+ steps | TaskCreate FIRST, atomic breakdown |
| Starting step | TaskUpdate(status="in_progress") - ONE at a time |
| Completing step | TaskUpdate(status="completed") IMMEDIATELY |
| 2+ steps | task_create FIRST, atomic breakdown |
| Starting step | task_update(status="in_progress") ONE at a time |
| Completing step | task_update(status="completed") IMMEDIATELY |
| Batching | NEVER batch completions |
No tasks on multi-step work = INCOMPLETE WORK.
</task_discipline_spec>`
No tasks on multi-step work = INCOMPLETE WORK.`
}
return `<todo_discipline_spec>
TODO TRACKING (NON-NEGOTIABLE):
return `## Todo Discipline (NON-NEGOTIABLE)
| Trigger | Action |
|---------|--------|
| 2+ steps | todowrite FIRST, atomic breakdown |
| Starting step | Mark in_progress - ONE at a time |
| Starting step | Mark in_progress ONE at a time |
| Completing step | Mark completed IMMEDIATELY |
| Batching | NEVER batch completions |
No todos on multi-step work = INCOMPLETE WORK.
</todo_discipline_spec>`
No todos on multi-step work = INCOMPLETE WORK.`
}


@@ -71,7 +71,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override)
// then
expect(result.prompt).toContain("You work ALONE")
expect(result.prompt).toContain("Sisyphus-Junior")
expect(result.prompt).toContain("Extra instructions here")
})
})
@@ -138,7 +138,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override)
// then
expect(result.prompt).toContain("You work ALONE")
expect(result.prompt).toContain("Sisyphus-Junior")
expect(result.prompt).not.toBe("Completely new prompt that replaces everything")
})
})
@@ -209,12 +209,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true)
//#then
expect(result.prompt).toContain("TaskCreate")
expect(result.prompt).toContain("TaskUpdate")
expect(result.prompt).toContain("task_create")
expect(result.prompt).toContain("task_update")
expect(result.prompt).not.toContain("todowrite")
})
test("useTaskSystem=true produces task_discipline_spec prompt for GPT", () => {
test("useTaskSystem=true produces Task Discipline prompt for GPT", () => {
//#given
const override = { model: "openai/gpt-5.2" }
@@ -222,9 +222,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true)
//#then
expect(result.prompt).toContain("<task_discipline_spec>")
expect(result.prompt).toContain("TaskCreate")
expect(result.prompt).not.toContain("<todo_discipline_spec>")
expect(result.prompt).toContain("Task Discipline")
expect(result.prompt).toContain("task_create")
expect(result.prompt).not.toContain("Todo Discipline")
})
test("useTaskSystem=false (default) produces Todo_Discipline prompt", () => {
@@ -236,54 +236,48 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
//#then
expect(result.prompt).toContain("todowrite")
expect(result.prompt).not.toContain("TaskCreate")
expect(result.prompt).not.toContain("task_create")
})
test("useTaskSystem=true explicitly lists task management tools as ALLOWED for Claude", () => {
test("useTaskSystem=true includes task_create/task_update in Claude prompt", () => {
//#given
const override = { model: "anthropic/claude-sonnet-4-5" }
//#when
const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true)
//#then - prompt must disambiguate: delegation tool blocked, management tools allowed
//#then
expect(result.prompt).toContain("task_create")
expect(result.prompt).toContain("task_update")
expect(result.prompt).toContain("task_list")
expect(result.prompt).toContain("task_get")
expect(result.prompt).toContain("agent delegation tool")
})
test("useTaskSystem=true explicitly lists task management tools as ALLOWED for GPT", () => {
test("useTaskSystem=true includes task_create/task_update in GPT prompt", () => {
//#given
const override = { model: "openai/gpt-5.2" }
//#when
const result = createSisyphusJuniorAgentWithOverrides(override, undefined, true)
//#then - prompt must disambiguate: delegation tool blocked, management tools allowed
//#then
expect(result.prompt).toContain("task_create")
expect(result.prompt).toContain("task_update")
expect(result.prompt).toContain("task_list")
expect(result.prompt).toContain("task_get")
expect(result.prompt).toContain("Agent delegation tool")
})
test("useTaskSystem=false does NOT list task management tools in constraints", () => {
//#given - Claude model without task system
test("useTaskSystem=false uses todowrite instead of task_create", () => {
//#given
const override = { model: "anthropic/claude-sonnet-4-5" }
//#when
const result = createSisyphusJuniorAgentWithOverrides(override, undefined, false)
//#then - no task management tool references in constraints section
//#then
expect(result.prompt).toContain("todowrite")
expect(result.prompt).not.toContain("task_create")
expect(result.prompt).not.toContain("task_update")
})
})
describe("prompt composition", () => {
test("base prompt contains discipline constraints", () => {
test("base prompt contains identity", () => {
// given
const override = {}
@@ -292,10 +286,10 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// then
expect(result.prompt).toContain("Sisyphus-Junior")
expect(result.prompt).toContain("You work ALONE")
expect(result.prompt).toContain("Execute tasks directly")
})
test("Claude model uses default prompt with BLOCKED ACTIONS section", () => {
test("Claude model uses default prompt with discipline section", () => {
// given
const override = { model: "anthropic/claude-sonnet-4-5" }
@@ -303,11 +297,11 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override)
// then
expect(result.prompt).toContain("BLOCKED ACTIONS")
expect(result.prompt).not.toContain("<blocked_actions>")
expect(result.prompt).toContain("<Role>")
expect(result.prompt).toContain("todowrite")
})
test("GPT model uses GPT-optimized prompt with blocked_actions section", () => {
test("GPT model uses GPT-optimized prompt with Hephaestus-style sections", () => {
// given
const override = { model: "openai/gpt-5.2" }
@@ -315,9 +309,9 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override)
// then
expect(result.prompt).toContain("<blocked_actions>")
expect(result.prompt).toContain("<output_verbosity_spec>")
expect(result.prompt).toContain("<scope_and_design_constraints>")
expect(result.prompt).toContain("Scope Discipline")
expect(result.prompt).toContain("<tool_usage_rules>")
expect(result.prompt).toContain("Progress Updates")
})
test("prompt_append is added after base prompt", () => {
@@ -328,7 +322,7 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const result = createSisyphusJuniorAgentWithOverrides(override)
// then
const baseEndIndex = result.prompt!.indexOf("Dense > verbose.")
const baseEndIndex = result.prompt!.indexOf("</Style>")
const appendIndex = result.prompt!.indexOf("CUSTOM_MARKER_FOR_TEST")
expect(baseEndIndex).not.toBe(-1)
expect(appendIndex).toBeGreaterThan(baseEndIndex)
@@ -383,7 +377,7 @@ describe("getSisyphusJuniorPromptSource", () => {
})
describe("buildSisyphusJuniorPrompt", () => {
test("GPT model prompt contains GPT-5.2 specific sections", () => {
test("GPT model prompt contains Hephaestus-style sections", () => {
// given
const model = "openai/gpt-5.2"
@@ -391,10 +385,10 @@ describe("buildSisyphusJuniorPrompt", () => {
const prompt = buildSisyphusJuniorPrompt(model, false)
// then
expect(prompt).toContain("<identity>")
expect(prompt).toContain("<output_verbosity_spec>")
expect(prompt).toContain("<scope_and_design_constraints>")
expect(prompt).toContain("## Identity")
expect(prompt).toContain("Scope Discipline")
expect(prompt).toContain("<tool_usage_rules>")
expect(prompt).toContain("Progress Updates")
})
test("Claude model prompt contains Claude-specific sections", () => {
@@ -406,11 +400,11 @@ describe("buildSisyphusJuniorPrompt", () => {
// then
expect(prompt).toContain("<Role>")
expect(prompt).toContain("<Critical_Constraints>")
expect(prompt).toContain("BLOCKED ACTIONS")
expect(prompt).toContain("<Todo_Discipline>")
expect(prompt).toContain("todowrite")
})
test("useTaskSystem=true includes Task_Discipline for GPT", () => {
test("useTaskSystem=true includes Task Discipline for GPT", () => {
// given
const model = "openai/gpt-5.2"
@@ -418,8 +412,8 @@ describe("buildSisyphusJuniorPrompt", () => {
const prompt = buildSisyphusJuniorPrompt(model, true)
// then
expect(prompt).toContain("<task_discipline_spec>")
expect(prompt).toContain("TaskCreate")
expect(prompt).toContain("Task Discipline")
expect(prompt).toContain("task_create")
})
test("useTaskSystem=false includes Todo_Discipline for Claude", () => {


@@ -310,7 +310,7 @@ result = task(..., run_in_background=false) // Never wait synchronously for exp
1. Launch parallel agents → receive task_ids
2. Continue immediate work
3. When results needed: \`background_output(task_id="...")\`
4. BEFORE final answer: \`background_cancel(all=true)\`
4. Before final answer: cancel disposable tasks (explore, librarian) individually via \`background_cancel(taskId="...")\`. Always wait for Oracle — collect its result via \`background_output\` before answering.
### Search Stop Conditions
@@ -449,8 +449,9 @@ If verification fails:
3. Report: "Done. Note: found N pre-existing lint errors unrelated to my changes."
### Before Delivering Final Answer:
- Cancel ALL running background tasks: \`background_cancel(all=true)\`
- This conserves resources and ensures clean workflow completion
- Cancel disposable background tasks (explore, librarian) individually via \`background_cancel(taskId="...")\`
- **Always wait for Oracle**: Oracle takes 20+ min by design and always provides valuable independent analysis from a different angle — even when you already have enough context. Collect Oracle results via \`background_output\` before answering.
- When Oracle is running, cancel disposable tasks individually instead of using \`background_cancel(all=true)\`.
</Behavior_Instructions>
${oracleSection}
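A minimal sketch of the shutdown policy this hunk introduces: disposable tasks (explore, librarian) get cancelled individually, while Oracle results are always collected first. The type and function below are illustrative stand-ins, not the real `background_cancel`/`background_output` tools.

```typescript
// Illustrative planner for the final-answer shutdown policy: explore and
// librarian tasks are disposable; Oracle must always be waited on.
type BgTask = { id: string; agent: "explore" | "librarian" | "oracle" }

function planShutdown(tasks: BgTask[]): { cancel: string[]; collect: string[] } {
  const cancel: string[] = []
  const collect: string[] = []
  for (const t of tasks) {
    if (t.agent === "oracle") collect.push(t.id) // wait via background_output
    else cancel.push(t.id) // background_cancel(taskId="...") individually
  }
  return { cancel, collect }
}
```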


@@ -428,7 +428,7 @@ describe("createBuiltinAgents with model overrides", () => {
)
// #then
const matches = agents.sisyphus.prompt.match(/Custom agent: researcher/gi) ?? []
const matches = (agents.sisyphus?.prompt ?? "").match(/Custom agent: researcher/gi) ?? []
expect(matches.length).toBe(1)
} finally {
fetchSpy.mockRestore()
@@ -525,6 +525,34 @@ describe("createBuiltinAgents without systemDefaultModel", () => {
})
describe("createBuiltinAgents with requiresProvider gating (hephaestus)", () => {
test("hephaestus is created when provider-models cache connected list includes required provider", async () => {
// #given
const connectedCacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(["anthropic"])
const providerModelsSpy = spyOn(connectedProvidersCache, "readProviderModelsCache").mockReturnValue({
connected: ["openai"],
models: {},
updatedAt: new Date().toISOString(),
})
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockImplementation(async (_, options) => {
const providers = options?.connectedProviders ?? []
return providers.includes("openai")
? new Set(["openai/gpt-5.3-codex"])
: new Set(["anthropic/claude-opus-4-6"])
})
try {
// #when
const agents = await createBuiltinAgents([], {}, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
// #then
expect(agents.hephaestus).toBeDefined()
} finally {
connectedCacheSpy.mockRestore()
providerModelsSpy.mockRestore()
fetchSpy.mockRestore()
}
})
test("hephaestus is not created when no required provider is connected", async () => {
// #given - only anthropic models available, not in hephaestus requiresProvider
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(


@@ -1,32 +1,45 @@
import pc from "picocolors"
import type { RunOptions } from "./types"
import type { OhMyOpenCodeConfig } from "../../config"
import { getAgentConfigKey, getAgentDisplayName } from "../../shared/agent-display-names"
const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
const DEFAULT_AGENT = "sisyphus"
type EnvVars = Record<string, string | undefined>
type CoreAgentKey = (typeof CORE_AGENT_ORDER)[number]
const normalizeAgentName = (agent?: string): string | undefined => {
if (!agent) return undefined
const trimmed = agent.trim()
if (!trimmed) return undefined
const lowered = trimmed.toLowerCase()
const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
return coreMatch ?? trimmed
interface ResolvedAgent {
configKey: string
resolvedName: string
}
const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
const lowered = agent.toLowerCase()
if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
const normalizeAgentName = (agent?: string): ResolvedAgent | undefined => {
if (!agent) return undefined
const trimmed = agent.trim()
if (trimmed.length === 0) return undefined
const configKey = getAgentConfigKey(trimmed)
const displayName = getAgentDisplayName(configKey)
const isKnownAgent = displayName !== configKey
return {
configKey,
resolvedName: isKnownAgent ? displayName : trimmed,
}
}
const isAgentDisabled = (agentConfigKey: string, config: OhMyOpenCodeConfig): boolean => {
const lowered = agentConfigKey.toLowerCase()
if (lowered === DEFAULT_AGENT && config.sisyphus_agent?.disabled === true) {
return true
}
return (config.disabled_agents ?? []).some(
(disabled) => disabled.toLowerCase() === lowered
(disabled) => getAgentConfigKey(disabled) === lowered
)
}
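The new disabled check normalizes both sides through `getAgentConfigKey` so that differently-cased entries still match. A simplified standalone sketch, approximating that helper as trim-plus-lowercase (an assumption; the real helper also maps display names to config keys):

```typescript
// Simplified disabled-agent check: normalize both the requested agent and
// the configured entries so "Sisyphus" matches "sisyphus". Lower-casing
// approximates getAgentConfigKey for illustration only.
const toKey = (name: string): string => name.trim().toLowerCase()

function isDisabled(agent: string, disabledAgents: string[]): boolean {
  const key = toKey(agent)
  return disabledAgents.some((entry) => toKey(entry) === key)
}
```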
const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
const pickFallbackAgent = (config: OhMyOpenCodeConfig): CoreAgentKey => {
for (const agent of CORE_AGENT_ORDER) {
if (!isAgentDisabled(agent, config)) {
return agent
@@ -43,27 +56,33 @@ export const resolveRunAgent = (
const cliAgent = normalizeAgentName(options.agent)
const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
const resolved =
cliAgent ??
envAgent ??
configAgent ?? {
configKey: DEFAULT_AGENT,
resolvedName: getAgentDisplayName(DEFAULT_AGENT),
}
if (isAgentDisabled(normalized, pluginConfig)) {
if (isAgentDisabled(resolved.configKey, pluginConfig)) {
const fallback = pickFallbackAgent(pluginConfig)
const fallbackName = getAgentDisplayName(fallback)
const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
if (fallbackDisabled) {
console.log(
pc.yellow(
`Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
`Requested agent "${resolved.resolvedName}" is disabled and no enabled core agent was found. Proceeding with "${fallbackName}".`
)
)
return fallback
return fallbackName
}
console.log(
pc.yellow(
`Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
`Requested agent "${resolved.resolvedName}" is disabled. Falling back to "${fallbackName}".`
)
)
return fallback
return fallbackName
}
return normalized
return resolved.resolvedName
}
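Stripped of the disabled-agent fallback, the precedence `resolveRunAgent` implements reduces to a nullish-coalescing chain: CLI option, then the `OPENCODE_DEFAULT_AGENT` env var, then plugin config, then the default. A minimal sketch under that simplification:

```typescript
// Resolution precedence sketch: CLI option > env var > plugin config > default.
// Blank or whitespace-only values count as unset, matching normalizeAgentName.
function resolveAgent(cli?: string, env?: string, config?: string): string {
  const norm = (value?: string): string | undefined => {
    const trimmed = value?.trim()
    return trimmed && trimmed.length > 0 ? trimmed : undefined
  }
  return norm(cli) ?? norm(env) ?? norm(config) ?? "sisyphus"
}
```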


@@ -20,7 +20,10 @@ export async function checkCompletionConditions(ctx: RunContext): Promise<boolea
}
async function areAllTodosComplete(ctx: RunContext): Promise<boolean> {
const todosRes = await ctx.client.session.todo({ path: { id: ctx.sessionID } })
const todosRes = await ctx.client.session.todo({
path: { id: ctx.sessionID },
query: { directory: ctx.directory },
})
const todos = normalizeSDKResponse(todosRes, [] as Todo[])
const incompleteTodos = todos.filter(
@@ -43,7 +46,9 @@ async function areAllChildrenIdle(ctx: RunContext): Promise<boolean> {
async function fetchAllStatuses(
ctx: RunContext
): Promise<Record<string, SessionStatus>> {
const statusRes = await ctx.client.session.status()
const statusRes = await ctx.client.session.status({
query: { directory: ctx.directory },
})
return normalizeSDKResponse(statusRes, {} as Record<string, SessionStatus>)
}
@@ -54,6 +59,7 @@ async function areAllDescendantsIdle(
): Promise<boolean> {
const childrenRes = await ctx.client.session.children({
path: { id: sessionID },
query: { directory: ctx.directory },
})
const children = normalizeSDKResponse(childrenRes, [] as ChildSession[])


@@ -57,7 +57,11 @@ export function serializeError(error: unknown): string {
function getSessionTag(ctx: RunContext, payload: EventPayload): string {
const props = payload.properties as Record<string, unknown> | undefined
const info = props?.info as Record<string, unknown> | undefined
const sessionID = props?.sessionID ?? info?.sessionID
const part = props?.part as Record<string, unknown> | undefined
const sessionID =
props?.sessionID ?? props?.sessionId ??
info?.sessionID ?? info?.sessionId ??
part?.sessionID ?? part?.sessionId
const isMainSession = sessionID === ctx.sessionID
if (isMainSession) return pc.green("[MAIN]")
if (sessionID) return pc.yellow(`[${String(sessionID).slice(0, 8)}]`)
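The widened lookup in this hunk tolerates both `sessionID` and `sessionId` spellings across the three places an event may carry the id (top-level properties, `info`, and `part`). As a standalone sketch:

```typescript
// Tolerant session-id extraction: check props, then info, then part,
// accepting both the "sessionID" and "sessionId" spellings at each level.
type Bag = Record<string, unknown>

function extractSessionId(props?: Bag): string | undefined {
  const pick = (obj?: Bag): string | undefined =>
    (obj?.sessionID ?? obj?.sessionId) as string | undefined
  return (
    pick(props) ??
    pick(props?.info as Bag | undefined) ??
    pick(props?.part as Bag | undefined)
  )
}
```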
@@ -79,9 +83,9 @@ export function logEventVerbose(ctx: RunContext, payload: EventPayload): void {
case "message.part.updated": {
const partProps = props as MessagePartUpdatedProps | undefined
const part = partProps?.part
if (part?.type === "tool-invocation") {
const toolPart = part as { toolName?: string; state?: string }
console.error(pc.dim(`${sessionTag} message.part (tool): ${toolPart.toolName} [${toolPart.state}]`))
if (part?.type === "tool") {
const status = part.state?.status ?? "unknown"
console.error(pc.dim(`${sessionTag} message.part (tool): ${part.tool ?? part.name ?? "?"} [${status}]`))
} else if (part?.type === "text" && part.text) {
const preview = part.text.slice(0, 80).replace(/\n/g, "\\n")
console.error(pc.dim(`${sessionTag} message.part (text): "${preview}${part.text.length > 80 ? "..." : ""}"`))


@@ -1,7 +1,7 @@
import { describe, it, expect } from "bun:test"
import { describe, it, expect, spyOn } from "bun:test"
import type { RunContext } from "./types"
import { createEventState } from "./events"
import { handleSessionStatus } from "./event-handlers"
import { handleSessionStatus, handleMessagePartUpdated, handleTuiToast } from "./event-handlers"
const createMockContext = (sessionID: string = "test-session"): RunContext => ({
sessionID,
@@ -70,4 +70,211 @@ describe("handleSessionStatus", () => {
//#then - state.mainSessionIdle remains unchanged
expect(state.mainSessionIdle).toBe(true)
})
it("recognizes idle from camelCase sessionId", () => {
//#given - state with mainSessionIdle=false and payload using sessionId
const ctx = createMockContext("test-session")
const state = createEventState()
state.mainSessionIdle = false
const payload = {
type: "session.status",
properties: {
sessionId: "test-session",
status: { type: "idle" as const },
},
}
//#when - handleSessionStatus called with camelCase sessionId
handleSessionStatus(ctx, payload as any, state)
//#then - state.mainSessionIdle === true
expect(state.mainSessionIdle).toBe(true)
})
})
describe("handleMessagePartUpdated", () => {
it("extracts sessionID from part (current OpenCode event structure)", () => {
//#given - message.part.updated with sessionID in part, not info
const ctx = createMockContext("ses_main")
const state = createEventState()
const stdoutSpy = spyOn(process.stdout, "write").mockImplementation(() => true)
const payload = {
type: "message.part.updated",
properties: {
part: {
id: "part_1",
sessionID: "ses_main",
messageID: "msg_1",
type: "text",
text: "Hello world",
},
},
}
//#when
handleMessagePartUpdated(ctx, payload as any, state)
//#then
expect(state.hasReceivedMeaningfulWork).toBe(true)
expect(state.lastPartText).toBe("Hello world")
expect(stdoutSpy).toHaveBeenCalled()
stdoutSpy.mockRestore()
})
it("skips events for different session", () => {
//#given - message.part.updated with different session
const ctx = createMockContext("ses_main")
const state = createEventState()
const payload = {
type: "message.part.updated",
properties: {
part: {
id: "part_1",
sessionID: "ses_other",
messageID: "msg_1",
type: "text",
text: "Hello world",
},
},
}
//#when
handleMessagePartUpdated(ctx, payload as any, state)
//#then
expect(state.hasReceivedMeaningfulWork).toBe(false)
expect(state.lastPartText).toBe("")
})
it("handles tool part with running status", () => {
//#given - tool part in running state
const ctx = createMockContext("ses_main")
const state = createEventState()
const stdoutSpy = spyOn(process.stdout, "write").mockImplementation(() => true)
const payload = {
type: "message.part.updated",
properties: {
part: {
id: "part_1",
sessionID: "ses_main",
messageID: "msg_1",
type: "tool",
tool: "read",
state: { status: "running", input: { filePath: "/src/index.ts" } },
},
},
}
//#when
handleMessagePartUpdated(ctx, payload as any, state)
//#then
expect(state.currentTool).toBe("read")
expect(state.hasReceivedMeaningfulWork).toBe(true)
stdoutSpy.mockRestore()
})
it("clears currentTool when tool completes", () => {
//#given - tool part in completed state
const ctx = createMockContext("ses_main")
const state = createEventState()
state.currentTool = "read"
const stdoutSpy = spyOn(process.stdout, "write").mockImplementation(() => true)
const payload = {
type: "message.part.updated",
properties: {
part: {
id: "part_1",
sessionID: "ses_main",
messageID: "msg_1",
type: "tool",
tool: "read",
state: { status: "completed", input: {}, output: "file contents here" },
},
},
}
//#when
handleMessagePartUpdated(ctx, payload as any, state)
//#then
expect(state.currentTool).toBeNull()
stdoutSpy.mockRestore()
})
it("supports legacy info.sessionID for backward compatibility", () => {
//#given - legacy event with sessionID in info
const ctx = createMockContext("ses_legacy")
const state = createEventState()
const stdoutSpy = spyOn(process.stdout, "write").mockImplementation(() => true)
const payload = {
type: "message.part.updated",
properties: {
info: { sessionID: "ses_legacy", role: "assistant" },
part: {
type: "text",
text: "Legacy text",
},
},
}
//#when
handleMessagePartUpdated(ctx, payload as any, state)
//#then
expect(state.hasReceivedMeaningfulWork).toBe(true)
expect(state.lastPartText).toBe("Legacy text")
stdoutSpy.mockRestore()
})
})
describe("handleTuiToast", () => {
it("marks main session as error when toast variant is error", () => {
//#given - toast error payload
const ctx = createMockContext("test-session")
const state = createEventState()
const payload = {
type: "tui.toast.show",
properties: {
title: "Auth",
message: "Invalid API key",
variant: "error" as const,
},
}
//#when
handleTuiToast(ctx, payload as any, state)
//#then
expect(state.mainSessionError).toBe(true)
expect(state.lastError).toBe("Auth: Invalid API key")
})
it("does not mark session error for warning toast", () => {
//#given - toast warning payload
const ctx = createMockContext("test-session")
const state = createEventState()
const payload = {
type: "tui.toast.show",
properties: {
message: "Retrying provider",
variant: "warning" as const,
},
}
//#when
handleTuiToast(ctx, payload as any, state)
//#then
expect(state.mainSessionError).toBe(false)
expect(state.lastError).toBe(null)
})
})

View File

@@ -9,15 +9,32 @@ import type {
MessagePartUpdatedProps,
ToolExecuteProps,
ToolResultProps,
TuiToastShowProps,
} from "./types"
import type { EventState } from "./event-state"
import { serializeError } from "./event-formatting"
function getSessionId(props?: { sessionID?: string; sessionId?: string }): string | undefined {
return props?.sessionID ?? props?.sessionId
}
function getInfoSessionId(props?: {
info?: { sessionID?: string; sessionId?: string }
}): string | undefined {
return props?.info?.sessionID ?? props?.info?.sessionId
}
function getPartSessionId(props?: {
part?: { sessionID?: string; sessionId?: string }
}): string | undefined {
return props?.part?.sessionID ?? props?.part?.sessionId
}
export function handleSessionIdle(ctx: RunContext, payload: EventPayload, state: EventState): void {
if (payload.type !== "session.idle") return
const props = payload.properties as SessionIdleProps | undefined
-if (props?.sessionID === ctx.sessionID) {
+if (getSessionId(props) === ctx.sessionID) {
state.mainSessionIdle = true
}
}
@@ -26,7 +43,7 @@ export function handleSessionStatus(ctx: RunContext, payload: EventPayload, stat
if (payload.type !== "session.status") return
const props = payload.properties as SessionStatusProps | undefined
-if (props?.sessionID !== ctx.sessionID) return
+if (getSessionId(props) !== ctx.sessionID) return
if (props?.status?.type === "busy") {
state.mainSessionIdle = false
@@ -41,7 +58,7 @@ export function handleSessionError(ctx: RunContext, payload: EventPayload, state
if (payload.type !== "session.error") return
const props = payload.properties as SessionErrorProps | undefined
-if (props?.sessionID === ctx.sessionID) {
+if (getSessionId(props) === ctx.sessionID) {
state.mainSessionError = true
state.lastError = serializeError(props?.error)
console.error(pc.red(`\n[session.error] ${state.lastError}`))
@@ -52,10 +69,12 @@ export function handleMessagePartUpdated(ctx: RunContext, payload: EventPayload,
if (payload.type !== "message.part.updated") return
const props = payload.properties as MessagePartUpdatedProps | undefined
-if (props?.info?.sessionID !== ctx.sessionID) return
-if (props?.info?.role !== "assistant") return
+// Current OpenCode puts sessionID inside part; legacy puts it in info
+const partSid = getPartSessionId(props)
+const infoSid = getInfoSessionId(props)
+if ((partSid ?? infoSid) !== ctx.sessionID) return
-const part = props.part
+const part = props?.part
if (!part) return
if (part.type === "text" && part.text) {
@@ -66,13 +85,57 @@ export function handleMessagePartUpdated(ctx: RunContext, payload: EventPayload,
}
state.lastPartText = part.text
}
if (part.type === "tool") {
handleToolPart(ctx, part, state)
}
}
function handleToolPart(
_ctx: RunContext,
part: NonNullable<MessagePartUpdatedProps["part"]>,
state: EventState,
): void {
const toolName = part.tool || part.name || "unknown"
const status = part.state?.status
if (status === "running") {
state.currentTool = toolName
let inputPreview = ""
const input = part.state?.input
if (input) {
if (input.command) {
inputPreview = ` ${pc.dim(String(input.command).slice(0, 60))}`
} else if (input.pattern) {
inputPreview = ` ${pc.dim(String(input.pattern).slice(0, 40))}`
} else if (input.filePath) {
inputPreview = ` ${pc.dim(String(input.filePath))}`
} else if (input.query) {
inputPreview = ` ${pc.dim(String(input.query).slice(0, 40))}`
}
}
state.hasReceivedMeaningfulWork = true
process.stdout.write(`\n${pc.cyan(">")} ${pc.bold(toolName)}${inputPreview}\n`)
}
if (status === "completed" || status === "error") {
const output = part.state?.output || ""
const maxLen = 200
const preview = output.length > maxLen ? output.slice(0, maxLen) + "..." : output
if (preview.trim()) {
const lines = preview.split("\n").slice(0, 3)
process.stdout.write(pc.dim(` └─ ${lines.join("\n ")}\n`))
}
state.currentTool = null
state.lastPartText = ""
}
}
export function handleMessageUpdated(ctx: RunContext, payload: EventPayload, state: EventState): void {
if (payload.type !== "message.updated") return
const props = payload.properties as MessageUpdatedProps | undefined
-if (props?.info?.sessionID !== ctx.sessionID) return
+if (getInfoSessionId(props) !== ctx.sessionID) return
if (props?.info?.role !== "assistant") return
state.hasReceivedMeaningfulWork = true
@@ -84,7 +147,7 @@ export function handleToolExecute(ctx: RunContext, payload: EventPayload, state:
if (payload.type !== "tool.execute") return
const props = payload.properties as ToolExecuteProps | undefined
-if (props?.sessionID !== ctx.sessionID) return
+if (getSessionId(props) !== ctx.sessionID) return
const toolName = props?.name || "unknown"
state.currentTool = toolName
@@ -111,7 +174,7 @@ export function handleToolResult(ctx: RunContext, payload: EventPayload, state:
if (payload.type !== "tool.result") return
const props = payload.properties as ToolResultProps | undefined
-if (props?.sessionID !== ctx.sessionID) return
+if (getSessionId(props) !== ctx.sessionID) return
const output = props?.output || ""
const maxLen = 200
@@ -125,3 +188,24 @@ export function handleToolResult(ctx: RunContext, payload: EventPayload, state:
state.currentTool = null
state.lastPartText = ""
}
export function handleTuiToast(_ctx: RunContext, payload: EventPayload, state: EventState): void {
if (payload.type !== "tui.toast.show") return
const props = payload.properties as TuiToastShowProps | undefined
const title = props?.title ? `${props.title}: ` : ""
const message = props?.message?.trim()
const variant = props?.variant ?? "info"
if (!message) return
if (variant === "error") {
state.mainSessionError = true
state.lastError = `${title}${message}`
console.error(pc.red(`\n[tui.toast.error] ${state.lastError}`))
return
}
const colorize = variant === "warning" ? pc.yellow : pc.dim
console.log(colorize(`[toast:${variant}] ${title}${message}`))
}
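The handlers above normalize event payloads that may carry either a `sessionID` or `sessionId` key, preferring `part` over the legacy `info` carrier. A minimal self-contained sketch of that accessor pattern (the combined `resolveEventSessionId` helper is illustrative, not part of the diff):

```typescript
// Sketch of the key-normalization pattern from the diff above.
// OpenCode events have shipped both `sessionID` (current) and
// `sessionId` (legacy camelCase), so handlers read through one accessor.
type SessionKeyed = { sessionID?: string; sessionId?: string }

function getSessionId(props?: SessionKeyed): string | undefined {
  return props?.sessionID ?? props?.sessionId
}

// For message.part.updated, the current structure carries the id in
// `part`, while legacy payloads carry it in `info`; prefer `part`.
function resolveEventSessionId(props?: {
  info?: SessionKeyed
  part?: SessionKeyed
}): string | undefined {
  return getSessionId(props?.part) ?? getSessionId(props?.info)
}

console.log(getSessionId({ sessionId: "ses_1" })) // "ses_1"
console.log(resolveEventSessionId({ info: { sessionID: "ses_2" } })) // "ses_2"
```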

View File

@@ -10,6 +10,7 @@ import {
handleMessageUpdated,
handleToolExecute,
handleToolResult,
handleTuiToast,
} from "./event-handlers"
export async function processEvents(
@@ -36,6 +37,7 @@ export async function processEvents(
handleMessageUpdated(ctx, payload, state)
handleToolExecute(ctx, payload, state)
handleToolResult(ctx, payload, state)
handleTuiToast(ctx, payload, state)
} catch (err) {
console.error(pc.red(`[event error] ${err}`))
}

View File

@@ -170,6 +170,28 @@ describe("event handling", () => {
expect(state.hasReceivedMeaningfulWork).toBe(true)
})
it("message.updated with camelCase sessionId sets hasReceivedMeaningfulWork", async () => {
//#given - assistant message uses sessionId key
const ctx = createMockContext("my-session")
const state = createEventState()
const payload: EventPayload = {
type: "message.updated",
properties: {
info: { sessionId: "my-session", role: "assistant" },
},
}
const events = toAsyncIterable([payload])
const { processEvents } = await import("./events")
//#when
await processEvents(ctx, events, state)
//#then
expect(state.hasReceivedMeaningfulWork).toBe(true)
})
it("message.updated with user role does not set hasReceivedMeaningfulWork", async () => {
// given - user message should not count as meaningful work
const ctx = createMockContext("my-session")
@@ -251,6 +273,7 @@ describe("event handling", () => {
lastPartText: "",
currentTool: null,
hasReceivedMeaningfulWork: false,
messageCount: 0,
}
const payload: EventPayload = {

View File

@@ -1,9 +1,11 @@
-import { describe, it, expect, mock, spyOn, beforeEach, afterEach } from "bun:test"
+import { describe, it, expect, mock, spyOn, beforeEach, afterEach, afterAll } from "bun:test"
import type { RunResult } from "./types"
import { createJsonOutputManager } from "./json-output"
import { resolveSession } from "./session-resolver"
import { executeOnCompleteHook } from "./on-complete-hook"
import type { OpencodeClient } from "./types"
import * as originalSdk from "@opencode-ai/sdk"
import * as originalPortUtils from "../../shared/port-utils"
const mockServerClose = mock(() => {})
const mockCreateOpencode = mock(() =>
@@ -27,6 +29,11 @@ mock.module("../../shared/port-utils", () => ({
DEFAULT_SERVER_PORT: 4096,
}))
afterAll(() => {
mock.module("@opencode-ai/sdk", () => originalSdk)
mock.module("../../shared/port-utils", () => originalPortUtils)
})
const { createServerConnection } = await import("./server-connection")
interface MockWriteStream {
@@ -120,11 +127,14 @@ describe("integration: --session-id", () => {
const mockClient = createMockClient({ data: { id: sessionId } })
// when
-const result = await resolveSession({ client: mockClient, sessionId })
+const result = await resolveSession({ client: mockClient, sessionId, directory: "/test" })
// then
expect(result).toBe(sessionId)
-expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+expect(mockClient.session.get).toHaveBeenCalledWith({
+path: { id: sessionId },
+query: { directory: "/test" },
+})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
@@ -134,11 +144,14 @@ describe("integration: --session-id", () => {
const mockClient = createMockClient({ error: { message: "Session not found" } })
// when
-const result = resolveSession({ client: mockClient, sessionId })
+const result = resolveSession({ client: mockClient, sessionId, directory: "/test" })
// then
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
-expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
+expect(mockClient.session.get).toHaveBeenCalledWith({
+path: { id: sessionId },
+query: { directory: "/test" },
+})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
})

View File

@@ -0,0 +1,52 @@
/// <reference types="bun-types" />
import { describe, expect, it } from "bun:test"
import { prependResolvedOpencodeBinToPath } from "./opencode-bin-path"
describe("prependResolvedOpencodeBinToPath", () => {
it("prepends resolved opencode-ai bin path to PATH", () => {
//#given
const env: Record<string, string | undefined> = {
PATH: "/Users/yeongyu/node_modules/.bin:/usr/bin",
}
const resolver = () => "/tmp/bunx-123/node_modules/opencode-ai/bin/opencode"
//#when
prependResolvedOpencodeBinToPath(env, resolver)
//#then
expect(env.PATH).toBe(
"/tmp/bunx-123/node_modules/opencode-ai/bin:/Users/yeongyu/node_modules/.bin:/usr/bin",
)
})
it("does not duplicate an existing opencode-ai bin path", () => {
//#given
const env: Record<string, string | undefined> = {
PATH: "/tmp/bunx-123/node_modules/opencode-ai/bin:/usr/bin",
}
const resolver = () => "/tmp/bunx-123/node_modules/opencode-ai/bin/opencode"
//#when
prependResolvedOpencodeBinToPath(env, resolver)
//#then
expect(env.PATH).toBe("/tmp/bunx-123/node_modules/opencode-ai/bin:/usr/bin")
})
it("keeps PATH unchanged when opencode-ai cannot be resolved", () => {
//#given
const env: Record<string, string | undefined> = {
PATH: "/Users/yeongyu/node_modules/.bin:/usr/bin",
}
const resolver = () => {
throw new Error("module not found")
}
//#when
prependResolvedOpencodeBinToPath(env, resolver)
//#then
expect(env.PATH).toBe("/Users/yeongyu/node_modules/.bin:/usr/bin")
})
})

View File

@@ -0,0 +1,30 @@
import { delimiter, dirname } from "node:path"
import { createRequire } from "node:module"
type EnvLike = Record<string, string | undefined>
const resolveFromCurrentModule = createRequire(import.meta.url).resolve
export function prependResolvedOpencodeBinToPath(
env: EnvLike = process.env,
resolve: (id: string) => string = resolveFromCurrentModule,
): void {
let resolvedPath: string
try {
resolvedPath = resolve("opencode-ai/bin/opencode")
} catch {
return
}
const opencodeBinDir = dirname(resolvedPath)
const currentPath = env.PATH ?? ""
const pathSegments = currentPath ? currentPath.split(delimiter) : []
if (pathSegments.includes(opencodeBinDir)) {
return
}
env.PATH = currentPath
? `${opencodeBinDir}${delimiter}${currentPath}`
: opencodeBinDir
}

View File

@@ -0,0 +1,102 @@
import { describe, expect, it } from "bun:test"
import { delimiter, join } from "node:path"
import {
buildPathWithBinaryFirst,
collectCandidateBinaryPaths,
findWorkingOpencodeBinary,
withWorkingOpencodePath,
} from "./opencode-binary-resolver"
describe("collectCandidateBinaryPaths", () => {
it("includes Bun.which results first and removes duplicates", () => {
// given
const pathEnv = ["/bad", "/good"].join(delimiter)
const which = (command: string): string | undefined => {
if (command === "opencode") return "/bad/opencode"
return undefined
}
// when
const candidates = collectCandidateBinaryPaths(pathEnv, which, "darwin")
// then
expect(candidates[0]).toBe("/bad/opencode")
expect(candidates).toContain("/good/opencode")
expect(candidates.filter((candidate) => candidate === "/bad/opencode")).toHaveLength(1)
})
})
describe("findWorkingOpencodeBinary", () => {
it("returns the first runnable candidate", async () => {
// given
const pathEnv = ["/bad", "/good"].join(delimiter)
const which = (command: string): string | undefined => {
if (command === "opencode") return "/bad/opencode"
return undefined
}
const probe = async (binaryPath: string): Promise<boolean> =>
binaryPath === "/good/opencode"
// when
const resolved = await findWorkingOpencodeBinary(pathEnv, probe, which, "darwin")
// then
expect(resolved).toBe("/good/opencode")
})
})
describe("buildPathWithBinaryFirst", () => {
it("prepends the binary directory and avoids duplicate entries", () => {
// given
const binaryPath = "/good/opencode"
const pathEnv = ["/bad", "/good", "/other"].join(delimiter)
// when
const updated = buildPathWithBinaryFirst(pathEnv, binaryPath)
// then
expect(updated).toBe(["/good", "/bad", "/other"].join(delimiter))
})
})
describe("withWorkingOpencodePath", () => {
it("temporarily updates PATH while starting the server", async () => {
// given
const originalPath = process.env.PATH
process.env.PATH = ["/bad", "/other"].join(delimiter)
const finder = async (): Promise<string | null> => "/good/opencode"
let observedPath = ""
// when
await withWorkingOpencodePath(
async () => {
observedPath = process.env.PATH ?? ""
},
finder,
)
// then
expect(observedPath).toBe(["/good", "/bad", "/other"].join(delimiter))
expect(process.env.PATH).toBe(["/bad", "/other"].join(delimiter))
process.env.PATH = originalPath
})
it("restores PATH when server startup fails", async () => {
// given
const originalPath = process.env.PATH
process.env.PATH = ["/bad", "/other"].join(delimiter)
const finder = async (): Promise<string | null> => join("/good", "opencode")
// when & then
await expect(
withWorkingOpencodePath(
async () => {
throw new Error("boom")
},
finder,
),
).rejects.toThrow("boom")
expect(process.env.PATH).toBe(["/bad", "/other"].join(delimiter))
process.env.PATH = originalPath
})
})

View File

@@ -0,0 +1,95 @@
import { delimiter, dirname, join } from "node:path"
const OPENCODE_COMMANDS = ["opencode", "opencode-desktop"] as const
const WINDOWS_SUFFIXES = ["", ".exe", ".cmd", ".bat", ".ps1"] as const
function getCommandCandidates(platform: NodeJS.Platform): string[] {
if (platform !== "win32") return [...OPENCODE_COMMANDS]
return OPENCODE_COMMANDS.flatMap((command) =>
WINDOWS_SUFFIXES.map((suffix) => `${command}${suffix}`),
)
}
export function collectCandidateBinaryPaths(
pathEnv: string | undefined,
which: (command: string) => string | null | undefined = Bun.which,
platform: NodeJS.Platform = process.platform,
): string[] {
const seen = new Set<string>()
const candidates: string[] = []
const commandCandidates = getCommandCandidates(platform)
const addCandidate = (binaryPath: string | undefined | null): void => {
if (!binaryPath || seen.has(binaryPath)) return
seen.add(binaryPath)
candidates.push(binaryPath)
}
for (const command of commandCandidates) {
addCandidate(which(command))
}
for (const entry of (pathEnv ?? "").split(delimiter).filter(Boolean)) {
for (const command of commandCandidates) {
addCandidate(join(entry, command))
}
}
return candidates
}
export async function canExecuteBinary(binaryPath: string): Promise<boolean> {
try {
const proc = Bun.spawn([binaryPath, "--version"], {
stdout: "pipe",
stderr: "pipe",
})
await proc.exited
return proc.exitCode === 0
} catch {
return false
}
}
export async function findWorkingOpencodeBinary(
pathEnv: string | undefined = process.env.PATH,
probe: (binaryPath: string) => Promise<boolean> = canExecuteBinary,
which: (command: string) => string | null | undefined = Bun.which,
platform: NodeJS.Platform = process.platform,
): Promise<string | null> {
const candidates = collectCandidateBinaryPaths(pathEnv, which, platform)
for (const candidate of candidates) {
if (await probe(candidate)) {
return candidate
}
}
return null
}
export function buildPathWithBinaryFirst(pathEnv: string | undefined, binaryPath: string): string {
const preferredDir = dirname(binaryPath)
const existing = (pathEnv ?? "").split(delimiter).filter(
(entry) => entry.length > 0 && entry !== preferredDir,
)
return [preferredDir, ...existing].join(delimiter)
}
export async function withWorkingOpencodePath<T>(
startServer: () => Promise<T>,
finder: (pathEnv: string | undefined) => Promise<string | null> = findWorkingOpencodeBinary,
): Promise<T> {
const originalPath = process.env.PATH
const binaryPath = await finder(originalPath)
if (!binaryPath) {
return startServer()
}
process.env.PATH = buildPathWithBinaryFirst(originalPath, binaryPath)
try {
return await startServer()
} finally {
process.env.PATH = originalPath
}
}
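The `withWorkingOpencodePath` wrapper above follows a restore-in-`finally` pattern: mutate `process.env.PATH` for the duration of the callback, then restore it even on failure. A generic sketch of that pattern (the name `withTempPath` is illustrative, not part of the diff):

```typescript
// Sketch of the temporary environment override used by
// withWorkingOpencodePath: the PATH change cannot leak because the
// original value is restored in a finally block, even when the
// wrapped callback throws.
async function withTempPath<T>(tempPath: string, run: () => Promise<T>): Promise<T> {
  const originalPath = process.env.PATH
  process.env.PATH = tempPath
  try {
    return await run()
  } finally {
    process.env.PATH = originalPath
  }
}
```

The same shape generalizes to any process-global state (cwd, env vars) that a child-process spawn must observe temporarily.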

View File

@@ -207,6 +207,52 @@ describe("pollForCompletion", () => {
expect(todoCallCount).toBe(0)
})
it("falls back to session.status API when idle event is missing", async () => {
//#given - mainSessionIdle not set by events, but status API says idle
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
const ctx = createMockContext({
statuses: {
"test-session": { type: "idle" },
},
})
const eventState = createEventState()
eventState.mainSessionIdle = false
eventState.hasReceivedMeaningfulWork = true
const abortController = new AbortController()
//#when
const result = await pollForCompletion(ctx, eventState, abortController, {
pollIntervalMs: 10,
requiredConsecutive: 2,
minStabilizationMs: 0,
})
//#then - completion succeeds without idle event
expect(result).toBe(0)
})
it("allows silent completion after stabilization when no meaningful work is received", async () => {
//#given - session is idle and stable but no assistant message/tool event arrived
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
const ctx = createMockContext()
const eventState = createEventState()
eventState.mainSessionIdle = true
eventState.hasReceivedMeaningfulWork = false
const abortController = new AbortController()
//#when
const result = await pollForCompletion(ctx, eventState, abortController, {
pollIntervalMs: 10,
requiredConsecutive: 1,
minStabilizationMs: 30,
})
//#then - completion succeeds after stabilization window
expect(result).toBe(0)
})
it("simulates race condition: brief idle with 0 todos does not cause immediate exit", async () => {
//#given - simulate Sisyphus outputting text, session goes idle briefly, then tool fires
spyOn(console, "log").mockImplementation(() => {})

View File

@@ -2,6 +2,7 @@ import pc from "picocolors"
import type { RunContext } from "./types"
import type { EventState } from "./events"
import { checkCompletionConditions } from "./completion"
import { normalizeSDKResponse } from "../../shared"
const DEFAULT_POLL_INTERVAL_MS = 500
const DEFAULT_REQUIRED_CONSECUTIVE = 3
@@ -28,6 +29,7 @@ export async function pollForCompletion(
let consecutiveCompleteChecks = 0
let errorCycleCount = 0
let firstWorkTimestamp: number | null = null
const pollStartTimestamp = Date.now()
while (!abortController.signal.aborted) {
await new Promise((resolve) => setTimeout(resolve, pollIntervalMs))
@@ -51,6 +53,13 @@ export async function pollForCompletion(
errorCycleCount = 0
}
const mainSessionStatus = await getMainSessionStatus(ctx)
if (mainSessionStatus === "busy" || mainSessionStatus === "retry") {
eventState.mainSessionIdle = false
} else if (mainSessionStatus === "idle") {
eventState.mainSessionIdle = true
}
if (!eventState.mainSessionIdle) {
consecutiveCompleteChecks = 0
continue
@@ -62,8 +71,11 @@ export async function pollForCompletion(
}
if (!eventState.hasReceivedMeaningfulWork) {
if (Date.now() - pollStartTimestamp < minStabilizationMs) {
consecutiveCompleteChecks = 0
continue
}
consecutiveCompleteChecks = 0
continue
}
// Track when first meaningful work was received
@@ -91,3 +103,24 @@ export async function pollForCompletion(
return 130
}
async function getMainSessionStatus(
ctx: RunContext
): Promise<"idle" | "busy" | "retry" | null> {
try {
const statusesRes = await ctx.client.session.status({
query: { directory: ctx.directory },
})
const statuses = normalizeSDKResponse(
statusesRes,
{} as Record<string, { type?: string }>
)
const status = statuses[ctx.sessionID]?.type
if (status === "idle" || status === "busy" || status === "retry") {
return status
}
return null
} catch {
return null
}
}
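The `getMainSessionStatus` fallback above reconciles event-derived idle state against an authoritative status fetch on every poll cycle. A pure sketch of that reconciliation rule (the function name `reconcileIdle` is illustrative, not part of the diff):

```typescript
// Sketch of the reconciliation added to pollForCompletion: an
// authoritative API status overrides event-derived idleness, while an
// unavailable fetch (null) keeps the event-derived value. A missed
// idle event therefore cannot hang the loop, and a missed busy event
// cannot end it early.
type SessionStatus = "idle" | "busy" | "retry" | null

function reconcileIdle(eventDerivedIdle: boolean, apiStatus: SessionStatus): boolean {
  if (apiStatus === "busy" || apiStatus === "retry") return false
  if (apiStatus === "idle") return true
  return eventDerivedIdle
}

console.log(reconcileIdle(false, "idle")) // true: API rescues a missed idle event
console.log(reconcileIdle(true, "busy")) // false: API overrides stale idle state
```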

View File

@@ -22,7 +22,7 @@ describe("resolveRunAgent", () => {
)
// then
-expect(agent).toBe("hephaestus")
+expect(agent).toBe("Hephaestus (Deep Agent)")
})
it("uses env agent over config", () => {
@@ -34,7 +34,7 @@ describe("resolveRunAgent", () => {
const agent = resolveRunAgent({ message: "test" }, config, env)
// then
-expect(agent).toBe("atlas")
+expect(agent).toBe("Atlas (Plan Executor)")
})
it("uses config agent over default", () => {
@@ -45,7 +45,7 @@ describe("resolveRunAgent", () => {
const agent = resolveRunAgent({ message: "test" }, config, {})
// then
-expect(agent).toBe("prometheus")
+expect(agent).toBe("Prometheus (Plan Builder)")
})
it("falls back to sisyphus when none set", () => {
@@ -56,7 +56,7 @@ describe("resolveRunAgent", () => {
const agent = resolveRunAgent({ message: "test" }, config, {})
// then
-expect(agent).toBe("sisyphus")
+expect(agent).toBe("Sisyphus (Ultraworker)")
})
it("skips disabled sisyphus for next available core agent", () => {
@@ -67,7 +67,18 @@ describe("resolveRunAgent", () => {
const agent = resolveRunAgent({ message: "test" }, config, {})
// then
-expect(agent).toBe("hephaestus")
+expect(agent).toBe("Hephaestus (Deep Agent)")
})
it("maps display-name style default_run_agent values to canonical display names", () => {
// given
const config = createConfig({ default_run_agent: "Sisyphus (Ultraworker)" })
// when
const agent = resolveRunAgent({ message: "test" }, config, {})
// then
expect(agent).toBe("Sisyphus (Ultraworker)")
})
})

View File

@@ -79,6 +79,7 @@ export async function run(options: RunOptions): Promise<number> {
const sessionID = await resolveSession({
client,
sessionId: options.sessionId,
directory,
})
console.log(pc.dim(`Session: ${sessionID}`))

View File

@@ -1,4 +1,8 @@
-import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test"
+import { describe, it, expect, mock, beforeEach, afterEach, afterAll } from "bun:test"
+import * as originalSdk from "@opencode-ai/sdk"
+import * as originalPortUtils from "../../shared/port-utils"
+import * as originalBinaryResolver from "./opencode-binary-resolver"
const originalConsole = globalThis.console
@@ -13,6 +17,7 @@ const mockCreateOpencodeClient = mock(() => ({ session: {} }))
const mockIsPortAvailable = mock(() => Promise.resolve(true))
const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 4096, wasAutoSelected: false }))
const mockConsoleLog = mock(() => {})
const mockWithWorkingOpencodePath = mock((startServer: () => Promise<unknown>) => startServer())
mock.module("@opencode-ai/sdk", () => ({
createOpencode: mockCreateOpencode,
@@ -25,6 +30,16 @@ mock.module("../../shared/port-utils", () => ({
DEFAULT_SERVER_PORT: 4096,
}))
mock.module("./opencode-binary-resolver", () => ({
withWorkingOpencodePath: mockWithWorkingOpencodePath,
}))
afterAll(() => {
mock.module("@opencode-ai/sdk", () => originalSdk)
mock.module("../../shared/port-utils", () => originalPortUtils)
mock.module("./opencode-binary-resolver", () => originalBinaryResolver)
})
const { createServerConnection } = await import("./server-connection")
describe("createServerConnection", () => {
@@ -35,6 +50,7 @@ describe("createServerConnection", () => {
mockGetAvailableServerPort.mockClear()
mockServerClose.mockClear()
mockConsoleLog.mockClear()
mockWithWorkingOpencodePath.mockClear()
globalThis.console = { ...console, log: mockConsoleLog } as typeof console
})
@@ -52,6 +68,7 @@ describe("createServerConnection", () => {
// then
expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
expect(mockWithWorkingOpencodePath).not.toHaveBeenCalled()
expect(result.client).toBeDefined()
expect(result.cleanup).toBeDefined()
result.cleanup()
@@ -69,6 +86,7 @@ describe("createServerConnection", () => {
// then
expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
expect(mockWithWorkingOpencodePath).toHaveBeenCalledTimes(1)
expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 8080, hostname: "127.0.0.1" })
expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
expect(result.client).toBeDefined()
@@ -106,6 +124,7 @@ describe("createServerConnection", () => {
// then
expect(mockGetAvailableServerPort).toHaveBeenCalledWith(4096, "127.0.0.1")
expect(mockWithWorkingOpencodePath).toHaveBeenCalledTimes(1)
expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 4100, hostname: "127.0.0.1" })
expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
expect(result.client).toBeDefined()

View File

@@ -2,12 +2,16 @@ import { createOpencode, createOpencodeClient } from "@opencode-ai/sdk"
import pc from "picocolors"
import type { ServerConnection } from "./types"
import { getAvailableServerPort, isPortAvailable, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
import { withWorkingOpencodePath } from "./opencode-binary-resolver"
import { prependResolvedOpencodeBinToPath } from "./opencode-bin-path"
export async function createServerConnection(options: {
port?: number
attach?: string
signal: AbortSignal
}): Promise<ServerConnection> {
prependResolvedOpencodeBinToPath()
const { port, attach, signal } = options
if (attach !== undefined) {
@@ -25,7 +29,9 @@ export async function createServerConnection(options: {
if (available) {
console.log(pc.dim("Starting server on port"), pc.cyan(port.toString()))
-const { client, server } = await createOpencode({ signal, port, hostname: "127.0.0.1" })
+const { client, server } = await withWorkingOpencodePath(() =>
+createOpencode({ signal, port, hostname: "127.0.0.1" }),
+)
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
return { client, cleanup: () => server.close() }
}
@@ -41,7 +47,9 @@ export async function createServerConnection(options: {
} else {
console.log(pc.dim("Starting server on port"), pc.cyan(selectedPort.toString()))
}
-const { client, server } = await createOpencode({ signal, port: selectedPort, hostname: "127.0.0.1" })
+const { client, server } = await withWorkingOpencodePath(() =>
+createOpencode({ signal, port: selectedPort, hostname: "127.0.0.1" }),
+)
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
return { client, cleanup: () => server.close() }
}

View File

@@ -26,6 +26,8 @@ const createMockClient = (overrides: {
}
describe("resolveSession", () => {
const directory = "/test-project"
beforeEach(() => {
spyOn(console, "log").mockImplementation(() => {})
spyOn(console, "error").mockImplementation(() => {})
@@ -39,12 +41,13 @@ describe("resolveSession", () => {
})
// when
-const result = await resolveSession({ client: mockClient, sessionId })
+const result = await resolveSession({ client: mockClient, sessionId, directory })
// then
expect(result).toBe(sessionId)
expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId },
query: { directory },
})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
@@ -57,7 +60,7 @@ describe("resolveSession", () => {
})
// when
-const result = resolveSession({ client: mockClient, sessionId })
+const result = resolveSession({ client: mockClient, sessionId, directory })
// then
await Promise.resolve(
@@ -65,6 +68,7 @@ describe("resolveSession", () => {
)
expect(mockClient.session.get).toHaveBeenCalledWith({
path: { id: sessionId },
query: { directory },
})
expect(mockClient.session.create).not.toHaveBeenCalled()
})
@@ -76,7 +80,7 @@ describe("resolveSession", () => {
})
// when
-const result = await resolveSession({ client: mockClient })
+const result = await resolveSession({ client: mockClient, directory })
// then
expect(result).toBe("new-session-id")
@@ -87,6 +91,7 @@ describe("resolveSession", () => {
{ permission: "question", action: "deny", pattern: "*" },
],
},
query: { directory },
})
expect(mockClient.session.get).not.toHaveBeenCalled()
})
@@ -101,7 +106,7 @@ describe("resolveSession", () => {
})
// when
-const result = await resolveSession({ client: mockClient })
+const result = await resolveSession({ client: mockClient, directory })
// then
expect(result).toBe("retried-session-id")
@@ -113,6 +118,7 @@ describe("resolveSession", () => {
{ permission: "question", action: "deny", pattern: "*" },
],
},
query: { directory },
})
})
@@ -127,7 +133,7 @@ describe("resolveSession", () => {
})
// when
-const result = resolveSession({ client: mockClient })
+const result = resolveSession({ client: mockClient, directory })
// then
await Promise.resolve(
@@ -147,7 +153,7 @@ describe("resolveSession", () => {
})
// when
-const result = resolveSession({ client: mockClient })
+const result = resolveSession({ client: mockClient, directory })
// then
await Promise.resolve(

View File

@@ -8,11 +8,15 @@ const SESSION_CREATE_RETRY_DELAY_MS = 1000
export async function resolveSession(options: {
client: OpencodeClient
sessionId?: string
directory: string
}): Promise<string> {
const { client, sessionId } = options
const { client, sessionId, directory } = options
if (sessionId) {
const res = await client.session.get({ path: { id: sessionId } })
const res = await client.session.get({
path: { id: sessionId },
query: { directory },
})
if (res.error || !res.data) {
throw new Error(`Session not found: ${sessionId}`)
}
@@ -28,6 +32,7 @@ export async function resolveSession(options: {
{ permission: "question", action: "deny" as const, pattern: "*" },
],
} as any,
query: { directory },
})
if (res.error) {

View File
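The diff above threads a required `directory` through both the session-lookup and session-creation paths. A minimal sketch of that pattern, using a stand-in client shape rather than the real `OpencodeClient` API:

```typescript
// Stand-in client shape for illustration; the real OpencodeClient differs.
type SessionResp = { data?: { id: string }; error?: string }

interface SessionClient {
  get(args: { path: { id: string }; query: { directory: string } }): Promise<SessionResp>
  create(args: { query: { directory: string } }): Promise<SessionResp>
}

// Reuse an existing session when an id is given, otherwise create one;
// the directory travels as a query parameter on both calls.
async function resolveSession(options: {
  client: SessionClient
  sessionId?: string
  directory: string
}): Promise<string> {
  const { client, sessionId, directory } = options
  if (sessionId) {
    const res = await client.get({ path: { id: sessionId }, query: { directory } })
    if (res.error || !res.data) throw new Error(`Session not found: ${sessionId}`)
    return res.data.id
  }
  const res = await client.create({ query: { directory } })
  if (res.error || !res.data) throw new Error("Failed to create session")
  return res.data.id
}
```

Making `directory` required (rather than optional) is what forces every call site in the tests above to be updated in the same change.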

@@ -55,16 +55,19 @@ export interface EventPayload {
export interface SessionIdleProps {
sessionID?: string
sessionId?: string
}
export interface SessionStatusProps {
sessionID?: string
sessionId?: string
status?: { type?: string }
}
export interface MessageUpdatedProps {
info?: {
sessionID?: string
sessionId?: string
role?: string
modelID?: string
providerID?: string
@@ -73,28 +76,47 @@ export interface MessageUpdatedProps {
}
export interface MessagePartUpdatedProps {
info?: { sessionID?: string; role?: string }
/** @deprecated Legacy structure — current OpenCode puts sessionID inside part */
info?: { sessionID?: string; sessionId?: string; role?: string }
part?: {
id?: string
sessionID?: string
sessionId?: string
messageID?: string
type?: string
text?: string
/** Tool name (for part.type === "tool") */
tool?: string
/** Tool state (for part.type === "tool") */
state?: { status?: string; input?: Record<string, unknown>; output?: string }
name?: string
input?: unknown
time?: { start?: number; end?: number }
}
}
export interface ToolExecuteProps {
sessionID?: string
sessionId?: string
name?: string
input?: Record<string, unknown>
}
export interface ToolResultProps {
sessionID?: string
sessionId?: string
name?: string
output?: string
}
export interface SessionErrorProps {
sessionID?: string
sessionId?: string
error?: unknown
}
export interface TuiToastShowProps {
title?: string
message?: string
variant?: "info" | "success" | "warning" | "error"
}

View File

@@ -553,6 +553,18 @@ describe("BrowserAutomationProviderSchema", () => {
// then
expect(result.success).toBe(false)
})
test("accepts 'playwright-cli' as valid provider", () => {
// given
const input = "playwright-cli"
// when
const result = BrowserAutomationProviderSchema.safeParse(input)
// then
expect(result.success).toBe(true)
expect(result.data).toBe("playwright-cli")
})
})
describe("BrowserAutomationConfigSchema", () => {
@@ -577,6 +589,17 @@ describe("BrowserAutomationConfigSchema", () => {
// then
expect(result.provider).toBe("agent-browser")
})
test("accepts playwright-cli provider in config", () => {
// given
const input = { provider: "playwright-cli" }
// when
const result = BrowserAutomationConfigSchema.parse(input)
// then
expect(result.provider).toBe("playwright-cli")
})
})
describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
@@ -607,6 +630,18 @@ describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
expect(result.success).toBe(true)
expect(result.data?.browser_automation_engine).toBeUndefined()
})
test("accepts browser_automation_engine with playwright-cli", () => {
// given
const input = { browser_automation_engine: { provider: "playwright-cli" } }
// when
const result = OhMyOpenCodeConfigSchema.safeParse(input)
// then
expect(result.success).toBe(true)
expect(result.data?.browser_automation_engine?.provider).toBe("playwright-cli")
})
})
describe("ExperimentalConfigSchema feature flags", () => {
@@ -663,6 +698,59 @@ describe("ExperimentalConfigSchema feature flags", () => {
expect(result.data.safe_hook_creation).toBeUndefined()
}
})
test("accepts hashline_edit as true", () => {
//#given
const config = { hashline_edit: true }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.hashline_edit).toBe(true)
}
})
test("accepts hashline_edit as false", () => {
//#given
const config = { hashline_edit: false }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.hashline_edit).toBe(false)
}
})
test("hashline_edit is optional", () => {
//#given
const config = { safe_hook_creation: true }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(true)
if (result.success) {
expect(result.data.hashline_edit).toBeUndefined()
}
})
test("rejects non-boolean hashline_edit", () => {
//#given
const config = { hashline_edit: "true" }
//#when
const result = ExperimentalConfigSchema.safeParse(config)
//#then
expect(result.success).toBe(false)
})
})
describe("GitMasterConfigSchema", () => {

View File

@@ -4,6 +4,7 @@ export const BrowserAutomationProviderSchema = z.enum([
"playwright",
"agent-browser",
"dev-browser",
"playwright-cli",
])
export const BrowserAutomationConfigSchema = z.object({
@@ -12,6 +13,7 @@ export const BrowserAutomationConfigSchema = z.object({
* - "playwright": Uses Playwright MCP server (@playwright/mcp) - default
* - "agent-browser": Uses Vercel's agent-browser CLI (requires: bun add -g agent-browser)
* - "dev-browser": Uses dev-browser skill with persistent browser state
* - "playwright-cli": Uses Playwright CLI (@playwright/cli) - token-efficient CLI alternative
*/
provider: BrowserAutomationProviderSchema.default("playwright"),
})

View File

@@ -15,6 +15,8 @@ export const ExperimentalConfigSchema = z.object({
plugin_load_timeout_ms: z.number().min(1000).optional(),
/** Wrap hook creation in try/catch to prevent one failing hook from crashing the plugin (default: true at call site) */
safe_hook_creation: z.boolean().optional(),
/** Enable hashline_edit tool for improved file editing with hash-based line anchors */
hashline_edit: z.boolean().optional(),
})
export type ExperimentalConfig = z.infer<typeof ExperimentalConfigSchema>

View File

@@ -45,6 +45,7 @@ export const HookNameSchema = z.enum([
"tasks-todowrite-disabler",
"write-existing-file-guard",
"anthropic-effort",
"hashline-read-enhancer",
])
export type HookName = z.infer<typeof HookNameSchema>

View File

@@ -22,8 +22,9 @@ export function createManagers(args: {
pluginConfig: OhMyOpenCodeConfig
tmuxConfig: TmuxConfig
modelCacheState: ModelCacheState
backgroundNotificationHookEnabled: boolean
}): Managers {
const { ctx, pluginConfig, tmuxConfig, modelCacheState } = args
const { ctx, pluginConfig, tmuxConfig, modelCacheState, backgroundNotificationHookEnabled } = args
const tmuxSessionManager = new TmuxSessionManager(ctx, tmuxConfig)
@@ -57,6 +58,7 @@ export function createManagers(args: {
log("[index] tmux cleanup error during shutdown:", error)
})
},
enableParentSessionNotifications: backgroundNotificationHookEnabled,
},
)

View File

@@ -0,0 +1,53 @@
import { describe, test, expect } from "bun:test"
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import { BackgroundManager } from "./manager"
function createManagerWithStatus(statusImpl: () => Promise<{ data: Record<string, { type: string }> }>): BackgroundManager {
const client = {
session: {
status: statusImpl,
prompt: async () => ({}),
promptAsync: async () => ({}),
abort: async () => ({}),
todo: async () => ({ data: [] }),
messages: async () => ({ data: [] }),
},
}
return new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
}
describe("BackgroundManager polling overlap", () => {
test("skips overlapping pollRunningTasks executions", async () => {
//#given
let activeCalls = 0
let maxActiveCalls = 0
let statusCallCount = 0
let releaseStatus: (() => void) | undefined
const statusGate = new Promise<void>((resolve) => {
releaseStatus = resolve
})
const manager = createManagerWithStatus(async () => {
statusCallCount += 1
activeCalls += 1
maxActiveCalls = Math.max(maxActiveCalls, activeCalls)
await statusGate
activeCalls -= 1
return { data: {} }
})
//#when
const firstPoll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks()
await Promise.resolve()
const secondPoll = (manager as unknown as { pollRunningTasks: () => Promise<void> }).pollRunningTasks()
releaseStatus?.()
await Promise.all([firstPoll, secondPoll])
manager.shutdown()
//#then
expect(maxActiveCalls).toBe(1)
expect(statusCallCount).toBe(1)
})
})

View File
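The guard this test exercises can be sketched independently of the manager: a boolean in-flight flag makes an async poll safe to call from an interval whose ticks can outpace the work. Names here are illustrative, not the real `BackgroundManager` internals.

```typescript
// Minimal reentrancy guard: overlapping calls return immediately instead of
// starting a second poll while the first is still awaiting.
class GuardedPoller {
  private inFlight = false
  polls = 0

  async poll(work: () => Promise<void>): Promise<void> {
    if (this.inFlight) return // a previous tick is still running: skip
    this.inFlight = true
    try {
      this.polls += 1
      await work()
    } finally {
      this.inFlight = false // release even if work() throws
    }
  }
}
```

The `finally` is what keeps a throwing poll from wedging the flag closed, mirroring the `pollingInFlight` reset in the manager diff further down.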

@@ -805,6 +805,62 @@ interface CurrentMessage {
}
describe("BackgroundManager.notifyParentSession - dynamic message lookup", () => {
test("should skip compaction agent and use nearest non-compaction message", async () => {
//#given
let capturedBody: Record<string, unknown> | undefined
const client = {
session: {
prompt: async () => ({}),
promptAsync: async (args: { body: Record<string, unknown> }) => {
capturedBody = args.body
return {}
},
abort: async () => ({}),
messages: async () => ({
data: [
{
info: {
agent: "sisyphus",
model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
},
},
{
info: {
agent: "compaction",
model: { providerID: "anthropic", modelID: "claude-sonnet-4-5" },
},
},
],
}),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
const task: BackgroundTask = {
id: "task-skip-compaction",
sessionID: "session-child",
parentSessionID: "session-parent",
parentMessageID: "msg-parent",
description: "task with compaction at tail",
prompt: "test",
agent: "explore",
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
parentAgent: "fallback-agent",
}
getPendingByParent(manager).set("session-parent", new Set([task.id, "still-running"]))
//#when
await (manager as unknown as { notifyParentSession: (value: BackgroundTask) => Promise<void> })
.notifyParentSession(task)
//#then
expect(capturedBody?.agent).toBe("sisyphus")
expect(capturedBody?.model).toEqual({ providerID: "anthropic", modelID: "claude-opus-4-6" })
manager.shutdown()
})
test("should use currentMessage model/agent when available", async () => {
// given - currentMessage has model and agent
const task: BackgroundTask = {
@@ -1003,6 +1059,52 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
})
})
describe("BackgroundManager.notifyParentSession - notifications toggle", () => {
test("should skip parent prompt injection when notifications are disabled", async () => {
//#given
let promptCalled = false
const promptMock = async () => {
promptCalled = true
return {}
}
const client = {
session: {
prompt: promptMock,
promptAsync: promptMock,
abort: async () => ({}),
messages: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager(
{ client, directory: tmpdir() } as unknown as PluginInput,
undefined,
{ enableParentSessionNotifications: false },
)
const task: BackgroundTask = {
id: "task-no-parent-notification",
sessionID: "session-child",
parentSessionID: "session-parent",
parentMessageID: "msg-parent",
description: "task notifications disabled",
prompt: "test",
agent: "explore",
status: "completed",
startedAt: new Date(),
completedAt: new Date(),
}
getPendingByParent(manager).set("session-parent", new Set([task.id]))
//#when
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> })
.notifyParentSession(task)
//#then
expect(promptCalled).toBe(false)
manager.shutdown()
})
})
function buildNotificationPromptBody(
task: BackgroundTask,
currentMessage: CurrentMessage | null

View File

@@ -23,8 +23,8 @@ import {
import { subagentSessions } from "../claude-code-session-state"
import { getTaskToastManager } from "../task-toast-manager"
import { findNearestMessageWithFields, MESSAGE_STORAGE } from "../hook-message-injector"
import { existsSync, readdirSync } from "node:fs"
import { MESSAGE_STORAGE, type StoredMessage } from "../hook-message-injector"
import { existsSync, readFileSync, readdirSync } from "node:fs"
import { join } from "node:path"
type ProcessCleanupEvent = NodeJS.Signals | "beforeExit" | "exit"
@@ -80,6 +80,7 @@ export class BackgroundManager {
private client: OpencodeClient
private directory: string
private pollingInterval?: ReturnType<typeof setInterval>
private pollingInFlight = false
private concurrencyManager: ConcurrencyManager
private shutdownTriggered = false
private config?: BackgroundTaskConfig
@@ -92,6 +93,7 @@ export class BackgroundManager {
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private notificationQueueByParent: Map<string, Promise<void>> = new Map()
private enableParentSessionNotifications: boolean
readonly taskHistory = new TaskHistory()
constructor(
@@ -101,6 +103,7 @@ export class BackgroundManager {
tmuxConfig?: TmuxConfig
onSubagentSessionCreated?: OnSubagentSessionCreated
onShutdown?: () => void
enableParentSessionNotifications?: boolean
}
) {
this.tasks = new Map()
@@ -113,6 +116,7 @@ export class BackgroundManager {
this.tmuxEnabled = options?.tmuxConfig?.enabled ?? false
this.onSubagentSessionCreated = options?.onSubagentSessionCreated
this.onShutdown = options?.onShutdown
this.enableParentSessionNotifications = options?.enableParentSessionNotifications ?? true
this.registerProcessCleanup()
}
@@ -1203,19 +1207,21 @@ export class BackgroundManager {
allComplete = true
}
const completedTasks = allComplete
? Array.from(this.tasks.values())
.filter(t => t.parentSessionID === task.parentSessionID && t.status !== "running" && t.status !== "pending")
: []
const statusText = task.status === "completed" ? "COMPLETED" : task.status === "interrupt" ? "INTERRUPTED" : "CANCELLED"
const errorInfo = task.error ? `\n**Error:** ${task.error}` : ""
let notification: string
let completedTasks: BackgroundTask[] = []
if (allComplete) {
completedTasks = Array.from(this.tasks.values())
.filter(t => t.parentSessionID === task.parentSessionID && t.status !== "running" && t.status !== "pending")
const completedTasksText = completedTasks
.map(t => `- \`${t.id}\`: ${t.description}`)
.join("\n")
notification = `<system-reminder>
let notification: string
if (allComplete) {
const completedTasksText = completedTasks
.map(t => `- \`${t.id}\`: ${t.description}`)
.join("\n")
notification = `<system-reminder>
[ALL BACKGROUND TASKS COMPLETE]
**Completed:**
@@ -1238,69 +1244,79 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
</system-reminder>`
}
let agent: string | undefined = task.parentAgent
let model: { providerID: string; modelID: string } | undefined
let agent: string | undefined = task.parentAgent
let model: { providerID: string; modelID: string } | undefined
try {
const messagesResp = await this.client.session.messages({ path: { id: task.parentSessionID } })
const messages = normalizeSDKResponse(messagesResp, [] as Array<{
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
}>)
for (let i = messages.length - 1; i >= 0; i--) {
const info = messages[i].info
if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
agent = info.agent ?? task.parentAgent
model = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
break
if (this.enableParentSessionNotifications) {
try {
const messagesResp = await this.client.session.messages({ path: { id: task.parentSessionID } })
const messages = normalizeSDKResponse(messagesResp, [] as Array<{
info?: { agent?: string; model?: { providerID: string; modelID: string }; modelID?: string; providerID?: string }
}>)
for (let i = messages.length - 1; i >= 0; i--) {
const info = messages[i].info
if (isCompactionAgent(info?.agent)) {
continue
}
if (info?.agent || info?.model || (info?.modelID && info?.providerID)) {
agent = info.agent ?? task.parentAgent
model = info.model ?? (info.providerID && info.modelID ? { providerID: info.providerID, modelID: info.modelID } : undefined)
break
}
}
} catch (error) {
if (this.isAbortedSessionError(error)) {
log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
taskId: task.id,
parentSessionID: task.parentSessionID,
})
}
const messageDir = getMessageDir(task.parentSessionID)
const currentMessage = messageDir ? findNearestMessageExcludingCompaction(messageDir) : null
agent = currentMessage?.agent ?? task.parentAgent
model = currentMessage?.model?.providerID && currentMessage?.model?.modelID
? { providerID: currentMessage.model.providerID, modelID: currentMessage.model.modelID }
: undefined
}
}
} catch (error) {
if (this.isAbortedSessionError(error)) {
log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
taskId: task.id,
parentSessionID: task.parentSessionID,
})
}
const messageDir = getMessageDir(task.parentSessionID)
const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
agent = currentMessage?.agent ?? task.parentAgent
model = currentMessage?.model?.providerID && currentMessage?.model?.modelID
? { providerID: currentMessage.model.providerID, modelID: currentMessage.model.modelID }
: undefined
}
log("[background-agent] notifyParentSession context:", {
taskId: task.id,
resolvedAgent: agent,
resolvedModel: model,
})
try {
await this.client.session.promptAsync({
path: { id: task.parentSessionID },
body: {
noReply: !allComplete,
...(agent !== undefined ? { agent } : {}),
...(model !== undefined ? { model } : {}),
...(task.parentTools ? { tools: task.parentTools } : {}),
parts: [{ type: "text", text: notification }],
},
})
log("[background-agent] Sent notification to parent session:", {
taskId: task.id,
allComplete,
noReply: !allComplete,
})
} catch (error) {
if (this.isAbortedSessionError(error)) {
log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
log("[background-agent] notifyParentSession context:", {
taskId: task.id,
parentSessionID: task.parentSessionID,
resolvedAgent: agent,
resolvedModel: model,
})
try {
await this.client.session.promptAsync({
path: { id: task.parentSessionID },
body: {
noReply: !allComplete,
...(agent !== undefined ? { agent } : {}),
...(model !== undefined ? { model } : {}),
...(task.parentTools ? { tools: task.parentTools } : {}),
parts: [{ type: "text", text: notification }],
},
})
log("[background-agent] Sent notification to parent session:", {
taskId: task.id,
allComplete,
noReply: !allComplete,
})
} catch (error) {
if (this.isAbortedSessionError(error)) {
log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
taskId: task.id,
parentSessionID: task.parentSessionID,
})
} else {
log("[background-agent] Failed to send notification:", error)
}
}
} else {
log("[background-agent] Failed to send notification:", error)
log("[background-agent] Parent session notifications disabled, skipping prompt injection:", {
taskId: task.id,
parentSessionID: task.parentSessionID,
})
}
}
if (allComplete) {
for (const completedTask of completedTasks) {
@@ -1531,6 +1547,9 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
}
private async pollRunningTasks(): Promise<void> {
if (this.pollingInFlight) return
this.pollingInFlight = true
try {
this.pruneStaleTasksAndNotifications()
const statusResult = await this.client.session.status()
@@ -1586,6 +1605,9 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
if (!this.hasRunningTasks()) {
this.stopPolling()
}
} finally {
this.pollingInFlight = false
}
}
/**
@@ -1703,3 +1725,57 @@ function getMessageDir(sessionID: string): string | null {
}
return null
}
function isCompactionAgent(agent: string | undefined): boolean {
return agent?.trim().toLowerCase() === "compaction"
}
function hasFullAgentAndModel(message: StoredMessage): boolean {
return !!message.agent &&
!isCompactionAgent(message.agent) &&
!!message.model?.providerID &&
!!message.model?.modelID
}
function hasPartialAgentOrModel(message: StoredMessage): boolean {
const hasAgent = !!message.agent && !isCompactionAgent(message.agent)
const hasModel = !!message.model?.providerID && !!message.model?.modelID
return hasAgent || hasModel
}
function findNearestMessageExcludingCompaction(messageDir: string): StoredMessage | null {
try {
const files = readdirSync(messageDir)
.filter((name) => name.endsWith(".json"))
.sort()
.reverse()
for (const file of files) {
try {
const content = readFileSync(join(messageDir, file), "utf-8")
const parsed = JSON.parse(content) as StoredMessage
if (hasFullAgentAndModel(parsed)) {
return parsed
}
} catch {
continue
}
}
for (const file of files) {
try {
const content = readFileSync(join(messageDir, file), "utf-8")
const parsed = JSON.parse(content) as StoredMessage
if (hasPartialAgentOrModel(parsed)) {
return parsed
}
} catch {
continue
}
}
} catch {
return null
}
return null
}

View File
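The fallback lookup added above runs two passes over the stored messages: first it wants a message with both a non-compaction agent and a complete model, then it settles for one with either. A condensed sketch over an in-memory list (a stand-in for the on-disk `StoredMessage` files):

```typescript
type Msg = { agent?: string; model?: { providerID?: string; modelID?: string } }

const isCompaction = (agent?: string) => agent?.trim().toLowerCase() === "compaction"
const hasFull = (m: Msg) =>
  !!m.agent && !isCompaction(m.agent) && !!m.model?.providerID && !!m.model?.modelID
const hasPartial = (m: Msg) =>
  (!!m.agent && !isCompaction(m.agent)) || (!!m.model?.providerID && !!m.model?.modelID)

// messages are assumed newest-first, mirroring the reversed directory listing.
function findNearestExcludingCompaction(messages: Msg[]): Msg | null {
  return messages.find(hasFull) ?? messages.find(hasPartial) ?? null
}
```

This is why the test earlier in the diff expects the `sisyphus` message to win even when a `compaction` message is newer.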

@@ -0,0 +1,33 @@
import { describe, expect, test } from "bun:test"
import { resolveParentDirectory } from "./parent-directory-resolver"
describe("background-agent parent-directory-resolver", () => {
const originalPlatform = process.platform
test("uses current working directory on Windows when parent session directory is AppData", async () => {
//#given
Object.defineProperty(process, "platform", { value: "win32" })
try {
const client = {
session: {
get: async () => ({
data: { directory: "C:\\Users\\test\\AppData\\Local\\ai.opencode.desktop" },
}),
},
}
//#when
const result = await resolveParentDirectory({
client: client as Parameters<typeof resolveParentDirectory>[0]["client"],
parentSessionID: "ses_parent",
defaultDirectory: "C:\\Users\\test\\AppData\\Roaming\\opencode",
})
//#then
expect(result).toBe(process.cwd())
} finally {
Object.defineProperty(process, "platform", { value: originalPlatform })
}
})
})

View File

@@ -1,5 +1,5 @@
import type { OpencodeClient } from "../constants"
import { log } from "../../../shared"
import { log, resolveSessionDirectory } from "../../../shared"
export async function resolveParentDirectory(options: {
client: OpencodeClient
@@ -15,7 +15,10 @@ export async function resolveParentDirectory(options: {
return null
})
const parentDirectory = parentSession?.data?.directory ?? defaultDirectory
const parentDirectory = resolveSessionDirectory({
parentDirectory: parentSession?.data?.directory,
fallbackDirectory: defaultDirectory,
})
log(`[background-agent] Parent dir: ${parentSession?.data?.directory}, using: ${parentDirectory}`)
return parentDirectory
}

View File
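The Windows test above implies a guard inside `resolveSessionDirectory`: a parent directory pointing into AppData (the desktop app's data directory, not a workspace) is rejected in favor of the current working directory. A hypothetical reconstruction — the real helper in `shared/` may use different criteria:

```typescript
// Assumed heuristic, inferred from the test: any path containing an AppData
// segment is treated as non-workspace and replaced with process.cwd().
function resolveSessionDirectory(options: {
  parentDirectory?: string
  fallbackDirectory: string
}): string {
  const candidate = options.parentDirectory ?? options.fallbackDirectory
  if (/[\\/]AppData[\\/]/.test(candidate)) return process.cwd()
  return candidate
}
```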

@@ -140,4 +140,35 @@ describe("createBuiltinSkills", () => {
// #then
expect(skills.length).toBe(4)
})
test("returns playwright-cli skill when browserProvider is 'playwright-cli'", () => {
// given
const options = { browserProvider: "playwright-cli" as const }
// when
const skills = createBuiltinSkills(options)
// then
const playwrightSkill = skills.find((s) => s.name === "playwright")
const agentBrowserSkill = skills.find((s) => s.name === "agent-browser")
expect(playwrightSkill).toBeDefined()
expect(playwrightSkill!.description).toContain("browser")
expect(playwrightSkill!.allowedTools).toContain("Bash(playwright-cli:*)")
expect(playwrightSkill!.mcpConfig).toBeUndefined()
expect(agentBrowserSkill).toBeUndefined()
})
test("playwright-cli skill template contains CLI commands", () => {
// given
const options = { browserProvider: "playwright-cli" as const }
// when
const skills = createBuiltinSkills(options)
const skill = skills.find((s) => s.name === "playwright")
// then
expect(skill!.template).toContain("playwright-cli open")
expect(skill!.template).toContain("playwright-cli snapshot")
expect(skill!.template).toContain("playwright-cli click")
})
})

View File

@@ -4,6 +4,7 @@ import type { BrowserAutomationProvider } from "../../config/schema"
import {
playwrightSkill,
agentBrowserSkill,
playwrightCliSkill,
frontendUiUxSkill,
gitMasterSkill,
devBrowserSkill,
@@ -17,7 +18,14 @@ export interface CreateBuiltinSkillsOptions {
export function createBuiltinSkills(options: CreateBuiltinSkillsOptions = {}): BuiltinSkill[] {
const { browserProvider = "playwright", disabledSkills } = options
const browserSkill = browserProvider === "agent-browser" ? agentBrowserSkill : playwrightSkill
let browserSkill: BuiltinSkill
if (browserProvider === "agent-browser") {
browserSkill = agentBrowserSkill
} else if (browserProvider === "playwright-cli") {
browserSkill = playwrightCliSkill
} else {
browserSkill = playwrightSkill
}
const skills = [browserSkill, frontendUiUxSkill, gitMasterSkill, devBrowserSkill]

View File
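The if/else chain above picks which skill fills the browser slot; both `dev-browser` and the default fall through to the MCP-based playwright skill, since `devBrowserSkill` is always appended separately. The same dispatch as a lookup table — the values are string stand-ins for the real `BuiltinSkill` objects:

```typescript
type BrowserProvider = "playwright" | "agent-browser" | "dev-browser" | "playwright-cli"

// Stand-ins for playwrightSkill / agentBrowserSkill / playwrightCliSkill.
const browserSkillFor: Record<BrowserProvider, string> = {
  playwright: "playwrightSkill",
  "agent-browser": "agentBrowserSkill",
  "playwright-cli": "playwrightCliSkill",
  "dev-browser": "playwrightSkill", // dev-browser ships separately; the browser slot keeps the default
}
```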

@@ -1,4 +1,5 @@
export { playwrightSkill, agentBrowserSkill } from "./playwright"
export { playwrightCliSkill } from "./playwright-cli"
export { frontendUiUxSkill } from "./frontend-ui-ux"
export { gitMasterSkill } from "./git-master"
export { devBrowserSkill } from "./dev-browser"

View File

@@ -0,0 +1,268 @@
import type { BuiltinSkill } from "../types"
/**
* Playwright CLI skill — token-efficient CLI alternative to the MCP-based playwright skill.
*
* Uses name "playwright" (not "playwright-cli") because agents hardcode "playwright" as the
* canonical browser skill name. The browserProvider config swaps the implementation behind
* the same name: "playwright" gives MCP, "playwright-cli" gives this CLI variant.
* The binary is still called `playwright-cli` (see allowedTools).
*/
export const playwrightCliSkill: BuiltinSkill = {
name: "playwright",
description: "MUST USE for any browser-related tasks. Browser automation via playwright-cli - verification, browsing, information gathering, web scraping, testing, screenshots, and all browser interactions.",
template: `# Browser Automation with playwright-cli
## Quick start
\`\`\`bash
# open new browser
playwright-cli open
# navigate to a page
playwright-cli goto https://playwright.dev
# interact with the page using refs from the snapshot
playwright-cli click e15
playwright-cli type "page.click"
playwright-cli press Enter
# take a screenshot
playwright-cli screenshot
# close the browser
playwright-cli close
\`\`\`
## Commands
### Core
\`\`\`bash
playwright-cli open
# open and navigate right away
playwright-cli open https://example.com/
playwright-cli goto https://playwright.dev
playwright-cli type "search query"
playwright-cli click e3
playwright-cli dblclick e7
playwright-cli fill e5 "user@example.com"
playwright-cli drag e2 e8
playwright-cli hover e4
playwright-cli select e9 "option-value"
playwright-cli upload ./document.pdf
playwright-cli check e12
playwright-cli uncheck e12
playwright-cli snapshot
playwright-cli snapshot --filename=after-click.yaml
playwright-cli eval "document.title"
playwright-cli eval "el => el.textContent" e5
playwright-cli dialog-accept
playwright-cli dialog-accept "confirmation text"
playwright-cli dialog-dismiss
playwright-cli resize 1920 1080
playwright-cli close
\`\`\`
### Navigation
\`\`\`bash
playwright-cli go-back
playwright-cli go-forward
playwright-cli reload
\`\`\`
### Keyboard
\`\`\`bash
playwright-cli press Enter
playwright-cli press ArrowDown
playwright-cli keydown Shift
playwright-cli keyup Shift
\`\`\`
### Mouse
\`\`\`bash
playwright-cli mousemove 150 300
playwright-cli mousedown
playwright-cli mousedown right
playwright-cli mouseup
playwright-cli mouseup right
playwright-cli mousewheel 0 100
\`\`\`
### Save as
\`\`\`bash
playwright-cli screenshot
playwright-cli screenshot e5
playwright-cli screenshot --filename=page.png
playwright-cli pdf --filename=page.pdf
\`\`\`
### Tabs
\`\`\`bash
playwright-cli tab-list
playwright-cli tab-new
playwright-cli tab-new https://example.com/page
playwright-cli tab-close
playwright-cli tab-close 2
playwright-cli tab-select 0
\`\`\`
### Storage
\`\`\`bash
playwright-cli state-save
playwright-cli state-save auth.json
playwright-cli state-load auth.json
# Cookies
playwright-cli cookie-list
playwright-cli cookie-list --domain=example.com
playwright-cli cookie-get session_id
playwright-cli cookie-set session_id abc123
playwright-cli cookie-set session_id abc123 --domain=example.com --httpOnly --secure
playwright-cli cookie-delete session_id
playwright-cli cookie-clear
# LocalStorage
playwright-cli localstorage-list
playwright-cli localstorage-get theme
playwright-cli localstorage-set theme dark
playwright-cli localstorage-delete theme
playwright-cli localstorage-clear
# SessionStorage
playwright-cli sessionstorage-list
playwright-cli sessionstorage-get step
playwright-cli sessionstorage-set step 3
playwright-cli sessionstorage-delete step
playwright-cli sessionstorage-clear
\`\`\`
### Network
\`\`\`bash
playwright-cli route "**/*.jpg" --status=404
playwright-cli route "https://api.example.com/**" --body='{"mock": true}'
playwright-cli route-list
playwright-cli unroute "**/*.jpg"
playwright-cli unroute
\`\`\`
### DevTools
\`\`\`bash
playwright-cli console
playwright-cli console warning
playwright-cli network
playwright-cli run-code "async page => await page.context().grantPermissions(['geolocation'])"
playwright-cli tracing-start
playwright-cli tracing-stop
playwright-cli video-start
playwright-cli video-stop video.webm
\`\`\`
### Install
\`\`\`bash
playwright-cli install --skills
playwright-cli install-browser
\`\`\`
### Configuration
\`\`\`bash
# Use specific browser when creating session
playwright-cli open --browser=chrome
playwright-cli open --browser=firefox
playwright-cli open --browser=webkit
playwright-cli open --browser=msedge
# Connect to browser via extension
playwright-cli open --extension
# Use persistent profile (by default profile is in-memory)
playwright-cli open --persistent
# Use persistent profile with custom directory
playwright-cli open --profile=/path/to/profile
# Start with config file
playwright-cli open --config=my-config.json
# Close the browser
playwright-cli close
# Delete user data for the default session
playwright-cli delete-data
\`\`\`
### Browser Sessions
\`\`\`bash
# create new browser session named "mysession" with persistent profile
playwright-cli -s=mysession open example.com --persistent
# same with manually specified profile directory (use when requested explicitly)
playwright-cli -s=mysession open example.com --profile=/path/to/profile
playwright-cli -s=mysession click e6
playwright-cli -s=mysession close # stop a named browser
playwright-cli -s=mysession delete-data # delete user data for persistent session
playwright-cli list
# Close all browsers
playwright-cli close-all
# Forcefully kill all browser processes
playwright-cli kill-all
\`\`\`
## Example: Form submission
\`\`\`bash
playwright-cli open https://example.com/form
playwright-cli snapshot
playwright-cli fill e1 "user@example.com"
playwright-cli fill e2 "password123"
playwright-cli click e3
playwright-cli snapshot
playwright-cli close
\`\`\`
## Example: Multi-tab workflow
\`\`\`bash
playwright-cli open https://example.com
playwright-cli tab-new https://example.com/other
playwright-cli tab-list
playwright-cli tab-select 0
playwright-cli snapshot
playwright-cli close
\`\`\`
## Example: Debugging with DevTools
\`\`\`bash
playwright-cli open https://example.com
playwright-cli click e4
playwright-cli fill e7 "test"
playwright-cli console
playwright-cli network
playwright-cli close
\`\`\`
## Example: Tracing
```bash
playwright-cli open https://example.com
playwright-cli tracing-start
playwright-cli click e4
playwright-cli fill e7 "test"
playwright-cli tracing-stop
playwright-cli close
\`\`\`
## Specific tasks
* **Request mocking**: [references/request-mocking.md](references/request-mocking.md)
* **Running Playwright code**: [references/running-code.md](references/running-code.md)
* **Browser session management**: [references/session-management.md](references/session-management.md)
* **Storage state (cookies, localStorage)**: [references/storage-state.md](references/storage-state.md)
* **Test generation**: [references/test-generation.md](references/test-generation.md)
* **Tracing**: [references/tracing.md](references/tracing.md)
* **Video recording**: [references/video-recording.md](references/video-recording.md)`,
allowedTools: ["Bash(playwright-cli:*)"],
}

View File

@@ -0,0 +1,82 @@
import type { TmuxConfig } from "../../config/schema"
import type { applyLayout, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane, spawnTmuxPane } from "../../shared/tmux"
import type { PaneAction, WindowState } from "./types"
export interface ActionResult {
success: boolean
paneId?: string
error?: string
}
export interface ExecuteContext {
config: TmuxConfig
serverUrl: string
windowState: WindowState
}
export interface ActionExecutorDeps {
spawnTmuxPane: typeof spawnTmuxPane
closeTmuxPane: typeof closeTmuxPane
replaceTmuxPane: typeof replaceTmuxPane
applyLayout: typeof applyLayout
enforceMainPaneWidth: typeof enforceMainPaneWidth
}
async function enforceMainPane(
windowState: WindowState,
config: TmuxConfig,
deps: ActionExecutorDeps,
): Promise<void> {
if (!windowState.mainPane) return
await deps.enforceMainPaneWidth(
windowState.mainPane.paneId,
windowState.windowWidth,
config.main_pane_size,
)
}
export async function executeActionWithDeps(
action: PaneAction,
ctx: ExecuteContext,
deps: ActionExecutorDeps,
): Promise<ActionResult> {
if (action.type === "close") {
const success = await deps.closeTmuxPane(action.paneId)
if (success) {
await enforceMainPane(ctx.windowState, ctx.config, deps)
}
return { success }
}
if (action.type === "replace") {
const result = await deps.replaceTmuxPane(
action.paneId,
action.newSessionId,
action.description,
ctx.config,
ctx.serverUrl,
)
return {
success: result.success,
paneId: result.paneId,
}
}
const result = await deps.spawnTmuxPane(
action.sessionId,
action.description,
ctx.config,
ctx.serverUrl,
action.targetPaneId,
action.splitDirection,
)
if (result.success) {
await enforceMainPane(ctx.windowState, ctx.config, deps)
}
return {
success: result.success,
paneId: result.paneId,
}
}
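The module above takes its tmux helpers through an `ActionExecutorDeps` object instead of importing them directly, which is what lets the test file further down swap in stubs. A minimal sketch of the same dependency-injection shape, with the types re-declared locally for illustration (the real signatures carry more parameters):

```typescript
// Locally re-declared, simplified shapes; the real ones live in
// action-executor-core.ts and ../../shared/tmux.
type ActionResult = { success: boolean; paneId?: string }

interface Deps {
  closeTmuxPane: (paneId: string) => Promise<boolean>
}

// The executor only ever calls deps.*, never the real tmux binary.
async function executeClose(paneId: string, deps: Deps): Promise<ActionResult> {
  const success = await deps.closeTmuxPane(paneId)
  return { success }
}

// In tests, a stub stands in for tmux entirely:
const stubDeps: Deps = { closeTmuxPane: async () => true }
```

Production code builds one `DEFAULT_DEPS` object wired to the real helpers, so call sites keep a plain `executeAction(action, ctx)` signature.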

View File

@@ -0,0 +1,113 @@
import { beforeEach, describe, expect, mock, test } from "bun:test"
import type { TmuxConfig } from "../../config/schema"
import { executeActionWithDeps } from "./action-executor-core"
import type { ActionExecutorDeps, ExecuteContext } from "./action-executor-core"
import type { WindowState } from "./types"
const mockSpawnTmuxPane = mock(async () => ({ success: true, paneId: "%7" }))
const mockCloseTmuxPane = mock(async () => true)
const mockEnforceMainPaneWidth = mock(async () => undefined)
const mockReplaceTmuxPane = mock(async () => ({ success: true, paneId: "%7" }))
const mockApplyLayout = mock(async () => undefined)
const mockDeps: ActionExecutorDeps = {
spawnTmuxPane: mockSpawnTmuxPane,
closeTmuxPane: mockCloseTmuxPane,
enforceMainPaneWidth: mockEnforceMainPaneWidth,
replaceTmuxPane: mockReplaceTmuxPane,
applyLayout: mockApplyLayout,
}
function createConfig(overrides?: Partial<TmuxConfig>): TmuxConfig {
return {
enabled: true,
layout: "main-horizontal",
main_pane_size: 55,
main_pane_min_width: 120,
agent_pane_min_width: 40,
...overrides,
}
}
function createWindowState(overrides?: Partial<WindowState>): WindowState {
return {
windowWidth: 220,
windowHeight: 44,
mainPane: {
paneId: "%0",
width: 110,
height: 44,
left: 0,
top: 0,
title: "main",
isActive: true,
},
agentPanes: [],
...overrides,
}
}
function createContext(overrides?: Partial<ExecuteContext>): ExecuteContext {
return {
config: createConfig(),
serverUrl: "http://localhost:4096",
windowState: createWindowState(),
...overrides,
}
}
describe("executeAction", () => {
beforeEach(() => {
mockSpawnTmuxPane.mockClear()
mockCloseTmuxPane.mockClear()
mockEnforceMainPaneWidth.mockClear()
mockReplaceTmuxPane.mockClear()
mockApplyLayout.mockClear()
mockSpawnTmuxPane.mockImplementation(async () => ({ success: true, paneId: "%7" }))
})
test("enforces main pane width with configured percentage after successful spawn", async () => {
// given
// when
const result = await executeActionWithDeps(
{
type: "spawn",
sessionId: "ses_new",
description: "background task",
targetPaneId: "%0",
splitDirection: "-h",
},
createContext(),
mockDeps,
)
// then
expect(result).toEqual({ success: true, paneId: "%7" })
expect(mockApplyLayout).not.toHaveBeenCalled()
expect(mockEnforceMainPaneWidth).toHaveBeenCalledTimes(1)
expect(mockEnforceMainPaneWidth).toHaveBeenCalledWith("%0", 220, 55)
})
test("does not apply layout when spawn fails", async () => {
// given
mockSpawnTmuxPane.mockImplementationOnce(async () => ({ success: false }))
// when
const result = await executeActionWithDeps(
{
type: "spawn",
sessionId: "ses_new",
description: "background task",
targetPaneId: "%0",
splitDirection: "-h",
},
createContext(),
mockDeps,
)
// then
expect(result).toEqual({ success: false, paneId: undefined })
expect(mockApplyLayout).not.toHaveBeenCalled()
expect(mockEnforceMainPaneWidth).not.toHaveBeenCalled()
})
})

View File

@@ -1,13 +1,14 @@
import type { TmuxConfig } from "../../config/schema"
import type { PaneAction, WindowState } from "./types"
import { spawnTmuxPane, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane } from "../../shared/tmux"
import type { PaneAction } from "./types"
import { applyLayout, spawnTmuxPane, closeTmuxPane, enforceMainPaneWidth, replaceTmuxPane } from "../../shared/tmux"
import { log } from "../../shared"
import type {
ActionExecutorDeps,
ActionResult,
ExecuteContext,
} from "./action-executor-core"
import { executeActionWithDeps } from "./action-executor-core"
export interface ActionResult {
success: boolean
paneId?: string
error?: string
}
export type { ActionExecutorDeps, ActionResult, ExecuteContext } from "./action-executor-core"
export interface ExecuteActionsResult {
success: boolean
@@ -15,60 +16,19 @@ export interface ExecuteActionsResult {
results: Array<{ action: PaneAction; result: ActionResult }>
}
export interface ExecuteContext {
config: TmuxConfig
serverUrl: string
windowState: WindowState
}
async function enforceMainPane(windowState: WindowState): Promise<void> {
if (!windowState.mainPane) return
await enforceMainPaneWidth(windowState.mainPane.paneId, windowState.windowWidth)
const DEFAULT_DEPS: ActionExecutorDeps = {
spawnTmuxPane,
closeTmuxPane,
replaceTmuxPane,
applyLayout,
enforceMainPaneWidth,
}
export async function executeAction(
action: PaneAction,
ctx: ExecuteContext
): Promise<ActionResult> {
if (action.type === "close") {
const success = await closeTmuxPane(action.paneId)
if (success) {
await enforceMainPane(ctx.windowState)
}
return { success }
}
if (action.type === "replace") {
const result = await replaceTmuxPane(
action.paneId,
action.newSessionId,
action.description,
ctx.config,
ctx.serverUrl
)
return {
success: result.success,
paneId: result.paneId,
}
}
const result = await spawnTmuxPane(
action.sessionId,
action.description,
ctx.config,
ctx.serverUrl,
action.targetPaneId,
action.splitDirection
)
if (result.success) {
await enforceMainPane(ctx.windowState)
}
return {
success: result.success,
paneId: result.paneId,
}
return executeActionWithDeps(action, ctx, DEFAULT_DEPS)
}
export async function executeActions(

View File

@@ -112,6 +112,21 @@ describe("canSplitPaneAnyDirection", () => {
// then
expect(result).toBe(false)
})
it("#given custom minPaneWidth #when pane fits smaller width #then returns true", () => {
//#given - pane too small for default MIN_PANE_WIDTH(52) but fits custom 30
const customMin = 30
const customMinSplitW = 2 * customMin + 1
const pane = createPane(customMinSplitW, MIN_SPLIT_HEIGHT - 1)
//#when
const defaultResult = canSplitPaneAnyDirection(pane)
const customResult = canSplitPaneAnyDirection(pane, customMin)
//#then
expect(defaultResult).toBe(false)
expect(customResult).toBe(true)
})
})
describe("getBestSplitDirection", () => {
@@ -179,6 +194,21 @@ describe("getBestSplitDirection", () => {
// then
expect(result).toBe("-v")
})
it("#given custom minPaneWidth #when pane width below default but above custom #then returns -h", () => {
//#given
const customMin = 30
const customMinSplitW = 2 * customMin + 1
const pane = createPane(customMinSplitW, MIN_SPLIT_HEIGHT - 1)
//#when
const defaultResult = getBestSplitDirection(pane)
const customResult = getBestSplitDirection(pane, customMin)
//#then
expect(defaultResult).toBe(null)
expect(customResult).toBe("-h")
})
})
describe("decideSpawnActions", () => {
@@ -228,7 +258,7 @@ describe("decideSpawnActions", () => {
expect(result.actions[0].type).toBe("spawn")
})
it("closes oldest pane when existing panes are too small to split", () => {
it("replaces oldest pane when existing panes are too small to split", () => {
// given - existing pane is below minimum splittable size
const state = createWindowState(220, 30, [
{ paneId: "%1", width: 50, height: 15, left: 110, top: 0 },
@@ -242,9 +272,8 @@ describe("decideSpawnActions", () => {
// then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(2)
expect(result.actions[0].type).toBe("close")
expect(result.actions[1].type).toBe("spawn")
expect(result.actions.length).toBe(1)
expect(result.actions[0].type).toBe("replace")
})
it("can spawn when existing pane is large enough to split", () => {
@@ -363,6 +392,20 @@ describe("calculateCapacity", () => {
//#then
expect(customCapacity.cols).toBeGreaterThanOrEqual(defaultCapacity.cols)
})
it("#given non-50 main pane width #when calculating capacity #then uses real agent area width", () => {
//#given
const windowWidth = 220
const windowHeight = 44
const mainPaneWidth = 132
//#when
const capacity = calculateCapacity(windowWidth, windowHeight, 52, mainPaneWidth)
//#then
expect(capacity.cols).toBe(1)
expect(capacity.total).toBe(3)
})
})
describe("decideSpawnActions with custom agentPaneWidth", () => {
@@ -394,4 +437,63 @@ describe("decideSpawnActions with custom agentPaneWidth", () => {
expect(defaultResult.canSpawn).toBe(false)
expect(customResult.canSpawn).toBe(true)
})
it("#given custom agentPaneWidth and splittable existing pane #when deciding spawn #then uses spawn without eviction", () => {
//#given
const customConfig: CapacityConfig = { mainPaneMinWidth: 120, agentPaneWidth: 40 }
const state = createWindowState(220, 44, [
{ paneId: "%1", width: 90, height: 30, left: 110, top: 0 },
])
const mappings: SessionMapping[] = [
{ sessionId: "old-ses", paneId: "%1", createdAt: new Date("2024-01-01") },
]
//#when
const result = decideSpawnActions(state, "ses1", "test", customConfig, mappings)
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions.length).toBe(1)
expect(result.actions[0].type).toBe("spawn")
if (result.actions[0].type === "spawn") {
expect(result.actions[0].targetPaneId).toBe("%1")
expect(result.actions[0].splitDirection).toBe("-h")
}
})
it("#given wider main pane #when capacity needs two evictions #then replace is chosen", () => {
//#given
const config: CapacityConfig = { mainPaneMinWidth: 120, agentPaneWidth: 40 }
const state = createWindowState(220, 44, [
{ paneId: "%1", width: 43, height: 44, left: 133, top: 0 },
{ paneId: "%2", width: 43, height: 44, left: 177, top: 0 },
{ paneId: "%3", width: 43, height: 21, left: 133, top: 22 },
{ paneId: "%4", width: 43, height: 21, left: 177, top: 22 },
{ paneId: "%5", width: 43, height: 21, left: 133, top: 33 },
])
state.mainPane = {
paneId: "%0",
width: 132,
height: 44,
left: 0,
top: 0,
title: "main",
isActive: true,
}
const mappings: SessionMapping[] = [
{ sessionId: "old-1", paneId: "%1", createdAt: new Date("2024-01-01") },
{ sessionId: "old-2", paneId: "%2", createdAt: new Date("2024-01-02") },
{ sessionId: "old-3", paneId: "%3", createdAt: new Date("2024-01-03") },
{ sessionId: "old-4", paneId: "%4", createdAt: new Date("2024-01-04") },
{ sessionId: "old-5", paneId: "%5", createdAt: new Date("2024-01-05") },
]
//#when
const result = decideSpawnActions(state, "ses-new", "new task", config, mappings)
//#then
expect(result.canSpawn).toBe(true)
expect(result.actions).toHaveLength(1)
expect(result.actions[0].type).toBe("replace")
})
})

View File

@@ -28,8 +28,12 @@ export function calculateCapacity(
windowWidth: number,
windowHeight: number,
minPaneWidth: number = MIN_PANE_WIDTH,
mainPaneWidth?: number,
): GridCapacity {
const availableWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
const availableWidth =
typeof mainPaneWidth === "number"
? Math.max(0, windowWidth - mainPaneWidth - DIVIDER_SIZE)
: Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
const cols = Math.min(
MAX_GRID_SIZE,
Math.max(
@@ -55,8 +59,15 @@ export function computeGridPlan(
windowWidth: number,
windowHeight: number,
paneCount: number,
mainPaneWidth?: number,
minPaneWidth?: number,
): GridPlan {
const capacity = calculateCapacity(windowWidth, windowHeight)
const capacity = calculateCapacity(
windowWidth,
windowHeight,
minPaneWidth ?? MIN_PANE_WIDTH,
mainPaneWidth,
)
const { cols: maxCols, rows: maxRows } = capacity
if (maxCols === 0 || maxRows === 0 || paneCount === 0) {
@@ -79,7 +90,10 @@ export function computeGridPlan(
}
}
const availableWidth = Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
const availableWidth =
typeof mainPaneWidth === "number"
? Math.max(0, windowWidth - mainPaneWidth - DIVIDER_SIZE)
: Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
const slotWidth = Math.floor(availableWidth / bestCols)
const slotHeight = Math.floor(windowHeight / bestRows)
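The width calculation this hunk introduces can be isolated as a small function: when the caller passes the measured main-pane width, the agent area is the window minus that pane and one divider column; otherwise it falls back to the old fixed ratio. The constant values below are assumptions for illustration (the real ones live in `tmux-grid-constants` and may differ):

```typescript
// Assumed values; see tmux-grid-constants for the real definitions.
const DIVIDER_SIZE = 1
const MAIN_PANE_RATIO = 0.5

function agentAreaWidth(windowWidth: number, mainPaneWidth?: number): number {
  // Prefer the real measured main-pane width; fall back to the fixed ratio.
  return typeof mainPaneWidth === "number"
    ? Math.max(0, windowWidth - mainPaneWidth - DIVIDER_SIZE)
    : Math.floor(windowWidth * (1 - MAIN_PANE_RATIO))
}

agentAreaWidth(220, 132) // → 87: real 132-column main pane, 1-column divider
agentAreaWidth(220)      // → 110: legacy 50% fallback
```

This is why the capacity test with `mainPaneWidth = 132` ends up with a single 87-column agent column instead of the two columns a 50% split would allow.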

View File

@@ -55,6 +55,7 @@ mock.module('./pane-state-querier', () => ({
mock.module('./action-executor', () => ({
executeActions: mockExecuteActions,
executeAction: mockExecuteAction,
executeActionWithDeps: mockExecuteAction,
}))
mock.module('../../shared/tmux', () => {
@@ -433,6 +434,53 @@ describe('TmuxSessionManager', () => {
})
describe('onSessionDeleted', () => {
test('does not track session when readiness timed out', async () => {
// given
mockIsInsideTmux.mockReturnValue(true)
let stateCallCount = 0
mockQueryWindowState.mockImplementation(async () => {
stateCallCount++
if (stateCallCount === 1) {
return createWindowState()
}
return createWindowState({
agentPanes: [
{
paneId: '%mock',
width: 40,
height: 44,
left: 100,
top: 0,
title: 'omo-subagent-Timeout Task',
isActive: false,
},
],
})
})
const { TmuxSessionManager } = await import('./manager')
const ctx = createMockContext({ sessionStatusResult: { data: {} } })
const config: TmuxConfig = {
enabled: true,
layout: 'main-vertical',
main_pane_size: 60,
main_pane_min_width: 80,
agent_pane_min_width: 40,
}
const manager = new TmuxSessionManager(ctx, config, mockTmuxDeps)
await manager.onSessionCreated(
createSessionCreatedEvent('ses_timeout', 'ses_parent', 'Timeout Task')
)
mockExecuteAction.mockClear()
// when
await manager.onSessionDeleted({ sessionID: 'ses_timeout' })
// then
expect(mockExecuteAction).toHaveBeenCalledTimes(0)
})
test('closes pane when tracked session is deleted', async () => {
// given
mockIsInsideTmux.mockReturnValue(true)
@@ -520,8 +568,13 @@ describe('TmuxSessionManager', () => {
mockIsInsideTmux.mockReturnValue(true)
let callCount = 0
mockExecuteActions.mockImplementation(async () => {
mockExecuteActions.mockImplementation(async (actions) => {
callCount++
for (const action of actions) {
if (action.type === 'spawn') {
trackedSessions.add(action.sessionId)
}
}
return {
success: true,
spawnedPaneId: `%${callCount}`,

View File

@@ -213,10 +213,17 @@ export class TmuxSessionManager {
const sessionReady = await this.waitForSessionReady(sessionId)
if (!sessionReady) {
log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
log("[tmux-session-manager] session not ready after timeout, closing spawned pane", {
sessionId,
paneId: result.spawnedPaneId,
})
await executeAction(
{ type: "close", paneId: result.spawnedPaneId, sessionId },
{ config: this.tmuxConfig, serverUrl: this.serverUrl, windowState: state }
)
return
}
const now = Date.now()

View File

@@ -56,12 +56,22 @@ export function canSplitPane(
return pane.height >= MIN_SPLIT_HEIGHT
}
export function canSplitPaneAnyDirection(pane: TmuxPaneInfo): boolean {
return pane.width >= MIN_SPLIT_WIDTH || pane.height >= MIN_SPLIT_HEIGHT
export function canSplitPaneAnyDirection(pane: TmuxPaneInfo, minPaneWidth: number = MIN_PANE_WIDTH): boolean {
return canSplitPaneAnyDirectionWithMinWidth(pane, minPaneWidth)
}
export function getBestSplitDirection(pane: TmuxPaneInfo): SplitDirection | null {
const canH = pane.width >= MIN_SPLIT_WIDTH
export function canSplitPaneAnyDirectionWithMinWidth(
pane: TmuxPaneInfo,
minPaneWidth: number = MIN_PANE_WIDTH,
): boolean {
return pane.width >= minSplitWidthFor(minPaneWidth) || pane.height >= MIN_SPLIT_HEIGHT
}
export function getBestSplitDirection(
pane: TmuxPaneInfo,
minPaneWidth: number = MIN_PANE_WIDTH,
): SplitDirection | null {
const canH = pane.width >= minSplitWidthFor(minPaneWidth)
const canV = pane.height >= MIN_SPLIT_HEIGHT
if (!canH && !canV) return null
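The diff calls `minSplitWidthFor` without showing its body. From the tests earlier in this changeset (`customMinSplitW = 2 * customMin + 1`), it can be inferred as "two panes at the minimum width plus a one-column divider". The helper below is that inference, not the repository's actual implementation:

```typescript
// Inferred from the test fixtures (2 * customMin + 1); treat as an
// assumption about minSplitWidthFor, which the diff references but omits.
function minSplitWidthFor(minPaneWidth: number): number {
  return 2 * minPaneWidth + 1
}

function canSplitHorizontally(paneWidth: number, minPaneWidth: number): boolean {
  // A horizontal split must leave both resulting panes at least minPaneWidth.
  return paneWidth >= minSplitWidthFor(minPaneWidth)
}
```

With `minPaneWidth = 30`, a 61-column pane is splittable and a 60-column pane is not, matching the `customMinSplitW` fixtures above.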

View File

@@ -0,0 +1,56 @@
import { describe, test, expect } from "bun:test"
import { TmuxPollingManager } from "./polling-manager"
import type { TrackedSession } from "./types"
describe("TmuxPollingManager overlap", () => {
test("skips overlapping pollSessions executions", async () => {
//#given
const sessions = new Map<string, TrackedSession>()
sessions.set("ses-1", {
sessionId: "ses-1",
paneId: "%1",
description: "test",
createdAt: new Date(),
lastSeenAt: new Date(),
})
let activeCalls = 0
let maxActiveCalls = 0
let statusCallCount = 0
let releaseStatus: (() => void) | undefined
const statusGate = new Promise<void>((resolve) => {
releaseStatus = resolve
})
const client = {
session: {
status: async () => {
statusCallCount += 1
activeCalls += 1
maxActiveCalls = Math.max(maxActiveCalls, activeCalls)
await statusGate
activeCalls -= 1
return { data: { "ses-1": { type: "running" } } }
},
messages: async () => ({ data: [] }),
},
}
const manager = new TmuxPollingManager(
client as unknown as import("../../tools/delegate-task/types").OpencodeClient,
sessions,
async () => {},
)
//#when
const firstPoll = (manager as unknown as { pollSessions: () => Promise<void> }).pollSessions()
await Promise.resolve()
const secondPoll = (manager as unknown as { pollSessions: () => Promise<void> }).pollSessions()
releaseStatus?.()
await Promise.all([firstPoll, secondPoll])
//#then
expect(maxActiveCalls).toBe(1)
expect(statusCallCount).toBe(1)
})
})

View File

@@ -11,6 +11,7 @@ const STABLE_POLLS_REQUIRED = 3
export class TmuxPollingManager {
private pollInterval?: ReturnType<typeof setInterval>
private pollingInFlight = false
constructor(
private client: OpencodeClient,
@@ -37,12 +38,14 @@ export class TmuxPollingManager {
}
private async pollSessions(): Promise<void> {
if (this.sessions.size === 0) {
this.stopPolling()
return
}
if (this.pollingInFlight) return
this.pollingInFlight = true
try {
if (this.sessions.size === 0) {
this.stopPolling()
return
}
const statusResult = await this.client.session.status({ path: undefined })
const allStatuses = normalizeSDKResponse(statusResult, {} as Record<string, { type: string }>)
@@ -135,6 +138,8 @@ export class TmuxPollingManager {
}
} catch (err) {
log("[tmux-session-manager] poll error", { error: String(err) })
} finally {
this.pollingInFlight = false
}
}
}

View File

@@ -135,10 +135,21 @@ export async function handleSessionCreated(
const sessionReady = await deps.waitForSessionReady(sessionId)
if (!sessionReady) {
log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
log("[tmux-session-manager] session not ready after timeout, closing spawned pane", {
sessionId,
paneId: result.spawnedPaneId,
})
await executeActions(
[{ type: "close", paneId: result.spawnedPaneId, sessionId }],
{
config: deps.tmuxConfig,
serverUrl: deps.serverUrl,
windowState: state,
},
)
return
}
const now = Date.now()

View File

@@ -129,10 +129,21 @@ export class SessionSpawner {
const sessionReady = await this.waitForSessionReady(sessionId)
if (!sessionReady) {
log("[tmux-session-manager] session not ready after timeout, tracking anyway", {
log("[tmux-session-manager] session not ready after timeout, closing spawned pane", {
sessionId,
paneId: result.spawnedPaneId,
})
await executeActions(
[{ type: "close", paneId: result.spawnedPaneId, sessionId }],
{
config: this.tmuxConfig,
serverUrl: this.serverUrl,
windowState: state,
},
)
return
}
const now = Date.now()

View File

@@ -5,7 +5,7 @@ import type {
TmuxPaneInfo,
WindowState,
} from "./types"
import { MAIN_PANE_RATIO } from "./tmux-grid-constants"
import { DIVIDER_SIZE } from "./tmux-grid-constants"
import {
canSplitPane,
findMinimalEvictions,
@@ -26,7 +26,10 @@ export function decideSpawnActions(
}
const minPaneWidth = config.agentPaneWidth
const agentAreaWidth = Math.floor(state.windowWidth * (1 - MAIN_PANE_RATIO))
const agentAreaWidth = Math.max(
0,
state.windowWidth - state.mainPane.width - DIVIDER_SIZE,
)
const currentCount = state.agentPanes.length
if (agentAreaWidth < minPaneWidth) {
@@ -62,7 +65,7 @@ export function decideSpawnActions(
}
if (isSplittableAtCount(agentAreaWidth, currentCount, minPaneWidth)) {
const spawnTarget = findSpawnTarget(state)
const spawnTarget = findSpawnTarget(state, minPaneWidth)
if (spawnTarget) {
return {
canSpawn: true,
@@ -85,19 +88,14 @@ export function decideSpawnActions(
canSpawn: true,
actions: [
{
type: "close",
type: "replace",
paneId: oldestPane.paneId,
sessionId: oldestMapping?.sessionId || "",
},
{
type: "spawn",
sessionId,
oldSessionId: oldestMapping?.sessionId || "",
newSessionId: sessionId,
description,
targetPaneId: state.mainPane.paneId,
splitDirection: "-h",
},
],
reason: "closed 1 pane to make room for split",
reason: "replaced oldest pane to avoid split churn",
}
}

View File

@@ -1,7 +1,7 @@
import type { SplitDirection, TmuxPaneInfo, WindowState } from "./types"
import { MAIN_PANE_RATIO } from "./tmux-grid-constants"
import { computeGridPlan, mapPaneToSlot } from "./grid-planning"
import { canSplitPane, getBestSplitDirection } from "./pane-split-availability"
import { MIN_PANE_WIDTH } from "./types"
export interface SpawnTarget {
targetPaneId: string
@@ -37,6 +37,7 @@ function findFirstEmptySlot(
function findSplittableTarget(
state: WindowState,
minPaneWidth: number,
_preferredDirection?: SplitDirection,
): SpawnTarget | null {
if (!state.mainPane) return null
@@ -44,29 +45,35 @@ function findSplittableTarget(
if (existingCount === 0) {
const virtualMainPane: TmuxPaneInfo = { ...state.mainPane, width: state.windowWidth }
if (canSplitPane(virtualMainPane, "-h")) {
if (canSplitPane(virtualMainPane, "-h", minPaneWidth)) {
return { targetPaneId: state.mainPane.paneId, splitDirection: "-h" }
}
return null
}
const plan = computeGridPlan(state.windowWidth, state.windowHeight, existingCount + 1)
const mainPaneWidth = Math.floor(state.windowWidth * MAIN_PANE_RATIO)
const plan = computeGridPlan(
state.windowWidth,
state.windowHeight,
existingCount + 1,
state.mainPane.width,
minPaneWidth,
)
const mainPaneWidth = state.mainPane.width
const occupancy = buildOccupancy(state.agentPanes, plan, mainPaneWidth)
const targetSlot = findFirstEmptySlot(occupancy, plan)
const leftPane = occupancy.get(`${targetSlot.row}:${targetSlot.col - 1}`)
if (leftPane && canSplitPane(leftPane, "-h")) {
if (leftPane && canSplitPane(leftPane, "-h", minPaneWidth)) {
return { targetPaneId: leftPane.paneId, splitDirection: "-h" }
}
const abovePane = occupancy.get(`${targetSlot.row - 1}:${targetSlot.col}`)
if (abovePane && canSplitPane(abovePane, "-v")) {
if (abovePane && canSplitPane(abovePane, "-v", minPaneWidth)) {
return { targetPaneId: abovePane.paneId, splitDirection: "-v" }
}
const splittablePanes = state.agentPanes
.map((pane) => ({ pane, direction: getBestSplitDirection(pane) }))
.map((pane) => ({ pane, direction: getBestSplitDirection(pane, minPaneWidth) }))
.filter(
(item): item is { pane: TmuxPaneInfo; direction: SplitDirection } =>
item.direction !== null,
@@ -81,6 +88,9 @@ function findSplittableTarget(
return null
}
export function findSpawnTarget(state: WindowState): SpawnTarget | null {
return findSplittableTarget(state)
export function findSpawnTarget(
state: WindowState,
minPaneWidth: number = MIN_PANE_WIDTH,
): SpawnTarget | null {
return findSplittableTarget(state, minPaneWidth)
}

View File

@@ -1,6 +1,7 @@
import { describe, test, expect, mock, beforeEach } from "bun:test"
import { describe, test, expect, mock, beforeEach, afterAll } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import type { ExperimentalConfig } from "../../config"
import * as originalDeduplicationRecovery from "./deduplication-recovery"
const attemptDeduplicationRecoveryMock = mock(async () => {})
@@ -8,6 +9,10 @@ mock.module("./deduplication-recovery", () => ({
attemptDeduplicationRecovery: attemptDeduplicationRecoveryMock,
}))
afterAll(() => {
mock.module("./deduplication-recovery", () => originalDeduplicationRecovery)
})
function createImmediateTimeouts(): () => void {
const originalSetTimeout = globalThis.setTimeout
const originalClearTimeout = globalThis.clearTimeout

View File

@@ -1,4 +1,4 @@
import { describe, test, expect, mock, beforeEach } from "bun:test"
import { describe, test, expect, mock, beforeEach, afterAll } from "bun:test"
import { truncateUntilTargetTokens } from "./storage"
import * as storage from "./storage"
@@ -11,6 +11,10 @@ mock.module("./storage", () => {
}
})
afterAll(() => {
mock.module("./storage", () => storage)
})
describe("truncateUntilTargetTokens", () => {
const sessionID = "test-session"

View File

@@ -7,7 +7,7 @@ function normalizeModelID(modelID: string): string {
}
function isClaudeProvider(providerID: string, modelID: string): boolean {
if (["anthropic", "opencode"].includes(providerID)) return true
if (["anthropic", "google-vertex-anthropic", "opencode"].includes(providerID)) return true
if (providerID === "github-copilot" && modelID.toLowerCase().includes("claude")) return true
return false
}

View File

@@ -88,6 +88,21 @@ describe("createAnthropicEffortHook", () => {
expect(output.options.effort).toBe("max")
})
it("should inject effort max for google-vertex-anthropic provider", async () => {
//#given google-vertex-anthropic provider with claude-opus-4-6
const hook = createAnthropicEffortHook()
const { input, output } = createMockParams({
providerID: "google-vertex-anthropic",
modelID: "claude-opus-4-6",
})
//#when chat.params hook is called
await hook["chat.params"](input, output)
//#then effort should be injected
expect(output.options.effort).toBe("max")
})
it("should handle normalized model ID with dots (opus-4.6)", async () => {
//#given model ID with dots instead of hyphens
const hook = createAnthropicEffortHook()

View File

@@ -0,0 +1,83 @@
import { describe, it, expect, mock } from "bun:test"
const mockShowConfigErrorsIfAny = mock(async () => {})
const mockShowModelCacheWarningIfNeeded = mock(async () => {})
const mockUpdateAndShowConnectedProvidersCacheStatus = mock(async () => {})
const mockShowLocalDevToast = mock(async () => {})
const mockShowVersionToast = mock(async () => {})
const mockRunBackgroundUpdateCheck = mock(async () => {})
const mockGetCachedVersion = mock(() => "3.6.0")
const mockGetLocalDevVersion = mock(() => "3.6.0")
mock.module("./hook/config-errors-toast", () => ({
showConfigErrorsIfAny: mockShowConfigErrorsIfAny,
}))
mock.module("./hook/model-cache-warning", () => ({
showModelCacheWarningIfNeeded: mockShowModelCacheWarningIfNeeded,
}))
mock.module("./hook/connected-providers-status", () => ({
updateAndShowConnectedProvidersCacheStatus:
mockUpdateAndShowConnectedProvidersCacheStatus,
}))
mock.module("./hook/startup-toasts", () => ({
showLocalDevToast: mockShowLocalDevToast,
showVersionToast: mockShowVersionToast,
}))
mock.module("./hook/background-update-check", () => ({
runBackgroundUpdateCheck: mockRunBackgroundUpdateCheck,
}))
mock.module("./checker", () => ({
getCachedVersion: mockGetCachedVersion,
getLocalDevVersion: mockGetLocalDevVersion,
}))
mock.module("../../shared/logger", () => ({
log: () => {},
}))
const { createAutoUpdateCheckerHook } = await import("./hook")
describe("createAutoUpdateCheckerHook", () => {
it("skips startup toasts and checks in CLI run mode", async () => {
//#given - CLI run mode enabled
process.env.OPENCODE_CLI_RUN_MODE = "true"
mockShowConfigErrorsIfAny.mockClear()
mockShowModelCacheWarningIfNeeded.mockClear()
mockUpdateAndShowConnectedProvidersCacheStatus.mockClear()
mockShowLocalDevToast.mockClear()
mockShowVersionToast.mockClear()
mockRunBackgroundUpdateCheck.mockClear()
const hook = createAutoUpdateCheckerHook(
{
directory: "/test",
client: {} as never,
} as never,
{ showStartupToast: true, isSisyphusEnabled: true, autoUpdate: true }
)
//#when - session.created event arrives
hook.event({
event: {
type: "session.created",
properties: { info: { parentID: undefined } },
},
})
await new Promise((resolve) => setTimeout(resolve, 25))
//#then - no update checker side effects run
expect(mockShowConfigErrorsIfAny).not.toHaveBeenCalled()
expect(mockShowModelCacheWarningIfNeeded).not.toHaveBeenCalled()
expect(mockUpdateAndShowConnectedProvidersCacheStatus).not.toHaveBeenCalled()
expect(mockShowLocalDevToast).not.toHaveBeenCalled()
expect(mockShowVersionToast).not.toHaveBeenCalled()
expect(mockRunBackgroundUpdateCheck).not.toHaveBeenCalled()
delete process.env.OPENCODE_CLI_RUN_MODE
})
})

View File

@@ -10,6 +10,7 @@ import { showLocalDevToast, showVersionToast } from "./hook/startup-toasts"
export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdateCheckerOptions = {}) {
const { showStartupToast = true, isSisyphusEnabled = false, autoUpdate = true } = options
const isCliRunMode = process.env.OPENCODE_CLI_RUN_MODE === "true"
const getToastMessage = (isUpdate: boolean, latestVersion?: string): string => {
if (isSisyphusEnabled) {
@@ -27,6 +28,7 @@ export function createAutoUpdateCheckerHook(ctx: PluginInput, options: AutoUpdat
return {
event: ({ event }: { event: { type: string; properties?: unknown } }) => {
if (event.type !== "session.created") return
if (isCliRunMode) return
if (hasChecked) return
const props = event.properties as { info?: { parentID?: string } } | undefined

View File

@@ -0,0 +1,38 @@
import { describe, test, expect } from "bun:test"
describe("pending-calls cleanup interval", () => {
test("starts cleanup once and unrefs timer", async () => {
//#given
const originalSetInterval = globalThis.setInterval
const setIntervalCalls: number[] = []
let unrefCalled = 0
globalThis.setInterval = ((
_handler: TimerHandler,
timeout?: number,
..._args: any[]
) => {
setIntervalCalls.push(timeout as number)
return {
unref: () => {
unrefCalled += 1
},
} as unknown as ReturnType<typeof setInterval>
}) as unknown as typeof setInterval
try {
const modulePath = new URL("./pending-calls.ts", import.meta.url).pathname
const pendingCallsModule = await import(`${modulePath}?pending-calls-test-once`)
//#when
pendingCallsModule.startPendingCallCleanup()
pendingCallsModule.startPendingCallCleanup()
//#then
expect(setIntervalCalls).toEqual([10_000])
expect(unrefCalled).toBe(1)
} finally {
globalThis.setInterval = originalSetInterval
}
})
})

View File

@@ -4,6 +4,7 @@ const pendingCalls = new Map<string, PendingCall>()
const PENDING_CALL_TTL = 60_000
let cleanupIntervalStarted = false
let cleanupInterval: ReturnType<typeof setInterval> | undefined
function cleanupOldPendingCalls(): void {
const now = Date.now()
@@ -17,7 +18,10 @@ function cleanupOldPendingCalls(): void {
export function startPendingCallCleanup(): void {
if (cleanupIntervalStarted) return
cleanupIntervalStarted = true
setInterval(cleanupOldPendingCalls, 10_000)
cleanupInterval = setInterval(cleanupOldPendingCalls, 10_000)
if (typeof cleanupInterval === "object" && "unref" in cleanupInterval) {
cleanupInterval.unref()
}
}
export function registerPendingCall(callID: string, pendingCall: PendingCall): void {

View File

@@ -1,5 +1,7 @@
import { describe, expect, it, mock } from "bun:test"
import { describe, expect, it, afterAll, mock } from "bun:test"
import type { PluginInput } from "@opencode-ai/plugin"
import { createOpencodeClient } from "@opencode-ai/sdk"
import type { Todo } from "@opencode-ai/sdk"
import { createCompactionTodoPreserverHook } from "./index"
const updateMock = mock(async () => {})
@@ -10,27 +12,37 @@ mock.module("opencode/session/todo", () => ({
},
}))
type TodoSnapshot = {
id: string
content: string
status: "pending" | "in_progress" | "completed" | "cancelled"
priority?: "low" | "medium" | "high"
}
function createMockContext(todoResponses: TodoSnapshot[][]): PluginInput {
let callIndex = 0
return {
client: {
session: {
todo: async () => {
const current = todoResponses[Math.min(callIndex, todoResponses.length - 1)] ?? []
callIndex += 1
return { data: current }
},
},
afterAll(() => {
mock.module("opencode/session/todo", () => ({
Todo: {
update: async () => {},
},
}))
})
function createMockContext(todoResponses: Array<Todo>[]): PluginInput {
let callIndex = 0
const client = createOpencodeClient({ directory: "/tmp/test" })
type SessionTodoOptions = Parameters<typeof client.session.todo>[0]
type SessionTodoResult = ReturnType<typeof client.session.todo>
const request = new Request("http://localhost")
const response = new Response()
client.session.todo = mock((_: SessionTodoOptions): SessionTodoResult => {
const current = todoResponses[Math.min(callIndex, todoResponses.length - 1)] ?? []
callIndex += 1
return Promise.resolve({ data: current, error: undefined, request, response })
})
return {
client,
project: { id: "test-project", worktree: "/tmp/test", time: { created: Date.now() } },
directory: "/tmp/test",
} as PluginInput
worktree: "/tmp/test",
serverUrl: new URL("http://localhost"),
$: Bun.$,
}
}
describe("compaction-todo-preserver", () => {
@@ -38,7 +50,7 @@ describe("compaction-todo-preserver", () => {
//#given
updateMock.mockClear()
const sessionID = "session-compaction-missing"
const todos = [
const todos: Todo[] = [
{ id: "1", content: "Task 1", status: "pending", priority: "high" },
{ id: "2", content: "Task 2", status: "in_progress", priority: "medium" },
]
@@ -58,7 +70,7 @@ describe("compaction-todo-preserver", () => {
//#given
updateMock.mockClear()
const sessionID = "session-compaction-present"
const todos = [
const todos: Todo[] = [
{ id: "1", content: "Task 1", status: "pending", priority: "high" },
]
const ctx = createMockContext([todos, todos])

View File

@@ -113,6 +113,42 @@ describe("context-window-monitor", () => {
expect(ctx.client.session.messages).not.toHaveBeenCalled()
})
it("should append context reminder for google-vertex-anthropic provider", async () => {
//#given cached usage for google-vertex-anthropic above threshold
const hook = createContextWindowMonitorHook(ctx as never)
const sessionID = "ses_vertex_anthropic_high_usage"
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "google-vertex-anthropic",
finish: true,
tokens: {
input: 150000,
output: 1000,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
//#when tool.execute.after runs
const output = { title: "", output: "original", metadata: null }
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_1" },
output
)
//#then context reminder should be appended
expect(output.output).toContain("context remaining")
})
// #given session is deleted
// #when session.deleted event fires
// #then cached data should be cleaned up

View File

@@ -27,6 +27,10 @@ interface CachedTokenState {
tokens: TokenInfo
}
function isAnthropicProvider(providerID: string): boolean {
return providerID === "anthropic" || providerID === "google-vertex-anthropic"
}
export function createContextWindowMonitorHook(_ctx: PluginInput) {
const remindedSessions = new Set<string>()
const tokenCache = new Map<string, CachedTokenState>()
@@ -42,7 +46,7 @@ export function createContextWindowMonitorHook(_ctx: PluginInput) {
const cached = tokenCache.get(sessionID)
if (!cached) return
if (cached.providerID !== "anthropic") return
if (!isAnthropicProvider(cached.providerID)) return
const lastTokens = cached.tokens
const totalInputTokens = (lastTokens?.input ?? 0) + (lastTokens?.cache?.read ?? 0)

View File

@@ -0,0 +1,66 @@
import type { PluginInput } from "@opencode-ai/plugin"
import { computeLineHash } from "../../tools/hashline-edit/hash-computation"
interface HashlineReadEnhancerConfig {
hashline_edit?: { enabled: boolean }
}
const READ_LINE_PATTERN = /^(\d+): (.*)$/
function isReadTool(toolName: string): boolean {
return toolName.toLowerCase() === "read"
}
function shouldProcess(config: HashlineReadEnhancerConfig): boolean {
return config.hashline_edit?.enabled ?? false
}
function isTextFile(output: string): boolean {
const firstLine = output.split("\n")[0] ?? ""
return READ_LINE_PATTERN.test(firstLine)
}
function transformLine(line: string): string {
const match = READ_LINE_PATTERN.exec(line)
if (!match) {
return line
}
const lineNumber = parseInt(match[1], 10)
const content = match[2]
const hash = computeLineHash(lineNumber, content)
return `${lineNumber}:${hash}|${content}`
}
function transformOutput(output: string): string {
if (!output) {
return output
}
if (!isTextFile(output)) {
return output
}
const lines = output.split("\n")
return lines.map(transformLine).join("\n")
}
export function createHashlineReadEnhancerHook(
_ctx: PluginInput,
config: HashlineReadEnhancerConfig
) {
return {
"tool.execute.after": async (
input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown }
) => {
if (!isReadTool(input.tool)) {
return
}
if (typeof output.output !== "string") {
return
}
if (!shouldProcess(config)) {
return
}
output.output = transformOutput(output.output)
},
}
}
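For reference, a self-contained sketch of the line transform this hook performs. The hash function below is a hypothetical 1-byte stand-in; the real `computeLineHash` lives in `../../tools/hashline-edit/hash-computation` and may differ — this only illustrates the `N:HASH|content` output shape:

```typescript
const READ_LINE_PATTERN = /^(\d+): (.*)$/

// Hypothetical stand-in: a 1-byte rolling hash over line number and content.
function computeLineHashSketch(lineNumber: number, content: string): string {
  let h = lineNumber & 0xff
  for (const ch of content) h = (h * 31 + (ch.codePointAt(0) ?? 0)) & 0xff
  return h.toString(16).padStart(2, "0")
}

function transformLineSketch(line: string): string {
  const match = READ_LINE_PATTERN.exec(line)
  // Non-matching lines (e.g. binary output) pass through unchanged.
  if (!match) return line
  const lineNumber = parseInt(match[1], 10)
  const content = match[2]
  return `${lineNumber}:${computeLineHashSketch(lineNumber, content)}|${content}`
}
```

`transformLineSketch("1: const x = 1")` yields a string of the shape `1:ab|const x = 1`, matching the `[a-f0-9]{2}` expectations in the tests below.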

View File

@@ -0,0 +1,248 @@
import { describe, it, expect, beforeEach } from "bun:test"
import { createHashlineReadEnhancerHook } from "./hook"
import type { PluginInput } from "@opencode-ai/plugin"
//#given - Test setup helpers
function createMockContext(): PluginInput {
return {
client: {} as unknown as PluginInput["client"],
directory: "/test",
}
}
interface TestConfig {
hashline_edit?: { enabled: boolean }
}
function createMockConfig(enabled: boolean): TestConfig {
return {
hashline_edit: { enabled },
}
}
describe("createHashlineReadEnhancerHook", () => {
let mockCtx: PluginInput
const sessionID = "test-session-123"
beforeEach(() => {
mockCtx = createMockContext()
})
describe("tool name matching", () => {
it("should process 'read' tool (lowercase)", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: hello\n2: world", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toContain("1:")
expect(output.output).toContain("|")
})
it("should process 'Read' tool (mixed case)", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "Read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: hello\n2: world", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toContain("|")
})
it("should process 'READ' tool (uppercase)", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "READ", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: hello\n2: world", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toContain("|")
})
it("should skip non-read tools", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "edit", sessionID, callID: "call-1" }
const originalOutput = "1: hello\n2: world"
const output = { title: "Edit", output: originalOutput, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe(originalOutput)
})
})
describe("config flag check", () => {
it("should skip when hashline_edit is disabled", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(false))
const input = { tool: "read", sessionID, callID: "call-1" }
const originalOutput = "1: hello\n2: world"
const output = { title: "Read", output: originalOutput, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe(originalOutput)
})
it("should skip when hashline_edit config is missing", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, {})
const input = { tool: "read", sessionID, callID: "call-1" }
const originalOutput = "1: hello\n2: world"
const output = { title: "Read", output: originalOutput, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe(originalOutput)
})
})
describe("output transformation", () => {
it("should transform 'N: content' format to 'N:HASH|content'", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: function hello() {\n2: console.log('world')\n3: }", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
const lines = output.output.split("\n")
expect(lines[0]).toMatch(/^1:[a-f0-9]{2}\|function hello\(\) \{$/)
expect(lines[1]).toMatch(/^2:[a-f0-9]{2}\| console\.log\('world'\)$/)
expect(lines[2]).toMatch(/^3:[a-f0-9]{2}\|\}$/)
})
it("should handle empty output", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe("")
})
it("should handle single line", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: const x = 1", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toMatch(/^1:[a-f0-9]{2}\|const x = 1$/)
})
})
describe("binary file detection", () => {
it("should skip binary files (no line number prefix)", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const originalOutput = "PNG\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"
const output = { title: "Read", output: originalOutput, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe(originalOutput)
})
it("should skip if first line doesn't match pattern", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const originalOutput = "some binary data\nmore data"
const output = { title: "Read", output: originalOutput, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBe(originalOutput)
})
it("should process if first line matches 'N: ' pattern", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: valid line\n2: another line", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toContain("|")
})
})
describe("edge cases", () => {
it("should handle non-string output gracefully", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: null as unknown as string, metadata: {} }
//#when - should not throw
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toBeNull()
})
it("should handle lines with no content after colon", async () => {
//#given
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: "1: hello\n2: \n3: world", metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
const lines = output.output.split("\n")
expect(lines[0]).toMatch(/^1:[a-f0-9]{2}\|hello$/)
expect(lines[1]).toMatch(/^2:[a-f0-9]{2}\|$/)
expect(lines[2]).toMatch(/^3:[a-f0-9]{2}\|world$/)
})
it("should handle very long lines", async () => {
//#given
const longContent = "a".repeat(1000)
const hook = createHashlineReadEnhancerHook(mockCtx, createMockConfig(true))
const input = { tool: "read", sessionID, callID: "call-1" }
const output = { title: "Read", output: `1: ${longContent}`, metadata: {} }
//#when
await hook["tool.execute.after"](input, output)
//#then
expect(output.output).toMatch(/^1:[a-f0-9]{2}\|a+$/)
})
})
})

View File

@@ -0,0 +1 @@
export { createHashlineReadEnhancerHook } from "./hook"

View File

@@ -43,3 +43,4 @@ export { createUnstableAgentBabysitterHook } from "./unstable-agent-babysitter";
export { createPreemptiveCompactionHook } from "./preemptive-compaction";
export { createTasksTodowriteDisablerHook } from "./tasks-todowrite-disabler";
export { createWriteExistingFileGuardHook } from "./write-existing-file-guard";
export { createHashlineReadEnhancerHook } from "./hashline-read-enhancer";

View File

@@ -123,6 +123,43 @@ describe("preemptive-compaction", () => {
expect(ctx.client.session.summarize).toHaveBeenCalled()
})
it("should trigger compaction for google-vertex-anthropic provider", async () => {
//#given google-vertex-anthropic usage above threshold
const hook = createPreemptiveCompactionHook(ctx as never)
const sessionID = "ses_vertex_anthropic_high"
await hook.event({
event: {
type: "message.updated",
properties: {
info: {
role: "assistant",
sessionID,
providerID: "google-vertex-anthropic",
modelID: "claude-sonnet-4-5",
finish: true,
tokens: {
input: 170000,
output: 1000,
reasoning: 0,
cache: { read: 10000, write: 0 },
},
},
},
},
})
//#when tool.execute.after runs
const output = { title: "", output: "test", metadata: null }
await hook["tool.execute.after"](
{ tool: "bash", sessionID, callID: "call_1" },
output
)
//#then summarize should be triggered
expect(ctx.client.session.summarize).toHaveBeenCalled()
})
// #given session deleted
// #then cache should be cleaned up
it("should clean up cache on session.deleted", async () => {

View File

@@ -23,6 +23,10 @@ interface CachedCompactionState {
tokens: TokenInfo
}
function isAnthropicProvider(providerID: string): boolean {
return providerID === "anthropic" || providerID === "google-vertex-anthropic"
}
type PluginInput = {
client: {
session: {
@@ -55,7 +59,7 @@ export function createPreemptiveCompactionHook(ctx: PluginInput) {
if (!cached) return
const actualLimit =
cached.providerID === "anthropic"
isAnthropicProvider(cached.providerID)
? ANTHROPIC_ACTUAL_LIMIT
: DEFAULT_ACTUAL_LIMIT

View File

@@ -214,6 +214,27 @@ describe("createThinkModeHook integration", () => {
expect(message.thinking).toBeDefined()
})
it("should work for direct google-vertex-anthropic provider", async () => {
//#given direct google-vertex-anthropic provider
const hook = createThinkModeHook()
const input = createMockInput(
"google-vertex-anthropic",
"claude-opus-4-6",
"think deeply"
)
//#when the chat.params hook is called
await hook["chat.params"](input, sessionID)
//#then should upgrade model and inject Claude thinking config
const message = input.message as MessageWithInjectedProps
expect(input.message.model?.modelID).toBe("claude-opus-4-6-high")
expect(message.thinking).toBeDefined()
expect((message.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", async () => {
// given direct google provider
const hook = createThinkModeHook()

View File

@@ -266,6 +266,24 @@ describe("think-mode switcher", () => {
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
})
it("should work for direct google-vertex-anthropic provider", () => {
//#given direct google-vertex-anthropic provider
const config = getThinkingConfig(
"google-vertex-anthropic",
"claude-opus-4-6"
)
//#when thinking config is resolved
//#then it should return anthropic-style thinking config
expect(config).not.toBeNull()
expect(config?.thinking).toBeDefined()
expect((config?.thinking as Record<string, unknown>)?.type).toBe("enabled")
expect((config?.thinking as Record<string, unknown>)?.budgetTokens).toBe(
64000
)
})
it("should still work for direct google provider", () => {
// given direct google provider
const config = getThinkingConfig("google", "gemini-3-pro")
@@ -314,6 +332,17 @@ describe("think-mode switcher", () => {
expect(config.maxTokens).toBe(128000)
})
it("should have correct structure for google-vertex-anthropic", () => {
//#given google-vertex-anthropic config entry
const config = THINKING_CONFIGS["google-vertex-anthropic"]
//#when structure is validated
//#then it should match anthropic style structure
expect(config.thinking).toBeDefined()
expect(config.maxTokens).toBe(128000)
})
it("should have correct structure for google", () => {
const config = THINKING_CONFIGS.google
expect(config.providerOptions).toBeDefined()

View File

@@ -121,6 +121,13 @@ export const THINKING_CONFIGS = {
},
maxTokens: 128000,
},
"google-vertex-anthropic": {
thinking: {
type: "enabled",
budgetTokens: 64000,
},
maxTokens: 128000,
},
"amazon-bedrock": {
reasoningConfig: {
type: "enabled",
@@ -164,6 +171,7 @@ export const THINKING_CONFIGS = {
const THINKING_CAPABLE_MODELS = {
anthropic: ["claude-sonnet-4", "claude-opus-4", "claude-3"],
"google-vertex-anthropic": ["claude-sonnet-4", "claude-opus-4", "claude-3"],
"amazon-bedrock": ["claude", "anthropic"],
google: ["gemini-2", "gemini-3"],
"google-vertex": ["gemini-2", "gemini-3"],

View File

@@ -1,4 +1,119 @@
import { describe, expect, it } from "bun:test"
import { describe, expect, it, mock } from "bun:test"
describe("experimental.session.compacting handler", () => {
function createCompactingHandler(hooks: {
compactionTodoPreserver?: { capture: (sessionID: string) => Promise<void> }
claudeCodeHooks?: {
"experimental.session.compacting"?: (
input: { sessionID: string },
output: { context: string[] },
) => Promise<void>
}
compactionContextInjector?: (sessionID: string) => string
}) {
return async (
_input: { sessionID: string },
output: { context: string[] },
): Promise<void> => {
await hooks.compactionTodoPreserver?.capture(_input.sessionID)
await hooks.claudeCodeHooks?.["experimental.session.compacting"]?.(
_input,
output,
)
if (hooks.compactionContextInjector) {
output.context.push(hooks.compactionContextInjector(_input.sessionID))
}
}
}
//#given all three hooks are present
//#when compacting handler is invoked
//#then all hooks are called in order: capture → PreCompact → contextInjector
it("calls claudeCodeHooks PreCompact alongside other hooks", async () => {
const callOrder: string[] = []
const handler = createCompactingHandler({
compactionTodoPreserver: {
capture: mock(async () => { callOrder.push("capture") }),
},
claudeCodeHooks: {
"experimental.session.compacting": mock(async () => {
callOrder.push("preCompact")
}),
},
compactionContextInjector: mock((sessionID: string) => {
callOrder.push("contextInjector")
return `context-for-${sessionID}`
}),
})
const output = { context: [] as string[] }
await handler({ sessionID: "ses_test" }, output)
expect(callOrder).toEqual(["capture", "preCompact", "contextInjector"])
expect(output.context).toEqual(["context-for-ses_test"])
})
//#given claudeCodeHooks injects context during PreCompact
//#when compacting handler is invoked
//#then injected context from PreCompact is preserved in output
it("preserves context injected by PreCompact hooks", async () => {
const handler = createCompactingHandler({
claudeCodeHooks: {
"experimental.session.compacting": async (_input, output) => {
output.context.push("precompact-injected-context")
},
},
})
const output = { context: [] as string[] }
await handler({ sessionID: "ses_test" }, output)
expect(output.context).toContain("precompact-injected-context")
})
//#given claudeCodeHooks is null (no claude code hooks configured)
//#when compacting handler is invoked
//#then handler completes without error and other hooks still run
it("handles null claudeCodeHooks gracefully", async () => {
const captureMock = mock(async () => {})
const contextMock = mock(() => "injected-context")
const handler = createCompactingHandler({
compactionTodoPreserver: { capture: captureMock },
claudeCodeHooks: undefined,
compactionContextInjector: contextMock,
})
const output = { context: [] as string[] }
await handler({ sessionID: "ses_test" }, output)
expect(captureMock).toHaveBeenCalledWith("ses_test")
expect(contextMock).toHaveBeenCalledWith("ses_test")
expect(output.context).toEqual(["injected-context"])
})
//#given compactionContextInjector is null
//#when compacting handler is invoked
//#then handler does not early-return, PreCompact hooks still execute
it("does not early-return when compactionContextInjector is null", async () => {
const preCompactMock = mock(async () => {})
const handler = createCompactingHandler({
claudeCodeHooks: {
"experimental.session.compacting": preCompactMock,
},
compactionContextInjector: undefined,
})
const output = { context: [] as string[] }
await handler({ sessionID: "ses_test" }, output)
expect(preCompactMock).toHaveBeenCalled()
expect(output.context).toEqual([])
})
})
/**
* Tests for conditional tool registration logic in index.ts
*

View File

@@ -44,6 +44,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
pluginConfig,
tmuxConfig,
modelCacheState,
backgroundNotificationHookEnabled: isHookEnabled("background-notification"),
})
const toolsResult = await createTools({
@@ -79,10 +80,13 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
output: { context: string[] },
): Promise<void> => {
await hooks.compactionTodoPreserver?.capture(_input.sessionID)
if (!hooks.compactionContextInjector) {
return
await hooks.claudeCodeHooks?.["experimental.session.compacting"]?.(
_input,
output,
)
if (hooks.compactionContextInjector) {
output.context.push(hooks.compactionContextInjector(_input.sessionID))
}
output.context.push(hooks.compactionContextInjector(_input.sessionID))
},
}
}
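The fix above removes an early return that skipped the PreCompact hooks whenever `compactionContextInjector` was absent. A sketch of the corrected control flow (hook shapes are assumed for illustration):

```typescript
type CompactingHooks = {
  capture?: (sessionID: string) => Promise<void>
  preCompact?: (
    input: { sessionID: string },
    output: { context: string[] },
  ) => Promise<void>
  contextInjector?: (sessionID: string) => string
}

// Every optional hook runs; a missing contextInjector no longer
// short-circuits the PreCompact step.
async function onCompacting(
  hooks: CompactingHooks,
  input: { sessionID: string },
  output: { context: string[] },
): Promise<void> {
  await hooks.capture?.(input.sessionID)
  await hooks.preCompact?.(input, output)
  if (hooks.contextInjector) {
    output.context.push(hooks.contextInjector(input.sessionID))
  }
}
```

Optional chaining (`?.`) makes each hook independently skippable, which is what the "handles null claudeCodeHooks gracefully" test exercises.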

View File

@@ -23,6 +23,11 @@ type AgentConfigRecord = Record<string, Record<string, unknown> | undefined> & {
plan?: Record<string, unknown>;
};
function hasConfiguredDefaultAgent(config: Record<string, unknown>): boolean {
const defaultAgent = config.default_agent;
return typeof defaultAgent === "string" && defaultAgent.trim().length > 0;
}
export async function applyAgentConfig(params: {
config: Record<string, unknown>;
pluginConfig: OhMyOpenCodeConfig;
@@ -106,7 +111,10 @@ export async function applyAgentConfig(params: {
const configAgent = params.config.agent as AgentConfigRecord | undefined;
if (isSisyphusEnabled && builtinAgents.sisyphus) {
(params.config as { default_agent?: string }).default_agent = getAgentDisplayName("sisyphus");
if (!hasConfiguredDefaultAgent(params.config)) {
(params.config as { default_agent?: string }).default_agent =
getAgentDisplayName("sisyphus");
}
const agentConfig: Record<string, unknown> = {
sisyphus: builtinAgents.sisyphus,

View File

@@ -349,6 +349,55 @@ describe("Agent permission defaults", () => {
})
})
describe("default_agent behavior with Sisyphus orchestration", () => {
test("preserves existing default_agent when already set", async () => {
// #given
const pluginConfig: OhMyOpenCodeConfig = {}
const config: Record<string, unknown> = {
model: "anthropic/claude-opus-4-6",
default_agent: "hephaestus",
agent: {},
}
const handler = createConfigHandler({
ctx: { directory: "/tmp" },
pluginConfig,
modelCacheState: {
anthropicContext1MEnabled: false,
modelContextLimitsCache: new Map(),
},
})
// #when
await handler(config)
// #then
expect(config.default_agent).toBe("hephaestus")
})
test("sets default_agent to sisyphus when missing", async () => {
// #given
const pluginConfig: OhMyOpenCodeConfig = {}
const config: Record<string, unknown> = {
model: "anthropic/claude-opus-4-6",
agent: {},
}
const handler = createConfigHandler({
ctx: { directory: "/tmp" },
pluginConfig,
modelCacheState: {
anthropicContext1MEnabled: false,
modelContextLimitsCache: new Map(),
},
})
// #when
await handler(config)
// #then
expect(config.default_agent).toBe(getAgentDisplayName("sisyphus"))
})
})
describe("Prometheus category config resolution", () => {
test("resolves ultrabrain category config", () => {
// given

View File

@@ -10,6 +10,7 @@ import {
createRulesInjectorHook,
createTasksTodowriteDisablerHook,
createWriteExistingFileGuardHook,
createHashlineReadEnhancerHook,
} from "../../hooks"
import {
getOpenCodeVersion,
@@ -28,6 +29,7 @@ export type ToolGuardHooks = {
rulesInjector: ReturnType<typeof createRulesInjectorHook> | null
tasksTodowriteDisabler: ReturnType<typeof createTasksTodowriteDisablerHook> | null
writeExistingFileGuard: ReturnType<typeof createWriteExistingFileGuardHook> | null
hashlineReadEnhancer: ReturnType<typeof createHashlineReadEnhancerHook> | null
}
export function createToolGuardHooks(args: {
@@ -85,6 +87,10 @@ export function createToolGuardHooks(args: {
? safeHook("write-existing-file-guard", () => createWriteExistingFileGuardHook(ctx))
: null
const hashlineReadEnhancer = isHookEnabled("hashline-read-enhancer")
? safeHook("hashline-read-enhancer", () => createHashlineReadEnhancerHook(ctx, { hashline_edit: { enabled: pluginConfig.experimental?.hashline_edit ?? false } }))
: null
return {
commentChecker,
toolOutputTruncator,
@@ -94,5 +100,6 @@ export function createToolGuardHooks(args: {
rulesInjector,
tasksTodowriteDisabler,
writeExistingFileGuard,
hashlineReadEnhancer,
}
}

View File

@@ -43,5 +43,6 @@ export function createToolExecuteAfterHandler(args: {
await hooks.delegateTaskRetry?.["tool.execute.after"]?.(input, output)
await hooks.atlasHook?.["tool.execute.after"]?.(input, output)
await hooks.taskResumeInfo?.["tool.execute.after"]?.(input, output)
await hooks.hashlineReadEnhancer?.["tool.execute.after"]?.(input, output)
}
}

View File

@@ -29,7 +29,6 @@ export function createToolExecuteBeforeHandler(args: {
await hooks.prometheusMdOnly?.["tool.execute.before"]?.(input, output)
await hooks.sisyphusJuniorNotepad?.["tool.execute.before"]?.(input, output)
await hooks.atlasHook?.["tool.execute.before"]?.(input, output)
if (input.tool === "task") {
const argsObject = output.args
const category = typeof argsObject.category === "string" ? argsObject.category : undefined

View File

@@ -25,6 +25,7 @@ import {
createTaskGetTool,
createTaskList,
createTaskUpdateTool,
createHashlineEditTool,
} from "../tools"
import { getMainSessionID } from "../features/claude-code-session-state"
import { filterDisabledTools } from "../shared/disabled-tools"
@@ -48,7 +49,7 @@ export function createToolRegistry(args: {
const { ctx, pluginConfig, managers, skillContext, availableCategories } = args
const backgroundTools = createBackgroundTools(managers.backgroundManager, ctx.client)
const callOmoAgent = createCallOmoAgent(ctx, managers.backgroundManager)
const callOmoAgent = createCallOmoAgent(ctx, managers.backgroundManager, pluginConfig.disabled_agents ?? [])
const isMultimodalLookerEnabled = !(pluginConfig.disabled_agents ?? []).some(
(agent) => agent.toLowerCase() === "multimodal-looker",
@@ -117,6 +118,11 @@ export function createToolRegistry(args: {
}
: {}
const hashlineEnabled = pluginConfig.experimental?.hashline_edit ?? false
const hashlineToolsRecord: Record<string, ToolDefinition> = hashlineEnabled
? { edit: createHashlineEditTool() }
: {}
const allTools: Record<string, ToolDefinition> = {
...builtinTools,
...createGrepTools(ctx),
@@ -132,6 +138,7 @@ export function createToolRegistry(args: {
slashcommand: slashcommandTool,
interactive_bash,
...taskToolsRecord,
...hashlineToolsRecord,
}
const filteredTools = filterDisabledTools(allTools, pluginConfig.disabled_tools)

View File

@@ -2,29 +2,64 @@ import { readConnectedProvidersCache } from "./connected-providers-cache"
import { log } from "./logger"
import { fuzzyMatchModel } from "./model-name-matcher"
export function isAnyFallbackModelAvailable(
fallbackChain: Array<{ providers: string[]; model: string }>,
type FallbackEntry = { providers: string[]; model: string }
type ResolvedFallbackModel = {
provider: string
model: string
}
export function resolveFirstAvailableFallback(
fallbackChain: FallbackEntry[],
availableModels: Set<string>,
): boolean {
if (availableModels.size > 0) {
for (const entry of fallbackChain) {
const hasAvailableProvider = entry.providers.some((provider) => {
return fuzzyMatchModel(entry.model, availableModels, [provider]) !== null
): ResolvedFallbackModel | null {
for (const entry of fallbackChain) {
for (const provider of entry.providers) {
const matchedModel = fuzzyMatchModel(entry.model, availableModels, [provider])
log("[resolveFirstAvailableFallback] attempt", {
provider,
requestedModel: entry.model,
resolvedModel: matchedModel,
})
if (hasAvailableProvider) {
return true
if (matchedModel !== null) {
log("[resolveFirstAvailableFallback] resolved", {
provider,
requestedModel: entry.model,
resolvedModel: matchedModel,
})
return { provider, model: matchedModel }
}
}
}
log("[resolveFirstAvailableFallback] WARNING: no fallback model resolved", {
chain: fallbackChain.map((entry) => ({
model: entry.model,
providers: entry.providers,
})),
availableCount: availableModels.size,
})
return null
}
export function isAnyFallbackModelAvailable(
fallbackChain: FallbackEntry[],
availableModels: Set<string>,
): boolean {
if (resolveFirstAvailableFallback(fallbackChain, availableModels) !== null) {
return true
}
const connectedProviders = readConnectedProvidersCache()
if (connectedProviders) {
const connectedSet = new Set(connectedProviders)
for (const entry of fallbackChain) {
if (entry.providers.some((p) => connectedSet.has(p))) {
log(
"[isAnyFallbackModelAvailable] model not in available set, but provider is connected",
{ model: entry.model, availableCount: availableModels.size },
"[isAnyFallbackModelAvailable] WARNING: No fuzzy match found for any model in fallback chain, but provider is connected. Agent may fail at runtime.",
{ chain: fallbackChain.map((entryItem) => entryItem.model), availableCount: availableModels.size },
)
return true
}
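The resolution loop can be exercised in isolation. `matchModelSketch` below is a simplified stand-in for `fuzzyMatchModel` that only recognizes exact `provider/model` keys; the real matcher is fuzzier:

```typescript
type FallbackEntrySketch = { providers: string[]; model: string }

// Simplified stand-in for fuzzyMatchModel: exact "provider/model" keys only.
function matchModelSketch(
  model: string,
  available: Set<string>,
  providers: string[],
): string | null {
  for (const provider of providers) {
    if (available.has(`${provider}/${model}`)) return model
  }
  return null
}

// Walks the fallback chain in order and returns the first provider/model
// pair that resolves, or null so the caller can log a warning and fall
// back to the connected-providers check.
function resolveFirstAvailableSketch(
  chain: FallbackEntrySketch[],
  available: Set<string>,
): { provider: string; model: string } | null {
  for (const entry of chain) {
    for (const provider of entry.providers) {
      const model = matchModelSketch(entry.model, available, [provider])
      if (model !== null) return { provider, model }
    }
  }
  return null
}
```

Chain order wins over provider availability: an entry earlier in the chain is preferred even if a later entry has more available providers.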

View File

@@ -1,4 +1,6 @@
export type { GitFileStatus, GitFileStat } from "./types"
export type { ParsedGitStatusPorcelainLine } from "./parse-status-porcelain-line"
export { parseGitStatusPorcelainLine } from "./parse-status-porcelain-line"
export { parseGitStatusPorcelain } from "./parse-status-porcelain"
export { parseGitDiffNumstat } from "./parse-diff-numstat"
export { collectGitDiffStats } from "./collect-git-diff-stats"

View File

@@ -0,0 +1,72 @@
/// <reference types="bun-types" />
import { describe, expect, test } from "bun:test"
import { parseGitStatusPorcelainLine } from "./parse-status-porcelain-line"
describe("parseGitStatusPorcelainLine", () => {
test("#given modified porcelain line #when parsing #then returns modified status", () => {
//#given
const line = " M src/a.ts"
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toEqual({ filePath: "src/a.ts", status: "modified" })
})
test("#given added porcelain line #when parsing #then returns added status", () => {
//#given
const line = "A src/b.ts"
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toEqual({ filePath: "src/b.ts", status: "added" })
})
test("#given untracked porcelain line #when parsing #then returns added status", () => {
//#given
const line = "?? src/c.ts"
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toEqual({ filePath: "src/c.ts", status: "added" })
})
test("#given deleted porcelain line #when parsing #then returns deleted status", () => {
//#given
const line = "D src/d.ts"
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toEqual({ filePath: "src/d.ts", status: "deleted" })
})
test("#given empty line #when parsing #then returns null", () => {
//#given
const line = ""
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toBeNull()
})
test("#given malformed line without path #when parsing #then returns null", () => {
//#given
const line = " M "
//#when
const result = parseGitStatusPorcelainLine(line)
//#then
expect(result).toBeNull()
})
})

@@ -0,0 +1,27 @@
import type { GitFileStatus } from "./types"
export interface ParsedGitStatusPorcelainLine {
filePath: string
status: GitFileStatus
}
function toGitFileStatus(statusToken: string): GitFileStatus {
if (statusToken === "A" || statusToken === "??") return "added"
if (statusToken === "D") return "deleted"
return "modified"
}
export function parseGitStatusPorcelainLine(
line: string,
): ParsedGitStatusPorcelainLine | null {
if (!line) return null
const statusToken = line.substring(0, 2).trim()
const filePath = line.substring(3)
if (!filePath) return null
return {
filePath,
status: toGitFileStatus(statusToken),
}
}
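As a quick standalone sketch of how the per-line parser above composes with raw `git status --porcelain` output (types and helpers are inlined here for self-containment; they mirror, but are not, the real module):

```typescript
// Minimal inlined sketch of parse-status-porcelain-line.ts (illustrative copy).
type GitFileStatus = "added" | "modified" | "deleted"

function toGitFileStatus(statusToken: string): GitFileStatus {
  if (statusToken === "A" || statusToken === "??") return "added"
  if (statusToken === "D") return "deleted"
  return "modified"
}

function parseGitStatusPorcelainLine(
  line: string,
): { filePath: string; status: GitFileStatus } | null {
  if (!line) return null
  // Porcelain v1: two status columns, one separator character, then the path.
  const statusToken = line.substring(0, 2).trim()
  const filePath = line.substring(3)
  if (!filePath) return null
  return { filePath, status: toGitFileStatus(statusToken) }
}

const output = " M src/a.ts\n?? src/new.ts\nD  src/old.ts\n"
const parsed = output
  .split("\n")
  .map(parseGitStatusPorcelainLine)
  .filter((entry): entry is NonNullable<typeof entry> => entry !== null)
console.log(parsed)
```

Splitting the line parser out this way is what lets the tests above cover the empty-line and missing-path edge cases without shelling out to git.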

@@ -1,24 +1,14 @@
import type { GitFileStatus } from "./types"
import { parseGitStatusPorcelainLine } from "./parse-status-porcelain-line"
export function parseGitStatusPorcelain(output: string): Map<string, GitFileStatus> {
const map = new Map<string, GitFileStatus>()
if (!output) return map
for (const line of output.split("\n")) {
if (!line) continue
-const status = line.substring(0, 2).trim()
-const filePath = line.substring(3)
-if (!filePath) continue
-if (status === "A" || status === "??") {
-map.set(filePath, "added")
-} else if (status === "D") {
-map.set(filePath, "deleted")
-} else {
-map.set(filePath, "modified")
-}
+const parsed = parseGitStatusPorcelainLine(line)
+if (!parsed) continue
+map.set(parsed.filePath, parsed.status)
}
return map

@@ -41,6 +41,7 @@ export type {
ModelResolutionResult,
} from "./model-resolution-types"
export * from "./model-availability"
+export * from "./fallback-model-availability"
export * from "./connected-providers-cache"
export * from "./session-utils"
export * from "./tmux"
@@ -54,3 +55,4 @@ export * from "./truncate-description"
export * from "./opencode-storage-paths"
export * from "./opencode-message-dir"
export * from "./normalize-sdk-response"
+export * from "./session-directory-resolver"

@@ -9,6 +9,14 @@ let fetchAvailableModels: (client?: unknown, options?: { connectedProviders?: st
let fuzzyMatchModel: (target: string, available: Set<string>, providers?: string[]) => string | null
let isModelAvailable: (targetModel: string, availableModels: Set<string>) => boolean
let getConnectedProviders: (client: unknown) => Promise<string[]>
let isAnyFallbackModelAvailable: (
fallbackChain: Array<{ providers: string[]; model: string }>,
availableModels: Set<string>,
) => boolean
let resolveFirstAvailableFallback: (
fallbackChain: Array<{ providers: string[]; model: string }>,
availableModels: Set<string>,
) => { provider: string; model: string } | null
beforeAll(async () => {
;({
@@ -18,6 +26,10 @@ beforeAll(async () => {
isModelAvailable,
getConnectedProviders,
} = await import("./model-availability"))
;({
isAnyFallbackModelAvailable,
resolveFirstAvailableFallback,
} = await import("./fallback-model-availability"))
})
describe("fetchAvailableModels", () => {
@@ -233,6 +245,27 @@ describe("fuzzyMatchModel", () => {
expect(result).toBe("anthropic/claude-opus-4-6")
})
// given github-copilot serves claude versions with dot notation
// when fallback chain uses hyphen notation in requested model
// then normalize both forms and match github-copilot model
it("should match github-copilot claude-opus-4-6 to claude-opus-4.6", () => {
const available = new Set([
"github-copilot/claude-opus-4.6",
"opencode/glm-4.7-free",
])
const result = fuzzyMatchModel("claude-opus-4-6", available, ["github-copilot"])
expect(result).toBe("github-copilot/claude-opus-4.6")
})
// given claude models can evolve to newer version numbers
// when matching across dot and hyphen version separators
// then normalize generically without hardcoding specific versions
it("should normalize claude version separators for future versions", () => {
const available = new Set(["github-copilot/claude-sonnet-5.1"])
const result = fuzzyMatchModel("claude-sonnet-5-1", available, ["github-copilot"])
expect(result).toBe("github-copilot/claude-sonnet-5.1")
})
// given available models from multiple providers
// when providers filter is specified
// then only search models from specified providers
@@ -842,3 +875,90 @@ describe("isModelAvailable", () => {
expect(result).toBe(false)
})
})
describe("fallback model availability", () => {
let tempDir: string
let originalXdgCache: string | undefined
beforeEach(() => {
// given
tempDir = mkdtempSync(join(tmpdir(), "opencode-test-"))
originalXdgCache = process.env.XDG_CACHE_HOME
process.env.XDG_CACHE_HOME = tempDir
})
afterEach(() => {
if (originalXdgCache !== undefined) {
process.env.XDG_CACHE_HOME = originalXdgCache
} else {
delete process.env.XDG_CACHE_HOME
}
rmSync(tempDir, { recursive: true, force: true })
})
function writeConnectedProvidersCache(connected: string[]): void {
const cacheDir = join(tempDir, "oh-my-opencode")
require("fs").mkdirSync(cacheDir, { recursive: true })
writeFileSync(
join(cacheDir, "connected-providers.json"),
JSON.stringify({ connected, updatedAt: new Date().toISOString() }),
)
}
it("returns null for completely unknown model", () => {
// given
const available = new Set(["openai/gpt-5.2", "anthropic/claude-opus-4-6"])
// when
const result = fuzzyMatchModel("non-existent-model-family", available)
// then
expect(result).toBeNull()
})
it("returns true when models do not match but provider is connected", () => {
// given
const fallbackChain = [{ providers: ["openai"], model: "gpt-5.2" }]
const availableModels = new Set(["anthropic/claude-opus-4-6"])
writeConnectedProvidersCache(["openai"])
// when
const result = isAnyFallbackModelAvailable(fallbackChain, availableModels)
// then
expect(result).toBe(true)
})
it("returns first resolved fallback model from chain", () => {
// given
const fallbackChain = [
{ providers: ["openai"], model: "gpt-5.2" },
{ providers: ["anthropic"], model: "claude-opus-4-6" },
]
const availableModels = new Set([
"anthropic/claude-opus-4-6",
"openai/gpt-5.2-preview",
])
// when
const result = resolveFirstAvailableFallback(fallbackChain, availableModels)
// then
expect(result).toEqual({ provider: "openai", model: "openai/gpt-5.2-preview" })
})
it("returns null when no fallback model resolves", () => {
// given
const fallbackChain = [
{ providers: ["openai"], model: "gpt-5.2" },
{ providers: ["anthropic"], model: "claude-opus-4-6" },
]
const availableModels = new Set(["google/gemini-3-pro"])
// when
const result = resolveFirstAvailableFallback(fallbackChain, availableModels)
// then
expect(result).toBeNull()
})
})

@@ -28,8 +28,7 @@ import { normalizeSDKResponse } from "./normalize-sdk-response"
function normalizeModelName(name: string): string {
return name
.toLowerCase()
-.replace(/claude-(opus|sonnet|haiku)-4-5/g, "claude-$1-4.5")
-.replace(/claude-(opus|sonnet|haiku)-4\.5/g, "claude-$1-4.5")
+.replace(/claude-(opus|sonnet|haiku)-(\d+)[.-](\d+)/g, "claude-$1-$2.$3")
}
export function fuzzyMatchModel(
@@ -70,6 +69,7 @@ export function fuzzyMatchModel(
log("[fuzzyMatchModel] substring matches", { targetNormalized, matchCount: matches.length, matches })
if (matches.length === 0) {
log("[fuzzyMatchModel] WARNING: no match found", { target, availableCount: available.size, providers })
return null
}
@@ -283,71 +283,6 @@ export async function fetchAvailableModels(
return modelSet
}
export function isAnyFallbackModelAvailable(
fallbackChain: Array<{ providers: string[]; model: string }>,
availableModels: Set<string>,
): boolean {
// If we have models, check them first
if (availableModels.size > 0) {
for (const entry of fallbackChain) {
const hasAvailableProvider = entry.providers.some((provider) => {
return fuzzyMatchModel(entry.model, availableModels, [provider]) !== null
})
if (hasAvailableProvider) {
return true
}
}
}
// Fallback: check if any provider in the chain is connected
// This handles race conditions where availableModels is empty or incomplete
// but we know the provider is connected.
const connectedProviders = connectedProvidersCache.readConnectedProvidersCache()
if (connectedProviders) {
const connectedSet = new Set(connectedProviders)
for (const entry of fallbackChain) {
if (entry.providers.some((p) => connectedSet.has(p))) {
log("[isAnyFallbackModelAvailable] model not in available set, but provider is connected", {
model: entry.model,
availableCount: availableModels.size,
})
return true
}
}
}
return false
}
export function isAnyProviderConnected(
providers: string[],
availableModels: Set<string>,
): boolean {
if (availableModels.size > 0) {
const providerSet = new Set(providers)
for (const model of availableModels) {
const [provider] = model.split("/")
if (providerSet.has(provider)) {
log("[isAnyProviderConnected] found model from required provider", { provider, model })
return true
}
}
}
const connectedProviders = connectedProvidersCache.readConnectedProvidersCache()
if (connectedProviders) {
const connectedSet = new Set(connectedProviders)
for (const provider of providers) {
if (connectedSet.has(provider)) {
log("[isAnyProviderConnected] provider connected via cache", { provider })
return true
}
}
}
return false
}
export function __resetModelCache(): void {}
export function isModelCacheAvailable(): boolean {

@@ -3,8 +3,7 @@ import { log } from "./logger"
function normalizeModelName(name: string): string {
return name
.toLowerCase()
-.replace(/claude-(opus|sonnet|haiku)-4-5/g, "claude-$1-4.5")
-.replace(/claude-(opus|sonnet|haiku)-4\.5/g, "claude-$1-4.5")
+.replace(/claude-(opus|sonnet|haiku)-(\d+)[.-](\d+)/g, "claude-$1-$2.$3")
}
export function fuzzyMatchModel(
@@ -82,4 +81,3 @@ export function fuzzyMatchModel(
log("[fuzzyMatchModel] shortest match", { result })
return result
}
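The change above swaps two version-pinned replace rules for a single generic one that collapses any `major-minor` or `major.minor` separator to dot form. A quick sketch of the resulting behavior (function copied from the diff):

```typescript
// Generic Claude version-separator normalizer, as introduced in the diff above.
// Any "claude-<family>-<major>-<minor>" or "...-<major>.<minor>" collapses to dot form.
function normalizeModelName(name: string): string {
  return name
    .toLowerCase()
    .replace(/claude-(opus|sonnet|haiku)-(\d+)[.-](\d+)/g, "claude-$1-$2.$3")
}

console.log(normalizeModelName("Claude-Opus-4-6"))   // hyphen separator → "claude-opus-4.6"
console.log(normalizeModelName("claude-sonnet-5.1")) // already dot form, unchanged
console.log(normalizeModelName("gpt-5.2"))           // non-claude names pass through
```

Normalizing both sides this way is what makes the `github-copilot/claude-opus-4.6` fuzzy-match tests above pass without hardcoding specific versions.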

@@ -0,0 +1,101 @@
import { describe, expect, test } from "bun:test"
import { isWindowsAppDataDirectory, resolveSessionDirectory } from "./session-directory-resolver"
describe("session-directory-resolver", () => {
describe("isWindowsAppDataDirectory", () => {
test("returns true when path is under AppData Local", () => {
//#given
const directory = "C:/Users/test/AppData/Local/opencode"
//#when
const result = isWindowsAppDataDirectory(directory)
//#then
expect(result).toBe(true)
})
test("returns true when path ends with AppData directory segment", () => {
//#given
const directory = "C:/Users/test/AppData/Local"
//#when
const result = isWindowsAppDataDirectory(directory)
//#then
expect(result).toBe(true)
})
test("returns false when path is outside AppData", () => {
//#given
const directory = "D:/projects/oh-my-opencode"
//#when
const result = isWindowsAppDataDirectory(directory)
//#then
expect(result).toBe(false)
})
test("returns false for lookalike non-AppData segment", () => {
//#given
const directory = "D:/projects/appdata/local-tools"
//#when
const result = isWindowsAppDataDirectory(directory)
//#then
expect(result).toBe(false)
})
})
describe("resolveSessionDirectory", () => {
test("uses process working directory on Windows when parent directory drifts to AppData", () => {
//#given
const options = {
parentDirectory: "C:\\Users\\test\\AppData\\Local\\ai.opencode.desktop",
fallbackDirectory: "C:\\Users\\test\\AppData\\Roaming\\opencode",
platform: "win32" as const,
currentWorkingDirectory: "D:\\projects\\oh-my-opencode",
}
//#when
const result = resolveSessionDirectory(options)
//#then
expect(result).toBe("D:\\projects\\oh-my-opencode")
})
test("keeps AppData directory when current working directory is also AppData", () => {
//#given
const options = {
parentDirectory: "C:\\Users\\test\\AppData\\Local\\ai.opencode.desktop",
fallbackDirectory: "C:\\Users\\test\\AppData\\Roaming\\opencode",
platform: "win32" as const,
currentWorkingDirectory: "C:\\Users\\test\\AppData\\Local\\Temp",
}
//#when
const result = resolveSessionDirectory(options)
//#then
expect(result).toBe("C:\\Users\\test\\AppData\\Local\\ai.opencode.desktop")
})
test("keeps original directory outside Windows", () => {
//#given
const options = {
parentDirectory: "/tmp/opencode",
fallbackDirectory: "/workspace/project",
platform: "darwin" as const,
currentWorkingDirectory: "/workspace/project",
}
//#when
const result = resolveSessionDirectory(options)
//#then
expect(result).toBe("/tmp/opencode")
})
})
})

@@ -0,0 +1,41 @@
const WINDOWS_APPDATA_SEGMENTS = ["\\appdata\\local", "\\appdata\\roaming", "\\appdata\\locallow"]
function normalizeWindowsPath(directory: string): string {
return directory.replaceAll("/", "\\").toLowerCase()
}
export function isWindowsAppDataDirectory(directory: string): boolean {
const normalizedDirectory = normalizeWindowsPath(directory)
return WINDOWS_APPDATA_SEGMENTS.some((segment) => {
return normalizedDirectory.endsWith(segment) || normalizedDirectory.includes(`${segment}\\`)
})
}
export function resolveSessionDirectory(options: {
parentDirectory: string | null | undefined
fallbackDirectory: string
platform?: NodeJS.Platform
currentWorkingDirectory?: string
}): string {
const {
parentDirectory,
fallbackDirectory,
platform = process.platform,
currentWorkingDirectory = process.cwd(),
} = options
const sessionDirectory = parentDirectory ?? fallbackDirectory
if (platform !== "win32") {
return sessionDirectory
}
if (!isWindowsAppDataDirectory(sessionDirectory)) {
return sessionDirectory
}
if (isWindowsAppDataDirectory(currentWorkingDirectory)) {
return sessionDirectory
}
return currentWorkingDirectory
}

Some files were not shown because too many files have changed in this diff.