release: v3.5.6

fix: update prometheus prompt test to match compressed plan template wording
Merge pull request #1882 from code-yeongyu/fix/resume-completion-timer-cleanup
2026-02-16 07:24:09 +00:00 · 2026-02-16 16:21:14 +09:00 · 2026-02-16 16:09:02 +09:00 · 2026-02-16 16:06:36 +09:00 · 2026-02-16 15:56:52 +09:00 · 2026-02-16 15:46:00 +09:00
39 changed files with 1570 additions and 343 deletions
--- a/bun.lock
+++ b/bun.lock
@@ -28,13 +28,13 @@
        "typescript": "^5.7.3",
      },
      "optionalDependencies": {
-        "oh-my-opencode-darwin-arm64": "3.5.3",
-        "oh-my-opencode-darwin-x64": "3.5.3",
-        "oh-my-opencode-linux-arm64": "3.5.3",
-        "oh-my-opencode-linux-arm64-musl": "3.5.3",
-        "oh-my-opencode-linux-x64": "3.5.3",
-        "oh-my-opencode-linux-x64-musl": "3.5.3",
-        "oh-my-opencode-windows-x64": "3.5.3",
+        "oh-my-opencode-darwin-arm64": "3.5.5",
+        "oh-my-opencode-darwin-x64": "3.5.5",
+        "oh-my-opencode-linux-arm64": "3.5.5",
+        "oh-my-opencode-linux-arm64-musl": "3.5.5",
+        "oh-my-opencode-linux-x64": "3.5.5",
+        "oh-my-opencode-linux-x64-musl": "3.5.5",
+        "oh-my-opencode-windows-x64": "3.5.5",
      },
    },
  },
@@ -226,19 +226,19 @@

    "object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],

-    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Dq0+PC2dyAqG7c3DUnQmdOkKbKmOsRHwoqgLCQNKN1lTRllF8zbWqp5B+LGKxSPxPqJIPS3mKt+wIR2KvkYJVw=="],
+    "oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.5", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-XtcCQ8/iVT6T1B58y0N1oMgOK4beTW8DW98b/ITnINb7b3hNSv5754Af/2Rx67BV0iE0ezC6uXaqz45C7ru1rw=="],

-    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Ke45Bv/ygZm3YUSUumIyk647KZ2PFzw30tH597cOpG8MDPGbNVBCM6EKFezcukUPT+gPFVpE1IiGzEkn4JmgZA=="],
+    "oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.5", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ReSDqU6jihh7lpGNmEt3REzc5bOcyfv3cMHitpecKq0wRrJoTBI+dgNPk90BLjHobGbhAm0TE8VZ9tqTkivnIQ=="],

-    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aP5S3DngUhFkNeqYM33Ge6zccCWLzB/O3FLXLFXy/Iws03N8xugw72pnMK6lUbIia9QQBKK7IZBoYm9C79pZ3g=="],
+    "oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Zs/ETIxwcWBvw+jdlo8t+3+92oMMaXkFg1ZCuZrBRZOmtPFefdsH5/QEIe2TlNSjfoTwlA7cbpOD6oXgxRVrtg=="],

-    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-UiD/hVKYZQyX4D5N5SnZT4M5Z/B2SDtJWBW4MibpYSAcPKNCEBKi/5E4hOPxAtTfFGR8tIXFmYZdQJDkVfvluw=="],
+    "oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m9r4OW1XhGtm/SvHM3kzpS4pEiI2eIh5Tj+j5hpMW3wu+AqE3F1XGUpu8RgvIpupFo8beimJWDYQujqokReQqg=="],

-    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-L9kqwzElGkaQ8pgtv1ZjcHARw9LPaU4UEVjzauByTMi+/5Js/PTsNXBggxSRzZfQ8/MNBPSCiA4K10Kc0YjjvA=="],
+    "oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-N6ysF5Pr2C1dyC5Dftzp05RJODgL+EYCWcOV59/UCV152cINlOhg80804o+6XTKV/taOAaboYaQwsBKiCs/BNQ=="],

-    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Z0fVVih/b2dbNeb9DK9oca5dNYCZyPySBRtxRhDXod5d7fJNgIPrvUoEd3SNfkRGORyFB3hGBZ6nqQ6N8+8DEA=="],
+    "oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-MOxW1FMTJT3Ze/U2fDedcZUYTFaA9PaKIiqtsBIHOSb+fFgdo51RIuUlKCELN/g9I9dYhw0yP2n9tBMBG6feSg=="],

-    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ocWPjRs2sJgN02PJnEIYtqdMVDex1YhEj1FzAU5XIicfzQbgxLh9nz1yhHZzfqGJq69QStU6ofpc5kQpfX1LMg=="],
+    "oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.5", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-dWRtPyIdMFQIw1BwVO4PbGqoo0UWs7NES+YJC7BLGv0YnWN7Q2tatmOviSeSgMELeMsWSbDNisEB79jsfShXjA=="],

    "on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],

--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
@@ -74,13 +74,13 @@
    "typescript": "^5.7.3"
  },
  "optionalDependencies": {
-    "oh-my-opencode-darwin-arm64": "3.5.4",
-    "oh-my-opencode-darwin-x64": "3.5.4",
-    "oh-my-opencode-linux-arm64": "3.5.4",
-    "oh-my-opencode-linux-arm64-musl": "3.5.4",
-    "oh-my-opencode-linux-x64": "3.5.4",
-    "oh-my-opencode-linux-x64-musl": "3.5.4",
-    "oh-my-opencode-windows-x64": "3.5.4"
+    "oh-my-opencode-darwin-arm64": "3.5.6",
+    "oh-my-opencode-darwin-x64": "3.5.6",
+    "oh-my-opencode-linux-arm64": "3.5.6",
+    "oh-my-opencode-linux-arm64-musl": "3.5.6",
+    "oh-my-opencode-linux-x64": "3.5.6",
+    "oh-my-opencode-linux-x64-musl": "3.5.6",
+    "oh-my-opencode-windows-x64": "3.5.6"
  },
  "trustedDependencies": [
    "@ast-grep/cli",
--- a/packages/darwin-arm64/package.json
+++ b/packages/darwin-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-arm64",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/darwin-x64/package.json
+++ b/packages/darwin-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-darwin-x64",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64-musl/package.json
+++ b/packages/linux-arm64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64-musl",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-arm64/package.json
+++ b/packages/linux-arm64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-arm64",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64-musl/package.json
+++ b/packages/linux-x64-musl/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64-musl",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
  "license": "MIT",
  "repository": {
--- a/packages/linux-x64/package.json
+++ b/packages/linux-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-linux-x64",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (linux-x64)",
  "license": "MIT",
  "repository": {
--- a/packages/windows-x64/package.json
+++ b/packages/windows-x64/package.json
@@ -1,6 +1,6 @@
 {
  "name": "oh-my-opencode-windows-x64",
-  "version": "3.5.4",
+  "version": "3.5.6",
  "description": "Platform-specific binary for oh-my-opencode (windows-x64)",
  "license": "MIT",
  "repository": {
--- a/signatures/cla.json
+++ b/signatures/cla.json
@@ -1503,6 +1503,22 @@
      "created_at": "2026-02-14T19:58:19Z",
      "repoId": 1108837393,
      "pullRequestNo": 1845
+    },
+    {
+      "name": "Decrabbityyy",
+      "id": 99632363,
+      "comment_id": 3904649522,
+      "created_at": "2026-02-15T15:07:11Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1864
+    },
+    {
+      "name": "dankochetov",
+      "id": 33990502,
+      "comment_id": 3905398332,
+      "created_at": "2026-02-15T23:17:05Z",
+      "repoId": 1108837393,
+      "pullRequestNo": 1870
    }
  ]
 }
--- a/src/agents/prometheus-prompt.test.ts
+++ b/src/agents/prometheus-prompt.test.ts
@@ -66,7 +66,7 @@ describe("PROMETHEUS_SYSTEM_PROMPT zero human intervention", () => {
    expect(lowerPrompt).toContain("preconditions")
    expect(lowerPrompt).toContain("failure indicators")
    expect(lowerPrompt).toContain("evidence")
-    expect(lowerPrompt).toMatch(/negative scenario/)
+    expect(prompt).toMatch(/negative/i)
  })

  test("should require QA scenario adequacy in self-review checklist", () => {
--- a/src/agents/prometheus/identity-constraints.ts
+++ b/src/agents/prometheus/identity-constraints.ts
@@ -129,7 +129,21 @@ Your ONLY valid output locations are \`.sisyphus/plans/*.md\` and \`.sisyphus/dr

 Example: \`.sisyphus/plans/auth-refactor.md\`

-### 5. SINGLE PLAN MANDATE (CRITICAL)
+### 5. MAXIMUM PARALLELISM PRINCIPLE (NON-NEGOTIABLE)
+
+Your plans MUST maximize parallel execution. This is a core planning quality metric.
+
+**Granularity Rule**: One task = one module/concern = 1-3 files.
+If a task touches 4+ files or 2+ unrelated concerns, SPLIT IT.
+
+**Parallelism Target**: Aim for 5-8 tasks per wave.
+If any wave has fewer than 3 tasks (except the final integration), you under-split.
+
+**Dependency Minimization**: Structure tasks so shared dependencies
+(types, interfaces, configs) are extracted as early Wave-1 tasks,
+unblocking maximum parallelism in subsequent waves.
+
+### 6. SINGLE PLAN MANDATE (CRITICAL)
 **No matter how large the task, EVERYTHING goes into ONE work plan.**

 **NEVER:**
@@ -152,7 +166,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`

 **The plan can have 50+ TODOs. That's OK. ONE PLAN.**

-### 5.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)
+### 6.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)

 <write_protocol>
 **The Write tool OVERWRITES files. It does NOT append.**
@@ -188,7 +202,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
 - [ ] File already exists with my content? → Use Edit to append, NOT Write
 </write_protocol>

-### 6. DRAFT AS WORKING MEMORY (MANDATORY)
+### 7. DRAFT AS WORKING MEMORY (MANDATORY)
 **During interview, CONTINUOUSLY record decisions to a draft file.**

 **Draft Location**: \`.sisyphus/drafts/{name}.md\`
--- a/src/agents/prometheus/plan-template.ts
+++ b/src/agents/prometheus/plan-template.ts
@@ -70,108 +70,25 @@ Generate plan to: \`.sisyphus/plans/{name}.md\`

 ## Verification Strategy (MANDATORY)

-> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**
->
-> ALL tasks in this plan MUST be verifiable WITHOUT any human action.
-> This is NOT conditional — it applies to EVERY task, regardless of test strategy.
->
-> **FORBIDDEN** — acceptance criteria that require:
-> - "User manually tests..." / "사용자가 직접 테스트..."
-> - "User visually confirms..." / "사용자가 눈으로 확인..."
-> - "User interacts with..." / "사용자가 직접 조작..."
-> - "Ask user to verify..." / "사용자에게 확인 요청..."
-> - ANY step where a human must perform an action
->
-> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.
+> **ZERO HUMAN INTERVENTION** — ALL verification is agent-executed. No exceptions.
+> Acceptance criteria requiring "user manually tests/confirms" are FORBIDDEN.

 ### Test Decision
 - **Infrastructure exists**: [YES/NO]
 - **Automated tests**: [TDD / Tests-after / None]
 - **Framework**: [bun test / vitest / jest / pytest / none]
+- **If TDD**: Each task follows RED (failing test) → GREEN (minimal impl) → REFACTOR

-### If TDD Enabled
+### QA Policy
+Every task MUST include agent-executed QA scenarios (see TODO template below).
+Evidence saved to \`.sisyphus/evidence/task-{N}-{scenario-slug}.{ext}\`.

-Each TODO follows RED-GREEN-REFACTOR:
-
-**Task Structure:**
-1. **RED**: Write failing test first
-   - Test file: \`[path].test.ts\`
-   - Test command: \`bun test [file]\`
-   - Expected: FAIL (test exists, implementation doesn't)
-2. **GREEN**: Implement minimum code to pass
-   - Command: \`bun test [file]\`
-   - Expected: PASS
-3. **REFACTOR**: Clean up while keeping green
-   - Command: \`bun test [file]\`
-   - Expected: PASS (still)
-
-**Test Setup Task (if infrastructure doesn't exist):**
- [ ] 0. Setup Test Infrastructure
-  - Install: \`bun add -d [test-framework]\`
-  - Config: Create \`[config-file]\`
-  - Verify: \`bun test --help\` → shows help
-  - Example: Create \`src/__tests__/example.test.ts\`
-  - Verify: \`bun test\` → 1 test passes
-
-### Agent-Executed QA Scenarios (MANDATORY — ALL tasks)
-
-> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.
-> - **With TDD**: QA scenarios complement unit tests at integration/E2E level
-> - **Without TDD**: QA scenarios are the PRIMARY verification method
->
-> These describe how the executing agent DIRECTLY verifies the deliverable
-> by running it — opening browsers, executing commands, sending API requests.
-> The agent performs what a human tester would do, but automated via tools.
-
-**Verification Tool by Deliverable Type:**
-
-| Type | Tool | How Agent Verifies |
-|------|------|-------------------|
-| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
-| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output |
-| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |
-| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |
-| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |
-
-**Each Scenario MUST Follow This Format:**
-
-\`\`\`
-Scenario: [Descriptive name — what user action/flow is being verified]
-  Tool: [Playwright / interactive_bash / Bash]
-  Preconditions: [What must be true before this scenario runs]
-  Steps:
-    1. [Exact action with specific selector/command/endpoint]
-    2. [Next action with expected intermediate state]
-    3. [Assertion with exact expected value]
-  Expected Result: [Concrete, observable outcome]
-  Failure Indicators: [What would indicate failure]
-  Evidence: [Screenshot path / output capture / response body path]
-\`\`\`
-
-**Scenario Detail Requirements:**
- **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
- **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
- **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
- **Timing**: Include wait conditions where relevant (\`Wait for .dashboard (timeout: 10s)\`)
- **Negative Scenarios**: At least ONE failure/error scenario per feature
- **Evidence Paths**: Specific file paths (\`.sisyphus/evidence/task-N-scenario-name.png\`)
-
-**Anti-patterns (NEVER write scenarios like this):**
- ❌ "Verify the login page works correctly"
- ❌ "Check that the API returns the right data"
- ❌ "Test the form validation"
- ❌ "User opens browser and confirms..."
-
-**Write scenarios like this instead:**
- ✅ \`Navigate to /login → Fill input[name="email"] with "test@example.com" → Fill input[name="password"] with "Pass123!" → Click button[type="submit"] → Wait for /dashboard → Assert h1 contains "Welcome"\`
- ✅ \`POST /api/users {"name":"Test","email":"new@test.com"} → Assert status 201 → Assert response.id is UUID → GET /api/users/{id} → Assert name equals "Test"\`
- ✅ \`Run ./cli --config test.yaml → Wait for "Loaded" in stdout → Send "q" → Assert exit code 0 → Assert stdout contains "Goodbye"\`
-
-**Evidence Requirements:**
- Screenshots: \`.sisyphus/evidence/\` for all UI verifications
- Terminal output: Captured for CLI/TUI verifications
- Response bodies: Saved for API verifications
- All evidence referenced by specific file path in acceptance criteria
+| Deliverable Type | Verification Tool | Method |
+|------------------|-------------------|--------|
+| Frontend/UI | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
+| TUI/CLI | interactive_bash (tmux) | Run command, send keystrokes, validate output |
+| API/Backend | Bash (curl) | Send requests, assert status + response fields |
+| Library/Module | Bash (bun/node REPL) | Import, call functions, compare output |

 ---

@@ -181,49 +98,82 @@ Scenario: [Descriptive name — what user action/flow is being verified]

 > Maximize throughput by grouping independent tasks into parallel waves.
 > Each wave completes before the next begins.
+> Target: 5-8 tasks per wave. Fewer than 3 per wave (except final) = under-splitting.

 \`\`\`
-Wave 1 (Start Immediately):
-├── Task 1: [no dependencies]
-└── Task 5: [no dependencies]
+Wave 1 (Start Immediately — foundation + scaffolding):
+├── Task 1: Project scaffolding + config [quick]
+├── Task 2: Design system tokens [quick]
+├── Task 3: Type definitions [quick]
+├── Task 4: Schema definitions [quick]
+├── Task 5: Storage interface + in-memory impl [quick]
+├── Task 6: Auth middleware [quick]
+└── Task 7: Client module [quick]

-Wave 2 (After Wave 1):
-├── Task 2: [depends: 1]
-├── Task 3: [depends: 1]
-└── Task 6: [depends: 5]
+Wave 2 (After Wave 1 — core modules, MAX PARALLEL):
+├── Task 8: Core business logic (depends: 3, 5, 7) [deep]
+├── Task 9: API endpoints (depends: 4, 5) [unspecified-high]
+├── Task 10: Secondary storage impl (depends: 5) [unspecified-high]
+├── Task 11: Retry/fallback logic (depends: 8) [deep]
+├── Task 12: UI layout + navigation (depends: 2) [visual-engineering]
+├── Task 13: API client + hooks (depends: 4) [quick]
+└── Task 14: Telemetry middleware (depends: 5, 10) [unspecified-high]

-Wave 3 (After Wave 2):
-└── Task 4: [depends: 2, 3]
+Wave 3 (After Wave 2 — integration + UI):
+├── Task 15: Main route combining modules (depends: 6, 11, 14) [deep]
+├── Task 16: UI data visualization (depends: 12, 13) [visual-engineering]
+├── Task 17: Deployment config A (depends: 15) [quick]
+├── Task 18: Deployment config B (depends: 15) [quick]
+├── Task 19: Deployment config C (depends: 15) [quick]
+└── Task 20: UI request log + build (depends: 16) [visual-engineering]

-Critical Path: Task 1 → Task 2 → Task 4
-Parallel Speedup: ~40% faster than sequential
+Wave 4 (After Wave 3 — verification):
+├── Task 21: Integration tests (depends: 15) [deep]
+├── Task 22: UI QA - Playwright (depends: 20) [unspecified-high]
+├── Task 23: E2E QA (depends: 21) [deep]
+└── Task 24: Git cleanup + tagging (depends: 21) [git]
+
+Wave FINAL (After ALL tasks — independent review, 4 parallel):
+├── Task F1: Plan compliance audit (oracle)
+├── Task F2: Code quality review (unspecified-high)
+├── Task F3: Real manual QA (unspecified-high)
+└── Task F4: Scope fidelity check (deep)
+
+Critical Path: Task 5 → Task 8 → Task 11 → Task 15 → Task 21 → Task 23 → F1-F4
+Parallel Speedup: ~70% faster than sequential
+Max Concurrent: 7 (Waves 1 & 2)
 \`\`\`

-### Dependency Matrix
+### Dependency Matrix (abbreviated — show ALL tasks in your generated plan)

-| Task | Depends On | Blocks | Can Parallelize With |
-|------|------------|--------|---------------------|
-| 1 | None | 2, 3 | 5 |
-| 2 | 1 | 4 | 3, 6 |
-| 3 | 1 | 4 | 2, 6 |
-| 4 | 2, 3 | None | None (final) |
-| 5 | None | 6 | 1 |
-| 6 | 5 | None | 2, 3 |
+| Task | Depends On | Blocks | Wave |
+|------|------------|--------|------|
+| 1-7 | — | 8-14 | 1 |
+| 8 | 3, 5, 7 | 11, 15 | 2 |
+| 11 | 8 | 15 | 2 |
+| 14 | 5, 10 | 15 | 2 |
+| 15 | 6, 11, 14 | 17-19, 21 | 3 |
+| 21 | 15 | 23, 24 | 4 |
+
+> This is abbreviated for reference. YOUR generated plan must include the FULL matrix for ALL tasks.

 ### Agent Dispatch Summary

-| Wave | Tasks | Recommended Agents |
-|------|-------|-------------------|
-| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
-| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
-| 3 | 4 | final integration task |
+| Wave | # Parallel | Tasks → Agent Category |
+|------|------------|----------------------|
+| 1 | **7** | T1-T7 → \`quick\` |
+| 2 | **7** | T8 → \`deep\`, T9 → \`unspecified-high\`, T10 → \`unspecified-high\`, T11 → \`deep\`, T12 → \`visual-engineering\`, T13 → \`quick\`, T14 → \`unspecified-high\` |
+| 3 | **6** | T15 → \`deep\`, T16 → \`visual-engineering\`, T17-T19 → \`quick\`, T20 → \`visual-engineering\` |
+| 4 | **4** | T21 → \`deep\`, T22 → \`unspecified-high\`, T23 → \`deep\`, T24 → \`git\` |
+| FINAL | **4** | F1 → \`oracle\`, F2 → \`unspecified-high\`, F3 → \`unspecified-high\`, F4 → \`deep\` |

 ---

 ## TODOs

 > Implementation + Test = ONE Task. Never separate.
-> EVERY task MUST have: Recommended Agent Profile + Parallelization info.
+> EVERY task MUST have: Recommended Agent Profile + Parallelization info + QA Scenarios.
+> **A task WITHOUT QA Scenarios is INCOMPLETE. No exceptions.**

 - [ ] 1. [Task Title]

@@ -257,22 +207,15 @@ Parallel Speedup: ~40% faster than sequential

  **Pattern References** (existing code to follow):
  - \`src/services/auth.ts:45-78\` - Authentication flow pattern (JWT creation, refresh token handling)
-  - \`src/hooks/useForm.ts:12-34\` - Form validation pattern (Zod schema + react-hook-form integration)

  **API/Type References** (contracts to implement against):
  - \`src/types/user.ts:UserDTO\` - Response shape for user endpoints
-  - \`src/api/schema.ts:createUserSchema\` - Request validation schema

  **Test References** (testing patterns to follow):
  - \`src/__tests__/auth.test.ts:describe("login")\` - Test structure and mocking patterns

-  **Documentation References** (specs and requirements):
-  - \`docs/api-spec.md#authentication\` - API contract details
-  - \`ARCHITECTURE.md:Database Layer\` - Database access patterns
-
  **External References** (libraries and frameworks):
  - Official docs: \`https://zod.dev/?id=basic-usage\` - Zod validation syntax
-  - Example repo: \`github.com/example/project/src/auth\` - Reference implementation

  **WHY Each Reference Matters** (explain the relevance):
  - Don't just list files - explain what pattern/information the executor should extract
@@ -283,113 +226,60 @@ Parallel Speedup: ~40% faster than sequential

  > **AGENT-EXECUTABLE VERIFICATION ONLY** — No human action permitted.
  > Every criterion MUST be verifiable by running a command or using a tool.
-  > REPLACE all placeholders with actual values from task context.

  **If TDD (tests enabled):**
  - [ ] Test file created: src/auth/login.test.ts
-  - [ ] Test covers: successful login returns JWT token
  - [ ] bun test src/auth/login.test.ts → PASS (3 tests, 0 failures)

-  **Agent-Executed QA Scenarios (MANDATORY — per-scenario, ultra-detailed):**
+  **QA Scenarios (MANDATORY — task is INCOMPLETE without these):**

-  > Write MULTIPLE named scenarios per task: happy path AND failure cases.
-  > Each scenario = exact tool + steps with real selectors/data + evidence path.
-
-  **Example — Frontend/UI (Playwright):**
+  > **This is NOT optional. A task without QA scenarios WILL BE REJECTED.**
+  >
+  > Write scenario tests that verify the ACTUAL BEHAVIOR of what you built.
+  > Minimum: 1 happy path + 1 failure/edge case per task.
+  > Each scenario = exact tool + exact steps + exact assertions + evidence path.
+  >
+  > **The executing agent MUST run these scenarios after implementation.**
+  > **The orchestrator WILL verify evidence files exist before marking task complete.**

  \\\`\\\`\\\`
-  Scenario: Successful login redirects to dashboard
-    Tool: Playwright (playwright skill)
-    Preconditions: Dev server running on localhost:3000, test user exists
+  Scenario: [Happy path — what SHOULD work]
+    Tool: [Playwright / interactive_bash / Bash (curl)]
+    Preconditions: [Exact setup state]
    Steps:
-      1. Navigate to: http://localhost:3000/login
-      2. Wait for: input[name="email"] visible (timeout: 5s)
-      3. Fill: input[name="email"] → "test@example.com"
-      4. Fill: input[name="password"] → "ValidPass123!"
-      5. Click: button[type="submit"]
-      6. Wait for: navigation to /dashboard (timeout: 10s)
-      7. Assert: h1 text contains "Welcome back"
-      8. Assert: cookie "session_token" exists
-      9. Screenshot: .sisyphus/evidence/task-1-login-success.png
-    Expected Result: Dashboard loads with welcome message
-    Evidence: .sisyphus/evidence/task-1-login-success.png
+      1. [Exact action — specific command/selector/endpoint, no vagueness]
+      2. [Next action — with expected intermediate state]
+      3. [Assertion — exact expected value, not "verify it works"]
+    Expected Result: [Concrete, observable, binary pass/fail]
+    Failure Indicators: [What specifically would mean this failed]
+    Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}.{ext}

-  Scenario: Login fails with invalid credentials
-    Tool: Playwright (playwright skill)
-    Preconditions: Dev server running, no valid user with these credentials
+  Scenario: [Failure/edge case — what SHOULD fail gracefully]
+    Tool: [same format]
+    Preconditions: [Invalid input / missing dependency / error state]
    Steps:
-      1. Navigate to: http://localhost:3000/login
-      2. Fill: input[name="email"] → "wrong@example.com"
-      3. Fill: input[name="password"] → "WrongPass"
-      4. Click: button[type="submit"]
-      5. Wait for: .error-message visible (timeout: 5s)
-      6. Assert: .error-message text contains "Invalid credentials"
-      7. Assert: URL is still /login (no redirect)
-      8. Screenshot: .sisyphus/evidence/task-1-login-failure.png
-    Expected Result: Error message shown, stays on login page
-    Evidence: .sisyphus/evidence/task-1-login-failure.png
+      1. [Trigger the error condition]
+      2. [Assert error is handled correctly]
+    Expected Result: [Graceful failure with correct error message/code]
+    Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}-error.{ext}
  \\\`\\\`\\\`

-  **Example — API/Backend (curl):**
-
-  \\\`\\\`\\\`
-  Scenario: Create user returns 201 with UUID
-    Tool: Bash (curl)
-    Preconditions: Server running on localhost:8080
-    Steps:
-      1. curl -s -w "\\n%{http_code}" -X POST http://localhost:8080/api/users \\
-           -H "Content-Type: application/json" \\
-           -d '{"email":"new@test.com","name":"Test User"}'
-      2. Assert: HTTP status is 201
-      3. Assert: response.id matches UUID format
-      4. GET /api/users/{returned-id} → Assert name equals "Test User"
-    Expected Result: User created and retrievable
-    Evidence: Response bodies captured
-
-  Scenario: Duplicate email returns 409
-    Tool: Bash (curl)
-    Preconditions: User with email "new@test.com" already exists
-    Steps:
-      1. Repeat POST with same email
-      2. Assert: HTTP status is 409
-      3. Assert: response.error contains "already exists"
-    Expected Result: Conflict error returned
-    Evidence: Response body captured
-  \\\`\\\`\\\`
-
-  **Example — TUI/CLI (interactive_bash):**
-
-  \\\`\\\`\\\`
-  Scenario: CLI loads config and displays menu
-    Tool: interactive_bash (tmux)
-    Preconditions: Binary built, test config at ./test.yaml
-    Steps:
-      1. tmux new-session: ./my-cli --config test.yaml
-      2. Wait for: "Configuration loaded" in output (timeout: 5s)
-      3. Assert: Menu items visible ("1. Create", "2. List", "3. Exit")
-      4. Send keys: "3" then Enter
-      5. Assert: "Goodbye" in output
-      6. Assert: Process exited with code 0
-    Expected Result: CLI starts, shows menu, exits cleanly
-    Evidence: Terminal output captured
-
-  Scenario: CLI handles missing config gracefully
-    Tool: interactive_bash (tmux)
-    Preconditions: No config file at ./nonexistent.yaml
-    Steps:
-      1. tmux new-session: ./my-cli --config nonexistent.yaml
-      2. Wait for: output (timeout: 3s)
-      3. Assert: stderr contains "Config file not found"
-      4. Assert: Process exited with code 1
-    Expected Result: Meaningful error, non-zero exit
-    Evidence: Error output captured
-  \\\`\\\`\\\`
+  > **Specificity requirements — every scenario MUST use:**
+  > - **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
+  > - **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
+  > - **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
+  > - **Timing**: Wait conditions where relevant (\`timeout: 10s\`)
+  > - **Negative**: At least ONE failure/error scenario per task
+  >
+  > **Anti-patterns (your scenario is INVALID if it looks like this):**
+  > - ❌ "Verify it works correctly" — HOW? What does "correctly" mean?
+  > - ❌ "Check the API returns data" — WHAT data? What fields? What values?
+  > - ❌ "Test the component renders" — WHERE? What selector? What content?
+  > - ❌ Any scenario without an evidence path

  **Evidence to Capture:**
-  - [ ] Screenshots in .sisyphus/evidence/ for UI scenarios
-  - [ ] Terminal output for CLI/TUI scenarios
-  - [ ] Response bodies for API scenarios
  - [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}
+  - [ ] Screenshots for UI, terminal output for CLI, response bodies for API

  **Commit**: YES | NO (groups with N)
  - Message: \`type(scope): desc\`
@@ -398,6 +288,28 @@ Parallel Speedup: ~40% faster than sequential

 ---

+## Final Verification Wave (MANDATORY — after ALL implementation tasks)
+
+> 4 review agents run in PARALLEL. ALL must APPROVE. Rejection → fix → re-run.
+
+- [ ] F1. **Plan Compliance Audit** — \`oracle\`
+  Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns — reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
+  Output: \`Must Have [N/N] | Must NOT Have [N/N] | Tasks [N/N] | VERDICT: APPROVE/REJECT\`
+
+- [ ] F2. **Code Quality Review** — \`unspecified-high\`
+  Run \`tsc --noEmit\` + linter + \`bun test\`. Review all changed files for: \`as any\`/\`@ts-ignore\`, empty catches, console.log in prod, commented-out code, unused imports. Check AI slop: excessive comments, over-abstraction, generic names (data/result/item/temp).
+  Output: \`Build [PASS/FAIL] | Lint [PASS/FAIL] | Tests [N pass/N fail] | Files [N clean/N issues] | VERDICT\`
+
+- [ ] F3. **Real Manual QA** — \`unspecified-high\` (+ \`playwright\` skill if UI)
+  Start from clean state. Execute EVERY QA scenario from EVERY task — follow exact steps, capture evidence. Test cross-task integration (features working together, not in isolation). Test edge cases: empty state, invalid input, rapid actions. Save to \`.sisyphus/evidence/final-qa/\`.
+  Output: \`Scenarios [N/N pass] | Integration [N/N] | Edge Cases [N tested] | VERDICT\`
+
+- [ ] F4. **Scope Fidelity Check** — \`deep\`
+  For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 — everything in spec was built (no missing), nothing beyond spec was built (no creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
+  Output: \`Tasks [N/N compliant] | Contamination [CLEAN/N issues] | Unaccounted [CLEAN/N files] | VERDICT\`
+
+---
+
 ## Commit Strategy

 | After Task | Message | Files | Verification |
--- a/src/cli/snapshots/model-fallback.test.ts.snap
+++ b/src/cli/snapshots/model-fallback.test.ts.snap
@@ -247,7 +247,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
      "model": "opencode/glm-4.7-free",
    },
    "writing": {
-      "model": "openai/gpt-5.2",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -314,7 +314,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
      "model": "opencode/glm-4.7-free",
    },
    "writing": {
-      "model": "openai/gpt-5.2",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -372,6 +372,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -432,6 +433,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -505,6 +507,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -579,6 +582,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -652,6 +656,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -726,6 +731,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -799,6 +805,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -873,6 +880,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -927,10 +935,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
      "model": "opencode/glm-4.7-free",
    },
    "visual-engineering": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -982,10 +990,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
      "model": "opencode/glm-4.7-free",
    },
    "visual-engineering": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
-      "model": "zai-coding-plan/glm-4.7",
+      "model": "opencode/glm-4.7-free",
    },
  },
 }
@@ -1056,6 +1064,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
    },
    "visual-engineering": {
      "model": "opencode/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "opencode/gemini-3-flash",
@@ -1129,6 +1138,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -1189,8 +1199,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
      "model": "anthropic/claude-sonnet-4-5",
    },
    "visual-engineering": {
-      "model": "anthropic/claude-opus-4-6",
-      "variant": "max",
+      "model": "zai-coding-plan/glm-5",
    },
    "writing": {
      "model": "anthropic/claude-sonnet-4-5",
@@ -1256,6 +1265,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -1329,6 +1339,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
    },
    "visual-engineering": {
      "model": "github-copilot/gemini-3-pro-preview",
+      "variant": "high",
    },
    "writing": {
      "model": "github-copilot/gemini-3-flash-preview",
@@ -1402,6 +1413,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
@@ -1476,6 +1488,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
    },
    "visual-engineering": {
      "model": "google/gemini-3-pro",
+      "variant": "high",
    },
    "writing": {
      "model": "google/gemini-3-flash",
--- a/src/cli/cli-installer.test.ts
+++ b/src/cli/cli-installer.test.ts
@@ -0,0 +1,83 @@
+import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from "bun:test"
+import * as configManager from "./config-manager"
+import { runCliInstaller } from "./cli-installer"
+import type { InstallArgs } from "./types"
+
+describe("runCliInstaller", () => {
+  const mockConsoleLog = mock(() => {})
+  const mockConsoleError = mock(() => {})
+  const originalConsoleLog = console.log
+  const originalConsoleError = console.error
+
+  beforeEach(() => {
+    console.log = mockConsoleLog
+    console.error = mockConsoleError
+    mockConsoleLog.mockClear()
+    mockConsoleError.mockClear()
+  })
+
+  afterEach(() => {
+    console.log = originalConsoleLog
+    console.error = originalConsoleError
+  })
+
+  it("runs auth and provider setup steps when openai or copilot are enabled without gemini", async () => {
+    //#given - config-manager functions are stubbed with successful results; args enable openai and copilot but not gemini
+    const addAuthPluginsSpy = spyOn(configManager, "addAuthPlugins").mockResolvedValue({
+      success: true,
+      configPath: "/tmp/opencode.jsonc",
+    })
+    const addProviderConfigSpy = spyOn(configManager, "addProviderConfig").mockReturnValue({
+      success: true,
+      configPath: "/tmp/opencode.jsonc",
+    })
+    const restoreSpies = [
+      addAuthPluginsSpy,
+      addProviderConfigSpy,
+      spyOn(configManager, "detectCurrentConfig").mockReturnValue({
+        isInstalled: false,
+        hasClaude: false,
+        isMax20: false,
+        hasOpenAI: false,
+        hasGemini: false,
+        hasCopilot: false,
+        hasOpencodeZen: false,
+        hasZaiCodingPlan: false,
+        hasKimiForCoding: false,
+      }),
+      spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(true),
+      spyOn(configManager, "getOpenCodeVersion").mockResolvedValue("1.0.200"),
+      spyOn(configManager, "addPluginToOpenCodeConfig").mockResolvedValue({
+        success: true,
+        configPath: "/tmp/opencode.jsonc",
+      }),
+      spyOn(configManager, "writeOmoConfig").mockReturnValue({
+        success: true,
+        configPath: "/tmp/oh-my-opencode.jsonc",
+      }),
+    ]
+
+    const args: InstallArgs = {
+      tui: false,
+      claude: "no",
+      openai: "yes",
+      gemini: "no",
+      copilot: "yes",
+      opencodeZen: "no",
+      zaiCodingPlan: "no",
+      kimiForCoding: "no",
+    }
+
+    try {
+      //#when
+      const result = await runCliInstaller(args, "3.4.0")
+
+      //#then
+      expect(result).toBe(0)
+      expect(addAuthPluginsSpy).toHaveBeenCalledTimes(1)
+      expect(addProviderConfigSpy).toHaveBeenCalledTimes(1)
+    } finally {
+      for (const spy of restoreSpies) spy.mockRestore() // runs even when an assertion above throws, so stubs never leak
+    }
+  })
+})
--- a/src/cli/cli-installer.ts
+++ b/src/cli/cli-installer.ts
@@ -77,7 +77,9 @@ export async function runCliInstaller(args: InstallArgs, version: string): Promi
    `Plugin ${isUpdate ? "verified" : "added"} ${SYMBOLS.arrow} ${color.dim(pluginResult.configPath)}`,
  )

-  if (config.hasGemini) {
+  const needsProviderSetup = config.hasGemini || config.hasOpenAI || config.hasCopilot
+
+  if (needsProviderSetup) {
    printStep(step++, totalSteps, "Adding auth plugins...")
    const authResult = await addAuthPlugins(config)
    if (!authResult.success) {
--- a/src/features/background-agent/background-event-handler.ts
+++ b/src/features/background-agent/background-event-handler.ts
@@ -52,7 +52,7 @@ export function handleBackgroundEvent(args: {

  const props = event.properties

-  if (event.type === "message.part.updated") {
+  if (event.type === "message.part.updated" || event.type === "message.part.delta") {
    if (!props || !isRecord(props)) return
    const sessionID = getString(props, "sessionID")
    if (!sessionID) return
--- a/src/features/background-agent/manager.test.ts
+++ b/src/features/background-agent/manager.test.ts
@@ -6,6 +6,7 @@ import type { BackgroundTask, ResumeInput } from "./types"
 import { MIN_IDLE_TIME_MS } from "./constants"
 import { BackgroundManager } from "./manager"
 import { ConcurrencyManager } from "./concurrency"
+import { initTaskToastManager, _resetTaskToastManagerForTesting } from "../task-toast-manager/manager"


 const TASK_TTL_MS = 30 * 60 * 1000
@@ -190,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
  return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
 }

+function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
+  return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
+}
+
 function getQueuesByKey(
  manager: BackgroundManager
 ): Map<string, Array<{ task: BackgroundTask; input: import("./types").LaunchInput }>> {
@@ -215,6 +220,23 @@ function stubNotifyParentSession(manager: BackgroundManager): void {
  ;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {}
 }

+/**
+ * Resets and re-creates the task toast manager with a stub client, recording every removeTask(taskId) call.
+ */
+function createToastRemoveTaskTracker(): { removeTaskCalls: string[]; resetToastManager: () => void } {
+  _resetTaskToastManagerForTesting()
+  const removeTaskCalls: string[] = []
+  const stubClient = { tui: { showToast: async () => {} } } as unknown as PluginInput["client"]
+  const toastManager = initTaskToastManager(stubClient)
+  const delegateRemoveTask = toastManager.removeTask.bind(toastManager)
+  // Wrap removeTask: record the id first, then run the real implementation.
+  toastManager.removeTask = (taskId: string): void => {
+    removeTaskCalls.push(taskId)
+    delegateRemoveTask(taskId)
+  }
+  return { removeTaskCalls, resetToastManager: _resetTaskToastManagerForTesting }
+}
+
 function getCleanupSignals(): Array<NodeJS.Signals | "beforeExit" | "exit"> {
  const signals: Array<NodeJS.Signals | "beforeExit" | "exit"> = ["SIGINT", "SIGTERM", "beforeExit", "exit"]
  if (process.platform === "win32") {
@@ -894,7 +916,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
 })

 describe("BackgroundManager.notifyParentSession - aborted parent", () => {
-  test("should skip notification when parent session is aborted", async () => {
+  test("should fall back and still notify when parent session messages are aborted", async () => {
    //#given
    let promptCalled = false
    const promptMock = async () => {
@@ -933,7 +955,7 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
      .notifyParentSession(task)

    //#then
-    expect(promptCalled).toBe(false)
+    expect(promptCalled).toBe(true)

    manager.shutdown()
  })
@@ -1770,6 +1792,32 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
      const pendingSet = pendingByParent.get(task.parentSessionID)
      expect(pendingSet?.has(task.id) ?? false).toBe(false)
    })
+
+    test("should remove task from toast manager when notification is skipped", async () => {
+      //#given - a running task stored in the manager's task map, with toast removeTask calls being recorded
+      const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+      const manager = createBackgroundManager()
+      const task = createMockTask({
+        id: "task-cancel-skip-notification",
+        sessionID: "session-cancel-skip-notification",
+        parentSessionID: "parent-cancel-skip-notification",
+        status: "running",
+      })
+      getTaskMap(manager).set(task.id, task)
+
+      //#when - the task is cancelled with skipNotification set
+      const cancelled = await manager.cancelTask(task.id, {
+        source: "test",
+        skipNotification: true,
+      })
+
+      //#then - cancelTask reports success and removeTask was called with the task id
+      expect(cancelled).toBe(true)
+      expect(removeTaskCalls).toContain(task.id)
+
+      manager.shutdown()
+      resetToastManager()
+    })
  })

  describe("multiple keys process in parallel", () => {
@@ -2730,6 +2778,43 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {

    manager.shutdown()
  })
+
+  test("should remove tasks from toast manager when session is deleted", () => {
+    //#given - a running child of the parent session and a pending grandchild whose parent is the child's session
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const parentSessionID = "session-parent-toast"
+    const childTask = createMockTask({
+      id: "task-child-toast",
+      sessionID: "session-child-toast",
+      parentSessionID,
+      status: "running",
+    })
+    const grandchildTask = createMockTask({
+      id: "task-grandchild-toast",
+      sessionID: "session-grandchild-toast",
+      parentSessionID: "session-child-toast",
+      status: "pending",
+      startedAt: undefined,
+      queuedAt: new Date(),
+    })
+    const taskMap = getTaskMap(manager)
+    taskMap.set(childTask.id, childTask)
+    taskMap.set(grandchildTask.id, grandchildTask)
+
+    //#when - a session.deleted event arrives for the parent session
+    manager.handleEvent({
+      type: "session.deleted",
+      properties: { info: { id: parentSessionID } },
+    })
+
+    //#then - removeTask was called for both the child and the grandchild
+    expect(removeTaskCalls).toContain(childTask.id)
+    expect(removeTaskCalls).toContain(grandchildTask.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
 })

 describe("BackgroundManager.handleEvent - session.error", () => {
@@ -2777,6 +2862,35 @@ describe("BackgroundManager.handleEvent - session.error", () => {
    manager.shutdown()
  })

+  test("removes errored task from toast manager", () => {
+    //#given - a running task bound to sessionID, with toast removeTask calls being recorded
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const sessionID = "ses_error_toast"
+    const task = createMockTask({
+      id: "task-session-error-toast",
+      sessionID,
+      parentSessionID: "parent-session",
+      status: "running",
+    })
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - a session.error event arrives for that session
+    manager.handleEvent({
+      type: "session.error",
+      properties: {
+        sessionID,
+        error: { name: "UnknownError", message: "boom" },
+      },
+    })
+
+    //#then - removeTask was called with the errored task's id
+    expect(removeTaskCalls).toContain(task.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
+
  test("ignores session.error for non-running tasks", () => {
    //#given
    const manager = createBackgroundManager()
@@ -2922,13 +3036,32 @@ describe("BackgroundManager.pruneStaleTasksAndNotifications - removes pruned tas

    manager.shutdown()
  })
+
+  test("removes stale task from toast manager", () => {
+    //#given - a running task whose startedAt is 31 minutes in the past
+    const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
+    const manager = createBackgroundManager()
+    const staleTask = createMockTask({
+      id: "task-stale-toast",
+      sessionID: "session-stale-toast",
+      parentSessionID: "parent-session",
+      status: "running",
+      startedAt: new Date(Date.now() - 31 * 60 * 1000),
+    })
+    getTaskMap(manager).set(staleTask.id, staleTask)
+
+    //#when - the prune pass runs
+    pruneStaleTasksAndNotificationsForTest(manager)
+
+    //#then - removeTask was called with the stale task's id
+    expect(removeTaskCalls).toContain(staleTask.id)
+
+    manager.shutdown()
+    resetToastManager()
+  })
 })

 describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
-  function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
-    return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
-  }
-
  function setCompletionTimer(manager: BackgroundManager, taskId: string): void {
    const completionTimers = getCompletionTimers(manager)
    const timer = setTimeout(() => {
@@ -3413,4 +3546,134 @@ describe("BackgroundManager.handleEvent - non-tool event lastUpdate", () => {
    //#then - task should still be running (text event refreshed lastUpdate)
    expect(task.status).toBe("running")
  })
+
+  test("should refresh lastUpdate on message.part.delta events (OpenCode >=1.2.0)", async () => {
+    //#given - a running task whose lastUpdate is 300s old, with staleTimeoutMs configured as 180s
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
+    stubNotifyParentSession(manager)
+
+    const task: BackgroundTask = {
+      id: "task-delta-1",
+      sessionID: "session-delta-1",
+      parentSessionID: "parent-1",
+      parentMessageID: "msg-1",
+      description: "Reasoning task with delta events",
+      prompt: "Extended thinking",
+      agent: "oracle",
+      status: "running",
+      startedAt: new Date(Date.now() - 600_000),
+      progress: {
+        toolCalls: 0,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    //#when - a message.part.delta event arrives (reasoning-delta or text-delta in OpenCode >=1.2.0)
+    manager.handleEvent({
+      type: "message.part.delta",
+      properties: { sessionID: "session-delta-1", field: "text", delta: "thinking..." },
+    })
+    await manager["checkAndInterruptStaleTasks"]()
+
+    //#then - task should still be running (delta event refreshed lastUpdate)
+    expect(task.status).toBe("running")
+  })
+})
+
+describe("BackgroundManager regression fixes - resume and aborted notification", () => {
+  test("should keep resumed task in memory after previous completion timer deadline", async () => {
+    //#given - a completed task plus a registered 25ms completion timer that would delete it from the task map
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => ({}),
+        abort: async () => ({}),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+
+    const task: BackgroundTask = {
+      id: "task-resume-timer-regression",
+      sessionID: "session-resume-timer-regression",
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "resume timer regression",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+      concurrencyGroup: "explore",
+    }
+    getTaskMap(manager).set(task.id, task)
+
+    const completionTimers = getCompletionTimers(manager)
+    const timer = setTimeout(() => {
+      completionTimers.delete(task.id)
+      getTaskMap(manager).delete(task.id)
+    }, 25)
+    completionTimers.set(task.id, timer)
+
+    //#when - the task's session is resumed, then 60ms pass (longer than the 25ms timer)
+    await manager.resume({
+      sessionId: "session-resume-timer-regression",
+      prompt: "resume task",
+      parentSessionID: "parent-session-2",
+      parentMessageID: "msg-2",
+    })
+    await new Promise((resolve) => setTimeout(resolve, 60))
+
+    //#then - the task is still in the task map and its completion timer entry is gone
+    expect(getTaskMap(manager).has(task.id)).toBe(true)
+    expect(completionTimers.has(task.id)).toBe(false)
+
+    manager.shutdown()
+  })
+
+  test("should start cleanup timer even when promptAsync aborts", async () => {
+    //#given - promptAsync throws an error named MessageAbortedError; a completed task is its parent's only pending entry
+    const client = {
+      session: {
+        prompt: async () => ({}),
+        promptAsync: async () => {
+          const error = new Error("User aborted")
+          error.name = "MessageAbortedError"
+          throw error
+        },
+        abort: async () => ({}),
+        messages: async () => ({ data: [] }),
+      },
+    }
+    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
+    const task: BackgroundTask = {
+      id: "task-aborted-cleanup-regression",
+      sessionID: "session-aborted-cleanup-regression",
+      parentSessionID: "parent-session",
+      parentMessageID: "msg-1",
+      description: "aborted prompt cleanup regression",
+      prompt: "test",
+      agent: "explore",
+      status: "completed",
+      startedAt: new Date(),
+      completedAt: new Date(),
+    }
+    getTaskMap(manager).set(task.id, task)
+    getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
+
+    //#when - notifyParentSession is invoked directly for that task
+    await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> }).notifyParentSession(task)
+
+    //#then - a completion timer is registered under the task id
+    expect(getCompletionTimers(manager).has(task.id)).toBe(true)
+
+    manager.shutdown()
+  })
 })
--- a/src/features/background-agent/manager.ts
+++ b/src/features/background-agent/manager.ts
@@ -528,6 +528,12 @@ export class BackgroundManager {
      return existingTask
    }

+    const completionTimer = this.completionTimers.get(existingTask.id)
+    if (completionTimer) {
+      clearTimeout(completionTimer)
+      this.completionTimers.delete(existingTask.id)
+    }
+
    // Re-acquire concurrency using the persisted concurrency group
    const concurrencyKey = existingTask.concurrencyGroup ?? existingTask.agent
    await this.concurrencyManager.acquire(concurrencyKey)
@@ -660,7 +666,7 @@ export class BackgroundManager {
  handleEvent(event: Event): void {
    const props = event.properties

-    if (event.type === "message.part.updated") {
+    if (event.type === "message.part.updated" || event.type === "message.part.delta") {
      if (!props || typeof props !== "object" || !("sessionID" in props)) return
      const partInfo = props as unknown as MessagePartInfo
      const sessionID = partInfo?.sessionID
@@ -783,6 +789,10 @@ export class BackgroundManager {
      this.cleanupPendingByParent(task)
      this.tasks.delete(task.id)
      this.clearNotificationsForTask(task.id)
+      const toastManager = getTaskToastManager()
+      if (toastManager) {
+        toastManager.removeTask(task.id)
+      }
      if (task.sessionID) {
        subagentSessions.delete(task.sessionID)
      }
@@ -830,6 +840,10 @@ export class BackgroundManager {
        this.cleanupPendingByParent(task)
        this.tasks.delete(task.id)
        this.clearNotificationsForTask(task.id)
+        const toastManager = getTaskToastManager()
+        if (toastManager) {
+          toastManager.removeTask(task.id)
+        }
        if (task.sessionID) {
          subagentSessions.delete(task.sessionID)
        }
@@ -1000,6 +1014,10 @@ export class BackgroundManager {
    }

    if (options?.skipNotification) {
+      const toastManager = getTaskToastManager()
+      if (toastManager) {
+        toastManager.removeTask(task.id)
+      }
      log(`[background-agent] Task cancelled via ${source} (notification skipped):`, task.id)
      return true
    }
@@ -1239,11 +1257,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      }
    } catch (error) {
      if (this.isAbortedSessionError(error)) {
-        log("[background-agent] Parent session aborted, skipping notification:", {
+        log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
          taskId: task.id,
          parentSessionID: task.parentSessionID,
        })
-        return
      }
      const messageDir = getMessageDir(task.parentSessionID)
      const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
@@ -1277,13 +1294,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      })
    } catch (error) {
      if (this.isAbortedSessionError(error)) {
-        log("[background-agent] Parent session aborted, skipping notification:", {
+        log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
          taskId: task.id,
          parentSessionID: task.parentSessionID,
        })
-        return
+      } else {
+        log("[background-agent] Failed to send notification:", error)
      }
-      log("[background-agent] Failed to send notification:", error)
    }

    if (allComplete) {
@@ -1413,6 +1430,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
          }
        }
        this.clearNotificationsForTask(taskId)
+        const toastManager = getTaskToastManager()
+        if (toastManager) {
+          toastManager.removeTask(taskId)
+        }
        this.tasks.delete(taskId)
        if (task.sessionID) {
          subagentSessions.delete(task.sessionID)
@@ -1452,7 +1473,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
      const sessionID = task.sessionID
      if (!startedAt || !sessionID) continue

-      const sessionIsRunning = allStatuses[sessionID]?.type === "running"
+      const sessionStatus = allStatuses[sessionID]?.type
+      const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
      const runtime = now - startedAt.getTime()

      if (!task.progress?.lastUpdate) {
--- a/src/features/background-agent/task-poller.test.ts
+++ b/src/features/background-agent/task-poller.test.ts
@@ -146,14 +146,59 @@ describe("checkAndInterruptStaleTasks", () => {
      },
    })

-    //#when — session status is "running"
+    //#when — session status is "busy" (OpenCode's actual status for active LLM processing)
    await checkAndInterruptStaleTasks({
      tasks: [task],
      client: mockClient as never,
      config: { staleTimeoutMs: 180_000 },
      concurrencyManager: mockConcurrencyManager as never,
      notifyParentSession: mockNotify,
-      sessionStatuses: { "ses-1": { type: "running" } },
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — task should survive because session is actively busy
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy session task even with very old lastUpdate", async () => {
+    //#given — lastUpdate is 15min old, but session is still busy
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 900_000),
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 900_000),
+      },
+    })
+
+    //#when — session busy, lastUpdate far exceeds any timeout
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000, messageStalenessTimeoutMs: 600_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — busy sessions are NEVER stale-killed (babysitter + TTL prune handle these)
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy session even with no progress (undefined lastUpdate)", async () => {
+    //#given — task has no progress at all, but session is busy
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 15 * 60 * 1000),
+      progress: undefined,
+    })
+
+    //#when — session is busy
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { messageStalenessTimeoutMs: 600_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
    })

    //#then — task should survive because session is actively running
@@ -255,6 +300,75 @@ describe("checkAndInterruptStaleTasks", () => {
    expect(task.error).toContain("Stale timeout")
  })

+  it("should NOT interrupt task when session is busy (OpenCode status), even if lastUpdate exceeds stale timeout", async () => {
+    //#given — lastUpdate is 5min old but session is "busy" (OpenCode's actual status for active sessions)
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000),
+      progress: {
+        toolCalls: 2,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session status is "busy" (not "running" — OpenCode uses "busy" for active LLM processing)
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — "busy" sessions must be protected from stale-kill
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt task when session is in retry state", async () => {
+    //#given — lastUpdate is 5min old but session is retrying
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 300_000),
+      progress: {
+        toolCalls: 1,
+        lastUpdate: new Date(Date.now() - 300_000),
+      },
+    })
+
+    //#when — session status is "retry" (OpenCode retries on transient API errors)
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { staleTimeoutMs: 180_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "retry" } },
+    })
+
+    //#then — retry sessions must be protected from stale-kill
+    expect(task.status).toBe("running")
+  })
+
+  it("should NOT interrupt busy thinking-model session that has streamed no progress yet (undefined lastUpdate)", async () => {
+    //#given — no progress at all, session is "busy" (thinking model with no streamed tokens yet)
+    const task = createRunningTask({
+      startedAt: new Date(Date.now() - 15 * 60 * 1000),
+      progress: undefined,
+    })
+
+    //#when — session is busy
+    await checkAndInterruptStaleTasks({
+      tasks: [task],
+      client: mockClient as never,
+      config: { messageStalenessTimeoutMs: 600_000 },
+      concurrencyManager: mockConcurrencyManager as never,
+      notifyParentSession: mockNotify,
+      sessionStatuses: { "ses-1": { type: "busy" } },
+    })
+
+    //#then — busy sessions with no progress must survive
+    expect(task.status).toBe("running")
+  })
+
  it("should release concurrency key when interrupting a never-updated task", async () => {
    //#given
    const releaseMock = mock(() => {})
--- a/src/features/background-agent/task-poller.ts
+++ b/src/features/background-agent/task-poller.ts
@@ -80,7 +80,8 @@ export async function checkAndInterruptStaleTasks(args: {
    const sessionID = task.sessionID
    if (!startedAt || !sessionID) continue

-    const sessionIsRunning = sessionStatuses?.[sessionID]?.type === "running"
+    const sessionStatus = sessionStatuses?.[sessionID]?.type
+    const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
    const runtime = now - startedAt.getTime()

    if (!task.progress?.lastUpdate) {
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.test.ts
@@ -0,0 +1,105 @@
+import { beforeEach, describe, expect, mock, test } from "bun:test"
+import type { PluginInput } from "@opencode-ai/plugin"
+
+const executeCompactMock = mock(async () => {})
+const getLastAssistantMock = mock(async () => ({
+  providerID: "anthropic",
+  modelID: "claude-sonnet-4-5",
+}))
+const parseAnthropicTokenLimitErrorMock = mock(() => ({
+  providerID: "anthropic",
+  modelID: "claude-sonnet-4-5",
+}))
+// Module mocks are registered here, before the test body dynamically imports "./recovery-hook".
+mock.module("./executor", () => ({
+  executeCompact: executeCompactMock,
+  getLastAssistant: getLastAssistantMock,
+}))
+
+mock.module("./parser", () => ({
+  parseAnthropicTokenLimitError: parseAnthropicTokenLimitErrorMock,
+}))
+
+mock.module("../../shared/logger", () => ({
+  log: () => {},
+}))
+
+/**
+ * Minimal PluginInput stub: session.messages resolves to an empty list,
+ * tui.showToast resolves to nothing, and directory is "/tmp".
+ */
+function createMockContext(): PluginInput {
+  return {
+    directory: "/tmp",
+    client: {
+      tui: { showToast: mock(async () => {}) },
+      session: { messages: mock(async () => ({ data: [] })) },
+    },
+  } as PluginInput
+}
+
+// Swaps the global timer functions for stubs: setTimeout never runs its callback and hands out
+// 1, 2, 3... as handles; clearTimeout only records the handle it was given.
+function setupDelayedTimeoutMocks(): {
+  restore: () => void
+  getClearTimeoutCalls: () => Array<ReturnType<typeof setTimeout>>
+} {
+  const realTimers = { set: globalThis.setTimeout, clear: globalThis.clearTimeout }
+  const clearedHandles: Array<ReturnType<typeof setTimeout>> = []
+  let issuedHandles = 0
+
+  const stubSetTimeout = (_: () => void, _delay?: number) => {
+    issuedHandles += 1
+    return issuedHandles as ReturnType<typeof setTimeout>
+  }
+  const stubClearTimeout = (timeoutID: ReturnType<typeof setTimeout>) => {
+    clearedHandles.push(timeoutID)
+  }
+  globalThis.setTimeout = stubSetTimeout as typeof setTimeout
+  globalThis.clearTimeout = stubClearTimeout as typeof clearTimeout
+
+  function restore(): void {
+    globalThis.setTimeout = realTimers.set
+    globalThis.clearTimeout = realTimers.clear
+  }
+
+  return { restore, getClearTimeoutCalls: () => clearedHandles }
+}
+
+describe("createAnthropicContextWindowLimitRecoveryHook", () => {
+  beforeEach(() => {
+    executeCompactMock.mockClear()
+    getLastAssistantMock.mockClear()
+    parseAnthropicTokenLimitErrorMock.mockClear()
+  })
+
+  test("cancels pending timer when session.idle handles compaction first", async () => {
+    //#given - the module loads with real timers; once the stubs are in, everything runs under try/finally
+    const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
+    const { restore, getClearTimeoutCalls } = setupDelayedTimeoutMocks()
+
+    try {
+      const hook = createAnthropicContextWindowLimitRecoveryHook(createMockContext())
+      //#when - a token-limit error is reported, then the session goes idle before any timer callback runs
+      await hook.event({
+        event: {
+          type: "session.error",
+          properties: { sessionID: "session-race", error: "prompt is too long" },
+        },
+      })
+
+      await hook.event({
+        event: {
+          type: "session.idle",
+          properties: { sessionID: "session-race" },
+        },
+      })
+
+      //#then - the first stub handle (1) was cleared and compaction ran exactly once for the session
+      expect(getClearTimeoutCalls()).toEqual([1 as ReturnType<typeof setTimeout>])
+      expect(executeCompactMock).toHaveBeenCalledTimes(1)
+      expect(executeCompactMock.mock.calls[0]?.[0]).toBe("session-race")
+    } finally {
+      restore()
+    }
+  })
+})
--- a/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
+++ b/src/hooks/anthropic-context-window-limit-recovery/recovery-hook.ts
@@ -28,6 +28,7 @@ export function createAnthropicContextWindowLimitRecoveryHook(
 ) {
  const autoCompactState = createRecoveryState()
  const experimental = options?.experimental
+  const pendingCompactionTimeoutBySession = new Map<string, ReturnType<typeof setTimeout>>()

  const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
    const props = event.properties as Record<string, unknown> | undefined
@@ -35,6 +36,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(
    if (event.type === "session.deleted") {
      const sessionInfo = props?.info as { id?: string } | undefined
      if (sessionInfo?.id) {
+        const timeoutID = pendingCompactionTimeoutBySession.get(sessionInfo.id)
+        if (timeoutID !== undefined) {
+          clearTimeout(timeoutID)
+          pendingCompactionTimeoutBySession.delete(sessionInfo.id)
+        }
+
        autoCompactState.pendingCompact.delete(sessionInfo.id)
        autoCompactState.errorDataBySession.delete(sessionInfo.id)
        autoCompactState.retryStateBySession.delete(sessionInfo.id)
@@ -76,7 +83,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
          })
          .catch(() => {})

-        setTimeout(() => {
+        const timeoutID = setTimeout(() => {
+          pendingCompactionTimeoutBySession.delete(sessionID)
          executeCompact(
            sessionID,
            { providerID, modelID },
@@ -86,6 +94,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
            experimental,
          )
        }, 300)
+        if (pendingCompactionTimeoutBySession.has(sessionID)) clearTimeout(pendingCompactionTimeoutBySession.get(sessionID)) // cancel the timer this one supersedes
+        pendingCompactionTimeoutBySession.set(sessionID, timeoutID)
      }
      return
    }
@@ -114,6 +124,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(

      if (!autoCompactState.pendingCompact.has(sessionID)) return

+      const timeoutID = pendingCompactionTimeoutBySession.get(sessionID)
+      if (timeoutID !== undefined) {
+        clearTimeout(timeoutID)
+        pendingCompactionTimeoutBySession.delete(sessionID)
+      }
+
      const errorData = autoCompactState.errorDataBySession.get(sessionID)
      const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)

--- a/src/hooks/preemptive-compaction.test.ts
+++ b/src/hooks/preemptive-compaction.test.ts
@@ -1,5 +1,12 @@
 import { describe, it, expect, mock, beforeEach } from "bun:test"
-import { createPreemptiveCompactionHook } from "./preemptive-compaction"
+
+const logMock = mock(() => {})
+
+mock.module("../shared/logger", () => ({
+  log: logMock,
+}))
+
+const { createPreemptiveCompactionHook } = await import("./preemptive-compaction")

 function createMockCtx() {
  return {
@@ -21,6 +28,7 @@ describe("preemptive-compaction", () => {

  beforeEach(() => {
    ctx = createMockCtx()
+    logMock.mockClear()
  })

  // #given event caches token info from message.updated
@@ -152,4 +160,45 @@ describe("preemptive-compaction", () => {

    expect(ctx.client.session.summarize).not.toHaveBeenCalled()
  })
+
+  it("should log summarize errors instead of swallowing them", async () => {
+    //#given - summarize is set to reject once with a known error
+    const hook = createPreemptiveCompactionHook(ctx as never)
+    const sessionID = "ses_log_error"
+    const summarizeError = new Error("summarize failed")
+    ctx.client.session.summarize.mockRejectedValueOnce(summarizeError)
+    // Feed one assistant message.updated event carrying the token usage for this session.
+    await hook.event({
+      event: {
+        type: "message.updated",
+        properties: {
+          info: {
+            role: "assistant",
+            sessionID,
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-5",
+            finish: true,
+            tokens: {
+              input: 170000,
+              output: 0,
+              reasoning: 0,
+              cache: { read: 10000, write: 0 },
+            },
+          },
+        },
+      },
+    })
+
+    //#when - a tool call finishes in the same session
+    await hook["tool.execute.after"](
+      { tool: "bash", sessionID, callID: "call_log" },
+      { title: "", output: "test", metadata: null }
+    )
+
+    //#then - the rejection was passed to log with the session id and the stringified error
+    expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", {
+      sessionID,
+      error: String(summarizeError),
+    })
+  })
 })
--- a/src/hooks/preemptive-compaction.ts
+++ b/src/hooks/preemptive-compaction.ts
@@ -1,3 +1,5 @@
+import { log } from "../shared/logger"
+
 const DEFAULT_ACTUAL_LIMIT = 200_000

 const ANTHROPIC_ACTUAL_LIMIT =
@@ -76,8 +78,8 @@ export function createPreemptiveCompactionHook(ctx: PluginInput) {
      })

      compactedSessions.add(sessionID)
-    } catch {
-      // best-effort; do not disrupt tool execution
+    } catch (error) {
+      log("[preemptive-compaction] Compaction failed", { sessionID, error: String(error) })
    } finally {
      compactionInProgress.delete(sessionID)
    }
--- a/src/hooks/todo-continuation-enforcer/constants.ts
+++ b/src/hooks/todo-continuation-enforcer/constants.ts
@@ -18,3 +18,5 @@ export const COUNTDOWN_GRACE_PERIOD_MS = 500

 export const ABORT_WINDOW_MS = 3000
 export const CONTINUATION_COOLDOWN_MS = 30_000
+export const MAX_CONSECUTIVE_FAILURES = 5
+export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000
--- a/src/hooks/todo-continuation-enforcer/continuation-injection.ts
+++ b/src/hooks/todo-continuation-enforcer/continuation-injection.ts
@@ -141,11 +141,14 @@ ${todoList}`
    if (injectionState) {
      injectionState.inFlight = false
      injectionState.lastInjectedAt = Date.now()
+      injectionState.consecutiveFailures = 0
    }
  } catch (error) {
    log(`[${HOOK_NAME}] Injection failed`, { sessionID, error: String(error) })
    if (injectionState) {
      injectionState.inFlight = false
+      injectionState.lastInjectedAt = Date.now()
+      injectionState.consecutiveFailures = (injectionState.consecutiveFailures ?? 0) + 1
    }
  }
 }
--- a/src/hooks/todo-continuation-enforcer/idle-event.ts
+++ b/src/hooks/todo-continuation-enforcer/idle-event.ts
@@ -8,7 +8,9 @@ import {
  ABORT_WINDOW_MS,
  CONTINUATION_COOLDOWN_MS,
  DEFAULT_SKIP_AGENTS,
+  FAILURE_RESET_WINDOW_MS,
  HOOK_NAME,
+  MAX_CONSECUTIVE_FAILURES,
 } from "./constants"
 import { isLastAssistantMessageAborted } from "./abort-detection"
 import { getIncompleteCount } from "./todo"
@@ -99,8 +101,35 @@ export async function handleSessionIdle(args: {
    return
  }

-  if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < CONTINUATION_COOLDOWN_MS) {
-    log(`[${HOOK_NAME}] Skipped: cooldown active`, { sessionID })
+  if (
+    state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES
+    && state.lastInjectedAt
+    && Date.now() - state.lastInjectedAt >= FAILURE_RESET_WINDOW_MS
+  ) {
+    state.consecutiveFailures = 0
+    log(`[${HOOK_NAME}] Reset consecutive failures after recovery window`, {
+      sessionID,
+      failureResetWindowMs: FAILURE_RESET_WINDOW_MS,
+    })
+  }
+
+  if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
+    log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, {
+      sessionID,
+      consecutiveFailures: state.consecutiveFailures,
+      maxConsecutiveFailures: MAX_CONSECUTIVE_FAILURES,
+    })
+    return
+  }
+
+  const effectiveCooldown =
+    CONTINUATION_COOLDOWN_MS * Math.pow(2, Math.min(state.consecutiveFailures, 5))
+  if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < effectiveCooldown) {
+    log(`[${HOOK_NAME}] Skipped: cooldown active`, {
+      sessionID,
+      effectiveCooldown,
+      consecutiveFailures: state.consecutiveFailures,
+    })
    return
  }

--- a/src/hooks/todo-continuation-enforcer/session-state.ts
+++ b/src/hooks/todo-continuation-enforcer/session-state.ts
@@ -45,7 +45,9 @@ export function createSessionStateStore(): SessionStateStore {
      return existing.state
    }

-    const state: SessionState = {}
+    const state: SessionState = {
+      consecutiveFailures: 0,
+    }
    sessions.set(sessionID, { state, lastAccessedAt: Date.now() })
    return state
  }
--- a/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts
+++ b/src/hooks/todo-continuation-enforcer/todo-continuation-enforcer.test.ts
@@ -4,7 +4,11 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"
 import type { BackgroundManager } from "../../features/background-agent"
 import { setMainSession, subagentSessions, _resetForTesting } from "../../features/claude-code-session-state"
 import { createTodoContinuationEnforcer } from "."
-import { CONTINUATION_COOLDOWN_MS } from "./constants"
+import {
+  CONTINUATION_COOLDOWN_MS,
+  FAILURE_RESET_WINDOW_MS,
+  MAX_CONSECUTIVE_FAILURES,
+} from "./constants"

 type TimerCallback = (...args: any[]) => void

@@ -164,6 +168,15 @@ describe("todo-continuation-enforcer", () => {
    }
  }

+  interface PromptRequestOptions { // argument shape read by the promptAsync stubs in the tests below
+    path: { id: string }
+    body: {
+      agent?: string
+      model?: { providerID?: string; modelID?: string }
+      parts: Array<{ text: string }>
+    }
+  }
+
  let mockMessages: MockMessage[] = []

  function createMockPluginInput() {
@@ -551,6 +564,164 @@ describe("todo-continuation-enforcer", () => {
    expect(promptCalls).toHaveLength(2)
  }, { timeout: 15000 })

+  test("should apply cooldown even after injection failure", async () => {
+    //#given - promptAsync records each call in promptCalls and then always throws
+    const sessionID = "main-failure-cooldown"
+    setMainSession(sessionID)
+    const mockInput = createMockPluginInput()
+    mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
+      promptCalls.push({
+        sessionID: opts.path.id,
+        agent: opts.body.agent,
+        model: opts.body.model,
+        text: opts.body.parts[0].text,
+      })
+      throw new Error("simulated auth failure")
+    }
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    //#when - two idle events, each followed by 2500 ms of fake time, with no extra clock jump between them
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    //#then - only one injection attempt was made
+    expect(promptCalls).toHaveLength(1)
+  })
+
+  test("should stop retries after max consecutive failures", async () => {
+    //#given - promptAsync records each call in promptCalls and then always throws
+    const sessionID = "main-max-consecutive-failures"
+    setMainSession(sessionID)
+    const mockInput = createMockPluginInput()
+    mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
+      promptCalls.push({
+        sessionID: opts.path.id,
+        agent: opts.body.agent,
+        model: opts.body.model,
+        text: opts.body.parts[0].text,
+      })
+      throw new Error("simulated auth failure")
+    }
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    //#when - MAX_CONSECUTIVE_FAILURES idle rounds with a 1_000_000 ms clock jump between rounds, then one more idle
+    for (let index = 0; index < MAX_CONSECUTIVE_FAILURES; index++) {
+      await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+      await fakeTimers.advanceBy(2500, true)
+      if (index < MAX_CONSECUTIVE_FAILURES - 1) {
+        await fakeTimers.advanceClockBy(1_000_000)
+      }
+    }
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    //#then - the extra idle added no attempt beyond the first MAX_CONSECUTIVE_FAILURES
+    expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES)
+  }, { timeout: 30000 })
+
+  test("should resume retries after reset window when max failures reached", async () => {
+    //#given - promptAsync records each call in promptCalls and then always throws
+    const sessionID = "main-recovery-after-max-failures"
+    setMainSession(sessionID)
+    const mockInput = createMockPluginInput()
+    mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
+      promptCalls.push({
+        sessionID: opts.path.id,
+        agent: opts.body.agent,
+        model: opts.body.model,
+        text: opts.body.parts[0].text,
+      })
+      throw new Error("simulated auth failure")
+    }
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    //#when - MAX_CONSECUTIVE_FAILURES idle rounds with a 1_000_000 ms clock jump between rounds
+    for (let index = 0; index < MAX_CONSECUTIVE_FAILURES; index++) {
+      await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+      await fakeTimers.advanceBy(2500, true)
+      if (index < MAX_CONSECUTIVE_FAILURES - 1) {
+        await fakeTimers.advanceClockBy(1_000_000)
+      }
+    }
+    // One idle right after the last failure, then another after the clock moves by FAILURE_RESET_WINDOW_MS.
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    await fakeTimers.advanceClockBy(FAILURE_RESET_WINDOW_MS)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    //#then - exactly one attempt was added on top of the first MAX_CONSECUTIVE_FAILURES
+    expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES + 1)
+  }, { timeout: 30000 })
+
+  test("should increase cooldown exponentially after consecutive failures", async () => {
+    //#given - promptAsync records each call in promptCalls and then always throws
+    const sessionID = "main-exponential-backoff"
+    setMainSession(sessionID)
+    const mockInput = createMockPluginInput()
+    mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
+      promptCalls.push({
+        sessionID: opts.path.id,
+        agent: opts.body.agent,
+        model: opts.body.model,
+        text: opts.body.parts[0].text,
+      })
+      throw new Error("simulated auth failure")
+    }
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    //#when - three idle rounds, with the clock moved by CONTINUATION_COOLDOWN_MS after the first and second
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+    await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+    await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    //#then - only two of the three idle events produced an attempt
+    expect(promptCalls).toHaveLength(2)
+  }, { timeout: 30000 })
+
+  test("should reset consecutive failure count after successful injection", async () => {
+    //#given - promptAsync records every call, throws on the first one only and resolves afterwards
+    const sessionID = "main-reset-consecutive-failures"
+    setMainSession(sessionID)
+    let shouldFail = true
+    const mockInput = createMockPluginInput()
+    mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
+      promptCalls.push({
+        sessionID: opts.path.id,
+        agent: opts.body.agent,
+        model: opts.body.model,
+        text: opts.body.parts[0].text,
+      })
+      if (shouldFail) {
+        shouldFail = false
+        throw new Error("simulated auth failure")
+      }
+      return {}
+    }
+    const hook = createTodoContinuationEnforcer(mockInput, {})
+
+    //#when - idle, clock + 2x CONTINUATION_COOLDOWN_MS, idle, clock + 1x CONTINUATION_COOLDOWN_MS, idle
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+    await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS * 2)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+    await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
+    await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
+    await fakeTimers.advanceBy(2500, true)
+
+    //#then - all three idle events reached promptAsync
+    expect(promptCalls).toHaveLength(3)
+  }, { timeout: 30000 })
+
  test("should keep injecting even when todos remain unchanged across cycles", async () => {
    //#given
    const sessionID = "main-no-stagnation-cap"
--- a/src/hooks/todo-continuation-enforcer/types.ts
+++ b/src/hooks/todo-continuation-enforcer/types.ts
@@ -29,6 +29,7 @@ export interface SessionState {
  abortDetectedAt?: number
  lastInjectedAt?: number
  inFlight?: boolean
+  consecutiveFailures: number
 }

 export interface MessageInfo {
--- a/src/plugin/chat-message.test.ts
+++ b/src/plugin/chat-message.test.ts
@@ -0,0 +1,118 @@
+import { describe, test, expect } from "bun:test"
+
+import { createChatMessageHandler } from "./chat-message"
+
+type ChatMessagePart = { type: string; text?: string; [key: string]: unknown }
+type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
+
+// Assembles createChatMessageHandler arguments with every hook set to null; sessions passed to
+// markApplied are collected in `_appliedSessions` so tests can inspect them.
+function createMockHandlerArgs(overrides?: {
+  pluginConfig?: Record<string, unknown>
+  shouldOverride?: boolean
+}) {
+  const _appliedSessions: string[] = []
+  const firstMessageVariantGate = {
+    shouldOverride: () => overrides?.shouldOverride ?? false,
+    markApplied: (sessionID: string) => { _appliedSessions.push(sessionID) },
+  }
+  const hooks = {
+    stopContinuationGuard: null,
+    keywordDetector: null,
+    claudeCodeHooks: null,
+    autoSlashCommand: null,
+    startWork: null,
+    ralphLoop: null,
+  } as any
+  const ctx = { client: { tui: { showToast: async () => {} } } } as any
+  const pluginConfig = (overrides?.pluginConfig ?? {}) as any
+  return { ctx, pluginConfig, firstMessageVariantGate, hooks, _appliedSessions }
+}
+
+/**
+ * Handler input for the fixed "test-session" id; `agent` and `model` are passed through as given.
+ */
+function createMockInput(agent?: string, model?: { providerID: string; modelID: string }) {
+  const sessionID = "test-session"
+  return { sessionID, agent, model }
+}
+
+// Handler output with no parts; `message.variant` exists only when a variant is supplied.
+function createMockOutput(variant?: string): ChatMessageHandlerOutput {
+  const hasVariant = variant !== undefined
+  const message: Record<string, unknown> = hasVariant ? { variant } : {}
+  const parts: ChatMessagePart[] = []
+  return { message, parts }
+}
+
+describe("createChatMessageHandler - first message variant", () => {
+  test("first message: sets variant from fallback chain when user has no selection", async () => {
+    //#given - the gate asks for an override and the output carries no variant yet
+    const handlerArgs = createMockHandlerArgs({ shouldOverride: true })
+    const chatInput = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
+    const chatOutput = createMockOutput()
+    const handleChatMessage = createChatMessageHandler(handlerArgs)
+
+    //#when
+    await handleChatMessage(chatInput, chatOutput)
+
+    //#then - a variant has been filled in
+    expect(chatOutput.message["variant"]).toBeDefined()
+  })
+
+  test("first message: preserves user-selected variant when already set", async () => {
+    //#given - the gate asks for an override, but the output already carries "xhigh"
+    const handlerArgs = createMockHandlerArgs({ shouldOverride: true })
+    const chatInput = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
+    const chatOutput = createMockOutput("xhigh")
+    const handleChatMessage = createChatMessageHandler(handlerArgs)
+
+    //#when
+    await handleChatMessage(chatInput, chatOutput)
+
+    //#then - the pre-set "xhigh" is untouched
+    expect(chatOutput.message["variant"]).toBe("xhigh")
+  })
+
+  test("first message: preserves user-selected 'high' variant", async () => {
+    //#given - the gate asks for an override, but the output already carries "high"
+    const handlerArgs = createMockHandlerArgs({ shouldOverride: true })
+    const chatInput = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
+    const chatOutput = createMockOutput("high")
+    const handleChatMessage = createChatMessageHandler(handlerArgs)
+
+    //#when
+    await handleChatMessage(chatInput, chatOutput)
+
+    //#then - the pre-set "high" is untouched
+    expect(chatOutput.message["variant"]).toBe("high")
+  })
+
+  test("subsequent message: does not override existing variant", async () => {
+    //#given - the gate does not ask for an override and the output carries "xhigh"
+    const handlerArgs = createMockHandlerArgs({ shouldOverride: false })
+    const chatInput = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
+    const chatOutput = createMockOutput("xhigh")
+    const handleChatMessage = createChatMessageHandler(handlerArgs)
+
+    //#when
+    await handleChatMessage(chatInput, chatOutput)
+
+    //#then - the variant is still "xhigh"
+    expect(chatOutput.message["variant"]).toBe("xhigh")
+  })
+
+  test("first message: marks gate as applied regardless of variant presence", async () => {
+    //#given - the gate asks for an override and the output already carries a variant
+    const handlerArgs = createMockHandlerArgs({ shouldOverride: true })
+    const chatInput = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
+    const chatOutput = createMockOutput("xhigh")
+    const handleChatMessage = createChatMessageHandler(handlerArgs)
+
+    //#when
+    await handleChatMessage(chatInput, chatOutput)
+
+    //#then - markApplied was still called for this session
+    expect(handlerArgs._appliedSessions).toContain("test-session")
+  })
+})
--- a/src/plugin/chat-message.ts
+++ b/src/plugin/chat-message.ts
@@ -56,12 +56,14 @@ export function createChatMessageHandler(args: {
    const message = output.message

    if (firstMessageVariantGate.shouldOverride(input.sessionID)) {
-      const variant =
-        input.model && input.agent
-          ? resolveVariantForModel(pluginConfig, input.agent, input.model)
-          : resolveAgentVariant(pluginConfig, input.agent)
-      if (variant !== undefined) {
-        message["variant"] = variant
+      if (message["variant"] === undefined) {
+        const variant =
+          input.model && input.agent
+            ? resolveVariantForModel(pluginConfig, input.agent, input.model)
+            : resolveAgentVariant(pluginConfig, input.agent)
+        if (variant !== undefined) {
+          message["variant"] = variant
+        }
      }
      firstMessageVariantGate.markApplied(input.sessionID)
    } else {
--- a/src/shared/model-requirements.test.ts
+++ b/src/shared/model-requirements.test.ts
@@ -241,19 +241,32 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
    expect(primary.providers[0]).toBe("openai")
  })

-  test("visual-engineering has valid fallbackChain with gemini-3-pro as primary", () => {
+  test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => {
    // given - visual-engineering category requirement
    const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"]

    // when - accessing visual-engineering requirement
-    // then - fallbackChain exists with gemini-3-pro as first entry
+    // then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) → k2p5
    expect(visualEngineering).toBeDefined()
    expect(visualEngineering.fallbackChain).toBeArray()
-    expect(visualEngineering.fallbackChain.length).toBeGreaterThan(0)
+    expect(visualEngineering.fallbackChain).toHaveLength(4)

    const primary = visualEngineering.fallbackChain[0]
    expect(primary.providers[0]).toBe("google")
    expect(primary.model).toBe("gemini-3-pro")
+    expect(primary.variant).toBe("high")
+
+    const second = visualEngineering.fallbackChain[1]
+    expect(second.providers[0]).toBe("zai-coding-plan")
+    expect(second.model).toBe("glm-5")
+
+    const third = visualEngineering.fallbackChain[2]
+    expect(third.model).toBe("claude-opus-4-6")
+    expect(third.variant).toBe("max")
+
+    const fourth = visualEngineering.fallbackChain[3]
+    expect(fourth.providers[0]).toBe("kimi-for-coding")
+    expect(fourth.model).toBe("k2p5")
  })

  test("quick has valid fallbackChain with claude-haiku-4-5 as primary", () => {
@@ -318,19 +331,23 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
    expect(primary.providers[0]).toBe("google")
  })

-  test("writing has valid fallbackChain with gemini-3-flash as primary", () => {
+  test("writing has valid fallbackChain with k2p5 as primary (kimi-for-coding)", () => {
    // given - writing category requirement
    const writing = CATEGORY_MODEL_REQUIREMENTS["writing"]

    // when - accessing writing requirement
-    // then - fallbackChain exists with gemini-3-flash as first entry
+    // then - fallbackChain: k2p5 → gemini-3-flash → claude-sonnet-4-5
    expect(writing).toBeDefined()
    expect(writing.fallbackChain).toBeArray()
-    expect(writing.fallbackChain.length).toBeGreaterThan(0)
+    expect(writing.fallbackChain).toHaveLength(3)

    const primary = writing.fallbackChain[0]
-    expect(primary.model).toBe("gemini-3-flash")
-    expect(primary.providers[0]).toBe("google")
+    expect(primary.model).toBe("k2p5")
+    expect(primary.providers[0]).toBe("kimi-for-coding")
+
+    const second = writing.fallbackChain[1]
+    expect(second.model).toBe("gemini-3-flash")
+    expect(second.providers[0]).toBe("google")
  })

  test("all 8 categories have valid fallbackChain arrays", () => {
--- a/src/shared/model-requirements.ts
+++ b/src/shared/model-requirements.ts
@@ -100,9 +100,10 @@ export const AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
 export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
  "visual-engineering": {
    fallbackChain: [
-      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
+      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
+      { providers: ["zai-coding-plan"], model: "glm-5" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
-      { providers: ["zai-coding-plan"], model: "glm-4.7" },
+      { providers: ["kimi-for-coding"], model: "k2p5" },
    ],
  },
  ultrabrain: {
@@ -151,10 +152,9 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
  },
  writing: {
    fallbackChain: [
+      { providers: ["kimi-for-coding"], model: "k2p5" },
      { providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
      { providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
-      { providers: ["zai-coding-plan"], model: "glm-4.7" },
-      { providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
    ],
  },
 }
--- a/src/tools/delegate-task/constants.ts
+++ b/src/tools/delegate-task/constants.ts
@@ -162,6 +162,16 @@ Approach:
 - Draft with care
 - Polish for clarity and impact
 - Documentation, READMEs, articles, technical writing
+
+ANTI-AI-SLOP RULES (NON-NEGOTIABLE):
+- NEVER use em dashes (—) or en dashes (–). Use commas, periods, ellipses, or line breaks instead. Zero tolerance.
+- Remove AI-sounding phrases: "delve", "it's important to note", "I'd be happy to", "certainly", "please don't hesitate", "leverage", "utilize", "in order to", "moving forward", "circle back", "at the end of the day", "robust", "streamline", "facilitate"
+- Pick plain words. "Use" not "utilize". "Start" not "commence". "Help" not "facilitate".
+- Use contractions naturally: "don't" not "do not", "it's" not "it is".
+- Vary sentence length. Don't make every sentence the same length.
+- NEVER start consecutive sentences with the same word.
+- No filler openings: skip "In today's world...", "As we all know...", "It goes without saying..."
+- Write like a human, not a corporate template.
 </Category_Context>`

 export const DEEP_CATEGORY_PROMPT_APPEND = `<Category_Context>
@@ -198,14 +208,14 @@ You are NOT an interactive assistant. You are an autonomous problem-solver.


 export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
-  "visual-engineering": { model: "google/gemini-3-pro" },
+  "visual-engineering": { model: "google/gemini-3-pro", variant: "high" },
  ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" },
  deep: { model: "openai/gpt-5.3-codex", variant: "medium" },
  artistry: { model: "google/gemini-3-pro", variant: "high" },
  quick: { model: "anthropic/claude-haiku-4-5" },
  "unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
  "unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" },
-  writing: { model: "google/gemini-3-flash" },
+  writing: { model: "kimi-for-coding/k2p5" },
 }

 export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -67,13 +67,14 @@ describe("sisyphus-task", () => {
  })

  describe("DEFAULT_CATEGORIES", () => {
-    test("visual-engineering category has model config", () => {
+    test("visual-engineering category has model and variant config", () => {
      // given
      const category = DEFAULT_CATEGORIES["visual-engineering"]

      // when / #then
      expect(category).toBeDefined()
      expect(category.model).toBe("google/gemini-3-pro")
+      expect(category.variant).toBe("high")
    })

    test("ultrabrain category has model and variant config", () => {
@@ -1713,17 +1714,19 @@ describe("sisyphus-task", () => {
      const { createDelegateTask } = require("./tools")
      let launchCalled = false
      
+      const launchedTask = {
+        id: "task-unstable",
+        sessionID: "ses_unstable_gemini",
+        description: "Unstable gemini task",
+        agent: "sisyphus-junior",
+        status: "running",
+      }
      const mockManager = {
        launch: async () => {
          launchCalled = true
-          return {
-            id: "task-unstable",
-            sessionID: "ses_unstable_gemini",
-            description: "Unstable gemini task",
-            agent: "sisyphus-junior",
-            status: "running",
-          }
+          return launchedTask
        },
+        getTask: () => launchedTask,
      }
      
       const mockClient = {
@@ -1838,17 +1841,19 @@ describe("sisyphus-task", () => {
      const { createDelegateTask } = require("./tools")
      let launchCalled = false

+      const launchedTask = {
+        id: "task-unstable-minimax",
+        sessionID: "ses_unstable_minimax",
+        description: "Unstable minimax task",
+        agent: "sisyphus-junior",
+        status: "running",
+      }
      const mockManager = {
        launch: async () => {
          launchCalled = true
-          return {
-            id: "task-unstable-minimax",
-            sessionID: "ses_unstable_minimax",
-            description: "Unstable minimax task",
-            agent: "sisyphus-junior",
-            status: "running",
-          }
+          return launchedTask
        },
+        getTask: () => launchedTask,
      }

       const mockClient = {
@@ -1972,17 +1977,19 @@ describe("sisyphus-task", () => {
      const { createDelegateTask } = require("./tools")
      let launchCalled = false
      
+      const launchedTask = {
+        id: "task-artistry",
+        sessionID: "ses_artistry_gemini",
+        description: "Artistry gemini task",
+        agent: "sisyphus-junior",
+        status: "running",
+      }
      const mockManager = {
        launch: async () => {
          launchCalled = true
-          return {
-            id: "task-artistry",
-            sessionID: "ses_artistry_gemini",
-            description: "Artistry gemini task",
-            agent: "sisyphus-junior",
-            status: "running",
-          }
+          return launchedTask
        },
+        getTask: () => launchedTask,
      }
      
       const mockClient = {
@@ -2038,17 +2045,19 @@ describe("sisyphus-task", () => {
      const { createDelegateTask } = require("./tools")
      let launchCalled = false
      
+      const launchedTask = {
+        id: "task-writing",
+        sessionID: "ses_writing_gemini",
+        description: "Writing gemini task",
+        agent: "sisyphus-junior",
+        status: "running",
+      }
      const mockManager = {
        launch: async () => {
          launchCalled = true
-          return {
-            id: "task-writing",
-            sessionID: "ses_writing_gemini",
-            description: "Writing gemini task",
-            agent: "sisyphus-junior",
-            status: "running",
-          }
+          return launchedTask
        },
+        getTask: () => launchedTask,
      }
      
       const mockClient = {
@@ -2104,17 +2113,19 @@ describe("sisyphus-task", () => {
      const { createDelegateTask } = require("./tools")
      let launchCalled = false
      
+      const launchedTask = {
+        id: "task-custom-unstable",
+        sessionID: "ses_custom_unstable",
+        description: "Custom unstable task",
+        agent: "sisyphus-junior",
+        status: "running",
+      }
      const mockManager = {
        launch: async () => {
          launchCalled = true
-          return {
-            id: "task-custom-unstable",
-            sessionID: "ses_custom_unstable",
-            description: "Custom unstable task",
-            agent: "sisyphus-junior",
-            status: "running",
-          }
+          return launchedTask
        },
+        getTask: () => launchedTask,
      }
      
      const mockClient = {
@@ -2793,7 +2804,7 @@ describe("sisyphus-task", () => {
        {
          name: "writing",
          description: "Documentation, prose, technical writing",
-          model: "google/gemini-3-flash",
+          model: "kimi-for-coding/k2p5",
        },
      ]
      const availableSkills = [
--- a/src/tools/delegate-task/unstable-agent-task.test.ts
+++ b/src/tools/delegate-task/unstable-agent-task.test.ts
@@ -0,0 +1,224 @@
+const { describe, test, expect, beforeEach, afterEach, mock } = require("bun:test")
+
+describe("executeUnstableAgentTask - interrupt detection", () => {
+  beforeEach(() => {
+    //#given - configure fast timing for all tests (10ms polls, 500ms max poll time) so the elapsed-time assertions below stay meaningful
+    const { __setTimingConfig } = require("./timing")
+    __setTimingConfig({
+      POLL_INTERVAL_MS: 10,
+      MIN_STABILITY_TIME_MS: 0,
+      STABILITY_POLLS_REQUIRED: 1,
+      MAX_POLL_TIME_MS: 500,
+      WAIT_FOR_SESSION_TIMEOUT_MS: 100,
+      WAIT_FOR_SESSION_INTERVAL_MS: 10,
+    })
+  })
+
+  afterEach(() => {
+    //#teardown - reset the timing overrides applied in beforeEach and restore bun mocks after each test
+    const { __resetTimingConfig } = require("./timing")
+    __resetTimingConfig()
+    mock.restore()
+  })
+
+  test("should return error immediately when background task becomes interrupted during polling", async () => {
+    //#given - a background task that gets interrupted on first poll check
+    const taskState = {
+      id: "bg_test_interrupt",
+      sessionID: "ses_test_interrupt",
+      status: "interrupt" as string,
+      description: "test interrupted task",
+      prompt: "test prompt",
+      agent: "sisyphus-junior",
+      error: "Agent not found" as string | undefined,
+    }
+
+    const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
+
+    const mockManager = {
+      launch: async () => launchState,
+      getTask: () => taskState,
+    }
+
+    const mockClient = {
+      session: {
+        status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
+        messages: async () => ({ data: [] }),
+      },
+    }
+
+    const { executeUnstableAgentTask } = require("./unstable-agent-task")
+
+    const args = {
+      prompt: "test prompt",
+      description: "test task",
+      category: "test",
+      load_skills: [],
+      run_in_background: false,
+    }
+
+    const mockCtx = {
+      sessionID: "parent-session",
+      callID: "call-123",
+      metadata: () => {},
+    }
+
+    const mockExecutorCtx = {
+      manager: mockManager,
+      client: mockClient,
+      directory: "/tmp",
+    }
+
+    const parentContext = {
+      sessionID: "parent-session",
+      messageID: "msg-123",
+    }
+
+    //#when - executeUnstableAgentTask encounters an interrupted task
+    const startTime = Date.now()
+    const result = await executeUnstableAgentTask(
+      args, mockCtx, mockExecutorCtx, parentContext,
+      "test-agent", undefined, undefined, "test-model"
+    )
+    const elapsed = Date.now() - startTime
+
+    //#then - should return quickly with the "(interrupt)" status header (the fixed body text always contains "interrupt", so match the header token), not hang until MAX_POLL_TIME_MS
+    expect(result).toContain("(interrupt)")
+    expect(result.toLowerCase()).toContain("agent not found")
+    expect(elapsed).toBeLessThan(400)
+  })
+
+  test("should return error immediately when background task becomes errored during polling", async () => {
+    //#given - a background task that is already errored when poll checks
+    const taskState = {
+      id: "bg_test_error",
+      sessionID: "ses_test_error",
+      status: "error" as string,
+      description: "test error task",
+      prompt: "test prompt",
+      agent: "sisyphus-junior",
+      error: "Rate limit exceeded" as string | undefined,
+    }
+
+    const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
+
+    const mockManager = {
+      launch: async () => launchState,
+      getTask: () => taskState,
+    }
+
+    const mockClient = {
+      session: {
+        status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
+        messages: async () => ({ data: [] }),
+      },
+    }
+
+    const { executeUnstableAgentTask } = require("./unstable-agent-task")
+
+    const args = {
+      prompt: "test prompt",
+      description: "test task",
+      category: "test",
+      load_skills: [],
+      run_in_background: false,
+    }
+
+    const mockCtx = {
+      sessionID: "parent-session",
+      callID: "call-123",
+      metadata: () => {},
+    }
+
+    const mockExecutorCtx = {
+      manager: mockManager,
+      client: mockClient,
+      directory: "/tmp",
+    }
+
+    const parentContext = {
+      sessionID: "parent-session",
+      messageID: "msg-123",
+    }
+
+    //#when - executeUnstableAgentTask encounters an errored task
+    const startTime = Date.now()
+    const result = await executeUnstableAgentTask(
+      args, mockCtx, mockExecutorCtx, parentContext,
+      "test-agent", undefined, undefined, "test-model"
+    )
+    const elapsed = Date.now() - startTime
+
+    //#then - should return quickly with the "(error)" status header and the task's error text, not hang until MAX_POLL_TIME_MS
+    expect(result).toContain("(error)")
+    expect(result.toLowerCase()).toContain("rate limit exceeded")
+    expect(elapsed).toBeLessThan(400)
+  })
+
+  test("should return error immediately when background task becomes cancelled during polling", async () => {
+    //#given - a background task that is already cancelled when poll checks
+    const taskState = {
+      id: "bg_test_cancel",
+      sessionID: "ses_test_cancel",
+      status: "cancelled" as string,
+      description: "test cancelled task",
+      prompt: "test prompt",
+      agent: "sisyphus-junior",
+      error: "Stale timeout" as string | undefined,
+    }
+
+    const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
+
+    const mockManager = {
+      launch: async () => launchState,
+      getTask: () => taskState,
+    }
+
+    const mockClient = {
+      session: {
+        status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
+        messages: async () => ({ data: [] }),
+      },
+    }
+
+    const { executeUnstableAgentTask } = require("./unstable-agent-task")
+
+    const args = {
+      prompt: "test prompt",
+      description: "test task",
+      category: "test",
+      load_skills: [],
+      run_in_background: false,
+    }
+
+    const mockCtx = {
+      sessionID: "parent-session",
+      callID: "call-123",
+      metadata: () => {},
+    }
+
+    const mockExecutorCtx = {
+      manager: mockManager,
+      client: mockClient,
+      directory: "/tmp",
+    }
+
+    const parentContext = {
+      sessionID: "parent-session",
+      messageID: "msg-123",
+    }
+
+    //#when - executeUnstableAgentTask encounters a cancelled task
+    const startTime = Date.now()
+    const result = await executeUnstableAgentTask(
+      args, mockCtx, mockExecutorCtx, parentContext,
+      "test-agent", undefined, undefined, "test-model"
+    )
+    const elapsed = Date.now() - startTime
+
+    //#then - should return quickly with the "(cancelled)" status header and the cancel reason, not hang until MAX_POLL_TIME_MS
+    expect(result).toContain("(cancelled)")
+    expect(result.toLowerCase()).toContain("stale timeout")
+    expect(elapsed).toBeLessThan(400)
+  })
+})
--- a/src/tools/delegate-task/unstable-agent-task.ts
+++ b/src/tools/delegate-task/unstable-agent-task.ts
@@ -77,6 +77,7 @@ export async function executeUnstableAgentTask(
    const pollStart = Date.now()
    let lastMsgCount = 0
    let stablePolls = 0
+    let terminalStatus: { status: string; error?: string } | undefined

    while (Date.now() - pollStart < timingCfg.MAX_POLL_TIME_MS) {
      if (ctx.abort?.aborted) {
@@ -85,6 +86,12 @@ export async function executeUnstableAgentTask(

      await new Promise(resolve => setTimeout(resolve, timingCfg.POLL_INTERVAL_MS))

+      const currentTask = manager.getTask(task.id)
+      if (currentTask && (currentTask.status === "interrupt" || currentTask.status === "error" || currentTask.status === "cancelled")) {
+        terminalStatus = { status: currentTask.status, error: currentTask.error }
+        break
+      }
+
      const statusResult = await client.session.status()
      const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
      const sessionStatus = allStatuses[sessionID]
@@ -110,6 +117,24 @@ export async function executeUnstableAgentTask(
      }
    }

+    if (terminalStatus) {
+      const duration = formatDuration(startTime)
+      return `SUPERVISED TASK FAILED (${terminalStatus.status})
+
+Task was interrupted/failed while running in monitored background mode.
+${terminalStatus.error ? `Error: ${terminalStatus.error}` : ""}
+
+Duration: ${duration}
+Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
+Model: ${actualModel}
+
+The task session may contain partial results.
+
+<task_metadata>
+session_id: ${sessionID}
+</task_metadata>`
+    }
+
    const messagesResult = await client.session.messages({ path: { id: sessionID } })
    const messages = ((messagesResult as { data?: unknown }).data ?? messagesResult) as SessionMessage[]
Author	SHA1	Message	Date
github-actions[bot]	8c0354225c	release: v3.5.6	2026-02-16 07:24:09 +00:00
YeonGyu-Kim	9ba933743a	fix: update prometheus prompt test to match compressed plan template wording	2026-02-16 16:21:14 +09:00
YeonGyu-Kim	cb4a165c76	Merge pull request #1882 from code-yeongyu/fix/resume-completion-timer-cleanup fix: cancel completion timer on resume and prevent silent notification drop	2026-02-16 16:09:02 +09:00
YeonGyu-Kim	d3574a392f	fix: cancel completion timer on resume and prevent silent notification drop	2026-02-16 16:06:36 +09:00
YeonGyu-Kim	0ef682965f	fix: detect interrupted/error/cancelled status in unstable-agent-task polling loop The polling loop in executeUnstableAgentTask only checked session status and message stability, never checking if the background task itself had been interrupted. This caused the tool call to hang until MAX_POLL_TIME_MS (10 minutes) when a task was interrupted by prompt errors. Add manager.getTask() check at each poll iteration to break immediately on terminal statuses (interrupt, error, cancelled), returning a clear failure message instead of hanging.	2026-02-16 15:56:52 +09:00
YeonGyu-Kim	dd11d5df1b	refactor: compress plan template while recovering lost specificity guidelines Reduce plan-template from 541 to 335 lines by removing redundant verbose examples while recovering 3 lost context items: tool-type mapping table in QA Policy, scenario specificity requirements (selectors/data/assertions/ timing/negative) in TODO template, and structured output format hints for each Final Verification agent.	2026-02-16 15:46:00 +09:00
YeonGyu-Kim	130aaaf910	enhance: enforce mandatory per-task QA scenarios and add Final Verification Wave Strengthen TODO template to make QA scenarios non-optional with explicit rejection warning. Add Final Verification Wave with 4 parallel review agents: oracle (plan compliance audit), unspecified-high (code quality), unspecified-high (real manual QA), deep (scope fidelity check) — each with detailed verification steps and structured output format.	2026-02-16 15:46:00 +09:00
YeonGyu-Kim	7e6982c8d8	Merge pull request #1878 from code-yeongyu/fix/1806-todo-enforcer-cooldown fix: apply cooldown on injection failure and add max retry limit (#1806)	2026-02-16 15:42:24 +09:00
YeonGyu-Kim	2a4009e692	fix: add post-max-failure recovery window for todo continuation	2026-02-16 15:27:00 +09:00
YeonGyu-Kim	2b7ef43619	Merge pull request #1879 from code-yeongyu/fix/cli-installer-provider-config-1876 fix: run auth plugins and provider config for all providers, not just gemini	2026-02-16 15:26:55 +09:00
YeonGyu-Kim	5c9ef7bb1c	fix: run auth plugins and provider config for all providers, not just gemini Closes #1876	2026-02-16 15:23:22 +09:00
YeonGyu-Kim	67efe2d7af	test: verify provider setup runs for openai/copilot without gemini	2026-02-16 15:23:22 +09:00
YeonGyu-Kim	abfab1a78a	enhance: calibrate Prometheus plan granularity to 5-8 parallel tasks per wave Add Maximum Parallelism Principle as a top-level constraint and replace small-scale plan template examples (6 tasks, 3 waves) with production-scale examples (24 tasks, 4 waves, max 7 concurrent) to steer the model toward generating fine-grained, dependency-minimized plans by default.	2026-02-16 15:14:25 +09:00
YeonGyu-Kim	24ea3627ad	Merge pull request #1877 from code-yeongyu/fix/1752-compaction-race fix: cancel pending compaction timer on session.idle and add error logging (#1752)	2026-02-16 15:11:30 +09:00
YeonGyu-Kim	c2f22cd6e5	fix: apply cooldown on injection failure and cap retries	2026-02-16 15:00:41 +09:00
YeonGyu-Kim	6a90182503	fix: prevent duplicate compaction race and log preemptive failures	2026-02-16 14:58:59 +09:00
sisyphus-dev-ai	1509c897fc	chore: changes by sisyphus-dev-ai	2026-02-16 05:09:17 +00:00
YeonGyu-Kim	dd91a7d990	Merge pull request #1874 from code-yeongyu/fix/toast-manager-ghost-entries fix: add toast cleanup to all BackgroundManager task removal paths	2026-02-16 13:54:01 +09:00
YeonGyu-Kim	a9dd6d2ce8	Merge pull request #1873 from code-yeongyu/fix/first-message-variant-override fix: preserve user-selected variant on first message instead of overriding with fallback chain default	2026-02-16 13:51:38 +09:00
YeonGyu-Kim	33d290b346	fix: add toast cleanup to all BackgroundManager task removal paths TaskToastManager entries were never removed when tasks completed via error, session deletion, stale pruning, or cancelled with skipNotification. Ghost entries accumulated indefinitely, causing the 'Queued (N)' count in toast messages to grow without bound. Added toastManager.removeTask() calls to all 4 missing cleanup paths: - session.error handler - session.deleted handler - cancelTask with skipNotification - pruneStaleTasksAndNotifications Closes #1866	2026-02-16 13:50:57 +09:00
YeonGyu-Kim	7108d244d1	fix: preserve user-selected variant on first message instead of overriding with fallback chain default First message variant gate was unconditionally overwriting message.variant with the fallback chain value (e.g. 'medium' for Hephaestus), ignoring any variant the user had already selected via OpenCode UI. Now checks message.variant === undefined before applying the resolved variant, matching the behavior already used for subsequent messages. Closes #1861	2026-02-16 13:44:54 +09:00
github-actions[bot]	418e0e9f76	@dankochetov has signed the CLA in code-yeongyu/oh-my-opencode#1870	2026-02-15 23:17:14 +00:00
github-actions[bot]	b963571642	@Decrabbityyy has signed the CLA in code-yeongyu/oh-my-opencode#1864	2026-02-15 15:07:23 +00:00
github-actions[bot]	18442a1637	release: v3.5.5	2026-02-15 05:48:47 +00:00
YeonGyu-Kim	d076187f0a	test(cli): update model-fallback snapshots for kimi k2.5 and gemini-3-pro changes	2026-02-15 14:45:51 +09:00
YeonGyu-Kim	8a5f61724d	fix(background-agent): handle message.part.delta for heartbeat (OpenCode >=1.2.0) OpenCode 1.2.0+ changed reasoning-delta and text-delta to emit 'message.part.delta' instead of 'message.part.updated'. Without handling this event, lastUpdate was only refreshed at reasoning-start and reasoning-end, leaving a gap where extended thinking (>3min) could trigger stale timeout. Accept both event types as heartbeat sources for forward compatibility.	2026-02-15 14:26:25 +09:00
YeonGyu-Kim	3f557e593c	fix(background-agent): use correct OpenCode session status for stale guard OpenCode uses 'busy'/'retry'/'idle' session statuses, not 'running'. The stale timeout guard checked for type === 'running' which never matched, leaving all background tasks vulnerable to stale-kill even when their sessions were actively processing. Change sessionIsRunning to check type !== 'idle' instead, protecting busy and retrying sessions from premature termination.	2026-02-15 14:24:45 +09:00
YeonGyu-Kim	284fafad11	feat(writing): switch primary model to kimi k2.5, add anti-AI-slop rules to prompt	2026-02-15 14:00:03 +09:00
YeonGyu-Kim	884a3addf8	feat(visual-engineering): add variant high to gemini-3-pro, update fallback chain to gemini→glm-5→opus→kimi	2026-02-15 13:59:00 +09:00