Compare commits
29 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c0354225c | ||
|
|
9ba933743a | ||
|
|
cb4a165c76 | ||
|
|
d3574a392f | ||
|
|
0ef682965f | ||
|
|
dd11d5df1b | ||
|
|
130aaaf910 | ||
|
|
7e6982c8d8 | ||
|
|
2a4009e692 | ||
|
|
2b7ef43619 | ||
|
|
5c9ef7bb1c | ||
|
|
67efe2d7af | ||
|
|
abfab1a78a | ||
|
|
24ea3627ad | ||
|
|
c2f22cd6e5 | ||
|
|
6a90182503 | ||
|
|
1509c897fc | ||
|
|
dd91a7d990 | ||
|
|
a9dd6d2ce8 | ||
|
|
33d290b346 | ||
|
|
7108d244d1 | ||
|
|
418e0e9f76 | ||
|
|
b963571642 | ||
|
|
18442a1637 | ||
|
|
d076187f0a | ||
|
|
8a5f61724d | ||
|
|
3f557e593c | ||
|
|
284fafad11 | ||
|
|
884a3addf8 |
28
bun.lock
28
bun.lock
@@ -28,13 +28,13 @@
|
||||
"typescript": "^5.7.3",
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.5.3",
|
||||
"oh-my-opencode-darwin-x64": "3.5.3",
|
||||
"oh-my-opencode-linux-arm64": "3.5.3",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.5.3",
|
||||
"oh-my-opencode-linux-x64": "3.5.3",
|
||||
"oh-my-opencode-linux-x64-musl": "3.5.3",
|
||||
"oh-my-opencode-windows-x64": "3.5.3",
|
||||
"oh-my-opencode-darwin-arm64": "3.5.5",
|
||||
"oh-my-opencode-darwin-x64": "3.5.5",
|
||||
"oh-my-opencode-linux-arm64": "3.5.5",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.5.5",
|
||||
"oh-my-opencode-linux-x64": "3.5.5",
|
||||
"oh-my-opencode-linux-x64-musl": "3.5.5",
|
||||
"oh-my-opencode-windows-x64": "3.5.5",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -226,19 +226,19 @@
|
||||
|
||||
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
|
||||
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Dq0+PC2dyAqG7c3DUnQmdOkKbKmOsRHwoqgLCQNKN1lTRllF8zbWqp5B+LGKxSPxPqJIPS3mKt+wIR2KvkYJVw=="],
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.5.5", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-XtcCQ8/iVT6T1B58y0N1oMgOK4beTW8DW98b/ITnINb7b3hNSv5754Af/2Rx67BV0iE0ezC6uXaqz45C7ru1rw=="],
|
||||
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Ke45Bv/ygZm3YUSUumIyk647KZ2PFzw30tH597cOpG8MDPGbNVBCM6EKFezcukUPT+gPFVpE1IiGzEkn4JmgZA=="],
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.5.5", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-ReSDqU6jihh7lpGNmEt3REzc5bOcyfv3cMHitpecKq0wRrJoTBI+dgNPk90BLjHobGbhAm0TE8VZ9tqTkivnIQ=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-aP5S3DngUhFkNeqYM33Ge6zccCWLzB/O3FLXLFXy/Iws03N8xugw72pnMK6lUbIia9QQBKK7IZBoYm9C79pZ3g=="],
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Zs/ETIxwcWBvw+jdlo8t+3+92oMMaXkFg1ZCuZrBRZOmtPFefdsH5/QEIe2TlNSjfoTwlA7cbpOD6oXgxRVrtg=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-UiD/hVKYZQyX4D5N5SnZT4M5Z/B2SDtJWBW4MibpYSAcPKNCEBKi/5E4hOPxAtTfFGR8tIXFmYZdQJDkVfvluw=="],
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.5.5", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m9r4OW1XhGtm/SvHM3kzpS4pEiI2eIh5Tj+j5hpMW3wu+AqE3F1XGUpu8RgvIpupFo8beimJWDYQujqokReQqg=="],
|
||||
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-L9kqwzElGkaQ8pgtv1ZjcHARw9LPaU4UEVjzauByTMi+/5Js/PTsNXBggxSRzZfQ8/MNBPSCiA4K10Kc0YjjvA=="],
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-N6ysF5Pr2C1dyC5Dftzp05RJODgL+EYCWcOV59/UCV152cINlOhg80804o+6XTKV/taOAaboYaQwsBKiCs/BNQ=="],
|
||||
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Z0fVVih/b2dbNeb9DK9oca5dNYCZyPySBRtxRhDXod5d7fJNgIPrvUoEd3SNfkRGORyFB3hGBZ6nqQ6N8+8DEA=="],
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.5.5", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-MOxW1FMTJT3Ze/U2fDedcZUYTFaA9PaKIiqtsBIHOSb+fFgdo51RIuUlKCELN/g9I9dYhw0yP2n9tBMBG6feSg=="],
|
||||
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ocWPjRs2sJgN02PJnEIYtqdMVDex1YhEj1FzAU5XIicfzQbgxLh9nz1yhHZzfqGJq69QStU6ofpc5kQpfX1LMg=="],
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.5.5", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-dWRtPyIdMFQIw1BwVO4PbGqoo0UWs7NES+YJC7BLGv0YnWN7Q2tatmOviSeSgMELeMsWSbDNisEB79jsfShXjA=="],
|
||||
|
||||
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],
|
||||
|
||||
|
||||
16
package.json
16
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -74,13 +74,13 @@
|
||||
"typescript": "^5.7.3"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.5.4",
|
||||
"oh-my-opencode-darwin-x64": "3.5.4",
|
||||
"oh-my-opencode-linux-arm64": "3.5.4",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.5.4",
|
||||
"oh-my-opencode-linux-x64": "3.5.4",
|
||||
"oh-my-opencode-linux-x64-musl": "3.5.4",
|
||||
"oh-my-opencode-windows-x64": "3.5.4"
|
||||
"oh-my-opencode-darwin-arm64": "3.5.6",
|
||||
"oh-my-opencode-darwin-x64": "3.5.6",
|
||||
"oh-my-opencode-linux-arm64": "3.5.6",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.5.6",
|
||||
"oh-my-opencode-linux-x64": "3.5.6",
|
||||
"oh-my-opencode-linux-x64-musl": "3.5.6",
|
||||
"oh-my-opencode-windows-x64": "3.5.6"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@ast-grep/cli",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-arm64",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64-musl",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64",
|
||||
"version": "3.5.4",
|
||||
"version": "3.5.6",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1503,6 +1503,22 @@
|
||||
"created_at": "2026-02-14T19:58:19Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1845
|
||||
},
|
||||
{
|
||||
"name": "Decrabbityyy",
|
||||
"id": 99632363,
|
||||
"comment_id": 3904649522,
|
||||
"created_at": "2026-02-15T15:07:11Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1864
|
||||
},
|
||||
{
|
||||
"name": "dankochetov",
|
||||
"id": 33990502,
|
||||
"comment_id": 3905398332,
|
||||
"created_at": "2026-02-15T23:17:05Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1870
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -66,7 +66,7 @@ describe("PROMETHEUS_SYSTEM_PROMPT zero human intervention", () => {
|
||||
expect(lowerPrompt).toContain("preconditions")
|
||||
expect(lowerPrompt).toContain("failure indicators")
|
||||
expect(lowerPrompt).toContain("evidence")
|
||||
expect(lowerPrompt).toMatch(/negative scenario/)
|
||||
expect(prompt).toMatch(/negative/i)
|
||||
})
|
||||
|
||||
test("should require QA scenario adequacy in self-review checklist", () => {
|
||||
|
||||
@@ -129,7 +129,21 @@ Your ONLY valid output locations are \`.sisyphus/plans/*.md\` and \`.sisyphus/dr
|
||||
|
||||
Example: \`.sisyphus/plans/auth-refactor.md\`
|
||||
|
||||
### 5. SINGLE PLAN MANDATE (CRITICAL)
|
||||
### 5. MAXIMUM PARALLELISM PRINCIPLE (NON-NEGOTIABLE)
|
||||
|
||||
Your plans MUST maximize parallel execution. This is a core planning quality metric.
|
||||
|
||||
**Granularity Rule**: One task = one module/concern = 1-3 files.
|
||||
If a task touches 4+ files or 2+ unrelated concerns, SPLIT IT.
|
||||
|
||||
**Parallelism Target**: Aim for 5-8 tasks per wave.
|
||||
If any wave has fewer than 3 tasks (except the final integration), you under-split.
|
||||
|
||||
**Dependency Minimization**: Structure tasks so shared dependencies
|
||||
(types, interfaces, configs) are extracted as early Wave-1 tasks,
|
||||
unblocking maximum parallelism in subsequent waves.
|
||||
|
||||
### 6. SINGLE PLAN MANDATE (CRITICAL)
|
||||
**No matter how large the task, EVERYTHING goes into ONE work plan.**
|
||||
|
||||
**NEVER:**
|
||||
@@ -152,7 +166,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
|
||||
|
||||
**The plan can have 50+ TODOs. That's OK. ONE PLAN.**
|
||||
|
||||
### 5.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)
|
||||
### 6.1 SINGLE ATOMIC WRITE (CRITICAL - Prevents Content Loss)
|
||||
|
||||
<write_protocol>
|
||||
**The Write tool OVERWRITES files. It does NOT append.**
|
||||
@@ -188,7 +202,7 @@ Example: \`.sisyphus/plans/auth-refactor.md\`
|
||||
- [ ] File already exists with my content? → Use Edit to append, NOT Write
|
||||
</write_protocol>
|
||||
|
||||
### 6. DRAFT AS WORKING MEMORY (MANDATORY)
|
||||
### 7. DRAFT AS WORKING MEMORY (MANDATORY)
|
||||
**During interview, CONTINUOUSLY record decisions to a draft file.**
|
||||
|
||||
**Draft Location**: \`.sisyphus/drafts/{name}.md\`
|
||||
|
||||
@@ -70,108 +70,25 @@ Generate plan to: \`.sisyphus/plans/{name}.md\`
|
||||
|
||||
## Verification Strategy (MANDATORY)
|
||||
|
||||
> **UNIVERSAL RULE: ZERO HUMAN INTERVENTION**
|
||||
>
|
||||
> ALL tasks in this plan MUST be verifiable WITHOUT any human action.
|
||||
> This is NOT conditional — it applies to EVERY task, regardless of test strategy.
|
||||
>
|
||||
> **FORBIDDEN** — acceptance criteria that require:
|
||||
> - "User manually tests..." / "사용자가 직접 테스트..."
|
||||
> - "User visually confirms..." / "사용자가 눈으로 확인..."
|
||||
> - "User interacts with..." / "사용자가 직접 조작..."
|
||||
> - "Ask user to verify..." / "사용자에게 확인 요청..."
|
||||
> - ANY step where a human must perform an action
|
||||
>
|
||||
> **ALL verification is executed by the agent** using tools (Playwright, interactive_bash, curl, etc.). No exceptions.
|
||||
> **ZERO HUMAN INTERVENTION** — ALL verification is agent-executed. No exceptions.
|
||||
> Acceptance criteria requiring "user manually tests/confirms" are FORBIDDEN.
|
||||
|
||||
### Test Decision
|
||||
- **Infrastructure exists**: [YES/NO]
|
||||
- **Automated tests**: [TDD / Tests-after / None]
|
||||
- **Framework**: [bun test / vitest / jest / pytest / none]
|
||||
- **If TDD**: Each task follows RED (failing test) → GREEN (minimal impl) → REFACTOR
|
||||
|
||||
### If TDD Enabled
|
||||
### QA Policy
|
||||
Every task MUST include agent-executed QA scenarios (see TODO template below).
|
||||
Evidence saved to \`.sisyphus/evidence/task-{N}-{scenario-slug}.{ext}\`.
|
||||
|
||||
Each TODO follows RED-GREEN-REFACTOR:
|
||||
|
||||
**Task Structure:**
|
||||
1. **RED**: Write failing test first
|
||||
- Test file: \`[path].test.ts\`
|
||||
- Test command: \`bun test [file]\`
|
||||
- Expected: FAIL (test exists, implementation doesn't)
|
||||
2. **GREEN**: Implement minimum code to pass
|
||||
- Command: \`bun test [file]\`
|
||||
- Expected: PASS
|
||||
3. **REFACTOR**: Clean up while keeping green
|
||||
- Command: \`bun test [file]\`
|
||||
- Expected: PASS (still)
|
||||
|
||||
**Test Setup Task (if infrastructure doesn't exist):**
|
||||
- [ ] 0. Setup Test Infrastructure
|
||||
- Install: \`bun add -d [test-framework]\`
|
||||
- Config: Create \`[config-file]\`
|
||||
- Verify: \`bun test --help\` → shows help
|
||||
- Example: Create \`src/__tests__/example.test.ts\`
|
||||
- Verify: \`bun test\` → 1 test passes
|
||||
|
||||
### Agent-Executed QA Scenarios (MANDATORY — ALL tasks)
|
||||
|
||||
> Whether TDD is enabled or not, EVERY task MUST include Agent-Executed QA Scenarios.
|
||||
> - **With TDD**: QA scenarios complement unit tests at integration/E2E level
|
||||
> - **Without TDD**: QA scenarios are the PRIMARY verification method
|
||||
>
|
||||
> These describe how the executing agent DIRECTLY verifies the deliverable
|
||||
> by running it — opening browsers, executing commands, sending API requests.
|
||||
> The agent performs what a human tester would do, but automated via tools.
|
||||
|
||||
**Verification Tool by Deliverable Type:**
|
||||
|
||||
| Type | Tool | How Agent Verifies |
|
||||
|------|------|-------------------|
|
||||
| **Frontend/UI** | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
|
||||
| **TUI/CLI** | interactive_bash (tmux) | Run command, send keystrokes, validate output |
|
||||
| **API/Backend** | Bash (curl/httpie) | Send requests, parse responses, assert fields |
|
||||
| **Library/Module** | Bash (bun/node REPL) | Import, call functions, compare output |
|
||||
| **Config/Infra** | Bash (shell commands) | Apply config, run state checks, validate |
|
||||
|
||||
**Each Scenario MUST Follow This Format:**
|
||||
|
||||
\`\`\`
|
||||
Scenario: [Descriptive name — what user action/flow is being verified]
|
||||
Tool: [Playwright / interactive_bash / Bash]
|
||||
Preconditions: [What must be true before this scenario runs]
|
||||
Steps:
|
||||
1. [Exact action with specific selector/command/endpoint]
|
||||
2. [Next action with expected intermediate state]
|
||||
3. [Assertion with exact expected value]
|
||||
Expected Result: [Concrete, observable outcome]
|
||||
Failure Indicators: [What would indicate failure]
|
||||
Evidence: [Screenshot path / output capture / response body path]
|
||||
\`\`\`
|
||||
|
||||
**Scenario Detail Requirements:**
|
||||
- **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
|
||||
- **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
|
||||
- **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
|
||||
- **Timing**: Include wait conditions where relevant (\`Wait for .dashboard (timeout: 10s)\`)
|
||||
- **Negative Scenarios**: At least ONE failure/error scenario per feature
|
||||
- **Evidence Paths**: Specific file paths (\`.sisyphus/evidence/task-N-scenario-name.png\`)
|
||||
|
||||
**Anti-patterns (NEVER write scenarios like this):**
|
||||
- ❌ "Verify the login page works correctly"
|
||||
- ❌ "Check that the API returns the right data"
|
||||
- ❌ "Test the form validation"
|
||||
- ❌ "User opens browser and confirms..."
|
||||
|
||||
**Write scenarios like this instead:**
|
||||
- ✅ \`Navigate to /login → Fill input[name="email"] with "test@example.com" → Fill input[name="password"] with "Pass123!" → Click button[type="submit"] → Wait for /dashboard → Assert h1 contains "Welcome"\`
|
||||
- ✅ \`POST /api/users {"name":"Test","email":"new@test.com"} → Assert status 201 → Assert response.id is UUID → GET /api/users/{id} → Assert name equals "Test"\`
|
||||
- ✅ \`Run ./cli --config test.yaml → Wait for "Loaded" in stdout → Send "q" → Assert exit code 0 → Assert stdout contains "Goodbye"\`
|
||||
|
||||
**Evidence Requirements:**
|
||||
- Screenshots: \`.sisyphus/evidence/\` for all UI verifications
|
||||
- Terminal output: Captured for CLI/TUI verifications
|
||||
- Response bodies: Saved for API verifications
|
||||
- All evidence referenced by specific file path in acceptance criteria
|
||||
| Deliverable Type | Verification Tool | Method |
|
||||
|------------------|-------------------|--------|
|
||||
| Frontend/UI | Playwright (playwright skill) | Navigate, interact, assert DOM, screenshot |
|
||||
| TUI/CLI | interactive_bash (tmux) | Run command, send keystrokes, validate output |
|
||||
| API/Backend | Bash (curl) | Send requests, assert status + response fields |
|
||||
| Library/Module | Bash (bun/node REPL) | Import, call functions, compare output |
|
||||
|
||||
---
|
||||
|
||||
@@ -181,49 +98,82 @@ Scenario: [Descriptive name — what user action/flow is being verified]
|
||||
|
||||
> Maximize throughput by grouping independent tasks into parallel waves.
|
||||
> Each wave completes before the next begins.
|
||||
> Target: 5-8 tasks per wave. Fewer than 3 per wave (except final) = under-splitting.
|
||||
|
||||
\`\`\`
|
||||
Wave 1 (Start Immediately):
|
||||
├── Task 1: [no dependencies]
|
||||
└── Task 5: [no dependencies]
|
||||
Wave 1 (Start Immediately — foundation + scaffolding):
|
||||
├── Task 1: Project scaffolding + config [quick]
|
||||
├── Task 2: Design system tokens [quick]
|
||||
├── Task 3: Type definitions [quick]
|
||||
├── Task 4: Schema definitions [quick]
|
||||
├── Task 5: Storage interface + in-memory impl [quick]
|
||||
├── Task 6: Auth middleware [quick]
|
||||
└── Task 7: Client module [quick]
|
||||
|
||||
Wave 2 (After Wave 1):
|
||||
├── Task 2: [depends: 1]
|
||||
├── Task 3: [depends: 1]
|
||||
└── Task 6: [depends: 5]
|
||||
Wave 2 (After Wave 1 — core modules, MAX PARALLEL):
|
||||
├── Task 8: Core business logic (depends: 3, 5, 7) [deep]
|
||||
├── Task 9: API endpoints (depends: 4, 5) [unspecified-high]
|
||||
├── Task 10: Secondary storage impl (depends: 5) [unspecified-high]
|
||||
├── Task 11: Retry/fallback logic (depends: 8) [deep]
|
||||
├── Task 12: UI layout + navigation (depends: 2) [visual-engineering]
|
||||
├── Task 13: API client + hooks (depends: 4) [quick]
|
||||
└── Task 14: Telemetry middleware (depends: 5, 10) [unspecified-high]
|
||||
|
||||
Wave 3 (After Wave 2):
|
||||
└── Task 4: [depends: 2, 3]
|
||||
Wave 3 (After Wave 2 — integration + UI):
|
||||
├── Task 15: Main route combining modules (depends: 6, 11, 14) [deep]
|
||||
├── Task 16: UI data visualization (depends: 12, 13) [visual-engineering]
|
||||
├── Task 17: Deployment config A (depends: 15) [quick]
|
||||
├── Task 18: Deployment config B (depends: 15) [quick]
|
||||
├── Task 19: Deployment config C (depends: 15) [quick]
|
||||
└── Task 20: UI request log + build (depends: 16) [visual-engineering]
|
||||
|
||||
Critical Path: Task 1 → Task 2 → Task 4
|
||||
Parallel Speedup: ~40% faster than sequential
|
||||
Wave 4 (After Wave 3 — verification):
|
||||
├── Task 21: Integration tests (depends: 15) [deep]
|
||||
├── Task 22: UI QA - Playwright (depends: 20) [unspecified-high]
|
||||
├── Task 23: E2E QA (depends: 21) [deep]
|
||||
└── Task 24: Git cleanup + tagging (depends: 21) [git]
|
||||
|
||||
Wave FINAL (After ALL tasks — independent review, 4 parallel):
|
||||
├── Task F1: Plan compliance audit (oracle)
|
||||
├── Task F2: Code quality review (unspecified-high)
|
||||
├── Task F3: Real manual QA (unspecified-high)
|
||||
└── Task F4: Scope fidelity check (deep)
|
||||
|
||||
Critical Path: Task 1 → Task 5 → Task 8 → Task 11 → Task 15 → Task 21 → F1-F4
|
||||
Parallel Speedup: ~70% faster than sequential
|
||||
Max Concurrent: 7 (Waves 1 & 2)
|
||||
\`\`\`
|
||||
|
||||
### Dependency Matrix
|
||||
### Dependency Matrix (abbreviated — show ALL tasks in your generated plan)
|
||||
|
||||
| Task | Depends On | Blocks | Can Parallelize With |
|
||||
|------|------------|--------|---------------------|
|
||||
| 1 | None | 2, 3 | 5 |
|
||||
| 2 | 1 | 4 | 3, 6 |
|
||||
| 3 | 1 | 4 | 2, 6 |
|
||||
| 4 | 2, 3 | None | None (final) |
|
||||
| 5 | None | 6 | 1 |
|
||||
| 6 | 5 | None | 2, 3 |
|
||||
| Task | Depends On | Blocks | Wave |
|
||||
|------|------------|--------|------|
|
||||
| 1-7 | — | 8-14 | 1 |
|
||||
| 8 | 3, 5, 7 | 11, 15 | 2 |
|
||||
| 11 | 8 | 15 | 2 |
|
||||
| 14 | 5, 10 | 15 | 2 |
|
||||
| 15 | 6, 11, 14 | 17-19, 21 | 3 |
|
||||
| 21 | 15 | 23, 24 | 4 |
|
||||
|
||||
> This is abbreviated for reference. YOUR generated plan must include the FULL matrix for ALL tasks.
|
||||
|
||||
### Agent Dispatch Summary
|
||||
|
||||
| Wave | Tasks | Recommended Agents |
|
||||
|------|-------|-------------------|
|
||||
| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
|
||||
| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
|
||||
| 3 | 4 | final integration task |
|
||||
| Wave | # Parallel | Tasks → Agent Category |
|
||||
|------|------------|----------------------|
|
||||
| 1 | **7** | T1-T4 → \`quick\`, T5 → \`quick\`, T6 → \`quick\`, T7 → \`quick\` |
|
||||
| 2 | **7** | T8 → \`deep\`, T9 → \`unspecified-high\`, T10 → \`unspecified-high\`, T11 → \`deep\`, T12 → \`visual-engineering\`, T13 → \`quick\`, T14 → \`unspecified-high\` |
|
||||
| 3 | **6** | T15 → \`deep\`, T16 → \`visual-engineering\`, T17-T19 → \`quick\`, T20 → \`visual-engineering\` |
|
||||
| 4 | **4** | T21 → \`deep\`, T22 → \`unspecified-high\`, T23 → \`deep\`, T24 → \`git\` |
|
||||
| FINAL | **4** | F1 → \`oracle\`, F2 → \`unspecified-high\`, F3 → \`unspecified-high\`, F4 → \`deep\` |
|
||||
|
||||
---
|
||||
|
||||
## TODOs
|
||||
|
||||
> Implementation + Test = ONE Task. Never separate.
|
||||
> EVERY task MUST have: Recommended Agent Profile + Parallelization info.
|
||||
> EVERY task MUST have: Recommended Agent Profile + Parallelization info + QA Scenarios.
|
||||
> **A task WITHOUT QA Scenarios is INCOMPLETE. No exceptions.**
|
||||
|
||||
- [ ] 1. [Task Title]
|
||||
|
||||
@@ -257,22 +207,15 @@ Parallel Speedup: ~40% faster than sequential
|
||||
|
||||
**Pattern References** (existing code to follow):
|
||||
- \`src/services/auth.ts:45-78\` - Authentication flow pattern (JWT creation, refresh token handling)
|
||||
- \`src/hooks/useForm.ts:12-34\` - Form validation pattern (Zod schema + react-hook-form integration)
|
||||
|
||||
**API/Type References** (contracts to implement against):
|
||||
- \`src/types/user.ts:UserDTO\` - Response shape for user endpoints
|
||||
- \`src/api/schema.ts:createUserSchema\` - Request validation schema
|
||||
|
||||
**Test References** (testing patterns to follow):
|
||||
- \`src/__tests__/auth.test.ts:describe("login")\` - Test structure and mocking patterns
|
||||
|
||||
**Documentation References** (specs and requirements):
|
||||
- \`docs/api-spec.md#authentication\` - API contract details
|
||||
- \`ARCHITECTURE.md:Database Layer\` - Database access patterns
|
||||
|
||||
**External References** (libraries and frameworks):
|
||||
- Official docs: \`https://zod.dev/?id=basic-usage\` - Zod validation syntax
|
||||
- Example repo: \`github.com/example/project/src/auth\` - Reference implementation
|
||||
|
||||
**WHY Each Reference Matters** (explain the relevance):
|
||||
- Don't just list files - explain what pattern/information the executor should extract
|
||||
@@ -283,113 +226,60 @@ Parallel Speedup: ~40% faster than sequential
|
||||
|
||||
> **AGENT-EXECUTABLE VERIFICATION ONLY** — No human action permitted.
|
||||
> Every criterion MUST be verifiable by running a command or using a tool.
|
||||
> REPLACE all placeholders with actual values from task context.
|
||||
|
||||
**If TDD (tests enabled):**
|
||||
- [ ] Test file created: src/auth/login.test.ts
|
||||
- [ ] Test covers: successful login returns JWT token
|
||||
- [ ] bun test src/auth/login.test.ts → PASS (3 tests, 0 failures)
|
||||
|
||||
**Agent-Executed QA Scenarios (MANDATORY — per-scenario, ultra-detailed):**
|
||||
**QA Scenarios (MANDATORY — task is INCOMPLETE without these):**
|
||||
|
||||
> Write MULTIPLE named scenarios per task: happy path AND failure cases.
|
||||
> Each scenario = exact tool + steps with real selectors/data + evidence path.
|
||||
|
||||
**Example — Frontend/UI (Playwright):**
|
||||
> **This is NOT optional. A task without QA scenarios WILL BE REJECTED.**
|
||||
>
|
||||
> Write scenario tests that verify the ACTUAL BEHAVIOR of what you built.
|
||||
> Minimum: 1 happy path + 1 failure/edge case per task.
|
||||
> Each scenario = exact tool + exact steps + exact assertions + evidence path.
|
||||
>
|
||||
> **The executing agent MUST run these scenarios after implementation.**
|
||||
> **The orchestrator WILL verify evidence files exist before marking task complete.**
|
||||
|
||||
\\\`\\\`\\\`
|
||||
Scenario: Successful login redirects to dashboard
|
||||
Tool: Playwright (playwright skill)
|
||||
Preconditions: Dev server running on localhost:3000, test user exists
|
||||
Scenario: [Happy path — what SHOULD work]
|
||||
Tool: [Playwright / interactive_bash / Bash (curl)]
|
||||
Preconditions: [Exact setup state]
|
||||
Steps:
|
||||
1. Navigate to: http://localhost:3000/login
|
||||
2. Wait for: input[name="email"] visible (timeout: 5s)
|
||||
3. Fill: input[name="email"] → "test@example.com"
|
||||
4. Fill: input[name="password"] → "ValidPass123!"
|
||||
5. Click: button[type="submit"]
|
||||
6. Wait for: navigation to /dashboard (timeout: 10s)
|
||||
7. Assert: h1 text contains "Welcome back"
|
||||
8. Assert: cookie "session_token" exists
|
||||
9. Screenshot: .sisyphus/evidence/task-1-login-success.png
|
||||
Expected Result: Dashboard loads with welcome message
|
||||
Evidence: .sisyphus/evidence/task-1-login-success.png
|
||||
1. [Exact action — specific command/selector/endpoint, no vagueness]
|
||||
2. [Next action — with expected intermediate state]
|
||||
3. [Assertion — exact expected value, not "verify it works"]
|
||||
Expected Result: [Concrete, observable, binary pass/fail]
|
||||
Failure Indicators: [What specifically would mean this failed]
|
||||
Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}.{ext}
|
||||
|
||||
Scenario: Login fails with invalid credentials
|
||||
Tool: Playwright (playwright skill)
|
||||
Preconditions: Dev server running, no valid user with these credentials
|
||||
Scenario: [Failure/edge case — what SHOULD fail gracefully]
|
||||
Tool: [same format]
|
||||
Preconditions: [Invalid input / missing dependency / error state]
|
||||
Steps:
|
||||
1. Navigate to: http://localhost:3000/login
|
||||
2. Fill: input[name="email"] → "wrong@example.com"
|
||||
3. Fill: input[name="password"] → "WrongPass"
|
||||
4. Click: button[type="submit"]
|
||||
5. Wait for: .error-message visible (timeout: 5s)
|
||||
6. Assert: .error-message text contains "Invalid credentials"
|
||||
7. Assert: URL is still /login (no redirect)
|
||||
8. Screenshot: .sisyphus/evidence/task-1-login-failure.png
|
||||
Expected Result: Error message shown, stays on login page
|
||||
Evidence: .sisyphus/evidence/task-1-login-failure.png
|
||||
1. [Trigger the error condition]
|
||||
2. [Assert error is handled correctly]
|
||||
Expected Result: [Graceful failure with correct error message/code]
|
||||
Evidence: .sisyphus/evidence/task-{N}-{scenario-slug}-error.{ext}
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**Example — API/Backend (curl):**
|
||||
|
||||
\\\`\\\`\\\`
|
||||
Scenario: Create user returns 201 with UUID
|
||||
Tool: Bash (curl)
|
||||
Preconditions: Server running on localhost:8080
|
||||
Steps:
|
||||
1. curl -s -w "\\n%{http_code}" -X POST http://localhost:8080/api/users \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"email":"new@test.com","name":"Test User"}'
|
||||
2. Assert: HTTP status is 201
|
||||
3. Assert: response.id matches UUID format
|
||||
4. GET /api/users/{returned-id} → Assert name equals "Test User"
|
||||
Expected Result: User created and retrievable
|
||||
Evidence: Response bodies captured
|
||||
|
||||
Scenario: Duplicate email returns 409
|
||||
Tool: Bash (curl)
|
||||
Preconditions: User with email "new@test.com" already exists
|
||||
Steps:
|
||||
1. Repeat POST with same email
|
||||
2. Assert: HTTP status is 409
|
||||
3. Assert: response.error contains "already exists"
|
||||
Expected Result: Conflict error returned
|
||||
Evidence: Response body captured
|
||||
\\\`\\\`\\\`
|
||||
|
||||
**Example — TUI/CLI (interactive_bash):**
|
||||
|
||||
\\\`\\\`\\\`
|
||||
Scenario: CLI loads config and displays menu
|
||||
Tool: interactive_bash (tmux)
|
||||
Preconditions: Binary built, test config at ./test.yaml
|
||||
Steps:
|
||||
1. tmux new-session: ./my-cli --config test.yaml
|
||||
2. Wait for: "Configuration loaded" in output (timeout: 5s)
|
||||
3. Assert: Menu items visible ("1. Create", "2. List", "3. Exit")
|
||||
4. Send keys: "3" then Enter
|
||||
5. Assert: "Goodbye" in output
|
||||
6. Assert: Process exited with code 0
|
||||
Expected Result: CLI starts, shows menu, exits cleanly
|
||||
Evidence: Terminal output captured
|
||||
|
||||
Scenario: CLI handles missing config gracefully
|
||||
Tool: interactive_bash (tmux)
|
||||
Preconditions: No config file at ./nonexistent.yaml
|
||||
Steps:
|
||||
1. tmux new-session: ./my-cli --config nonexistent.yaml
|
||||
2. Wait for: output (timeout: 3s)
|
||||
3. Assert: stderr contains "Config file not found"
|
||||
4. Assert: Process exited with code 1
|
||||
Expected Result: Meaningful error, non-zero exit
|
||||
Evidence: Error output captured
|
||||
\\\`\\\`\\\`
|
||||
> **Specificity requirements — every scenario MUST use:**
|
||||
> - **Selectors**: Specific CSS selectors (\`.login-button\`, not "the login button")
|
||||
> - **Data**: Concrete test data (\`"test@example.com"\`, not \`"[email]"\`)
|
||||
> - **Assertions**: Exact values (\`text contains "Welcome back"\`, not "verify it works")
|
||||
> - **Timing**: Wait conditions where relevant (\`timeout: 10s\`)
|
||||
> - **Negative**: At least ONE failure/error scenario per task
|
||||
>
|
||||
> **Anti-patterns (your scenario is INVALID if it looks like this):**
|
||||
> - ❌ "Verify it works correctly" — HOW? What does "correctly" mean?
|
||||
> - ❌ "Check the API returns data" — WHAT data? What fields? What values?
|
||||
> - ❌ "Test the component renders" — WHERE? What selector? What content?
|
||||
> - ❌ Any scenario without an evidence path
|
||||
|
||||
**Evidence to Capture:**
|
||||
- [ ] Screenshots in .sisyphus/evidence/ for UI scenarios
|
||||
- [ ] Terminal output for CLI/TUI scenarios
|
||||
- [ ] Response bodies for API scenarios
|
||||
- [ ] Each evidence file named: task-{N}-{scenario-slug}.{ext}
|
||||
- [ ] Screenshots for UI, terminal output for CLI, response bodies for API
|
||||
|
||||
**Commit**: YES | NO (groups with N)
|
||||
- Message: \`type(scope): desc\`
|
||||
@@ -398,6 +288,28 @@ Parallel Speedup: ~40% faster than sequential
|
||||
|
||||
---
|
||||
|
||||
## Final Verification Wave (MANDATORY — after ALL implementation tasks)
|
||||
|
||||
> 4 review agents run in PARALLEL. ALL must APPROVE. Rejection → fix → re-run.
|
||||
|
||||
- [ ] F1. **Plan Compliance Audit** — \`oracle\`
|
||||
Read the plan end-to-end. For each "Must Have": verify implementation exists (read file, curl endpoint, run command). For each "Must NOT Have": search codebase for forbidden patterns — reject with file:line if found. Check evidence files exist in .sisyphus/evidence/. Compare deliverables against plan.
|
||||
Output: \`Must Have [N/N] | Must NOT Have [N/N] | Tasks [N/N] | VERDICT: APPROVE/REJECT\`
|
||||
|
||||
- [ ] F2. **Code Quality Review** — \`unspecified-high\`
|
||||
Run \`tsc --noEmit\` + linter + \`bun test\`. Review all changed files for: \`as any\`/\`@ts-ignore\`, empty catches, console.log in prod, commented-out code, unused imports. Check AI slop: excessive comments, over-abstraction, generic names (data/result/item/temp).
|
||||
Output: \`Build [PASS/FAIL] | Lint [PASS/FAIL] | Tests [N pass/N fail] | Files [N clean/N issues] | VERDICT\`
|
||||
|
||||
- [ ] F3. **Real Manual QA** — \`unspecified-high\` (+ \`playwright\` skill if UI)
|
||||
Start from clean state. Execute EVERY QA scenario from EVERY task — follow exact steps, capture evidence. Test cross-task integration (features working together, not isolation). Test edge cases: empty state, invalid input, rapid actions. Save to \`.sisyphus/evidence/final-qa/\`.
|
||||
Output: \`Scenarios [N/N pass] | Integration [N/N] | Edge Cases [N tested] | VERDICT\`
|
||||
|
||||
- [ ] F4. **Scope Fidelity Check** — \`deep\`
|
||||
For each task: read "What to do", read actual diff (git log/diff). Verify 1:1 — everything in spec was built (no missing), nothing beyond spec was built (no creep). Check "Must NOT do" compliance. Detect cross-task contamination: Task N touching Task M's files. Flag unaccounted changes.
|
||||
Output: \`Tasks [N/N compliant] | Contamination [CLEAN/N issues] | Unaccounted [CLEAN/N files] | VERDICT\`
|
||||
|
||||
---
|
||||
|
||||
## Commit Strategy
|
||||
|
||||
| After Task | Message | Files | Verification |
|
||||
|
||||
@@ -247,7 +247,7 @@ exports[`generateModelConfig single native provider uses OpenAI models when only
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"writing": {
|
||||
"model": "openai/gpt-5.2",
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -314,7 +314,7 @@ exports[`generateModelConfig single native provider uses OpenAI models with isMa
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"writing": {
|
||||
"model": "openai/gpt-5.2",
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -372,6 +372,7 @@ exports[`generateModelConfig single native provider uses Gemini models when only
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -432,6 +433,7 @@ exports[`generateModelConfig single native provider uses Gemini models with isMa
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -505,6 +507,7 @@ exports[`generateModelConfig all native providers uses preferred models from fal
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -579,6 +582,7 @@ exports[`generateModelConfig all native providers uses preferred models with isM
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -652,6 +656,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models when on
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "opencode/gemini-3-flash",
|
||||
@@ -726,6 +731,7 @@ exports[`generateModelConfig fallback providers uses OpenCode Zen models with is
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "opencode/gemini-3-flash",
|
||||
@@ -799,6 +805,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models when
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "github-copilot/gemini-3-flash-preview",
|
||||
@@ -873,6 +880,7 @@ exports[`generateModelConfig fallback providers uses GitHub Copilot models with
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "github-copilot/gemini-3-flash-preview",
|
||||
@@ -927,10 +935,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian whe
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "zai-coding-plan/glm-4.7",
|
||||
"model": "zai-coding-plan/glm-5",
|
||||
},
|
||||
"writing": {
|
||||
"model": "zai-coding-plan/glm-4.7",
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -982,10 +990,10 @@ exports[`generateModelConfig fallback providers uses ZAI model for librarian wit
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "zai-coding-plan/glm-4.7",
|
||||
"model": "zai-coding-plan/glm-5",
|
||||
},
|
||||
"writing": {
|
||||
"model": "zai-coding-plan/glm-4.7",
|
||||
"model": "opencode/glm-4.7-free",
|
||||
},
|
||||
},
|
||||
}
|
||||
@@ -1056,6 +1064,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + OpenCode Zen
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "opencode/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "opencode/gemini-3-flash",
|
||||
@@ -1129,6 +1138,7 @@ exports[`generateModelConfig mixed provider scenarios uses OpenAI + Copilot comb
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "github-copilot/gemini-3-flash-preview",
|
||||
@@ -1189,8 +1199,7 @@ exports[`generateModelConfig mixed provider scenarios uses Claude + ZAI combinat
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "anthropic/claude-opus-4-6",
|
||||
"variant": "max",
|
||||
"model": "zai-coding-plan/glm-5",
|
||||
},
|
||||
"writing": {
|
||||
"model": "anthropic/claude-sonnet-4-5",
|
||||
@@ -1256,6 +1265,7 @@ exports[`generateModelConfig mixed provider scenarios uses Gemini + Claude combi
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -1329,6 +1339,7 @@ exports[`generateModelConfig mixed provider scenarios uses all fallback provider
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "github-copilot/gemini-3-pro-preview",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "github-copilot/gemini-3-flash-preview",
|
||||
@@ -1402,6 +1413,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers togethe
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
@@ -1476,6 +1488,7 @@ exports[`generateModelConfig mixed provider scenarios uses all providers with is
|
||||
},
|
||||
"visual-engineering": {
|
||||
"model": "google/gemini-3-pro",
|
||||
"variant": "high",
|
||||
},
|
||||
"writing": {
|
||||
"model": "google/gemini-3-flash",
|
||||
|
||||
83
src/cli/cli-installer.test.ts
Normal file
83
src/cli/cli-installer.test.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { afterEach, beforeEach, describe, expect, it, mock, spyOn } from "bun:test"
|
||||
import * as configManager from "./config-manager"
|
||||
import { runCliInstaller } from "./cli-installer"
|
||||
import type { InstallArgs } from "./types"
|
||||
|
||||
describe("runCliInstaller", () => {
|
||||
const mockConsoleLog = mock(() => {})
|
||||
const mockConsoleError = mock(() => {})
|
||||
const originalConsoleLog = console.log
|
||||
const originalConsoleError = console.error
|
||||
|
||||
beforeEach(() => {
|
||||
console.log = mockConsoleLog
|
||||
console.error = mockConsoleError
|
||||
mockConsoleLog.mockClear()
|
||||
mockConsoleError.mockClear()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
console.log = originalConsoleLog
|
||||
console.error = originalConsoleError
|
||||
})
|
||||
|
||||
it("runs auth and provider setup steps when openai or copilot are enabled without gemini", async () => {
|
||||
//#given
|
||||
const addAuthPluginsSpy = spyOn(configManager, "addAuthPlugins").mockResolvedValue({
|
||||
success: true,
|
||||
configPath: "/tmp/opencode.jsonc",
|
||||
})
|
||||
const addProviderConfigSpy = spyOn(configManager, "addProviderConfig").mockReturnValue({
|
||||
success: true,
|
||||
configPath: "/tmp/opencode.jsonc",
|
||||
})
|
||||
const restoreSpies = [
|
||||
addAuthPluginsSpy,
|
||||
addProviderConfigSpy,
|
||||
spyOn(configManager, "detectCurrentConfig").mockReturnValue({
|
||||
isInstalled: false,
|
||||
hasClaude: false,
|
||||
isMax20: false,
|
||||
hasOpenAI: false,
|
||||
hasGemini: false,
|
||||
hasCopilot: false,
|
||||
hasOpencodeZen: false,
|
||||
hasZaiCodingPlan: false,
|
||||
hasKimiForCoding: false,
|
||||
}),
|
||||
spyOn(configManager, "isOpenCodeInstalled").mockResolvedValue(true),
|
||||
spyOn(configManager, "getOpenCodeVersion").mockResolvedValue("1.0.200"),
|
||||
spyOn(configManager, "addPluginToOpenCodeConfig").mockResolvedValue({
|
||||
success: true,
|
||||
configPath: "/tmp/opencode.jsonc",
|
||||
}),
|
||||
spyOn(configManager, "writeOmoConfig").mockReturnValue({
|
||||
success: true,
|
||||
configPath: "/tmp/oh-my-opencode.jsonc",
|
||||
}),
|
||||
]
|
||||
|
||||
const args: InstallArgs = {
|
||||
tui: false,
|
||||
claude: "no",
|
||||
openai: "yes",
|
||||
gemini: "no",
|
||||
copilot: "yes",
|
||||
opencodeZen: "no",
|
||||
zaiCodingPlan: "no",
|
||||
kimiForCoding: "no",
|
||||
}
|
||||
|
||||
//#when
|
||||
const result = await runCliInstaller(args, "3.4.0")
|
||||
|
||||
//#then
|
||||
expect(result).toBe(0)
|
||||
expect(addAuthPluginsSpy).toHaveBeenCalledTimes(1)
|
||||
expect(addProviderConfigSpy).toHaveBeenCalledTimes(1)
|
||||
|
||||
for (const spy of restoreSpies) {
|
||||
spy.mockRestore()
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -77,7 +77,9 @@ export async function runCliInstaller(args: InstallArgs, version: string): Promi
|
||||
`Plugin ${isUpdate ? "verified" : "added"} ${SYMBOLS.arrow} ${color.dim(pluginResult.configPath)}`,
|
||||
)
|
||||
|
||||
if (config.hasGemini) {
|
||||
const needsProviderSetup = config.hasGemini || config.hasOpenAI || config.hasCopilot
|
||||
|
||||
if (needsProviderSetup) {
|
||||
printStep(step++, totalSteps, "Adding auth plugins...")
|
||||
const authResult = await addAuthPlugins(config)
|
||||
if (!authResult.success) {
|
||||
|
||||
@@ -52,7 +52,7 @@ export function handleBackgroundEvent(args: {
|
||||
|
||||
const props = event.properties
|
||||
|
||||
if (event.type === "message.part.updated") {
|
||||
if (event.type === "message.part.updated" || event.type === "message.part.delta") {
|
||||
if (!props || !isRecord(props)) return
|
||||
const sessionID = getString(props, "sessionID")
|
||||
if (!sessionID) return
|
||||
|
||||
@@ -6,6 +6,7 @@ import type { BackgroundTask, ResumeInput } from "./types"
|
||||
import { MIN_IDLE_TIME_MS } from "./constants"
|
||||
import { BackgroundManager } from "./manager"
|
||||
import { ConcurrencyManager } from "./concurrency"
|
||||
import { initTaskToastManager, _resetTaskToastManagerForTesting } from "../task-toast-manager/manager"
|
||||
|
||||
|
||||
const TASK_TTL_MS = 30 * 60 * 1000
|
||||
@@ -190,6 +191,10 @@ function getPendingByParent(manager: BackgroundManager): Map<string, Set<string>
|
||||
return (manager as unknown as { pendingByParent: Map<string, Set<string>> }).pendingByParent
|
||||
}
|
||||
|
||||
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
|
||||
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
|
||||
}
|
||||
|
||||
function getQueuesByKey(
|
||||
manager: BackgroundManager
|
||||
): Map<string, Array<{ task: BackgroundTask; input: import("./types").LaunchInput }>> {
|
||||
@@ -215,6 +220,23 @@ function stubNotifyParentSession(manager: BackgroundManager): void {
|
||||
;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {}
|
||||
}
|
||||
|
||||
function createToastRemoveTaskTracker(): { removeTaskCalls: string[]; resetToastManager: () => void } {
|
||||
_resetTaskToastManagerForTesting()
|
||||
const toastManager = initTaskToastManager({
|
||||
tui: { showToast: async () => {} },
|
||||
} as unknown as PluginInput["client"])
|
||||
const removeTaskCalls: string[] = []
|
||||
const originalRemoveTask = toastManager.removeTask.bind(toastManager)
|
||||
toastManager.removeTask = (taskId: string): void => {
|
||||
removeTaskCalls.push(taskId)
|
||||
originalRemoveTask(taskId)
|
||||
}
|
||||
return {
|
||||
removeTaskCalls,
|
||||
resetToastManager: _resetTaskToastManagerForTesting,
|
||||
}
|
||||
}
|
||||
|
||||
function getCleanupSignals(): Array<NodeJS.Signals | "beforeExit" | "exit"> {
|
||||
const signals: Array<NodeJS.Signals | "beforeExit" | "exit"> = ["SIGINT", "SIGTERM", "beforeExit", "exit"]
|
||||
if (process.platform === "win32") {
|
||||
@@ -894,7 +916,7 @@ describe("BackgroundManager.notifyParentSession - dynamic message lookup", () =>
|
||||
})
|
||||
|
||||
describe("BackgroundManager.notifyParentSession - aborted parent", () => {
|
||||
test("should skip notification when parent session is aborted", async () => {
|
||||
test("should fall back and still notify when parent session messages are aborted", async () => {
|
||||
//#given
|
||||
let promptCalled = false
|
||||
const promptMock = async () => {
|
||||
@@ -933,7 +955,7 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
|
||||
.notifyParentSession(task)
|
||||
|
||||
//#then
|
||||
expect(promptCalled).toBe(false)
|
||||
expect(promptCalled).toBe(true)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
@@ -1770,6 +1792,32 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
const pendingSet = pendingByParent.get(task.parentSessionID)
|
||||
expect(pendingSet?.has(task.id) ?? false).toBe(false)
|
||||
})
|
||||
|
||||
test("should remove task from toast manager when notification is skipped", async () => {
|
||||
//#given
|
||||
const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
|
||||
const manager = createBackgroundManager()
|
||||
const task = createMockTask({
|
||||
id: "task-cancel-skip-notification",
|
||||
sessionID: "session-cancel-skip-notification",
|
||||
parentSessionID: "parent-cancel-skip-notification",
|
||||
status: "running",
|
||||
})
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
const cancelled = await manager.cancelTask(task.id, {
|
||||
source: "test",
|
||||
skipNotification: true,
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(cancelled).toBe(true)
|
||||
expect(removeTaskCalls).toContain(task.id)
|
||||
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
})
|
||||
|
||||
describe("multiple keys process in parallel", () => {
|
||||
@@ -2730,6 +2778,43 @@ describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("should remove tasks from toast manager when session is deleted", () => {
|
||||
//#given
|
||||
const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
|
||||
const manager = createBackgroundManager()
|
||||
const parentSessionID = "session-parent-toast"
|
||||
const childTask = createMockTask({
|
||||
id: "task-child-toast",
|
||||
sessionID: "session-child-toast",
|
||||
parentSessionID,
|
||||
status: "running",
|
||||
})
|
||||
const grandchildTask = createMockTask({
|
||||
id: "task-grandchild-toast",
|
||||
sessionID: "session-grandchild-toast",
|
||||
parentSessionID: "session-child-toast",
|
||||
status: "pending",
|
||||
startedAt: undefined,
|
||||
queuedAt: new Date(),
|
||||
})
|
||||
const taskMap = getTaskMap(manager)
|
||||
taskMap.set(childTask.id, childTask)
|
||||
taskMap.set(grandchildTask.id, grandchildTask)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.deleted",
|
||||
properties: { info: { id: parentSessionID } },
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(removeTaskCalls).toContain(childTask.id)
|
||||
expect(removeTaskCalls).toContain(grandchildTask.id)
|
||||
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.handleEvent - session.error", () => {
|
||||
@@ -2777,6 +2862,35 @@ describe("BackgroundManager.handleEvent - session.error", () => {
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("removes errored task from toast manager", () => {
|
||||
//#given
|
||||
const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
|
||||
const manager = createBackgroundManager()
|
||||
const sessionID = "ses_error_toast"
|
||||
const task = createMockTask({
|
||||
id: "task-session-error-toast",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
status: "running",
|
||||
})
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({
|
||||
type: "session.error",
|
||||
properties: {
|
||||
sessionID,
|
||||
error: { name: "UnknownError", message: "boom" },
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(removeTaskCalls).toContain(task.id)
|
||||
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
|
||||
test("ignores session.error for non-running tasks", () => {
|
||||
//#given
|
||||
const manager = createBackgroundManager()
|
||||
@@ -2922,13 +3036,32 @@ describe("BackgroundManager.pruneStaleTasksAndNotifications - removes pruned tas
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("removes stale task from toast manager", () => {
|
||||
//#given
|
||||
const { removeTaskCalls, resetToastManager } = createToastRemoveTaskTracker()
|
||||
const manager = createBackgroundManager()
|
||||
const staleTask = createMockTask({
|
||||
id: "task-stale-toast",
|
||||
sessionID: "session-stale-toast",
|
||||
parentSessionID: "parent-session",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 31 * 60 * 1000),
|
||||
})
|
||||
getTaskMap(manager).set(staleTask.id, staleTask)
|
||||
|
||||
//#when
|
||||
pruneStaleTasksAndNotificationsForTest(manager)
|
||||
|
||||
//#then
|
||||
expect(removeTaskCalls).toContain(staleTask.id)
|
||||
|
||||
manager.shutdown()
|
||||
resetToastManager()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
|
||||
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
|
||||
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
|
||||
}
|
||||
|
||||
function setCompletionTimer(manager: BackgroundManager, taskId: string): void {
|
||||
const completionTimers = getCompletionTimers(manager)
|
||||
const timer = setTimeout(() => {
|
||||
@@ -3413,4 +3546,134 @@ describe("BackgroundManager.handleEvent - non-tool event lastUpdate", () => {
|
||||
//#then - task should still be running (text event refreshed lastUpdate)
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
test("should refresh lastUpdate on message.part.delta events (OpenCode >=1.2.0)", async () => {
|
||||
//#given - a running task with stale lastUpdate
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
const task: BackgroundTask = {
|
||||
id: "task-delta-1",
|
||||
sessionID: "session-delta-1",
|
||||
parentSessionID: "parent-1",
|
||||
parentMessageID: "msg-1",
|
||||
description: "Reasoning task with delta events",
|
||||
prompt: "Extended thinking",
|
||||
agent: "oracle",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - 600_000),
|
||||
progress: {
|
||||
toolCalls: 0,
|
||||
lastUpdate: new Date(Date.now() - 300_000),
|
||||
},
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when - a message.part.delta event arrives (reasoning-delta or text-delta in OpenCode >=1.2.0)
|
||||
manager.handleEvent({
|
||||
type: "message.part.delta",
|
||||
properties: { sessionID: "session-delta-1", field: "text", delta: "thinking..." },
|
||||
})
|
||||
await manager["checkAndInterruptStaleTasks"]()
|
||||
|
||||
//#then - task should still be running (delta event refreshed lastUpdate)
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager regression fixes - resume and aborted notification", () => {
|
||||
test("should keep resumed task in memory after previous completion timer deadline", async () => {
|
||||
//#given
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
|
||||
const task: BackgroundTask = {
|
||||
id: "task-resume-timer-regression",
|
||||
sessionID: "session-resume-timer-regression",
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-1",
|
||||
description: "resume timer regression",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "completed",
|
||||
startedAt: new Date(),
|
||||
completedAt: new Date(),
|
||||
concurrencyGroup: "explore",
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
const completionTimers = getCompletionTimers(manager)
|
||||
const timer = setTimeout(() => {
|
||||
completionTimers.delete(task.id)
|
||||
getTaskMap(manager).delete(task.id)
|
||||
}, 25)
|
||||
completionTimers.set(task.id, timer)
|
||||
|
||||
//#when
|
||||
await manager.resume({
|
||||
sessionId: "session-resume-timer-regression",
|
||||
prompt: "resume task",
|
||||
parentSessionID: "parent-session-2",
|
||||
parentMessageID: "msg-2",
|
||||
})
|
||||
await new Promise((resolve) => setTimeout(resolve, 60))
|
||||
|
||||
//#then
|
||||
expect(getTaskMap(manager).has(task.id)).toBe(true)
|
||||
expect(completionTimers.has(task.id)).toBe(false)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("should start cleanup timer even when promptAsync aborts", async () => {
|
||||
//#given
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => {
|
||||
const error = new Error("User aborted")
|
||||
error.name = "MessageAbortedError"
|
||||
throw error
|
||||
},
|
||||
abort: async () => ({}),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
const task: BackgroundTask = {
|
||||
id: "task-aborted-cleanup-regression",
|
||||
sessionID: "session-aborted-cleanup-regression",
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-1",
|
||||
description: "aborted prompt cleanup regression",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "completed",
|
||||
startedAt: new Date(),
|
||||
completedAt: new Date(),
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
|
||||
|
||||
//#when
|
||||
await (manager as unknown as { notifyParentSession: (task: BackgroundTask) => Promise<void> }).notifyParentSession(task)
|
||||
|
||||
//#then
|
||||
expect(getCompletionTimers(manager).has(task.id)).toBe(true)
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
@@ -528,6 +528,12 @@ export class BackgroundManager {
|
||||
return existingTask
|
||||
}
|
||||
|
||||
const completionTimer = this.completionTimers.get(existingTask.id)
|
||||
if (completionTimer) {
|
||||
clearTimeout(completionTimer)
|
||||
this.completionTimers.delete(existingTask.id)
|
||||
}
|
||||
|
||||
// Re-acquire concurrency using the persisted concurrency group
|
||||
const concurrencyKey = existingTask.concurrencyGroup ?? existingTask.agent
|
||||
await this.concurrencyManager.acquire(concurrencyKey)
|
||||
@@ -660,7 +666,7 @@ export class BackgroundManager {
|
||||
handleEvent(event: Event): void {
|
||||
const props = event.properties
|
||||
|
||||
if (event.type === "message.part.updated") {
|
||||
if (event.type === "message.part.updated" || event.type === "message.part.delta") {
|
||||
if (!props || typeof props !== "object" || !("sessionID" in props)) return
|
||||
const partInfo = props as unknown as MessagePartInfo
|
||||
const sessionID = partInfo?.sessionID
|
||||
@@ -783,6 +789,10 @@ export class BackgroundManager {
|
||||
this.cleanupPendingByParent(task)
|
||||
this.tasks.delete(task.id)
|
||||
this.clearNotificationsForTask(task.id)
|
||||
const toastManager = getTaskToastManager()
|
||||
if (toastManager) {
|
||||
toastManager.removeTask(task.id)
|
||||
}
|
||||
if (task.sessionID) {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
@@ -830,6 +840,10 @@ export class BackgroundManager {
|
||||
this.cleanupPendingByParent(task)
|
||||
this.tasks.delete(task.id)
|
||||
this.clearNotificationsForTask(task.id)
|
||||
const toastManager = getTaskToastManager()
|
||||
if (toastManager) {
|
||||
toastManager.removeTask(task.id)
|
||||
}
|
||||
if (task.sessionID) {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
@@ -1000,6 +1014,10 @@ export class BackgroundManager {
|
||||
}
|
||||
|
||||
if (options?.skipNotification) {
|
||||
const toastManager = getTaskToastManager()
|
||||
if (toastManager) {
|
||||
toastManager.removeTask(task.id)
|
||||
}
|
||||
log(`[background-agent] Task cancelled via ${source} (notification skipped):`, task.id)
|
||||
return true
|
||||
}
|
||||
@@ -1239,11 +1257,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
} catch (error) {
|
||||
if (this.isAbortedSessionError(error)) {
|
||||
log("[background-agent] Parent session aborted, skipping notification:", {
|
||||
log("[background-agent] Parent session aborted while loading messages; using messageDir fallback:", {
|
||||
taskId: task.id,
|
||||
parentSessionID: task.parentSessionID,
|
||||
})
|
||||
return
|
||||
}
|
||||
const messageDir = getMessageDir(task.parentSessionID)
|
||||
const currentMessage = messageDir ? findNearestMessageWithFields(messageDir) : null
|
||||
@@ -1277,13 +1294,13 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
})
|
||||
} catch (error) {
|
||||
if (this.isAbortedSessionError(error)) {
|
||||
log("[background-agent] Parent session aborted, skipping notification:", {
|
||||
log("[background-agent] Parent session aborted while sending notification; continuing cleanup:", {
|
||||
taskId: task.id,
|
||||
parentSessionID: task.parentSessionID,
|
||||
})
|
||||
return
|
||||
} else {
|
||||
log("[background-agent] Failed to send notification:", error)
|
||||
}
|
||||
log("[background-agent] Failed to send notification:", error)
|
||||
}
|
||||
|
||||
if (allComplete) {
|
||||
@@ -1413,6 +1430,10 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
}
|
||||
this.clearNotificationsForTask(taskId)
|
||||
const toastManager = getTaskToastManager()
|
||||
if (toastManager) {
|
||||
toastManager.removeTask(taskId)
|
||||
}
|
||||
this.tasks.delete(taskId)
|
||||
if (task.sessionID) {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
@@ -1452,7 +1473,8 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
const sessionID = task.sessionID
|
||||
if (!startedAt || !sessionID) continue
|
||||
|
||||
const sessionIsRunning = allStatuses[sessionID]?.type === "running"
|
||||
const sessionStatus = allStatuses[sessionID]?.type
|
||||
const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
|
||||
const runtime = now - startedAt.getTime()
|
||||
|
||||
if (!task.progress?.lastUpdate) {
|
||||
|
||||
@@ -146,14 +146,59 @@ describe("checkAndInterruptStaleTasks", () => {
|
||||
},
|
||||
})
|
||||
|
||||
//#when — session status is "running"
|
||||
//#when — session status is "busy" (OpenCode's actual status for active LLM processing)
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { staleTimeoutMs: 180_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "running" } },
|
||||
sessionStatuses: { "ses-1": { type: "busy" } },
|
||||
})
|
||||
|
||||
//#then — task should survive because session is actively busy
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
it("should NOT interrupt busy session task even with very old lastUpdate", async () => {
|
||||
//#given — lastUpdate is 15min old, but session is still busy
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 900_000),
|
||||
progress: {
|
||||
toolCalls: 2,
|
||||
lastUpdate: new Date(Date.now() - 900_000),
|
||||
},
|
||||
})
|
||||
|
||||
//#when — session busy, lastUpdate far exceeds any timeout
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { staleTimeoutMs: 180_000, messageStalenessTimeoutMs: 600_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "busy" } },
|
||||
})
|
||||
|
||||
//#then — busy sessions are NEVER stale-killed (babysitter + TTL prune handle these)
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
it("should NOT interrupt busy session even with no progress (undefined lastUpdate)", async () => {
|
||||
//#given — task has no progress at all, but session is busy
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 15 * 60 * 1000),
|
||||
progress: undefined,
|
||||
})
|
||||
|
||||
//#when — session is busy
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { messageStalenessTimeoutMs: 600_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "busy" } },
|
||||
})
|
||||
|
||||
//#then — task should survive because session is actively running
|
||||
@@ -255,6 +300,75 @@ describe("checkAndInterruptStaleTasks", () => {
|
||||
expect(task.error).toContain("Stale timeout")
|
||||
})
|
||||
|
||||
it("should NOT interrupt task when session is busy (OpenCode status), even if lastUpdate exceeds stale timeout", async () => {
|
||||
//#given — lastUpdate is 5min old but session is "busy" (OpenCode's actual status for active sessions)
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 300_000),
|
||||
progress: {
|
||||
toolCalls: 2,
|
||||
lastUpdate: new Date(Date.now() - 300_000),
|
||||
},
|
||||
})
|
||||
|
||||
//#when — session status is "busy" (not "running" — OpenCode uses "busy" for active LLM processing)
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { staleTimeoutMs: 180_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "busy" } },
|
||||
})
|
||||
|
||||
//#then — "busy" sessions must be protected from stale-kill
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
it("should NOT interrupt task when session is in retry state", async () => {
|
||||
//#given — lastUpdate is 5min old but session is retrying
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 300_000),
|
||||
progress: {
|
||||
toolCalls: 1,
|
||||
lastUpdate: new Date(Date.now() - 300_000),
|
||||
},
|
||||
})
|
||||
|
||||
//#when — session status is "retry" (OpenCode retries on transient API errors)
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { staleTimeoutMs: 180_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "retry" } },
|
||||
})
|
||||
|
||||
//#then — retry sessions must be protected from stale-kill
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
it("should NOT interrupt busy session even with no progress (undefined lastUpdate)", async () => {
|
||||
//#given — no progress at all, session is "busy" (thinking model with no streamed tokens yet)
|
||||
const task = createRunningTask({
|
||||
startedAt: new Date(Date.now() - 15 * 60 * 1000),
|
||||
progress: undefined,
|
||||
})
|
||||
|
||||
//#when — session is busy
|
||||
await checkAndInterruptStaleTasks({
|
||||
tasks: [task],
|
||||
client: mockClient as never,
|
||||
config: { messageStalenessTimeoutMs: 600_000 },
|
||||
concurrencyManager: mockConcurrencyManager as never,
|
||||
notifyParentSession: mockNotify,
|
||||
sessionStatuses: { "ses-1": { type: "busy" } },
|
||||
})
|
||||
|
||||
//#then — busy sessions with no progress must survive
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
it("should release concurrency key when interrupting a never-updated task", async () => {
|
||||
//#given
|
||||
const releaseMock = mock(() => {})
|
||||
|
||||
@@ -80,7 +80,8 @@ export async function checkAndInterruptStaleTasks(args: {
|
||||
const sessionID = task.sessionID
|
||||
if (!startedAt || !sessionID) continue
|
||||
|
||||
const sessionIsRunning = sessionStatuses?.[sessionID]?.type === "running"
|
||||
const sessionStatus = sessionStatuses?.[sessionID]?.type
|
||||
const sessionIsRunning = sessionStatus !== undefined && sessionStatus !== "idle"
|
||||
const runtime = now - startedAt.getTime()
|
||||
|
||||
if (!task.progress?.lastUpdate) {
|
||||
|
||||
@@ -0,0 +1,105 @@
|
||||
import { beforeEach, describe, expect, mock, test } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
|
||||
const executeCompactMock = mock(async () => {})
|
||||
const getLastAssistantMock = mock(async () => ({
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-5",
|
||||
}))
|
||||
const parseAnthropicTokenLimitErrorMock = mock(() => ({
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-5",
|
||||
}))
|
||||
|
||||
mock.module("./executor", () => ({
|
||||
executeCompact: executeCompactMock,
|
||||
getLastAssistant: getLastAssistantMock,
|
||||
}))
|
||||
|
||||
mock.module("./parser", () => ({
|
||||
parseAnthropicTokenLimitError: parseAnthropicTokenLimitErrorMock,
|
||||
}))
|
||||
|
||||
mock.module("../../shared/logger", () => ({
|
||||
log: () => {},
|
||||
}))
|
||||
|
||||
function createMockContext(): PluginInput {
|
||||
return {
|
||||
client: {
|
||||
session: {
|
||||
messages: mock(() => Promise.resolve({ data: [] })),
|
||||
},
|
||||
tui: {
|
||||
showToast: mock(() => Promise.resolve()),
|
||||
},
|
||||
},
|
||||
directory: "/tmp",
|
||||
} as PluginInput
|
||||
}
|
||||
|
||||
function setupDelayedTimeoutMocks(): {
|
||||
restore: () => void
|
||||
getClearTimeoutCalls: () => Array<ReturnType<typeof setTimeout>>
|
||||
} {
|
||||
const originalSetTimeout = globalThis.setTimeout
|
||||
const originalClearTimeout = globalThis.clearTimeout
|
||||
const clearTimeoutCalls: Array<ReturnType<typeof setTimeout>> = []
|
||||
let timeoutCounter = 0
|
||||
|
||||
globalThis.setTimeout = ((_: () => void, _delay?: number) => {
|
||||
timeoutCounter += 1
|
||||
return timeoutCounter as ReturnType<typeof setTimeout>
|
||||
}) as typeof setTimeout
|
||||
|
||||
globalThis.clearTimeout = ((timeoutID: ReturnType<typeof setTimeout>) => {
|
||||
clearTimeoutCalls.push(timeoutID)
|
||||
}) as typeof clearTimeout
|
||||
|
||||
return {
|
||||
restore: () => {
|
||||
globalThis.setTimeout = originalSetTimeout
|
||||
globalThis.clearTimeout = originalClearTimeout
|
||||
},
|
||||
getClearTimeoutCalls: () => clearTimeoutCalls,
|
||||
}
|
||||
}
|
||||
|
||||
describe("createAnthropicContextWindowLimitRecoveryHook", () => {
|
||||
beforeEach(() => {
|
||||
executeCompactMock.mockClear()
|
||||
getLastAssistantMock.mockClear()
|
||||
parseAnthropicTokenLimitErrorMock.mockClear()
|
||||
})
|
||||
|
||||
test("cancels pending timer when session.idle handles compaction first", async () => {
|
||||
//#given
|
||||
const { restore, getClearTimeoutCalls } = setupDelayedTimeoutMocks()
|
||||
const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
|
||||
const hook = createAnthropicContextWindowLimitRecoveryHook(createMockContext())
|
||||
|
||||
try {
|
||||
//#when
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: { sessionID: "session-race", error: "prompt is too long" },
|
||||
},
|
||||
})
|
||||
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: "session-race" },
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(getClearTimeoutCalls()).toEqual([1 as ReturnType<typeof setTimeout>])
|
||||
expect(executeCompactMock).toHaveBeenCalledTimes(1)
|
||||
expect(executeCompactMock.mock.calls[0]?.[0]).toBe("session-race")
|
||||
} finally {
|
||||
restore()
|
||||
}
|
||||
})
|
||||
})
|
||||
@@ -28,6 +28,7 @@ export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
) {
|
||||
const autoCompactState = createRecoveryState()
|
||||
const experimental = options?.experimental
|
||||
const pendingCompactionTimeoutBySession = new Map<string, ReturnType<typeof setTimeout>>()
|
||||
|
||||
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
|
||||
const props = event.properties as Record<string, unknown> | undefined
|
||||
@@ -35,6 +36,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
if (event.type === "session.deleted") {
|
||||
const sessionInfo = props?.info as { id?: string } | undefined
|
||||
if (sessionInfo?.id) {
|
||||
const timeoutID = pendingCompactionTimeoutBySession.get(sessionInfo.id)
|
||||
if (timeoutID !== undefined) {
|
||||
clearTimeout(timeoutID)
|
||||
pendingCompactionTimeoutBySession.delete(sessionInfo.id)
|
||||
}
|
||||
|
||||
autoCompactState.pendingCompact.delete(sessionInfo.id)
|
||||
autoCompactState.errorDataBySession.delete(sessionInfo.id)
|
||||
autoCompactState.retryStateBySession.delete(sessionInfo.id)
|
||||
@@ -76,7 +83,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
setTimeout(() => {
|
||||
const timeoutID = setTimeout(() => {
|
||||
pendingCompactionTimeoutBySession.delete(sessionID)
|
||||
executeCompact(
|
||||
sessionID,
|
||||
{ providerID, modelID },
|
||||
@@ -86,6 +94,8 @@ export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
experimental,
|
||||
)
|
||||
}, 300)
|
||||
|
||||
pendingCompactionTimeoutBySession.set(sessionID, timeoutID)
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -114,6 +124,12 @@ export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
|
||||
if (!autoCompactState.pendingCompact.has(sessionID)) return
|
||||
|
||||
const timeoutID = pendingCompactionTimeoutBySession.get(sessionID)
|
||||
if (timeoutID !== undefined) {
|
||||
clearTimeout(timeoutID)
|
||||
pendingCompactionTimeoutBySession.delete(sessionID)
|
||||
}
|
||||
|
||||
const errorData = autoCompactState.errorDataBySession.get(sessionID)
|
||||
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
|
||||
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import { describe, it, expect, mock, beforeEach } from "bun:test"
|
||||
import { createPreemptiveCompactionHook } from "./preemptive-compaction"
|
||||
|
||||
const logMock = mock(() => {})
|
||||
|
||||
mock.module("../shared/logger", () => ({
|
||||
log: logMock,
|
||||
}))
|
||||
|
||||
const { createPreemptiveCompactionHook } = await import("./preemptive-compaction")
|
||||
|
||||
function createMockCtx() {
|
||||
return {
|
||||
@@ -21,6 +28,7 @@ describe("preemptive-compaction", () => {
|
||||
|
||||
beforeEach(() => {
|
||||
ctx = createMockCtx()
|
||||
logMock.mockClear()
|
||||
})
|
||||
|
||||
// #given event caches token info from message.updated
|
||||
@@ -152,4 +160,45 @@ describe("preemptive-compaction", () => {
|
||||
|
||||
expect(ctx.client.session.summarize).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it("should log summarize errors instead of swallowing them", async () => {
|
||||
//#given
|
||||
const hook = createPreemptiveCompactionHook(ctx as never)
|
||||
const sessionID = "ses_log_error"
|
||||
const summarizeError = new Error("summarize failed")
|
||||
ctx.client.session.summarize.mockRejectedValueOnce(summarizeError)
|
||||
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "message.updated",
|
||||
properties: {
|
||||
info: {
|
||||
role: "assistant",
|
||||
sessionID,
|
||||
providerID: "anthropic",
|
||||
modelID: "claude-sonnet-4-5",
|
||||
finish: true,
|
||||
tokens: {
|
||||
input: 170000,
|
||||
output: 0,
|
||||
reasoning: 0,
|
||||
cache: { read: 10000, write: 0 },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
//#when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "bash", sessionID, callID: "call_log" },
|
||||
{ title: "", output: "test", metadata: null }
|
||||
)
|
||||
|
||||
//#then
|
||||
expect(logMock).toHaveBeenCalledWith("[preemptive-compaction] Compaction failed", {
|
||||
sessionID,
|
||||
error: String(summarizeError),
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import { log } from "../shared/logger"
|
||||
|
||||
const DEFAULT_ACTUAL_LIMIT = 200_000
|
||||
|
||||
const ANTHROPIC_ACTUAL_LIMIT =
|
||||
@@ -76,8 +78,8 @@ export function createPreemptiveCompactionHook(ctx: PluginInput) {
|
||||
})
|
||||
|
||||
compactedSessions.add(sessionID)
|
||||
} catch {
|
||||
// best-effort; do not disrupt tool execution
|
||||
} catch (error) {
|
||||
log("[preemptive-compaction] Compaction failed", { sessionID, error: String(error) })
|
||||
} finally {
|
||||
compactionInProgress.delete(sessionID)
|
||||
}
|
||||
|
||||
@@ -18,3 +18,5 @@ export const COUNTDOWN_GRACE_PERIOD_MS = 500
|
||||
|
||||
export const ABORT_WINDOW_MS = 3000
|
||||
export const CONTINUATION_COOLDOWN_MS = 30_000
|
||||
export const MAX_CONSECUTIVE_FAILURES = 5
|
||||
export const FAILURE_RESET_WINDOW_MS = 5 * 60 * 1000
|
||||
|
||||
@@ -141,11 +141,14 @@ ${todoList}`
|
||||
if (injectionState) {
|
||||
injectionState.inFlight = false
|
||||
injectionState.lastInjectedAt = Date.now()
|
||||
injectionState.consecutiveFailures = 0
|
||||
}
|
||||
} catch (error) {
|
||||
log(`[${HOOK_NAME}] Injection failed`, { sessionID, error: String(error) })
|
||||
if (injectionState) {
|
||||
injectionState.inFlight = false
|
||||
injectionState.lastInjectedAt = Date.now()
|
||||
injectionState.consecutiveFailures = (injectionState.consecutiveFailures ?? 0) + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,9 @@ import {
|
||||
ABORT_WINDOW_MS,
|
||||
CONTINUATION_COOLDOWN_MS,
|
||||
DEFAULT_SKIP_AGENTS,
|
||||
FAILURE_RESET_WINDOW_MS,
|
||||
HOOK_NAME,
|
||||
MAX_CONSECUTIVE_FAILURES,
|
||||
} from "./constants"
|
||||
import { isLastAssistantMessageAborted } from "./abort-detection"
|
||||
import { getIncompleteCount } from "./todo"
|
||||
@@ -99,8 +101,35 @@ export async function handleSessionIdle(args: {
|
||||
return
|
||||
}
|
||||
|
||||
if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < CONTINUATION_COOLDOWN_MS) {
|
||||
log(`[${HOOK_NAME}] Skipped: cooldown active`, { sessionID })
|
||||
if (
|
||||
state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES
|
||||
&& state.lastInjectedAt
|
||||
&& Date.now() - state.lastInjectedAt >= FAILURE_RESET_WINDOW_MS
|
||||
) {
|
||||
state.consecutiveFailures = 0
|
||||
log(`[${HOOK_NAME}] Reset consecutive failures after recovery window`, {
|
||||
sessionID,
|
||||
failureResetWindowMs: FAILURE_RESET_WINDOW_MS,
|
||||
})
|
||||
}
|
||||
|
||||
if (state.consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
|
||||
log(`[${HOOK_NAME}] Skipped: max consecutive failures reached`, {
|
||||
sessionID,
|
||||
consecutiveFailures: state.consecutiveFailures,
|
||||
maxConsecutiveFailures: MAX_CONSECUTIVE_FAILURES,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
const effectiveCooldown =
|
||||
CONTINUATION_COOLDOWN_MS * Math.pow(2, Math.min(state.consecutiveFailures, 5))
|
||||
if (state.lastInjectedAt && Date.now() - state.lastInjectedAt < effectiveCooldown) {
|
||||
log(`[${HOOK_NAME}] Skipped: cooldown active`, {
|
||||
sessionID,
|
||||
effectiveCooldown,
|
||||
consecutiveFailures: state.consecutiveFailures,
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +45,9 @@ export function createSessionStateStore(): SessionStateStore {
|
||||
return existing.state
|
||||
}
|
||||
|
||||
const state: SessionState = {}
|
||||
const state: SessionState = {
|
||||
consecutiveFailures: 0,
|
||||
}
|
||||
sessions.set(sessionID, { state, lastAccessedAt: Date.now() })
|
||||
return state
|
||||
}
|
||||
|
||||
@@ -4,7 +4,11 @@ import { afterEach, beforeEach, describe, expect, test } from "bun:test"
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
import { setMainSession, subagentSessions, _resetForTesting } from "../../features/claude-code-session-state"
|
||||
import { createTodoContinuationEnforcer } from "."
|
||||
import { CONTINUATION_COOLDOWN_MS } from "./constants"
|
||||
import {
|
||||
CONTINUATION_COOLDOWN_MS,
|
||||
FAILURE_RESET_WINDOW_MS,
|
||||
MAX_CONSECUTIVE_FAILURES,
|
||||
} from "./constants"
|
||||
|
||||
type TimerCallback = (...args: any[]) => void
|
||||
|
||||
@@ -164,6 +168,15 @@ describe("todo-continuation-enforcer", () => {
|
||||
}
|
||||
}
|
||||
|
||||
interface PromptRequestOptions {
|
||||
path: { id: string }
|
||||
body: {
|
||||
agent?: string
|
||||
model?: { providerID?: string; modelID?: string }
|
||||
parts: Array<{ text: string }>
|
||||
}
|
||||
}
|
||||
|
||||
let mockMessages: MockMessage[] = []
|
||||
|
||||
function createMockPluginInput() {
|
||||
@@ -551,6 +564,164 @@ describe("todo-continuation-enforcer", () => {
|
||||
expect(promptCalls).toHaveLength(2)
|
||||
}, { timeout: 15000 })
|
||||
|
||||
test("should apply cooldown even after injection failure", async () => {
|
||||
//#given
|
||||
const sessionID = "main-failure-cooldown"
|
||||
setMainSession(sessionID)
|
||||
const mockInput = createMockPluginInput()
|
||||
mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
|
||||
promptCalls.push({
|
||||
sessionID: opts.path.id,
|
||||
agent: opts.body.agent,
|
||||
model: opts.body.model,
|
||||
text: opts.body.parts[0].text,
|
||||
})
|
||||
throw new Error("simulated auth failure")
|
||||
}
|
||||
const hook = createTodoContinuationEnforcer(mockInput, {})
|
||||
|
||||
//#when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
//#then
|
||||
expect(promptCalls).toHaveLength(1)
|
||||
})
|
||||
|
||||
test("should stop retries after max consecutive failures", async () => {
|
||||
//#given
|
||||
const sessionID = "main-max-consecutive-failures"
|
||||
setMainSession(sessionID)
|
||||
const mockInput = createMockPluginInput()
|
||||
mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
|
||||
promptCalls.push({
|
||||
sessionID: opts.path.id,
|
||||
agent: opts.body.agent,
|
||||
model: opts.body.model,
|
||||
text: opts.body.parts[0].text,
|
||||
})
|
||||
throw new Error("simulated auth failure")
|
||||
}
|
||||
const hook = createTodoContinuationEnforcer(mockInput, {})
|
||||
|
||||
//#when
|
||||
for (let index = 0; index < MAX_CONSECUTIVE_FAILURES; index++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
if (index < MAX_CONSECUTIVE_FAILURES - 1) {
|
||||
await fakeTimers.advanceClockBy(1_000_000)
|
||||
}
|
||||
}
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
//#then
|
||||
expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES)
|
||||
}, { timeout: 30000 })
|
||||
|
||||
test("should resume retries after reset window when max failures reached", async () => {
|
||||
//#given
|
||||
const sessionID = "main-recovery-after-max-failures"
|
||||
setMainSession(sessionID)
|
||||
const mockInput = createMockPluginInput()
|
||||
mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
|
||||
promptCalls.push({
|
||||
sessionID: opts.path.id,
|
||||
agent: opts.body.agent,
|
||||
model: opts.body.model,
|
||||
text: opts.body.parts[0].text,
|
||||
})
|
||||
throw new Error("simulated auth failure")
|
||||
}
|
||||
const hook = createTodoContinuationEnforcer(mockInput, {})
|
||||
|
||||
//#when
|
||||
for (let index = 0; index < MAX_CONSECUTIVE_FAILURES; index++) {
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
if (index < MAX_CONSECUTIVE_FAILURES - 1) {
|
||||
await fakeTimers.advanceClockBy(1_000_000)
|
||||
}
|
||||
}
|
||||
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
await fakeTimers.advanceClockBy(FAILURE_RESET_WINDOW_MS)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
//#then
|
||||
expect(promptCalls).toHaveLength(MAX_CONSECUTIVE_FAILURES + 1)
|
||||
}, { timeout: 30000 })
|
||||
|
||||
test("should increase cooldown exponentially after consecutive failures", async () => {
|
||||
//#given
|
||||
const sessionID = "main-exponential-backoff"
|
||||
setMainSession(sessionID)
|
||||
const mockInput = createMockPluginInput()
|
||||
mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
|
||||
promptCalls.push({
|
||||
sessionID: opts.path.id,
|
||||
agent: opts.body.agent,
|
||||
model: opts.body.model,
|
||||
text: opts.body.parts[0].text,
|
||||
})
|
||||
throw new Error("simulated auth failure")
|
||||
}
|
||||
const hook = createTodoContinuationEnforcer(mockInput, {})
|
||||
|
||||
//#when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
//#then
|
||||
expect(promptCalls).toHaveLength(2)
|
||||
}, { timeout: 30000 })
|
||||
|
||||
test("should reset consecutive failure count after successful injection", async () => {
|
||||
//#given
|
||||
const sessionID = "main-reset-consecutive-failures"
|
||||
setMainSession(sessionID)
|
||||
let shouldFail = true
|
||||
const mockInput = createMockPluginInput()
|
||||
mockInput.client.session.promptAsync = async (opts: PromptRequestOptions) => {
|
||||
promptCalls.push({
|
||||
sessionID: opts.path.id,
|
||||
agent: opts.body.agent,
|
||||
model: opts.body.model,
|
||||
text: opts.body.parts[0].text,
|
||||
})
|
||||
if (shouldFail) {
|
||||
shouldFail = false
|
||||
throw new Error("simulated auth failure")
|
||||
}
|
||||
return {}
|
||||
}
|
||||
const hook = createTodoContinuationEnforcer(mockInput, {})
|
||||
|
||||
//#when
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS * 2)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
await fakeTimers.advanceClockBy(CONTINUATION_COOLDOWN_MS)
|
||||
await hook.handler({ event: { type: "session.idle", properties: { sessionID } } })
|
||||
await fakeTimers.advanceBy(2500, true)
|
||||
|
||||
//#then
|
||||
expect(promptCalls).toHaveLength(3)
|
||||
}, { timeout: 30000 })
|
||||
|
||||
test("should keep injecting even when todos remain unchanged across cycles", async () => {
|
||||
//#given
|
||||
const sessionID = "main-no-stagnation-cap"
|
||||
|
||||
@@ -29,6 +29,7 @@ export interface SessionState {
|
||||
abortDetectedAt?: number
|
||||
lastInjectedAt?: number
|
||||
inFlight?: boolean
|
||||
consecutiveFailures: number
|
||||
}
|
||||
|
||||
export interface MessageInfo {
|
||||
|
||||
118
src/plugin/chat-message.test.ts
Normal file
118
src/plugin/chat-message.test.ts
Normal file
@@ -0,0 +1,118 @@
|
||||
import { describe, test, expect } from "bun:test"
|
||||
|
||||
import { createChatMessageHandler } from "./chat-message"
|
||||
|
||||
type ChatMessagePart = { type: string; text?: string; [key: string]: unknown }
|
||||
type ChatMessageHandlerOutput = { message: Record<string, unknown>; parts: ChatMessagePart[] }
|
||||
|
||||
function createMockHandlerArgs(overrides?: {
|
||||
pluginConfig?: Record<string, unknown>
|
||||
shouldOverride?: boolean
|
||||
}) {
|
||||
const appliedSessions: string[] = []
|
||||
return {
|
||||
ctx: { client: { tui: { showToast: async () => {} } } } as any,
|
||||
pluginConfig: (overrides?.pluginConfig ?? {}) as any,
|
||||
firstMessageVariantGate: {
|
||||
shouldOverride: () => overrides?.shouldOverride ?? false,
|
||||
markApplied: (sessionID: string) => { appliedSessions.push(sessionID) },
|
||||
},
|
||||
hooks: {
|
||||
stopContinuationGuard: null,
|
||||
keywordDetector: null,
|
||||
claudeCodeHooks: null,
|
||||
autoSlashCommand: null,
|
||||
startWork: null,
|
||||
ralphLoop: null,
|
||||
} as any,
|
||||
_appliedSessions: appliedSessions,
|
||||
}
|
||||
}
|
||||
|
||||
function createMockInput(agent?: string, model?: { providerID: string; modelID: string }) {
|
||||
return {
|
||||
sessionID: "test-session",
|
||||
agent,
|
||||
model,
|
||||
}
|
||||
}
|
||||
|
||||
function createMockOutput(variant?: string): ChatMessageHandlerOutput {
|
||||
const message: Record<string, unknown> = {}
|
||||
if (variant !== undefined) {
|
||||
message["variant"] = variant
|
||||
}
|
||||
return { message, parts: [] }
|
||||
}
|
||||
|
||||
describe("createChatMessageHandler - first message variant", () => {
|
||||
test("first message: sets variant from fallback chain when user has no selection", async () => {
|
||||
//#given - first message, no user-selected variant, hephaestus with medium in chain
|
||||
const args = createMockHandlerArgs({ shouldOverride: true })
|
||||
const handler = createChatMessageHandler(args)
|
||||
const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
|
||||
const output = createMockOutput() // no variant set
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then - should set variant from fallback chain
|
||||
expect(output.message["variant"]).toBeDefined()
|
||||
})
|
||||
|
||||
test("first message: preserves user-selected variant when already set", async () => {
|
||||
//#given - first message, user already selected "xhigh" variant in OpenCode UI
|
||||
const args = createMockHandlerArgs({ shouldOverride: true })
|
||||
const handler = createChatMessageHandler(args)
|
||||
const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
|
||||
const output = createMockOutput("xhigh") // user selected xhigh
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then - user's xhigh must be preserved, not overwritten to "medium"
|
||||
expect(output.message["variant"]).toBe("xhigh")
|
||||
})
|
||||
|
||||
test("first message: preserves user-selected 'high' variant", async () => {
|
||||
//#given - user selected "high" variant
|
||||
const args = createMockHandlerArgs({ shouldOverride: true })
|
||||
const handler = createChatMessageHandler(args)
|
||||
const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
|
||||
const output = createMockOutput("high")
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then
|
||||
expect(output.message["variant"]).toBe("high")
|
||||
})
|
||||
|
||||
test("subsequent message: does not override existing variant", async () => {
|
||||
//#given - not first message, variant already set
|
||||
const args = createMockHandlerArgs({ shouldOverride: false })
|
||||
const handler = createChatMessageHandler(args)
|
||||
const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
|
||||
const output = createMockOutput("xhigh")
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then
|
||||
expect(output.message["variant"]).toBe("xhigh")
|
||||
})
|
||||
|
||||
test("first message: marks gate as applied regardless of variant presence", async () => {
|
||||
//#given - first message with user-selected variant
|
||||
const args = createMockHandlerArgs({ shouldOverride: true })
|
||||
const handler = createChatMessageHandler(args)
|
||||
const input = createMockInput("hephaestus", { providerID: "openai", modelID: "gpt-5.3-codex" })
|
||||
const output = createMockOutput("xhigh")
|
||||
|
||||
//#when
|
||||
await handler(input, output)
|
||||
|
||||
//#then - gate should still be marked as applied
|
||||
expect(args._appliedSessions).toContain("test-session")
|
||||
})
|
||||
})
|
||||
@@ -56,12 +56,14 @@ export function createChatMessageHandler(args: {
|
||||
const message = output.message
|
||||
|
||||
if (firstMessageVariantGate.shouldOverride(input.sessionID)) {
|
||||
const variant =
|
||||
input.model && input.agent
|
||||
? resolveVariantForModel(pluginConfig, input.agent, input.model)
|
||||
: resolveAgentVariant(pluginConfig, input.agent)
|
||||
if (variant !== undefined) {
|
||||
message["variant"] = variant
|
||||
if (message["variant"] === undefined) {
|
||||
const variant =
|
||||
input.model && input.agent
|
||||
? resolveVariantForModel(pluginConfig, input.agent, input.model)
|
||||
: resolveAgentVariant(pluginConfig, input.agent)
|
||||
if (variant !== undefined) {
|
||||
message["variant"] = variant
|
||||
}
|
||||
}
|
||||
firstMessageVariantGate.markApplied(input.sessionID)
|
||||
} else {
|
||||
|
||||
@@ -241,19 +241,32 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
|
||||
expect(primary.providers[0]).toBe("openai")
|
||||
})
|
||||
|
||||
test("visual-engineering has valid fallbackChain with gemini-3-pro as primary", () => {
|
||||
test("visual-engineering has valid fallbackChain with gemini-3-pro high as primary", () => {
|
||||
// given - visual-engineering category requirement
|
||||
const visualEngineering = CATEGORY_MODEL_REQUIREMENTS["visual-engineering"]
|
||||
|
||||
// when - accessing visual-engineering requirement
|
||||
// then - fallbackChain exists with gemini-3-pro as first entry
|
||||
// then - fallbackChain: gemini-3-pro(high) → glm-5 → opus-4-6(max) → k2p5
|
||||
expect(visualEngineering).toBeDefined()
|
||||
expect(visualEngineering.fallbackChain).toBeArray()
|
||||
expect(visualEngineering.fallbackChain.length).toBeGreaterThan(0)
|
||||
expect(visualEngineering.fallbackChain).toHaveLength(4)
|
||||
|
||||
const primary = visualEngineering.fallbackChain[0]
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
expect(primary.model).toBe("gemini-3-pro")
|
||||
expect(primary.variant).toBe("high")
|
||||
|
||||
const second = visualEngineering.fallbackChain[1]
|
||||
expect(second.providers[0]).toBe("zai-coding-plan")
|
||||
expect(second.model).toBe("glm-5")
|
||||
|
||||
const third = visualEngineering.fallbackChain[2]
|
||||
expect(third.model).toBe("claude-opus-4-6")
|
||||
expect(third.variant).toBe("max")
|
||||
|
||||
const fourth = visualEngineering.fallbackChain[3]
|
||||
expect(fourth.providers[0]).toBe("kimi-for-coding")
|
||||
expect(fourth.model).toBe("k2p5")
|
||||
})
|
||||
|
||||
test("quick has valid fallbackChain with claude-haiku-4-5 as primary", () => {
|
||||
@@ -318,19 +331,23 @@ describe("CATEGORY_MODEL_REQUIREMENTS", () => {
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
})
|
||||
|
||||
test("writing has valid fallbackChain with gemini-3-flash as primary", () => {
|
||||
test("writing has valid fallbackChain with k2p5 as primary (kimi-for-coding)", () => {
|
||||
// given - writing category requirement
|
||||
const writing = CATEGORY_MODEL_REQUIREMENTS["writing"]
|
||||
|
||||
// when - accessing writing requirement
|
||||
// then - fallbackChain exists with gemini-3-flash as first entry
|
||||
// then - fallbackChain: k2p5 → gemini-3-flash → claude-sonnet-4-5
|
||||
expect(writing).toBeDefined()
|
||||
expect(writing.fallbackChain).toBeArray()
|
||||
expect(writing.fallbackChain.length).toBeGreaterThan(0)
|
||||
expect(writing.fallbackChain).toHaveLength(3)
|
||||
|
||||
const primary = writing.fallbackChain[0]
|
||||
expect(primary.model).toBe("gemini-3-flash")
|
||||
expect(primary.providers[0]).toBe("google")
|
||||
expect(primary.model).toBe("k2p5")
|
||||
expect(primary.providers[0]).toBe("kimi-for-coding")
|
||||
|
||||
const second = writing.fallbackChain[1]
|
||||
expect(second.model).toBe("gemini-3-flash")
|
||||
expect(second.providers[0]).toBe("google")
|
||||
})
|
||||
|
||||
test("all 8 categories have valid fallbackChain arrays", () => {
|
||||
|
||||
@@ -100,9 +100,10 @@ export const AGENT_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
"visual-engineering": {
|
||||
fallbackChain: [
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-pro", variant: "high" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-5" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-opus-4-6", variant: "max" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
],
|
||||
},
|
||||
ultrabrain: {
|
||||
@@ -151,10 +152,9 @@ export const CATEGORY_MODEL_REQUIREMENTS: Record<string, ModelRequirement> = {
|
||||
},
|
||||
writing: {
|
||||
fallbackChain: [
|
||||
{ providers: ["kimi-for-coding"], model: "k2p5" },
|
||||
{ providers: ["google", "github-copilot", "opencode"], model: "gemini-3-flash" },
|
||||
{ providers: ["anthropic", "github-copilot", "opencode"], model: "claude-sonnet-4-5" },
|
||||
{ providers: ["zai-coding-plan"], model: "glm-4.7" },
|
||||
{ providers: ["openai", "github-copilot", "opencode"], model: "gpt-5.2" },
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
@@ -162,6 +162,16 @@ Approach:
|
||||
- Draft with care
|
||||
- Polish for clarity and impact
|
||||
- Documentation, READMEs, articles, technical writing
|
||||
|
||||
ANTI-AI-SLOP RULES (NON-NEGOTIABLE):
|
||||
- NEVER use em dashes (—) or en dashes (–). Use commas, periods, ellipses, or line breaks instead. Zero tolerance.
|
||||
- Remove AI-sounding phrases: "delve", "it's important to note", "I'd be happy to", "certainly", "please don't hesitate", "leverage", "utilize", "in order to", "moving forward", "circle back", "at the end of the day", "robust", "streamline", "facilitate"
|
||||
- Pick plain words. "Use" not "utilize". "Start" not "commence". "Help" not "facilitate".
|
||||
- Use contractions naturally: "don't" not "do not", "it's" not "it is".
|
||||
- Vary sentence length. Don't make every sentence the same length.
|
||||
- NEVER start consecutive sentences with the same word.
|
||||
- No filler openings: skip "In today's world...", "As we all know...", "It goes without saying..."
|
||||
- Write like a human, not a corporate template.
|
||||
</Category_Context>`
|
||||
|
||||
export const DEEP_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
@@ -198,14 +208,14 @@ You are NOT an interactive assistant. You are an autonomous problem-solver.
|
||||
|
||||
|
||||
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
|
||||
"visual-engineering": { model: "google/gemini-3-pro" },
|
||||
"visual-engineering": { model: "google/gemini-3-pro", variant: "high" },
|
||||
ultrabrain: { model: "openai/gpt-5.3-codex", variant: "xhigh" },
|
||||
deep: { model: "openai/gpt-5.3-codex", variant: "medium" },
|
||||
artistry: { model: "google/gemini-3-pro", variant: "high" },
|
||||
quick: { model: "anthropic/claude-haiku-4-5" },
|
||||
"unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
|
||||
"unspecified-high": { model: "anthropic/claude-opus-4-6", variant: "max" },
|
||||
writing: { model: "google/gemini-3-flash" },
|
||||
writing: { model: "kimi-for-coding/k2p5" },
|
||||
}
|
||||
|
||||
export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
|
||||
|
||||
@@ -67,13 +67,14 @@ describe("sisyphus-task", () => {
|
||||
})
|
||||
|
||||
describe("DEFAULT_CATEGORIES", () => {
|
||||
test("visual-engineering category has model config", () => {
|
||||
test("visual-engineering category has model and variant config", () => {
|
||||
// given
|
||||
const category = DEFAULT_CATEGORIES["visual-engineering"]
|
||||
|
||||
// when / #then
|
||||
expect(category).toBeDefined()
|
||||
expect(category.model).toBe("google/gemini-3-pro")
|
||||
expect(category.variant).toBe("high")
|
||||
})
|
||||
|
||||
test("ultrabrain category has model and variant config", () => {
|
||||
@@ -1713,17 +1714,19 @@ describe("sisyphus-task", () => {
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let launchCalled = false
|
||||
|
||||
const launchedTask = {
|
||||
id: "task-unstable",
|
||||
sessionID: "ses_unstable_gemini",
|
||||
description: "Unstable gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
const mockManager = {
|
||||
launch: async () => {
|
||||
launchCalled = true
|
||||
return {
|
||||
id: "task-unstable",
|
||||
sessionID: "ses_unstable_gemini",
|
||||
description: "Unstable gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
return launchedTask
|
||||
},
|
||||
getTask: () => launchedTask,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
@@ -1838,17 +1841,19 @@ describe("sisyphus-task", () => {
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let launchCalled = false
|
||||
|
||||
const launchedTask = {
|
||||
id: "task-unstable-minimax",
|
||||
sessionID: "ses_unstable_minimax",
|
||||
description: "Unstable minimax task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
const mockManager = {
|
||||
launch: async () => {
|
||||
launchCalled = true
|
||||
return {
|
||||
id: "task-unstable-minimax",
|
||||
sessionID: "ses_unstable_minimax",
|
||||
description: "Unstable minimax task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
return launchedTask
|
||||
},
|
||||
getTask: () => launchedTask,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
@@ -1972,17 +1977,19 @@ describe("sisyphus-task", () => {
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let launchCalled = false
|
||||
|
||||
const launchedTask = {
|
||||
id: "task-artistry",
|
||||
sessionID: "ses_artistry_gemini",
|
||||
description: "Artistry gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
const mockManager = {
|
||||
launch: async () => {
|
||||
launchCalled = true
|
||||
return {
|
||||
id: "task-artistry",
|
||||
sessionID: "ses_artistry_gemini",
|
||||
description: "Artistry gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
return launchedTask
|
||||
},
|
||||
getTask: () => launchedTask,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
@@ -2038,17 +2045,19 @@ describe("sisyphus-task", () => {
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let launchCalled = false
|
||||
|
||||
const launchedTask = {
|
||||
id: "task-writing",
|
||||
sessionID: "ses_writing_gemini",
|
||||
description: "Writing gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
const mockManager = {
|
||||
launch: async () => {
|
||||
launchCalled = true
|
||||
return {
|
||||
id: "task-writing",
|
||||
sessionID: "ses_writing_gemini",
|
||||
description: "Writing gemini task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
return launchedTask
|
||||
},
|
||||
getTask: () => launchedTask,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
@@ -2104,17 +2113,19 @@ describe("sisyphus-task", () => {
|
||||
const { createDelegateTask } = require("./tools")
|
||||
let launchCalled = false
|
||||
|
||||
const launchedTask = {
|
||||
id: "task-custom-unstable",
|
||||
sessionID: "ses_custom_unstable",
|
||||
description: "Custom unstable task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
const mockManager = {
|
||||
launch: async () => {
|
||||
launchCalled = true
|
||||
return {
|
||||
id: "task-custom-unstable",
|
||||
sessionID: "ses_custom_unstable",
|
||||
description: "Custom unstable task",
|
||||
agent: "sisyphus-junior",
|
||||
status: "running",
|
||||
}
|
||||
return launchedTask
|
||||
},
|
||||
getTask: () => launchedTask,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
@@ -2793,7 +2804,7 @@ describe("sisyphus-task", () => {
|
||||
{
|
||||
name: "writing",
|
||||
description: "Documentation, prose, technical writing",
|
||||
model: "google/gemini-3-flash",
|
||||
model: "kimi-for-coding/k2p5",
|
||||
},
|
||||
]
|
||||
const availableSkills = [
|
||||
|
||||
224
src/tools/delegate-task/unstable-agent-task.test.ts
Normal file
224
src/tools/delegate-task/unstable-agent-task.test.ts
Normal file
@@ -0,0 +1,224 @@
|
||||
const { describe, test, expect, beforeEach, afterEach, mock } = require("bun:test")
|
||||
|
||||
describe("executeUnstableAgentTask - interrupt detection", () => {
|
||||
beforeEach(() => {
|
||||
//#given - configure fast timing for all tests
|
||||
const { __setTimingConfig } = require("./timing")
|
||||
__setTimingConfig({
|
||||
POLL_INTERVAL_MS: 10,
|
||||
MIN_STABILITY_TIME_MS: 0,
|
||||
STABILITY_POLLS_REQUIRED: 1,
|
||||
MAX_POLL_TIME_MS: 500,
|
||||
WAIT_FOR_SESSION_TIMEOUT_MS: 100,
|
||||
WAIT_FOR_SESSION_INTERVAL_MS: 10,
|
||||
})
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
//#given - reset timing after each test
|
||||
const { __resetTimingConfig } = require("./timing")
|
||||
__resetTimingConfig()
|
||||
mock.restore()
|
||||
})
|
||||
|
||||
test("should return error immediately when background task becomes interrupted during polling", async () => {
|
||||
//#given - a background task that gets interrupted on first poll check
|
||||
const taskState = {
|
||||
id: "bg_test_interrupt",
|
||||
sessionID: "ses_test_interrupt",
|
||||
status: "interrupt" as string,
|
||||
description: "test interrupted task",
|
||||
prompt: "test prompt",
|
||||
agent: "sisyphus-junior",
|
||||
error: "Agent not found" as string | undefined,
|
||||
}
|
||||
|
||||
const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
|
||||
|
||||
const mockManager = {
|
||||
launch: async () => launchState,
|
||||
getTask: () => taskState,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
session: {
|
||||
status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const { executeUnstableAgentTask } = require("./unstable-agent-task")
|
||||
|
||||
const args = {
|
||||
prompt: "test prompt",
|
||||
description: "test task",
|
||||
category: "test",
|
||||
load_skills: [],
|
||||
run_in_background: false,
|
||||
}
|
||||
|
||||
const mockCtx = {
|
||||
sessionID: "parent-session",
|
||||
callID: "call-123",
|
||||
metadata: () => {},
|
||||
}
|
||||
|
||||
const mockExecutorCtx = {
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
directory: "/tmp",
|
||||
}
|
||||
|
||||
const parentContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "msg-123",
|
||||
}
|
||||
|
||||
//#when - executeUnstableAgentTask encounters an interrupted task
|
||||
const startTime = Date.now()
|
||||
const result = await executeUnstableAgentTask(
|
||||
args, mockCtx, mockExecutorCtx, parentContext,
|
||||
"test-agent", undefined, undefined, "test-model"
|
||||
)
|
||||
const elapsed = Date.now() - startTime
|
||||
|
||||
//#then - should return quickly with interrupt error, not hang until MAX_POLL_TIME_MS
|
||||
expect(result).toContain("interrupt")
|
||||
expect(result.toLowerCase()).toContain("agent not found")
|
||||
expect(elapsed).toBeLessThan(400)
|
||||
})
|
||||
|
||||
test("should return error immediately when background task becomes errored during polling", async () => {
|
||||
//#given - a background task that is already errored when poll checks
|
||||
const taskState = {
|
||||
id: "bg_test_error",
|
||||
sessionID: "ses_test_error",
|
||||
status: "error" as string,
|
||||
description: "test error task",
|
||||
prompt: "test prompt",
|
||||
agent: "sisyphus-junior",
|
||||
error: "Rate limit exceeded" as string | undefined,
|
||||
}
|
||||
|
||||
const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
|
||||
|
||||
const mockManager = {
|
||||
launch: async () => launchState,
|
||||
getTask: () => taskState,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
session: {
|
||||
status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const { executeUnstableAgentTask } = require("./unstable-agent-task")
|
||||
|
||||
const args = {
|
||||
prompt: "test prompt",
|
||||
description: "test task",
|
||||
category: "test",
|
||||
load_skills: [],
|
||||
run_in_background: false,
|
||||
}
|
||||
|
||||
const mockCtx = {
|
||||
sessionID: "parent-session",
|
||||
callID: "call-123",
|
||||
metadata: () => {},
|
||||
}
|
||||
|
||||
const mockExecutorCtx = {
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
directory: "/tmp",
|
||||
}
|
||||
|
||||
const parentContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "msg-123",
|
||||
}
|
||||
|
||||
//#when - executeUnstableAgentTask encounters an errored task
|
||||
const startTime = Date.now()
|
||||
const result = await executeUnstableAgentTask(
|
||||
args, mockCtx, mockExecutorCtx, parentContext,
|
||||
"test-agent", undefined, undefined, "test-model"
|
||||
)
|
||||
const elapsed = Date.now() - startTime
|
||||
|
||||
//#then - should return quickly with error, not hang until MAX_POLL_TIME_MS
|
||||
expect(result).toContain("error")
|
||||
expect(result.toLowerCase()).toContain("rate limit exceeded")
|
||||
expect(elapsed).toBeLessThan(400)
|
||||
})
|
||||
|
||||
test("should return error immediately when background task becomes cancelled during polling", async () => {
|
||||
//#given - a background task that is already cancelled when poll checks
|
||||
const taskState = {
|
||||
id: "bg_test_cancel",
|
||||
sessionID: "ses_test_cancel",
|
||||
status: "cancelled" as string,
|
||||
description: "test cancelled task",
|
||||
prompt: "test prompt",
|
||||
agent: "sisyphus-junior",
|
||||
error: "Stale timeout" as string | undefined,
|
||||
}
|
||||
|
||||
const launchState = { ...taskState, status: "running" as string, error: undefined as string | undefined }
|
||||
|
||||
const mockManager = {
|
||||
launch: async () => launchState,
|
||||
getTask: () => taskState,
|
||||
}
|
||||
|
||||
const mockClient = {
|
||||
session: {
|
||||
status: async () => ({ data: { [taskState.sessionID!]: { type: "idle" } } }),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const { executeUnstableAgentTask } = require("./unstable-agent-task")
|
||||
|
||||
const args = {
|
||||
prompt: "test prompt",
|
||||
description: "test task",
|
||||
category: "test",
|
||||
load_skills: [],
|
||||
run_in_background: false,
|
||||
}
|
||||
|
||||
const mockCtx = {
|
||||
sessionID: "parent-session",
|
||||
callID: "call-123",
|
||||
metadata: () => {},
|
||||
}
|
||||
|
||||
const mockExecutorCtx = {
|
||||
manager: mockManager,
|
||||
client: mockClient,
|
||||
directory: "/tmp",
|
||||
}
|
||||
|
||||
const parentContext = {
|
||||
sessionID: "parent-session",
|
||||
messageID: "msg-123",
|
||||
}
|
||||
|
||||
//#when - executeUnstableAgentTask encounters a cancelled task
|
||||
const startTime = Date.now()
|
||||
const result = await executeUnstableAgentTask(
|
||||
args, mockCtx, mockExecutorCtx, parentContext,
|
||||
"test-agent", undefined, undefined, "test-model"
|
||||
)
|
||||
const elapsed = Date.now() - startTime
|
||||
|
||||
//#then - should return quickly with cancel info, not hang until MAX_POLL_TIME_MS
|
||||
expect(result).toContain("cancel")
|
||||
expect(result.toLowerCase()).toContain("stale timeout")
|
||||
expect(elapsed).toBeLessThan(400)
|
||||
})
|
||||
})
|
||||
@@ -77,6 +77,7 @@ export async function executeUnstableAgentTask(
|
||||
const pollStart = Date.now()
|
||||
let lastMsgCount = 0
|
||||
let stablePolls = 0
|
||||
let terminalStatus: { status: string; error?: string } | undefined
|
||||
|
||||
while (Date.now() - pollStart < timingCfg.MAX_POLL_TIME_MS) {
|
||||
if (ctx.abort?.aborted) {
|
||||
@@ -85,6 +86,12 @@ export async function executeUnstableAgentTask(
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, timingCfg.POLL_INTERVAL_MS))
|
||||
|
||||
const currentTask = manager.getTask(task.id)
|
||||
if (currentTask && (currentTask.status === "interrupt" || currentTask.status === "error" || currentTask.status === "cancelled")) {
|
||||
terminalStatus = { status: currentTask.status, error: currentTask.error }
|
||||
break
|
||||
}
|
||||
|
||||
const statusResult = await client.session.status()
|
||||
const allStatuses = (statusResult.data ?? {}) as Record<string, { type: string }>
|
||||
const sessionStatus = allStatuses[sessionID]
|
||||
@@ -110,6 +117,24 @@ export async function executeUnstableAgentTask(
|
||||
}
|
||||
}
|
||||
|
||||
if (terminalStatus) {
|
||||
const duration = formatDuration(startTime)
|
||||
return `SUPERVISED TASK FAILED (${terminalStatus.status})
|
||||
|
||||
Task was interrupted/failed while running in monitored background mode.
|
||||
${terminalStatus.error ? `Error: ${terminalStatus.error}` : ""}
|
||||
|
||||
Duration: ${duration}
|
||||
Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
|
||||
Model: ${actualModel}
|
||||
|
||||
The task session may contain partial results.
|
||||
|
||||
<task_metadata>
|
||||
session_id: ${sessionID}
|
||||
</task_metadata>`
|
||||
}
|
||||
|
||||
const messagesResult = await client.session.messages({ path: { id: sessionID } })
|
||||
const messages = ((messagesResult as { data?: unknown }).data ?? messagesResult) as SessionMessage[]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user