Compare commits
100 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ff94aa3033 | ||
|
|
d0c4085ae1 | ||
|
|
56f9de4652 | ||
|
|
b2661be833 | ||
|
|
3d4ed912d7 | ||
|
|
9a338b16f1 | ||
|
|
471bc6e52d | ||
|
|
825a5e70f7 | ||
|
|
18c161a9cd | ||
|
|
414cecd7df | ||
|
|
2b541b8725 | ||
|
|
ac6e7d00f2 | ||
|
|
fa77be0daf | ||
|
|
13da4ef4aa | ||
|
|
6451b212f8 | ||
|
|
fad7354b13 | ||
|
|
55dc64849f | ||
|
|
e984a5c639 | ||
|
|
46e02b9457 | ||
|
|
5f21ddf473 | ||
|
|
108e860ddd | ||
|
|
b8221a883e | ||
|
|
2c394cd497 | ||
|
|
d84a1c9e95 | ||
|
|
cf29cd137e | ||
|
|
d3f8c7d288 | ||
|
|
d1659152bc | ||
|
|
1cb8f8bee6 | ||
|
|
1760367a25 | ||
|
|
747edcb6e6 | ||
|
|
f3540a9ea3 | ||
|
|
8280e45fe1 | ||
|
|
0eddd28a95 | ||
|
|
36e54acc51 | ||
|
|
817c593e12 | ||
|
|
3ccef5d9b3 | ||
|
|
ae4e113c7e | ||
|
|
403457f9e4 | ||
|
|
5e5c091356 | ||
|
|
1df025ad44 | ||
|
|
844ac26e2a | ||
|
|
2727f0f429 | ||
|
|
89b1205ccf | ||
|
|
d44f5db1e2 | ||
|
|
180fcc3e5d | ||
|
|
3947084cc5 | ||
|
|
67f701cd9e | ||
|
|
f94ae2032c | ||
|
|
c81384456c | ||
|
|
9040383da7 | ||
|
|
c688e978fd | ||
|
|
a0201e17b9 | ||
|
|
dbbec868d5 | ||
|
|
6e2f3b1f50 | ||
|
|
e4bbd6bf15 | ||
|
|
476f154ef5 | ||
|
|
83519cae11 | ||
|
|
9a8f03462f | ||
|
|
daf6c7a19e | ||
|
|
2bb82c250c | ||
|
|
8e92704316 | ||
|
|
f980e256dd | ||
|
|
4d19a22679 | ||
|
|
e1010846c4 | ||
|
|
38169523c4 | ||
|
|
b98697238b | ||
|
|
d5b6a7c575 | ||
|
|
78a08959f6 | ||
|
|
db6a899297 | ||
|
|
7fdbabb264 | ||
|
|
b3ebf6c124 | ||
|
|
8a1b398119 | ||
|
|
66419918f9 | ||
|
|
755a3a94c8 | ||
|
|
5e316499e5 | ||
|
|
266c045b69 | ||
|
|
eafcac1593 | ||
|
|
7927d3675d | ||
|
|
4059d02047 | ||
|
|
c2dfcadbac | ||
|
|
e343e625c7 | ||
|
|
050e6a2187 | ||
|
|
7ede8e04f0 | ||
|
|
1ae7d7d67e | ||
|
|
f9742ddfca | ||
|
|
eb5cc873ea | ||
|
|
847d994199 | ||
|
|
bbe08f0eef | ||
|
|
4454753bb4 | ||
|
|
1c0b41aa65 | ||
|
|
4c6b31e5b4 | ||
|
|
67990293a9 | ||
|
|
dbf584af95 | ||
|
|
368ac310a1 | ||
|
|
cb2169f334 | ||
|
|
ec520e6228 | ||
|
|
6febebc166 | ||
|
|
98f4adbf4b | ||
|
|
d209f3c677 | ||
|
|
a691a3ac0a |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
# Dependencies
|
||||
.sisyphus/
|
||||
.sisyphus/*
|
||||
!.sisyphus/rules/
|
||||
node_modules/
|
||||
|
||||
# Build output
|
||||
|
||||
@@ -41,27 +41,27 @@ Fire ALL simultaneously:
|
||||
|
||||
```
|
||||
// Agent 1: Find all exported symbols
|
||||
delegate_task(subagent_type="explore", run_in_background=true,
|
||||
task(subagent_type="explore", run_in_background=true,
|
||||
prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
|
||||
List each with: file path, line number, symbol name, export type (named/default).
|
||||
EXCLUDE: src/index.ts root exports, test files.
|
||||
Return as structured list.")
|
||||
|
||||
// Agent 2: Find potentially unused files
|
||||
delegate_task(subagent_type="explore", run_in_background=true,
|
||||
task(subagent_type="explore", run_in_background=true,
|
||||
prompt="Find files in src/ that are NOT imported by any other file.
|
||||
Check import/require statements across the entire codebase.
|
||||
EXCLUDE: index.ts files, test files, entry points, config files, .md files.
|
||||
Return list of potentially orphaned files.")
|
||||
|
||||
// Agent 3: Find unused imports within files
|
||||
delegate_task(subagent_type="explore", run_in_background=true,
|
||||
task(subagent_type="explore", run_in_background=true,
|
||||
prompt="Find unused imports across src/**/*.ts files.
|
||||
Look for import statements where the imported symbol is never referenced in the file body.
|
||||
Return: file path, line number, imported symbol name.")
|
||||
|
||||
// Agent 4: Find functions/variables only used in their own declaration
|
||||
delegate_task(subagent_type="explore", run_in_background=true,
|
||||
task(subagent_type="explore", run_in_background=true,
|
||||
prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
|
||||
to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
|
||||
```
|
||||
|
||||
@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:
|
||||
|
||||
| Aspect | Rule |
|
||||
|--------|------|
|
||||
| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call |
|
||||
| **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
|
||||
| **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
|
||||
| **Result Handling** | `background_output()` to collect results as they complete |
|
||||
| **Reporting** | IMMEDIATE streaming when each task finishes |
|
||||
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
|
||||
const issue = allIssues[i]
|
||||
const category = getCategory(i)
|
||||
|
||||
const taskId = await delegate_task(
|
||||
const taskId = await task(
|
||||
category=category,
|
||||
load_skills=[],
|
||||
run_in_background=true, // ← CRITICAL: Each issue is independent background task
|
||||
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
|
||||
|
||||
console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
|
||||
|
||||
const taskId = await delegate_task(
|
||||
const taskId = await task(
|
||||
category=category,
|
||||
load_skills=[],
|
||||
run_in_background=true, // ← BACKGROUND TASK: Each issue runs independently
|
||||
@@ -480,7 +480,7 @@ When invoked, immediately:
|
||||
4. Exhaustive pagination for issues
|
||||
5. Exhaustive pagination for PRs
|
||||
6. **LAUNCH**: For each issue:
|
||||
- `delegate_task(run_in_background=true)` - 1 task per issue
|
||||
- `task(run_in_background=true)` - 1 task per issue
|
||||
- Store taskId mapped to issue number
|
||||
7. **STREAM**: Poll `background_output()` for each task:
|
||||
- As each completes, immediately report result
|
||||
|
||||
@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:
|
||||
|
||||
| Aspect | Rule |
|
||||
|--------|------|
|
||||
| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call |
|
||||
| **Task Granularity** | 1 PR = Exactly 1 `task()` call |
|
||||
| **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
|
||||
| **Result Handling** | `background_output()` to collect results as they complete |
|
||||
| **Reporting** | IMMEDIATE streaming when each task finishes |
|
||||
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
|
||||
const pr = allPRs[i]
|
||||
const category = getCategory(i)
|
||||
|
||||
const taskId = await delegate_task(
|
||||
const taskId = await task(
|
||||
category=category,
|
||||
load_skills=[],
|
||||
run_in_background=true, // ← CRITICAL: Each PR is independent background task
|
||||
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
|
||||
|
||||
console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
|
||||
|
||||
const taskId = await delegate_task(
|
||||
const taskId = await task(
|
||||
category=category,
|
||||
load_skills=[],
|
||||
run_in_background=true, // ← BACKGROUND TASK: Each PR runs independently
|
||||
@@ -474,7 +474,7 @@ When invoked, immediately:
|
||||
2. `gh repo view --json nameWithOwner -q .nameWithOwner`
|
||||
3. Exhaustive pagination for ALL open PRs
|
||||
4. **LAUNCH**: For each PR:
|
||||
- `delegate_task(run_in_background=true)` - 1 task per PR
|
||||
- `task(run_in_background=true)` - 1 task per PR
|
||||
- Store taskId mapped to PR number
|
||||
5. **STREAM**: Poll `background_output()` for each task:
|
||||
- As each completes, immediately report result
|
||||
|
||||
117
.sisyphus/rules/modular-code-enforcement.md
Normal file
117
.sisyphus/rules/modular-code-enforcement.md
Normal file
@@ -0,0 +1,117 @@
|
||||
---
|
||||
globs: ["**/*.ts", "**/*.tsx"]
|
||||
alwaysApply: false
|
||||
description: "Enforces strict modular code architecture: SRP, no monolithic index.ts, 200 LOC hard limit"
|
||||
---
|
||||
|
||||
<MANDATORY_ARCHITECTURE_RULE severity="BLOCKING" priority="HIGHEST">
|
||||
|
||||
# Modular Code Architecture — Zero Tolerance Policy
|
||||
|
||||
This rule is NON-NEGOTIABLE. Violations BLOCK all further work until resolved.
|
||||
|
||||
## Rule 1: index.ts is an ENTRY POINT, NOT a dumping ground
|
||||
|
||||
`index.ts` files MUST ONLY contain:
|
||||
- Re-exports (`export { ... } from "./module"`)
|
||||
- Factory function calls that compose modules
|
||||
- Top-level wiring/registration (hook registration, plugin setup)
|
||||
|
||||
`index.ts` MUST NEVER contain:
|
||||
- Business logic implementation
|
||||
- Helper/utility functions
|
||||
- Type definitions beyond simple re-exports
|
||||
- Multiple unrelated responsibilities mixed together
|
||||
|
||||
**If you find mixed logic in index.ts**: Extract each responsibility into its own dedicated file BEFORE making any other changes. This is not optional.
|
||||
|
||||
## Rule 2: No Catch-All Files — utils.ts / service.ts are CODE SMELLS
|
||||
|
||||
A single `utils.ts`, `helpers.ts`, `service.ts`, or `common.ts` is a **gravity well** — every unrelated function gets tossed in, and it grows into an untestable, unreviewable blob.
|
||||
|
||||
**These file names are BANNED as top-level catch-alls.** Instead:
|
||||
|
||||
| Anti-Pattern | Refactor To |
|
||||
|--------------|-------------|
|
||||
| `utils.ts` with `formatDate()`, `slugify()`, `retry()` | `date-formatter.ts`, `slugify.ts`, `retry.ts` |
|
||||
| `service.ts` handling auth + billing + notifications | `auth-service.ts`, `billing-service.ts`, `notification-service.ts` |
|
||||
| `helpers.ts` with 15 unrelated exports | One file per logical domain |
|
||||
|
||||
**Design for reusability from the start.** Each module should be:
|
||||
- **Independently importable** — no consumer should need to pull in unrelated code
|
||||
- **Self-contained** — its dependencies are explicit, not buried in a shared grab-bag
|
||||
- **Nameable by purpose** — the filename alone tells you what it does
|
||||
|
||||
If you catch yourself typing `utils.ts` or `service.ts`, STOP and name the file after what it actually does.
|
||||
|
||||
## Rule 3: Single Responsibility Principle — ABSOLUTE
|
||||
|
||||
Every `.ts` file MUST have exactly ONE clear, nameable responsibility.
|
||||
|
||||
**Self-test**: If you cannot describe the file's purpose in ONE short phrase (e.g., "parses YAML frontmatter", "matches rules against file paths"), the file does too much. Split it.
|
||||
|
||||
| Signal | Action |
|
||||
|--------|--------|
|
||||
| File has 2+ unrelated exported functions | **SPLIT NOW** — each into its own module |
|
||||
| File mixes I/O with pure logic | **SPLIT NOW** — separate side effects from computation |
|
||||
| File has both types and implementation | **SPLIT NOW** — types.ts + implementation.ts |
|
||||
| You need to scroll to understand the file | **SPLIT NOW** — it's too large |
|
||||
|
||||
## Rule 4: 200 LOC Hard Limit — CODE SMELL DETECTOR
|
||||
|
||||
Any `.ts`/`.tsx` file exceeding **200 lines of code** (excluding prompt strings, template literals containing prompts, and `.md` content) is an **immediate code smell**.
|
||||
|
||||
**When you detect a file > 200 LOC**:
|
||||
1. **STOP** current work
|
||||
2. **Identify** the multiple responsibilities hiding in the file
|
||||
3. **Extract** each responsibility into a focused module
|
||||
4. **Verify** each resulting file is < 200 LOC and has a single purpose
|
||||
5. **Resume** original work
|
||||
|
||||
Prompt-heavy files (agent definitions, skill definitions) where the bulk of content is template literal prompt text are EXEMPT from the LOC count — but their non-prompt logic must still be < 200 LOC.
|
||||
|
||||
### How to Count LOC
|
||||
|
||||
**Count these** (= actual logic):
|
||||
- Import statements
|
||||
- Variable/constant declarations
|
||||
- Function/class/interface/type definitions
|
||||
- Control flow (`if`, `for`, `while`, `switch`, `try/catch`)
|
||||
- Expressions, assignments, return statements
|
||||
- Closing braces `}` that belong to logic blocks
|
||||
|
||||
**Exclude these** (= not logic):
|
||||
- Blank lines
|
||||
- Comment-only lines (`//`, `/* */`, `/** */`)
|
||||
- Lines inside template literals that are prompt/instruction text (e.g., the string body of `` const prompt = `...` ``)
|
||||
- Lines inside multi-line strings used as documentation/prompt content
|
||||
|
||||
**Quick method**: Read the file → subtract blank lines, comment-only lines, and prompt string content → remaining count = LOC.
|
||||
|
||||
**Example**:
|
||||
```typescript
|
||||
// 1 import { foo } from "./foo"; ← COUNT
|
||||
// 2 ← SKIP (blank)
|
||||
// 3 // Helper for bar ← SKIP (comment)
|
||||
// 4 export function bar(x: number) { ← COUNT
|
||||
// 5 const prompt = ` ← COUNT (declaration)
|
||||
// 6 You are an assistant. ← SKIP (prompt text)
|
||||
// 7 Follow these rules: ← SKIP (prompt text)
|
||||
// 8 `; ← COUNT (closing)
|
||||
// 9 return process(prompt, x); ← COUNT
|
||||
// 10 } ← COUNT
|
||||
```
|
||||
→ LOC = **5** (lines 1, 4, 5, 9, 10). Not 10.
|
||||
|
||||
When in doubt, **round up** — err on the side of splitting.
|
||||
|
||||
## How to Apply
|
||||
|
||||
When reading, writing, or editing ANY `.ts`/`.tsx` file:
|
||||
|
||||
1. **Check the file you're touching** — does it violate any rule above?
|
||||
2. **If YES** — refactor FIRST, then proceed with your task
|
||||
3. **If creating a new file** — ensure it has exactly one responsibility and stays under 200 LOC
|
||||
4. **If adding code to an existing file** — verify the addition doesn't push the file past 200 LOC or add a second responsibility. If it does, extract into a new module.
|
||||
|
||||
</MANDATORY_ARCHITECTURE_RULE>
|
||||
@@ -195,7 +195,7 @@ oh-my-opencode/
|
||||
| Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
|
||||
| Error Handling | Empty catch blocks |
|
||||
| Testing | Deleting failing tests, writing implementation before test |
|
||||
| Agent Calls | Sequential - use `delegate_task` parallel |
|
||||
| Agent Calls | Sequential - use `task` parallel |
|
||||
| Hook Logic | Heavy PreToolUse - slows every call |
|
||||
| Commits | Giant (3+ files), separate test from impl |
|
||||
| Temperature | >0.3 for code agents |
|
||||
|
||||
28
bun.lock
28
bun.lock
@@ -28,13 +28,13 @@
|
||||
"typescript": "^5.7.3",
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.2.3",
|
||||
"oh-my-opencode-darwin-x64": "3.2.3",
|
||||
"oh-my-opencode-linux-arm64": "3.2.3",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.2.3",
|
||||
"oh-my-opencode-linux-x64": "3.2.3",
|
||||
"oh-my-opencode-linux-x64-musl": "3.2.3",
|
||||
"oh-my-opencode-windows-x64": "3.2.3",
|
||||
"oh-my-opencode-darwin-arm64": "3.3.0",
|
||||
"oh-my-opencode-darwin-x64": "3.3.0",
|
||||
"oh-my-opencode-linux-arm64": "3.3.0",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.3.0",
|
||||
"oh-my-opencode-linux-x64": "3.3.0",
|
||||
"oh-my-opencode-linux-x64-musl": "3.3.0",
|
||||
"oh-my-opencode-windows-x64": "3.3.0",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -226,19 +226,19 @@
|
||||
|
||||
"object-inspect": ["object-inspect@1.13.4", "", {}, "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew=="],
|
||||
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.2.3", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Doc9xQCj5Jmx3PzouBIfvDwmfWM94Y9Q9IngFqOjrVpfBef9V/WIH0PlhJU6ps4BKGey8Nf2afFq3UE06Z63Hg=="],
|
||||
"oh-my-opencode-darwin-arm64": ["oh-my-opencode-darwin-arm64@3.3.0", "", { "os": "darwin", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-P2kZKJqZaA4j0qtGM3I8+ZeH204ai27ni/OXLjtFdOewRjJgrahxaC1XslgK7q/KU9fXz6BQfEqAjbvyPf/rgQ=="],
|
||||
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.2.3", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-w7lO0Hn/AlLCHe33KPbje83Js2h5weDWVMuopEs6d3pi/1zkRDBEhCi63S4J0d0EKod9kEPQA6ojtdVJ4J39zQ=="],
|
||||
"oh-my-opencode-darwin-x64": ["oh-my-opencode-darwin-x64@3.3.0", "", { "os": "darwin", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RopOorbW1WyhMQJ+ipuqiOA1GICS+3IkOwNyEe0KZlCLpoEDTyFopIL87HSns+gEQPMxnknroDp8lzxn1AKgjw=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-m1tS1jRLO2Svm5NuetK3BAgdAR8b2GkiIfMFoIYsLJTPmzIkXaigAYkFq+BXCs5JAbRmPmvjndz9cuCddnPADQ=="],
|
||||
"oh-my-opencode-linux-arm64": ["oh-my-opencode-linux-arm64@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-297iEfuK+05g+q64crPW78Zbgm/j5PGjDDweSPkZ6rI6SEfHMvOIkGxMvN8gugM3zcH8FOCQXoY2nC8b6x3pwQ=="],
|
||||
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.2.3", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-Q/0AGtOuUFGNGIX8F6iD5W8c2spbjrqVBPt0B7laQSwnScKs/BI+TvM6HRE37vhoWg+fzhAX3QYJ2H9Un9FYrg=="],
|
||||
"oh-my-opencode-linux-arm64-musl": ["oh-my-opencode-linux-arm64-musl@3.3.0", "", { "os": "linux", "cpu": "arm64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-oVxP0+yn66HQYfrl9QT6I7TumRzciuPB4z24+PwKEVcDjPbWXQqLY1gwOGHZAQBPLf0vwewv9ybEDVD42RRH4g=="],
|
||||
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-RIAyoj2XbT8vH++5fPUkdO+D1tfqxh+iWto7CqWr1TgbABbBJljGk91HJgS9xjnxyCQJEpFhTmO7NMHKJcZOWQ=="],
|
||||
"oh-my-opencode-linux-x64": ["oh-my-opencode-linux-x64@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-k9LoLkisLJwJNR1J0Bh1bjGtGBkl5D9WzFPSdZCAlyiT6TgG9w5erPTlXqtl2Lt0We5tYUVYlkEIHRMK/ugNsQ=="],
|
||||
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.2.3", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-nnQK3y7R4DrBvqdqRGbujL2oAAQnVVb23JHUbJPQ6YxrRRGWpLOVGvK5c16ykSFEUPl8eZDmi1ON/R4opKLOUw=="],
|
||||
"oh-my-opencode-linux-x64-musl": ["oh-my-opencode-linux-x64-musl@3.3.0", "", { "os": "linux", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode" } }, "sha512-7asXCeae7wBxJrzoZ7J6Yo1oaOxwUN3bTO7jWurCTMs5TDHO+pEHysgv/nuF1jvj1T+r1vg1H5ZmopuKy1qvXg=="],
|
||||
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.2.3", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-mt8E/TkpaCp04pvzwntT8x8TaqXDt3zCD5X2eA8ZZMrb5ofNr5HyG5G4SFXrUh+Ez3b/3YXpNWv6f6rnAlk1Dg=="],
|
||||
"oh-my-opencode-windows-x64": ["oh-my-opencode-windows-x64@3.3.0", "", { "os": "win32", "cpu": "x64", "bin": { "oh-my-opencode": "bin/oh-my-opencode.exe" } }, "sha512-ABvwfaXb2xdrpbivzlPPJzIm5vXp+QlVakkaHEQf3TU6Mi/+fehH6Qhq/KMh66FDO2gq3xmxbH7nktHRQp9kNA=="],
|
||||
|
||||
"on-finished": ["on-finished@2.4.1", "", { "dependencies": { "ee-first": "1.1.1" } }, "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg=="],
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
|
||||
- **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
|
||||
- **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)
|
||||
|
||||
By combining these two concepts, you can generate optimal agents through `delegate_task`.
|
||||
By combining these two concepts, you can generate optimal agents through `task`.
|
||||
|
||||
---
|
||||
|
||||
@@ -32,10 +32,10 @@ A Category is an agent configuration preset optimized for specific domains.
|
||||
|
||||
### Usage
|
||||
|
||||
Specify the `category` parameter when invoking the `delegate_task` tool.
|
||||
Specify the `category` parameter when invoking the `task` tool.
|
||||
|
||||
```typescript
|
||||
delegate_task(
|
||||
task(
|
||||
category="visual-engineering",
|
||||
prompt="Add a responsive chart component to the dashboard page"
|
||||
)
|
||||
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
|
||||
Add desired skill names to the `load_skills` array.
|
||||
|
||||
```typescript
|
||||
delegate_task(
|
||||
task(
|
||||
category="quick",
|
||||
load_skills=["git-master"],
|
||||
prompt="Commit current changes. Follow commit message style."
|
||||
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.
|
||||
|
||||
---
|
||||
|
||||
## 5. delegate_task Prompt Guide
|
||||
## 5. task Prompt Guide
|
||||
|
||||
When delegating, **clear and specific** prompts are essential. Include these 7 elements:
|
||||
|
||||
@@ -158,7 +158,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
|
||||
| `description` | string | Human-readable description of the category's purpose. Shown in task prompt. |
|
||||
| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
|
||||
| `variant` | string | Model variant (e.g., `max`, `xhigh`) |
|
||||
| `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |
|
||||
|
||||
@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
|
||||
"explore": { "model": "opencode/gpt-5-nano" } // Free model for grep
|
||||
},
|
||||
|
||||
// Override category models (used by delegate_task)
|
||||
// Override category models (used by task)
|
||||
"categories": {
|
||||
"quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks
|
||||
"visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
|
||||
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
|
||||
Oh My OpenCode includes built-in skills that provide additional capabilities:
|
||||
|
||||
- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
|
||||
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
|
||||
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.
|
||||
|
||||
Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:
|
||||
|
||||
@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
|
||||
### How It Works
|
||||
|
||||
When `tmux.enabled` is `true` and you're inside a tmux session:
|
||||
- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
|
||||
- Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
|
||||
- Each pane shows the subagent's real-time output
|
||||
- Panes are automatically closed when the subagent completes
|
||||
- Layout is automatically adjusted based on your configuration
|
||||
@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many
|
||||
|
||||
## Categories
|
||||
|
||||
Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
|
||||
Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
|
||||
|
||||
### Built-in Categories
|
||||
|
||||
@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
|
||||
### Usage
|
||||
|
||||
```javascript
|
||||
// Via delegate_task tool
|
||||
delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component")
|
||||
delegate_task(category="ultrabrain", prompt="Design the payment processing flow")
|
||||
// Via task tool
|
||||
task(category="visual-engineering", prompt="Create a responsive dashboard component")
|
||||
task(category="ultrabrain", prompt="Design the payment processing flow")
|
||||
|
||||
// Or target a specific agent directly (bypasses categories)
|
||||
delegate_task(agent="oracle", prompt="Review this architecture")
|
||||
task(agent="oracle", prompt="Review this architecture")
|
||||
```
|
||||
|
||||
### Custom Categories
|
||||
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
|
||||
| `description` | string | - | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
|
||||
| `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. |
|
||||
| `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |
|
||||
|
||||
## Model Resolution System
|
||||
|
||||
@@ -54,7 +54,7 @@ Run agents in the background and continue working:
|
||||
|
||||
```
|
||||
# Launch in background
|
||||
delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
|
||||
task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
|
||||
|
||||
# Continue working...
|
||||
# System notifies on completion
|
||||
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
|
||||
| Hook | Event | Description |
|
||||
|------|-------|-------------|
|
||||
| **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
|
||||
| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. |
|
||||
| **delegate-task-retry** | PostToolUse | Retries failed task calls. |
|
||||
|
||||
#### Integration
|
||||
|
||||
@@ -454,7 +454,7 @@ Disable specific hooks in config:
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
|
||||
| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
|
||||
| **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
|
||||
| **background_output** | Retrieve background task results |
|
||||
| **background_cancel** | Cancel running background tasks |
|
||||
|
||||
|
||||
@@ -50,11 +50,11 @@ flowchart TB
|
||||
User -->|"/start-work"| Orchestrator
|
||||
Plan -->|"Read"| Orchestrator
|
||||
|
||||
Orchestrator -->|"delegate_task(category)"| Junior
|
||||
Orchestrator -->|"delegate_task(agent)"| Oracle
|
||||
Orchestrator -->|"delegate_task(agent)"| Explore
|
||||
Orchestrator -->|"delegate_task(agent)"| Librarian
|
||||
Orchestrator -->|"delegate_task(agent)"| Frontend
|
||||
Orchestrator -->|"task(category)"| Junior
|
||||
Orchestrator -->|"task(agent)"| Oracle
|
||||
Orchestrator -->|"task(agent)"| Explore
|
||||
Orchestrator -->|"task(agent)"| Librarian
|
||||
Orchestrator -->|"task(agent)"| Frontend
|
||||
|
||||
Junior -->|"Results + Learnings"| Orchestrator
|
||||
Oracle -->|"Advice"| Orchestrator
|
||||
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
|
||||
```typescript
|
||||
// Orchestrator identifies parallelizable groups from plan
|
||||
// Group A: Tasks 2, 3, 4 (no file conflicts)
|
||||
delegate_task(category="ultrabrain", prompt="Task 2...")
|
||||
delegate_task(category="visual-engineering", prompt="Task 3...")
|
||||
delegate_task(category="general", prompt="Task 4...")
|
||||
task(category="ultrabrain", prompt="Task 2...")
|
||||
task(category="visual-engineering", prompt="Task 3...")
|
||||
task(category="general", prompt="Task 4...")
|
||||
// All run simultaneously
|
||||
```
|
||||
|
||||
@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")
|
||||
|
||||
Junior is the **workhorse** that actually writes code. Key characteristics:
|
||||
|
||||
- **Focused**: Cannot delegate (blocked from task/delegate_task tools)
|
||||
- **Focused**: Cannot delegate (blocked from task tool)
|
||||
- **Disciplined**: Obsessive todo tracking
|
||||
- **Verified**: Must pass lsp_diagnostics before completion
|
||||
- **Constrained**: Cannot modify plan files (READ-ONLY)
|
||||
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
|
||||
|
||||
---
|
||||
|
||||
## The delegate_task Tool: Category + Skill System
|
||||
## The task Tool: Category + Skill System
|
||||
|
||||
### Why Categories are Revolutionary
|
||||
|
||||
@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
|
||||
|
||||
```typescript
|
||||
// OLD: Model name creates distributional bias
|
||||
delegate_task(agent="gpt-5.2", prompt="...") // Model knows its limitations
|
||||
delegate_task(agent="claude-opus-4.6", prompt="...") // Different self-perception
|
||||
task(agent="gpt-5.2", prompt="...") // Model knows its limitations
|
||||
task(agent="claude-opus-4.6", prompt="...") // Different self-perception
|
||||
```
|
||||
|
||||
**The Solution: Semantic Categories:**
|
||||
|
||||
```typescript
|
||||
// NEW: Category describes INTENT, not implementation
|
||||
delegate_task(category="ultrabrain", prompt="...") // "Think strategically"
|
||||
delegate_task(category="visual-engineering", prompt="...") // "Design beautifully"
|
||||
delegate_task(category="quick", prompt="...") // "Just get it done fast"
|
||||
task(category="ultrabrain", prompt="...") // "Think strategically"
|
||||
task(category="visual-engineering", prompt="...") // "Design beautifully"
|
||||
task(category="quick", prompt="...") // "Just get it done fast"
|
||||
```
|
||||
|
||||
### Built-in Categories
|
||||
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:
|
||||
|
||||
```typescript
|
||||
// Category + Skill combination
|
||||
delegate_task(
|
||||
task(
|
||||
category="visual-engineering",
|
||||
load_skills=["frontend-ui-ux"], // Adds UI/UX expertise
|
||||
prompt="..."
|
||||
)
|
||||
|
||||
delegate_task(
|
||||
task(
|
||||
category="general",
|
||||
load_skills=["playwright"], // Adds browser automation expertise
|
||||
prompt="..."
|
||||
@@ -365,7 +365,7 @@ sequenceDiagram
|
||||
|
||||
Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
|
||||
|
||||
Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
|
||||
Orchestrator->>Junior: task(category, load_skills, prompt)
|
||||
|
||||
Junior->>Junior: Create todos, execute
|
||||
Junior->>Junior: Verify (lsp_diagnostics, tests)
|
||||
|
||||
@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.
|
||||
|
||||
2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.
|
||||
|
||||
3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly.
|
||||
3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.
|
||||
|
||||
4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.
|
||||
|
||||
|
||||
@@ -288,7 +288,7 @@ src/tools/delegate-task/constants.ts
|
||||
```
|
||||
Sisyphus (ULW mode)
|
||||
↓
|
||||
delegate_task(category="deep", ...)
|
||||
task(category="deep", ...)
|
||||
↓
|
||||
executor.ts: executeBackgroundContinuation()
|
||||
↓
|
||||
|
||||
16
package.json
16
package.json
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "The Best AI Agent Harness - Batteries-Included OpenCode Plugin with Multi-Model Orchestration, Parallel Background Agents, and Crafted LSP/AST Tools",
|
||||
"main": "dist/index.js",
|
||||
"types": "dist/index.d.ts",
|
||||
@@ -74,13 +74,13 @@
|
||||
"typescript": "^5.7.3"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"oh-my-opencode-darwin-arm64": "3.2.4",
|
||||
"oh-my-opencode-darwin-x64": "3.2.4",
|
||||
"oh-my-opencode-linux-arm64": "3.2.4",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.2.4",
|
||||
"oh-my-opencode-linux-x64": "3.2.4",
|
||||
"oh-my-opencode-linux-x64-musl": "3.2.4",
|
||||
"oh-my-opencode-windows-x64": "3.2.4"
|
||||
"oh-my-opencode-darwin-arm64": "3.3.1",
|
||||
"oh-my-opencode-darwin-x64": "3.3.1",
|
||||
"oh-my-opencode-linux-arm64": "3.3.1",
|
||||
"oh-my-opencode-linux-arm64-musl": "3.3.1",
|
||||
"oh-my-opencode-linux-x64": "3.3.1",
|
||||
"oh-my-opencode-linux-x64-musl": "3.3.1",
|
||||
"oh-my-opencode-windows-x64": "3.3.1"
|
||||
},
|
||||
"trustedDependencies": [
|
||||
"@ast-grep/cli",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-arm64",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-darwin-x64",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (darwin-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64-musl",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-arm64",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-arm64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64-musl",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64-musl)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-linux-x64",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (linux-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "oh-my-opencode-windows-x64",
|
||||
"version": "3.2.4",
|
||||
"version": "3.3.1",
|
||||
"description": "Platform-specific binary for oh-my-opencode (windows-x64)",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
||||
@@ -1207,6 +1207,22 @@
|
||||
"created_at": "2026-02-06T06:23:24Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1541
|
||||
},
|
||||
{
|
||||
"name": "itsnebulalol",
|
||||
"id": 18669106,
|
||||
"comment_id": 3864672624,
|
||||
"created_at": "2026-02-07T15:10:54Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1622
|
||||
},
|
||||
{
|
||||
"name": "mkusaka",
|
||||
"id": 24956031,
|
||||
"comment_id": 3864822328,
|
||||
"created_at": "2026-02-07T16:54:36Z",
|
||||
"repoId": 1108837393,
|
||||
"pullRequestNo": 1629
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
|
||||
- "Working with unfamiliar npm/pip/cargo packages"
|
||||
### Pre-Delegation Planning (MANDATORY)
|
||||
|
||||
**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.**
|
||||
**BEFORE every `task` call, EXPLICITLY declare your reasoning.**
|
||||
|
||||
#### Step 1: Identify Task Requirements
|
||||
|
||||
@@ -236,7 +236,7 @@ Ask yourself:
|
||||
**MANDATORY FORMAT:**
|
||||
|
||||
```
|
||||
I will use delegate_task with:
|
||||
I will use task with:
|
||||
- **Category**: [selected-category-name]
|
||||
- **Why this category**: [how category description matches task domain]
|
||||
- **load_skills**: [list of selected skills]
|
||||
@@ -246,14 +246,14 @@ I will use delegate_task with:
|
||||
- **Expected Outcome**: [what success looks like]
|
||||
```
|
||||
|
||||
**Then** make the delegate_task call.
|
||||
**Then** make the task call.
|
||||
|
||||
#### Examples
|
||||
|
||||
**CORRECT: Full Evaluation**
|
||||
|
||||
```
|
||||
I will use delegate_task with:
|
||||
I will use task with:
|
||||
- **Category**: [category-name]
|
||||
- **Why this category**: Category description says "[quote description]" which matches this task's requirements
|
||||
- **load_skills**: ["skill-a", "skill-b"]
|
||||
@@ -263,9 +263,11 @@ I will use delegate_task with:
|
||||
- skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
|
||||
- **Expected Outcome**: [concrete deliverable]
|
||||
|
||||
delegate_task(
|
||||
task(
|
||||
category="[category-name]",
|
||||
load_skills=["skill-a", "skill-b"],
|
||||
description="[short task description]",
|
||||
run_in_background=false,
|
||||
prompt="..."
|
||||
)
|
||||
```
|
||||
@@ -273,14 +275,16 @@ delegate_task(
|
||||
**CORRECT: Agent-Specific (for exploration/consultation)**
|
||||
|
||||
```
|
||||
I will use delegate_task with:
|
||||
I will use task with:
|
||||
- **Agent**: [agent-name]
|
||||
- **Reason**: This requires [agent's specialty] based on agent description
|
||||
- **load_skills**: [] (agents have built-in expertise)
|
||||
- **Expected Outcome**: [what agent should return]
|
||||
|
||||
delegate_task(
|
||||
task(
|
||||
subagent_type="[agent-name]",
|
||||
description="[short task description]",
|
||||
run_in_background=false,
|
||||
load_skills=[],
|
||||
prompt="..."
|
||||
)
|
||||
@@ -289,14 +293,15 @@ delegate_task(
|
||||
**CORRECT: Background Exploration**
|
||||
|
||||
```
|
||||
I will use delegate_task with:
|
||||
I will use task with:
|
||||
- **Agent**: explore
|
||||
- **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
|
||||
- **load_skills**: []
|
||||
- **Expected Outcome**: List of files containing auth patterns
|
||||
|
||||
delegate_task(
|
||||
task(
|
||||
subagent_type="explore",
|
||||
description="Find auth implementations",
|
||||
run_in_background=true,
|
||||
load_skills=[],
|
||||
prompt="Find all authentication implementations in the codebase"
|
||||
@@ -306,7 +311,7 @@ delegate_task(
|
||||
**WRONG: No Skill Evaluation**
|
||||
|
||||
```
|
||||
delegate_task(category="...", load_skills=[], prompt="...") // Where's the justification?
|
||||
task(category="...", load_skills=[], prompt="...") // Where's the justification?
|
||||
```
|
||||
|
||||
**WRONG: Vague Category Selection**
|
||||
@@ -317,7 +322,7 @@ I'll use this category because it seems right.
|
||||
|
||||
#### Enforcement
|
||||
|
||||
**BLOCKING VIOLATION**: If you call `delegate_task` without:
|
||||
**BLOCKING VIOLATION**: If you call `task` without:
|
||||
1. Explaining WHY category was selected (based on description)
|
||||
2. Evaluating EACH available skill for relevance
|
||||
|
||||
@@ -329,15 +334,15 @@ I'll use this category because it seems right.
|
||||
```typescript
|
||||
// CORRECT: Always background, always parallel
|
||||
// Contextual Grep (internal)
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
|
||||
task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
|
||||
task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
|
||||
// Reference Grep (external)
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
|
||||
task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
|
||||
task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
|
||||
// Continue working immediately. Collect with background_output when needed.
|
||||
|
||||
// WRONG: Sequential or blocking
|
||||
result = delegate_task(...) // Never wait synchronously for explore/librarian
|
||||
result = task(...) // Never wait synchronously for explore/librarian
|
||||
```
|
||||
|
||||
### Background Result Collection:
|
||||
@@ -347,16 +352,16 @@ result = delegate_task(...) // Never wait synchronously for explore/librarian
|
||||
4. BEFORE final answer: `background_cancel(all=true)`
|
||||
|
||||
### Resume Previous Agent (CRITICAL for efficiency):
|
||||
Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED.
|
||||
Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.
|
||||
|
||||
**ALWAYS use resume when:**
|
||||
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"`
|
||||
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"`
|
||||
- Multi-turn with same agent → resume instead of new task (saves tokens!)
|
||||
**ALWAYS use session_id when:**
|
||||
- Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
|
||||
- Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
|
||||
- Multi-turn with same agent → session_id instead of new task (saves tokens!)
|
||||
|
||||
**Example:**
|
||||
```
|
||||
delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.")
|
||||
task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
|
||||
```
|
||||
|
||||
### Search Stop Conditions
|
||||
@@ -377,7 +382,7 @@ STOP searching when:
|
||||
3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
|
||||
### Category + Skills Delegation System
|
||||
|
||||
**delegate_task() combines categories and skills for optimal task execution.**
|
||||
**task() combines categories and skills for optimal task execution.**
|
||||
|
||||
#### Available Categories (Domain-Optimized Models)
|
||||
|
||||
@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
|
||||
### Delegation Pattern
|
||||
|
||||
```typescript
|
||||
delegate_task(
|
||||
task(
|
||||
category="[selected-category]",
|
||||
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills
|
||||
prompt="..."
|
||||
@@ -451,7 +456,7 @@ delegate_task(
|
||||
|
||||
**ANTI-PATTERN (will produce poor results):**
|
||||
```typescript
|
||||
delegate_task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
|
||||
task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
|
||||
```
|
||||
### Delegation Table:
|
||||
|
||||
|
||||
@@ -68,11 +68,11 @@ agents/
|
||||
## TOOL RESTRICTIONS
|
||||
| Agent | Denied Tools |
|
||||
|-------|-------------|
|
||||
| oracle | write, edit, task, delegate_task |
|
||||
| librarian | write, edit, task, delegate_task, call_omo_agent |
|
||||
| explore | write, edit, task, delegate_task, call_omo_agent |
|
||||
| oracle | write, edit, task, task |
|
||||
| librarian | write, edit, task, task, call_omo_agent |
|
||||
| explore | write, edit, task, task, call_omo_agent |
|
||||
| multimodal-looker | Allowlist: read only |
|
||||
| Sisyphus-Junior | task, delegate_task |
|
||||
| Sisyphus-Junior | task, task |
|
||||
| Atlas | task, call_omo_agent |
|
||||
|
||||
## PATTERNS
|
||||
@@ -85,5 +85,5 @@ agents/
|
||||
## ANTI-PATTERNS
|
||||
- **Trust reports**: NEVER trust "I'm done" - verify outputs
|
||||
- **High temp**: Don't use >0.3 for code agents
|
||||
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration
|
||||
- **Sequential calls**: Use `task` with `run_in_background` for exploration
|
||||
- **Prometheus writing code**: Planner only - never implements
|
||||
|
||||
@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
|
||||
</identity>
|
||||
|
||||
<mission>
|
||||
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
|
||||
Complete ALL tasks in a work plan via \`task()\` until fully done.
|
||||
One task per delegation. Parallel when independent. Verify everything.
|
||||
</mission>
|
||||
|
||||
<delegation_system>
|
||||
## How to Delegate
|
||||
|
||||
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
|
||||
Use \`task()\` with EITHER category OR agent (mutually exclusive):
|
||||
|
||||
\`\`\`typescript
|
||||
// Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
|
||||
delegate_task(
|
||||
task(
|
||||
category="[category-name]",
|
||||
load_skills=["skill-1", "skill-2"],
|
||||
run_in_background=false,
|
||||
@@ -38,7 +38,7 @@ delegate_task(
|
||||
)
|
||||
|
||||
// Option B: Specialized Agent (for specific expert tasks)
|
||||
delegate_task(
|
||||
task(
|
||||
subagent_type="[agent-name]",
|
||||
load_skills=[],
|
||||
run_in_background=false,
|
||||
@@ -58,7 +58,7 @@ delegate_task(
|
||||
|
||||
## 6-Section Prompt Structure (MANDATORY)
|
||||
|
||||
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
|
||||
Every \`task()\` prompt MUST include ALL 6 sections:
|
||||
|
||||
\`\`\`markdown
|
||||
## 1. TASK
|
||||
@@ -149,7 +149,7 @@ Structure:
|
||||
### 3.1 Check Parallelization
|
||||
If tasks can run in parallel:
|
||||
- Prepare prompts for ALL parallelizable tasks
|
||||
- Invoke multiple \`delegate_task()\` in ONE message
|
||||
- Invoke multiple \`task()\` in ONE message
|
||||
- Wait for all to complete
|
||||
- Verify all, then continue
|
||||
|
||||
@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
|
||||
|
||||
Extract wisdom and include in prompt.
|
||||
|
||||
### 3.3 Invoke delegate_task()
|
||||
### 3.3 Invoke task()
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(
|
||||
task(
|
||||
category="[category]",
|
||||
load_skills=["[relevant-skills]"],
|
||||
run_in_background=false,
|
||||
@@ -210,7 +210,7 @@ delegate_task(
|
||||
|
||||
**If verification fails**: Resume the SAME session with the ACTUAL error output:
|
||||
\`\`\`typescript
|
||||
delegate_task(
|
||||
task(
|
||||
session_id="ses_xyz789", // ALWAYS use the session from the failed task
|
||||
load_skills=[...],
|
||||
prompt="Verification failed: {actual error}. Fix."
|
||||
@@ -221,13 +221,13 @@ delegate_task(
|
||||
|
||||
**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**
|
||||
|
||||
Every \`delegate_task()\` output includes a session_id. STORE IT.
|
||||
Every \`task()\` output includes a session_id. STORE IT.
|
||||
|
||||
If task fails:
|
||||
1. Identify what went wrong
|
||||
2. **Resume the SAME session** - subagent has full context already:
|
||||
\`\`\`typescript
|
||||
delegate_task(
|
||||
task(
|
||||
session_id="ses_xyz789", // Session from failed task
|
||||
load_skills=[...],
|
||||
prompt="FAILED: {error}. Fix by: {specific instruction}"
|
||||
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:
|
||||
|
||||
**For exploration (explore/librarian)**: ALWAYS background
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", run_in_background=true, ...)
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, ...)
|
||||
task(subagent_type="explore", run_in_background=true, ...)
|
||||
task(subagent_type="librarian", run_in_background=true, ...)
|
||||
\`\`\`
|
||||
|
||||
**For task execution**: NEVER background
|
||||
\`\`\`typescript
|
||||
delegate_task(category="...", run_in_background=false, ...)
|
||||
task(category="...", run_in_background=false, ...)
|
||||
\`\`\`
|
||||
|
||||
**Parallel task groups**: Invoke multiple in ONE message
|
||||
\`\`\`typescript
|
||||
// Tasks 2, 3, 4 are independent - invoke together
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
|
||||
\`\`\`
|
||||
|
||||
**Background management**:
|
||||
|
||||
@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
|
||||
</identity>
|
||||
|
||||
<mission>
|
||||
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
|
||||
Complete ALL tasks in a work plan via \`task()\` until fully done.
|
||||
- One task per delegation
|
||||
- Parallel when independent
|
||||
- Verify everything
|
||||
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
|
||||
<delegation_system>
|
||||
## Delegation API
|
||||
|
||||
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
|
||||
Use \`task()\` with EITHER category OR agent (mutually exclusive):
|
||||
|
||||
\`\`\`typescript
|
||||
// Category + Skills (spawns Sisyphus-Junior)
|
||||
delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
|
||||
task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
|
||||
|
||||
// Specialized Agent
|
||||
delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
|
||||
task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
|
||||
\`\`\`
|
||||
|
||||
{CATEGORY_SECTION}
|
||||
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,
|
||||
|
||||
## 6-Section Prompt Structure (MANDATORY)
|
||||
|
||||
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
|
||||
Every \`task()\` prompt MUST include ALL 6 sections:
|
||||
|
||||
\`\`\`markdown
|
||||
## 1. TASK
|
||||
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
|
||||
## Step 3: Execute Tasks
|
||||
|
||||
### 3.1 Parallelization Check
|
||||
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message
|
||||
- Parallel tasks → invoke multiple \`task()\` in ONE message
|
||||
- Sequential → process one at a time
|
||||
|
||||
### 3.2 Pre-Delegation (MANDATORY)
|
||||
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
|
||||
\`\`\`
|
||||
Extract wisdom → include in prompt.
|
||||
|
||||
### 3.3 Invoke delegate_task()
|
||||
### 3.3 Invoke task()
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
|
||||
task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
|
||||
\`\`\`
|
||||
|
||||
### 3.4 Verify (PROJECT-LEVEL QA)
|
||||
@@ -201,7 +201,7 @@ Checklist:
|
||||
**CRITICAL: Use \`session_id\` for retries.**
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
|
||||
task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
|
||||
\`\`\`
|
||||
|
||||
- Maximum 3 retries per task
|
||||
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
|
||||
<parallel_execution>
|
||||
**Exploration (explore/librarian)**: ALWAYS background
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", run_in_background=true, ...)
|
||||
task(subagent_type="explore", run_in_background=true, ...)
|
||||
\`\`\`
|
||||
|
||||
**Task execution**: NEVER background
|
||||
\`\`\`typescript
|
||||
delegate_task(category="...", run_in_background=false, ...)
|
||||
task(category="...", run_in_background=false, ...)
|
||||
\`\`\`
|
||||
|
||||
**Parallel task groups**: Invoke multiple in ONE message
|
||||
\`\`\`typescript
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
|
||||
\`\`\`
|
||||
|
||||
**Background management**:
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/**
|
||||
* Atlas - Master Orchestrator Agent
|
||||
*
|
||||
* Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done.
|
||||
* Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
|
||||
* You are the conductor of a symphony of specialized agents.
|
||||
*
|
||||
* Routing:
|
||||
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
|
||||
|
||||
const baseConfig = {
|
||||
description:
|
||||
"Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
|
||||
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
|
||||
mode: MODE,
|
||||
...(ctx.model ? { model: ctx.model } : {}),
|
||||
temperature: 0.1,
|
||||
|
||||
@@ -8,21 +8,22 @@
|
||||
import type { CategoryConfig } from "../../config/schema"
|
||||
import { formatCustomSkillsBlock, type AvailableAgent, type AvailableSkill } from "../dynamic-agent-prompt-builder"
|
||||
import { DEFAULT_CATEGORIES, CATEGORY_DESCRIPTIONS } from "../../tools/delegate-task/constants"
|
||||
import { truncateDescription } from "../../shared/truncate-description"
|
||||
|
||||
export const getCategoryDescription = (name: string, userCategories?: Record<string, CategoryConfig>) =>
|
||||
userCategories?.[name]?.description ?? CATEGORY_DESCRIPTIONS[name] ?? "General tasks"
|
||||
|
||||
export function buildAgentSelectionSection(agents: AvailableAgent[]): string {
|
||||
if (agents.length === 0) {
|
||||
return `##### Option B: Use AGENT directly (for specialized experts)
|
||||
if (agents.length === 0) {
|
||||
return `##### Option B: Use AGENT directly (for specialized experts)
|
||||
|
||||
No agents available.`
|
||||
}
|
||||
No agents available.`
|
||||
}
|
||||
|
||||
const rows = agents.map((a) => {
|
||||
const shortDesc = a.description.split(".")[0] || a.description
|
||||
return `| \`${a.name}\` | ${shortDesc} |`
|
||||
})
|
||||
const rows = agents.map((a) => {
|
||||
const shortDesc = truncateDescription(a.description)
|
||||
return `| \`${a.name}\` | ${shortDesc} |`
|
||||
})
|
||||
|
||||
return `##### Option B: Use AGENT directly (for specialized experts)
|
||||
|
||||
@@ -47,7 +48,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
|
||||
${categoryRows.join("\n")}
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
|
||||
task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
|
||||
\`\`\``
|
||||
}
|
||||
|
||||
@@ -59,16 +60,16 @@ export function buildSkillsSection(skills: AvailableSkill[]): string {
|
||||
const builtinSkills = skills.filter((s) => s.location === "plugin")
|
||||
const customSkills = skills.filter((s) => s.location !== "plugin")
|
||||
|
||||
const builtinRows = builtinSkills.map((s) => {
|
||||
const shortDesc = s.description.split(".")[0] || s.description
|
||||
return `| \`${s.name}\` | ${shortDesc} |`
|
||||
})
|
||||
const builtinRows = builtinSkills.map((s) => {
|
||||
const shortDesc = truncateDescription(s.description)
|
||||
return `| \`${s.name}\` | ${shortDesc} |`
|
||||
})
|
||||
|
||||
const customRows = customSkills.map((s) => {
|
||||
const shortDesc = s.description.split(".")[0] || s.description
|
||||
const source = s.location === "project" ? "project" : "user"
|
||||
return `| \`${s.name}\` | ${shortDesc} | ${source} |`
|
||||
})
|
||||
const customRows = customSkills.map((s) => {
|
||||
const shortDesc = truncateDescription(s.description)
|
||||
const source = s.location === "project" ? "project" : "user"
|
||||
return `| \`${s.name}\` | ${shortDesc} | ${source} |`
|
||||
})
|
||||
|
||||
const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills, "**")
|
||||
|
||||
@@ -105,7 +106,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my
|
||||
|
||||
**Usage:**
|
||||
\`\`\`typescript
|
||||
delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
|
||||
task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
|
||||
\`\`\`
|
||||
|
||||
**IMPORTANT:**
|
||||
@@ -121,10 +122,10 @@ export function buildDecisionMatrix(agents: AvailableAgent[], userCategories?: R
|
||||
`| ${getCategoryDescription(name, userCategories)} | \`category="${name}", load_skills=[...]\` |`
|
||||
)
|
||||
|
||||
const agentRows = agents.map((a) => {
|
||||
const shortDesc = a.description.split(".")[0] || a.description
|
||||
return `| ${shortDesc} | \`agent="${a.name}"\` |`
|
||||
})
|
||||
const agentRows = agents.map((a) => {
|
||||
const shortDesc = truncateDescription(a.description)
|
||||
return `| ${shortDesc} | \`agent="${a.name}"\` |`
|
||||
})
|
||||
|
||||
return `##### Decision Matrix
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { AgentPromptMetadata, BuiltinAgentName } from "./types"
|
||||
import { truncateDescription } from "../shared/truncate-description"
|
||||
|
||||
export interface AvailableAgent {
|
||||
name: BuiltinAgentName
|
||||
@@ -205,16 +206,16 @@ export function buildCategorySkillsDelegationGuide(categories: AvailableCategory
|
||||
const builtinSkills = skills.filter((s) => s.location === "plugin")
|
||||
const customSkills = skills.filter((s) => s.location !== "plugin")
|
||||
|
||||
const builtinRows = builtinSkills.map((s) => {
|
||||
const desc = s.description.split(".")[0] || s.description
|
||||
return `| \`${s.name}\` | ${desc} |`
|
||||
})
|
||||
const builtinRows = builtinSkills.map((s) => {
|
||||
const desc = truncateDescription(s.description)
|
||||
return `| \`${s.name}\` | ${desc} |`
|
||||
})
|
||||
|
||||
const customRows = customSkills.map((s) => {
|
||||
const desc = s.description.split(".")[0] || s.description
|
||||
const source = s.location === "project" ? "project" : "user"
|
||||
return `| \`${s.name}\` | ${desc} | ${source} |`
|
||||
})
|
||||
const customRows = customSkills.map((s) => {
|
||||
const desc = truncateDescription(s.description)
|
||||
const source = s.location === "project" ? "project" : "user"
|
||||
return `| \`${s.name}\` | ${desc} | ${source} |`
|
||||
})
|
||||
|
||||
const customSkillBlock = formatCustomSkillsBlock(customRows, customSkills)
|
||||
|
||||
@@ -242,7 +243,7 @@ ${builtinRows.join("\n")}`
|
||||
|
||||
return `### Category + Skills Delegation System
|
||||
|
||||
**delegate_task() combines categories and skills for optimal task execution.**
|
||||
**task() combines categories and skills for optimal task execution.**
|
||||
|
||||
#### Available Categories (Domain-Optimized Models)
|
||||
|
||||
@@ -296,7 +297,7 @@ SKILL EVALUATION for "[skill-name]":
|
||||
### Delegation Pattern
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(
|
||||
task(
|
||||
category="[selected-category]",
|
||||
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills — ESPECIALLY user-installed ones
|
||||
prompt="..."
|
||||
@@ -305,7 +306,7 @@ delegate_task(
|
||||
|
||||
**ANTI-PATTERN (will produce poor results):**
|
||||
\`\`\`typescript
|
||||
delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
|
||||
task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
|
||||
\`\`\``
|
||||
}
|
||||
|
||||
@@ -421,7 +422,7 @@ export function buildUltraworkSection(
|
||||
|
||||
lines.push("**Agents** (for specialized consultation/exploration):")
|
||||
for (const agent of sortedAgents) {
|
||||
const shortDesc = agent.description.split(".")[0] || agent.description
|
||||
const shortDesc = agent.description.length > 120 ? agent.description.slice(0, 120) + "..." : agent.description
|
||||
const suffix = agent.name === "explore" || agent.name === "librarian" ? " (multiple)" : ""
|
||||
lines.push(`- \`${agent.name}${suffix}\`: ${shortDesc}`)
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ export function createExploreAgent(model: string): AgentConfig {
|
||||
"write",
|
||||
"edit",
|
||||
"task",
|
||||
"delegate_task",
|
||||
"task",
|
||||
"call_omo_agent",
|
||||
])
|
||||
|
||||
|
||||
@@ -227,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*
|
||||
|
||||
**Delegation Check (MANDATORY before acting directly):**
|
||||
1. Is there a specialized agent that perfectly matches this request?
|
||||
2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with?
|
||||
- MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\`
|
||||
2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
|
||||
- MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
|
||||
3. Can I do it myself for the best result, FOR SURE?
|
||||
|
||||
**Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
|
||||
@@ -280,15 +280,15 @@ ${librarianSection}
|
||||
// CORRECT: Always background, always parallel
|
||||
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
|
||||
// Contextual Grep (internal)
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
|
||||
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
|
||||
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
|
||||
// Reference Grep (external)
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
|
||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
|
||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
|
||||
// Continue immediately - collect results when needed
|
||||
|
||||
// WRONG: Sequential or blocking - NEVER DO THIS
|
||||
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
||||
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
||||
\`\`\`
|
||||
|
||||
**Rules:**
|
||||
@@ -393,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
|
||||
|
||||
### Session Continuity (MANDATORY)
|
||||
|
||||
Every \`delegate_task()\` output includes a session_id. **USE IT.**
|
||||
Every \`task()\` output includes a session_id. **USE IT.**
|
||||
|
||||
**ALWAYS continue when:**
|
||||
| Scenario | Action |
|
||||
|
||||
@@ -26,7 +26,7 @@ export function createLibrarianAgent(model: string): AgentConfig {
|
||||
"write",
|
||||
"edit",
|
||||
"task",
|
||||
"delegate_task",
|
||||
"task",
|
||||
"call_omo_agent",
|
||||
])
|
||||
|
||||
|
||||
@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
|
||||
"write",
|
||||
"edit",
|
||||
"task",
|
||||
"delegate_task",
|
||||
])
|
||||
|
||||
export function createMetisAgent(model: string): AgentConfig {
|
||||
|
||||
@@ -193,7 +193,7 @@ export function createMomusAgent(model: string): AgentConfig {
|
||||
"write",
|
||||
"edit",
|
||||
"task",
|
||||
"delegate_task",
|
||||
"task",
|
||||
])
|
||||
|
||||
const base = {
|
||||
|
||||
@@ -147,7 +147,7 @@ export function createOracleAgent(model: string): AgentConfig {
|
||||
"write",
|
||||
"edit",
|
||||
"task",
|
||||
"delegate_task",
|
||||
"task",
|
||||
])
|
||||
|
||||
const base = {
|
||||
|
||||
@@ -15,7 +15,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
|
||||
\`\`\`typescript
|
||||
// After generating initial plan
|
||||
while (true) {
|
||||
const result = delegate_task(
|
||||
const result = task(
|
||||
subagent_type="momus",
|
||||
prompt=".sisyphus/plans/{name}.md",
|
||||
run_in_background=false
|
||||
|
||||
@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
|
||||
**Research First:**
|
||||
\`\`\`typescript
|
||||
// Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
|
||||
delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
|
||||
delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**Interview Focus:**
|
||||
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
|
||||
\`\`\`typescript
|
||||
// Launch BEFORE asking user questions
|
||||
// Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
|
||||
delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
|
||||
delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
|
||||
delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**Interview Focus** (AFTER research):
|
||||
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js
|
||||
|
||||
Run this check:
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
#### Step 2: Ask the Test Question (MANDATORY)
|
||||
@@ -230,13 +230,13 @@ Add to draft immediately:
|
||||
|
||||
**Research First:**
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
|
||||
delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**Oracle Consultation** (recommend when stakes are high):
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
|
||||
task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
|
||||
\`\`\`
|
||||
|
||||
**Interview Focus:**
|
||||
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:
|
||||
|
||||
**Parallel Investigation:**
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
|
||||
delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
|
||||
delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**Interview Focus:**
|
||||
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i
|
||||
|
||||
**For Understanding Codebase:**
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
|
||||
task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**For External Knowledge:**
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
**For Implementation Examples:**
|
||||
\`\`\`typescript
|
||||
delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
|
||||
task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
|
||||
\`\`\`
|
||||
|
||||
## Interview Mode Anti-Patterns
|
||||
|
||||
@@ -59,7 +59,7 @@ todoWrite([
|
||||
**BEFORE generating the plan**, summon Metis to catch what you might have missed:
|
||||
|
||||
\`\`\`typescript
|
||||
delegate_task(
|
||||
task(
|
||||
subagent_type="metis",
|
||||
prompt=\`Review this planning session before I generate the work plan:
|
||||
|
||||
|
||||
@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential
|
||||
|
||||
| Wave | Tasks | Recommended Agents |
|
||||
|------|-------|-------------------|
|
||||
| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) |
|
||||
| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
|
||||
| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
|
||||
| 3 | 4 | final integration task |
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
|
||||
<Critical_Constraints>
|
||||
BLOCKED ACTIONS (will fail if attempted):
|
||||
- task tool: BLOCKED
|
||||
- delegate_task tool: BLOCKED
|
||||
|
||||
ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
|
||||
You work ALONE for implementation. No delegation of implementation tasks.
|
||||
|
||||
@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
|
||||
| Tool | Status |
|
||||
|------|--------|
|
||||
| task | BLOCKED |
|
||||
| delegate_task | BLOCKED |
|
||||
|
||||
ALLOWED:
|
||||
| Tool | Usage |
|
||||
|
||||
@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => {
|
||||
test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => {
|
||||
describe("tool safety (task blocked, call_omo_agent allowed)", () => {
|
||||
test("task remains blocked, call_omo_agent is allowed via tools format", () => {
|
||||
// given
|
||||
const override = {
|
||||
tools: {
|
||||
task: true,
|
||||
delegate_task: true,
|
||||
call_omo_agent: true,
|
||||
read: true,
|
||||
},
|
||||
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
|
||||
const permission = result.permission as Record<string, string> | undefined
|
||||
if (tools) {
|
||||
expect(tools.task).toBe(false)
|
||||
expect(tools.delegate_task).toBe(false)
|
||||
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
|
||||
expect(tools.call_omo_agent).toBe(true)
|
||||
expect(tools.read).toBe(true)
|
||||
}
|
||||
if (permission) {
|
||||
expect(permission.task).toBe("deny")
|
||||
expect(permission.delegate_task).toBe("deny")
|
||||
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
|
||||
expect(permission.call_omo_agent).toBe("allow")
|
||||
}
|
||||
})
|
||||
|
||||
test("task and delegate_task remain blocked when using permission format override", () => {
|
||||
test("task remains blocked when using permission format override", () => {
|
||||
// given
|
||||
const override = {
|
||||
permission: {
|
||||
task: "allow",
|
||||
delegate_task: "allow",
|
||||
call_omo_agent: "allow",
|
||||
read: "allow",
|
||||
},
|
||||
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
|
||||
// when
|
||||
const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])
|
||||
|
||||
// then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning
|
||||
// then - task blocked, but call_omo_agent allowed for explore/librarian spawning
|
||||
const tools = result.tools as Record<string, boolean> | undefined
|
||||
const permission = result.permission as Record<string, string> | undefined
|
||||
if (tools) {
|
||||
expect(tools.task).toBe(false)
|
||||
expect(tools.delegate_task).toBe(false)
|
||||
expect(tools.call_omo_agent).toBe(true)
|
||||
}
|
||||
if (permission) {
|
||||
expect(permission.task).toBe("deny")
|
||||
expect(permission.delegate_task).toBe("deny")
|
||||
expect(permission.call_omo_agent).toBe("allow")
|
||||
}
|
||||
})
|
||||
|
||||
@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"
|
||||
|
||||
// Core tools that Sisyphus-Junior must NEVER have access to
|
||||
// Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
|
||||
const BLOCKED_TOOLS = ["task", "delegate_task"]
|
||||
const BLOCKED_TOOLS = ["task"]
|
||||
|
||||
export const SISYPHUS_JUNIOR_DEFAULTS = {
|
||||
model: "anthropic/claude-sonnet-4-5",
|
||||
|
||||
@@ -214,8 +214,8 @@ ${keyTriggers}
|
||||
|
||||
**Delegation Check (MANDATORY before acting directly):**
|
||||
1. Is there a specialized agent that perfectly matches this request?
|
||||
2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
|
||||
- MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER.
|
||||
2. If not, is there a \`task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
|
||||
- MUST FIND skills to use, for: \`task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS TASK PARAMETER.
|
||||
3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?
|
||||
|
||||
**Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
|
||||
@@ -277,15 +277,15 @@ ${librarianSection}
|
||||
// CORRECT: Always background, always parallel
|
||||
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
|
||||
// Contextual Grep (internal)
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
|
||||
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
|
||||
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
|
||||
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
|
||||
// Reference Grep (external)
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
|
||||
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
|
||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
|
||||
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
|
||||
// Continue working immediately. Collect with background_output when needed.
|
||||
|
||||
// WRONG: Sequential or blocking
|
||||
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
||||
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
|
||||
\`\`\`
|
||||
|
||||
### Background Result Collection:
|
||||
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
|
||||
|
||||
### Session Continuity (MANDATORY)
|
||||
|
||||
Every \`delegate_task()\` output includes a session_id. **USE IT.**
|
||||
Every \`task()\` output includes a session_id. **USE IT.**
|
||||
|
||||
**ALWAYS continue when:**
|
||||
| Scenario | Action |
|
||||
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
|
||||
|
||||
\`\`\`typescript
|
||||
// WRONG: Starting fresh loses all context
|
||||
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...")
|
||||
task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")
|
||||
|
||||
// CORRECT: Resume preserves everything
|
||||
delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
|
||||
task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
|
||||
\`\`\`
|
||||
|
||||
**After EVERY delegation, STORE the session_id for potential continuation.**
|
||||
|
||||
@@ -79,6 +79,72 @@ describe("createBuiltinAgents with model overrides", () => {
|
||||
}
|
||||
})
|
||||
|
||||
test("user config model takes priority over uiSelectedModel for sisyphus", async () => {
|
||||
// #given
|
||||
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
|
||||
)
|
||||
const uiSelectedModel = "openai/gpt-5.2"
|
||||
const overrides = {
|
||||
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
|
||||
}
|
||||
|
||||
try {
|
||||
// #when
|
||||
const agents = await createBuiltinAgents(
|
||||
[],
|
||||
overrides,
|
||||
undefined,
|
||||
TEST_DEFAULT_MODEL,
|
||||
undefined,
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
uiSelectedModel
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(agents.sisyphus).toBeDefined()
|
||||
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
|
||||
} finally {
|
||||
fetchSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
test("user config model takes priority over uiSelectedModel for atlas", async () => {
|
||||
// #given
|
||||
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set(["openai/gpt-5.2", "anthropic/claude-sonnet-4-5"])
|
||||
)
|
||||
const uiSelectedModel = "openai/gpt-5.2"
|
||||
const overrides = {
|
||||
atlas: { model: "google/antigravity-claude-opus-4-5-thinking" },
|
||||
}
|
||||
|
||||
try {
|
||||
// #when
|
||||
const agents = await createBuiltinAgents(
|
||||
[],
|
||||
overrides,
|
||||
undefined,
|
||||
TEST_DEFAULT_MODEL,
|
||||
undefined,
|
||||
undefined,
|
||||
[],
|
||||
undefined,
|
||||
undefined,
|
||||
uiSelectedModel
|
||||
)
|
||||
|
||||
// #then
|
||||
expect(agents.atlas).toBeDefined()
|
||||
expect(agents.atlas.model).toBe("google/antigravity-claude-opus-4-5-thinking")
|
||||
} finally {
|
||||
fetchSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
test("Sisyphus is created on first run when no availableModels or cache exist", async () => {
|
||||
// #given
|
||||
const systemDefaultModel = "anthropic/claude-opus-4-6"
|
||||
@@ -422,6 +488,58 @@ describe("createBuiltinAgents with requiresAnyModel gating (sisyphus)", () => {
|
||||
cacheSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
test("sisyphus uses user-configured plugin model even when not in cache or fallback chain", async () => {
|
||||
// #given - user configures a model from a plugin provider (like antigravity)
|
||||
// that is NOT in the availableModels cache and NOT in the fallback chain
|
||||
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set(["openai/gpt-5.2"])
|
||||
)
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
|
||||
["openai"]
|
||||
)
|
||||
const overrides = {
|
||||
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
|
||||
}
|
||||
|
||||
try {
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
|
||||
|
||||
// #then
|
||||
expect(agents.sisyphus).toBeDefined()
|
||||
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
|
||||
} finally {
|
||||
fetchSpy.mockRestore()
|
||||
cacheSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
test("sisyphus uses user-configured plugin model when availableModels is empty but cache exists", async () => {
|
||||
// #given - connected providers cache exists but models cache is empty
|
||||
// This reproduces the exact scenario where provider-models.json has models: {}
|
||||
const fetchSpy = spyOn(shared, "fetchAvailableModels").mockResolvedValue(
|
||||
new Set()
|
||||
)
|
||||
const cacheSpy = spyOn(connectedProvidersCache, "readConnectedProvidersCache").mockReturnValue(
|
||||
["google", "openai", "opencode"]
|
||||
)
|
||||
const overrides = {
|
||||
sisyphus: { model: "google/antigravity-claude-opus-4-5-thinking" },
|
||||
}
|
||||
|
||||
try {
|
||||
// #when
|
||||
const agents = await createBuiltinAgents([], overrides, undefined, TEST_DEFAULT_MODEL, undefined, undefined, [], {})
|
||||
|
||||
// #then
|
||||
expect(agents.sisyphus).toBeDefined()
|
||||
expect(agents.sisyphus.model).toBe("google/antigravity-claude-opus-4-5-thinking")
|
||||
} finally {
|
||||
fetchSpy.mockRestore()
|
||||
cacheSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
describe("buildAgent with category and skills", () => {
|
||||
|
||||
@@ -304,7 +304,7 @@ export async function createBuiltinAgents(
|
||||
const isPrimaryAgent = isFactory(source) && source.mode === "primary"
|
||||
|
||||
const resolution = applyModelResolution({
|
||||
uiSelectedModel: isPrimaryAgent ? uiSelectedModel : undefined,
|
||||
uiSelectedModel: (isPrimaryAgent && !override?.model) ? uiSelectedModel : undefined,
|
||||
userModel: override?.model,
|
||||
requirement,
|
||||
availableModels,
|
||||
@@ -356,7 +356,7 @@ export async function createBuiltinAgents(
|
||||
|
||||
if (!disabledAgents.includes("sisyphus") && meetsSisyphusAnyModelRequirement) {
|
||||
let sisyphusResolution = applyModelResolution({
|
||||
uiSelectedModel,
|
||||
uiSelectedModel: sisyphusOverride?.model ? undefined : uiSelectedModel,
|
||||
userModel: sisyphusOverride?.model,
|
||||
requirement: sisyphusRequirement,
|
||||
availableModels,
|
||||
@@ -454,7 +454,7 @@ export async function createBuiltinAgents(
|
||||
const atlasRequirement = AGENT_MODEL_REQUIREMENTS["atlas"]
|
||||
|
||||
const atlasResolution = applyModelResolution({
|
||||
uiSelectedModel,
|
||||
uiSelectedModel: orchestratorOverride?.model ? undefined : uiSelectedModel,
|
||||
userModel: orchestratorOverride?.model,
|
||||
requirement: atlasRequirement,
|
||||
availableModels,
|
||||
|
||||
@@ -29,7 +29,7 @@ describe("gh cli check", () => {
|
||||
|
||||
it("returns gh cli info structure", async () => {
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockImplementation((cmd) => {
|
||||
if (Array.isArray(cmd) && cmd[0] === "which" && cmd[1] === "gh") {
|
||||
if (Array.isArray(cmd) && (cmd[0] === "which" || cmd[0] === "where") && cmd[1] === "gh") {
|
||||
return createProc({ stdout: "/usr/bin/gh\n" })
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,8 @@ export interface GhCliInfo {
|
||||
|
||||
async function checkBinaryExists(binary: string): Promise<{ exists: boolean; path: string | null }> {
|
||||
try {
|
||||
const proc = Bun.spawn(["which", binary], { stdout: "pipe", stderr: "pipe" })
|
||||
const whichCmd = process.platform === "win32" ? "where" : "which"
|
||||
const proc = Bun.spawn([whichCmd, binary], { stdout: "pipe", stderr: "pipe" })
|
||||
const output = await new Response(proc.stdout).text()
|
||||
await proc.exited
|
||||
if (proc.exitCode === 0) {
|
||||
|
||||
@@ -64,16 +64,28 @@ Model Providers (Priority: Native > Copilot > OpenCode Zen > Z.ai > Kimi):
|
||||
})
|
||||
|
||||
program
|
||||
.command("run <message>")
|
||||
.description("Run opencode with todo/background task completion enforcement")
|
||||
.command("run <message>")
|
||||
.allowUnknownOption()
|
||||
.passThroughOptions()
|
||||
.description("Run opencode with todo/background task completion enforcement")
|
||||
.option("-a, --agent <name>", "Agent to use (default: from CLI/env/config, fallback: Sisyphus)")
|
||||
.option("-d, --directory <path>", "Working directory")
|
||||
.option("-t, --timeout <ms>", "Timeout in milliseconds (default: 30 minutes)", parseInt)
|
||||
.option("-p, --port <port>", "Server port (attaches if port already in use)", parseInt)
|
||||
.option("--attach <url>", "Attach to existing opencode server URL")
|
||||
.option("--on-complete <command>", "Shell command to run after completion")
|
||||
.option("--json", "Output structured JSON result to stdout")
|
||||
.option("--session-id <id>", "Resume existing session instead of creating new one")
|
||||
.addHelpText("after", `
|
||||
Examples:
|
||||
$ bunx oh-my-opencode run "Fix the bug in index.ts"
|
||||
$ bunx oh-my-opencode run --agent Sisyphus "Implement feature X"
|
||||
$ bunx oh-my-opencode run --timeout 3600000 "Large refactoring task"
|
||||
$ bunx oh-my-opencode run --port 4321 "Fix the bug"
|
||||
$ bunx oh-my-opencode run --attach http://127.0.0.1:4321 "Fix the bug"
|
||||
$ bunx oh-my-opencode run --json "Fix the bug" | jq .sessionId
|
||||
$ bunx oh-my-opencode run --on-complete "notify-send Done" "Fix the bug"
|
||||
$ bunx oh-my-opencode run --session-id ses_abc123 "Continue the work"
|
||||
|
||||
Agent resolution order:
|
||||
1) --agent flag
|
||||
@@ -89,11 +101,20 @@ Unlike 'opencode run', this command waits until:
|
||||
- All child sessions (background tasks) are idle
|
||||
`)
|
||||
.action(async (message: string, options) => {
|
||||
if (options.port && options.attach) {
|
||||
console.error("Error: --port and --attach are mutually exclusive")
|
||||
process.exit(1)
|
||||
}
|
||||
const runOptions: RunOptions = {
|
||||
message,
|
||||
agent: options.agent,
|
||||
directory: options.directory,
|
||||
timeout: options.timeout,
|
||||
port: options.port,
|
||||
attach: options.attach,
|
||||
onComplete: options.onComplete,
|
||||
json: options.json ?? false,
|
||||
sessionId: options.sessionId,
|
||||
}
|
||||
const exitCode = await run(runOptions)
|
||||
process.exit(exitCode)
|
||||
|
||||
69
src/cli/run/agent-resolver.ts
Normal file
69
src/cli/run/agent-resolver.ts
Normal file
@@ -0,0 +1,69 @@
|
||||
import pc from "picocolors"
|
||||
import type { RunOptions } from "./types"
|
||||
import type { OhMyOpenCodeConfig } from "../../config"
|
||||
|
||||
const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
|
||||
const DEFAULT_AGENT = "sisyphus"
|
||||
|
||||
type EnvVars = Record<string, string | undefined>
|
||||
|
||||
const normalizeAgentName = (agent?: string): string | undefined => {
|
||||
if (!agent) return undefined
|
||||
const trimmed = agent.trim()
|
||||
if (!trimmed) return undefined
|
||||
const lowered = trimmed.toLowerCase()
|
||||
const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
|
||||
return coreMatch ?? trimmed
|
||||
}
|
||||
|
||||
const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
|
||||
const lowered = agent.toLowerCase()
|
||||
if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
|
||||
return true
|
||||
}
|
||||
return (config.disabled_agents ?? []).some(
|
||||
(disabled) => disabled.toLowerCase() === lowered
|
||||
)
|
||||
}
|
||||
|
||||
const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
|
||||
for (const agent of CORE_AGENT_ORDER) {
|
||||
if (!isAgentDisabled(agent, config)) {
|
||||
return agent
|
||||
}
|
||||
}
|
||||
return DEFAULT_AGENT
|
||||
}
|
||||
|
||||
export const resolveRunAgent = (
|
||||
options: RunOptions,
|
||||
pluginConfig: OhMyOpenCodeConfig,
|
||||
env: EnvVars = process.env
|
||||
): string => {
|
||||
const cliAgent = normalizeAgentName(options.agent)
|
||||
const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
|
||||
const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
|
||||
const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
|
||||
const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
|
||||
|
||||
if (isAgentDisabled(normalized, pluginConfig)) {
|
||||
const fallback = pickFallbackAgent(pluginConfig)
|
||||
const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
|
||||
if (fallbackDisabled) {
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
|
||||
)
|
||||
)
|
||||
return fallback
|
||||
}
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
|
||||
)
|
||||
)
|
||||
return fallback
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
@@ -65,6 +65,8 @@ export interface EventState {
|
||||
currentTool: string | null
|
||||
/** Set to true when the main session has produced meaningful work (text, tool call, or tool result) */
|
||||
hasReceivedMeaningfulWork: boolean
|
||||
/** Count of assistant messages for the main session */
|
||||
messageCount: number
|
||||
}
|
||||
|
||||
export function createEventState(): EventState {
|
||||
@@ -76,6 +78,7 @@ export function createEventState(): EventState {
|
||||
lastPartText: "",
|
||||
currentTool: null,
|
||||
hasReceivedMeaningfulWork: false,
|
||||
messageCount: 0,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -266,6 +269,7 @@ function handleMessageUpdated(
|
||||
if (props?.info?.role !== "assistant") return
|
||||
|
||||
state.hasReceivedMeaningfulWork = true
|
||||
state.messageCount++
|
||||
}
|
||||
|
||||
function handleToolExecute(
|
||||
|
||||
@@ -1,2 +1,7 @@
|
||||
export { run } from "./runner"
|
||||
export type { RunOptions, RunContext } from "./types"
|
||||
export { resolveRunAgent } from "./agent-resolver"
|
||||
export { createServerConnection } from "./server-connection"
|
||||
export { resolveSession } from "./session-resolver"
|
||||
export { createJsonOutputManager } from "./json-output"
|
||||
export { executeOnCompleteHook } from "./on-complete-hook"
|
||||
export type { RunOptions, RunContext, RunResult, ServerConnection } from "./types"
|
||||
|
||||
294
src/cli/run/integration.test.ts
Normal file
294
src/cli/run/integration.test.ts
Normal file
@@ -0,0 +1,294 @@
|
||||
import { describe, it, expect, mock, spyOn, beforeEach, afterEach } from "bun:test"
|
||||
import type { RunResult } from "./types"
|
||||
import { createJsonOutputManager } from "./json-output"
|
||||
import { resolveSession } from "./session-resolver"
|
||||
import { executeOnCompleteHook } from "./on-complete-hook"
|
||||
import type { OpencodeClient } from "./types"
|
||||
|
||||
const mockServerClose = mock(() => {})
|
||||
const mockCreateOpencode = mock(() =>
|
||||
Promise.resolve({
|
||||
client: { session: {} },
|
||||
server: { url: "http://127.0.0.1:9999", close: mockServerClose },
|
||||
})
|
||||
)
|
||||
const mockCreateOpencodeClient = mock(() => ({ session: {} }))
|
||||
const mockIsPortAvailable = mock(() => Promise.resolve(true))
|
||||
const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 9999, wasAutoSelected: false }))
|
||||
|
||||
mock.module("@opencode-ai/sdk", () => ({
|
||||
createOpencode: mockCreateOpencode,
|
||||
createOpencodeClient: mockCreateOpencodeClient,
|
||||
}))
|
||||
|
||||
mock.module("../../shared/port-utils", () => ({
|
||||
isPortAvailable: mockIsPortAvailable,
|
||||
getAvailableServerPort: mockGetAvailableServerPort,
|
||||
DEFAULT_SERVER_PORT: 4096,
|
||||
}))
|
||||
|
||||
const { createServerConnection } = await import("./server-connection")
|
||||
|
||||
interface MockWriteStream {
|
||||
write: (chunk: string) => boolean
|
||||
writes: string[]
|
||||
}
|
||||
|
||||
function createMockWriteStream(): MockWriteStream {
|
||||
const writes: string[] = []
|
||||
return {
|
||||
writes,
|
||||
write: function (this: MockWriteStream, chunk: string): boolean {
|
||||
this.writes.push(chunk)
|
||||
return true
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
const createMockClient = (
|
||||
getResult?: { error?: unknown; data?: { id: string } }
|
||||
): OpencodeClient => ({
|
||||
session: {
|
||||
get: mock((opts: { path: { id: string } }) =>
|
||||
Promise.resolve(getResult ?? { data: { id: opts.path.id } })
|
||||
),
|
||||
create: mock(() => Promise.resolve({ data: { id: "new-session-id" } })),
|
||||
},
|
||||
} as unknown as OpencodeClient)
|
||||
|
||||
describe("integration: --json mode", () => {
|
||||
it("emits valid RunResult JSON to stdout", () => {
|
||||
// given
|
||||
const mockStdout = createMockWriteStream()
|
||||
const mockStderr = createMockWriteStream()
|
||||
const result: RunResult = {
|
||||
sessionId: "test-session",
|
||||
success: true,
|
||||
durationMs: 1234,
|
||||
messageCount: 42,
|
||||
summary: "Test summary",
|
||||
}
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
|
||||
// when
|
||||
manager.emitResult(result)
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toHaveLength(1)
|
||||
const emitted = mockStdout.writes[0]!
|
||||
expect(() => JSON.parse(emitted)).not.toThrow()
|
||||
const parsed = JSON.parse(emitted) as RunResult
|
||||
expect(parsed.sessionId).toBe("test-session")
|
||||
expect(parsed.success).toBe(true)
|
||||
expect(parsed.durationMs).toBe(1234)
|
||||
expect(parsed.messageCount).toBe(42)
|
||||
expect(parsed.summary).toBe("Test summary")
|
||||
})
|
||||
|
||||
it("redirects stdout to stderr when active", () => {
|
||||
// given
|
||||
spyOn(console, "log").mockImplementation(() => {})
|
||||
const mockStdout = createMockWriteStream()
|
||||
const mockStderr = createMockWriteStream()
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
manager.redirectToStderr()
|
||||
|
||||
// when
|
||||
mockStdout.write("should go to stderr")
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toHaveLength(0)
|
||||
expect(mockStderr.writes).toEqual(["should go to stderr"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("integration: --session-id", () => {
|
||||
beforeEach(() => {
|
||||
spyOn(console, "log").mockImplementation(() => {})
|
||||
spyOn(console, "error").mockImplementation(() => {})
|
||||
})
|
||||
|
||||
it("resolves provided session ID without creating new session", async () => {
|
||||
// given
|
||||
const sessionId = "existing-session-id"
|
||||
const mockClient = createMockClient({ data: { id: sessionId } })
|
||||
|
||||
// when
|
||||
const result = await resolveSession({ client: mockClient, sessionId })
|
||||
|
||||
// then
|
||||
expect(result).toBe(sessionId)
|
||||
expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
|
||||
expect(mockClient.session.create).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it("throws when session does not exist", async () => {
|
||||
// given
|
||||
const sessionId = "non-existent-session-id"
|
||||
const mockClient = createMockClient({ error: { message: "Session not found" } })
|
||||
|
||||
// when
|
||||
const result = resolveSession({ client: mockClient, sessionId })
|
||||
|
||||
// then
|
||||
await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
|
||||
expect(mockClient.session.get).toHaveBeenCalledWith({ path: { id: sessionId } })
|
||||
expect(mockClient.session.create).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
|
||||
describe("integration: --on-complete", () => {
|
||||
let spawnSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
spyOn(console, "error").mockImplementation(() => {})
|
||||
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
|
||||
exited: Promise.resolve(0),
|
||||
exitCode: 0,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
spawnSpy.mockRestore()
|
||||
})
|
||||
|
||||
it("passes all 4 env vars as strings to spawned process", async () => {
|
||||
// given
|
||||
spawnSpy.mockClear()
|
||||
|
||||
// when
|
||||
await executeOnCompleteHook({
|
||||
command: "echo test",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
|
||||
// then
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
expect(options?.env?.EXIT_CODE).toBe("0")
|
||||
expect(options?.env?.DURATION_MS).toBe("5000")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBe("10")
|
||||
expect(options?.env?.SESSION_ID).toBeTypeOf("string")
|
||||
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
|
||||
expect(options?.env?.DURATION_MS).toBeTypeOf("string")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
|
||||
})
|
||||
})
|
||||
|
||||
describe("integration: option combinations", () => {
|
||||
let mockStdout: MockWriteStream
|
||||
let mockStderr: MockWriteStream
|
||||
let spawnSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
spyOn(console, "log").mockImplementation(() => {})
|
||||
spyOn(console, "error").mockImplementation(() => {})
|
||||
mockStdout = createMockWriteStream()
|
||||
mockStderr = createMockWriteStream()
|
||||
spawnSpy = spyOn(Bun, "spawn").mockReturnValue({
|
||||
exited: Promise.resolve(0),
|
||||
exitCode: 0,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
spawnSpy?.mockRestore?.()
|
||||
})
|
||||
|
||||
it("json output and on-complete hook can both execute", async () => {
|
||||
// given - json manager active + on-complete hook ready
|
||||
const result: RunResult = {
|
||||
sessionId: "session-123",
|
||||
success: true,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
summary: "Test completed",
|
||||
}
|
||||
const jsonManager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
jsonManager.redirectToStderr()
|
||||
spawnSpy.mockClear()
|
||||
|
||||
// when - both are invoked sequentially (as runner would)
|
||||
jsonManager.emitResult(result)
|
||||
await executeOnCompleteHook({
|
||||
command: "echo done",
|
||||
sessionId: result.sessionId,
|
||||
exitCode: result.success ? 0 : 1,
|
||||
durationMs: result.durationMs,
|
||||
messageCount: result.messageCount,
|
||||
})
|
||||
|
||||
// then - json emits result AND on-complete hook runs
|
||||
expect(mockStdout.writes).toHaveLength(1)
|
||||
const emitted = mockStdout.writes[0]!
|
||||
expect(() => JSON.parse(emitted)).not.toThrow()
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [args] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
expect(args).toEqual(["sh", "-c", "echo done"])
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
expect(options?.env?.EXIT_CODE).toBe("0")
|
||||
expect(options?.env?.DURATION_MS).toBe("5000")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBe("10")
|
||||
})
|
||||
})
|
||||
|
||||
describe("integration: server connection", () => {
|
||||
let consoleSpy: ReturnType<typeof spyOn>
|
||||
|
||||
beforeEach(() => {
|
||||
consoleSpy = spyOn(console, "log").mockImplementation(() => {})
|
||||
mockCreateOpencode.mockClear()
|
||||
mockCreateOpencodeClient.mockClear()
|
||||
mockServerClose.mockClear()
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
consoleSpy.mockRestore()
|
||||
})
|
||||
|
||||
it("attach mode creates client with no-op cleanup", async () => {
|
||||
// given
|
||||
const signal = new AbortController().signal
|
||||
const attachUrl = "http://localhost:8080"
|
||||
|
||||
// when
|
||||
const result = await createServerConnection({ attach: attachUrl, signal })
|
||||
|
||||
// then
|
||||
expect(result.client).toBeDefined()
|
||||
expect(result.cleanup).toBeDefined()
|
||||
expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
|
||||
result.cleanup()
|
||||
expect(mockServerClose).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
it("port with available port starts server", async () => {
|
||||
// given
|
||||
const signal = new AbortController().signal
|
||||
const port = 9999
|
||||
|
||||
// when
|
||||
const result = await createServerConnection({ port, signal })
|
||||
|
||||
// then
|
||||
expect(result.client).toBeDefined()
|
||||
expect(result.cleanup).toBeDefined()
|
||||
expect(mockCreateOpencode).toHaveBeenCalled()
|
||||
result.cleanup()
|
||||
expect(mockServerClose).toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
170
src/cli/run/json-output.test.ts
Normal file
170
src/cli/run/json-output.test.ts
Normal file
@@ -0,0 +1,170 @@
|
||||
import { describe, it, expect, beforeEach } from "bun:test"
|
||||
import type { RunResult } from "./types"
|
||||
import { createJsonOutputManager } from "./json-output"
|
||||
|
||||
interface MockWriteStream {
|
||||
write: (chunk: string) => boolean
|
||||
writes: string[]
|
||||
}
|
||||
|
||||
function createMockWriteStream(): MockWriteStream {
|
||||
const stream: MockWriteStream = {
|
||||
writes: [],
|
||||
write: function (this: MockWriteStream, chunk: string): boolean {
|
||||
this.writes.push(chunk)
|
||||
return true
|
||||
},
|
||||
}
|
||||
return stream
|
||||
}
|
||||
|
||||
describe("createJsonOutputManager", () => {
|
||||
let mockStdout: MockWriteStream
|
||||
let mockStderr: MockWriteStream
|
||||
|
||||
beforeEach(() => {
|
||||
mockStdout = createMockWriteStream()
|
||||
mockStderr = createMockWriteStream()
|
||||
})
|
||||
|
||||
describe("redirectToStderr", () => {
|
||||
it("causes stdout writes to go to stderr", () => {
|
||||
// given
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
manager.redirectToStderr()
|
||||
|
||||
// when
|
||||
mockStdout.write("test message")
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toHaveLength(0)
|
||||
expect(mockStderr.writes).toEqual(["test message"])
|
||||
})
|
||||
})
|
||||
|
||||
describe("restore", () => {
|
||||
it("reverses the redirect", () => {
|
||||
// given
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
manager.redirectToStderr()
|
||||
|
||||
// when
|
||||
manager.restore()
|
||||
mockStdout.write("restored message")
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toEqual(["restored message"])
|
||||
expect(mockStderr.writes).toHaveLength(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe("emitResult", () => {
|
||||
it("writes valid JSON to stdout", () => {
|
||||
// given
|
||||
const result: RunResult = {
|
||||
sessionId: "test-session",
|
||||
success: true,
|
||||
durationMs: 1234,
|
||||
messageCount: 42,
|
||||
summary: "Test summary",
|
||||
}
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
|
||||
// when
|
||||
manager.emitResult(result)
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toHaveLength(1)
|
||||
const emitted = mockStdout.writes[0]!
|
||||
expect(() => JSON.parse(emitted)).not.toThrow()
|
||||
})
|
||||
|
||||
it("output matches RunResult schema", () => {
|
||||
// given
|
||||
const result: RunResult = {
|
||||
sessionId: "test-session",
|
||||
success: true,
|
||||
durationMs: 1234,
|
||||
messageCount: 42,
|
||||
summary: "Test summary",
|
||||
}
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
|
||||
// when
|
||||
manager.emitResult(result)
|
||||
|
||||
// then
|
||||
const emitted = mockStdout.writes[0]!
|
||||
const parsed = JSON.parse(emitted) as RunResult
|
||||
expect(parsed).toEqual(result)
|
||||
expect(parsed.sessionId).toBe("test-session")
|
||||
expect(parsed.success).toBe(true)
|
||||
expect(parsed.durationMs).toBe(1234)
|
||||
expect(parsed.messageCount).toBe(42)
|
||||
expect(parsed.summary).toBe("Test summary")
|
||||
})
|
||||
|
||||
it("restores stdout even if redirect was active", () => {
|
||||
// given
|
||||
const result: RunResult = {
|
||||
sessionId: "test-session",
|
||||
success: true,
|
||||
durationMs: 100,
|
||||
messageCount: 1,
|
||||
summary: "Test",
|
||||
}
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
manager.redirectToStderr()
|
||||
|
||||
// when
|
||||
manager.emitResult(result)
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toHaveLength(1)
|
||||
expect(mockStdout.writes[0]!).toBe(JSON.stringify(result) + "\n")
|
||||
|
||||
mockStdout.write("after emit")
|
||||
expect(mockStdout.writes).toHaveLength(2)
|
||||
expect(mockStderr.writes).toHaveLength(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe("multiple redirects and restores", () => {
|
||||
it("work correctly", () => {
|
||||
// given
|
||||
const manager = createJsonOutputManager({
|
||||
stdout: mockStdout as unknown as NodeJS.WriteStream,
|
||||
stderr: mockStderr as unknown as NodeJS.WriteStream,
|
||||
})
|
||||
|
||||
// when
|
||||
manager.redirectToStderr()
|
||||
mockStdout.write("first redirect")
|
||||
|
||||
manager.redirectToStderr()
|
||||
mockStdout.write("second redirect")
|
||||
|
||||
manager.restore()
|
||||
mockStdout.write("after restore")
|
||||
|
||||
// then
|
||||
expect(mockStdout.writes).toEqual(["after restore"])
|
||||
expect(mockStderr.writes).toEqual(["first redirect", "second redirect"])
|
||||
})
|
||||
})
|
||||
})
|
||||
52
src/cli/run/json-output.ts
Normal file
52
src/cli/run/json-output.ts
Normal file
@@ -0,0 +1,52 @@
|
||||
import type { RunResult } from "./types"
|
||||
|
||||
export interface JsonOutputManager {
|
||||
redirectToStderr: () => void
|
||||
restore: () => void
|
||||
emitResult: (result: RunResult) => void
|
||||
}
|
||||
|
||||
interface JsonOutputManagerOptions {
|
||||
stdout?: NodeJS.WriteStream
|
||||
stderr?: NodeJS.WriteStream
|
||||
}
|
||||
|
||||
export function createJsonOutputManager(
|
||||
options: JsonOutputManagerOptions = {}
|
||||
): JsonOutputManager {
|
||||
const stdout = options.stdout ?? process.stdout
|
||||
const stderr = options.stderr ?? process.stderr
|
||||
|
||||
const originalWrite = stdout.write.bind(stdout)
|
||||
|
||||
function redirectToStderr(): void {
|
||||
stdout.write = function (
|
||||
chunk: Uint8Array | string,
|
||||
encodingOrCallback?: BufferEncoding | ((error?: Error | null) => void),
|
||||
callback?: (error?: Error | null) => void
|
||||
): boolean {
|
||||
if (typeof encodingOrCallback === "function") {
|
||||
return stderr.write(chunk, encodingOrCallback)
|
||||
}
|
||||
if (encodingOrCallback !== undefined) {
|
||||
return stderr.write(chunk, encodingOrCallback, callback)
|
||||
}
|
||||
return stderr.write(chunk)
|
||||
} as NodeJS.WriteStream["write"]
|
||||
}
|
||||
|
||||
function restore(): void {
|
||||
stdout.write = originalWrite
|
||||
}
|
||||
|
||||
function emitResult(result: RunResult): void {
|
||||
restore()
|
||||
originalWrite(JSON.stringify(result) + "\n")
|
||||
}
|
||||
|
||||
return {
|
||||
redirectToStderr,
|
||||
restore,
|
||||
emitResult,
|
||||
}
|
||||
}
|
||||
179
src/cli/run/on-complete-hook.test.ts
Normal file
179
src/cli/run/on-complete-hook.test.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import { describe, it, expect, spyOn, beforeEach, afterEach } from "bun:test"
|
||||
import { executeOnCompleteHook } from "./on-complete-hook"
|
||||
|
||||
describe("executeOnCompleteHook", () => {
|
||||
function createProc(exitCode: number) {
|
||||
return {
|
||||
exited: Promise.resolve(exitCode),
|
||||
exitCode,
|
||||
} as unknown as ReturnType<typeof Bun.spawn>
|
||||
}
|
||||
|
||||
let consoleErrorSpy: ReturnType<typeof spyOn<typeof console, "error">>
|
||||
|
||||
beforeEach(() => {
|
||||
consoleErrorSpy = spyOn(console, "error").mockImplementation(() => {})
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
consoleErrorSpy.mockRestore()
|
||||
})
|
||||
|
||||
it("executes command with correct env vars", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
await executeOnCompleteHook({
|
||||
command: "echo test",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
|
||||
// then
|
||||
expect(spawnSpy).toHaveBeenCalledTimes(1)
|
||||
const [args, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
|
||||
expect(args).toEqual(["sh", "-c", "echo test"])
|
||||
expect(options?.env?.SESSION_ID).toBe("session-123")
|
||||
expect(options?.env?.EXIT_CODE).toBe("0")
|
||||
expect(options?.env?.DURATION_MS).toBe("5000")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBe("10")
|
||||
expect(options?.stdout).toBe("inherit")
|
||||
expect(options?.stderr).toBe("inherit")
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
it("env var values are strings", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
await executeOnCompleteHook({
|
||||
command: "echo test",
|
||||
sessionId: "session-123",
|
||||
exitCode: 1,
|
||||
durationMs: 12345,
|
||||
messageCount: 42,
|
||||
})
|
||||
|
||||
// then
|
||||
const [_, options] = spawnSpy.mock.calls[0] as Parameters<typeof Bun.spawn>
|
||||
|
||||
expect(options?.env?.EXIT_CODE).toBe("1")
|
||||
expect(options?.env?.EXIT_CODE).toBeTypeOf("string")
|
||||
expect(options?.env?.DURATION_MS).toBe("12345")
|
||||
expect(options?.env?.DURATION_MS).toBeTypeOf("string")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBe("42")
|
||||
expect(options?.env?.MESSAGE_COUNT).toBeTypeOf("string")
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
it("empty command string is no-op", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
await executeOnCompleteHook({
|
||||
command: "",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
|
||||
// then
|
||||
expect(spawnSpy).not.toHaveBeenCalled()
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
it("whitespace-only command is no-op", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(0))
|
||||
|
||||
try {
|
||||
// when
|
||||
await executeOnCompleteHook({
|
||||
command: " ",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
|
||||
// then
|
||||
expect(spawnSpy).not.toHaveBeenCalled()
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
it("command failure logs warning but does not throw", async () => {
|
||||
// given
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockReturnValue(createProc(1))
|
||||
|
||||
try {
|
||||
// when
|
||||
await expect(
|
||||
executeOnCompleteHook({
|
||||
command: "false",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
).resolves.toBeUndefined()
|
||||
|
||||
// then
|
||||
expect(consoleErrorSpy).toHaveBeenCalled()
|
||||
const warningCall = consoleErrorSpy.mock.calls.find(
|
||||
(call) => typeof call[0] === "string" && call[0].includes("Warning: on-complete hook exited with code 1")
|
||||
)
|
||||
expect(warningCall).toBeDefined()
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
|
||||
it("spawn error logs warning but does not throw", async () => {
|
||||
// given
|
||||
const spawnError = new Error("Command not found")
|
||||
const spawnSpy = spyOn(Bun, "spawn").mockImplementation(() => {
|
||||
throw spawnError
|
||||
})
|
||||
|
||||
try {
|
||||
// when
|
||||
await expect(
|
||||
executeOnCompleteHook({
|
||||
command: "nonexistent-command",
|
||||
sessionId: "session-123",
|
||||
exitCode: 0,
|
||||
durationMs: 5000,
|
||||
messageCount: 10,
|
||||
})
|
||||
).resolves.toBeUndefined()
|
||||
|
||||
// then
|
||||
expect(consoleErrorSpy).toHaveBeenCalled()
|
||||
const errorCalls = consoleErrorSpy.mock.calls.filter((call) => {
|
||||
const firstArg = call[0]
|
||||
return typeof firstArg === "string" && (firstArg.includes("Warning") || firstArg.toLowerCase().includes("error"))
|
||||
})
|
||||
expect(errorCalls.length).toBeGreaterThan(0)
|
||||
} finally {
|
||||
spawnSpy.mockRestore()
|
||||
}
|
||||
})
|
||||
})
|
||||
42
src/cli/run/on-complete-hook.ts
Normal file
42
src/cli/run/on-complete-hook.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import pc from "picocolors"
|
||||
|
||||
export async function executeOnCompleteHook(options: {
|
||||
command: string
|
||||
sessionId: string
|
||||
exitCode: number
|
||||
durationMs: number
|
||||
messageCount: number
|
||||
}): Promise<void> {
|
||||
const { command, sessionId, exitCode, durationMs, messageCount } = options
|
||||
|
||||
const trimmedCommand = command.trim()
|
||||
if (!trimmedCommand) {
|
||||
return
|
||||
}
|
||||
|
||||
console.error(pc.dim(`Running on-complete hook: ${trimmedCommand}`))
|
||||
|
||||
try {
|
||||
const proc = Bun.spawn(["sh", "-c", trimmedCommand], {
|
||||
env: {
|
||||
...process.env,
|
||||
SESSION_ID: sessionId,
|
||||
EXIT_CODE: String(exitCode),
|
||||
DURATION_MS: String(durationMs),
|
||||
MESSAGE_COUNT: String(messageCount),
|
||||
},
|
||||
stdout: "inherit",
|
||||
stderr: "inherit",
|
||||
})
|
||||
|
||||
const hookExitCode = await proc.exited
|
||||
|
||||
if (hookExitCode !== 0) {
|
||||
console.error(
|
||||
pc.yellow(`Warning: on-complete hook exited with code ${hookExitCode}`)
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(pc.yellow(`Warning: Failed to execute on-complete hook: ${error instanceof Error ? error.message : String(error)}`))
|
||||
}
|
||||
}
|
||||
@@ -1,101 +1,37 @@
|
||||
import { createOpencode } from "@opencode-ai/sdk"
|
||||
import pc from "picocolors"
|
||||
import type { RunOptions, RunContext } from "./types"
|
||||
import { checkCompletionConditions } from "./completion"
|
||||
import { createEventState, processEvents, serializeError } from "./events"
|
||||
import type { OhMyOpenCodeConfig } from "../../config"
|
||||
import { loadPluginConfig } from "../../plugin-config"
|
||||
import { getAvailableServerPort, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
|
||||
import { createServerConnection } from "./server-connection"
|
||||
import { resolveSession } from "./session-resolver"
|
||||
import { createJsonOutputManager } from "./json-output"
|
||||
import { executeOnCompleteHook } from "./on-complete-hook"
|
||||
import { resolveRunAgent } from "./agent-resolver"
|
||||
|
||||
export { resolveRunAgent }
|
||||
|
||||
const POLL_INTERVAL_MS = 500
|
||||
const DEFAULT_TIMEOUT_MS = 0
|
||||
const SESSION_CREATE_MAX_RETRIES = 3
|
||||
const SESSION_CREATE_RETRY_DELAY_MS = 1000
|
||||
const CORE_AGENT_ORDER = ["sisyphus", "hephaestus", "prometheus", "atlas"] as const
|
||||
const DEFAULT_AGENT = "sisyphus"
|
||||
|
||||
type EnvVars = Record<string, string | undefined>
|
||||
|
||||
const normalizeAgentName = (agent?: string): string | undefined => {
|
||||
if (!agent) return undefined
|
||||
const trimmed = agent.trim()
|
||||
if (!trimmed) return undefined
|
||||
const lowered = trimmed.toLowerCase()
|
||||
const coreMatch = CORE_AGENT_ORDER.find((name) => name.toLowerCase() === lowered)
|
||||
return coreMatch ?? trimmed
|
||||
}
|
||||
|
||||
const isAgentDisabled = (agent: string, config: OhMyOpenCodeConfig): boolean => {
|
||||
const lowered = agent.toLowerCase()
|
||||
if (lowered === "sisyphus" && config.sisyphus_agent?.disabled === true) {
|
||||
return true
|
||||
}
|
||||
return (config.disabled_agents ?? []).some(
|
||||
(disabled) => disabled.toLowerCase() === lowered
|
||||
)
|
||||
}
|
||||
|
||||
const pickFallbackAgent = (config: OhMyOpenCodeConfig): string => {
|
||||
for (const agent of CORE_AGENT_ORDER) {
|
||||
if (!isAgentDisabled(agent, config)) {
|
||||
return agent
|
||||
}
|
||||
}
|
||||
return DEFAULT_AGENT
|
||||
}
|
||||
|
||||
export const resolveRunAgent = (
|
||||
options: RunOptions,
|
||||
pluginConfig: OhMyOpenCodeConfig,
|
||||
env: EnvVars = process.env
|
||||
): string => {
|
||||
const cliAgent = normalizeAgentName(options.agent)
|
||||
const envAgent = normalizeAgentName(env.OPENCODE_DEFAULT_AGENT)
|
||||
const configAgent = normalizeAgentName(pluginConfig.default_run_agent)
|
||||
const resolved = cliAgent ?? envAgent ?? configAgent ?? DEFAULT_AGENT
|
||||
const normalized = normalizeAgentName(resolved) ?? DEFAULT_AGENT
|
||||
|
||||
if (isAgentDisabled(normalized, pluginConfig)) {
|
||||
const fallback = pickFallbackAgent(pluginConfig)
|
||||
const fallbackDisabled = isAgentDisabled(fallback, pluginConfig)
|
||||
if (fallbackDisabled) {
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`Requested agent "${normalized}" is disabled and no enabled core agent was found. Proceeding with "${fallback}".`
|
||||
)
|
||||
)
|
||||
return fallback
|
||||
}
|
||||
console.log(
|
||||
pc.yellow(
|
||||
`Requested agent "${normalized}" is disabled. Falling back to "${fallback}".`
|
||||
)
|
||||
)
|
||||
return fallback
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
|
||||
export async function run(options: RunOptions): Promise<number> {
|
||||
// Set CLI run mode environment variable before any config loading
|
||||
// This signals to config-handler to deny Question tool (no TUI to answer)
|
||||
process.env.OPENCODE_CLI_RUN_MODE = "true"
|
||||
|
||||
const startTime = Date.now()
|
||||
const {
|
||||
message,
|
||||
directory = process.cwd(),
|
||||
timeout = DEFAULT_TIMEOUT_MS,
|
||||
} = options
|
||||
|
||||
const jsonManager = options.json ? createJsonOutputManager() : null
|
||||
if (jsonManager) jsonManager.redirectToStderr()
|
||||
|
||||
const pluginConfig = loadPluginConfig(directory, { command: "run" })
|
||||
const resolvedAgent = resolveRunAgent(options, pluginConfig)
|
||||
|
||||
console.log(pc.cyan("Starting opencode server (auto port selection enabled)..."))
|
||||
|
||||
const abortController = new AbortController()
|
||||
let timeoutId: ReturnType<typeof setTimeout> | null = null
|
||||
|
||||
// timeout=0 means no timeout (run until completion)
|
||||
if (timeout > 0) {
|
||||
timeoutId = setTimeout(() => {
|
||||
console.log(pc.yellow("\nTimeout reached. Aborting..."))
|
||||
@@ -104,29 +40,15 @@ export async function run(options: RunOptions): Promise<number> {
|
||||
}
|
||||
|
||||
try {
|
||||
const envPort = process.env.OPENCODE_SERVER_PORT
|
||||
? parseInt(process.env.OPENCODE_SERVER_PORT, 10)
|
||||
: undefined
|
||||
const serverHostname = process.env.OPENCODE_SERVER_HOSTNAME || "127.0.0.1"
|
||||
const preferredPort = envPort && !isNaN(envPort) ? envPort : DEFAULT_SERVER_PORT
|
||||
|
||||
const { port: serverPort, wasAutoSelected } = await getAvailableServerPort(preferredPort, serverHostname)
|
||||
|
||||
if (wasAutoSelected) {
|
||||
console.log(pc.yellow(`Port ${preferredPort} is busy, using port ${serverPort} instead`))
|
||||
} else {
|
||||
console.log(pc.dim(`Using port ${serverPort}`))
|
||||
}
|
||||
|
||||
const { client, server } = await createOpencode({
|
||||
const { client, cleanup: serverCleanup } = await createServerConnection({
|
||||
port: options.port,
|
||||
attach: options.attach,
|
||||
signal: abortController.signal,
|
||||
port: serverPort,
|
||||
hostname: serverHostname,
|
||||
})
|
||||
|
||||
const cleanup = () => {
|
||||
if (timeoutId) clearTimeout(timeoutId)
|
||||
server.close()
|
||||
serverCleanup()
|
||||
}
|
||||
|
||||
process.on("SIGINT", () => {
|
||||
@@ -136,61 +58,14 @@ export async function run(options: RunOptions): Promise<number> {
|
||||
})
|
||||
|
||||
try {
|
||||
// Retry session creation with exponential backoff
|
||||
// Server might not be fully ready even after "listening" message
|
||||
let sessionID: string | undefined
|
||||
let lastError: unknown
|
||||
|
||||
for (let attempt = 1; attempt <= SESSION_CREATE_MAX_RETRIES; attempt++) {
|
||||
const sessionRes = await client.session.create({
|
||||
body: { title: "oh-my-opencode run" },
|
||||
})
|
||||
|
||||
if (sessionRes.error) {
|
||||
lastError = sessionRes.error
|
||||
console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`))
|
||||
console.error(pc.dim(` Error: ${serializeError(sessionRes.error)}`))
|
||||
|
||||
if (attempt < SESSION_CREATE_MAX_RETRIES) {
|
||||
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
|
||||
console.log(pc.dim(` Retrying in ${delay}ms...`))
|
||||
await new Promise((resolve) => setTimeout(resolve, delay))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
sessionID = sessionRes.data?.id
|
||||
if (sessionID) {
|
||||
break
|
||||
}
|
||||
|
||||
// No error but also no session ID - unexpected response
|
||||
lastError = new Error(`Unexpected response: ${JSON.stringify(sessionRes, null, 2)}`)
|
||||
console.error(pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`))
|
||||
|
||||
if (attempt < SESSION_CREATE_MAX_RETRIES) {
|
||||
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
|
||||
console.log(pc.dim(` Retrying in ${delay}ms...`))
|
||||
await new Promise((resolve) => setTimeout(resolve, delay))
|
||||
}
|
||||
}
|
||||
|
||||
if (!sessionID) {
|
||||
console.error(pc.red("Failed to create session after all retries"))
|
||||
console.error(pc.dim(`Last error: ${serializeError(lastError)}`))
|
||||
cleanup()
|
||||
return 1
|
||||
}
|
||||
const sessionID = await resolveSession({
|
||||
client,
|
||||
sessionId: options.sessionId,
|
||||
})
|
||||
|
||||
console.log(pc.dim(`Session: ${sessionID}`))
|
||||
|
||||
const ctx: RunContext = {
|
||||
client,
|
||||
sessionID,
|
||||
directory,
|
||||
abortController,
|
||||
}
|
||||
|
||||
const ctx: RunContext = { client, sessionID, directory, abortController }
|
||||
const events = await client.event.subscribe()
|
||||
const eventState = createEventState()
|
||||
const eventProcessor = processEvents(ctx, events.stream, eventState)
|
||||
@@ -206,47 +81,41 @@ export async function run(options: RunOptions): Promise<number> {
|
||||
})
|
||||
|
||||
console.log(pc.dim("Waiting for completion...\n"))
|
||||
|
||||
while (!abortController.signal.aborted) {
|
||||
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
|
||||
|
||||
if (!eventState.mainSessionIdle) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if session errored - exit with failure if so
|
||||
if (eventState.mainSessionError) {
|
||||
console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
|
||||
console.error(pc.yellow("Check if todos were completed before the error."))
|
||||
cleanup()
|
||||
process.exit(1)
|
||||
}
|
||||
|
||||
// Guard against premature completion: don't check completion until the
|
||||
// session has produced meaningful work (text output, tool call, or tool result).
|
||||
// Without this, a session that goes busy->idle before the LLM responds
|
||||
// would exit immediately because 0 todos + 0 children = "complete".
|
||||
if (!eventState.hasReceivedMeaningfulWork) {
|
||||
continue
|
||||
}
|
||||
|
||||
const shouldExit = await checkCompletionConditions(ctx)
|
||||
if (shouldExit) {
|
||||
console.log(pc.green("\n\nAll tasks completed."))
|
||||
cleanup()
|
||||
process.exit(0)
|
||||
}
|
||||
}
|
||||
const exitCode = await pollForCompletion(ctx, eventState, abortController)
|
||||
|
||||
await eventProcessor.catch(() => {})
|
||||
cleanup()
|
||||
return 130
|
||||
|
||||
const durationMs = Date.now() - startTime
|
||||
|
||||
if (options.onComplete) {
|
||||
await executeOnCompleteHook({
|
||||
command: options.onComplete,
|
||||
sessionId: sessionID,
|
||||
exitCode,
|
||||
durationMs,
|
||||
messageCount: eventState.messageCount,
|
||||
})
|
||||
}
|
||||
|
||||
if (jsonManager) {
|
||||
jsonManager.emitResult({
|
||||
sessionId: sessionID,
|
||||
success: exitCode === 0,
|
||||
durationMs,
|
||||
messageCount: eventState.messageCount,
|
||||
summary: eventState.lastPartText.slice(0, 200) || "Run completed",
|
||||
})
|
||||
}
|
||||
|
||||
return exitCode
|
||||
} catch (err) {
|
||||
cleanup()
|
||||
throw err
|
||||
}
|
||||
} catch (err) {
|
||||
if (timeoutId) clearTimeout(timeoutId)
|
||||
if (jsonManager) jsonManager.restore()
|
||||
if (err instanceof Error && err.name === "AbortError") {
|
||||
return 130
|
||||
}
|
||||
@@ -254,3 +123,31 @@ export async function run(options: RunOptions): Promise<number> {
|
||||
return 1
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Polls the event state until the run finishes, errors, or is aborted.
 *
 * Exit codes: 0 when completion conditions are met, 1 when the main
 * session ended with an error, 130 when the abort signal fired
 * (conventional SIGINT exit code).
 *
 * @param ctx             Run context (client, session, directory, abort controller).
 * @param eventState      Mutable state updated by the event processor.
 * @param abortController Controller whose signal terminates the loop.
 */
async function pollForCompletion(
  ctx: RunContext,
  eventState: ReturnType<typeof createEventState>,
  abortController: AbortController
): Promise<number> {
  while (!abortController.signal.aborted) {
    await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))

    // Only evaluate end conditions while the main session is idle.
    if (!eventState.mainSessionIdle) continue

    if (eventState.mainSessionError) {
      console.error(pc.red(`\n\nSession ended with error: ${eventState.lastError}`))
      console.error(pc.yellow("Check if todos were completed before the error."))
      return 1
    }

    // Guard against premature completion: a session that goes busy->idle
    // before producing any text/tool output would otherwise look
    // "complete" (0 todos + 0 children) and exit immediately.
    if (!eventState.hasReceivedMeaningfulWork) continue

    const shouldExit = await checkCompletionConditions(ctx)
    if (shouldExit) {
      console.log(pc.green("\n\nAll tasks completed."))
      return 0
    }
  }

  // Loop only exits here when the abort signal fired.
  return 130
}
|
||||
|
||||
152
src/cli/run/server-connection.test.ts
Normal file
152
src/cli/run/server-connection.test.ts
Normal file
@@ -0,0 +1,152 @@
|
||||
// Tests for createServerConnection (./server-connection).
// The SDK and port utilities are replaced with bun mocks so no real
// server is started; console.log is swapped out per test to keep output quiet.
import { describe, it, expect, mock, beforeEach, afterEach } from "bun:test"

// Keep a reference so afterEach can restore the real console.
const originalConsole = globalThis.console

const mockServerClose = mock(() => {})
const mockCreateOpencode = mock(() =>
  Promise.resolve({
    client: { session: {} },
    server: { url: "http://127.0.0.1:4096", close: mockServerClose },
  })
)
const mockCreateOpencodeClient = mock(() => ({ session: {} }))
const mockIsPortAvailable = mock(() => Promise.resolve(true))
const mockGetAvailableServerPort = mock(() => Promise.resolve({ port: 4096, wasAutoSelected: false }))
const mockConsoleLog = mock(() => {})

mock.module("@opencode-ai/sdk", () => ({
  createOpencode: mockCreateOpencode,
  createOpencodeClient: mockCreateOpencodeClient,
}))

mock.module("../../shared/port-utils", () => ({
  isPortAvailable: mockIsPortAvailable,
  getAvailableServerPort: mockGetAvailableServerPort,
  DEFAULT_SERVER_PORT: 4096,
}))

// Import AFTER the module mocks are registered so the subject picks them up.
const { createServerConnection } = await import("./server-connection")

describe("createServerConnection", () => {
  beforeEach(() => {
    mockCreateOpencode.mockClear()
    mockCreateOpencodeClient.mockClear()
    mockIsPortAvailable.mockClear()
    mockGetAvailableServerPort.mockClear()
    mockServerClose.mockClear()
    mockConsoleLog.mockClear()
    // Silence log output without losing the rest of the console API.
    globalThis.console = { ...console, log: mockConsoleLog } as typeof console
  })

  afterEach(() => {
    globalThis.console = originalConsole
  })

  it("attach mode returns client with no-op cleanup", async () => {
    // given
    const signal = new AbortController().signal
    const attachUrl = "http://localhost:8080"

    // when
    const result = await createServerConnection({ attach: attachUrl, signal })

    // then — attached servers are externally owned, so cleanup must not close anything
    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: attachUrl })
    expect(result.client).toBeDefined()
    expect(result.cleanup).toBeDefined()
    result.cleanup()
    expect(mockServerClose).not.toHaveBeenCalled()
  })

  it("explicit port starts server when port is available", async () => {
    // given
    const signal = new AbortController().signal
    const port = 8080
    mockIsPortAvailable.mockResolvedValueOnce(true)

    // when
    const result = await createServerConnection({ port, signal })

    // then — a free explicit port means we own the server and cleanup closes it
    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 8080, hostname: "127.0.0.1" })
    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
    expect(result.client).toBeDefined()
    expect(result.cleanup).toBeDefined()
    result.cleanup()
    expect(mockServerClose).toHaveBeenCalled()
  })

  it("explicit port attaches when port is occupied", async () => {
    // given
    const signal = new AbortController().signal
    const port = 8080
    mockIsPortAvailable.mockResolvedValueOnce(false)

    // when
    const result = await createServerConnection({ port, signal })

    // then — occupied port falls back to attaching, so no server is spawned or closed
    expect(mockIsPortAvailable).toHaveBeenCalledWith(8080, "127.0.0.1")
    expect(mockCreateOpencode).not.toHaveBeenCalled()
    expect(mockCreateOpencodeClient).toHaveBeenCalledWith({ baseUrl: "http://127.0.0.1:8080" })
    expect(result.client).toBeDefined()
    expect(result.cleanup).toBeDefined()
    result.cleanup()
    expect(mockServerClose).not.toHaveBeenCalled()
  })

  it("auto mode uses getAvailableServerPort", async () => {
    // given
    const signal = new AbortController().signal
    mockGetAvailableServerPort.mockResolvedValueOnce({ port: 4100, wasAutoSelected: true })

    // when
    const result = await createServerConnection({ signal })

    // then — no port/attach options means auto-selection starting at the default port
    expect(mockGetAvailableServerPort).toHaveBeenCalledWith(4096, "127.0.0.1")
    expect(mockCreateOpencode).toHaveBeenCalledWith({ signal, port: 4100, hostname: "127.0.0.1" })
    expect(mockCreateOpencodeClient).not.toHaveBeenCalled()
    expect(result.client).toBeDefined()
    expect(result.cleanup).toBeDefined()
    result.cleanup()
    expect(mockServerClose).toHaveBeenCalled()
  })

  it("invalid port throws error", async () => {
    // given
    const signal = new AbortController().signal

    // when & then — ports outside 1-65535 are rejected up front
    await expect(createServerConnection({ port: 0, signal })).rejects.toThrow("Port must be between 1 and 65535")
    await expect(createServerConnection({ port: -1, signal })).rejects.toThrow("Port must be between 1 and 65535")
    await expect(createServerConnection({ port: 99999, signal })).rejects.toThrow("Port must be between 1 and 65535")
  })

  it("cleanup calls server.close for owned server", async () => {
    // given
    const signal = new AbortController().signal
    mockIsPortAvailable.mockResolvedValueOnce(true)

    // when
    const result = await createServerConnection({ port: 8080, signal })
    result.cleanup()

    // then
    expect(mockServerClose).toHaveBeenCalledTimes(1)
  })

  it("cleanup is no-op for attached server", async () => {
    // given
    const signal = new AbortController().signal
    const attachUrl = "http://localhost:8080"

    // when
    const result = await createServerConnection({ attach: attachUrl, signal })
    result.cleanup()

    // then
    expect(mockServerClose).not.toHaveBeenCalled()
  })
})
|
||||
47
src/cli/run/server-connection.ts
Normal file
47
src/cli/run/server-connection.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import { createOpencode, createOpencodeClient } from "@opencode-ai/sdk"
|
||||
import pc from "picocolors"
|
||||
import type { ServerConnection } from "./types"
|
||||
import { getAvailableServerPort, isPortAvailable, DEFAULT_SERVER_PORT } from "../../shared/port-utils"
|
||||
|
||||
export async function createServerConnection(options: {
|
||||
port?: number
|
||||
attach?: string
|
||||
signal: AbortSignal
|
||||
}): Promise<ServerConnection> {
|
||||
const { port, attach, signal } = options
|
||||
|
||||
if (attach !== undefined) {
|
||||
console.log(pc.dim("Attaching to existing server at"), pc.cyan(attach))
|
||||
const client = createOpencodeClient({ baseUrl: attach })
|
||||
return { client, cleanup: () => {} }
|
||||
}
|
||||
|
||||
if (port !== undefined) {
|
||||
if (port < 1 || port > 65535) {
|
||||
throw new Error("Port must be between 1 and 65535")
|
||||
}
|
||||
|
||||
const available = await isPortAvailable(port, "127.0.0.1")
|
||||
|
||||
if (available) {
|
||||
console.log(pc.dim("Starting server on port"), pc.cyan(port.toString()))
|
||||
const { client, server } = await createOpencode({ signal, port, hostname: "127.0.0.1" })
|
||||
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
|
||||
return { client, cleanup: () => server.close() }
|
||||
}
|
||||
|
||||
console.log(pc.dim("Port"), pc.cyan(port.toString()), pc.dim("is occupied, attaching to existing server"))
|
||||
const client = createOpencodeClient({ baseUrl: `http://127.0.0.1:${port}` })
|
||||
return { client, cleanup: () => {} }
|
||||
}
|
||||
|
||||
const { port: selectedPort, wasAutoSelected } = await getAvailableServerPort(DEFAULT_SERVER_PORT, "127.0.0.1")
|
||||
if (wasAutoSelected) {
|
||||
console.log(pc.dim("Auto-selected port"), pc.cyan(selectedPort.toString()))
|
||||
} else {
|
||||
console.log(pc.dim("Starting server on port"), pc.cyan(selectedPort.toString()))
|
||||
}
|
||||
const { client, server } = await createOpencode({ signal, port: selectedPort, hostname: "127.0.0.1" })
|
||||
console.log(pc.dim("Server listening at"), pc.cyan(server.url))
|
||||
return { client, cleanup: () => server.close() }
|
||||
}
|
||||
140
src/cli/run/session-resolver.test.ts
Normal file
140
src/cli/run/session-resolver.test.ts
Normal file
@@ -0,0 +1,140 @@
|
||||
// Tests for resolveSession (./session-resolver).
// A hand-rolled mock client drives session.get / session.create; the
// createResults array scripts one response per create() call so retry
// behavior can be exercised deterministically.
import { describe, it, expect, beforeEach, mock, spyOn } from "bun:test"
import { resolveSession } from "./session-resolver"
import type { OpencodeClient } from "./types"

// Builds a minimal OpencodeClient whose session API returns canned results.
const createMockClient = (overrides: {
  getResult?: { error?: unknown; data?: { id: string } }
  createResults?: Array<{ error?: unknown; data?: { id: string } }>
} = {}): OpencodeClient => {
  const { getResult, createResults = [] } = overrides
  let createCallIndex = 0
  return {
    session: {
      get: mock((opts: { path: { id: string } }) =>
        Promise.resolve(getResult ?? { data: { id: opts.path.id } })
      ),
      create: mock(() => {
        // Consume one scripted result per call; default to success.
        const result =
          createResults[createCallIndex] ?? { data: { id: "new-session-id" } }
        createCallIndex++
        return Promise.resolve(result)
      }),
    },
  } as unknown as OpencodeClient
}

describe("resolveSession", () => {
  beforeEach(() => {
    // Silence the retry/error logging the resolver emits.
    spyOn(console, "log").mockImplementation(() => {})
    spyOn(console, "error").mockImplementation(() => {})
  })

  it("returns provided session ID when session exists", async () => {
    // given
    const sessionId = "existing-session-id"
    const mockClient = createMockClient({
      getResult: { data: { id: sessionId } },
    })

    // when
    const result = await resolveSession({ client: mockClient, sessionId })

    // then — an existing ID is verified via get() and never re-created
    expect(result).toBe(sessionId)
    expect(mockClient.session.get).toHaveBeenCalledWith({
      path: { id: sessionId },
    })
    expect(mockClient.session.create).not.toHaveBeenCalled()
  })

  it("throws error when provided session ID not found", async () => {
    // given
    const sessionId = "non-existent-session-id"
    const mockClient = createMockClient({
      getResult: { error: { message: "Session not found" } },
    })

    // when
    const result = resolveSession({ client: mockClient, sessionId })

    // then — a bad explicit ID is a hard failure, not a fallback to create()
    await expect(result).rejects.toThrow(`Session not found: ${sessionId}`)
    expect(mockClient.session.get).toHaveBeenCalledWith({
      path: { id: sessionId },
    })
    expect(mockClient.session.create).not.toHaveBeenCalled()
  })

  it("creates new session when no session ID provided", async () => {
    // given
    const mockClient = createMockClient({
      createResults: [{ data: { id: "new-session-id" } }],
    })

    // when
    const result = await resolveSession({ client: mockClient })

    // then
    expect(result).toBe("new-session-id")
    expect(mockClient.session.create).toHaveBeenCalledWith({
      body: { title: "oh-my-opencode run" },
    })
    expect(mockClient.session.get).not.toHaveBeenCalled()
  })

  it("retries session creation on failure", async () => {
    // given — first create() fails, second succeeds
    const mockClient = createMockClient({
      createResults: [
        { error: { message: "Network error" } },
        { data: { id: "retried-session-id" } },
      ],
    })

    // when
    const result = await resolveSession({ client: mockClient })

    // then
    expect(result).toBe("retried-session-id")
    expect(mockClient.session.create).toHaveBeenCalledTimes(2)
    expect(mockClient.session.create).toHaveBeenCalledWith({
      body: { title: "oh-my-opencode run" },
    })
  })

  it("throws after all retries exhausted", async () => {
    // given — all three attempts fail
    const mockClient = createMockClient({
      createResults: [
        { error: { message: "Error 1" } },
        { error: { message: "Error 2" } },
        { error: { message: "Error 3" } },
      ],
    })

    // when
    const result = resolveSession({ client: mockClient })

    // then
    await expect(result).rejects.toThrow("Failed to create session after all retries")
    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
  })

  it("session creation returns no ID", async () => {
    // given — success responses that are missing the session ID
    const mockClient = createMockClient({
      createResults: [
        { data: undefined },
        { data: undefined },
        { data: undefined },
      ],
    })

    // when
    const result = resolveSession({ client: mockClient })

    // then — missing ID is treated the same as an error response
    await expect(result).rejects.toThrow("Failed to create session after all retries")
    expect(mockClient.session.create).toHaveBeenCalledTimes(3)
  })
})
|
||||
64
src/cli/run/session-resolver.ts
Normal file
64
src/cli/run/session-resolver.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import pc from "picocolors"
|
||||
import type { OpencodeClient } from "./types"
|
||||
import { serializeError } from "./events"
|
||||
|
||||
const SESSION_CREATE_MAX_RETRIES = 3
|
||||
const SESSION_CREATE_RETRY_DELAY_MS = 1000
|
||||
|
||||
export async function resolveSession(options: {
|
||||
client: OpencodeClient
|
||||
sessionId?: string
|
||||
}): Promise<string> {
|
||||
const { client, sessionId } = options
|
||||
|
||||
if (sessionId) {
|
||||
const res = await client.session.get({ path: { id: sessionId } })
|
||||
if (res.error || !res.data) {
|
||||
throw new Error(`Session not found: ${sessionId}`)
|
||||
}
|
||||
return sessionId
|
||||
}
|
||||
|
||||
let lastError: unknown
|
||||
for (let attempt = 1; attempt <= SESSION_CREATE_MAX_RETRIES; attempt++) {
|
||||
const res = await client.session.create({
|
||||
body: { title: "oh-my-opencode run" },
|
||||
})
|
||||
|
||||
if (res.error) {
|
||||
lastError = res.error
|
||||
console.error(
|
||||
pc.yellow(`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES} failed:`)
|
||||
)
|
||||
console.error(pc.dim(` Error: ${serializeError(res.error)}`))
|
||||
|
||||
if (attempt < SESSION_CREATE_MAX_RETRIES) {
|
||||
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
|
||||
console.log(pc.dim(` Retrying in ${delay}ms...`))
|
||||
await new Promise((resolve) => setTimeout(resolve, delay))
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if (res.data?.id) {
|
||||
return res.data.id
|
||||
}
|
||||
|
||||
lastError = new Error(
|
||||
`Unexpected response: ${JSON.stringify(res, null, 2)}`
|
||||
)
|
||||
console.error(
|
||||
pc.yellow(
|
||||
`Session create attempt ${attempt}/${SESSION_CREATE_MAX_RETRIES}: No session ID returned`
|
||||
)
|
||||
)
|
||||
|
||||
if (attempt < SESSION_CREATE_MAX_RETRIES) {
|
||||
const delay = SESSION_CREATE_RETRY_DELAY_MS * attempt
|
||||
console.log(pc.dim(` Retrying in ${delay}ms...`))
|
||||
await new Promise((resolve) => setTimeout(resolve, delay))
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error("Failed to create session after all retries")
|
||||
}
|
||||
@@ -1,10 +1,29 @@
|
||||
import type { OpencodeClient } from "@opencode-ai/sdk"
|
||||
export type { OpencodeClient }
|
||||
|
||||
/** Options accepted by the `run` CLI command. */
export interface RunOptions {
  /** User message driving the run. */
  message: string
  /** Agent name override; resolved via resolveRunAgent against core agents. */
  agent?: string
  /** Working directory for the run (defaults to process.cwd()). */
  directory?: string
  /** Overall timeout in ms; 0 means no timeout (run until completion). */
  timeout?: number
  /** Explicit server port; if occupied, the CLI attaches to it instead of starting a server. */
  port?: number
  /** Base URL of an existing server to attach to (skips server startup entirely). */
  attach?: string
  /** Shell command executed after the run finishes (on-complete hook). */
  onComplete?: string
  /** Emit a machine-readable JSON result; regular logs are redirected to stderr. */
  json?: boolean
  /** Reuse an existing session instead of creating a new one. */
  sessionId?: string
}
|
||||
|
||||
/** A connected opencode client plus teardown for any server we own. */
export interface ServerConnection {
  client: OpencodeClient
  /** Closes the server when it was started by us; a no-op when attached to an external server. */
  cleanup: () => void
}
|
||||
|
||||
/** Machine-readable summary emitted when JSON output is requested. */
export interface RunResult {
  sessionId: string
  /** True when the run exited with code 0. */
  success: boolean
  /** Wall-clock duration of the run in milliseconds. */
  durationMs: number
  messageCount: number
  /** Short text summary (truncated last output, or a generic completion message). */
  summary: string
}
|
||||
|
||||
export interface RunContext {
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
BrowserAutomationProviderSchema,
|
||||
BuiltinCategoryNameSchema,
|
||||
CategoryConfigSchema,
|
||||
ExperimentalConfigSchema,
|
||||
GitMasterConfigSchema,
|
||||
OhMyOpenCodeConfigSchema,
|
||||
} from "./schema"
|
||||
|
||||
@@ -606,3 +608,128 @@ describe("OhMyOpenCodeConfigSchema - browser_automation_engine", () => {
|
||||
expect(result.data?.browser_automation_engine).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
// Validates the experimental feature-flag fields: plugin_load_timeout_ms
// (number, min 1000) and safe_hook_creation (boolean), both optional.
describe("ExperimentalConfigSchema feature flags", () => {
  test("accepts plugin_load_timeout_ms as number", () => {
    //#given
    const config = { plugin_load_timeout_ms: 5000 }

    //#when
    const result = ExperimentalConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.plugin_load_timeout_ms).toBe(5000)
    }
  })

  test("rejects plugin_load_timeout_ms below 1000", () => {
    //#given — below the schema's minimum of 1000
    const config = { plugin_load_timeout_ms: 500 }

    //#when
    const result = ExperimentalConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(false)
  })

  test("accepts safe_hook_creation as boolean", () => {
    //#given
    const config = { safe_hook_creation: false }

    //#when
    const result = ExperimentalConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.safe_hook_creation).toBe(false)
    }
  })

  test("both fields are optional", () => {
    //#given
    const config = {}

    //#when
    const result = ExperimentalConfigSchema.safeParse(config)

    //#then — neither field is defaulted; both stay undefined
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.plugin_load_timeout_ms).toBeUndefined()
      expect(result.data.safe_hook_creation).toBeUndefined()
    }
  })
})
|
||||
|
||||
// Validates commit_footer, which accepts a boolean toggle or a custom
// string footer, defaults to true, and rejects other types.
describe("GitMasterConfigSchema", () => {
  test("accepts boolean true for commit_footer", () => {
    //#given
    const config = { commit_footer: true }

    //#when
    const result = GitMasterConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.commit_footer).toBe(true)
    }
  })

  test("accepts boolean false for commit_footer", () => {
    //#given
    const config = { commit_footer: false }

    //#when
    const result = GitMasterConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.commit_footer).toBe(false)
    }
  })

  test("accepts string value for commit_footer", () => {
    //#given — custom footer text instead of the boolean toggle
    const config = { commit_footer: "Custom footer text" }

    //#when
    const result = GitMasterConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.commit_footer).toBe("Custom footer text")
    }
  })

  test("defaults commit_footer to true when not provided", () => {
    //#given
    const config = {}

    //#when
    const result = GitMasterConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(true)
    if (result.success) {
      expect(result.data.commit_footer).toBe(true)
    }
  })

  test("rejects number for commit_footer", () => {
    //#given
    const config = { commit_footer: 123 }

    //#when
    const result = GitMasterConfigSchema.safeParse(config)

    //#then
    expect(result.success).toBe(false)
  })
})
|
||||
|
||||
@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
|
||||
edit: PermissionValue.optional(),
|
||||
bash: BashPermission.optional(),
|
||||
webfetch: PermissionValue.optional(),
|
||||
task: PermissionValue.optional(),
|
||||
doom_loop: PermissionValue.optional(),
|
||||
external_directory: PermissionValue.optional(),
|
||||
})
|
||||
@@ -86,6 +87,7 @@ export const HookNameSchema = z.enum([
|
||||
"category-skill-reminder",
|
||||
|
||||
"compaction-context-injector",
|
||||
"compaction-todo-preserver",
|
||||
"claude-code-hooks",
|
||||
"auto-slash-command",
|
||||
"edit-error-recovery",
|
||||
@@ -100,6 +102,7 @@ export const HookNameSchema = z.enum([
|
||||
"stop-continuation-guard",
|
||||
"tasks-todowrite-disabler",
|
||||
"write-existing-file-guard",
|
||||
"anthropic-effort",
|
||||
])
|
||||
|
||||
export const BuiltinCommandNameSchema = z.enum([
|
||||
@@ -183,7 +186,7 @@ export const SisyphusAgentConfigSchema = z.object({
|
||||
})
|
||||
|
||||
export const CategoryConfigSchema = z.object({
|
||||
/** Human-readable description of the category's purpose. Shown in delegate_task prompt. */
|
||||
/** Human-readable description of the category's purpose. Shown in task prompt. */
|
||||
description: z.string().optional(),
|
||||
model: z.string().optional(),
|
||||
variant: z.string().optional(),
|
||||
@@ -266,6 +269,10 @@ export const ExperimentalConfigSchema = z.object({
|
||||
dynamic_context_pruning: DynamicContextPruningConfigSchema.optional(),
|
||||
/** Enable experimental task system for Todowrite disabler hook */
|
||||
task_system: z.boolean().optional(),
|
||||
/** Timeout in ms for loadAllPluginComponents during config handler init (default: 10000, min: 1000) */
|
||||
plugin_load_timeout_ms: z.number().min(1000).optional(),
|
||||
/** Wrap hook creation in try/catch to prevent one failing hook from crashing the plugin (default: true at call site) */
|
||||
safe_hook_creation: z.boolean().optional(),
|
||||
})
|
||||
|
||||
export const SkillSourceSchema = z.union([
|
||||
@@ -333,10 +340,10 @@ export const BabysittingConfigSchema = z.object({
|
||||
})
|
||||
|
||||
export const GitMasterConfigSchema = z.object({
|
||||
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true) */
|
||||
commit_footer: z.boolean().default(true),
|
||||
/** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
|
||||
include_co_authored_by: z.boolean().default(true),
|
||||
/** Add "Ultraworked with Sisyphus" footer to commit messages (default: true). Can be boolean or custom string. */
|
||||
commit_footer: z.union([z.boolean(), z.string()]).default(true),
|
||||
/** Add "Co-authored-by: Sisyphus" trailer to commit messages (default: true) */
|
||||
include_co_authored_by: z.boolean().default(true),
|
||||
})
|
||||
|
||||
export const BrowserAutomationProviderSchema = z.enum(["playwright", "agent-browser", "dev-browser"])
|
||||
@@ -420,6 +427,8 @@ export const OhMyOpenCodeConfigSchema = z.object({
|
||||
websearch: WebsearchConfigSchema.optional(),
|
||||
tmux: TmuxConfigSchema.optional(),
|
||||
sisyphus: SisyphusConfigSchema.optional(),
|
||||
/** Migration history to prevent re-applying migrations (e.g., model version upgrades) */
|
||||
_migrations: z.array(z.string()).optional(),
|
||||
})
|
||||
|
||||
export type OhMyOpenCodeConfig = z.infer<typeof OhMyOpenCodeConfigSchema>
|
||||
|
||||
@@ -56,7 +56,7 @@ features/
|
||||
|
||||
## ANTI-PATTERNS
|
||||
|
||||
- **Sequential delegation**: Use `delegate_task` parallel
|
||||
- **Sequential delegation**: Use `task` parallel
|
||||
- **Trust self-reports**: ALWAYS verify
|
||||
- **Main thread blocks**: No heavy I/O in loader init
|
||||
- **Direct state mutation**: Use managers for boulder/session state
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
import { describe, test, expect, beforeEach } from "bun:test"
|
||||
import { afterEach } from "bun:test"
|
||||
declare const require: (name: string) => any
|
||||
const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
|
||||
import { tmpdir } from "node:os"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { BackgroundTask, ResumeInput } from "./types"
|
||||
import { MIN_IDLE_TIME_MS } from "./constants"
|
||||
import { BackgroundManager } from "./manager"
|
||||
import { ConcurrencyManager } from "./concurrency"
|
||||
|
||||
@@ -170,6 +171,7 @@ function createBackgroundManager(): BackgroundManager {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
@@ -879,12 +881,14 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
|
||||
test("should skip notification when parent session is aborted", async () => {
|
||||
//#given
|
||||
let promptCalled = false
|
||||
const promptMock = async () => {
|
||||
promptCalled = true
|
||||
return {}
|
||||
}
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => {
|
||||
promptCalled = true
|
||||
return {}
|
||||
},
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
abort: async () => ({}),
|
||||
messages: async () => {
|
||||
const error = new Error("User aborted")
|
||||
@@ -921,14 +925,16 @@ describe("BackgroundManager.notifyParentSession - aborted parent", () => {
|
||||
test("should swallow aborted error from prompt", async () => {
|
||||
//#given
|
||||
let promptCalled = false
|
||||
const promptMock = async () => {
|
||||
promptCalled = true
|
||||
const error = new Error("User aborted")
|
||||
error.name = "MessageAbortedError"
|
||||
throw error
|
||||
}
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => {
|
||||
promptCalled = true
|
||||
const error = new Error("User aborted")
|
||||
error.name = "MessageAbortedError"
|
||||
throw error
|
||||
},
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
abort: async () => ({}),
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
@@ -1053,19 +1059,20 @@ describe("BackgroundManager.tryCompleteTask", () => {
|
||||
expect(concurrencyManager.getCount(concurrencyKey)).toBe(0)
|
||||
})
|
||||
|
||||
test("should abort session on completion", async () => {
|
||||
// #given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
test("should abort session on completion", async () => {
|
||||
// #given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
messages: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
manager.shutdown()
|
||||
manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
@@ -1088,6 +1095,34 @@ describe("BackgroundManager.tryCompleteTask", () => {
|
||||
// #then
|
||||
expect(abortedSessionIDs).toEqual(["session-1"])
|
||||
})
|
||||
|
||||
test("should clean pendingByParent even when notifyParentSession throws", async () => {
|
||||
// given
|
||||
;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {
|
||||
throw new Error("notify failed")
|
||||
}
|
||||
|
||||
const task: BackgroundTask = {
|
||||
id: "task-pending-cleanup",
|
||||
sessionID: "session-pending-cleanup",
|
||||
parentSessionID: "parent-pending-cleanup",
|
||||
parentMessageID: "msg-1",
|
||||
description: "pending cleanup task",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(),
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
|
||||
|
||||
// when
|
||||
await tryCompleteTaskForTest(manager, task)
|
||||
|
||||
// then
|
||||
expect(task.status).toBe("completed")
|
||||
expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.trackTask", () => {
|
||||
@@ -1110,7 +1145,7 @@ describe("BackgroundManager.trackTask", () => {
|
||||
sessionID: "session-1",
|
||||
parentSessionID: "parent-session",
|
||||
description: "external task",
|
||||
agent: "delegate_task",
|
||||
agent: "task",
|
||||
concurrencyKey: "external-key",
|
||||
}
|
||||
|
||||
@@ -1145,7 +1180,7 @@ describe("BackgroundManager.resume concurrency key", () => {
|
||||
sessionID: "session-1",
|
||||
parentSessionID: "parent-session",
|
||||
description: "external task",
|
||||
agent: "delegate_task",
|
||||
agent: "task",
|
||||
concurrencyKey: "external-key",
|
||||
})
|
||||
|
||||
@@ -1167,24 +1202,26 @@ describe("BackgroundManager.resume concurrency key", () => {
|
||||
})
|
||||
|
||||
describe("BackgroundManager.resume model persistence", () => {
|
||||
let manager: BackgroundManager
|
||||
let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>
|
||||
let manager: BackgroundManager
|
||||
let promptCalls: Array<{ path: { id: string }; body: Record<string, unknown> }>
|
||||
|
||||
beforeEach(() => {
|
||||
// given
|
||||
promptCalls = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async (args: { path: { id: string }; body: Record<string, unknown> }) => {
|
||||
promptCalls.push(args)
|
||||
return {}
|
||||
},
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
})
|
||||
beforeEach(() => {
|
||||
// given
|
||||
promptCalls = []
|
||||
const promptMock = async (args: { path: { id: string }; body: Record<string, unknown> }) => {
|
||||
promptCalls.push(args)
|
||||
return {}
|
||||
}
|
||||
const client = {
|
||||
session: {
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
})
|
||||
|
||||
afterEach(() => {
|
||||
manager.shutdown()
|
||||
@@ -1282,19 +1319,20 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
let manager: BackgroundManager
|
||||
let mockClient: ReturnType<typeof createMockClient>
|
||||
|
||||
function createMockClient() {
|
||||
return {
|
||||
session: {
|
||||
create: async () => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
|
||||
get: async () => ({ data: { directory: "/test/dir" } }),
|
||||
prompt: async () => ({}),
|
||||
messages: async () => ({ data: [] }),
|
||||
todo: async () => ({ data: [] }),
|
||||
status: async () => ({ data: {} }),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
}
|
||||
function createMockClient() {
|
||||
return {
|
||||
session: {
|
||||
create: async () => ({ data: { id: `ses_${crypto.randomUUID()}` } }),
|
||||
get: async () => ({ data: { directory: "/test/dir" } }),
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
messages: async () => ({ data: [] }),
|
||||
todo: async () => ({ data: [] }),
|
||||
status: async () => ({ data: {} }),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
// given
|
||||
@@ -1842,13 +1880,14 @@ describe("BackgroundManager - Non-blocking Queue Integration", () => {
|
||||
})
|
||||
|
||||
describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should NOT interrupt task running less than 30 seconds (min runtime guard)", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
|
||||
const task: BackgroundTask = {
|
||||
@@ -1874,12 +1913,13 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
test("should NOT interrupt task with recent lastUpdate", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
test("should NOT interrupt task with recent lastUpdate", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
|
||||
@@ -1906,11 +1946,12 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task.status).toBe("running")
|
||||
})
|
||||
|
||||
test("should interrupt task with stale lastUpdate (> 3min)", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
test("should interrupt task with stale lastUpdate (> 3min)", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
@@ -1942,10 +1983,11 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task.completedAt).toBeDefined()
|
||||
})
|
||||
|
||||
test("should respect custom staleTimeoutMs config", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
test("should respect custom staleTimeoutMs config", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
@@ -1976,13 +2018,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task.error).toContain("Stale timeout")
|
||||
})
|
||||
|
||||
test("should release concurrency before abort", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should release concurrency before abort", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
@@ -2011,13 +2054,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task.status).toBe("cancelled")
|
||||
})
|
||||
|
||||
test("should handle multiple stale tasks in same poll cycle", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should handle multiple stale tasks in same poll cycle", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput, { staleTimeoutMs: 180_000 })
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
@@ -2062,13 +2106,14 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
expect(task2.status).toBe("cancelled")
|
||||
})
|
||||
|
||||
test("should use default timeout when config not provided", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should use default timeout when config not provided", async () => {
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
@@ -2097,18 +2142,19 @@ describe("BackgroundManager.checkAndInterruptStaleTasks", () => {
|
||||
})
|
||||
|
||||
describe("BackgroundManager.shutdown session abort", () => {
|
||||
test("should call session.abort for all running tasks during shutdown", () => {
|
||||
// given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
}
|
||||
test("should call session.abort for all running tasks during shutdown", () => {
|
||||
// given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
|
||||
const task1: BackgroundTask = {
|
||||
@@ -2146,18 +2192,19 @@ describe("BackgroundManager.shutdown session abort", () => {
|
||||
expect(abortedSessionIDs).toHaveLength(2)
|
||||
})
|
||||
|
||||
test("should not call session.abort for completed or cancelled tasks", () => {
|
||||
// given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
}
|
||||
test("should not call session.abort for completed or cancelled tasks", () => {
|
||||
// given
|
||||
const abortedSessionIDs: string[] = []
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async (args: { path: { id: string } }) => {
|
||||
abortedSessionIDs.push(args.path.id)
|
||||
return {}
|
||||
},
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
|
||||
const completedTask: BackgroundTask = {
|
||||
@@ -2206,15 +2253,16 @@ describe("BackgroundManager.shutdown session abort", () => {
|
||||
expect(abortedSessionIDs).toHaveLength(0)
|
||||
})
|
||||
|
||||
test("should call onShutdown callback during shutdown", () => {
|
||||
// given
|
||||
let shutdownCalled = false
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should call onShutdown callback during shutdown", () => {
|
||||
// given
|
||||
let shutdownCalled = false
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager(
|
||||
{ client, directory: tmpdir() } as unknown as PluginInput,
|
||||
undefined,
|
||||
@@ -2232,14 +2280,15 @@ describe("BackgroundManager.shutdown session abort", () => {
|
||||
expect(shutdownCalled).toBe(true)
|
||||
})
|
||||
|
||||
test("should not throw when onShutdown callback throws", () => {
|
||||
// given
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
test("should not throw when onShutdown callback throws", () => {
|
||||
// given
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
},
|
||||
}
|
||||
const manager = new BackgroundManager(
|
||||
{ client, directory: tmpdir() } as unknown as PluginInput,
|
||||
undefined,
|
||||
@@ -2255,6 +2304,69 @@ describe("BackgroundManager.shutdown session abort", () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.handleEvent - session.deleted cascade", () => {
|
||||
test("should cancel descendant tasks when parent session is deleted", () => {
|
||||
// given
|
||||
const manager = createBackgroundManager()
|
||||
const parentSessionID = "session-parent"
|
||||
const childTask = createMockTask({
|
||||
id: "task-child",
|
||||
sessionID: "session-child",
|
||||
parentSessionID,
|
||||
status: "running",
|
||||
})
|
||||
const siblingTask = createMockTask({
|
||||
id: "task-sibling",
|
||||
sessionID: "session-sibling",
|
||||
parentSessionID,
|
||||
status: "running",
|
||||
})
|
||||
const grandchildTask = createMockTask({
|
||||
id: "task-grandchild",
|
||||
sessionID: "session-grandchild",
|
||||
parentSessionID: "session-child",
|
||||
status: "pending",
|
||||
startedAt: undefined,
|
||||
queuedAt: new Date(),
|
||||
})
|
||||
const unrelatedTask = createMockTask({
|
||||
id: "task-unrelated",
|
||||
sessionID: "session-unrelated",
|
||||
parentSessionID: "other-parent",
|
||||
status: "running",
|
||||
})
|
||||
|
||||
const taskMap = getTaskMap(manager)
|
||||
taskMap.set(childTask.id, childTask)
|
||||
taskMap.set(siblingTask.id, siblingTask)
|
||||
taskMap.set(grandchildTask.id, grandchildTask)
|
||||
taskMap.set(unrelatedTask.id, unrelatedTask)
|
||||
|
||||
const pendingByParent = getPendingByParent(manager)
|
||||
pendingByParent.set(parentSessionID, new Set([childTask.id, siblingTask.id]))
|
||||
pendingByParent.set("session-child", new Set([grandchildTask.id]))
|
||||
|
||||
// when
|
||||
manager.handleEvent({
|
||||
type: "session.deleted",
|
||||
properties: { info: { id: parentSessionID } },
|
||||
})
|
||||
|
||||
// then
|
||||
expect(taskMap.has(childTask.id)).toBe(false)
|
||||
expect(taskMap.has(siblingTask.id)).toBe(false)
|
||||
expect(taskMap.has(grandchildTask.id)).toBe(false)
|
||||
expect(taskMap.has(unrelatedTask.id)).toBe(true)
|
||||
expect(childTask.status).toBe("cancelled")
|
||||
expect(siblingTask.status).toBe("cancelled")
|
||||
expect(grandchildTask.status).toBe("cancelled")
|
||||
expect(pendingByParent.get(parentSessionID)).toBeUndefined()
|
||||
expect(pendingByParent.get("session-child")).toBeUndefined()
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
|
||||
function getCompletionTimers(manager: BackgroundManager): Map<string, ReturnType<typeof setTimeout>> {
|
||||
return (manager as unknown as { completionTimers: Map<string, ReturnType<typeof setTimeout>> }).completionTimers
|
||||
@@ -2408,3 +2520,182 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
|
||||
expect(completionTimers.size).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
|
||||
test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
|
||||
//#given - a running task started less than MIN_IDLE_TIME_MS ago
|
||||
const sessionID = "session-early-idle"
|
||||
const messagesCalls: string[] = []
|
||||
const realDateNow = Date.now
|
||||
const baseNow = realDateNow()
|
||||
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
messages: async (args: { path: { id: string } }) => {
|
||||
messagesCalls.push(args.path.id)
|
||||
return {
|
||||
data: [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "ok" }],
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
todo: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
const remainingMs = 1200
|
||||
const task: BackgroundTask = {
|
||||
id: "task-early-idle",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-1",
|
||||
description: "early idle task",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(baseNow),
|
||||
}
|
||||
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when - session.idle fires
|
||||
try {
|
||||
Date.now = () => baseNow + (MIN_IDLE_TIME_MS - 100)
|
||||
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
|
||||
|
||||
// Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
|
||||
Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
|
||||
|
||||
//#then - idle should be deferred (not dropped), and task should eventually complete
|
||||
expect(task.status).toBe("running")
|
||||
await new Promise((resolve) => setTimeout(resolve, 220))
|
||||
expect(task.status).toBe("completed")
|
||||
expect(messagesCalls).toEqual([sessionID])
|
||||
} finally {
|
||||
Date.now = realDateNow
|
||||
manager.shutdown()
|
||||
}
|
||||
})
|
||||
|
||||
test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
|
||||
//#given - a running task started more than MIN_IDLE_TIME_MS ago
|
||||
const sessionID = "session-late-idle"
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
messages: async () => ({
|
||||
data: [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "ok" }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
todo: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
const task: BackgroundTask = {
|
||||
id: "task-late-idle",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-1",
|
||||
description: "late idle task",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
|
||||
}
|
||||
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when
|
||||
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
|
||||
|
||||
//#then - should be processed immediately
|
||||
await new Promise((resolve) => setTimeout(resolve, 10))
|
||||
expect(task.status).toBe("completed")
|
||||
|
||||
manager.shutdown()
|
||||
})
|
||||
|
||||
test("should not process deferred idle if task already completed by other means", async () => {
|
||||
//#given - a running task
|
||||
const sessionID = "session-deferred-noop"
|
||||
let messagesCallCount = 0
|
||||
const realDateNow = Date.now
|
||||
const baseNow = realDateNow()
|
||||
|
||||
const client = {
|
||||
session: {
|
||||
prompt: async () => ({}),
|
||||
promptAsync: async () => ({}),
|
||||
abort: async () => ({}),
|
||||
messages: async () => {
|
||||
messagesCallCount += 1
|
||||
return {
|
||||
data: [
|
||||
{
|
||||
info: { role: "assistant" },
|
||||
parts: [{ type: "text", text: "ok" }],
|
||||
},
|
||||
],
|
||||
}
|
||||
},
|
||||
todo: async () => ({ data: [] }),
|
||||
},
|
||||
}
|
||||
|
||||
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
|
||||
stubNotifyParentSession(manager)
|
||||
|
||||
const remainingMs = 120
|
||||
const task: BackgroundTask = {
|
||||
id: "task-deferred-noop",
|
||||
sessionID,
|
||||
parentSessionID: "parent-session",
|
||||
parentMessageID: "msg-1",
|
||||
description: "deferred noop task",
|
||||
prompt: "test",
|
||||
agent: "explore",
|
||||
status: "running",
|
||||
startedAt: new Date(baseNow),
|
||||
}
|
||||
getTaskMap(manager).set(task.id, task)
|
||||
|
||||
//#when - session.idle fires early, then task completes via another path before defer timer
|
||||
try {
|
||||
Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
|
||||
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
|
||||
expect(messagesCallCount).toBe(0)
|
||||
|
||||
await tryCompleteTaskForTest(manager, task)
|
||||
expect(task.status).toBe("completed")
|
||||
|
||||
// Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
|
||||
Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
|
||||
|
||||
//#then - deferred callback should be a no-op
|
||||
await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
|
||||
expect(task.status).toBe("completed")
|
||||
expect(messagesCallCount).toBe(0)
|
||||
} finally {
|
||||
Date.now = realDateNow
|
||||
manager.shutdown()
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
@@ -88,6 +88,7 @@ export class BackgroundManager {
|
||||
private queuesByKey: Map<string, QueueItem[]> = new Map()
|
||||
private processingKeys: Set<string> = new Set()
|
||||
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
|
||||
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
|
||||
|
||||
constructor(
|
||||
ctx: PluginInput,
|
||||
@@ -309,7 +310,7 @@ export class BackgroundManager {
|
||||
promptLength: input.prompt.length,
|
||||
})
|
||||
|
||||
// Use prompt() instead of promptAsync() to properly initialize agent loop (fire-and-forget)
|
||||
// Fire-and-forget prompt via promptAsync (no response body needed)
|
||||
// Include model if caller provided one (e.g., from Sisyphus category configs)
|
||||
// IMPORTANT: variant must be a top-level field in the body, NOT nested inside model
|
||||
// OpenCode's PromptInput schema expects: { model: { providerID, modelID }, variant: "max" }
|
||||
@@ -328,7 +329,6 @@ export class BackgroundManager {
|
||||
tools: {
|
||||
...getAgentToolRestrictions(input.agent),
|
||||
task: false,
|
||||
delegate_task: false,
|
||||
call_omo_agent: true,
|
||||
question: false,
|
||||
},
|
||||
@@ -357,6 +357,7 @@ export class BackgroundManager {
|
||||
}).catch(() => {})
|
||||
|
||||
this.markForNotification(existingTask)
|
||||
this.cleanupPendingByParent(existingTask)
|
||||
this.notifyParentSession(existingTask).catch(err => {
|
||||
log("[background-agent] Failed to notify on error:", err)
|
||||
})
|
||||
@@ -410,7 +411,7 @@ export class BackgroundManager {
|
||||
}
|
||||
|
||||
/**
|
||||
* Track a task created elsewhere (e.g., from delegate_task) for notification tracking.
|
||||
* Track a task created elsewhere (e.g., from task) for notification tracking.
|
||||
* This allows tasks created by other tools to receive the same toast/prompt notifications.
|
||||
*/
|
||||
async trackTask(input: {
|
||||
@@ -458,7 +459,7 @@ export class BackgroundManager {
|
||||
return existingTask
|
||||
}
|
||||
|
||||
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task"
|
||||
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"
|
||||
|
||||
// Acquire concurrency slot if a key is provided
|
||||
if (input.concurrencyKey) {
|
||||
@@ -472,7 +473,7 @@ export class BackgroundManager {
|
||||
parentMessageID: "",
|
||||
description: input.description,
|
||||
prompt: "",
|
||||
agent: input.agent || "delegate_task",
|
||||
agent: input.agent || "task",
|
||||
status: "running",
|
||||
startedAt: new Date(),
|
||||
progress: {
|
||||
@@ -570,7 +571,7 @@ export class BackgroundManager {
|
||||
promptLength: input.prompt.length,
|
||||
})
|
||||
|
||||
// Use prompt() instead of promptAsync() to properly initialize agent loop
|
||||
// Fire-and-forget prompt via promptAsync (no response body needed)
|
||||
// Include model if task has one (preserved from original launch with category config)
|
||||
// variant must be top-level in body, not nested inside model (OpenCode PromptInput schema)
|
||||
const resumeModel = existingTask.model
|
||||
@@ -578,7 +579,7 @@ export class BackgroundManager {
|
||||
: undefined
|
||||
const resumeVariant = existingTask.model?.variant
|
||||
|
||||
this.client.session.prompt({
|
||||
this.client.session.promptAsync({
|
||||
path: { id: existingTask.sessionID },
|
||||
body: {
|
||||
agent: existingTask.agent,
|
||||
@@ -587,7 +588,6 @@ export class BackgroundManager {
|
||||
tools: {
|
||||
...getAgentToolRestrictions(existingTask.agent),
|
||||
task: false,
|
||||
delegate_task: false,
|
||||
call_omo_agent: true,
|
||||
question: false,
|
||||
},
|
||||
@@ -614,6 +614,7 @@ export class BackgroundManager {
|
||||
}
|
||||
|
||||
this.markForNotification(existingTask)
|
||||
this.cleanupPendingByParent(existingTask)
|
||||
this.notifyParentSession(existingTask).catch(err => {
|
||||
log("[background-agent] Failed to notify on resume error:", err)
|
||||
})
|
||||
@@ -651,6 +652,13 @@ export class BackgroundManager {
|
||||
const task = this.findBySession(sessionID)
|
||||
if (!task) return
|
||||
|
||||
// Clear any pending idle deferral timer since the task is still active
|
||||
const existingTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (existingTimer) {
|
||||
clearTimeout(existingTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
if (partInfo?.type === "tool" || partInfo?.tool) {
|
||||
if (!task.progress) {
|
||||
task.progress = {
|
||||
@@ -677,7 +685,17 @@ export class BackgroundManager {
|
||||
// Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
|
||||
const elapsedMs = Date.now() - startedAt.getTime()
|
||||
if (elapsedMs < MIN_IDLE_TIME_MS) {
|
||||
log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
|
||||
const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
|
||||
if (!this.idleDeferralTimers.has(task.id)) {
|
||||
log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
|
||||
const timer = setTimeout(() => {
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
this.handleEvent({ type: "session.idle", properties: { sessionID } })
|
||||
}, remainingMs)
|
||||
this.idleDeferralTimers.set(task.id, timer)
|
||||
} else {
|
||||
log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -718,28 +736,47 @@ export class BackgroundManager {
|
||||
if (!info || typeof info.id !== "string") return
|
||||
const sessionID = info.id
|
||||
|
||||
const task = this.findBySession(sessionID)
|
||||
if (!task) return
|
||||
|
||||
if (task.status === "running") {
|
||||
task.status = "cancelled"
|
||||
task.completedAt = new Date()
|
||||
task.error = "Session deleted"
|
||||
const tasksToCancel = new Map<string, BackgroundTask>()
|
||||
const directTask = this.findBySession(sessionID)
|
||||
if (directTask) {
|
||||
tasksToCancel.set(directTask.id, directTask)
|
||||
}
|
||||
for (const descendant of this.getAllDescendantTasks(sessionID)) {
|
||||
tasksToCancel.set(descendant.id, descendant)
|
||||
}
|
||||
|
||||
if (task.concurrencyKey) {
|
||||
this.concurrencyManager.release(task.concurrencyKey)
|
||||
task.concurrencyKey = undefined
|
||||
}
|
||||
const existingTimer = this.completionTimers.get(task.id)
|
||||
if (existingTimer) {
|
||||
clearTimeout(existingTimer)
|
||||
this.completionTimers.delete(task.id)
|
||||
if (tasksToCancel.size === 0) return
|
||||
|
||||
for (const task of tasksToCancel.values()) {
|
||||
if (task.status === "running" || task.status === "pending") {
|
||||
void this.cancelTask(task.id, {
|
||||
source: "session.deleted",
|
||||
reason: "Session deleted",
|
||||
skipNotification: true,
|
||||
}).catch(err => {
|
||||
log("[background-agent] Failed to cancel task on session.deleted:", { taskId: task.id, error: err })
|
||||
})
|
||||
}
|
||||
|
||||
const existingTimer = this.completionTimers.get(task.id)
|
||||
if (existingTimer) {
|
||||
clearTimeout(existingTimer)
|
||||
this.completionTimers.delete(task.id)
|
||||
}
|
||||
|
||||
const idleTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
this.cleanupPendingByParent(task)
|
||||
this.tasks.delete(task.id)
|
||||
this.clearNotificationsForTask(task.id)
|
||||
if (task.sessionID) {
|
||||
subagentSessions.delete(task.sessionID)
|
||||
}
|
||||
}
|
||||
this.cleanupPendingByParent(task)
|
||||
this.tasks.delete(task.id)
|
||||
this.clearNotificationsForTask(task.id)
|
||||
subagentSessions.delete(sessionID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -890,6 +927,12 @@ export class BackgroundManager {
|
||||
this.completionTimers.delete(task.id)
|
||||
}
|
||||
|
||||
const idleTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
this.cleanupPendingByParent(task)
|
||||
|
||||
if (abortSession && task.sessionID) {
|
||||
@@ -1025,6 +1068,15 @@ export class BackgroundManager {
|
||||
|
||||
this.markForNotification(task)
|
||||
|
||||
// Ensure pending tracking is cleaned up even if notification fails
|
||||
this.cleanupPendingByParent(task)
|
||||
|
||||
const idleTimer = this.idleDeferralTimers.get(task.id)
|
||||
if (idleTimer) {
|
||||
clearTimeout(idleTimer)
|
||||
this.idleDeferralTimers.delete(task.id)
|
||||
}
|
||||
|
||||
if (task.sessionID) {
|
||||
this.client.session.abort({
|
||||
path: { id: task.sessionID },
|
||||
@@ -1146,7 +1198,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
})
|
||||
|
||||
try {
|
||||
await this.client.session.prompt({
|
||||
await this.client.session.promptAsync({
|
||||
path: { id: task.parentSessionID },
|
||||
body: {
|
||||
noReply: !allComplete,
|
||||
@@ -1511,6 +1563,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
}
|
||||
this.completionTimers.clear()
|
||||
|
||||
for (const timer of this.idleDeferralTimers.values()) {
|
||||
clearTimeout(timer)
|
||||
}
|
||||
this.idleDeferralTimers.clear()
|
||||
|
||||
this.concurrencyManager.clear()
|
||||
this.tasks.clear()
|
||||
this.notifications.clear()
|
||||
|
||||
@@ -240,7 +240,7 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
|
||||
})
|
||||
|
||||
try {
|
||||
await client.session.prompt({
|
||||
await client.session.promptAsync({
|
||||
path: { id: task.parentSessionID },
|
||||
body: {
|
||||
noReply: !allComplete,
|
||||
|
||||
@@ -146,7 +146,6 @@ export async function startTask(
|
||||
tools: {
|
||||
...getAgentToolRestrictions(input.agent),
|
||||
task: false,
|
||||
delegate_task: false,
|
||||
call_omo_agent: true,
|
||||
question: false,
|
||||
},
|
||||
@@ -222,7 +221,7 @@ export async function resumeTask(
|
||||
: undefined
|
||||
const resumeVariant = task.model?.variant
|
||||
|
||||
client.session.prompt({
|
||||
client.session.promptAsync({
|
||||
path: { id: task.sessionID },
|
||||
body: {
|
||||
agent: task.agent,
|
||||
@@ -231,7 +230,6 @@ export async function resumeTask(
|
||||
tools: {
|
||||
...getAgentToolRestrictions(task.agent),
|
||||
task: false,
|
||||
delegate_task: false,
|
||||
call_omo_agent: true,
|
||||
question: false,
|
||||
},
|
||||
|
||||
@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.
|
||||
|
||||
\`\`\`
|
||||
// Fire all at once, collect results later
|
||||
delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
|
||||
delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization")
|
||||
delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
|
||||
delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
|
||||
delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
|
||||
delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
|
||||
task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
|
||||
task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
|
||||
task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
|
||||
task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
|
||||
task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
|
||||
task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
|
||||
\`\`\`
|
||||
|
||||
<dynamic-agents>
|
||||
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
|
||||
Example spawning:
|
||||
\`\`\`
|
||||
// 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
|
||||
delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
|
||||
delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
|
||||
delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories")
|
||||
task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
|
||||
task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
|
||||
task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
|
||||
// ... more based on calculation
|
||||
\`\`\`
|
||||
</dynamic-agents>
|
||||
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [
|
||||
|
||||
**Mark "generate" as in_progress.**
|
||||
|
||||
<critical>
|
||||
**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
|
||||
NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
|
||||
</critical>
|
||||
|
||||
### Root AGENTS.md (Full Treatment)
|
||||
|
||||
\`\`\`markdown
|
||||
@@ -240,7 +245,7 @@ Launch writing tasks for each location:
|
||||
|
||||
\`\`\`
|
||||
for loc in AGENTS_LOCATIONS (except root):
|
||||
delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\`
|
||||
task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
|
||||
Generate AGENTS.md for: \${loc.path}
|
||||
- Reason: \${loc.reason}
|
||||
- 30-80 lines max
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
name: git-master
|
||||
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
|
||||
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
|
||||
---
|
||||
|
||||
# Git Master Agent
|
||||
|
||||
@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
|
||||
export const gitMasterSkill: BuiltinSkill = {
|
||||
name: "git-master",
|
||||
description:
|
||||
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
|
||||
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
|
||||
template: `# Git Master Agent
|
||||
|
||||
You are a Git expert combining three specializations:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "bun:test"
|
||||
import { describe, it, expect, beforeEach, afterEach, mock } from "bun:test"
|
||||
import { mkdirSync, writeFileSync, rmSync } from "fs"
|
||||
import { join } from "path"
|
||||
import { tmpdir } from "os"
|
||||
@@ -126,37 +126,123 @@ describe("getSystemMcpServerNames", () => {
|
||||
}
|
||||
})
|
||||
|
||||
it("merges server names from multiple .mcp.json files", async () => {
|
||||
// given
|
||||
mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
|
||||
|
||||
const projectMcp = {
|
||||
mcpServers: {
|
||||
playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
|
||||
},
|
||||
}
|
||||
const localMcp = {
|
||||
mcpServers: {
|
||||
memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
|
||||
},
|
||||
}
|
||||
|
||||
writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
|
||||
writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))
|
||||
it("merges server names from multiple .mcp.json files", async () => {
|
||||
// given
|
||||
mkdirSync(join(TEST_DIR, ".claude"), { recursive: true })
|
||||
|
||||
const projectMcp = {
|
||||
mcpServers: {
|
||||
playwright: { command: "npx", args: ["@playwright/mcp@latest"] },
|
||||
},
|
||||
}
|
||||
const localMcp = {
|
||||
mcpServers: {
|
||||
memory: { command: "npx", args: ["-y", "@anthropic-ai/mcp-server-memory"] },
|
||||
},
|
||||
}
|
||||
|
||||
writeFileSync(join(TEST_DIR, ".mcp.json"), JSON.stringify(projectMcp))
|
||||
writeFileSync(join(TEST_DIR, ".claude", ".mcp.json"), JSON.stringify(localMcp))
|
||||
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
// when
|
||||
const { getSystemMcpServerNames } = await import("./loader")
|
||||
const names = getSystemMcpServerNames()
|
||||
try {
|
||||
// when
|
||||
const { getSystemMcpServerNames } = await import("./loader")
|
||||
const names = getSystemMcpServerNames()
|
||||
|
||||
// then
|
||||
expect(names.has("playwright")).toBe(true)
|
||||
expect(names.has("memory")).toBe(true)
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
// then
|
||||
expect(names.has("playwright")).toBe(true)
|
||||
expect(names.has("memory")).toBe(true)
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
|
||||
it("reads user-level MCP config from ~/.claude.json", async () => {
|
||||
// given
|
||||
const userConfigPath = join(TEST_DIR, ".claude.json")
|
||||
const userMcpConfig = {
|
||||
mcpServers: {
|
||||
"user-server": {
|
||||
command: "npx",
|
||||
args: ["user-mcp-server"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
mock.module("os", () => ({
|
||||
homedir: () => TEST_DIR,
|
||||
tmpdir,
|
||||
}))
|
||||
|
||||
writeFileSync(userConfigPath, JSON.stringify(userMcpConfig))
|
||||
|
||||
const { getSystemMcpServerNames } = await import("./loader")
|
||||
const names = getSystemMcpServerNames()
|
||||
|
||||
expect(names.has("user-server")).toBe(true)
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
rmSync(userConfigPath, { force: true })
|
||||
}
|
||||
})
|
||||
|
||||
it("reads both ~/.claude.json and ~/.claude/.mcp.json for user scope", async () => {
|
||||
// given: simulate both user-level config files
|
||||
const userClaudeJson = join(TEST_DIR, ".claude.json")
|
||||
const claudeDir = join(TEST_DIR, ".claude")
|
||||
const claudeDirMcpJson = join(claudeDir, ".mcp.json")
|
||||
|
||||
mkdirSync(claudeDir, { recursive: true })
|
||||
|
||||
// ~/.claude.json has server-a
|
||||
writeFileSync(userClaudeJson, JSON.stringify({
|
||||
mcpServers: {
|
||||
"server-from-claude-json": {
|
||||
command: "npx",
|
||||
args: ["server-a"],
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
// ~/.claude/.mcp.json has server-b (CLI-managed)
|
||||
writeFileSync(claudeDirMcpJson, JSON.stringify({
|
||||
mcpServers: {
|
||||
"server-from-mcp-json": {
|
||||
command: "npx",
|
||||
args: ["server-b"],
|
||||
},
|
||||
},
|
||||
}))
|
||||
|
||||
const originalCwd = process.cwd()
|
||||
process.chdir(TEST_DIR)
|
||||
|
||||
try {
|
||||
mock.module("os", () => ({
|
||||
homedir: () => TEST_DIR,
|
||||
tmpdir,
|
||||
}))
|
||||
|
||||
// Also mock getClaudeConfigDir to point to our test .claude dir
|
||||
mock.module("../../shared", () => ({
|
||||
getClaudeConfigDir: () => claudeDir,
|
||||
}))
|
||||
|
||||
const { getSystemMcpServerNames } = await import("./loader")
|
||||
const names = getSystemMcpServerNames()
|
||||
|
||||
// Both sources should be merged
|
||||
expect(names.has("server-from-claude-json")).toBe(true)
|
||||
expect(names.has("server-from-mcp-json")).toBe(true)
|
||||
} finally {
|
||||
process.chdir(originalCwd)
|
||||
}
|
||||
})
|
||||
})
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { existsSync, readFileSync } from "fs"
|
||||
import { join } from "path"
|
||||
import { homedir } from "os"
|
||||
import { getClaudeConfigDir } from "../../shared"
|
||||
import type {
|
||||
ClaudeCodeMcpConfig,
|
||||
@@ -20,6 +21,7 @@ function getMcpConfigPaths(): McpConfigPath[] {
|
||||
const cwd = process.cwd()
|
||||
|
||||
return [
|
||||
{ path: join(homedir(), ".claude.json"), scope: "user" },
|
||||
{ path: join(claudeConfigDir, ".mcp.json"), scope: "user" },
|
||||
{ path: join(cwd, ".mcp.json"), scope: "project" },
|
||||
{ path: join(cwd, ".claude", ".mcp.json"), scope: "local" },
|
||||
|
||||
@@ -314,6 +314,44 @@ describe("resolveMultipleSkillsAsync", () => {
|
||||
expect(gitMasterContent).toContain("Co-authored-by: Sisyphus")
|
||||
})
|
||||
|
||||
it("should inject custom string footer when commit_footer is a string", async () => {
|
||||
// given: git-master skill with custom string footer
|
||||
const skillNames = ["git-master"]
|
||||
const customFooter = "Custom footer from my team"
|
||||
const options = {
|
||||
gitMasterConfig: {
|
||||
commit_footer: customFooter,
|
||||
include_co_authored_by: false,
|
||||
},
|
||||
}
|
||||
|
||||
// when: resolving with custom footer config
|
||||
const result = await resolveMultipleSkillsAsync(skillNames, options)
|
||||
|
||||
// then: custom footer is injected instead of default
|
||||
const gitMasterContent = result.resolved.get("git-master")
|
||||
expect(gitMasterContent).toContain(customFooter)
|
||||
expect(gitMasterContent).not.toContain("Ultraworked with [Sisyphus]")
|
||||
})
|
||||
|
||||
it("should use default Sisyphus footer when commit_footer is boolean true", async () => {
|
||||
// given: git-master skill with boolean true footer
|
||||
const skillNames = ["git-master"]
|
||||
const options = {
|
||||
gitMasterConfig: {
|
||||
commit_footer: true,
|
||||
include_co_authored_by: false,
|
||||
},
|
||||
}
|
||||
|
||||
// when: resolving with boolean true footer config
|
||||
const result = await resolveMultipleSkillsAsync(skillNames, options)
|
||||
|
||||
// then: default Sisyphus footer is injected
|
||||
const gitMasterContent = result.resolved.get("git-master")
|
||||
expect(gitMasterContent).toContain("Ultraworked with [Sisyphus]")
|
||||
})
|
||||
|
||||
it("should handle empty array", async () => {
|
||||
// given: empty skill names
|
||||
const skillNames: string[] = []
|
||||
@@ -389,3 +427,33 @@ describe("resolveMultipleSkills with browserProvider", () => {
|
||||
expect(result.notFound).toContain("agent-browser")
|
||||
})
|
||||
})
|
||||
|
||||
describe("resolveMultipleSkillsAsync with browserProvider filtering", () => {
|
||||
it("should exclude discovered agent-browser when browserProvider is playwright", async () => {
|
||||
// given: playwright is the selected browserProvider (default)
|
||||
const skillNames = ["playwright", "git-master"]
|
||||
const options = { browserProvider: "playwright" as const }
|
||||
|
||||
// when: resolving multiple skills
|
||||
const result = await resolveMultipleSkillsAsync(skillNames, options)
|
||||
|
||||
// then: playwright resolved, agent-browser would be excluded if discovered
|
||||
expect(result.resolved.has("playwright")).toBe(true)
|
||||
expect(result.resolved.has("git-master")).toBe(true)
|
||||
expect(result.notFound).not.toContain("playwright")
|
||||
})
|
||||
|
||||
it("should exclude discovered playwright when browserProvider is agent-browser", async () => {
|
||||
// given: agent-browser is the selected browserProvider
|
||||
const skillNames = ["agent-browser", "git-master"]
|
||||
const options = { browserProvider: "agent-browser" as const }
|
||||
|
||||
// when: resolving multiple skills
|
||||
const result = await resolveMultipleSkillsAsync(skillNames, options)
|
||||
|
||||
// then: agent-browser resolved, playwright would be excluded if discovered
|
||||
expect(result.resolved.has("agent-browser")).toBe(true)
|
||||
expect(result.resolved.has("git-master")).toBe(true)
|
||||
expect(result.notFound).not.toContain("agent-browser")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -55,10 +55,23 @@ async function getAllSkills(options?: SkillResolutionOptions): Promise<LoadedSki
|
||||
mcpConfig: skill.mcpConfig,
|
||||
}))
|
||||
|
||||
const discoveredNames = new Set(discoveredSkills.map((s) => s.name))
|
||||
// Provider-gated skill names that should be filtered based on browserProvider
|
||||
const providerGatedSkillNames = new Set(["agent-browser", "playwright"])
|
||||
const browserProvider = options?.browserProvider ?? "playwright"
|
||||
|
||||
// Filter discovered skills to exclude provider-gated names that don't match the selected provider
|
||||
const filteredDiscoveredSkills = discoveredSkills.filter((skill) => {
|
||||
if (!providerGatedSkillNames.has(skill.name)) {
|
||||
return true
|
||||
}
|
||||
// For provider-gated skills, only include if it matches the selected provider
|
||||
return skill.name === browserProvider
|
||||
})
|
||||
|
||||
const discoveredNames = new Set(filteredDiscoveredSkills.map((s) => s.name))
|
||||
const uniqueBuiltins = builtinSkillsAsLoaded.filter((s) => !discoveredNames.has(s.name))
|
||||
|
||||
let allSkills = [...discoveredSkills, ...uniqueBuiltins]
|
||||
let allSkills = [...filteredDiscoveredSkills, ...uniqueBuiltins]
|
||||
|
||||
// Filter discovered skills by disabledSkills (builtin skills are already filtered by createBuiltinSkills)
|
||||
if (hasDisabledSkills) {
|
||||
@@ -97,9 +110,10 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
|
||||
sections.push(``)
|
||||
|
||||
if (commitFooter) {
|
||||
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
|
||||
sections.push(`1. **Footer in commit body:**`)
|
||||
sections.push("```")
|
||||
sections.push(`Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)`)
|
||||
sections.push(footerText)
|
||||
sections.push("```")
|
||||
sections.push(``)
|
||||
}
|
||||
@@ -113,14 +127,16 @@ export function injectGitMasterConfig(template: string, config?: GitMasterConfig
|
||||
}
|
||||
|
||||
if (commitFooter && includeCoAuthoredBy) {
|
||||
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
|
||||
sections.push(`**Example (both enabled):**`)
|
||||
sections.push("```bash")
|
||||
sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
|
||||
sections.push(`git commit -m "{Commit Message}" -m "${footerText}" -m "Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>"`)
|
||||
sections.push("```")
|
||||
} else if (commitFooter) {
|
||||
const footerText = typeof commitFooter === "string" ? commitFooter : "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"
|
||||
sections.push(`**Example:**`)
|
||||
sections.push("```bash")
|
||||
sections.push(`git commit -m "{Commit Message}" -m "Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)"`)
|
||||
sections.push(`git commit -m "{Commit Message}" -m "${footerText}"`)
|
||||
sections.push("```")
|
||||
} else if (includeCoAuthoredBy) {
|
||||
sections.push(`**Example:**`)
|
||||
|
||||
111
src/features/tool-metadata-store/index.test.ts
Normal file
111
src/features/tool-metadata-store/index.test.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
import { describe, test, expect, beforeEach } from "bun:test"
|
||||
import {
|
||||
storeToolMetadata,
|
||||
consumeToolMetadata,
|
||||
getPendingStoreSize,
|
||||
clearPendingStore,
|
||||
} from "./index"
|
||||
|
||||
describe("tool-metadata-store", () => {
|
||||
beforeEach(() => {
|
||||
clearPendingStore()
|
||||
})
|
||||
|
||||
describe("storeToolMetadata", () => {
|
||||
test("#given metadata with title and metadata, #when stored, #then store size increases", () => {
|
||||
//#given
|
||||
const sessionID = "ses_abc123"
|
||||
const callID = "call_001"
|
||||
const data = {
|
||||
title: "Test Task",
|
||||
metadata: { sessionId: "ses_child", agent: "oracle" },
|
||||
}
|
||||
|
||||
//#when
|
||||
storeToolMetadata(sessionID, callID, data)
|
||||
|
||||
//#then
|
||||
expect(getPendingStoreSize()).toBe(1)
|
||||
})
|
||||
})
|
||||
|
||||
describe("consumeToolMetadata", () => {
|
||||
test("#given stored metadata, #when consumed, #then returns the stored data", () => {
|
||||
//#given
|
||||
const sessionID = "ses_abc123"
|
||||
const callID = "call_001"
|
||||
const data = {
|
||||
title: "My Task",
|
||||
metadata: { sessionId: "ses_sub", run_in_background: true },
|
||||
}
|
||||
storeToolMetadata(sessionID, callID, data)
|
||||
|
||||
//#when
|
||||
const result = consumeToolMetadata(sessionID, callID)
|
||||
|
||||
//#then
|
||||
expect(result).toEqual(data)
|
||||
})
|
||||
|
||||
test("#given stored metadata, #when consumed twice, #then second call returns undefined", () => {
|
||||
//#given
|
||||
const sessionID = "ses_abc123"
|
||||
const callID = "call_001"
|
||||
storeToolMetadata(sessionID, callID, { title: "Task" })
|
||||
|
||||
//#when
|
||||
consumeToolMetadata(sessionID, callID)
|
||||
const second = consumeToolMetadata(sessionID, callID)
|
||||
|
||||
//#then
|
||||
expect(second).toBeUndefined()
|
||||
expect(getPendingStoreSize()).toBe(0)
|
||||
})
|
||||
|
||||
test("#given no stored metadata, #when consumed, #then returns undefined", () => {
|
||||
//#given
|
||||
const sessionID = "ses_nonexistent"
|
||||
const callID = "call_999"
|
||||
|
||||
//#when
|
||||
const result = consumeToolMetadata(sessionID, callID)
|
||||
|
||||
//#then
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe("isolation", () => {
|
||||
test("#given multiple entries, #when consuming one, #then others remain", () => {
|
||||
//#given
|
||||
storeToolMetadata("ses_1", "call_a", { title: "Task A" })
|
||||
storeToolMetadata("ses_1", "call_b", { title: "Task B" })
|
||||
storeToolMetadata("ses_2", "call_a", { title: "Task C" })
|
||||
|
||||
//#when
|
||||
const resultA = consumeToolMetadata("ses_1", "call_a")
|
||||
|
||||
//#then
|
||||
expect(resultA?.title).toBe("Task A")
|
||||
expect(getPendingStoreSize()).toBe(2)
|
||||
expect(consumeToolMetadata("ses_1", "call_b")?.title).toBe("Task B")
|
||||
expect(consumeToolMetadata("ses_2", "call_a")?.title).toBe("Task C")
|
||||
expect(getPendingStoreSize()).toBe(0)
|
||||
})
|
||||
})
|
||||
|
||||
describe("overwrite", () => {
|
||||
test("#given existing entry, #when stored again with same key, #then overwrites", () => {
|
||||
//#given
|
||||
storeToolMetadata("ses_1", "call_a", { title: "Old" })
|
||||
|
||||
//#when
|
||||
storeToolMetadata("ses_1", "call_a", { title: "New", metadata: { updated: true } })
|
||||
|
||||
//#then
|
||||
const result = consumeToolMetadata("ses_1", "call_a")
|
||||
expect(result?.title).toBe("New")
|
||||
expect(result?.metadata).toEqual({ updated: true })
|
||||
})
|
||||
})
|
||||
})
|
||||
84
src/features/tool-metadata-store/index.ts
Normal file
84
src/features/tool-metadata-store/index.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
|
||||
* Pending tool metadata store.
|
||||
*
|
||||
* OpenCode's `fromPlugin()` wrapper always replaces the metadata returned by
|
||||
* plugin tools with `{ truncated, outputPath }`, discarding any sessionId,
|
||||
* title, or custom metadata set during `execute()`.
|
||||
*
|
||||
* This store captures metadata written via `ctx.metadata()` inside execute(),
|
||||
* then the `tool.execute.after` hook consumes it and merges it back into the
|
||||
* result *before* the processor writes the final part to the session store.
|
||||
*
|
||||
* Flow:
|
||||
* execute() → storeToolMetadata(sessionID, callID, data)
|
||||
* fromPlugin() → overwrites metadata with { truncated }
|
||||
* tool.execute.after → consumeToolMetadata(sessionID, callID) → merges back
|
||||
* processor → Session.updatePart(status:"completed", metadata: result.metadata)
|
||||
*/
|
||||
|
||||
export interface PendingToolMetadata {
|
||||
title?: string
|
||||
metadata?: Record<string, unknown>
|
||||
}
|
||||
|
||||
const pendingStore = new Map<string, PendingToolMetadata & { storedAt: number }>()
|
||||
|
||||
const STALE_TIMEOUT_MS = 15 * 60 * 1000
|
||||
|
||||
function makeKey(sessionID: string, callID: string): string {
|
||||
return `${sessionID}:${callID}`
|
||||
}
|
||||
|
||||
function cleanupStaleEntries(): void {
|
||||
const now = Date.now()
|
||||
for (const [key, entry] of pendingStore) {
|
||||
if (now - entry.storedAt > STALE_TIMEOUT_MS) {
|
||||
pendingStore.delete(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Store metadata to be restored after fromPlugin() overwrites it.
|
||||
* Called from tool execute() functions alongside ctx.metadata().
|
||||
*/
|
||||
export function storeToolMetadata(
|
||||
sessionID: string,
|
||||
callID: string,
|
||||
data: PendingToolMetadata,
|
||||
): void {
|
||||
cleanupStaleEntries()
|
||||
pendingStore.set(makeKey(sessionID, callID), { ...data, storedAt: Date.now() })
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume stored metadata (one-time read, removes from store).
|
||||
* Called from tool.execute.after hook.
|
||||
*/
|
||||
export function consumeToolMetadata(
|
||||
sessionID: string,
|
||||
callID: string,
|
||||
): PendingToolMetadata | undefined {
|
||||
const key = makeKey(sessionID, callID)
|
||||
const stored = pendingStore.get(key)
|
||||
if (stored) {
|
||||
pendingStore.delete(key)
|
||||
const { storedAt: _, ...data } = stored
|
||||
return data
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current store size (for testing/debugging).
|
||||
*/
|
||||
export function getPendingStoreSize(): number {
|
||||
return pendingStore.size
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear all pending metadata (for testing).
|
||||
*/
|
||||
export function clearPendingStore(): void {
|
||||
pendingStore.clear()
|
||||
}
|
||||
@@ -24,7 +24,7 @@ export const TARGET_TOOLS = new Set([
|
||||
export const AGENT_TOOLS = new Set([
|
||||
"task",
|
||||
"call_omo_agent",
|
||||
"delegate_task",
|
||||
"task",
|
||||
]);
|
||||
|
||||
export const REMINDER_MESSAGE = `
|
||||
@@ -32,13 +32,13 @@ export const REMINDER_MESSAGE = `
|
||||
|
||||
You called a search/fetch tool directly without leveraging specialized agents.
|
||||
|
||||
RECOMMENDED: Use delegate_task with explore/librarian agents for better results:
|
||||
RECOMMENDED: Use task with explore/librarian agents for better results:
|
||||
|
||||
\`\`\`
|
||||
// Parallel exploration - fire multiple agents simultaneously
|
||||
delegate_task(agent="explore", prompt="Find all files matching pattern X")
|
||||
delegate_task(agent="explore", prompt="Search for implementation of Y")
|
||||
delegate_task(agent="librarian", prompt="Lookup documentation for Z")
|
||||
task(agent="explore", prompt="Find all files matching pattern X")
|
||||
task(agent="explore", prompt="Search for implementation of Y")
|
||||
task(agent="librarian", prompt="Lookup documentation for Z")
|
||||
|
||||
// Then continue your work while they run in background
|
||||
// System will notify you when each completes
|
||||
@@ -50,5 +50,5 @@ WHY:
|
||||
- Specialized agents have domain expertise
|
||||
- Reduces context window usage in main session
|
||||
|
||||
ALWAYS prefer: Multiple parallel delegate_task calls > Direct tool calls
|
||||
ALWAYS prefer: Multiple parallel task calls > Direct tool calls
|
||||
`;
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
import type { ParsedTokenLimitError } from "./types"
|
||||
import type { ExperimentalConfig } from "../../config"
|
||||
import type { DeduplicationConfig } from "./pruning-deduplication"
|
||||
import type { PruningState } from "./pruning-types"
|
||||
import { executeDeduplication } from "./pruning-deduplication"
|
||||
import { truncateToolOutputsByCallId } from "./pruning-tool-output-truncation"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
function createPruningState(): PruningState {
|
||||
return {
|
||||
toolIdsToPrune: new Set<string>(),
|
||||
currentTurn: 0,
|
||||
fileOperations: new Map(),
|
||||
toolSignatures: new Map(),
|
||||
erroredTools: new Map(),
|
||||
}
|
||||
}
|
||||
|
||||
function isPromptTooLongError(parsed: ParsedTokenLimitError): boolean {
|
||||
return !parsed.errorType.toLowerCase().includes("non-empty content")
|
||||
}
|
||||
|
||||
function getDeduplicationPlan(
|
||||
experimental?: ExperimentalConfig,
|
||||
): { config: DeduplicationConfig; protectedTools: Set<string> } | null {
|
||||
const pruningConfig = experimental?.dynamic_context_pruning
|
||||
if (!pruningConfig?.enabled) return null
|
||||
|
||||
const deduplicationEnabled = pruningConfig.strategies?.deduplication?.enabled
|
||||
if (deduplicationEnabled === false) return null
|
||||
|
||||
const protectedTools = new Set(pruningConfig.protected_tools ?? [])
|
||||
return {
|
||||
config: {
|
||||
enabled: true,
|
||||
protectedTools: pruningConfig.protected_tools ?? [],
|
||||
},
|
||||
protectedTools,
|
||||
}
|
||||
}
|
||||
|
||||
export async function attemptDeduplicationRecovery(
|
||||
sessionID: string,
|
||||
parsed: ParsedTokenLimitError,
|
||||
experimental: ExperimentalConfig | undefined,
|
||||
): Promise<void> {
|
||||
if (!isPromptTooLongError(parsed)) return
|
||||
|
||||
const plan = getDeduplicationPlan(experimental)
|
||||
if (!plan) return
|
||||
|
||||
const pruningState = createPruningState()
|
||||
const prunedCount = executeDeduplication(
|
||||
sessionID,
|
||||
pruningState,
|
||||
plan.config,
|
||||
plan.protectedTools,
|
||||
)
|
||||
const { truncatedCount } = truncateToolOutputsByCallId(
|
||||
sessionID,
|
||||
pruningState.toolIdsToPrune,
|
||||
)
|
||||
|
||||
if (prunedCount > 0 || truncatedCount > 0) {
|
||||
log("[auto-compact] deduplication recovery applied", {
|
||||
sessionID,
|
||||
prunedCount,
|
||||
truncatedCount,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,151 +1,5 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { AutoCompactState, ParsedTokenLimitError } from "./types"
|
||||
import type { ExperimentalConfig } from "../../config"
|
||||
import { parseAnthropicTokenLimitError } from "./parser"
|
||||
import { executeCompact, getLastAssistant } from "./executor"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
export interface AnthropicContextWindowLimitRecoveryOptions {
|
||||
experimental?: ExperimentalConfig
|
||||
}
|
||||
|
||||
function createRecoveryState(): AutoCompactState {
|
||||
return {
|
||||
pendingCompact: new Set<string>(),
|
||||
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
|
||||
retryStateBySession: new Map(),
|
||||
truncateStateBySession: new Map(),
|
||||
emptyContentAttemptBySession: new Map(),
|
||||
compactionInProgress: new Set<string>(),
|
||||
}
|
||||
}
|
||||
|
||||
export function createAnthropicContextWindowLimitRecoveryHook(ctx: PluginInput, options?: AnthropicContextWindowLimitRecoveryOptions) {
|
||||
const autoCompactState = createRecoveryState()
|
||||
const experimental = options?.experimental
|
||||
|
||||
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
|
||||
const props = event.properties as Record<string, unknown> | undefined
|
||||
|
||||
if (event.type === "session.deleted") {
|
||||
const sessionInfo = props?.info as { id?: string } | undefined
|
||||
if (sessionInfo?.id) {
|
||||
autoCompactState.pendingCompact.delete(sessionInfo.id)
|
||||
autoCompactState.errorDataBySession.delete(sessionInfo.id)
|
||||
autoCompactState.retryStateBySession.delete(sessionInfo.id)
|
||||
autoCompactState.truncateStateBySession.delete(sessionInfo.id)
|
||||
autoCompactState.emptyContentAttemptBySession.delete(sessionInfo.id)
|
||||
autoCompactState.compactionInProgress.delete(sessionInfo.id)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "session.error") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
log("[auto-compact] session.error received", { sessionID, error: props?.error })
|
||||
if (!sessionID) return
|
||||
|
||||
const parsed = parseAnthropicTokenLimitError(props?.error)
|
||||
log("[auto-compact] parsed result", { parsed, hasError: !!props?.error })
|
||||
if (parsed) {
|
||||
autoCompactState.pendingCompact.add(sessionID)
|
||||
autoCompactState.errorDataBySession.set(sessionID, parsed)
|
||||
|
||||
if (autoCompactState.compactionInProgress.has(sessionID)) {
|
||||
return
|
||||
}
|
||||
|
||||
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
|
||||
const providerID = parsed.providerID ?? (lastAssistant?.providerID as string | undefined)
|
||||
const modelID = parsed.modelID ?? (lastAssistant?.modelID as string | undefined)
|
||||
|
||||
await ctx.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Context Limit Hit",
|
||||
message: "Truncating large tool outputs and recovering...",
|
||||
variant: "warning" as const,
|
||||
duration: 3000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
setTimeout(() => {
|
||||
executeCompact(
|
||||
sessionID,
|
||||
{ providerID, modelID },
|
||||
autoCompactState,
|
||||
ctx.client,
|
||||
ctx.directory,
|
||||
experimental
|
||||
)
|
||||
}, 300)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "message.updated") {
|
||||
const info = props?.info as Record<string, unknown> | undefined
|
||||
const sessionID = info?.sessionID as string | undefined
|
||||
|
||||
if (sessionID && info?.role === "assistant" && info.error) {
|
||||
log("[auto-compact] message.updated with error", { sessionID, error: info.error })
|
||||
const parsed = parseAnthropicTokenLimitError(info.error)
|
||||
log("[auto-compact] message.updated parsed result", { parsed })
|
||||
if (parsed) {
|
||||
parsed.providerID = info.providerID as string | undefined
|
||||
parsed.modelID = info.modelID as string | undefined
|
||||
autoCompactState.pendingCompact.add(sessionID)
|
||||
autoCompactState.errorDataBySession.set(sessionID, parsed)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "session.idle") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
if (!sessionID) return
|
||||
|
||||
if (!autoCompactState.pendingCompact.has(sessionID)) return
|
||||
|
||||
const errorData = autoCompactState.errorDataBySession.get(sessionID)
|
||||
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
|
||||
|
||||
if (lastAssistant?.summary === true) {
|
||||
autoCompactState.pendingCompact.delete(sessionID)
|
||||
return
|
||||
}
|
||||
|
||||
const providerID = errorData?.providerID ?? (lastAssistant?.providerID as string | undefined)
|
||||
const modelID = errorData?.modelID ?? (lastAssistant?.modelID as string | undefined)
|
||||
|
||||
await ctx.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Auto Compact",
|
||||
message: "Token limit exceeded. Attempting recovery...",
|
||||
variant: "warning" as const,
|
||||
duration: 3000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
await executeCompact(
|
||||
sessionID,
|
||||
{ providerID, modelID },
|
||||
autoCompactState,
|
||||
ctx.client,
|
||||
ctx.directory,
|
||||
experimental
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
event: eventHandler,
|
||||
}
|
||||
}
|
||||
|
||||
export { createAnthropicContextWindowLimitRecoveryHook } from "./recovery-hook"
|
||||
export type { AnthropicContextWindowLimitRecoveryOptions } from "./recovery-hook"
|
||||
export type { AutoCompactState, ParsedTokenLimitError, TruncateState } from "./types"
|
||||
export { parseAnthropicTokenLimitError } from "./parser"
|
||||
export { executeCompact, getLastAssistant } from "./executor"
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
import { existsSync, readdirSync, readFileSync } from "node:fs"
|
||||
import { join } from "node:path"
|
||||
import { getOpenCodeStorageDir } from "../../shared/data-path"
|
||||
import { truncateToolResult } from "./storage"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
interface StoredToolPart {
|
||||
type?: string
|
||||
callID?: string
|
||||
truncated?: boolean
|
||||
state?: {
|
||||
output?: string
|
||||
}
|
||||
}
|
||||
|
||||
function getMessageStorage(): string {
|
||||
return join(getOpenCodeStorageDir(), "message")
|
||||
}
|
||||
|
||||
function getPartStorage(): string {
|
||||
return join(getOpenCodeStorageDir(), "part")
|
||||
}
|
||||
|
||||
function getMessageDir(sessionID: string): string | null {
|
||||
const messageStorage = getMessageStorage()
|
||||
if (!existsSync(messageStorage)) return null
|
||||
|
||||
const directPath = join(messageStorage, sessionID)
|
||||
if (existsSync(directPath)) return directPath
|
||||
|
||||
for (const dir of readdirSync(messageStorage)) {
|
||||
const sessionPath = join(messageStorage, dir, sessionID)
|
||||
if (existsSync(sessionPath)) return sessionPath
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
function getMessageIds(sessionID: string): string[] {
|
||||
const messageDir = getMessageDir(sessionID)
|
||||
if (!messageDir) return []
|
||||
|
||||
const messageIds: string[] = []
|
||||
for (const file of readdirSync(messageDir)) {
|
||||
if (!file.endsWith(".json")) continue
|
||||
messageIds.push(file.replace(".json", ""))
|
||||
}
|
||||
|
||||
return messageIds
|
||||
}
|
||||
|
||||
export function truncateToolOutputsByCallId(
|
||||
sessionID: string,
|
||||
callIds: Set<string>,
|
||||
): { truncatedCount: number } {
|
||||
if (callIds.size === 0) return { truncatedCount: 0 }
|
||||
|
||||
const messageIds = getMessageIds(sessionID)
|
||||
if (messageIds.length === 0) return { truncatedCount: 0 }
|
||||
|
||||
let truncatedCount = 0
|
||||
|
||||
for (const messageID of messageIds) {
|
||||
const partDir = join(getPartStorage(), messageID)
|
||||
if (!existsSync(partDir)) continue
|
||||
|
||||
for (const file of readdirSync(partDir)) {
|
||||
if (!file.endsWith(".json")) continue
|
||||
const partPath = join(partDir, file)
|
||||
|
||||
try {
|
||||
const content = readFileSync(partPath, "utf-8")
|
||||
const part = JSON.parse(content) as StoredToolPart
|
||||
|
||||
if (part.type !== "tool" || !part.callID) continue
|
||||
if (!callIds.has(part.callID)) continue
|
||||
if (!part.state?.output || part.truncated) continue
|
||||
|
||||
const result = truncateToolResult(partPath)
|
||||
if (result.success) {
|
||||
truncatedCount++
|
||||
}
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (truncatedCount > 0) {
|
||||
log("[auto-compact] pruned duplicate tool outputs", {
|
||||
sessionID,
|
||||
truncatedCount,
|
||||
})
|
||||
}
|
||||
|
||||
return { truncatedCount }
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
import { describe, test, expect, mock, beforeEach } from "bun:test"
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { ExperimentalConfig } from "../../config"
|
||||
|
||||
const attemptDeduplicationRecoveryMock = mock(async () => {})
|
||||
|
||||
mock.module("./deduplication-recovery", () => ({
|
||||
attemptDeduplicationRecovery: attemptDeduplicationRecoveryMock,
|
||||
}))
|
||||
|
||||
function createImmediateTimeouts(): () => void {
|
||||
const originalSetTimeout = globalThis.setTimeout
|
||||
const originalClearTimeout = globalThis.clearTimeout
|
||||
|
||||
globalThis.setTimeout = ((callback: (...args: unknown[]) => void, _delay?: number, ...args: unknown[]) => {
|
||||
callback(...args)
|
||||
return 0 as unknown as ReturnType<typeof setTimeout>
|
||||
}) as typeof setTimeout
|
||||
|
||||
globalThis.clearTimeout = ((_: ReturnType<typeof setTimeout>) => {}) as typeof clearTimeout
|
||||
|
||||
return () => {
|
||||
globalThis.setTimeout = originalSetTimeout
|
||||
globalThis.clearTimeout = originalClearTimeout
|
||||
}
|
||||
}
|
||||
|
||||
describe("createAnthropicContextWindowLimitRecoveryHook", () => {
|
||||
beforeEach(() => {
|
||||
attemptDeduplicationRecoveryMock.mockClear()
|
||||
})
|
||||
|
||||
test("calls deduplication recovery when compaction is already in progress", async () => {
|
||||
//#given
|
||||
const restoreTimeouts = createImmediateTimeouts()
|
||||
|
||||
const experimental = {
|
||||
dynamic_context_pruning: {
|
||||
enabled: true,
|
||||
strategies: {
|
||||
deduplication: { enabled: true },
|
||||
},
|
||||
},
|
||||
} satisfies ExperimentalConfig
|
||||
|
||||
let resolveSummarize: (() => void) | null = null
|
||||
const summarizePromise = new Promise<void>((resolve) => {
|
||||
resolveSummarize = resolve
|
||||
})
|
||||
|
||||
const mockClient = {
|
||||
session: {
|
||||
messages: mock(() => Promise.resolve({ data: [] })),
|
||||
summarize: mock(() => summarizePromise),
|
||||
revert: mock(() => Promise.resolve()),
|
||||
prompt_async: mock(() => Promise.resolve()),
|
||||
},
|
||||
tui: {
|
||||
showToast: mock(() => Promise.resolve()),
|
||||
},
|
||||
}
|
||||
|
||||
try {
|
||||
const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
|
||||
const ctx = { client: mockClient, directory: "/tmp" } as PluginInput
|
||||
const hook = createAnthropicContextWindowLimitRecoveryHook(ctx, { experimental })
|
||||
|
||||
// first error triggers compaction (setTimeout runs immediately due to mock)
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: { sessionID: "session-96", error: "prompt is too long" },
|
||||
},
|
||||
})
|
||||
|
||||
//#when - second error while compaction is in progress
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: { sessionID: "session-96", error: "prompt is too long" },
|
||||
},
|
||||
})
|
||||
|
||||
//#then - deduplication recovery was called for the second error
|
||||
expect(attemptDeduplicationRecoveryMock).toHaveBeenCalledTimes(1)
|
||||
expect(attemptDeduplicationRecoveryMock.mock.calls[0]![0]).toBe("session-96")
|
||||
} finally {
|
||||
if (resolveSummarize) resolveSummarize()
|
||||
restoreTimeouts()
|
||||
}
|
||||
})
|
||||
|
||||
test("does not call deduplication when compaction is not in progress", async () => {
|
||||
//#given
|
||||
const mockClient = {
|
||||
session: {
|
||||
messages: mock(() => Promise.resolve({ data: [] })),
|
||||
summarize: mock(() => Promise.resolve()),
|
||||
revert: mock(() => Promise.resolve()),
|
||||
prompt_async: mock(() => Promise.resolve()),
|
||||
},
|
||||
tui: {
|
||||
showToast: mock(() => Promise.resolve()),
|
||||
},
|
||||
}
|
||||
|
||||
const { createAnthropicContextWindowLimitRecoveryHook } = await import("./recovery-hook")
|
||||
const ctx = { client: mockClient, directory: "/tmp" } as PluginInput
|
||||
const hook = createAnthropicContextWindowLimitRecoveryHook(ctx)
|
||||
|
||||
//#when - single error (no compaction in progress)
|
||||
await hook.event({
|
||||
event: {
|
||||
type: "session.error",
|
||||
properties: { sessionID: "session-no-dedup", error: "some other error" },
|
||||
},
|
||||
})
|
||||
|
||||
//#then
|
||||
expect(attemptDeduplicationRecoveryMock).not.toHaveBeenCalled()
|
||||
})
|
||||
})
|
||||
@@ -0,0 +1,153 @@
|
||||
import type { PluginInput } from "@opencode-ai/plugin"
|
||||
import type { AutoCompactState, ParsedTokenLimitError } from "./types"
|
||||
import type { ExperimentalConfig } from "../../config"
|
||||
import { parseAnthropicTokenLimitError } from "./parser"
|
||||
import { executeCompact, getLastAssistant } from "./executor"
|
||||
import { attemptDeduplicationRecovery } from "./deduplication-recovery"
|
||||
import { log } from "../../shared/logger"
|
||||
|
||||
export interface AnthropicContextWindowLimitRecoveryOptions {
|
||||
experimental?: ExperimentalConfig
|
||||
}
|
||||
|
||||
function createRecoveryState(): AutoCompactState {
|
||||
return {
|
||||
pendingCompact: new Set<string>(),
|
||||
errorDataBySession: new Map<string, ParsedTokenLimitError>(),
|
||||
retryStateBySession: new Map(),
|
||||
truncateStateBySession: new Map(),
|
||||
emptyContentAttemptBySession: new Map(),
|
||||
compactionInProgress: new Set<string>(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
export function createAnthropicContextWindowLimitRecoveryHook(
|
||||
ctx: PluginInput,
|
||||
options?: AnthropicContextWindowLimitRecoveryOptions,
|
||||
) {
|
||||
const autoCompactState = createRecoveryState()
|
||||
const experimental = options?.experimental
|
||||
|
||||
const eventHandler = async ({ event }: { event: { type: string; properties?: unknown } }) => {
|
||||
const props = event.properties as Record<string, unknown> | undefined
|
||||
|
||||
if (event.type === "session.deleted") {
|
||||
const sessionInfo = props?.info as { id?: string } | undefined
|
||||
if (sessionInfo?.id) {
|
||||
autoCompactState.pendingCompact.delete(sessionInfo.id)
|
||||
autoCompactState.errorDataBySession.delete(sessionInfo.id)
|
||||
autoCompactState.retryStateBySession.delete(sessionInfo.id)
|
||||
autoCompactState.truncateStateBySession.delete(sessionInfo.id)
|
||||
autoCompactState.emptyContentAttemptBySession.delete(sessionInfo.id)
|
||||
autoCompactState.compactionInProgress.delete(sessionInfo.id)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "session.error") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
log("[auto-compact] session.error received", { sessionID, error: props?.error })
|
||||
if (!sessionID) return
|
||||
|
||||
const parsed = parseAnthropicTokenLimitError(props?.error)
|
||||
log("[auto-compact] parsed result", { parsed, hasError: !!props?.error })
|
||||
if (parsed) {
|
||||
autoCompactState.pendingCompact.add(sessionID)
|
||||
autoCompactState.errorDataBySession.set(sessionID, parsed)
|
||||
|
||||
if (autoCompactState.compactionInProgress.has(sessionID)) {
|
||||
await attemptDeduplicationRecovery(sessionID, parsed, experimental)
|
||||
return
|
||||
}
|
||||
|
||||
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
|
||||
const providerID = parsed.providerID ?? (lastAssistant?.providerID as string | undefined)
|
||||
const modelID = parsed.modelID ?? (lastAssistant?.modelID as string | undefined)
|
||||
|
||||
await ctx.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Context Limit Hit",
|
||||
message: "Truncating large tool outputs and recovering...",
|
||||
variant: "warning" as const,
|
||||
duration: 3000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
setTimeout(() => {
|
||||
executeCompact(
|
||||
sessionID,
|
||||
{ providerID, modelID },
|
||||
autoCompactState,
|
||||
ctx.client,
|
||||
ctx.directory,
|
||||
experimental,
|
||||
)
|
||||
}, 300)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "message.updated") {
|
||||
const info = props?.info as Record<string, unknown> | undefined
|
||||
const sessionID = info?.sessionID as string | undefined
|
||||
|
||||
if (sessionID && info?.role === "assistant" && info.error) {
|
||||
log("[auto-compact] message.updated with error", { sessionID, error: info.error })
|
||||
const parsed = parseAnthropicTokenLimitError(info.error)
|
||||
log("[auto-compact] message.updated parsed result", { parsed })
|
||||
if (parsed) {
|
||||
parsed.providerID = info.providerID as string | undefined
|
||||
parsed.modelID = info.modelID as string | undefined
|
||||
autoCompactState.pendingCompact.add(sessionID)
|
||||
autoCompactState.errorDataBySession.set(sessionID, parsed)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if (event.type === "session.idle") {
|
||||
const sessionID = props?.sessionID as string | undefined
|
||||
if (!sessionID) return
|
||||
|
||||
if (!autoCompactState.pendingCompact.has(sessionID)) return
|
||||
|
||||
const errorData = autoCompactState.errorDataBySession.get(sessionID)
|
||||
const lastAssistant = await getLastAssistant(sessionID, ctx.client, ctx.directory)
|
||||
|
||||
if (lastAssistant?.summary === true) {
|
||||
autoCompactState.pendingCompact.delete(sessionID)
|
||||
return
|
||||
}
|
||||
|
||||
const providerID = errorData?.providerID ?? (lastAssistant?.providerID as string | undefined)
|
||||
const modelID = errorData?.modelID ?? (lastAssistant?.modelID as string | undefined)
|
||||
|
||||
await ctx.client.tui
|
||||
.showToast({
|
||||
body: {
|
||||
title: "Auto Compact",
|
||||
message: "Token limit exceeded. Attempting recovery...",
|
||||
variant: "warning" as const,
|
||||
duration: 3000,
|
||||
},
|
||||
})
|
||||
.catch(() => {})
|
||||
|
||||
await executeCompact(
|
||||
sessionID,
|
||||
{ providerID, modelID },
|
||||
autoCompactState,
|
||||
ctx.client,
|
||||
ctx.directory,
|
||||
experimental,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
event: eventHandler,
|
||||
}
|
||||
}
|
||||
215
src/hooks/anthropic-effort/index.test.ts
Normal file
215
src/hooks/anthropic-effort/index.test.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
import { describe, expect, it } from "bun:test"
|
||||
import { createAnthropicEffortHook } from "./index"
|
||||
|
||||
interface ChatParamsInput {
|
||||
sessionID: string
|
||||
agent: { name?: string }
|
||||
model: { providerID: string; modelID: string; id?: string; api?: { npm?: string } }
|
||||
provider: { id: string }
|
||||
message: { variant?: string }
|
||||
}
|
||||
|
||||
interface ChatParamsOutput {
|
||||
temperature?: number
|
||||
topP?: number
|
||||
topK?: number
|
||||
options: Record<string, unknown>
|
||||
}
|
||||
|
||||
function createMockParams(overrides: {
|
||||
providerID?: string
|
||||
modelID?: string
|
||||
variant?: string
|
||||
agentName?: string
|
||||
existingOptions?: Record<string, unknown>
|
||||
}): { input: ChatParamsInput; output: ChatParamsOutput } {
|
||||
const providerID = overrides.providerID ?? "anthropic"
|
||||
const modelID = overrides.modelID ?? "claude-opus-4-6"
|
||||
const variant = "variant" in overrides ? overrides.variant : "max"
|
||||
const agentName = overrides.agentName ?? "sisyphus"
|
||||
const existingOptions = overrides.existingOptions ?? {}
|
||||
|
||||
return {
|
||||
input: {
|
||||
sessionID: "test-session",
|
||||
agent: { name: agentName },
|
||||
model: { providerID, modelID },
|
||||
provider: { id: providerID },
|
||||
message: { variant },
|
||||
},
|
||||
output: {
|
||||
temperature: 0.1,
|
||||
options: { ...existingOptions },
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
describe("createAnthropicEffortHook", () => {
|
||||
describe("opus 4-6 with variant max", () => {
|
||||
it("should inject effort max for anthropic opus-4-6 with variant max", async () => {
|
||||
//#given anthropic opus-4-6 model with variant max
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should be injected into options
|
||||
expect(output.options.effort).toBe("max")
|
||||
})
|
||||
|
||||
it("should inject effort max for github-copilot claude-opus-4-6", async () => {
|
||||
//#given github-copilot provider with claude-opus-4-6
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
providerID: "github-copilot",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should be injected (github-copilot resolves to anthropic)
|
||||
expect(output.options.effort).toBe("max")
|
||||
})
|
||||
|
||||
it("should inject effort max for opencode provider with claude-opus-4-6", async () => {
|
||||
//#given opencode provider with claude-opus-4-6
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
providerID: "opencode",
|
||||
modelID: "claude-opus-4-6",
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should be injected
|
||||
expect(output.options.effort).toBe("max")
|
||||
})
|
||||
|
||||
it("should handle normalized model ID with dots (opus-4.6)", async () => {
|
||||
//#given model ID with dots instead of hyphens
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
modelID: "claude-opus-4.6",
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then should normalize and inject effort
|
||||
expect(output.options.effort).toBe("max")
|
||||
})
|
||||
})
|
||||
|
||||
describe("conditions NOT met - should skip", () => {
|
||||
it("should NOT inject effort when variant is not max", async () => {
|
||||
//#given opus-4-6 with variant high (not max)
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({ variant: "high" })
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should NOT be injected
|
||||
expect(output.options.effort).toBeUndefined()
|
||||
})
|
||||
|
||||
it("should NOT inject effort when variant is undefined", async () => {
|
||||
//#given opus-4-6 with no variant
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({ variant: undefined })
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should NOT be injected
|
||||
expect(output.options.effort).toBeUndefined()
|
||||
})
|
||||
|
||||
it("should NOT inject effort for non-opus model", async () => {
|
||||
//#given claude-sonnet-4-5 (not opus)
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
modelID: "claude-sonnet-4-5",
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should NOT be injected
|
||||
expect(output.options.effort).toBeUndefined()
|
||||
})
|
||||
|
||||
it("should NOT inject effort for non-anthropic provider with non-claude model", async () => {
|
||||
//#given openai provider with gpt model
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
providerID: "openai",
|
||||
modelID: "gpt-5.2",
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should NOT be injected
|
||||
expect(output.options.effort).toBeUndefined()
|
||||
})
|
||||
|
||||
it("should NOT throw when model.modelID is undefined", async () => {
|
||||
//#given model with undefined modelID (runtime edge case)
|
||||
const hook = createAnthropicEffortHook()
|
||||
const input = {
|
||||
sessionID: "test-session",
|
||||
agent: { name: "sisyphus" },
|
||||
model: { providerID: "anthropic", modelID: undefined as unknown as string },
|
||||
provider: { id: "anthropic" },
|
||||
message: { variant: "max" as const },
|
||||
}
|
||||
const output = { temperature: 0.1, options: {} }
|
||||
|
||||
//#when chat.params hook is called with undefined modelID
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then should gracefully skip without throwing
|
||||
expect(output.options.effort).toBeUndefined()
|
||||
})
|
||||
})
|
||||
|
||||
describe("preserves existing options", () => {
|
||||
it("should NOT overwrite existing effort if already set", async () => {
|
||||
//#given options already have effort set
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
existingOptions: { effort: "high" },
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then existing effort should be preserved
|
||||
expect(output.options.effort).toBe("high")
|
||||
})
|
||||
|
||||
it("should preserve other existing options when injecting effort", async () => {
|
||||
//#given options with existing thinking config
|
||||
const hook = createAnthropicEffortHook()
|
||||
const { input, output } = createMockParams({
|
||||
existingOptions: {
|
||||
thinking: { type: "enabled", budgetTokens: 31999 },
|
||||
},
|
||||
})
|
||||
|
||||
//#when chat.params hook is called
|
||||
await hook["chat.params"](input, output)
|
||||
|
||||
//#then effort should be added without affecting thinking
|
||||
expect(output.options.effort).toBe("max")
|
||||
expect(output.options.thinking).toEqual({
|
||||
type: "enabled",
|
||||
budgetTokens: 31999,
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
56
src/hooks/anthropic-effort/index.ts
Normal file
56
src/hooks/anthropic-effort/index.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { log } from "../../shared"
|
||||
|
||||
const OPUS_4_6_PATTERN = /claude-opus-4[-.]6/i
|
||||
|
||||
function normalizeModelID(modelID: string): string {
|
||||
return modelID.replace(/\.(\d+)/g, "-$1")
|
||||
}
|
||||
|
||||
function isClaudeProvider(providerID: string, modelID: string): boolean {
|
||||
if (["anthropic", "opencode"].includes(providerID)) return true
|
||||
if (providerID === "github-copilot" && modelID.toLowerCase().includes("claude")) return true
|
||||
return false
|
||||
}
|
||||
|
||||
function isOpus46(modelID: string): boolean {
|
||||
const normalized = normalizeModelID(modelID)
|
||||
return OPUS_4_6_PATTERN.test(normalized)
|
||||
}
|
||||
|
||||
interface ChatParamsInput {
|
||||
sessionID: string
|
||||
agent: { name?: string }
|
||||
model: { providerID: string; modelID: string }
|
||||
provider: { id: string }
|
||||
message: { variant?: string }
|
||||
}
|
||||
|
||||
interface ChatParamsOutput {
|
||||
temperature?: number
|
||||
topP?: number
|
||||
topK?: number
|
||||
options: Record<string, unknown>
|
||||
}
|
||||
|
||||
export function createAnthropicEffortHook() {
|
||||
return {
|
||||
"chat.params": async (
|
||||
input: ChatParamsInput,
|
||||
output: ChatParamsOutput
|
||||
): Promise<void> => {
|
||||
const { model, message } = input
|
||||
if (!model?.modelID || !model?.providerID) return
|
||||
if (message.variant !== "max") return
|
||||
if (!isClaudeProvider(model.providerID, model.modelID)) return
|
||||
if (!isOpus46(model.modelID)) return
|
||||
if (output.options.effort !== undefined) return
|
||||
|
||||
output.options.effort = "max"
|
||||
log("anthropic-effort: injected effort=max", {
|
||||
sessionID: input.sessionID,
|
||||
provider: model.providerID,
|
||||
model: model.modelID,
|
||||
})
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -34,6 +34,7 @@ describe("atlas hook", () => {
|
||||
client: {
|
||||
session: {
|
||||
prompt: promptMock,
|
||||
promptAsync: promptMock,
|
||||
},
|
||||
},
|
||||
_promptMock: promptMock,
|
||||
@@ -86,7 +87,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when - calling with undefined output
|
||||
const result = await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID: "session-123" },
|
||||
{ tool: "task", sessionID: "session-123" },
|
||||
undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> }
|
||||
)
|
||||
|
||||
@@ -94,8 +95,8 @@ describe("atlas hook", () => {
|
||||
expect(result).toBeUndefined()
|
||||
})
|
||||
|
||||
test("should ignore non-delegate_task tools", async () => {
|
||||
// given - hook and non-delegate_task tool
|
||||
test("should ignore non-task tools", async () => {
|
||||
// given - hook and non-task tool
|
||||
const hook = createAtlasHook(createMockPluginInput())
|
||||
const output = {
|
||||
title: "Test Tool",
|
||||
@@ -138,7 +139,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -162,14 +163,14 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
// then - standalone verification reminder appended
|
||||
expect(output.output).toContain("Task completed successfully")
|
||||
expect(output.output).toContain("MANDATORY:")
|
||||
expect(output.output).toContain("delegate_task(session_id=")
|
||||
expect(output.output).toContain("task(session_id=")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
@@ -199,7 +200,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -208,7 +209,7 @@ describe("atlas hook", () => {
|
||||
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
|
||||
expect(output.output).toContain("test-plan")
|
||||
expect(output.output).toContain("LIE")
|
||||
expect(output.output).toContain("delegate_task(session_id=")
|
||||
expect(output.output).toContain("task(session_id=")
|
||||
|
||||
cleanupMessageStorage(sessionID)
|
||||
})
|
||||
@@ -238,7 +239,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -275,7 +276,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -311,7 +312,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -348,7 +349,7 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
@@ -385,12 +386,12 @@ describe("atlas hook", () => {
|
||||
|
||||
// when
|
||||
await hook["tool.execute.after"](
|
||||
{ tool: "delegate_task", sessionID },
|
||||
{ tool: "task", sessionID },
|
||||
output
|
||||
)
|
||||
|
||||
// then - should include session_id instructions and verification
|
||||
expect(output.output).toContain("delegate_task(session_id=")
|
||||
expect(output.output).toContain("task(session_id=")
|
||||
expect(output.output).toContain("[x]")
|
||||
expect(output.output).toContain("MANDATORY:")
|
||||
|
||||
@@ -425,8 +426,8 @@ describe("atlas hook", () => {
|
||||
|
||||
// then
|
||||
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
|
||||
expect(output.output).toContain("delegate_task")
|
||||
expect(output.output).toContain("delegate_task")
|
||||
expect(output.output).toContain("task")
|
||||
expect(output.output).toContain("task")
|
||||
})
|
||||
|
||||
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {
|
||||
@@ -755,40 +756,71 @@ describe("atlas hook", () => {
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should skip when background tasks are running", async () => {
|
||||
// given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
|
||||
test("should skip when background tasks are running", async () => {
|
||||
// given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const mockBackgroundManager = {
|
||||
getTasksByParentSession: () => [{ status: "running" }],
|
||||
}
|
||||
const mockBackgroundManager = {
|
||||
getTasksByParentSession: () => [{ status: "running" }],
|
||||
}
|
||||
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput, {
|
||||
directory: TEST_DIR,
|
||||
backgroundManager: mockBackgroundManager as any,
|
||||
})
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput, {
|
||||
directory: TEST_DIR,
|
||||
backgroundManager: mockBackgroundManager as any,
|
||||
})
|
||||
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
|
||||
// then - should not call prompt
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
// then - should not call prompt
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should skip when continuation is stopped via isContinuationStopped", async () => {
|
||||
// given - boulder state with incomplete plan
|
||||
const planPath = join(TEST_DIR, "test-plan.md")
|
||||
writeFileSync(planPath, "# Plan\n- [ ] Task 1\n- [ ] Task 2")
|
||||
|
||||
const state: BoulderState = {
|
||||
active_plan: planPath,
|
||||
started_at: "2026-01-02T10:00:00Z",
|
||||
session_ids: [MAIN_SESSION_ID],
|
||||
plan_name: "test-plan",
|
||||
}
|
||||
writeBoulderState(TEST_DIR, state)
|
||||
|
||||
const mockInput = createMockPluginInput()
|
||||
const hook = createAtlasHook(mockInput, {
|
||||
directory: TEST_DIR,
|
||||
isContinuationStopped: (sessionID: string) => sessionID === MAIN_SESSION_ID,
|
||||
})
|
||||
|
||||
// when
|
||||
await hook.handler({
|
||||
event: {
|
||||
type: "session.idle",
|
||||
properties: { sessionID: MAIN_SESSION_ID },
|
||||
},
|
||||
})
|
||||
|
||||
// then - should not call prompt because continuation is stopped
|
||||
expect(mockInput._promptMock).not.toHaveBeenCalled()
|
||||
})
|
||||
|
||||
test("should clear abort state on message.updated", async () => {
|
||||
// given - boulder with incomplete plan
|
||||
|
||||
@@ -44,7 +44,7 @@ You just performed direct file modifications outside \`.sisyphus/\`.
|
||||
**You are an ORCHESTRATOR, not an IMPLEMENTER.**
|
||||
|
||||
As an orchestrator, you should:
|
||||
- **DELEGATE** implementation work to subagents via \`delegate_task\`
|
||||
- **DELEGATE** implementation work to subagents via \`task\`
|
||||
- **VERIFY** the work done by subagents
|
||||
- **COORDINATE** multiple tasks and ensure completion
|
||||
|
||||
@@ -54,7 +54,7 @@ You should NOT:
|
||||
- Implement features yourself
|
||||
|
||||
**If you need to make changes:**
|
||||
1. Use \`delegate_task\` to delegate to an appropriate subagent
|
||||
1. Use \`task\` to delegate to an appropriate subagent
|
||||
2. Provide clear instructions in the prompt
|
||||
3. Verify the subagent's work after completion
|
||||
|
||||
@@ -128,7 +128,7 @@ You (Atlas) are attempting to directly modify a file outside \`.sisyphus/\`.
|
||||
**THIS IS FORBIDDEN** (except for VERIFICATION purposes)
|
||||
|
||||
As an ORCHESTRATOR, you MUST:
|
||||
1. **DELEGATE** all implementation work via \`delegate_task\`
|
||||
1. **DELEGATE** all implementation work via \`task\`
|
||||
2. **VERIFY** the work done by subagents (reading files is OK)
|
||||
3. **COORDINATE** - you orchestrate, you don't implement
|
||||
|
||||
@@ -146,11 +146,11 @@ As an ORCHESTRATOR, you MUST:
|
||||
|
||||
**IF THIS IS FOR VERIFICATION:**
|
||||
Proceed if you are verifying subagent work by making a small fix.
|
||||
But for any substantial changes, USE \`delegate_task\`.
|
||||
But for any substantial changes, USE \`task\`.
|
||||
|
||||
**CORRECT APPROACH:**
|
||||
\`\`\`
|
||||
delegate_task(
|
||||
task(
|
||||
category="...",
|
||||
prompt="[specific single task with clear acceptance criteria]"
|
||||
)
|
||||
@@ -193,7 +193,7 @@ function buildVerificationReminder(sessionId: string): string {
|
||||
|
||||
**If ANY verification fails, use this immediately:**
|
||||
\`\`\`
|
||||
delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
|
||||
\`\`\``
|
||||
}
|
||||
|
||||
@@ -399,6 +399,7 @@ const CONTINUATION_COOLDOWN_MS = 5000
|
||||
export interface AtlasHookOptions {
|
||||
directory: string
|
||||
backgroundManager?: BackgroundManager
|
||||
isContinuationStopped?: (sessionID: string) => boolean
|
||||
}
|
||||
|
||||
function isAbortError(error: unknown): boolean {
|
||||
@@ -483,7 +484,7 @@ export function createAtlasHook(
|
||||
: undefined
|
||||
}
|
||||
|
||||
await ctx.client.session.prompt({
|
||||
await ctx.client.session.promptAsync({
|
||||
path: { id: sessionID },
|
||||
body: {
|
||||
agent: agent ?? "atlas",
|
||||
@@ -573,6 +574,11 @@ export function createAtlasHook(
|
||||
return
|
||||
}
|
||||
|
||||
if (options?.isContinuationStopped?.(sessionID)) {
|
||||
log(`[${HOOK_NAME}] Skipped: continuation stopped for session`, { sessionID })
|
||||
return
|
||||
}
|
||||
|
||||
const requiredAgent = (boulderState.agent ?? "atlas").toLowerCase()
|
||||
const lastAgent = getLastAgentFromSession(sessionID)
|
||||
if (!lastAgent || lastAgent !== requiredAgent) {
|
||||
@@ -688,12 +694,12 @@ export function createAtlasHook(
|
||||
return
|
||||
}
|
||||
|
||||
// Check delegate_task - inject single-task directive
|
||||
if (input.tool === "delegate_task") {
|
||||
// Check task - inject single-task directive
|
||||
if (input.tool === "task") {
|
||||
const prompt = output.args.prompt as string | undefined
|
||||
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
|
||||
output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
|
||||
log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
|
||||
log(`[${HOOK_NAME}] Injected single-task directive to task`, {
|
||||
sessionID: input.sessionID,
|
||||
})
|
||||
}
|
||||
@@ -732,7 +738,7 @@ export function createAtlasHook(
|
||||
return
|
||||
}
|
||||
|
||||
if (input.tool !== "delegate_task") {
|
||||
if (input.tool !== "task") {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -50,7 +50,7 @@ describe("category-skill-reminder hook", () => {
|
||||
|
||||
// then - reminder should be injected
|
||||
expect(output.output).toContain("[Category+Skill Reminder]")
|
||||
expect(output.output).toContain("delegate_task")
|
||||
expect(output.output).toContain("task")
|
||||
|
||||
clearSessionAgent(sessionID)
|
||||
})
|
||||
@@ -130,16 +130,16 @@ describe("category-skill-reminder hook", () => {
|
||||
})
|
||||
|
||||
describe("delegation tool tracking", () => {
|
||||
test("should NOT inject reminder if delegate_task is used", async () => {
|
||||
// given - sisyphus agent that uses delegate_task
|
||||
test("should NOT inject reminder if task is used", async () => {
|
||||
// given - sisyphus agent that uses task
|
||||
const hook = createHook()
|
||||
const sessionID = "delegation-session"
|
||||
updateSessionAgent(sessionID, "Sisyphus")
|
||||
|
||||
const output = { title: "", output: "result", metadata: {} }
|
||||
|
||||
// when - delegate_task is used, then more tool calls
|
||||
await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
|
||||
// when - task is used, then more tool calls
|
||||
await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
|
||||
@@ -329,15 +329,15 @@ describe("category-skill-reminder hook", () => {
|
||||
})
|
||||
|
||||
test("should handle delegation tool names case-insensitively", async () => {
|
||||
// given - sisyphus agent using DELEGATE_TASK in uppercase
|
||||
// given - sisyphus agent using TASK in uppercase
|
||||
const hook = createHook()
|
||||
const sessionID = "case-delegate-session"
|
||||
updateSessionAgent(sessionID, "Sisyphus")
|
||||
|
||||
const output = { title: "", output: "result", metadata: {} }
|
||||
|
||||
// when - DELEGATE_TASK in uppercase is used
|
||||
await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
|
||||
// when - TASK in uppercase is used
|
||||
await hook["tool.execute.after"]({ tool: "TASK", sessionID, callID: "1" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
|
||||
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
|
||||
|
||||
@@ -30,9 +30,8 @@ const DELEGATABLE_WORK_TOOLS = new Set([
|
||||
* Tools that indicate the agent is already using delegation properly.
|
||||
*/
|
||||
const DELEGATION_TOOLS = new Set([
|
||||
"delegate_task",
|
||||
"call_omo_agent",
|
||||
"task",
|
||||
"task",
|
||||
"call_omo_agent",
|
||||
])
|
||||
|
||||
function formatSkillNames(skills: AvailableSkill[], limit: number): string {
|
||||
@@ -63,7 +62,7 @@ function buildReminderMessage(availableSkills: AvailableSkill[]): string {
|
||||
"> User-installed skills OVERRIDE built-in defaults. ALWAYS prefer YOUR SKILLS when domain matches.",
|
||||
"",
|
||||
"```typescript",
|
||||
`delegate_task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
|
||||
`task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
|
||||
"```",
|
||||
"",
|
||||
]
|
||||
|
||||
@@ -20,7 +20,7 @@ interface RawClaudeHooksConfig {
|
||||
function normalizeHookMatcher(raw: RawHookMatcher): HookMatcher {
|
||||
return {
|
||||
matcher: raw.matcher ?? raw.pattern ?? "*",
|
||||
hooks: raw.hooks,
|
||||
hooks: Array.isArray(raw.hooks) ? raw.hooks : [],
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -175,7 +175,7 @@ export function createClaudeCodeHooksHook(
|
||||
input: { tool: string; sessionID: string; callID: string },
|
||||
output: { args: Record<string, unknown> }
|
||||
): Promise<void> => {
|
||||
if (input.tool === "todowrite" && typeof output.args.todos === "string") {
|
||||
if (input.tool.trim() === "todowrite" && typeof output.args.todos === "string") {
|
||||
let parsed: unknown
|
||||
try {
|
||||
parsed = JSON.parse(output.args.todos)
|
||||
@@ -257,7 +257,7 @@ export function createClaudeCodeHooksHook(
|
||||
const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {}
|
||||
|
||||
// Use metadata if available and non-empty, otherwise wrap output.output in a structured object
|
||||
// This ensures plugin tools (call_omo_agent, delegate_task, task) that return strings
|
||||
// This ensures plugin tools (call_omo_agent, task) that return strings
|
||||
// get their results properly recorded in transcripts instead of empty {}
|
||||
const metadata = output.metadata as Record<string, unknown> | undefined
|
||||
const hasMetadata = metadata && typeof metadata === "object" && Object.keys(metadata).length > 0
|
||||
|
||||
@@ -91,11 +91,12 @@ export async function executePostToolUseHooks(
|
||||
|
||||
const startTime = Date.now()
|
||||
|
||||
for (const matcher of matchers) {
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
for (const matcher of matchers) {
|
||||
if (!matcher.hooks || matcher.hooks.length === 0) continue
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
|
||||
if (isHookCommandDisabled("PostToolUse", hook.command, extendedConfig ?? null)) {
|
||||
if (isHookCommandDisabled("PostToolUse", hook.command, extendedConfig ?? null)) {
|
||||
log("PostToolUse hook command skipped (disabled by config)", { command: hook.command, toolName: ctx.toolName })
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -47,11 +47,12 @@ export async function executePreCompactHooks(
|
||||
let firstHookName: string | undefined
|
||||
const collectedContext: string[] = []
|
||||
|
||||
for (const matcher of matchers) {
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
for (const matcher of matchers) {
|
||||
if (!matcher.hooks || matcher.hooks.length === 0) continue
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
|
||||
if (isHookCommandDisabled("PreCompact", hook.command, extendedConfig ?? null)) {
|
||||
if (isHookCommandDisabled("PreCompact", hook.command, extendedConfig ?? null)) {
|
||||
log("PreCompact hook command skipped (disabled by config)", { command: hook.command })
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -74,11 +74,12 @@ export async function executePreToolUseHooks(
|
||||
let firstHookName: string | undefined
|
||||
const inputLines = buildInputLines(ctx.toolInput)
|
||||
|
||||
for (const matcher of matchers) {
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
for (const matcher of matchers) {
|
||||
if (!matcher.hooks || matcher.hooks.length === 0) continue
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
|
||||
if (isHookCommandDisabled("PreToolUse", hook.command, extendedConfig ?? null)) {
|
||||
if (isHookCommandDisabled("PreToolUse", hook.command, extendedConfig ?? null)) {
|
||||
log("PreToolUse hook command skipped (disabled by config)", { command: hook.command, toolName: ctx.toolName })
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -65,11 +65,12 @@ export async function executeStopHooks(
|
||||
hook_source: "opencode-plugin",
|
||||
}
|
||||
|
||||
for (const matcher of matchers) {
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
for (const matcher of matchers) {
|
||||
if (!matcher.hooks || matcher.hooks.length === 0) continue
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
|
||||
if (isHookCommandDisabled("Stop", hook.command, extendedConfig ?? null)) {
|
||||
if (isHookCommandDisabled("Stop", hook.command, extendedConfig ?? null)) {
|
||||
log("Stop hook command skipped (disabled by config)", { command: hook.command })
|
||||
continue
|
||||
}
|
||||
|
||||
107
src/hooks/claude-code-hooks/user-prompt-submit.test.ts
Normal file
107
src/hooks/claude-code-hooks/user-prompt-submit.test.ts
Normal file
@@ -0,0 +1,107 @@
|
||||
import { describe, it, expect } from "bun:test"
|
||||
import {
|
||||
executeUserPromptSubmitHooks,
|
||||
type UserPromptSubmitContext,
|
||||
} from "./user-prompt-submit"
|
||||
|
||||
describe("executeUserPromptSubmitHooks", () => {
|
||||
it("returns early when no config provided", async () => {
|
||||
// given
|
||||
const ctx: UserPromptSubmitContext = {
|
||||
sessionId: "test-session",
|
||||
prompt: "test prompt",
|
||||
parts: [{ type: "text", text: "test prompt" }],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
// when
|
||||
const result = await executeUserPromptSubmitHooks(ctx, null)
|
||||
|
||||
// then
|
||||
expect(result.block).toBe(false)
|
||||
expect(result.messages).toEqual([])
|
||||
})
|
||||
|
||||
it("returns early when hook tags present in user input", async () => {
|
||||
// given
|
||||
const ctx: UserPromptSubmitContext = {
|
||||
sessionId: "test-session",
|
||||
prompt: "<user-prompt-submit-hook>previous output</user-prompt-submit-hook>",
|
||||
parts: [
|
||||
{
|
||||
type: "text",
|
||||
text: "<user-prompt-submit-hook>previous output</user-prompt-submit-hook>",
|
||||
},
|
||||
],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
// when
|
||||
const result = await executeUserPromptSubmitHooks(ctx, null)
|
||||
|
||||
// then
|
||||
expect(result.block).toBe(false)
|
||||
expect(result.messages).toEqual([])
|
||||
})
|
||||
|
||||
it("does not return early when hook tags in prompt but not in user input", async () => {
|
||||
// given - simulates case where hook output was injected into session context
|
||||
// but current user input does not contain tags
|
||||
const ctx: UserPromptSubmitContext = {
|
||||
sessionId: "test-session",
|
||||
prompt:
|
||||
"<user-prompt-submit-hook>previous output</user-prompt-submit-hook>\n\nuser message",
|
||||
parts: [{ type: "text", text: "user message" }],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
// when
|
||||
const result = await executeUserPromptSubmitHooks(ctx, null)
|
||||
|
||||
// then - should not return early, should continue to config check
|
||||
expect(result.block).toBe(false)
|
||||
expect(result.messages).toEqual([])
|
||||
})
|
||||
|
||||
it("should fire on first prompt", async () => {
|
||||
// given
|
||||
const ctx: UserPromptSubmitContext = {
|
||||
sessionId: "test-session-1",
|
||||
prompt: "first prompt",
|
||||
parts: [{ type: "text", text: "first prompt" }],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
// when
|
||||
const result = await executeUserPromptSubmitHooks(ctx, null)
|
||||
|
||||
// then
|
||||
expect(result.block).toBe(false)
|
||||
expect(result.messages).toEqual([])
|
||||
})
|
||||
|
||||
it("should fire on second prompt in same session", async () => {
|
||||
// given
|
||||
const ctx1: UserPromptSubmitContext = {
|
||||
sessionId: "test-session-2",
|
||||
prompt: "first prompt",
|
||||
parts: [{ type: "text", text: "first prompt" }],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
const ctx2: UserPromptSubmitContext = {
|
||||
sessionId: "test-session-2",
|
||||
prompt: "second prompt",
|
||||
parts: [{ type: "text", text: "second prompt" }],
|
||||
cwd: "/tmp",
|
||||
}
|
||||
|
||||
// when
|
||||
const result1 = await executeUserPromptSubmitHooks(ctx1, null)
|
||||
const result2 = await executeUserPromptSubmitHooks(ctx2, null)
|
||||
|
||||
// then
|
||||
expect(result1.block).toBe(false)
|
||||
expect(result2.block).toBe(false)
|
||||
})
|
||||
})
|
||||
@@ -44,9 +44,16 @@ export async function executeUserPromptSubmitHooks(
|
||||
return { block: false, modifiedParts, messages }
|
||||
}
|
||||
|
||||
// Check if hook tags are in the current user input only (not in injected context)
|
||||
// by checking only the text parts that were provided in this message
|
||||
const userInputText = ctx.parts
|
||||
.filter((p) => p.type === "text" && p.text)
|
||||
.map((p) => p.text ?? "")
|
||||
.join("\n")
|
||||
|
||||
if (
|
||||
ctx.prompt.includes(USER_PROMPT_SUBMIT_TAG_OPEN) &&
|
||||
ctx.prompt.includes(USER_PROMPT_SUBMIT_TAG_CLOSE)
|
||||
userInputText.includes(USER_PROMPT_SUBMIT_TAG_OPEN) &&
|
||||
userInputText.includes(USER_PROMPT_SUBMIT_TAG_CLOSE)
|
||||
) {
|
||||
return { block: false, modifiedParts, messages }
|
||||
}
|
||||
@@ -70,9 +77,10 @@ export async function executeUserPromptSubmitHooks(
|
||||
hook_source: "opencode-plugin",
|
||||
}
|
||||
|
||||
for (const matcher of matchers) {
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
for (const matcher of matchers) {
|
||||
if (!matcher.hooks || matcher.hooks.length === 0) continue
|
||||
for (const hook of matcher.hooks) {
|
||||
if (hook.type !== "command") continue
|
||||
|
||||
if (isHookCommandDisabled("UserPromptSubmit", hook.command, extendedConfig ?? null)) {
|
||||
log("UserPromptSubmit hook command skipped (disabled by config)", { command: hook.command })
|
||||
|
||||
@@ -56,4 +56,17 @@ describe("createCompactionContextInjector", () => {
|
||||
expect(prompt).toContain("Files already verified")
|
||||
})
|
||||
})
|
||||
|
||||
it("restricts constraints to explicit verbatim statements", async () => {
|
||||
//#given
|
||||
const injector = createCompactionContextInjector()
|
||||
|
||||
//#when
|
||||
const prompt = injector()
|
||||
|
||||
//#then
|
||||
expect(prompt).toContain("Explicit Constraints (Verbatim Only)")
|
||||
expect(prompt).toContain("Do NOT invent")
|
||||
expect(prompt).toContain("Quote constraints verbatim")
|
||||
})
|
||||
})
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user