Merge pull request #1543 from code-yeongyu/feat/task-tool-refactor

refactor: migrate delegate_task to task tool with metadata fixes
This commit is contained in:
YeonGyu-Kim
2026-02-06 21:37:46 +09:00
committed by GitHub
78 changed files with 1182 additions and 403 deletions

View File

@@ -41,27 +41,27 @@ Fire ALL simultaneously:
```
// Agent 1: Find all exported symbols
delegate_task(subagent_type="explore", run_in_background=true,
task(subagent_type="explore", run_in_background=true,
prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
List each with: file path, line number, symbol name, export type (named/default).
EXCLUDE: src/index.ts root exports, test files.
Return as structured list.")
// Agent 2: Find potentially unused files
delegate_task(subagent_type="explore", run_in_background=true,
task(subagent_type="explore", run_in_background=true,
prompt="Find files in src/ that are NOT imported by any other file.
Check import/require statements across the entire codebase.
EXCLUDE: index.ts files, test files, entry points, config files, .md files.
Return list of potentially orphaned files.")
// Agent 3: Find unused imports within files
delegate_task(subagent_type="explore", run_in_background=true,
task(subagent_type="explore", run_in_background=true,
prompt="Find unused imports across src/**/*.ts files.
Look for import statements where the imported symbol is never referenced in the file body.
Return: file path, line number, imported symbol name.")
// Agent 4: Find functions/variables only used in their own declaration
delegate_task(subagent_type="explore", run_in_background=true,
task(subagent_type="explore", run_in_background=true,
prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
```

View File

@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:
| Aspect | Rule |
|--------|------|
| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call |
| **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
const issue = allIssues[i]
const category = getCategory(i)
const taskId = await delegate_task(
const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← CRITICAL: Each issue is independent background task
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
const taskId = await delegate_task(
const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each issue runs independently
@@ -480,7 +480,7 @@ When invoked, immediately:
4. Exhaustive pagination for issues
5. Exhaustive pagination for PRs
6. **LAUNCH**: For each issue:
- `delegate_task(run_in_background=true)` - 1 task per issue
- `task(run_in_background=true)` - 1 task per issue
- Store taskId mapped to issue number
7. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result

View File

@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:
| Aspect | Rule |
|--------|------|
| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call |
| **Task Granularity** | 1 PR = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
const pr = allPRs[i]
const category = getCategory(i)
const taskId = await delegate_task(
const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← CRITICAL: Each PR is independent background task
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
const taskId = await delegate_task(
const taskId = await task(
category=category,
load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each PR runs independently
@@ -474,7 +474,7 @@ When invoked, immediately:
2. `gh repo view --json nameWithOwner -q .nameWithOwner`
3. Exhaustive pagination for ALL open PRs
4. **LAUNCH**: For each PR:
- `delegate_task(run_in_background=true)` - 1 task per PR
- `task(run_in_background=true)` - 1 task per PR
- Store taskId mapped to PR number
5. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result

View File

@@ -195,7 +195,7 @@ oh-my-opencode/
| Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
| Error Handling | Empty catch blocks |
| Testing | Deleting failing tests, writing implementation before test |
| Agent Calls | Sequential - use `delegate_task` parallel |
| Agent Calls | Sequential - use `task` in parallel |
| Hook Logic | Heavy PreToolUse - slows every call |
| Commits | Giant (3+ files), separate test from impl |
| Temperature | >0.3 for code agents |

View File

@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
- **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
- **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)
By combining these two concepts, you can generate optimal agents through `delegate_task`.
By combining these two concepts, you can generate optimal agents through `task`.
---
@@ -32,10 +32,10 @@ A Category is an agent configuration preset optimized for specific domains.
### Usage
Specify the `category` parameter when invoking the `delegate_task` tool.
Specify the `category` parameter when invoking the `task` tool.
```typescript
delegate_task(
task(
category="visual-engineering",
prompt="Add a responsive chart component to the dashboard page"
)
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
Add desired skill names to the `load_skills` array.
```typescript
delegate_task(
task(
category="quick",
load_skills=["git-master"],
prompt="Commit current changes. Follow commit message style."
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.
---
## 5. delegate_task Prompt Guide
## 5. task Prompt Guide
When delegating, **clear and specific** prompts are essential. Include these 7 elements:
@@ -158,7 +158,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
| Field | Type | Description |
|-------|------|-------------|
| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
| `description` | string | Human-readable description of the category's purpose. Shown in task prompt. |
| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
| `variant` | string | Model variant (e.g., `max`, `xhigh`) |
| `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |

View File

@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
"explore": { "model": "opencode/gpt-5-nano" } // Free model for grep
},
// Override category models (used by delegate_task)
// Override category models (used by task)
"categories": {
"quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks
"visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
Oh My OpenCode includes built-in skills that provide additional capabilities:
- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.
Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:
@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
### How It Works
When `tmux.enabled` is `true` and you're inside a tmux session:
- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes
- Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
- Each pane shows the subagent's real-time output
- Panes are automatically closed when the subagent completes
- Layout is automatically adjusted based on your configuration
@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many
## Categories
Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
### Built-in Categories
@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
### Usage
```javascript
// Via delegate_task tool
delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component")
delegate_task(category="ultrabrain", prompt="Design the payment processing flow")
// Via task tool
task(category="visual-engineering", prompt="Create a responsive dashboard component")
task(category="ultrabrain", prompt="Design the payment processing flow")
// Or target a specific agent directly (bypasses categories)
delegate_task(agent="oracle", prompt="Review this architecture")
task(agent="oracle", prompt="Review this architecture")
```
### Custom Categories
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`
| Option | Type | Default | Description |
| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
| `description` | string | - | Human-readable description of the category's purpose. Shown in delegate_task prompt. |
| `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. |
| `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |
## Model Resolution System

View File

@@ -54,7 +54,7 @@ Run agents in the background and continue working:
```
# Launch in background
delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
# Continue working...
# System notifies on completion
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
| Hook | Event | Description |
|------|-------|-------------|
| **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. |
| **delegate-task-retry** | PostToolUse | Retries failed task calls. |
#### Integration
@@ -454,7 +454,7 @@ Disable specific hooks in config:
| Tool | Description |
|------|-------------|
| **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
| **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
| **background_output** | Retrieve background task results |
| **background_cancel** | Cancel running background tasks |

View File

@@ -50,11 +50,11 @@ flowchart TB
User -->|"/start-work"| Orchestrator
Plan -->|"Read"| Orchestrator
Orchestrator -->|"delegate_task(category)"| Junior
Orchestrator -->|"delegate_task(agent)"| Oracle
Orchestrator -->|"delegate_task(agent)"| Explore
Orchestrator -->|"delegate_task(agent)"| Librarian
Orchestrator -->|"delegate_task(agent)"| Frontend
Orchestrator -->|"task(category)"| Junior
Orchestrator -->|"task(agent)"| Oracle
Orchestrator -->|"task(agent)"| Explore
Orchestrator -->|"task(agent)"| Librarian
Orchestrator -->|"task(agent)"| Frontend
Junior -->|"Results + Learnings"| Orchestrator
Oracle -->|"Advice"| Orchestrator
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
```typescript
// Orchestrator identifies parallelizable groups from plan
// Group A: Tasks 2, 3, 4 (no file conflicts)
delegate_task(category="ultrabrain", prompt="Task 2...")
delegate_task(category="visual-engineering", prompt="Task 3...")
delegate_task(category="general", prompt="Task 4...")
task(category="ultrabrain", prompt="Task 2...")
task(category="visual-engineering", prompt="Task 3...")
task(category="general", prompt="Task 4...")
// All run simultaneously
```
@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")
Junior is the **workhorse** that actually writes code. Key characteristics:
- **Focused**: Cannot delegate (blocked from task/delegate_task tools)
- **Focused**: Cannot delegate (blocked from task tool)
- **Disciplined**: Obsessive todo tracking
- **Verified**: Must pass lsp_diagnostics before completion
- **Constrained**: Cannot modify plan files (READ-ONLY)
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
---
## The delegate_task Tool: Category + Skill System
## The task Tool: Category + Skill System
### Why Categories are Revolutionary
@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
```typescript
// OLD: Model name creates distributional bias
delegate_task(agent="gpt-5.2", prompt="...") // Model knows its limitations
delegate_task(agent="claude-opus-4.6", prompt="...") // Different self-perception
task(agent="gpt-5.2", prompt="...") // Model knows its limitations
task(agent="claude-opus-4.6", prompt="...") // Different self-perception
```
**The Solution: Semantic Categories:**
```typescript
// NEW: Category describes INTENT, not implementation
delegate_task(category="ultrabrain", prompt="...") // "Think strategically"
delegate_task(category="visual-engineering", prompt="...") // "Design beautifully"
delegate_task(category="quick", prompt="...") // "Just get it done fast"
task(category="ultrabrain", prompt="...") // "Think strategically"
task(category="visual-engineering", prompt="...") // "Design beautifully"
task(category="quick", prompt="...") // "Just get it done fast"
```
### Built-in Categories
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:
```typescript
// Category + Skill combination
delegate_task(
task(
category="visual-engineering",
load_skills=["frontend-ui-ux"], // Adds UI/UX expertise
prompt="..."
)
delegate_task(
task(
category="general",
load_skills=["playwright"], // Adds browser automation expertise
prompt="..."
@@ -365,7 +365,7 @@ sequenceDiagram
Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
Orchestrator->>Junior: delegate_task(category, load_skills, prompt)
Orchestrator->>Junior: task(category, load_skills, prompt)
Junior->>Junior: Create todos, execute
Junior->>Junior: Verify (lsp_diagnostics, tests)

View File

@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.
2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.
3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly.
3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.
4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.

View File

@@ -288,7 +288,7 @@ src/tools/delegate-task/constants.ts
```
Sisyphus (ULW mode)
delegate_task(category="deep", ...)
task(category="deep", ...)
executor.ts: executeBackgroundContinuation()

View File

@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
- "Working with unfamiliar npm/pip/cargo packages"
### Pre-Delegation Planning (MANDATORY)
**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.**
**BEFORE every `task` call, EXPLICITLY declare your reasoning.**
#### Step 1: Identify Task Requirements
@@ -236,7 +236,7 @@ Ask yourself:
**MANDATORY FORMAT:**
```
I will use delegate_task with:
I will use task with:
- **Category**: [selected-category-name]
- **Why this category**: [how category description matches task domain]
- **load_skills**: [list of selected skills]
@@ -246,14 +246,14 @@ I will use delegate_task with:
- **Expected Outcome**: [what success looks like]
```
**Then** make the delegate_task call.
**Then** make the task call.
#### Examples
**CORRECT: Full Evaluation**
```
I will use delegate_task with:
I will use task with:
- **Category**: [category-name]
- **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **load_skills**: ["skill-a", "skill-b"]
@@ -263,9 +263,11 @@ I will use delegate_task with:
- skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
- **Expected Outcome**: [concrete deliverable]
delegate_task(
task(
category="[category-name]",
load_skills=["skill-a", "skill-b"],
description="[short task description]",
run_in_background=false,
prompt="..."
)
```
@@ -273,14 +275,16 @@ delegate_task(
**CORRECT: Agent-Specific (for exploration/consultation)**
```
I will use delegate_task with:
I will use task with:
- **Agent**: [agent-name]
- **Reason**: This requires [agent's specialty] based on agent description
- **load_skills**: [] (agents have built-in expertise)
- **Expected Outcome**: [what agent should return]
delegate_task(
task(
subagent_type="[agent-name]",
description="[short task description]",
run_in_background=false,
load_skills=[],
prompt="..."
)
@@ -289,14 +293,15 @@ delegate_task(
**CORRECT: Background Exploration**
```
I will use delegate_task with:
I will use task with:
- **Agent**: explore
- **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **load_skills**: []
- **Expected Outcome**: List of files containing auth patterns
delegate_task(
task(
subagent_type="explore",
description="Find auth implementations",
run_in_background=true,
load_skills=[],
prompt="Find all authentication implementations in the codebase"
@@ -306,7 +311,7 @@ delegate_task(
**WRONG: No Skill Evaluation**
```
delegate_task(category="...", load_skills=[], prompt="...") // Where's the justification?
task(category="...", load_skills=[], prompt="...") // Where's the justification?
```
**WRONG: Vague Category Selection**
@@ -317,7 +322,7 @@ I'll use this category because it seems right.
#### Enforcement
**BLOCKING VIOLATION**: If you call `delegate_task` without:
**BLOCKING VIOLATION**: If you call `task` without:
1. Explaining WHY category was selected (based on description)
2. Evaluating EACH available skill for relevance
@@ -329,15 +334,15 @@ I'll use this category because it seems right.
```typescript
// CORRECT: Always background, always parallel
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
result = delegate_task(...) // Never wait synchronously for explore/librarian
result = task(...) // Never wait synchronously for explore/librarian
```
### Background Result Collection:
@@ -347,16 +352,16 @@ result = delegate_task(...) // Never wait synchronously for explore/librarian
4. BEFORE final answer: `background_cancel(all=true)`
### Resume Previous Agent (CRITICAL for efficiency):
Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED.
Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.
**ALWAYS use resume when:**
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"`
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"`
- Multi-turn with same agent → resume instead of new task (saves tokens!)
**ALWAYS use session_id when:**
- Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
- Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
- Multi-turn with same agent → reuse its session_id instead of starting a new task (saves tokens!)
**Example:**
```
delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.")
task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
```
### Search Stop Conditions
@@ -377,7 +382,7 @@ STOP searching when:
3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.**
**task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models)
@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern
```typescript
delegate_task(
task(
category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills
prompt="..."
@@ -451,7 +456,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):**
```typescript
delegate_task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
```
### Delegation Table:

View File

@@ -68,11 +68,11 @@ agents/
## TOOL RESTRICTIONS
| Agent | Denied Tools |
|-------|-------------|
| oracle | write, edit, task, delegate_task |
| librarian | write, edit, task, delegate_task, call_omo_agent |
| explore | write, edit, task, delegate_task, call_omo_agent |
| oracle | write, edit, task |
| librarian | write, edit, task, call_omo_agent |
| explore | write, edit, task, call_omo_agent |
| multimodal-looker | Allowlist: read only |
| Sisyphus-Junior | task, delegate_task |
| Sisyphus-Junior | task |
| Atlas | task, call_omo_agent |
## PATTERNS
@@ -85,5 +85,5 @@ agents/
## ANTI-PATTERNS
- **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration
- **Sequential calls**: Use `task` with `run_in_background` for exploration
- **Prometheus writing code**: Planner only - never implements

View File

@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
</identity>
<mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
Complete ALL tasks in a work plan via \`task()\` until fully done.
One task per delegation. Parallel when independent. Verify everything.
</mission>
<delegation_system>
## How to Delegate
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript
// Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
delegate_task(
task(
category="[category-name]",
load_skills=["skill-1", "skill-2"],
run_in_background=false,
@@ -38,7 +38,7 @@ delegate_task(
)
// Option B: Specialized Agent (for specific expert tasks)
delegate_task(
task(
subagent_type="[agent-name]",
load_skills=[],
run_in_background=false,
@@ -58,7 +58,7 @@ delegate_task(
## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown
## 1. TASK
@@ -149,7 +149,7 @@ Structure:
### 3.1 Check Parallelization
If tasks can run in parallel:
- Prepare prompts for ALL parallelizable tasks
- Invoke multiple \`delegate_task()\` in ONE message
- Invoke multiple \`task()\` in ONE message
- Wait for all to complete
- Verify all, then continue
@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
Extract wisdom and include in prompt.
### 3.3 Invoke delegate_task()
### 3.3 Invoke task()
\`\`\`typescript
delegate_task(
task(
category="[category]",
load_skills=["[relevant-skills]"],
run_in_background=false,
@@ -210,7 +210,7 @@ delegate_task(
**If verification fails**: Resume the SAME session with the ACTUAL error output:
\`\`\`typescript
delegate_task(
task(
session_id="ses_xyz789", // ALWAYS use the session from the failed task
load_skills=[...],
prompt="Verification failed: {actual error}. Fix."
@@ -221,13 +221,13 @@ delegate_task(
**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**
Every \`delegate_task()\` output includes a session_id. STORE IT.
Every \`task()\` output includes a session_id. STORE IT.
If task fails:
1. Identify what went wrong
2. **Resume the SAME session** - subagent has full context already:
\`\`\`typescript
delegate_task(
task(
session_id="ses_xyz789", // Session from failed task
load_skills=[...],
prompt="FAILED: {error}. Fix by: {specific instruction}"
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:
**For exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...)
delegate_task(subagent_type="librarian", run_in_background=true, ...)
task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="librarian", run_in_background=true, ...)
\`\`\`
**For task execution**: NEVER background
\`\`\`typescript
delegate_task(category="...", run_in_background=false, ...)
task(category="...", run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript
// Tasks 2, 3, 4 are independent - invoke together
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
\`\`\`
**Background management**:

View File

@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
</identity>
<mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
Complete ALL tasks in a work plan via \`task()\` until fully done.
- One task per delegation
- Parallel when independent
- Verify everything
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
<delegation_system>
## Delegation API
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive):
Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript
// Category + Skills (spawns Sisyphus-Junior)
delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
// Specialized Agent
delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
\`\`\`
{CATEGORY_SECTION}
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,
## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections:
Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown
## 1. TASK
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
## Step 3: Execute Tasks
### 3.1 Parallelization Check
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message
- Parallel tasks → invoke multiple \`task()\` in ONE message
- Sequential → process one at a time
### 3.2 Pre-Delegation (MANDATORY)
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
\`\`\`
Extract wisdom → include in prompt.
### 3.3 Invoke delegate_task()
### 3.3 Invoke task()
\`\`\`typescript
delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
\`\`\`
### 3.4 Verify (PROJECT-LEVEL QA)
@@ -201,7 +201,7 @@ Checklist:
**CRITICAL: Use \`session_id\` for retries.**
\`\`\`typescript
delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
\`\`\`
- Maximum 3 retries per task
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
<parallel_execution>
**Exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="explore", run_in_background=true, ...)
\`\`\`
**Task execution**: NEVER background
\`\`\`typescript
delegate_task(category="...", run_in_background=false, ...)
task(category="...", run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
\`\`\`
**Background management**:

View File

@@ -1,7 +1,7 @@
/**
* Atlas - Master Orchestrator Agent
*
* Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done.
* Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
* You are the conductor of a symphony of specialized agents.
*
* Routing:
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
const baseConfig = {
description:
"Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
"Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
mode: MODE,
...(ctx.model ? { model: ctx.model } : {}),
temperature: 0.1,

View File

@@ -47,7 +47,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
${categoryRows.join("\n")}
\`\`\`typescript
delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
\`\`\``
}
@@ -105,7 +105,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my
**Usage:**
\`\`\`typescript
delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
\`\`\`
**IMPORTANT:**

View File

@@ -242,7 +242,7 @@ ${builtinRows.join("\n")}`
return `### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.**
**task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models)
@@ -296,7 +296,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern
\`\`\`typescript
delegate_task(
task(
category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills — ESPECIALLY user-installed ones
prompt="..."
@@ -305,7 +305,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):**
\`\`\`typescript
delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
\`\`\``
}

View File

@@ -29,7 +29,7 @@ export function createExploreAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
"call_omo_agent",
])

View File

@@ -227,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*
**Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\`
2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
3. Can I do it myself for the best result, FOR SURE?
**Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
@@ -280,15 +280,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue immediately - collect results when needed
// WRONG: Sequential or blocking - NEVER DO THIS
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\`
**Rules:**
@@ -393,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.**
Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:**
| Scenario | Action |

View File

@@ -26,7 +26,7 @@ export function createLibrarianAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
"call_omo_agent",
])

View File

@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
"write",
"edit",
"task",
"delegate_task",
])
export function createMetisAgent(model: string): AgentConfig {

View File

@@ -193,7 +193,7 @@ export function createMomusAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
])
const base = {

View File

@@ -147,7 +147,7 @@ export function createOracleAgent(model: string): AgentConfig {
"write",
"edit",
"task",
"delegate_task",
"task",
])
const base = {

View File

@@ -15,7 +15,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
\`\`\`typescript
// After generating initial plan
while (true) {
const result = delegate_task(
const result = task(
subagent_type="momus",
prompt=".sisyphus/plans/{name}.md",
run_in_background=false

View File

@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
**Research First:**
\`\`\`typescript
// Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
\`\`\`typescript
// Launch BEFORE asking user questions
// Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
\`\`\`
**Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js
Run this check:
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
\`\`\`
#### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:
**Research First:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
\`\`\`
**Oracle Consultation** (recommend when stakes are high):
\`\`\`typescript
delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
\`\`\`
**Interview Focus:**
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:
**Parallel Investigation:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i
**For Understanding Codebase:**
\`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
\`\`\`
**For External Knowledge:**
\`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
\`\`\`
**For Implementation Examples:**
\`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
\`\`\`
## Interview Mode Anti-Patterns

View File

@@ -59,7 +59,7 @@ todoWrite([
**BEFORE generating the plan**, summon Metis to catch what you might have missed:
\`\`\`typescript
delegate_task(
task(
subagent_type="metis",
prompt=\`Review this planning session before I generate the work plan:

View File

@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential
| Wave | Tasks | Recommended Agents |
|------|-------|-------------------|
| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) |
| 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
| 3 | 4 | final integration task |

View File

@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
<Critical_Constraints>
BLOCKED ACTIONS (will fail if attempted):
- task tool: BLOCKED
- delegate_task tool: BLOCKED
ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
You work ALONE for implementation. No delegation of implementation tasks.

View File

@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
| Tool | Status |
|------|--------|
| task | BLOCKED |
| delegate_task | BLOCKED |
ALLOWED:
| Tool | Usage |

View File

@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
})
})
describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => {
test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => {
describe("tool safety (task blocked, call_omo_agent allowed)", () => {
test("task remains blocked, call_omo_agent is allowed via tools format", () => {
// given
const override = {
tools: {
task: true,
delegate_task: true,
call_omo_agent: true,
read: true,
},
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const permission = result.permission as Record<string, string> | undefined
if (tools) {
expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(tools.call_omo_agent).toBe(true)
expect(tools.read).toBe(true)
}
if (permission) {
expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(permission.call_omo_agent).toBe("allow")
}
})
test("task and delegate_task remain blocked when using permission format override", () => {
test("task remains blocked when using permission format override", () => {
// given
const override = {
permission: {
task: "allow",
delegate_task: "allow",
call_omo_agent: "allow",
read: "allow",
},
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// when
const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])
// then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning
// then - task blocked, but call_omo_agent allowed for explore/librarian spawning
const tools = result.tools as Record<string, boolean> | undefined
const permission = result.permission as Record<string, string> | undefined
if (tools) {
expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
expect(tools.call_omo_agent).toBe(true)
}
if (permission) {
expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
expect(permission.call_omo_agent).toBe("allow")
}
})

View File

@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"
// Core tools that Sisyphus-Junior must NEVER have access to
// Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
const BLOCKED_TOOLS = ["task", "delegate_task"]
const BLOCKED_TOOLS = ["task"]
export const SISYPHUS_JUNIOR_DEFAULTS = {
model: "anthropic/claude-sonnet-4-5",

View File

@@ -214,8 +214,8 @@ ${keyTriggers}
**Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
- MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER.
2. If not, is there a \`task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
- MUST FIND skills to use, for: \`task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS TASK PARAMETER.
3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?
**Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
@@ -277,15 +277,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian
result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\`
### Background Result Collection:
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.**
Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:**
| Scenario | Action |
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
\`\`\`typescript
// WRONG: Starting fresh loses all context
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...")
task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")
// CORRECT: Resume preserves everything
delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42")
task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
\`\`\`
**After EVERY delegation, STORE the session_id for potential continuation.**

View File

@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
edit: PermissionValue.optional(),
bash: BashPermission.optional(),
webfetch: PermissionValue.optional(),
task: PermissionValue.optional(),
doom_loop: PermissionValue.optional(),
external_directory: PermissionValue.optional(),
})
@@ -183,7 +184,7 @@ export const SisyphusAgentConfigSchema = z.object({
})
export const CategoryConfigSchema = z.object({
/** Human-readable description of the category's purpose. Shown in delegate_task prompt. */
/** Human-readable description of the category's purpose. Shown in task prompt. */
description: z.string().optional(),
model: z.string().optional(),
variant: z.string().optional(),

View File

@@ -56,7 +56,7 @@ features/
## ANTI-PATTERNS
- **Sequential delegation**: Use `delegate_task` parallel
- **Sequential delegation**: Use `task` in parallel
- **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state

View File

@@ -1,8 +1,9 @@
import { describe, test, expect, beforeEach } from "bun:test"
import { afterEach } from "bun:test"
declare const require: (name: string) => any
const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundTask, ResumeInput } from "./types"
import { MIN_IDLE_TIME_MS } from "./constants"
import { BackgroundManager } from "./manager"
import { ConcurrencyManager } from "./concurrency"
@@ -1088,6 +1089,34 @@ describe("BackgroundManager.tryCompleteTask", () => {
// #then
expect(abortedSessionIDs).toEqual(["session-1"])
})
test("should clean pendingByParent even when notifyParentSession throws", async () => {
  // given - a running task that is tracked as pending under its parent session
  const parentSessionID = "parent-pending-cleanup"
  const task: BackgroundTask = {
    id: "task-pending-cleanup",
    sessionID: "session-pending-cleanup",
    parentSessionID,
    parentMessageID: "msg-1",
    description: "pending cleanup task",
    prompt: "test",
    agent: "explore",
    status: "running",
    startedAt: new Date(),
  }
  getTaskMap(manager).set(task.id, task)
  getPendingByParent(manager).set(parentSessionID, new Set([task.id]))

  // given - parent notification is replaced with a stub that always rejects
  const notifiable = manager as unknown as { notifyParentSession: () => Promise<void> }
  notifiable.notifyParentSession = async () => {
    throw new Error("notify failed")
  }

  // when
  await tryCompleteTaskForTest(manager, task)

  // then - the task is completed and its parent's pending entry is gone
  expect(task.status).toBe("completed")
  expect(getPendingByParent(manager).get(parentSessionID)).toBeUndefined()
})
})
describe("BackgroundManager.trackTask", () => {
@@ -1110,7 +1139,7 @@ describe("BackgroundManager.trackTask", () => {
sessionID: "session-1",
parentSessionID: "parent-session",
description: "external task",
agent: "delegate_task",
agent: "task",
concurrencyKey: "external-key",
}
@@ -1145,7 +1174,7 @@ describe("BackgroundManager.resume concurrency key", () => {
sessionID: "session-1",
parentSessionID: "parent-session",
description: "external task",
agent: "delegate_task",
agent: "task",
concurrencyKey: "external-key",
})
@@ -2408,3 +2437,179 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
expect(completionTimers.size).toBe(0)
})
})
// Exercises how handleEvent treats a `session.idle` event relative to MIN_IDLE_TIME_MS:
//   1. idle arriving too early is deferred and later completes the task,
//   2. idle arriving late enough completes the task right away,
//   3. a deferred idle does nothing if the task was already completed elsewhere.
// Tests 1 and 3 replace Date.now to control the elapsed time the manager observes and
// always restore it (and shut the manager down) in a finally block.
describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
  test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
    //#given - a running task started less than MIN_IDLE_TIME_MS ago
    const sessionID = "session-early-idle"
    const messagesCalls: string[] = []
    const realDateNow = Date.now
    const baseNow = realDateNow()

    const client = {
      session: {
        prompt: async () => ({}),
        abort: async () => ({}),
        // Record which session was queried so the test can assert exactly one lookup.
        messages: async (args: { path: { id: string } }) => {
          messagesCalls.push(args.path.id)
          return {
            data: [
              {
                info: { role: "assistant" },
                parts: [{ type: "text", text: "ok" }],
              },
            ],
          }
        },
        todo: async () => ({ data: [] }),
      },
    }

    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)

    // How far short of MIN_IDLE_TIME_MS the mocked clock is when session.idle fires.
    const remainingMs = 100
    const task: BackgroundTask = {
      id: "task-early-idle",
      sessionID,
      parentSessionID: "parent-session",
      parentMessageID: "msg-1",
      description: "early idle task",
      prompt: "test",
      agent: "explore",
      status: "running",
      startedAt: new Date(baseNow),
    }

    getTaskMap(manager).set(task.id, task)

    //#when - session.idle fires
    try {
      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
      manager.handleEvent({ type: "session.idle", properties: { sessionID } })

      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)

      //#then - idle should be deferred (not dropped), and task should eventually complete
      expect(task.status).toBe("running")
      // Wait in real time past the remaining window plus slack
      // (presumably the retry is scheduled for roughly remainingMs - confirm against manager).
      await new Promise((resolve) => setTimeout(resolve, remainingMs + 120))
      expect(task.status).toBe("completed")
      expect(messagesCalls).toEqual([sessionID])
    } finally {
      Date.now = realDateNow
      manager.shutdown()
    }
  })

  test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
    //#given - a running task started more than MIN_IDLE_TIME_MS ago
    const sessionID = "session-late-idle"
    const client = {
      session: {
        prompt: async () => ({}),
        abort: async () => ({}),
        messages: async () => ({
          data: [
            {
              info: { role: "assistant" },
              parts: [{ type: "text", text: "ok" }],
            },
          ],
        }),
        todo: async () => ({ data: [] }),
      },
    }

    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)

    const task: BackgroundTask = {
      id: "task-late-idle",
      sessionID,
      parentSessionID: "parent-session",
      parentMessageID: "msg-1",
      description: "late idle task",
      prompt: "test",
      agent: "explore",
      status: "running",
      startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
    }

    getTaskMap(manager).set(task.id, task)

    // Shut the manager down even if an assertion fails, matching the sibling tests.
    try {
      //#when
      manager.handleEvent({ type: "session.idle", properties: { sessionID } })

      //#then - should be processed immediately
      await new Promise((resolve) => setTimeout(resolve, 10))
      expect(task.status).toBe("completed")
    } finally {
      manager.shutdown()
    }
  })

  test("should not process deferred idle if task already completed by other means", async () => {
    //#given - a running task
    const sessionID = "session-deferred-noop"
    let messagesCallCount = 0
    const realDateNow = Date.now
    const baseNow = realDateNow()

    const client = {
      session: {
        prompt: async () => ({}),
        abort: async () => ({}),
        // Count lookups: the test expects none to happen at any point.
        messages: async () => {
          messagesCallCount += 1
          return {
            data: [
              {
                info: { role: "assistant" },
                parts: [{ type: "text", text: "ok" }],
              },
            ],
          }
        },
        todo: async () => ({ data: [] }),
      },
    }

    const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
    stubNotifyParentSession(manager)

    // How far short of MIN_IDLE_TIME_MS the mocked clock is when session.idle fires.
    const remainingMs = 120
    const task: BackgroundTask = {
      id: "task-deferred-noop",
      sessionID,
      parentSessionID: "parent-session",
      parentMessageID: "msg-1",
      description: "deferred noop task",
      prompt: "test",
      agent: "explore",
      status: "running",
      startedAt: new Date(baseNow),
    }
    getTaskMap(manager).set(task.id, task)

    //#when - session.idle fires early, then task completes via another path before defer timer
    try {
      Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
      manager.handleEvent({ type: "session.idle", properties: { sessionID } })
      expect(messagesCallCount).toBe(0)

      await tryCompleteTaskForTest(manager, task)
      expect(task.status).toBe("completed")

      // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
      Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)

      //#then - deferred callback should be a no-op
      await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
      expect(task.status).toBe("completed")
      expect(messagesCallCount).toBe(0)
    } finally {
      Date.now = realDateNow
      manager.shutdown()
    }
  })
})

View File

@@ -88,6 +88,7 @@ export class BackgroundManager {
private queuesByKey: Map<string, QueueItem[]> = new Map()
private processingKeys: Set<string> = new Set()
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
constructor(
ctx: PluginInput,
@@ -328,7 +329,6 @@ export class BackgroundManager {
tools: {
...getAgentToolRestrictions(input.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -357,6 +357,7 @@ export class BackgroundManager {
}).catch(() => {})
this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on error:", err)
})
@@ -410,7 +411,7 @@ export class BackgroundManager {
}
/**
* Track a task created elsewhere (e.g., from delegate_task) for notification tracking.
* Track a task created elsewhere (e.g., from task) for notification tracking.
* This allows tasks created by other tools to receive the same toast/prompt notifications.
*/
async trackTask(input: {
@@ -458,7 +459,7 @@ export class BackgroundManager {
return existingTask
}
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task"
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"
// Acquire concurrency slot if a key is provided
if (input.concurrencyKey) {
@@ -472,7 +473,7 @@ export class BackgroundManager {
parentMessageID: "",
description: input.description,
prompt: "",
agent: input.agent || "delegate_task",
agent: input.agent || "task",
status: "running",
startedAt: new Date(),
progress: {
@@ -587,7 +588,6 @@ export class BackgroundManager {
tools: {
...getAgentToolRestrictions(existingTask.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -614,6 +614,7 @@ export class BackgroundManager {
}
this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on resume error:", err)
})
@@ -651,6 +652,13 @@ export class BackgroundManager {
const task = this.findBySession(sessionID)
if (!task) return
// Clear any pending idle deferral timer since the task is still active
const existingTimer = this.idleDeferralTimers.get(task.id)
if (existingTimer) {
clearTimeout(existingTimer)
this.idleDeferralTimers.delete(task.id)
}
if (partInfo?.type === "tool" || partInfo?.tool) {
if (!task.progress) {
task.progress = {
@@ -677,7 +685,17 @@ export class BackgroundManager {
// Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
const elapsedMs = Date.now() - startedAt.getTime()
if (elapsedMs < MIN_IDLE_TIME_MS) {
log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id })
const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
if (!this.idleDeferralTimers.has(task.id)) {
log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
const timer = setTimeout(() => {
this.idleDeferralTimers.delete(task.id)
this.handleEvent({ type: "session.idle", properties: { sessionID } })
}, remainingMs)
this.idleDeferralTimers.set(task.id, timer)
} else {
log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
}
return
}
@@ -736,6 +754,12 @@ export class BackgroundManager {
clearTimeout(existingTimer)
this.completionTimers.delete(task.id)
}
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task)
this.tasks.delete(task.id)
this.clearNotificationsForTask(task.id)
@@ -890,6 +914,12 @@ export class BackgroundManager {
this.completionTimers.delete(task.id)
}
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task)
if (abortSession && task.sessionID) {
@@ -1025,6 +1055,15 @@ export class BackgroundManager {
this.markForNotification(task)
// Ensure pending tracking is cleaned up even if notification fails
this.cleanupPendingByParent(task)
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
if (task.sessionID) {
this.client.session.abort({
path: { id: task.sessionID },
@@ -1511,6 +1550,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
}
this.completionTimers.clear()
for (const timer of this.idleDeferralTimers.values()) {
clearTimeout(timer)
}
this.idleDeferralTimers.clear()
this.concurrencyManager.clear()
this.tasks.clear()
this.notifications.clear()

View File

@@ -146,7 +146,6 @@ export async function startTask(
tools: {
...getAgentToolRestrictions(input.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -231,7 +230,6 @@ export async function resumeTask(
tools: {
...getAgentToolRestrictions(task.agent),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},

View File

@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.
\`\`\`
// Fire all at once, collect results later
delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization")
delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
\`\`\`
<dynamic-agents>
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
Example spawning:
\`\`\`
// 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories")
task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
// ... more based on calculation
\`\`\`
</dynamic-agents>
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [
**Mark "generate" as in_progress.**
<critical>
**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
</critical>
### Root AGENTS.md (Full Treatment)
\`\`\`markdown
@@ -240,7 +245,7 @@ Launch writing tasks for each location:
\`\`\`
for loc in AGENTS_LOCATIONS (except root):
delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\`
task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
Generate AGENTS.md for: \${loc.path}
- Reason: \${loc.reason}
- 30-80 lines max

View File

@@ -1,6 +1,6 @@
---
name: git-master
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
---
# Git Master Agent

View File

@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
export const gitMasterSkill: BuiltinSkill = {
name: "git-master",
description:
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
template: `# Git Master Agent
You are a Git expert combining three specializations:

View File

@@ -0,0 +1,111 @@
import { describe, test, expect, beforeEach } from "bun:test"
import {
storeToolMetadata,
consumeToolMetadata,
getPendingStoreSize,
clearPendingStore,
} from "./index"
// Behavioral suite for the pending tool metadata store: store/consume round
// trips, one-shot consumption, key isolation, overwrite semantics, and
// eviction of stale entries.
describe("tool-metadata-store", () => {
  // The store is module-level state, so every test starts from an empty store.
  beforeEach(() => {
    clearPendingStore()
  })

  describe("storeToolMetadata", () => {
    test("#given metadata with title and metadata, #when stored, #then store size increases", () => {
      //#given
      const sessionID = "ses_abc123"
      const callID = "call_001"
      const data = {
        title: "Test Task",
        metadata: { sessionId: "ses_child", agent: "oracle" },
      }

      //#when
      storeToolMetadata(sessionID, callID, data)

      //#then
      expect(getPendingStoreSize()).toBe(1)
    })
  })

  describe("consumeToolMetadata", () => {
    test("#given stored metadata, #when consumed, #then returns the stored data", () => {
      //#given
      const sessionID = "ses_abc123"
      const callID = "call_001"
      const data = {
        title: "My Task",
        metadata: { sessionId: "ses_sub", run_in_background: true },
      }
      storeToolMetadata(sessionID, callID, data)

      //#when
      const result = consumeToolMetadata(sessionID, callID)

      //#then - the internal storedAt timestamp must not leak into the result
      expect(result).toEqual(data)
    })

    test("#given stored metadata, #when consumed twice, #then second call returns undefined", () => {
      //#given
      const sessionID = "ses_abc123"
      const callID = "call_001"
      storeToolMetadata(sessionID, callID, { title: "Task" })

      //#when
      consumeToolMetadata(sessionID, callID)
      const second = consumeToolMetadata(sessionID, callID)

      //#then
      expect(second).toBeUndefined()
      expect(getPendingStoreSize()).toBe(0)
    })

    test("#given no stored metadata, #when consumed, #then returns undefined", () => {
      //#given
      const sessionID = "ses_nonexistent"
      const callID = "call_999"

      //#when
      const result = consumeToolMetadata(sessionID, callID)

      //#then
      expect(result).toBeUndefined()
    })
  })

  describe("isolation", () => {
    test("#given multiple entries, #when consuming one, #then others remain", () => {
      //#given
      storeToolMetadata("ses_1", "call_a", { title: "Task A" })
      storeToolMetadata("ses_1", "call_b", { title: "Task B" })
      storeToolMetadata("ses_2", "call_a", { title: "Task C" })

      //#when
      const resultA = consumeToolMetadata("ses_1", "call_a")

      //#then
      expect(resultA?.title).toBe("Task A")
      expect(getPendingStoreSize()).toBe(2)
      expect(consumeToolMetadata("ses_1", "call_b")?.title).toBe("Task B")
      expect(consumeToolMetadata("ses_2", "call_a")?.title).toBe("Task C")
      expect(getPendingStoreSize()).toBe(0)
    })
  })

  describe("overwrite", () => {
    test("#given existing entry, #when stored again with same key, #then overwrites", () => {
      //#given
      storeToolMetadata("ses_1", "call_a", { title: "Old" })

      //#when
      storeToolMetadata("ses_1", "call_a", { title: "New", metadata: { updated: true } })

      //#then
      const result = consumeToolMetadata("ses_1", "call_a")
      expect(result?.title).toBe("New")
      expect(result?.metadata).toEqual({ updated: true })
    })
  })

  describe("stale cleanup", () => {
    test("#given an entry older than the stale timeout, #when another entry is stored, #then the stale entry is evicted", () => {
      //#given - an entry stored at a fixed, mocked point in time
      const realDateNow = Date.now
      const baseNow = realDateNow()
      // Mirrors the store's 15-minute stale timeout, which is not exported.
      const staleTimeoutMs = 15 * 60 * 1000
      try {
        Date.now = () => baseNow
        storeToolMetadata("ses_old", "call_old", { title: "Old" })

        //#when - the clock moves just past the timeout and a new entry is stored
        Date.now = () => baseNow + staleTimeoutMs + 1
        storeToolMetadata("ses_new", "call_new", { title: "New" })

        //#then - only the fresh entry survives
        expect(getPendingStoreSize()).toBe(1)
        expect(consumeToolMetadata("ses_old", "call_old")).toBeUndefined()
        expect(consumeToolMetadata("ses_new", "call_new")?.title).toBe("New")
      } finally {
        // Always restore the real clock so later tests are unaffected.
        Date.now = realDateNow
      }
    })
  })
})

View File

@@ -0,0 +1,84 @@
/**
* Pending tool metadata store.
*
* OpenCode's `fromPlugin()` wrapper always replaces the metadata returned by
* plugin tools with `{ truncated, outputPath }`, discarding any sessionId,
* title, or custom metadata set during `execute()`.
*
* This store captures metadata written via `ctx.metadata()` inside execute(),
* then the `tool.execute.after` hook consumes it and merges it back into the
* result *before* the processor writes the final part to the session store.
*
* Flow:
* execute() → storeToolMetadata(sessionID, callID, data)
* fromPlugin() → overwrites metadata with { truncated }
* tool.execute.after → consumeToolMetadata(sessionID, callID) → merges back
* processor → Session.updatePart(status:"completed", metadata: result.metadata)
*/
/**
 * Metadata captured during a tool's `execute()` so it can be merged back into
 * the tool result after `fromPlugin()` has overwritten it. Both fields are
 * optional; callers store whichever parts they set.
 */
export interface PendingToolMetadata {
/** Title set for the tool call during `execute()`. */
title?: string
/** Custom key/value metadata (for example a sessionId) set during `execute()`. */
metadata?: Record<string, unknown>
}
// Pending entries keyed by makeKey(sessionID, callID). `storedAt` is the
// Date.now() timestamp recorded at insertion and is used only for stale cleanup.
const pendingStore = new Map<string, PendingToolMetadata & { storedAt: number }>()
// Age (15 minutes, in milliseconds) beyond which an unconsumed entry is treated as stale.
const STALE_TIMEOUT_MS = 15 * 60 * 1000
/**
 * Builds the composite store key for a tool call.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the individual tool call.
 * @returns The two IDs joined by a single colon.
 */
function makeKey(sessionID: string, callID: string): string {
  return [sessionID, callID].join(":")
}
/**
 * Removes every pending entry whose age exceeds STALE_TIMEOUT_MS.
 * Age is measured against a single Date.now() reading taken up front.
 */
function cleanupStaleEntries(): void {
  const now = Date.now()
  // Deleting the entry currently being visited is safe while iterating a Map.
  pendingStore.forEach((entry, key) => {
    const ageMs = now - entry.storedAt
    if (ageMs > STALE_TIMEOUT_MS) {
      pendingStore.delete(key)
    }
  })
}
/**
 * Records metadata for a tool call so it can be restored once fromPlugin()
 * has overwritten the tool result's metadata. Intended to be called from a
 * tool's execute() alongside ctx.metadata().
 *
 * Stale entries are purged first; an existing entry under the same
 * session/call key is replaced.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the tool call.
 * @param data - Title and/or custom metadata to keep until consumed.
 */
export function storeToolMetadata(
  sessionID: string,
  callID: string,
  data: PendingToolMetadata,
): void {
  cleanupStaleEntries()
  const key = makeKey(sessionID, callID)
  // Stamp the entry with its insertion time for later stale cleanup.
  const entry = { ...data, storedAt: Date.now() }
  pendingStore.set(key, entry)
}
/**
 * Retrieves and removes the metadata stored for a tool call (one-time read).
 * Intended to be called from the tool.execute.after hook.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the tool call.
 * @returns The stored metadata without the internal timestamp, or
 *   `undefined` when nothing is pending for this key.
 */
export function consumeToolMetadata(
  sessionID: string,
  callID: string,
): PendingToolMetadata | undefined {
  const key = makeKey(sessionID, callID)
  const entry = pendingStore.get(key)
  if (!entry) {
    return undefined
  }
  pendingStore.delete(key)
  // Drop the bookkeeping timestamp; callers only see what they stored.
  const { storedAt: _storedAt, ...payload } = entry
  return payload
}
/**
 * Reports how many entries are currently pending (testing/debugging aid).
 *
 * @returns Number of entries in the pending store.
 */
export function getPendingStoreSize(): number {
  const { size } = pendingStore
  return size
}
/**
 * Empties the pending store, discarding every unconsumed entry.
 * Meant for test setup/teardown.
 */
export function clearPendingStore(): void {
  pendingStore.clear()
}

View File

@@ -24,7 +24,7 @@ export const TARGET_TOOLS = new Set([
export const AGENT_TOOLS = new Set([
"task",
"call_omo_agent",
"delegate_task",
"task",
]);
export const REMINDER_MESSAGE = `
@@ -32,13 +32,13 @@ export const REMINDER_MESSAGE = `
You called a search/fetch tool directly without leveraging specialized agents.
RECOMMENDED: Use delegate_task with explore/librarian agents for better results:
RECOMMENDED: Use task with explore/librarian agents for better results:
\`\`\`
// Parallel exploration - fire multiple agents simultaneously
delegate_task(agent="explore", prompt="Find all files matching pattern X")
delegate_task(agent="explore", prompt="Search for implementation of Y")
delegate_task(agent="librarian", prompt="Lookup documentation for Z")
task(agent="explore", prompt="Find all files matching pattern X")
task(agent="explore", prompt="Search for implementation of Y")
task(agent="librarian", prompt="Lookup documentation for Z")
// Then continue your work while they run in background
// System will notify you when each completes
@@ -50,5 +50,5 @@ WHY:
- Specialized agents have domain expertise
- Reduces context window usage in main session
ALWAYS prefer: Multiple parallel delegate_task calls > Direct tool calls
ALWAYS prefer: Multiple parallel task calls > Direct tool calls
`;

View File

@@ -86,7 +86,7 @@ describe("atlas hook", () => {
// when - calling with undefined output
const result = await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID: "session-123" },
{ tool: "task", sessionID: "session-123" },
undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> }
)
@@ -94,8 +94,8 @@ describe("atlas hook", () => {
expect(result).toBeUndefined()
})
test("should ignore non-delegate_task tools", async () => {
// given - hook and non-delegate_task tool
test("should ignore non-task tools", async () => {
// given - hook and non-task tool
const hook = createAtlasHook(createMockPluginInput())
const output = {
title: "Test Tool",
@@ -138,7 +138,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -162,14 +162,14 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
// then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("MANDATORY:")
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -199,7 +199,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -208,7 +208,7 @@ describe("atlas hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan")
expect(output.output).toContain("LIE")
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID)
})
@@ -238,7 +238,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -275,7 +275,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -311,7 +311,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -348,7 +348,7 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
@@ -385,12 +385,12 @@ describe("atlas hook", () => {
// when
await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID },
{ tool: "task", sessionID },
output
)
// then - should include session_id instructions and verification
expect(output.output).toContain("delegate_task(session_id=")
expect(output.output).toContain("task(session_id=")
expect(output.output).toContain("[x]")
expect(output.output).toContain("MANDATORY:")
@@ -425,8 +425,8 @@ describe("atlas hook", () => {
// then
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("task")
expect(output.output).toContain("task")
})
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {

View File

@@ -44,7 +44,7 @@ You just performed direct file modifications outside \`.sisyphus/\`.
**You are an ORCHESTRATOR, not an IMPLEMENTER.**
As an orchestrator, you should:
- **DELEGATE** implementation work to subagents via \`delegate_task\`
- **DELEGATE** implementation work to subagents via \`task\`
- **VERIFY** the work done by subagents
- **COORDINATE** multiple tasks and ensure completion
@@ -54,7 +54,7 @@ You should NOT:
- Implement features yourself
**If you need to make changes:**
1. Use \`delegate_task\` to delegate to an appropriate subagent
1. Use \`task\` to delegate to an appropriate subagent
2. Provide clear instructions in the prompt
3. Verify the subagent's work after completion
@@ -128,7 +128,7 @@ You (Atlas) are attempting to directly modify a file outside \`.sisyphus/\`.
**THIS IS FORBIDDEN** (except for VERIFICATION purposes)
As an ORCHESTRATOR, you MUST:
1. **DELEGATE** all implementation work via \`delegate_task\`
1. **DELEGATE** all implementation work via \`task\`
2. **VERIFY** the work done by subagents (reading files is OK)
3. **COORDINATE** - you orchestrate, you don't implement
@@ -146,11 +146,11 @@ As an ORCHESTRATOR, you MUST:
**IF THIS IS FOR VERIFICATION:**
Proceed if you are verifying subagent work by making a small fix.
But for any substantial changes, USE \`delegate_task\`.
But for any substantial changes, USE \`task\`.
**CORRECT APPROACH:**
\`\`\`
delegate_task(
task(
category="...",
prompt="[specific single task with clear acceptance criteria]"
)
@@ -193,7 +193,7 @@ function buildVerificationReminder(sessionId: string): string {
**If ANY verification fails, use this immediately:**
\`\`\`
delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\``
}
@@ -688,12 +688,12 @@ export function createAtlasHook(
return
}
// Check delegate_task - inject single-task directive
if (input.tool === "delegate_task") {
// Check task - inject single-task directive
if (input.tool === "task") {
const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, {
log(`[${HOOK_NAME}] Injected single-task directive to task`, {
sessionID: input.sessionID,
})
}
@@ -732,7 +732,7 @@ export function createAtlasHook(
return
}
if (input.tool !== "delegate_task") {
if (input.tool !== "task") {
return
}

View File

@@ -50,7 +50,7 @@ describe("category-skill-reminder hook", () => {
// then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]")
expect(output.output).toContain("delegate_task")
expect(output.output).toContain("task")
clearSessionAgent(sessionID)
})
@@ -130,16 +130,16 @@ describe("category-skill-reminder hook", () => {
})
describe("delegation tool tracking", () => {
test("should NOT inject reminder if delegate_task is used", async () => {
// given - sisyphus agent that uses delegate_task
test("should NOT inject reminder if task is used", async () => {
// given - sisyphus agent that uses task
const hook = createHook()
const sessionID = "delegation-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// when - delegate_task is used, then more tool calls
await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output)
// when - task is used, then more tool calls
await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
@@ -329,15 +329,15 @@ describe("category-skill-reminder hook", () => {
})
test("should handle delegation tool names case-insensitively", async () => {
// given - sisyphus agent using DELEGATE_TASK in uppercase
// given - sisyphus agent using TASK in uppercase
const hook = createHook()
const sessionID = "case-delegate-session"
updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} }
// when - DELEGATE_TASK in uppercase is used
await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output)
// when - TASK in uppercase is used
await hook["tool.execute.after"]({ tool: "TASK", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)

View File

@@ -30,9 +30,8 @@ const DELEGATABLE_WORK_TOOLS = new Set([
* Tools that indicate the agent is already using delegation properly.
*/
const DELEGATION_TOOLS = new Set([
"delegate_task",
"call_omo_agent",
"task",
"call_omo_agent",
])
function formatSkillNames(skills: AvailableSkill[], limit: number): string {
@@ -63,7 +62,7 @@ function buildReminderMessage(availableSkills: AvailableSkill[]): string {
"> User-installed skills OVERRIDE built-in defaults. ALWAYS prefer YOUR SKILLS when domain matches.",
"",
"```typescript",
`delegate_task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
`task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
"```",
"",
]

View File

@@ -257,7 +257,7 @@ export function createClaudeCodeHooksHook(
const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {}
// Use metadata if available and non-empty, otherwise wrap output.output in a structured object
// This ensures plugin tools (call_omo_agent, delegate_task, task) that return strings
// This ensures plugin tools (call_omo_agent, task) that return strings
// get their results properly recorded in transcripts instead of empty {}
const metadata = output.metadata as Record<string, unknown> | undefined
const hasMetadata = metadata && typeof metadata === "object" && Object.keys(metadata).length > 0

View File

@@ -8,7 +8,7 @@ import {
describe("sisyphus-task-retry", () => {
describe("DELEGATE_TASK_ERROR_PATTERNS", () => {
// given error patterns are defined
// then should include all known delegate_task error types
// then should include all known task error types
it("should contain all known error patterns", () => {
expect(DELEGATE_TASK_ERROR_PATTERNS.length).toBeGreaterThan(5)

View File

@@ -45,7 +45,7 @@ export const DELEGATE_TASK_ERROR_PATTERNS: DelegateTaskErrorPattern[] = [
{
pattern: "Cannot call primary agent",
errorType: "primary_agent",
fixHint: "Primary agents cannot be called via delegate_task. Use a subagent like 'explore', 'oracle', or 'librarian'",
fixHint: "Primary agents cannot be called via task. Use a subagent like 'explore', 'oracle', or 'librarian'",
},
{
pattern: "Skills not found",
@@ -85,11 +85,11 @@ export function buildRetryGuidance(errorInfo: DetectedError): string {
)
if (!pattern) {
return `[delegate_task ERROR] Fix the error and retry with correct parameters.`
return `[task ERROR] Fix the error and retry with correct parameters.`
}
let guidance = `
[delegate_task CALL FAILED - IMMEDIATE RETRY REQUIRED]
[task CALL FAILED - IMMEDIATE RETRY REQUIRED]
**Error Type**: ${errorInfo.errorType}
**Fix**: ${pattern.fixHint}
@@ -101,11 +101,11 @@ export function buildRetryGuidance(errorInfo: DetectedError): string {
}
guidance += `
**Action**: Retry delegate_task NOW with corrected parameters.
**Action**: Retry task NOW with corrected parameters.
Example of CORRECT call:
\`\`\`
delegate_task(
task(
description="Task description",
prompt="Detailed prompt...",
category="unspecified-low", // OR subagent_type="explore"
@@ -124,7 +124,7 @@ export function createDelegateTaskRetryHook(_ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown }
) => {
if (input.tool.toLowerCase() !== "delegate_task") return
if (input.tool.toLowerCase() !== "task") return
const errorInfo = detectDelegateTaskError(output.output)
if (errorInfo) {

View File

@@ -15,7 +15,7 @@ export function createEmptyTaskResponseDetectorHook(_ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown }
) => {
if (input.tool !== "Task") return
if (input.tool !== "Task" && input.tool !== "task") return
const responseText = output.output?.trim() ?? ""

View File

@@ -2,7 +2,7 @@
* Default ultrawork message optimized for Claude series models.
*
* Key characteristics:
* - Natural tool-like usage of explore/librarian agents (background=true)
* - Natural tool-like usage of explore/librarian agents (run_in_background=true)
* - Parallel execution emphasized - fire agents and continue working
* - Simple workflow: EXPLORES → GATHER → PLAN → DELEGATE
*/
@@ -44,9 +44,9 @@ export const ULTRAWORK_DEFAULT_MESSAGE = `<ultrawork-mode>
**WHEN IN DOUBT:**
\`\`\`
delegate_task(subagent_type="explore", load_skills=[], prompt="Find [X] patterns in codebase", run_in_background=true)
delegate_task(subagent_type="librarian", load_skills=[], prompt="Find docs/examples for [Y]", run_in_background=true)
delegate_task(subagent_type="oracle", load_skills=[], prompt="Review my approach: [describe plan]", run_in_background=false)
task(subagent_type="explore", load_skills=[], prompt="Find [X] patterns in codebase", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="Find docs/examples for [Y]", run_in_background=true)
task(subagent_type="oracle", load_skills=[], prompt="Review my approach: [describe plan]", run_in_background=false)
\`\`\`
**ONLY AFTER YOU HAVE:**
@@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
| Architecture decision needed | MUST call plan agent |
\`\`\`
delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
task(subagent_type="plan", prompt="<gathered context + user request>")
\`\`\`
**WHY PLAN AGENT IS MANDATORY:**
@@ -119,9 +119,9 @@ delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
| Scenario | Action |
|----------|--------|
| Plan agent asks clarifying questions | \`delegate_task(session_id="{returned_session_id}", prompt="<your answer>")\` |
| Need to refine the plan | \`delegate_task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
| Plan needs more detail | \`delegate_task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="<your answer>")\` |
| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
**WHY SESSION_ID IS CRITICAL:**
- Plan agent retains FULL conversation context
@@ -131,10 +131,10 @@ delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
\`\`\`
// WRONG: Starting fresh loses all context
delegate_task(subagent_type="plan", prompt="Here's more info...")
task(subagent_type="plan", prompt="Here's more info...")
// CORRECT: Resume preserves everything
delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
\`\`\`
**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
@@ -147,23 +147,23 @@ delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question
| Task Type | Action | Why |
|-----------|--------|-----|
| Codebase exploration | delegate_task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
| Documentation lookup | delegate_task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
| Planning | delegate_task(subagent_type="plan") | Parallel task graph + structured TODO list |
| Hard problem (conventional) | delegate_task(subagent_type="oracle") | Architecture, debugging, complex logic |
| Hard problem (non-conventional) | delegate_task(category="artistry", load_skills=[...]) | Different approach needed |
| Implementation | delegate_task(category="...", load_skills=[...]) | Domain-optimized models |
| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
| Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list |
| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic |
| Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed |
| Implementation | task(category="...", load_skills=[...]) | Domain-optimized models |
**CATEGORY + SKILL DELEGATION:**
\`\`\`
// Frontend work
delegate_task(category="visual-engineering", load_skills=["frontend-ui-ux"])
task(category="visual-engineering", load_skills=["frontend-ui-ux"])
// Complex logic
delegate_task(category="ultrabrain", load_skills=["typescript-programmer"])
task(category="ultrabrain", load_skills=["typescript-programmer"])
// Quick fixes
delegate_task(category="quick", load_skills=["git-master"])
task(category="quick", load_skills=["git-master"])
\`\`\`
**YOU SHOULD ONLY DO IT YOURSELF WHEN:**
@@ -177,14 +177,14 @@ delegate_task(category="quick", load_skills=["git-master"])
## EXECUTION RULES
- **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
- **PARALLEL**: Fire independent agent calls simultaneously via delegate_task(background=true) - NEVER wait sequentially.
- **BACKGROUND FIRST**: Use delegate_task for exploration/research agents (10+ concurrent if needed).
- **PARALLEL**: Fire independent agent calls simultaneously via task(run_in_background=true) - NEVER wait sequentially.
- **BACKGROUND FIRST**: Use the task tool for exploration/research agents (10+ concurrent if needed).
- **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
- **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.
## WORKFLOW
1. Analyze the request and identify required capabilities
2. Spawn exploration/librarian agents via delegate_task(background=true) in PARALLEL (10+ if needed)
2. Spawn exploration/librarian agents via task(run_in_background=true) in PARALLEL (10+ if needed)
3. Use Plan agent with gathered context to create detailed work breakdown
4. Execute with continuous verification against original requirements

View File

@@ -73,11 +73,11 @@ Use these when they provide clear value based on the decision framework above:
| Resource | When to Use | How to Use |
|----------|-------------|------------|
| explore agent | Need codebase patterns you don't have | \`delegate_task(subagent_type="explore", run_in_background=true, ...)\` |
| librarian agent | External library docs, OSS examples | \`delegate_task(subagent_type="librarian", run_in_background=true, ...)\` |
| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`delegate_task(subagent_type="oracle", ...)\` |
| plan agent | Complex multi-step with dependencies (5+ steps) | \`delegate_task(subagent_type="plan", ...)\` |
| delegate_task category | Specialized work matching a category | \`delegate_task(category="...", load_skills=[...])\` |
| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` |
| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` |
| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` |
| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` |
| task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` |
<tool_usage_rules>
- Prefer tools over internal knowledge for fresh or user-specific data
@@ -97,8 +97,8 @@ Use these when they provide clear value based on the decision framework above:
**ALWAYS run both tracks in parallel:**
\`\`\`
// Fire background agents for deep exploration
delegate_task(subagent_type="explore", load_skills=[], prompt="Find X patterns...", run_in_background=true)
delegate_task(subagent_type="librarian", load_skills=[], prompt="Find docs for Y...", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="Find X patterns...", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="Find docs for Y...", run_in_background=true)
// WHILE THEY RUN - use direct tools for immediate context
grep(pattern="relevant_pattern", path="src/")

View File

@@ -14,7 +14,7 @@ You ARE the planner. You ARE NOT an implementer. You DO NOT write code. You DO N
| Write/Edit | \`.sisyphus/**/*.md\` ONLY | Everything else |
| Read | All files | - |
| Bash | Research commands only | Implementation commands |
| delegate_task | explore, librarian | - |
| task | explore, librarian | - |
**IF YOU TRY TO WRITE/EDIT OUTSIDE \`.sisyphus/\`:**
- System will BLOCK your action
@@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans.
### Research Protocol
1. **Fire parallel background agents** for comprehensive context:
\`\`\`
delegate_task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true)
delegate_task(agent="explore", prompt="Find test infrastructure and conventions", background=true)
delegate_task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true)
task(subagent_type="explore", prompt="Find existing patterns for [topic] in codebase", run_in_background=true)
task(subagent_type="explore", prompt="Find test infrastructure and conventions", run_in_background=true)
task(subagent_type="librarian", prompt="Find official docs and best practices for [technology]", run_in_background=true)
\`\`\`
2. **Wait for results** before planning - rushed plans fail
3. **Synthesize findings** into informed requirements
@@ -117,9 +117,9 @@ Each TODO item MUST include:
| Wave | Tasks | Dispatch Command |
|------|-------|------------------|
| 1 | 1, 4 | \`delegate_task(category="...", load_skills=[...], run_in_background=false)\` × 2 |
| 2 | 2, 3, 5 | \`delegate_task(...)\` × 3 after Wave 1 completes |
| 3 | 6 | \`delegate_task(...)\` final integration |
| 1 | 1, 4 | \`task(category="...", load_skills=[...], run_in_background=false)\` × 2 |
| 2 | 2, 3, 5 | \`task(...)\` × 3 after Wave 1 completes |
| 3 | 6 | \`task(...)\` final integration |
**WHY PARALLEL TASK GRAPH IS MANDATORY:**
- Orchestrator (Sisyphus) executes tasks in parallel waves

View File

@@ -51,14 +51,14 @@ ${createSystemDirective(SystemDirectiveTypes.PROMETHEUS_READ_ONLY)}
│ │ - Record decisions to .sisyphus/drafts/ │
├──────┼──────────────────────────────────────────────────────────────┤
│ 2 │ METIS CONSULTATION: Pre-generation gap analysis │
│ │ - delegate_task(agent="Metis (Plan Consultant)", ...) │
│ │ - task(agent="Metis (Plan Consultant)", ...) │
│ │ - Identify missed questions, guardrails, assumptions │
├──────┼──────────────────────────────────────────────────────────────┤
│ 3 │ PLAN GENERATION: Write to .sisyphus/plans/*.md │
│ │ <- YOU ARE HERE │
├──────┼──────────────────────────────────────────────────────────────┤
│ 4 │ MOMUS REVIEW (if high accuracy requested) │
│ │ - delegate_task(agent="Momus (Plan Reviewer)", ...) │
│ │ - task(agent="Momus (Plan Reviewer)", ...) │
│ │ - Loop until OKAY verdict │
├──────┼──────────────────────────────────────────────────────────────┤
│ 5 │ SUMMARY: Present to user │

View File

@@ -227,11 +227,11 @@ describe("prometheus-md-only", () => {
).resolves.toBeUndefined()
})
test("should inject read-only warning when Prometheus calls delegate_task", async () => {
test("should inject read-only warning when Prometheus calls task", async () => {
// given
const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = {
tool: "delegate_task",
tool: "task",
sessionID: TEST_SESSION_ID,
callID: "call-1",
}
@@ -289,7 +289,7 @@ describe("prometheus-md-only", () => {
// given
const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = {
tool: "delegate_task",
tool: "task",
sessionID: TEST_SESSION_ID,
callID: "call-1",
}
@@ -330,11 +330,11 @@ describe("prometheus-md-only", () => {
).resolves.toBeUndefined()
})
test("should not inject warning for non-Prometheus agents calling delegate_task", async () => {
test("should not inject warning for non-Prometheus agents calling task", async () => {
// given
const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = {
tool: "delegate_task",
tool: "task",
sessionID: TEST_SESSION_ID,
callID: "call-1",
}

View File

@@ -63,7 +63,7 @@ function getMessageDir(sessionID: string): string | null {
return null
}
const TASK_TOOLS = ["delegate_task", "task", "call_omo_agent"]
const TASK_TOOLS = ["task", "call_omo_agent"]
function getAgentFromMessageFiles(sessionID: string): string | undefined {
const messageDir = getMessageDir(sessionID)

View File

@@ -12,8 +12,8 @@ export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string },
output: { args: Record<string, unknown>; message?: string }
): Promise<void> => {
// 1. Check if tool is delegate_task
if (input.tool !== "delegate_task") {
// 1. Check if tool is task
if (input.tool !== "task") {
return
}
@@ -37,7 +37,7 @@ export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
output.args.prompt = NOTEPAD_DIRECTIVE + prompt
// 6. Log injection
log(`[${HOOK_NAME}] Injected notepad directive to delegate_task`, {
log(`[${HOOK_NAME}] Injected notepad directive to task`, {
sessionID: input.sessionID,
})
},

View File

@@ -1,4 +1,4 @@
const TARGET_TOOLS = ["task", "Task", "task_tool", "call_omo_agent", "delegate_task"]
const TARGET_TOOLS = ["task", "Task", "task_tool", "call_omo_agent"]
const SESSION_ID_PATTERNS = [
/Session ID: (ses_[a-zA-Z0-9_-]+)/,
@@ -27,7 +27,7 @@ export function createTaskResumeInfoHook() {
const sessionId = extractSessionId(output.output)
if (!sessionId) return
output.output = output.output.trimEnd() + `\n\nto continue: delegate_task(session_id="${sessionId}", prompt="...")`
output.output = output.output.trimEnd() + `\n\nto continue: task(session_id="${sessionId}", prompt="...")`
}
return {

View File

@@ -16,7 +16,7 @@ export const REPLACEMENT_MESSAGE = `TodoRead/TodoWrite are DISABLED because expe
3. DO THE WORK
4. TaskUpdate({ id: "T-xxx", status: "completed" })
CRITICAL: 1 task = 1 delegate_task. Fire independent tasks concurrently.
CRITICAL: 1 task = 1 task() call. Fire independent tasks concurrently.
**STOP! DO NOT START WORKING DIRECTLY - NO MATTER HOW SMALL THE TASK!**
Even if the task seems trivial (1 line fix, simple edit, quick change), you MUST:

View File

@@ -111,6 +111,7 @@ import { filterDisabledTools } from "./shared/disabled-tools";
import { loadPluginConfig } from "./plugin-config";
import { createModelCacheState } from "./plugin-state";
import { createConfigHandler } from "./plugin-handlers";
import { consumeToolMetadata } from "./features/tool-metadata-store";
const OhMyOpenCodePlugin: Plugin = async (ctx) => {
log("[OhMyOpenCodePlugin] ENTRY - plugin loading", {
@@ -533,7 +534,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
...backgroundTools,
call_omo_agent: callOmoAgent,
...(lookAt ? { look_at: lookAt } : {}),
delegate_task: delegateTask,
task: delegateTask,
skill: skillTool,
skill_mcp: skillMcpTool,
slashcommand: slashcommandTool,
@@ -787,16 +788,11 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
if (input.tool === "task") {
const args = output.args as Record<string, unknown>;
const subagentType = args.subagent_type as string;
const isExploreOrLibrarian = ["explore", "librarian"].some(
(name) => name.toLowerCase() === (subagentType ?? "").toLowerCase(),
);
args.tools = {
...(args.tools as Record<string, boolean> | undefined),
delegate_task: false,
...(isExploreOrLibrarian ? { call_omo_agent: false } : {}),
};
const category = typeof args.category === "string" ? args.category : undefined;
const subagentType = typeof args.subagent_type === "string" ? args.subagent_type : undefined;
if (category && !subagentType) {
args.subagent_type = "sisyphus-junior";
}
}
if (ralphLoop && input.tool === "slashcommand") {
@@ -872,6 +868,19 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
if (!output) {
return;
}
// Restore metadata that fromPlugin() overwrites with { truncated, outputPath }.
// This must run FIRST, before any hook reads output.metadata.
const stored = consumeToolMetadata(input.sessionID, input.callID)
if (stored) {
if (stored.title) {
output.title = stored.title
}
if (stored.metadata) {
output.metadata = { ...output.metadata, ...stored.metadata }
}
}
await claudeCodeHooks["tool.execute.after"](input, output);
await toolOutputTruncator?.["tool.execute.after"](input, output);
await preemptiveCompaction?.["tool.execute.after"](input, output);

View File

@@ -274,7 +274,7 @@ describe("Plan agent demote behavior", () => {
expect(agents.plan.prompt).toBe("original plan prompt")
})
test("prometheus should have mode 'all' to be callable via delegate_task", async () => {
test("prometheus should have mode 'all' to be callable via task", async () => {
// given
const pluginConfig: OhMyOpenCodeConfig = {
sisyphus_agent: {
@@ -305,7 +305,7 @@ describe("Plan agent demote behavior", () => {
})
describe("Agent permission defaults", () => {
test("hephaestus should allow delegate_task", async () => {
test("hephaestus should allow task", async () => {
// #given
const createBuiltinAgentsMock = agents.createBuiltinAgents as unknown as {
mockResolvedValue: (value: Record<string, unknown>) => void
@@ -335,7 +335,7 @@ describe("Agent permission defaults", () => {
// #then
const agentConfig = config.agent as Record<string, { permission?: Record<string, string> }>
expect(agentConfig.hephaestus).toBeDefined()
expect(agentConfig.hephaestus.permission?.delegate_task).toBe("allow")
expect(agentConfig.hephaestus.permission?.task).toBe("allow")
})
})

View File

@@ -419,30 +419,30 @@ export function createConfigHandler(deps: ConfigHandlerDeps) {
}
if (agentResult["atlas"]) {
const agent = agentResult["atlas"] as AgentWithPermission;
agent.permission = { ...agent.permission, task: "deny", call_omo_agent: "deny", delegate_task: "allow", "task_*": "allow", teammate: "allow" };
agent.permission = { ...agent.permission, task: "allow", call_omo_agent: "deny", "task_*": "allow", teammate: "allow" };
}
if (agentResult.sisyphus) {
const agent = agentResult.sisyphus as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
}
if (agentResult.hephaestus) {
const agent = agentResult.hephaestus as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission };
agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission };
}
if (agentResult["prometheus"]) {
const agent = agentResult["prometheus"] as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
}
if (agentResult["sisyphus-junior"]) {
const agent = agentResult["sisyphus-junior"] as AgentWithPermission;
agent.permission = { ...agent.permission, delegate_task: "allow", "task_*": "allow", teammate: "allow" };
agent.permission = { ...agent.permission, task: "allow", "task_*": "allow", teammate: "allow" };
}
config.permission = {
...(config.permission as Record<string, unknown>),
webfetch: "allow",
external_directory: "allow",
delegate_task: "deny",
task: "deny",
};
const mcpResult = (pluginConfig.claude_code?.mcp ?? true)

View File

@@ -8,7 +8,6 @@ const EXPLORATION_AGENT_DENYLIST: Record<string, boolean> = {
write: false,
edit: false,
task: false,
delegate_task: false,
call_omo_agent: false,
}
@@ -21,7 +20,6 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
write: false,
edit: false,
task: false,
delegate_task: false,
call_omo_agent: false,
},
@@ -29,14 +27,12 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
write: false,
edit: false,
task: false,
delegate_task: false,
},
momus: {
write: false,
edit: false,
task: false,
delegate_task: false,
},
"multimodal-looker": {
@@ -45,7 +41,6 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
"sisyphus-junior": {
task: false,
delegate_task: false,
},
}

View File

@@ -130,5 +130,49 @@ describe("permission-compat", () => {
// then returns unchanged
expect(result).toEqual(config)
})
test("migrates delegate_task permission to task", () => {
  //#given an agent config that still carries the legacy delegate_task permission key
  const legacyConfig = {
    model: "test",
    permission: { delegate_task: "allow" as const, write: "deny" as const },
  }

  //#when the config goes through migration
  const migrated = migrateAgentConfig(legacyConfig)

  //#then the legacy key is renamed to task and unrelated permissions are untouched
  const permissions = migrated.permission as Record<string, string>
  expect(permissions["task"]).toBe("allow")
  expect(permissions["delegate_task"]).toBeUndefined()
  expect(permissions["write"]).toBe("deny")
})
test("does not overwrite existing task permission with delegate_task", () => {
  //#given an agent config that declares both the legacy and the current permission key
  const mixedConfig = {
    permission: { delegate_task: "allow" as const, task: "deny" as const },
  }

  //#when the config goes through migration
  const migrated = migrateAgentConfig(mixedConfig)

  //#then the explicit task permission wins and the legacy key is left in place
  const permissions = migrated.permission as Record<string, string>
  expect(permissions["task"]).toBe("deny")
  expect(permissions["delegate_task"]).toBe("allow")
})
test("does not mutate the original config permission object", () => {
  //#given a permission object shared with the config handed to the migration
  const originalPerm = { delegate_task: "allow" as const }
  const sharedConfig = { permission: originalPerm }

  //#when the config goes through migration (the return value is intentionally ignored)
  migrateAgentConfig(sharedConfig)

  //#then the caller's permission object still has its original shape
  expect(originalPerm).toEqual({ delegate_task: "allow" })
})
})
})

View File

@@ -73,5 +73,14 @@ export function migrateAgentConfig(
delete result.tools
}
if (result.permission && typeof result.permission === "object") {
const perm = { ...(result.permission as Record<string, PermissionValue>) }
if ("delegate_task" in perm && !("task" in perm)) {
perm["task"] = perm["delegate_task"]
delete perm["delegate_task"]
result.permission = perm
}
}
return result
}

View File

@@ -39,7 +39,7 @@ tools/
| Search | ast_grep_search, ast_grep_replace, grep, glob | Direct |
| Session | session_list, session_read, session_search, session_info | Direct |
| Task | task_create, task_get, task_list, task_update | Factory |
| Agent | delegate_task, call_omo_agent | Factory |
| Agent | task, call_omo_agent | Factory |
| Background | background_output, background_cancel | Factory |
| System | interactive_bash, look_at | Mixed |
| Skill | skill, skill_mcp, slashcommand | Factory |

View File

@@ -4,9 +4,11 @@ import { runSg } from "./cli"
import { formatSearchResult, formatReplaceResult } from "./utils"
import type { CliLanguage } from "./types"
function showOutputToUser(context: unknown, output: string): void {
const ctx = context as { metadata?: (input: { metadata: { output: string } }) => void }
ctx.metadata?.({ metadata: { output } })
/**
 * Forwards a tool's textual output to the optional `metadata` callback found on the tool context.
 *
 * The context arrives untyped, so it is probed defensively: a nullish context or a
 * missing / non-callable `metadata` member makes this a no-op instead of throwing.
 * That matters because this helper is also invoked from the tools' `catch` branches,
 * where a secondary throw would mask the error being reported.
 *
 * @param context - Tool execution context; may expose a `metadata` callback.
 * @param output - Text to hand to the callback as `metadata.output`.
 * @returns A promise that settles once the callback (sync or async) has completed.
 */
async function showOutputToUser(context: unknown, output: string): Promise<void> {
  const ctx = context as
    | { metadata?: (input: { metadata: { output: string } }) => void | Promise<void> }
    | null
    | undefined
  const report = ctx?.metadata
  if (typeof report !== "function") return
  // Invoke with the context as receiver, exactly like the original `ctx.metadata(...)` call.
  await report.call(ctx, { metadata: { output } })
}
function getEmptyResultHint(pattern: string, lang: CliLanguage): string | null {
@@ -65,11 +67,11 @@ export const ast_grep_search: ToolDefinition = tool({
}
}
showOutputToUser(context, output)
await showOutputToUser(context, output)
return output
} catch (e) {
const output = `Error: ${e instanceof Error ? e.message : String(e)}`
showOutputToUser(context, output)
await showOutputToUser(context, output)
return output
}
},
@@ -99,14 +101,13 @@ export const ast_grep_replace: ToolDefinition = tool({
updateAll: args.dryRun === false,
})
const output = formatReplaceResult(result, args.dryRun !== false)
showOutputToUser(context, output)
await showOutputToUser(context, output)
return output
} catch (e) {
const output = `Error: ${e instanceof Error ? e.message : String(e)}`
showOutputToUser(context, output)
await showOutputToUser(context, output)
return output
}
},
})

View File

@@ -8,6 +8,7 @@ import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAG
import { getSessionAgent } from "../../features/claude-code-session-state"
import { log } from "../../shared/logger"
import { consumeNewMessages } from "../../shared/session-cursor"
import { storeToolMetadata } from "../../features/tool-metadata-store"
type BackgroundOutputMessage = {
info?: { role?: string; time?: string | { created?: number }; agent?: string }
@@ -140,15 +141,37 @@ export function createBackgroundTask(manager: BackgroundManager): ToolDefinition
parentAgent,
})
ctx.metadata?.({
const WAIT_FOR_SESSION_INTERVAL_MS = 50
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) {
await manager.cancelTask(task.id)
return `Task aborted and cancelled while waiting for session to start.\n\nTask ID: ${task.id}`
}
await delay(WAIT_FOR_SESSION_INTERVAL_MS)
const updated = manager.getTask(task.id)
if (!updated || updated.status === "error") {
return `Task ${!updated ? "was deleted" : `entered error state`}.\n\nTask ID: ${task.id}`
}
sessionId = updated?.sessionID
}
const bgMeta = {
title: args.description,
metadata: { sessionId: task.sessionID },
})
metadata: { sessionId: sessionId ?? "pending" } as Record<string, unknown>,
}
await ctx.metadata?.(bgMeta)
const callID = (ctx as any).callID as string | undefined
if (callID) {
storeToolMetadata(ctx.sessionID, callID, bgMeta)
}
return `Background task launched successfully.
Task ID: ${task.id}
Session ID: ${task.sessionID}
Session ID: ${sessionId ?? "pending"}
Description: ${task.description}
Agent: ${task.agent}
Status: ${task.status}
@@ -663,7 +686,7 @@ export function createBackgroundCancel(manager: BackgroundManager, client: Backg
To continue a cancelled task, use:
\`\`\`
delegate_task(session_id="<session_id>", prompt="Continue: <your follow-up>")
task(session_id="<session_id>", prompt="Continue: <your follow-up>")
\`\`\`
Continuable sessions:

View File

@@ -10,6 +10,7 @@ import { findFirstMessageWithAgent, findNearestMessageWithFields, MESSAGE_STORAG
import { getSessionAgent } from "../../features/claude-code-session-state"
function getMessageDir(sessionID: string): string | null {
if (!sessionID.startsWith("ses_")) return null
if (!existsSync(MESSAGE_STORAGE)) return null
const directPath = join(MESSAGE_STORAGE, sessionID)
@@ -110,15 +111,31 @@ async function executeBackground(
parentAgent,
})
toolContext.metadata?.({
const WAIT_FOR_SESSION_INTERVAL_MS = 50
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
if (toolContext.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
}
const updated = manager.getTask(task.id)
if (updated?.status === "error" || updated?.status === "cancelled") {
return `Task failed to start (status: ${updated.status}).\n\nTask ID: ${task.id}`
}
await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS))
sessionId = manager.getTask(task.id)?.sessionID
}
await toolContext.metadata?.({
title: args.description,
metadata: { sessionId: task.sessionID },
metadata: { sessionId: sessionId ?? "pending" },
})
return `Background agent task launched successfully.
Task ID: ${task.id}
Session ID: ${task.sessionID}
Session ID: ${sessionId ?? "pending"}
Description: ${task.description}
Agent: ${task.agent} (subagent)
Status: ${task.status}
@@ -194,7 +211,7 @@ Original error: ${createResult.error}`
log(`[call_omo_agent] Created session: ${sessionID}`)
}
toolContext.metadata?.({
await toolContext.metadata?.({
title: args.description,
metadata: { sessionId: sessionID },
})
@@ -210,7 +227,6 @@ Original error: ${createResult.error}`
tools: {
...getAgentToolRestrictions(args.subagent_type),
task: false,
delegate_task: false,
},
parts: [{ type: "text", text: args.prompt }],
},

View File

@@ -459,13 +459,13 @@ YOU MUST END YOUR RESPONSE WITH THIS SECTION.
1. **Wave 1**: Fire these tasks IN PARALLEL (no dependencies)
\`\`\`
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task 1: ...")
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task N: ...")
task(category="...", load_skills=[...], run_in_background=false, prompt="Task 1: ...")
task(category="...", load_skills=[...], run_in_background=false, prompt="Task N: ...")
\`\`\`
2. **Wave 2**: After Wave 1 completes, fire next wave IN PARALLEL
\`\`\`
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task 2: ...")
task(category="...", load_skills=[...], run_in_background=false, prompt="Task 2: ...")
\`\`\`
3. Continue until all waves complete
@@ -476,7 +476,7 @@ YOU MUST END YOUR RESPONSE WITH THIS SECTION.
WHY THIS FORMAT IS MANDATORY:
- Caller can directly copy TODO items
- Wave grouping enables parallel execution
- Each task has clear delegate_task parameters
- Each task has clear task() call parameters
- QA criteria ensure verifiable completion
</FINAL_OUTPUT_FOR_CALLER>

View File

@@ -16,6 +16,7 @@ import { log, getAgentToolRestrictions, resolveModelPipeline, promptWithModelSug
import { fetchAvailableModels, isModelAvailable } from "../../shared/model-availability"
import { readConnectedProvidersCache } from "../../shared/connected-providers-cache"
import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements"
import { storeToolMetadata } from "../../features/tool-metadata-store"
const SISYPHUS_JUNIOR_AGENT = "sisyphus-junior"
@@ -67,7 +68,7 @@ export function resolveParentContext(ctx: ToolContextWithMetadata): ParentContex
const sessionAgent = getSessionAgent(ctx.sessionID)
const parentAgent = ctx.agent ?? sessionAgent ?? firstMessageAgent ?? prevMessage?.agent
log("[delegate_task] parentAgent resolution", {
log("[task] parentAgent resolution", {
sessionID: ctx.sessionID,
messageDir,
ctxAgent: ctx.agent,
@@ -111,7 +112,7 @@ export async function executeBackgroundContinuation(
parentAgent: parentContext.agent,
})
ctx.metadata?.({
const bgContMeta = {
title: `Continue: ${task.description}`,
metadata: {
prompt: args.prompt,
@@ -122,7 +123,11 @@ export async function executeBackgroundContinuation(
sessionId: task.sessionID,
command: args.command,
},
})
}
await ctx.metadata?.(bgContMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, bgContMeta)
}
return `Background task continued.
@@ -165,7 +170,7 @@ export async function executeSyncContinuation(
})
}
ctx.metadata?.({
const syncContMeta = {
title: `Continue: ${args.description}`,
metadata: {
prompt: args.prompt,
@@ -176,7 +181,11 @@ export async function executeSyncContinuation(
sync: true,
command: args.command,
},
})
}
await ctx.metadata?.(syncContMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, syncContMeta)
}
try {
let resumeAgent: string | undefined
@@ -210,7 +219,6 @@ export async function executeSyncContinuation(
tools: {
...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}),
task: false,
delegate_task: false,
call_omo_agent: true,
question: false,
},
@@ -316,17 +324,17 @@ export async function executeUnstableAgentTask(
category: args.category,
})
const WAIT_FOR_SESSION_INTERVAL_MS = 100
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const timing = getTimingConfig()
const waitStart = Date.now()
while (!task.sessionID && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
let sessionID = task.sessionID
while (!sessionID && Date.now() - waitStart < timing.WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
}
await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS))
await new Promise(resolve => setTimeout(resolve, timing.WAIT_FOR_SESSION_INTERVAL_MS))
const updated = manager.getTask(task.id)
sessionID = updated?.sessionID
}
const sessionID = task.sessionID
if (!sessionID) {
return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), {
operation: "Launch monitored background task",
@@ -336,7 +344,7 @@ export async function executeUnstableAgentTask(
})
}
ctx.metadata?.({
const bgTaskMeta = {
title: args.description,
metadata: {
prompt: args.prompt,
@@ -348,7 +356,11 @@ export async function executeUnstableAgentTask(
sessionId: sessionID,
command: args.command,
},
})
}
await ctx.metadata?.(bgTaskMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, bgTaskMeta)
}
const startTime = new Date()
const timingCfg = getTimingConfig()
@@ -463,7 +475,23 @@ export async function executeBackgroundTask(
category: args.category,
})
ctx.metadata?.({
// OpenCode TUI's `Task` tool UI calculates toolcalls by looking up
// `props.metadata.sessionId` and then counting tool parts in that session.
// BackgroundManager.launch() returns immediately (pending) before the session exists,
// so we must wait briefly for the session to be created to set metadata correctly.
const timing = getTimingConfig()
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < timing.WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
}
await new Promise(resolve => setTimeout(resolve, timing.WAIT_FOR_SESSION_INTERVAL_MS))
const updated = manager.getTask(task.id)
sessionId = updated?.sessionID
}
const unstableMeta = {
title: args.description,
metadata: {
prompt: args.prompt,
@@ -472,10 +500,14 @@ export async function executeBackgroundTask(
load_skills: args.load_skills,
description: args.description,
run_in_background: args.run_in_background,
sessionId: task.sessionID,
sessionId: sessionId ?? "pending",
command: args.command,
},
})
}
await ctx.metadata?.(unstableMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, unstableMeta)
}
return `Background task launched.
@@ -487,7 +519,7 @@ Status: ${task.status}
System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.
<task_metadata>
session_id: ${task.sessionID}
session_id: ${sessionId}
</task_metadata>`
} catch (error) {
return formatDetailedError(error, {
@@ -542,13 +574,13 @@ export async function executeSyncTask(
subagentSessions.add(sessionID)
if (onSyncSessionCreated) {
log("[delegate_task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID })
log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID })
await onSyncSessionCreated({
sessionID,
parentID: parentContext.sessionID,
title: args.description,
}).catch((err) => {
log("[delegate_task] onSyncSessionCreated callback failed", { error: String(err) })
log("[task] onSyncSessionCreated callback failed", { error: String(err) })
})
await new Promise(r => setTimeout(r, 200))
}
@@ -568,7 +600,7 @@ export async function executeSyncTask(
})
}
ctx.metadata?.({
const syncTaskMeta = {
title: args.description,
metadata: {
prompt: args.prompt,
@@ -581,18 +613,21 @@ export async function executeSyncTask(
sync: true,
command: args.command,
},
})
}
await ctx.metadata?.(syncTaskMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, syncTaskMeta)
}
try {
const allowDelegateTask = isPlanAgent(agentToUse)
const allowTask = isPlanAgent(agentToUse)
await promptWithModelSuggestionRetry(client, {
path: { id: sessionID },
body: {
agent: agentToUse,
system: systemContent,
tools: {
task: false,
delegate_task: allowDelegateTask,
task: allowTask,
call_omo_agent: true,
question: false,
},
@@ -630,11 +665,11 @@ export async function executeSyncTask(
let stablePolls = 0
let pollCount = 0
log("[delegate_task] Starting poll loop", { sessionID, agentToUse })
log("[task] Starting poll loop", { sessionID, agentToUse })
while (Date.now() - pollStart < syncTiming.MAX_POLL_TIME_MS) {
if (ctx.abort?.aborted) {
log("[delegate_task] Aborted by user", { sessionID })
log("[task] Aborted by user", { sessionID })
if (toastManager && taskId) toastManager.removeTask(taskId)
return `Task aborted.\n\nSession ID: ${sessionID}`
}
@@ -647,7 +682,7 @@ export async function executeSyncTask(
const sessionStatus = allStatuses[sessionID]
if (pollCount % 10 === 0) {
log("[delegate_task] Poll status", {
log("[task] Poll status", {
sessionID,
pollCount,
elapsed: Math.floor((Date.now() - pollStart) / 1000) + "s",
@@ -675,7 +710,7 @@ export async function executeSyncTask(
if (currentMsgCount === lastMsgCount) {
stablePolls++
if (stablePolls >= syncTiming.STABILITY_POLLS_REQUIRED) {
log("[delegate_task] Poll complete - messages stable", { sessionID, pollCount, currentMsgCount })
log("[task] Poll complete - messages stable", { sessionID, pollCount, currentMsgCount })
break
}
} else {
@@ -685,7 +720,7 @@ export async function executeSyncTask(
}
if (Date.now() - pollStart >= syncTiming.MAX_POLL_TIME_MS) {
log("[delegate_task] Poll timeout reached", { sessionID, pollCount, lastMsgCount, stablePolls })
log("[task] Poll timeout reached", { sessionID, pollCount, lastMsgCount, stablePolls })
}
const messagesResult = await client.session.messages({
@@ -928,7 +963,7 @@ Sisyphus-Junior is spawned automatically when you specify a category. Pick the a
return {
agentToUse: "",
categoryModel: undefined,
error: `You are prometheus. You cannot delegate to prometheus via delegate_task.
error: `You are prometheus. You cannot delegate to prometheus via task.
Create the work plan directly - that's your job as the planning agent.`,
}
@@ -955,7 +990,7 @@ Create the work plan directly - that's your job as the planning agent.`,
return {
agentToUse: "",
categoryModel: undefined,
error: `Cannot call primary agent "${isPrimaryAgent.name}" via delegate_task. Primary agents are top-level orchestrators.`,
error: `Cannot call primary agent "${isPrimaryAgent.name}" via task. Primary agents are top-level orchestrators.`,
}
}

View File

@@ -18,6 +18,7 @@ export function parseModelString(model: string): { providerID: string; modelID:
* Get the message directory for a session, checking both direct and nested paths.
*/
export function getMessageDir(sessionID: string): string | null {
if (!sessionID.startsWith("ses_")) return null
if (!existsSync(MESSAGE_STORAGE)) return null
const directPath = join(MESSAGE_STORAGE, sessionID)

View File

@@ -0,0 +1,65 @@
const { describe, test, expect } = require("bun:test")
import { executeBackgroundTask } from "./executor"
import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
/**
 * Checks that the background task executor does not return until an
 * asynchronous `ctx.metadata` callback has finished.
 */
describe("task tool metadata awaiting", () => {
  test("executeBackgroundTask awaits ctx.metadata before returning", async () => {
    // given
    let metadataResolved = false

    // Metadata hook that settles only after a 50ms delay; the flag stays false
    // unless the caller awaits the promise this hook returns.
    const delayedMetadata = async () => {
      await new Promise<void>((done) => setTimeout(done, 50))
      metadataResolved = true
    }

    const controller = new AbortController()
    const ctx: ToolContextWithMetadata = {
      sessionID: "ses_parent",
      messageID: "msg_parent",
      agent: "sisyphus",
      abort: controller.signal,
      metadata: delayedMetadata,
    }

    const args: DelegateTaskArgs = {
      load_skills: [],
      description: "Test task",
      prompt: "Do something",
      run_in_background: true,
      subagent_type: "explore",
    }

    // Builds the task record handed back by the stubbed manager; it already
    // carries a sessionID.
    const makeLaunchedTask = () => ({
      id: "task_1",
      description: "Test task",
      prompt: "Do something",
      agent: "explore",
      status: "pending",
      sessionID: "ses_child",
    })
    const managerStub = {
      launch: async () => makeLaunchedTask(),
      getTask: () => undefined,
    }
    const executorCtx = { manager: managerStub } as any

    const parentContext = {
      sessionID: "ses_parent",
      messageID: "msg_parent",
    }

    // when
    const result = await executeBackgroundTask(
      args,
      ctx,
      executorCtx,
      parentContext,
      "explore",
      undefined,
      undefined,
    )

    // then
    expect(result).toContain("Background task launched")
    expect(metadataResolved).toBe(true)
  })
})

View File

@@ -1,4 +1,5 @@
import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test"
declare const require: (name: string) => any
const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test")
import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants"
import { resolveCategoryConfig } from "./tools"
import type { CategoryConfig } from "../../config/schema"
@@ -207,6 +208,66 @@ describe("sisyphus-task", () => {
})
describe("category delegation config validation", () => {
// Ensures a category-only call has its subagent_type filled in on the args
// object that was passed to the tool.
test("fills subagent_type as sisyphus-junior when category is provided without subagent_type", async () => {
  // given
  const { createDelegateTask } = require("./tools")

  // Argument shape for this test; subagent_type is optional so it can be read
  // back after execute() has run.
  type CategoryOnlyArgs = {
    description: string
    prompt: string
    category: string
    run_in_background: boolean
    load_skills: string[]
    subagent_type?: string
  }

  const managerStub = {
    launch: async () => ({
      id: "task-123",
      status: "pending",
      description: "Test task",
      agent: "sisyphus-junior",
      sessionID: "test-session",
    }),
  }

  const sessionStub = {
    create: async () => ({ data: { id: "test-session" } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({ data: [] }),
    status: async () => ({ data: {} }),
  }
  const clientStub = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({}) },
    provider: { list: async () => ({ data: { connected: ["openai"] } }) },
    model: { list: async () => ({ data: [{ provider: "openai", id: "gpt-5.3-codex" }] }) },
    session: sessionStub,
  }

  const tool = createDelegateTask({
    manager: managerStub,
    client: clientStub,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "sisyphus",
    abort: new AbortController().signal,
  }

  const args: CategoryOnlyArgs = {
    description: "Quick category test",
    prompt: "Do something",
    category: "quick",
    run_in_background: true,
    load_skills: [],
  }

  // when
  await tool.execute(args, callerContext)

  // then
  expect(args.subagent_type).toBe("sisyphus-junior")
}, { timeout: 10000 })
test("proceeds without error when systemDefaultModel is undefined", async () => {
// given a mock client with no model in config
const { createDelegateTask } = require("./tools")
@@ -304,6 +365,71 @@ describe("sisyphus-task", () => {
})
})
/**
 * Covers the background launch path when the manager returns a task before
 * its sessionID exists: the tool is expected to report the sessionId through
 * the metadata callback once the task record has one.
 */
describe("background metadata sessionId", () => {
  test("should wait for background sessionId and set metadata for TUI toolcall counting", async () => {
    //#given - manager.launch returns before sessionID is available
    const { createDelegateTask } = require("./tools")

    type TrackedTask = { id: string; sessionID?: string; status: string; description: string; agent: string }
    type MetadataInput = { title?: string; metadata?: Record<string, unknown> }

    const tasks = new Map<string, TrackedTask>()

    // Registers a pending task without a sessionID, then swaps in a running
    // copy that has one after 20ms.
    const launchPendingTask = async () => {
      const task = { id: "bg_1", status: "pending", description: "Test task", agent: "explore" }
      tasks.set(task.id, task)
      setTimeout(() => {
        tasks.set(task.id, { ...task, status: "running", sessionID: "ses_child" })
      }, 20)
      return task
    }
    const managerStub = {
      getTask: (id: string) => tasks.get(id),
      launch: launchPendingTask,
    }

    const sessionStub = {
      create: async () => ({ data: { id: "test-session" } }),
      prompt: async () => ({ data: {} }),
      messages: async () => ({ data: [] }),
      status: async () => ({ data: {} }),
    }
    const clientStub = {
      app: { agents: async () => ({ data: [{ name: "explore", mode: "subagent" }] }) },
      config: { get: async () => ({}) },
      provider: { list: async () => ({ data: { connected: ["openai"] } }) },
      model: { list: async () => ({ data: [{ provider: "openai", id: "gpt-5.3-codex" }] }) },
      session: sessionStub,
    }

    const tool = createDelegateTask({
      manager: managerStub,
      client: clientStub,
    })

    // Every metadata payload the tool emits is recorded here for inspection.
    const metadataCalls: Array<MetadataInput> = []
    const callerContext = {
      sessionID: "parent-session",
      messageID: "parent-message",
      agent: "sisyphus",
      abort: new AbortController().signal,
      metadata: (input: MetadataInput) => {
        metadataCalls.push(input)
      },
    }

    const args = {
      description: "Explore task",
      prompt: "Explore features directory deeply",
      subagent_type: "explore",
      run_in_background: true,
      load_skills: [],
    }

    //#when
    const result = await tool.execute(args, callerContext)

    //#then - metadata should include sessionId (camelCase) once it's available
    expect(String(result)).toContain("Background task launched")
    const sessionIdCall = metadataCalls.find((call) => call.metadata?.sessionId === "ses_child")
    expect(sessionIdCall).toBeDefined()
  })
})
describe("resolveCategoryConfig", () => {
test("returns null for unknown category without user config", () => {
// given
@@ -1894,7 +2020,7 @@ describe("sisyphus-task", () => {
describe("browserProvider propagation", () => {
test("should resolve agent-browser skill when browserProvider is passed", async () => {
// given - delegate_task configured with browserProvider: "agent-browser"
// given - task configured with browserProvider: "agent-browser"
const { createDelegateTask } = require("./tools")
let promptBody: any
@@ -1949,7 +2075,7 @@ describe("sisyphus-task", () => {
}, { timeout: 20000 })
test("should NOT resolve agent-browser skill when browserProvider is not set", async () => {
// given - delegate_task without browserProvider (defaults to playwright)
// given - task without browserProvider (defaults to playwright)
const { createDelegateTask } = require("./tools")
const mockManager = { launch: async () => ({}) }
@@ -2720,8 +2846,8 @@ describe("sisyphus-task", () => {
}, { timeout: 20000 })
})
describe("prometheus subagent delegate_task permission", () => {
test("prometheus subagent should have delegate_task permission enabled", async () => {
describe("prometheus subagent task permission", () => {
test("prometheus subagent should have task permission enabled", async () => {
// given - sisyphus delegates to prometheus
const { createDelegateTask } = require("./tools")
let promptBody: any
@@ -2759,7 +2885,7 @@ describe("sisyphus-task", () => {
// when - sisyphus delegates to prometheus
await tool.execute(
{
description: "Test prometheus delegate_task permission",
description: "Test prometheus task permission",
prompt: "Create a plan",
subagent_type: "prometheus",
run_in_background: false,
@@ -2768,11 +2894,11 @@ describe("sisyphus-task", () => {
toolContext
)
// then - prometheus should have delegate_task permission
expect(promptBody.tools.delegate_task).toBe(true)
// then - prometheus should have task permission
expect(promptBody.tools.task).toBe(true)
}, { timeout: 20000 })
test("non-prometheus subagent should NOT have delegate_task permission", async () => {
test("non-prometheus subagent should NOT have task permission", async () => {
// given - sisyphus delegates to oracle (non-prometheus)
const { createDelegateTask } = require("./tools")
let promptBody: any
@@ -2810,7 +2936,7 @@ describe("sisyphus-task", () => {
// when - sisyphus delegates to oracle
await tool.execute(
{
description: "Test oracle no delegate_task permission",
description: "Test oracle no task permission",
prompt: "Consult on architecture",
subagent_type: "oracle",
run_in_background: false,
@@ -2819,8 +2945,8 @@ describe("sisyphus-task", () => {
toolContext
)
// then - oracle should NOT have delegate_task permission
expect(promptBody.tools.delegate_task).toBe(false)
// then - oracle should NOT have task permission
expect(promptBody.tools.task).toBe(false)
}, { timeout: 20000 })
})

View File

@@ -86,6 +86,13 @@ Prompts MUST be in English.`
async execute(args: DelegateTaskArgs, toolContext) {
const ctx = toolContext as ToolContextWithMetadata
if (args.category && !args.subagent_type) {
args.subagent_type = "sisyphus-junior"
}
await ctx.metadata?.({
title: args.description,
})
if (args.run_in_background === undefined) {
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
@@ -116,7 +123,7 @@ Prompts MUST be in English.`
return executeSyncContinuation(args, ctx, options)
}
if (args.category && args.subagent_type) {
if (args.category && args.subagent_type && args.subagent_type !== "sisyphus-junior") {
return `Invalid arguments: Provide EITHER category OR subagent_type, not both.`
}
@@ -157,7 +164,7 @@ Prompts MUST be in English.`
const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
log("[delegate_task] unstable agent detection", {
log("[task] unstable agent detection", {
category: args.category,
actualModel,
isUnstableAgent,

View File

@@ -28,7 +28,12 @@ export interface ToolContextWithMetadata {
messageID: string
agent: string
abort: AbortSignal
metadata?: (input: { title?: string; metadata?: Record<string, unknown> }) => void
metadata?: (input: { title?: string; metadata?: Record<string, unknown> }) => void | Promise<void>
/**
* Tool call ID injected by OpenCode's internal context (not in plugin ToolContext type,
* but present at runtime via spread in fromPlugin()). Used for metadata store keying.
*/
callID?: string
}
export interface SyncSessionCreatedEvent {

View File

@@ -70,7 +70,7 @@ Returns summary format: id, subject, status, owner, blockedBy (not full descript
return JSON.stringify({
tasks: summaries,
reminder: "1 task = 1 delegate_task. Maximize parallel execution by running independent tasks (tasks with empty blockedBy) concurrently."
reminder: "1 task = 1 task. Maximize parallel execution by running independent tasks (tasks with empty blockedBy) concurrently."
})
},
})