Merge pull request #1543 from code-yeongyu/feat/task-tool-refactor

refactor: migrate delegate_task to task tool with metadata fixes
This commit is contained in:
YeonGyu-Kim
2026-02-06 21:37:46 +09:00
committed by GitHub
78 changed files with 1182 additions and 403 deletions

View File

@@ -41,27 +41,27 @@ Fire ALL simultaneously:
``` ```
// Agent 1: Find all exported symbols // Agent 1: Find all exported symbols
delegate_task(subagent_type="explore", run_in_background=true, task(subagent_type="explore", run_in_background=true,
prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/. prompt="Find ALL exported functions, classes, types, interfaces, and constants across src/.
List each with: file path, line number, symbol name, export type (named/default). List each with: file path, line number, symbol name, export type (named/default).
EXCLUDE: src/index.ts root exports, test files. EXCLUDE: src/index.ts root exports, test files.
Return as structured list.") Return as structured list.")
// Agent 2: Find potentially unused files // Agent 2: Find potentially unused files
delegate_task(subagent_type="explore", run_in_background=true, task(subagent_type="explore", run_in_background=true,
prompt="Find files in src/ that are NOT imported by any other file. prompt="Find files in src/ that are NOT imported by any other file.
Check import/require statements across the entire codebase. Check import/require statements across the entire codebase.
EXCLUDE: index.ts files, test files, entry points, config files, .md files. EXCLUDE: index.ts files, test files, entry points, config files, .md files.
Return list of potentially orphaned files.") Return list of potentially orphaned files.")
// Agent 3: Find unused imports within files // Agent 3: Find unused imports within files
delegate_task(subagent_type="explore", run_in_background=true, task(subagent_type="explore", run_in_background=true,
prompt="Find unused imports across src/**/*.ts files. prompt="Find unused imports across src/**/*.ts files.
Look for import statements where the imported symbol is never referenced in the file body. Look for import statements where the imported symbol is never referenced in the file body.
Return: file path, line number, imported symbol name.") Return: file path, line number, imported symbol name.")
// Agent 4: Find functions/variables only used in their own declaration // Agent 4: Find functions/variables only used in their own declaration
delegate_task(subagent_type="explore", run_in_background=true, task(subagent_type="explore", run_in_background=true,
prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear prompt="Find private/non-exported functions, variables, and types in src/**/*.ts that appear
to have zero usage beyond their declaration. Return: file path, line number, symbol name.") to have zero usage beyond their declaration. Return: file path, line number, symbol name.")
``` ```

View File

@@ -21,7 +21,7 @@ You are a GitHub issue triage automation agent. Your job is to:
| Aspect | Rule | | Aspect | Rule |
|--------|------| |--------|------|
| **Task Granularity** | 1 Issue = Exactly 1 `delegate_task()` call | | **Task Granularity** | 1 Issue = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each issue runs independently) | | **Execution Mode** | `run_in_background=true` (Each issue runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete | | **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes | | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -67,7 +67,7 @@ for (let i = 0; i < allIssues.length; i++) {
const issue = allIssues[i] const issue = allIssues[i]
const category = getCategory(i) const category = getCategory(i)
const taskId = await delegate_task( const taskId = await task(
category=category, category=category,
load_skills=[], load_skills=[],
run_in_background=true, // ← CRITICAL: Each issue is an independent background task run_in_background=true, // ← CRITICAL: Each issue is an independent background task
@@ -195,7 +195,7 @@ for (let i = 0; i < allIssues.length; i++) {
console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`) console.log(`🚀 Launching background task for Issue #${issue.number} (${category})...`)
const taskId = await delegate_task( const taskId = await task(
category=category, category=category,
load_skills=[], load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each issue runs independently run_in_background=true, // ← BACKGROUND TASK: Each issue runs independently
@@ -480,7 +480,7 @@ When invoked, immediately:
4. Exhaustive pagination for issues 4. Exhaustive pagination for issues
5. Exhaustive pagination for PRs 5. Exhaustive pagination for PRs
6. **LAUNCH**: For each issue: 6. **LAUNCH**: For each issue:
- `delegate_task(run_in_background=true)` - 1 task per issue - `task(run_in_background=true)` - 1 task per issue
- Store taskId mapped to issue number - Store taskId mapped to issue number
7. **STREAM**: Poll `background_output()` for each task: 7. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result - As each completes, immediately report result

View File

@@ -22,7 +22,7 @@ You are a GitHub Pull Request triage automation agent. Your job is to:
| Aspect | Rule | | Aspect | Rule |
|--------|------| |--------|------|
| **Task Granularity** | 1 PR = Exactly 1 `delegate_task()` call | | **Task Granularity** | 1 PR = Exactly 1 `task()` call |
| **Execution Mode** | `run_in_background=true` (Each PR runs independently) | | **Execution Mode** | `run_in_background=true` (Each PR runs independently) |
| **Result Handling** | `background_output()` to collect results as they complete | | **Result Handling** | `background_output()` to collect results as they complete |
| **Reporting** | IMMEDIATE streaming when each task finishes | | **Reporting** | IMMEDIATE streaming when each task finishes |
@@ -68,7 +68,7 @@ for (let i = 0; i < allPRs.length; i++) {
const pr = allPRs[i] const pr = allPRs[i]
const category = getCategory(i) const category = getCategory(i)
const taskId = await delegate_task( const taskId = await task(
category=category, category=category,
load_skills=[], load_skills=[],
run_in_background=true, // ← CRITICAL: Each PR is an independent background task run_in_background=true, // ← CRITICAL: Each PR is an independent background task
@@ -178,7 +178,7 @@ for (let i = 0; i < allPRs.length; i++) {
console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`) console.log(`🚀 Launching background task for PR #${pr.number} (${category})...`)
const taskId = await delegate_task( const taskId = await task(
category=category, category=category,
load_skills=[], load_skills=[],
run_in_background=true, // ← BACKGROUND TASK: Each PR runs independently run_in_background=true, // ← BACKGROUND TASK: Each PR runs independently
@@ -474,7 +474,7 @@ When invoked, immediately:
2. `gh repo view --json nameWithOwner -q .nameWithOwner` 2. `gh repo view --json nameWithOwner -q .nameWithOwner`
3. Exhaustive pagination for ALL open PRs 3. Exhaustive pagination for ALL open PRs
4. **LAUNCH**: For each PR: 4. **LAUNCH**: For each PR:
- `delegate_task(run_in_background=true)` - 1 task per PR - `task(run_in_background=true)` - 1 task per PR
- Store taskId mapped to PR number - Store taskId mapped to PR number
5. **STREAM**: Poll `background_output()` for each task: 5. **STREAM**: Poll `background_output()` for each task:
- As each completes, immediately report result - As each completes, immediately report result

View File

@@ -195,7 +195,7 @@ oh-my-opencode/
| Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` | | Type Safety | `as any`, `@ts-ignore`, `@ts-expect-error` |
| Error Handling | Empty catch blocks | | Error Handling | Empty catch blocks |
| Testing | Deleting failing tests, writing implementation before test | | Testing | Deleting failing tests, writing implementation before test |
| Agent Calls | Sequential - use parallel `delegate_task` calls | | Agent Calls | Sequential - use parallel `task` calls |
| Hook Logic | Heavy PreToolUse - slows every call | | Hook Logic | Heavy PreToolUse - slows every call |
| Commits | Giant (3+ files), separate test from impl | | Commits | Giant (3+ files), separate test from impl |
| Temperature | >0.3 for code agents | | Temperature | >0.3 for code agents |

View File

@@ -9,7 +9,7 @@ Instead of delegating everything to a single AI agent, it's far more efficient t
- **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset) - **Category**: "What kind of work is this?" (determines model, temperature, prompt mindset)
- **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows) - **Skill**: "What tools and knowledge are needed?" (injects specialized knowledge, MCP tools, workflows)
By combining these two concepts, you can generate optimal agents through `delegate_task`. By combining these two concepts, you can generate optimal agents through `task`.
--- ---
@@ -32,10 +32,10 @@ A Category is an agent configuration preset optimized for specific domains.
### Usage ### Usage
Specify the `category` parameter when invoking the `delegate_task` tool. Specify the `category` parameter when invoking the `task` tool.
```typescript ```typescript
delegate_task( task(
category="visual-engineering", category="visual-engineering",
prompt="Add a responsive chart component to the dashboard page" prompt="Add a responsive chart component to the dashboard page"
) )
@@ -74,7 +74,7 @@ A Skill is a mechanism that injects **specialized knowledge (Context)** and **to
Add desired skill names to the `load_skills` array. Add desired skill names to the `load_skills` array.
```typescript ```typescript
delegate_task( task(
category="quick", category="quick",
load_skills=["git-master"], load_skills=["git-master"],
prompt="Commit current changes. Follow commit message style." prompt="Commit current changes. Follow commit message style."
@@ -126,7 +126,7 @@ You can create powerful specialized agents by combining Categories and Skills.
--- ---
## 5. delegate_task Prompt Guide ## 5. task Prompt Guide
When delegating, **clear and specific** prompts are essential. Include these 7 elements: When delegating, **clear and specific** prompts are essential. Include these 7 elements:
@@ -158,7 +158,7 @@ You can fine-tune categories in `oh-my-opencode.json`.
| Field | Type | Description | | Field | Type | Description |
|-------|------|-------------| |-------|------|-------------|
| `description` | string | Human-readable description of the category's purpose. Shown in delegate_task prompt. | | `description` | string | Human-readable description of the category's purpose. Shown in task prompt. |
| `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) | | `model` | string | AI model ID to use (e.g., `anthropic/claude-opus-4-6`) |
| `variant` | string | Model variant (e.g., `max`, `xhigh`) | | `variant` | string | Model variant (e.g., `max`, `xhigh`) |
| `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. | | `temperature` | number | Creativity level (0.0 ~ 2.0). Lower is more deterministic. |

View File

@@ -25,7 +25,7 @@ It asks about your providers (Claude, OpenAI, Gemini, etc.) and generates optima
"explore": { "model": "opencode/gpt-5-nano" } // Free model for grep "explore": { "model": "opencode/gpt-5-nano" } // Free model for grep
}, },
// Override category models (used by delegate_task) // Override category models (used by task)
"categories": { "categories": {
"quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks "quick": { "model": "opencode/gpt-5-nano" }, // Fast/cheap for trivial tasks
"visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI "visual-engineering": { "model": "google/gemini-3-pro" } // Gemini for UI
@@ -252,7 +252,7 @@ Available agents: `sisyphus`, `prometheus`, `oracle`, `librarian`, `explore`, `m
Oh My OpenCode includes built-in skills that provide additional capabilities: Oh My OpenCode includes built-in skills that provide additional capabilities:
- **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers. - **playwright** (default) / **agent-browser**: Browser automation for web scraping, testing, screenshots, and browser interactions. See [Browser Automation](#browser-automation) for switching between providers.
- **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `delegate_task(category='quick', load_skills=['git-master'], ...)` to save context. - **git-master**: Git expert for atomic commits, rebase/squash, and history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with `task(category='quick', load_skills=['git-master'], ...)` to save context.
Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`: Disable built-in skills via `disabled_skills` in `~/.config/opencode/oh-my-opencode.json` or `.opencode/oh-my-opencode.json`:
@@ -455,7 +455,7 @@ Run background subagents in separate tmux panes for **visual multi-agent executi
### How It Works ### How It Works
When `tmux.enabled` is `true` and you're inside a tmux session: When `tmux.enabled` is `true` and you're inside a tmux session:
- Background agents (via `delegate_task(run_in_background=true)`) spawn in new tmux panes - Background agents (via `task(run_in_background=true)`) spawn in new tmux panes
- Each pane shows the subagent's real-time output - Each pane shows the subagent's real-time output
- Panes are automatically closed when the subagent completes - Panes are automatically closed when the subagent completes
- Layout is automatically adjusted based on your configuration - Layout is automatically adjusted based on your configuration
@@ -716,7 +716,7 @@ Configure concurrency limits for background agent tasks. This controls how many
## Categories ## Categories
Categories enable domain-specific task delegation via the `delegate_task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent. Categories enable domain-specific task delegation via the `task` tool. Each category applies runtime presets (model, temperature, prompt additions) when calling the `Sisyphus-Junior` agent.
### Built-in Categories ### Built-in Categories
@@ -797,12 +797,12 @@ All 7 categories come with optimal model defaults, but **you must configure them
### Usage ### Usage
```javascript ```javascript
// Via delegate_task tool // Via task tool
delegate_task(category="visual-engineering", prompt="Create a responsive dashboard component") task(category="visual-engineering", prompt="Create a responsive dashboard component")
delegate_task(category="ultrabrain", prompt="Design the payment processing flow") task(category="ultrabrain", prompt="Design the payment processing flow")
// Or target a specific agent directly (bypasses categories) // Or target a specific agent directly (bypasses categories)
delegate_task(subagent_type="oracle", prompt="Review this architecture") task(subagent_type="oracle", prompt="Review this architecture")
``` ```
### Custom Categories ### Custom Categories
@@ -831,7 +831,7 @@ Each category supports: `model`, `temperature`, `top_p`, `maxTokens`, `thinking`
| Option | Type | Default | Description | | Option | Type | Default | Description |
| ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- | | ------------------ | ------- | ------- | --------------------------------------------------------------------------------------------------- |
| `description` | string | - | Human-readable description of the category's purpose. Shown in delegate_task prompt. | | `description` | string | - | Human-readable description of the category's purpose. Shown in task prompt. |
| `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. | | `is_unstable_agent`| boolean | `false` | Mark agent as unstable - forces background mode for monitoring. Auto-enabled for gemini models. |
## Model Resolution System ## Model Resolution System

View File

@@ -54,7 +54,7 @@ Run agents in the background and continue working:
``` ```
# Launch in background # Launch in background
delegate_task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true) task(subagent_type="explore", load_skills=[], prompt="Find auth implementations", run_in_background=true)
# Continue working... # Continue working...
# System notifies on completion # System notifies on completion
@@ -374,7 +374,7 @@ Hooks intercept and modify behavior at key points in the agent lifecycle.
| Hook | Event | Description | | Hook | Event | Description |
|------|-------|-------------| |------|-------|-------------|
| **task-resume-info** | PostToolUse | Provides task resume information for continuity. | | **task-resume-info** | PostToolUse | Provides task resume information for continuity. |
| **delegate-task-retry** | PostToolUse | Retries failed delegate_task calls. | | **delegate-task-retry** | PostToolUse | Retries failed task calls. |
#### Integration #### Integration
@@ -454,7 +454,7 @@ Disable specific hooks in config:
| Tool | Description | | Tool | Description |
|------|-------------| |------|-------------|
| **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. | | **call_omo_agent** | Spawn explore/librarian agents. Supports `run_in_background`. |
| **delegate_task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. | | **task** | Category-based task delegation. Supports categories (visual, business-logic) or direct agent targeting. |
| **background_output** | Retrieve background task results | | **background_output** | Retrieve background task results |
| **background_cancel** | Cancel running background tasks | | **background_cancel** | Cancel running background tasks |

View File

@@ -50,11 +50,11 @@ flowchart TB
User -->|"/start-work"| Orchestrator User -->|"/start-work"| Orchestrator
Plan -->|"Read"| Orchestrator Plan -->|"Read"| Orchestrator
Orchestrator -->|"delegate_task(category)"| Junior Orchestrator -->|"task(category)"| Junior
Orchestrator -->|"delegate_task(agent)"| Oracle Orchestrator -->|"task(agent)"| Oracle
Orchestrator -->|"delegate_task(agent)"| Explore Orchestrator -->|"task(agent)"| Explore
Orchestrator -->|"delegate_task(agent)"| Librarian Orchestrator -->|"task(agent)"| Librarian
Orchestrator -->|"delegate_task(agent)"| Frontend Orchestrator -->|"task(agent)"| Frontend
Junior -->|"Results + Learnings"| Orchestrator Junior -->|"Results + Learnings"| Orchestrator
Oracle -->|"Advice"| Orchestrator Oracle -->|"Advice"| Orchestrator
@@ -220,9 +220,9 @@ Independent tasks run in parallel:
```typescript ```typescript
// Orchestrator identifies parallelizable groups from plan // Orchestrator identifies parallelizable groups from plan
// Group A: Tasks 2, 3, 4 (no file conflicts) // Group A: Tasks 2, 3, 4 (no file conflicts)
delegate_task(category="ultrabrain", prompt="Task 2...") task(category="ultrabrain", prompt="Task 2...")
delegate_task(category="visual-engineering", prompt="Task 3...") task(category="visual-engineering", prompt="Task 3...")
delegate_task(category="general", prompt="Task 4...") task(category="general", prompt="Task 4...")
// All run simultaneously // All run simultaneously
``` ```
@@ -234,7 +234,7 @@ delegate_task(category="general", prompt="Task 4...")
Junior is the **workhorse** that actually writes code. Key characteristics: Junior is the **workhorse** that actually writes code. Key characteristics:
- **Focused**: Cannot delegate (blocked from task/delegate_task tools) - **Focused**: Cannot delegate (blocked from task tool)
- **Disciplined**: Obsessive todo tracking - **Disciplined**: Obsessive todo tracking
- **Verified**: Must pass lsp_diagnostics before completion - **Verified**: Must pass lsp_diagnostics before completion
- **Constrained**: Cannot modify plan files (READ-ONLY) - **Constrained**: Cannot modify plan files (READ-ONLY)
@@ -268,7 +268,7 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
--- ---
## The delegate_task Tool: Category + Skill System ## The task Tool: Category + Skill System
### Why Categories are Revolutionary ### Why Categories are Revolutionary
@@ -276,17 +276,17 @@ This "boulder pushing" mechanism is why the system is named after Sisyphus.
```typescript ```typescript
// OLD: Model name creates distributional bias // OLD: Model name creates distributional bias
delegate_task(agent="gpt-5.2", prompt="...") // Model knows its limitations task(agent="gpt-5.2", prompt="...") // Model knows its limitations
delegate_task(agent="claude-opus-4.6", prompt="...") // Different self-perception task(agent="claude-opus-4.6", prompt="...") // Different self-perception
``` ```
**The Solution: Semantic Categories:** **The Solution: Semantic Categories:**
```typescript ```typescript
// NEW: Category describes INTENT, not implementation // NEW: Category describes INTENT, not implementation
delegate_task(category="ultrabrain", prompt="...") // "Think strategically" task(category="ultrabrain", prompt="...") // "Think strategically"
delegate_task(category="visual-engineering", prompt="...") // "Design beautifully" task(category="visual-engineering", prompt="...") // "Design beautifully"
delegate_task(category="quick", prompt="...") // "Just get it done fast" task(category="quick", prompt="...") // "Just get it done fast"
``` ```
### Built-in Categories ### Built-in Categories
@@ -324,13 +324,13 @@ Skills prepend specialized instructions to subagent prompts:
```typescript ```typescript
// Category + Skill combination // Category + Skill combination
delegate_task( task(
category="visual-engineering", category="visual-engineering",
load_skills=["frontend-ui-ux"], // Adds UI/UX expertise load_skills=["frontend-ui-ux"], // Adds UI/UX expertise
prompt="..." prompt="..."
) )
delegate_task( task(
category="general", category="general",
load_skills=["playwright"], // Adds browser automation expertise load_skills=["playwright"], // Adds browser automation expertise
prompt="..." prompt="..."
@@ -365,7 +365,7 @@ sequenceDiagram
Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom Note over Orchestrator: Prompt Structure:<br/>1. TASK (exact checkbox)<br/>2. EXPECTED OUTCOME<br/>3. REQUIRED SKILLS<br/>4. REQUIRED TOOLS<br/>5. MUST DO<br/>6. MUST NOT DO<br/>7. CONTEXT + Wisdom
Orchestrator->>Junior: delegate_task(category, load_skills, prompt) Orchestrator->>Junior: task(category, load_skills, prompt)
Junior->>Junior: Create todos, execute Junior->>Junior: Create todos, execute
Junior->>Junior: Verify (lsp_diagnostics, tests) Junior->>Junior: Verify (lsp_diagnostics, tests)

View File

@@ -387,7 +387,7 @@ You can control related features in `oh-my-opencode.json`.
2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation. 2. **Single Plan Principle**: No matter how large the task, contain all TODOs in one plan file (`.md`). This prevents context fragmentation.
3. **Active Delegation**: During execution, delegate to specialized agents via `delegate_task` rather than modifying code directly. 3. **Active Delegation**: During execution, delegate to specialized agents via `task` rather than modifying code directly.
4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json. 4. **Trust /start-work Continuity**: Don't worry about session interruptions. `/start-work` will always resume your work from boulder.json.

View File

@@ -288,7 +288,7 @@ src/tools/delegate-task/constants.ts
``` ```
Sisyphus (ULW mode) Sisyphus (ULW mode)
delegate_task(category="deep", ...) task(category="deep", ...)
executor.ts: executeBackgroundContinuation() executor.ts: executeBackgroundContinuation()

View File

@@ -212,7 +212,7 @@ Search **external references** (docs, OSS, web). Fire proactively when unfamilia
- "Working with unfamiliar npm/pip/cargo packages" - "Working with unfamiliar npm/pip/cargo packages"
### Pre-Delegation Planning (MANDATORY) ### Pre-Delegation Planning (MANDATORY)
**BEFORE every `delegate_task` call, EXPLICITLY declare your reasoning.** **BEFORE every `task` call, EXPLICITLY declare your reasoning.**
#### Step 1: Identify Task Requirements #### Step 1: Identify Task Requirements
@@ -236,7 +236,7 @@ Ask yourself:
**MANDATORY FORMAT:** **MANDATORY FORMAT:**
``` ```
I will use delegate_task with: I will use task with:
- **Category**: [selected-category-name] - **Category**: [selected-category-name]
- **Why this category**: [how category description matches task domain] - **Why this category**: [how category description matches task domain]
- **load_skills**: [list of selected skills] - **load_skills**: [list of selected skills]
@@ -246,14 +246,14 @@ I will use delegate_task with:
- **Expected Outcome**: [what success looks like] - **Expected Outcome**: [what success looks like]
``` ```
**Then** make the delegate_task call. **Then** make the task call.
#### Examples #### Examples
**CORRECT: Full Evaluation** **CORRECT: Full Evaluation**
``` ```
I will use delegate_task with: I will use task with:
- **Category**: [category-name] - **Category**: [category-name]
- **Why this category**: Category description says "[quote description]" which matches this task's requirements - **Why this category**: Category description says "[quote description]" which matches this task's requirements
- **load_skills**: ["skill-a", "skill-b"] - **load_skills**: ["skill-a", "skill-b"]
@@ -263,9 +263,11 @@ I will use delegate_task with:
- skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason] - skill-c: OMITTED - description says "[quote]" which doesn't apply because [reason]
- **Expected Outcome**: [concrete deliverable] - **Expected Outcome**: [concrete deliverable]
delegate_task( task(
category="[category-name]", category="[category-name]",
load_skills=["skill-a", "skill-b"], load_skills=["skill-a", "skill-b"],
description="[short task description]",
run_in_background=false,
prompt="..." prompt="..."
) )
``` ```
@@ -273,14 +275,16 @@ delegate_task(
**CORRECT: Agent-Specific (for exploration/consultation)** **CORRECT: Agent-Specific (for exploration/consultation)**
``` ```
I will use delegate_task with: I will use task with:
- **Agent**: [agent-name] - **Agent**: [agent-name]
- **Reason**: This requires [agent's specialty] based on agent description - **Reason**: This requires [agent's specialty] based on agent description
- **load_skills**: [] (agents have built-in expertise) - **load_skills**: [] (agents have built-in expertise)
- **Expected Outcome**: [what agent should return] - **Expected Outcome**: [what agent should return]
delegate_task( task(
subagent_type="[agent-name]", subagent_type="[agent-name]",
description="[short task description]",
run_in_background=false,
load_skills=[], load_skills=[],
prompt="..." prompt="..."
) )
@@ -289,14 +293,15 @@ delegate_task(
**CORRECT: Background Exploration** **CORRECT: Background Exploration**
``` ```
I will use delegate_task with: I will use task with:
- **Agent**: explore - **Agent**: explore
- **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep - **Reason**: Need to find all authentication implementations across the codebase - this is contextual grep
- **load_skills**: [] - **load_skills**: []
- **Expected Outcome**: List of files containing auth patterns - **Expected Outcome**: List of files containing auth patterns
delegate_task( task(
subagent_type="explore", subagent_type="explore",
description="Find auth implementations",
run_in_background=true, run_in_background=true,
load_skills=[], load_skills=[],
prompt="Find all authentication implementations in the codebase" prompt="Find all authentication implementations in the codebase"
@@ -306,7 +311,7 @@ delegate_task(
**WRONG: No Skill Evaluation** **WRONG: No Skill Evaluation**
``` ```
delegate_task(category="...", load_skills=[], prompt="...") // Where's the justification? task(category="...", load_skills=[], prompt="...") // Where's the justification?
``` ```
**WRONG: Vague Category Selection** **WRONG: Vague Category Selection**
@@ -317,7 +322,7 @@ I'll use this category because it seems right.
#### Enforcement #### Enforcement
**BLOCKING VIOLATION**: If you call `delegate_task` without: **BLOCKING VIOLATION**: If you call `task` without:
1. Explaining WHY category was selected (based on description) 1. Explaining WHY category was selected (based on description)
2. Evaluating EACH available skill for relevance 2. Evaluating EACH available skill for relevance
@@ -329,15 +334,15 @@ I'll use this category because it seems right.
```typescript ```typescript
// CORRECT: Always background, always parallel // CORRECT: Always background, always parallel
// Contextual Grep (internal) // Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...") task(subagent_type="explore", description="Find auth implementations", run_in_background=true, load_skills=[], prompt="Find auth implementations in our codebase...")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...") task(subagent_type="explore", description="Find error handling patterns", run_in_background=true, load_skills=[], prompt="Find error handling patterns here...")
// Reference Grep (external) // Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...") task(subagent_type="librarian", description="Find JWT best practices", run_in_background=true, load_skills=[], prompt="Find JWT best practices in official docs...")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...") task(subagent_type="librarian", description="Find Express auth patterns", run_in_background=true, load_skills=[], prompt="Find how production apps handle auth in Express...")
// Continue working immediately. Collect with background_output when needed. // Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking // WRONG: Sequential or blocking
result = delegate_task(...) // Never wait synchronously for explore/librarian result = task(...) // Never wait synchronously for explore/librarian
``` ```
### Background Result Collection: ### Background Result Collection:
@@ -347,16 +352,16 @@ result = delegate_task(...) // Never wait synchronously for explore/librarian
4. BEFORE final answer: `background_cancel(all=true)` 4. BEFORE final answer: `background_cancel(all=true)`
### Resume Previous Agent (CRITICAL for efficiency): ### Resume Previous Agent (CRITICAL for efficiency):
Pass `resume=session_id` to continue previous agent with FULL CONTEXT PRESERVED. Pass `session_id` to continue previous agent with FULL CONTEXT PRESERVED.
**ALWAYS use resume when:** **ALWAYS use session_id when:**
- Previous task failed → `resume=session_id, prompt="fix: [specific error]"` - Previous task failed → `session_id="ses_xxx", prompt="fix: [specific error]"`
- Need follow-up on result → `resume=session_id, prompt="also check [additional query]"` - Need follow-up on result → `session_id="ses_xxx", prompt="also check [additional query]"`
- Multi-turn with same agent → resume instead of new task (saves tokens!) - Multi-turn with same agent → pass `session_id` instead of starting a new task (saves tokens!)
**Example:** **Example:**
``` ```
delegate_task(resume="ses_abc123", prompt="The previous search missed X. Also look for Y.") task(session_id="ses_abc123", description="Follow-up search", run_in_background=false, load_skills=[], prompt="The previous search missed X. Also look for Y.")
``` ```
### Search Stop Conditions ### Search Stop Conditions
@@ -377,7 +382,7 @@ STOP searching when:
3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS 3. Mark `completed` as soon as done (don't batch) - OBSESSIVELY TRACK YOUR WORK USING TODO TOOLS
### Category + Skills Delegation System ### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.** **task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models) #### Available Categories (Domain-Optimized Models)
@@ -442,7 +447,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern ### Delegation Pattern
```typescript ```typescript
delegate_task( task(
category="[selected-category]", category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills load_skills=["skill-1", "skill-2"], // Include ALL relevant skills
prompt="..." prompt="..."
@@ -451,7 +456,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):** **ANTI-PATTERN (will produce poor results):**
```typescript ```typescript
delegate_task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification task(category="...", load_skills=[], prompt="...") // Empty load_skills without justification
``` ```
### Delegation Table: ### Delegation Table:

View File

@@ -68,11 +68,11 @@ agents/
## TOOL RESTRICTIONS ## TOOL RESTRICTIONS
| Agent | Denied Tools | | Agent | Denied Tools |
|-------|-------------| |-------|-------------|
| oracle | write, edit, task, delegate_task | | oracle | write, edit, task |
| librarian | write, edit, task, delegate_task, call_omo_agent | | librarian | write, edit, task, call_omo_agent |
| explore | write, edit, task, delegate_task, call_omo_agent | | explore | write, edit, task, call_omo_agent |
| multimodal-looker | Allowlist: read only | | multimodal-looker | Allowlist: read only |
| Sisyphus-Junior | task, delegate_task | | Sisyphus-Junior | task |
| Atlas | task, call_omo_agent | | Atlas | task, call_omo_agent |
## PATTERNS ## PATTERNS
@@ -85,5 +85,5 @@ agents/
## ANTI-PATTERNS ## ANTI-PATTERNS
- **Trust reports**: NEVER trust "I'm done" - verify outputs - **Trust reports**: NEVER trust "I'm done" - verify outputs
- **High temp**: Don't use >0.3 for code agents - **High temp**: Don't use >0.3 for code agents
- **Sequential calls**: Use `delegate_task` with `run_in_background` for exploration - **Sequential calls**: Use `task` with `run_in_background` for exploration
- **Prometheus writing code**: Planner only - never implements - **Prometheus writing code**: Planner only - never implements

View File

@@ -19,18 +19,18 @@ You never write code yourself. You orchestrate specialists who do.
</identity> </identity>
<mission> <mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done. Complete ALL tasks in a work plan via \`task()\` until fully done.
One task per delegation. Parallel when independent. Verify everything. One task per delegation. Parallel when independent. Verify everything.
</mission> </mission>
<delegation_system> <delegation_system>
## How to Delegate ## How to Delegate
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive): Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript \`\`\`typescript
// Option A: Category + Skills (spawns Sisyphus-Junior with domain config) // Option A: Category + Skills (spawns Sisyphus-Junior with domain config)
delegate_task( task(
category="[category-name]", category="[category-name]",
load_skills=["skill-1", "skill-2"], load_skills=["skill-1", "skill-2"],
run_in_background=false, run_in_background=false,
@@ -38,7 +38,7 @@ delegate_task(
) )
// Option B: Specialized Agent (for specific expert tasks) // Option B: Specialized Agent (for specific expert tasks)
delegate_task( task(
subagent_type="[agent-name]", subagent_type="[agent-name]",
load_skills=[], load_skills=[],
run_in_background=false, run_in_background=false,
@@ -58,7 +58,7 @@ delegate_task(
## 6-Section Prompt Structure (MANDATORY) ## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections: Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown \`\`\`markdown
## 1. TASK ## 1. TASK
@@ -149,7 +149,7 @@ Structure:
### 3.1 Check Parallelization ### 3.1 Check Parallelization
If tasks can run in parallel: If tasks can run in parallel:
- Prepare prompts for ALL parallelizable tasks - Prepare prompts for ALL parallelizable tasks
- Invoke multiple \`delegate_task()\` in ONE message - Invoke multiple \`task()\` in ONE message
- Wait for all to complete - Wait for all to complete
- Verify all, then continue - Verify all, then continue
@@ -167,10 +167,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
Extract wisdom and include in prompt. Extract wisdom and include in prompt.
### 3.3 Invoke delegate_task() ### 3.3 Invoke task()
\`\`\`typescript \`\`\`typescript
delegate_task( task(
category="[category]", category="[category]",
load_skills=["[relevant-skills]"], load_skills=["[relevant-skills]"],
run_in_background=false, run_in_background=false,
@@ -210,7 +210,7 @@ delegate_task(
**If verification fails**: Resume the SAME session with the ACTUAL error output: **If verification fails**: Resume the SAME session with the ACTUAL error output:
\`\`\`typescript \`\`\`typescript
delegate_task( task(
session_id="ses_xyz789", // ALWAYS use the session from the failed task session_id="ses_xyz789", // ALWAYS use the session from the failed task
load_skills=[...], load_skills=[...],
prompt="Verification failed: {actual error}. Fix." prompt="Verification failed: {actual error}. Fix."
@@ -221,13 +221,13 @@ delegate_task(
**CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.** **CRITICAL: When re-delegating, ALWAYS use \`session_id\` parameter.**
Every \`delegate_task()\` output includes a session_id. STORE IT. Every \`task()\` output includes a session_id. STORE IT.
If task fails: If task fails:
1. Identify what went wrong 1. Identify what went wrong
2. **Resume the SAME session** - subagent has full context already: 2. **Resume the SAME session** - subagent has full context already:
\`\`\`typescript \`\`\`typescript
delegate_task( task(
session_id="ses_xyz789", // Session from failed task session_id="ses_xyz789", // Session from failed task
load_skills=[...], load_skills=[...],
prompt="FAILED: {error}. Fix by: {specific instruction}" prompt="FAILED: {error}. Fix by: {specific instruction}"
@@ -274,21 +274,21 @@ ACCUMULATED WISDOM:
**For exploration (explore/librarian)**: ALWAYS background **For exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...) task(subagent_type="explore", run_in_background=true, ...)
delegate_task(subagent_type="librarian", run_in_background=true, ...) task(subagent_type="librarian", run_in_background=true, ...)
\`\`\` \`\`\`
**For task execution**: NEVER background **For task execution**: NEVER background
\`\`\`typescript \`\`\`typescript
delegate_task(category="...", run_in_background=false, ...) task(category="...", run_in_background=false, ...)
\`\`\` \`\`\`
**Parallel task groups**: Invoke multiple in ONE message **Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript \`\`\`typescript
// Tasks 2, 3, 4 are independent - invoke together // Tasks 2, 3, 4 are independent - invoke together
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...") task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...") task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...") task(category="quick", load_skills=[], run_in_background=false, prompt="Task 4...")
\`\`\` \`\`\`
**Background management**: **Background management**:

View File

@@ -24,7 +24,7 @@ You DELEGATE, COORDINATE, and VERIFY. You NEVER write code yourself.
</identity> </identity>
<mission> <mission>
Complete ALL tasks in a work plan via \`delegate_task()\` until fully done. Complete ALL tasks in a work plan via \`task()\` until fully done.
- One task per delegation - One task per delegation
- Parallel when independent - Parallel when independent
- Verify everything - Verify everything
@@ -71,14 +71,14 @@ Complete ALL tasks in a work plan via \`delegate_task()\` until fully done.
<delegation_system> <delegation_system>
## Delegation API ## Delegation API
Use \`delegate_task()\` with EITHER category OR agent (mutually exclusive): Use \`task()\` with EITHER category OR agent (mutually exclusive):
\`\`\`typescript \`\`\`typescript
// Category + Skills (spawns Sisyphus-Junior) // Category + Skills (spawns Sisyphus-Junior)
delegate_task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...") task(category="[name]", load_skills=["skill-1"], run_in_background=false, prompt="...")
// Specialized Agent // Specialized Agent
delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...") task(subagent_type="[agent]", load_skills=[], run_in_background=false, prompt="...")
\`\`\` \`\`\`
{CATEGORY_SECTION} {CATEGORY_SECTION}
@@ -93,7 +93,7 @@ delegate_task(subagent_type="[agent]", load_skills=[], run_in_background=false,
## 6-Section Prompt Structure (MANDATORY) ## 6-Section Prompt Structure (MANDATORY)
Every \`delegate_task()\` prompt MUST include ALL 6 sections: Every \`task()\` prompt MUST include ALL 6 sections:
\`\`\`markdown \`\`\`markdown
## 1. TASK ## 1. TASK
@@ -166,7 +166,7 @@ Structure: learnings.md, decisions.md, issues.md, problems.md
## Step 3: Execute Tasks ## Step 3: Execute Tasks
### 3.1 Parallelization Check ### 3.1 Parallelization Check
- Parallel tasks → invoke multiple \`delegate_task()\` in ONE message - Parallel tasks → invoke multiple \`task()\` in ONE message
- Sequential → process one at a time - Sequential → process one at a time
### 3.2 Pre-Delegation (MANDATORY) ### 3.2 Pre-Delegation (MANDATORY)
@@ -176,10 +176,10 @@ Read(".sisyphus/notepads/{plan-name}/issues.md")
\`\`\` \`\`\`
Extract wisdom → include in prompt. Extract wisdom → include in prompt.
### 3.3 Invoke delegate_task() ### 3.3 Invoke task()
\`\`\`typescript \`\`\`typescript
delegate_task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`) task(category="[cat]", load_skills=["[skills]"], run_in_background=false, prompt=\`[6-SECTION PROMPT]\`)
\`\`\` \`\`\`
### 3.4 Verify (PROJECT-LEVEL QA) ### 3.4 Verify (PROJECT-LEVEL QA)
@@ -201,7 +201,7 @@ Checklist:
**CRITICAL: Use \`session_id\` for retries.** **CRITICAL: Use \`session_id\` for retries.**
\`\`\`typescript \`\`\`typescript
delegate_task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}") task(session_id="ses_xyz789", load_skills=[...], prompt="FAILED: {error}. Fix by: {instruction}")
\`\`\` \`\`\`
- Maximum 3 retries per task - Maximum 3 retries per task
@@ -231,18 +231,18 @@ ACCUMULATED WISDOM: [from notepad]
<parallel_execution> <parallel_execution>
**Exploration (explore/librarian)**: ALWAYS background **Exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", run_in_background=true, ...) task(subagent_type="explore", run_in_background=true, ...)
\`\`\` \`\`\`
**Task execution**: NEVER background **Task execution**: NEVER background
\`\`\`typescript \`\`\`typescript
delegate_task(category="...", run_in_background=false, ...) task(category="...", run_in_background=false, ...)
\`\`\` \`\`\`
**Parallel task groups**: Invoke multiple in ONE message **Parallel task groups**: Invoke multiple in ONE message
\`\`\`typescript \`\`\`typescript
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...") task(category="quick", load_skills=[], run_in_background=false, prompt="Task 2...")
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...") task(category="quick", load_skills=[], run_in_background=false, prompt="Task 3...")
\`\`\` \`\`\`
**Background management**: **Background management**:

View File

@@ -1,7 +1,7 @@
/** /**
* Atlas - Master Orchestrator Agent * Atlas - Master Orchestrator Agent
* *
* Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. * Orchestrates work via task() to complete ALL tasks in a todo list until fully done.
* You are the conductor of a symphony of specialized agents. * You are the conductor of a symphony of specialized agents.
* *
* Routing: * Routing:
@@ -111,7 +111,7 @@ export function createAtlasAgent(ctx: OrchestratorContext): AgentConfig {
const baseConfig = { const baseConfig = {
description: description:
"Orchestrates work via delegate_task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)", "Orchestrates work via task() to complete ALL tasks in a todo list until fully done. (Atlas - OhMyOpenCode)",
mode: MODE, mode: MODE,
...(ctx.model ? { model: ctx.model } : {}), ...(ctx.model ? { model: ctx.model } : {}),
temperature: 0.1, temperature: 0.1,

View File

@@ -47,7 +47,7 @@ Categories spawn \`Sisyphus-Junior-{category}\` with optimized settings:
${categoryRows.join("\n")} ${categoryRows.join("\n")}
\`\`\`typescript \`\`\`typescript
delegate_task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...") task(category="[category-name]", load_skills=[...], run_in_background=false, prompt="...")
\`\`\`` \`\`\``
} }
@@ -105,7 +105,7 @@ Read each skill's description and ask: "Does this skill's domain overlap with my
**Usage:** **Usage:**
\`\`\`typescript \`\`\`typescript
delegate_task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...") task(category="[category]", load_skills=["skill-1", "skill-2"], run_in_background=false, prompt="...")
\`\`\` \`\`\`
**IMPORTANT:** **IMPORTANT:**

View File

@@ -242,7 +242,7 @@ ${builtinRows.join("\n")}`
return `### Category + Skills Delegation System return `### Category + Skills Delegation System
**delegate_task() combines categories and skills for optimal task execution.** **task() combines categories and skills for optimal task execution.**
#### Available Categories (Domain-Optimized Models) #### Available Categories (Domain-Optimized Models)
@@ -296,7 +296,7 @@ SKILL EVALUATION for "[skill-name]":
### Delegation Pattern ### Delegation Pattern
\`\`\`typescript \`\`\`typescript
delegate_task( task(
category="[selected-category]", category="[selected-category]",
load_skills=["skill-1", "skill-2"], // Include ALL relevant skills — ESPECIALLY user-installed ones load_skills=["skill-1", "skill-2"], // Include ALL relevant skills — ESPECIALLY user-installed ones
prompt="..." prompt="..."
@@ -305,7 +305,7 @@ delegate_task(
**ANTI-PATTERN (will produce poor results):** **ANTI-PATTERN (will produce poor results):**
\`\`\`typescript \`\`\`typescript
delegate_task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification task(category="...", load_skills=[], run_in_background=false, prompt="...") // Empty load_skills without justification
\`\`\`` \`\`\``
} }

View File

@@ -29,7 +29,7 @@ export function createExploreAgent(model: string): AgentConfig {
"write", "write",
"edit", "edit",
"task", "task",
"delegate_task", "task",
"call_omo_agent", "call_omo_agent",
]) ])

View File

@@ -227,8 +227,8 @@ Agent: *runs gh pr list, gh pr view, searches recent commits*
**Delegation Check (MANDATORY before acting directly):** **Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request? 1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category that best describes this task? What skills are available to equip the agent with? 2. If not, is there a \`task\` category that best describes this task? What skills are available to equip the agent with?
- MUST FIND skills to use: \`delegate_task(load_skills=[{skill1}, ...])\` - MUST FIND skills to use: \`task(load_skills=[{skill1}, ...])\`
3. Can I do it myself for the best result, FOR SURE? 3. Can I do it myself for the best result, FOR SURE?
**Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.** **Default Bias: DELEGATE for complex tasks. Work yourself ONLY when trivial.**
@@ -280,15 +280,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel // CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find] // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal) // Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.") task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.") task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external) // Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.") task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.") task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue immediately - collect results when needed // Continue immediately - collect results when needed
// WRONG: Sequential or blocking - NEVER DO THIS // WRONG: Sequential or blocking - NEVER DO THIS
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\` \`\`\`
**Rules:** **Rules:**
@@ -393,7 +393,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY) ### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.** Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:** **ALWAYS continue when:**
| Scenario | Action | | Scenario | Action |

View File

@@ -26,7 +26,7 @@ export function createLibrarianAgent(model: string): AgentConfig {
"write", "write",
"edit", "edit",
"task", "task",
"delegate_task", "task",
"call_omo_agent", "call_omo_agent",
]) ])

View File

@@ -307,7 +307,6 @@ const metisRestrictions = createAgentToolRestrictions([
"write", "write",
"edit", "edit",
"task", "task",
"delegate_task",
]) ])
export function createMetisAgent(model: string): AgentConfig { export function createMetisAgent(model: string): AgentConfig {

View File

@@ -193,7 +193,7 @@ export function createMomusAgent(model: string): AgentConfig {
"write", "write",
"edit", "edit",
"task", "task",
"delegate_task", "task",
]) ])
const base = { const base = {

View File

@@ -147,7 +147,7 @@ export function createOracleAgent(model: string): AgentConfig {
"write", "write",
"edit", "edit",
"task", "task",
"delegate_task", "task",
]) ])
const base = { const base = {

View File

@@ -15,7 +15,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
\`\`\`typescript \`\`\`typescript
// After generating initial plan // After generating initial plan
while (true) { while (true) {
const result = delegate_task( const result = task(
subagent_type="momus", subagent_type="momus",
prompt=".sisyphus/plans/{name}.md", prompt=".sisyphus/plans/{name}.md",
run_in_background=false run_in_background=false

View File

@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
**Research First:** **Research First:**
\`\`\`typescript \`\`\`typescript
// Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find) // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
delegate_task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
\`\`\` \`\`\`
**Interview Focus:** **Interview Focus:**
@@ -91,9 +91,9 @@ delegate_task(subagent_type="explore", prompt="I'm about to modify [affected cod
\`\`\`typescript \`\`\`typescript
// Launch BEFORE asking user questions // Launch BEFORE asking user questions
// Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
delegate_task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
delegate_task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
\`\`\` \`\`\`
**Interview Focus** (AFTER research): **Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js
Run this check: Run this check:
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
\`\`\` \`\`\`
#### Step 2: Ask the Test Question (MANDATORY) #### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:
**Research First:** **Research First:**
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
\`\`\` \`\`\`
**Oracle Consultation** (recommend when stakes are high): **Oracle Consultation** (recommend when stakes are high):
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false) task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
\`\`\` \`\`\`
**Interview Focus:** **Interview Focus:**
@@ -253,9 +253,9 @@ delegate_task(subagent_type="oracle", prompt="Architecture consultation needed:
**Parallel Investigation:** **Parallel Investigation:**
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
\`\`\` \`\`\`
**Interview Focus:** **Interview Focus:**
@@ -281,17 +281,17 @@ delegate_task(subagent_type="librarian", prompt="I'm looking for battle-tested i
**For Understanding Codebase:** **For Understanding Codebase:**
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
\`\`\` \`\`\`
**For External Knowledge:** **For External Knowledge:**
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
\`\`\` \`\`\`
**For Implementation Examples:** **For Implementation Examples:**
\`\`\`typescript \`\`\`typescript
delegate_task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
\`\`\` \`\`\`
## Interview Mode Anti-Patterns ## Interview Mode Anti-Patterns

View File

@@ -59,7 +59,7 @@ todoWrite([
**BEFORE generating the plan**, summon Metis to catch what you might have missed: **BEFORE generating the plan**, summon Metis to catch what you might have missed:
\`\`\`typescript \`\`\`typescript
delegate_task( task(
subagent_type="metis", subagent_type="metis",
prompt=\`Review this planning session before I generate the work plan: prompt=\`Review this planning session before I generate the work plan:

View File

@@ -214,7 +214,7 @@ Parallel Speedup: ~40% faster than sequential
| Wave | Tasks | Recommended Agents | | Wave | Tasks | Recommended Agents |
|------|-------|-------------------| |------|-------|-------------------|
| 1 | 1, 5 | delegate_task(category="...", load_skills=[...], run_in_background=false) | | 1 | 1, 5 | task(category="...", load_skills=[...], run_in_background=false) |
| 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes | | 2 | 2, 3, 6 | dispatch parallel after Wave 1 completes |
| 3 | 4 | final integration task | | 3 | 4 | final integration task |

View File

@@ -24,7 +24,6 @@ Execute tasks directly. NEVER delegate or spawn other agents.
<Critical_Constraints> <Critical_Constraints>
BLOCKED ACTIONS (will fail if attempted): BLOCKED ACTIONS (will fail if attempted):
- task tool: BLOCKED - task tool: BLOCKED
- delegate_task tool: BLOCKED
ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research. ALLOWED: call_omo_agent - You CAN spawn explore/librarian agents for research.
You work ALONE for implementation. No delegation of implementation tasks. You work ALONE for implementation. No delegation of implementation tasks.

View File

@@ -50,7 +50,6 @@ BLOCKED (will fail if attempted):
| Tool | Status | | Tool | Status |
|------|--------| |------|--------|
| task | BLOCKED | | task | BLOCKED |
| delegate_task | BLOCKED |
ALLOWED: ALLOWED:
| Tool | Usage | | Tool | Usage |

View File

@@ -143,13 +143,12 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
}) })
}) })
describe("tool safety (task/delegate_task blocked, call_omo_agent allowed)", () => { describe("tool safety (task blocked, call_omo_agent allowed)", () => {
test("task and delegate_task remain blocked, call_omo_agent is allowed via tools format", () => { test("task remains blocked, call_omo_agent is allowed via tools format", () => {
// given // given
const override = { const override = {
tools: { tools: {
task: true, task: true,
delegate_task: true,
call_omo_agent: true, call_omo_agent: true,
read: true, read: true,
}, },
@@ -163,25 +162,22 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
const permission = result.permission as Record<string, string> | undefined const permission = result.permission as Record<string, string> | undefined
if (tools) { if (tools) {
expect(tools.task).toBe(false) expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(tools.call_omo_agent).toBe(true) expect(tools.call_omo_agent).toBe(true)
expect(tools.read).toBe(true) expect(tools.read).toBe(true)
} }
if (permission) { if (permission) {
expect(permission.task).toBe("deny") expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
// call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian // call_omo_agent is NOW ALLOWED for subagents to spawn explore/librarian
expect(permission.call_omo_agent).toBe("allow") expect(permission.call_omo_agent).toBe("allow")
} }
}) })
test("task and delegate_task remain blocked when using permission format override", () => { test("task remains blocked when using permission format override", () => {
// given // given
const override = { const override = {
permission: { permission: {
task: "allow", task: "allow",
delegate_task: "allow",
call_omo_agent: "allow", call_omo_agent: "allow",
read: "allow", read: "allow",
}, },
@@ -190,17 +186,15 @@ describe("createSisyphusJuniorAgentWithOverrides", () => {
// when // when
const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0]) const result = createSisyphusJuniorAgentWithOverrides(override as Parameters<typeof createSisyphusJuniorAgentWithOverrides>[0])
// then - task/delegate_task blocked, but call_omo_agent allowed for explore/librarian spawning // then - task blocked, but call_omo_agent allowed for explore/librarian spawning
const tools = result.tools as Record<string, boolean> | undefined const tools = result.tools as Record<string, boolean> | undefined
const permission = result.permission as Record<string, string> | undefined const permission = result.permission as Record<string, string> | undefined
if (tools) { if (tools) {
expect(tools.task).toBe(false) expect(tools.task).toBe(false)
expect(tools.delegate_task).toBe(false)
expect(tools.call_omo_agent).toBe(true) expect(tools.call_omo_agent).toBe(true)
} }
if (permission) { if (permission) {
expect(permission.task).toBe("deny") expect(permission.task).toBe("deny")
expect(permission.delegate_task).toBe("deny")
expect(permission.call_omo_agent).toBe("allow") expect(permission.call_omo_agent).toBe("allow")
} }
}) })

View File

@@ -28,7 +28,7 @@ const MODE: AgentMode = "subagent"
// Core tools that Sisyphus-Junior must NEVER have access to // Core tools that Sisyphus-Junior must NEVER have access to
// Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian // Note: call_omo_agent is ALLOWED so subagents can spawn explore/librarian
const BLOCKED_TOOLS = ["task", "delegate_task"] const BLOCKED_TOOLS = ["task"]
export const SISYPHUS_JUNIOR_DEFAULTS = { export const SISYPHUS_JUNIOR_DEFAULTS = {
model: "anthropic/claude-sonnet-4-5", model: "anthropic/claude-sonnet-4-5",

View File

@@ -214,8 +214,8 @@ ${keyTriggers}
**Delegation Check (MANDATORY before acting directly):** **Delegation Check (MANDATORY before acting directly):**
1. Is there a specialized agent that perfectly matches this request? 1. Is there a specialized agent that perfectly matches this request?
2. If not, is there a \`delegate_task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with? 2. If not, is there a \`task\` category best describes this task? (visual-engineering, ultrabrain, quick etc.) What skills are available to equip the agent with?
- MUST FIND skills to use, for: \`delegate_task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS DELEGATE TASK PARAMETER. - MUST FIND skills to use, for: \`task(load_skills=[{skill1}, ...])\` MUST PASS SKILL AS TASK PARAMETER.
3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH? 3. Can I do it myself for the best result, FOR SURE? REALLY, REALLY, THERE IS NO APPROPRIATE CATEGORIES TO WORK WITH?
**Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.** **Default Bias: DELEGATE. WORK YOURSELF ONLY WHEN IT IS SUPER SIMPLE.**
@@ -277,15 +277,15 @@ ${librarianSection}
// CORRECT: Always background, always parallel // CORRECT: Always background, always parallel
// Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find] // Prompt structure: [CONTEXT: what I'm doing] + [GOAL: what I'm trying to achieve] + [QUESTION: what I need to know] + [REQUEST: what to find]
// Contextual Grep (internal) // Contextual Grep (internal)
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.") task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find auth implementations", prompt="I'm implementing user authentication for our API. I need to understand how auth is currently structured in this codebase. Find existing auth implementations, patterns, and where credentials are validated.")
delegate_task(subagent_type="explore", run_in_background=true, load_skills=[], prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.") task(subagent_type="explore", run_in_background=true, load_skills=[], description="Find error handling patterns", prompt="I'm adding error handling to the auth flow. I want to follow existing project conventions for consistency. Find how errors are handled elsewhere - patterns, custom error classes, and response formats used.")
// Reference Grep (external) // Reference Grep (external)
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.") task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find JWT security docs", prompt="I'm implementing JWT-based auth and need to ensure security best practices. Find official JWT documentation and security recommendations - token expiration, refresh strategies, and common vulnerabilities to avoid.")
delegate_task(subagent_type="librarian", run_in_background=true, load_skills=[], prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.") task(subagent_type="librarian", run_in_background=true, load_skills=[], description="Find Express auth patterns", prompt="I'm building Express middleware for auth and want production-quality patterns. Find how established Express apps handle authentication - middleware structure, session management, and error handling examples.")
// Continue working immediately. Collect with background_output when needed. // Continue working immediately. Collect with background_output when needed.
// WRONG: Sequential or blocking // WRONG: Sequential or blocking
result = delegate_task(..., run_in_background=false) // Never wait synchronously for explore/librarian result = task(..., run_in_background=false) // Never wait synchronously for explore/librarian
\`\`\` \`\`\`
### Background Result Collection: ### Background Result Collection:
@@ -340,7 +340,7 @@ AFTER THE WORK YOU DELEGATED SEEMS DONE, ALWAYS VERIFY THE RESULTS AS FOLLOWING:
### Session Continuity (MANDATORY) ### Session Continuity (MANDATORY)
Every \`delegate_task()\` output includes a session_id. **USE IT.** Every \`task()\` output includes a session_id. **USE IT.**
**ALWAYS continue when:** **ALWAYS continue when:**
| Scenario | Action | | Scenario | Action |
@@ -358,10 +358,10 @@ Every \`delegate_task()\` output includes a session_id. **USE IT.**
\`\`\`typescript \`\`\`typescript
// WRONG: Starting fresh loses all context // WRONG: Starting fresh loses all context
delegate_task(category="quick", load_skills=[], run_in_background=false, prompt="Fix the type error in auth.ts...") task(category="quick", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix the type error in auth.ts...")
// CORRECT: Resume preserves everything // CORRECT: Resume preserves everything
delegate_task(session_id="ses_abc123", prompt="Fix: Type error on line 42") task(session_id="ses_abc123", load_skills=[], run_in_background=false, description="Fix type error", prompt="Fix: Type error on line 42")
\`\`\` \`\`\`
**After EVERY delegation, STORE the session_id for potential continuation.** **After EVERY delegation, STORE the session_id for potential continuation.**

View File

@@ -12,6 +12,7 @@ const AgentPermissionSchema = z.object({
edit: PermissionValue.optional(), edit: PermissionValue.optional(),
bash: BashPermission.optional(), bash: BashPermission.optional(),
webfetch: PermissionValue.optional(), webfetch: PermissionValue.optional(),
task: PermissionValue.optional(),
doom_loop: PermissionValue.optional(), doom_loop: PermissionValue.optional(),
external_directory: PermissionValue.optional(), external_directory: PermissionValue.optional(),
}) })
@@ -183,7 +184,7 @@ export const SisyphusAgentConfigSchema = z.object({
}) })
export const CategoryConfigSchema = z.object({ export const CategoryConfigSchema = z.object({
/** Human-readable description of the category's purpose. Shown in delegate_task prompt. */ /** Human-readable description of the category's purpose. Shown in task prompt. */
description: z.string().optional(), description: z.string().optional(),
model: z.string().optional(), model: z.string().optional(),
variant: z.string().optional(), variant: z.string().optional(),

View File

@@ -56,7 +56,7 @@ features/
## ANTI-PATTERNS ## ANTI-PATTERNS
- **Sequential delegation**: Use `delegate_task` parallel - **Sequential delegation**: Use `task` parallel
- **Trust self-reports**: ALWAYS verify - **Trust self-reports**: ALWAYS verify
- **Main thread blocks**: No heavy I/O in loader init - **Main thread blocks**: No heavy I/O in loader init
- **Direct state mutation**: Use managers for boulder/session state - **Direct state mutation**: Use managers for boulder/session state

View File

@@ -1,8 +1,9 @@
import { describe, test, expect, beforeEach } from "bun:test" declare const require: (name: string) => any
import { afterEach } from "bun:test" const { describe, test, expect, beforeEach, afterEach } = require("bun:test")
import { tmpdir } from "node:os" import { tmpdir } from "node:os"
import type { PluginInput } from "@opencode-ai/plugin" import type { PluginInput } from "@opencode-ai/plugin"
import type { BackgroundTask, ResumeInput } from "./types" import type { BackgroundTask, ResumeInput } from "./types"
import { MIN_IDLE_TIME_MS } from "./constants"
import { BackgroundManager } from "./manager" import { BackgroundManager } from "./manager"
import { ConcurrencyManager } from "./concurrency" import { ConcurrencyManager } from "./concurrency"
@@ -1088,6 +1089,34 @@ describe("BackgroundManager.tryCompleteTask", () => {
// #then // #then
expect(abortedSessionIDs).toEqual(["session-1"]) expect(abortedSessionIDs).toEqual(["session-1"])
}) })
// Regression test: completing a task must remove its entry from the parent's
// pending set even when notifying the parent session rejects.
test("should clean pendingByParent even when notifyParentSession throws", async () => {
// given
// Replace notifyParentSession on this manager instance with a stub that always rejects.
;(manager as unknown as { notifyParentSession: () => Promise<void> }).notifyParentSession = async () => {
throw new Error("notify failed")
}
const task: BackgroundTask = {
id: "task-pending-cleanup",
sessionID: "session-pending-cleanup",
parentSessionID: "parent-pending-cleanup",
parentMessageID: "msg-1",
description: "pending cleanup task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(),
}
// Register the running task and record it as pending under its parent session.
getTaskMap(manager).set(task.id, task)
getPendingByParent(manager).set(task.parentSessionID, new Set([task.id]))
// when
await tryCompleteTaskForTest(manager, task)
// then
// The task is marked completed and the parent no longer has a pending entry.
expect(task.status).toBe("completed")
expect(getPendingByParent(manager).get(task.parentSessionID)).toBeUndefined()
})
}) })
describe("BackgroundManager.trackTask", () => { describe("BackgroundManager.trackTask", () => {
@@ -1110,7 +1139,7 @@ describe("BackgroundManager.trackTask", () => {
sessionID: "session-1", sessionID: "session-1",
parentSessionID: "parent-session", parentSessionID: "parent-session",
description: "external task", description: "external task",
agent: "delegate_task", agent: "task",
concurrencyKey: "external-key", concurrencyKey: "external-key",
} }
@@ -1145,7 +1174,7 @@ describe("BackgroundManager.resume concurrency key", () => {
sessionID: "session-1", sessionID: "session-1",
parentSessionID: "parent-session", parentSessionID: "parent-session",
description: "external task", description: "external task",
agent: "delegate_task", agent: "task",
concurrencyKey: "external-key", concurrencyKey: "external-key",
}) })
@@ -2408,3 +2437,179 @@ describe("BackgroundManager.completionTimers - Memory Leak Fix", () => {
expect(completionTimers.size).toBe(0) expect(completionTimers.size).toBe(0)
}) })
}) })
describe("BackgroundManager.handleEvent - early session.idle deferral", () => {
test("should defer and retry when session.idle fires before MIN_IDLE_TIME_MS", async () => {
  // Verifies that an early session.idle is deferred (not dropped) and that the
  // task completes once the deferred retry runs.
  //#given - a running task started less than MIN_IDLE_TIME_MS ago
  const sessionID = "session-early-idle"
  const messagesCalls: string[] = []
  const realDateNow = Date.now
  const baseNow = realDateNow()
  // How far short of MIN_IDLE_TIME_MS the stubbed clock is when session.idle fires.
  // Both the stubbed clock and the wait below are derived from this value.
  const remainingMs = 100

  const client = {
    session: {
      prompt: async () => ({}),
      abort: async () => ({}),
      // Record which session IDs the manager requests messages for.
      messages: async (args: { path: { id: string } }) => {
        messagesCalls.push(args.path.id)
        return {
          data: [
            {
              info: { role: "assistant" },
              parts: [{ type: "text", text: "ok" }],
            },
          ],
        }
      },
      todo: async () => ({ data: [] }),
    },
  }

  const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
  stubNotifyParentSession(manager)

  const task: BackgroundTask = {
    id: "task-early-idle",
    sessionID,
    parentSessionID: "parent-session",
    parentMessageID: "msg-1",
    description: "early idle task",
    prompt: "test",
    agent: "explore",
    status: "running",
    startedAt: new Date(baseNow),
  }

  getTaskMap(manager).set(task.id, task)

  //#when - session.idle fires
  try {
    Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
    manager.handleEvent({ type: "session.idle", properties: { sessionID } })

    // Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
    Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)

    //#then - idle should be deferred (not dropped), and task should eventually complete
    expect(task.status).toBe("running")
    // Wait past the remaining time, with slack for the async completion path.
    await new Promise((resolve) => setTimeout(resolve, remainingMs + 120))
    expect(task.status).toBe("completed")
    expect(messagesCalls).toEqual([sessionID])
  } finally {
    // Always restore the real clock and release manager timers.
    Date.now = realDateNow
    manager.shutdown()
  }
})
test("should not defer when session.idle fires after MIN_IDLE_TIME_MS", async () => {
  // Verifies that a session.idle arriving after the minimum elapsed time is
  // processed right away instead of being deferred.
  //#given - a running task started more than MIN_IDLE_TIME_MS ago
  const sessionID = "session-late-idle"
  const client = {
    session: {
      prompt: async () => ({}),
      abort: async () => ({}),
      messages: async () => ({
        data: [
          {
            info: { role: "assistant" },
            parts: [{ type: "text", text: "ok" }],
          },
        ],
      }),
      todo: async () => ({ data: [] }),
    },
  }

  const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
  stubNotifyParentSession(manager)

  const task: BackgroundTask = {
    id: "task-late-idle",
    sessionID,
    parentSessionID: "parent-session",
    parentMessageID: "msg-1",
    description: "late idle task",
    prompt: "test",
    agent: "explore",
    status: "running",
    // Backdate the start so the elapsed time already exceeds MIN_IDLE_TIME_MS.
    startedAt: new Date(Date.now() - (MIN_IDLE_TIME_MS + 10)),
  }

  getTaskMap(manager).set(task.id, task)

  try {
    //#when
    manager.handleEvent({ type: "session.idle", properties: { sessionID } })

    //#then - should be processed immediately
    await new Promise((resolve) => setTimeout(resolve, 10))
    expect(task.status).toBe("completed")
  } finally {
    // Shut down even when an assertion fails, matching the sibling tests.
    manager.shutdown()
  }
})
// Verifies that a deferred session.idle retry does nothing when the task was
// already completed through another path before the deferral timer fired.
test("should not process deferred idle if task already completed by other means", async () => {
//#given - a running task
const sessionID = "session-deferred-noop"
// Counts calls to the session.messages stub; asserted to stay at 0 below.
let messagesCallCount = 0
const realDateNow = Date.now
const baseNow = realDateNow()
const client = {
session: {
prompt: async () => ({}),
abort: async () => ({}),
messages: async () => {
messagesCallCount += 1
return {
data: [
{
info: { role: "assistant" },
parts: [{ type: "text", text: "ok" }],
},
],
}
},
todo: async () => ({ data: [] }),
},
}
const manager = new BackgroundManager({ client, directory: tmpdir() } as unknown as PluginInput)
stubNotifyParentSession(manager)
// How far short of MIN_IDLE_TIME_MS the stubbed clock is when session.idle fires.
const remainingMs = 120
const task: BackgroundTask = {
id: "task-deferred-noop",
sessionID,
parentSessionID: "parent-session",
parentMessageID: "msg-1",
description: "deferred noop task",
prompt: "test",
agent: "explore",
status: "running",
startedAt: new Date(baseNow),
}
getTaskMap(manager).set(task.id, task)
//#when - session.idle fires early, then task completes via another path before defer timer
try {
Date.now = () => baseNow + (MIN_IDLE_TIME_MS - remainingMs)
manager.handleEvent({ type: "session.idle", properties: { sessionID } })
// The early idle must not have fetched messages yet.
expect(messagesCallCount).toBe(0)
await tryCompleteTaskForTest(manager, task)
expect(task.status).toBe("completed")
// Advance time so deferred callback (if any) sees elapsed >= MIN_IDLE_TIME_MS
Date.now = () => baseNow + (MIN_IDLE_TIME_MS + 10)
//#then - deferred callback should be a no-op
// Wait longer than remainingMs so any still-scheduled retry would have run by now.
await new Promise((resolve) => setTimeout(resolve, remainingMs + 80))
expect(task.status).toBe("completed")
expect(messagesCallCount).toBe(0)
} finally {
// Always restore the real clock and shut the manager down.
Date.now = realDateNow
manager.shutdown()
}
})
})

View File

@@ -88,6 +88,7 @@ export class BackgroundManager {
private queuesByKey: Map<string, QueueItem[]> = new Map() private queuesByKey: Map<string, QueueItem[]> = new Map()
private processingKeys: Set<string> = new Set() private processingKeys: Set<string> = new Set()
private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map() private completionTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
private idleDeferralTimers: Map<string, ReturnType<typeof setTimeout>> = new Map()
constructor( constructor(
ctx: PluginInput, ctx: PluginInput,
@@ -328,7 +329,6 @@ export class BackgroundManager {
tools: { tools: {
...getAgentToolRestrictions(input.agent), ...getAgentToolRestrictions(input.agent),
task: false, task: false,
delegate_task: false,
call_omo_agent: true, call_omo_agent: true,
question: false, question: false,
}, },
@@ -357,6 +357,7 @@ export class BackgroundManager {
}).catch(() => {}) }).catch(() => {})
this.markForNotification(existingTask) this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => { this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on error:", err) log("[background-agent] Failed to notify on error:", err)
}) })
@@ -410,7 +411,7 @@ export class BackgroundManager {
} }
/** /**
* Track a task created elsewhere (e.g., from delegate_task) for notification tracking. * Track a task created elsewhere (e.g., from task) for notification tracking.
* This allows tasks created by other tools to receive the same toast/prompt notifications. * This allows tasks created by other tools to receive the same toast/prompt notifications.
*/ */
async trackTask(input: { async trackTask(input: {
@@ -458,7 +459,7 @@ export class BackgroundManager {
return existingTask return existingTask
} }
const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "delegate_task" const concurrencyGroup = input.concurrencyKey ?? input.agent ?? "task"
// Acquire concurrency slot if a key is provided // Acquire concurrency slot if a key is provided
if (input.concurrencyKey) { if (input.concurrencyKey) {
@@ -472,7 +473,7 @@ export class BackgroundManager {
parentMessageID: "", parentMessageID: "",
description: input.description, description: input.description,
prompt: "", prompt: "",
agent: input.agent || "delegate_task", agent: input.agent || "task",
status: "running", status: "running",
startedAt: new Date(), startedAt: new Date(),
progress: { progress: {
@@ -587,7 +588,6 @@ export class BackgroundManager {
tools: { tools: {
...getAgentToolRestrictions(existingTask.agent), ...getAgentToolRestrictions(existingTask.agent),
task: false, task: false,
delegate_task: false,
call_omo_agent: true, call_omo_agent: true,
question: false, question: false,
}, },
@@ -614,6 +614,7 @@ export class BackgroundManager {
} }
this.markForNotification(existingTask) this.markForNotification(existingTask)
this.cleanupPendingByParent(existingTask)
this.notifyParentSession(existingTask).catch(err => { this.notifyParentSession(existingTask).catch(err => {
log("[background-agent] Failed to notify on resume error:", err) log("[background-agent] Failed to notify on resume error:", err)
}) })
@@ -651,6 +652,13 @@ export class BackgroundManager {
const task = this.findBySession(sessionID) const task = this.findBySession(sessionID)
if (!task) return if (!task) return
// Clear any pending idle deferral timer since the task is still active
const existingTimer = this.idleDeferralTimers.get(task.id)
if (existingTimer) {
clearTimeout(existingTimer)
this.idleDeferralTimers.delete(task.id)
}
if (partInfo?.type === "tool" || partInfo?.tool) { if (partInfo?.type === "tool" || partInfo?.tool) {
if (!task.progress) { if (!task.progress) {
task.progress = { task.progress = {
@@ -677,7 +685,17 @@ export class BackgroundManager {
// Edge guard: Require minimum elapsed time (5 seconds) before accepting idle // Edge guard: Require minimum elapsed time (5 seconds) before accepting idle
const elapsedMs = Date.now() - startedAt.getTime() const elapsedMs = Date.now() - startedAt.getTime()
if (elapsedMs < MIN_IDLE_TIME_MS) { if (elapsedMs < MIN_IDLE_TIME_MS) {
log("[background-agent] Ignoring early session.idle, elapsed:", { elapsedMs, taskId: task.id }) const remainingMs = MIN_IDLE_TIME_MS - elapsedMs
if (!this.idleDeferralTimers.has(task.id)) {
log("[background-agent] Deferring early session.idle:", { elapsedMs, remainingMs, taskId: task.id })
const timer = setTimeout(() => {
this.idleDeferralTimers.delete(task.id)
this.handleEvent({ type: "session.idle", properties: { sessionID } })
}, remainingMs)
this.idleDeferralTimers.set(task.id, timer)
} else {
log("[background-agent] session.idle already deferred:", { elapsedMs, taskId: task.id })
}
return return
} }
@@ -736,6 +754,12 @@ export class BackgroundManager {
clearTimeout(existingTimer) clearTimeout(existingTimer)
this.completionTimers.delete(task.id) this.completionTimers.delete(task.id)
} }
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task) this.cleanupPendingByParent(task)
this.tasks.delete(task.id) this.tasks.delete(task.id)
this.clearNotificationsForTask(task.id) this.clearNotificationsForTask(task.id)
@@ -890,6 +914,12 @@ export class BackgroundManager {
this.completionTimers.delete(task.id) this.completionTimers.delete(task.id)
} }
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
this.cleanupPendingByParent(task) this.cleanupPendingByParent(task)
if (abortSession && task.sessionID) { if (abortSession && task.sessionID) {
@@ -1025,6 +1055,15 @@ export class BackgroundManager {
this.markForNotification(task) this.markForNotification(task)
// Ensure pending tracking is cleaned up even if notification fails
this.cleanupPendingByParent(task)
const idleTimer = this.idleDeferralTimers.get(task.id)
if (idleTimer) {
clearTimeout(idleTimer)
this.idleDeferralTimers.delete(task.id)
}
if (task.sessionID) { if (task.sessionID) {
this.client.session.abort({ this.client.session.abort({
path: { id: task.sessionID }, path: { id: task.sessionID },
@@ -1511,6 +1550,11 @@ Use \`background_output(task_id="${task.id}")\` to retrieve this result when rea
} }
this.completionTimers.clear() this.completionTimers.clear()
for (const timer of this.idleDeferralTimers.values()) {
clearTimeout(timer)
}
this.idleDeferralTimers.clear()
this.concurrencyManager.clear() this.concurrencyManager.clear()
this.tasks.clear() this.tasks.clear()
this.notifications.clear() this.notifications.clear()

View File

@@ -146,7 +146,6 @@ export async function startTask(
tools: { tools: {
...getAgentToolRestrictions(input.agent), ...getAgentToolRestrictions(input.agent),
task: false, task: false,
delegate_task: false,
call_omo_agent: true, call_omo_agent: true,
question: false, question: false,
}, },
@@ -231,7 +230,6 @@ export async function resumeTask(
tools: { tools: {
...getAgentToolRestrictions(task.agent), ...getAgentToolRestrictions(task.agent),
task: false, task: false,
delegate_task: false,
call_omo_agent: true, call_omo_agent: true,
question: false, question: false,
}, },

View File

@@ -45,12 +45,12 @@ Don't wait—these run async while main session works.
\`\`\` \`\`\`
// Fire all at once, collect results later // Fire all at once, collect results later
delegate_task(agent="explore", prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only") task(subagent_type="explore", load_skills=[], description="Explore project structure", run_in_background=true, prompt="Project structure: PREDICT standard patterns for detected language → REPORT deviations only")
delegate_task(agent="explore", prompt="Entry points: FIND main files → REPORT non-standard organization") task(subagent_type="explore", load_skills=[], description="Find entry points", run_in_background=true, prompt="Entry points: FIND main files → REPORT non-standard organization")
delegate_task(agent="explore", prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules") task(subagent_type="explore", load_skills=[], description="Find conventions", run_in_background=true, prompt="Conventions: FIND config files (.eslintrc, pyproject.toml, .editorconfig) → REPORT project-specific rules")
delegate_task(agent="explore", prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns") task(subagent_type="explore", load_skills=[], description="Find anti-patterns", run_in_background=true, prompt="Anti-patterns: FIND 'DO NOT', 'NEVER', 'ALWAYS', 'DEPRECATED' comments → LIST forbidden patterns")
delegate_task(agent="explore", prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns") task(subagent_type="explore", load_skills=[], description="Explore build/CI", run_in_background=true, prompt="Build/CI: FIND .github/workflows, Makefile → REPORT non-standard patterns")
delegate_task(agent="explore", prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions") task(subagent_type="explore", load_skills=[], description="Find test patterns", run_in_background=true, prompt="Test patterns: FIND test configs, test structure → REPORT unique conventions")
\`\`\` \`\`\`
<dynamic-agents> <dynamic-agents>
@@ -76,9 +76,9 @@ max_depth=$(find . -type d -not -path '*/node_modules/*' -not -path '*/.git/*' |
Example spawning: Example spawning:
\`\`\` \`\`\`
// 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents // 500 files, 50k lines, depth 6, 15 large files → spawn 5+5+2+1 = 13 additional agents
delegate_task(agent="explore", prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots") task(subagent_type="explore", load_skills=[], description="Analyze large files", run_in_background=true, prompt="Large file analysis: FIND files >500 lines, REPORT complexity hotspots")
delegate_task(agent="explore", prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions") task(subagent_type="explore", load_skills=[], description="Explore deep modules", run_in_background=true, prompt="Deep modules at depth 4+: FIND hidden patterns, internal conventions")
delegate_task(agent="explore", prompt="Cross-cutting concerns: FIND shared utilities across directories") task(subagent_type="explore", load_skills=[], description="Find shared utilities", run_in_background=true, prompt="Cross-cutting concerns: FIND shared utilities across directories")
// ... more based on calculation // ... more based on calculation
\`\`\` \`\`\`
</dynamic-agents> </dynamic-agents>
@@ -185,6 +185,11 @@ AGENTS_LOCATIONS = [
**Mark "generate" as in_progress.** **Mark "generate" as in_progress.**
<critical>
**File Writing Rule**: If AGENTS.md already exists at the target path → use \`Edit\` tool. If it does NOT exist → use \`Write\` tool.
NEVER use Write to overwrite an existing file. ALWAYS check existence first via \`Read\` or discovery results.
</critical>
### Root AGENTS.md (Full Treatment) ### Root AGENTS.md (Full Treatment)
\`\`\`markdown \`\`\`markdown
@@ -240,7 +245,7 @@ Launch writing tasks for each location:
\`\`\` \`\`\`
for loc in AGENTS_LOCATIONS (except root): for loc in AGENTS_LOCATIONS (except root):
delegate_task(category="writing", load_skills=[], run_in_background=false, prompt=\\\` task(category="writing", load_skills=[], run_in_background=false, description="Generate AGENTS.md", prompt=\\\`
Generate AGENTS.md for: \${loc.path} Generate AGENTS.md for: \${loc.path}
- Reason: \${loc.reason} - Reason: \${loc.reason}
- 30-80 lines max - 30-80 lines max

View File

@@ -1,6 +1,6 @@
--- ---
name: git-master name: git-master
description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'." description: "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'."
--- ---
# Git Master Agent # Git Master Agent

View File

@@ -3,7 +3,7 @@ import type { BuiltinSkill } from "../types"
export const gitMasterSkill: BuiltinSkill = { export const gitMasterSkill: BuiltinSkill = {
name: "git-master", name: "git-master",
description: description:
"MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with delegate_task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.", "MUST USE for ANY git operations. Atomic commits, rebase/squash, history search (blame, bisect, log -S). STRONGLY RECOMMENDED: Use with task(category='quick', load_skills=['git-master'], ...) to save context. Triggers: 'commit', 'rebase', 'squash', 'who wrote', 'when was X added', 'find the commit that'.",
template: `# Git Master Agent template: `# Git Master Agent
You are a Git expert combining three specializations: You are a Git expert combining three specializations:

View File

@@ -0,0 +1,111 @@
import { describe, test, expect, beforeEach } from "bun:test"
import {
storeToolMetadata,
consumeToolMetadata,
getPendingStoreSize,
clearPendingStore,
} from "./index"
describe("tool-metadata-store", () => {
  // The store is module-level state, so wipe it before every test.
  beforeEach(() => clearPendingStore())

  describe("storeToolMetadata", () => {
    test("#given metadata with title and metadata, #when stored, #then store size increases", () => {
      //#given
      const payload = {
        title: "Test Task",
        metadata: { sessionId: "ses_child", agent: "oracle" },
      }

      //#when
      storeToolMetadata("ses_abc123", "call_001", payload)

      //#then
      expect(getPendingStoreSize()).toBe(1)
    })
  })

  describe("consumeToolMetadata", () => {
    test("#given stored metadata, #when consumed, #then returns the stored data", () => {
      //#given
      const payload = {
        title: "My Task",
        metadata: { sessionId: "ses_sub", run_in_background: true },
      }
      storeToolMetadata("ses_abc123", "call_001", payload)

      //#when
      const consumed = consumeToolMetadata("ses_abc123", "call_001")

      //#then
      expect(consumed).toEqual(payload)
    })

    test("#given stored metadata, #when consumed twice, #then second call returns undefined", () => {
      //#given
      const [session, call] = ["ses_abc123", "call_001"]
      storeToolMetadata(session, call, { title: "Task" })

      //#when
      consumeToolMetadata(session, call)
      const repeated = consumeToolMetadata(session, call)

      //#then
      expect(repeated).toBeUndefined()
      expect(getPendingStoreSize()).toBe(0)
    })

    test("#given no stored metadata, #when consumed, #then returns undefined", () => {
      //#given - nothing has been stored for this pair

      //#when
      const missing = consumeToolMetadata("ses_nonexistent", "call_999")

      //#then
      expect(missing).toBeUndefined()
    })
  })

  describe("isolation", () => {
    test("#given multiple entries, #when consuming one, #then others remain", () => {
      //#given
      storeToolMetadata("ses_1", "call_a", { title: "Task A" })
      storeToolMetadata("ses_1", "call_b", { title: "Task B" })
      storeToolMetadata("ses_2", "call_a", { title: "Task C" })

      //#when
      const first = consumeToolMetadata("ses_1", "call_a")

      //#then - only the requested entry is removed
      expect(first?.title).toBe("Task A")
      expect(getPendingStoreSize()).toBe(2)

      // the remaining entries are still individually retrievable
      const sameSessionOtherCall = consumeToolMetadata("ses_1", "call_b")
      expect(sameSessionOtherCall?.title).toBe("Task B")
      const otherSessionSameCall = consumeToolMetadata("ses_2", "call_a")
      expect(otherSessionSameCall?.title).toBe("Task C")
      expect(getPendingStoreSize()).toBe(0)
    })
  })

  describe("overwrite", () => {
    test("#given existing entry, #when stored again with same key, #then overwrites", () => {
      //#given
      storeToolMetadata("ses_1", "call_a", { title: "Old" })

      //#when
      storeToolMetadata("ses_1", "call_a", { title: "New", metadata: { updated: true } })

      //#then
      const latest = consumeToolMetadata("ses_1", "call_a")
      expect(latest?.title).toBe("New")
      expect(latest?.metadata).toEqual({ updated: true })
    })
  })
})

View File

@@ -0,0 +1,84 @@
/**
* Pending tool metadata store.
*
* OpenCode's `fromPlugin()` wrapper always replaces the metadata returned by
* plugin tools with `{ truncated, outputPath }`, discarding any sessionId,
* title, or custom metadata set during `execute()`.
*
* This store captures metadata written via `ctx.metadata()` inside execute(),
* then the `tool.execute.after` hook consumes it and merges it back into the
* result *before* the processor writes the final part to the session store.
*
* Flow:
* execute() → storeToolMetadata(sessionID, callID, data)
* fromPlugin() → overwrites metadata with { truncated }
* tool.execute.after → consumeToolMetadata(sessionID, callID) → merges back
* processor → Session.updatePart(status:"completed", metadata: result.metadata)
*/
/**
 * Title and custom metadata captured for a single tool call so they can be
 * merged back into the tool result after the plugin wrapper has replaced the
 * metadata returned by the tool.
 */
export interface PendingToolMetadata {
/** Title recorded for the tool call, if one was set. */
title?: string
/** Arbitrary key/value metadata recorded for the tool call, if any was set. */
metadata?: Record<string, unknown>
}
/** Age, in milliseconds (15 minutes), beyond which a stored entry counts as stale. */
const STALE_TIMEOUT_MS = 15 * 60 * 1000

/** A stored payload together with the `Date.now()` timestamp taken when it was stored. */
type StoredEntry = PendingToolMetadata & { storedAt: number }

/** Module-level store of pending entries, keyed by the string built in `makeKey`. */
const pendingStore: Map<string, StoredEntry> = new Map()
/**
 * Build the composite store key for a (session, tool call) pair.
 *
 * The pair is JSON-encoded as a two-element array rather than joined with a
 * delimiter: a plain `${sessionID}:${callID}` join maps both ("a:b", "c") and
 * ("a", "b:c") to "a:b:c", which would let two different calls overwrite or
 * consume each other's entry. JSON escaping keeps the two components
 * unambiguous whatever characters the IDs contain.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the tool call within that session.
 * @returns A string that is equal for two pairs only if both IDs are equal.
 */
function makeKey(sessionID: string, callID: string): string {
  return JSON.stringify([sessionID, callID])
}
/**
 * Evict every entry that was stored more than STALE_TIMEOUT_MS ago.
 *
 * Entries are only removed here or when they are consumed, so this sweep is
 * what bounds the store when a stored entry is never consumed.
 */
function cleanupStaleEntries(): void {
  // An entry is stale when its timestamp lies strictly before this cutoff.
  const cutoff = Date.now() - STALE_TIMEOUT_MS
  pendingStore.forEach((entry, key) => {
    if (entry.storedAt < cutoff) {
      pendingStore.delete(key)
    }
  })
}
/**
 * Remember the metadata of one tool call so it can be merged back after
 * fromPlugin() has overwritten it. Meant to be called from a tool's execute()
 * next to ctx.metadata().
 *
 * Stale entries are swept first; an existing entry for the same
 * session/call pair is replaced.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the tool call.
 * @param data - Title and/or metadata to keep until consumed.
 */
export function storeToolMetadata(
  sessionID: string,
  callID: string,
  data: PendingToolMetadata,
): void {
  cleanupStaleEntries()

  const key = makeKey(sessionID, callID)
  // Shallow copy of the payload, stamped with the store time for staleness checks.
  const stamped = { ...data, storedAt: Date.now() }
  pendingStore.set(key, stamped)
}
/**
 * Take the stored metadata for one tool call out of the store.
 * Meant to be called from the tool.execute.after hook.
 *
 * The read is one-shot: the entry is deleted as it is returned, so a second
 * call for the same pair yields undefined.
 *
 * @param sessionID - ID of the session the tool call belongs to.
 * @param callID - ID of the tool call.
 * @returns The stored title/metadata without the internal timestamp, or
 *   undefined when nothing is stored for this pair.
 */
export function consumeToolMetadata(
  sessionID: string,
  callID: string,
): PendingToolMetadata | undefined {
  const key = makeKey(sessionID, callID)
  const entry = pendingStore.get(key)
  if (!entry) {
    return undefined
  }

  pendingStore.delete(key)
  // Drop the bookkeeping timestamp; callers only get what they stored.
  const { storedAt: _storedAt, ...payload } = entry
  return payload
}
/**
 * Report how many entries are currently held in the pending store.
 * Intended for testing and debugging.
 *
 * @returns The number of entries currently in the store.
 */
export function getPendingStoreSize(): number {
return pendingStore.size
}
/**
 * Remove every entry from the pending store, consumed or not.
 * Intended for testing, e.g. to reset state between test cases.
 */
export function clearPendingStore(): void {
pendingStore.clear()
}

View File

@@ -24,7 +24,7 @@ export const TARGET_TOOLS = new Set([
export const AGENT_TOOLS = new Set([ export const AGENT_TOOLS = new Set([
"task", "task",
"call_omo_agent", "call_omo_agent",
"delegate_task", "task",
]); ]);
export const REMINDER_MESSAGE = ` export const REMINDER_MESSAGE = `
@@ -32,13 +32,13 @@ export const REMINDER_MESSAGE = `
You called a search/fetch tool directly without leveraging specialized agents. You called a search/fetch tool directly without leveraging specialized agents.
RECOMMENDED: Use delegate_task with explore/librarian agents for better results: RECOMMENDED: Use task with explore/librarian agents for better results:
\`\`\` \`\`\`
// Parallel exploration - fire multiple agents simultaneously // Parallel exploration - fire multiple agents simultaneously
delegate_task(agent="explore", prompt="Find all files matching pattern X") task(agent="explore", prompt="Find all files matching pattern X")
delegate_task(agent="explore", prompt="Search for implementation of Y") task(agent="explore", prompt="Search for implementation of Y")
delegate_task(agent="librarian", prompt="Lookup documentation for Z") task(agent="librarian", prompt="Lookup documentation for Z")
// Then continue your work while they run in background // Then continue your work while they run in background
// System will notify you when each completes // System will notify you when each completes
@@ -50,5 +50,5 @@ WHY:
- Specialized agents have domain expertise - Specialized agents have domain expertise
- Reduces context window usage in main session - Reduces context window usage in main session
ALWAYS prefer: Multiple parallel delegate_task calls > Direct tool calls ALWAYS prefer: Multiple parallel task calls > Direct tool calls
`; `;

View File

@@ -86,7 +86,7 @@ describe("atlas hook", () => {
// when - calling with undefined output // when - calling with undefined output
const result = await hook["tool.execute.after"]( const result = await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID: "session-123" }, { tool: "task", sessionID: "session-123" },
undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> } undefined as unknown as { title: string; output: string; metadata: Record<string, unknown> }
) )
@@ -94,8 +94,8 @@ describe("atlas hook", () => {
expect(result).toBeUndefined() expect(result).toBeUndefined()
}) })
test("should ignore non-delegate_task tools", async () => { test("should ignore non-task tools", async () => {
// given - hook and non-delegate_task tool // given - hook and non-task tool
const hook = createAtlasHook(createMockPluginInput()) const hook = createAtlasHook(createMockPluginInput())
const output = { const output = {
title: "Test Tool", title: "Test Tool",
@@ -138,7 +138,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -162,14 +162,14 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
// then - standalone verification reminder appended // then - standalone verification reminder appended
expect(output.output).toContain("Task completed successfully") expect(output.output).toContain("Task completed successfully")
expect(output.output).toContain("MANDATORY:") expect(output.output).toContain("MANDATORY:")
expect(output.output).toContain("delegate_task(session_id=") expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
@@ -199,7 +199,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -208,7 +208,7 @@ describe("atlas hook", () => {
expect(output.output).toContain("SUBAGENT WORK COMPLETED") expect(output.output).toContain("SUBAGENT WORK COMPLETED")
expect(output.output).toContain("test-plan") expect(output.output).toContain("test-plan")
expect(output.output).toContain("LIE") expect(output.output).toContain("LIE")
expect(output.output).toContain("delegate_task(session_id=") expect(output.output).toContain("task(session_id=")
cleanupMessageStorage(sessionID) cleanupMessageStorage(sessionID)
}) })
@@ -238,7 +238,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -275,7 +275,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -311,7 +311,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -348,7 +348,7 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
@@ -385,12 +385,12 @@ describe("atlas hook", () => {
// when // when
await hook["tool.execute.after"]( await hook["tool.execute.after"](
{ tool: "delegate_task", sessionID }, { tool: "task", sessionID },
output output
) )
// then - should include session_id instructions and verification // then - should include session_id instructions and verification
expect(output.output).toContain("delegate_task(session_id=") expect(output.output).toContain("task(session_id=")
expect(output.output).toContain("[x]") expect(output.output).toContain("[x]")
expect(output.output).toContain("MANDATORY:") expect(output.output).toContain("MANDATORY:")
@@ -425,8 +425,8 @@ describe("atlas hook", () => {
// then // then
expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER") expect(output.output).toContain("ORCHESTRATOR, not an IMPLEMENTER")
expect(output.output).toContain("delegate_task") expect(output.output).toContain("task")
expect(output.output).toContain("delegate_task") expect(output.output).toContain("task")
}) })
test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => { test("should append delegation reminder when orchestrator edits outside .sisyphus/", async () => {

View File

@@ -44,7 +44,7 @@ You just performed direct file modifications outside \`.sisyphus/\`.
**You are an ORCHESTRATOR, not an IMPLEMENTER.** **You are an ORCHESTRATOR, not an IMPLEMENTER.**
As an orchestrator, you should: As an orchestrator, you should:
- **DELEGATE** implementation work to subagents via \`delegate_task\` - **DELEGATE** implementation work to subagents via \`task\`
- **VERIFY** the work done by subagents - **VERIFY** the work done by subagents
- **COORDINATE** multiple tasks and ensure completion - **COORDINATE** multiple tasks and ensure completion
@@ -54,7 +54,7 @@ You should NOT:
- Implement features yourself - Implement features yourself
**If you need to make changes:** **If you need to make changes:**
1. Use \`delegate_task\` to delegate to an appropriate subagent 1. Use \`task\` to delegate to an appropriate subagent
2. Provide clear instructions in the prompt 2. Provide clear instructions in the prompt
3. Verify the subagent's work after completion 3. Verify the subagent's work after completion
@@ -128,7 +128,7 @@ You (Atlas) are attempting to directly modify a file outside \`.sisyphus/\`.
**THIS IS FORBIDDEN** (except for VERIFICATION purposes) **THIS IS FORBIDDEN** (except for VERIFICATION purposes)
As an ORCHESTRATOR, you MUST: As an ORCHESTRATOR, you MUST:
1. **DELEGATE** all implementation work via \`delegate_task\` 1. **DELEGATE** all implementation work via \`task\`
2. **VERIFY** the work done by subagents (reading files is OK) 2. **VERIFY** the work done by subagents (reading files is OK)
3. **COORDINATE** - you orchestrate, you don't implement 3. **COORDINATE** - you orchestrate, you don't implement
@@ -146,11 +146,11 @@ As an ORCHESTRATOR, you MUST:
**IF THIS IS FOR VERIFICATION:** **IF THIS IS FOR VERIFICATION:**
Proceed if you are verifying subagent work by making a small fix. Proceed if you are verifying subagent work by making a small fix.
But for any substantial changes, USE \`delegate_task\`. But for any substantial changes, USE \`task\`.
**CORRECT APPROACH:** **CORRECT APPROACH:**
\`\`\` \`\`\`
delegate_task( task(
category="...", category="...",
prompt="[specific single task with clear acceptance criteria]" prompt="[specific single task with clear acceptance criteria]"
) )
@@ -193,7 +193,7 @@ function buildVerificationReminder(sessionId: string): string {
**If ANY verification fails, use this immediately:** **If ANY verification fails, use this immediately:**
\`\`\` \`\`\`
delegate_task(session_id="${sessionId}", prompt="fix: [describe the specific failure]") task(session_id="${sessionId}", prompt="fix: [describe the specific failure]")
\`\`\`` \`\`\``
} }
@@ -688,12 +688,12 @@ export function createAtlasHook(
return return
} }
// Check delegate_task - inject single-task directive // Check task - inject single-task directive
if (input.tool === "delegate_task") { if (input.tool === "task") {
const prompt = output.args.prompt as string | undefined const prompt = output.args.prompt as string | undefined
if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) { if (prompt && !prompt.includes(SYSTEM_DIRECTIVE_PREFIX)) {
output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt output.args.prompt = `<system-reminder>${SINGLE_TASK_DIRECTIVE}</system-reminder>\n` + prompt
log(`[${HOOK_NAME}] Injected single-task directive to delegate_task`, { log(`[${HOOK_NAME}] Injected single-task directive to task`, {
sessionID: input.sessionID, sessionID: input.sessionID,
}) })
} }
@@ -732,7 +732,7 @@ export function createAtlasHook(
return return
} }
if (input.tool !== "delegate_task") { if (input.tool !== "task") {
return return
} }

View File

@@ -50,7 +50,7 @@ describe("category-skill-reminder hook", () => {
// then - reminder should be injected // then - reminder should be injected
expect(output.output).toContain("[Category+Skill Reminder]") expect(output.output).toContain("[Category+Skill Reminder]")
expect(output.output).toContain("delegate_task") expect(output.output).toContain("task")
clearSessionAgent(sessionID) clearSessionAgent(sessionID)
}) })
@@ -130,16 +130,16 @@ describe("category-skill-reminder hook", () => {
}) })
describe("delegation tool tracking", () => { describe("delegation tool tracking", () => {
test("should NOT inject reminder if delegate_task is used", async () => { test("should NOT inject reminder if task is used", async () => {
// given - sisyphus agent that uses delegate_task // given - sisyphus agent that uses task
const hook = createHook() const hook = createHook()
const sessionID = "delegation-session" const sessionID = "delegation-session"
updateSessionAgent(sessionID, "Sisyphus") updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} } const output = { title: "", output: "result", metadata: {} }
// when - delegate_task is used, then more tool calls // when - task is used, then more tool calls
await hook["tool.execute.after"]({ tool: "delegate_task", sessionID, callID: "1" }, output) await hook["tool.execute.after"]({ tool: "task", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)
@@ -329,15 +329,15 @@ describe("category-skill-reminder hook", () => {
}) })
test("should handle delegation tool names case-insensitively", async () => { test("should handle delegation tool names case-insensitively", async () => {
// given - sisyphus agent using DELEGATE_TASK in uppercase // given - sisyphus agent using TASK in uppercase
const hook = createHook() const hook = createHook()
const sessionID = "case-delegate-session" const sessionID = "case-delegate-session"
updateSessionAgent(sessionID, "Sisyphus") updateSessionAgent(sessionID, "Sisyphus")
const output = { title: "", output: "result", metadata: {} } const output = { title: "", output: "result", metadata: {} }
// when - DELEGATE_TASK in uppercase is used // when - TASK in uppercase is used
await hook["tool.execute.after"]({ tool: "DELEGATE_TASK", sessionID, callID: "1" }, output) await hook["tool.execute.after"]({ tool: "TASK", sessionID, callID: "1" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "2" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "3" }, output)
await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output) await hook["tool.execute.after"]({ tool: "edit", sessionID, callID: "4" }, output)

View File

@@ -30,9 +30,8 @@ const DELEGATABLE_WORK_TOOLS = new Set([
* Tools that indicate the agent is already using delegation properly. * Tools that indicate the agent is already using delegation properly.
*/ */
const DELEGATION_TOOLS = new Set([ const DELEGATION_TOOLS = new Set([
"delegate_task", "task",
"call_omo_agent", "call_omo_agent",
"task",
]) ])
function formatSkillNames(skills: AvailableSkill[], limit: number): string { function formatSkillNames(skills: AvailableSkill[], limit: number): string {
@@ -63,7 +62,7 @@ function buildReminderMessage(availableSkills: AvailableSkill[]): string {
"> User-installed skills OVERRIDE built-in defaults. ALWAYS prefer YOUR SKILLS when domain matches.", "> User-installed skills OVERRIDE built-in defaults. ALWAYS prefer YOUR SKILLS when domain matches.",
"", "",
"```typescript", "```typescript",
`delegate_task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`, `task(category=\"visual-engineering\", load_skills=${loadSkills}, run_in_background=true)`,
"```", "```",
"", "",
] ]

View File

@@ -257,7 +257,7 @@ export function createClaudeCodeHooksHook(
const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {} const cachedInput = getToolInput(input.sessionID, input.tool, input.callID) || {}
// Use metadata if available and non-empty, otherwise wrap output.output in a structured object // Use metadata if available and non-empty, otherwise wrap output.output in a structured object
// This ensures plugin tools (call_omo_agent, delegate_task, task) that return strings // This ensures plugin tools (call_omo_agent, task) that return strings
// get their results properly recorded in transcripts instead of empty {} // get their results properly recorded in transcripts instead of empty {}
const metadata = output.metadata as Record<string, unknown> | undefined const metadata = output.metadata as Record<string, unknown> | undefined
const hasMetadata = metadata && typeof metadata === "object" && Object.keys(metadata).length > 0 const hasMetadata = metadata && typeof metadata === "object" && Object.keys(metadata).length > 0

View File

@@ -8,7 +8,7 @@ import {
describe("sisyphus-task-retry", () => { describe("sisyphus-task-retry", () => {
describe("DELEGATE_TASK_ERROR_PATTERNS", () => { describe("DELEGATE_TASK_ERROR_PATTERNS", () => {
// given error patterns are defined // given error patterns are defined
// then should include all known delegate_task error types // then should include all known task error types
it("should contain all known error patterns", () => { it("should contain all known error patterns", () => {
expect(DELEGATE_TASK_ERROR_PATTERNS.length).toBeGreaterThan(5) expect(DELEGATE_TASK_ERROR_PATTERNS.length).toBeGreaterThan(5)

View File

@@ -45,7 +45,7 @@ export const DELEGATE_TASK_ERROR_PATTERNS: DelegateTaskErrorPattern[] = [
{ {
pattern: "Cannot call primary agent", pattern: "Cannot call primary agent",
errorType: "primary_agent", errorType: "primary_agent",
fixHint: "Primary agents cannot be called via delegate_task. Use a subagent like 'explore', 'oracle', or 'librarian'", fixHint: "Primary agents cannot be called via task. Use a subagent like 'explore', 'oracle', or 'librarian'",
}, },
{ {
pattern: "Skills not found", pattern: "Skills not found",
@@ -85,11 +85,11 @@ export function buildRetryGuidance(errorInfo: DetectedError): string {
) )
if (!pattern) { if (!pattern) {
return `[delegate_task ERROR] Fix the error and retry with correct parameters.` return `[task ERROR] Fix the error and retry with correct parameters.`
} }
let guidance = ` let guidance = `
[delegate_task CALL FAILED - IMMEDIATE RETRY REQUIRED] [task CALL FAILED - IMMEDIATE RETRY REQUIRED]
**Error Type**: ${errorInfo.errorType} **Error Type**: ${errorInfo.errorType}
**Fix**: ${pattern.fixHint} **Fix**: ${pattern.fixHint}
@@ -101,11 +101,11 @@ export function buildRetryGuidance(errorInfo: DetectedError): string {
} }
guidance += ` guidance += `
**Action**: Retry delegate_task NOW with corrected parameters. **Action**: Retry task NOW with corrected parameters.
Example of CORRECT call: Example of CORRECT call:
\`\`\` \`\`\`
delegate_task( task(
description="Task description", description="Task description",
prompt="Detailed prompt...", prompt="Detailed prompt...",
category="unspecified-low", // OR subagent_type="explore" category="unspecified-low", // OR subagent_type="explore"
@@ -124,7 +124,7 @@ export function createDelegateTaskRetryHook(_ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string }, input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown } output: { title: string; output: string; metadata: unknown }
) => { ) => {
if (input.tool.toLowerCase() !== "delegate_task") return if (input.tool.toLowerCase() !== "task") return
const errorInfo = detectDelegateTaskError(output.output) const errorInfo = detectDelegateTaskError(output.output)
if (errorInfo) { if (errorInfo) {

View File

@@ -15,7 +15,7 @@ export function createEmptyTaskResponseDetectorHook(_ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string }, input: { tool: string; sessionID: string; callID: string },
output: { title: string; output: string; metadata: unknown } output: { title: string; output: string; metadata: unknown }
) => { ) => {
if (input.tool !== "Task") return if (input.tool !== "Task" && input.tool !== "task") return
const responseText = output.output?.trim() ?? "" const responseText = output.output?.trim() ?? ""

View File

@@ -2,7 +2,7 @@
* Default ultrawork message optimized for Claude series models. * Default ultrawork message optimized for Claude series models.
* *
* Key characteristics: * Key characteristics:
* - Natural tool-like usage of explore/librarian agents (background=true) * - Natural tool-like usage of explore/librarian agents (run_in_background=true)
* - Parallel execution emphasized - fire agents and continue working * - Parallel execution emphasized - fire agents and continue working
* - Simple workflow: EXPLORES → GATHER → PLAN → DELEGATE * - Simple workflow: EXPLORES → GATHER → PLAN → DELEGATE
*/ */
@@ -44,9 +44,9 @@ export const ULTRAWORK_DEFAULT_MESSAGE = `<ultrawork-mode>
**WHEN IN DOUBT:** **WHEN IN DOUBT:**
\`\`\` \`\`\`
delegate_task(subagent_type="explore", load_skills=[], prompt="Find [X] patterns in codebase", run_in_background=true) task(subagent_type="explore", load_skills=[], prompt="Find [X] patterns in codebase", run_in_background=true)
delegate_task(subagent_type="librarian", load_skills=[], prompt="Find docs/examples for [Y]", run_in_background=true) task(subagent_type="librarian", load_skills=[], prompt="Find docs/examples for [Y]", run_in_background=true)
delegate_task(subagent_type="oracle", load_skills=[], prompt="Review my approach: [describe plan]", run_in_background=false) task(subagent_type="oracle", load_skills=[], prompt="Review my approach: [describe plan]", run_in_background=false)
\`\`\` \`\`\`
**ONLY AFTER YOU HAVE:** **ONLY AFTER YOU HAVE:**
@@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
| Architecture decision needed | MUST call plan agent | | Architecture decision needed | MUST call plan agent |
\`\`\` \`\`\`
delegate_task(subagent_type="plan", prompt="<gathered context + user request>") task(subagent_type="plan", prompt="<gathered context + user request>")
\`\`\` \`\`\`
**WHY PLAN AGENT IS MANDATORY:** **WHY PLAN AGENT IS MANDATORY:**
@@ -119,9 +119,9 @@ delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
| Scenario | Action | | Scenario | Action |
|----------|--------| |----------|--------|
| Plan agent asks clarifying questions | \`delegate_task(session_id="{returned_session_id}", prompt="<your answer>")\` | | Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="<your answer>")\` |
| Need to refine the plan | \`delegate_task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` | | Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
| Plan needs more detail | \`delegate_task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` | | Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
**WHY SESSION_ID IS CRITICAL:** **WHY SESSION_ID IS CRITICAL:**
- Plan agent retains FULL conversation context - Plan agent retains FULL conversation context
@@ -131,10 +131,10 @@ delegate_task(subagent_type="plan", prompt="<gathered context + user request>")
\`\`\` \`\`\`
// WRONG: Starting fresh loses all context // WRONG: Starting fresh loses all context
delegate_task(subagent_type="plan", prompt="Here's more info...") task(subagent_type="plan", prompt="Here's more info...")
// CORRECT: Resume preserves everything // CORRECT: Resume preserves everything
delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
\`\`\` \`\`\`
**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.** **FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
@@ -147,23 +147,23 @@ delegate_task(session_id="ses_abc123", prompt="Here's my answer to your question
| Task Type | Action | Why | | Task Type | Action | Why |
|-----------|--------|-----| |-----------|--------|-----|
| Codebase exploration | delegate_task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient | | Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
| Documentation lookup | delegate_task(subagent_type="librarian", run_in_background=true) | Specialized knowledge | | Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
| Planning | delegate_task(subagent_type="plan") | Parallel task graph + structured TODO list | | Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list |
| Hard problem (conventional) | delegate_task(subagent_type="oracle") | Architecture, debugging, complex logic | | Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic |
| Hard problem (non-conventional) | delegate_task(category="artistry", load_skills=[...]) | Different approach needed | | Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed |
| Implementation | delegate_task(category="...", load_skills=[...]) | Domain-optimized models | | Implementation | task(category="...", load_skills=[...]) | Domain-optimized models |
**CATEGORY + SKILL DELEGATION:** **CATEGORY + SKILL DELEGATION:**
\`\`\` \`\`\`
// Frontend work // Frontend work
delegate_task(category="visual-engineering", load_skills=["frontend-ui-ux"]) task(category="visual-engineering", load_skills=["frontend-ui-ux"])
// Complex logic // Complex logic
delegate_task(category="ultrabrain", load_skills=["typescript-programmer"]) task(category="ultrabrain", load_skills=["typescript-programmer"])
// Quick fixes // Quick fixes
delegate_task(category="quick", load_skills=["git-master"]) task(category="quick", load_skills=["git-master"])
\`\`\` \`\`\`
**YOU SHOULD ONLY DO IT YOURSELF WHEN:** **YOU SHOULD ONLY DO IT YOURSELF WHEN:**
@@ -177,14 +177,14 @@ delegate_task(category="quick", load_skills=["git-master"])
## EXECUTION RULES ## EXECUTION RULES
- **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each. - **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
- **PARALLEL**: Fire independent agent calls simultaneously via delegate_task(background=true) - NEVER wait sequentially. - **PARALLEL**: Fire independent agent calls simultaneously via task(run_in_background=true) - NEVER wait sequentially.
- **BACKGROUND FIRST**: Use delegate_task for exploration/research agents (10+ concurrent if needed). - **BACKGROUND FIRST**: Use task for exploration/research agents (10+ concurrent if needed).
- **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done. - **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
- **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths. - **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.
## WORKFLOW ## WORKFLOW
1. Analyze the request and identify required capabilities 1. Analyze the request and identify required capabilities
2. Spawn exploration/librarian agents via delegate_task(background=true) in PARALLEL (10+ if needed) 2. Spawn exploration/librarian agents via task(run_in_background=true) in PARALLEL (10+ if needed)
3. Use Plan agent with gathered context to create detailed work breakdown 3. Use Plan agent with gathered context to create detailed work breakdown
4. Execute with continuous verification against original requirements 4. Execute with continuous verification against original requirements

View File

@@ -73,11 +73,11 @@ Use these when they provide clear value based on the decision framework above:
| Resource | When to Use | How to Use | | Resource | When to Use | How to Use |
|----------|-------------|------------| |----------|-------------|------------|
| explore agent | Need codebase patterns you don't have | \`delegate_task(subagent_type="explore", run_in_background=true, ...)\` | | explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` |
| librarian agent | External library docs, OSS examples | \`delegate_task(subagent_type="librarian", run_in_background=true, ...)\` | | librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` |
| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`delegate_task(subagent_type="oracle", ...)\` | | oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` |
| plan agent | Complex multi-step with dependencies (5+ steps) | \`delegate_task(subagent_type="plan", ...)\` | | plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` |
| delegate_task category | Specialized work matching a category | \`delegate_task(category="...", load_skills=[...])\` | | task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` |
<tool_usage_rules> <tool_usage_rules>
- Prefer tools over internal knowledge for fresh or user-specific data - Prefer tools over internal knowledge for fresh or user-specific data
@@ -97,8 +97,8 @@ Use these when they provide clear value based on the decision framework above:
**ALWAYS run both tracks in parallel:** **ALWAYS run both tracks in parallel:**
\`\`\` \`\`\`
// Fire background agents for deep exploration // Fire background agents for deep exploration
delegate_task(subagent_type="explore", load_skills=[], prompt="Find X patterns...", run_in_background=true) task(subagent_type="explore", load_skills=[], prompt="Find X patterns...", run_in_background=true)
delegate_task(subagent_type="librarian", load_skills=[], prompt="Find docs for Y...", run_in_background=true) task(subagent_type="librarian", load_skills=[], prompt="Find docs for Y...", run_in_background=true)
// WHILE THEY RUN - use direct tools for immediate context // WHILE THEY RUN - use direct tools for immediate context
grep(pattern="relevant_pattern", path="src/") grep(pattern="relevant_pattern", path="src/")

View File

@@ -14,7 +14,7 @@ You ARE the planner. You ARE NOT an implementer. You DO NOT write code. You DO N
| Write/Edit | \`.sisyphus/**/*.md\` ONLY | Everything else | | Write/Edit | \`.sisyphus/**/*.md\` ONLY | Everything else |
| Read | All files | - | | Read | All files | - |
| Bash | Research commands only | Implementation commands | | Bash | Research commands only | Implementation commands |
| delegate_task | explore, librarian | - | | task | explore, librarian | - |
**IF YOU TRY TO WRITE/EDIT OUTSIDE \`.sisyphus/\`:** **IF YOU TRY TO WRITE/EDIT OUTSIDE \`.sisyphus/\`:**
- System will BLOCK your action - System will BLOCK your action
@@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans.
### Research Protocol ### Research Protocol
1. **Fire parallel background agents** for comprehensive context: 1. **Fire parallel background agents** for comprehensive context:
\`\`\` \`\`\`
delegate_task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true) task(subagent_type="explore", prompt="Find existing patterns for [topic] in codebase", run_in_background=true)
delegate_task(agent="explore", prompt="Find test infrastructure and conventions", background=true) task(subagent_type="explore", prompt="Find test infrastructure and conventions", run_in_background=true)
delegate_task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true) task(subagent_type="librarian", prompt="Find official docs and best practices for [technology]", run_in_background=true)
\`\`\` \`\`\`
2. **Wait for results** before planning - rushed plans fail 2. **Wait for results** before planning - rushed plans fail
3. **Synthesize findings** into informed requirements 3. **Synthesize findings** into informed requirements
@@ -117,9 +117,9 @@ Each TODO item MUST include:
| Wave | Tasks | Dispatch Command | | Wave | Tasks | Dispatch Command |
|------|-------|------------------| |------|-------|------------------|
| 1 | 1, 4 | \`delegate_task(category="...", load_skills=[...], run_in_background=false)\` × 2 | | 1 | 1, 4 | \`task(category="...", load_skills=[...], run_in_background=false)\` × 2 |
| 2 | 2, 3, 5 | \`delegate_task(...)\` × 3 after Wave 1 completes | | 2 | 2, 3, 5 | \`task(...)\` × 3 after Wave 1 completes |
| 3 | 6 | \`delegate_task(...)\` final integration | | 3 | 6 | \`task(...)\` final integration |
**WHY PARALLEL TASK GRAPH IS MANDATORY:** **WHY PARALLEL TASK GRAPH IS MANDATORY:**
- Orchestrator (Sisyphus) executes tasks in parallel waves - Orchestrator (Sisyphus) executes tasks in parallel waves

View File

@@ -51,14 +51,14 @@ ${createSystemDirective(SystemDirectiveTypes.PROMETHEUS_READ_ONLY)}
│ │ - Record decisions to .sisyphus/drafts/ │ │ │ - Record decisions to .sisyphus/drafts/ │
├──────┼──────────────────────────────────────────────────────────────┤ ├──────┼──────────────────────────────────────────────────────────────┤
│ 2 │ METIS CONSULTATION: Pre-generation gap analysis │ │ 2 │ METIS CONSULTATION: Pre-generation gap analysis │
│ │ - delegate_task(agent="Metis (Plan Consultant)", ...) │ │ │ - task(agent="Metis (Plan Consultant)", ...) │
│ │ - Identify missed questions, guardrails, assumptions │ │ │ - Identify missed questions, guardrails, assumptions │
├──────┼──────────────────────────────────────────────────────────────┤ ├──────┼──────────────────────────────────────────────────────────────┤
│ 3 │ PLAN GENERATION: Write to .sisyphus/plans/*.md │ │ 3 │ PLAN GENERATION: Write to .sisyphus/plans/*.md │
│ │ <- YOU ARE HERE │ │ │ <- YOU ARE HERE │
├──────┼──────────────────────────────────────────────────────────────┤ ├──────┼──────────────────────────────────────────────────────────────┤
│ 4 │ MOMUS REVIEW (if high accuracy requested) │ │ 4 │ MOMUS REVIEW (if high accuracy requested) │
│ │ - delegate_task(agent="Momus (Plan Reviewer)", ...) │ │ │ - task(agent="Momus (Plan Reviewer)", ...) │
│ │ - Loop until OKAY verdict │ │ │ - Loop until OKAY verdict │
├──────┼──────────────────────────────────────────────────────────────┤ ├──────┼──────────────────────────────────────────────────────────────┤
│ 5 │ SUMMARY: Present to user │ │ 5 │ SUMMARY: Present to user │

View File

@@ -227,11 +227,11 @@ describe("prometheus-md-only", () => {
).resolves.toBeUndefined() ).resolves.toBeUndefined()
}) })
test("should inject read-only warning when Prometheus calls delegate_task", async () => { test("should inject read-only warning when Prometheus calls task", async () => {
// given // given
const hook = createPrometheusMdOnlyHook(createMockPluginInput()) const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = { const input = {
tool: "delegate_task", tool: "task",
sessionID: TEST_SESSION_ID, sessionID: TEST_SESSION_ID,
callID: "call-1", callID: "call-1",
} }
@@ -289,7 +289,7 @@ describe("prometheus-md-only", () => {
// given // given
const hook = createPrometheusMdOnlyHook(createMockPluginInput()) const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = { const input = {
tool: "delegate_task", tool: "task",
sessionID: TEST_SESSION_ID, sessionID: TEST_SESSION_ID,
callID: "call-1", callID: "call-1",
} }
@@ -330,11 +330,11 @@ describe("prometheus-md-only", () => {
).resolves.toBeUndefined() ).resolves.toBeUndefined()
}) })
test("should not inject warning for non-Prometheus agents calling delegate_task", async () => { test("should not inject warning for non-Prometheus agents calling task", async () => {
// given // given
const hook = createPrometheusMdOnlyHook(createMockPluginInput()) const hook = createPrometheusMdOnlyHook(createMockPluginInput())
const input = { const input = {
tool: "delegate_task", tool: "task",
sessionID: TEST_SESSION_ID, sessionID: TEST_SESSION_ID,
callID: "call-1", callID: "call-1",
} }

View File

@@ -63,7 +63,7 @@ function getMessageDir(sessionID: string): string | null {
return null return null
} }
const TASK_TOOLS = ["delegate_task", "task", "call_omo_agent"] const TASK_TOOLS = ["task", "call_omo_agent"]
function getAgentFromMessageFiles(sessionID: string): string | undefined { function getAgentFromMessageFiles(sessionID: string): string | undefined {
const messageDir = getMessageDir(sessionID) const messageDir = getMessageDir(sessionID)

View File

@@ -12,8 +12,8 @@ export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
input: { tool: string; sessionID: string; callID: string }, input: { tool: string; sessionID: string; callID: string },
output: { args: Record<string, unknown>; message?: string } output: { args: Record<string, unknown>; message?: string }
): Promise<void> => { ): Promise<void> => {
// 1. Check if tool is delegate_task // 1. Check if tool is task
if (input.tool !== "delegate_task") { if (input.tool !== "task") {
return return
} }
@@ -37,7 +37,7 @@ export function createSisyphusJuniorNotepadHook(ctx: PluginInput) {
output.args.prompt = NOTEPAD_DIRECTIVE + prompt output.args.prompt = NOTEPAD_DIRECTIVE + prompt
// 6. Log injection // 6. Log injection
log(`[${HOOK_NAME}] Injected notepad directive to delegate_task`, { log(`[${HOOK_NAME}] Injected notepad directive to task`, {
sessionID: input.sessionID, sessionID: input.sessionID,
}) })
}, },

View File

@@ -1,4 +1,4 @@
const TARGET_TOOLS = ["task", "Task", "task_tool", "call_omo_agent", "delegate_task"] const TARGET_TOOLS = ["task", "Task", "task_tool", "call_omo_agent"]
const SESSION_ID_PATTERNS = [ const SESSION_ID_PATTERNS = [
/Session ID: (ses_[a-zA-Z0-9_-]+)/, /Session ID: (ses_[a-zA-Z0-9_-]+)/,
@@ -27,7 +27,7 @@ export function createTaskResumeInfoHook() {
const sessionId = extractSessionId(output.output) const sessionId = extractSessionId(output.output)
if (!sessionId) return if (!sessionId) return
output.output = output.output.trimEnd() + `\n\nto continue: delegate_task(session_id="${sessionId}", prompt="...")` output.output = output.output.trimEnd() + `\n\nto continue: task(session_id="${sessionId}", prompt="...")`
} }
return { return {

View File

@@ -16,7 +16,7 @@ export const REPLACEMENT_MESSAGE = `TodoRead/TodoWrite are DISABLED because expe
3. DO THE WORK 3. DO THE WORK
4. TaskUpdate({ id: "T-xxx", status: "completed" }) 4. TaskUpdate({ id: "T-xxx", status: "completed" })
CRITICAL: 1 task = 1 delegate_task. Fire independent tasks concurrently. CRITICAL: 1 task = 1 task() call. Fire independent tasks concurrently.
**STOP! DO NOT START WORKING DIRECTLY - NO MATTER HOW SMALL THE TASK!** **STOP! DO NOT START WORKING DIRECTLY - NO MATTER HOW SMALL THE TASK!**
Even if the task seems trivial (1 line fix, simple edit, quick change), you MUST: Even if the task seems trivial (1 line fix, simple edit, quick change), you MUST:

View File

@@ -111,6 +111,7 @@ import { filterDisabledTools } from "./shared/disabled-tools";
import { loadPluginConfig } from "./plugin-config"; import { loadPluginConfig } from "./plugin-config";
import { createModelCacheState } from "./plugin-state"; import { createModelCacheState } from "./plugin-state";
import { createConfigHandler } from "./plugin-handlers"; import { createConfigHandler } from "./plugin-handlers";
import { consumeToolMetadata } from "./features/tool-metadata-store";
const OhMyOpenCodePlugin: Plugin = async (ctx) => { const OhMyOpenCodePlugin: Plugin = async (ctx) => {
log("[OhMyOpenCodePlugin] ENTRY - plugin loading", { log("[OhMyOpenCodePlugin] ENTRY - plugin loading", {
@@ -533,7 +534,7 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
...backgroundTools, ...backgroundTools,
call_omo_agent: callOmoAgent, call_omo_agent: callOmoAgent,
...(lookAt ? { look_at: lookAt } : {}), ...(lookAt ? { look_at: lookAt } : {}),
delegate_task: delegateTask, task: delegateTask,
skill: skillTool, skill: skillTool,
skill_mcp: skillMcpTool, skill_mcp: skillMcpTool,
slashcommand: slashcommandTool, slashcommand: slashcommandTool,
@@ -787,16 +788,11 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
if (input.tool === "task") { if (input.tool === "task") {
const args = output.args as Record<string, unknown>; const args = output.args as Record<string, unknown>;
const subagentType = args.subagent_type as string; const category = typeof args.category === "string" ? args.category : undefined;
const isExploreOrLibrarian = ["explore", "librarian"].some( const subagentType = typeof args.subagent_type === "string" ? args.subagent_type : undefined;
(name) => name.toLowerCase() === (subagentType ?? "").toLowerCase(), if (category && !subagentType) {
); args.subagent_type = "sisyphus-junior";
}
args.tools = {
...(args.tools as Record<string, boolean> | undefined),
delegate_task: false,
...(isExploreOrLibrarian ? { call_omo_agent: false } : {}),
};
} }
if (ralphLoop && input.tool === "slashcommand") { if (ralphLoop && input.tool === "slashcommand") {
@@ -872,6 +868,19 @@ const OhMyOpenCodePlugin: Plugin = async (ctx) => {
if (!output) { if (!output) {
return; return;
} }
// Restore metadata that fromPlugin() overwrites with { truncated, outputPath }.
// This must run FIRST, before any hook reads output.metadata.
const stored = consumeToolMetadata(input.sessionID, input.callID)
if (stored) {
if (stored.title) {
output.title = stored.title
}
if (stored.metadata) {
output.metadata = { ...output.metadata, ...stored.metadata }
}
}
await claudeCodeHooks["tool.execute.after"](input, output); await claudeCodeHooks["tool.execute.after"](input, output);
await toolOutputTruncator?.["tool.execute.after"](input, output); await toolOutputTruncator?.["tool.execute.after"](input, output);
await preemptiveCompaction?.["tool.execute.after"](input, output); await preemptiveCompaction?.["tool.execute.after"](input, output);

View File

@@ -274,7 +274,7 @@ describe("Plan agent demote behavior", () => {
expect(agents.plan.prompt).toBe("original plan prompt") expect(agents.plan.prompt).toBe("original plan prompt")
}) })
test("prometheus should have mode 'all' to be callable via delegate_task", async () => { test("prometheus should have mode 'all' to be callable via task", async () => {
// given // given
const pluginConfig: OhMyOpenCodeConfig = { const pluginConfig: OhMyOpenCodeConfig = {
sisyphus_agent: { sisyphus_agent: {
@@ -305,7 +305,7 @@ describe("Plan agent demote behavior", () => {
}) })
describe("Agent permission defaults", () => { describe("Agent permission defaults", () => {
test("hephaestus should allow delegate_task", async () => { test("hephaestus should allow task", async () => {
// #given // #given
const createBuiltinAgentsMock = agents.createBuiltinAgents as unknown as { const createBuiltinAgentsMock = agents.createBuiltinAgents as unknown as {
mockResolvedValue: (value: Record<string, unknown>) => void mockResolvedValue: (value: Record<string, unknown>) => void
@@ -335,7 +335,7 @@ describe("Agent permission defaults", () => {
// #then // #then
const agentConfig = config.agent as Record<string, { permission?: Record<string, string> }> const agentConfig = config.agent as Record<string, { permission?: Record<string, string> }>
expect(agentConfig.hephaestus).toBeDefined() expect(agentConfig.hephaestus).toBeDefined()
expect(agentConfig.hephaestus.permission?.delegate_task).toBe("allow") expect(agentConfig.hephaestus.permission?.task).toBe("allow")
}) })
}) })

View File

@@ -419,30 +419,30 @@ export function createConfigHandler(deps: ConfigHandlerDeps) {
} }
if (agentResult["atlas"]) { if (agentResult["atlas"]) {
const agent = agentResult["atlas"] as AgentWithPermission; const agent = agentResult["atlas"] as AgentWithPermission;
agent.permission = { ...agent.permission, task: "deny", call_omo_agent: "deny", delegate_task: "allow", "task_*": "allow", teammate: "allow" }; agent.permission = { ...agent.permission, task: "allow", call_omo_agent: "deny", "task_*": "allow", teammate: "allow" };
} }
if (agentResult.sisyphus) { if (agentResult.sisyphus) {
const agent = agentResult.sisyphus as AgentWithPermission; const agent = agentResult.sisyphus as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
} }
if (agentResult.hephaestus) { if (agentResult.hephaestus) {
const agent = agentResult.hephaestus as AgentWithPermission; const agent = agentResult.hephaestus as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission }; agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission };
} }
if (agentResult["prometheus"]) { if (agentResult["prometheus"]) {
const agent = agentResult["prometheus"] as AgentWithPermission; const agent = agentResult["prometheus"] as AgentWithPermission;
agent.permission = { ...agent.permission, call_omo_agent: "deny", delegate_task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" }; agent.permission = { ...agent.permission, call_omo_agent: "deny", task: "allow", question: questionPermission, "task_*": "allow", teammate: "allow" };
} }
if (agentResult["sisyphus-junior"]) { if (agentResult["sisyphus-junior"]) {
const agent = agentResult["sisyphus-junior"] as AgentWithPermission; const agent = agentResult["sisyphus-junior"] as AgentWithPermission;
agent.permission = { ...agent.permission, delegate_task: "allow", "task_*": "allow", teammate: "allow" }; agent.permission = { ...agent.permission, task: "allow", "task_*": "allow", teammate: "allow" };
} }
config.permission = { config.permission = {
...(config.permission as Record<string, unknown>), ...(config.permission as Record<string, unknown>),
webfetch: "allow", webfetch: "allow",
external_directory: "allow", external_directory: "allow",
delegate_task: "deny", task: "deny",
}; };
const mcpResult = (pluginConfig.claude_code?.mcp ?? true) const mcpResult = (pluginConfig.claude_code?.mcp ?? true)

View File

@@ -8,7 +8,6 @@ const EXPLORATION_AGENT_DENYLIST: Record<string, boolean> = {
write: false, write: false,
edit: false, edit: false,
task: false, task: false,
delegate_task: false,
call_omo_agent: false, call_omo_agent: false,
} }
@@ -21,7 +20,6 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
write: false, write: false,
edit: false, edit: false,
task: false, task: false,
delegate_task: false,
call_omo_agent: false, call_omo_agent: false,
}, },
@@ -29,14 +27,12 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
write: false, write: false,
edit: false, edit: false,
task: false, task: false,
delegate_task: false,
}, },
momus: { momus: {
write: false, write: false,
edit: false, edit: false,
task: false, task: false,
delegate_task: false,
}, },
"multimodal-looker": { "multimodal-looker": {
@@ -45,7 +41,6 @@ const AGENT_RESTRICTIONS: Record<string, Record<string, boolean>> = {
"sisyphus-junior": { "sisyphus-junior": {
task: false, task: false,
delegate_task: false,
}, },
} }

View File

@@ -130,5 +130,49 @@ describe("permission-compat", () => {
// then returns unchanged // then returns unchanged
expect(result).toEqual(config) expect(result).toEqual(config)
}) })
test("migrates delegate_task permission to task", () => {
//#given config with delegate_task permission
const config = {
model: "test",
permission: { delegate_task: "allow" as const, write: "deny" as const },
}
//#when migrating
const result = migrateAgentConfig(config)
//#then delegate_task is renamed to task
const perm = result.permission as Record<string, string>
expect(perm["task"]).toBe("allow")
expect(perm["delegate_task"]).toBeUndefined()
expect(perm["write"]).toBe("deny")
})
test("does not overwrite existing task permission with delegate_task", () => {
//#given config with both task and delegate_task permissions
const config = {
permission: { delegate_task: "allow" as const, task: "deny" as const },
}
//#when migrating
const result = migrateAgentConfig(config)
//#then existing task permission is preserved
const perm = result.permission as Record<string, string>
expect(perm["task"]).toBe("deny")
expect(perm["delegate_task"]).toBe("allow")
})
test("does not mutate the original config permission object", () => {
//#given config with delegate_task permission
const originalPerm = { delegate_task: "allow" as const }
const config = { permission: originalPerm }
//#when migrating
migrateAgentConfig(config)
//#then original permission object is not mutated
expect(originalPerm).toEqual({ delegate_task: "allow" })
})
}) })
}) })

View File

@@ -73,5 +73,14 @@ export function migrateAgentConfig(
delete result.tools delete result.tools
} }
if (result.permission && typeof result.permission === "object") {
const perm = { ...(result.permission as Record<string, PermissionValue>) }
if ("delegate_task" in perm && !("task" in perm)) {
perm["task"] = perm["delegate_task"]
delete perm["delegate_task"]
result.permission = perm
}
}
return result return result
} }

View File

@@ -39,7 +39,7 @@ tools/
| Search | ast_grep_search, ast_grep_replace, grep, glob | Direct | | Search | ast_grep_search, ast_grep_replace, grep, glob | Direct |
| Session | session_list, session_read, session_search, session_info | Direct | | Session | session_list, session_read, session_search, session_info | Direct |
| Task | task_create, task_get, task_list, task_update | Factory | | Task | task_create, task_get, task_list, task_update | Factory |
| Agent | delegate_task, call_omo_agent | Factory | | Agent | task, call_omo_agent | Factory |
| Background | background_output, background_cancel | Factory | | Background | background_output, background_cancel | Factory |
| System | interactive_bash, look_at | Mixed | | System | interactive_bash, look_at | Mixed |
| Skill | skill, skill_mcp, slashcommand | Factory | | Skill | skill, skill_mcp, slashcommand | Factory |

View File

@@ -4,9 +4,11 @@ import { runSg } from "./cli"
import { formatSearchResult, formatReplaceResult } from "./utils" import { formatSearchResult, formatReplaceResult } from "./utils"
import type { CliLanguage } from "./types" import type { CliLanguage } from "./types"
function showOutputToUser(context: unknown, output: string): void { async function showOutputToUser(context: unknown, output: string): Promise<void> {
const ctx = context as { metadata?: (input: { metadata: { output: string } }) => void } const ctx = context as {
ctx.metadata?.({ metadata: { output } }) metadata?: (input: { metadata: { output: string } }) => void | Promise<void>
}
await ctx.metadata?.({ metadata: { output } })
} }
function getEmptyResultHint(pattern: string, lang: CliLanguage): string | null { function getEmptyResultHint(pattern: string, lang: CliLanguage): string | null {
@@ -65,11 +67,11 @@ export const ast_grep_search: ToolDefinition = tool({
} }
} }
showOutputToUser(context, output) await showOutputToUser(context, output)
return output return output
} catch (e) { } catch (e) {
const output = `Error: ${e instanceof Error ? e.message : String(e)}` const output = `Error: ${e instanceof Error ? e.message : String(e)}`
showOutputToUser(context, output) await showOutputToUser(context, output)
return output return output
} }
}, },
@@ -99,14 +101,13 @@ export const ast_grep_replace: ToolDefinition = tool({
updateAll: args.dryRun === false, updateAll: args.dryRun === false,
}) })
const output = formatReplaceResult(result, args.dryRun !== false) const output = formatReplaceResult(result, args.dryRun !== false)
showOutputToUser(context, output) await showOutputToUser(context, output)
return output return output
} catch (e) { } catch (e) {
const output = `Error: ${e instanceof Error ? e.message : String(e)}` const output = `Error: ${e instanceof Error ? e.message : String(e)}`
showOutputToUser(context, output) await showOutputToUser(context, output)
return output return output
} }
}, },
}) })

View File

@@ -8,6 +8,7 @@ import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAG
import { getSessionAgent } from "../../features/claude-code-session-state" import { getSessionAgent } from "../../features/claude-code-session-state"
import { log } from "../../shared/logger" import { log } from "../../shared/logger"
import { consumeNewMessages } from "../../shared/session-cursor" import { consumeNewMessages } from "../../shared/session-cursor"
import { storeToolMetadata } from "../../features/tool-metadata-store"
type BackgroundOutputMessage = { type BackgroundOutputMessage = {
info?: { role?: string; time?: string | { created?: number }; agent?: string } info?: { role?: string; time?: string | { created?: number }; agent?: string }
@@ -140,15 +141,37 @@ export function createBackgroundTask(manager: BackgroundManager): ToolDefinition
parentAgent, parentAgent,
}) })
ctx.metadata?.({ const WAIT_FOR_SESSION_INTERVAL_MS = 50
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) {
await manager.cancelTask(task.id)
return `Task aborted and cancelled while waiting for session to start.\n\nTask ID: ${task.id}`
}
await delay(WAIT_FOR_SESSION_INTERVAL_MS)
const updated = manager.getTask(task.id)
if (!updated || updated.status === "error") {
return `Task ${!updated ? "was deleted" : `entered error state`}.\n\nTask ID: ${task.id}`
}
sessionId = updated?.sessionID
}
const bgMeta = {
title: args.description, title: args.description,
metadata: { sessionId: task.sessionID }, metadata: { sessionId: sessionId ?? "pending" } as Record<string, unknown>,
}) }
await ctx.metadata?.(bgMeta)
const callID = (ctx as any).callID as string | undefined
if (callID) {
storeToolMetadata(ctx.sessionID, callID, bgMeta)
}
return `Background task launched successfully. return `Background task launched successfully.
Task ID: ${task.id} Task ID: ${task.id}
Session ID: ${task.sessionID} Session ID: ${sessionId ?? "pending"}
Description: ${task.description} Description: ${task.description}
Agent: ${task.agent} Agent: ${task.agent}
Status: ${task.status} Status: ${task.status}
@@ -663,7 +686,7 @@ export function createBackgroundCancel(manager: BackgroundManager, client: Backg
To continue a cancelled task, use: To continue a cancelled task, use:
\`\`\` \`\`\`
delegate_task(session_id="<session_id>", prompt="Continue: <your follow-up>") task(session_id="<session_id>", prompt="Continue: <your follow-up>")
\`\`\` \`\`\`
Continuable sessions: Continuable sessions:

View File

@@ -10,6 +10,7 @@ import { findFirstMessageWithAgent, findNearestMessageWithFields, MESSAGE_STORAG
import { getSessionAgent } from "../../features/claude-code-session-state" import { getSessionAgent } from "../../features/claude-code-session-state"
function getMessageDir(sessionID: string): string | null { function getMessageDir(sessionID: string): string | null {
if (!sessionID.startsWith("ses_")) return null
if (!existsSync(MESSAGE_STORAGE)) return null if (!existsSync(MESSAGE_STORAGE)) return null
const directPath = join(MESSAGE_STORAGE, sessionID) const directPath = join(MESSAGE_STORAGE, sessionID)
@@ -110,15 +111,31 @@ async function executeBackground(
parentAgent, parentAgent,
}) })
toolContext.metadata?.({ const WAIT_FOR_SESSION_INTERVAL_MS = 50
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
if (toolContext.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
}
const updated = manager.getTask(task.id)
if (updated?.status === "error" || updated?.status === "cancelled") {
return `Task failed to start (status: ${updated.status}).\n\nTask ID: ${task.id}`
}
await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS))
sessionId = manager.getTask(task.id)?.sessionID
}
await toolContext.metadata?.({
title: args.description, title: args.description,
metadata: { sessionId: task.sessionID }, metadata: { sessionId: sessionId ?? "pending" },
}) })
return `Background agent task launched successfully. return `Background agent task launched successfully.
Task ID: ${task.id} Task ID: ${task.id}
Session ID: ${task.sessionID} Session ID: ${sessionId ?? "pending"}
Description: ${task.description} Description: ${task.description}
Agent: ${task.agent} (subagent) Agent: ${task.agent} (subagent)
Status: ${task.status} Status: ${task.status}
@@ -194,7 +211,7 @@ Original error: ${createResult.error}`
log(`[call_omo_agent] Created session: ${sessionID}`) log(`[call_omo_agent] Created session: ${sessionID}`)
} }
toolContext.metadata?.({ await toolContext.metadata?.({
title: args.description, title: args.description,
metadata: { sessionId: sessionID }, metadata: { sessionId: sessionID },
}) })
@@ -210,7 +227,6 @@ Original error: ${createResult.error}`
tools: { tools: {
...getAgentToolRestrictions(args.subagent_type), ...getAgentToolRestrictions(args.subagent_type),
task: false, task: false,
delegate_task: false,
}, },
parts: [{ type: "text", text: args.prompt }], parts: [{ type: "text", text: args.prompt }],
}, },

View File

@@ -459,13 +459,13 @@ YOU MUST END YOUR RESPONSE WITH THIS SECTION.
1. **Wave 1**: Fire these tasks IN PARALLEL (no dependencies) 1. **Wave 1**: Fire these tasks IN PARALLEL (no dependencies)
\`\`\` \`\`\`
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task 1: ...") task(category="...", load_skills=[...], run_in_background=false, prompt="Task 1: ...")
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task N: ...") task(category="...", load_skills=[...], run_in_background=false, prompt="Task N: ...")
\`\`\` \`\`\`
2. **Wave 2**: After Wave 1 completes, fire next wave IN PARALLEL 2. **Wave 2**: After Wave 1 completes, fire next wave IN PARALLEL
\`\`\` \`\`\`
delegate_task(category="...", load_skills=[...], run_in_background=false, prompt="Task 2: ...") task(category="...", load_skills=[...], run_in_background=false, prompt="Task 2: ...")
\`\`\` \`\`\`
3. Continue until all waves complete 3. Continue until all waves complete
@@ -476,7 +476,7 @@ YOU MUST END YOUR RESPONSE WITH THIS SECTION.
WHY THIS FORMAT IS MANDATORY: WHY THIS FORMAT IS MANDATORY:
- Caller can directly copy TODO items - Caller can directly copy TODO items
- Wave grouping enables parallel execution - Wave grouping enables parallel execution
- Each task has clear delegate_task parameters - Each task has clear task parameters
- QA criteria ensure verifiable completion - QA criteria ensure verifiable completion
</FINAL_OUTPUT_FOR_CALLER> </FINAL_OUTPUT_FOR_CALLER>

View File

@@ -16,6 +16,7 @@ import { log, getAgentToolRestrictions, resolveModelPipeline, promptWithModelSug
import { fetchAvailableModels, isModelAvailable } from "../../shared/model-availability" import { fetchAvailableModels, isModelAvailable } from "../../shared/model-availability"
import { readConnectedProvidersCache } from "../../shared/connected-providers-cache" import { readConnectedProvidersCache } from "../../shared/connected-providers-cache"
import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements" import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements"
import { storeToolMetadata } from "../../features/tool-metadata-store"
const SISYPHUS_JUNIOR_AGENT = "sisyphus-junior" const SISYPHUS_JUNIOR_AGENT = "sisyphus-junior"
@@ -67,7 +68,7 @@ export function resolveParentContext(ctx: ToolContextWithMetadata): ParentContex
const sessionAgent = getSessionAgent(ctx.sessionID) const sessionAgent = getSessionAgent(ctx.sessionID)
const parentAgent = ctx.agent ?? sessionAgent ?? firstMessageAgent ?? prevMessage?.agent const parentAgent = ctx.agent ?? sessionAgent ?? firstMessageAgent ?? prevMessage?.agent
log("[delegate_task] parentAgent resolution", { log("[task] parentAgent resolution", {
sessionID: ctx.sessionID, sessionID: ctx.sessionID,
messageDir, messageDir,
ctxAgent: ctx.agent, ctxAgent: ctx.agent,
@@ -111,7 +112,7 @@ export async function executeBackgroundContinuation(
parentAgent: parentContext.agent, parentAgent: parentContext.agent,
}) })
ctx.metadata?.({ const bgContMeta = {
title: `Continue: ${task.description}`, title: `Continue: ${task.description}`,
metadata: { metadata: {
prompt: args.prompt, prompt: args.prompt,
@@ -122,7 +123,11 @@ export async function executeBackgroundContinuation(
sessionId: task.sessionID, sessionId: task.sessionID,
command: args.command, command: args.command,
}, },
}) }
await ctx.metadata?.(bgContMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, bgContMeta)
}
return `Background task continued. return `Background task continued.
@@ -165,7 +170,7 @@ export async function executeSyncContinuation(
}) })
} }
ctx.metadata?.({ const syncContMeta = {
title: `Continue: ${args.description}`, title: `Continue: ${args.description}`,
metadata: { metadata: {
prompt: args.prompt, prompt: args.prompt,
@@ -176,7 +181,11 @@ export async function executeSyncContinuation(
sync: true, sync: true,
command: args.command, command: args.command,
}, },
}) }
await ctx.metadata?.(syncContMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, syncContMeta)
}
try { try {
let resumeAgent: string | undefined let resumeAgent: string | undefined
@@ -207,13 +216,12 @@ export async function executeSyncContinuation(
body: { body: {
...(resumeAgent !== undefined ? { agent: resumeAgent } : {}), ...(resumeAgent !== undefined ? { agent: resumeAgent } : {}),
...(resumeModel !== undefined ? { model: resumeModel } : {}), ...(resumeModel !== undefined ? { model: resumeModel } : {}),
tools: { tools: {
...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}), ...(resumeAgent ? getAgentToolRestrictions(resumeAgent) : {}),
task: false, task: false,
delegate_task: false, call_omo_agent: true,
call_omo_agent: true, question: false,
question: false, },
},
parts: [{ type: "text", text: args.prompt }], parts: [{ type: "text", text: args.prompt }],
}, },
}) })
@@ -316,17 +324,17 @@ export async function executeUnstableAgentTask(
category: args.category, category: args.category,
}) })
const WAIT_FOR_SESSION_INTERVAL_MS = 100 const timing = getTimingConfig()
const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
const waitStart = Date.now() const waitStart = Date.now()
while (!task.sessionID && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) { let sessionID = task.sessionID
while (!sessionID && Date.now() - waitStart < timing.WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) { if (ctx.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}` return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
} }
await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS)) await new Promise(resolve => setTimeout(resolve, timing.WAIT_FOR_SESSION_INTERVAL_MS))
const updated = manager.getTask(task.id)
sessionID = updated?.sessionID
} }
const sessionID = task.sessionID
if (!sessionID) { if (!sessionID) {
return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), { return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), {
operation: "Launch monitored background task", operation: "Launch monitored background task",
@@ -336,7 +344,7 @@ export async function executeUnstableAgentTask(
}) })
} }
ctx.metadata?.({ const bgTaskMeta = {
title: args.description, title: args.description,
metadata: { metadata: {
prompt: args.prompt, prompt: args.prompt,
@@ -348,7 +356,11 @@ export async function executeUnstableAgentTask(
sessionId: sessionID, sessionId: sessionID,
command: args.command, command: args.command,
}, },
}) }
await ctx.metadata?.(bgTaskMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, bgTaskMeta)
}
const startTime = new Date() const startTime = new Date()
const timingCfg = getTimingConfig() const timingCfg = getTimingConfig()
@@ -463,7 +475,23 @@ export async function executeBackgroundTask(
category: args.category, category: args.category,
}) })
ctx.metadata?.({ // OpenCode TUI's `Task` tool UI calculates toolcalls by looking up
// `props.metadata.sessionId` and then counting tool parts in that session.
// BackgroundManager.launch() returns immediately (pending) before the session exists,
// so we must wait briefly for the session to be created to set metadata correctly.
const timing = getTimingConfig()
const waitStart = Date.now()
let sessionId = task.sessionID
while (!sessionId && Date.now() - waitStart < timing.WAIT_FOR_SESSION_TIMEOUT_MS) {
if (ctx.abort?.aborted) {
return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
}
await new Promise(resolve => setTimeout(resolve, timing.WAIT_FOR_SESSION_INTERVAL_MS))
const updated = manager.getTask(task.id)
sessionId = updated?.sessionID
}
const unstableMeta = {
title: args.description, title: args.description,
metadata: { metadata: {
prompt: args.prompt, prompt: args.prompt,
@@ -472,10 +500,14 @@ export async function executeBackgroundTask(
load_skills: args.load_skills, load_skills: args.load_skills,
description: args.description, description: args.description,
run_in_background: args.run_in_background, run_in_background: args.run_in_background,
sessionId: task.sessionID, sessionId: sessionId ?? "pending",
command: args.command, command: args.command,
}, },
}) }
await ctx.metadata?.(unstableMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, unstableMeta)
}
return `Background task launched. return `Background task launched.
@@ -487,7 +519,7 @@ Status: ${task.status}
System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check. System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.
<task_metadata> <task_metadata>
session_id: ${task.sessionID} session_id: ${sessionId}
</task_metadata>` </task_metadata>`
} catch (error) { } catch (error) {
return formatDetailedError(error, { return formatDetailedError(error, {
@@ -542,13 +574,13 @@ export async function executeSyncTask(
subagentSessions.add(sessionID) subagentSessions.add(sessionID)
if (onSyncSessionCreated) { if (onSyncSessionCreated) {
log("[delegate_task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID }) log("[task] Invoking onSyncSessionCreated callback", { sessionID, parentID: parentContext.sessionID })
await onSyncSessionCreated({ await onSyncSessionCreated({
sessionID, sessionID,
parentID: parentContext.sessionID, parentID: parentContext.sessionID,
title: args.description, title: args.description,
}).catch((err) => { }).catch((err) => {
log("[delegate_task] onSyncSessionCreated callback failed", { error: String(err) }) log("[task] onSyncSessionCreated callback failed", { error: String(err) })
}) })
await new Promise(r => setTimeout(r, 200)) await new Promise(r => setTimeout(r, 200))
} }
@@ -568,7 +600,7 @@ export async function executeSyncTask(
}) })
} }
ctx.metadata?.({ const syncTaskMeta = {
title: args.description, title: args.description,
metadata: { metadata: {
prompt: args.prompt, prompt: args.prompt,
@@ -581,18 +613,21 @@ export async function executeSyncTask(
sync: true, sync: true,
command: args.command, command: args.command,
}, },
}) }
await ctx.metadata?.(syncTaskMeta)
if (ctx.callID) {
storeToolMetadata(ctx.sessionID, ctx.callID, syncTaskMeta)
}
try { try {
const allowDelegateTask = isPlanAgent(agentToUse) const allowTask = isPlanAgent(agentToUse)
await promptWithModelSuggestionRetry(client, { await promptWithModelSuggestionRetry(client, {
path: { id: sessionID }, path: { id: sessionID },
body: { body: {
agent: agentToUse, agent: agentToUse,
system: systemContent, system: systemContent,
tools: { tools: {
task: false, task: allowTask,
delegate_task: allowDelegateTask,
call_omo_agent: true, call_omo_agent: true,
question: false, question: false,
}, },
@@ -630,11 +665,11 @@ export async function executeSyncTask(
let stablePolls = 0 let stablePolls = 0
let pollCount = 0 let pollCount = 0
log("[delegate_task] Starting poll loop", { sessionID, agentToUse }) log("[task] Starting poll loop", { sessionID, agentToUse })
while (Date.now() - pollStart < syncTiming.MAX_POLL_TIME_MS) { while (Date.now() - pollStart < syncTiming.MAX_POLL_TIME_MS) {
if (ctx.abort?.aborted) { if (ctx.abort?.aborted) {
log("[delegate_task] Aborted by user", { sessionID }) log("[task] Aborted by user", { sessionID })
if (toastManager && taskId) toastManager.removeTask(taskId) if (toastManager && taskId) toastManager.removeTask(taskId)
return `Task aborted.\n\nSession ID: ${sessionID}` return `Task aborted.\n\nSession ID: ${sessionID}`
} }
@@ -647,7 +682,7 @@ export async function executeSyncTask(
const sessionStatus = allStatuses[sessionID] const sessionStatus = allStatuses[sessionID]
if (pollCount % 10 === 0) { if (pollCount % 10 === 0) {
log("[delegate_task] Poll status", { log("[task] Poll status", {
sessionID, sessionID,
pollCount, pollCount,
elapsed: Math.floor((Date.now() - pollStart) / 1000) + "s", elapsed: Math.floor((Date.now() - pollStart) / 1000) + "s",
@@ -675,7 +710,7 @@ export async function executeSyncTask(
if (currentMsgCount === lastMsgCount) { if (currentMsgCount === lastMsgCount) {
stablePolls++ stablePolls++
if (stablePolls >= syncTiming.STABILITY_POLLS_REQUIRED) { if (stablePolls >= syncTiming.STABILITY_POLLS_REQUIRED) {
log("[delegate_task] Poll complete - messages stable", { sessionID, pollCount, currentMsgCount }) log("[task] Poll complete - messages stable", { sessionID, pollCount, currentMsgCount })
break break
} }
} else { } else {
@@ -685,7 +720,7 @@ export async function executeSyncTask(
} }
if (Date.now() - pollStart >= syncTiming.MAX_POLL_TIME_MS) { if (Date.now() - pollStart >= syncTiming.MAX_POLL_TIME_MS) {
log("[delegate_task] Poll timeout reached", { sessionID, pollCount, lastMsgCount, stablePolls }) log("[task] Poll timeout reached", { sessionID, pollCount, lastMsgCount, stablePolls })
} }
const messagesResult = await client.session.messages({ const messagesResult = await client.session.messages({
@@ -928,7 +963,7 @@ Sisyphus-Junior is spawned automatically when you specify a category. Pick the a
return { return {
agentToUse: "", agentToUse: "",
categoryModel: undefined, categoryModel: undefined,
error: `You are prometheus. You cannot delegate to prometheus via delegate_task. error: `You are prometheus. You cannot delegate to prometheus via task.
Create the work plan directly - that's your job as the planning agent.`, Create the work plan directly - that's your job as the planning agent.`,
} }
@@ -955,7 +990,7 @@ Create the work plan directly - that's your job as the planning agent.`,
return { return {
agentToUse: "", agentToUse: "",
categoryModel: undefined, categoryModel: undefined,
error: `Cannot call primary agent "${isPrimaryAgent.name}" via delegate_task. Primary agents are top-level orchestrators.`, error: `Cannot call primary agent "${isPrimaryAgent.name}" via task. Primary agents are top-level orchestrators.`,
} }
} }

View File

@@ -18,6 +18,7 @@ export function parseModelString(model: string): { providerID: string; modelID:
* Get the message directory for a session, checking both direct and nested paths. * Get the message directory for a session, checking both direct and nested paths.
*/ */
export function getMessageDir(sessionID: string): string | null { export function getMessageDir(sessionID: string): string | null {
if (!sessionID.startsWith("ses_")) return null
if (!existsSync(MESSAGE_STORAGE)) return null if (!existsSync(MESSAGE_STORAGE)) return null
const directPath = join(MESSAGE_STORAGE, sessionID) const directPath = join(MESSAGE_STORAGE, sessionID)

View File

@@ -0,0 +1,65 @@
const { describe, test, expect } = require("bun:test")
import { executeBackgroundTask } from "./executor"
import type { DelegateTaskArgs, ToolContextWithMetadata } from "./types"
/**
 * Guards the contract that the background executor waits for the
 * (possibly asynchronous) `ctx.metadata` callback to settle before it
 * hands its result string back to the caller.
 */
describe("task tool metadata awaiting", () => {
  test("executeBackgroundTask awaits ctx.metadata before returning", async () => {
    // given: a metadata callback that only flips the flag after a short delay
    let metadataResolved = false
    const slowMetadata = async (): Promise<void> => {
      await new Promise<void>((done) => setTimeout(done, 50))
      metadataResolved = true
    }

    const abortController = new AbortController()
    const toolCtx: ToolContextWithMetadata = {
      sessionID: "ses_parent",
      messageID: "msg_parent",
      agent: "sisyphus",
      abort: abortController.signal,
      metadata: slowMetadata,
    }

    const taskArgs: DelegateTaskArgs = {
      load_skills: [],
      description: "Test task",
      prompt: "Do something",
      run_in_background: true,
      subagent_type: "explore",
    }

    // The launched task already carries a session id, so the executor has
    // nothing to poll for and goes straight to publishing metadata.
    const launchedTask = {
      id: "task_1",
      description: "Test task",
      prompt: "Do something",
      agent: "explore",
      status: "pending",
      sessionID: "ses_child",
    }
    const stubExecutorCtx = {
      manager: {
        launch: async () => launchedTask,
        getTask: () => undefined,
      },
    } as any

    const parent = {
      sessionID: "ses_parent",
      messageID: "msg_parent",
    }

    // when
    const output = await executeBackgroundTask(
      taskArgs,
      toolCtx,
      stubExecutorCtx,
      parent,
      "explore",
      undefined,
      undefined,
    )

    // then: the delayed callback must have completed by the time we get here
    expect(output).toContain("Background task launched")
    expect(metadataResolved).toBe(true)
  })
})

View File

@@ -1,4 +1,5 @@
import { describe, test, expect, beforeEach, afterEach, spyOn } from "bun:test" declare const require: (name: string) => any
const { describe, test, expect, beforeEach, afterEach, spyOn } = require("bun:test")
import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants" import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, isPlanAgent, PLAN_AGENT_NAMES } from "./constants"
import { resolveCategoryConfig } from "./tools" import { resolveCategoryConfig } from "./tools"
import type { CategoryConfig } from "../../config/schema" import type { CategoryConfig } from "../../config/schema"
@@ -207,6 +208,66 @@ describe("sisyphus-task", () => {
}) })
describe("category delegation config validation", () => { describe("category delegation config validation", () => {
// Category-only calls should have `subagent_type` filled in on the caller's args object.
test("fills subagent_type as sisyphus-junior when category is provided without subagent_type", async () => {
  // given: a tool wired to stubbed manager/client collaborators
  const { createDelegateTask } = require("./tools")

  const launchedTask = {
    id: "task-123",
    status: "pending",
    description: "Test task",
    agent: "sisyphus-junior",
    sessionID: "test-session",
  }
  const stubManager = {
    launch: async () => launchedTask,
  }

  const stubSession = {
    create: async () => ({ data: { id: "test-session" } }),
    prompt: async () => ({ data: {} }),
    messages: async () => ({ data: [] }),
    status: async () => ({ data: {} }),
  }
  const stubClient = {
    app: { agents: async () => ({ data: [] }) },
    config: { get: async () => ({}) },
    provider: { list: async () => ({ data: { connected: ["openai"] } }) },
    model: { list: async () => ({ data: [{ provider: "openai", id: "gpt-5.3-codex" }] }) },
    session: stubSession,
  }

  const delegateTool = createDelegateTask({
    manager: stubManager,
    client: stubClient,
  })

  const callerContext = {
    sessionID: "parent-session",
    messageID: "parent-message",
    agent: "sisyphus",
    abort: new AbortController().signal,
  }

  // `subagent_type` is deliberately omitted; the tool is expected to set it.
  type CategoryOnlyArgs = {
    description: string
    prompt: string
    category: string
    run_in_background: boolean
    load_skills: string[]
    subagent_type?: string
  }
  const callArgs: CategoryOnlyArgs = {
    description: "Quick category test",
    prompt: "Do something",
    category: "quick",
    run_in_background: true,
    load_skills: [],
  }

  // when
  await delegateTool.execute(callArgs, callerContext)

  // then: the args object passed in now names the default category agent
  expect(callArgs.subagent_type).toBe("sisyphus-junior")
}, { timeout: 10000 })
test("proceeds without error when systemDefaultModel is undefined", async () => { test("proceeds without error when systemDefaultModel is undefined", async () => {
// given a mock client with no model in config // given a mock client with no model in config
const { createDelegateTask } = require("./tools") const { createDelegateTask } = require("./tools")
@@ -304,6 +365,71 @@ describe("sisyphus-task", () => {
}) })
}) })
/**
 * Covers the case where the background manager hands back a task before its
 * child session exists: the tool is expected to publish metadata carrying the
 * real `sessionId` once the manager reports it.
 */
describe("background metadata sessionId", () => {
  test("should wait for background sessionId and set metadata for TUI toolcall counting", async () => {
    // given: manager.launch resolves first, the session id shows up ~20ms later
    const { createDelegateTask } = require("./tools")

    type TrackedTask = { id: string; sessionID?: string; status: string; description: string; agent: string }
    const registry = new Map<string, TrackedTask>()

    const stubManager = {
      getTask: (id: string) => registry.get(id),
      launch: async () => {
        const pendingTask = { id: "bg_1", status: "pending", description: "Test task", agent: "explore" }
        registry.set(pendingTask.id, pendingTask)
        // Simulate the session being created after launch() has returned.
        setTimeout(() => {
          registry.set(pendingTask.id, { ...pendingTask, status: "running", sessionID: "ses_child" })
        }, 20)
        return pendingTask
      },
    }

    const stubSession = {
      create: async () => ({ data: { id: "test-session" } }),
      prompt: async () => ({ data: {} }),
      messages: async () => ({ data: [] }),
      status: async () => ({ data: {} }),
    }
    const stubClient = {
      app: { agents: async () => ({ data: [{ name: "explore", mode: "subagent" }] }) },
      config: { get: async () => ({}) },
      provider: { list: async () => ({ data: { connected: ["openai"] } }) },
      model: { list: async () => ({ data: [{ provider: "openai", id: "gpt-5.3-codex" }] }) },
      session: stubSession,
    }

    const delegateTool = createDelegateTask({
      manager: stubManager,
      client: stubClient,
    })

    // Record every metadata payload the tool publishes.
    type MetadataPayload = { title?: string; metadata?: Record<string, unknown> }
    const recordedMetadata: Array<MetadataPayload> = []
    const callerContext = {
      sessionID: "parent-session",
      messageID: "parent-message",
      agent: "sisyphus",
      abort: new AbortController().signal,
      metadata: (payload: MetadataPayload) => {
        recordedMetadata.push(payload)
      },
    }

    const callArgs = {
      description: "Explore task",
      prompt: "Explore features directory deeply",
      subagent_type: "explore",
      run_in_background: true,
      load_skills: [],
    }

    // when
    const output = await delegateTool.execute(callArgs, callerContext)

    // then: at least one payload carries the camelCase sessionId of the child session
    expect(String(output)).toContain("Background task launched")
    const withChildSession = recordedMetadata.find((entry) => entry.metadata?.sessionId === "ses_child")
    expect(withChildSession).toBeDefined()
  })
})
describe("resolveCategoryConfig", () => { describe("resolveCategoryConfig", () => {
test("returns null for unknown category without user config", () => { test("returns null for unknown category without user config", () => {
// given // given
@@ -1894,7 +2020,7 @@ describe("sisyphus-task", () => {
describe("browserProvider propagation", () => { describe("browserProvider propagation", () => {
test("should resolve agent-browser skill when browserProvider is passed", async () => { test("should resolve agent-browser skill when browserProvider is passed", async () => {
// given - delegate_task configured with browserProvider: "agent-browser" // given - task configured with browserProvider: "agent-browser"
const { createDelegateTask } = require("./tools") const { createDelegateTask } = require("./tools")
let promptBody: any let promptBody: any
@@ -1949,7 +2075,7 @@ describe("sisyphus-task", () => {
}, { timeout: 20000 }) }, { timeout: 20000 })
test("should NOT resolve agent-browser skill when browserProvider is not set", async () => { test("should NOT resolve agent-browser skill when browserProvider is not set", async () => {
// given - delegate_task without browserProvider (defaults to playwright) // given - task without browserProvider (defaults to playwright)
const { createDelegateTask } = require("./tools") const { createDelegateTask } = require("./tools")
const mockManager = { launch: async () => ({}) } const mockManager = { launch: async () => ({}) }
@@ -2720,8 +2846,8 @@ describe("sisyphus-task", () => {
}, { timeout: 20000 }) }, { timeout: 20000 })
}) })
describe("prometheus subagent delegate_task permission", () => { describe("prometheus subagent task permission", () => {
test("prometheus subagent should have delegate_task permission enabled", async () => { test("prometheus subagent should have task permission enabled", async () => {
// given - sisyphus delegates to prometheus // given - sisyphus delegates to prometheus
const { createDelegateTask } = require("./tools") const { createDelegateTask } = require("./tools")
let promptBody: any let promptBody: any
@@ -2759,7 +2885,7 @@ describe("sisyphus-task", () => {
// when - sisyphus delegates to prometheus // when - sisyphus delegates to prometheus
await tool.execute( await tool.execute(
{ {
description: "Test prometheus delegate_task permission", description: "Test prometheus task permission",
prompt: "Create a plan", prompt: "Create a plan",
subagent_type: "prometheus", subagent_type: "prometheus",
run_in_background: false, run_in_background: false,
@@ -2768,11 +2894,11 @@ describe("sisyphus-task", () => {
toolContext toolContext
) )
// then - prometheus should have delegate_task permission // then - prometheus should have task permission
expect(promptBody.tools.delegate_task).toBe(true) expect(promptBody.tools.task).toBe(true)
}, { timeout: 20000 }) }, { timeout: 20000 })
test("non-prometheus subagent should NOT have delegate_task permission", async () => { test("non-prometheus subagent should NOT have task permission", async () => {
// given - sisyphus delegates to oracle (non-prometheus) // given - sisyphus delegates to oracle (non-prometheus)
const { createDelegateTask } = require("./tools") const { createDelegateTask } = require("./tools")
let promptBody: any let promptBody: any
@@ -2810,7 +2936,7 @@ describe("sisyphus-task", () => {
// when - sisyphus delegates to oracle // when - sisyphus delegates to oracle
await tool.execute( await tool.execute(
{ {
description: "Test oracle no delegate_task permission", description: "Test oracle no task permission",
prompt: "Consult on architecture", prompt: "Consult on architecture",
subagent_type: "oracle", subagent_type: "oracle",
run_in_background: false, run_in_background: false,
@@ -2819,8 +2945,8 @@ describe("sisyphus-task", () => {
toolContext toolContext
) )
// then - oracle should NOT have delegate_task permission // then - oracle should NOT have task permission
expect(promptBody.tools.delegate_task).toBe(false) expect(promptBody.tools.task).toBe(false)
}, { timeout: 20000 }) }, { timeout: 20000 })
}) })

View File

@@ -86,6 +86,13 @@ Prompts MUST be in English.`
async execute(args: DelegateTaskArgs, toolContext) { async execute(args: DelegateTaskArgs, toolContext) {
const ctx = toolContext as ToolContextWithMetadata const ctx = toolContext as ToolContextWithMetadata
if (args.category && !args.subagent_type) {
args.subagent_type = "sisyphus-junior"
}
await ctx.metadata?.({
title: args.description,
})
if (args.run_in_background === undefined) { if (args.run_in_background === undefined) {
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`) throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
} }
@@ -116,7 +123,7 @@ Prompts MUST be in English.`
return executeSyncContinuation(args, ctx, options) return executeSyncContinuation(args, ctx, options)
} }
if (args.category && args.subagent_type) { if (args.category && args.subagent_type && args.subagent_type !== "sisyphus-junior") {
return `Invalid arguments: Provide EITHER category OR subagent_type, not both.` return `Invalid arguments: Provide EITHER category OR subagent_type, not both.`
} }
@@ -157,7 +164,7 @@ Prompts MUST be in English.`
const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
log("[delegate_task] unstable agent detection", { log("[task] unstable agent detection", {
category: args.category, category: args.category,
actualModel, actualModel,
isUnstableAgent, isUnstableAgent,

View File

@@ -28,7 +28,12 @@ export interface ToolContextWithMetadata {
messageID: string messageID: string
agent: string agent: string
abort: AbortSignal abort: AbortSignal
metadata?: (input: { title?: string; metadata?: Record<string, unknown> }) => void metadata?: (input: { title?: string; metadata?: Record<string, unknown> }) => void | Promise<void>
/**
* Tool call ID injected by OpenCode's internal context (not in plugin ToolContext type,
* but present at runtime via spread in fromPlugin()). Used for metadata store keying.
*/
callID?: string
} }
export interface SyncSessionCreatedEvent { export interface SyncSessionCreatedEvent {

View File

@@ -70,7 +70,7 @@ Returns summary format: id, subject, status, owner, blockedBy (not full descript
return JSON.stringify({ return JSON.stringify({
tasks: summaries, tasks: summaries,
reminder: "1 task = 1 delegate_task. Maximize parallel execution by running independent tasks (tasks with empty blockedBy) concurrently." reminder: "1 task = 1 task. Maximize parallel execution by running independent tasks (tasks with empty blockedBy) concurrently."
}) })
}, },
}) })