diff --git a/src/agents/atlas/default.ts b/src/agents/atlas/default.ts index dfe5cf5f0..2568d5111 100644 --- a/src/agents/atlas/default.ts +++ b/src/agents/atlas/default.ts @@ -274,13 +274,13 @@ ACCUMULATED WISDOM: **For exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) -task(subagent_type="librarian", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) +task(subagent_type="librarian", load_skills=[], run_in_background=true, ...) \`\`\` **For task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/atlas/gpt.ts b/src/agents/atlas/gpt.ts index d7d20fd70..d81620e69 100644 --- a/src/agents/atlas/gpt.ts +++ b/src/agents/atlas/gpt.ts @@ -231,12 +231,12 @@ ACCUMULATED WISDOM: [from notepad] **Exploration (explore/librarian)**: ALWAYS background \`\`\`typescript -task(subagent_type="explore", run_in_background=true, ...) +task(subagent_type="explore", load_skills=[], run_in_background=true, ...) \`\`\` **Task execution**: NEVER background \`\`\`typescript -task(category="...", run_in_background=false, ...) +task(category="...", load_skills=[...], run_in_background=false, ...) \`\`\` **Parallel task groups**: Invoke multiple in ONE message diff --git a/src/agents/prometheus/high-accuracy-mode.ts b/src/agents/prometheus/high-accuracy-mode.ts index d6ecc821f..5eca99a86 100644 --- a/src/agents/prometheus/high-accuracy-mode.ts +++ b/src/agents/prometheus/high-accuracy-mode.ts @@ -17,6 +17,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION while (true) { const result = task( subagent_type="momus", + load_skills=[], prompt=".sisyphus/plans/{name}.md", run_in_background=false ) diff --git a/src/agents/prometheus/interview-mode.ts b/src/agents/prometheus/interview-mode.ts index 8692fd2f2..5d445f0cb 100644 --- a/src/agents/prometheus/interview-mode.ts +++ b/src/agents/prometheus/interview-mode.ts @@ -66,8 +66,8 @@ Or should I just note down this single fix?" **Research First:** \`\`\`typescript // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find) -task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) -task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true) \`\`\` **Interview Focus:** @@ -91,9 +91,9 @@ task(subagent_type="explore", prompt="I'm about to modify [affected code] and ne \`\`\`typescript // Launch BEFORE asking user questions // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST -task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) -task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true) \`\`\` **Interview Focus** (AFTER research): @@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js Run this check: \`\`\`typescript -task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true) \`\`\` #### Step 2: Ask the Test Question (MANDATORY) @@ -230,13 +230,13 @@ Add to draft immediately: **Research First:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true) \`\`\` **Oracle Consultation** (recommend when stakes are high): \`\`\`typescript -task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false) +task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false) \`\`\` **Interview Focus:** @@ -253,9 +253,9 @@ task(subagent_type="oracle", prompt="Architecture consultation needed: [context] **Parallel Investigation:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) -task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true) \`\`\` **Interview Focus:** @@ -281,17 +281,17 @@ task(subagent_type="librarian", prompt="I'm looking for battle-tested implementa **For Understanding Codebase:** \`\`\`typescript -task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) +task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true) \`\`\` **For External Knowledge:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true) \`\`\` **For Implementation Examples:** \`\`\`typescript -task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) +task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true) \`\`\` ## Interview Mode Anti-Patterns diff --git a/src/agents/prometheus/plan-generation.ts b/src/agents/prometheus/plan-generation.ts index 3443d6888..f5c1270e8 100644 --- a/src/agents/prometheus/plan-generation.ts +++ b/src/agents/prometheus/plan-generation.ts @@ -61,6 +61,7 @@ todoWrite([ \`\`\`typescript task( subagent_type="metis", + load_skills=[], prompt=\`Review this planning session before I generate the work plan: **User's Goal**: {summarize what user wants} diff --git a/src/hooks/keyword-detector/ultrawork/default.ts b/src/hooks/keyword-detector/ultrawork/default.ts index fb7fce31d..93ddc648a 100644 --- a/src/hooks/keyword-detector/ultrawork/default.ts +++ b/src/hooks/keyword-detector/ultrawork/default.ts @@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST. | Architecture decision needed | MUST call plan agent | \`\`\` -task(subagent_type="plan", prompt="") +task(subagent_type="plan", load_skills=[], prompt="") \`\`\` **WHY PLAN AGENT IS MANDATORY:** @@ -119,9 +119,9 @@ task(subagent_type="plan", prompt="") | Scenario | Action | |----------|--------| -| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="")\` | -| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: ")\` | -| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` | +| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="")\` | +| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: ")\` | +| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` | **WHY SESSION_ID IS CRITICAL:** - Plan agent retains FULL conversation context @@ -131,10 +131,10 @@ task(subagent_type="plan", prompt="") \`\`\` // WRONG: Starting fresh loses all context -task(subagent_type="plan", prompt="Here's more info...") +task(subagent_type="plan", load_skills=[], prompt="Here's more info...") // CORRECT: Resume preserves everything -task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") +task(session_id="ses_abc123", load_skills=[], prompt="Here's my answer to your question: ...") \`\`\` **FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.** @@ -147,10 +147,10 @@ task(session_id="ses_abc123", prompt="Here's my answer to your question: ...") | Task Type | Action | Why | |-----------|--------|-----| -| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient | -| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge | -| Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list | -| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic | +| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient | +| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge | +| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list | +| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic | | Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed | | Implementation | task(category="...", load_skills=[...]) | Domain-optimized models | diff --git a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts index 9309f4294..a9258e0de 100644 --- a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts +++ b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts @@ -73,10 +73,10 @@ Use these when they provide clear value based on the decision framework above: | Resource | When to Use | How to Use | |----------|-------------|------------| -| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` | -| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` | -| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` | -| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` | +| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", load_skills=[], run_in_background=true, ...)\` | +| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)\` | +| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", load_skills=[], ...)\` | +| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", load_skills=[], ...)\` | | task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` | diff --git a/src/hooks/keyword-detector/ultrawork/planner.ts b/src/hooks/keyword-detector/ultrawork/planner.ts index 426926f48..e152221f4 100644 --- a/src/hooks/keyword-detector/ultrawork/planner.ts +++ b/src/hooks/keyword-detector/ultrawork/planner.ts @@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans. ### Research Protocol 1. **Fire parallel background agents** for comprehensive context: \`\`\` - task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true) - task(agent="explore", prompt="Find test infrastructure and conventions", background=true) - task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true) + task(subagent_type="explore", load_skills=[], prompt="Find existing patterns for [topic] in codebase", run_in_background=true) + task(subagent_type="explore", load_skills=[], prompt="Find test infrastructure and conventions", run_in_background=true) + task(subagent_type="librarian", load_skills=[], prompt="Find official docs and best practices for [technology]", run_in_background=true) \`\`\` 2. **Wait for results** before planning - rushed plans fail 3. **Synthesize findings** into informed requirements diff --git a/src/tools/delegate-task/tools.test.ts b/src/tools/delegate-task/tools.test.ts index 4e45a7981..9fb4224bb 100644 --- a/src/tools/delegate-task/tools.test.ts +++ b/src/tools/delegate-task/tools.test.ts @@ -849,30 +849,19 @@ describe("sisyphus-task", () => { }) describe("skills parameter", () => { - test("load_skills defaults to empty array when not provided (undefined)", async () => { + test("skills parameter is required - throws error when not provided", async () => { // given const { createDelegateTask } = require("./tools") - let promptBody: any const mockManager = { launch: async () => ({}) } - - const promptMock = async (input: any) => { - promptBody = input.body - return { data: {} } - } - const mockClient = { app: { agents: async () => ({ data: [] }) }, config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) }, session: { - get: async () => ({ data: { directory: "/project" } }), - create: async () => ({ data: { id: "ses_default_skills" } }), - prompt: promptMock, - promptAsync: promptMock, - messages: async () => ({ - data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }] - }), - status: async () => ({ data: {} }), + create: async () => ({ data: { id: "test-session" } }), + prompt: async () => ({ data: {} }), + promptAsync: async () => ({ data: {} }), + messages: async () => ({ data: [] }), }, } @@ -888,8 +877,9 @@ describe("sisyphus-task", () => { abort: new AbortController().signal, } - // when - load_skills not provided (undefined) - should default to [] - await tool.execute( + // when - skills not provided (undefined) + // then - should throw error about missing skills + await expect(tool.execute( { description: "Test task", prompt: "Do something", @@ -897,11 +887,8 @@ describe("sisyphus-task", () => { run_in_background: false, }, toolContext - ) - - // then - should proceed without error, prompt should be called - expect(promptBody).toBeDefined() - }, { timeout: 20000 }) + )).rejects.toThrow("IT IS HIGHLY RECOMMENDED") + }) test("null skills throws error", async () => { // given diff --git a/src/tools/delegate-task/tools.ts b/src/tools/delegate-task/tools.ts index 582eb11fd..1db72408c 100644 --- a/src/tools/delegate-task/tools.ts +++ b/src/tools/delegate-task/tools.ts @@ -74,7 +74,7 @@ Prompts MUST be in English.` return tool({ description, args: { - load_skills: tool.schema.array(tool.schema.string()).default([]).describe("Skill names to inject. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), + load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."), description: tool.schema.string().describe("Short task description (3-5 words)"), prompt: tool.schema.string().describe("Full detailed prompt for the agent"), run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"), @@ -97,7 +97,7 @@ Prompts MUST be in English.` throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`) } if (args.load_skills === undefined) { - args.load_skills = [] + throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`) } if (args.load_skills === null) { throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`)