Merge pull request #1663 from code-yeongyu/fix/revert-load-skills-default

fix: revert load_skills default and enforce via prompts instead
This commit is contained in:
YeonGyu-Kim
2026-02-08 16:36:53 +09:00
committed by GitHub
10 changed files with 51 additions and 62 deletions

View File

@@ -274,13 +274,13 @@ ACCUMULATED WISDOM:
**For exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="librarian", run_in_background=true, ...)
task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)
\`\`\`
**For task execution**: NEVER background
\`\`\`typescript
task(category="...", run_in_background=false, ...)
task(category="...", load_skills=[...], run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message

View File

@@ -231,12 +231,12 @@ ACCUMULATED WISDOM: [from notepad]
<parallel_execution>
**Exploration (explore/librarian)**: ALWAYS background
\`\`\`typescript
task(subagent_type="explore", run_in_background=true, ...)
task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
\`\`\`
**Task execution**: NEVER background
\`\`\`typescript
task(category="...", run_in_background=false, ...)
task(category="...", load_skills=[...], run_in_background=false, ...)
\`\`\`
**Parallel task groups**: Invoke multiple in ONE message

View File

@@ -17,6 +17,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
while (true) {
const result = task(
subagent_type="momus",
load_skills=[],
prompt=".sisyphus/plans/{name}.md",
run_in_background=false
)

View File

@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
**Research First:**
\`\`\`typescript
// Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -91,9 +91,9 @@ task(subagent_type="explore", prompt="I'm about to modify [affected code] and ne
\`\`\`typescript
// Launch BEFORE asking user questions
// Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
\`\`\`
**Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js
Run this check:
\`\`\`typescript
task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
\`\`\`
#### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:
**Research First:**
\`\`\`typescript
task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
\`\`\`
**Oracle Consultation** (recommend when stakes are high):
\`\`\`typescript
task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false)
\`\`\`
**Interview Focus:**
@@ -253,9 +253,9 @@ task(subagent_type="oracle", prompt="Architecture consultation needed: [context]
**Parallel Investigation:**
\`\`\`typescript
task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
\`\`\`
**Interview Focus:**
@@ -281,17 +281,17 @@ task(subagent_type="librarian", prompt="I'm looking for battle-tested implementa
**For Understanding Codebase:**
\`\`\`typescript
task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
\`\`\`
**For External Knowledge:**
\`\`\`typescript
task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
\`\`\`
**For Implementation Examples:**
\`\`\`typescript
task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
\`\`\`
## Interview Mode Anti-Patterns

View File

@@ -61,6 +61,7 @@ todoWrite([
\`\`\`typescript
task(
subagent_type="metis",
load_skills=[],
prompt=\`Review this planning session before I generate the work plan:
**User's Goal**: {summarize what user wants}

View File

@@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
| Architecture decision needed | MUST call plan agent |
\`\`\`
task(subagent_type="plan", prompt="<gathered context + user request>")
task(subagent_type="plan", load_skills=[], prompt="<gathered context + user request>")
\`\`\`
**WHY PLAN AGENT IS MANDATORY:**
@@ -119,9 +119,9 @@ task(subagent_type="plan", prompt="<gathered context + user request>")
| Scenario | Action |
|----------|--------|
| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="<your answer>")\` |
| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="<your answer>")\` |
| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: <feedback>")\` |
| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` |
**WHY SESSION_ID IS CRITICAL:**
- Plan agent retains FULL conversation context
@@ -131,10 +131,10 @@ task(subagent_type="plan", prompt="<gathered context + user request>")
\`\`\`
// WRONG: Starting fresh loses all context
task(subagent_type="plan", prompt="Here's more info...")
task(subagent_type="plan", load_skills=[], prompt="Here's more info...")
// CORRECT: Resume preserves everything
task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
task(session_id="ses_abc123", load_skills=[], prompt="Here's my answer to your question: ...")
\`\`\`
**FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
@@ -147,10 +147,10 @@ task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
| Task Type | Action | Why |
|-----------|--------|-----|
| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
| Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list |
| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic |
| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient |
| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge |
| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list |
| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic |
| Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed |
| Implementation | task(category="...", load_skills=[...]) | Domain-optimized models |

View File

@@ -73,10 +73,10 @@ Use these when they provide clear value based on the decision framework above:
| Resource | When to Use | How to Use |
|----------|-------------|------------|
| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` |
| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` |
| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` |
| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` |
| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", load_skills=[], run_in_background=true, ...)\` |
| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)\` |
| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", load_skills=[], ...)\` |
| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", load_skills=[], ...)\` |
| task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` |
<tool_usage_rules>

View File

@@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans.
### Research Protocol
1. **Fire parallel background agents** for comprehensive context:
\`\`\`
task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true)
task(agent="explore", prompt="Find test infrastructure and conventions", background=true)
task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true)
task(subagent_type="explore", load_skills=[], prompt="Find existing patterns for [topic] in codebase", run_in_background=true)
task(subagent_type="explore", load_skills=[], prompt="Find test infrastructure and conventions", run_in_background=true)
task(subagent_type="librarian", load_skills=[], prompt="Find official docs and best practices for [technology]", run_in_background=true)
\`\`\`
2. **Wait for results** before planning - rushed plans fail
3. **Synthesize findings** into informed requirements

View File

@@ -849,30 +849,19 @@ describe("sisyphus-task", () => {
})
describe("skills parameter", () => {
test("load_skills defaults to empty array when not provided (undefined)", async () => {
test("skills parameter is required - throws error when not provided", async () => {
// given
const { createDelegateTask } = require("./tools")
let promptBody: any
const mockManager = { launch: async () => ({}) }
const promptMock = async (input: any) => {
promptBody = input.body
return { data: {} }
}
const mockClient = {
app: { agents: async () => ({ data: [] }) },
config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
session: {
get: async () => ({ data: { directory: "/project" } }),
create: async () => ({ data: { id: "ses_default_skills" } }),
prompt: promptMock,
promptAsync: promptMock,
messages: async () => ({
data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }]
}),
status: async () => ({ data: {} }),
create: async () => ({ data: { id: "test-session" } }),
prompt: async () => ({ data: {} }),
promptAsync: async () => ({ data: {} }),
messages: async () => ({ data: [] }),
},
}
@@ -888,8 +877,9 @@ describe("sisyphus-task", () => {
abort: new AbortController().signal,
}
// when - load_skills not provided (undefined) - should default to []
await tool.execute(
// when - skills not provided (undefined)
// then - should throw error about missing skills
await expect(tool.execute(
{
description: "Test task",
prompt: "Do something",
@@ -897,11 +887,8 @@ describe("sisyphus-task", () => {
run_in_background: false,
},
toolContext
)
// then - should proceed without error, prompt should be called
expect(promptBody).toBeDefined()
}, { timeout: 20000 })
)).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
})
test("null skills throws error", async () => {
// given

View File

@@ -74,7 +74,7 @@ Prompts MUST be in English.`
return tool({
description,
args: {
load_skills: tool.schema.array(tool.schema.string()).default([]).describe("Skill names to inject. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
description: tool.schema.string().describe("Short task description (3-5 words)"),
prompt: tool.schema.string().describe("Full detailed prompt for the agent"),
run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"),
@@ -97,7 +97,7 @@ Prompts MUST be in English.`
throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
}
if (args.load_skills === undefined) {
args.load_skills = []
throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`)
}
if (args.load_skills === null) {
throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`)