Merge pull request #1663 from code-yeongyu/fix/revert-load-skills-default

fix: revert load_skills default and enforce via prompts instead
2026-02-08 16:36:53 +09:00
parent 0743855b40 582e0ead27
commit cd5485a472
10 changed files with 51 additions and 62 deletions
--- a/src/agents/atlas/default.ts
+++ b/src/agents/atlas/default.ts
@@ -274,13 +274,13 @@ ACCUMULATED WISDOM:

 **For exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-task(subagent_type="explore", run_in_background=true, ...)
-task(subagent_type="librarian", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
+task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **For task execution**: NEVER background
 \`\`\`typescript
-task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
--- a/src/agents/atlas/gpt.ts
+++ b/src/agents/atlas/gpt.ts
@@ -231,12 +231,12 @@ ACCUMULATED WISDOM: [from notepad]
 <parallel_execution>
 **Exploration (explore/librarian)**: ALWAYS background
 \`\`\`typescript
-task(subagent_type="explore", run_in_background=true, ...)
+task(subagent_type="explore", load_skills=[], run_in_background=true, ...)
 \`\`\`

 **Task execution**: NEVER background
 \`\`\`typescript
-task(category="...", run_in_background=false, ...)
+task(category="...", load_skills=[...], run_in_background=false, ...)
 \`\`\`

 **Parallel task groups**: Invoke multiple in ONE message
--- a/src/agents/prometheus/high-accuracy-mode.ts
+++ b/src/agents/prometheus/high-accuracy-mode.ts
@@ -17,6 +17,7 @@ export const PROMETHEUS_HIGH_ACCURACY_MODE = `# PHASE 3: PLAN GENERATION
 while (true) {
  const result = task(
    subagent_type="momus",
+    load_skills=[],
    prompt=".sisyphus/plans/{name}.md",
    run_in_background=false
  )
--- a/src/agents/prometheus/interview-mode.ts
+++ b/src/agents/prometheus/interview-mode.ts
@@ -66,8 +66,8 @@ Or should I just note down this single fix?"
 **Research First:**
 \`\`\`typescript
 // Prompt structure: CONTEXT (what I'm doing) + GOAL (what I'm trying to achieve) + QUESTION (what I need to know) + REQUEST (what to find)
-task(subagent_type="explore", prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
-task(subagent_type="explore", prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm refactoring [target] and need to understand its impact scope before making changes. Find all usages via lsp_find_references - show calling code, patterns of use, and potential breaking points.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm about to modify [affected code] and need to ensure behavior preservation. Find existing test coverage - which tests exercise this code, what assertions exist, and any gaps in coverage.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -91,9 +91,9 @@ task(subagent_type="explore", prompt="I'm about to modify [affected code] and ne
 \`\`\`typescript
 // Launch BEFORE asking user questions
 // Prompt structure: CONTEXT + GOAL + QUESTION + REQUEST
-task(subagent_type="explore", prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
-task(subagent_type="explore", prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
-task(subagent_type="librarian", prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm building a new [feature] and want to maintain codebase consistency. Find similar implementations in this project - their structure, patterns used, and conventions to follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm adding [feature type] to the project and need to understand existing conventions. Find how similar features are organized - file structure, naming patterns, and architectural approach.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [technology] and want to follow established best practices. Find official documentation and community recommendations - setup patterns, common pitfalls, and production-ready examples.", run_in_background=true)
 \`\`\`

 **Interview Focus** (AFTER research):
@@ -132,7 +132,7 @@ Based on your stack, I'd recommend NextAuth.js - it integrates well with Next.js

 Run this check:
 \`\`\`typescript
-task(subagent_type="explore", prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm assessing this project's test setup before planning work that may require TDD. I need to understand what testing capabilities exist. Find test infrastructure: package.json test scripts, config files (jest.config, vitest.config, pytest.ini), and existing test files. Report: 1) Does test infra exist? 2) What framework? 3) Example test patterns.", run_in_background=true)
 \`\`\`

 #### Step 2: Ask the Test Question (MANDATORY)
@@ -230,13 +230,13 @@ Add to draft immediately:

 **Research First:**
 \`\`\`typescript
-task(subagent_type="explore", prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
-task(subagent_type="librarian", prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm planning architectural changes and need to understand the current system design. Find existing architecture: module boundaries, dependency patterns, data flow, and key abstractions used.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm designing architecture for [domain] and want to make informed decisions. Find architectural best practices - proven patterns, trade-offs, and lessons learned from similar systems.", run_in_background=true)
 \`\`\`

 **Oracle Consultation** (recommend when stakes are high):
 \`\`\`typescript
-task(subagent_type="oracle", prompt="Architecture consultation needed: [context]...", run_in_background=false)
+task(subagent_type="oracle", load_skills=[], prompt="Architecture consultation needed: [context]...", run_in_background=false)
 \`\`\`

 **Interview Focus:**
@@ -253,9 +253,9 @@ task(subagent_type="oracle", prompt="Architecture consultation needed: [context]

 **Parallel Investigation:**
 \`\`\`typescript
-task(subagent_type="explore", prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
-task(subagent_type="librarian", prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
-task(subagent_type="librarian", prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm researching how to implement [feature] and need to understand current approach. Find how X is currently handled in this codebase - implementation details, edge cases covered, and any known limitations.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing Y and need authoritative guidance. Find official documentation - API reference, configuration options, and recommended usage patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm looking for battle-tested implementations of Z. Find open source projects that solve this - focus on production-quality code, how they handle edge cases, and any gotchas documented.", run_in_background=true)
 \`\`\`

 **Interview Focus:**
@@ -281,17 +281,17 @@ task(subagent_type="librarian", prompt="I'm looking for battle-tested implementa

 **For Understanding Codebase:**
 \`\`\`typescript
-task(subagent_type="explore", prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
+task(subagent_type="explore", load_skills=[], prompt="I'm working on [topic] and need to understand how it's organized in this project. Find all related files - show the structure, patterns used, and conventions I should follow.", run_in_background=true)
 \`\`\`

 **For External Knowledge:**
 \`\`\`typescript
-task(subagent_type="librarian", prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm integrating [library] and need to understand [specific feature]. Find official documentation - API details, configuration options, and recommended best practices.", run_in_background=true)
 \`\`\`

 **For Implementation Examples:**
 \`\`\`typescript
-task(subagent_type="librarian", prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
+task(subagent_type="librarian", load_skills=[], prompt="I'm implementing [feature] and want to learn from existing solutions. Find open source implementations - focus on production-quality code, architecture decisions, and common patterns.", run_in_background=true)
 \`\`\`

 ## Interview Mode Anti-Patterns
--- a/src/agents/prometheus/plan-generation.ts
+++ b/src/agents/prometheus/plan-generation.ts
@@ -61,6 +61,7 @@ todoWrite([
 \`\`\`typescript
 task(
  subagent_type="metis",
+  load_skills=[],
  prompt=\`Review this planning session before I generate the work plan:

  **User's Goal**: {summarize what user wants}
--- a/src/hooks/keyword-detector/ultrawork/default.ts
+++ b/src/hooks/keyword-detector/ultrawork/default.ts
@@ -104,7 +104,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
 | Architecture decision needed | MUST call plan agent |

 \`\`\`
-task(subagent_type="plan", prompt="<gathered context + user request>")
+task(subagent_type="plan", load_skills=[], prompt="<gathered context + user request>")
 \`\`\`

 **WHY PLAN AGENT IS MANDATORY:**
@@ -119,9 +119,9 @@ task(subagent_type="plan", prompt="<gathered context + user request>")

 | Scenario | Action |
 |----------|--------|
-| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", prompt="<your answer>")\` |
-| Need to refine the plan | \`task(session_id="{returned_session_id}", prompt="Please adjust: <feedback>")\` |
-| Plan needs more detail | \`task(session_id="{returned_session_id}", prompt="Add more detail to Task N")\` |
+| Plan agent asks clarifying questions | \`task(session_id="{returned_session_id}", load_skills=[], prompt="<your answer>")\` |
+| Need to refine the plan | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Please adjust: <feedback>")\` |
+| Plan needs more detail | \`task(session_id="{returned_session_id}", load_skills=[], prompt="Add more detail to Task N")\` |

 **WHY SESSION_ID IS CRITICAL:**
 - Plan agent retains FULL conversation context
@@ -131,10 +131,10 @@ task(subagent_type="plan", prompt="<gathered context + user request>")

 \`\`\`
 // WRONG: Starting fresh loses all context
-task(subagent_type="plan", prompt="Here's more info...")
+task(subagent_type="plan", load_skills=[], prompt="Here's more info...")

 // CORRECT: Resume preserves everything
-task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")
+task(session_id="ses_abc123", load_skills=[], prompt="Here's my answer to your question: ...")
 \`\`\`

 **FAILURE TO CALL PLAN AGENT = INCOMPLETE WORK.**
@@ -147,10 +147,10 @@ task(session_id="ses_abc123", prompt="Here's my answer to your question: ...")

 | Task Type | Action | Why |
 |-----------|--------|-----|
-| Codebase exploration | task(subagent_type="explore", run_in_background=true) | Parallel, context-efficient |
-| Documentation lookup | task(subagent_type="librarian", run_in_background=true) | Specialized knowledge |
-| Planning | task(subagent_type="plan") | Parallel task graph + structured TODO list |
-| Hard problem (conventional) | task(subagent_type="oracle") | Architecture, debugging, complex logic |
+| Codebase exploration | task(subagent_type="explore", load_skills=[], run_in_background=true) | Parallel, context-efficient |
+| Documentation lookup | task(subagent_type="librarian", load_skills=[], run_in_background=true) | Specialized knowledge |
+| Planning | task(subagent_type="plan", load_skills=[]) | Parallel task graph + structured TODO list |
+| Hard problem (conventional) | task(subagent_type="oracle", load_skills=[]) | Architecture, debugging, complex logic |
 | Hard problem (non-conventional) | task(category="artistry", load_skills=[...]) | Different approach needed |
 | Implementation | task(category="...", load_skills=[...]) | Domain-optimized models |

--- a/src/hooks/keyword-detector/ultrawork/gpt5.2.ts
+++ b/src/hooks/keyword-detector/ultrawork/gpt5.2.ts
@@ -73,10 +73,10 @@ Use these when they provide clear value based on the decision framework above:

 | Resource | When to Use | How to Use |
 |----------|-------------|------------|
-| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", run_in_background=true, ...)\` |
-| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", run_in_background=true, ...)\` |
-| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", ...)\` |
-| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", ...)\` |
+| explore agent | Need codebase patterns you don't have | \`task(subagent_type="explore", load_skills=[], run_in_background=true, ...)\` |
+| librarian agent | External library docs, OSS examples | \`task(subagent_type="librarian", load_skills=[], run_in_background=true, ...)\` |
+| oracle agent | Stuck on architecture/debugging after 2+ attempts | \`task(subagent_type="oracle", load_skills=[], ...)\` |
+| plan agent | Complex multi-step with dependencies (5+ steps) | \`task(subagent_type="plan", load_skills=[], ...)\` |
 | task category | Specialized work matching a category | \`task(category="...", load_skills=[...])\` |

 <tool_usage_rules>
--- a/src/hooks/keyword-detector/ultrawork/planner.ts
+++ b/src/hooks/keyword-detector/ultrawork/planner.ts
@@ -38,9 +38,9 @@ You ARE the planner. Your job: create bulletproof work plans.
 ### Research Protocol
 1. **Fire parallel background agents** for comprehensive context:
   \`\`\`
-   task(agent="explore", prompt="Find existing patterns for [topic] in codebase", background=true)
-   task(agent="explore", prompt="Find test infrastructure and conventions", background=true)
-   task(agent="librarian", prompt="Find official docs and best practices for [technology]", background=true)
+   task(subagent_type="explore", load_skills=[], prompt="Find existing patterns for [topic] in codebase", run_in_background=true)
+   task(subagent_type="explore", load_skills=[], prompt="Find test infrastructure and conventions", run_in_background=true)
+   task(subagent_type="librarian", load_skills=[], prompt="Find official docs and best practices for [technology]", run_in_background=true)
   \`\`\`
 2. **Wait for results** before planning - rushed plans fail
 3. **Synthesize findings** into informed requirements
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -849,30 +849,19 @@ describe("sisyphus-task", () => {
  })

  describe("skills parameter", () => {
-    test("load_skills defaults to empty array when not provided (undefined)", async () => {
+    test("skills parameter is required - throws error when not provided", async () => {
      // given
      const { createDelegateTask } = require("./tools")
-      let promptBody: any

      const mockManager = { launch: async () => ({}) }
-
-      const promptMock = async (input: any) => {
-        promptBody = input.body
-        return { data: {} }
-      }
-
      const mockClient = {
        app: { agents: async () => ({ data: [] }) },
        config: { get: async () => ({ data: { model: SYSTEM_DEFAULT_MODEL } }) },
        session: {
-          get: async () => ({ data: { directory: "/project" } }),
-          create: async () => ({ data: { id: "ses_default_skills" } }),
-          prompt: promptMock,
-          promptAsync: promptMock,
-          messages: async () => ({
-            data: [{ info: { role: "assistant" }, parts: [{ type: "text", text: "Done" }] }]
-          }),
-          status: async () => ({ data: {} }),
+          create: async () => ({ data: { id: "test-session" } }),
+          prompt: async () => ({ data: {} }),
+          promptAsync: async () => ({ data: {} }),
+          messages: async () => ({ data: [] }),
        },
      }

@@ -888,8 +877,9 @@ describe("sisyphus-task", () => {
        abort: new AbortController().signal,
      }

-      // when - load_skills not provided (undefined) - should default to []
-      await tool.execute(
+      // when - skills not provided (undefined)
+      // then - should throw error about missing skills
+      await expect(tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
@@ -897,11 +887,8 @@ describe("sisyphus-task", () => {
          run_in_background: false,
        },
        toolContext
-      )
-
-      // then - should proceed without error, prompt should be called
-      expect(promptBody).toBeDefined()
-    }, { timeout: 20000 })
+      )).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
+    })

     test("null skills throws error", async () => {
       // given
--- a/src/tools/delegate-task/tools.ts
+++ b/src/tools/delegate-task/tools.ts
@@ -74,7 +74,7 @@ Prompts MUST be in English.`
  return tool({
    description,
    args: {
-      load_skills: tool.schema.array(tool.schema.string()).default([]).describe("Skill names to inject. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
+      load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
      description: tool.schema.string().describe("Short task description (3-5 words)"),
      prompt: tool.schema.string().describe("Full detailed prompt for the agent"),
      run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"),
@@ -97,7 +97,7 @@ Prompts MUST be in English.`
        throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
      }
      if (args.load_skills === undefined) {
-        args.load_skills = []
+        throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`)
      }
      if (args.load_skills === null) {
        throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`)