refactor(delegate-task): enhance delegation with dynamic descriptions

Generate tool description dynamically from available categories and skills. Remove hardcoded DELEGATE_TASK_DESCRIPTION constant. Improve parameter handling with unified 'subagent_type' field replacing 'agent'. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-22 22:46:23 +09:00
parent 0edfc7f36a
commit aa2b052d28
4 changed files with 281 additions and 184 deletions
--- a/src/tools/delegate-task/constants.ts
+++ b/src/tools/delegate-task/constants.ts
@@ -185,21 +185,4 @@ export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
  writing: "Documentation, prose, technical writing",
 }
 const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")
 export const DELEGATE_TASK_DESCRIPTION = `Spawn agent task with category-based or direct agent selection.
 MUTUALLY EXCLUSIVE: Provide EITHER category OR agent, not both (unless resuming).
 - category: Use predefined category (${BUILTIN_CATEGORIES}) → Spawns Sisyphus-Junior with category config
 - agent: Use specific agent directly (e.g., "oracle", "explore")
 - background: true=async (returns task_id), false=sync (waits for result). Default: false. Use background=true ONLY for parallel exploration with 5+ independent queries.
 - resume: Session ID to resume (from previous task output). Continues agent with FULL CONTEXT PRESERVED - saves tokens, maintains continuity.
 - skills: Array of skill names to prepend to prompt (e.g., ["playwright", "frontend-ui-ux"]). Use [] (empty array) if no skills needed.
 **WHEN TO USE resume:**
 - Task failed/incomplete → resume with "fix: [specific issue]"
 - Need follow-up on previous result → resume with additional question
 - Multi-turn conversation with same agent → always resume instead of new task
 Prompts MUST be in English.`
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -1,5 +1,5 @@
 import { describe, test, expect } from "bun:test"
-import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, DELEGATE_TASK_DESCRIPTION } from "./constants"
+import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS } from "./constants"
 import { resolveCategoryConfig } from "./tools"
 import type { CategoryConfig } from "../../config/schema"
@@ -70,19 +70,6 @@ describe("sisyphus-task", () => {
    })
  })
  describe("DELEGATE_TASK_DESCRIPTION", () => {
    test("documents background parameter as required with default false", () => {
      // #given / #when / #then
      expect(DELEGATE_TASK_DESCRIPTION).toContain("background")
      expect(DELEGATE_TASK_DESCRIPTION).toContain("Default: false")
    })
    test("warns about parallel exploration usage", () => {
      // #given / #when / #then
      expect(DELEGATE_TASK_DESCRIPTION).toContain("5+")
    })
  })
  describe("category delegation config validation", () => {
    test("returns error when systemDefaultModel is not configured", async () => {
      // #given a mock client with no model in config
@@ -118,7 +105,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -333,7 +320,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: true,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -394,7 +381,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "unspecified-high",
          run_in_background: true,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -451,7 +438,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "unspecified-high",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -466,14 +453,7 @@ describe("sisyphus-task", () => {
  })
  describe("skills parameter", () => {
-    test("DELEGATE_TASK_DESCRIPTION documents skills parameter with empty array option", () => {
+    test("skills parameter is required - throws error when not provided", async () => {
      // #given / #when / #then
      expect(DELEGATE_TASK_DESCRIPTION).toContain("skills")
      expect(DELEGATE_TASK_DESCRIPTION).toContain("Array of skill names")
      expect(DELEGATE_TASK_DESCRIPTION).toContain("[] (empty array) if no skills needed")
    })
    test("skills parameter is required - returns error when not provided", async () => {
      // #given
      const { createDelegateTask } = require("./tools")
@@ -501,7 +481,8 @@ describe("sisyphus-task", () => {
      }
      // #when - skills not provided (undefined)
-      const result = await tool.execute(
+      // #then - should throw error about missing skills
      await expect(tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
@@ -509,14 +490,10 @@ describe("sisyphus-task", () => {
          run_in_background: false,
        },
        toolContext
-      )
+      )).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
      // #then - should return error about missing skills
      expect(result).toContain("skills")
      expect(result).toContain("REQUIRED")
    })
-    test("null skills returns error", async () => {
+    test("null skills throws error", async () => {
      // #given
      const { createDelegateTask } = require("./tools")
@@ -544,22 +521,17 @@ describe("sisyphus-task", () => {
      }
      // #when - null passed
-      const result = await tool.execute(
+      // #then - should throw error about null
      await expect(tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: null,
+          load_skills: null,
        },
        toolContext
-      )
+      )).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
      // #then - should return error about null
      expect(result).toContain("Invalid arguments")
      expect(result).toContain("skills=null")
      expect(result).toContain("not allowed")
      expect(result).toContain("skills=[]")
    })
    test("empty array [] is allowed and proceeds without skill content", async () => {
@@ -597,14 +569,14 @@ describe("sisyphus-task", () => {
        abort: new AbortController().signal,
      }
-      // #when - empty array skills passed
+      // #when - empty array passed
      await tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: [],
        },
        toolContext
      )
@@ -670,7 +642,7 @@ describe("sisyphus-task", () => {
        prompt: "Continue the task",
        resume: "ses_resume_test",
        run_in_background: false,
-        skills: [],
+        load_skills: ["git-master"],
      },
      toolContext
    )
@@ -725,7 +697,7 @@ describe("sisyphus-task", () => {
        prompt: "Continue in background",
        resume: "ses_bg_resume",
        run_in_background: true,
-        skills: [],
+        load_skills: ["git-master"],
      },
      toolContext
    )
@@ -780,7 +752,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -840,7 +812,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -893,7 +865,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -947,7 +919,7 @@ describe("sisyphus-task", () => {
        prompt: "test",
        category: "custom-cat",
        run_in_background: false,
-        skills: []
+        load_skills: ["git-master"]
      }, toolContext)
      // #then
@@ -1012,14 +984,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something visual",
          category: "visual-engineering",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Gemini task completed successfully")
    }, { timeout: 20000 })
@@ -1070,7 +1042,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something visual",
          category: "visual-engineering",
          run_in_background: true,  // User explicitly says true - normal background
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -1131,7 +1103,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something smart",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -1195,14 +1167,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something artistic",
          category: "artistry",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Artistry result here")
    }, { timeout: 20000 })
@@ -1259,14 +1231,14 @@ describe("sisyphus-task", () => {
          prompt: "Write something",
          category: "writing",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Writing result here")
    }, { timeout: 20000 })
@@ -1329,14 +1301,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "my-unstable-cat",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Custom unstable result")
    }, { timeout: 20000 })
  })
--- a/src/tools/delegate-task/tools.ts
+++ b/src/tools/delegate-task/tools.ts
@@ -4,7 +4,7 @@ import { join } from "node:path"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { DelegateTaskArgs } from "./types"
 import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
-import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
+import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS } from "./constants"
 import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
 import { discoverSkills } from "../../features/opencode-skill-loader"
@@ -12,11 +12,13 @@ import { getTaskToastManager } from "../../features/task-toast-manager"
 import type { ModelFallbackInfo } from "../../features/task-toast-manager/types"
 import { subagentSessions, getSessionAgent } from "../../features/claude-code-session-state"
 import { log, getAgentToolRestrictions, resolveModel, getOpenCodeConfigPaths } from "../../shared"
 import { fetchAvailableModels } from "../../shared/model-availability"
 import { resolveModelWithFallback } from "../../shared/model-resolver"
 import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements"
 type OpencodeClient = PluginInput["client"]
 const SISYPHUS_JUNIOR_AGENT = "Sisyphus-Junior"
 const CATEGORY_EXAMPLES = Object.keys(DEFAULT_CATEGORIES).map(k => `'${k}'`).join(", ")
 function parseModelString(model: string): { providerID: string; modelID: string } | undefined {
  const parts = model.split("/")
@@ -83,7 +85,7 @@ function formatDetailedError(error: unknown, ctx: ErrorContext): string {
    lines.push(`- category: ${ctx.args.category ?? "(none)"}`)
    lines.push(`- subagent_type: ${ctx.args.subagent_type ?? "(none)"}`)
    lines.push(`- run_in_background: ${ctx.args.run_in_background}`)
-    lines.push(`- skills: [${ctx.args.skills?.join(", ") ?? ""}]`)
+    lines.push(`- load_skills: [${ctx.args.load_skills?.join(", ") ?? ""}]`)
    if (ctx.args.resume) {
      lines.push(`- resume: ${ctx.args.resume}`)
    }
@@ -178,33 +180,63 @@ export function buildSystemContent(input: BuildSystemContentInput): string | und
 export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefinition {
  const { manager, client, directory, userCategories, gitMasterConfig } = options
  const allCategories = { ...DEFAULT_CATEGORIES, ...userCategories }
  const categoryNames = Object.keys(allCategories)
  const categoryExamples = categoryNames.map(k => `'${k}'`).join(", ")
  const categoryList = categoryNames.map(name => {
    const userDesc = userCategories?.[name]?.description
    const builtinDesc = CATEGORY_DESCRIPTIONS[name]
    const desc = userDesc || builtinDesc
    return desc ? `  - ${name}: ${desc}` : `  - ${name}`
  }).join("\n")
  const description = `Spawn agent task with category-based or direct agent selection.
 MUTUALLY EXCLUSIVE: Provide EITHER category OR subagent_type, not both (unless resuming).
 - load_skills: ALWAYS REQUIRED. Pass at least one skill name (e.g., ["playwright"], ["git-master", "frontend-ui-ux"]).
 - category: Use predefined category → Spawns Sisyphus-Junior with category config
  Available categories:
 ${categoryList}
 - subagent_type: Use specific agent directly (e.g., "oracle", "explore")
 - run_in_background: true=async (returns task_id), false=sync (waits for result). Default: false. Use background=true ONLY for parallel exploration with 5+ independent queries.
 - resume: Session ID to resume (from previous task output). Continues agent with FULL CONTEXT PRESERVED - saves tokens, maintains continuity.
 **WHEN TO USE resume:**
 - Task failed/incomplete → resume with "fix: [specific issue]"
 - Need follow-up on previous result → resume with additional question
 - Multi-turn conversation with same agent → always resume instead of new task
 Prompts MUST be in English.`
  return tool({
-    description: DELEGATE_TASK_DESCRIPTION,
+    description,
    args: {
-      description: tool.schema.string().describe("Short task description"),
+      load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
      description: tool.schema.string().describe("Short task description (3-5 words)"),
      prompt: tool.schema.string().describe("Full detailed prompt for the agent"),
-      category: tool.schema.string().optional().describe(`Category name (e.g., ${CATEGORY_EXAMPLES}). Mutually exclusive with subagent_type.`),
+      run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"),
-      subagent_type: tool.schema.string().optional().describe("Agent name directly (e.g., 'oracle', 'explore'). Mutually exclusive with category."),
+      category: tool.schema.string().optional().describe(`Category (e.g., ${categoryExamples}). Mutually exclusive with subagent_type.`),
-      run_in_background: tool.schema.boolean().describe("Run in background. MUST be explicitly set. Use false for task delegation, true only for parallel exploration."),
+      subagent_type: tool.schema.string().optional().describe("Agent name (e.g., 'oracle', 'explore'). Mutually exclusive with category."),
-      resume: tool.schema.string().optional().describe("Session ID to resume - continues previous agent session with full context"),
+      resume: tool.schema.string().optional().describe("Session ID to resume"),
      skills: tool.schema.array(tool.schema.string()).describe("Array of skill names to prepend to the prompt. Use [] (empty array) if no skills needed."),
    },
    async execute(args: DelegateTaskArgs, toolContext) {
      const ctx = toolContext as ToolContextWithMetadata
      if (args.run_in_background === undefined) {
-        return `Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`
+        throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
      }
-      if (args.skills === undefined) {
+      if (args.load_skills === undefined) {
-        return `Invalid arguments: 'skills' parameter is REQUIRED. Use skills=[] if no skills are needed, or provide an array of skill names.`
+        throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`)
      }
-      if (args.skills === null) {
+      if (args.load_skills === null) {
-        return `Invalid arguments: skills=null is not allowed. Use skills=[] (empty array) if no skills are needed.`
+        throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`)
      }
      const runInBackground = args.run_in_background === true
      let skillContent: string | undefined
-      if (args.skills.length > 0) {
+      if (args.load_skills.length > 0) {
-        const { resolved, notFound } = await resolveMultipleSkillsAsync(args.skills, { gitMasterConfig })
+        const { resolved, notFound } = await resolveMultipleSkillsAsync(args.load_skills, { gitMasterConfig })
        if (notFound.length > 0) {
          const allSkills = await discoverSkills({ includeClaudeCodePaths: true })
          const available = allSkills.map(s => s.name).join(", ")
@@ -218,7 +250,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini
      const firstMessageAgent = messageDir ? findFirstMessageWithAgent(messageDir) : null
      const sessionAgent = getSessionAgent(ctx.sessionID)
      const parentAgent = ctx.agent ?? sessionAgent ?? firstMessageAgent ?? prevMessage?.agent
-      
+
      log("[delegate_task] parentAgent resolution", {
        sessionID: ctx.sessionID,
        messageDir,
@@ -246,7 +278,14 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini
            ctx.metadata?.({
              title: `Resume: ${task.description}`,
-              metadata: { sessionId: task.sessionID },
+              metadata: {
                prompt: args.prompt,
                agent: task.agent,
                load_skills: args.load_skills,
                description: args.description,
                run_in_background: args.run_in_background,
                sessionId: task.sessionID,
              },
            })
            return `Background task resumed.
@@ -283,7 +322,14 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
        ctx.metadata?.({
          title: `Resume: ${args.description}`,
-          metadata: { sessionId: args.resume, sync: true },
+          metadata: {
            prompt: args.prompt,
            load_skills: args.load_skills,
            description: args.description,
            run_in_background: args.run_in_background,
            sessionId: args.resume,
            sync: true,
          },
        })
        try {
@@ -344,7 +390,7 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`
        while (Date.now() - pollStart < 60000) {
          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
-          
+
          const elapsed = Date.now() - pollStart
          if (elapsed < MIN_STABILITY_TIME_MS) continue
@@ -402,7 +448,10 @@ Session ID: ${args.resume}
 ---
-${textContent || "(No text output)"}`
+${textContent || "(No text output)"}
 ---
 To resume this session: resume="${args.resume}"`
      }
      if (args.category && args.subagent_type) {
@@ -413,79 +462,109 @@ ${textContent || "(No text output)"}`
        return `Invalid arguments: Must provide either category or subagent_type.`
      }
-      // Fetch OpenCode config at boundary to get system default model
+       // Fetch OpenCode config at boundary to get system default model
-      let systemDefaultModel: string | undefined
+       let systemDefaultModel: string | undefined
-      try {
+       try {
-        const openCodeConfig = await client.config.get()
+         const openCodeConfig = await client.config.get()
-        systemDefaultModel = (openCodeConfig as { data?: { model?: string } })?.data?.model
+         systemDefaultModel = (openCodeConfig as { data?: { model?: string } })?.data?.model
-      } catch {
+       } catch {
-        // Config fetch failed, proceed without system default
+         // Config fetch failed, proceed without system default
-        systemDefaultModel = undefined
+         systemDefaultModel = undefined
-      }
+       }
-      let agentToUse: string
+       let agentToUse: string
-      let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
+       let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
-      let categoryPromptAppend: string | undefined
+       let categoryPromptAppend: string | undefined
-      const inheritedModel = parentModel
+       const inheritedModel = parentModel
-        ? `${parentModel.providerID}/${parentModel.modelID}`
+         ? `${parentModel.providerID}/${parentModel.modelID}`
-        : undefined
+         : undefined
-      let modelInfo: ModelFallbackInfo | undefined
+       let modelInfo: ModelFallbackInfo | undefined
-      if (args.category) {
+       if (args.category) {
-        // Guard: require system default model for category delegation
+         // Guard: require system default model for category delegation
-        if (!systemDefaultModel) {
+         if (!systemDefaultModel) {
-          const paths = getOpenCodeConfigPaths({ binary: "opencode", version: null })
+           const paths = getOpenCodeConfigPaths({ binary: "opencode", version: null })
-          return (
+           return (
-            'oh-my-opencode requires a default model.\n\n' +
+             'oh-my-opencode requires a default model.\n\n' +
-            `Add this to ${paths.configJsonc}:\n\n` +
+             `Add this to ${paths.configJsonc}:\n\n` +
-            '  "model": "anthropic/claude-sonnet-4-5"\n\n' +
+             '  "model": "anthropic/claude-sonnet-4-5"\n\n' +
-            '(Replace with your preferred provider/model)'
+             '(Replace with your preferred provider/model)'
-          )
+           )
-        }
+         }
-        const resolved = resolveCategoryConfig(args.category, {
+         const availableModels = await fetchAvailableModels(client)
-          userCategories,
+
-          inheritedModel,
+         const resolved = resolveCategoryConfig(args.category, {
-          systemDefaultModel,
+           userCategories,
           inheritedModel,
           systemDefaultModel,
         })
         if (!resolved) {
           return `Unknown category: "${args.category}". Available: ${Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ")}`
         }
         const requirement = CATEGORY_MODEL_REQUIREMENTS[args.category]
         let actualModel: string
         if (!requirement) {
           actualModel = resolved.model
           modelInfo = { model: actualModel, type: "system-default", source: "system-default" }
         } else {
          const { model: resolvedModel, source } = resolveModelWithFallback({
              userModel: userCategories?.[args.category]?.model,
              fallbackChain: requirement.fallbackChain,
              availableModels,
              systemDefaultModel,
            })
           actualModel = resolvedModel
           if (!parseModelString(actualModel)) {
             return `Invalid model format "${actualModel}". Expected "provider/model" format (e.g., "anthropic/claude-sonnet-4-5").`
           }
           let type: "user-defined" | "inherited" | "category-default" | "system-default"
           switch (source) {
              case "override":
                type = "user-defined"
                break
              case "provider-fallback":
                type = "category-default"
                break
              case "system-default":
                type = "system-default"
                break
           }
           modelInfo = { model: actualModel, type, source }
         }
         agentToUse = SISYPHUS_JUNIOR_AGENT
         const parsedModel = parseModelString(actualModel)
         categoryModel = parsedModel
           ? (requirement?.variant
             ? { ...parsedModel, variant: requirement.variant }
             : parsedModel)
           : undefined
         categoryPromptAppend = resolved.promptAppend || undefined
         const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
        // Handle both boolean false and string "false" due to potential serialization
        const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
        log("[delegate_task] unstable agent detection", {
          category: args.category,
          actualModel,
          isUnstableAgent,
          run_in_background_value: args.run_in_background,
          run_in_background_type: typeof args.run_in_background,
          isRunInBackgroundExplicitlyFalse,
          willForceBackground: isUnstableAgent && isRunInBackgroundExplicitlyFalse,
        })
        if (!resolved) {
          return `Unknown category: "${args.category}". Available: ${Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ")}`
        }
-        // Determine model source by comparing against the actual resolved model
+        if (isUnstableAgent && isRunInBackgroundExplicitlyFalse) {
        const actualModel = resolved.model
        const userDefinedModel = userCategories?.[args.category]?.model
        if (!parseModelString(actualModel)) {
          return `Invalid model format "${actualModel}". Expected "provider/model" format (e.g., "anthropic/claude-sonnet-4-5").`
        }
        switch (actualModel) {
          case userDefinedModel:
            modelInfo = { model: actualModel, type: "user-defined" }
            break
          case inheritedModel:
            modelInfo = { model: actualModel, type: "inherited" }
            break
          case systemDefaultModel:
            modelInfo = { model: actualModel, type: "system-default" }
            break
        }
        agentToUse = SISYPHUS_JUNIOR_AGENT
        const parsedModel = parseModelString(actualModel)
        categoryModel = parsedModel
          ? (resolved.config.variant
            ? { ...parsedModel, variant: resolved.config.variant }
            : parsedModel)
          : undefined
        categoryPromptAppend = resolved.promptAppend || undefined
        // Unstable agent detection - launch as background for monitoring but wait for result
        const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
        if (isUnstableAgent && args.run_in_background === false) {
          const systemContent = buildSystemContent({ skillContent, categoryPromptAppend })
          try {
@@ -498,14 +577,26 @@ ${textContent || "(No text output)"}`
              parentModel,
              parentAgent,
              model: categoryModel,
-              skills: args.skills.length > 0 ? args.skills : undefined,
+              skills: args.load_skills.length > 0 ? args.load_skills : undefined,
              skillContent: systemContent,
            })
            // Wait for sessionID to be set (task transitions from pending to running)
            // launch() returns immediately with status="pending", sessionID is set async in startTask()
            const WAIT_FOR_SESSION_INTERVAL_MS = 100
            const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
            const waitStart = Date.now()
            while (!task.sessionID && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
              if (ctx.abort?.aborted) {
                return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
              }
              await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS))
            }
            const sessionID = task.sessionID
            if (!sessionID) {
-              return formatDetailedError(new Error("Background task launched but no sessionID returned"), {
+              return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), {
-                operation: "Launch background task (unstable agent)",
+                operation: "Launch monitored background task",
                args,
                agent: agentToUse,
                category: args.category,
@@ -514,7 +605,15 @@ ${textContent || "(No text output)"}`
            ctx.metadata?.({
              title: args.description,
-              metadata: { sessionId: sessionID, category: args.category },
+              metadata: {
                prompt: args.prompt,
                agent: agentToUse,
                category: args.category,
                load_skills: args.load_skills,
                description: args.description,
                run_in_background: args.run_in_background,
                sessionId: sessionID,
              },
            })
            const startTime = new Date()
@@ -530,7 +629,7 @@ ${textContent || "(No text output)"}`
            while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
              if (ctx.abort?.aborted) {
-                return `[UNSTABLE AGENT] Task aborted.\n\nSession ID: ${sessionID}`
+                return `Task aborted (was running in background mode).\n\nSession ID: ${sessionID}`
              }
              await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
@@ -572,25 +671,39 @@ ${textContent || "(No text output)"}`
            const lastMessage = assistantMessages[0]
            if (!lastMessage) {
-              return `[UNSTABLE AGENT] No assistant response found.\n\nSession ID: ${sessionID}`
+              return `No assistant response found (task ran in background mode).\n\nSession ID: ${sessionID}`
            }
            const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
            const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
            const duration = formatDuration(startTime)
-            return `[UNSTABLE AGENT] Task completed in ${duration}.
+            return `SUPERVISED TASK COMPLETED SUCCESSFULLY
-Model: ${actualModel} (unstable/experimental - launched via background for monitoring)
+IMPORTANT: This model (${actualModel}) is marked as unstable/experimental.
 Your run_in_background=false was automatically converted to background mode for reliability monitoring.
 Duration: ${duration}
 Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
 Session ID: ${sessionID}
 MONITORING INSTRUCTIONS:
 - The task was monitored and completed successfully
 - If you observe this agent behaving erratically in future calls, actively monitor its progress
 - Use background_cancel(task_id="...") to abort if the agent seems stuck or producing garbage output
 - Do NOT retry automatically if you see this message - the task already succeeded
 ---
-${textContent || "(No text output)"}`
+RESULT:
 ${textContent || "(No text output)"}
 ---
 To resume this session: resume="${sessionID}"`
          } catch (error) {
            return formatDetailedError(error, {
-              operation: "Launch background task (unstable agent)",
+              operation: "Launch monitored background task",
              args,
              agent: agentToUse,
              category: args.category,
@@ -602,6 +715,13 @@ ${textContent || "(No text output)"}`
          return `Agent name cannot be empty.`
        }
        const agentName = args.subagent_type.trim()
        if (agentName === SISYPHUS_JUNIOR_AGENT) {
          return `Cannot use subagent_type="${SISYPHUS_JUNIOR_AGENT}" directly. Use category parameter instead (e.g., ${categoryExamples}).
 Sisyphus-Junior is spawned automatically when you specify a category. Pick the appropriate category for your task domain.`
        }
        agentToUse = agentName
        // Validate agent exists and is callable (not a primary agent)
@@ -642,13 +762,21 @@ ${textContent || "(No text output)"}`
            parentModel,
            parentAgent,
            model: categoryModel,
-            skills: args.skills.length > 0 ? args.skills : undefined,
+            skills: args.load_skills.length > 0 ? args.load_skills : undefined,
            skillContent: systemContent,
          })
          ctx.metadata?.({
            title: args.description,
-            metadata: { sessionId: task.sessionID, category: args.category },
+            metadata: {
              prompt: args.prompt,
              agent: task.agent,
              category: args.category,
              load_skills: args.load_skills,
              description: args.description,
              run_in_background: args.run_in_background,
              sessionId: task.sessionID,
            },
          })
          return `Background task launched.
@@ -659,7 +787,8 @@ Description: ${task.description}
 Agent: ${task.agent}${args.category ? ` (category: ${args.category})` : ""}
 Status: ${task.status}
-System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.`
+System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.
 To resume this session: resume="${task.sessionID}"`
        } catch (error) {
          return formatDetailedError(error, {
            operation: "Launch background task",
@@ -706,14 +835,24 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
            description: args.description,
            agent: agentToUse,
            isBackground: false,
-            skills: args.skills.length > 0 ? args.skills : undefined,
+            category: args.category,
            skills: args.load_skills,
            modelInfo,
          })
        }
        ctx.metadata?.({
          title: args.description,
-          metadata: { sessionId: sessionID, category: args.category, sync: true },
+          metadata: {
            prompt: args.prompt,
            agent: agentToUse,
            category: args.category,
            load_skills: args.load_skills,
            description: args.description,
            run_in_background: args.run_in_background,
            sessionId: sessionID,
            sync: true,
          },
        })
        try {
@@ -840,11 +979,11 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          .filter((m) => m.info?.role === "assistant")
          .sort((a, b) => (b.info?.time?.created ?? 0) - (a.info?.time?.created ?? 0))
        const lastMessage = assistantMessages[0]
-        
+
        if (!lastMessage) {
          return `No assistant response found.\n\nSession ID: ${sessionID}`
        }
-        
+
        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
@@ -864,7 +1003,10 @@ Session ID: ${sessionID}
 ---
-${textContent || "(No text output)"}`
+${textContent || "(No text output)"}
 ---
 To resume this session: resume="${sessionID}"`
      } catch (error) {
        if (toastManager && taskId !== undefined) {
          toastManager.removeTask(taskId)
--- a/src/tools/delegate-task/types.ts
+++ b/src/tools/delegate-task/types.ts
@@ -5,5 +5,5 @@ export interface DelegateTaskArgs {
  subagent_type?: string
  run_in_background: boolean
  resume?: string
-  skills: string[]
+  load_skills: string[]
 }