refactor(delegate-task): enhance delegation with dynamic descriptions

Generate the tool description dynamically from the available categories and skills, and remove the hardcoded DELEGATE_TASK_DESCRIPTION constant. Improve parameter handling: the description now documents the unified 'subagent_type' field in place of 'agent', and the 'skills' argument is renamed to 'load_skills'.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-01-22 22:46:23 +09:00
parent 0edfc7f36a
commit aa2b052d28
4 changed files with 281 additions and 184 deletions
--- a/src/tools/delegate-task/constants.ts
+++ b/src/tools/delegate-task/constants.ts
@@ -185,21 +185,4 @@ export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
  writing: "Documentation, prose, technical writing",
 }

-const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")

-export const DELEGATE_TASK_DESCRIPTION = `Spawn agent task with category-based or direct agent selection.
-
-MUTUALLY EXCLUSIVE: Provide EITHER category OR agent, not both (unless resuming).
-
- category: Use predefined category (${BUILTIN_CATEGORIES}) → Spawns Sisyphus-Junior with category config
- agent: Use specific agent directly (e.g., "oracle", "explore")
- background: true=async (returns task_id), false=sync (waits for result). Default: false. Use background=true ONLY for parallel exploration with 5+ independent queries.
- resume: Session ID to resume (from previous task output). Continues agent with FULL CONTEXT PRESERVED - saves tokens, maintains continuity.
- skills: Array of skill names to prepend to prompt (e.g., ["playwright", "frontend-ui-ux"]). Use [] (empty array) if no skills needed.
-
-**WHEN TO USE resume:**
- Task failed/incomplete → resume with "fix: [specific issue]"
- Need follow-up on previous result → resume with additional question
- Multi-turn conversation with same agent → always resume instead of new task
-
-Prompts MUST be in English.`
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -1,5 +1,5 @@
 import { describe, test, expect } from "bun:test"
-import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS, DELEGATE_TASK_DESCRIPTION } from "./constants"
+import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS } from "./constants"
 import { resolveCategoryConfig } from "./tools"
 import type { CategoryConfig } from "../../config/schema"

@@ -70,19 +70,6 @@ describe("sisyphus-task", () => {
    })
  })

-  describe("DELEGATE_TASK_DESCRIPTION", () => {
-    test("documents background parameter as required with default false", () => {
-      // #given / #when / #then
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("background")
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("Default: false")
-    })
-
-    test("warns about parallel exploration usage", () => {
-      // #given / #when / #then
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("5+")
-    })
-  })
-
  describe("category delegation config validation", () => {
    test("returns error when systemDefaultModel is not configured", async () => {
      // #given a mock client with no model in config
@@ -118,7 +105,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -333,7 +320,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: true,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -394,7 +381,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "unspecified-high",
          run_in_background: true,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -451,7 +438,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "unspecified-high",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -466,14 +453,7 @@ describe("sisyphus-task", () => {
  })

  describe("skills parameter", () => {
-    test("DELEGATE_TASK_DESCRIPTION documents skills parameter with empty array option", () => {
-      // #given / #when / #then
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("skills")
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("Array of skill names")
-      expect(DELEGATE_TASK_DESCRIPTION).toContain("[] (empty array) if no skills needed")
-    })
-
-    test("skills parameter is required - returns error when not provided", async () => {
+    test("skills parameter is required - throws error when not provided", async () => {
      // #given
      const { createDelegateTask } = require("./tools")
      
@@ -501,7 +481,8 @@ describe("sisyphus-task", () => {
      }
      
      // #when - skills not provided (undefined)
-      const result = await tool.execute(
+      // #then - should throw error about missing skills
+      await expect(tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
@@ -509,14 +490,10 @@ describe("sisyphus-task", () => {
          run_in_background: false,
        },
        toolContext
-      )
-      
-      // #then - should return error about missing skills
-      expect(result).toContain("skills")
-      expect(result).toContain("REQUIRED")
+      )).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
    })

-    test("null skills returns error", async () => {
+    test("null skills throws error", async () => {
      // #given
      const { createDelegateTask } = require("./tools")
      
@@ -544,22 +521,17 @@ describe("sisyphus-task", () => {
      }
      
      // #when - null passed
-      const result = await tool.execute(
+      // #then - should throw error about null
+      await expect(tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: null,
+          load_skills: null,
        },
        toolContext
-      )
-      
-      // #then - should return error about null
-      expect(result).toContain("Invalid arguments")
-      expect(result).toContain("skills=null")
-      expect(result).toContain("not allowed")
-      expect(result).toContain("skills=[]")
+      )).rejects.toThrow("IT IS HIGHLY RECOMMENDED")
    })

    test("empty array [] is allowed and proceeds without skill content", async () => {
@@ -597,14 +569,14 @@ describe("sisyphus-task", () => {
        abort: new AbortController().signal,
      }
      
-      // #when - empty array skills passed
+      // #when - empty array passed
      await tool.execute(
        {
          description: "Test task",
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: [],
        },
        toolContext
      )
@@ -670,7 +642,7 @@ describe("sisyphus-task", () => {
        prompt: "Continue the task",
        resume: "ses_resume_test",
        run_in_background: false,
-        skills: [],
+        load_skills: ["git-master"],
      },
      toolContext
    )
@@ -725,7 +697,7 @@ describe("sisyphus-task", () => {
        prompt: "Continue in background",
        resume: "ses_bg_resume",
        run_in_background: true,
-        skills: [],
+        load_skills: ["git-master"],
      },
      toolContext
    )
@@ -780,7 +752,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -840,7 +812,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -893,7 +865,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -947,7 +919,7 @@ describe("sisyphus-task", () => {
        prompt: "test",
        category: "custom-cat",
        run_in_background: false,
-        skills: []
+        load_skills: ["git-master"]
      }, toolContext)

      // #then
@@ -1012,14 +984,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something visual",
          category: "visual-engineering",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Gemini task completed successfully")
    }, { timeout: 20000 })

@@ -1070,7 +1042,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something visual",
          category: "visual-engineering",
          run_in_background: true,  // User explicitly says true - normal background
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -1131,7 +1103,7 @@ describe("sisyphus-task", () => {
          prompt: "Do something smart",
          category: "ultrabrain",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
@@ -1195,14 +1167,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something artistic",
          category: "artistry",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Artistry result here")
    }, { timeout: 20000 })

@@ -1259,14 +1231,14 @@ describe("sisyphus-task", () => {
          prompt: "Write something",
          category: "writing",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Writing result here")
    }, { timeout: 20000 })

@@ -1329,14 +1301,14 @@ describe("sisyphus-task", () => {
          prompt: "Do something",
          category: "my-unstable-cat",
          run_in_background: false,
-          skills: [],
+          load_skills: ["git-master"],
        },
        toolContext
      )
      
      // #then - should launch as background BUT wait for and return actual result
      expect(launchCalled).toBe(true)
-      expect(result).toContain("UNSTABLE AGENT")
+      expect(result).toContain("SUPERVISED TASK COMPLETED")
      expect(result).toContain("Custom unstable result")
    }, { timeout: 20000 })
  })
--- a/src/tools/delegate-task/tools.ts
+++ b/src/tools/delegate-task/tools.ts
@@ -4,7 +4,7 @@ import { join } from "node:path"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { DelegateTaskArgs } from "./types"
 import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
-import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
+import { DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_DESCRIPTIONS } from "./constants"
 import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
 import { discoverSkills } from "../../features/opencode-skill-loader"
@@ -12,11 +12,13 @@ import { getTaskToastManager } from "../../features/task-toast-manager"
 import type { ModelFallbackInfo } from "../../features/task-toast-manager/types"
 import { subagentSessions, getSessionAgent } from "../../features/claude-code-session-state"
 import { log, getAgentToolRestrictions, resolveModel, getOpenCodeConfigPaths } from "../../shared"
+import { fetchAvailableModels } from "../../shared/model-availability"
+import { resolveModelWithFallback } from "../../shared/model-resolver"
+import { CATEGORY_MODEL_REQUIREMENTS } from "../../shared/model-requirements"

 type OpencodeClient = PluginInput["client"]

 const SISYPHUS_JUNIOR_AGENT = "Sisyphus-Junior"
-const CATEGORY_EXAMPLES = Object.keys(DEFAULT_CATEGORIES).map(k => `'${k}'`).join(", ")

 function parseModelString(model: string): { providerID: string; modelID: string } | undefined {
  const parts = model.split("/")
@@ -83,7 +85,7 @@ function formatDetailedError(error: unknown, ctx: ErrorContext): string {
    lines.push(`- category: ${ctx.args.category ?? "(none)"}`)
    lines.push(`- subagent_type: ${ctx.args.subagent_type ?? "(none)"}`)
    lines.push(`- run_in_background: ${ctx.args.run_in_background}`)
-    lines.push(`- skills: [${ctx.args.skills?.join(", ") ?? ""}]`)
+    lines.push(`- load_skills: [${ctx.args.load_skills?.join(", ") ?? ""}]`)
    if (ctx.args.resume) {
      lines.push(`- resume: ${ctx.args.resume}`)
    }
@@ -178,33 +180,63 @@ export function buildSystemContent(input: BuildSystemContentInput): string | und
 export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefinition {
  const { manager, client, directory, userCategories, gitMasterConfig } = options

+  const allCategories = { ...DEFAULT_CATEGORIES, ...userCategories }
+  const categoryNames = Object.keys(allCategories)
+  const categoryExamples = categoryNames.map(k => `'${k}'`).join(", ")
+
+  const categoryList = categoryNames.map(name => {
+    const userDesc = userCategories?.[name]?.description
+    const builtinDesc = CATEGORY_DESCRIPTIONS[name]
+    const desc = userDesc || builtinDesc
+    return desc ? `  - ${name}: ${desc}` : `  - ${name}`
+  }).join("\n")
+
+  const description = `Spawn agent task with category-based or direct agent selection.
+
+MUTUALLY EXCLUSIVE: Provide EITHER category OR subagent_type, not both (unless resuming).
+
+- load_skills: ALWAYS REQUIRED. Pass at least one skill name (e.g., ["playwright"], ["git-master", "frontend-ui-ux"]).
+- category: Use predefined category → Spawns Sisyphus-Junior with category config
+  Available categories:
+${categoryList}
+- subagent_type: Use specific agent directly (e.g., "oracle", "explore")
+- run_in_background: true=async (returns task_id), false=sync (waits for result). Default: false. Use background=true ONLY for parallel exploration with 5+ independent queries.
+- resume: Session ID to resume (from previous task output). Continues agent with FULL CONTEXT PRESERVED - saves tokens, maintains continuity.
+
+**WHEN TO USE resume:**
+- Task failed/incomplete → resume with "fix: [specific issue]"
+- Need follow-up on previous result → resume with additional question
+- Multi-turn conversation with same agent → always resume instead of new task
+
+Prompts MUST be in English.`
+
  return tool({
-    description: DELEGATE_TASK_DESCRIPTION,
+    description,
    args: {
-      description: tool.schema.string().describe("Short task description"),
+      load_skills: tool.schema.array(tool.schema.string()).describe("Skill names to inject. REQUIRED - pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like [\"playwright\"], [\"git-master\"] for best results."),
+      description: tool.schema.string().describe("Short task description (3-5 words)"),
      prompt: tool.schema.string().describe("Full detailed prompt for the agent"),
-      category: tool.schema.string().optional().describe(`Category name (e.g., ${CATEGORY_EXAMPLES}). Mutually exclusive with subagent_type.`),
-      subagent_type: tool.schema.string().optional().describe("Agent name directly (e.g., 'oracle', 'explore'). Mutually exclusive with category."),
-      run_in_background: tool.schema.boolean().describe("Run in background. MUST be explicitly set. Use false for task delegation, true only for parallel exploration."),
-      resume: tool.schema.string().optional().describe("Session ID to resume - continues previous agent session with full context"),
-      skills: tool.schema.array(tool.schema.string()).describe("Array of skill names to prepend to the prompt. Use [] (empty array) if no skills needed."),
+      run_in_background: tool.schema.boolean().describe("true=async (returns task_id), false=sync (waits). Default: false"),
+      category: tool.schema.string().optional().describe(`Category (e.g., ${categoryExamples}). Mutually exclusive with subagent_type.`),
+      subagent_type: tool.schema.string().optional().describe("Agent name (e.g., 'oracle', 'explore'). Mutually exclusive with category."),
+      resume: tool.schema.string().optional().describe("Session ID to resume"),
    },
    async execute(args: DelegateTaskArgs, toolContext) {
      const ctx = toolContext as ToolContextWithMetadata
      if (args.run_in_background === undefined) {
-        return `Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`
+        throw new Error(`Invalid arguments: 'run_in_background' parameter is REQUIRED. Use run_in_background=false for task delegation, run_in_background=true only for parallel exploration.`)
      }
-      if (args.skills === undefined) {
-        return `Invalid arguments: 'skills' parameter is REQUIRED. Use skills=[] if no skills are needed, or provide an array of skill names.`
+      if (args.load_skills === undefined) {
+        throw new Error(`Invalid arguments: 'load_skills' parameter is REQUIRED. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills like ["playwright"], ["git-master"] for best results.`)
      }
-      if (args.skills === null) {
-        return `Invalid arguments: skills=null is not allowed. Use skills=[] (empty array) if no skills are needed.`
+      if (args.load_skills === null) {
+        throw new Error(`Invalid arguments: load_skills=null is not allowed. Pass [] if no skills needed, but IT IS HIGHLY RECOMMENDED to pass proper skills.`)
      }
      const runInBackground = args.run_in_background === true

      let skillContent: string | undefined
-      if (args.skills.length > 0) {
-        const { resolved, notFound } = await resolveMultipleSkillsAsync(args.skills, { gitMasterConfig })
+      if (args.load_skills.length > 0) {
+        const { resolved, notFound } = await resolveMultipleSkillsAsync(args.load_skills, { gitMasterConfig })
        if (notFound.length > 0) {
          const allSkills = await discoverSkills({ includeClaudeCodePaths: true })
          const available = allSkills.map(s => s.name).join(", ")
@@ -218,7 +250,7 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini
      const firstMessageAgent = messageDir ? findFirstMessageWithAgent(messageDir) : null
      const sessionAgent = getSessionAgent(ctx.sessionID)
      const parentAgent = ctx.agent ?? sessionAgent ?? firstMessageAgent ?? prevMessage?.agent
-      
+
      log("[delegate_task] parentAgent resolution", {
        sessionID: ctx.sessionID,
        messageDir,
@@ -246,7 +278,14 @@ export function createDelegateTask(options: DelegateTaskToolOptions): ToolDefini

            ctx.metadata?.({
              title: `Resume: ${task.description}`,
-              metadata: { sessionId: task.sessionID },
+              metadata: {
+                prompt: args.prompt,
+                agent: task.agent,
+                load_skills: args.load_skills,
+                description: args.description,
+                run_in_background: args.run_in_background,
+                sessionId: task.sessionID,
+              },
            })

            return `Background task resumed.
@@ -283,7 +322,14 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`

        ctx.metadata?.({
          title: `Resume: ${args.description}`,
-          metadata: { sessionId: args.resume, sync: true },
+          metadata: {
+            prompt: args.prompt,
+            load_skills: args.load_skills,
+            description: args.description,
+            run_in_background: args.run_in_background,
+            sessionId: args.resume,
+            sync: true,
+          },
        })

        try {
@@ -344,7 +390,7 @@ Use \`background_output\` with task_id="${task.id}" to check progress.`

        while (Date.now() - pollStart < 60000) {
          await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
-          
+
          const elapsed = Date.now() - pollStart
          if (elapsed < MIN_STABILITY_TIME_MS) continue

@@ -402,7 +448,10 @@ Session ID: ${args.resume}

 ---

-${textContent || "(No text output)"}`
+${textContent || "(No text output)"}
+
+---
+To resume this session: resume="${args.resume}"`
      }

      if (args.category && args.subagent_type) {
@@ -413,79 +462,109 @@ ${textContent || "(No text output)"}`
        return `Invalid arguments: Must provide either category or subagent_type.`
      }

-      // Fetch OpenCode config at boundary to get system default model
-      let systemDefaultModel: string | undefined
-      try {
-        const openCodeConfig = await client.config.get()
-        systemDefaultModel = (openCodeConfig as { data?: { model?: string } })?.data?.model
-      } catch {
-        // Config fetch failed, proceed without system default
-        systemDefaultModel = undefined
-      }
+       // Fetch OpenCode config at boundary to get system default model
+       let systemDefaultModel: string | undefined
+       try {
+         const openCodeConfig = await client.config.get()
+         systemDefaultModel = (openCodeConfig as { data?: { model?: string } })?.data?.model
+       } catch {
+         // Config fetch failed, proceed without system default
+         systemDefaultModel = undefined
+       }

-      let agentToUse: string
-      let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
-      let categoryPromptAppend: string | undefined
+       let agentToUse: string
+       let categoryModel: { providerID: string; modelID: string; variant?: string } | undefined
+       let categoryPromptAppend: string | undefined

-      const inheritedModel = parentModel
-        ? `${parentModel.providerID}/${parentModel.modelID}`
-        : undefined
+       const inheritedModel = parentModel
+         ? `${parentModel.providerID}/${parentModel.modelID}`
+         : undefined

-      let modelInfo: ModelFallbackInfo | undefined
+       let modelInfo: ModelFallbackInfo | undefined

-      if (args.category) {
-        // Guard: require system default model for category delegation
-        if (!systemDefaultModel) {
-          const paths = getOpenCodeConfigPaths({ binary: "opencode", version: null })
-          return (
-            'oh-my-opencode requires a default model.\n\n' +
-            `Add this to ${paths.configJsonc}:\n\n` +
-            '  "model": "anthropic/claude-sonnet-4-5"\n\n' +
-            '(Replace with your preferred provider/model)'
-          )
-        }
+       if (args.category) {
+         // Guard: require system default model for category delegation
+         if (!systemDefaultModel) {
+           const paths = getOpenCodeConfigPaths({ binary: "opencode", version: null })
+           return (
+             'oh-my-opencode requires a default model.\n\n' +
+             `Add this to ${paths.configJsonc}:\n\n` +
+             '  "model": "anthropic/claude-sonnet-4-5"\n\n' +
+             '(Replace with your preferred provider/model)'
+           )
+         }

-        const resolved = resolveCategoryConfig(args.category, {
-          userCategories,
-          inheritedModel,
-          systemDefaultModel,
+         const availableModels = await fetchAvailableModels(client)
+
+         const resolved = resolveCategoryConfig(args.category, {
+           userCategories,
+           inheritedModel,
+           systemDefaultModel,
+         })
+         if (!resolved) {
+           return `Unknown category: "${args.category}". Available: ${Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ")}`
+         }
+
+         const requirement = CATEGORY_MODEL_REQUIREMENTS[args.category]
+         let actualModel: string
+
+         if (!requirement) {
+           actualModel = resolved.model
+           modelInfo = { model: actualModel, type: "system-default", source: "system-default" }
+         } else {
+          const { model: resolvedModel, source } = resolveModelWithFallback({
+              userModel: userCategories?.[args.category]?.model,
+              fallbackChain: requirement.fallbackChain,
+              availableModels,
+              systemDefaultModel,
+            })
+
+           actualModel = resolvedModel
+
+           if (!parseModelString(actualModel)) {
+             return `Invalid model format "${actualModel}". Expected "provider/model" format (e.g., "anthropic/claude-sonnet-4-5").`
+           }
+
+           let type: "user-defined" | "inherited" | "category-default" | "system-default"
+           switch (source) {
+              case "override":
+                type = "user-defined"
+                break
+              case "provider-fallback":
+                type = "category-default"
+                break
+              case "system-default":
+                type = "system-default"
+                break
+           }
+
+           modelInfo = { model: actualModel, type, source }
+         }
+
+         agentToUse = SISYPHUS_JUNIOR_AGENT
+         const parsedModel = parseModelString(actualModel)
+         categoryModel = parsedModel
+           ? (requirement?.variant
+             ? { ...parsedModel, variant: requirement.variant }
+             : parsedModel)
+           : undefined
+         categoryPromptAppend = resolved.promptAppend || undefined
+
+         const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
+        // Handle both boolean false and string "false" due to potential serialization
+        const isRunInBackgroundExplicitlyFalse = args.run_in_background === false || args.run_in_background === "false" as unknown as boolean
+
+        log("[delegate_task] unstable agent detection", {
+          category: args.category,
+          actualModel,
+          isUnstableAgent,
+          run_in_background_value: args.run_in_background,
+          run_in_background_type: typeof args.run_in_background,
+          isRunInBackgroundExplicitlyFalse,
+          willForceBackground: isUnstableAgent && isRunInBackgroundExplicitlyFalse,
        })
-        if (!resolved) {
-          return `Unknown category: "${args.category}". Available: ${Object.keys({ ...DEFAULT_CATEGORIES, ...userCategories }).join(", ")}`
-        }

-        // Determine model source by comparing against the actual resolved model
-        const actualModel = resolved.model
-        const userDefinedModel = userCategories?.[args.category]?.model
-
-        if (!parseModelString(actualModel)) {
-          return `Invalid model format "${actualModel}". Expected "provider/model" format (e.g., "anthropic/claude-sonnet-4-5").`
-        }
-
-        switch (actualModel) {
-          case userDefinedModel:
-            modelInfo = { model: actualModel, type: "user-defined" }
-            break
-          case inheritedModel:
-            modelInfo = { model: actualModel, type: "inherited" }
-            break
-          case systemDefaultModel:
-            modelInfo = { model: actualModel, type: "system-default" }
-            break
-        }
-
-        agentToUse = SISYPHUS_JUNIOR_AGENT
-        const parsedModel = parseModelString(actualModel)
-        categoryModel = parsedModel
-          ? (resolved.config.variant
-            ? { ...parsedModel, variant: resolved.config.variant }
-            : parsedModel)
-          : undefined
-        categoryPromptAppend = resolved.promptAppend || undefined
-
-        // Unstable agent detection - launch as background for monitoring but wait for result
-        const isUnstableAgent = resolved.config.is_unstable_agent === true || actualModel.toLowerCase().includes("gemini")
-        if (isUnstableAgent && args.run_in_background === false) {
+        if (isUnstableAgent && isRunInBackgroundExplicitlyFalse) {
          const systemContent = buildSystemContent({ skillContent, categoryPromptAppend })

          try {
@@ -498,14 +577,26 @@ ${textContent || "(No text output)"}`
              parentModel,
              parentAgent,
              model: categoryModel,
-              skills: args.skills.length > 0 ? args.skills : undefined,
+              skills: args.load_skills.length > 0 ? args.load_skills : undefined,
              skillContent: systemContent,
            })

+            // Wait for sessionID to be set (task transitions from pending to running)
+            // launch() returns immediately with status="pending", sessionID is set async in startTask()
+            const WAIT_FOR_SESSION_INTERVAL_MS = 100
+            const WAIT_FOR_SESSION_TIMEOUT_MS = 30000
+            const waitStart = Date.now()
+            while (!task.sessionID && Date.now() - waitStart < WAIT_FOR_SESSION_TIMEOUT_MS) {
+              if (ctx.abort?.aborted) {
+                return `Task aborted while waiting for session to start.\n\nTask ID: ${task.id}`
+              }
+              await new Promise(resolve => setTimeout(resolve, WAIT_FOR_SESSION_INTERVAL_MS))
+            }
+
            const sessionID = task.sessionID
            if (!sessionID) {
-              return formatDetailedError(new Error("Background task launched but no sessionID returned"), {
-                operation: "Launch background task (unstable agent)",
+              return formatDetailedError(new Error(`Task failed to start within timeout (30s). Task ID: ${task.id}, Status: ${task.status}`), {
+                operation: "Launch monitored background task",
                args,
                agent: agentToUse,
                category: args.category,
@@ -514,7 +605,15 @@ ${textContent || "(No text output)"}`

            ctx.metadata?.({
              title: args.description,
-              metadata: { sessionId: sessionID, category: args.category },
+              metadata: {
+                prompt: args.prompt,
+                agent: agentToUse,
+                category: args.category,
+                load_skills: args.load_skills,
+                description: args.description,
+                run_in_background: args.run_in_background,
+                sessionId: sessionID,
+              },
            })

            const startTime = new Date()
@@ -530,7 +629,7 @@ ${textContent || "(No text output)"}`

            while (Date.now() - pollStart < MAX_POLL_TIME_MS) {
              if (ctx.abort?.aborted) {
-                return `[UNSTABLE AGENT] Task aborted.\n\nSession ID: ${sessionID}`
+                return `Task aborted (was running in background mode).\n\nSession ID: ${sessionID}`
              }

              await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS))
@@ -572,25 +671,39 @@ ${textContent || "(No text output)"}`
            const lastMessage = assistantMessages[0]

            if (!lastMessage) {
-              return `[UNSTABLE AGENT] No assistant response found.\n\nSession ID: ${sessionID}`
+              return `No assistant response found (task ran in background mode).\n\nSession ID: ${sessionID}`
            }

            const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
            const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
            const duration = formatDuration(startTime)

-            return `[UNSTABLE AGENT] Task completed in ${duration}.
+            return `SUPERVISED TASK COMPLETED SUCCESSFULLY

-Model: ${actualModel} (unstable/experimental - launched via background for monitoring)
+IMPORTANT: This model (${actualModel}) is marked as unstable/experimental.
+Your run_in_background=false was automatically converted to background mode for reliability monitoring.
+
+Duration: ${duration}
 Agent: ${agentToUse}${args.category ? ` (category: ${args.category})` : ""}
 Session ID: ${sessionID}

+MONITORING INSTRUCTIONS:
+- The task was monitored and completed successfully
+- If you observe this agent behaving erratically in future calls, actively monitor its progress
+- Use background_cancel(task_id="...") to abort if the agent seems stuck or producing garbage output
+- Do NOT retry automatically if you see this message - the task already succeeded
+
 ---

-${textContent || "(No text output)"}`
+RESULT:
+
+${textContent || "(No text output)"}
+
+---
+To resume this session: resume="${sessionID}"`
          } catch (error) {
            return formatDetailedError(error, {
-              operation: "Launch background task (unstable agent)",
+              operation: "Launch monitored background task",
              args,
              agent: agentToUse,
              category: args.category,
@@ -602,6 +715,13 @@ ${textContent || "(No text output)"}`
          return `Agent name cannot be empty.`
        }
        const agentName = args.subagent_type.trim()
+
+        if (agentName === SISYPHUS_JUNIOR_AGENT) {
+          return `Cannot use subagent_type="${SISYPHUS_JUNIOR_AGENT}" directly. Use category parameter instead (e.g., ${categoryExamples}).
+
+Sisyphus-Junior is spawned automatically when you specify a category. Pick the appropriate category for your task domain.`
+        }
+
        agentToUse = agentName

        // Validate agent exists and is callable (not a primary agent)
@@ -642,13 +762,21 @@ ${textContent || "(No text output)"}`
            parentModel,
            parentAgent,
            model: categoryModel,
-            skills: args.skills.length > 0 ? args.skills : undefined,
+            skills: args.load_skills.length > 0 ? args.load_skills : undefined,
            skillContent: systemContent,
          })

          ctx.metadata?.({
            title: args.description,
-            metadata: { sessionId: task.sessionID, category: args.category },
+            metadata: {
+              prompt: args.prompt,
+              agent: task.agent,
+              category: args.category,
+              load_skills: args.load_skills,
+              description: args.description,
+              run_in_background: args.run_in_background,
+              sessionId: task.sessionID,
+            },
          })

          return `Background task launched.
@@ -659,7 +787,8 @@ Description: ${task.description}
 Agent: ${task.agent}${args.category ? ` (category: ${args.category})` : ""}
 Status: ${task.status}

-System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.`
+System notifies on completion. Use \`background_output\` with task_id="${task.id}" to check.
+To resume this session: resume="${task.sessionID}"`
        } catch (error) {
          return formatDetailedError(error, {
            operation: "Launch background task",
@@ -706,14 +835,24 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
            description: args.description,
            agent: agentToUse,
            isBackground: false,
-            skills: args.skills.length > 0 ? args.skills : undefined,
+            category: args.category,
+            skills: args.load_skills,
            modelInfo,
          })
        }

        ctx.metadata?.({
          title: args.description,
-          metadata: { sessionId: sessionID, category: args.category, sync: true },
+          metadata: {
+            prompt: args.prompt,
+            agent: agentToUse,
+            category: args.category,
+            load_skills: args.load_skills,
+            description: args.description,
+            run_in_background: args.run_in_background,
+            sessionId: sessionID,
+            sync: true,
+          },
        })

        try {
@@ -840,11 +979,11 @@ System notifies on completion. Use \`background_output\` with task_id="${task.id
          .filter((m) => m.info?.role === "assistant")
          .sort((a, b) => (b.info?.time?.created ?? 0) - (a.info?.time?.created ?? 0))
        const lastMessage = assistantMessages[0]
-        
+
        if (!lastMessage) {
          return `No assistant response found.\n\nSession ID: ${sessionID}`
        }
-        
+
        // Extract text from both "text" and "reasoning" parts (thinking models use "reasoning")
        const textParts = lastMessage?.parts?.filter((p) => p.type === "text" || p.type === "reasoning") ?? []
        const textContent = textParts.map((p) => p.text ?? "").filter(Boolean).join("\n")
@@ -864,7 +1003,10 @@ Session ID: ${sessionID}

 ---

-${textContent || "(No text output)"}`
+${textContent || "(No text output)"}
+
+---
+To resume this session: resume="${sessionID}"`
      } catch (error) {
        if (toastManager && taskId !== undefined) {
          toastManager.removeTask(taskId)
--- a/src/tools/delegate-task/types.ts
+++ b/src/tools/delegate-task/types.ts
@@ -5,5 +5,5 @@ export interface DelegateTaskArgs {
  subagent_type?: string
  run_in_background: boolean
  resume?: string
-  skills: string[]
+  load_skills: string[]
 }