refactor(delegate-task): restructure category system for unbiased model selection

- Remove temperature from all categories
- Consolidate CATEGORY_MODEL_CATALOG into DEFAULT_CATEGORIES
- Replace 'general' and 'most-capable' with 'unspecified-low' and 'unspecified-high', respectively
- Add Selection_Gate to unspecified categories to force deliberate selection
- Update quick category to use claude-haiku-4-5
- Update all references and tests across codebase
2026-01-20 16:22:53 +09:00
parent 2c3f1bfd80
commit 8cc995891e
9 changed files with 82 additions and 135 deletions
--- a/src/agents/atlas.ts
+++ b/src/agents/atlas.ts
@@ -92,7 +92,7 @@ ${skillRows.join("\n")}
 **Usage:**
 \`\`\`typescript
 delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...")
-delegate_task(category="general", skills=["playwright"], prompt="...")  // Browser testing
+delegate_task(category="unspecified-low", skills=["playwright"], prompt="...")  // Browser testing
 delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...")  // UI with browser testing
 \`\`\`

--- a/src/config/schema.test.ts
+++ b/src/config/schema.test.ts
@@ -360,7 +360,7 @@ describe("CategoryConfigSchema", () => {
 describe("BuiltinCategoryNameSchema", () => {
  test("accepts all builtin category names", () => {
    // #given
-    const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "most-capable", "writing", "general"]
+    const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "unspecified-low", "unspecified-high", "writing"]

    // #when / #then
    for (const cat of categories) {
--- a/src/config/schema.ts
+++ b/src/config/schema.ts
@@ -174,9 +174,9 @@ export const BuiltinCategoryNameSchema = z.enum([
  "ultrabrain",
  "artistry",
  "quick",
-  "most-capable",
+  "unspecified-low",
+  "unspecified-high",
  "writing",
-  "general",
 ])

 export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema)
--- a/src/hooks/delegate-task-retry/index.ts
+++ b/src/hooks/delegate-task-retry/index.ts
@@ -108,7 +108,7 @@ Example of CORRECT call:
 delegate_task(
  description="Task description",
  prompt="Detailed prompt...",
-  category="general",  // OR subagent_type="explore"
+  category="unspecified-low",  // OR subagent_type="explore"
  run_in_background=false,
  skills=[]
 )
--- a/src/shared/migration.test.ts
+++ b/src/shared/migration.test.ts
@@ -325,7 +325,7 @@ describe("migrateAgentConfigToCategory", () => {
      { model: "anthropic/claude-sonnet-4-5" },
    ]

-    const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "most-capable", "general"]
+    const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "unspecified-high", "unspecified-low"]

    // #when: Migrate each config
    const results = configs.map(migrateAgentConfigToCategory)
@@ -385,10 +385,9 @@ describe("shouldDeleteAgentConfig", () => {

  test("returns true when all fields match category defaults", () => {
    // #given: Config with fields matching category defaults
-    // Note: DEFAULT_CATEGORIES only has temperature, not model
    const config = {
      category: "visual-engineering",
-      temperature: 0.7,
+      model: "google/gemini-3-pro-preview",
    }

    // #when: Check if config should be deleted
@@ -399,10 +398,10 @@ describe("shouldDeleteAgentConfig", () => {
  })

  test("returns false when fields differ from category defaults", () => {
-    // #given: Config with custom temperature override
+    // #given: Config with custom model override
    const config = {
      category: "visual-engineering",
-      temperature: 0.9, // Different from default (0.7)
+      model: "anthropic/claude-opus-4-5",
    }

    // #when: Check if config should be deleted
@@ -415,10 +414,10 @@ describe("shouldDeleteAgentConfig", () => {
  test("handles different categories with their defaults", () => {
    // #given: Configs for different categories
    const configs = [
-      { category: "ultrabrain", temperature: 0.1 },
-      { category: "quick", temperature: 0.3 },
-      { category: "most-capable", temperature: 0.1 },
-      { category: "general", temperature: 0.3 },
+      { category: "ultrabrain" },
+      { category: "quick" },
+      { category: "unspecified-high" },
+      { category: "unspecified-low" },
    ]

    // #when: Check each config
--- a/src/shared/migration.ts
+++ b/src/shared/migration.ts
@@ -52,7 +52,7 @@ export const HOOK_NAME_MAP: Record<string, string> = {
 * from explicit model configs to category-based configs.
 * 
 * DO NOT add new entries here. New agents should use:
- * - Category-based config (preferred): { category: "most-capable" }
+ * - Category-based config (preferred): { category: "unspecified-high" }
 * - Or inherit from OpenCode's config.model
 * 
 * This map will be removed in a future major version once migration period ends.
@@ -61,8 +61,8 @@ export const MODEL_TO_CATEGORY_MAP: Record<string, string> = {
  "google/gemini-3-pro-preview": "visual-engineering",
  "openai/gpt-5.2": "ultrabrain",
  "anthropic/claude-haiku-4-5": "quick",
-  "anthropic/claude-opus-4-5": "most-capable",
-  "anthropic/claude-sonnet-4-5": "general",
+  "anthropic/claude-opus-4-5": "unspecified-high",
+  "anthropic/claude-sonnet-4-5": "unspecified-low",
 }

 export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } {
--- a/src/tools/delegate-task/constants.ts
+++ b/src/tools/delegate-task/constants.ts
@@ -99,20 +99,42 @@ EXPECTED OUTPUT:
 If your prompt lacks this structure, REWRITE IT before delegating.
 </Caller_Warning>`

-export const MOST_CAPABLE_CATEGORY_PROMPT_APPEND = `<Category_Context>
-You are working on COMPLEX / MOST-CAPABLE tasks.
+export const UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND = `<Category_Context>
+You are working on tasks that don't fit specific categories but require moderate effort.

-Maximum capability mindset:
- Bring full reasoning power to bear
- Consider all edge cases and implications
- Deep analysis before action
- Quality over speed
+<Selection_Gate>
+BEFORE selecting this category, VERIFY ALL conditions:
+1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
+2. Task requires more than trivial effort but is NOT system-wide
+3. Scope is contained within a few files/modules

-Approach:
- Thorough understanding first
- Comprehensive solution design
- Meticulous execution
- This is for the most challenging problems
+If task fits ANY other category, DO NOT select unspecified-low.
+This is NOT a default choice - it's for genuinely unclassifiable moderate-effort work.
+</Selection_Gate>
+</Category_Context>
+
+<Caller_Warning>
+THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
+
+**PROVIDE CLEAR STRUCTURE:**
+1. MUST DO: Enumerate required actions explicitly
+2. MUST NOT DO: State forbidden actions to prevent scope creep
+3. EXPECTED OUTPUT: Define concrete success criteria
+</Caller_Warning>`
+
+export const UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND = `<Category_Context>
+You are working on tasks that don't fit specific categories but require substantial effort.
+
+<Selection_Gate>
+BEFORE selecting this category, VERIFY ALL conditions:
+1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
+2. Task requires substantial effort across multiple systems/modules
+3. Changes have broad impact or require careful coordination
+4. NOT just "complex" - must be genuinely unclassifiable AND high-effort
+
+If task fits ANY other category, DO NOT select unspecified-high.
+If task is unclassifiable but moderate-effort, use unspecified-low instead.
+</Selection_Gate>
 </Category_Context>`

 export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context>
@@ -131,88 +153,16 @@ Approach:
 - Documentation, READMEs, articles, technical writing
 </Category_Context>`

-export const GENERAL_CATEGORY_PROMPT_APPEND = `<Category_Context>
-You are working on GENERAL tasks.

-Balanced execution mindset:
- Practical, straightforward approach
- Good enough is good enough
- Focus on getting things done
-
-Approach:
- Standard best practices
- Reasonable trade-offs
- Efficient completion
-</Category_Context>
-
-<Caller_Warning>
-THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
-
-While capable, this model benefits significantly from EXPLICIT instructions.
-
-**PROVIDE CLEAR STRUCTURE:**
-1. MUST DO: Enumerate required actions explicitly - don't assume inference
-2. MUST NOT DO: State forbidden actions to prevent scope creep or wrong approaches
-3. EXPECTED OUTPUT: Define concrete success criteria and deliverables
-
-**COMMON PITFALLS WITHOUT EXPLICIT INSTRUCTIONS:**
- Model may take shortcuts that miss edge cases
- Implicit requirements get overlooked
- Output format may not match expectations
- Scope may expand beyond intended boundaries
-
-**RECOMMENDED PROMPT PATTERN:**
-\`\`\`
-TASK: [Clear, single-purpose goal]
-
-CONTEXT: [Relevant background the model needs]
-
-MUST DO:
- [Explicit requirement 1]
- [Explicit requirement 2]
-
-MUST NOT DO:
- [Boundary/constraint 1]
- [Boundary/constraint 2]
-
-EXPECTED OUTPUT:
- [What success looks like]
- [How to verify completion]
-\`\`\`
-
-The more explicit your prompt, the better the results.
-</Caller_Warning>`

 export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
-  "visual-engineering": {
-    temperature: 0.7,
-  },
-  ultrabrain: {
-    temperature: 0.1,
-  },
-  artistry: {
-    temperature: 0.9,
-  },
-  quick: {
-    temperature: 0.3,
-  },
-  "most-capable": {
-    temperature: 0.1,
-  },
-  writing: {
-    temperature: 0.5,
-  },
-  general: {
-    temperature: 0.3,
-  },
-}
-
-export const CATEGORY_MODEL_CATALOG: Record<string, { model: string; variant?: string }> = {
+  "visual-engineering": { model: "google/gemini-3-pro-preview" },
  ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" },
  artistry: { model: "google/gemini-3-pro-preview", variant: "max" },
-  "most-capable": { model: "anthropic/claude-opus-4-5", variant: "max" },
+  quick: { model: "anthropic/claude-haiku-4-5" },
+  "unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
+  "unspecified-high": { model: "anthropic/claude-opus-4-5", variant: "max" },
  writing: { model: "google/gemini-3-flash-preview" },
-  general: { model: "anthropic/claude-sonnet-4-5" },
 }

 export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
@@ -220,19 +170,19 @@ export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
  ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND,
  artistry: ARTISTRY_CATEGORY_PROMPT_APPEND,
  quick: QUICK_CATEGORY_PROMPT_APPEND,
-  "most-capable": MOST_CAPABLE_CATEGORY_PROMPT_APPEND,
+  "unspecified-low": UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND,
+  "unspecified-high": UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND,
  writing: WRITING_CATEGORY_PROMPT_APPEND,
-  general: GENERAL_CATEGORY_PROMPT_APPEND,
 }

 export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
  "visual-engineering": "Frontend, UI/UX, design, styling, animation",
-  ultrabrain: "Strict architecture design, very complex business logic",
+  ultrabrain: "Deep logical reasoning, complex architecture decisions requiring extensive analysis",
  artistry: "Highly creative/artistic tasks, novel ideas",
-  quick: "Cheap & fast - small tasks with minimal overhead, budget-friendly",
-  "most-capable": "Complex tasks requiring maximum capability",
+  quick: "Trivial tasks - single file changes, typo fixes, simple modifications",
+  "unspecified-low": "Tasks that don't fit other categories, low effort required",
+  "unspecified-high": "Tasks that don't fit other categories, high effort required",
  writing: "Documentation, prose, technical writing",
-  general: "General purpose tasks",
 }

 const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")
--- a/src/tools/delegate-task/tools.test.ts
+++ b/src/tools/delegate-task/tools.test.ts
@@ -8,24 +8,23 @@ const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"

 describe("sisyphus-task", () => {
  describe("DEFAULT_CATEGORIES", () => {
-    test("visual-engineering category has temperature config only (model removed)", () => {
+    test("visual-engineering category has model config", () => {
      // #given
      const category = DEFAULT_CATEGORIES["visual-engineering"]

      // #when / #then
      expect(category).toBeDefined()
-      expect(category.model).toBeUndefined()
-      expect(category.temperature).toBe(0.7)
+      expect(category.model).toBe("google/gemini-3-pro-preview")
    })

-    test("ultrabrain category has temperature config only (model removed)", () => {
+    test("ultrabrain category has model and variant config", () => {
      // #given
      const category = DEFAULT_CATEGORIES["ultrabrain"]

      // #when / #then
      expect(category).toBeDefined()
-      expect(category.model).toBeUndefined()
-      expect(category.temperature).toBe(0.1)
+      expect(category.model).toBe("openai/gpt-5.2-codex")
+      expect(category.variant).toBe("xhigh")
    })
  })

@@ -61,13 +60,13 @@ describe("sisyphus-task", () => {
      }
    })

-    test("most-capable category exists and has description", () => {
+    test("unspecified-high category exists and has description", () => {
      // #given / #when
-      const description = CATEGORY_DESCRIPTIONS["most-capable"]
+      const description = CATEGORY_DESCRIPTIONS["unspecified-high"]

      // #then
      expect(description).toBeDefined()
-      expect(description).toContain("Complex")
+      expect(description).toContain("high effort")
    })
  })

@@ -141,16 +140,16 @@ describe("sisyphus-task", () => {
      expect(result).toBeNull()
    })

-    test("returns systemDefaultModel for builtin category (categories no longer have default models)", () => {
+    test("returns default model from DEFAULT_CATEGORIES for builtin category", () => {
      // #given
      const categoryName = "visual-engineering"

      // #when
      const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })

-      // #then - model comes from systemDefaultModel since categories no longer have model defaults
+      // #then
      expect(result).not.toBeNull()
-      expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(result!.config.model).toBe("google/gemini-3-pro-preview")
      expect(result!.promptAppend).toContain("VISUAL/UI")
    })

@@ -270,7 +269,7 @@ describe("sisyphus-task", () => {
      expect(result!.config.model).toBe("my-provider/my-model")
    })

-    test("systemDefaultModel is used when no user model and no inheritedModel", () => {
+    test("default model from category config is used when no user model and no inheritedModel", () => {
      // #given
      const categoryName = "visual-engineering"

@@ -279,7 +278,7 @@ describe("sisyphus-task", () => {

      // #then
      expect(result).not.toBeNull()
-      expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(result!.config.model).toBe("google/gemini-3-pro-preview")
    })
  })

@@ -907,16 +906,16 @@ describe("sisyphus-task", () => {
      expect(resolved!.config.variant).toBe("xhigh")
    })

-    test("systemDefaultModel is used for category without catalog entry", () => {
-      // #given - general has no catalog entry
-      const categoryName = "general"
+    test("default model is used for category with default entry", () => {
+      // #given - unspecified-low has default model
+      const categoryName = "unspecified-low"
      
      // #when
      const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
      
-      // #then - systemDefaultModel is used
+      // #then - default model from DEFAULT_CATEGORIES is used
      expect(resolved).not.toBeNull()
-      expect(resolved!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
+      expect(resolved!.config.model).toBe("anthropic/claude-sonnet-4-5")
    })

    test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => {
--- a/src/tools/delegate-task/tools.ts
+++ b/src/tools/delegate-task/tools.ts
@@ -4,7 +4,7 @@ import { join } from "node:path"
 import type { BackgroundManager } from "../../features/background-agent"
 import type { DelegateTaskArgs } from "./types"
 import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
-import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_MODEL_CATALOG } from "./constants"
+import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
 import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
 import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
 import { discoverSkills } from "../../features/opencode-skill-loader"
@@ -118,24 +118,23 @@ export function resolveCategoryConfig(
  const { userCategories, inheritedModel, systemDefaultModel } = options
  const defaultConfig = DEFAULT_CATEGORIES[categoryName]
  const userConfig = userCategories?.[categoryName]
-  const catalogEntry = CATEGORY_MODEL_CATALOG[categoryName]
  const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? ""

  if (!defaultConfig && !userConfig) {
    return null
  }

-  // Model priority: user override > inherited from parent > catalog default > system default
+  // Model priority: user override > inherited from parent > default config > system default
  const model = resolveModel({
    userModel: userConfig?.model,
    inheritedModel,
-    systemDefault: catalogEntry?.model ?? systemDefaultModel,
+    systemDefault: defaultConfig?.model ?? systemDefaultModel,
  })
  const config: CategoryConfig = {
    ...defaultConfig,
    ...userConfig,
    model,
-    variant: userConfig?.variant ?? catalogEntry?.variant,
+    variant: userConfig?.variant ?? defaultConfig?.variant,
  }

  let promptAppend = defaultPromptAppend