refactor(delegate-task): restructure category system for unbiased model selection

- Remove temperature from all categories
- Consolidate CATEGORY_MODEL_CATALOG into DEFAULT_CATEGORIES
- Replace 'general' and 'most-capable' with 'unspecified-low' and 'unspecified-high', respectively
- Add Selection_Gate to unspecified categories to force deliberate selection
- Update quick category to use claude-haiku-4-5
- Update all references and tests across codebase
This commit is contained in:
justsisyphus
2026-01-20 16:22:53 +09:00
parent 2c3f1bfd80
commit 8cc995891e
9 changed files with 82 additions and 135 deletions

View File

@@ -92,7 +92,7 @@ ${skillRows.join("\n")}
**Usage:**
\`\`\`typescript
delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...")
delegate_task(category="general", skills=["playwright"], prompt="...") // Browser testing
delegate_task(category="unspecified-low", skills=["playwright"], prompt="...") // Browser testing
delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...") // UI with browser testing
\`\`\`

View File

@@ -360,7 +360,7 @@ describe("CategoryConfigSchema", () => {
describe("BuiltinCategoryNameSchema", () => {
test("accepts all builtin category names", () => {
// #given
const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "most-capable", "writing", "general"]
const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "unspecified-low", "unspecified-high", "writing"]
// #when / #then
for (const cat of categories) {

View File

@@ -174,9 +174,9 @@ export const BuiltinCategoryNameSchema = z.enum([
"ultrabrain",
"artistry",
"quick",
"most-capable",
"unspecified-low",
"unspecified-high",
"writing",
"general",
])
export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema)

View File

@@ -108,7 +108,7 @@ Example of CORRECT call:
delegate_task(
description="Task description",
prompt="Detailed prompt...",
category="general", // OR subagent_type="explore"
category="unspecified-low", // OR subagent_type="explore"
run_in_background=false,
skills=[]
)

View File

@@ -325,7 +325,7 @@ describe("migrateAgentConfigToCategory", () => {
{ model: "anthropic/claude-sonnet-4-5" },
]
const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "most-capable", "general"]
const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "unspecified-high", "unspecified-low"]
// #when: Migrate each config
const results = configs.map(migrateAgentConfigToCategory)
@@ -385,10 +385,9 @@ describe("shouldDeleteAgentConfig", () => {
test("returns true when all fields match category defaults", () => {
// #given: Config with fields matching category defaults
// Note: DEFAULT_CATEGORIES only has temperature, not model
const config = {
category: "visual-engineering",
temperature: 0.7,
model: "google/gemini-3-pro-preview",
}
// #when: Check if config should be deleted
@@ -399,10 +398,10 @@ describe("shouldDeleteAgentConfig", () => {
})
test("returns false when fields differ from category defaults", () => {
// #given: Config with custom temperature override
// #given: Config with custom model override
const config = {
category: "visual-engineering",
temperature: 0.9, // Different from default (0.7)
model: "anthropic/claude-opus-4-5",
}
// #when: Check if config should be deleted
@@ -415,10 +414,10 @@ describe("shouldDeleteAgentConfig", () => {
test("handles different categories with their defaults", () => {
// #given: Configs for different categories
const configs = [
{ category: "ultrabrain", temperature: 0.1 },
{ category: "quick", temperature: 0.3 },
{ category: "most-capable", temperature: 0.1 },
{ category: "general", temperature: 0.3 },
{ category: "ultrabrain" },
{ category: "quick" },
{ category: "unspecified-high" },
{ category: "unspecified-low" },
]
// #when: Check each config

View File

@@ -52,7 +52,7 @@ export const HOOK_NAME_MAP: Record<string, string> = {
* from explicit model configs to category-based configs.
*
* DO NOT add new entries here. New agents should use:
* - Category-based config (preferred): { category: "most-capable" }
* - Category-based config (preferred): { category: "unspecified-high" }
* - Or inherit from OpenCode's config.model
*
* This map will be removed in a future major version once migration period ends.
@@ -61,8 +61,8 @@ export const MODEL_TO_CATEGORY_MAP: Record<string, string> = {
"google/gemini-3-pro-preview": "visual-engineering",
"openai/gpt-5.2": "ultrabrain",
"anthropic/claude-haiku-4-5": "quick",
"anthropic/claude-opus-4-5": "most-capable",
"anthropic/claude-sonnet-4-5": "general",
"anthropic/claude-opus-4-5": "unspecified-high",
"anthropic/claude-sonnet-4-5": "unspecified-low",
}
export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } {

View File

@@ -99,20 +99,42 @@ EXPECTED OUTPUT:
If your prompt lacks this structure, REWRITE IT before delegating.
</Caller_Warning>`
export const MOST_CAPABLE_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on COMPLEX / MOST-CAPABLE tasks.
export const UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on tasks that don't fit specific categories but require moderate effort.
Maximum capability mindset:
- Bring full reasoning power to bear
- Consider all edge cases and implications
- Deep analysis before action
- Quality over speed
<Selection_Gate>
BEFORE selecting this category, VERIFY ALL conditions:
1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
2. Task requires more than trivial effort but is NOT system-wide
3. Scope is contained within a few files/modules
Approach:
- Thorough understanding first
- Comprehensive solution design
- Meticulous execution
- This is for the most challenging problems
If task fits ANY other category, DO NOT select unspecified-low.
This is NOT a default choice - it's for genuinely unclassifiable moderate-effort work.
</Selection_Gate>
</Category_Context>
<Caller_Warning>
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
**PROVIDE CLEAR STRUCTURE:**
1. MUST DO: Enumerate required actions explicitly
2. MUST NOT DO: State forbidden actions to prevent scope creep
3. EXPECTED OUTPUT: Define concrete success criteria
</Caller_Warning>`
export const UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on tasks that don't fit specific categories but require substantial effort.
<Selection_Gate>
BEFORE selecting this category, VERIFY ALL conditions:
1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
2. Task requires substantial effort across multiple systems/modules
3. Changes have broad impact or require careful coordination
4. NOT just "complex" - must be genuinely unclassifiable AND high-effort
If task fits ANY other category, DO NOT select unspecified-high.
If task is unclassifiable but moderate-effort, use unspecified-low instead.
</Selection_Gate>
</Category_Context>`
export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context>
@@ -131,88 +153,16 @@ Approach:
- Documentation, READMEs, articles, technical writing
</Category_Context>`
export const GENERAL_CATEGORY_PROMPT_APPEND = `<Category_Context>
You are working on GENERAL tasks.
Balanced execution mindset:
- Practical, straightforward approach
- Good enough is good enough
- Focus on getting things done
Approach:
- Standard best practices
- Reasonable trade-offs
- Efficient completion
</Category_Context>
<Caller_Warning>
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
While capable, this model benefits significantly from EXPLICIT instructions.
**PROVIDE CLEAR STRUCTURE:**
1. MUST DO: Enumerate required actions explicitly - don't assume inference
2. MUST NOT DO: State forbidden actions to prevent scope creep or wrong approaches
3. EXPECTED OUTPUT: Define concrete success criteria and deliverables
**COMMON PITFALLS WITHOUT EXPLICIT INSTRUCTIONS:**
- Model may take shortcuts that miss edge cases
- Implicit requirements get overlooked
- Output format may not match expectations
- Scope may expand beyond intended boundaries
**RECOMMENDED PROMPT PATTERN:**
\`\`\`
TASK: [Clear, single-purpose goal]
CONTEXT: [Relevant background the model needs]
MUST DO:
- [Explicit requirement 1]
- [Explicit requirement 2]
MUST NOT DO:
- [Boundary/constraint 1]
- [Boundary/constraint 2]
EXPECTED OUTPUT:
- [What success looks like]
- [How to verify completion]
\`\`\`
The more explicit your prompt, the better the results.
</Caller_Warning>`
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
"visual-engineering": {
temperature: 0.7,
},
ultrabrain: {
temperature: 0.1,
},
artistry: {
temperature: 0.9,
},
quick: {
temperature: 0.3,
},
"most-capable": {
temperature: 0.1,
},
writing: {
temperature: 0.5,
},
general: {
temperature: 0.3,
},
}
export const CATEGORY_MODEL_CATALOG: Record<string, { model: string; variant?: string }> = {
"visual-engineering": { model: "google/gemini-3-pro-preview" },
ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" },
artistry: { model: "google/gemini-3-pro-preview", variant: "max" },
"most-capable": { model: "anthropic/claude-opus-4-5", variant: "max" },
quick: { model: "anthropic/claude-haiku-4-5" },
"unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
"unspecified-high": { model: "anthropic/claude-opus-4-5", variant: "max" },
writing: { model: "google/gemini-3-flash-preview" },
general: { model: "anthropic/claude-sonnet-4-5" },
}
export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
@@ -220,19 +170,19 @@ export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND,
artistry: ARTISTRY_CATEGORY_PROMPT_APPEND,
quick: QUICK_CATEGORY_PROMPT_APPEND,
"most-capable": MOST_CAPABLE_CATEGORY_PROMPT_APPEND,
"unspecified-low": UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND,
"unspecified-high": UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND,
writing: WRITING_CATEGORY_PROMPT_APPEND,
general: GENERAL_CATEGORY_PROMPT_APPEND,
}
export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
"visual-engineering": "Frontend, UI/UX, design, styling, animation",
ultrabrain: "Strict architecture design, very complex business logic",
ultrabrain: "Deep logical reasoning, complex architecture decisions requiring extensive analysis",
artistry: "Highly creative/artistic tasks, novel ideas",
quick: "Cheap & fast - small tasks with minimal overhead, budget-friendly",
"most-capable": "Complex tasks requiring maximum capability",
quick: "Trivial tasks - single file changes, typo fixes, simple modifications",
"unspecified-low": "Tasks that don't fit other categories, low effort required",
"unspecified-high": "Tasks that don't fit other categories, high effort required",
writing: "Documentation, prose, technical writing",
general: "General purpose tasks",
}
const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")

View File

@@ -8,24 +8,23 @@ const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
describe("sisyphus-task", () => {
describe("DEFAULT_CATEGORIES", () => {
test("visual-engineering category has temperature config only (model removed)", () => {
test("visual-engineering category has model config", () => {
// #given
const category = DEFAULT_CATEGORIES["visual-engineering"]
// #when / #then
expect(category).toBeDefined()
expect(category.model).toBeUndefined()
expect(category.temperature).toBe(0.7)
expect(category.model).toBe("google/gemini-3-pro-preview")
})
test("ultrabrain category has temperature config only (model removed)", () => {
test("ultrabrain category has model and variant config", () => {
// #given
const category = DEFAULT_CATEGORIES["ultrabrain"]
// #when / #then
expect(category).toBeDefined()
expect(category.model).toBeUndefined()
expect(category.temperature).toBe(0.1)
expect(category.model).toBe("openai/gpt-5.2-codex")
expect(category.variant).toBe("xhigh")
})
})
@@ -61,13 +60,13 @@ describe("sisyphus-task", () => {
}
})
test("most-capable category exists and has description", () => {
test("unspecified-high category exists and has description", () => {
// #given / #when
const description = CATEGORY_DESCRIPTIONS["most-capable"]
const description = CATEGORY_DESCRIPTIONS["unspecified-high"]
// #then
expect(description).toBeDefined()
expect(description).toContain("Complex")
expect(description).toContain("high effort")
})
})
@@ -141,16 +140,16 @@ describe("sisyphus-task", () => {
expect(result).toBeNull()
})
test("returns systemDefaultModel for builtin category (categories no longer have default models)", () => {
test("returns default model from DEFAULT_CATEGORIES for builtin category", () => {
// #given
const categoryName = "visual-engineering"
// #when
const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// #then - model comes from systemDefaultModel since categories no longer have model defaults
// #then
expect(result).not.toBeNull()
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
expect(result!.config.model).toBe("google/gemini-3-pro-preview")
expect(result!.promptAppend).toContain("VISUAL/UI")
})
@@ -270,7 +269,7 @@ describe("sisyphus-task", () => {
expect(result!.config.model).toBe("my-provider/my-model")
})
test("systemDefaultModel is used when no user model and no inheritedModel", () => {
test("default model from category config is used when no user model and no inheritedModel", () => {
// #given
const categoryName = "visual-engineering"
@@ -279,7 +278,7 @@ describe("sisyphus-task", () => {
// #then
expect(result).not.toBeNull()
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
expect(result!.config.model).toBe("google/gemini-3-pro-preview")
})
})
@@ -907,16 +906,16 @@ describe("sisyphus-task", () => {
expect(resolved!.config.variant).toBe("xhigh")
})
test("systemDefaultModel is used for category without catalog entry", () => {
// #given - general has no catalog entry
const categoryName = "general"
test("default model is used for category with default entry", () => {
// #given - unspecified-low has default model
const categoryName = "unspecified-low"
// #when
const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
// #then - systemDefaultModel is used
// #then - default model from DEFAULT_CATEGORIES is used
expect(resolved).not.toBeNull()
expect(resolved!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
expect(resolved!.config.model).toBe("anthropic/claude-sonnet-4-5")
})
test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => {

View File

@@ -4,7 +4,7 @@ import { join } from "node:path"
import type { BackgroundManager } from "../../features/background-agent"
import type { DelegateTaskArgs } from "./types"
import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_MODEL_CATALOG } from "./constants"
import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
import { discoverSkills } from "../../features/opencode-skill-loader"
@@ -118,24 +118,23 @@ export function resolveCategoryConfig(
const { userCategories, inheritedModel, systemDefaultModel } = options
const defaultConfig = DEFAULT_CATEGORIES[categoryName]
const userConfig = userCategories?.[categoryName]
const catalogEntry = CATEGORY_MODEL_CATALOG[categoryName]
const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? ""
if (!defaultConfig && !userConfig) {
return null
}
// Model priority: user override > inherited from parent > catalog default > system default
// Model priority: user override > inherited from parent > default config > system default
const model = resolveModel({
userModel: userConfig?.model,
inheritedModel,
systemDefault: catalogEntry?.model ?? systemDefaultModel,
systemDefault: defaultConfig?.model ?? systemDefaultModel,
})
const config: CategoryConfig = {
...defaultConfig,
...userConfig,
model,
variant: userConfig?.variant ?? catalogEntry?.variant,
variant: userConfig?.variant ?? defaultConfig?.variant,
}
let promptAppend = defaultPromptAppend