refactor(delegate-task): restructure category system for unbiased model selection
- Remove temperature from all categories
- Consolidate CATEGORY_MODEL_CATALOG into DEFAULT_CATEGORIES
- Replace 'general' and 'most-capable' with 'unspecified-low' and 'unspecified-high'
- Add Selection_Gate to unspecified categories to force deliberate selection
- Update quick category to use claude-haiku-4-5
- Update all references and tests across codebase
This commit is contained in:
@@ -92,7 +92,7 @@ ${skillRows.join("\n")}
|
||||
**Usage:**
|
||||
\`\`\`typescript
|
||||
delegate_task(category="visual-engineering", skills=["frontend-ui-ux"], prompt="...")
|
||||
delegate_task(category="general", skills=["playwright"], prompt="...") // Browser testing
|
||||
delegate_task(category="unspecified-low", skills=["playwright"], prompt="...") // Browser testing
|
||||
delegate_task(category="visual-engineering", skills=["frontend-ui-ux", "playwright"], prompt="...") // UI with browser testing
|
||||
\`\`\`
|
||||
|
||||
|
||||
@@ -360,7 +360,7 @@ describe("CategoryConfigSchema", () => {
|
||||
describe("BuiltinCategoryNameSchema", () => {
|
||||
test("accepts all builtin category names", () => {
|
||||
// #given
|
||||
const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "most-capable", "writing", "general"]
|
||||
const categories = ["visual-engineering", "ultrabrain", "artistry", "quick", "unspecified-low", "unspecified-high", "writing"]
|
||||
|
||||
// #when / #then
|
||||
for (const cat of categories) {
|
||||
|
||||
@@ -174,9 +174,9 @@ export const BuiltinCategoryNameSchema = z.enum([
|
||||
"ultrabrain",
|
||||
"artistry",
|
||||
"quick",
|
||||
"most-capable",
|
||||
"unspecified-low",
|
||||
"unspecified-high",
|
||||
"writing",
|
||||
"general",
|
||||
])
|
||||
|
||||
export const CategoriesConfigSchema = z.record(z.string(), CategoryConfigSchema)
|
||||
|
||||
@@ -108,7 +108,7 @@ Example of CORRECT call:
|
||||
delegate_task(
|
||||
description="Task description",
|
||||
prompt="Detailed prompt...",
|
||||
category="general", // OR subagent_type="explore"
|
||||
category="unspecified-low", // OR subagent_type="explore"
|
||||
run_in_background=false,
|
||||
skills=[]
|
||||
)
|
||||
|
||||
@@ -325,7 +325,7 @@ describe("migrateAgentConfigToCategory", () => {
|
||||
{ model: "anthropic/claude-sonnet-4-5" },
|
||||
]
|
||||
|
||||
const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "most-capable", "general"]
|
||||
const expectedCategories = ["visual-engineering", "ultrabrain", "quick", "unspecified-high", "unspecified-low"]
|
||||
|
||||
// #when: Migrate each config
|
||||
const results = configs.map(migrateAgentConfigToCategory)
|
||||
@@ -385,10 +385,9 @@ describe("shouldDeleteAgentConfig", () => {
|
||||
|
||||
test("returns true when all fields match category defaults", () => {
|
||||
// #given: Config with fields matching category defaults
|
||||
// Note: DEFAULT_CATEGORIES only has temperature, not model
|
||||
const config = {
|
||||
category: "visual-engineering",
|
||||
temperature: 0.7,
|
||||
model: "google/gemini-3-pro-preview",
|
||||
}
|
||||
|
||||
// #when: Check if config should be deleted
|
||||
@@ -399,10 +398,10 @@ describe("shouldDeleteAgentConfig", () => {
|
||||
})
|
||||
|
||||
test("returns false when fields differ from category defaults", () => {
|
||||
// #given: Config with custom temperature override
|
||||
// #given: Config with custom model override
|
||||
const config = {
|
||||
category: "visual-engineering",
|
||||
temperature: 0.9, // Different from default (0.7)
|
||||
model: "anthropic/claude-opus-4-5",
|
||||
}
|
||||
|
||||
// #when: Check if config should be deleted
|
||||
@@ -415,10 +414,10 @@ describe("shouldDeleteAgentConfig", () => {
|
||||
test("handles different categories with their defaults", () => {
|
||||
// #given: Configs for different categories
|
||||
const configs = [
|
||||
{ category: "ultrabrain", temperature: 0.1 },
|
||||
{ category: "quick", temperature: 0.3 },
|
||||
{ category: "most-capable", temperature: 0.1 },
|
||||
{ category: "general", temperature: 0.3 },
|
||||
{ category: "ultrabrain" },
|
||||
{ category: "quick" },
|
||||
{ category: "unspecified-high" },
|
||||
{ category: "unspecified-low" },
|
||||
]
|
||||
|
||||
// #when: Check each config
|
||||
|
||||
@@ -52,7 +52,7 @@ export const HOOK_NAME_MAP: Record<string, string> = {
|
||||
* from explicit model configs to category-based configs.
|
||||
*
|
||||
* DO NOT add new entries here. New agents should use:
|
||||
* - Category-based config (preferred): { category: "most-capable" }
|
||||
* - Category-based config (preferred): { category: "unspecified-high" }
|
||||
* - Or inherit from OpenCode's config.model
|
||||
*
|
||||
* This map will be removed in a future major version once migration period ends.
|
||||
@@ -61,8 +61,8 @@ export const MODEL_TO_CATEGORY_MAP: Record<string, string> = {
|
||||
"google/gemini-3-pro-preview": "visual-engineering",
|
||||
"openai/gpt-5.2": "ultrabrain",
|
||||
"anthropic/claude-haiku-4-5": "quick",
|
||||
"anthropic/claude-opus-4-5": "most-capable",
|
||||
"anthropic/claude-sonnet-4-5": "general",
|
||||
"anthropic/claude-opus-4-5": "unspecified-high",
|
||||
"anthropic/claude-sonnet-4-5": "unspecified-low",
|
||||
}
|
||||
|
||||
export function migrateAgentNames(agents: Record<string, unknown>): { migrated: Record<string, unknown>; changed: boolean } {
|
||||
|
||||
@@ -99,20 +99,42 @@ EXPECTED OUTPUT:
|
||||
If your prompt lacks this structure, REWRITE IT before delegating.
|
||||
</Caller_Warning>`
|
||||
|
||||
export const MOST_CAPABLE_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
You are working on COMPLEX / MOST-CAPABLE tasks.
|
||||
export const UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
You are working on tasks that don't fit specific categories but require moderate effort.
|
||||
|
||||
Maximum capability mindset:
|
||||
- Bring full reasoning power to bear
|
||||
- Consider all edge cases and implications
|
||||
- Deep analysis before action
|
||||
- Quality over speed
|
||||
<Selection_Gate>
|
||||
BEFORE selecting this category, VERIFY ALL conditions:
|
||||
1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
|
||||
2. Task requires more than trivial effort but is NOT system-wide
|
||||
3. Scope is contained within a few files/modules
|
||||
|
||||
Approach:
|
||||
- Thorough understanding first
|
||||
- Comprehensive solution design
|
||||
- Meticulous execution
|
||||
- This is for the most challenging problems
|
||||
If task fits ANY other category, DO NOT select unspecified-low.
|
||||
This is NOT a default choice - it's for genuinely unclassifiable moderate-effort work.
|
||||
</Selection_Gate>
|
||||
</Category_Context>
|
||||
|
||||
<Caller_Warning>
|
||||
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
|
||||
|
||||
**PROVIDE CLEAR STRUCTURE:**
|
||||
1. MUST DO: Enumerate required actions explicitly
|
||||
2. MUST NOT DO: State forbidden actions to prevent scope creep
|
||||
3. EXPECTED OUTPUT: Define concrete success criteria
|
||||
</Caller_Warning>`
|
||||
|
||||
export const UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
You are working on tasks that don't fit specific categories but require substantial effort.
|
||||
|
||||
<Selection_Gate>
|
||||
BEFORE selecting this category, VERIFY ALL conditions:
|
||||
1. Task does NOT fit: quick (trivial), visual-engineering (UI), ultrabrain (deep logic), artistry (creative), writing (docs)
|
||||
2. Task requires substantial effort across multiple systems/modules
|
||||
3. Changes have broad impact or require careful coordination
|
||||
4. NOT just "complex" - must be genuinely unclassifiable AND high-effort
|
||||
|
||||
If task fits ANY other category, DO NOT select unspecified-high.
|
||||
If task is unclassifiable but moderate-effort, use unspecified-low instead.
|
||||
</Selection_Gate>
|
||||
</Category_Context>`
|
||||
|
||||
export const WRITING_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
@@ -131,88 +153,16 @@ Approach:
|
||||
- Documentation, READMEs, articles, technical writing
|
||||
</Category_Context>`
|
||||
|
||||
export const GENERAL_CATEGORY_PROMPT_APPEND = `<Category_Context>
|
||||
You are working on GENERAL tasks.
|
||||
|
||||
Balanced execution mindset:
|
||||
- Practical, straightforward approach
|
||||
- Good enough is good enough
|
||||
- Focus on getting things done
|
||||
|
||||
Approach:
|
||||
- Standard best practices
|
||||
- Reasonable trade-offs
|
||||
- Efficient completion
|
||||
</Category_Context>
|
||||
|
||||
<Caller_Warning>
|
||||
THIS CATEGORY USES A MID-TIER MODEL (claude-sonnet-4-5).
|
||||
|
||||
While capable, this model benefits significantly from EXPLICIT instructions.
|
||||
|
||||
**PROVIDE CLEAR STRUCTURE:**
|
||||
1. MUST DO: Enumerate required actions explicitly - don't assume inference
|
||||
2. MUST NOT DO: State forbidden actions to prevent scope creep or wrong approaches
|
||||
3. EXPECTED OUTPUT: Define concrete success criteria and deliverables
|
||||
|
||||
**COMMON PITFALLS WITHOUT EXPLICIT INSTRUCTIONS:**
|
||||
- Model may take shortcuts that miss edge cases
|
||||
- Implicit requirements get overlooked
|
||||
- Output format may not match expectations
|
||||
- Scope may expand beyond intended boundaries
|
||||
|
||||
**RECOMMENDED PROMPT PATTERN:**
|
||||
\`\`\`
|
||||
TASK: [Clear, single-purpose goal]
|
||||
|
||||
CONTEXT: [Relevant background the model needs]
|
||||
|
||||
MUST DO:
|
||||
- [Explicit requirement 1]
|
||||
- [Explicit requirement 2]
|
||||
|
||||
MUST NOT DO:
|
||||
- [Boundary/constraint 1]
|
||||
- [Boundary/constraint 2]
|
||||
|
||||
EXPECTED OUTPUT:
|
||||
- [What success looks like]
|
||||
- [How to verify completion]
|
||||
\`\`\`
|
||||
|
||||
The more explicit your prompt, the better the results.
|
||||
</Caller_Warning>`
|
||||
|
||||
export const DEFAULT_CATEGORIES: Record<string, CategoryConfig> = {
|
||||
"visual-engineering": {
|
||||
temperature: 0.7,
|
||||
},
|
||||
ultrabrain: {
|
||||
temperature: 0.1,
|
||||
},
|
||||
artistry: {
|
||||
temperature: 0.9,
|
||||
},
|
||||
quick: {
|
||||
temperature: 0.3,
|
||||
},
|
||||
"most-capable": {
|
||||
temperature: 0.1,
|
||||
},
|
||||
writing: {
|
||||
temperature: 0.5,
|
||||
},
|
||||
general: {
|
||||
temperature: 0.3,
|
||||
},
|
||||
}
|
||||
|
||||
export const CATEGORY_MODEL_CATALOG: Record<string, { model: string; variant?: string }> = {
|
||||
"visual-engineering": { model: "google/gemini-3-pro-preview" },
|
||||
ultrabrain: { model: "openai/gpt-5.2-codex", variant: "xhigh" },
|
||||
artistry: { model: "google/gemini-3-pro-preview", variant: "max" },
|
||||
"most-capable": { model: "anthropic/claude-opus-4-5", variant: "max" },
|
||||
quick: { model: "anthropic/claude-haiku-4-5" },
|
||||
"unspecified-low": { model: "anthropic/claude-sonnet-4-5" },
|
||||
"unspecified-high": { model: "anthropic/claude-opus-4-5", variant: "max" },
|
||||
writing: { model: "google/gemini-3-flash-preview" },
|
||||
general: { model: "anthropic/claude-sonnet-4-5" },
|
||||
}
|
||||
|
||||
export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
|
||||
@@ -220,19 +170,19 @@ export const CATEGORY_PROMPT_APPENDS: Record<string, string> = {
|
||||
ultrabrain: STRATEGIC_CATEGORY_PROMPT_APPEND,
|
||||
artistry: ARTISTRY_CATEGORY_PROMPT_APPEND,
|
||||
quick: QUICK_CATEGORY_PROMPT_APPEND,
|
||||
"most-capable": MOST_CAPABLE_CATEGORY_PROMPT_APPEND,
|
||||
"unspecified-low": UNSPECIFIED_LOW_CATEGORY_PROMPT_APPEND,
|
||||
"unspecified-high": UNSPECIFIED_HIGH_CATEGORY_PROMPT_APPEND,
|
||||
writing: WRITING_CATEGORY_PROMPT_APPEND,
|
||||
general: GENERAL_CATEGORY_PROMPT_APPEND,
|
||||
}
|
||||
|
||||
export const CATEGORY_DESCRIPTIONS: Record<string, string> = {
|
||||
"visual-engineering": "Frontend, UI/UX, design, styling, animation",
|
||||
ultrabrain: "Strict architecture design, very complex business logic",
|
||||
ultrabrain: "Deep logical reasoning, complex architecture decisions requiring extensive analysis",
|
||||
artistry: "Highly creative/artistic tasks, novel ideas",
|
||||
quick: "Cheap & fast - small tasks with minimal overhead, budget-friendly",
|
||||
"most-capable": "Complex tasks requiring maximum capability",
|
||||
quick: "Trivial tasks - single file changes, typo fixes, simple modifications",
|
||||
"unspecified-low": "Tasks that don't fit other categories, low effort required",
|
||||
"unspecified-high": "Tasks that don't fit other categories, high effort required",
|
||||
writing: "Documentation, prose, technical writing",
|
||||
general: "General purpose tasks",
|
||||
}
|
||||
|
||||
const BUILTIN_CATEGORIES = Object.keys(DEFAULT_CATEGORIES).join(", ")
|
||||
|
||||
@@ -8,24 +8,23 @@ const SYSTEM_DEFAULT_MODEL = "anthropic/claude-sonnet-4-5"
|
||||
|
||||
describe("sisyphus-task", () => {
|
||||
describe("DEFAULT_CATEGORIES", () => {
|
||||
test("visual-engineering category has temperature config only (model removed)", () => {
|
||||
test("visual-engineering category has model config", () => {
|
||||
// #given
|
||||
const category = DEFAULT_CATEGORIES["visual-engineering"]
|
||||
|
||||
// #when / #then
|
||||
expect(category).toBeDefined()
|
||||
expect(category.model).toBeUndefined()
|
||||
expect(category.temperature).toBe(0.7)
|
||||
expect(category.model).toBe("google/gemini-3-pro-preview")
|
||||
})
|
||||
|
||||
test("ultrabrain category has temperature config only (model removed)", () => {
|
||||
test("ultrabrain category has model and variant config", () => {
|
||||
// #given
|
||||
const category = DEFAULT_CATEGORIES["ultrabrain"]
|
||||
|
||||
// #when / #then
|
||||
expect(category).toBeDefined()
|
||||
expect(category.model).toBeUndefined()
|
||||
expect(category.temperature).toBe(0.1)
|
||||
expect(category.model).toBe("openai/gpt-5.2-codex")
|
||||
expect(category.variant).toBe("xhigh")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -61,13 +60,13 @@ describe("sisyphus-task", () => {
|
||||
}
|
||||
})
|
||||
|
||||
test("most-capable category exists and has description", () => {
|
||||
test("unspecified-high category exists and has description", () => {
|
||||
// #given / #when
|
||||
const description = CATEGORY_DESCRIPTIONS["most-capable"]
|
||||
const description = CATEGORY_DESCRIPTIONS["unspecified-high"]
|
||||
|
||||
// #then
|
||||
expect(description).toBeDefined()
|
||||
expect(description).toContain("Complex")
|
||||
expect(description).toContain("high effort")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -141,16 +140,16 @@ describe("sisyphus-task", () => {
|
||||
expect(result).toBeNull()
|
||||
})
|
||||
|
||||
test("returns systemDefaultModel for builtin category (categories no longer have default models)", () => {
|
||||
test("returns default model from DEFAULT_CATEGORIES for builtin category", () => {
|
||||
// #given
|
||||
const categoryName = "visual-engineering"
|
||||
|
||||
// #when
|
||||
const result = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
|
||||
|
||||
// #then - model comes from systemDefaultModel since categories no longer have model defaults
|
||||
// #then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
|
||||
expect(result!.config.model).toBe("google/gemini-3-pro-preview")
|
||||
expect(result!.promptAppend).toContain("VISUAL/UI")
|
||||
})
|
||||
|
||||
@@ -270,7 +269,7 @@ describe("sisyphus-task", () => {
|
||||
expect(result!.config.model).toBe("my-provider/my-model")
|
||||
})
|
||||
|
||||
test("systemDefaultModel is used when no user model and no inheritedModel", () => {
|
||||
test("default model from category config is used when no user model and no inheritedModel", () => {
|
||||
// #given
|
||||
const categoryName = "visual-engineering"
|
||||
|
||||
@@ -279,7 +278,7 @@ describe("sisyphus-task", () => {
|
||||
|
||||
// #then
|
||||
expect(result).not.toBeNull()
|
||||
expect(result!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
|
||||
expect(result!.config.model).toBe("google/gemini-3-pro-preview")
|
||||
})
|
||||
})
|
||||
|
||||
@@ -907,16 +906,16 @@ describe("sisyphus-task", () => {
|
||||
expect(resolved!.config.variant).toBe("xhigh")
|
||||
})
|
||||
|
||||
test("systemDefaultModel is used for category without catalog entry", () => {
|
||||
// #given - general has no catalog entry
|
||||
const categoryName = "general"
|
||||
test("default model is used for category with default entry", () => {
|
||||
// #given - unspecified-low has default model
|
||||
const categoryName = "unspecified-low"
|
||||
|
||||
// #when
|
||||
const resolved = resolveCategoryConfig(categoryName, { systemDefaultModel: SYSTEM_DEFAULT_MODEL })
|
||||
|
||||
// #then - systemDefaultModel is used
|
||||
// #then - default model from DEFAULT_CATEGORIES is used
|
||||
expect(resolved).not.toBeNull()
|
||||
expect(resolved!.config.model).toBe(SYSTEM_DEFAULT_MODEL)
|
||||
expect(resolved!.config.model).toBe("anthropic/claude-sonnet-4-5")
|
||||
})
|
||||
|
||||
test("inheritedModel takes precedence over systemDefaultModel for builtin category", () => {
|
||||
|
||||
@@ -4,7 +4,7 @@ import { join } from "node:path"
|
||||
import type { BackgroundManager } from "../../features/background-agent"
|
||||
import type { DelegateTaskArgs } from "./types"
|
||||
import type { CategoryConfig, CategoriesConfig, GitMasterConfig } from "../../config/schema"
|
||||
import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS, CATEGORY_MODEL_CATALOG } from "./constants"
|
||||
import { DELEGATE_TASK_DESCRIPTION, DEFAULT_CATEGORIES, CATEGORY_PROMPT_APPENDS } from "./constants"
|
||||
import { findNearestMessageWithFields, findFirstMessageWithAgent, MESSAGE_STORAGE } from "../../features/hook-message-injector"
|
||||
import { resolveMultipleSkillsAsync } from "../../features/opencode-skill-loader/skill-content"
|
||||
import { discoverSkills } from "../../features/opencode-skill-loader"
|
||||
@@ -118,24 +118,23 @@ export function resolveCategoryConfig(
|
||||
const { userCategories, inheritedModel, systemDefaultModel } = options
|
||||
const defaultConfig = DEFAULT_CATEGORIES[categoryName]
|
||||
const userConfig = userCategories?.[categoryName]
|
||||
const catalogEntry = CATEGORY_MODEL_CATALOG[categoryName]
|
||||
const defaultPromptAppend = CATEGORY_PROMPT_APPENDS[categoryName] ?? ""
|
||||
|
||||
if (!defaultConfig && !userConfig) {
|
||||
return null
|
||||
}
|
||||
|
||||
// Model priority: user override > inherited from parent > catalog default > system default
|
||||
// Model priority: user override > inherited from parent > default config > system default
|
||||
const model = resolveModel({
|
||||
userModel: userConfig?.model,
|
||||
inheritedModel,
|
||||
systemDefault: catalogEntry?.model ?? systemDefaultModel,
|
||||
systemDefault: defaultConfig?.model ?? systemDefaultModel,
|
||||
})
|
||||
const config: CategoryConfig = {
|
||||
...defaultConfig,
|
||||
...userConfig,
|
||||
model,
|
||||
variant: userConfig?.variant ?? catalogEntry?.variant,
|
||||
variant: userConfig?.variant ?? defaultConfig?.variant,
|
||||
}
|
||||
|
||||
let promptAppend = defaultPromptAppend
|
||||
|
||||
Reference in New Issue
Block a user